| Function: flux_calc_kernel(int, int, int, int, double, clover::Buffer2D<double>&, clover::Buffer2D<d ... | Module: exec | Source: flux_calc.cpp:36-40 [...] | Coverage (incl. loops): 4.67% | (excl. loops): 0.00% |
|---|
| Function: flux_calc_kernel(int, int, int, int, double, clover::Buffer2D<double>&, clover::Buffer2D<d ... | Module: exec | Source: flux_calc.cpp:36-40 [...] | Coverage (incl. loops): 4.67% | (excl. loops): 0.00% |
|---|
/home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/build/CloverLeaf2.0-CXX/src/omp/flux_calc.cpp: 36 - 40 |
-------------------------------------------------------------------------------- |
36: #pragma omp parallel for simd collapse(2) |
37: for (int j = (y_min + 1); j < (y_max + 1 + 2); j++) { |
38: for (int i = (x_min + 1); i < (x_max + 1 + 2); i++) { |
39: vol_flux_x(i, j) = 0.25 * dt * xarea(i, j) * (xvel0(i, j) + xvel0(i + 0, j + 1) + xvel1(i, j) + xvel1(i + 0, j + 1)); |
40: vol_flux_y(i, j) = 0.25 * dt * yarea(i, j) * (yvel0(i, j) + yvel0(i + 1, j + 0) + yvel1(i, j) + yvel1(i + 1, j + 0)); |
/home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/build/CloverLeaf2.0-CXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x4357e0 STP X29, X30, [SP, #688]! |
0x4357e4 ADD X29, SP, #0 |
0x4357e8 STP X19, X20, [SP, #16] |
0x4357ec ORR X20, XZR, X0 |
0x4357f0 STP X21, X22, [SP, #32] |
0x4357f4 LDP W22, W1, [X0, #80] |
0x4357f8 LDR W0, [X0, #72] |
0x4357fc LDR W3, [X20, #76] |
0x435800 ADD W2, W1, #3 |
0x435804 ADD W22, W22, #1 |
0x435808 ADD W4, W0, #1 |
0x43580c STP W2, W4, [SP, #232] |
0x435810 CMP W22, W2 |
0x435814 B.GE 435c9c |
0x435818 ADD W19, W3, #3 |
0x43581c STP X23, X24, [SP, #48] |
0x435820 SUB W23, W2, W22 |
0x435824 CMP W4, W19 |
0x435828 B.GE 435c98 |
0x43582c SUB W5, W19, W4 |
0x435830 MUL W24, W23, W5 |
0x435834 STR W5, [SP, #272] |
0x435838 BL 410210 |
0x43583c ORR W21, WZR, W0 |
0x435840 BL 410240 |
0x435844 UDIV W7, W24, W21 |
0x435848 ORR W6, WZR, W0 |
0x43584c MSUB W8, W7, W21, W24 |
0x435850 CMP W0, W8 |
0x435854 B.CC 435cc8 |
0x435858 MADD W4, W7, W6, W8 |
0x43585c ADD W9, W7, W4 |
0x435860 STR W9, [SP, #276] |
0x435864 CMP W4, W9 |
0x435868 B.CS 435c98 |
0x43586c LDR W10, [SP, #272] |
0x435870 FMOV D30, #0.2500000 |
0x435874 STP X25, X26, [SP, #64] |
0x435878 STP X27, X28, [SP, #80] |
0x43587c UDIV W11, W4, W10 |
0x435880 LDR D23, [X20] |
0x435884 LDP X15, X27, [X20, #48] |
0x435888 LDR W16, [SP, #236] |
0x43588c FMUL D30, D23, D30 |
0x435890 STR X15, [SP, #320] |
0x435894 LDR X14, [X20, #8] |
0x435898 MSUB W12, W11, W10, W4 |
0x43589c ADD W13, W11, W22 |
0x4358a0 LDR X17, [X20, #16] |
0x4358a4 SBFM X18, X13, #0, #31 |
0x4358a8 LDR X28, [X20, #24] |
0x4358ac ADD W25, W12, W16 |
0x4358b0 DUP V29.2D, V30.D[0] |
0x4358b4 SUB W12, W19, W25 |
0x4358b8 STR X14, [SP, #280] |
0x4358bc LDR X30, [X20, #32] |
0x4358c0 STR X17, [SP, #304] |
0x4358c4 LDR X26, [X20, #64] |
0x4358c8 STR X28, [SP, #288] |
0x4358cc LDR X20, [X20, #40] |
0x4358d0 STR X30, [SP, #312] |
0x4358d4 STR X26, [SP, #328] |
0x4358d8 STR X20, [SP, #296] |
0x4358dc HINT #0 |
(235) 0x4358e0 CMP W7, W12 |
(235) 0x4358e4 CSEL W3, W7, W12, #9 |
(235) 0x4358e8 ADD W1, W4, W3 |
(235) 0x4358ec STR W1, [SP, #148] |
(235) 0x4358f0 CMP W4, W1 |
(235) 0x4358f4 B.CS 435c74 |
(235) 0x4358f8 LDP X22, X2, [SP, #280] |
(235) 0x4358fc LDP X4, X23, [SP, #296] |
(235) 0x435900 LDP X24, X5, [SP, #312] |
(235) 0x435904 LDR X21, [X2] |
(235) 0x435908 LDR X8, [X4] |
(235) 0x43590c LDR X0, [X27] |
(235) 0x435910 MUL X6, X21, X18 |
(235) 0x435914 LDR X9, [X22] |
(235) 0x435918 LDR X28, [X24] |
(235) 0x43591c ADD X13, X21, X6 |
(235) 0x435920 MUL X21, X8, X18 |
(235) 0x435924 MUL X19, X18, X0 |
(235) 0x435928 LDR X7, [X5] |
(235) 0x43592c MUL X15, X18, X9 |
(235) 0x435930 ADD X17, X8, X21 |
(235) 0x435934 LDR X12, [X23] |
(235) 0x435938 MUL X20, X18, X28 |
(235) 0x43593c LDR X10, [X22, #16] |
(235) 0x435940 MUL X11, X18, X7 |
(235) 0x435944 LDR X1, [X27, #16] |
(235) 0x435948 MUL X26, X18, X12 |
(235) 0x43594c LDR X22, [SP, #328] |
(235) 0x435950 STR X10, [SP, #152] |
(235) 0x435954 LDR X14, [X2, #16] |
(235) 0x435958 LDR X16, [X23, #16] |
(235) 0x43595c LDR X30, [X4, #16] |
(235) 0x435960 STP X1, X26, [SP, #120] |
(235) 0x435964 STP X11, X20, [SP, #168] |
(235) 0x435968 STP X19, X17, [SP, #184] |
(235) 0x43596c STP X21, X13, [SP, #200] |
(235) 0x435970 STP X6, X15, [SP, #216] |
(235) 0x435974 LDR X0, [X22] |
(235) 0x435978 STR X16, [SP, #112] |
(235) 0x43597c LDR X2, [X22, #16] |
(235) 0x435980 LDR X26, [X5, #16] |
(235) 0x435984 MUL X8, X18, X0 |
(235) 0x435988 LDR X28, [X24, #16] |
(235) 0x43598c STR X2, [SP, #104] |
(235) 0x435990 STR X8, [SP, #160] |
(235) 0x435994 CMP W3, #1 |
(235) 0x435998 B.EQ 435bac |
(235) 0x43599c SBFM X9, X25, #0, #31 |
(235) 0x4359a0 UBFM W16, W3, #1, #31 |
(235) 0x4359a4 ADD X22, X17, X9 |
(235) 0x4359a8 LDP X7, X17, [SP, #120] |
(235) 0x4359ac ADD X23, X11, X9 |
(235) 0x4359b0 ADD X5, X15, X9 |
(235) 0x4359b4 ADD X4, X20, X9 |
(235) 0x4359b8 ADD X2, X26, X23,LSL #3 |
(235) 0x4359bc UBFM X12, X23, #61, #60 |
(235) 0x4359c0 ADD X24, X13, X9 |
(235) 0x4359c4 ADD X13, X10, X5,LSL #3 |
(235) 0x4359c8 ADD X23, X6, X9 |
(235) 0x4359cc ADD X1, X8, X9 |
(235) 0x4359d0 ADD X6, X21, X9 |
(235) 0x4359d4 ADD X19, X19, X9 |
(235) 0x4359d8 UBFM X20, X5, #61, #60 |
(235) 0x4359dc ADD X5, X17, X9 |
(235) 0x4359e0 LDR X9, [SP, #112] |
(235) 0x4359e4 ADD X8, X7, X19,LSL #3 |
(235) 0x4359e8 UBFM X17, X5, #61, #60 |
(235) 0x4359ec UBFM X11, X4, #61, #60 |
(235) 0x4359f0 UBFM X15, X1, #61, #60 |
(235) 0x4359f4 ADD X10, X30, X6,LSL #3 |
(235) 0x4359f8 UBFM X21, X6, #61, #60 |
(235) 0x4359fc STP X12, X11, [SP, #240] |
(235) 0x435a00 ADD X4, X28, X4,LSL #3 |
(235) 0x435a04 MOVZ X0, #0 |
(235) 0x435a08 UBFM X19, X19, #61, #60 |
(235) 0x435a0c ADD X7, X9, X5,LSL #3 |
(235) 0x435a10 ADD X9, X11, #8 |
(235) 0x435a14 ADD X11, X12, #8 |
(235) 0x435a18 LDR X5, [SP, #104] |
(235) 0x435a1c ADD X12, X14, X23,LSL #3 |
(235) 0x435a20 ADD X6, X28, X9 |
(235) 0x435a24 STR X11, [SP, #136] |
(235) 0x435a28 UBFM X23, X23, #61, #60 |
(235) 0x435a2c ADD X11, X14, X24,LSL #3 |
(235) 0x435a30 STR X9, [SP, #256] |
(235) 0x435a34 UBFM X24, X24, #61, #60 |
(235) 0x435a38 ADD X9, X30, X22,LSL #3 |
(235) 0x435a3c STR X15, [SP, #264] |
(235) 0x435a40 UBFM X22, X22, #61, #60 |
(235) 0x435a44 UBFM X15, X16, #60, #59 |
(235) 0x435a48 ADD X1, X5, X1,LSL #3 |
(235) 0x435a4c LDR X5, [SP, #136] |
(235) 0x435a50 ADD X5, X26, X5 |
(235) 0x435a54 TBZ W16, #0, 435ae0 |
(235) 0x435a58 LDR Q22, [X14, X24] |
(235) 0x435a5c MOVZ X0, #16 |
(235) 0x435a60 LDR Q21, [X14, X23] |
(235) 0x435a64 LDR Q20, [X30, X22] |
(235) 0x435a68 LDR Q19, [X30, X21] |
(235) 0x435a6c LDR X16, [SP, #152] |
(235) 0x435a70 FADD V0.2D, V22.2D, V21.2D |
(235) 0x435a74 LDR X24, [SP, #136] |
(235) 0x435a78 FADD V1.2D, V20.2D, V19.2D |
(235) 0x435a7c LDR X23, [SP, #240] |
(235) 0x435a80 LDR Q18, [X16, X20] |
(235) 0x435a84 LDR X20, [SP, #120] |
(235) 0x435a88 FADD V2.2D, V0.2D, V1.2D |
(235) 0x435a8c LDR X21, [SP, #256] |
(235) 0x435a90 FMUL V3.2D, V18.2D, V29.2D |
(235) 0x435a94 LDR X22, [SP, #112] |
(235) 0x435a98 LDR X16, [SP, #104] |
(235) 0x435a9c FMUL V4.2D, V2.2D, V3.2D |
(235) 0x435aa0 STR Q4, [X20, X19] |
(235) 0x435aa4 LDR X19, [SP, #248] |
(235) 0x435aa8 LDR Q17, [X28, X21] |
(235) 0x435aac LDR Q7, [X26, X23] |
(235) 0x435ab0 LDR Q16, [X28, X19] |
(235) 0x435ab4 LDR Q6, [X26, X24] |
(235) 0x435ab8 LDR Q26, [X22, X17] |
(235) 0x435abc FADD V5.2D, V17.2D, V16.2D |
(235) 0x435ac0 LDR X17, [SP, #264] |
(235) 0x435ac4 FADD V24.2D, V7.2D, V6.2D |
(235) 0x435ac8 FMUL V27.2D, V26.2D, V29.2D |
(235) 0x435acc FADD V25.2D, V5.2D, V24.2D |
(235) 0x435ad0 FMUL V28.2D, V25.2D, V27.2D |
(235) 0x435ad4 STR Q28, [X16, X17] |
(235) 0x435ad8 CMP X0, X15 |
(235) 0x435adc B.EQ 435ba0 |
(236) 0x435ae0 LDR Q31, [X11, X0] |
(236) 0x435ae4 ADD X20, X0, #16 |
(236) 0x435ae8 LDR Q23, [X12, X0] |
(236) 0x435aec LDR Q22, [X9, X0] |
(236) 0x435af0 LDR Q21, [X10, X0] |
(236) 0x435af4 FADD V20.2D, V31.2D, V23.2D |
(236) 0x435af8 LDR Q19, [X13, X0] |
(236) 0x435afc FADD V0.2D, V22.2D, V21.2D |
(236) 0x435b00 FMUL V1.2D, V19.2D, V29.2D |
(236) 0x435b04 FADD V2.2D, V20.2D, V0.2D |
(236) 0x435b08 FMUL V18.2D, V2.2D, V1.2D |
(236) 0x435b0c STR Q18, [X8, X0] |
(236) 0x435b10 LDR Q3, [X6, X0] |
(236) 0x435b14 LDR Q4, [X4, X0] |
(236) 0x435b18 LDR Q17, [X2, X0] |
(236) 0x435b1c LDR Q16, [X5, X0] |
(236) 0x435b20 FADD V5.2D, V3.2D, V4.2D |
(236) 0x435b24 LDR Q7, [X7, X0] |
(236) 0x435b28 FADD V6.2D, V17.2D, V16.2D |
(236) 0x435b2c FMUL V24.2D, V7.2D, V29.2D |
(236) 0x435b30 FADD V25.2D, V5.2D, V6.2D |
(236) 0x435b34 FMUL V26.2D, V25.2D, V24.2D |
(236) 0x435b38 STR Q26, [X1, X0] |
(236) 0x435b3c ADD X0, X0, #32 |
(236) 0x435b40 LDR Q27, [X11, X20] |
(236) 0x435b44 LDR Q28, [X12, X20] |
(236) 0x435b48 LDR Q31, [X9, X20] |
(236) 0x435b4c LDR Q22, [X10, X20] |
(236) 0x435b50 FADD V23.2D, V27.2D, V28.2D |
(236) 0x435b54 LDR Q20, [X13, X20] |
(236) 0x435b58 FADD V21.2D, V31.2D, V22.2D |
(236) 0x435b5c FMUL V19.2D, V20.2D, V29.2D |
(236) 0x435b60 FADD V0.2D, V23.2D, V21.2D |
(236) 0x435b64 FMUL V1.2D, V0.2D, V19.2D |
(236) 0x435b68 STR Q1, [X8, X20] |
(236) 0x435b6c LDR Q2, [X6, X20] |
(236) 0x435b70 LDR Q18, [X4, X20] |
(236) 0x435b74 LDR Q3, [X2, X20] |
(236) 0x435b78 LDR Q4, [X5, X20] |
(236) 0x435b7c FADD V17.2D, V2.2D, V18.2D |
(236) 0x435b80 LDR Q5, [X7, X20] |
(236) 0x435b84 FADD V16.2D, V3.2D, V4.2D |
(236) 0x435b88 FMUL V7.2D, V5.2D, V29.2D |
(236) 0x435b8c FADD V6.2D, V17.2D, V16.2D |
(236) 0x435b90 FMUL V24.2D, V6.2D, V7.2D |
(236) 0x435b94 STR Q24, [X1, X20] |
(236) 0x435b98 CMP X0, X15 |
(236) 0x435b9c B.NE 435ae0 |
(235) 0x435ba0 TBZ W3, #0, 435c70 |
(235) 0x435ba4 AND W3, W3, #0xfffffffe |
(235) 0x435ba8 ADD W25, W25, W3 |
(235) 0x435bac LDR X15, [SP, #216] |
(235) 0x435bb0 SBFM X13, X25, #0, #31 |
(235) 0x435bb4 ADD W4, W25, #1 |
(235) 0x435bb8 LDP X7, X6, [SP, #184] |
(235) 0x435bbc SBFM X10, X4, #0, #31 |
(235) 0x435bc0 LDP X1, X2, [SP, #200] |
(235) 0x435bc4 ADD X9, X15, X13 |
(235) 0x435bc8 ADD X0, X6, X13 |
(235) 0x435bcc ADD X21, X7, X13 |
(235) 0x435bd0 LDR X22, [SP, #224] |
(235) 0x435bd4 ADD X11, X2, X13 |
(235) 0x435bd8 ADD X5, X1, X13 |
(235) 0x435bdc LDR D26, [X14, X9,LSL #3] |
(235) 0x435be0 LDR D25, [X14, X11,LSL #3] |
(235) 0x435be4 ADD X16, X22, X13 |
(235) 0x435be8 LDR D27, [X30, X0,LSL #3] |
(235) 0x435bec LDR D28, [X30, X5,LSL #3] |
(235) 0x435bf0 LDR X25, [SP, #152] |
(235) 0x435bf4 FADD D31, D25, D26 |
(235) 0x435bf8 LDR X12, [SP, #128] |
(235) 0x435bfc FADD D22, D27, D28 |
(235) 0x435c00 LDR X30, [SP, #160] |
(235) 0x435c04 LDR D20, [X25, X16,LSL #3] |
(235) 0x435c08 ADD X23, X12, X13 |
(235) 0x435c0c LDR X8, [SP, #168] |
(235) 0x435c10 ADD X3, X30, X13 |
(235) 0x435c14 FADD D23, D31, D22 |
(235) 0x435c18 LDR X24, [SP, #176] |
(235) 0x435c1c FMUL D21, D30, D20 |
(235) 0x435c20 ADD X19, X8, X13 |
(235) 0x435c24 ADD X14, X8, X10 |
(235) 0x435c28 ADD X20, X24, X13 |
(235) 0x435c2c LDR X13, [SP, #120] |
(235) 0x435c30 ADD X17, X24, X10 |
(235) 0x435c34 FMUL D19, D23, D21 |
(235) 0x435c38 STR D19, [X13, X21,LSL #3] |
(235) 0x435c3c LDR D1, [X26, X19,LSL #3] |
(235) 0x435c40 LDR D2, [X28, X20,LSL #3] |
(235) 0x435c44 LDR D0, [X26, X14,LSL #3] |
(235) 0x435c48 LDR D18, [X28, X17,LSL #3] |
(235) 0x435c4c LDR X26, [SP, #112] |
(235) 0x435c50 FADD D3, D1, D2 |
(235) 0x435c54 LDR X28, [SP, #104] |
(235) 0x435c58 FADD D4, D0, D18 |
(235) 0x435c5c LDR D5, [X26, X23,LSL #3] |
(235) 0x435c60 FADD D17, D3, D4 |
(235) 0x435c64 FMUL D16, D30, D5 |
(235) 0x435c68 FMUL D7, D17, D16 |
(235) 0x435c6c STR D7, [X28, X3,LSL #3] |
(235) 0x435c70 LDR W4, [SP, #148] |
(235) 0x435c74 ADD X18, X18, #1 |
(235) 0x435c78 LDR W10, [SP, #232] |
(235) 0x435c7c CMP W10, W18 |
(235) 0x435c80 B.LE 435cac |
(235) 0x435c84 LDR W8, [SP, #276] |
(235) 0x435c88 LDR W25, [SP, #236] |
(235) 0x435c8c LDR W12, [SP, #272] |
(235) 0x435c90 SUB W7, W8, W4 |
(235) 0x435c94 B 4358e0 |
0x435c98 LDP X23, X24, [SP, #48] |
0x435c9c LDP X19, X20, [SP, #16] |
0x435ca0 LDP X21, X22, [SP, #32] |
0x435ca4 LDP X29, X30, [SP], #336 |
0x435ca8 RET |
0x435cac LDP X23, X24, [SP, #48] |
0x435cb0 LDP X25, X26, [SP, #64] |
0x435cb4 LDP X27, X28, [SP, #80] |
0x435cb8 LDP X19, X20, [SP, #16] |
0x435cbc LDP X21, X22, [SP, #32] |
0x435cc0 LDP X29, X30, [SP], #336 |
0x435cc4 RET |
0x435cc8 ADD W7, W7, #1 |
0x435ccc MOVZ W8, #0 |
0x435cd0 B 435858 |
0x435cd4 HINT #0 |
0x435cd8 HINT #0 |
0x435cdc HINT #0 |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►98.43+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►1.57+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | flux_calc(global_variables&) | flux_calc.cpp:53 | exec |
| ○ | hydro(global_variables&, paral[...] | hydro.cpp:76 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| Path / |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run gcc_4
| Source file and lines | flux_calc.cpp:36-40 |
| Module | exec |
| nb instructions | 82 |
| nb uops | 78 |
| loop length | 328 |
| used w registers | 22 |
| used x registers | 19 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 2 |
| used q registers | 0 |
| used v registers | 2 |
| used z registers | 0 |
| nb stack references | 28 |
| micro-operation queue | 9.75 cycles |
| front end | 9.75 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 4.50 | 4.50 | 7.50 | 7.50 | 7.50 | 7.50 | 0.75 | 0.75 | 0.75 | 0.75 | 13.00 | 13.00 | 13.00 | 8.00 | 8.00 |
| cycles | 4.50 | 4.50 | 7.50 | 7.50 | 7.50 | 7.50 | 0.75 | 0.75 | 0.75 | 0.75 | 13.00 | 13.00 | 13.00 | 8.00 | 8.00 |
| Cycles executing div or sqrt instructions | 10.00-25.00 |
| Front-end | 9.75 |
| Dispatch | 13.00 |
| DIV/SQRT | 10.00-25.00 |
| Overall L1 | 13.00-25.00 |
| all | 1% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| other | 10% |
| all | 0% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | 0% |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 1% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | 0% |
| other | 10% |
| all | 27% |
| load | 34% |
| store | 32% |
| mul | 12% |
| add-sub | 13% |
| fma | 12% |
| other | 25% |
| all | 25% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | 25% |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 25% |
| all | 27% |
| load | 34% |
| store | 32% |
| mul | 18% |
| add-sub | 13% |
| fma | 12% |
| div/sqrt | 12% |
| other | 26% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #688]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X20, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDP W22, W1, [X0, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDR W0, [X0, #72] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR W3, [X20, #76] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| ADD W2, W1, #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W22, W22, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W4, W0, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STP W2, W4, [SP, #232] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| CMP W22, W2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 435c9c <_Z16flux_calc_kerneliiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x4bc> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W19, W3, #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| SUB W23, W2, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W4, W19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 435c98 <_Z16flux_calc_kerneliiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x4b8> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| SUB W5, W19, W4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MUL W24, W23, W5 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| STR W5, [SP, #272] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| BL 410210 <@plt_start@+0x1f0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W21, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410240 <@plt_start@+0x220> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| UDIV W7, W24, W21 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| ORR W6, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MSUB W8, W7, W21, W24 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| CMP W0, W8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CC 435cc8 <_Z16flux_calc_kerneliiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x4e8> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W4, W7, W6, W8 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W9, W7, W4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR W9, [SP, #276] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| CMP W4, W9 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 435c98 <_Z16flux_calc_kerneliiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x4b8> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR W10, [SP, #272] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| FMOV D30, #0.2500000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| UDIV W11, W4, W10 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | scal (12.5%) |
| LDR D23, [X20] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| LDP X15, X27, [X20, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDR W16, [SP, #236] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| FMUL D30, D23, D30 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (25.0%) |
| STR X15, [SP, #320] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| LDR X14, [X20, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| MSUB W12, W11, W10, W4 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W13, W11, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| LDR X17, [X20, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| SBFM X18, X13, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| LDR X28, [X20, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| ADD W25, W12, W16 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| DUP V29.2D, V30.D[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | vect (50.0%) |
| SUB W12, W19, W25 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR X14, [SP, #280] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| LDR X30, [X20, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| STR X17, [SP, #304] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| LDR X26, [X20, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| STR X28, [SP, #288] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| LDR X20, [X20, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| STR X30, [SP, #312] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| STR X26, [SP, #328] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| STR X20, [SP, #296] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| HINT #0 | N/A | ||||||||||||||||||
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #336 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #336 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W7, W7, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W8, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| B 435858 <_Z16flux_calc_kerneliiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x78> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run gcc_4
| Source file and lines | flux_calc.cpp:36-40 |
| Module | exec |
| nb instructions | 82 |
| nb uops | 78 |
| loop length | 328 |
| used w registers | 22 |
| used x registers | 19 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 2 |
| used q registers | 0 |
| used v registers | 2 |
| used z registers | 0 |
| nb stack references | 28 |
| micro-operation queue | 9.75 cycles |
| front end | 9.75 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 4.50 | 4.50 | 7.50 | 7.50 | 7.50 | 7.50 | 0.75 | 0.75 | 0.75 | 0.75 | 13.00 | 13.00 | 13.00 | 8.00 | 8.00 |
| cycles | 4.50 | 4.50 | 7.50 | 7.50 | 7.50 | 7.50 | 0.75 | 0.75 | 0.75 | 0.75 | 13.00 | 13.00 | 13.00 | 8.00 | 8.00 |
| Cycles executing div or sqrt instructions | 10.00-25.00 |
| Front-end | 9.75 |
| Dispatch | 13.00 |
| DIV/SQRT | 10.00-25.00 |
| Overall L1 | 13.00-25.00 |
| all | 1% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| other | 10% |
| all | 0% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | 0% |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 1% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | 0% |
| other | 10% |
| all | 27% |
| load | 34% |
| store | 32% |
| mul | 12% |
| add-sub | 13% |
| fma | 12% |
| other | 25% |
| all | 25% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | 25% |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 25% |
| all | 27% |
| load | 34% |
| store | 32% |
| mul | 18% |
| add-sub | 13% |
| fma | 12% |
| div/sqrt | 12% |
| other | 26% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #688]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X20, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDP W22, W1, [X0, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDR W0, [X0, #72] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR W3, [X20, #76] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| ADD W2, W1, #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W22, W22, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W4, W0, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STP W2, W4, [SP, #232] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| CMP W22, W2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 435c9c <_Z16flux_calc_kerneliiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x4bc> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W19, W3, #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| SUB W23, W2, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W4, W19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 435c98 <_Z16flux_calc_kerneliiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x4b8> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| SUB W5, W19, W4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MUL W24, W23, W5 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| STR W5, [SP, #272] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| BL 410210 <@plt_start@+0x1f0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W21, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410240 <@plt_start@+0x220> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| UDIV W7, W24, W21 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| ORR W6, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MSUB W8, W7, W21, W24 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| CMP W0, W8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CC 435cc8 <_Z16flux_calc_kerneliiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x4e8> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W4, W7, W6, W8 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W9, W7, W4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR W9, [SP, #276] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| CMP W4, W9 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 435c98 <_Z16flux_calc_kerneliiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x4b8> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR W10, [SP, #272] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| FMOV D30, #0.2500000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| UDIV W11, W4, W10 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | scal (12.5%) |
| LDR D23, [X20] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| LDP X15, X27, [X20, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDR W16, [SP, #236] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| FMUL D30, D23, D30 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (25.0%) |
| STR X15, [SP, #320] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| LDR X14, [X20, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| MSUB W12, W11, W10, W4 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W13, W11, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| LDR X17, [X20, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| SBFM X18, X13, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| LDR X28, [X20, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| ADD W25, W12, W16 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| DUP V29.2D, V30.D[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | vect (50.0%) |
| SUB W12, W19, W25 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR X14, [SP, #280] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| LDR X30, [X20, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| STR X17, [SP, #304] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| LDR X26, [X20, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| STR X28, [SP, #288] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| LDR X20, [X20, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| STR X30, [SP, #312] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| STR X26, [SP, #328] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| STR X20, [SP, #296] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| HINT #0 | N/A | ||||||||||||||||||
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #336 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #336 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W7, W7, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W8, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| B 435858 <_Z16flux_calc_kerneliiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x78> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A |
| Name | Coverage (%) | Time (s) |
|---|---|---|
| ▼flux_calc_kernel(int, int, int, int, double, clover::Buffer2D | 4.67 | 6.28 |
| ▼Loop 235 - flux_calc.cpp:38-40 - exec– | 0.02 | 0.03 |
| ○Loop 236 - flux_calc.cpp:39-40 - exec | 4.65 | 6.08 |
