Function: advec_mom_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, ... | Module: exec | Source: advec_mom.cpp:95-100 [...] | Coverage: 2.84% |
---|
Function: advec_mom_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, ... | Module: exec | Source: advec_mom.cpp:95-100 [...] | Coverage: 2.84% |
---|
/home/hbollore/qaas-runs/170-290-5445/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_mom.cpp: 95 - 100 |
-------------------------------------------------------------------------------- |
95: #pragma omp parallel for simd collapse(2) |
96: for (int j = (y_min + 1); j < (y_max + 1 + 2); j++) { |
97: for (int i = (x_min - 1 + 1); i < (x_max + 2 + 2); i++) { |
98: node_mass_post(i, j) = 0.25 * (density1(i + 0, j - 1) * post_vol(i + 0, j - 1) + density1(i, j) * post_vol(i, j) + |
99: density1(i - 1, j - 1) * post_vol(i - 1, j - 1) + density1(i - 1, j + 0) * post_vol(i - 1, j + 0)); |
100: node_mass_pre(i, j) = node_mass_post(i, j) - node_flux(i - 1, j + 0) + node_flux(i, j); |
/home/hbollore/qaas-runs/170-290-5445/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x420d40 STP X29, X30, [SP, #-112]! |
0x420d44 ADD X29, SP, #0 |
0x420d48 STP X19, X20, [SP, #16] |
0x420d4c ORR X20, XZR, X0 |
0x420d50 LDP W3, W19, [X0, #48] |
0x420d54 STP X21, X22, [SP, #32] |
0x420d58 LDR W0, [X0, #44] |
0x420d5c ADD W1, W3, #1 |
0x420d60 ADD W19, W19, #3 |
0x420d64 LDR W22, [X20, #40] |
0x420d68 CMP W1, W19 |
0x420d6c B.GE 420f70 |
0x420d70 STP X23, X24, [SP, #48] |
0x420d74 SUB W24, W19, W1 |
0x420d78 STP X27, X28, [SP, #80] |
0x420d7c ADD W28, W0, #4 |
0x420d80 STR W1, [SP, #108] |
0x420d84 CMP W22, W28 |
0x420d88 B.GE 420f80 |
0x420d8c SUB W23, W28, W22 |
0x420d90 BL 403530 |
0x420d94 MADD W27, W24, W23, WZR |
0x420d98 ORR W21, WZR, W0 |
0x420d9c BL 4033c0 |
0x420da0 LDR W5, [SP, #108] |
0x420da4 ORR W2, WZR, W0 |
0x420da8 UDIV W6, W27, W21 |
0x420dac MSUB W4, W6, W21, W27 |
0x420db0 CMP W0, W4 |
0x420db4 B.CC 420f98 |
(229) 0x420db8 MADD W12, W6, W2, W4 |
(229) 0x420dbc ADD W27, W6, W12 |
(229) 0x420dc0 CMP W12, W27 |
(229) 0x420dc4 B.CS 420f80 |
(231) 0x420dc8 UDIV W16, W12, W23 |
(231) 0x420dcc STP X25, X26, [SP, #64] |
(231) 0x420dd0 CNTD X15, ALL |
(231) 0x420dd4 PTRUE P1.B, ALL |
(231) 0x420dd8 FDUP Z5.D, #80 |
(231) 0x420ddc LDP X26, X25, [X20] |
(231) 0x420de0 LDP X24, X21, [X20, #16] |
(231) 0x420de4 MSUB W7, W16, W23, W12 |
(231) 0x420de8 ADD W16, W16, W5 |
(231) 0x420dec LDR X20, [X20, #32] |
(231) 0x420df0 SBFM X13, X16, #0, #31 |
(231) 0x420df4 ADD W7, W7, W22 |
(231) 0x420df8 SUB W1, W28, W7 |
(231) 0x420dfc CMP W6, W1 |
(231) 0x420e00 CSEL X1, X6, X1, #9 |
(231) 0x420e04 ADD W18, W12, W1 |
(231) 0x420e08 CMP W12, W18 |
(231) 0x420e0c B.CS 420f50 |
(232) 0x420e10 LDR X11, [X20] |
(232) 0x420e14 SUB W8, W16, #1 |
(232) 0x420e18 SBFM X9, X7, #0, #31 |
(232) 0x420e1c SBFM X10, X8, #0, #31 |
(232) 0x420e20 MOVZ X0, #0 |
(232) 0x420e24 WHILELO P0.D, XZR, X1 |
(232) 0x420e28 LDR X5, [X26] |
(232) 0x420e2c LDR X12, [X21] |
(232) 0x420e30 LDR X14, [X24] |
(232) 0x420e34 MADD X4, X10, X5, X9 |
(232) 0x420e38 LDR X30, [X25] |
(232) 0x420e3c MADD X10, X10, X11, X9 |
(232) 0x420e40 MADD X3, X13, X12, X9 |
(232) 0x420e44 MADD X12, X13, X5, X9 |
(232) 0x420e48 LDR X17, [X20, #16] |
(232) 0x420e4c UBFM X7, X4, #61, #60 |
(232) 0x420e50 MADD X28, X13, X14, X9 |
(232) 0x420e54 SUB X4, X7, #8 |
(232) 0x420e58 LDR X6, [X26, #16] |
(232) 0x420e5c MADD X2, X13, X30, X9 |
(232) 0x420e60 UBFM X30, X12, #61, #60 |
(232) 0x420e64 MADD X9, X13, X11, X9 |
(232) 0x420e68 SUB X11, X30, #8 |
(232) 0x420e6c UBFM X14, X2, #61, #60 |
(232) 0x420e70 UBFM X2, X10, #61, #60 |
(232) 0x420e74 UBFM X5, X9, #61, #60 |
(232) 0x420e78 SUB X10, X2, #8 |
(232) 0x420e7c SUB X9, X5, #8 |
(232) 0x420e80 ADD X12, X6, X4 |
(232) 0x420e84 LDR X4, [X25, #16] |
(232) 0x420e88 ADD X7, X6, X7 |
(232) 0x420e8c ADD X11, X6, X11 |
(232) 0x420e90 ADD X10, X17, X10 |
(232) 0x420e94 ADD X6, X6, X30 |
(232) 0x420e98 LDR X30, [X24, #16] |
(232) 0x420e9c ADD X2, X17, X2 |
(232) 0x420ea0 ADD X9, X17, X9 |
(232) 0x420ea4 ADD X5, X17, X5 |
(232) 0x420ea8 LDR X17, [X21, #16] |
(232) 0x420eac SUB X8, X14, #8 |
(232) 0x420eb0 ADD X8, X4, X8 |
(232) 0x420eb4 ADD X14, X4, X14 |
(232) 0x420eb8 ADD X28, X30, X28,LSL #3 |
(232) 0x420ebc ADD X3, X17, X3,LSL #3 |
(230) 0x420ec0 LD1D {Z0.D}, P0/Z, [X6, X0,LSL #3] |
(230) 0x420ec4 LD1D {Z1.D}, P0/Z, [X5, X0,LSL #3] |
(230) 0x420ec8 LD1D {Z2.D}, P0/Z, [X7, X0,LSL #3] |
(230) 0x420ecc LD1D {Z6.D}, P0/Z, [X2, X0,LSL #3] |
(230) 0x420ed0 LD1D {Z4.D}, P0/Z, [X11, X0,LSL #3] |
(230) 0x420ed4 LD1D {Z7.D}, P0/Z, [X9, X0,LSL #3] |
(230) 0x420ed8 LD1D {Z16.D}, P0/Z, [X12, X0,LSL #3] |
(230) 0x420edc LD1D {Z18.D}, P0/Z, [X10, X0,LSL #3] |
(230) 0x420ee0 FMUL Z3.D, Z1.D, Z0.D |
(230) 0x420ee4 FMUL Z17.D, Z7.D, Z4.D |
(230) 0x420ee8 FMAD Z6.D, P1/M, Z2.D, Z3.D |
(230) 0x420eec FMAD Z18.D, P1/M, Z16.D, Z17.D |
(230) 0x420ef0 FADD Z19.D, Z6.D, Z18.D |
(230) 0x420ef4 FMUL Z20.D, Z19.D, Z5.D |
(230) 0x420ef8 ST1D {Z20.D}, P0, [X28, X0,LSL #3] |
(230) 0x420efc LD1D {Z21.D}, P0/Z, [X8, X0,LSL #3] |
(230) 0x420f00 LD1D {Z22.D}, P0/Z, [X14, X0,LSL #3] |
(230) 0x420f04 FSUB Z23.D, Z22.D, Z21.D |
(230) 0x420f08 FADD Z24.D, Z23.D, Z20.D |
(230) 0x420f0c ST1D {Z24.D}, P0, [X3, X0,LSL #3] |
(230) 0x420f10 ADD X0, X0, X15 |
(230) 0x420f14 WHILELO P0.D, X0, X1 |
(230) 0x420f18 B.NE 420ec0 |
(232) 0x420f1c ADD W16, W16, #1 |
(232) 0x420f20 ADD X13, X13, #1 |
(232) 0x420f24 CMP W19, W16 |
(232) 0x420f28 B.LE 420f64 |
(232) 0x420f2c SUB W6, W27, W18 |
(232) 0x420f30 ORR W1, WZR, W23 |
(232) 0x420f34 ORR W12, WZR, W18 |
(232) 0x420f38 CMP W6, W1 |
(232) 0x420f3c CSEL X1, X6, X1, #9 |
(232) 0x420f40 ADD W18, W12, W1 |
(232) 0x420f44 ORR W7, WZR, W22 |
(232) 0x420f48 CMP W12, W18 |
(232) 0x420f4c B.CC 420e10 |
(233) 0x420f50 ADD W16, W16, #1 |
(233) 0x420f54 ORR W18, WZR, W12 |
(233) 0x420f58 ADD X13, X13, #1 |
(233) 0x420f5c CMP W19, W16 |
(233) 0x420f60 B.GT 420f2c |
(231) 0x420f64 LDP X23, X24, [SP, #48] |
(231) 0x420f68 LDP X25, X26, [SP, #64] |
(231) 0x420f6c LDP X27, X28, [SP, #80] |
(231) 0x420f70 LDP X19, X20, [SP, #16] |
(231) 0x420f74 LDP X21, X22, [SP, #32] |
(231) 0x420f78 LDP X29, X30, [SP], #112 |
(231) 0x420f7c RET |
(229) 0x420f80 LDP X19, X20, [SP, #16] |
(229) 0x420f84 LDP X21, X22, [SP, #32] |
(229) 0x420f88 LDP X23, X24, [SP, #48] |
(229) 0x420f8c LDP X27, X28, [SP, #80] |
(229) 0x420f90 LDP X29, X30, [SP], #112 |
(229) 0x420f94 RET |
(229) 0x420f98 ADD W6, W6, #1 |
(229) 0x420f9c MOVZ W4, #0 |
(229) 0x420fa0 B 420db8 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►98.43+ | __kmp_GOMP_microtask_wrapper(i[...] | libomp.so | |
○ | __kmp_invoke_microtask | libomp.so |
Path / |
Source file and lines | advec_mom.cpp:95-100 |
Module | exec |
nb instructions | 30 |
loop length | 120 |
nb stack references | 0 |
front end | 3.75 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.50 | 2.50 | 4.50 | 4.50 | 4.50 | 4.50 | 0.00 | 0.00 | 0.00 | 0.00 | 3.33 | 3.33 | 3.33 | 3.00 | 3.00 |
cycles | 2.50 | 2.50 | 4.50 | 4.50 | 4.50 | 4.50 | 0.00 | 0.00 | 0.00 | 0.00 | 3.33 | 3.33 | 3.33 | 3.00 | 3.00 |
Cycles executing div or sqrt instructions | 1.00-0.50 |
Front-end | 3.75 |
Overall L1 | 4.50 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | 0% |
div/sqrt | 0% |
other | 0% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
STP X29, X30, [SP, #-112]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ORR X20, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDP W3, W19, [X0, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
LDR W0, [X0, #44] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADD W1, W3, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD W19, W19, #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR W22, [X20, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
CMP W1, W19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.GE 420f70 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.5+0x230> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
SUB W24, W19, W1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ADD W28, W0, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STR W1, [SP, #108] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
CMP W22, W28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.GE 420f80 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.5+0x240> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB W23, W28, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
BL 403530 <@plt_start@+0x4b0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MADD W27, W24, W23, WZR | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
ORR W21, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
BL 4033c0 <@plt_start@+0x340> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LDR W5, [SP, #108] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ORR W2, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
UDIV W6, W27, W21 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 1-0.50 |
MSUB W4, W6, W21, W27 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
CMP W0, W4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.CC 420f98 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.5+0x258> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
Source file and lines | advec_mom.cpp:95-100 |
Module | exec |
nb instructions | 30 |
loop length | 120 |
nb stack references | 0 |
front end | 3.75 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.50 | 2.50 | 4.50 | 4.50 | 4.50 | 4.50 | 0.00 | 0.00 | 0.00 | 0.00 | 3.33 | 3.33 | 3.33 | 3.00 | 3.00 |
cycles | 2.50 | 2.50 | 4.50 | 4.50 | 4.50 | 4.50 | 0.00 | 0.00 | 0.00 | 0.00 | 3.33 | 3.33 | 3.33 | 3.00 | 3.00 |
Cycles executing div or sqrt instructions | 1.00-0.50 |
Front-end | 3.75 |
Overall L1 | 4.50 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | 0% |
div/sqrt | 0% |
other | 0% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
STP X29, X30, [SP, #-112]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ORR X20, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDP W3, W19, [X0, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
LDR W0, [X0, #44] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADD W1, W3, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD W19, W19, #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR W22, [X20, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
CMP W1, W19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.GE 420f70 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.5+0x230> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
SUB W24, W19, W1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ADD W28, W0, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STR W1, [SP, #108] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
CMP W22, W28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.GE 420f80 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.5+0x240> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB W23, W28, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
BL 403530 <@plt_start@+0x4b0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MADD W27, W24, W23, WZR | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
ORR W21, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
BL 4033c0 <@plt_start@+0x340> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LDR W5, [SP, #108] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ORR W2, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
UDIV W6, W27, W21 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 1-0.50 |
MSUB W4, W6, W21, W27 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
CMP W0, W4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.CC 420f98 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.5+0x258> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼advec_mom_kernel(int, int, int, int, clover::Buffer2D | 2.84 | 3.78 |
▼Loop 231 - advec_mom.cpp:95-100 - exec– | 0 | 0 |
▼Loop 232 - advec_mom.cpp:97-100 - exec– | 0.01 | 0.01 |
○Loop 230 - advec_mom.cpp:98-100 - exec | 2.84 | 3.76 |
○Loop 229 - advec_mom.cpp:95-97 - exec | 0 | 0 |
○Loop 233 - advec_mom.cpp:97-97 - exec | 0 | 0 |