Function: advec_mom_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, ... | Module: exec | Source: advec_mom.cpp:167-172 [...] | Coverage: 3.12% |
---|
Function: advec_mom_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, ... | Module: exec | Source: advec_mom.cpp:167-172 [...] | Coverage: 3.12% |
---|
/home/hbollore/qaas-runs/170-290-5445/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_mom.cpp: 167 - 172 |
-------------------------------------------------------------------------------- |
167: #pragma omp parallel for simd collapse(2) |
168: for (int j = (y_min - 1 + 1); j < (y_max + 2 + 2); j++) { |
169: for (int i = (x_min + 1); i < (x_max + 1 + 2); i++) { |
170: node_mass_post(i, j) = 0.25 * (density1(i + 0, j - 1) * post_vol(i + 0, j - 1) + density1(i, j) * post_vol(i, j) + |
171: density1(i - 1, j - 1) * post_vol(i - 1, j - 1) + density1(i - 1, j + 0) * post_vol(i - 1, j + 0)); |
172: node_mass_pre(i, j) = node_mass_post(i, j) - node_flux(i + 0, j - 1) + node_flux(i, j); |
/home/hbollore/qaas-runs/170-290-5445/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x421324 STP X29, X30, [SP, #-112]! |
0x421328 ADD X29, SP, #0 |
0x42132c STP X19, X20, [SP, #16] |
0x421330 LDR W20, [X0, #52] |
0x421334 STP X21, X22, [SP, #32] |
0x421338 ORR X22, XZR, X0 |
0x42133c LDR W3, [X22, #48] |
0x421340 ADD W20, W20, #4 |
0x421344 LDR W21, [X0, #40] |
0x421348 LDR W0, [X0, #44] |
0x42134c CMP W3, W20 |
0x421350 B.GE 42155c |
0x421354 ADD W21, W21, #1 |
0x421358 ADD W19, W0, #3 |
0x42135c STP X25, X26, [SP, #64] |
0x421360 SUB W25, W20, W3 |
0x421364 STR W3, [SP, #108] |
0x421368 CMP W21, W19 |
0x42136c B.GE 421578 |
0x421370 STP X23, X24, [SP, #48] |
0x421374 SUB W24, W19, W21 |
0x421378 MADD W26, W25, W24, WZR |
0x42137c STP X27, X28, [SP, #80] |
0x421380 BL 403530 |
0x421384 ORR W23, WZR, W0 |
0x421388 BL 4033c0 |
0x42138c UDIV W1, W26, W23 |
0x421390 LDR W4, [SP, #108] |
0x421394 ORR W2, WZR, W0 |
0x421398 MSUB W5, W1, W23, W26 |
0x42139c CMP W0, W5 |
0x4213a0 B.CC 42156c |
(242) 0x4213a4 MADD W11, W1, W2, W5 |
(242) 0x4213a8 ADD W28, W1, W11 |
(242) 0x4213ac CMP W11, W28 |
(242) 0x4213b0 B.CS 421550 |
(242) 0x4213b4 UDIV W16, W11, W24 |
(242) 0x4213b8 CNTD X15, ALL |
(242) 0x4213bc PTRUE P1.B, ALL |
(242) 0x4213c0 FDUP Z5.D, #0.25 |
(242) 0x4213c4 LDP X27, X26, [X22] |
(242) 0x4213c8 LDP X25, X23, [X22, #16] |
(242) 0x4213cc LDR X22, [X22, #32] |
(242) 0x4213d0 MSUB W9, W16, W24, W11 |
(242) 0x4213d4 ADD W16, W16, W4 |
(242) 0x4213d8 SBFM X14, X16, #0, #31 |
(242) 0x4213dc ADD W7, W9, W21 |
(242) 0x4213e0 SUB W30, W19, W7 |
(242) 0x4213e4 CMP W1, W30 |
(242) 0x4213e8 CSEL X1, X1, X30, #9 |
(242) 0x4213ec ADD W30, W11, W1 |
(242) 0x4213f0 CMP W11, W30 |
(242) 0x4213f4 B.CS 42153c |
(242) 0x4213f8 HINT #0 |
(242) 0x4213fc HINT #0 |
(244) 0x421400 LDR X8, [X22] |
(244) 0x421404 SUB W6, W16, #1 |
(244) 0x421408 SBFM X12, X7, #0, #31 |
(244) 0x42140c SBFM X13, X6, #0, #31 |
(244) 0x421410 MOVZ X0, #0 |
(244) 0x421414 WHILELO P0.D, XZR, X1 |
(244) 0x421418 LDR X18, [X22, #16] |
(244) 0x42141c LDR X6, [X27] |
(244) 0x421420 MADD X7, X13, X8, X12 |
(244) 0x421424 MADD X10, X14, X8, X12 |
(244) 0x421428 LDR X2, [X23] |
(244) 0x42142c UBFM X17, X7, #61, #60 |
(244) 0x421430 LDR X4, [X25] |
(244) 0x421434 UBFM X3, X10, #61, #60 |
(244) 0x421438 SUB X11, X17, #8 |
(244) 0x42143c MADD X8, X13, X6, X12 |
(244) 0x421440 SUB X19, X3, #8 |
(244) 0x421444 ADD X7, X18, X17 |
(244) 0x421448 LDR X17, [X26] |
(244) 0x42144c ADD X10, X18, X19 |
(244) 0x421450 ADD X5, X18, X3 |
(244) 0x421454 MADD X19, X14, X6, X12 |
(244) 0x421458 LDR X3, [X27, #16] |
(244) 0x42145c MADD X9, X14, X2, X12 |
(244) 0x421460 UBFM X2, X8, #61, #60 |
(244) 0x421464 ADD X11, X18, X11 |
(244) 0x421468 MADD X18, X14, X4, X12 |
(244) 0x42146c UBFM X6, X19, #61, #60 |
(244) 0x421470 MADD X4, X13, X17, X12 |
(244) 0x421474 SUB X13, X2, #8 |
(244) 0x421478 MADD X17, X17, X14, X12 |
(244) 0x42147c SUB X12, X6, #8 |
(244) 0x421480 ADD X8, X3, X2 |
(244) 0x421484 ADD X13, X3, X13 |
(244) 0x421488 ADD X12, X3, X12 |
(244) 0x42148c ADD X6, X3, X6 |
(244) 0x421490 LDR X3, [X23, #16] |
(244) 0x421494 ADD X19, X3, X9,LSL #3 |
(244) 0x421498 LDR X9, [X26, #16] |
(244) 0x42149c ADD X2, X9, X4,LSL #3 |
(244) 0x4214a0 ADD X4, X9, X17,LSL #3 |
(244) 0x4214a4 LDR X17, [X25, #16] |
(244) 0x4214a8 ADD X18, X17, X18,LSL #3 |
(243) 0x4214ac LD1D {Z0.D}, P0/Z, [X6, X0,LSL #3] |
(243) 0x4214b0 LD1D {Z1.D}, P0/Z, [X5, X0,LSL #3] |
(243) 0x4214b4 LD1D {Z2.D}, P0/Z, [X8, X0,LSL #3] |
(243) 0x4214b8 LD1D {Z6.D}, P0/Z, [X7, X0,LSL #3] |
(243) 0x4214bc LD1D {Z4.D}, P0/Z, [X12, X0,LSL #3] |
(243) 0x4214c0 LD1D {Z7.D}, P0/Z, [X10, X0,LSL #3] |
(243) 0x4214c4 LD1D {Z16.D}, P0/Z, [X13, X0,LSL #3] |
(243) 0x4214c8 LD1D {Z18.D}, P0/Z, [X11, X0,LSL #3] |
(243) 0x4214cc FMUL Z3.D, Z1.D, Z0.D |
(243) 0x4214d0 FMUL Z17.D, Z7.D, Z4.D |
(243) 0x4214d4 FMAD Z6.D, P1/M, Z2.D, Z3.D |
(243) 0x4214d8 FMAD Z18.D, P1/M, Z16.D, Z17.D |
(243) 0x4214dc FADD Z19.D, Z6.D, Z18.D |
(243) 0x4214e0 FMUL Z20.D, Z19.D, Z5.D |
(243) 0x4214e4 ST1D {Z20.D}, P0, [X18, X0,LSL #3] |
(243) 0x4214e8 LD1D {Z21.D}, P0/Z, [X2, X0,LSL #3] |
(243) 0x4214ec LD1D {Z22.D}, P0/Z, [X4, X0,LSL #3] |
(243) 0x4214f0 FSUB Z23.D, Z22.D, Z21.D |
(243) 0x4214f4 FADD Z24.D, Z23.D, Z20.D |
(243) 0x4214f8 ST1D {Z24.D}, P0, [X19, X0,LSL #3] |
(243) 0x4214fc ADD X0, X0, X15 |
(243) 0x421500 WHILELO P0.D, X0, X1 |
(243) 0x421504 B.NE 4214ac |
(244) 0x421508 ADD W16, W16, #1 |
(244) 0x42150c ADD X14, X14, #1 |
(244) 0x421510 CMP W20, W16 |
(244) 0x421514 B.LE 421550 |
(244) 0x421518 SUB W1, W28, W30 |
(244) 0x42151c ORR W11, WZR, W30 |
(244) 0x421520 ORR W30, WZR, W24 |
(244) 0x421524 ORR W7, WZR, W21 |
(244) 0x421528 CMP W1, W30 |
(244) 0x42152c CSEL X1, X1, X30, #9 |
(244) 0x421530 ADD W30, W11, W1 |
(244) 0x421534 CMP W11, W30 |
(244) 0x421538 B.CC 421400 |
(245) 0x42153c ADD W16, W16, #1 |
(245) 0x421540 ORR W30, WZR, W11 |
(245) 0x421544 ADD X14, X14, #1 |
(245) 0x421548 CMP W20, W16 |
(245) 0x42154c B.GT 421518 |
(242) 0x421550 LDP X23, X24, [SP, #48] |
(242) 0x421554 LDP X25, X26, [SP, #64] |
(242) 0x421558 LDP X27, X28, [SP, #80] |
(242) 0x42155c LDP X19, X20, [SP, #16] |
(242) 0x421560 LDP X21, X22, [SP, #32] |
(242) 0x421564 LDP X29, X30, [SP], #112 |
(242) 0x421568 RET |
(242) 0x42156c ADD W1, W1, #1 |
(242) 0x421570 MOVZ W5, #0 |
(242) 0x421574 B 4213a4 |
0x421578 LDP X25, X26, [SP, #64] |
0x42157c B 42155c |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►98.43+ | __kmp_GOMP_microtask_wrapper(i[...] | libomp.so | |
○ | __kmp_invoke_microtask | libomp.so |
Path / |
Source file and lines | advec_mom.cpp:167-172 |
Module | exec |
nb instructions | 34 |
loop length | 136 |
nb stack references | 0 |
front end | 4.25 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.00 | 3.00 | 4.50 | 4.50 | 4.50 | 4.50 | 0.00 | 0.00 | 0.00 | 0.00 | 4.50 | 4.17 | 4.33 | 3.50 | 3.50 |
cycles | 3.00 | 3.00 | 4.50 | 4.50 | 4.50 | 4.50 | 0.00 | 0.00 | 0.00 | 0.00 | 4.50 | 4.17 | 4.33 | 3.50 | 3.50 |
Cycles executing div or sqrt instructions | 1.00-0.50 |
Front-end | 4.25 |
Overall L1 | 4.50 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | 0% |
div/sqrt | 0% |
other | 0% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
STP X29, X30, [SP, #-112]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
LDR W20, [X0, #52] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ORR X22, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR W3, [X22, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADD W20, W20, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR W21, [X0, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR W0, [X0, #44] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
CMP W3, W20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.GE 42155c <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x238> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD W21, W21, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD W19, W0, #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
SUB W25, W20, W3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STR W3, [SP, #108] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
CMP W21, W19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.GE 421578 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x254> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
SUB W24, W19, W21 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MADD W26, W25, W24, WZR | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
STP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
BL 403530 <@plt_start@+0x4b0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ORR W23, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
BL 4033c0 <@plt_start@+0x340> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
UDIV W1, W26, W23 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 1-0.50 |
LDR W4, [SP, #108] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ORR W2, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MSUB W5, W1, W23, W26 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
CMP W0, W5 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.CC 42156c <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x248> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 |
B 42155c <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x238> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
Source file and lines | advec_mom.cpp:167-172 |
Module | exec |
nb instructions | 34 |
loop length | 136 |
nb stack references | 0 |
front end | 4.25 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.00 | 3.00 | 4.50 | 4.50 | 4.50 | 4.50 | 0.00 | 0.00 | 0.00 | 0.00 | 4.50 | 4.17 | 4.33 | 3.50 | 3.50 |
cycles | 3.00 | 3.00 | 4.50 | 4.50 | 4.50 | 4.50 | 0.00 | 0.00 | 0.00 | 0.00 | 4.50 | 4.17 | 4.33 | 3.50 | 3.50 |
Cycles executing div or sqrt instructions | 1.00-0.50 |
Front-end | 4.25 |
Overall L1 | 4.50 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | 0% |
div/sqrt | 0% |
other | 0% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
STP X29, X30, [SP, #-112]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
LDR W20, [X0, #52] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ORR X22, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR W3, [X22, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADD W20, W20, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR W21, [X0, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR W0, [X0, #44] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
CMP W3, W20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.GE 42155c <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x238> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD W21, W21, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD W19, W0, #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
SUB W25, W20, W3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STR W3, [SP, #108] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
CMP W21, W19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.GE 421578 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x254> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
SUB W24, W19, W21 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MADD W26, W25, W24, WZR | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
STP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
BL 403530 <@plt_start@+0x4b0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ORR W23, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
BL 4033c0 <@plt_start@+0x340> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
UDIV W1, W26, W23 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 1-0.50 |
LDR W4, [SP, #108] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ORR W2, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MSUB W5, W1, W23, W26 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
CMP W0, W5 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.CC 42156c <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x248> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 |
B 42155c <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x238> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼advec_mom_kernel(int, int, int, int, clover::Buffer2D | 3.12 | 4.14 |
▼Loop 242 - advec_mom.cpp:167-172 - exec– | 0 | 0 |
▼Loop 244 - advec_mom.cpp:169-172 - exec– | 0.01 | 0.01 |
○Loop 243 - advec_mom.cpp:170-172 - exec | 3.11 | 4.12 |
○Loop 245 - advec_mom.cpp:169-169 - exec | 0 | 0 |