Function: advec_cell_kernel(int, int, int, int, int, int, clover::Buffer1D<double>&, clover::Buffer1 ... | Module: exec | Source: advec_cell.cpp:208-216 [...] | Coverage: 4.09% |
---|
Function: advec_cell_kernel(int, int, int, int, int, int, clover::Buffer1D<double>&, clover::Buffer1 ... | Module: exec | Source: advec_cell.cpp:208-216 [...] | Coverage: 4.09% |
---|
/home/hbollore/qaas-runs/170-290-5445/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_cell.cpp: 208 - 216 |
-------------------------------------------------------------------------------- |
208: #pragma omp parallel for simd collapse(2) |
209: for (int j = (y_min + 1); j < (y_max + 2); j++) { |
210: for (int i = (x_min + 1); i < (x_max + 2); i++) { |
211: double pre_mass_s = density1(i, j) * pre_vol(i, j); |
212: double post_mass_s = pre_mass_s + mass_flux_y(i, j) - mass_flux_y(i + 0, j + 1); |
213: double post_ener_s = (energy1(i, j) * pre_mass_s + ener_flux(i, j) - ener_flux(i + 0, j + 1)) / post_mass_s; |
214: double advec_vol_s = pre_vol(i, j) + vol_flux_y(i, j) - vol_flux_y(i + 0, j + 1); |
215: density1(i, j) = post_mass_s / advec_vol_s; |
216: energy1(i, j) = post_ener_s; |
/home/hbollore/qaas-runs/170-290-5445/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x41fa48 STP X29, X30, [SP, #944]! |
0x41fa4c ADD X29, SP, #0 |
0x41fa50 STP X19, X20, [SP, #16] |
0x41fa54 STP X25, X26, [SP, #64] |
0x41fa58 LDP W25, W19, [X0, #56] |
0x41fa5c STP X23, X24, [SP, #48] |
0x41fa60 ORR X23, XZR, X0 |
0x41fa64 LDR W20, [X0, #48] |
0x41fa68 ADD W25, W25, #1 |
0x41fa6c ADD W19, W19, #2 |
0x41fa70 LDR W0, [X0, #52] |
0x41fa74 CMP W25, W19 |
0x41fa78 B.GE 41fc54 |
0x41fa7c ADD W20, W20, #1 |
0x41fa80 STP X21, X22, [SP, #32] |
0x41fa84 ADD W22, W0, #2 |
0x41fa88 SUB W26, W19, W25 |
0x41fa8c CMP W20, W22 |
0x41fa90 B.GE 41fc50 |
0x41fa94 SUB W21, W22, W20 |
0x41fa98 BL 403530 |
0x41fa9c MADD W26, W26, W21, WZR |
0x41faa0 ORR W24, WZR, W0 |
0x41faa4 BL 4033c0 |
0x41faa8 ORR W1, WZR, W0 |
0x41faac UDIV W3, W26, W24 |
0x41fab0 MSUB W2, W3, W24, W26 |
0x41fab4 CMP W0, W2 |
0x41fab8 B.CC 41fc68 |
(198) 0x41fabc MADD W7, W3, W1, W2 |
(198) 0x41fac0 ADD W17, W3, W7 |
(198) 0x41fac4 CMP W7, W17 |
(198) 0x41fac8 B.CS 41fc50 |
(198) 0x41facc UDIV W4, W7, W21 |
(198) 0x41fad0 MOVZ W18, #0 |
(198) 0x41fad4 CNTD X10, ALL |
(198) 0x41fad8 PTRUE P1.B, ALL |
(198) 0x41fadc LDP X16, X15, [X23] |
(198) 0x41fae0 LDP X14, X13, [X23, #16] |
(198) 0x41fae4 LDP X12, X11, [X23, #32] |
(198) 0x41fae8 MSUB W5, W4, W21, W7 |
(198) 0x41faec ADD W6, W4, W25 |
(198) 0x41faf0 SBFM X9, X6, #0, #31 |
(198) 0x41faf4 ADD W8, W5, W20 |
(198) 0x41faf8 SUB W30, W22, W8 |
(198) 0x41fafc CMP W3, W30 |
(198) 0x41fb00 CSEL X3, X3, X30, #9 |
(198) 0x41fb04 ADD W30, W7, W3 |
(198) 0x41fb08 CMP W7, W30 |
(198) 0x41fb0c B.CS 41fc3c |
(200) 0x41fb10 LDR X7, [X11] |
(200) 0x41fb14 SBFM X5, X8, #0, #31 |
(200) 0x41fb18 MOVZ X0, #0 |
(200) 0x41fb1c WHILELO P0.D, XZR, X3 |
(200) 0x41fb20 LDR X8, [X11, #16] |
(200) 0x41fb24 LDR X1, [X13] |
(200) 0x41fb28 MADD X23, X9, X7, XZR |
(200) 0x41fb2c LDR X24, [X12] |
(200) 0x41fb30 ADD X25, X7, X23 |
(200) 0x41fb34 ADD X22, X23, X5 |
(200) 0x41fb38 ADD X26, X25, X5 |
(200) 0x41fb3c ADD X23, X8, X22,LSL #3 |
(200) 0x41fb40 ADD X7, X8, X26,LSL #3 |
(200) 0x41fb44 LDR X8, [X14] |
(200) 0x41fb48 MADD X2, X9, X1, XZR |
(200) 0x41fb4c MADD X6, X9, X24, X5 |
(200) 0x41fb50 LDR X26, [X14, #16] |
(200) 0x41fb54 ADD X4, X1, X2 |
(200) 0x41fb58 ADD X25, X2, X5 |
(200) 0x41fb5c MADD X24, X9, X8, XZR |
(200) 0x41fb60 ADD X22, X4, X5 |
(200) 0x41fb64 ADD X1, X8, X24 |
(200) 0x41fb68 ADD X2, X24, X5 |
(200) 0x41fb6c LDR X24, [X15] |
(200) 0x41fb70 ADD X4, X1, X5 |
(200) 0x41fb74 ADD X8, X26, X4,LSL #3 |
(200) 0x41fb78 ADD X26, X26, X2,LSL #3 |
(200) 0x41fb7c LDR X2, [X16] |
(200) 0x41fb80 MADD X1, X9, X24, X5 |
(200) 0x41fb84 MADD X24, X9, X2, X5 |
(200) 0x41fb88 LDR X5, [X13, #16] |
(200) 0x41fb8c ADD X4, X5, X22,LSL #3 |
(200) 0x41fb90 LDR X22, [X16, #16] |
(200) 0x41fb94 ADD X25, X5, X25,LSL #3 |
(200) 0x41fb98 LDR X5, [X15, #16] |
(200) 0x41fb9c ADD X2, X22, X24,LSL #3 |
(200) 0x41fba0 LDR X24, [X12, #16] |
(200) 0x41fba4 ADD X1, X5, X1,LSL #3 |
(200) 0x41fba8 ADD X6, X24, X6,LSL #3 |
(199) 0x41fbac LD1D {Z2.D}, P0/Z, [X6, X0,LSL #3] |
(199) 0x41fbb0 LD1D {Z0.D}, P0/Z, [X2, X0,LSL #3] |
(199) 0x41fbb4 LD1D {Z1.D}, P0/Z, [X25, X0,LSL #3] |
(199) 0x41fbb8 LD1D {Z6.D}, P0/Z, [X4, X0,LSL #3] |
(199) 0x41fbbc LD1D {Z3.D}, P0/Z, [X23, X0,LSL #3] |
(199) 0x41fbc0 LD1D {Z16.D}, P0/Z, [X7, X0,LSL #3] |
(199) 0x41fbc4 LD1D {Z17.D}, P0/Z, [X1, X0,LSL #3] |
(199) 0x41fbc8 LD1D {Z19.D}, P0/Z, [X8, X0,LSL #3] |
(199) 0x41fbcc LD1D {Z21.D}, P0/Z, [X26, X0,LSL #3] |
(199) 0x41fbd0 FMUL Z5.D, Z2.D, Z0.D |
(199) 0x41fbd4 FADD Z4.D, Z1.D, Z2.D |
(199) 0x41fbd8 FSUB Z18.D, Z3.D, Z16.D |
(199) 0x41fbdc FSUB Z7.D, Z4.D, Z6.D |
(199) 0x41fbe0 FMAD Z17.D, P1/M, Z5.D, Z18.D |
(199) 0x41fbe4 FSUB Z20.D, Z5.D, Z19.D |
(199) 0x41fbe8 FADD Z22.D, Z20.D, Z21.D |
(199) 0x41fbec FDIVR Z7.D, P1/M, Z7.D, Z22.D |
(199) 0x41fbf0 FDIVR Z22.D, P1/M, Z22.D, Z17.D |
(199) 0x41fbf4 ST1D {Z7.D}, P0, [X2, X0,LSL #3] |
(199) 0x41fbf8 ST1D {Z22.D}, P0, [X1, X0,LSL #3] |
(199) 0x41fbfc ADD X0, X0, X10 |
(199) 0x41fc00 WHILELO P0.D, X0, X3 |
(199) 0x41fc04 B.NE 41fbac |
(200) 0x41fc08 ADD X9, X9, #1 |
(200) 0x41fc0c ADD W3, W18, W9 |
(200) 0x41fc10 CMP W19, W3 |
(200) 0x41fc14 B.LE 41fc50 |
(200) 0x41fc18 SUB W3, W17, W30 |
(200) 0x41fc1c ORR W7, WZR, W30 |
(200) 0x41fc20 ORR W30, WZR, W21 |
(200) 0x41fc24 ORR W8, WZR, W20 |
(200) 0x41fc28 CMP W3, W30 |
(200) 0x41fc2c CSEL X3, X3, X30, #9 |
(200) 0x41fc30 ADD W30, W7, W3 |
(200) 0x41fc34 CMP W7, W30 |
(200) 0x41fc38 B.CC 41fb10 |
(201) 0x41fc3c ADD X9, X9, #1 |
(201) 0x41fc40 ORR W30, WZR, W7 |
(201) 0x41fc44 ADD W3, W18, W9 |
(201) 0x41fc48 CMP W19, W3 |
(201) 0x41fc4c B.GT 41fc18 |
(198) 0x41fc50 LDP X21, X22, [SP, #32] |
(198) 0x41fc54 LDP X19, X20, [SP, #16] |
(198) 0x41fc58 LDP X23, X24, [SP, #48] |
(198) 0x41fc5c LDP X25, X26, [SP, #64] |
(198) 0x41fc60 LDP X29, X30, [SP], #80 |
(198) 0x41fc64 RET |
(198) 0x41fc68 ADD W3, W3, #1 |
(198) 0x41fc6c MOVZ W2, #0 |
(198) 0x41fc70 B 41fabc |
0x41fc74 HINT #0 |
0x41fc78 HINT #0 |
0x41fc7c HINT #0 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►98.43+ | __kmp_GOMP_microtask_wrapper(i[...] | libomp.so | |
○ | __kmp_invoke_microtask | libomp.so |
Path / |
Source file and lines | advec_cell.cpp:208-216 |
Module | exec |
nb instructions | 32 |
loop length | 128 |
nb stack references | 0 |
front end | 3.63 cycles |
| | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.50 | 2.50 | 4.75 | 4.75 | 4.75 | 4.75 | 0.00 | 0.00 | 0.00 | 0.00 | 2.83 | 2.50 | 2.67 | 2.50 | 2.50 |
cycles | 2.50 | 2.50 | 4.75 | 4.75 | 4.75 | 4.75 | 0.00 | 0.00 | 0.00 | 0.00 | 2.83 | 2.50 | 2.67 | 2.50 | 2.50 |
Cycles executing div or sqrt instructions | 1.00-0.50 |
Front-end | 3.63 |
Overall L1 | 4.75 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | 0% |
div/sqrt | 0% |
other | 0% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
STP X29, X30, [SP, #944]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
LDP W25, W19, [X0, #56] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ORR X23, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR W20, [X0, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADD W25, W25, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD W19, W19, #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR W0, [X0, #52] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
CMP W25, W19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.GE 41fc54 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.7+0x20c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD W20, W20, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ADD W22, W0, #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB W26, W19, W25 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP W20, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.GE 41fc50 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.7+0x208> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB W21, W22, W20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
BL 403530 <@plt_start@+0x4b0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MADD W26, W26, W21, WZR | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
ORR W24, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
BL 4033c0 <@plt_start@+0x340> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ORR W1, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
UDIV W3, W26, W24 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 1-0.50 |
MSUB W2, W3, W24, W26 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
CMP W0, W2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.CC 41fc68 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.7+0x220> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 |
Source file and lines | advec_cell.cpp:208-216 |
Module | exec |
nb instructions | 32 |
loop length | 128 |
nb stack references | 0 |
front end | 3.63 cycles |
| | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.50 | 2.50 | 4.75 | 4.75 | 4.75 | 4.75 | 0.00 | 0.00 | 0.00 | 0.00 | 2.83 | 2.50 | 2.67 | 2.50 | 2.50 |
cycles | 2.50 | 2.50 | 4.75 | 4.75 | 4.75 | 4.75 | 0.00 | 0.00 | 0.00 | 0.00 | 2.83 | 2.50 | 2.67 | 2.50 | 2.50 |
Cycles executing div or sqrt instructions | 1.00-0.50 |
Front-end | 3.63 |
Overall L1 | 4.75 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | 0% |
div/sqrt | 0% |
other | 0% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
STP X29, X30, [SP, #944]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
LDP W25, W19, [X0, #56] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ORR X23, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR W20, [X0, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADD W25, W25, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD W19, W19, #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR W0, [X0, #52] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
CMP W25, W19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.GE 41fc54 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.7+0x20c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD W20, W20, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ADD W22, W0, #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB W26, W19, W25 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP W20, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.GE 41fc50 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.7+0x208> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB W21, W22, W20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
BL 403530 <@plt_start@+0x4b0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MADD W26, W26, W21, WZR | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
ORR W24, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
BL 4033c0 <@plt_start@+0x340> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ORR W1, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
UDIV W3, W26, W24 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 1-0.50 |
MSUB W2, W3, W24, W26 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
CMP W0, W2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.CC 41fc68 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.7+0x220> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼advec_cell_kernel(int, int, int, int, int, int, clover::Buffer1D | 4.09 | 5.44 |
▼Loop 198 - advec_cell.cpp:208-216 - exec– | 0 | 0 |
▼Loop 200 - advec_cell.cpp:210-216 - exec– | 0.01 | 0.01 |
○Loop 199 - advec_cell.cpp:211-216 - exec | 4.09 | 5.41 |
○Loop 201 - advec_cell.cpp:210-214 - exec | 0 | 0.01 |