Function: advec_cell_kernel(int, int, int, int, int, int, clover::Buffer1D<double>&, clover::Buffer1 ... | Module: exec | Source: advec_cell.cpp:136-140 [...] | Coverage: 1.34% |
---|
Function: advec_cell_kernel(int, int, int, int, int, int, clover::Buffer1D<double>&, clover::Buffer1 ... | Module: exec | Source: advec_cell.cpp:136-140 [...] | Coverage: 1.34% |
---|
/home/hbollore/qaas-runs/170-290-5445/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_cell.cpp: 136 - 140 |
-------------------------------------------------------------------------------- |
136: #pragma omp parallel for simd collapse(2) |
137: for (int j = (y_min - 2 + 1); j < (y_max + 2 + 2); j++) { |
138: for (int i = (x_min - 2 + 1); i < (x_max + 2 + 2); i++) { |
139: pre_vol(i, j) = volume(i, j) + (vol_flux_y(i + 0, j + 1) - vol_flux_y(i, j) + vol_flux_x(i + 1, j + 0) - vol_flux_x(i, j)); |
140: post_vol(i, j) = pre_vol(i, j) - (vol_flux_y(i + 0, j + 1) - vol_flux_y(i, j)); |
/home/hbollore/qaas-runs/170-290-5445/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x41f688 STP X29, X30, [SP, #944]! |
0x41f68c ADD X29, SP, #0 |
0x41f690 STP X19, X20, [SP, #16] |
0x41f694 STP X25, X26, [SP, #64] |
0x41f698 LDP W25, W19, [X0, #48] |
0x41f69c LDP W20, W3, [X0, #40] |
0x41f6a0 SUB W25, W25, #1 |
0x41f6a4 ADD W19, W19, #4 |
0x41f6a8 CMP W25, W19 |
0x41f6ac B.GE 41f868 |
0x41f6b0 SUB W20, W20, #1 |
0x41f6b4 STP X21, X22, [SP, #32] |
0x41f6b8 ADD W22, W3, #4 |
0x41f6bc SUB W26, W19, W25 |
0x41f6c0 CMP W20, W22 |
0x41f6c4 B.GE 41f884 |
0x41f6c8 SUB W21, W22, W20 |
0x41f6cc STP X23, X24, [SP, #48] |
0x41f6d0 ORR X23, XZR, X0 |
0x41f6d4 MADD W26, W26, W21, WZR |
0x41f6d8 BL 403530 |
0x41f6dc ORR W24, WZR, W0 |
0x41f6e0 BL 4033c0 |
0x41f6e4 UDIV W1, W26, W24 |
0x41f6e8 ORR W2, WZR, W0 |
0x41f6ec MSUB W0, W1, W24, W26 |
0x41f6f0 CMP W2, W0 |
0x41f6f4 B.CC 41f878 |
(190) 0x41f6f8 MADD W18, W1, W2, W0 |
(190) 0x41f6fc ADD W15, W1, W18 |
(190) 0x41f700 CMP W18, W15 |
(190) 0x41f704 B.CS 41f860 |
(190) 0x41f708 UDIV W4, W18, W21 |
(190) 0x41f70c MOVZ W16, #0 |
(190) 0x41f710 CNTD X9, ALL |
(190) 0x41f714 LDP X14, X13, [X23] |
(190) 0x41f718 LDP X12, X11, [X23, #16] |
(190) 0x41f71c LDR X10, [X23, #32] |
(190) 0x41f720 MSUB W5, W4, W21, W18 |
(190) 0x41f724 ADD W6, W4, W25 |
(190) 0x41f728 SBFM X30, X6, #0, #31 |
(190) 0x41f72c ADD W3, W5, W20 |
(190) 0x41f730 SUB W17, W22, W3 |
(190) 0x41f734 CMP W1, W17 |
(190) 0x41f738 CSEL X25, X1, X17, #9 |
(190) 0x41f73c ADD W17, W18, W25 |
(190) 0x41f740 CMP W18, W17 |
(190) 0x41f744 B.CS 41f844 |
(192) 0x41f748 LDR X24, [X12] |
(192) 0x41f74c SBFM X22, X3, #0, #31 |
(192) 0x41f750 ADD X18, X30, #1 |
(192) 0x41f754 ADD X8, X22, #1 |
(192) 0x41f758 MOVZ X0, #0 |
(192) 0x41f75c WHILELO P0.D, XZR, X25 |
(192) 0x41f760 LDR X7, [X10] |
(192) 0x41f764 LDR X3, [X11] |
(192) 0x41f768 MADD X1, X30, X24, X24 |
(192) 0x41f76c LDR X6, [X12, #16] |
(192) 0x41f770 MADD X23, X30, X7, X22 |
(192) 0x41f774 SUB X2, X1, X24 |
(192) 0x41f778 LDR X7, [X10, #16] |
(192) 0x41f77c ADD X4, X1, X22 |
(192) 0x41f780 ADD X5, X2, X22 |
(192) 0x41f784 MADD X26, X30, X3, X22 |
(192) 0x41f788 LDR X2, [X13] |
(192) 0x41f78c ADD X1, X6, X5,LSL #3 |
(192) 0x41f790 ADD X3, X6, X4,LSL #3 |
(192) 0x41f794 LDR X6, [X14] |
(192) 0x41f798 ADD X24, X7, X23,LSL #3 |
(192) 0x41f79c LDR X23, [X13, #16] |
(192) 0x41f7a0 MADD X4, X30, X2, XZR |
(192) 0x41f7a4 ADD X5, X4, X22 |
(192) 0x41f7a8 ADD X8, X8, X4 |
(192) 0x41f7ac ADD X2, X23, X8,LSL #3 |
(192) 0x41f7b0 ADD X7, X23, X5,LSL #3 |
(192) 0x41f7b4 MADD X23, X30, X6, X22 |
(192) 0x41f7b8 LDR X30, [X11, #16] |
(192) 0x41f7bc ADD X22, X30, X26,LSL #3 |
(192) 0x41f7c0 LDR X26, [X14, #16] |
(192) 0x41f7c4 ADD X4, X26, X23,LSL #3 |
(191) 0x41f7c8 LD1D {Z1.D}, P0/Z, [X4, X0,LSL #3] |
(191) 0x41f7cc LD1D {Z0.D}, P0/Z, [X3, X0,LSL #3] |
(191) 0x41f7d0 LD1D {Z3.D}, P0/Z, [X1, X0,LSL #3] |
(191) 0x41f7d4 LD1D {Z5.D}, P0/Z, [X7, X0,LSL #3] |
(191) 0x41f7d8 LD1D {Z2.D}, P0/Z, [X2, X0,LSL #3] |
(191) 0x41f7dc FADD Z4.D, Z0.D, Z1.D |
(191) 0x41f7e0 FADD Z6.D, Z5.D, Z3.D |
(191) 0x41f7e4 FSUB Z7.D, Z4.D, Z6.D |
(191) 0x41f7e8 FADD Z16.D, Z7.D, Z2.D |
(191) 0x41f7ec ST1D {Z16.D}, P0, [X22, X0,LSL #3] |
(191) 0x41f7f0 LD1D {Z17.D}, P0/Z, [X1, X0,LSL #3] |
(191) 0x41f7f4 LD1D {Z18.D}, P0/Z, [X3, X0,LSL #3] |
(191) 0x41f7f8 FSUB Z19.D, Z17.D, Z18.D |
(191) 0x41f7fc FADD Z20.D, Z19.D, Z16.D |
(191) 0x41f800 ST1D {Z20.D}, P0, [X24, X0,LSL #3] |
(191) 0x41f804 ADD X0, X0, X9 |
(191) 0x41f808 WHILELO P0.D, X0, X25 |
(191) 0x41f80c B.NE 41f7c8 |
(192) 0x41f810 ADD W25, W16, W18 |
(192) 0x41f814 ORR X30, XZR, X18 |
(192) 0x41f818 CMP W19, W25 |
(192) 0x41f81c B.LE 41f860 |
(192) 0x41f820 SUB W1, W15, W17 |
(192) 0x41f824 ORR W18, WZR, W17 |
(192) 0x41f828 ORR W17, WZR, W21 |
(192) 0x41f82c ORR W3, WZR, W20 |
(192) 0x41f830 CMP W1, W17 |
(192) 0x41f834 CSEL X25, X1, X17, #9 |
(192) 0x41f838 ADD W17, W18, W25 |
(192) 0x41f83c CMP W18, W17 |
(192) 0x41f840 B.CC 41f748 |
(193) 0x41f844 ORR W17, WZR, W18 |
(193) 0x41f848 ADD X18, X30, #1 |
(193) 0x41f84c ADD W25, W16, W18 |
(193) 0x41f850 ORR X30, XZR, X18 |
(193) 0x41f854 CMP W19, W25 |
(193) 0x41f858 B.GT 41f820 |
(190) 0x41f85c HINT #0 |
(190) 0x41f860 LDP X21, X22, [SP, #32] |
(190) 0x41f864 LDP X23, X24, [SP, #48] |
(190) 0x41f868 LDP X19, X20, [SP, #16] |
(190) 0x41f86c LDP X25, X26, [SP, #64] |
(190) 0x41f870 LDP X29, X30, [SP], #80 |
(190) 0x41f874 RET |
(190) 0x41f878 ADD W1, W1, #1 |
(190) 0x41f87c MOVZ W0, #0 |
(190) 0x41f880 B 41f6f8 |
0x41f884 LDP X21, X22, [SP, #32] |
0x41f888 B 41f868 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►98.44+ | __kmp_GOMP_microtask_wrapper(i[...] | libomp.so | |
○ | __kmp_invoke_microtask | libomp.so | |
►1.56+ | GOMP_parallel | libomp.so | |
○ | advec_cell_kernel(int, int, in[...] | advec_cell.cpp:136 | exec |
○ | advec_cell_driver(global_varia[...] | advec_cell.cpp:232 | exec |
○ | advection(global_variables&) | advection.cpp:50 | exec |
○ | hydro(global_variables&, paral[...] | basic_string.h:906 | exec |
○ | main | iostream:74 | exec |
○ | __libc_start_main | libc-2.31.so | |
○ | _start | iostream:74 | exec |
Path / |
Source file and lines | advec_cell.cpp:136-140 |
Module | exec |
nb instructions | 30 |
loop length | 120 |
nb stack references | 0 |
front end | 3.75 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.00 | 3.00 | 4.75 | 4.75 | 4.75 | 4.75 | 0.00 | 0.00 | 0.00 | 0.00 | 2.83 | 2.50 | 2.67 | 2.50 | 2.50 |
cycles | 3.00 | 3.00 | 4.75 | 4.75 | 4.75 | 4.75 | 0.00 | 0.00 | 0.00 | 0.00 | 2.83 | 2.50 | 2.67 | 2.50 | 2.50 |
Cycles executing div or sqrt instructions | 1.00-0.50 |
Front-end | 3.75 |
Overall L1 | 4.75 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | 0% |
div/sqrt | 0% |
other | 0% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
STP X29, X30, [SP, #944]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
LDP W25, W19, [X0, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDP W20, W3, [X0, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
SUB W25, W25, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD W19, W19, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP W25, W19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.GE 41f868 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.4+0x1e0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB W20, W20, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ADD W22, W3, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB W26, W19, W25 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP W20, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.GE 41f884 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.4+0x1fc> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB W21, W22, W20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ORR X23, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MADD W26, W26, W21, WZR | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
BL 403530 <@plt_start@+0x4b0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ORR W24, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
BL 4033c0 <@plt_start@+0x340> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
UDIV W1, W26, W24 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 1-0.50 |
ORR W2, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MSUB W0, W1, W24, W26 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
CMP W2, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.CC 41f878 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.4+0x1f0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 |
B 41f868 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.4+0x1e0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
Source file and lines | advec_cell.cpp:136-140 |
Module | exec |
nb instructions | 30 |
loop length | 120 |
nb stack references | 0 |
front end | 3.75 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.00 | 3.00 | 4.75 | 4.75 | 4.75 | 4.75 | 0.00 | 0.00 | 0.00 | 0.00 | 2.83 | 2.50 | 2.67 | 2.50 | 2.50 |
cycles | 3.00 | 3.00 | 4.75 | 4.75 | 4.75 | 4.75 | 0.00 | 0.00 | 0.00 | 0.00 | 2.83 | 2.50 | 2.67 | 2.50 | 2.50 |
Cycles executing div or sqrt instructions | 1.00-0.50 |
Front-end | 3.75 |
Overall L1 | 4.75 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | 0% |
div/sqrt | 0% |
other | 0% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
STP X29, X30, [SP, #944]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
LDP W25, W19, [X0, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDP W20, W3, [X0, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
SUB W25, W25, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD W19, W19, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP W25, W19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.GE 41f868 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.4+0x1e0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB W20, W20, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ADD W22, W3, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB W26, W19, W25 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP W20, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.GE 41f884 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.4+0x1fc> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB W21, W22, W20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ORR X23, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MADD W26, W26, W21, WZR | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
BL 403530 <@plt_start@+0x4b0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ORR W24, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
BL 4033c0 <@plt_start@+0x340> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
UDIV W1, W26, W24 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 1-0.50 |
ORR W2, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MSUB W0, W1, W24, W26 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
CMP W2, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.CC 41f878 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.4+0x1f0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 |
B 41f868 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.4+0x1e0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼advec_cell_kernel(int, int, int, int, int, int, clover::Buffer1D | 1.34 | 1.78 |
▼Loop 190 - advec_cell.cpp:136-140 - exec– | 0 | 0 |
○Loop 193 - advec_cell.cpp:140-140 - exec | 0 | 0 |
▼Loop 192 - advec_cell.cpp:138-140 - exec– | 0 | 0.01 |
○Loop 191 - advec_cell.cpp:139-140 - exec | 1.33 | 1.77 |