Function: advec_mom_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, ... | Module: exec | Source: advec_mom.cpp:44-48 [...] | Coverage: 2.79% |
---|
Function: advec_mom_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, ... | Module: exec | Source: advec_mom.cpp:44-48 [...] | Coverage: 2.79% |
---|
/home/hbollore/qaas-runs/170-290-5445/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_mom.cpp: 44 - 48 |
-------------------------------------------------------------------------------- |
44: #pragma omp parallel for simd collapse(2) |
45: for (int j = (y_min - 2 + 1); j < (y_max + 2 + 2); j++) { |
46: for (int i = (x_min - 2 + 1); i < (x_max + 2 + 2); i++) { |
47: post_vol(i, j) = volume(i, j) + vol_flux_y(i + 0, j + 1) - vol_flux_y(i, j); |
48: pre_vol(i, j) = post_vol(i, j) + vol_flux_x(i + 1, j + 0) - vol_flux_x(i, j); |
/home/hbollore/qaas-runs/170-290-5445/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x4205e0 STP X29, X30, [SP, #944]! |
0x4205e4 ADD X29, SP, #0 |
0x4205e8 STP X19, X20, [SP, #16] |
0x4205ec STP X25, X26, [SP, #64] |
0x4205f0 LDP W25, W19, [X0, #48] |
0x4205f4 LDP W20, W3, [X0, #40] |
0x4205f8 SUB W25, W25, #1 |
0x4205fc ADD W19, W19, #4 |
0x420600 CMP W25, W19 |
0x420604 B.GE 4207ac |
0x420608 SUB W20, W20, #1 |
0x42060c STP X21, X22, [SP, #32] |
0x420610 ADD W22, W3, #4 |
0x420614 SUB W26, W19, W25 |
0x420618 CMP W20, W22 |
0x42061c B.GE 4207c8 |
0x420620 SUB W21, W22, W20 |
0x420624 STP X23, X24, [SP, #48] |
0x420628 ORR X23, XZR, X0 |
0x42062c MADD W26, W26, W21, WZR |
0x420630 BL 403530 |
0x420634 ORR W24, WZR, W0 |
0x420638 BL 4033c0 |
0x42063c UDIV W1, W26, W24 |
0x420640 ORR W2, WZR, W0 |
0x420644 MSUB W0, W1, W24, W26 |
0x420648 CMP W2, W0 |
0x42064c B.CC 4207bc |
(213) 0x420650 MADD W18, W1, W2, W0 |
(213) 0x420654 ADD W15, W1, W18 |
(213) 0x420658 CMP W18, W15 |
(213) 0x42065c B.CS 4207a4 |
(213) 0x420660 UDIV W4, W18, W21 |
(213) 0x420664 MOVZ W16, #0 |
(213) 0x420668 CNTD X9, ALL |
(213) 0x42066c LDP X14, X13, [X23] |
(213) 0x420670 LDP X12, X11, [X23, #16] |
(213) 0x420674 LDR X10, [X23, #32] |
(213) 0x420678 MSUB W5, W4, W21, W18 |
(213) 0x42067c ADD W6, W4, W25 |
(213) 0x420680 SBFM X30, X6, #0, #31 |
(213) 0x420684 ADD W4, W5, W20 |
(213) 0x420688 SUB W17, W22, W4 |
(213) 0x42068c CMP W1, W17 |
(213) 0x420690 CSEL X25, X1, X17, #9 |
(213) 0x420694 ADD W17, W18, W25 |
(213) 0x420698 CMP W18, W17 |
(213) 0x42069c B.CS 42078c |
(215) 0x4206a0 LDR X2, [X13] |
(215) 0x4206a4 SBFM X22, X4, #0, #31 |
(215) 0x4206a8 ADD X18, X30, #1 |
(215) 0x4206ac ADD X8, X22, #1 |
(215) 0x4206b0 MOVZ X0, #0 |
(215) 0x4206b4 WHILELO P0.D, XZR, X25 |
(215) 0x4206b8 LDR X23, [X11] |
(215) 0x4206bc LDR X24, [X12] |
(215) 0x4206c0 MADD X5, X30, X2, X2 |
(215) 0x4206c4 LDR X7, [X10] |
(215) 0x4206c8 MADD X26, X30, X23, X22 |
(215) 0x4206cc SUB X4, X5, X2 |
(215) 0x4206d0 LDR X2, [X14] |
(215) 0x4206d4 ADD X6, X5, X22 |
(215) 0x4206d8 MADD X1, X30, X24, X22 |
(215) 0x4206dc LDR X23, [X13, #16] |
(215) 0x4206e0 MADD X3, X30, X7, X22 |
(215) 0x4206e4 ADD X7, X4, X22 |
(215) 0x4206e8 LDR X24, [X11, #16] |
(215) 0x4206ec MADD X30, X30, X2, XZR |
(215) 0x4206f0 LDR X2, [X12, #16] |
(215) 0x4206f4 ADD X5, X23, X6,LSL #3 |
(215) 0x4206f8 LDR X6, [X14, #16] |
(215) 0x4206fc ADD X8, X8, X30 |
(215) 0x420700 ADD X22, X30, X22 |
(215) 0x420704 ADD X26, X24, X26,LSL #3 |
(215) 0x420708 LDR X24, [X10, #16] |
(215) 0x42070c ADD X4, X23, X7,LSL #3 |
(215) 0x420710 ADD X1, X2, X1,LSL #3 |
(215) 0x420714 ADD X23, X6, X8,LSL #3 |
(215) 0x420718 ADD X7, X6, X22,LSL #3 |
(215) 0x42071c ADD X3, X24, X3,LSL #3 |
(214) 0x420720 LD1D {Z2.D}, P0/Z, [X1, X0,LSL #3] |
(214) 0x420724 LD1D {Z0.D}, P0/Z, [X5, X0,LSL #3] |
(214) 0x420728 LD1D {Z1.D}, P0/Z, [X4, X0,LSL #3] |
(214) 0x42072c FADD Z3.D, Z0.D, Z2.D |
(214) 0x420730 FSUB Z4.D, Z3.D, Z1.D |
(214) 0x420734 ST1D {Z4.D}, P0, [X3, X0,LSL #3] |
(214) 0x420738 LD1D {Z5.D}, P0/Z, [X23, X0,LSL #3] |
(214) 0x42073c LD1D {Z6.D}, P0/Z, [X7, X0,LSL #3] |
(214) 0x420740 FSUB Z7.D, Z5.D, Z6.D |
(214) 0x420744 FADD Z16.D, Z7.D, Z4.D |
(214) 0x420748 ST1D {Z16.D}, P0, [X26, X0,LSL #3] |
(214) 0x42074c ADD X0, X0, X9 |
(214) 0x420750 WHILELO P0.D, X0, X25 |
(214) 0x420754 B.NE 420720 |
(215) 0x420758 ADD W25, W16, W18 |
(215) 0x42075c ORR X30, XZR, X18 |
(215) 0x420760 CMP W19, W25 |
(215) 0x420764 B.LE 4207a4 |
(215) 0x420768 SUB W1, W15, W17 |
(215) 0x42076c ORR W18, WZR, W17 |
(215) 0x420770 ORR W17, WZR, W21 |
(215) 0x420774 ORR W4, WZR, W20 |
(215) 0x420778 CMP W1, W17 |
(215) 0x42077c CSEL X25, X1, X17, #9 |
(215) 0x420780 ADD W17, W18, W25 |
(215) 0x420784 CMP W18, W17 |
(215) 0x420788 B.CC 4206a0 |
(216) 0x42078c ORR W17, WZR, W18 |
(216) 0x420790 ADD X18, X30, #1 |
(216) 0x420794 ADD W25, W16, W18 |
(216) 0x420798 ORR X30, XZR, X18 |
(216) 0x42079c CMP W19, W25 |
(216) 0x4207a0 B.GT 420768 |
(213) 0x4207a4 LDP X21, X22, [SP, #32] |
(213) 0x4207a8 LDP X23, X24, [SP, #48] |
(213) 0x4207ac LDP X19, X20, [SP, #16] |
(213) 0x4207b0 LDP X25, X26, [SP, #64] |
(213) 0x4207b4 LDP X29, X30, [SP], #80 |
(213) 0x4207b8 RET |
(213) 0x4207bc ADD W1, W1, #1 |
(213) 0x4207c0 MOVZ W0, #0 |
(213) 0x4207c4 B 420650 |
0x4207c8 LDP X21, X22, [SP, #32] |
0x4207cc B 4207ac |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►98.45+ | __kmp_GOMP_microtask_wrapper(i[...] | libomp.so | |
○ | __kmp_invoke_microtask | libomp.so |
Path / |
Source file and lines | advec_mom.cpp:44-48 |
Module | exec |
nb instructions | 30 |
loop length | 120 |
nb stack references | 0 |
front end | 3.75 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.00 | 3.00 | 4.75 | 4.75 | 4.75 | 4.75 | 0.00 | 0.00 | 0.00 | 0.00 | 2.83 | 2.50 | 2.67 | 2.50 | 2.50 |
cycles | 3.00 | 3.00 | 4.75 | 4.75 | 4.75 | 4.75 | 0.00 | 0.00 | 0.00 | 0.00 | 2.83 | 2.50 | 2.67 | 2.50 | 2.50 |
Cycles executing div or sqrt instructions | 1.00-0.50 |
Front-end | 3.75 |
Overall L1 | 4.75 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | 0% |
div/sqrt | 0% |
other | 0% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
STP X29, X30, [SP, #944]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
LDP W25, W19, [X0, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDP W20, W3, [X0, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
SUB W25, W25, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD W19, W19, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP W25, W19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.GE 4207ac <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.0+0x1cc> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB W20, W20, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ADD W22, W3, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB W26, W19, W25 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP W20, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.GE 4207c8 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.0+0x1e8> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB W21, W22, W20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ORR X23, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MADD W26, W26, W21, WZR | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
BL 403530 <@plt_start@+0x4b0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ORR W24, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
BL 4033c0 <@plt_start@+0x340> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
UDIV W1, W26, W24 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 1-0.50 |
ORR W2, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MSUB W0, W1, W24, W26 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
CMP W2, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.CC 4207bc <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.0+0x1dc> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 |
B 4207ac <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.0+0x1cc> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
Source file and lines | advec_mom.cpp:44-48 |
Module | exec |
nb instructions | 30 |
loop length | 120 |
nb stack references | 0 |
front end | 3.75 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.00 | 3.00 | 4.75 | 4.75 | 4.75 | 4.75 | 0.00 | 0.00 | 0.00 | 0.00 | 2.83 | 2.50 | 2.67 | 2.50 | 2.50 |
cycles | 3.00 | 3.00 | 4.75 | 4.75 | 4.75 | 4.75 | 0.00 | 0.00 | 0.00 | 0.00 | 2.83 | 2.50 | 2.67 | 2.50 | 2.50 |
Cycles executing div or sqrt instructions | 1.00-0.50 |
Front-end | 3.75 |
Overall L1 | 4.75 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | 0% |
div/sqrt | 0% |
other | 0% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
STP X29, X30, [SP, #944]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
LDP W25, W19, [X0, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDP W20, W3, [X0, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
SUB W25, W25, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD W19, W19, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP W25, W19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.GE 4207ac <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.0+0x1cc> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB W20, W20, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ADD W22, W3, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB W26, W19, W25 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP W20, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.GE 4207c8 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.0+0x1e8> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB W21, W22, W20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ORR X23, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MADD W26, W26, W21, WZR | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
BL 403530 <@plt_start@+0x4b0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ORR W24, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
BL 4033c0 <@plt_start@+0x340> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
UDIV W1, W26, W24 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 1-0.50 |
ORR W2, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MSUB W0, W1, W24, W26 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
CMP W2, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.CC 4207bc <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.0+0x1dc> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 |
B 4207ac <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.0+0x1cc> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼advec_mom_kernel(int, int, int, int, clover::Buffer2D | 2.79 | 3.71 |
▼Loop 213 - advec_mom.cpp:44-48 - exec– | 0 | 0 |
▼Loop 215 - advec_mom.cpp:46-48 - exec– | 0.01 | 0.01 |
○Loop 214 - advec_mom.cpp:47-48 - exec | 2.78 | 3.69 |
○Loop 216 - advec_mom.cpp:48-48 - exec | 0 | 0 |