Function: flux_calc_kernel(int, int, int, int, double, clover::Buffer2D<double>&, clover::Buffer2D<d ... | Module: exec | Source: flux_calc.cpp:36-40 [...] | Coverage: 4.7% |
---|
Function: flux_calc_kernel(int, int, int, int, double, clover::Buffer2D<double>&, clover::Buffer2D<d ... | Module: exec | Source: flux_calc.cpp:36-40 [...] | Coverage: 4.7% |
---|
/home/hbollore/qaas-runs/170-290-5445/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
/home/hbollore/qaas-runs/170-290-5445/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/flux_calc.cpp: 36 - 40 |
-------------------------------------------------------------------------------- |
36: #pragma omp parallel for simd collapse(2) |
37: for (int j = (y_min + 1); j < (y_max + 1 + 2); j++) { |
38: for (int i = (x_min + 1); i < (x_max + 1 + 2); i++) { |
39: vol_flux_x(i, j) = 0.25 * dt * xarea(i, j) * (xvel0(i, j) + xvel0(i + 0, j + 1) + xvel1(i, j) + xvel1(i + 0, j + 1)); |
40: vol_flux_y(i, j) = 0.25 * dt * yarea(i, j) * (yvel0(i, j) + yvel0(i + 1, j + 0) + yvel1(i, j) + yvel1(i + 1, j + 0)); |
0x4267e0 STP X29, X30, [SP, #-112]! |
0x4267e4 ADD X29, SP, #0 |
0x4267e8 STP X19, X20, [SP, #16] |
0x4267ec STP X21, X22, [SP, #32] |
0x4267f0 LDP W22, W19, [X0, #80] |
0x4267f4 STP X27, X28, [SP, #80] |
0x4267f8 ORR X28, XZR, X0 |
0x4267fc LDR W0, [X0, #72] |
0x426800 ADD W27, W22, #1 |
0x426804 ADD W19, W19, #3 |
0x426808 LDR W2, [X28, #76] |
0x42680c ADD W1, W0, #1 |
0x426810 STR W1, [SP, #104] |
0x426814 CMP W27, W19 |
0x426818 B.GE 426a54 |
0x42681c ADD W20, W2, #3 |
0x426820 STP X23, X24, [SP, #48] |
0x426824 SUB W23, W19, W27 |
0x426828 CMP W1, W20 |
0x42682c B.GE 426a68 |
0x426830 SUB W3, W20, W1 |
0x426834 MADD W24, W23, W3, WZR |
0x426838 STR W3, [SP, #108] |
0x42683c BL 403530 |
0x426840 ORR W21, WZR, W0 |
0x426844 BL 4033c0 |
0x426848 UDIV W4, W24, W21 |
0x42684c ORR W5, WZR, W0 |
0x426850 MSUB W6, W4, W21, W24 |
0x426854 CMP W0, W6 |
0x426858 B.CC 426a80 |
(316) 0x42685c MADD W6, W4, W5, W6 |
(316) 0x426860 ADD W30, W4, W6 |
(316) 0x426864 CMP W6, W30 |
(316) 0x426868 B.CS 426a68 |
(318) 0x42686c LDR W7, [SP, #108] |
(318) 0x426870 FMOV D0, #0.2500000 |
(318) 0x426874 CNTD X15, ALL |
(318) 0x426878 STP X25, X26, [SP, #64] |
(318) 0x42687c LDR W11, [SP, #104] |
(318) 0x426880 UDIV W8, W6, W7 |
(318) 0x426884 LDP X25, X24, [X28, #24] |
(318) 0x426888 LDP X23, X22, [X28, #40] |
(318) 0x42688c LDR D3, [X28] |
(318) 0x426890 MSUB W9, W8, W7, W6 |
(318) 0x426894 ADD W10, W8, W27 |
(318) 0x426898 LDP X27, X26, [X28, #8] |
(318) 0x42689c SBFM X8, X10, #0, #31 |
(318) 0x4268a0 ADD W13, W9, W11 |
(318) 0x4268a4 FMUL D1, D3, D0 |
(318) 0x4268a8 DUP Z5.D, Z1.D[0] |
(318) 0x4268ac SUB W18, W20, W13 |
(318) 0x4268b0 LDP X21, X20, [X28, #56] |
(318) 0x4268b4 ORR W28, WZR, W30 |
(318) 0x4268b8 CMP W4, W18 |
(318) 0x4268bc CSEL W12, W4, W18, #9 |
(318) 0x4268c0 ADD W18, W6, W12 |
(318) 0x4268c4 CMP W6, W18 |
(318) 0x4268c8 B.CS 426a38 |
(319) 0x4268cc SBFM X1, X13, #0, #31 |
(319) 0x4268d0 ORR W2, WZR, W12 |
(319) 0x4268d4 WHILELO P0.D, XZR, X2 |
(319) 0x4268d8 LDR X5, [X23] |
(319) 0x4268dc ADD X14, X1, #1 |
(319) 0x4268e0 MOVZ X0, #0 |
(319) 0x4268e4 LDR X13, [X21] |
(319) 0x4268e8 LDR X4, [X23, #16] |
(319) 0x4268ec MADD X30, X5, X8, XZR |
(319) 0x4268f0 LDR X16, [X22] |
(319) 0x4268f4 MADD X3, X8, X13, X1 |
(319) 0x4268f8 LDR X10, [X24] |
(319) 0x4268fc ADD X7, X30, X1 |
(319) 0x426900 ADD X6, X5, X30 |
(319) 0x426904 ADD X13, X4, X7,LSL #3 |
(319) 0x426908 ADD X9, X6, X1 |
(319) 0x42690c LDR X7, [X25] |
(319) 0x426910 ADD X6, X4, X9,LSL #3 |
(319) 0x426914 MADD X17, X8, X16, XZR |
(319) 0x426918 LDR X12, [X20] |
(319) 0x42691c MADD X11, X8, X10, XZR |
(319) 0x426920 ADD X9, X17, X14 |
(319) 0x426924 ADD X4, X17, X1 |
(319) 0x426928 LDR X16, [X21, #16] |
(319) 0x42692c MADD X17, X8, X7, XZR |
(319) 0x426930 ADD X30, X11, X14 |
(319) 0x426934 LDR X14, [X25, #16] |
(319) 0x426938 ADD X5, X11, X1 |
(319) 0x42693c ADD X10, X7, X17 |
(319) 0x426940 ADD X11, X17, X1 |
(319) 0x426944 LDR X17, [X26] |
(319) 0x426948 ADD X7, X10, X1 |
(319) 0x42694c STR X16, [SP, #96] |
(319) 0x426950 LDR X16, [X22, #16] |
(319) 0x426954 ADD X7, X14, X7,LSL #3 |
(319) 0x426958 ADD X14, X14, X11,LSL #3 |
(319) 0x42695c LDR X11, [X27] |
(319) 0x426960 MADD X10, X8, X17, X1 |
(319) 0x426964 LDR X17, [X27, #16] |
(319) 0x426968 ADD X9, X16, X9,LSL #3 |
(319) 0x42696c MADD X11, X8, X11, X1 |
(319) 0x426970 MADD X1, X8, X12, X1 |
(319) 0x426974 LDR X12, [X20, #16] |
(319) 0x426978 ADD X11, X17, X11,LSL #3 |
(319) 0x42697c ADD X17, X12, X1,LSL #3 |
(319) 0x426980 LDR X1, [X24, #16] |
(319) 0x426984 ADD X12, X1, X30,LSL #3 |
(319) 0x426988 LDR X30, [X22, #16] |
(319) 0x42698c ADD X5, X1, X5,LSL #3 |
(319) 0x426990 LDR X1, [X26, #16] |
(319) 0x426994 ADD X4, X30, X4,LSL #3 |
(319) 0x426998 LDR X30, [SP, #96] |
(319) 0x42699c ADD X10, X1, X10,LSL #3 |
(319) 0x4269a0 ADD X3, X30, X3,LSL #3 |
(317) 0x4269a4 LD1D {Z6.D}, P0/Z, [X14, X0,LSL #3] |
(317) 0x4269a8 LD1D {Z2.D}, P0/Z, [X7, X0,LSL #3] |
(317) 0x4269ac LD1D {Z4.D}, P0/Z, [X13, X0,LSL #3] |
(317) 0x4269b0 LD1D {Z16.D}, P0/Z, [X6, X0,LSL #3] |
(317) 0x4269b4 LD1D {Z17.D}, P0/Z, [X11, X0,LSL #3] |
(317) 0x4269b8 FADD Z7.D, Z2.D, Z6.D |
(317) 0x4269bc FADD Z18.D, Z16.D, Z4.D |
(317) 0x4269c0 FMUL Z19.D, Z5.D, Z17.D |
(317) 0x4269c4 FADD Z20.D, Z18.D, Z7.D |
(317) 0x4269c8 FMUL Z21.D, Z20.D, Z19.D |
(317) 0x4269cc ST1D {Z21.D}, P0, [X3, X0,LSL #3] |
(317) 0x4269d0 LD1D {Z22.D}, P0/Z, [X5, X0,LSL #3] |
(317) 0x4269d4 LD1D {Z23.D}, P0/Z, [X12, X0,LSL #3] |
(317) 0x4269d8 LD1D {Z24.D}, P0/Z, [X4, X0,LSL #3] |
(317) 0x4269dc LD1D {Z26.D}, P0/Z, [X9, X0,LSL #3] |
(317) 0x4269e0 LD1D {Z27.D}, P0/Z, [X10, X0,LSL #3] |
(317) 0x4269e4 FADD Z25.D, Z23.D, Z22.D |
(317) 0x4269e8 FADD Z28.D, Z26.D, Z24.D |
(317) 0x4269ec FMUL Z29.D, Z27.D, Z5.D |
(317) 0x4269f0 FADD Z30.D, Z28.D, Z25.D |
(317) 0x4269f4 FMUL Z31.D, Z30.D, Z29.D |
(317) 0x4269f8 ST1D {Z31.D}, P0, [X17, X0,LSL #3] |
(317) 0x4269fc ADD X0, X0, X15 |
(317) 0x426a00 WHILELO P0.D, X0, X2 |
(317) 0x426a04 B.NE 4269a4 |
(319) 0x426a08 ADD X8, X8, #1 |
(319) 0x426a0c ADD W2, W8, #0 |
(319) 0x426a10 CMP W19, W2 |
(319) 0x426a14 B.LE 426a4c |
(319) 0x426a18 SUB W4, W28, W18 |
(319) 0x426a1c ORR W6, WZR, W18 |
(319) 0x426a20 LDP W13, W18, [SP, #104] |
(319) 0x426a24 CMP W4, W18 |
(319) 0x426a28 CSEL W12, W4, W18, #9 |
(319) 0x426a2c ADD W18, W6, W12 |
(319) 0x426a30 CMP W6, W18 |
(319) 0x426a34 B.CC 4268cc |
(320) 0x426a38 ADD X8, X8, #1 |
(320) 0x426a3c ORR W18, WZR, W6 |
(320) 0x426a40 ADD W2, W8, #0 |
(320) 0x426a44 CMP W19, W2 |
(320) 0x426a48 B.GT 426a18 |
(318) 0x426a4c LDP X23, X24, [SP, #48] |
(318) 0x426a50 LDP X25, X26, [SP, #64] |
(318) 0x426a54 LDP X19, X20, [SP, #16] |
(318) 0x426a58 LDP X21, X22, [SP, #32] |
(318) 0x426a5c LDP X27, X28, [SP, #80] |
(318) 0x426a60 LDP X29, X30, [SP], #112 |
(318) 0x426a64 RET |
(316) 0x426a68 LDP X19, X20, [SP, #16] |
(316) 0x426a6c LDP X21, X22, [SP, #32] |
(316) 0x426a70 LDP X23, X24, [SP, #48] |
(316) 0x426a74 LDP X27, X28, [SP, #80] |
(316) 0x426a78 LDP X29, X30, [SP], #112 |
(316) 0x426a7c RET |
(316) 0x426a80 ADD W4, W4, #1 |
(316) 0x426a84 MOVZ W6, #0 |
(316) 0x426a88 B 42685c |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►98.43+ | __kmp_GOMP_microtask_wrapper(i[...] | libomp.so | |
○ | __kmp_invoke_microtask | libomp.so | |
►1.57+ | GOMP_parallel | libomp.so | |
○ | flux_calc(global_variables&) | flux_calc.cpp:53 | exec |
○ | hydro(global_variables&, paral[...] | hydro.cpp:76 | exec |
○ | main | iostream:74 | exec |
○ | __libc_start_main | libc-2.31.so | |
○ | _start | iostream:74 | exec |
Path / |
Source file and lines | flux_calc.cpp:36-40 |
Module | exec |
nb instructions | 31 |
loop length | 124 |
nb stack references | 0 |
front end | 3.88 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.50 | 2.50 | 4.75 | 4.75 | 4.75 | 4.75 | 0.00 | 0.00 | 0.00 | 0.00 | 3.50 | 3.50 | 3.00 | 3.50 | 3.50 |
cycles | 2.50 | 2.50 | 4.75 | 4.75 | 4.75 | 4.75 | 0.00 | 0.00 | 0.00 | 0.00 | 3.50 | 3.50 | 3.00 | 3.50 | 3.50 |
Cycles executing div or sqrt instructions | 1.00-0.50 |
Front-end | 3.88 |
Overall L1 | 4.75 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | 0% |
div/sqrt | 0% |
other | 0% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
STP X29, X30, [SP, #-112]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
LDP W22, W19, [X0, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
STP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ORR X28, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR W0, [X0, #72] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADD W27, W22, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD W19, W19, #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR W2, [X28, #76] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADD W1, W0, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STR W1, [SP, #104] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
CMP W27, W19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.GE 426a54 <_Z16flux_calc_kerneliiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x274> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD W20, W2, #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
SUB W23, W19, W27 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP W1, W20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.GE 426a68 <_Z16flux_calc_kerneliiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x288> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB W3, W20, W1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MADD W24, W23, W3, WZR | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
STR W3, [SP, #108] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
BL 403530 <@plt_start@+0x4b0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ORR W21, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
BL 4033c0 <@plt_start@+0x340> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
UDIV W4, W24, W21 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 1-0.50 |
ORR W5, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MSUB W6, W4, W21, W24 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
CMP W0, W6 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.CC 426a80 <_Z16flux_calc_kerneliiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x2a0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
Source file and lines | flux_calc.cpp:36-40 |
Module | exec |
nb instructions | 31 |
loop length | 124 |
nb stack references | 0 |
front end | 3.88 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.50 | 2.50 | 4.75 | 4.75 | 4.75 | 4.75 | 0.00 | 0.00 | 0.00 | 0.00 | 3.50 | 3.50 | 3.00 | 3.50 | 3.50 |
cycles | 2.50 | 2.50 | 4.75 | 4.75 | 4.75 | 4.75 | 0.00 | 0.00 | 0.00 | 0.00 | 3.50 | 3.50 | 3.00 | 3.50 | 3.50 |
Cycles executing div or sqrt instructions | 1.00-0.50 |
Front-end | 3.88 |
Overall L1 | 4.75 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | 0% |
div/sqrt | 0% |
other | 0% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
STP X29, X30, [SP, #-112]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
LDP W22, W19, [X0, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
STP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ORR X28, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR W0, [X0, #72] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADD W27, W22, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD W19, W19, #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR W2, [X28, #76] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADD W1, W0, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STR W1, [SP, #104] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
CMP W27, W19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.GE 426a54 <_Z16flux_calc_kerneliiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x274> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD W20, W2, #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
SUB W23, W19, W27 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP W1, W20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.GE 426a68 <_Z16flux_calc_kerneliiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x288> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB W3, W20, W1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MADD W24, W23, W3, WZR | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
STR W3, [SP, #108] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
BL 403530 <@plt_start@+0x4b0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ORR W21, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
BL 4033c0 <@plt_start@+0x340> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
UDIV W4, W24, W21 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 1-0.50 |
ORR W5, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MSUB W6, W4, W21, W24 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
CMP W0, W6 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.CC 426a80 <_Z16flux_calc_kerneliiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x2a0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼flux_calc_kernel(int, int, int, int, double, clover::Buffer2D | 4.7 | 6.25 |
▼Loop 318 - flux_calc.cpp:36-40 - exec– | 0 | 0 |
▼Loop 319 - flux_calc.cpp:38-40 - exec– | 0.01 | 0.02 |
○Loop 317 - flux_calc.cpp:39-40 - exec | 4.69 | 6.22 |
○Loop 320 - flux_calc.cpp:38-40 - exec | 0 | 0 |
○Loop 316 - flux_calc.cpp:36-38 - exec | 0 | 0 |