Function: initialise_chunk(int, global_variables&) [clone ._omp_fn.4] | Module: exec | Source: initialise_chunk.cpp:77-82 [...] | Coverage: 0.03% |
---|
Function: initialise_chunk(int, global_variables&) [clone ._omp_fn.4] | Module: exec | Source: initialise_chunk.cpp:77-82 [...] | Coverage: 0.03% |
---|
/home/hbollore/qaas-runs/170-290-5445/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/initialise_chunk.cpp: 77 - 82 |
-------------------------------------------------------------------------------- |
77: #pragma omp parallel for simd collapse(2) |
78: for (int j = (0); j < (yrange1); j++) { |
79: for (int i = (0); i < (xrange1); i++) { |
80: field.volume(i, j) = dx * dy; |
81: field.xarea(i, j) = field.celldy[j]; |
82: field.yarea(i, j) = field.celldx[i]; |
/home/hbollore/qaas-runs/170-290-5445/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 46 - 69 |
-------------------------------------------------------------------------------- |
46: T &operator[](size_t i) const { return data[i]; } |
[...] |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x427de8 STP X29, X30, [SP, #-64]! |
0x427dec ADD X29, SP, #0 |
0x427df0 STP X19, X20, [SP, #16] |
0x427df4 LDP W19, W20, [X0, #24] |
0x427df8 CMP W20, #0 |
0x427dfc B.LE 427f48 |
0x427e00 CMP W19, #0 |
0x427e04 B.LE 427f48 |
0x427e08 STP X21, X22, [SP, #32] |
0x427e0c ORR X22, XZR, X0 |
0x427e10 STR X23, [SP, #48] |
0x427e14 MADD W23, W20, W19, WZR |
0x427e18 BL 403530 |
0x427e1c ORR W21, WZR, W0 |
0x427e20 BL 4033c0 |
0x427e24 UDIV W1, W23, W21 |
0x427e28 ORR W2, WZR, W0 |
0x427e2c MSUB W3, W1, W21, W23 |
0x427e30 CMP W0, W3 |
0x427e34 B.CC 427f54 |
(348) 0x427e38 MADD W4, W1, W2, W3 |
(348) 0x427e3c ADD W13, W1, W4 |
(348) 0x427e40 CMP W4, W13 |
(348) 0x427e44 B.CS 427f40 |
(348) 0x427e48 UDIV W7, W4, W19 |
(348) 0x427e4c LDP D0, D1, [X22] |
(348) 0x427e50 MOVZ W14, #0 |
(348) 0x427e54 CNTD X9, ALL |
(348) 0x427e58 PTRUE P1.B, ALL |
(348) 0x427e5c LDR X8, [X22, #16] |
(348) 0x427e60 FMUL D2, D1, D0 |
(348) 0x427e64 DUP Z3.D, Z2.D[0] |
(348) 0x427e68 MSUB W16, W7, W19, W4 |
(348) 0x427e6c SBFM X7, X7, #0, #31 |
(348) 0x427e70 SUB W10, W19, W16 |
(348) 0x427e74 CMP W1, W10 |
(348) 0x427e78 CSEL W17, W1, W10, #9 |
(348) 0x427e7c ADD W10, W4, W17 |
(348) 0x427e80 CMP W4, W10 |
(348) 0x427e84 B.CS 427f24 |
(350) 0x427e88 LDR X11, [X8, #552] |
(350) 0x427e8c SBFM X5, X16, #0, #31 |
(350) 0x427e90 ADD X16, X8, #672 |
(350) 0x427e94 LDP X15, X18, [X16] |
(350) 0x427e98 MOVZ X0, #0 |
(350) 0x427e9c WHILELO P0.D, WZR, W17 |
(350) 0x427ea0 LDP X12, X30, [X16, #24] |
(350) 0x427ea4 ADD X23, X11, X5,LSL #3 |
(350) 0x427ea8 MADD X21, X7, X18, X5 |
(350) 0x427eac LDR X4, [X8, #656] |
(350) 0x427eb0 MADD X2, X7, X30, X5 |
(350) 0x427eb4 LDR X6, [X8, #584] |
(350) 0x427eb8 ADD X3, X12, X21,LSL #3 |
(350) 0x427ebc LDR X11, [X8, #720] |
(350) 0x427ec0 MADD X22, X7, X4, X5 |
(350) 0x427ec4 ADD X1, X6, X7,LSL #3 |
(350) 0x427ec8 ADD X5, X15, X22,LSL #3 |
(350) 0x427ecc ADD X6, X11, X2,LSL #3 |
(349) 0x427ed0 ST1D {Z3.D}, P0, [X5, X0,LSL #3] |
(349) 0x427ed4 LD1RD {Z4.D}, P1/Z, [X1] |
(349) 0x427ed8 ST1D {Z4.D}, P0, [X3, X0,LSL #3] |
(349) 0x427edc LD1D {Z5.D}, P0/Z, [X23, X0,LSL #3] |
(349) 0x427ee0 ST1D {Z5.D}, P0, [X6, X0,LSL #3] |
(349) 0x427ee4 ADD X0, X0, X9 |
(349) 0x427ee8 WHILELO P0.D, W0, W17 |
(349) 0x427eec B.NE 427ed0 |
(350) 0x427ef0 ADD X7, X7, #1 |
(350) 0x427ef4 ADD W17, W14, W7 |
(350) 0x427ef8 CMP W20, W17 |
(350) 0x427efc B.LE 427f40 |
(350) 0x427f00 SUB W1, W13, W10 |
(350) 0x427f04 ORR W4, WZR, W10 |
(350) 0x427f08 ORR W10, WZR, W19 |
(350) 0x427f0c MOVZ W16, #0 |
(350) 0x427f10 CMP W1, W10 |
(350) 0x427f14 CSEL W17, W1, W10, #9 |
(350) 0x427f18 ADD W10, W4, W17 |
(350) 0x427f1c CMP W4, W10 |
(350) 0x427f20 B.CC 427e88 |
(351) 0x427f24 ADD X7, X7, #1 |
(351) 0x427f28 ORR W10, WZR, W4 |
(351) 0x427f2c ADD W17, W14, W7 |
(351) 0x427f30 CMP W20, W17 |
(351) 0x427f34 B.GT 427f00 |
(348) 0x427f38 HINT #0 |
(348) 0x427f3c HINT #0 |
(348) 0x427f40 LDP X21, X22, [SP, #32] |
(348) 0x427f44 LDR X23, [SP, #48] |
(348) 0x427f48 LDP X19, X20, [SP, #16] |
(348) 0x427f4c LDP X29, X30, [SP], #64 |
(348) 0x427f50 RET |
(348) 0x427f54 ADD W1, W1, #1 |
(348) 0x427f58 MOVZ W3, #0 |
(348) 0x427f5c B 427e38 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►98.23+ | __kmp_GOMP_microtask_wrapper(i[...] | libomp.so | |
○ | __kmp_invoke_microtask | libomp.so | |
►1.77+ | GOMP_parallel | libomp.so | |
○ | initialise_chunk(int, global_v[...] | initialise_chunk.cpp:85 | exec |
○ | start(parallel_&, global_confi[...] | start.cpp:83 | exec |
○ | initialise(parallel_&, std::ve[...] | clover_leaf.cpp:192 | exec |
○ | main | iostream:74 | exec |
○ | __libc_start_main | libc-2.31.so | |
○ | _start | iostream:74 | exec |
Path / |
Source file and lines | initialise_chunk.cpp:77-82 |
Module | exec |
nb instructions | 20 |
loop length | 80 |
nb stack references | 0 |
front end | 2.50 cycles |
| | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.50 | 2.50 | 3.25 | 3.25 | 3.25 | 3.25 | 0.00 | 0.00 | 0.00 | 0.00 | 2.00 | 2.00 | 1.00 | 2.00 | 2.00 |
cycles | 2.50 | 2.50 | 3.25 | 3.25 | 3.25 | 3.25 | 0.00 | 0.00 | 0.00 | 0.00 | 2.00 | 2.00 | 1.00 | 2.00 | 2.00 |
Cycles executing div or sqrt instructions | 1.00-0.50 |
Front-end | 2.50 |
Overall L1 | 3.25 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | 0% |
div/sqrt | 0% |
other | 0% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
STP X29, X30, [SP, #-64]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
LDP W19, W20, [X0, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
CMP W20, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LE 427f48 <_Z16initialise_chunkiR16global_variables._omp_fn.4+0x160> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
CMP W19, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LE 427f48 <_Z16initialise_chunkiR16global_variables._omp_fn.4+0x160> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ORR X22, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STR X23, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
MADD W23, W20, W19, WZR | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
BL 403530 <@plt_start@+0x4b0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ORR W21, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
BL 4033c0 <@plt_start@+0x340> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
UDIV W1, W23, W21 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 1-0.50 |
ORR W2, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MSUB W3, W1, W21, W23 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
CMP W0, W3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.CC 427f54 <_Z16initialise_chunkiR16global_variables._omp_fn.4+0x16c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
Source file and lines | initialise_chunk.cpp:77-82 |
Module | exec |
nb instructions | 20 |
loop length | 80 |
nb stack references | 0 |
front end | 2.50 cycles |
| | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.50 | 2.50 | 3.25 | 3.25 | 3.25 | 3.25 | 0.00 | 0.00 | 0.00 | 0.00 | 2.00 | 2.00 | 1.00 | 2.00 | 2.00 |
cycles | 2.50 | 2.50 | 3.25 | 3.25 | 3.25 | 3.25 | 0.00 | 0.00 | 0.00 | 0.00 | 2.00 | 2.00 | 1.00 | 2.00 | 2.00 |
Cycles executing div or sqrt instructions | 1.00-0.50 |
Front-end | 2.50 |
Overall L1 | 3.25 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | 0% |
div/sqrt | 0% |
other | 0% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
STP X29, X30, [SP, #-64]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
LDP W19, W20, [X0, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
CMP W20, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LE 427f48 <_Z16initialise_chunkiR16global_variables._omp_fn.4+0x160> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
CMP W19, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LE 427f48 <_Z16initialise_chunkiR16global_variables._omp_fn.4+0x160> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ORR X22, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STR X23, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
MADD W23, W20, W19, WZR | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
BL 403530 <@plt_start@+0x4b0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ORR W21, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
BL 4033c0 <@plt_start@+0x340> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
UDIV W1, W23, W21 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 1-0.50 |
ORR W2, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MSUB W3, W1, W21, W23 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
CMP W0, W3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.CC 427f54 <_Z16initialise_chunkiR16global_variables._omp_fn.4+0x16c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼initialise_chunk(int, global_variables&) [clone ._omp_fn.4]– | 0.03 | 0.04 |
▼Loop 348 - initialise_chunk.cpp:77-82 - exec– | 0 | 0 |
○Loop 351 - initialise_chunk.cpp:77-80 - exec | 0 | 0 |
▼Loop 350 - initialise_chunk.cpp:77-82 - exec– | 0 | 0 |
○Loop 349 - initialise_chunk.cpp:80-82 - exec | 0.03 | 0.04 |