Function: generate_chunk(int, global_variables&) [clone ._omp_fn.0] | Module: exec | Source: generate_chunk.cpp:74-80 [...] | Coverage: 0.05% |
---|
Function: generate_chunk(int, global_variables&) [clone ._omp_fn.0] | Module: exec | Source: generate_chunk.cpp:74-80 [...] | Coverage: 0.05% |
---|
/home/hbollore/qaas-runs/170-290-5445/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 46 - 69 |
-------------------------------------------------------------------------------- |
46: T &operator[](size_t i) const { return data[i]; } |
[...] |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
/home/hbollore/qaas-runs/170-290-5445/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/generate_chunk.cpp: 74 - 80 |
-------------------------------------------------------------------------------- |
74: #pragma omp parallel for simd collapse(2) |
75: for (int j = (0); j < (yrange); j++) { |
76: for (int i = (0); i < (xrange); i++) { |
77: field.energy0(i, j) = state_energy[0]; |
78: field.density0(i, j) = state_density[0]; |
79: field.xvel0(i, j) = state_xvel[0]; |
80: field.yvel0(i, j) = state_yvel[0]; |
0x426c60 STP X29, X30, [SP, #-64]! |
0x426c64 ADD X29, SP, #0 |
0x426c68 STP X19, X20, [SP, #16] |
0x426c6c LDP W19, W20, [X0, #40] |
0x426c70 CMP W20, #0 |
0x426c74 B.LE 426de8 |
0x426c78 CMP W19, #0 |
0x426c7c B.LE 426de8 |
0x426c80 STP X21, X22, [SP, #32] |
0x426c84 ORR X22, XZR, X0 |
0x426c88 STR X23, [SP, #48] |
0x426c8c MADD W23, W20, W19, WZR |
0x426c90 BL 403530 |
0x426c94 ORR W21, WZR, W0 |
0x426c98 BL 4033c0 |
0x426c9c UDIV W1, W23, W21 |
0x426ca0 ORR W2, WZR, W0 |
0x426ca4 MSUB W3, W1, W21, W23 |
0x426ca8 CMP W0, W3 |
0x426cac B.CC 426df4 |
(323) 0x426cb0 MADD W3, W1, W2, W3 |
(323) 0x426cb4 ADD W11, W1, W3 |
(323) 0x426cb8 CMP W3, W11 |
(323) 0x426cbc B.CS 426de0 |
(323) 0x426cc0 UDIV W6, W3, W19 |
(323) 0x426cc4 LDP X16, X15, [X22] |
(323) 0x426cc8 MOVZ W12, #0 |
(323) 0x426ccc CNTD X9, ALL |
(323) 0x426cd0 PTRUE P1.B, ALL |
(323) 0x426cd4 LDP X14, X13, [X22, #16] |
(323) 0x426cd8 LDR X7, [X22, #32] |
(323) 0x426cdc MSUB W21, W6, W19, W3 |
(323) 0x426ce0 SBFM X6, X6, #0, #31 |
(323) 0x426ce4 SUB W10, W19, W21 |
(323) 0x426ce8 CMP W1, W10 |
(323) 0x426cec CSEL W22, W1, W10, #9 |
(323) 0x426cf0 ADD W10, W3, W22 |
(323) 0x426cf4 CMP W3, W10 |
(323) 0x426cf8 B.CS 426dc0 |
(323) 0x426cfc HINT #0 |
(325) 0x426d00 SBFM X8, X21, #0, #31 |
(325) 0x426d04 LDR X5, [X7] |
(325) 0x426d08 MOVZ X0, #0 |
(325) 0x426d0c WHILELO P0.D, WZR, W22 |
(325) 0x426d10 LDR X4, [X7, #48] |
(325) 0x426d14 LDR X21, [X7, #168] |
(325) 0x426d18 MADD X23, X6, X5, X8 |
(325) 0x426d1c LDR X2, [X7, #216] |
(325) 0x426d20 MADD X1, X6, X4, X8 |
(325) 0x426d24 LDR X30, [X7, #16] |
(325) 0x426d28 MADD X3, X6, X21, X8 |
(325) 0x426d2c LDR X18, [X7, #64] |
(325) 0x426d30 MADD X21, X6, X2, X8 |
(325) 0x426d34 LDR X17, [X7, #184] |
(325) 0x426d38 ADD X5, X30, X23,LSL #3 |
(325) 0x426d3c LDR X8, [X7, #232] |
(325) 0x426d40 ADD X4, X18, X1,LSL #3 |
(325) 0x426d44 LDR X30, [X15, #8] |
(325) 0x426d48 ADD X23, X17, X3,LSL #3 |
(325) 0x426d4c LDR X1, [X13, #8] |
(325) 0x426d50 ADD X2, X8, X21,LSL #3 |
(325) 0x426d54 LDR X17, [X14, #8] |
(325) 0x426d58 LDR X18, [X16, #8] |
(325) 0x426d5c HINT #0 |
(324) 0x426d60 LD1RD {Z0.D}, P1/Z, [X30] |
(324) 0x426d64 ST1D {Z0.D}, P0, [X4, X0,LSL #3] |
(324) 0x426d68 LD1RD {Z1.D}, P1/Z, [X18] |
(324) 0x426d6c ST1D {Z1.D}, P0, [X5, X0,LSL #3] |
(324) 0x426d70 LD1RD {Z2.D}, P1/Z, [X17] |
(324) 0x426d74 ST1D {Z2.D}, P0, [X23, X0,LSL #3] |
(324) 0x426d78 LD1RD {Z3.D}, P1/Z, [X1] |
(324) 0x426d7c ST1D {Z3.D}, P0, [X2, X0,LSL #3] |
(324) 0x426d80 ADD X0, X0, X9 |
(324) 0x426d84 WHILELO P0.D, W0, W22 |
(324) 0x426d88 B.NE 426d60 |
(325) 0x426d8c ADD X6, X6, #1 |
(325) 0x426d90 ADD W22, W12, W6 |
(325) 0x426d94 CMP W20, W22 |
(325) 0x426d98 B.LE 426de0 |
(325) 0x426d9c SUB W1, W11, W10 |
(325) 0x426da0 ORR W3, WZR, W10 |
(325) 0x426da4 ORR W10, WZR, W19 |
(325) 0x426da8 MOVZ W21, #0 |
(325) 0x426dac CMP W1, W10 |
(325) 0x426db0 CSEL W22, W1, W10, #9 |
(325) 0x426db4 ADD W10, W3, W22 |
(325) 0x426db8 CMP W3, W10 |
(325) 0x426dbc B.CC 426d00 |
(326) 0x426dc0 ADD X6, X6, #1 |
(326) 0x426dc4 ORR W10, WZR, W3 |
(326) 0x426dc8 ADD W22, W12, W6 |
(326) 0x426dcc CMP W20, W22 |
(326) 0x426dd0 B.GT 426d9c |
(323) 0x426dd4 HINT #0 |
(323) 0x426dd8 HINT #0 |
(323) 0x426ddc HINT #0 |
(323) 0x426de0 LDP X21, X22, [SP, #32] |
(323) 0x426de4 LDR X23, [SP, #48] |
(323) 0x426de8 LDP X19, X20, [SP, #16] |
(323) 0x426dec LDP X29, X30, [SP], #64 |
(323) 0x426df0 RET |
(323) 0x426df4 ADD W1, W1, #1 |
(323) 0x426df8 MOVZ W3, #0 |
(323) 0x426dfc B 426cb0 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►98.44+ | __kmp_GOMP_microtask_wrapper(i[...] | libomp.so | |
○ | __kmp_invoke_microtask | libomp.so | |
►1.56+ | GOMP_parallel | libomp.so | |
○ | generate_chunk(int, global_var[...] | generate_chunk.cpp:84 | exec |
○ | start(parallel_&, global_confi[...] | start.cpp:81 | exec |
○ | initialise(parallel_&, std::ve[...] | clover_leaf.cpp:192 | exec |
○ | main | iostream:74 | exec |
○ | __libc_start_main | libc-2.31.so | |
○ | _start | iostream:74 | exec |
Path / |
Source file and lines | generate_chunk.cpp:74-80 |
Module | exec |
nb instructions | 20 |
loop length | 80 |
nb stack references | 0 |
front end | 2.50 cycles |
 | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.50 | 2.50 | 3.25 | 3.25 | 3.25 | 3.25 | 0.00 | 0.00 | 0.00 | 0.00 | 2.00 | 2.00 | 1.00 | 2.00 | 2.00 |
cycles | 2.50 | 2.50 | 3.25 | 3.25 | 3.25 | 3.25 | 0.00 | 0.00 | 0.00 | 0.00 | 2.00 | 2.00 | 1.00 | 2.00 | 2.00 |
Cycles executing div or sqrt instructions | 1.00-0.50 |
Front-end | 2.50 |
Overall L1 | 3.25 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | 0% |
div/sqrt | 0% |
other | 0% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
STP X29, X30, [SP, #-64]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
LDP W19, W20, [X0, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
CMP W20, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LE 426de8 <_Z14generate_chunkiR16global_variables._omp_fn.0+0x188> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
CMP W19, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LE 426de8 <_Z14generate_chunkiR16global_variables._omp_fn.0+0x188> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ORR X22, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STR X23, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
MADD W23, W20, W19, WZR | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
BL 403530 <@plt_start@+0x4b0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ORR W21, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
BL 4033c0 <@plt_start@+0x340> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
UDIV W1, W23, W21 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 1-0.50 |
ORR W2, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MSUB W3, W1, W21, W23 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
CMP W0, W3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.CC 426df4 <_Z14generate_chunkiR16global_variables._omp_fn.0+0x194> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
Source file and lines | generate_chunk.cpp:74-80 |
Module | exec |
nb instructions | 20 |
loop length | 80 |
nb stack references | 0 |
front end | 2.50 cycles |
 | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.50 | 2.50 | 3.25 | 3.25 | 3.25 | 3.25 | 0.00 | 0.00 | 0.00 | 0.00 | 2.00 | 2.00 | 1.00 | 2.00 | 2.00 |
cycles | 2.50 | 2.50 | 3.25 | 3.25 | 3.25 | 3.25 | 0.00 | 0.00 | 0.00 | 0.00 | 2.00 | 2.00 | 1.00 | 2.00 | 2.00 |
Cycles executing div or sqrt instructions | 1.00-0.50 |
Front-end | 2.50 |
Overall L1 | 3.25 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | 0% |
div/sqrt | 0% |
other | 0% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
STP X29, X30, [SP, #-64]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
LDP W19, W20, [X0, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
CMP W20, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LE 426de8 <_Z14generate_chunkiR16global_variables._omp_fn.0+0x188> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
CMP W19, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LE 426de8 <_Z14generate_chunkiR16global_variables._omp_fn.0+0x188> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ORR X22, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STR X23, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
MADD W23, W20, W19, WZR | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
BL 403530 <@plt_start@+0x4b0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ORR W21, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
BL 4033c0 <@plt_start@+0x340> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
UDIV W1, W23, W21 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 1-0.50 |
ORR W2, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MSUB W3, W1, W21, W23 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
CMP W0, W3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.CC 426df4 <_Z14generate_chunkiR16global_variables._omp_fn.0+0x194> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼generate_chunk(int, global_variables&) [clone ._omp_fn.0]– | 0.05 | 0.06 |
▼Loop 323 - generate_chunk.cpp:74-80 - exec– | 0 | 0 |
○Loop 326 - generate_chunk.cpp:74-80 - exec | 0 | 0 |
▼Loop 325 - generate_chunk.cpp:74-80 - exec– | 0 | 0 |
○Loop 324 - generate_chunk.cpp:77-80 - exec | 0.05 | 0.06 |