Function: .omp_outlined..6#0x468ca0 | Module: exec | Source: par_strength.c:1253-1278 | Coverage: 0.06% |
---|
Function: .omp_outlined..6#0x468ca0 | Module: exec | Source: par_strength.c:1253-1278 | Coverage: 0.06% |
---|
/home/hbollore/qaas/qaas-runs/169-817-3176/intel/AMG/build/AMG/AMG/parcsr_ls/par_strength.c: 1253 - 1278 |
-------------------------------------------------------------------------------- |
1253: #pragma omp parallel private(i) |
1254: #endif |
1255: { |
1256: HYPRE_Int num_coarse_private = 0; |
1257: |
1258: HYPRE_Int i_begin, i_end; |
1259: hypre_GetSimpleThreadPartition(&i_begin, &i_end, num_cols_diag_S); |
1260: |
1261: for (i = i_begin; i < i_end; i++) |
1262: { |
1263: if (CF_marker[i] > 0) num_coarse_private++; |
1264: } |
1265: |
1266: hypre_prefix_sum(&num_coarse_private, &num_coarse, num_coarse_prefix_sum); |
1267: |
1268: for (i = i_begin; i < i_end; i++) |
1269: { |
1270: if (CF_marker[i] > 0) |
1271: { |
1272: fine_to_coarse[i] = num_coarse_private; |
1273: coarse_to_fine[num_coarse_private] = i; |
1274: num_coarse_private++; |
1275: } |
1276: else |
1277: { |
1278: fine_to_coarse[i] = -1; |
0x468ca0 SUB SP, SP, #80 |
0x468ca4 STP X29, X30, [SP, #16] |
0x468ca8 STR X23, [SP, #32] |
0x468cac STP X22, X21, [SP, #48] |
0x468cb0 STP X20, X19, [SP, #64] |
0x468cb4 ADD X29, SP, #16 |
0x468cb8 LDR X2, [X2] |
0x468cbc ADD X0, SP, #8 |
0x468cc0 ADD X1, SP, #0 |
0x468cc4 ORR X19, XZR, X7 |
0x468cc8 ORR X20, XZR, X6 |
0x468ccc STR XZR, [X29, #24] |
0x468cd0 ORR X23, XZR, X5 |
0x468cd4 ORR X22, XZR, X4 |
0x468cd8 ORR X21, XZR, X3 |
0x468cdc BL 4b0260 |
0x468ce0 LDP X11, X8, [SP] |
0x468ce4 SUBS X9, X11, X8 |
0x468ce8 B.LE 468d18 |
0x468cec LDR X10, [X21] |
0x468cf0 ADD X12, X8, #1 |
0x468cf4 CMP X11, X12 |
0x468cf8 B.NE 468dac |
0x468cfc ORR X11, XZR, XZR |
(674) 0x468d00 TBZ W9, #0, 468d18 |
0x468d04 LDR X8, [X10, X8,LSL #3] |
0x468d08 CMP X8, #1 |
0x468d0c B.LT 468d18 |
0x468d10 ADD X8, X11, #1 |
0x468d14 STR X8, [X29, #24] |
(674) 0x468d18 LDR X2, [X23] |
(674) 0x468d1c ORR X1, XZR, X22 |
(674) 0x468d20 ADD X0, X29, #24 |
(674) 0x468d24 BL 4b0480 |
(674) 0x468d28 LDP X9, X8, [SP] |
(674) 0x468d2c CMP X8, X9 |
(674) 0x468d30 B.GE 468d94 |
(674) 0x468d34 LDR X9, [X21] |
(674) 0x468d38 LDR X10, [X20] |
(674) 0x468d3c LDR X11, [X19] |
(674) 0x468d40 MOVN X12, #0 |
(674) 0x468d44 B 468d5c |
(675) 0x468d48 STR X12, [X10, X8,LSL #3] |
(675) 0x468d4c LDR X13, [SP] |
(675) 0x468d50 ADD X8, X8, #1 |
(675) 0x468d54 CMP X8, X13 |
(675) 0x468d58 B.GE 468d94 |
(675) 0x468d5c LDR X13, [X9, X8,LSL #3] |
(675) 0x468d60 CMP X13, #1 |
(675) 0x468d64 B.LT 468d48 |
(675) 0x468d68 LDR X13, [X29, #24] |
(675) 0x468d6c STR X13, [X10, X8,LSL #3] |
(675) 0x468d70 LDR X13, [X29, #24] |
(675) 0x468d74 STR X8, [X11, X13,LSL #3] |
(675) 0x468d78 LDR X13, [X29, #24] |
(675) 0x468d7c ADD X13, X13, #1 |
(675) 0x468d80 STR X13, [X29, #24] |
(675) 0x468d84 LDR X13, [SP] |
(675) 0x468d88 ADD X8, X8, #1 |
(675) 0x468d8c CMP X8, X13 |
(675) 0x468d90 B.LT 468d5c |
(674) 0x468d94 LDP X20, X19, [SP, #64] |
(674) 0x468d98 LDP X22, X21, [SP, #48] |
(674) 0x468d9c LDR X23, [SP, #32] |
(674) 0x468da0 LDP X29, X30, [SP, #16] |
(674) 0x468da4 ADD SP, SP, #80 |
(674) 0x468da8 RET |
(674) 0x468dac ORR X11, XZR, XZR |
(674) 0x468db0 AND X12, X9, #8127 |
(674) 0x468db4 B 468dcc |
0x468db8 HINT #0 |
0x468dbc HINT #0 |
(674) 0x468dc0 ADD X8, X8, #2 |
(674) 0x468dc4 SUBS X12, X12, #2 |
(674) 0x468dc8 B.EQ 468d00 |
(674) 0x468dcc LDR X13, [X10, X8,LSL #3] |
(674) 0x468dd0 CMP X13, #1 |
(674) 0x468dd4 B.LT 468de0 |
(674) 0x468dd8 ADD X11, X11, #1 |
(674) 0x468ddc STR X11, [X29, #24] |
(674) 0x468de0 ADD X13, X10, X8,LSL #3 |
(674) 0x468de4 LDR X13, [X13, #8] |
(674) 0x468de8 CMP X13, #1 |
(674) 0x468dec B.LT 468dc0 |
(674) 0x468df0 ADD X11, X11, #1 |
(674) 0x468df4 STR X11, [X29, #24] |
(674) 0x468df8 B 468dc0 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
○100.00 | __kmp_invoke_microtask | libomp.so |
Path / |
Source file and lines | par_strength.c:1253-1278 |
Module | exec |
nb instructions | 31 |
loop length | 124 |
nb stack references | 0 |
front end | 3.63 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.00 | 2.00 | 4.00 | 4.00 | 4.00 | 4.00 | 0.00 | 0.00 | 0.00 | 0.00 | 3.33 | 3.33 | 3.33 | 3.00 | 3.00 |
cycles | 2.00 | 2.00 | 4.00 | 4.00 | 4.00 | 4.00 | 0.00 | 0.00 | 0.00 | 0.00 | 3.33 | 3.33 | 3.33 | 3.00 | 3.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 3.63 |
Overall L1 | 4.00 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
SUB SP, SP, #80 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP X29, X30, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STR X23, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X22, X21, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X20, X19, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ADD X29, SP, #16 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR X2, [X2] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADD X0, SP, #8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X1, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ORR X19, XZR, X7 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ORR X20, XZR, X6 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STR XZR, [X29, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ORR X23, XZR, X5 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ORR X22, XZR, X4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ORR X21, XZR, X3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
BL 4b0260 <hypre_GetSimpleThreadPartition> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LDP X11, X8, [SP] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 |
SUBS X9, X11, X8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LE 468d18 <.omp_outlined..6+0x78> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LDR X10, [X21] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADD X12, X8, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP X11, X12 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.NE 468dac <.omp_outlined..6+0x10c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ORR X11, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR X8, [X10, X8,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
CMP X8, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LT 468d18 <.omp_outlined..6+0x78> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD X8, X11, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STR X8, [X29, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
HINT #0 | ||||||||||||||||||
HINT #0 |
Source file and lines | par_strength.c:1253-1278 |
Module | exec |
nb instructions | 31 |
loop length | 124 |
nb stack references | 0 |
front end | 3.63 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.00 | 2.00 | 4.00 | 4.00 | 4.00 | 4.00 | 0.00 | 0.00 | 0.00 | 0.00 | 3.33 | 3.33 | 3.33 | 3.00 | 3.00 |
cycles | 2.00 | 2.00 | 4.00 | 4.00 | 4.00 | 4.00 | 0.00 | 0.00 | 0.00 | 0.00 | 3.33 | 3.33 | 3.33 | 3.00 | 3.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 3.63 |
Overall L1 | 4.00 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
SUB SP, SP, #80 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP X29, X30, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STR X23, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X22, X21, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X20, X19, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ADD X29, SP, #16 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR X2, [X2] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADD X0, SP, #8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X1, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ORR X19, XZR, X7 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ORR X20, XZR, X6 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STR XZR, [X29, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ORR X23, XZR, X5 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ORR X22, XZR, X4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ORR X21, XZR, X3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
BL 4b0260 <hypre_GetSimpleThreadPartition> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LDP X11, X8, [SP] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 |
SUBS X9, X11, X8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LE 468d18 <.omp_outlined..6+0x78> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LDR X10, [X21] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADD X12, X8, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP X11, X12 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.NE 468dac <.omp_outlined..6+0x10c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ORR X11, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR X8, [X10, X8,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
CMP X8, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LT 468d18 <.omp_outlined..6+0x78> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD X8, X11, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STR X8, [X29, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
HINT #0 | ||||||||||||||||||
HINT #0 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼.omp_outlined..6#0x468ca0– | 0.06 | 0.01 |
▼Loop 674 - par_strength.c:1253-1278 - exec– | 0.03 | 0 |
○Loop 675 - par_strength.c:1268-1278 - exec | 0.03 | 0 |