Loop Id: 543 | Module: exec | Source: IJMatrix_parcsr.c:3291-3475 [...] | Coverage: 0.25% |
---|
Loop Id: 543 | Module: exec | Source: IJMatrix_parcsr.c:3291-3475 [...] | Coverage: 0.25% |
---|
0x49cfcc ADD X26, X26, #1 |
0x49cfd0 CMP X26, X30 |
0x49cfd4 B.GE 49d774 |
0x49cfd8 LDR X8, [X23] |
0x49cfdc LDR X9, [X13] |
0x49cfe0 LDR X25, [X8, X26,LSL #3] |
0x49cfe4 LDR X8, [X7] |
0x49cfe8 LDR X20, [X8, X26,LSL #3] |
0x49cfec LDR X8, [X28] |
0x49cff0 LDR X10, [X8, X9,LSL #3] |
0x49cff4 SUBS X16, X25, X10 |
0x49cff8 B.LT 49d0c0 |
0x49cffc ADD X8, X8, X9,LSL #3 |
0x49d000 LDR X8, [X8, #8] |
0x49d004 CMP X25, X8 |
0x49d008 B.GE 49d0c0 |
0x49d00c LDUR X8, [X29, #392] |
0x49d010 STUR X26, [X29, #456] |
0x49d014 LDR X8, [X8] |
0x49d018 CBZ X8, 49d20c |
0x49d0c0 LDR X8, [X11] |
0x49d0c4 LDR X12, [X5] |
0x49d0c8 ADD X24, X20, X24 |
0x49d0cc CMP X8, #0 |
0x49d0d0 CCMP X12, #1, #8, #1 |
0x49d0d4 B.LT 49cfcc |
0x49d0d8 CMP X20, #1 |
0x49d0dc B.LT 49cfcc |
0x49d20c LDR X8, [X11] |
0x49d210 UBFM X12, X16, #61, #60 |
0x49d214 CMP X20, #1 |
0x49d218 LDP X10, X9, [X8, #56] |
0x49d21c LDR X8, [X9, X12] |
0x49d220 LDR X9, [X10, X12] |
0x49d224 B.LT 49d594 |
0x49d228 LDP X11, X13, [SP, #32] |
0x49d22c ORR X28, XZR, X16 |
0x49d230 LDP X16, X18, [SP, #64] |
0x49d234 LDR X17, [SP, #88] |
0x49d238 ADD X14, X12, #8 |
0x49d23c ORR X10, XZR, XZR |
0x49d240 ORR X26, XZR, X8 |
0x49d244 LDR X15, [X6] |
0x49d248 LDP X0, X1, [SP, #48] |
0x49d24c ORR X21, XZR, X9 |
0x49d250 LDR X11, [X11] |
0x49d254 LDR X13, [X13] |
0x49d258 LDR X16, [X16] |
0x49d25c LDR X17, [X17] |
0x49d260 LDR X18, [X18] |
0x49d264 LDR X0, [X0] |
0x49d268 LDR X1, [X1] |
0x49d26c LDR X12, [X11, X14] |
0x49d270 LDR X14, [X13, X14] |
0x49d274 B 49d29c |
(548) 0x49d280 LDR D0, [X17, X24,LSL #3] |
(548) 0x49d284 LDP X5, X6, [X29, #984] |
(548) 0x49d288 STR D0, [X3] |
(548) 0x49d28c ADD X10, X10, #1 |
(548) 0x49d290 ADD X24, X24, #1 |
(548) 0x49d294 CMP X10, X20 |
(548) 0x49d298 B.EQ 49d6e0 |
(548) 0x49d29c LDP X3, X4, [X29, #1000] |
(548) 0x49d2a0 LDR X2, [X15, X24,LSL #3] |
(548) 0x49d2a4 LDR X3, [X3] |
(548) 0x49d2a8 LDR X4, [X4] |
(548) 0x49d2ac CMP X2, X3 |
(548) 0x49d2b0 CCMP X2, X4, #0, #10 |
(548) 0x49d2b4 B.LE 49d320 |
(548) 0x49d2b8 LDR X4, [X13, X28,LSL #3] |
(548) 0x49d2bc CMP X4, X8 |
(548) 0x49d2c0 B.GE 49d300 |
(548) 0x49d2c4 UBFM X5, X4, #61, #60 |
(548) 0x49d2c8 SUB X4, X8, X4 |
(548) 0x49d2cc ADD X3, X1, X5 |
(548) 0x49d2d0 ADD X5, X0, X5 |
(548) 0x49d2d4 HINT #0 |
(548) 0x49d2d8 HINT #0 |
(548) 0x49d2dc HINT #0 |
(550) 0x49d2e0 LDR X6, [X5] |
(550) 0x49d2e4 CMP X6, X2 |
(550) 0x49d2e8 B.EQ 49d280 |
(550) 0x49d2ec ADD X3, X3, #8 |
(550) 0x49d2f0 SUBS X4, X4, #1 |
(550) 0x49d2f4 ADD X5, X5, #8 |
(550) 0x49d2f8 B.NE 49d2e0 |
(548) 0x49d2fc LDP X5, X6, [X29, #984] |
(548) 0x49d300 CMP X26, X14 |
(548) 0x49d304 B.GE 49d65c |
(548) 0x49d308 LDR D0, [X17, X24,LSL #3] |
(548) 0x49d30c STR X2, [X0, X26,LSL #3] |
(548) 0x49d310 STR D0, [X1, X26,LSL #3] |
(548) 0x49d314 ADD X26, X26, #1 |
(548) 0x49d318 B 49d28c |
(548) 0x49d320 LDR X4, [X11, X28,LSL #3] |
(548) 0x49d324 CMP X4, X9 |
(548) 0x49d328 B.GE 49d360 |
(548) 0x49d32c UBFM X5, X4, #61, #60 |
(548) 0x49d330 SUB X4, X9, X4 |
(548) 0x49d334 ADD X3, X18, X5 |
(548) 0x49d338 ADD X5, X16, X5 |
(548) 0x49d33c HINT #0 |
(549) 0x49d340 LDR X6, [X5] |
(549) 0x49d344 CMP X6, X2 |
(549) 0x49d348 B.EQ 49d280 |
(549) 0x49d34c ADD X3, X3, #8 |
(549) 0x49d350 SUBS X4, X4, #1 |
(549) 0x49d354 ADD X5, X5, #8 |
(549) 0x49d358 B.NE 49d340 |
(548) 0x49d35c LDP X5, X6, [X29, #984] |
(548) 0x49d360 CMP X21, X12 |
(548) 0x49d364 B.GE 49d698 |
(548) 0x49d368 LDR D0, [X17, X24,LSL #3] |
(548) 0x49d36c STR X2, [X16, X21,LSL #3] |
(548) 0x49d370 STR D0, [X18, X21,LSL #3] |
(548) 0x49d374 ADD X21, X21, #1 |
(548) 0x49d378 B 49d28c |
0x49d594 ORR X21, XZR, X9 |
0x49d598 ORR X26, XZR, X8 |
0x49d59c B 49d6ec |
0x49d65c ORR X3, XZR, XZR |
0x49d660 ADRP X0, |
0x49d664 ADD X0, X0, #1799 |
0x49d668 MOVZ W1, #3406 |
0x49d66c MOVZ W2, #1 |
0x49d670 BL 4b0350 |
0x49d674 LDR X8, [SP, #16] |
0x49d678 MOVZ W9, #1 |
0x49d67c ADRP X0, |
0x49d680 ADD X0, X0, #2140 |
0x49d684 LDADD X9, X8, [X8] |
0x49d688 LDR X8, [SP, #24] |
0x49d68c LDR X8, [X8] |
0x49d690 CBNZ X8, 49d6d0 |
0x49d698 ORR X3, XZR, XZR |
0x49d69c ADRP X0, |
0x49d6a0 ADD X0, X0, #1799 |
0x49d6a4 MOVZ W1, #3440 |
0x49d6a8 MOVZ W2, #1 |
0x49d6ac BL 4b0350 |
0x49d6b0 LDR X8, [SP, #16] |
0x49d6b4 MOVZ W9, #1 |
0x49d6b8 ADRP X0, |
0x49d6bc ADD X0, X0, #2178 |
0x49d6c0 LDADD X9, X8, [X8] |
0x49d6c4 LDR X8, [SP, #24] |
0x49d6c8 LDR X8, [X8] |
0x49d6cc CBZ X8, 49d6d8 |
0x49d6d0 ORR X1, XZR, X25 |
0x49d6d4 BL 4ae2a0 |
0x49d6d8 LDP X30, X7, [X29, #920] |
0x49d6dc LDP X5, X6, [X29, #984] |
0x49d6e0 LDP X13, X11, [X29, #952] |
0x49d6e4 ORR X16, XZR, X28 |
0x49d6e8 LDR X28, [SP, #112] |
0x49d6ec LDR X8, [X11] |
0x49d6f0 LDP X9, X8, [X8, #56] |
0x49d6f4 STR X21, [X9, X16,LSL #3] |
0x49d6f8 STR X26, [X8, X16,LSL #3] |
0x49d6fc LDUR X26, [X29, #456] |
0x49d700 LDUR X21, [X29, #432] |
0x49d704 B 49cfcc |
/home/hbollore/qaas/qaas-runs/169-817-3176/intel/AMG/build/AMG/AMG/IJ_mv/IJMatrix_parcsr.c: 3291 - 3475 |
-------------------------------------------------------------------------------- |
3291: for (ii=ns; ii < ne; ii++) |
3292: { |
3293: row = rows[ii]; |
3294: n = ncols[ii]; |
3295: /* processor owns the row */ |
3296: if (row >= row_partitioning[pstart] && row < row_partitioning[pstart+1]) |
3297: { |
3298: row_local = row - row_partitioning[pstart]; |
3299: /* compute local row number */ |
3300: if (need_aux) |
[...] |
3365: if (tmp_j) |
[...] |
3376: offd_indx = hypre_AuxParCSRMatrixIndxOffd(aux_matrix)[row_local]; |
3377: diag_indx = hypre_AuxParCSRMatrixIndxDiag(aux_matrix)[row_local]; |
[...] |
3383: for (i=0; i < n; i++) |
3384: { |
3385: if (cols[indx] < col_0 || cols[indx] > col_n) |
3386: /* insert into offd */ |
3387: { |
3388: for (j=offd_i[row_local]; j < offd_indx; j++) |
3389: { |
3390: if (offd_j[j] == cols[indx]) |
[...] |
3399: if (cnt_offd < offd_space) |
3400: { |
3401: offd_j[cnt_offd] = cols[indx]; |
3402: offd_data[cnt_offd++] = values[indx]; |
3403: } |
3404: else |
3405: { |
3406: hypre_error(HYPRE_ERROR_GENERIC); |
3407: #ifdef HYPRE_USING_OPENMP |
3408: #pragma omp atomic |
3409: #endif |
3410: error_flag++; |
3411: if (print_level) |
[...] |
3422: for (j=diag_i[row_local]; j < diag_indx; j++) |
3423: { |
3424: if (diag_j[j] == cols[indx]) |
[...] |
3433: if (cnt_diag < diag_space) |
3434: { |
3435: diag_j[cnt_diag] = cols[indx]; |
3436: diag_data[cnt_diag++] = values[indx]; |
3437: } |
3438: else |
3439: { |
3440: hypre_error(HYPRE_ERROR_GENERIC); |
3441: #ifdef HYPRE_USING_OPENMP |
3442: #pragma omp atomic |
3443: #endif |
3444: error_flag++; |
3445: if (print_level) |
[...] |
3454: indx++; |
3455: } |
3456: |
3457: hypre_AuxParCSRMatrixIndxDiag(aux_matrix)[row_local] = cnt_diag; |
3458: hypre_AuxParCSRMatrixIndxOffd(aux_matrix)[row_local] = cnt_offd; |
[...] |
3466: indx += n; |
3467: if (aux_matrix) |
[...] |
3475: for (j=0; j < n; j++) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
○100.00 | __kmp_invoke_microtask | | libomp.so |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | NA |
CQA speedup if FP arith vectorized | NA |
CQA speedup if fully vectorized | NA |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | NA |
Bottlenecks | NA |
Function | .omp_outlined..21#0x49ccf0 |
Source | IJMatrix_parcsr.c:3291-3296,IJMatrix_parcsr.c:3300-3300,IJMatrix_parcsr.c:3365-3365,IJMatrix_parcsr.c:3376-3377,IJMatrix_parcsr.c:3383-3383,IJMatrix_parcsr.c:3406-3406,IJMatrix_parcsr.c:3410-3411,IJMatrix_parcsr.c:3440-3440,IJMatrix_parcsr.c:3444-3445,IJMatrix_parcsr.c:3457-3458,IJMatrix_parcsr.c:3466-3467,IJMatrix_parcsr.c:3475-3475 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | NA |
CQA cycles if no scalar integer | NA |
CQA cycles if FP arith vectorized | NA |
CQA cycles if fully vectorized | NA |
Front-end cycles | NA |
DIV/SQRT cycles | NA |
P0 cycles | NA |
P1 cycles | NA |
P2 cycles | NA |
P3 cycles | NA |
P4 cycles | NA |
P5 cycles | NA |
P6 cycles | NA |
P7 cycles | NA |
P8 cycles | NA |
P9 cycles | NA |
P10 cycles | NA |
P11 cycles | NA |
P12 cycles | NA |
P13 cycles | NA |
P14 cycles | NA |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | NA |
Nb uops | NA |
Nb loads | NA |
Nb stores | NA |
Nb stack references | NA |
FLOP/cycle | NA |
Nb FLOP add-sub | NA |
Nb FLOP mul | NA |
Nb FLOP fma | NA |
Nb FLOP div | NA |
Nb FLOP rcp | NA |
Nb FLOP sqrt | NA |
Nb FLOP rsqrt | NA |
Bytes/cycle | NA |
Bytes prefetched | NA |
Bytes loaded | NA |
Bytes stored | NA |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | NA |
Vectorization ratio load | NA |
Vectorization ratio store | NA |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | NA |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | NA |
Vector-efficiency ratio all | NA |
Vector-efficiency ratio load | NA |
Vector-efficiency ratio store | NA |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | NA |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | NA |
Path / |
Function | .omp_outlined..21#0x49ccf0 |
Source file and lines | IJMatrix_parcsr.c:3291-3296,IJMatrix_parcsr.c:3300-3300,IJMatrix_parcsr.c:3365-3365,IJMatrix_parcsr.c:3376-3377,IJMatrix_parcsr.c:3383-3383,IJMatrix_parcsr.c:3406-3406,IJMatrix_parcsr.c:3410-3411,IJMatrix_parcsr.c:3440-3440,IJMatrix_parcsr.c:3444-3445,IJMatrix_parcsr.c:3457-3458,IJMatrix_parcsr.c:3466-3467,IJMatrix_parcsr.c:3475-3475 |
Module | exec |