Loop Id: 534 | Module: exec | Source: IJMatrix_parcsr.c:3291-3475 [...] | Coverage: 0.22% |
---|
Loop Id: 534 | Module: exec | Source: IJMatrix_parcsr.c:3291-3475 [...] | Coverage: 0.22% |
---|
0x4a140c ADD X26, X26, #1 |
0x4a1410 CMP X26, X30 |
0x4a1414 B.GE 4a1bb4 |
0x4a1418 LDR X8, [X23] |
0x4a141c LDR X9, [X13] |
0x4a1420 LDR X25, [X8, X26,LSL #3] |
0x4a1424 LDR X8, [X7] |
0x4a1428 LDR X20, [X8, X26,LSL #3] |
0x4a142c LDR X8, [X28] |
0x4a1430 LDR X10, [X8, X9,LSL #3] |
0x4a1434 SUBS X16, X25, X10 |
0x4a1438 B.LT 4a1500 |
0x4a143c ADD X8, X8, X9,LSL #3 |
0x4a1440 LDR X8, [X8, #8] |
0x4a1444 CMP X25, X8 |
0x4a1448 B.GE 4a1500 |
0x4a144c LDUR X8, [X29, #392] |
0x4a1450 STUR X26, [X29, #456] |
0x4a1454 LDR X8, [X8] |
0x4a1458 CBZ X8, 4a164c |
0x4a1500 LDR X8, [X11] |
0x4a1504 LDR X12, [X5] |
0x4a1508 ADD X24, X20, X24 |
0x4a150c CMP X8, #0 |
0x4a1510 CCMP X12, #1, #8, #1 |
0x4a1514 B.LT 4a140c |
0x4a1518 CMP X20, #1 |
0x4a151c B.LT 4a140c |
0x4a164c LDR X8, [X11] |
0x4a1650 UBFM X12, X16, #61, #60 |
0x4a1654 CMP X20, #1 |
0x4a1658 LDP X10, X9, [X8, #56] |
0x4a165c LDR X8, [X9, X12] |
0x4a1660 LDR X9, [X10, X12] |
0x4a1664 B.LT 4a19d4 |
0x4a1668 LDP X11, X13, [SP, #32] |
0x4a166c ORR X28, XZR, X16 |
0x4a1670 LDP X16, X18, [SP, #64] |
0x4a1674 LDR X17, [SP, #88] |
0x4a1678 ADD X14, X12, #8 |
0x4a167c ORR X10, XZR, XZR |
0x4a1680 ORR X26, XZR, X8 |
0x4a1684 LDR X15, [X6] |
0x4a1688 LDP X0, X1, [SP, #48] |
0x4a168c ORR X21, XZR, X9 |
0x4a1690 LDR X11, [X11] |
0x4a1694 LDR X13, [X13] |
0x4a1698 LDR X16, [X16] |
0x4a169c LDR X17, [X17] |
0x4a16a0 LDR X18, [X18] |
0x4a16a4 LDR X0, [X0] |
0x4a16a8 LDR X1, [X1] |
0x4a16ac LDR X12, [X11, X14] |
0x4a16b0 LDR X14, [X13, X14] |
0x4a16b4 B 4a16dc |
(539) 0x4a16c0 LDR D0, [X17, X24,LSL #3] |
(539) 0x4a16c4 LDP X5, X6, [X29, #984] |
(539) 0x4a16c8 STR D0, [X3] |
(539) 0x4a16cc ADD X10, X10, #1 |
(539) 0x4a16d0 ADD X24, X24, #1 |
(539) 0x4a16d4 CMP X10, X20 |
(539) 0x4a16d8 B.EQ 4a1b20 |
(539) 0x4a16dc LDP X3, X4, [X29, #1000] |
(539) 0x4a16e0 LDR X2, [X15, X24,LSL #3] |
(539) 0x4a16e4 LDR X3, [X3] |
(539) 0x4a16e8 LDR X4, [X4] |
(539) 0x4a16ec CMP X2, X3 |
(539) 0x4a16f0 CCMP X2, X4, #0, #10 |
(539) 0x4a16f4 B.LE 4a1760 |
(539) 0x4a16f8 LDR X4, [X13, X28,LSL #3] |
(539) 0x4a16fc CMP X4, X8 |
(539) 0x4a1700 B.GE 4a1740 |
(539) 0x4a1704 UBFM X5, X4, #61, #60 |
(539) 0x4a1708 SUB X4, X8, X4 |
(539) 0x4a170c ADD X3, X1, X5 |
(539) 0x4a1710 ADD X5, X0, X5 |
(539) 0x4a1714 HINT #0 |
(539) 0x4a1718 HINT #0 |
(539) 0x4a171c HINT #0 |
(541) 0x4a1720 LDR X6, [X5] |
(541) 0x4a1724 CMP X6, X2 |
(541) 0x4a1728 B.EQ 4a16c0 |
(541) 0x4a172c ADD X3, X3, #8 |
(541) 0x4a1730 SUBS X4, X4, #1 |
(541) 0x4a1734 ADD X5, X5, #8 |
(541) 0x4a1738 B.NE 4a1720 |
(539) 0x4a173c LDP X5, X6, [X29, #984] |
(539) 0x4a1740 CMP X26, X14 |
(539) 0x4a1744 B.GE 4a1a9c |
(539) 0x4a1748 LDR D0, [X17, X24,LSL #3] |
(539) 0x4a174c STR X2, [X0, X26,LSL #3] |
(539) 0x4a1750 STR D0, [X1, X26,LSL #3] |
(539) 0x4a1754 ADD X26, X26, #1 |
(539) 0x4a1758 B 4a16cc |
(539) 0x4a1760 LDR X4, [X11, X28,LSL #3] |
(539) 0x4a1764 CMP X4, X9 |
(539) 0x4a1768 B.GE 4a17a0 |
(539) 0x4a176c UBFM X5, X4, #61, #60 |
(539) 0x4a1770 SUB X4, X9, X4 |
(539) 0x4a1774 ADD X3, X18, X5 |
(539) 0x4a1778 ADD X5, X16, X5 |
(539) 0x4a177c HINT #0 |
(540) 0x4a1780 LDR X6, [X5] |
(540) 0x4a1784 CMP X6, X2 |
(540) 0x4a1788 B.EQ 4a16c0 |
(540) 0x4a178c ADD X3, X3, #8 |
(540) 0x4a1790 SUBS X4, X4, #1 |
(540) 0x4a1794 ADD X5, X5, #8 |
(540) 0x4a1798 B.NE 4a1780 |
(539) 0x4a179c LDP X5, X6, [X29, #984] |
(539) 0x4a17a0 CMP X21, X12 |
(539) 0x4a17a4 B.GE 4a1ad8 |
(539) 0x4a17a8 LDR D0, [X17, X24,LSL #3] |
(539) 0x4a17ac STR X2, [X16, X21,LSL #3] |
(539) 0x4a17b0 STR D0, [X18, X21,LSL #3] |
(539) 0x4a17b4 ADD X21, X21, #1 |
(539) 0x4a17b8 B 4a16cc |
0x4a19d4 ORR X21, XZR, X9 |
0x4a19d8 ORR X26, XZR, X8 |
0x4a19dc B 4a1b2c |
0x4a1a9c ORR X3, XZR, XZR |
0x4a1aa0 ADRP X0, |
0x4a1aa4 ADD X0, X0, #2359 |
0x4a1aa8 MOVZ W1, #3406 |
0x4a1aac MOVZ W2, #1 |
0x4a1ab0 BL 4b4630 |
0x4a1ab4 LDR X8, [SP, #16] |
0x4a1ab8 MOVZ W9, #1 |
0x4a1abc ADRP X0, |
0x4a1ac0 ADD X0, X0, #2700 |
0x4a1ac4 LDADD X9, X8, [X8] |
0x4a1ac8 LDR X8, [SP, #24] |
0x4a1acc LDR X8, [X8] |
0x4a1ad0 CBNZ X8, 4a1b10 |
0x4a1ad8 ORR X3, XZR, XZR |
0x4a1adc ADRP X0, |
0x4a1ae0 ADD X0, X0, #2359 |
0x4a1ae4 MOVZ W1, #3440 |
0x4a1ae8 MOVZ W2, #1 |
0x4a1aec BL 4b4630 |
0x4a1af0 LDR X8, [SP, #16] |
0x4a1af4 MOVZ W9, #1 |
0x4a1af8 ADRP X0, |
0x4a1afc ADD X0, X0, #2738 |
0x4a1b00 LDADD X9, X8, [X8] |
0x4a1b04 LDR X8, [SP, #24] |
0x4a1b08 LDR X8, [X8] |
0x4a1b0c CBZ X8, 4a1b18 |
0x4a1b10 ORR X1, XZR, X25 |
0x4a1b14 BL 4b2580 |
0x4a1b18 LDP X30, X7, [X29, #920] |
0x4a1b1c LDP X5, X6, [X29, #984] |
0x4a1b20 LDP X13, X11, [X29, #952] |
0x4a1b24 ORR X16, XZR, X28 |
0x4a1b28 LDR X28, [SP, #112] |
0x4a1b2c LDR X8, [X11] |
0x4a1b30 LDP X9, X8, [X8, #56] |
0x4a1b34 STR X21, [X9, X16,LSL #3] |
0x4a1b38 STR X26, [X8, X16,LSL #3] |
0x4a1b3c LDUR X26, [X29, #456] |
0x4a1b40 LDUR X21, [X29, #432] |
0x4a1b44 B 4a140c |
/home/hbollore/qaas/qaas-runs/169-817-3176/intel/AMG/build/AMG/AMG/IJ_mv/IJMatrix_parcsr.c: 3291 - 3475 |
-------------------------------------------------------------------------------- |
3291: for (ii=ns; ii < ne; ii++) |
3292: { |
3293: row = rows[ii]; |
3294: n = ncols[ii]; |
3295: /* processor owns the row */ |
3296: if (row >= row_partitioning[pstart] && row < row_partitioning[pstart+1]) |
3297: { |
3298: row_local = row - row_partitioning[pstart]; |
3299: /* compute local row number */ |
3300: if (need_aux) |
[...] |
3365: if (tmp_j) |
[...] |
3376: offd_indx = hypre_AuxParCSRMatrixIndxOffd(aux_matrix)[row_local]; |
3377: diag_indx = hypre_AuxParCSRMatrixIndxDiag(aux_matrix)[row_local]; |
[...] |
3383: for (i=0; i < n; i++) |
3384: { |
3385: if (cols[indx] < col_0 || cols[indx] > col_n) |
3386: /* insert into offd */ |
3387: { |
3388: for (j=offd_i[row_local]; j < offd_indx; j++) |
3389: { |
3390: if (offd_j[j] == cols[indx]) |
[...] |
3399: if (cnt_offd < offd_space) |
3400: { |
3401: offd_j[cnt_offd] = cols[indx]; |
3402: offd_data[cnt_offd++] = values[indx]; |
3403: } |
3404: else |
3405: { |
3406: hypre_error(HYPRE_ERROR_GENERIC); |
3407: #ifdef HYPRE_USING_OPENMP |
3408: #pragma omp atomic |
3409: #endif |
3410: error_flag++; |
3411: if (print_level) |
[...] |
3422: for (j=diag_i[row_local]; j < diag_indx; j++) |
3423: { |
3424: if (diag_j[j] == cols[indx]) |
[...] |
3433: if (cnt_diag < diag_space) |
3434: { |
3435: diag_j[cnt_diag] = cols[indx]; |
3436: diag_data[cnt_diag++] = values[indx]; |
3437: } |
3438: else |
3439: { |
3440: hypre_error(HYPRE_ERROR_GENERIC); |
3441: #ifdef HYPRE_USING_OPENMP |
3442: #pragma omp atomic |
3443: #endif |
3444: error_flag++; |
3445: if (print_level) |
[...] |
3454: indx++; |
3455: } |
3456: |
3457: hypre_AuxParCSRMatrixIndxDiag(aux_matrix)[row_local] = cnt_diag; |
3458: hypre_AuxParCSRMatrixIndxOffd(aux_matrix)[row_local] = cnt_offd; |
[...] |
3466: indx += n; |
3467: if (aux_matrix) |
[...] |
3475: for (j=0; j < n; j++) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
○100.00 | __kmp_invoke_microtask | | libomp.so |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | NA |
CQA speedup if FP arith vectorized | NA |
CQA speedup if fully vectorized | NA |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | NA |
Bottlenecks | NA |
Function | .omp_outlined..21#0x4a1130 |
Source | IJMatrix_parcsr.c:3291-3296,IJMatrix_parcsr.c:3300-3300,IJMatrix_parcsr.c:3365-3365,IJMatrix_parcsr.c:3376-3377,IJMatrix_parcsr.c:3383-3383,IJMatrix_parcsr.c:3406-3406,IJMatrix_parcsr.c:3410-3411,IJMatrix_parcsr.c:3440-3440,IJMatrix_parcsr.c:3444-3445,IJMatrix_parcsr.c:3457-3458,IJMatrix_parcsr.c:3466-3467,IJMatrix_parcsr.c:3475-3475 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | NA |
CQA cycles if no scalar integer | NA |
CQA cycles if FP arith vectorized | NA |
CQA cycles if fully vectorized | NA |
Front-end cycles | NA |
DIV/SQRT cycles | NA |
P0 cycles | NA |
P1 cycles | NA |
P2 cycles | NA |
P3 cycles | NA |
P4 cycles | NA |
P5 cycles | NA |
P6 cycles | NA |
P7 cycles | NA |
P8 cycles | NA |
P9 cycles | NA |
P10 cycles | NA |
P11 cycles | NA |
P12 cycles | NA |
P13 cycles | NA |
P14 cycles | NA |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | NA |
Nb uops | NA |
Nb loads | NA |
Nb stores | NA |
Nb stack references | NA |
FLOP/cycle | NA |
Nb FLOP add-sub | NA |
Nb FLOP mul | NA |
Nb FLOP fma | NA |
Nb FLOP div | NA |
Nb FLOP rcp | NA |
Nb FLOP sqrt | NA |
Nb FLOP rsqrt | NA |
Bytes/cycle | NA |
Bytes prefetched | NA |
Bytes loaded | NA |
Bytes stored | NA |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | NA |
Vectorization ratio load | NA |
Vectorization ratio store | NA |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | NA |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | NA |
Vector-efficiency ratio all | NA |
Vector-efficiency ratio load | NA |
Vector-efficiency ratio store | NA |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | NA |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | NA |
Path / |
Function | .omp_outlined..21#0x4a1130 |
Source file and lines | IJMatrix_parcsr.c:3291-3296,IJMatrix_parcsr.c:3300-3300,IJMatrix_parcsr.c:3365-3365,IJMatrix_parcsr.c:3376-3377,IJMatrix_parcsr.c:3383-3383,IJMatrix_parcsr.c:3406-3406,IJMatrix_parcsr.c:3410-3411,IJMatrix_parcsr.c:3440-3440,IJMatrix_parcsr.c:3444-3445,IJMatrix_parcsr.c:3457-3458,IJMatrix_parcsr.c:3466-3467,IJMatrix_parcsr.c:3475-3475 |
Module | exec |