| Loop Id: 3433 | Module: exec | Source: IJMatrix_parcsr.c:3291-3484 [...] | Coverage: 0.03% |
|---|
| Loop Id: 3433 | Module: exec | Source: IJMatrix_parcsr.c:3291-3484 [...] | Coverage: 0.03% |
|---|
0x49bed4 ORR X26, XZR, X9 |
0x49bed8 ORR X21, XZR, X8 |
0x49bedc LDP X4, X3, [X29, #904] |
0x49bee0 LDR X8, [X4] |
0x49bee4 LDP X9, X8, [X8, #56] |
0x49bee8 STR X26, [X9, X5,LSL #3] |
0x49beec STR X21, [X8, X5,LSL #3] |
0x49bef0 LDP X26, X21, [X29, #920] |
0x49bef4 HINT #0 |
0x49bef8 HINT #0 |
0x49befc HINT #0 |
0x49bf00 LDUR X10, [X29, #456] |
(3432) 0x49bf04 ADD X30, X30, #1 |
(3432) 0x49bf08 CMP X30, X7 |
(3432) 0x49bf0c B.GE 49c65c |
(3432) 0x49bf10 LDR X8, [X21] |
(3432) 0x49bf14 LDR X9, [X10] |
(3432) 0x49bf18 LDR X25, [X8, X30,LSL #3] |
(3432) 0x49bf1c LDR X8, [X23] |
(3432) 0x49bf20 LDR X20, [X8, X30,LSL #3] |
(3432) 0x49bf24 LDR X8, [X28] |
(3432) 0x49bf28 ADD X8, X8, X9,LSL #3 |
(3432) 0x49bf2c LDR X9, [X8] |
(3432) 0x49bf30 SUBS X5, X25, X9 |
(3432) 0x49bf34 B.LT 49c000 |
(3432) 0x49bf38 LDR X8, [X8, #8] |
(3432) 0x49bf3c CMP X25, X8 |
(3432) 0x49bf40 B.GE 49c000 |
0x49bf44 LDR X8, [SP, #128] |
0x49bf48 STUR X5, [X29, #488] |
0x49bf4c LDR X8, [X8] |
0x49bf50 CBZ X8, 49c16c |
(3432) 0x49c000 LDR X8, [X4] |
(3432) 0x49c004 LDR X12, [X3] |
(3432) 0x49c008 ADD X24, X20, X24 |
(3432) 0x49c00c CMP X8, #0 |
(3432) 0x49c010 CCMP X12, #1, #8, #1 |
(3432) 0x49c014 B.LT 49bf04 |
(3432) 0x49c018 CMP X20, #1 |
(3432) 0x49c01c B.LT 49bf04 |
0x49c020 LDR X10, [SP, #120] |
0x49c024 ORR X8, XZR, XZR |
0x49c028 ORR X9, XZR, XZR |
0x49c02c LDR X10, [X10] |
0x49c030 B 49c054 |
(3434) 0x49c040 LDR X12, [X3] |
(3434) 0x49c044 ADD X9, X9, #2 |
(3434) 0x49c048 ADD X8, X11, X8 |
(3434) 0x49c04c CMP X9, X12 |
(3434) 0x49c050 B.GE 49bf00 |
(3434) 0x49c054 ADD X13, X10, X9,LSL #3 |
(3434) 0x49c058 LDP X13, X11, [X13] |
(3434) 0x49c05c CMP X13, X25 |
(3434) 0x49c060 B.NE 49c044 |
(3434) 0x49c064 CMP X11, #1 |
(3434) 0x49c068 B.LT 49c044 |
(3434) 0x49c06c LDUR X14, [X29, #496] |
(3434) 0x49c070 LDR X13, [X26] |
(3434) 0x49c074 AND X15, X11, #0x0 |
(3434) 0x49c078 ORR X12, XZR, XZR |
(3434) 0x49c07c LDR X14, [X14] |
(3434) 0x49c080 ADD X16, X13, X8,LSL #3 |
(3434) 0x49c084 ADD X16, X16, #8 |
(3434) 0x49c088 B 49c098 |
(3435) 0x49c08c ADD X12, X12, #1 |
(3435) 0x49c090 CMP X12, X20 |
(3435) 0x49c094 B.EQ 49c040 |
(3435) 0x49c098 LDR X17, [X19] |
(3435) 0x49c09c CMP X11, #1 |
(3435) 0x49c0a0 ORR X18, XZR, X8 |
(3435) 0x49c0a4 B.EQ 49c148 |
(3435) 0x49c0a8 AND X18, X11, #0x0 |
(3435) 0x49c0ac ORR X0, XZR, X16 |
(3435) 0x49c0b0 B 49c0cc |
(3436) 0x49c0c0 ADD X0, X0, #16 |
(3436) 0x49c0c4 SUBS X18, X18, #2 |
(3436) 0x49c0c8 B.EQ 49c140 |
(3436) 0x49c0cc LDUR X1, [X0, #504] |
(3436) 0x49c0d0 LDR X2, [X14, X12,LSL #3] |
(3436) 0x49c0d4 CMP X1, X2 |
(3436) 0x49c0d8 B.EQ 49c100 |
(3436) 0x49c0dc LDR X1, [X0] |
(3436) 0x49c0e0 LDR X2, [X14, X12,LSL #3] |
(3436) 0x49c0e4 CMP X1, X2 |
(3436) 0x49c0e8 B.NE 49c0c0 |
(3436) 0x49c0ec B 49c120 |
(3436) 0x49c100 STUR X27, [X0, #504] |
(3436) 0x49c104 LDR X1, [X17, X22,LSL #3] |
(3436) 0x49c108 ADD X1, X1, #1 |
(3436) 0x49c10c STR X1, [X17, X22,LSL #3] |
(3436) 0x49c110 LDR X1, [X0] |
(3436) 0x49c114 LDR X2, [X14, X12,LSL #3] |
(3436) 0x49c118 CMP X1, X2 |
(3436) 0x49c11c B.NE 49c0c0 |
(3436) 0x49c120 STR X27, [X0] |
(3436) 0x49c124 LDR X1, [X17, X22,LSL #3] |
(3436) 0x49c128 ADD X1, X1, #1 |
(3436) 0x49c12c STR X1, [X17, X22,LSL #3] |
(3436) 0x49c130 B 49c0c0 |
(3435) 0x49c140 ADD X18, X8, X15 |
(3435) 0x49c144 TBZ W11, #0, 49c08c |
(3435) 0x49c148 LDR X0, [X13, X18,LSL #3] |
(3435) 0x49c14c LDR X1, [X14, X12,LSL #3] |
(3435) 0x49c150 CMP X0, X1 |
(3435) 0x49c154 B.NE 49c08c |
(3435) 0x49c158 STR X27, [X13, X18,LSL #3] |
(3435) 0x49c15c LDR X18, [X17, X22,LSL #3] |
(3435) 0x49c160 ADD X18, X18, #1 |
(3435) 0x49c164 STR X18, [X17, X22,LSL #3] |
(3435) 0x49c168 B 49c08c |
0x49c16c LDR X8, [X4] |
0x49c170 UBFM X12, X5, #61, #60 |
0x49c174 CMP X20, #1 |
0x49c178 LDP X10, X9, [X8, #56] |
0x49c17c LDR X8, [X9, X12] |
0x49c180 LDR X9, [X10, X12] |
0x49c184 B.LT 49bed4 |
0x49c188 LDP X11, X13, [SP, #24] |
0x49c18c LDUR X15, [X29, #496] |
0x49c190 ADD X14, X12, #8 |
0x49c194 ORR X10, XZR, XZR |
0x49c198 ORR X21, XZR, X8 |
0x49c19c ORR X26, XZR, X9 |
0x49c1a0 LDR X17, [SP, #80] |
0x49c1a4 LDP X16, X18, [SP, #56] |
0x49c1a8 LDR X11, [X11] |
0x49c1ac LDR X13, [X13] |
0x49c1b0 LDP X0, X1, [SP, #40] |
0x49c1b4 LDR X15, [X15] |
0x49c1b8 LDR X16, [X16] |
0x49c1bc LDR X17, [X17] |
0x49c1c0 LDR X18, [X18] |
0x49c1c4 LDR X0, [X0] |
0x49c1c8 LDR X1, [X1] |
0x49c1cc LDR X12, [X11, X14] |
0x49c1d0 LDR X14, [X13, X14] |
0x49c1d4 B 49c1fc |
(3437) 0x49c1e0 LDR D0, [X17, X24,LSL #3] |
(3437) 0x49c1e4 STR D0, [X4] |
(3437) 0x49c1e8 LDUR X5, [X29, #488] |
(3437) 0x49c1ec ADD X10, X10, #1 |
(3437) 0x49c1f0 ADD X24, X24, #1 |
(3437) 0x49c1f4 CMP X10, X20 |
(3437) 0x49c1f8 B.EQ 49bedc |
(3437) 0x49c1fc LDP X3, X4, [X29, #984] |
(3437) 0x49c200 LDR X2, [X15, X24,LSL #3] |
(3437) 0x49c204 LDR X3, [X3] |
(3437) 0x49c208 LDR X4, [X4] |
(3437) 0x49c20c CMP X2, X3 |
(3437) 0x49c210 CCMP X2, X4, #0, #10 |
(3437) 0x49c214 B.LE 49c260 |
(3437) 0x49c218 LDR X5, [X13, X5,LSL #3] |
(3437) 0x49c21c SUBS X3, X8, X5 |
(3437) 0x49c220 B.LE 49c244 |
(3437) 0x49c224 ADD X4, X1, X5,LSL #3 |
(3437) 0x49c228 ADD X5, X0, X5,LSL #3 |
(3439) 0x49c22c LDR X6, [X5], #8 |
(3439) 0x49c230 CMP X6, X2 |
(3439) 0x49c234 B.EQ 49c1e0 |
(3439) 0x49c238 ADD X4, X4, #8 |
(3439) 0x49c23c SUBS X3, X3, #1 |
(3439) 0x49c240 B.NE 49c22c |
(3437) 0x49c244 CMP X21, X14 |
(3437) 0x49c248 B.GE 49c514 |
(3437) 0x49c24c LDR D0, [X17, X24,LSL #3] |
(3437) 0x49c250 STR X2, [X0, X21,LSL #3] |
(3437) 0x49c254 STR D0, [X1, X21,LSL #3] |
(3437) 0x49c258 ADD X21, X21, #1 |
(3437) 0x49c25c B 49c1e8 |
(3437) 0x49c260 LDR X5, [X11, X5,LSL #3] |
(3437) 0x49c264 SUBS X3, X9, X5 |
(3437) 0x49c268 B.LE 49c298 |
(3437) 0x49c26c ADD X4, X18, X5,LSL #3 |
(3437) 0x49c270 ADD X5, X16, X5,LSL #3 |
(3437) 0x49c274 HINT #0 |
(3437) 0x49c278 HINT #0 |
(3437) 0x49c27c HINT #0 |
(3438) 0x49c280 LDR X6, [X5], #8 |
(3438) 0x49c284 CMP X6, X2 |
(3438) 0x49c288 B.EQ 49c1e0 |
(3438) 0x49c28c ADD X4, X4, #8 |
(3438) 0x49c290 SUBS X3, X3, #1 |
(3438) 0x49c294 B.NE 49c280 |
(3437) 0x49c298 CMP X26, X12 |
(3437) 0x49c29c B.GE 49c568 |
(3437) 0x49c2a0 LDR D0, [X17, X24,LSL #3] |
(3437) 0x49c2a4 STR X2, [X16, X26,LSL #3] |
(3437) 0x49c2a8 STR D0, [X18, X26,LSL #3] |
(3437) 0x49c2ac ADD X26, X26, #1 |
(3437) 0x49c2b0 B 49c1e8 |
0x49c514 ADRP X0, |
0x49c518 ADD X0, X0, #1344 |
0x49c51c MOVZ W1, #3406 |
0x49c520 MOVZ W2, #1 |
0x49c524 ORR X20, XZR, X30 |
0x49c528 ORR X3, XZR, XZR |
0x49c52c BL 4aeb20 |
0x49c530 LDR X8, [SP, #8] |
0x49c534 MOVZ W9, #1 |
0x49c538 ADRP X0, |
0x49c53c ADD X0, X0, #1684 |
0x49c540 LDADD X9, X8, [X8] |
0x49c544 LDR X8, [SP, #16] |
0x49c548 LDR X8, [X8] |
0x49c54c CBZ X8, 49c558 |
0x49c550 ORR X1, XZR, X25 |
0x49c554 BL 4ac6c0 |
0x49c558 LDUR X7, [X29, #424] |
0x49c55c LDUR X5, [X29, #488] |
0x49c560 ORR X30, XZR, X20 |
0x49c564 B 49bedc |
0x49c568 ADRP X0, |
0x49c56c ADD X0, X0, #1344 |
0x49c570 MOVZ W1, #3440 |
0x49c574 MOVZ W2, #1 |
0x49c578 ORR X20, XZR, X30 |
0x49c57c ORR X3, XZR, XZR |
0x49c580 BL 4aeb20 |
0x49c584 LDR X8, [SP, #8] |
0x49c588 MOVZ W9, #1 |
0x49c58c ADRP X0, |
0x49c590 ADD X0, X0, #1722 |
0x49c594 LDADD X9, X8, [X8] |
0x49c598 LDR X8, [SP, #16] |
0x49c59c LDR X8, [X8] |
0x49c5a0 CBNZ X8, 49c550 |
0x49c5a4 B 49c558 |
/home/eoseret/qaas/qaas_runs/178-188-3659/intel/AMG/build/AMG/AMG/IJ_mv/IJMatrix_parcsr.c: 3291 - 3484 |
-------------------------------------------------------------------------------- |
3291: for (ii=ns; ii < ne; ii++) |
3292: { |
3293: row = rows[ii]; |
3294: n = ncols[ii]; |
3295: /* processor owns the row */ |
3296: if (row >= row_partitioning[pstart] && row < row_partitioning[pstart+1]) |
3297: { |
3298: row_local = row - row_partitioning[pstart]; |
3299: /* compute local row number */ |
3300: if (need_aux) |
[...] |
3376: offd_indx = hypre_AuxParCSRMatrixIndxOffd(aux_matrix)[row_local]; |
3377: diag_indx = hypre_AuxParCSRMatrixIndxDiag(aux_matrix)[row_local]; |
[...] |
3383: for (i=0; i < n; i++) |
3384: { |
3385: if (cols[indx] < col_0 || cols[indx] > col_n) |
3386: /* insert into offd */ |
3387: { |
3388: for (j=offd_i[row_local]; j < offd_indx; j++) |
3389: { |
3390: if (offd_j[j] == cols[indx]) |
[...] |
3399: if (cnt_offd < offd_space) |
3400: { |
3401: offd_j[cnt_offd] = cols[indx]; |
3402: offd_data[cnt_offd++] = values[indx]; |
3403: } |
3404: else |
3405: { |
3406: hypre_error(HYPRE_ERROR_GENERIC); |
3407: #ifdef HYPRE_USING_OPENMP |
3408: #pragma omp atomic |
3409: #endif |
3410: error_flag++; |
3411: if (print_level) |
[...] |
3417: } |
3418: not_found = 1; |
3419: } |
3420: else /* insert into diag */ |
3421: { |
3422: for (j=diag_i[row_local]; j < diag_indx; j++) |
3423: { |
3424: if (diag_j[j] == cols[indx]) |
[...] |
3433: if (cnt_diag < diag_space) |
3434: { |
3435: diag_j[cnt_diag] = cols[indx]; |
3436: diag_data[cnt_diag++] = values[indx]; |
3437: } |
3438: else |
3439: { |
3440: hypre_error(HYPRE_ERROR_GENERIC); |
3441: #ifdef HYPRE_USING_OPENMP |
3442: #pragma omp atomic |
3443: #endif |
3444: error_flag++; |
3445: if (print_level) |
[...] |
3454: indx++; |
3455: } |
3456: |
3457: hypre_AuxParCSRMatrixIndxDiag(aux_matrix)[row_local] = cnt_diag; |
3458: hypre_AuxParCSRMatrixIndxOffd(aux_matrix)[row_local] = cnt_offd; |
[...] |
3466: indx += n; |
3467: if (aux_matrix) |
3468: { |
3469: col_indx = 0; |
3470: for (i=0; i < off_proc_i_indx; i=i+2) |
3471: { |
3472: row_len = off_proc_i[i+1]; |
3473: if (off_proc_i[i] == row) |
3474: { |
3475: for (j=0; j < n; j++) |
3476: { |
3477: cnt1 = col_indx; |
3478: for (k=0; k < row_len; k++) |
3479: { |
3480: if (off_proc_j[cnt1] == cols[j]) |
3481: { |
3482: off_proc_j[cnt1++] = -1; |
3483: /*cancel_indx++;*/ |
3484: offproc_cnt[my_thread_num]++; |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►98.62+ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_launch_thread | libomp.so | |
| ○ | __kmp_launch_worker(void*) | libomp.so | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►1.38+ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_fork_call | libomp.so | |
| ○ | __kmpc_fork_call | libomp.so | |
| ○ | hypre_IJMatrixSetValuesOMPParC[...] | IJMatrix_parcsr.c:3509 | exec |
| ○ | BuildIJLaplacian27pt | amg.c:2267 | exec |
| ○ | main | amg.c:274 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | exec |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| Path / |
| Metric | Value |
|---|---|
| CQA speedup if no scalar integer | 1.00 |
| CQA speedup if FP arith vectorized | 1.00 |
| CQA speedup if fully vectorized | 6.87 |
| CQA speedup if no inter-iteration dependency | NA |
| CQA speedup if next bottleneck killed | 1.01 |
| Bottlenecks | P10, |
| Function | hypre_IJMatrixSetValuesOMPParCSR.omp_outlined.14 |
| Source | IJMatrix_parcsr.c:3291-3291,IJMatrix_parcsr.c:3296-3296,IJMatrix_parcsr.c:3300-3300,IJMatrix_parcsr.c:3376-3377,IJMatrix_parcsr.c:3383-3383,IJMatrix_parcsr.c:3406-3406,IJMatrix_parcsr.c:3410-3411,IJMatrix_parcsr.c:3440-3440,IJMatrix_parcsr.c:3444-3445,IJMatrix_parcsr.c:3457-3458,IJMatrix_parcsr.c:3467-3467 |
| Source loop unroll info | NA |
| Source loop unroll confidence level | NA |
| Unroll/vectorization loop type | NA |
| Unroll factor | NA |
| CQA cycles | 13.17 |
| CQA cycles if no scalar integer | 13.17 |
| CQA cycles if FP arith vectorized | 13.17 |
| CQA cycles if fully vectorized | 1.92 |
| Front-end cycles | 10.00 |
| P0 cycles | 5.50 |
| P1 cycles | 5.50 |
| P2 cycles | 8.25 |
| P3 cycles | 8.25 |
| P4 cycles | 8.25 |
| P5 cycles | 8.25 |
| P6 cycles | 0.00 |
| P7 cycles | 0.00 |
| P8 cycles | 0.00 |
| P9 cycles | 0.00 |
| P10 cycles | 13.17 |
| P11 cycles | 12.83 |
| P12 cycles | 13.00 |
| P13 cycles | 1.50 |
| P14 cycles | 1.50 |
| DIV/SQRT cycles | 0.00 |
| Inter-iter dependencies cycles | NA |
| FE+BE cycles (UFS) | NA |
| Stall cycles (UFS) | NA |
| Nb insns | 85.00 |
| Nb uops | 80.00 |
| Nb loads | NA |
| Nb stores | 5.00 |
| Nb stack references | 10.00 |
| FLOP/cycle | 0.00 |
| Nb FLOP add-sub | 0.00 |
| Nb FLOP mul | 0.00 |
| Nb FLOP fma | 0.00 |
| Nb FLOP div | 0.00 |
| Nb FLOP rcp | 0.00 |
| Nb FLOP sqrt | 0.00 |
| Nb FLOP rsqrt | 0.00 |
| Bytes/cycle | 2.43 |
| Bytes prefetched | 0.00 |
| Bytes loaded | 16.00 |
| Bytes stored | 16.00 |
| Stride 0 | NA |
| Stride 1 | NA |
| Stride n | NA |
| Stride unknown | NA |
| Stride indirect | NA |
| Vectorization ratio all | 0.00 |
| Vectorization ratio load | 0.00 |
| Vectorization ratio store | 0.00 |
| Vectorization ratio mul | NA |
| Vectorization ratio add_sub | NA |
| Vectorization ratio fma | NA |
| Vectorization ratio div_sqrt | NA |
| Vectorization ratio other | 0.00 |
| Vector-efficiency ratio all | 25.00 |
| Vector-efficiency ratio load | 37.50 |
| Vector-efficiency ratio store | 25.00 |
| Vector-efficiency ratio mul | NA |
| Vector-efficiency ratio add_sub | NA |
| Vector-efficiency ratio fma | NA |
| Vector-efficiency ratio div_sqrt | NA |
| Vector-efficiency ratio other | 22.92 |
| Metric | Value |
|---|---|
| CQA speedup if no scalar integer | 1.00 |
| CQA speedup if FP arith vectorized | 1.00 |
| CQA speedup if fully vectorized | 6.87 |
| CQA speedup if no inter-iteration dependency | NA |
| CQA speedup if next bottleneck killed | 1.01 |
| Bottlenecks | P10, |
| Function | hypre_IJMatrixSetValuesOMPParCSR.omp_outlined.14 |
| Source | IJMatrix_parcsr.c:3291-3291,IJMatrix_parcsr.c:3296-3296,IJMatrix_parcsr.c:3300-3300,IJMatrix_parcsr.c:3376-3377,IJMatrix_parcsr.c:3383-3383,IJMatrix_parcsr.c:3406-3406,IJMatrix_parcsr.c:3410-3411,IJMatrix_parcsr.c:3440-3440,IJMatrix_parcsr.c:3444-3445,IJMatrix_parcsr.c:3457-3458,IJMatrix_parcsr.c:3467-3467 |
| Source loop unroll info | NA |
| Source loop unroll confidence level | NA |
| Unroll/vectorization loop type | NA |
| Unroll factor | NA |
| CQA cycles | 13.17 |
| CQA cycles if no scalar integer | 13.17 |
| CQA cycles if FP arith vectorized | 13.17 |
| CQA cycles if fully vectorized | 1.92 |
| Front-end cycles | 10.00 |
| P0 cycles | 5.50 |
| P1 cycles | 5.50 |
| P2 cycles | 8.25 |
| P3 cycles | 8.25 |
| P4 cycles | 8.25 |
| P5 cycles | 8.25 |
| P6 cycles | 0.00 |
| P7 cycles | 0.00 |
| P8 cycles | 0.00 |
| P9 cycles | 0.00 |
| P10 cycles | 13.17 |
| P11 cycles | 12.83 |
| P12 cycles | 13.00 |
| P13 cycles | 1.50 |
| P14 cycles | 1.50 |
| DIV/SQRT cycles | 0.00 |
| Inter-iter dependencies cycles | NA |
| FE+BE cycles (UFS) | NA |
| Stall cycles (UFS) | NA |
| Nb insns | 85.00 |
| Nb uops | 80.00 |
| Nb loads | NA |
| Nb stores | 5.00 |
| Nb stack references | 10.00 |
| FLOP/cycle | 0.00 |
| Nb FLOP add-sub | 0.00 |
| Nb FLOP mul | 0.00 |
| Nb FLOP fma | 0.00 |
| Nb FLOP div | 0.00 |
| Nb FLOP rcp | 0.00 |
| Nb FLOP sqrt | 0.00 |
| Nb FLOP rsqrt | 0.00 |
| Bytes/cycle | 2.43 |
| Bytes prefetched | 0.00 |
| Bytes loaded | 16.00 |
| Bytes stored | 16.00 |
| Stride 0 | NA |
| Stride 1 | NA |
| Stride n | NA |
| Stride unknown | NA |
| Stride indirect | NA |
| Vectorization ratio all | 0.00 |
| Vectorization ratio load | 0.00 |
| Vectorization ratio store | 0.00 |
| Vectorization ratio mul | NA |
| Vectorization ratio add_sub | NA |
| Vectorization ratio fma | NA |
| Vectorization ratio div_sqrt | NA |
| Vectorization ratio other | 0.00 |
| Vector-efficiency ratio all | 25.00 |
| Vector-efficiency ratio load | 37.50 |
| Vector-efficiency ratio store | 25.00 |
| Vector-efficiency ratio mul | NA |
| Vector-efficiency ratio add_sub | NA |
| Vector-efficiency ratio fma | NA |
| Vector-efficiency ratio div_sqrt | NA |
| Vector-efficiency ratio other | 22.92 |
| Path / |
| Function | hypre_IJMatrixSetValuesOMPParCSR.omp_outlined.14 |
| Source file and lines | IJMatrix_parcsr.c:3291-3484 |
| Module | exec |
| nb instructions | 85 |
| nb uops | 80 |
| loop length | 340 |
| used w registers | 3 |
| used x registers | 24 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 0 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 10 |
| micro-operation queue | 10.00 cycles |
| front end | 10.00 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 5.50 | 5.50 | 8.25 | 8.25 | 8.25 | 8.25 | 0.00 | 0.00 | 0.00 | 0.00 | 13.17 | 12.83 | 13.00 | 1.50 | 1.50 |
| cycles | 5.50 | 5.50 | 8.25 | 8.25 | 8.25 | 8.25 | 0.00 | 0.00 | 0.00 | 0.00 | 13.17 | 12.83 | 13.00 | 1.50 | 1.50 |
| Cycles executing div or sqrt instructions | NA |
| Front-end | 10.00 |
| Dispatch | 13.17 |
| Overall L1 | 13.17 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 25% |
| load | 37% |
| store | 25% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 22% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| ORR X26, XZR, X9 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ORR X21, XZR, X8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| LDP X4, X3, [X29, #904] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDR X8, [X4] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDP X9, X8, [X8, #56] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| STR X26, [X9, X5,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| STR X21, [X8, X5,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| LDP X26, X21, [X29, #920] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| LDUR X10, [X29, #456] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X8, [SP, #128] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| STUR X5, [X29, #488] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| LDR X8, [X8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| CBZ X8, 49c16c <hypre_IJMatrixSetValuesOMPParCSR.omp_outlined.14+0x53c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X10, [SP, #120] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ORR X8, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ORR X9, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| LDR X10, [X10] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| B 49c054 <hypre_IJMatrixSetValuesOMPParCSR.omp_outlined.14+0x424> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X8, [X4] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| UBFM X12, X5, #61, #60 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| CMP X20, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (25.0%) |
| LDP X10, X9, [X8, #56] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDR X8, [X9, X12] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X9, [X10, X12] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| B.LT 49bed4 <hypre_IJMatrixSetValuesOMPParCSR.omp_outlined.14+0x2a4> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP X11, X13, [SP, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDUR X15, [X29, #496] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ADD X14, X12, #8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ORR X10, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ORR X21, XZR, X8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ORR X26, XZR, X9 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| LDR X17, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDP X16, X18, [SP, #56] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDR X11, [X11] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X13, [X13] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDP X0, X1, [SP, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDR X15, [X15] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X16, [X16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X17, [X17] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X18, [X18] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X0, [X0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X1, [X1] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X12, [X11, X14] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X14, [X13, X14] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| B 49c1fc <hypre_IJMatrixSetValuesOMPParCSR.omp_outlined.14+0x5cc> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADRP X0, <4c2514> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADD X0, X0, #1344 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W1, #3406 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W2, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ORR X20, XZR, X30 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ORR X3, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| BL 4aeb20 <hypre_error_handler> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X8, [SP, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| MOVZ W9, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADRP X0, <4c2538> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADD X0, X0, #1684 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| LDADD X9, X8, [X8] | N/A | ||||||||||||||||||
| LDR X8, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X8, [X8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| CBZ X8, 49c558 <hypre_IJMatrixSetValuesOMPParCSR.omp_outlined.14+0x928> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR X1, XZR, X25 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| BL 4ac6c0 <hypre_printf> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDUR X7, [X29, #424] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDUR X5, [X29, #488] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ORR X30, XZR, X20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| B 49bedc <hypre_IJMatrixSetValuesOMPParCSR.omp_outlined.14+0x2ac> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADRP X0, <4c2568> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADD X0, X0, #1344 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W1, #3440 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W2, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ORR X20, XZR, X30 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ORR X3, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| BL 4aeb20 <hypre_error_handler> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X8, [SP, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| MOVZ W9, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADRP X0, <4c258c> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADD X0, X0, #1722 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| LDADD X9, X8, [X8] | N/A | ||||||||||||||||||
| LDR X8, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X8, [X8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| CBNZ X8, 49c550 <hypre_IJMatrixSetValuesOMPParCSR.omp_outlined.14+0x920> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| B 49c558 <hypre_IJMatrixSetValuesOMPParCSR.omp_outlined.14+0x928> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| Function | hypre_IJMatrixSetValuesOMPParCSR.omp_outlined.14 |
| Source file and lines | IJMatrix_parcsr.c:3291-3484 |
| Module | exec |
| nb instructions | 85 |
| nb uops | 80 |
| loop length | 340 |
| used w registers | 3 |
| used x registers | 24 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 0 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 10 |
| micro-operation queue | 10.00 cycles |
| front end | 10.00 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 5.50 | 5.50 | 8.25 | 8.25 | 8.25 | 8.25 | 0.00 | 0.00 | 0.00 | 0.00 | 13.17 | 12.83 | 13.00 | 1.50 | 1.50 |
| cycles | 5.50 | 5.50 | 8.25 | 8.25 | 8.25 | 8.25 | 0.00 | 0.00 | 0.00 | 0.00 | 13.17 | 12.83 | 13.00 | 1.50 | 1.50 |
| Cycles executing div or sqrt instructions | NA |
| Front-end | 10.00 |
| Dispatch | 13.17 |
| Overall L1 | 13.17 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 25% |
| load | 37% |
| store | 25% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 22% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| ORR X26, XZR, X9 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ORR X21, XZR, X8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| LDP X4, X3, [X29, #904] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDR X8, [X4] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDP X9, X8, [X8, #56] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| STR X26, [X9, X5,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| STR X21, [X8, X5,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| LDP X26, X21, [X29, #920] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| LDUR X10, [X29, #456] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X8, [SP, #128] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| STUR X5, [X29, #488] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| LDR X8, [X8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| CBZ X8, 49c16c <hypre_IJMatrixSetValuesOMPParCSR.omp_outlined.14+0x53c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X10, [SP, #120] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ORR X8, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ORR X9, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| LDR X10, [X10] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| B 49c054 <hypre_IJMatrixSetValuesOMPParCSR.omp_outlined.14+0x424> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X8, [X4] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| UBFM X12, X5, #61, #60 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| CMP X20, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (25.0%) |
| LDP X10, X9, [X8, #56] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDR X8, [X9, X12] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X9, [X10, X12] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| B.LT 49bed4 <hypre_IJMatrixSetValuesOMPParCSR.omp_outlined.14+0x2a4> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP X11, X13, [SP, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDUR X15, [X29, #496] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ADD X14, X12, #8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ORR X10, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ORR X21, XZR, X8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ORR X26, XZR, X9 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| LDR X17, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDP X16, X18, [SP, #56] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDR X11, [X11] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X13, [X13] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDP X0, X1, [SP, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDR X15, [X15] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X16, [X16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X17, [X17] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X18, [X18] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X0, [X0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X1, [X1] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X12, [X11, X14] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X14, [X13, X14] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| B 49c1fc <hypre_IJMatrixSetValuesOMPParCSR.omp_outlined.14+0x5cc> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADRP X0, <4c2514> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADD X0, X0, #1344 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W1, #3406 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W2, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ORR X20, XZR, X30 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ORR X3, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| BL 4aeb20 <hypre_error_handler> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X8, [SP, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| MOVZ W9, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADRP X0, <4c2538> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADD X0, X0, #1684 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| LDADD X9, X8, [X8] | N/A | ||||||||||||||||||
| LDR X8, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X8, [X8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| CBZ X8, 49c558 <hypre_IJMatrixSetValuesOMPParCSR.omp_outlined.14+0x928> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR X1, XZR, X25 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| BL 4ac6c0 <hypre_printf> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDUR X7, [X29, #424] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDUR X5, [X29, #488] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ORR X30, XZR, X20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| B 49bedc <hypre_IJMatrixSetValuesOMPParCSR.omp_outlined.14+0x2ac> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADRP X0, <4c2568> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADD X0, X0, #1344 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W1, #3440 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W2, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ORR X20, XZR, X30 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ORR X3, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| BL 4aeb20 <hypre_error_handler> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X8, [SP, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| MOVZ W9, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADRP X0, <4c258c> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADD X0, X0, #1722 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| LDADD X9, X8, [X8] | N/A | ||||||||||||||||||
| LDR X8, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X8, [X8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| CBNZ X8, 49c550 <hypre_IJMatrixSetValuesOMPParCSR.omp_outlined.14+0x920> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| B 49c558 <hypre_IJMatrixSetValuesOMPParCSR.omp_outlined.14+0x928> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
