| Loop Id: 2740 | Module: exec | Source: ams.c:3664-3682 | Coverage: 1.21% |
|---|
| Loop Id: 2740 | Module: exec | Source: ams.c:3664-3682 | Coverage: 1.21% |
|---|
0x47d560 LDR D1, [X8] |
0x47d564 FMUL D0, D1, D0 |
0x47d568 LDR D1, [X0, X9,LSL #3] |
0x47d56c FDIV D0, D0, D1 |
0x47d570 LDR D1, [X6, X9,LSL #3] |
0x47d574 FADD D0, D1, D0 |
0x47d578 STR D0, [X6, X9,LSL #3] |
0x47d57c CMP X24, X1 |
0x47d580 ORR X9, XZR, X24 |
0x47d584 B.EQ 47d4d8 |
(2739) 0x47d588 LDR X4, [X11, X9,LSL #3] |
(2739) 0x47d58c LDR D1, [X19] |
(2739) 0x47d590 LDR D0, [X10, X4,LSL #3] |
(2739) 0x47d594 FCMP D0, D1 |
(2739) 0x47d598 B.EQ 47d5c0 |
0x47d59c LDR D0, [X12, X9,LSL #3] |
0x47d5a0 ADD X24, X9, #1 |
0x47d5a4 LDR X25, [X11, X24,LSL #3] |
0x47d5a8 SUBS X27, X25, X4 |
0x47d5ac B.LE 47d6c0 |
0x47d5b0 CMP X27, #2 |
0x47d5b4 B.CS 47d5d4 |
0x47d5b8 ORR X26, XZR, X4 |
0x47d5bc B 47d620 |
(2739) 0x47d5c0 ADD X24, X9, #1 |
(2739) 0x47d5c4 CMP X24, X1 |
(2739) 0x47d5c8 ORR X9, XZR, X24 |
(2739) 0x47d5cc B.NE 47d588 |
0x47d5d4 AND X28, X27, #0xfffffffe |
0x47d5d8 MOVI D1, #0 |
0x47d5dc ADD X30, X2, X4,LSL #3 |
0x47d5e0 AND X5, X27, #0xfffffffe |
0x47d5e4 ADD X26, X4, X28 |
0x47d5e8 ADD X4, X3, X4,LSL #3 |
(2744) 0x47d5ec LDP X22, X23, [X4, #1016] |
(2744) 0x47d5f0 LDP D2, D3, [X30, #1016] |
(2744) 0x47d5f4 SUBS X5, X5, #2 |
(2744) 0x47d5f8 ADD X30, X30, #16 |
(2744) 0x47d5fc ADD X4, X4, #16 |
(2744) 0x47d600 LDR D4, [X14, X22,LSL #3] |
(2744) 0x47d604 LDR D5, [X14, X23,LSL #3] |
(2744) 0x47d608 FMSUB D0, D4, D2, D0 |
(2744) 0x47d60c FMSUB D1, D5, D3, D1 |
(2744) 0x47d610 B.NE 47d5ec |
0x47d614 FADD D0, D1, D0 |
0x47d618 CMP X27, X28 |
0x47d61c B.EQ 47d6c0 |
0x47d620 SUB W4, W25, W26 |
0x47d624 ORR X27, XZR, X26 |
0x47d628 ANDS X4, X4, #0x3 |
0x47d62c B.EQ 47d65c |
0x47d630 HINT #0 |
0x47d634 HINT #0 |
0x47d638 HINT #0 |
0x47d63c HINT #0 |
(2746) 0x47d640 LDR X5, [X13, X27,LSL #3] |
(2746) 0x47d644 LDR D1, [X10, X27,LSL #3] |
(2746) 0x47d648 ADD X27, X27, #1 |
(2746) 0x47d64c SUBS X4, X4, #1 |
(2746) 0x47d650 LDR D2, [X14, X5,LSL #3] |
(2746) 0x47d654 FMSUB D0, D2, D1, D0 |
(2746) 0x47d658 B.NE 47d640 |
0x47d65c SUB X4, X26, X25 |
0x47d660 CMN X4, #4 |
0x47d664 B.HI 47d6c0 |
0x47d668 ADD X4, X10, #16 |
0x47d66c SUB X25, X25, X27 |
0x47d670 ADD X26, X4, X27,LSL #3 |
0x47d674 ADD X4, X13, #16 |
0x47d678 ADD X27, X4, X27,LSL #3 |
0x47d67c HINT #0 |
(2745) 0x47d680 LDP X4, X5, [X27, #1008] |
(2745) 0x47d684 LDP D2, D3, [X26, #1008] |
(2745) 0x47d688 SUBS X25, X25, #4 |
(2745) 0x47d68c LDR D4, [X14, X5,LSL #3] |
(2745) 0x47d690 LDR D1, [X14, X4,LSL #3] |
(2745) 0x47d694 FMUL D3, D4, D3 |
(2745) 0x47d698 FMADD D1, D1, D2, D3 |
(2745) 0x47d69c LDP X4, X5, [X27], #32 |
(2745) 0x47d6a0 LDR D4, [X14, X4,LSL #3] |
(2745) 0x47d6a4 LDP D2, D3, [X26], #32 |
(2745) 0x47d6a8 FMADD D1, D4, D2, D1 |
(2745) 0x47d6ac LDR D2, [X14, X5,LSL #3] |
(2745) 0x47d6b0 FMADD D1, D2, D3, D1 |
(2745) 0x47d6b4 FSUB D0, D0, S1 |
(2745) 0x47d6b8 B.NE 47d680 |
0x47d6bc HINT #0 |
0x47d6c0 LDR X4, [X15, X9,LSL #3] |
0x47d6c4 LDR X25, [X15, X24,LSL #3] |
0x47d6c8 SUBS X27, X25, X4 |
0x47d6cc B.LE 47d560 |
0x47d6d0 CMP X27, #2 |
0x47d6d4 B.CS 47d6e0 |
0x47d6d8 ORR X26, XZR, X4 |
0x47d6dc B 47d734 |
0x47d6e0 AND X28, X27, #0xfffffffe |
0x47d6e4 MOVI D1, #0 |
0x47d6e8 ADD X30, X7, X4,LSL #3 |
0x47d6ec AND X5, X27, #0xfffffffe |
0x47d6f0 ADD X26, X4, X28 |
0x47d6f4 ADD X4, X21, X4,LSL #3 |
0x47d6f8 HINT #0 |
0x47d6fc HINT #0 |
(2741) 0x47d700 LDP X22, X23, [X4, #1016] |
(2741) 0x47d704 LDP D2, D3, [X30, #1016] |
(2741) 0x47d708 SUBS X5, X5, #2 |
(2741) 0x47d70c ADD X30, X30, #16 |
(2741) 0x47d710 ADD X4, X4, #16 |
(2741) 0x47d714 LDR D4, [X18, X22,LSL #3] |
(2741) 0x47d718 LDR D5, [X18, X23,LSL #3] |
(2741) 0x47d71c FMSUB D0, D4, D2, D0 |
(2741) 0x47d720 FMSUB D1, D5, D3, D1 |
(2741) 0x47d724 B.NE 47d700 |
0x47d728 FADD D0, D1, D0 |
0x47d72c CMP X27, X28 |
0x47d730 B.EQ 47d560 |
0x47d734 SUB W4, W25, W26 |
0x47d738 ORR X27, XZR, X26 |
0x47d73c ANDS X4, X4, #0x3 |
0x47d740 B.EQ 47d760 |
(2743) 0x47d744 LDR X5, [X16, X27,LSL #3] |
(2743) 0x47d748 LDR D1, [X17, X27,LSL #3] |
(2743) 0x47d74c ADD X27, X27, #1 |
(2743) 0x47d750 SUBS X4, X4, #1 |
(2743) 0x47d754 LDR D2, [X18, X5,LSL #3] |
(2743) 0x47d758 FMSUB D0, D2, D1, D0 |
(2743) 0x47d75c B.NE 47d744 |
0x47d760 SUB X4, X26, X25 |
0x47d764 CMN X4, #4 |
0x47d768 B.HI 47d560 |
0x47d76c ADD X4, X17, #16 |
0x47d770 SUB X25, X25, X27 |
0x47d774 ADD X26, X4, X27,LSL #3 |
0x47d778 ADD X4, X16, #16 |
0x47d77c ADD X27, X4, X27,LSL #3 |
(2742) 0x47d780 LDP X4, X5, [X27, #1008] |
(2742) 0x47d784 LDP D2, D3, [X26, #1008] |
(2742) 0x47d788 SUBS X25, X25, #4 |
(2742) 0x47d78c LDR D4, [X18, X5,LSL #3] |
(2742) 0x47d790 LDR D1, [X18, X4,LSL #3] |
(2742) 0x47d794 FMUL D3, D4, D3 |
(2742) 0x47d798 FMADD D1, D1, D2, D3 |
(2742) 0x47d79c LDP X4, X5, [X27], #32 |
(2742) 0x47d7a0 LDR D4, [X18, X4,LSL #3] |
(2742) 0x47d7a4 LDP D2, D3, [X26], #32 |
(2742) 0x47d7a8 FMADD D1, D4, D2, D1 |
(2742) 0x47d7ac LDR D2, [X18, X5,LSL #3] |
(2742) 0x47d7b0 FMADD D1, D2, D3, D1 |
(2742) 0x47d7b4 FSUB D0, D0, S1 |
(2742) 0x47d7b8 B.NE 47d780 |
0x47d7bc B 47d560 |
/home/eoseret/qaas/qaas_runs/178-188-3659/intel/AMG/build/AMG/AMG/parcsr_ls/ams.c: 3664 - 3682 |
-------------------------------------------------------------------------------- |
3664: for (i = 0; i < n; i++) |
3665: { |
3666: /*----------------------------------------------------------- |
3667: * If diagonal is nonzero, relax point i; otherwise, skip it. |
3668: *-----------------------------------------------------------*/ |
3669: if (A_diag_data[A_diag_i[i]] != zero) |
3670: { |
3671: res = f_data[i]; |
3672: for (jj = A_diag_i[i]; jj < A_diag_i[i+1]; jj++) |
3673: { |
3674: ii = A_diag_j[jj]; |
3675: res -= A_diag_data[jj] * Vtemp_data[ii]; |
3676: } |
3677: for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) |
3678: { |
3679: ii = A_offd_j[jj]; |
3680: res -= A_offd_data[jj] * Vext_data[ii]; |
3681: } |
3682: u_data[i] += (relax_weight*res)/l1_norms[i]; |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►98.42+ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_launch_thread | libomp.so | |
| ○ | __kmp_launch_worker(void*) | libomp.so | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►1.56+ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_fork_call | libomp.so | |
| ○ | __kmpc_fork_call | libomp.so | |
| ○ | hypre_ParCSRRelaxThreads | ams.c:3685 | exec |
| ○ | hypre_BoomerAMGCycle | par_cycle.c:334 | exec |
| ○ | hypre_BoomerAMGSolve | par_amg_solve.c:274 | exec |
| ○ | hypre_PCGSolve | pcg.c:545 | exec |
| ○ | main | amg.c:419 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | exec |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| Path / |
| Metric | Value |
|---|---|
| CQA speedup if no scalar integer | 2.56 |
| CQA speedup if FP arith vectorized | 1.58 |
| CQA speedup if fully vectorized | 4.00 |
| CQA speedup if no inter-iteration dependency | NA |
| CQA speedup if next bottleneck killed | 1.17 |
| Bottlenecks | P2, P3, P4, P5, |
| Function | hypre_ParCSRRelaxThreads.omp_outlined.27 |
| Source | ams.c:3664-3664,ams.c:3671-3672,ams.c:3677-3677,ams.c:3682-3682 |
| Source loop unroll info | NA |
| Source loop unroll confidence level | NA |
| Unroll/vectorization loop type | NA |
| Unroll factor | NA |
| CQA cycles | 10.25 |
| CQA cycles if no scalar integer | 4.00 |
| CQA cycles if FP arith vectorized | 6.50 |
| CQA cycles if fully vectorized | 2.56 |
| Front-end cycles | 8.75 |
| P0 cycles | 7.00 |
| P1 cycles | 7.00 |
| P2 cycles | 10.25 |
| P3 cycles | 10.25 |
| P4 cycles | 10.25 |
| P5 cycles | 10.25 |
| P6 cycles | 2.00 |
| P7 cycles | 2.00 |
| P8 cycles | 2.00 |
| P9 cycles | 2.00 |
| P10 cycles | 2.83 |
| P11 cycles | 2.50 |
| P12 cycles | 2.67 |
| P13 cycles | 0.00 |
| P14 cycles | 0.00 |
| DIV/SQRT cycles | 1.75 - 3.50 |
| Inter-iter dependencies cycles | NA |
| FE+BE cycles (UFS) | NA |
| Stall cycles (UFS) | NA |
| Nb insns | 78.00 |
| Nb uops | 70.00 |
| Nb loads | NA |
| Nb stores | 1.00 |
| Nb stack references | 0.00 |
| FLOP/cycle | 0.49 |
| Nb FLOP add-sub | 3.00 |
| Nb FLOP mul | 1.00 |
| Nb FLOP fma | 0.00 |
| Nb FLOP div | 1.00 |
| Nb FLOP rcp | 0.00 |
| Nb FLOP sqrt | 0.00 |
| Nb FLOP rsqrt | 0.00 |
| Bytes/cycle | 0.00 |
| Bytes prefetched | 0.00 |
| Bytes loaded | 0.00 |
| Bytes stored | 0.00 |
| Stride 0 | NA |
| Stride 1 | NA |
| Stride n | NA |
| Stride unknown | NA |
| Stride indirect | NA |
| Vectorization ratio all | 0.00 |
| Vectorization ratio load | 0.00 |
| Vectorization ratio store | 0.00 |
| Vectorization ratio mul | 0.00 |
| Vectorization ratio add_sub | 0.00 |
| Vectorization ratio fma | NA |
| Vectorization ratio div_sqrt | 0.00 |
| Vectorization ratio other | 0.00 |
| Vector-efficiency ratio all | 25.00 |
| Vector-efficiency ratio load | 25.00 |
| Vector-efficiency ratio store | 25.00 |
| Vector-efficiency ratio mul | 25.00 |
| Vector-efficiency ratio add_sub | 25.00 |
| Vector-efficiency ratio fma | NA |
| Vector-efficiency ratio div_sqrt | 25.00 |
| Vector-efficiency ratio other | 25.00 |
| Metric | Value |
|---|---|
| CQA speedup if no scalar integer | 2.56 |
| CQA speedup if FP arith vectorized | 1.58 |
| CQA speedup if fully vectorized | 4.00 |
| CQA speedup if no inter-iteration dependency | NA |
| CQA speedup if next bottleneck killed | 1.17 |
| Bottlenecks | P2, P3, P4, P5, |
| Function | hypre_ParCSRRelaxThreads.omp_outlined.27 |
| Source | ams.c:3664-3664,ams.c:3671-3672,ams.c:3677-3677,ams.c:3682-3682 |
| Source loop unroll info | NA |
| Source loop unroll confidence level | NA |
| Unroll/vectorization loop type | NA |
| Unroll factor | NA |
| CQA cycles | 10.25 |
| CQA cycles if no scalar integer | 4.00 |
| CQA cycles if FP arith vectorized | 6.50 |
| CQA cycles if fully vectorized | 2.56 |
| Front-end cycles | 8.75 |
| P0 cycles | 7.00 |
| P1 cycles | 7.00 |
| P2 cycles | 10.25 |
| P3 cycles | 10.25 |
| P4 cycles | 10.25 |
| P5 cycles | 10.25 |
| P6 cycles | 2.00 |
| P7 cycles | 2.00 |
| P8 cycles | 2.00 |
| P9 cycles | 2.00 |
| P10 cycles | 2.83 |
| P11 cycles | 2.50 |
| P12 cycles | 2.67 |
| P13 cycles | 0.00 |
| P14 cycles | 0.00 |
| DIV/SQRT cycles | 1.75 - 3.50 |
| Inter-iter dependencies cycles | NA |
| FE+BE cycles (UFS) | NA |
| Stall cycles (UFS) | NA |
| Nb insns | 78.00 |
| Nb uops | 70.00 |
| Nb loads | NA |
| Nb stores | 1.00 |
| Nb stack references | 0.00 |
| FLOP/cycle | 0.49 |
| Nb FLOP add-sub | 3.00 |
| Nb FLOP mul | 1.00 |
| Nb FLOP fma | 0.00 |
| Nb FLOP div | 1.00 |
| Nb FLOP rcp | 0.00 |
| Nb FLOP sqrt | 0.00 |
| Nb FLOP rsqrt | 0.00 |
| Bytes/cycle | 0.00 |
| Bytes prefetched | 0.00 |
| Bytes loaded | 0.00 |
| Bytes stored | 0.00 |
| Stride 0 | NA |
| Stride 1 | NA |
| Stride n | NA |
| Stride unknown | NA |
| Stride indirect | NA |
| Vectorization ratio all | 0.00 |
| Vectorization ratio load | 0.00 |
| Vectorization ratio store | 0.00 |
| Vectorization ratio mul | 0.00 |
| Vectorization ratio add_sub | 0.00 |
| Vectorization ratio fma | NA |
| Vectorization ratio div_sqrt | 0.00 |
| Vectorization ratio other | 0.00 |
| Vector-efficiency ratio all | 25.00 |
| Vector-efficiency ratio load | 25.00 |
| Vector-efficiency ratio store | 25.00 |
| Vector-efficiency ratio mul | 25.00 |
| Vector-efficiency ratio add_sub | 25.00 |
| Vector-efficiency ratio fma | NA |
| Vector-efficiency ratio div_sqrt | 25.00 |
| Vector-efficiency ratio other | 25.00 |
| Path / |
| Function | hypre_ParCSRRelaxThreads.omp_outlined.27 |
| Source file and lines | ams.c:3664-3682 |
| Module | exec |
| nb instructions | 78 |
| nb uops | 70 |
| loop length | 312 |
| used w registers | 3 |
| used x registers | 25 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 0 |
| used d registers | 2 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 0 |
| ADD-SUB / MUL ratio | 3.00 |
| micro-operation queue | 8.75 cycles |
| front end | 8.75 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 7.00 | 7.00 | 10.25 | 10.25 | 10.25 | 10.25 | 2.00 | 2.00 | 2.00 | 2.00 | 2.83 | 2.50 | 2.67 | 0.00 | 0.00 |
| cycles | 7.00 | 7.00 | 10.25 | 10.25 | 10.25 | 10.25 | 2.00 | 2.00 | 2.00 | 2.00 | 2.83 | 2.50 | 2.67 | 0.00 | 0.00 |
| Cycles executing div or sqrt instructions | 1.75-3.50 |
| Front-end | 8.75 |
| Dispatch | 10.25 |
| DIV/SQRT | 1.75-3.50 |
| Overall L1 | 10.25 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 0% |
| all | 0% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | 0% |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | 0% |
| other | NA (no other vectorizable/vectorized instructions) |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | 0% |
| other | 0% |
| all | 25% |
| load | 25% |
| store | 25% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 25% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 25% |
| all | 25% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | 25% |
| add-sub | 25% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | 25% |
| other | NA (no other vectorizable/vectorized instructions) |
| all | 25% |
| load | 25% |
| store | 25% |
| mul | 25% |
| add-sub | 25% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | 25% |
| other | 25% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| LDR D1, [X8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| FMUL D0, D1, D0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (25.0%) |
| LDR D1, [X0, X9,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| FDIV D0, D0, D1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 7-15 | 1.75-3.50 | scal (25.0%) |
| LDR D1, [X6, X9,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| FADD D0, D1, D0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| STR D0, [X6, X9,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 | scal (25.0%) |
| CMP X24, X1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (25.0%) |
| ORR X9, XZR, X24 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| B.EQ 47d4d8 <hypre_ParCSRRelaxThreads.omp_outlined.27+0x98> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR D0, [X12, X9,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| ADD X24, X9, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| LDR X25, [X11, X24,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| SUBS X27, X25, X4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
| B.LE 47d6c0 <hypre_ParCSRRelaxThreads.omp_outlined.27+0x280> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| CMP X27, #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (25.0%) |
| B.CS 47d5d4 <hypre_ParCSRRelaxThreads.omp_outlined.27+0x194> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR X26, XZR, X4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| B 47d620 <hypre_ParCSRRelaxThreads.omp_outlined.27+0x1e0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| AND X28, X27, #0xfffffffe | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| MOVI D1, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| ADD X30, X2, X4,LSL #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| AND X5, X27, #0xfffffffe | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X26, X4, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X4, X3, X4,LSL #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| FADD D0, D1, D0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| CMP X27, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (25.0%) |
| B.EQ 47d6c0 <hypre_ParCSRRelaxThreads.omp_outlined.27+0x280> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| SUB W4, W25, W26 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ORR X27, XZR, X26 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ANDS X4, X4, #0x3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
| B.EQ 47d65c <hypre_ParCSRRelaxThreads.omp_outlined.27+0x21c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| SUB X4, X26, X25 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| CMN X4, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (25.0%) |
| B.HI 47d6c0 <hypre_ParCSRRelaxThreads.omp_outlined.27+0x280> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD X4, X10, #16 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| SUB X25, X25, X27 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X26, X4, X27,LSL #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X4, X13, #16 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADD X27, X4, X27,LSL #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| LDR X4, [X15, X9,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X25, [X15, X24,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| SUBS X27, X25, X4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
| B.LE 47d560 <hypre_ParCSRRelaxThreads.omp_outlined.27+0x120> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| CMP X27, #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (25.0%) |
| B.CS 47d6e0 <hypre_ParCSRRelaxThreads.omp_outlined.27+0x2a0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR X26, XZR, X4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| B 47d734 <hypre_ParCSRRelaxThreads.omp_outlined.27+0x2f4> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| AND X28, X27, #0xfffffffe | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| MOVI D1, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| ADD X30, X7, X4,LSL #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| AND X5, X27, #0xfffffffe | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X26, X4, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X4, X21, X4,LSL #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| FADD D0, D1, D0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| CMP X27, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (25.0%) |
| B.EQ 47d560 <hypre_ParCSRRelaxThreads.omp_outlined.27+0x120> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| SUB W4, W25, W26 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ORR X27, XZR, X26 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ANDS X4, X4, #0x3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
| B.EQ 47d760 <hypre_ParCSRRelaxThreads.omp_outlined.27+0x320> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| SUB X4, X26, X25 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| CMN X4, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (25.0%) |
| B.HI 47d560 <hypre_ParCSRRelaxThreads.omp_outlined.27+0x120> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD X4, X17, #16 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| SUB X25, X25, X27 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X26, X4, X27,LSL #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X4, X16, #16 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADD X27, X4, X27,LSL #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| B 47d560 <hypre_ParCSRRelaxThreads.omp_outlined.27+0x120> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| Function | hypre_ParCSRRelaxThreads.omp_outlined.27 |
| Source file and lines | ams.c:3664-3682 |
| Module | exec |
| nb instructions | 78 |
| nb uops | 70 |
| loop length | 312 |
| used w registers | 3 |
| used x registers | 25 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 0 |
| used d registers | 2 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 0 |
| ADD-SUB / MUL ratio | 3.00 |
| micro-operation queue | 8.75 cycles |
| front end | 8.75 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 7.00 | 7.00 | 10.25 | 10.25 | 10.25 | 10.25 | 2.00 | 2.00 | 2.00 | 2.00 | 2.83 | 2.50 | 2.67 | 0.00 | 0.00 |
| cycles | 7.00 | 7.00 | 10.25 | 10.25 | 10.25 | 10.25 | 2.00 | 2.00 | 2.00 | 2.00 | 2.83 | 2.50 | 2.67 | 0.00 | 0.00 |
| Cycles executing div or sqrt instructions | 1.75-3.50 |
| Front-end | 8.75 |
| Dispatch | 10.25 |
| DIV/SQRT | 1.75-3.50 |
| Overall L1 | 10.25 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 0% |
| all | 0% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | 0% |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | 0% |
| other | NA (no other vectorizable/vectorized instructions) |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | 0% |
| other | 0% |
| all | 25% |
| load | 25% |
| store | 25% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 25% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 25% |
| all | 25% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | 25% |
| add-sub | 25% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | 25% |
| other | NA (no other vectorizable/vectorized instructions) |
| all | 25% |
| load | 25% |
| store | 25% |
| mul | 25% |
| add-sub | 25% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | 25% |
| other | 25% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| LDR D1, [X8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| FMUL D0, D1, D0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (25.0%) |
| LDR D1, [X0, X9,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| FDIV D0, D0, D1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 7-15 | 1.75-3.50 | scal (25.0%) |
| LDR D1, [X6, X9,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| FADD D0, D1, D0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| STR D0, [X6, X9,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 | scal (25.0%) |
| CMP X24, X1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (25.0%) |
| ORR X9, XZR, X24 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| B.EQ 47d4d8 <hypre_ParCSRRelaxThreads.omp_outlined.27+0x98> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR D0, [X12, X9,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| ADD X24, X9, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| LDR X25, [X11, X24,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| SUBS X27, X25, X4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
| B.LE 47d6c0 <hypre_ParCSRRelaxThreads.omp_outlined.27+0x280> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| CMP X27, #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (25.0%) |
| B.CS 47d5d4 <hypre_ParCSRRelaxThreads.omp_outlined.27+0x194> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR X26, XZR, X4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| B 47d620 <hypre_ParCSRRelaxThreads.omp_outlined.27+0x1e0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| AND X28, X27, #0xfffffffe | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| MOVI D1, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| ADD X30, X2, X4,LSL #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| AND X5, X27, #0xfffffffe | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X26, X4, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X4, X3, X4,LSL #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| FADD D0, D1, D0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| CMP X27, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (25.0%) |
| B.EQ 47d6c0 <hypre_ParCSRRelaxThreads.omp_outlined.27+0x280> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| SUB W4, W25, W26 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ORR X27, XZR, X26 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ANDS X4, X4, #0x3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
| B.EQ 47d65c <hypre_ParCSRRelaxThreads.omp_outlined.27+0x21c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| SUB X4, X26, X25 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| CMN X4, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (25.0%) |
| B.HI 47d6c0 <hypre_ParCSRRelaxThreads.omp_outlined.27+0x280> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD X4, X10, #16 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| SUB X25, X25, X27 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X26, X4, X27,LSL #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X4, X13, #16 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADD X27, X4, X27,LSL #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| LDR X4, [X15, X9,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X25, [X15, X24,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| SUBS X27, X25, X4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
| B.LE 47d560 <hypre_ParCSRRelaxThreads.omp_outlined.27+0x120> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| CMP X27, #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (25.0%) |
| B.CS 47d6e0 <hypre_ParCSRRelaxThreads.omp_outlined.27+0x2a0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR X26, XZR, X4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| B 47d734 <hypre_ParCSRRelaxThreads.omp_outlined.27+0x2f4> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| AND X28, X27, #0xfffffffe | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| MOVI D1, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| ADD X30, X7, X4,LSL #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| AND X5, X27, #0xfffffffe | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X26, X4, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X4, X21, X4,LSL #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| FADD D0, D1, D0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| CMP X27, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (25.0%) |
| B.EQ 47d560 <hypre_ParCSRRelaxThreads.omp_outlined.27+0x120> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| SUB W4, W25, W26 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ORR X27, XZR, X26 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ANDS X4, X4, #0x3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
| B.EQ 47d760 <hypre_ParCSRRelaxThreads.omp_outlined.27+0x320> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| SUB X4, X26, X25 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| CMN X4, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (25.0%) |
| B.HI 47d560 <hypre_ParCSRRelaxThreads.omp_outlined.27+0x120> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD X4, X17, #16 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| SUB X25, X25, X27 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X26, X4, X27,LSL #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X4, X16, #16 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADD X27, X4, X27,LSL #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| B 47d560 <hypre_ParCSRRelaxThreads.omp_outlined.27+0x120> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
