| Loop Id: 2043 | Module: exec | Source: par_strength.c:1714-1797 [...] | Coverage: 0.01% |
|---|
| Loop Id: 2043 | Module: exec | Source: par_strength.c:1714-1797 [...] | Coverage: 0.01% |
|---|
0x4b5444 LDR X12, [SP, #112] |
0x4b5448 LDR X0, [SP, #136] |
0x4b544c LDR X3, [X0, X8,LSL #3] |
0x4b5450 STR X10, [X12, X8,LSL #3] |
0x4b5454 LDR X6, [X11] |
0x4b5458 CBZ X6, 4b5464 |
0x4b545c LDR X1, [SP, #152] |
0x4b5460 STR X7, [X1, X8,LSL #3] |
0x4b5464 UBFM X0, X3, #61, #60 |
0x4b5468 ORR X4, XZR, X10 |
0x4b546c ADD X3, X0, #8 |
0x4b5470 LDR X12, [X15, X0] |
0x4b5474 ORR X2, XZR, X7 |
0x4b5478 ADD X20, X15, X3 |
0x4b547c LDR X16, [X15, X3] |
0x4b5480 CMP X12, X16 |
0x4b5484 B.GE 4b55c8 |
0x4b5488 LDR X9, [SP, #224] |
0x4b548c ORR X1, XZR, X28 |
0x4b5490 STR X27, [SP, #104] |
0x4b5494 STR X14, [SP, #120] |
0x4b5498 HINT #0 |
0x4b549c HINT #0 |
(2047) 0x4b54a0 LDR X28, [X26, X12,LSL #3] |
(2047) 0x4b54a4 UBFM X16, X28, #61, #60 |
(2047) 0x4b54a8 LDR X27, [X23, X16] |
(2047) 0x4b54ac CMP X27, #0 |
(2047) 0x4b54b0 B.LE 4b54d4 |
(2047) 0x4b54b4 LDR X14, [X9, X16] |
(2047) 0x4b54b8 UBFM X5, X14, #61, #60 |
(2047) 0x4b54bc LDR X6, [X21, X5] |
(2047) 0x4b54c0 CMP X6, X10 |
(2047) 0x4b54c4 B.GE 4b54d4 |
(2047) 0x4b54c8 MOVZ W30, #1 |
(2047) 0x4b54cc STR X4, [X21, X5] |
(2047) 0x4b54d0 ADD X4, X4, #1 |
(2047) 0x4b54d4 ADD X27, X16, #8 |
(2047) 0x4b54d8 LDR X5, [X15, X16] |
(2047) 0x4b54dc ADD X28, X15, X27 |
(2047) 0x4b54e0 LDR X14, [X15, X27] |
(2047) 0x4b54e4 CMP X5, X14 |
(2047) 0x4b54e8 B.GE 4b5550 |
(2047) 0x4b54ec STR X0, [SP, #96] |
(2047) 0x4b54f0 ORR X0, XZR, X28 |
(2047) 0x4b54f4 HINT #0 |
(2047) 0x4b54f8 HINT #0 |
(2047) 0x4b54fc HINT #0 |
(2050) 0x4b5500 LDR X6, [X26, X5,LSL #3] |
(2050) 0x4b5504 ADD X5, X5, #1 |
(2050) 0x4b5508 UBFM X6, X6, #61, #60 |
(2050) 0x4b550c LDR X28, [X23, X6] |
(2050) 0x4b5510 CMP X28, #0 |
(2050) 0x4b5514 B.LE 4b5544 |
(2050) 0x4b5518 LDR X6, [X9, X6] |
(2050) 0x4b551c UBFM X28, X6, #61, #60 |
(2050) 0x4b5520 CMP X8, X6 |
(2050) 0x4b5524 B.EQ 4b5544 |
(2050) 0x4b5528 LDR X6, [X21, X28] |
(2050) 0x4b552c CMP X10, X6 |
(2050) 0x4b5530 B.LE 4b5544 |
(2050) 0x4b5534 STR X4, [X21, X28] |
(2050) 0x4b5538 MOVZ W30, #1 |
(2050) 0x4b553c ADD X4, X4, #1 |
(2050) 0x4b5540 LDR X14, [X0] |
(2050) 0x4b5544 CMP X5, X14 |
(2050) 0x4b5548 B.LT 4b5500 |
(2047) 0x4b554c LDR X0, [SP, #96] |
(2047) 0x4b5550 ADD X28, X13, X27 |
(2047) 0x4b5554 LDR X14, [X13, X16] |
(2047) 0x4b5558 LDR X16, [X13, X27] |
(2047) 0x4b555c HINT #0 |
(2048) 0x4b5560 CMP X14, X16 |
(2048) 0x4b5564 B.GE 4b55ac |
(2049) 0x4b5568 LDR X27, [X25, X14,LSL #3] |
(2049) 0x4b556c ADD X14, X14, #1 |
(2049) 0x4b5570 UBFM X5, X27, #61, #60 |
(2049) 0x4b5574 LDR X6, [X24, X5] |
(2049) 0x4b5578 CMP X6, #0 |
(2049) 0x4b557c B.LE 4b5560 |
(2049) 0x4b5580 LDR X27, [X18, X5] |
(2049) 0x4b5584 UBFM X5, X27, #61, #60 |
(2049) 0x4b5588 LDR X6, [X19, X5] |
(2049) 0x4b558c CMP X7, X6 |
(2049) 0x4b5590 B.LE 4b5560 |
(2049) 0x4b5594 STR X2, [X19, X5] |
(2049) 0x4b5598 MOVZ W17, #1 |
(2049) 0x4b559c ADD X2, X2, #1 |
(2049) 0x4b55a0 LDR X16, [X28] |
(2049) 0x4b55a4 CMP X14, X16 |
(2049) 0x4b55a8 B.LT 4b5568 |
(2047) 0x4b55ac LDR X16, [X20] |
(2047) 0x4b55b0 ADD X12, X12, #1 |
(2047) 0x4b55b4 CMP X16, X12 |
(2047) 0x4b55b8 B.GT 4b54a0 |
0x4b55bc LDR X27, [SP, #104] |
0x4b55c0 ORR X28, XZR, X1 |
0x4b55c4 LDR X14, [SP, #120] |
0x4b55c8 ADD X16, X13, X3 |
0x4b55cc LDR X12, [X13, X0] |
0x4b55d0 LDR X3, [X13, X3] |
0x4b55d4 CMP X12, X3 |
0x4b55d8 B.GE 4b56d4 |
0x4b55dc LDR X1, [SP, #200] |
0x4b55e0 ORR X20, XZR, X13 |
(2044) 0x4b55e4 LDR X13, [X25, X12,LSL #3] |
(2044) 0x4b55e8 UBFM X5, X13, #61, #60 |
(2044) 0x4b55ec LDR X9, [X24, X5] |
(2044) 0x4b55f0 CMP X9, #0 |
(2044) 0x4b55f4 B.LE 4b5618 |
(2044) 0x4b55f8 LDR X6, [X18, X5] |
(2044) 0x4b55fc UBFM X0, X6, #61, #60 |
(2044) 0x4b5600 LDR X3, [X19, X0] |
(2044) 0x4b5604 CMP X3, X7 |
(2044) 0x4b5608 B.GE 4b5618 |
(2044) 0x4b560c MOVZ W17, #1 |
(2044) 0x4b5610 STR X2, [X19, X0] |
(2044) 0x4b5614 ADD X2, X2, #1 |
(2044) 0x4b5618 ADD X9, X5, #8 |
(2044) 0x4b561c LDR X0, [X1, X5] |
(2044) 0x4b5620 ADD X13, X1, X9 |
(2044) 0x4b5624 LDR X6, [X1, X9] |
(2044) 0x4b5628 CMP X0, X6 |
(2044) 0x4b562c B.GE 4b567c |
(2044) 0x4b5630 STR X19, [SP, #96] |
(2044) 0x4b5634 HINT #0 |
(2044) 0x4b5638 HINT #0 |
(2044) 0x4b563c HINT #0 |
(2046) 0x4b5640 LDR X3, [X28, X0,LSL #3] |
(2046) 0x4b5644 ADD X0, X0, #1 |
(2046) 0x4b5648 UBFM X19, X3, #61, #60 |
(2046) 0x4b564c CMP X8, X3 |
(2046) 0x4b5650 B.EQ 4b5670 |
(2046) 0x4b5654 LDR X3, [X21, X19] |
(2046) 0x4b5658 CMP X10, X3 |
(2046) 0x4b565c B.LE 4b5670 |
(2046) 0x4b5660 STR X4, [X21, X19] |
(2046) 0x4b5664 MOVZ W30, #1 |
(2046) 0x4b5668 ADD X4, X4, #1 |
(2046) 0x4b566c LDR X6, [X13] |
(2046) 0x4b5670 CMP X0, X6 |
(2046) 0x4b5674 B.LT 4b5640 |
(2044) 0x4b5678 LDR X19, [SP, #96] |
(2044) 0x4b567c ADD X13, X14, X9 |
(2044) 0x4b5680 LDR X3, [X14, X5] |
(2044) 0x4b5684 LDR X9, [X14, X9] |
(2044) 0x4b5688 CMP X3, X9 |
(2044) 0x4b568c B.GE 4b56c0 |
(2045) 0x4b5690 LDR X5, [X27, X3,LSL #3] |
(2045) 0x4b5694 UBFM X0, X5, #61, #60 |
(2045) 0x4b5698 LDR X6, [X19, X0] |
(2045) 0x4b569c CMP X7, X6 |
(2045) 0x4b56a0 B.LE 4b56ec |
(2045) 0x4b56a4 STR X2, [X19, X0] |
(2045) 0x4b56a8 ADD X3, X3, #1 |
(2045) 0x4b56ac ADD X2, X2, #1 |
(2045) 0x4b56b0 LDR X9, [X13] |
(2045) 0x4b56b4 MOVZ W17, #1 |
(2045) 0x4b56b8 CMP X3, X9 |
(2045) 0x4b56bc B.LT 4b5690 |
(2044) 0x4b56c0 LDR X13, [X16] |
(2044) 0x4b56c4 ADD X12, X12, #1 |
(2044) 0x4b56c8 CMP X13, X12 |
(2044) 0x4b56cc B.GT 4b55e4 |
0x4b56d0 ORR X13, XZR, X20 |
0x4b56d4 ADD X8, X8, #1 |
0x4b56d8 CMP X22, X8 |
0x4b56dc B.EQ 4b570c |
0x4b56e0 ORR X10, XZR, X4 |
0x4b56e4 ORR X7, XZR, X2 |
0x4b56e8 B 4b5444 |
(2045) 0x4b56ec ADD X3, X3, #1 |
(2045) 0x4b56f0 CMP X3, X9 |
(2045) 0x4b56f4 B.LT 4b5690 |
(2044) 0x4b56f8 LDR X13, [X16] |
(2044) 0x4b56fc ADD X12, X12, #1 |
(2044) 0x4b5700 CMP X13, X12 |
(2044) 0x4b5704 B.GT 4b55e4 |
0x4b5708 B 4b56d0 |
/home/eoseret/qaas/qaas_runs/178-188-3659/intel/AMG/build/AMG/AMG/parcsr_ls/par_strength.c: 1714 - 1797 |
-------------------------------------------------------------------------------- |
1714: for (ic = ic_begin; ic < ic_end; ic++) |
[...] |
1720: HYPRE_Int i1 = coarse_to_fine[ic]; |
1721: |
1722: HYPRE_Int jj_row_begin_diag = num_nonzeros_diag; |
1723: HYPRE_Int jj_row_begin_offd = num_nonzeros_offd; |
1724: |
1725: C_diag_i[ic] = num_nonzeros_diag; |
1726: if (num_cols_offd_C) |
1727: { |
1728: C_offd_i[ic] = num_nonzeros_offd; |
1729: } |
1730: |
1731: for (jj1 = S_diag_i[i1]; jj1 < S_diag_i[i1+1]; jj1++) |
1732: { |
1733: i2 = S_diag_j[jj1]; |
1734: if (CF_marker[i2] > 0) |
1735: { |
1736: index = fine_to_coarse[i2]; |
1737: if (S_marker[index] < jj_row_begin_diag) |
1738: { |
1739: S_marker[index] = num_nonzeros_diag; |
1740: num_nonzeros_diag++; |
1741: } |
1742: } |
1743: for (jj2 = S_diag_i[i2]; jj2 < S_diag_i[i2+1]; jj2++) |
1744: { |
1745: i3 = S_diag_j[jj2]; |
1746: if (CF_marker[i3] > 0) |
1747: { |
1748: index = fine_to_coarse[i3]; |
1749: if (index != ic && S_marker[index] < jj_row_begin_diag) |
1750: { |
1751: S_marker[index] = num_nonzeros_diag; |
1752: num_nonzeros_diag++; |
1753: } |
1754: } |
1755: } |
1756: for (jj2 = S_offd_i[i2]; jj2 < S_offd_i[i2+1]; jj2++) |
1757: { |
1758: i3 = S_offd_j[jj2]; |
1759: if (CF_marker_offd[i3] > 0) |
1760: { |
1761: index = map_S_to_C[i3]; |
1762: if (S_marker_offd[index] < jj_row_begin_offd) |
1763: { |
1764: S_marker_offd[index] = num_nonzeros_offd; |
1765: num_nonzeros_offd++; |
1766: } |
1767: } |
1768: } |
1769: } |
1770: for (jj1 = S_offd_i[i1]; jj1 < S_offd_i[i1+1]; jj1++) |
1771: { |
1772: i2 = S_offd_j[jj1]; |
1773: if (CF_marker_offd[i2] > 0) |
1774: { |
1775: index = map_S_to_C[i2]; |
1776: if (S_marker_offd[index] < jj_row_begin_offd) |
1777: { |
1778: S_marker_offd[index] = num_nonzeros_offd; |
1779: num_nonzeros_offd++; |
1780: } |
1781: } |
1782: for (jj2 = S_ext_diag_i[i2]; jj2 < S_ext_diag_i[i2+1]; jj2++) |
1783: { |
1784: i3 = S_ext_diag_j[jj2]; |
1785: if (i3 != ic && S_marker[i3] < jj_row_begin_diag) |
1786: { |
1787: S_marker[i3] = num_nonzeros_diag; |
1788: num_nonzeros_diag++; |
1789: } |
1790: } |
1791: for (jj2 = S_ext_offd_i[i2]; jj2 < S_ext_offd_i[i2+1]; jj2++) |
1792: { |
1793: i3 = S_ext_offd_j[jj2]; |
1794: if (S_marker_offd[i3] < jj_row_begin_offd) |
1795: { |
1796: S_marker_offd[i3] = num_nonzeros_offd; |
1797: num_nonzeros_offd++; |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►100.00+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | hypre_BoomerAMGCreate2ndS | par_strength.c:1668 | exec |
| ○ | hypre_BoomerAMGSetup | par_amg_setup.c:622 | exec |
| ○ | hypre_PCGSetup | pcg.c:234 | exec |
| ○ | main | amg.c:398 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | amg.c:253 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►57.14+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | hypre_BoomerAMGCreate2ndS | par_strength.c:1668 | exec |
| ○ | hypre_BoomerAMGSetup | par_amg_setup.c:622 | exec |
| ○ | hypre_PCGSetup | pcg.c:234 | exec |
| ○ | main | amg.c:398 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | amg.c:253 | exec |
| ►42.86+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►68.75+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►31.25+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | hypre_BoomerAMGCreate2ndS | par_strength.c:1668 | exec |
| ○ | hypre_BoomerAMGSetup | par_amg_setup.c:622 | exec |
| ○ | hypre_PCGSetup | pcg.c:234 | exec |
| ○ | main | amg.c:398 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | amg.c:253 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►83.33+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►16.67+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | hypre_BoomerAMGCreate2ndS | par_strength.c:1668 | exec |
| ○ | hypre_BoomerAMGSetup | par_amg_setup.c:622 | exec |
| ○ | hypre_PCGSetup | pcg.c:234 | exec |
| ○ | main | amg.c:398 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | amg.c:253 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►100.00+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►87.50+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►12.50+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | hypre_BoomerAMGCreate2ndS | par_strength.c:1668 | exec |
| ○ | hypre_BoomerAMGSetup | par_amg_setup.c:622 | exec |
| ○ | hypre_PCGSetup | pcg.c:234 | exec |
| ○ | main | amg.c:398 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | amg.c:253 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►100.00+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►100.00+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►92.86+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►7.14+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | hypre_BoomerAMGCreate2ndS | par_strength.c:1668 | exec |
| ○ | hypre_BoomerAMGSetup | par_amg_setup.c:622 | exec |
| ○ | hypre_PCGSetup | pcg.c:234 | exec |
| ○ | main | amg.c:398 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | amg.c:253 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►100.00+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►100.00+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| Path / |
| Metric | Value |
|---|---|
| CQA speedup if no scalar integer | 1.00 |
| CQA speedup if FP arith vectorized | 1.00 |
| CQA speedup if fully vectorized | 4.00 |
| CQA speedup if no inter-iteration dependency | NA |
| CQA speedup if next bottleneck killed | 1.16 |
| Bottlenecks | P10, P11, P12, |
| Function | hypre_BoomerAMGCreate2ndS._omp_fn.7 |
| Source | par_strength.c:1714-1714,par_strength.c:1720-1720,par_strength.c:1725-1728,par_strength.c:1731-1731,par_strength.c:1770-1770 |
| Source loop unroll info | NA |
| Source loop unroll confidence level | NA |
| Unroll/vectorization loop type | NA |
| Unroll factor | NA |
| CQA cycles | 5.67 |
| CQA cycles if no scalar integer | 5.67 |
| CQA cycles if FP arith vectorized | 5.67 |
| CQA cycles if fully vectorized | 1.42 |
| Front-end cycles | 4.88 |
| P0 cycles | 3.00 |
| P1 cycles | 3.00 |
| P2 cycles | 4.00 |
| P3 cycles | 4.00 |
| P4 cycles | 4.00 |
| P5 cycles | 4.00 |
| P6 cycles | 0.00 |
| P7 cycles | 0.00 |
| P8 cycles | 0.00 |
| P9 cycles | 0.00 |
| P10 cycles | 5.67 |
| P11 cycles | 5.67 |
| P12 cycles | 5.67 |
| P13 cycles | 2.00 |
| P14 cycles | 2.00 |
| DIV/SQRT cycles | 0.00 |
| Inter-iter dependencies cycles | NA |
| FE+BE cycles (UFS) | NA |
| Stall cycles (UFS) | NA |
| Nb insns | 41.00 |
| Nb uops | 39.00 |
| Nb loads | NA |
| Nb stores | 4.00 |
| Nb stack references | 9.00 |
| FLOP/cycle | 0.00 |
| Nb FLOP add-sub | 0.00 |
| Nb FLOP mul | 0.00 |
| Nb FLOP fma | 0.00 |
| Nb FLOP div | 0.00 |
| Nb FLOP rcp | 0.00 |
| Nb FLOP sqrt | 0.00 |
| Nb FLOP rsqrt | 0.00 |
| Bytes/cycle | 0.00 |
| Bytes prefetched | 0.00 |
| Bytes loaded | 0.00 |
| Bytes stored | 0.00 |
| Stride 0 | NA |
| Stride 1 | NA |
| Stride n | NA |
| Stride unknown | NA |
| Stride indirect | NA |
| Vectorization ratio all | 0.00 |
| Vectorization ratio load | 0.00 |
| Vectorization ratio store | 0.00 |
| Vectorization ratio mul | NA |
| Vectorization ratio add_sub | 0.00 |
| Vectorization ratio fma | NA |
| Vectorization ratio div_sqrt | NA |
| Vectorization ratio other | 0.00 |
| Vector-efficiency ratio all | 25.00 |
| Vector-efficiency ratio load | 25.00 |
| Vector-efficiency ratio store | 25.00 |
| Vector-efficiency ratio mul | NA |
| Vector-efficiency ratio add_sub | 25.00 |
| Vector-efficiency ratio fma | NA |
| Vector-efficiency ratio div_sqrt | NA |
| Vector-efficiency ratio other | 25.00 |
| Metric | Value |
|---|---|
| CQA speedup if no scalar integer | 1.00 |
| CQA speedup if FP arith vectorized | 1.00 |
| CQA speedup if fully vectorized | 4.00 |
| CQA speedup if no inter-iteration dependency | NA |
| CQA speedup if next bottleneck killed | 1.16 |
| Bottlenecks | P10, P11, P12, |
| Function | hypre_BoomerAMGCreate2ndS._omp_fn.7 |
| Source | par_strength.c:1714-1714,par_strength.c:1720-1720,par_strength.c:1725-1728,par_strength.c:1731-1731,par_strength.c:1770-1770 |
| Source loop unroll info | NA |
| Source loop unroll confidence level | NA |
| Unroll/vectorization loop type | NA |
| Unroll factor | NA |
| CQA cycles | 5.67 |
| CQA cycles if no scalar integer | 5.67 |
| CQA cycles if FP arith vectorized | 5.67 |
| CQA cycles if fully vectorized | 1.42 |
| Front-end cycles | 4.88 |
| P0 cycles | 3.00 |
| P1 cycles | 3.00 |
| P2 cycles | 4.00 |
| P3 cycles | 4.00 |
| P4 cycles | 4.00 |
| P5 cycles | 4.00 |
| P6 cycles | 0.00 |
| P7 cycles | 0.00 |
| P8 cycles | 0.00 |
| P9 cycles | 0.00 |
| P10 cycles | 5.67 |
| P11 cycles | 5.67 |
| P12 cycles | 5.67 |
| P13 cycles | 2.00 |
| P14 cycles | 2.00 |
| DIV/SQRT cycles | 0.00 |
| Inter-iter dependencies cycles | NA |
| FE+BE cycles (UFS) | NA |
| Stall cycles (UFS) | NA |
| Nb insns | 41.00 |
| Nb uops | 39.00 |
| Nb loads | NA |
| Nb stores | 4.00 |
| Nb stack references | 9.00 |
| FLOP/cycle | 0.00 |
| Nb FLOP add-sub | 0.00 |
| Nb FLOP mul | 0.00 |
| Nb FLOP fma | 0.00 |
| Nb FLOP div | 0.00 |
| Nb FLOP rcp | 0.00 |
| Nb FLOP sqrt | 0.00 |
| Nb FLOP rsqrt | 0.00 |
| Bytes/cycle | 0.00 |
| Bytes prefetched | 0.00 |
| Bytes loaded | 0.00 |
| Bytes stored | 0.00 |
| Stride 0 | NA |
| Stride 1 | NA |
| Stride n | NA |
| Stride unknown | NA |
| Stride indirect | NA |
| Vectorization ratio all | 0.00 |
| Vectorization ratio load | 0.00 |
| Vectorization ratio store | 0.00 |
| Vectorization ratio mul | NA |
| Vectorization ratio add_sub | 0.00 |
| Vectorization ratio fma | NA |
| Vectorization ratio div_sqrt | NA |
| Vectorization ratio other | 0.00 |
| Vector-efficiency ratio all | 25.00 |
| Vector-efficiency ratio load | 25.00 |
| Vector-efficiency ratio store | 25.00 |
| Vector-efficiency ratio mul | NA |
| Vector-efficiency ratio add_sub | 25.00 |
| Vector-efficiency ratio fma | NA |
| Vector-efficiency ratio div_sqrt | NA |
| Vector-efficiency ratio other | 25.00 |
| Path / |
| Function | hypre_BoomerAMGCreate2ndS._omp_fn.7 |
| Source file and lines | par_strength.c:1714-1797 |
| Module | exec |
| nb instructions | 41 |
| nb uops | 39 |
| loop length | 164 |
| used w registers | 0 |
| used x registers | 21 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 0 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 9 |
| micro-operation queue | 4.88 cycles |
| front end | 4.88 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 3.00 | 3.00 | 4.00 | 4.00 | 4.00 | 4.00 | 0.00 | 0.00 | 0.00 | 0.00 | 5.67 | 5.67 | 5.67 | 2.00 | 2.00 |
| cycles | 3.00 | 3.00 | 4.00 | 4.00 | 4.00 | 4.00 | 0.00 | 0.00 | 0.00 | 0.00 | 5.67 | 5.67 | 5.67 | 2.00 | 2.00 |
| Cycles executing div or sqrt instructions | NA |
| Front-end | 4.88 |
| Dispatch | 5.67 |
| Overall L1 | 5.67 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 25% |
| load | 25% |
| store | 25% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 25% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 25% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| LDR X12, [SP, #112] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X0, [SP, #136] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X3, [X0, X8,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| STR X10, [X12, X8,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| LDR X6, [X11] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| CBZ X6, 4b5464 <hypre_BoomerAMGCreate2ndS._omp_fn.7+0x15fc> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X1, [SP, #152] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| STR X7, [X1, X8,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| UBFM X0, X3, #61, #60 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ORR X4, XZR, X10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X3, X0, #8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| LDR X12, [X15, X0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ORR X2, XZR, X7 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X20, X15, X3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| LDR X16, [X15, X3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| CMP X12, X16 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (25.0%) |
| B.GE 4b55c8 <hypre_BoomerAMGCreate2ndS._omp_fn.7+0x1760> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X9, [SP, #224] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| ORR X1, XZR, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STR X27, [SP, #104] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| STR X14, [SP, #120] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| LDR X27, [SP, #104] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| ORR X28, XZR, X1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| LDR X14, [SP, #120] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| ADD X16, X13, X3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| LDR X12, [X13, X0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X3, [X13, X3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| CMP X12, X3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (25.0%) |
| B.GE 4b56d4 <hypre_BoomerAMGCreate2ndS._omp_fn.7+0x186c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X1, [SP, #200] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ORR X20, XZR, X13 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ORR X13, XZR, X20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADD X8, X8, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| CMP X22, X8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
| B.EQ 4b570c <hypre_BoomerAMGCreate2ndS._omp_fn.7+0x18a4> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR X10, XZR, X4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ORR X7, XZR, X2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| B 4b5444 <hypre_BoomerAMGCreate2ndS._omp_fn.7+0x15dc> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| B 4b56d0 <hypre_BoomerAMGCreate2ndS._omp_fn.7+0x1868> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| Function | hypre_BoomerAMGCreate2ndS._omp_fn.7 |
| Source file and lines | par_strength.c:1714-1797 |
| Module | exec |
| nb instructions | 41 |
| nb uops | 39 |
| loop length | 164 |
| used w registers | 0 |
| used x registers | 21 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 0 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 9 |
| micro-operation queue | 4.88 cycles |
| front end | 4.88 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 3.00 | 3.00 | 4.00 | 4.00 | 4.00 | 4.00 | 0.00 | 0.00 | 0.00 | 0.00 | 5.67 | 5.67 | 5.67 | 2.00 | 2.00 |
| cycles | 3.00 | 3.00 | 4.00 | 4.00 | 4.00 | 4.00 | 0.00 | 0.00 | 0.00 | 0.00 | 5.67 | 5.67 | 5.67 | 2.00 | 2.00 |
| Cycles executing div or sqrt instructions | NA |
| Front-end | 4.88 |
| Dispatch | 5.67 |
| Overall L1 | 5.67 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 25% |
| load | 25% |
| store | 25% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 25% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 25% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| LDR X12, [SP, #112] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X0, [SP, #136] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X3, [X0, X8,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| STR X10, [X12, X8,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| LDR X6, [X11] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| CBZ X6, 4b5464 <hypre_BoomerAMGCreate2ndS._omp_fn.7+0x15fc> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X1, [SP, #152] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| STR X7, [X1, X8,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| UBFM X0, X3, #61, #60 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ORR X4, XZR, X10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X3, X0, #8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| LDR X12, [X15, X0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ORR X2, XZR, X7 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X20, X15, X3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| LDR X16, [X15, X3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| CMP X12, X16 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (25.0%) |
| B.GE 4b55c8 <hypre_BoomerAMGCreate2ndS._omp_fn.7+0x1760> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X9, [SP, #224] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| ORR X1, XZR, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STR X27, [SP, #104] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| STR X14, [SP, #120] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| LDR X27, [SP, #104] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| ORR X28, XZR, X1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| LDR X14, [SP, #120] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| ADD X16, X13, X3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| LDR X12, [X13, X0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X3, [X13, X3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| CMP X12, X3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (25.0%) |
| B.GE 4b56d4 <hypre_BoomerAMGCreate2ndS._omp_fn.7+0x186c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X1, [SP, #200] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ORR X20, XZR, X13 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ORR X13, XZR, X20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADD X8, X8, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| CMP X22, X8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
| B.EQ 4b570c <hypre_BoomerAMGCreate2ndS._omp_fn.7+0x18a4> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR X10, XZR, X4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ORR X7, XZR, X2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| B 4b5444 <hypre_BoomerAMGCreate2ndS._omp_fn.7+0x15dc> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| B 4b56d0 <hypre_BoomerAMGCreate2ndS._omp_fn.7+0x1868> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| Run 1x1 | Number processes: 1; Number nodes: NA; Run Command: <executable> -n 400 400 400; MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings; Dataset: ; Run Directory: /home/eoseret/qaas/qaas_runs/178-188-3659/intel/AMG/run/oneview_runs/multicore/gcc_1/oneview_run_1781892409; OMP_PROC_BIND: spread; OMP_DISPLAY_AFFINITY: TRUE; OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'; OMP_DISPLAY_ENV: TRUE; OMP_NUM_THREADS: 1; OMP_PLACES: threads |
|---|---|
| Run 1x2 | Number processes: 1; Run Command: <executable> -n 400 400 400; MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings; Dataset: ; Run Directory: /home/eoseret/qaas/qaas_runs/178-188-3659/intel/AMG/run/oneview_runs/multicore/gcc_1/oneview_run_1781892409; OMP_NUM_THREADS: 2; OMP_PROC_BIND: spread; OMP_DISPLAY_AFFINITY: TRUE; OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'; OMP_DISPLAY_ENV: TRUE; OMP_PLACES: threads |
| Run 1x4 | Number processes: 1; Run Command: <executable> -n 400 400 400; MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings; Dataset: ; Run Directory: /home/eoseret/qaas/qaas_runs/178-188-3659/intel/AMG/run/oneview_runs/multicore/gcc_1/oneview_run_1781892409; OMP_NUM_THREADS: 4; OMP_PROC_BIND: spread; OMP_DISPLAY_AFFINITY: TRUE; OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'; OMP_DISPLAY_ENV: TRUE; OMP_PLACES: threads |
| Run 1x8 | Number processes: 1Run Command: <executable> -n 400 400 400MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-188-3659/intel/AMG/run/oneview_runs/multicore/gcc_1/oneview_run_1781892409OMP_NUM_THREADS: 8OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| Run 1x16 | Number processes: 1Run Command: <executable> -n 400 400 400MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-188-3659/intel/AMG/run/oneview_runs/multicore/gcc_1/oneview_run_1781892409OMP_NUM_THREADS: 16OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| Run 1x24 | Number processes: 1Run Command: <executable> -n 400 400 400MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-188-3659/intel/AMG/run/oneview_runs/multicore/gcc_1/oneview_run_1781892409OMP_NUM_THREADS: 24OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| Run 1x32 | Number processes: 1Run Command: <executable> -n 400 400 400MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-188-3659/intel/AMG/run/oneview_runs/multicore/gcc_1/oneview_run_1781892409OMP_NUM_THREADS: 32OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| Run 1x40 | Number processes: 1Run Command: <executable> -n 400 400 400MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-188-3659/intel/AMG/run/oneview_runs/multicore/gcc_1/oneview_run_1781892409OMP_NUM_THREADS: 40OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| Run 1x48 | Number processes: 1Run Command: <executable> -n 400 400 400MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-188-3659/intel/AMG/run/oneview_runs/multicore/gcc_1/oneview_run_1781892409OMP_NUM_THREADS: 48OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| Run 1x56 | Number processes: 1Run Command: <executable> -n 400 400 400MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-188-3659/intel/AMG/run/oneview_runs/multicore/gcc_1/oneview_run_1781892409OMP_NUM_THREADS: 56OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| Run 1x64 | Number processes: 1Run Command: <executable> -n 400 400 400MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-188-3659/intel/AMG/run/oneview_runs/multicore/gcc_1/oneview_run_1781892409OMP_NUM_THREADS: 64OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| (1x1) Efficiency | (1x1) Potential Speed-Up (%) | (1x2) Efficiency | (1x2) Potential Speed-Up (%) | (1x4) Efficiency | (1x4) Potential Speed-Up (%) | (1x8) Efficiency | (1x8) Potential Speed-Up (%) | (1x16) Efficiency | (1x16) Potential Speed-Up (%) | (1x24) Efficiency | (1x24) Potential Speed-Up (%) | (1x32) Efficiency | (1x32) Potential Speed-Up (%) | (1x40) Efficiency | (1x40) Potential Speed-Up (%) | (1x48) Efficiency | (1x48) Potential Speed-Up (%) | (1x56) Efficiency | (1x56) Potential Speed-Up (%) | (1x64) Efficiency | (1x64) Potential Speed-Up (%) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 0 | 0.96 | 0 | 1.58 | 0 | 1.89 | 0 | 6.6 | 0 | 7.72 | 0 | 8.91 | 0 | 15.43 | 0 | 16.73 | 0 | 13.82 | 0 | 19.41 | 0 |
| Run | Number of threads | Efficiency (ideal is 1) | Speedup | Ideal Speedup | Time (s) | Coverage (%) |
|---|---|---|---|---|---|---|
| 1x1 | 1 | 1 | 1 | 1 | 0.035000000149012 | 0.010112702846527 |
| 1x2 | 2 | 0.96 | 0.96 | 2 | 0.03999999910593 | 0.019387809559703 |
| 1x4 | 4 | 1.58 | 1.58 | 4 | 0.025000000372529 | 0.02167359739542 |
| 1x8 | 8 | 1.89 | 1.89 | 8 | 0.035000000149012 | 0.029884118586779 |
| 1x16 | 8 | 6.6 | 6.6 | 16 | 0.010000000707805 | 0.010092378593981 |
| 1x24 | 10 | 7.72 | 7.72 | 24 | 0.019999999552965 | 0.0090798875316978 |
| 1x32 | 13 | 8.91 | 8.91 | 32 | 0.019999999552965 | 0.0084090400487185 |
| 1x40 | 11 | 15.43 | 15.43 | 40 | 0.0099999997764826 | 0.0051363059319556 |
| 1x48 | 13 | 16.73 | 16.73 | 48 | 0.0099999997764826 | 0.0047206678427756 |
| 1x56 | 19 | 13.82 | 13.82 | 56 | 0.0099999997764826 | 0.0058093057014048 |
| 1x64 | 14 | 19.41 | 19.41 | 64 | 0.010000000707805 | 0.0040588513948023 |
