| Function: viscosity_kernel(int, int, int, int, clover::Buffer1D<double>&, clover::Buffer1D<double>&, ... | Module: exec | Source: viscosity.cpp:36-64 [...] | Coverage (incl. loops): 9.76% | (excl. loops): 0.00% |
|---|
| Function: viscosity_kernel(int, int, int, int, clover::Buffer1D<double>&, clover::Buffer1D<double>&, ... | Module: exec | Source: viscosity.cpp:36-64 [...] | Coverage (incl. loops): 9.76% | (excl. loops): 0.00% |
|---|
/home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/build/CloverLeaf2.0-CXX/src/omp/context.h: 46 - 69 |
-------------------------------------------------------------------------------- |
46: T &operator[](size_t i) const { return data[i]; } |
[...] |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
/home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/build/CloverLeaf2.0-CXX/src/omp/viscosity.cpp: 36 - 64 |
-------------------------------------------------------------------------------- |
36: #pragma omp parallel for simd collapse(2) |
37: for (int j = (y_min + 1); j < (y_max + 2); j++) { |
38: for (int i = (x_min + 1); i < (x_max + 2); i++) { |
39: double ugrad = (xvel0(i + 1, j + 0) + xvel0(i + 1, j + 1)) - (xvel0(i, j) + xvel0(i + 0, j + 1)); |
40: double vgrad = (yvel0(i + 0, j + 1) + yvel0(i + 1, j + 1)) - (yvel0(i, j) + yvel0(i + 1, j + 0)); |
41: double div = (celldx[i] * (ugrad) + celldy[j] * (vgrad)); |
42: double strain2 = 0.5 * (xvel0(i + 0, j + 1) + xvel0(i + 1, j + 1) - xvel0(i, j) - xvel0(i + 1, j + 0)) / celldy[j] + |
43: 0.5 * (yvel0(i + 1, j + 0) + yvel0(i + 1, j + 1) - yvel0(i, j) - yvel0(i + 0, j + 1)) / celldx[i]; |
44: double pgradx = (pressure(i + 1, j + 0) - pressure(i - 1, j + 0)) / (celldx[i] + celldx[i + 1]); |
45: double pgrady = (pressure(i + 0, j + 1) - pressure(i + 0, j - 1)) / (celldy[j] + celldy[j + 2]); |
46: double pgradx2 = pgradx * pgradx; |
47: double pgrady2 = pgrady * pgrady; |
48: double limiter = ((0.5 * (ugrad) / celldx[i]) * pgradx2 + (0.5 * (vgrad) / celldy[j]) * pgrady2 + strain2 * pgradx * pgrady) / |
49: std::fmax(pgradx2 + pgrady2, g_small); |
50: if ((limiter > 0.0) || (div >= 0.0)) { |
51: viscosity(i, j) = 0.0; |
52: } else { |
53: double dirx = 1.0; |
54: if (pgradx < 0.0) dirx = -1.0; |
55: pgradx = dirx * std::fmax(g_small, std::fabs(pgradx)); |
56: double diry = 1.0; |
57: if (pgradx < 0.0) diry = -1.0; |
58: pgrady = diry * std::fmax(g_small, std::fabs(pgrady)); |
59: double pgrad = std::sqrt(pgradx * pgradx + pgrady * pgrady); |
60: double xgrad = std::fabs(celldx[i] * pgrad / pgradx); |
61: double ygrad = std::fabs(celldy[j] * pgrad / pgrady); |
62: double grad = std::fmin(xgrad, ygrad); |
63: double grad2 = grad * grad; |
64: viscosity(i, j) = 2.0 * density0(i, j) * grad2 * limiter * limiter; |
0x45b6a0 STP X29, X30, [SP, #912]! |
0x45b6a4 ADD X29, SP, #0 |
0x45b6a8 STP X19, X20, [SP, #16] |
0x45b6ac LDP W4, W19, [X0, #64] |
0x45b6b0 LDP W20, W2, [X0, #56] |
0x45b6b4 ADD W1, W4, #1 |
0x45b6b8 ADD W19, W19, #2 |
0x45b6bc CMP W1, W19 |
0x45b6c0 B.GE 45b9b4 |
0x45b6c4 ADD W20, W20, #1 |
0x45b6c8 STP X21, X22, [SP, #32] |
0x45b6cc ADD W21, W2, #2 |
0x45b6d0 STP X25, X26, [SP, #64] |
0x45b6d4 SUB W25, W19, W1 |
0x45b6d8 STR W1, [SP, #104] |
0x45b6dc CMP W20, W21 |
0x45b6e0 B.GE 45b9f4 |
0x45b6e4 STP X23, X24, [SP, #48] |
0x45b6e8 SUB W24, W21, W20 |
0x45b6ec ORR X22, XZR, X0 |
0x45b6f0 MUL W26, W25, W24 |
0x45b6f4 STP X27, X28, [SP, #80] |
0x45b6f8 BL 410210 |
0x45b6fc ORR W23, WZR, W0 |
0x45b700 BL 410240 |
0x45b704 UDIV W6, W26, W23 |
0x45b708 ORR W3, WZR, W0 |
0x45b70c LDR W5, [SP, #104] |
0x45b710 MSUB W0, W6, W23, W26 |
0x45b714 CMP W3, W0 |
0x45b718 B.CC 45b9e8 |
0x45b71c MADD W15, W6, W3, W0 |
0x45b720 ADD W27, W6, W15 |
0x45b724 CMP W15, W27 |
0x45b728 B.CS 45b9a4 |
0x45b72c UDIV W7, W15, W24 |
0x45b730 ADRP X8, |
0x45b734 LDP X9, X28, [X22] |
0x45b738 FMOV D31, #0.5000000 |
0x45b73c MOVI D29, #0 |
0x45b740 LDP X16, X26, [X22, #16] |
0x45b744 LDP X25, X23, [X22, #32] |
0x45b748 STR X9, [SP, #104] |
0x45b74c MSUB W10, W7, W24, W15 |
0x45b750 LDR D30, [X8, #344] |
0x45b754 ADD W11, W7, W5 |
0x45b758 SBFM X11, X11, #0, #31 |
0x45b75c LDR X22, [X22, #48] |
0x45b760 ADD W30, W10, W20 |
0x45b764 SUB W17, W21, W30 |
(639) 0x45b768 CMP W6, W17 |
(639) 0x45b76c CSEL W12, W6, W17, #9 |
(639) 0x45b770 ADD W17, W15, W12 |
(639) 0x45b774 CMP W15, W17 |
(639) 0x45b778 B.CS 45b9c0 |
(639) 0x45b77c LDR X8, [X25] |
(639) 0x45b780 SBFM X1, X30, #0, #31 |
(639) 0x45b784 SUB W14, W12, #1 |
(639) 0x45b788 ADD X15, X1, #2 |
(639) 0x45b78c ADD W18, W30, #1 |
(639) 0x45b790 LDR X2, [X23] |
(639) 0x45b794 ADD X15, X15, X14 |
(639) 0x45b798 SBFM X0, X18, #0, #31 |
(639) 0x45b79c SUB W13, W11, #1 |
(639) 0x45b7a0 ADD X3, X1, #1 |
(639) 0x45b7a4 LDR X4, [X22] |
(639) 0x45b7a8 SBFM X10, X13, #0, #31 |
(639) 0x45b7ac ADD W12, W11, #2 |
(639) 0x45b7b0 MUL X9, X11, X8 |
(639) 0x45b7b4 ADD X30, X11, #1 |
(639) 0x45b7b8 LDR X6, [X23, #16] |
(639) 0x45b7bc MUL X21, X2, X11 |
(639) 0x45b7c0 UBFM X14, X9, #61, #60 |
(639) 0x45b7c4 LDR X13, [X22, #16] |
(639) 0x45b7c8 SUB X18, X14, #8 |
(639) 0x45b7cc MADD X8, X11, X4, X4 |
(639) 0x45b7d0 LDR X14, [X26] |
(639) 0x45b7d4 ADD X5, X2, X21 |
(639) 0x45b7d8 ADD X7, X6, X21,LSL #3 |
(639) 0x45b7dc ADD X5, X6, X5,LSL #3 |
(639) 0x45b7e0 SUB X4, X8, X4 |
(639) 0x45b7e4 LDR X21, [X26, #16] |
(639) 0x45b7e8 ADD X4, X13, X4,LSL #3 |
(639) 0x45b7ec MUL X6, X14, X11 |
(639) 0x45b7f0 MUL X10, X10, X14 |
(639) 0x45b7f4 ADD X2, X3, X6 |
(639) 0x45b7f8 SUB X10, X10, X6 |
(639) 0x45b7fc ADD X6, X13, X8,LSL #3 |
(639) 0x45b800 LDR X13, [X25, #16] |
(639) 0x45b804 ADD X2, X21, X2,LSL #3 |
(639) 0x45b808 LDR X21, [SP, #104] |
(639) 0x45b80c ADD X9, X13, X18 |
(639) 0x45b810 LDR X18, [X28, #8] |
(639) 0x45b814 ADD X12, X18, W12,SXTW #3 |
(639) 0x45b818 LDR X8, [X21, #8] |
(639) 0x45b81c ADD X13, X18, X11,LSL #3 |
(639) 0x45b820 B 45b894 |
(640) 0x45b824 FABS D28, D27 |
(640) 0x45b828 FMAXNM D26, D28, D30 |
(640) 0x45b82c FMUL D19, D23, D26 |
(640) 0x45b830 LDR X21, [X16] |
(640) 0x45b834 LDR X18, [X16, #16] |
(640) 0x45b838 FMUL D16, D19, D19 |
(640) 0x45b83c MADD X1, X11, X21, X1 |
(640) 0x45b840 LDR D24, [X18, X1,LSL #3] |
(640) 0x45b844 FMADD D27, D25, D25, D16 |
(640) 0x45b848 FADD D6, D24, D24 |
(640) 0x45b84c FSQRT D20, D27 |
(640) 0x45b850 FMUL D0, D0, D20 |
(640) 0x45b854 FMUL D17, D17, D20 |
(640) 0x45b858 FDIV D22, D0, D25 |
(640) 0x45b85c FDIV D21, D17, D19 |
(640) 0x45b860 FABS D18, D22 |
(640) 0x45b864 FABS D5, D21 |
(640) 0x45b868 FMINNM D3, D18, D5 |
(640) 0x45b86c FMUL D1, D3, D7 |
(640) 0x45b870 FMUL D4, D1, D1 |
(640) 0x45b874 FMUL D23, D6, D4 |
(640) 0x45b878 STR D23, [X9, X0,LSL #3] |
(640) 0x45b87c ADD X0, X0, #1 |
(640) 0x45b880 ORR X1, XZR, X3 |
(640) 0x45b884 ADD X2, X2, #8 |
(640) 0x45b888 ADD X3, X3, #1 |
(640) 0x45b88c CMP X0, X15 |
(640) 0x45b890 B.EQ 45b9c8 |
(640) 0x45b894 LDR D22, [X6, X0,LSL #3] |
(640) 0x45b898 SUB X18, X2, #8 |
(640) 0x45b89c LDR D3, [X4, X0,LSL #3] |
(640) 0x45b8a0 LDR D21, [X4, X1,LSL #3] |
(640) 0x45b8a4 LDR D23, [X6, X1,LSL #3] |
(640) 0x45b8a8 LDR D27, [X5, X0,LSL #3] |
(640) 0x45b8ac FADD D1, D22, D3 |
(640) 0x45b8b0 LDR D4, [X5, X1,LSL #3] |
(640) 0x45b8b4 FADD D0, D21, D3 |
(640) 0x45b8b8 LDR D5, [X7, X1,LSL #3] |
(640) 0x45b8bc FADD D2, D23, D21 |
(640) 0x45b8c0 FADD D6, D23, D22 |
(640) 0x45b8c4 LDR D28, [X7, X0,LSL #3] |
(640) 0x45b8c8 FADD D24, D27, D4 |
(640) 0x45b8cc LDUR D23, [X2, #496] |
(640) 0x45b8d0 FADD D7, D5, D4 |
(640) 0x45b8d4 LDR D26, [X18, X10,LSL #3] |
(640) 0x45b8d8 FSUB D16, D1, S2 |
(640) 0x45b8dc FSUB D18, D6, S0 |
(640) 0x45b8e0 FADD D17, D28, D27 |
(640) 0x45b8e4 FADD D20, D28, D5 |
(640) 0x45b8e8 LDR D0, [X8, X1,LSL #3] |
(640) 0x45b8ec LDR D19, [X18, X14,LSL #3] |
(640) 0x45b8f0 LDR D25, [X8, X0,LSL #3] |
(640) 0x45b8f4 LDR D4, [X2] |
(640) 0x45b8f8 FSUB D1, D17, S7 |
(640) 0x45b8fc FSUB D3, D24, S20 |
(640) 0x45b900 FDIV D6, D31, D0 |
(640) 0x45b904 LDR D17, [X13] |
(640) 0x45b908 FSUB D22, D19, S26 |
(640) 0x45b90c LDR D24, [X12] |
(640) 0x45b910 FADD D27, D0, D25 |
(640) 0x45b914 FSUB D5, D4, S23 |
(640) 0x45b918 MOVI D23, #0 |
(640) 0x45b91c FDIV D7, D31, D17 |
(640) 0x45b920 FMUL D26, D18, D17 |
(640) 0x45b924 FADD D19, D17, D24 |
(640) 0x45b928 FDIV D28, D5, D27 |
(640) 0x45b92c FMUL D21, D16, D6 |
(640) 0x45b930 FMADD D16, D1, D0, D26 |
(640) 0x45b934 FDIV D27, D22, D19 |
(640) 0x45b938 FMADD D20, D3, D7, D21 |
(640) 0x45b93c FMUL D21, D28, D28 |
(640) 0x45b940 FABS D2, D28 |
(640) 0x45b944 FMAXNM D25, D2, D30 |
(640) 0x45b948 FMUL D22, D27, D27 |
(640) 0x45b94c FMUL D1, D1, D21 |
(640) 0x45b950 FMUL D5, D28, D27 |
(640) 0x45b954 FMUL D18, D18, D22 |
(640) 0x45b958 FADD D4, D22, D21 |
(640) 0x45b95c FMAXNM D24, D4, D30 |
(640) 0x45b960 FMUL D3, D18, D7 |
(640) 0x45b964 FMADD D6, D1, D6, D3 |
(640) 0x45b968 FMADD D2, D20, D5, D6 |
(640) 0x45b96c FDIV D7, D2, D24 |
(640) 0x45b970 FCMPE D7, D23 |
(640) 0x45b974 FCCMPE D16, D29, #0, #13 |
(640) 0x45b978 CSINC W21, WZR, WZR, #11 |
(640) 0x45b97c CBNZ W21, 45b878 |
(640) 0x45b980 FCMPE D28, #0 |
(640) 0x45b984 FMOV D23, #1.0000000 |
(640) 0x45b988 B.GE 45b824 |
(640) 0x45b98c FCMPE D25, #0 |
(640) 0x45b990 FMOV D23, #-1.0000000 |
(640) 0x45b994 FNEG D25, D25 |
(640) 0x45b998 B.GT 45b824 |
(640) 0x45b99c FMOV D23, #1.0000000 |
(640) 0x45b9a0 B 45b824 |
0x45b9a4 LDP X21, X22, [SP, #32] |
0x45b9a8 LDP X23, X24, [SP, #48] |
0x45b9ac LDP X25, X26, [SP, #64] |
0x45b9b0 LDP X27, X28, [SP, #80] |
0x45b9b4 LDP X19, X20, [SP, #16] |
0x45b9b8 LDP X29, X30, [SP], #112 |
0x45b9bc RET |
(639) 0x45b9c0 ORR W17, WZR, W15 |
(639) 0x45b9c4 ADD X30, X11, #1 |
(639) 0x45b9c8 ORR X11, XZR, X30 |
(639) 0x45b9cc CMP W19, W30 |
(639) 0x45b9d0 B.LE 45b9a4 |
(639) 0x45b9d4 SUB W6, W27, W17 |
(639) 0x45b9d8 ORR W15, WZR, W17 |
(639) 0x45b9dc ORR W30, WZR, W20 |
(639) 0x45b9e0 ORR W17, WZR, W24 |
(639) 0x45b9e4 B 45b768 |
0x45b9e8 ADD W6, W6, #1 |
0x45b9ec MOVZ W0, #0 |
0x45b9f0 B 45b71c |
0x45b9f4 LDP X21, X22, [SP, #32] |
0x45b9f8 LDP X25, X26, [SP, #64] |
0x45b9fc B 45b9b4 |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►100.00+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | viscosity(global_variables&) | viscosity.cpp:76 | exec |
| ○ | timestep(global_variables&, pa[...] | timestep.cpp:64 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1076 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►50.10+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | viscosity(global_variables&) | viscosity.cpp:76 | exec |
| ○ | timestep(global_variables&, pa[...] | timestep.cpp:64 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1076 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| ►49.90+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►74.84+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►25.16+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | viscosity(global_variables&) | viscosity.cpp:76 | exec |
| ○ | timestep(global_variables&, pa[...] | timestep.cpp:64 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1076 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►87.45+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►12.55+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | viscosity(global_variables&) | viscosity.cpp:76 | exec |
| ○ | timestep(global_variables&, pa[...] | timestep.cpp:64 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1076 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►93.72+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►6.28+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | viscosity(global_variables&) | viscosity.cpp:76 | exec |
| ○ | timestep(global_variables&, pa[...] | timestep.cpp:64 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1076 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►95.81+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►4.19+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | viscosity(global_variables&) | viscosity.cpp:76 | exec |
| ○ | timestep(global_variables&, pa[...] | timestep.cpp:64 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1076 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►96.89+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►3.11+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | viscosity(global_variables&) | viscosity.cpp:76 | exec |
| ○ | timestep(global_variables&, pa[...] | timestep.cpp:64 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1076 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►97.49+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►2.51+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | viscosity(global_variables&) | viscosity.cpp:76 | exec |
| ○ | timestep(global_variables&, pa[...] | timestep.cpp:64 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1076 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►97.88+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►2.12+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | viscosity(global_variables&) | viscosity.cpp:76 | exec |
| ○ | timestep(global_variables&, pa[...] | timestep.cpp:64 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1076 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►98.13+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►1.87+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | viscosity(global_variables&) | viscosity.cpp:76 | exec |
| ○ | timestep(global_variables&, pa[...] | timestep.cpp:64 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1076 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►98.41+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►1.59+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | viscosity(global_variables&) | viscosity.cpp:76 | exec |
| ○ | timestep(global_variables&, pa[...] | timestep.cpp:64 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1076 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| Path / |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run 1x1
| Source file and lines | viscosity.cpp:36-64 |
| Module | exec |
| nb instructions | 63 |
| nb uops | 63 |
| loop length | 252 |
| used w registers | 22 |
| used x registers | 18 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 3 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 17 |
| micro-operation queue | 7.88 cycles |
| front end | 7.88 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 4.50 | 4.50 | 7.75 | 7.75 | 7.75 | 7.75 | 0.50 | 0.50 | 0.50 | 0.50 | 8.00 | 8.00 | 8.00 | 4.00 | 4.00 |
| cycles | 4.50 | 4.50 | 7.75 | 7.75 | 7.75 | 7.75 | 0.50 | 0.50 | 0.50 | 0.50 | 8.00 | 8.00 | 8.00 | 4.00 | 4.00 |
| Cycles executing div or sqrt instructions | 10.00-25.00 |
| Front-end | 7.88 |
| Dispatch | 8.00 |
| DIV/SQRT | 10.00-25.00 |
| Overall L1 | 10.00-25.00 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| other | 0% |
| all | 0% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | 0% |
| other | 0% |
| all | 28% |
| load | 41% |
| store | 42% |
| mul | 12% |
| add-sub | 13% |
| fma | 12% |
| other | 23% |
| all | 25% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 25% |
| all | 28% |
| load | 41% |
| store | 42% |
| mul | 12% |
| add-sub | 13% |
| fma | 12% |
| div/sqrt | 12% |
| other | 25% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #912]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDP W4, W19, [X0, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDP W20, W2, [X0, #56] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| ADD W1, W4, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W19, W19, #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W1, W19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 45b9b4 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_._omp_fn.0+0x314> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W20, W20, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD W21, W2, #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| SUB W25, W19, W1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR W1, [SP, #104] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| CMP W20, W21 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 45b9f4 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_._omp_fn.0+0x354> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| SUB W24, W21, W20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ORR X22, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MUL W26, W25, W24 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| STP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| BL 410210 <@plt_start@+0x1f0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W23, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410240 <@plt_start@+0x220> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| UDIV W6, W26, W23 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| ORR W3, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| LDR W5, [SP, #104] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| MSUB W0, W6, W23, W26 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | N/A |
| CMP W3, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CC 45b9e8 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_._omp_fn.0+0x348> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W15, W6, W3, W0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W27, W6, W15 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W15, W27 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 45b9a4 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_._omp_fn.0+0x304> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| UDIV W7, W15, W24 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | scal (12.5%) |
| ADRP X8, <462730> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| LDP X9, X28, [X22] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| FMOV D31, #0.5000000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| MOVI D29, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| LDP X16, X26, [X22, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X25, X23, [X22, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| STR X9, [SP, #104] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| MSUB W10, W7, W24, W15 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| LDR D30, [X8, #344] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| ADD W11, W7, W5 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SBFM X11, X11, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| LDR X22, [X22, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ADD W30, W10, W20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W17, W21, W30 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #112 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W6, W6, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W0, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| B 45b71c <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_._omp_fn.0+0x7c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| B 45b9b4 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_._omp_fn.0+0x314> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run 1x1
| Source file and lines | viscosity.cpp:36-64 |
| Module | exec |
| nb instructions | 63 |
| nb uops | 63 |
| loop length | 252 |
| used w registers | 22 |
| used x registers | 18 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 3 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 17 |
| micro-operation queue | 7.88 cycles |
| front end | 7.88 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 4.50 | 4.50 | 7.75 | 7.75 | 7.75 | 7.75 | 0.50 | 0.50 | 0.50 | 0.50 | 8.00 | 8.00 | 8.00 | 4.00 | 4.00 |
| cycles | 4.50 | 4.50 | 7.75 | 7.75 | 7.75 | 7.75 | 0.50 | 0.50 | 0.50 | 0.50 | 8.00 | 8.00 | 8.00 | 4.00 | 4.00 |
| Cycles executing div or sqrt instructions | 10.00-25.00 |
| Front-end | 7.88 |
| Dispatch | 8.00 |
| DIV/SQRT | 10.00-25.00 |
| Overall L1 | 10.00-25.00 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| other | 0% |
| all | 0% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | 0% |
| other | 0% |
| all | 28% |
| load | 41% |
| store | 42% |
| mul | 12% |
| add-sub | 13% |
| fma | 12% |
| other | 23% |
| all | 25% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 25% |
| all | 28% |
| load | 41% |
| store | 42% |
| mul | 12% |
| add-sub | 13% |
| fma | 12% |
| div/sqrt | 12% |
| other | 25% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #912]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDP W4, W19, [X0, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDP W20, W2, [X0, #56] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| ADD W1, W4, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W19, W19, #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W1, W19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 45b9b4 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_._omp_fn.0+0x314> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W20, W20, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD W21, W2, #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| SUB W25, W19, W1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR W1, [SP, #104] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| CMP W20, W21 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 45b9f4 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_._omp_fn.0+0x354> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| SUB W24, W21, W20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ORR X22, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MUL W26, W25, W24 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| STP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| BL 410210 <@plt_start@+0x1f0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W23, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410240 <@plt_start@+0x220> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| UDIV W6, W26, W23 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| ORR W3, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| LDR W5, [SP, #104] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| MSUB W0, W6, W23, W26 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | N/A |
| CMP W3, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CC 45b9e8 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_._omp_fn.0+0x348> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W15, W6, W3, W0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W27, W6, W15 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W15, W27 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 45b9a4 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_._omp_fn.0+0x304> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| UDIV W7, W15, W24 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | scal (12.5%) |
| ADRP X8, <462730> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| LDP X9, X28, [X22] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| FMOV D31, #0.5000000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| MOVI D29, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| LDP X16, X26, [X22, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X25, X23, [X22, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| STR X9, [SP, #104] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| MSUB W10, W7, W24, W15 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| LDR D30, [X8, #344] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| ADD W11, W7, W5 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SBFM X11, X11, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| LDR X22, [X22, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ADD W30, W10, W20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W17, W21, W30 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #112 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W6, W6, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W0, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| B 45b71c <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_._omp_fn.0+0x7c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| B 45b9b4 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_._omp_fn.0+0x314> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| Run 1x1 | Number processes: 1Number nodes: NARun Command: <executable>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_NUM_THREADS: 1OMP_PLACES: threads |
|---|---|
| Run 1x2 | Number processes: 1Run Command: <executable>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431OMP_NUM_THREADS: 2OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| Run 1x4 | Number processes: 1Run Command: <executable>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431OMP_NUM_THREADS: 4OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| Run 1x8 | Number processes: 1Run Command: <executable>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431OMP_NUM_THREADS: 8OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| Run 1x16 | Number processes: 1Run Command: <executable>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431OMP_NUM_THREADS: 16OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| Run 1x24 | Number processes: 1Run Command: <executable>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431OMP_NUM_THREADS: 24OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| Run 1x32 | Number processes: 1Run Command: <executable>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431OMP_NUM_THREADS: 32OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| Run 1x40 | Number processes: 1Run Command: <executable>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431OMP_NUM_THREADS: 40OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| Run 1x48 | Number processes: 1Run Command: <executable>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431OMP_NUM_THREADS: 48OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| Run 1x56 | Number processes: 1Run Command: <executable>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431OMP_NUM_THREADS: 56OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| Run 1x64 | Number processes: 1Run Command: <executable>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431OMP_NUM_THREADS: 64OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| (1x1) Efficiency | (1x1) Potential Speed-Up (%) | (1x2) Efficiency | (1x2) Potential Speed-Up (%) | (1x4) Efficiency | (1x4) Potential Speed-Up (%) | (1x8) Efficiency | (1x8) Potential Speed-Up (%) | (1x16) Efficiency | (1x16) Potential Speed-Up (%) | (1x24) Efficiency | (1x24) Potential Speed-Up (%) | (1x32) Efficiency | (1x32) Potential Speed-Up (%) | (1x40) Efficiency | (1x40) Potential Speed-Up (%) | (1x48) Efficiency | (1x48) Potential Speed-Up (%) | (1x56) Efficiency | (1x56) Potential Speed-Up (%) | (1x64) Efficiency | (1x64) Potential Speed-Up (%) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 0 | 1 | 0.03 | 0.99 | 0.08 | 0.98 | 0.13 | 0.95 | 0.28 | 0.97 | 0.13 | 0.96 | 0.11 | 0.94 | 0.16 | 0.9 | 0.24 | 0.79 | 0.52 | 0.68 | 0.8 |
| Run | Number of threads | Efficiency (ideal is 1) | Speedup | Ideal Speedup | Time (s) | Coverage (%) |
|---|---|---|---|---|---|---|
| 1x1 | 1 | 1 | 1 | 1 | 144.99496459961 | 9.7553653717041 |
| 1x2 | 2 | 1 | 1.99 | 2 | 72.609992980957 | 9.6941766738892 |
| 1x4 | 4 | 0.99 | 3.97 | 4 | 36.439994812012 | 9.5313377380371 |
| 1x8 | 8 | 0.98 | 7.88 | 8 | 18.254989624023 | 8.4777803421021 |
| 1x16 | 16 | 0.95 | 15.21 | 16 | 9.2049989700317 | 5.6679606437683 |
| 1x24 | 24 | 0.97 | 23.24 | 24 | 6.1850018501282 | 3.9776101112366 |
| 1x32 | 32 | 0.96 | 30.87 | 32 | 4.6850004196167 | 3.2524580955505 |
| 1x40 | 40 | 0.94 | 37.75 | 40 | 3.8650004863739 | 2.8053002357483 |
| 1x48 | 48 | 0.9 | 43.31 | 48 | 3.3699994087219 | 2.4877912998199 |
| 1x56 | 56 | 0.79 | 44.12 | 56 | 3.3350005149841 | 2.446359872818 |
| 1x64 | 64 | 0.68 | 43.36 | 64 | 3.3349997997284 | 2.4853868484497 |
| Name | Coverage (%) | Time (s) |
|---|---|---|
| ▼viscosity_kernel(int, int, int, int, clover::Buffer1D | 9.76 | 144.99 |
| ▼Loop 639 - viscosity.cpp:38-64 - exec– | 0.00 | 0.00 |
| ○Loop 640 - viscosity.cpp:39-64 - exec | 9.75 | 144.94 |
