| Loop Id: 154 | Module: exec | Source: calc_dt_kernel.f90:93-129 | Coverage: NA% |
|---|
| Loop Id: 154 | Module: exec | Source: calc_dt_kernel.f90:93-129 | Coverage: NA% |
|---|
0x422b28 LDR X18, [SP, #160] |
0x422b2c LDR D31, [X25, X15] |
0x422b30 LDR D26, [X24, X15] |
0x422b34 LDR D24, [X18] |
0x422b38 LDR X18, [SP, #184] |
0x422b3c LDR D20, [X26] |
0x422b40 FADD D26, D26, D31 |
0x422b44 LDR D19, [X27] |
0x422b48 FNEG D18, D24 |
0x422b4c LDR D16, [X18, X14,LSL #3] |
0x422b50 LDR X14, [SP, #168] |
0x422b54 FADD D20, D20, D20 |
0x422b58 LDR D25, [X20] |
0x422b5c FADD D19, D19, D19 |
0x422b60 LDR X18, [SP, #112] |
0x422b64 LDR D17, [X14] |
0x422b68 LDR X14, [SP, #192] |
0x422b6c LDR X1, [SP, #176] |
0x422b70 LDR D22, [X14, X16] |
0x422b74 LDR X14, [SP, #200] |
0x422b78 LDR D23, [X14, X16] |
0x422b7c HINT #0 |
(153) 0x422b80 FMUL D30, D26, D25 |
(153) 0x422b84 LDR D0, [X5, X1,LSL #3] |
(153) 0x422b88 LDR D29, [X4, X1,LSL #3] |
(153) 0x422b8c LDR D15, [X6, X1,LSL #3] |
(153) 0x422b90 FABS D4, D30 |
(153) 0x422b94 LDR D1, [X2, X1,LSL #3] |
(153) 0x422b98 FADD D5, D0, D22 |
(153) 0x422b9c FMOV D22, D0 |
(153) 0x422ba0 LDR D27, [X11, X1,LSL #3] |
(153) 0x422ba4 FADD D2, D29, D23 |
(153) 0x422ba8 FMOV D23, D29 |
(153) 0x422bac LDR D28, [X10, X1,LSL #3] |
(153) 0x422bb0 LDR D25, [X3, X1,LSL #3] |
(153) 0x422bb4 FADD D26, D1, D15 |
(153) 0x422bb8 LDR D3, [X7, X1,LSL #3] |
(153) 0x422bbc FMUL D0, D5, D27 |
(153) 0x422bc0 FMUL D31, D2, D28 |
(153) 0x422bc4 LDR D15, [X9, X1,LSL #3] |
(153) 0x422bc8 FMUL D1, D26, D25 |
(153) 0x422bcc LDR D28, [X12, X1,LSL #3] |
(153) 0x422bd0 FADD D6, D3, D3 |
(153) 0x422bd4 FABS D5, D0 |
(153) 0x422bd8 LDR D27, [X28, X1,LSL #3] |
(153) 0x422bdc FMINNM D2, D16, D27 |
(153) 0x422be0 FMUL D27, D2, D17 |
(153) 0x422be4 FSUB D30, D31, S30 |
(153) 0x422be8 FABS D3, D31 |
(153) 0x422bec FMAXNM D31, D3, D5 |
(153) 0x422bf0 LDR D29, [X8, X1,LSL #3] |
(153) 0x422bf4 FSUB D5, D1, S0 |
(153) 0x422bf8 FADD D0, D28, D28 |
(153) 0x422bfc FMUL D3, D20, D28 |
(153) 0x422c00 FMUL D2, D19, D28 |
(153) 0x422c04 FDIV D15, D6, D15 |
(153) 0x422c08 FMUL D28, D24, D28 |
(153) 0x422c0c FMAXNM D6, D31, D28 |
(153) 0x422c10 FABS D1, D1 |
(153) 0x422c14 FMAXNM D4, D4, D28 |
(153) 0x422c18 FMAXNM D1, D4, D1 |
(153) 0x422c1c FADD D30, D30, D5 |
(153) 0x422c20 FDIV D5, D3, D6 |
(153) 0x422c24 FDIV D31, D30, D0 |
(153) 0x422c28 FMADD D29, D29, D29, D15 |
(153) 0x422c2c FDIV D0, D2, D1 |
(153) 0x422c30 FSQRT D3, D29 |
(153) 0x422c34 FMAXNM D2, D24, D3 |
(153) 0x422c38 FCMPE D18, D31 |
(153) 0x422c3c FDIV D27, D27, D2 |
(153) 0x422c40 FMINNM D15, D5, D27 |
(153) 0x422c44 B.GT 422cf4 |
(153) 0x422c48 ADD X1, X1, #1 |
(153) 0x422c4c LDR D28, [X30] |
(153) 0x422c50 FMINNM D4, D0, D28 |
(153) 0x422c54 FMINNM D6, D4, D15 |
(153) 0x422c58 FMINNM D21, D21, D6 |
(153) 0x422c5c CMP W13, W1 |
(153) 0x422c60 B.GE 422b80 |
0x422c64 LDR W14, [SP, #156] |
0x422c68 MOVZ W1, #1 |
0x422c6c STR X18, [SP, #112] |
0x422c70 STR W14, [SP, #212] |
0x422c74 LDR X14, [SP, #128] |
0x422c78 ADD X9, X9, X19 |
0x422c7c ADD X6, X6, X21 |
0x422c80 ADD X2, X2, X21 |
0x422c84 ADD X3, X3, X23 |
0x422c88 LDR W18, [SP, #124] |
0x422c8c ADD X5, X5, X17 |
0x422c90 ADD X11, X11, X22 |
0x422c94 ADD X4, X4, X17 |
0x422c98 ADD X10, X10, X22 |
0x422c9c ADD X20, X20, X23 |
0x422ca0 ADD X15, X15, X21 |
0x422ca4 ADD X8, X8, X14 |
0x422ca8 LDR X14, [SP, #136] |
0x422cac ADD X16, X16, X17 |
0x422cb0 ADD X7, X7, X14 |
0x422cb4 LDR X14, [SP, #144] |
0x422cb8 ADD X12, X12, X14 |
0x422cbc ORR X14, XZR, X0 |
0x422cc0 CMP W18, W0 |
0x422cc4 B.LE 422d1c |
0x422cc8 LDR W18, [SP, #152] |
0x422ccc ADD X0, X0, #1 |
0x422cd0 CMP W18, W13 |
0x422cd4 B.LE 422b28 |
(153) 0x422cf4 FDIV D1, D7, D31 |
(153) 0x422cf8 LDR D30, [X18] |
(153) 0x422cfc ADD X1, X1, #1 |
(153) 0x422d00 FNMUL D5, D1, D30 |
(153) 0x422d04 FMINNM D31, D5, D0 |
(153) 0x422d08 FMINNM D29, D31, D15 |
(153) 0x422d0c FMINNM D21, D21, D29 |
(153) 0x422d10 CMP W13, W1 |
(153) 0x422d14 B.GE 422b80 |
0x422d18 B 422c64 |
/home/eoseret/qaas/qaas_runs/178-231-1255/intel/CloverLeaf1.3-FC/build/CloverLeaf1.3-FC/CloverLeaf_ref/kernels/calc_dt_kernel.f90: 93 - 129 |
-------------------------------------------------------------------------------- |
93: !$OMP SIMD |
94: DO j=x_min,x_max |
95: |
96: dsx=celldx(j) |
97: dsy=celldy(k) |
98: |
99: cc=soundspeed(j,k)*soundspeed(j,k) |
100: cc=cc+2.0_8*viscosity_a(j,k)/density0(j,k) |
101: cc=MAX(SQRT(cc),g_small) |
102: |
103: dtct=dtc_safe*MIN(dsx,dsy)/cc |
104: |
105: div=0.0 |
106: |
107: dv1=(xvel0(j ,k)+xvel0(j ,k+1))*xarea(j ,k) |
108: dv2=(xvel0(j+1,k)+xvel0(j+1,k+1))*xarea(j+1,k) |
109: |
110: div=div+dv2-dv1 |
111: |
112: dtut=dtu_safe*2.0_8*volume(j,k)/MAX(ABS(dv1),ABS(dv2),g_small*volume(j,k)) |
113: |
114: dv1=(yvel0(j,k )+yvel0(j+1,k ))*yarea(j,k ) |
115: dv2=(yvel0(j,k+1)+yvel0(j+1,k+1))*yarea(j,k+1) |
116: |
117: div=div+dv2-dv1 |
118: |
119: dtvt=dtv_safe*2.0_8*volume(j,k)/MAX(ABS(dv1),ABS(dv2),g_small*volume(j,k)) |
120: |
121: div=div/(2.0_8*volume(j,k)) |
122: |
123: IF(div.LT.-g_small)THEN |
124: dtdivt=dtdiv_safe*(-1.0_8/div) |
125: ELSE |
126: dtdivt=g_big |
127: ENDIF |
128: |
129: dt_min_val=MIN(dt_min_val,dtct,dtut,dtvt,dtdivt) |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| Path / |
| Metric | Value |
|---|---|
| CQA speedup if no scalar integer | 2.08 |
| CQA speedup if FP arith vectorized | 1.56 |
| CQA speedup if fully vectorized | 5.33 |
| CQA speedup if no inter-iteration dependency | NA |
| CQA speedup if next bottleneck killed | 1.31 |
| Bottlenecks | P10, P11, P12, |
| Function | __calc_dt_kernel_module_MOD_calc_dt_kernel._omp_fn.0 |
| Source | calc_dt_kernel.f90:93-93,calc_dt_kernel.f90:97-97,calc_dt_kernel.f90:101-103,calc_dt_kernel.f90:107-107,calc_dt_kernel.f90:114-115,calc_dt_kernel.f90:123-123,calc_dt_kernel.f90:129-129 |
| Source loop unroll info | NA |
| Source loop unroll confidence level | NA |
| Unroll/vectorization loop type | NA |
| Unroll factor | NA |
| CQA cycles | 8.33 |
| CQA cycles if no scalar integer | 4.00 |
| CQA cycles if FP arith vectorized | 5.33 |
| CQA cycles if fully vectorized | 1.56 |
| Front-end cycles | 6.38 |
| P0 cycles | 1.50 |
| P1 cycles | 1.50 |
| P2 cycles | 4.75 |
| P3 cycles | 4.75 |
| P4 cycles | 4.75 |
| P5 cycles | 4.75 |
| P6 cycles | 1.00 |
| P7 cycles | 1.00 |
| P8 cycles | 1.00 |
| P9 cycles | 1.00 |
| P10 cycles | 8.33 |
| P11 cycles | 8.33 |
| P12 cycles | 8.33 |
| P13 cycles | 1.00 |
| P14 cycles | 1.00 |
| DIV/SQRT cycles | 0.00 |
| Inter-iter dependencies cycles | NA |
| FE+BE cycles (UFS) | NA |
| Stall cycles (UFS) | NA |
| Nb insns | 52.00 |
| Nb uops | 51.00 |
| Nb loads | NA |
| Nb stores | 2.00 |
| Nb stack references | 15.00 |
| FLOP/cycle | 0.36 |
| Nb FLOP add-sub | 3.00 |
| Nb FLOP mul | 0.00 |
| Nb FLOP fma | 0.00 |
| Nb FLOP div | 0.00 |
| Nb FLOP rcp | 0.00 |
| Nb FLOP sqrt | 0.00 |
| Nb FLOP rsqrt | 0.00 |
| Bytes/cycle | 1.44 |
| Bytes prefetched | 0.00 |
| Bytes loaded | 9.00 |
| Bytes stored | 3.00 |
| Stride 0 | NA |
| Stride 1 | NA |
| Stride n | NA |
| Stride unknown | NA |
| Stride indirect | NA |
| Vectorization ratio all | 0.00 |
| Vectorization ratio load | 0.00 |
| Vectorization ratio store | 0.00 |
| Vectorization ratio mul | NA |
| Vectorization ratio add_sub | 0.00 |
| Vectorization ratio fma | NA |
| Vectorization ratio div_sqrt | NA |
| Vectorization ratio other | 0.00 |
| Vector-efficiency ratio all | 23.79 |
| Vector-efficiency ratio load | 25.00 |
| Vector-efficiency ratio store | 18.75 |
| Vector-efficiency ratio mul | NA |
| Vector-efficiency ratio add_sub | 25.00 |
| Vector-efficiency ratio fma | NA |
| Vector-efficiency ratio div_sqrt | NA |
| Vector-efficiency ratio other | 16.67 |
| Metric | Value |
|---|---|
| CQA speedup if no scalar integer | 2.08 |
| CQA speedup if FP arith vectorized | 1.56 |
| CQA speedup if fully vectorized | 5.33 |
| CQA speedup if no inter-iteration dependency | NA |
| CQA speedup if next bottleneck killed | 1.31 |
| Bottlenecks | P10, P11, P12, |
| Function | __calc_dt_kernel_module_MOD_calc_dt_kernel._omp_fn.0 |
| Source | calc_dt_kernel.f90:93-93,calc_dt_kernel.f90:97-97,calc_dt_kernel.f90:101-103,calc_dt_kernel.f90:107-107,calc_dt_kernel.f90:114-115,calc_dt_kernel.f90:123-123,calc_dt_kernel.f90:129-129 |
| Source loop unroll info | NA |
| Source loop unroll confidence level | NA |
| Unroll/vectorization loop type | NA |
| Unroll factor | NA |
| CQA cycles | 8.33 |
| CQA cycles if no scalar integer | 4.00 |
| CQA cycles if FP arith vectorized | 5.33 |
| CQA cycles if fully vectorized | 1.56 |
| Front-end cycles | 6.38 |
| P0 cycles | 1.50 |
| P1 cycles | 1.50 |
| P2 cycles | 4.75 |
| P3 cycles | 4.75 |
| P4 cycles | 4.75 |
| P5 cycles | 4.75 |
| P6 cycles | 1.00 |
| P7 cycles | 1.00 |
| P8 cycles | 1.00 |
| P9 cycles | 1.00 |
| P10 cycles | 8.33 |
| P11 cycles | 8.33 |
| P12 cycles | 8.33 |
| P13 cycles | 1.00 |
| P14 cycles | 1.00 |
| DIV/SQRT cycles | 0.00 |
| Inter-iter dependencies cycles | NA |
| FE+BE cycles (UFS) | NA |
| Stall cycles (UFS) | NA |
| Nb insns | 52.00 |
| Nb uops | 51.00 |
| Nb loads | NA |
| Nb stores | 2.00 |
| Nb stack references | 15.00 |
| FLOP/cycle | 0.36 |
| Nb FLOP add-sub | 3.00 |
| Nb FLOP mul | 0.00 |
| Nb FLOP fma | 0.00 |
| Nb FLOP div | 0.00 |
| Nb FLOP rcp | 0.00 |
| Nb FLOP sqrt | 0.00 |
| Nb FLOP rsqrt | 0.00 |
| Bytes/cycle | 1.44 |
| Bytes prefetched | 0.00 |
| Bytes loaded | 9.00 |
| Bytes stored | 3.00 |
| Stride 0 | NA |
| Stride 1 | NA |
| Stride n | NA |
| Stride unknown | NA |
| Stride indirect | NA |
| Vectorization ratio all | 0.00 |
| Vectorization ratio load | 0.00 |
| Vectorization ratio store | 0.00 |
| Vectorization ratio mul | NA |
| Vectorization ratio add_sub | 0.00 |
| Vectorization ratio fma | NA |
| Vectorization ratio div_sqrt | NA |
| Vectorization ratio other | 0.00 |
| Vector-efficiency ratio all | 23.79 |
| Vector-efficiency ratio load | 25.00 |
| Vector-efficiency ratio store | 18.75 |
| Vector-efficiency ratio mul | NA |
| Vector-efficiency ratio add_sub | 25.00 |
| Vector-efficiency ratio fma | NA |
| Vector-efficiency ratio div_sqrt | NA |
| Vector-efficiency ratio other | 16.67 |
| Path / |
| Function | __calc_dt_kernel_module_MOD_calc_dt_kernel._omp_fn.0 |
| Source file and lines | calc_dt_kernel.f90:93-129 |
| Module | exec |
| nb instructions | 52 |
| nb uops | 51 |
| loop length | 208 |
| used w registers | 5 |
| used x registers | 28 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 11 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 15 |
| micro-operation queue | 6.38 cycles |
| front end | 6.38 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 1.50 | 1.50 | 4.75 | 4.75 | 4.75 | 4.75 | 1.00 | 1.00 | 1.00 | 1.00 | 8.33 | 8.33 | 8.33 | 1.00 | 1.00 |
| cycles | 1.50 | 1.50 | 4.75 | 4.75 | 4.75 | 4.75 | 1.00 | 1.00 | 1.00 | 1.00 | 8.33 | 8.33 | 8.33 | 1.00 | 1.00 |
| Cycles executing div or sqrt instructions | NA |
| Front-end | 6.38 |
| Dispatch | 8.33 |
| Overall L1 | 8.33 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 0% |
| all | 0% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 23% |
| load | 25% |
| store | 18% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 25% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 12% |
| all | 25% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 25% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 25% |
| all | 23% |
| load | 25% |
| store | 18% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 25% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 16% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| LDR X18, [SP, #160] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR D31, [X25, X15] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| LDR D26, [X24, X15] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| LDR D24, [X18] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| LDR X18, [SP, #184] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR D20, [X26] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| FADD D26, D26, D31 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| LDR D19, [X27] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| FNEG D18, D24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| LDR D16, [X18, X14,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| LDR X14, [SP, #168] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| FADD D20, D20, D20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| LDR D25, [X20] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| FADD D19, D19, D19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| LDR X18, [SP, #112] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR D17, [X14] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| LDR X14, [SP, #192] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X1, [SP, #176] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDR D22, [X14, X16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| LDR X14, [SP, #200] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR D23, [X14, X16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| HINT #0 | N/A | ||||||||||||||||||
| LDR W14, [SP, #156] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| MOVZ W1, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR X18, [SP, #112] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| STR W14, [SP, #212] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| LDR X14, [SP, #128] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ADD X9, X9, X19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X6, X6, X21 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X2, X2, X21 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X3, X3, X23 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| LDR W18, [SP, #124] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ADD X5, X5, X17 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X11, X11, X22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X4, X4, X17 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X10, X10, X22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X20, X20, X23 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADD X15, X15, X21 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADD X8, X8, X14 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| LDR X14, [SP, #136] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ADD X16, X16, X17 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADD X7, X7, X14 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| LDR X14, [SP, #144] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ADD X12, X12, X14 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ORR X14, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| CMP W18, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.LE 422d1c <__calc_dt_kernel_module_MOD_calc_dt_kernel._omp_fn.0+0x43c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR W18, [SP, #152] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ADD X0, X0, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| CMP W18, W13 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
| B.LE 422b28 <__calc_dt_kernel_module_MOD_calc_dt_kernel._omp_fn.0+0x248> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| B 422c64 <__calc_dt_kernel_module_MOD_calc_dt_kernel._omp_fn.0+0x384> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| Function | __calc_dt_kernel_module_MOD_calc_dt_kernel._omp_fn.0 |
| Source file and lines | calc_dt_kernel.f90:93-129 |
| Module | exec |
| nb instructions | 52 |
| nb uops | 51 |
| loop length | 208 |
| used w registers | 5 |
| used x registers | 28 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 11 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 15 |
| micro-operation queue | 6.38 cycles |
| front end | 6.38 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 1.50 | 1.50 | 4.75 | 4.75 | 4.75 | 4.75 | 1.00 | 1.00 | 1.00 | 1.00 | 8.33 | 8.33 | 8.33 | 1.00 | 1.00 |
| cycles | 1.50 | 1.50 | 4.75 | 4.75 | 4.75 | 4.75 | 1.00 | 1.00 | 1.00 | 1.00 | 8.33 | 8.33 | 8.33 | 1.00 | 1.00 |
| Cycles executing div or sqrt instructions | NA |
| Front-end | 6.38 |
| Dispatch | 8.33 |
| Overall L1 | 8.33 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 0% |
| all | 0% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 23% |
| load | 25% |
| store | 18% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 25% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 12% |
| all | 25% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 25% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 25% |
| all | 23% |
| load | 25% |
| store | 18% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 25% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 16% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| LDR X18, [SP, #160] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR D31, [X25, X15] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| LDR D26, [X24, X15] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| LDR D24, [X18] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| LDR X18, [SP, #184] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR D20, [X26] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| FADD D26, D26, D31 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| LDR D19, [X27] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| FNEG D18, D24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| LDR D16, [X18, X14,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| LDR X14, [SP, #168] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| FADD D20, D20, D20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| LDR D25, [X20] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| FADD D19, D19, D19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| LDR X18, [SP, #112] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR D17, [X14] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| LDR X14, [SP, #192] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X1, [SP, #176] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDR D22, [X14, X16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| LDR X14, [SP, #200] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR D23, [X14, X16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| HINT #0 | N/A | ||||||||||||||||||
| LDR W14, [SP, #156] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| MOVZ W1, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR X18, [SP, #112] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| STR W14, [SP, #212] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| LDR X14, [SP, #128] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ADD X9, X9, X19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X6, X6, X21 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X2, X2, X21 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X3, X3, X23 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| LDR W18, [SP, #124] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ADD X5, X5, X17 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X11, X11, X22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X4, X4, X17 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X10, X10, X22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X20, X20, X23 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADD X15, X15, X21 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADD X8, X8, X14 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| LDR X14, [SP, #136] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ADD X16, X16, X17 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADD X7, X7, X14 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| LDR X14, [SP, #144] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ADD X12, X12, X14 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ORR X14, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| CMP W18, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.LE 422d1c <__calc_dt_kernel_module_MOD_calc_dt_kernel._omp_fn.0+0x43c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR W18, [SP, #152] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ADD X0, X0, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| CMP W18, W13 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
| B.LE 422b28 <__calc_dt_kernel_module_MOD_calc_dt_kernel._omp_fn.0+0x248> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| B 422c64 <__calc_dt_kernel_module_MOD_calc_dt_kernel._omp_fn.0+0x384> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| Run 1x1 | Number processes: 1Number nodes: NARun Command: <executable>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-231-1255/intel/CloverLeaf1.3-FC/run/oneview_runs/multicore/gcc_5/oneview_run_1782340583OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_NUM_THREADS: 1OMP_PLACES: threads |
|---|---|
| Run 1x2 | Number processes: 1Run Command: <executable>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-231-1255/intel/CloverLeaf1.3-FC/run/oneview_runs/multicore/gcc_5/oneview_run_1782340583OMP_NUM_THREADS: 2OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| Run 1x4 | Number processes: 1Run Command: <executable>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-231-1255/intel/CloverLeaf1.3-FC/run/oneview_runs/multicore/gcc_5/oneview_run_1782340583OMP_NUM_THREADS: 4OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| Run 1x8 | Number processes: 1Run Command: <executable>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-231-1255/intel/CloverLeaf1.3-FC/run/oneview_runs/multicore/gcc_5/oneview_run_1782340583OMP_NUM_THREADS: 8OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| Run 1x16 | Number processes: 1Run Command: <executable>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-231-1255/intel/CloverLeaf1.3-FC/run/oneview_runs/multicore/gcc_5/oneview_run_1782340583OMP_NUM_THREADS: 16OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| Run 1x24 | Number processes: 1Run Command: <executable>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-231-1255/intel/CloverLeaf1.3-FC/run/oneview_runs/multicore/gcc_5/oneview_run_1782340583OMP_NUM_THREADS: 24OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| Run 1x32 | Number processes: 1Run Command: <executable>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-231-1255/intel/CloverLeaf1.3-FC/run/oneview_runs/multicore/gcc_5/oneview_run_1782340583OMP_NUM_THREADS: 32OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| Run 1x40 | Number processes: 1Run Command: <executable>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-231-1255/intel/CloverLeaf1.3-FC/run/oneview_runs/multicore/gcc_5/oneview_run_1782340583OMP_NUM_THREADS: 40OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| Run 1x48 | Number processes: 1Run Command: <executable>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-231-1255/intel/CloverLeaf1.3-FC/run/oneview_runs/multicore/gcc_5/oneview_run_1782340583OMP_NUM_THREADS: 48OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| Run 1x56 | Number processes: 1Run Command: <executable>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-231-1255/intel/CloverLeaf1.3-FC/run/oneview_runs/multicore/gcc_5/oneview_run_1782340583OMP_NUM_THREADS: 56OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| Run 1x64 | Number processes: 1Run Command: <executable>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-231-1255/intel/CloverLeaf1.3-FC/run/oneview_runs/multicore/gcc_5/oneview_run_1782340583OMP_NUM_THREADS: 64OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| (1x1) Efficiency | (1x1) Potential Speed-Up (%) | (1x2) Efficiency | (1x2) Potential Speed-Up (%) | (1x4) Efficiency | (1x4) Potential Speed-Up (%) | (1x8) Efficiency | (1x8) Potential Speed-Up (%) | (1x16) Efficiency | (1x16) Potential Speed-Up (%) | (1x24) Efficiency | (1x24) Potential Speed-Up (%) | (1x32) Efficiency | (1x32) Potential Speed-Up (%) | (1x40) Efficiency | (1x40) Potential Speed-Up (%) | (1x48) Efficiency | (1x48) Potential Speed-Up (%) | (1x56) Efficiency | (1x56) Potential Speed-Up (%) | (1x64) Efficiency | (1x64) Potential Speed-Up (%) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 0 | 1 | 0 | 1 | 0 | 1 | 0 | 1 | 0 | 1 | 0 |
| Run | Number of threads | Efficiency (ideal is 1) | Speedup | Ideal Speedup | Time (s) | Coverage (%) |
|---|---|---|---|---|---|---|
| 1x1 | ||||||
| 1x2 | ||||||
| 1x4 | ||||||
| 1x8 | ||||||
| 1x16 | ||||||
| 1x24 | 11 | 1 | 1 | 24 | 0.030000003054738 | 0.0068075573071837 |
| 1x32 | 22 | 1 | 1 | 32 | 0.050000000745058 | 0.010919448919594 |
| 1x40 | 26 | 1 | 1 | 40 | 0.035000003874302 | 0.011048424057662 |
| 1x48 | 34 | 1 | 1 | 48 | 0.045000001788139 | 0.011552038602531 |
| 1x56 | 44 | 1 | 1 | 56 | 0.035000000149012 | 0.013284534215927 |
| 1x64 | 40 | 1 | 1 | 64 | 0.044999998062849 | 0.01057028118521 |
