Loop Id: 189 | Module: exec | Source: accelerate.cpp:40-53 [...] | Coverage: 7.36% |
---|
Loop Id: 189 | Module: exec | Source: accelerate.cpp:40-53 [...] | Coverage: 7.36% |
---|
0x419ca0 SUB X14, X29, #64 |
0x419ca4 MOVPRFX Z28, Z0 |
0x419ca8 SDIV Z28.D, P0/M, Z28.D, Z2.D |
0x419cac MOVPRFX Z27, Z0 |
0x419cb0 MLS Z27.D, P0/M, Z28.D, Z2.D |
0x419cb4 SUBS X12, X12, X10 |
0x419cb8 LDR Z1, [X14, #510, MUL VL] [34] |
0x419cbc SUB X14, X29, #64 |
0x419cc0 ADD Z29.D, Z1.D, Z28.D |
0x419cc4 LDR Z1, [X14, #509, MUL VL] [27] |
0x419cc8 ADD Z28.D, Z6.D, Z28.D |
0x419ccc LD1RD {Z30.D}, P0/Z, [X13] [39] |
0x419cd0 SUB X14, X29, #64 |
0x419cd4 SXTW Z28.D, P0/M, Z28.D |
0x419cd8 MOVPRFX Z31, Z7 |
0x419cdc MUL Z31.D, P0/M, Z31.D, Z28.D |
0x419ce0 MOVPRFX Z9, Z16 |
0x419ce4 MUL Z9.D, P0/M, Z9.D, Z28.D |
0x419ce8 SXTW Z29.D, P0/M, Z29.D |
0x419cec MOVPRFX Z12, Z16 |
0x419cf0 MUL Z12.D, P0/M, Z12.D, Z29.D |
0x419cf4 MOVPRFX Z24, Z19 |
0x419cf8 MUL Z24.D, P0/M, Z24.D, Z28.D |
0x419cfc ADD Z26.D, Z1.D, Z27.D |
0x419d00 ADD Z27.D, Z5.D, Z27.D |
0x419d04 MOVPRFX Z14, Z26 |
0x419d08 SXTW Z14.D, P0/M, Z26.D |
0x419d0c ADR Z8.D, [Z31, Z27.D,SXTW] [24] |
0x419d10 ADR Z10.D, [Z9, Z27.D,SXTW] [6] |
0x419d14 ADR Z13.D, [Z12, Z27.D,SXTW] [19] |
0x419d18 ADR Z31.D, [Z31, Z26.D,SXTW] [45] |
0x419d1c ADR Z9.D, [Z9, Z26.D,SXTW] [43] |
0x419d20 ADR Z1.D, [Z24, Z26.D,SXTW] [33] |
0x419d24 ADR Z24.D, [Z24, Z27.D,SXTW] [37] |
0x419d28 LD1D {Z8.D}, P0/Z, [X25, Z8.D,LSL #3] [38] |
0x419d2c LD1D {Z10.D}, P0/Z, [X27, Z10.D,LSL #3] [40] |
0x419d30 FMUL Z8.D, Z10.D, Z8.D |
0x419d34 MOVPRFX Z10, Z7 |
0x419d38 MUL Z10.D, P0/M, Z10.D, Z29.D |
0x419d3c ADR Z11.D, [Z10, Z27.D,SXTW] [30] |
0x419d40 ADR Z10.D, [Z10, Z26.D,SXTW] [36] |
0x419d44 LD1D {Z11.D}, P0/Z, [X25, Z11.D,LSL #3] [21] |
0x419d48 LD1D {Z13.D}, P0/Z, [X27, Z13.D,LSL #3] [2] |
0x419d4c LD1D {Z10.D}, P0/Z, [X25, Z10.D,LSL #3] [31] |
0x419d50 FMLA Z8.D, P0/M, Z13.D, Z11.D |
0x419d54 ADR Z11.D, [Z12, Z26.D,SXTW] [28] |
0x419d58 LD1D {Z11.D}, P0/Z, [X27, Z11.D,LSL #3] [44] |
0x419d5c LD1D {Z31.D}, P0/Z, [X25, Z31.D,LSL #3] [46] |
0x419d60 LD1D {Z9.D}, P0/Z, [X27, Z9.D,LSL #3] [32] |
0x419d64 FMLA Z8.D, P0/M, Z11.D, Z10.D |
0x419d68 MOVPRFX Z11, Z19 |
0x419d6c MUL Z11.D, P0/M, Z11.D, Z29.D |
0x419d70 ADR Z12.D, [Z11, Z26.D,SXTW] [15] |
0x419d74 ADR Z11.D, [Z11, Z27.D,SXTW] [3] |
0x419d78 MOVPRFX Z10, Z14 |
0x419d7c MLA Z10.D, P0/M, Z17.D, Z29.D |
0x419d80 FMAD Z31.D, P0/M, Z9.D, Z8.D |
0x419d84 LD1D {Z8.D}, P0/Z, [X22, Z10.D,LSL #3] [13] |
0x419d88 MOVPRFX Z9, Z14 |
0x419d8c MLA Z9.D, P0/M, Z18.D, Z29.D |
0x419d90 LD1D {Z10.D}, P0/Z, [X21, Z9.D,LSL #3] [20] |
0x419d94 LD1D {Z13.D}, P0/Z, [X19, Z12.D,LSL #3] [41] |
0x419d98 LD1D {Z15.D}, P0/Z, [X19, Z11.D,LSL #3] [23] |
0x419d9c FSUB Z13.D, Z15.D, Z13.D |
0x419da0 FMUL Z10.D, Z13.D, Z10.D |
0x419da4 MOVPRFX Z13, Z14 |
0x419da8 MLA Z13.D, P0/M, Z18.D, Z28.D |
0x419dac LD1D {Z15.D}, P0/Z, [X21, Z13.D,LSL #3] [12] |
0x419db0 LD1D {Z3.D}, P0/Z, [X19, Z1.D,LSL #3] [22] |
0x419db4 LD1D {Z4.D}, P0/Z, [X19, Z24.D,LSL #3] [17] |
0x419db8 MUL Z28.D, P0/M, Z28.D, Z25.D |
0x419dbc FSUB Z3.D, Z4.D, Z3.D |
0x419dc0 FDUP Z4.D, #80 |
0x419dc4 FMUL Z4.D, Z31.D, Z4.D |
0x419dc8 FMAD Z3.D, P0/M, Z15.D, Z10.D |
0x419dcc MOVPRFX Z31, Z22 |
0x419dd0 MUL Z31.D, P0/M, Z31.D, Z29.D |
0x419dd4 FDIVR Z4.D, P0/M, Z4.D, Z30.D |
0x419dd8 MOVPRFX Z30, Z14 |
0x419ddc MLA Z30.D, P0/M, Z20.D, Z29.D |
0x419de0 FMAD Z3.D, P0/M, Z4.D, Z8.D |
0x419de4 ADR Z8.D, [Z31, Z26.D,SXTW] [35] |
0x419de8 ADR Z31.D, [Z31, Z27.D,SXTW] [25] |
0x419dec ST1D {Z3.D}, P0, [X20, Z30.D,LSL #3] [18] |
0x419df0 MOVPRFX Z3, Z14 |
0x419df4 MLA Z3.D, P0/M, Z21.D, Z29.D |
0x419df8 LD1D {Z3.D}, P0/Z, [X8, Z3.D,LSL #3] [14] |
0x419dfc LD1D {Z10.D}, P0/Z, [X24, Z8.D,LSL #3] [8] |
0x419e00 LD1D {Z12.D}, P0/Z, [X19, Z12.D,LSL #3] [41] |
0x419e04 LD1D {Z1.D}, P0/Z, [X19, Z1.D,LSL #3] [22] |
0x419e08 FSUB Z1.D, Z1.D, Z12.D |
0x419e0c FMUL Z1.D, Z1.D, Z10.D |
0x419e10 LD1D {Z10.D}, P0/Z, [X24, Z31.D,LSL #3] [5] |
0x419e14 LD1D {Z11.D}, P0/Z, [X19, Z11.D,LSL #3] [23] |
0x419e18 LD1D {Z24.D}, P0/Z, [X19, Z24.D,LSL #3] [17] |
0x419e1c FSUB Z24.D, Z24.D, Z11.D |
0x419e20 FMLA Z1.D, P0/M, Z24.D, Z10.D |
0x419e24 FMAD Z1.D, P0/M, Z4.D, Z3.D |
0x419e28 MOVPRFX Z3, Z14 |
0x419e2c MLA Z3.D, P0/M, Z23.D, Z29.D |
0x419e30 MUL Z29.D, P0/M, Z29.D, Z25.D |
0x419e34 ST1D {Z1.D}, P0, [X28, Z3.D,LSL #3] [4] |
0x419e38 LD1D {Z1.D}, P0/Z, [X20, Z30.D,LSL #3] [18] |
0x419e3c LD1D {Z24.D}, P0/Z, [X21, Z9.D,LSL #3] [20] |
0x419e40 ADR Z9.D, [Z29, Z26.D,SXTW] [7] |
0x419e44 ADR Z29.D, [Z29, Z27.D,SXTW] [26] |
0x419e48 ADR Z26.D, [Z28, Z26.D,SXTW] [1] |
0x419e4c ADR Z27.D, [Z28, Z27.D,SXTW] [11] |
0x419e50 LD1D {Z10.D}, P0/Z, [X23, Z9.D,LSL #3] [29] |
0x419e54 LD1D {Z11.D}, P0/Z, [X23, Z29.D,LSL #3] [16] |
0x419e58 FSUB Z10.D, Z11.D, Z10.D |
0x419e5c FMUL Z24.D, Z10.D, Z24.D |
0x419e60 LD1D {Z10.D}, P0/Z, [X21, Z13.D,LSL #3] [12] |
0x419e64 LD1D {Z11.D}, P0/Z, [X23, Z26.D,LSL #3] [42] |
0x419e68 LD1D {Z28.D}, P0/Z, [X23, Z27.D,LSL #3] [9] |
0x419e6c FSUB Z28.D, Z28.D, Z11.D |
0x419e70 FMLA Z24.D, P0/M, Z28.D, Z10.D |
0x419e74 FMLA Z1.D, P0/M, Z24.D, Z4.D |
0x419e78 ST1D {Z1.D}, P0, [X20, Z30.D,LSL #3] [18] |
0x419e7c LD1D {Z1.D}, P0/Z, [X28, Z3.D,LSL #3] [4] |
0x419e80 LD1D {Z24.D}, P0/Z, [X24, Z8.D,LSL #3] [8] |
0x419e84 LD1D {Z28.D}, P0/Z, [X23, Z9.D,LSL #3] [29] |
0x419e88 LD1D {Z26.D}, P0/Z, [X23, Z26.D,LSL #3] [42] |
0x419e8c FSUB Z26.D, Z26.D, Z28.D |
0x419e90 FMUL Z24.D, Z26.D, Z24.D |
0x419e94 LD1D {Z26.D}, P0/Z, [X24, Z31.D,LSL #3] [5] |
0x419e98 LD1D {Z28.D}, P0/Z, [X23, Z29.D,LSL #3] [16] |
0x419e9c LD1D {Z27.D}, P0/Z, [X23, Z27.D,LSL #3] [9] |
0x419ea0 FSUB Z27.D, Z27.D, Z28.D |
0x419ea4 FMLA Z24.D, P0/M, Z27.D, Z26.D |
0x419ea8 FMLA Z1.D, P0/M, Z24.D, Z4.D |
0x419eac LDR Z4, [X14, #511, MUL VL] [10] |
0x419eb0 ST1D {Z1.D}, P0, [X28, Z3.D,LSL #3] [4] |
0x419eb4 ADD Z0.D, Z0.D, Z4.D |
0x419eb8 B.NE 419ca0 |
/home/hbollore/qaas-runs/170-290-5445/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
/home/hbollore/qaas-runs/170-290-5445/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/accelerate.cpp: 40 - 53 |
-------------------------------------------------------------------------------- |
40: #pragma omp parallel for simd collapse(2) |
41: for (int j = (y_min + 1); j < (y_max + 1 + 2); j++) { |
42: for (int i = (x_min + 1); i < (x_max + 1 + 2); i++) { |
43: double stepbymass_s = halfdt / ((density0(i - 1, j - 1) * volume(i - 1, j - 1) + density0(i - 1, j + 0) * volume(i - 1, j + 0) + |
44: density0(i, j) * volume(i, j) + density0(i + 0, j - 1) * volume(i + 0, j - 1)) * |
45: 0.25); |
46: xvel1(i, j) = xvel0(i, j) - stepbymass_s * (xarea(i, j) * (pressure(i, j) - pressure(i - 1, j + 0)) + |
47: xarea(i + 0, j - 1) * (pressure(i + 0, j - 1) - pressure(i - 1, j - 1))); |
48: yvel1(i, j) = yvel0(i, j) - stepbymass_s * (yarea(i, j) * (pressure(i, j) - pressure(i + 0, j - 1)) + |
49: yarea(i - 1, j + 0) * (pressure(i - 1, j + 0) - pressure(i - 1, j - 1))); |
50: xvel1(i, j) = xvel1(i, j) - stepbymass_s * (xarea(i, j) * (viscosity(i, j) - viscosity(i - 1, j + 0)) + |
51: xarea(i + 0, j - 1) * (viscosity(i + 0, j - 1) - viscosity(i - 1, j - 1))); |
52: yvel1(i, j) = yvel1(i, j) - stepbymass_s * (yarea(i, j) * (viscosity(i, j) - viscosity(i + 0, j - 1)) + |
53: yarea(i - 1, j + 0) * (viscosity(i - 1, j + 0) - viscosity(i - 1, j - 1))); |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
○100.00 | __kmp_invoke_microtask | | libomp.so |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.00 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 1.20 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.95 |
Bottlenecks | P6, P7 |
Function | .omp_outlined. |
Source | context.h:69-69,accelerate.cpp:40-53 |
Source loop unroll info | unrolled by 4 |
Source loop unroll confidence level | high |
Unroll/vectorization loop type | NA |
Unroll factor | 4 |
CQA cycles | 43.00 |
CQA cycles if no scalar integer | 43.00 |
CQA cycles if FP arith vectorized | 43.00 |
CQA cycles if fully vectorized | 35.77 |
Front-end cycles | 16.88 |
DIV/SQRT cycles | 2.00 - 1.00 |
P0 cycles | 0.50 |
P1 cycles | 0.50 |
P2 cycles | 1.00 |
P3 cycles | 1.00 |
P4 cycles | 1.00 |
P5 cycles | 1.00 |
P6 cycles | 43.00 |
P7 cycles | 43.00 |
P8 cycles | 12.50 |
P9 cycles | 12.50 |
P10 cycles | 22.00 |
P11 cycles | 22.00 |
P12 cycles | 0.00 |
P13 cycles | 0.00 |
P14 cycles | 0.00 |
Inter-iter dependencies cycles | 2 |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 135.00 |
Nb uops | 135.00 |
Nb loads | NA |
Nb stores | 4.00 |
Nb stack references | 0.00 |
FLOP/cycle | 3.44 |
Nb FLOP add-sub | 32.00 |
Nb FLOP mul | 24.00 |
Nb FLOP fma | 44.00 |
Nb FLOP div | 4.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 32.19 |
Bytes prefetched | 0.00 |
Bytes loaded | 1256.00 |
Bytes stored | 128.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 71.76 |
Vectorization ratio load | 67.24 |
Vectorization ratio store | 100.00 |
Vectorization ratio mul | 100.00 |
Vectorization ratio add_sub | 51.28 |
Vectorization ratio fma | 100.00 |
Vectorization ratio div_sqrt | 100.00 |
Vectorization ratio other | 15.00 |
Vector-efficiency ratio all | 87.81 |
Vector-efficiency ratio load | 75.43 |
Vector-efficiency ratio store | 100.00 |
Vector-efficiency ratio mul | 100.00 |
Vector-efficiency ratio add_sub | 63.46 |
Vector-efficiency ratio fma | 100.00 |
Vector-efficiency ratio div_sqrt | 100.00 |
Vector-efficiency ratio other | 95.16 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.00 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 1.20 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.95 |
Bottlenecks | P6, P7 |
Function | .omp_outlined. |
Source | context.h:69-69,accelerate.cpp:40-53 |
Source loop unroll info | unrolled by 4 |
Source loop unroll confidence level | high |
Unroll/vectorization loop type | NA |
Unroll factor | 4 |
CQA cycles | 43.00 |
CQA cycles if no scalar integer | 43.00 |
CQA cycles if FP arith vectorized | 43.00 |
CQA cycles if fully vectorized | 35.77 |
Front-end cycles | 16.88 |
DIV/SQRT cycles | 2.00 - 1.00 |
P0 cycles | 0.50 |
P1 cycles | 0.50 |
P2 cycles | 1.00 |
P3 cycles | 1.00 |
P4 cycles | 1.00 |
P5 cycles | 1.00 |
P6 cycles | 43.00 |
P7 cycles | 43.00 |
P8 cycles | 12.50 |
P9 cycles | 12.50 |
P10 cycles | 22.00 |
P11 cycles | 22.00 |
P12 cycles | 0.00 |
P13 cycles | 0.00 |
P14 cycles | 0.00 |
Inter-iter dependencies cycles | 2 |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 135.00 |
Nb uops | 135.00 |
Nb loads | NA |
Nb stores | 4.00 |
Nb stack references | 0.00 |
FLOP/cycle | 3.44 |
Nb FLOP add-sub | 32.00 |
Nb FLOP mul | 24.00 |
Nb FLOP fma | 44.00 |
Nb FLOP div | 4.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 32.19 |
Bytes prefetched | 0.00 |
Bytes loaded | 1256.00 |
Bytes stored | 128.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 71.76 |
Vectorization ratio load | 67.24 |
Vectorization ratio store | 100.00 |
Vectorization ratio mul | 100.00 |
Vectorization ratio add_sub | 51.28 |
Vectorization ratio fma | 100.00 |
Vectorization ratio div_sqrt | 100.00 |
Vectorization ratio other | 15.00 |
Vector-efficiency ratio all | 87.81 |
Vector-efficiency ratio load | 75.43 |
Vector-efficiency ratio store | 100.00 |
Vector-efficiency ratio mul | 100.00 |
Vector-efficiency ratio add_sub | 63.46 |
Vector-efficiency ratio fma | 100.00 |
Vector-efficiency ratio div_sqrt | 100.00 |
Vector-efficiency ratio other | 95.16 |
Path / |
Function | .omp_outlined. |
Source file and lines | accelerate.cpp:40-53 |
Module | exec |
nb instructions | 135 |
loop length | 540 |
nb stack references | 0 |
front end | 16.88 cycles |
Port | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 0.50 | 0.50 | 1.00 | 1.00 | 1.00 | 1.00 | 43.00 | 43.00 | 2.00 | 2.00 | 22.00 | 22.00 | 0.00 | 0.00 | 0.00 |
cycles | 0.50 | 0.50 | 1.00 | 1.00 | 1.00 | 1.00 | 43.00 | 43.00 | 12.50 | 12.50 | 22.00 | 22.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 2.00-1.00 |
Longest recurrence chain latency (RecMII) | 2.00 |
Front-end | 16.88 |
Data deps. | 2.00 |
Overall L1 | 43.00 |
all | 65% |
load | 67% |
store | 100% |
mul | 100% |
add-sub | 38% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 20% |
all | 96% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 0% |
all | 71% |
load | 67% |
store | 100% |
mul | 100% |
add-sub | 51% |
fma | 100% |
div/sqrt | 100% |
other | 15% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
SUB X14, X29, #64 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVPRFX Z28, Z0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
SDIV Z28.D, P0/M, Z28.D, Z2.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7-20 | 1-0.50 |
MOVPRFX Z27, Z0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
MLS Z27.D, P0/M, Z28.D, Z2.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 2 |
SUBS X12, X12, X10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
LDR Z1, [X14, #510, MUL VL] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
SUB X14, X29, #64 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD Z29.D, Z1.D, Z28.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
LDR Z1, [X14, #509, MUL VL] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
ADD Z28.D, Z6.D, Z28.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
LD1RD {Z30.D}, P0/Z, [X13] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
SUB X14, X29, #64 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SXTW Z28.D, P0/M, Z28.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
MOVPRFX Z31, Z7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
MUL Z31.D, P0/M, Z31.D, Z28.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 2 |
MOVPRFX Z9, Z16 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
MUL Z9.D, P0/M, Z9.D, Z28.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 2 |
SXTW Z29.D, P0/M, Z29.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
MOVPRFX Z12, Z16 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
MUL Z12.D, P0/M, Z12.D, Z29.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 2 |
MOVPRFX Z24, Z19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
MUL Z24.D, P0/M, Z24.D, Z28.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 2 |
ADD Z26.D, Z1.D, Z27.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ADD Z27.D, Z5.D, Z27.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
MOVPRFX Z14, Z26 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
SXTW Z14.D, P0/M, Z26.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
ADR Z8.D, [Z31, Z27.D,SXTW] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ADR Z10.D, [Z9, Z27.D,SXTW] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ADR Z13.D, [Z12, Z27.D,SXTW] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ADR Z31.D, [Z31, Z26.D,SXTW] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ADR Z9.D, [Z9, Z26.D,SXTW] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ADR Z1.D, [Z24, Z26.D,SXTW] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ADR Z24.D, [Z24, Z27.D,SXTW] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
LD1D {Z8.D}, P0/Z, [X25, Z8.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z10.D}, P0/Z, [X27, Z10.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
FMUL Z8.D, Z10.D, Z8.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOVPRFX Z10, Z7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
MUL Z10.D, P0/M, Z10.D, Z29.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 2 |
ADR Z11.D, [Z10, Z27.D,SXTW] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ADR Z10.D, [Z10, Z26.D,SXTW] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
LD1D {Z11.D}, P0/Z, [X25, Z11.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z13.D}, P0/Z, [X27, Z13.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z10.D}, P0/Z, [X25, Z10.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
FMLA Z8.D, P0/M, Z13.D, Z11.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
ADR Z11.D, [Z12, Z26.D,SXTW] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
LD1D {Z11.D}, P0/Z, [X27, Z11.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z31.D}, P0/Z, [X25, Z31.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z9.D}, P0/Z, [X27, Z9.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
FMLA Z8.D, P0/M, Z11.D, Z10.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVPRFX Z11, Z19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
MUL Z11.D, P0/M, Z11.D, Z29.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 2 |
ADR Z12.D, [Z11, Z26.D,SXTW] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ADR Z11.D, [Z11, Z27.D,SXTW] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
MOVPRFX Z10, Z14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
MLA Z10.D, P0/M, Z17.D, Z29.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 2 |
FMAD Z31.D, P0/M, Z9.D, Z8.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
LD1D {Z8.D}, P0/Z, [X22, Z10.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
MOVPRFX Z9, Z14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
MLA Z9.D, P0/M, Z18.D, Z29.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 2 |
LD1D {Z10.D}, P0/Z, [X21, Z9.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z13.D}, P0/Z, [X19, Z12.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z15.D}, P0/Z, [X19, Z11.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
FSUB Z13.D, Z15.D, Z13.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FMUL Z10.D, Z13.D, Z10.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOVPRFX Z13, Z14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
MLA Z13.D, P0/M, Z18.D, Z28.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 2 |
LD1D {Z15.D}, P0/Z, [X21, Z13.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z3.D}, P0/Z, [X19, Z1.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z4.D}, P0/Z, [X19, Z24.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
MUL Z28.D, P0/M, Z28.D, Z25.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 2 |
FSUB Z3.D, Z4.D, Z3.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FDUP Z4.D, #80 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FMUL Z4.D, Z31.D, Z4.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
FMAD Z3.D, P0/M, Z15.D, Z10.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVPRFX Z31, Z22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
MUL Z31.D, P0/M, Z31.D, Z29.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 2 |
FDIVR Z4.D, P0/M, Z4.D, Z30.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7-15 | 1-0.50 |
MOVPRFX Z30, Z14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
MLA Z30.D, P0/M, Z20.D, Z29.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 2 |
FMAD Z3.D, P0/M, Z4.D, Z8.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
ADR Z8.D, [Z31, Z26.D,SXTW] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ADR Z31.D, [Z31, Z27.D,SXTW] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ST1D {Z3.D}, P0, [X20, Z30.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
MOVPRFX Z3, Z14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
MLA Z3.D, P0/M, Z21.D, Z29.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 2 |
LD1D {Z3.D}, P0/Z, [X8, Z3.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z10.D}, P0/Z, [X24, Z8.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z12.D}, P0/Z, [X19, Z12.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z1.D}, P0/Z, [X19, Z1.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
FSUB Z1.D, Z1.D, Z12.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FMUL Z1.D, Z1.D, Z10.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
LD1D {Z10.D}, P0/Z, [X24, Z31.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z11.D}, P0/Z, [X19, Z11.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z24.D}, P0/Z, [X19, Z24.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
FSUB Z24.D, Z24.D, Z11.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FMLA Z1.D, P0/M, Z24.D, Z10.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
FMAD Z1.D, P0/M, Z4.D, Z3.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVPRFX Z3, Z14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
MLA Z3.D, P0/M, Z23.D, Z29.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 2 |
MUL Z29.D, P0/M, Z29.D, Z25.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 2 |
ST1D {Z1.D}, P0, [X28, Z3.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
LD1D {Z1.D}, P0/Z, [X20, Z30.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z24.D}, P0/Z, [X21, Z9.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
ADR Z9.D, [Z29, Z26.D,SXTW] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ADR Z29.D, [Z29, Z27.D,SXTW] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ADR Z26.D, [Z28, Z26.D,SXTW] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ADR Z27.D, [Z28, Z27.D,SXTW] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
LD1D {Z10.D}, P0/Z, [X23, Z9.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z11.D}, P0/Z, [X23, Z29.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
FSUB Z10.D, Z11.D, Z10.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FMUL Z24.D, Z10.D, Z24.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
LD1D {Z10.D}, P0/Z, [X21, Z13.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z11.D}, P0/Z, [X23, Z26.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z28.D}, P0/Z, [X23, Z27.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
FSUB Z28.D, Z28.D, Z11.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FMLA Z24.D, P0/M, Z28.D, Z10.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
FMLA Z1.D, P0/M, Z24.D, Z4.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
ST1D {Z1.D}, P0, [X20, Z30.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
LD1D {Z1.D}, P0/Z, [X28, Z3.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z24.D}, P0/Z, [X24, Z8.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z28.D}, P0/Z, [X23, Z9.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z26.D}, P0/Z, [X23, Z26.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
FSUB Z26.D, Z26.D, Z28.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FMUL Z24.D, Z26.D, Z24.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
LD1D {Z26.D}, P0/Z, [X24, Z31.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z28.D}, P0/Z, [X23, Z29.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z27.D}, P0/Z, [X23, Z27.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
FSUB Z27.D, Z27.D, Z28.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FMLA Z24.D, P0/M, Z27.D, Z26.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
FMLA Z1.D, P0/M, Z24.D, Z4.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
LDR Z4, [X14, #511, MUL VL] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
ST1D {Z1.D}, P0, [X28, Z3.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
ADD Z0.D, Z0.D, Z4.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
B.NE 419ca0 <.omp_outlined.+0x220> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
Function | .omp_outlined. |
Source file and lines | accelerate.cpp:40-53 |
Module | exec |
nb instructions | 135 |
loop length | 540 |
nb stack references | 0 |
front end | 16.88 cycles |
Port | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 0.50 | 0.50 | 1.00 | 1.00 | 1.00 | 1.00 | 43.00 | 43.00 | 2.00 | 2.00 | 22.00 | 22.00 | 0.00 | 0.00 | 0.00 |
cycles | 0.50 | 0.50 | 1.00 | 1.00 | 1.00 | 1.00 | 43.00 | 43.00 | 12.50 | 12.50 | 22.00 | 22.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 2.00-1.00 |
Longest recurrence chain latency (RecMII) | 2.00 |
Front-end | 16.88 |
Data deps. | 2.00 |
Overall L1 | 43.00 |
all | 65% |
load | 67% |
store | 100% |
mul | 100% |
add-sub | 38% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 20% |
all | 96% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 0% |
all | 71% |
load | 67% |
store | 100% |
mul | 100% |
add-sub | 51% |
fma | 100% |
div/sqrt | 100% |
other | 15% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
SUB X14, X29, #64 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVPRFX Z28, Z0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
SDIV Z28.D, P0/M, Z28.D, Z2.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7-20 | 1-0.50 |
MOVPRFX Z27, Z0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
MLS Z27.D, P0/M, Z28.D, Z2.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 2 |
SUBS X12, X12, X10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
LDR Z1, [X14, #510, MUL VL] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
SUB X14, X29, #64 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD Z29.D, Z1.D, Z28.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
LDR Z1, [X14, #509, MUL VL] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
ADD Z28.D, Z6.D, Z28.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
LD1RD {Z30.D}, P0/Z, [X13] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
SUB X14, X29, #64 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SXTW Z28.D, P0/M, Z28.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
MOVPRFX Z31, Z7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
MUL Z31.D, P0/M, Z31.D, Z28.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 2 |
MOVPRFX Z9, Z16 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
MUL Z9.D, P0/M, Z9.D, Z28.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 2 |
SXTW Z29.D, P0/M, Z29.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
MOVPRFX Z12, Z16 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
MUL Z12.D, P0/M, Z12.D, Z29.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 2 |
MOVPRFX Z24, Z19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
MUL Z24.D, P0/M, Z24.D, Z28.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 2 |
ADD Z26.D, Z1.D, Z27.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ADD Z27.D, Z5.D, Z27.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
MOVPRFX Z14, Z26 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
SXTW Z14.D, P0/M, Z26.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
ADR Z8.D, [Z31, Z27.D,SXTW] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ADR Z10.D, [Z9, Z27.D,SXTW] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ADR Z13.D, [Z12, Z27.D,SXTW] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ADR Z31.D, [Z31, Z26.D,SXTW] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ADR Z9.D, [Z9, Z26.D,SXTW] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ADR Z1.D, [Z24, Z26.D,SXTW] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ADR Z24.D, [Z24, Z27.D,SXTW] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
LD1D {Z8.D}, P0/Z, [X25, Z8.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z10.D}, P0/Z, [X27, Z10.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
FMUL Z8.D, Z10.D, Z8.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOVPRFX Z10, Z7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
MUL Z10.D, P0/M, Z10.D, Z29.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 2 |
ADR Z11.D, [Z10, Z27.D,SXTW] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ADR Z10.D, [Z10, Z26.D,SXTW] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
LD1D {Z11.D}, P0/Z, [X25, Z11.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z13.D}, P0/Z, [X27, Z13.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z10.D}, P0/Z, [X25, Z10.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
FMLA Z8.D, P0/M, Z13.D, Z11.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
ADR Z11.D, [Z12, Z26.D,SXTW] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
LD1D {Z11.D}, P0/Z, [X27, Z11.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z31.D}, P0/Z, [X25, Z31.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z9.D}, P0/Z, [X27, Z9.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
FMLA Z8.D, P0/M, Z11.D, Z10.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVPRFX Z11, Z19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
MUL Z11.D, P0/M, Z11.D, Z29.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 2 |
ADR Z12.D, [Z11, Z26.D,SXTW] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ADR Z11.D, [Z11, Z27.D,SXTW] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
MOVPRFX Z10, Z14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
MLA Z10.D, P0/M, Z17.D, Z29.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 2 |
FMAD Z31.D, P0/M, Z9.D, Z8.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
LD1D {Z8.D}, P0/Z, [X22, Z10.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
MOVPRFX Z9, Z14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
MLA Z9.D, P0/M, Z18.D, Z29.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 2 |
LD1D {Z10.D}, P0/Z, [X21, Z9.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z13.D}, P0/Z, [X19, Z12.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z15.D}, P0/Z, [X19, Z11.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
FSUB Z13.D, Z15.D, Z13.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FMUL Z10.D, Z13.D, Z10.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOVPRFX Z13, Z14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
MLA Z13.D, P0/M, Z18.D, Z28.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 2 |
LD1D {Z15.D}, P0/Z, [X21, Z13.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z3.D}, P0/Z, [X19, Z1.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z4.D}, P0/Z, [X19, Z24.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
MUL Z28.D, P0/M, Z28.D, Z25.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 2 |
FSUB Z3.D, Z4.D, Z3.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FDUP Z4.D, #80 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FMUL Z4.D, Z31.D, Z4.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
FMAD Z3.D, P0/M, Z15.D, Z10.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVPRFX Z31, Z22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
MUL Z31.D, P0/M, Z31.D, Z29.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 2 |
FDIVR Z4.D, P0/M, Z4.D, Z30.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7-15 | 1-0.50 |
MOVPRFX Z30, Z14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
MLA Z30.D, P0/M, Z20.D, Z29.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 2 |
FMAD Z3.D, P0/M, Z4.D, Z8.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
ADR Z8.D, [Z31, Z26.D,SXTW] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ADR Z31.D, [Z31, Z27.D,SXTW] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ST1D {Z3.D}, P0, [X20, Z30.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
MOVPRFX Z3, Z14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
MLA Z3.D, P0/M, Z21.D, Z29.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 2 |
LD1D {Z3.D}, P0/Z, [X8, Z3.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z10.D}, P0/Z, [X24, Z8.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z12.D}, P0/Z, [X19, Z12.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z1.D}, P0/Z, [X19, Z1.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
FSUB Z1.D, Z1.D, Z12.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FMUL Z1.D, Z1.D, Z10.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
LD1D {Z10.D}, P0/Z, [X24, Z31.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z11.D}, P0/Z, [X19, Z11.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z24.D}, P0/Z, [X19, Z24.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
FSUB Z24.D, Z24.D, Z11.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FMLA Z1.D, P0/M, Z24.D, Z10.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
FMAD Z1.D, P0/M, Z4.D, Z3.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVPRFX Z3, Z14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
MLA Z3.D, P0/M, Z23.D, Z29.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 2 |
MUL Z29.D, P0/M, Z29.D, Z25.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 2 |
ST1D {Z1.D}, P0, [X28, Z3.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
LD1D {Z1.D}, P0/Z, [X20, Z30.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z24.D}, P0/Z, [X21, Z9.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
ADR Z9.D, [Z29, Z26.D,SXTW] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ADR Z29.D, [Z29, Z27.D,SXTW] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ADR Z26.D, [Z28, Z26.D,SXTW] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ADR Z27.D, [Z28, Z27.D,SXTW] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
LD1D {Z10.D}, P0/Z, [X23, Z9.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z11.D}, P0/Z, [X23, Z29.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
FSUB Z10.D, Z11.D, Z10.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FMUL Z24.D, Z10.D, Z24.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
LD1D {Z10.D}, P0/Z, [X21, Z13.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z11.D}, P0/Z, [X23, Z26.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z28.D}, P0/Z, [X23, Z27.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
FSUB Z28.D, Z28.D, Z11.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FMLA Z24.D, P0/M, Z28.D, Z10.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
FMLA Z1.D, P0/M, Z24.D, Z4.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
ST1D {Z1.D}, P0, [X20, Z30.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
LD1D {Z1.D}, P0/Z, [X28, Z3.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z24.D}, P0/Z, [X24, Z8.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z28.D}, P0/Z, [X23, Z9.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z26.D}, P0/Z, [X23, Z26.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
FSUB Z26.D, Z26.D, Z28.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FMUL Z24.D, Z26.D, Z24.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
LD1D {Z26.D}, P0/Z, [X24, Z31.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z28.D}, P0/Z, [X23, Z29.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z27.D}, P0/Z, [X23, Z27.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
FSUB Z27.D, Z27.D, Z28.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FMLA Z24.D, P0/M, Z27.D, Z26.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
FMLA Z1.D, P0/M, Z24.D, Z4.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
LDR Z4, [X14, #511, MUL VL] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
ST1D {Z1.D}, P0, [X28, Z3.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
ADD Z0.D, Z0.D, Z4.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
B.NE 419ca0 <.omp_outlined.+0x220> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |