Loop Id: 1092 | Module: exec | Source: viscosity_kernel.f90-pp.f90:56-89 | Coverage: 13.32% |
---|
Loop Id: 1092 | Module: exec | Source: viscosity_kernel.f90-pp.f90:56-89 | Coverage: 13.32% |
---|
0x44cbe0 UBFM X2, X14, #61, #60 |
0x44cbe4 ADD X4, X26, X2 |
0x44cbe8 ADD X5, X28, X2 |
0x44cbec LD1D {Z20.D}, P0/Z, [X4, X16,LSL #3] [4] |
0x44cbf0 LD1D {Z21.D}, P0/Z, [X5, X16,LSL #3] [6] |
0x44cbf4 LD1D {Z23.D}, P0/Z, [X4, MUL VL] [8] |
0x44cbf8 ADD X4, X25, X2 |
0x44cbfc LD1D {Z24.D}, P0/Z, [X5, MUL VL] [16] |
0x44cc00 LD1D {Z26.D}, P0/Z, [X4, X16,LSL #3] [14] |
0x44cc04 LD1D {Z27.D}, P0/Z, [X4, MUL VL] [12] |
0x44cc08 ADD X4, X27, X2 |
0x44cc0c ADD X2, X22, X2 |
0x44cc10 LD1D {Z28.D}, P0/Z, [X4, X16,LSL #3] [3] |
0x44cc14 FADD Z25.D, Z23.D, Z24.D |
0x44cc18 LD1D {Z29.D}, P0/Z, [X4, MUL VL] [11] |
0x44cc1c LD1D {Z31.D}, P0/Z, [X2, MUL VL] [5] |
0x44cc20 FADD Z22.D, Z20.D, Z21.D |
0x44cc24 FADD Z20.D, Z20.D, Z23.D |
0x44cc28 FSUB Z22.D, Z22.D, Z25.D |
0x44cc2c FADD Z25.D, Z26.D, Z27.D |
0x44cc30 FSUB Z20.D, Z20.D, Z24.D |
0x44cc34 FADD Z26.D, Z26.D, Z28.D |
0x44cc38 FSUB Z20.D, Z20.D, Z21.D |
0x44cc3c FADD Z30.D, Z28.D, Z29.D |
0x44cc40 FSUB Z26.D, Z26.D, Z29.D |
0x44cc44 FMUL Z20.D, P0/M, Z20.D, #0 |
0x44cc48 MOVPRFX Z21, Z20 |
0x44cc4c FDIV Z21.D, P0/M, Z21.D, Z16.D |
0x44cc50 ADD Z20.D, Z18.D, Z19.D |
0x44cc54 FSUB Z25.D, Z25.D, Z30.D |
0x44cc58 MOVPRFX Z29, Z25 |
0x44cc5c FMUL Z29.D, P0/M, Z29.D, #0 |
0x44cc60 FDIV Z29.D, P0/M, Z29.D, Z16.D |
0x44cc64 ADD Z19.D, Z19.D, Z5.D |
0x44cc68 FSUB Z26.D, Z26.D, Z27.D |
0x44cc6c FMOV X4, D20 |
0x44cc70 FMUL Z27.D, Z22.D, Z31.D |
0x44cc74 FMUL Z22.D, P0/M, Z22.D, #0 |
0x44cc78 FMUL Z26.D, P0/M, Z26.D, #0 |
0x44cc7c LD1D {Z23.D}, P0/Z, [X18, X4,LSL #3] [18] |
0x44cc80 LD1D {Z24.D}, P0/Z, [X1, X4,LSL #3] [9] |
0x44cc84 FDIV Z22.D, P0/M, Z22.D, Z31.D |
0x44cc88 FMAD Z25.D, P0/M, Z16.D, Z27.D |
0x44cc8c FDIV Z26.D, P0/M, Z26.D, Z31.D |
0x44cc90 FCMGE P2.D, P0/Z, Z25.D, #0 |
0x44cc94 FSUB Z23.D, Z23.D, Z24.D |
0x44cc98 LD1D {Z24.D}, P0/Z, [X2, X16,LSL #3] [2] |
0x44cc9c FADD Z24.D, Z31.D, Z24.D |
0x44cca0 FDIV Z23.D, P0/M, Z23.D, Z24.D |
0x44cca4 LD1D {Z24.D}, P0/Z, [X19, X14,LSL #3] [17] |
0x44cca8 FADD Z21.D, Z21.D, Z26.D |
0x44ccac LD1D {Z26.D}, P0/Z, [X6, X14,LSL #3] [10] |
0x44ccb0 ADD X14, X14, X13 |
0x44ccb4 CMP X15, X14 |
0x44ccb8 FSUB Z24.D, Z24.D, Z26.D |
0x44ccbc FDIV Z24.D, P0/M, Z24.D, Z17.D |
0x44ccc0 FMUL Z26.D, Z23.D, Z23.D |
0x44ccc4 FMUL Z21.D, Z21.D, Z23.D |
0x44ccc8 FMUL Z22.D, Z22.D, Z26.D |
0x44cccc FMUL Z28.D, Z24.D, Z24.D |
0x44ccd0 FMLA Z22.D, P0/M, Z28.D, Z29.D |
0x44ccd4 FMAD Z21.D, P0/M, Z24.D, Z22.D |
0x44ccd8 FADD Z22.D, Z26.D, Z28.D |
0x44ccdc FCMGE P1.D, P0/Z, Z22.D, Z2.D |
0x44cce0 SEL Z22.D, P1, Z22.D, Z2.D |
0x44cce4 FDIV Z21.D, P0/M, Z21.D, Z22.D |
0x44cce8 MOVPRFX Z22, Z23 |
0x44ccec FABS Z22.D, P0/M, Z23.D |
0x44ccf0 FCMGE P3.D, P0/Z, Z22.D, Z2.D |
0x44ccf4 SEL Z22.D, P3, Z22.D, Z2.D |
0x44ccf8 FCMLT P3.D, P0/Z, Z23.D, #0 |
0x44ccfc MOVPRFX Z23, Z22 |
0x44cd00 FNEG Z23.D, P0/M, Z22.D |
0x44cd04 EOR P3.B, P0/Z, P3.B, P0.B |
0x44cd08 SEL Z22.D, P3, Z22.D, Z23.D |
0x44cd0c MOVPRFX Z23, Z24 |
0x44cd10 FABS Z23.D, P0/M, Z24.D |
0x44cd14 FCMLT P3.D, P0/Z, Z22.D, #0 |
0x44cd18 EOR P3.B, P0/Z, P3.B, P0.B |
0x44cd1c FCMGT P1.D, P0/Z, Z21.D, #0 |
0x44cd20 EOR P1.B, P0/Z, P1.B, P0.B |
0x44cd24 BIC P1.B, P1/Z, P1.B, P2.B |
0x44cd28 FCMGE P2.D, P0/Z, Z23.D, Z2.D |
0x44cd2c SEL Z23.D, P2, Z23.D, Z2.D |
0x44cd30 MOVPRFX Z24, Z23 |
0x44cd34 FNEG Z24.D, P0/M, Z23.D |
0x44cd38 SEL Z23.D, P3, Z23.D, Z24.D |
0x44cd3c FMUL Z24.D, Z23.D, Z23.D |
0x44cd40 FMLA Z24.D, P0/M, Z22.D, Z22.D |
0x44cd44 FSQRT Z24.D, P0/M, Z24.D |
0x44cd48 FMUL Z25.D, Z31.D, Z24.D |
0x44cd4c FMUL Z24.D, Z16.D, Z24.D |
0x44cd50 FDIVR Z22.D, P0/M, Z22.D, Z25.D |
0x44cd54 FDIVR Z23.D, P0/M, Z23.D, Z24.D |
0x44cd58 FABS Z22.D, P0/M, Z22.D |
0x44cd5c FABS Z23.D, P0/M, Z23.D |
0x44cd60 FCMGE P2.D, P0/Z, Z22.D, Z23.D |
0x44cd64 SEL Z22.D, P2, Z23.D, Z22.D |
0x44cd68 LD1D {Z23.D}, P1/Z, [X12, Z4.D,LSL #3] [13] |
0x44cd6c FMUL Z22.D, Z22.D, Z22.D |
0x44cd70 ADR Z23.D, [Z23, Z20.D,LSL #3] [7] |
0x44cd74 LD1D {Z23.D}, P1/Z, [Z23.D] [15] |
0x44cd78 FADD Z23.D, Z23.D, Z23.D |
0x44cd7c FMUL Z22.D, Z22.D, Z23.D |
0x44cd80 FMUL Z22.D, Z21.D, Z22.D |
0x44cd84 FMUL Z21.D, Z21.D, Z22.D |
0x44cd88 SEL Z21.D, P1, Z21.D, Z4.D |
0x44cd8c ST1D {Z21.D}, P0, [X10, Z20.D,LSL #3] [1] |
0x44cd90 B.NE 44cbe0 |
/home/hbollore/qaas-runs/170-307-1706/intel/CloverLeafFC/build/build/CMakeFiles/clover_leaf.dir/CloverLeaf_ref/kernels/viscosity_kernel.f90-pp.f90: 56 - 89 |
-------------------------------------------------------------------------------- |
56: DO j=x_min,x_max |
57: ugrad=(xvel0(j+1,k )+xvel0(j+1,k+1))-(xvel0(j ,k )+xvel0(j ,k+1)) |
58: # 58 "/home/hbollore/qaas-runs/170-307-1706/intel/CloverLeafFC/build/CloverLeafFC/CloverLeaf_ref/kernels/viscosity_kernel.f90" |
59: vgrad=(yvel0(j ,k+1)+yvel0(j+1,k+1))-(yvel0(j ,k )+yvel0(j+1,k )) |
60: # 60 "/home/hbollore/qaas-runs/170-307-1706/intel/CloverLeafFC/build/CloverLeafFC/CloverLeaf_ref/kernels/viscosity_kernel.f90" |
61: div = (celldx(j)*(ugrad)+ celldy(k)*(vgrad)) |
62: # 62 "/home/hbollore/qaas-runs/170-307-1706/intel/CloverLeafFC/build/CloverLeafFC/CloverLeaf_ref/kernels/viscosity_kernel.f90" |
63: strain2 = 0.5_8*(xvel0(j, k+1) + xvel0(j+1,k+1)-xvel0(j ,k )-xvel0(j+1,k ))/celldy(k) & |
64: + 0.5_8*(yvel0(j+1,k ) + yvel0(j+1,k+1)-yvel0(j ,k )-yvel0(j ,k+1))/celldx(j) |
65: # 65 "/home/hbollore/qaas-runs/170-307-1706/intel/CloverLeafFC/build/CloverLeafFC/CloverLeaf_ref/kernels/viscosity_kernel.f90" |
66: pgradx=(pressure(j+1,k)-pressure(j-1,k))/(celldx(j)+celldx(j+1)) |
67: pgrady=(pressure(j,k+1)-pressure(j,k-1))/(celldy(k)+celldy(k+1)) |
68: # 68 "/home/hbollore/qaas-runs/170-307-1706/intel/CloverLeafFC/build/CloverLeafFC/CloverLeaf_ref/kernels/viscosity_kernel.f90" |
69: pgradx2 = pgradx*pgradx |
70: pgrady2 = pgrady*pgrady |
71: # 71 "/home/hbollore/qaas-runs/170-307-1706/intel/CloverLeafFC/build/CloverLeafFC/CloverLeaf_ref/kernels/viscosity_kernel.f90" |
72: limiter = ((0.5_8*(ugrad)/celldx(j))*pgradx2+(0.5_8*(vgrad)/celldy(k))*pgrady2+strain2*pgradx*pgrady) & |
73: /MAX(pgradx2+pgrady2,1.0e-16_8) |
74: # 74 "/home/hbollore/qaas-runs/170-307-1706/intel/CloverLeafFC/build/CloverLeafFC/CloverLeaf_ref/kernels/viscosity_kernel.f90" |
75: IF ((limiter.GT.0.0).OR.(div.GE.0.0))THEN |
76: viscosity(j,k) = 0.0 |
77: ELSE |
78: dirx=1.0_8 |
79: IF(pgradx.LT.0.0) dirx=-1.0_8 |
80: pgradx = dirx*MAX(1.0e-16_8,ABS(pgradx)) |
81: diry=1.0_8 |
82: IF(pgradx.LT.0.0) diry=-1.0_8 |
83: pgrady = diry*MAX(1.0e-16_8,ABS(pgrady)) |
84: pgrad = SQRT(pgradx**2+pgrady**2) |
85: xgrad = ABS(celldx(j)*pgrad/pgradx) |
86: ygrad = ABS(celldy(k)*pgrad/pgrady) |
87: grad = MIN(xgrad,ygrad) |
88: grad2 = grad*grad |
89: # 89 "/home/hbollore/qaas-runs/170-307-1706/intel/CloverLeafFC/build/CloverLeafFC/CloverLeaf_ref/kernels/viscosity_kernel.f90" |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.01 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 1.01 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 2.08 |
Bottlenecks | P6, P7, |
Function | __nv_viscosity_kernel_module_viscosity_kernel__F1L50_1_ |
Source | viscosity_kernel.f90-pp.f90:56-89 |
Source loop unroll info | not unrolled or unrolled with no peel/tail loop |
Source loop unroll confidence level | max |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 39.50 |
CQA cycles if no scalar integer | 39.00 |
CQA cycles if FP arith vectorized | 39.50 |
CQA cycles if fully vectorized | 39.13 |
Front-end cycles | 13.63 |
DIV/SQRT cycles | 10.00 - 5.00 |
P0 cycles | 0.50 |
P1 cycles | 0.50 |
P2 cycles | 2.75 |
P3 cycles | 2.75 |
P4 cycles | 4.00 |
P5 cycles | 2.50 |
P6 cycles | 39.50 |
P7 cycles | 39.50 |
P8 cycles | 19.00 |
P9 cycles | 19.00 |
P10 cycles | 8.00 |
P11 cycles | 8.00 |
P12 cycles | 1.00 |
P13 cycles | 0.00 |
P14 cycles | 0.00 |
Inter-iter dependencies cycles | 2 |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 109.00 |
Nb uops | 109.00 |
Nb loads | NA |
Nb stores | 1.00 |
Nb stack references | 0.00 |
FLOP/cycle | 5.27 |
Nb FLOP add-sub | 72.00 |
Nb FLOP mul | 64.00 |
Nb FLOP fma | 16.00 |
Nb FLOP div | 36.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 4.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 13.77 |
Bytes prefetched | 0.00 |
Bytes loaded | 512.00 |
Bytes stored | 32.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 92.00 |
Vectorization ratio load | 94.12 |
Vectorization ratio store | 100.00 |
Vectorization ratio mul | 100.00 |
Vectorization ratio add_sub | 95.24 |
Vectorization ratio fma | 100.00 |
Vectorization ratio div_sqrt | 100.00 |
Vectorization ratio other | 78.13 |
Vector-efficiency ratio all | 95.00 |
Vector-efficiency ratio load | 95.59 |
Vector-efficiency ratio store | 100.00 |
Vector-efficiency ratio mul | 100.00 |
Vector-efficiency ratio add_sub | 96.43 |
Vector-efficiency ratio fma | 100.00 |
Vector-efficiency ratio div_sqrt | 100.00 |
Vector-efficiency ratio other | 86.72 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.01 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 1.01 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 2.08 |
Bottlenecks | P6, P7, |
Function | __nv_viscosity_kernel_module_viscosity_kernel__F1L50_1_ |
Source | viscosity_kernel.f90-pp.f90:56-89 |
Source loop unroll info | not unrolled or unrolled with no peel/tail loop |
Source loop unroll confidence level | max |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 39.50 |
CQA cycles if no scalar integer | 39.00 |
CQA cycles if FP arith vectorized | 39.50 |
CQA cycles if fully vectorized | 39.13 |
Front-end cycles | 13.63 |
DIV/SQRT cycles | 10.00 - 5.00 |
P0 cycles | 0.50 |
P1 cycles | 0.50 |
P2 cycles | 2.75 |
P3 cycles | 2.75 |
P4 cycles | 4.00 |
P5 cycles | 2.50 |
P6 cycles | 39.50 |
P7 cycles | 39.50 |
P8 cycles | 19.00 |
P9 cycles | 19.00 |
P10 cycles | 8.00 |
P11 cycles | 8.00 |
P12 cycles | 1.00 |
P13 cycles | 0.00 |
P14 cycles | 0.00 |
Inter-iter dependencies cycles | 2 |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 109.00 |
Nb uops | 109.00 |
Nb loads | NA |
Nb stores | 1.00 |
Nb stack references | 0.00 |
FLOP/cycle | 5.27 |
Nb FLOP add-sub | 72.00 |
Nb FLOP mul | 64.00 |
Nb FLOP fma | 16.00 |
Nb FLOP div | 36.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 4.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 13.77 |
Bytes prefetched | 0.00 |
Bytes loaded | 512.00 |
Bytes stored | 32.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 92.00 |
Vectorization ratio load | 94.12 |
Vectorization ratio store | 100.00 |
Vectorization ratio mul | 100.00 |
Vectorization ratio add_sub | 95.24 |
Vectorization ratio fma | 100.00 |
Vectorization ratio div_sqrt | 100.00 |
Vectorization ratio other | 78.13 |
Vector-efficiency ratio all | 95.00 |
Vector-efficiency ratio load | 95.59 |
Vector-efficiency ratio store | 100.00 |
Vector-efficiency ratio mul | 100.00 |
Vector-efficiency ratio add_sub | 96.43 |
Vector-efficiency ratio fma | 100.00 |
Vector-efficiency ratio div_sqrt | 100.00 |
Vector-efficiency ratio other | 86.72 |
Path / |
Function | __nv_viscosity_kernel_module_viscosity_kernel__F1L50_1_ |
Source file and lines | viscosity_kernel.f90-pp.f90:56-89 |
Module | exec |
nb instructions | 109 |
loop length | 436 |
nb stack references | 0 |
front end | 13.63 cycles |
Port | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 0.50 | 0.50 | 2.75 | 2.75 | 4.00 | 2.50 | 39.50 | 39.50 | 1.00 | 1.00 | 8.00 | 8.00 | 1.00 | 0.00 | 0.00 |
cycles | 0.50 | 0.50 | 2.75 | 2.75 | 4.00 | 2.50 | 39.50 | 39.50 | 19.00 | 19.00 | 8.00 | 8.00 | 1.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 10.00-5.00 |
Longest recurrence chain latency (RecMII) | 2.00 |
Front-end | 13.63 |
Data deps. | 2.00 |
Overall L1 | 39.50 |
all | 78% |
load | 94% |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 66% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 61% |
all | 100% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 100% |
all | 92% |
load | 94% |
store | 100% |
mul | 100% |
add-sub | 95% |
fma | 100% |
div/sqrt | 100% |
other | 78% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
UBFM X2, X14, #61, #60 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X4, X26, X2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X5, X28, X2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LD1D {Z20.D}, P0/Z, [X4, X16,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z21.D}, P0/Z, [X5, X16,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z23.D}, P0/Z, [X4, MUL VL] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
ADD X4, X25, X2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LD1D {Z24.D}, P0/Z, [X5, MUL VL] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z26.D}, P0/Z, [X4, X16,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z27.D}, P0/Z, [X4, MUL VL] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
ADD X4, X27, X2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X2, X22, X2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LD1D {Z28.D}, P0/Z, [X4, X16,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
FADD Z25.D, Z23.D, Z24.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
LD1D {Z29.D}, P0/Z, [X4, MUL VL] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z31.D}, P0/Z, [X2, MUL VL] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
FADD Z22.D, Z20.D, Z21.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FADD Z20.D, Z20.D, Z23.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FSUB Z22.D, Z22.D, Z25.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FADD Z25.D, Z26.D, Z27.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FSUB Z20.D, Z20.D, Z24.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FADD Z26.D, Z26.D, Z28.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FSUB Z20.D, Z20.D, Z21.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FADD Z30.D, Z28.D, Z29.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FSUB Z26.D, Z26.D, Z29.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FMUL Z20.D, P0/M, Z20.D, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOVPRFX Z21, Z20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FDIV Z21.D, P0/M, Z21.D, Z16.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7-15 | 1-0.50 |
ADD Z20.D, Z18.D, Z19.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FSUB Z25.D, Z25.D, Z30.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
MOVPRFX Z29, Z25 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FMUL Z29.D, P0/M, Z29.D, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
FDIV Z29.D, P0/M, Z29.D, Z16.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7-15 | 1-0.50 |
ADD Z19.D, Z19.D, Z5.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FSUB Z26.D, Z26.D, Z27.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FMOV X4, D20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
FMUL Z27.D, Z22.D, Z31.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
FMUL Z22.D, P0/M, Z22.D, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
FMUL Z26.D, P0/M, Z26.D, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
LD1D {Z23.D}, P0/Z, [X18, X4,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z24.D}, P0/Z, [X1, X4,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
FDIV Z22.D, P0/M, Z22.D, Z31.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7-15 | 1-0.50 |
FMAD Z25.D, P0/M, Z16.D, Z27.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
FDIV Z26.D, P0/M, Z26.D, Z31.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7-15 | 1-0.50 |
FCMGE P2.D, P0/Z, Z25.D, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
FSUB Z23.D, Z23.D, Z24.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
LD1D {Z24.D}, P0/Z, [X2, X16,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
FADD Z24.D, Z31.D, Z24.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FDIV Z23.D, P0/M, Z23.D, Z24.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7-15 | 1-0.50 |
LD1D {Z24.D}, P0/Z, [X19, X14,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
FADD Z21.D, Z21.D, Z26.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
LD1D {Z26.D}, P0/Z, [X6, X14,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
ADD X14, X14, X13 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP X15, X14 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
FSUB Z24.D, Z24.D, Z26.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FDIV Z24.D, P0/M, Z24.D, Z17.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7-15 | 1-0.50 |
FMUL Z26.D, Z23.D, Z23.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
FMUL Z21.D, Z21.D, Z23.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
FMUL Z22.D, Z22.D, Z26.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
FMUL Z28.D, Z24.D, Z24.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
FMLA Z22.D, P0/M, Z28.D, Z29.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
FMAD Z21.D, P0/M, Z24.D, Z22.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
FADD Z22.D, Z26.D, Z28.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FCMGE P1.D, P0/Z, Z22.D, Z2.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
SEL Z22.D, P1, Z22.D, Z2.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FDIV Z21.D, P0/M, Z21.D, Z22.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7-15 | 1-0.50 |
MOVPRFX Z22, Z23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FABS Z22.D, P0/M, Z23.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FCMGE P3.D, P0/Z, Z22.D, Z2.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
SEL Z22.D, P3, Z22.D, Z2.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FCMLT P3.D, P0/Z, Z23.D, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
MOVPRFX Z23, Z22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FNEG Z23.D, P0/M, Z22.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
EOR P3.B, P0/Z, P3.B, P0.B | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
SEL Z22.D, P3, Z22.D, Z23.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
MOVPRFX Z23, Z24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FABS Z23.D, P0/M, Z24.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FCMLT P3.D, P0/Z, Z22.D, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
EOR P3.B, P0/Z, P3.B, P0.B | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
FCMGT P1.D, P0/Z, Z21.D, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
EOR P1.B, P0/Z, P1.B, P0.B | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
BIC P1.B, P1/Z, P1.B, P2.B | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
FCMGE P2.D, P0/Z, Z23.D, Z2.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
SEL Z23.D, P2, Z23.D, Z2.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
MOVPRFX Z24, Z23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FNEG Z24.D, P0/M, Z23.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
SEL Z23.D, P3, Z23.D, Z24.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FMUL Z24.D, Z23.D, Z23.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
FMLA Z24.D, P0/M, Z22.D, Z22.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
FSQRT Z24.D, P0/M, Z24.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7-16 | 1-0.50 |
FMUL Z25.D, Z31.D, Z24.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
FMUL Z24.D, Z16.D, Z24.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
FDIVR Z22.D, P0/M, Z22.D, Z25.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7-15 | 1-0.50 |
FDIVR Z23.D, P0/M, Z23.D, Z24.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7-15 | 1-0.50 |
FABS Z22.D, P0/M, Z22.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FABS Z23.D, P0/M, Z23.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FCMGE P2.D, P0/Z, Z22.D, Z23.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
SEL Z22.D, P2, Z23.D, Z22.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
LD1D {Z23.D}, P1/Z, [X12, Z4.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
FMUL Z22.D, Z22.D, Z22.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
ADR Z23.D, [Z23, Z20.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
LD1D {Z23.D}, P1/Z, [Z23.D] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.33 | 0.33 | 0.33 | 0 | 0 | 9 | 2 |
FADD Z23.D, Z23.D, Z23.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FMUL Z22.D, Z22.D, Z23.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
FMUL Z22.D, Z21.D, Z22.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
FMUL Z21.D, Z21.D, Z22.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
SEL Z21.D, P1, Z21.D, Z4.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ST1D {Z21.D}, P0, [X10, Z20.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
B.NE 44cbe0 <__nv_viscosity_kernel_module_viscosity_kernel__F1L50_1_+0x2f0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
Function | __nv_viscosity_kernel_module_viscosity_kernel__F1L50_1_ |
Source file and lines | viscosity_kernel.f90-pp.f90:56-89 |
Module | exec |
nb instructions | 109 |
loop length | 436 |
nb stack references | 0 |
front end | 13.63 cycles |
Port | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 0.50 | 0.50 | 2.75 | 2.75 | 4.00 | 2.50 | 39.50 | 39.50 | 1.00 | 1.00 | 8.00 | 8.00 | 1.00 | 0.00 | 0.00 |
cycles | 0.50 | 0.50 | 2.75 | 2.75 | 4.00 | 2.50 | 39.50 | 39.50 | 19.00 | 19.00 | 8.00 | 8.00 | 1.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 10.00-5.00 |
Longest recurrence chain latency (RecMII) | 2.00 |
Front-end | 13.63 |
Data deps. | 2.00 |
Overall L1 | 39.50 |
all | 78% |
load | 94% |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 66% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 61% |
all | 100% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 100% |
all | 92% |
load | 94% |
store | 100% |
mul | 100% |
add-sub | 95% |
fma | 100% |
div/sqrt | 100% |
other | 78% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
UBFM X2, X14, #61, #60 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X4, X26, X2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X5, X28, X2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LD1D {Z20.D}, P0/Z, [X4, X16,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z21.D}, P0/Z, [X5, X16,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z23.D}, P0/Z, [X4, MUL VL] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
ADD X4, X25, X2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LD1D {Z24.D}, P0/Z, [X5, MUL VL] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z26.D}, P0/Z, [X4, X16,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z27.D}, P0/Z, [X4, MUL VL] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
ADD X4, X27, X2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X2, X22, X2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LD1D {Z28.D}, P0/Z, [X4, X16,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
FADD Z25.D, Z23.D, Z24.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
LD1D {Z29.D}, P0/Z, [X4, MUL VL] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z31.D}, P0/Z, [X2, MUL VL] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
FADD Z22.D, Z20.D, Z21.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FADD Z20.D, Z20.D, Z23.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FSUB Z22.D, Z22.D, Z25.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FADD Z25.D, Z26.D, Z27.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FSUB Z20.D, Z20.D, Z24.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FADD Z26.D, Z26.D, Z28.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FSUB Z20.D, Z20.D, Z21.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FADD Z30.D, Z28.D, Z29.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FSUB Z26.D, Z26.D, Z29.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FMUL Z20.D, P0/M, Z20.D, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOVPRFX Z21, Z20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FDIV Z21.D, P0/M, Z21.D, Z16.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7-15 | 1-0.50 |
ADD Z20.D, Z18.D, Z19.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FSUB Z25.D, Z25.D, Z30.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
MOVPRFX Z29, Z25 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FMUL Z29.D, P0/M, Z29.D, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
FDIV Z29.D, P0/M, Z29.D, Z16.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7-15 | 1-0.50 |
ADD Z19.D, Z19.D, Z5.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FSUB Z26.D, Z26.D, Z27.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FMOV X4, D20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
FMUL Z27.D, Z22.D, Z31.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
FMUL Z22.D, P0/M, Z22.D, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
FMUL Z26.D, P0/M, Z26.D, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
LD1D {Z23.D}, P0/Z, [X18, X4,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1D {Z24.D}, P0/Z, [X1, X4,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
FDIV Z22.D, P0/M, Z22.D, Z31.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7-15 | 1-0.50 |
FMAD Z25.D, P0/M, Z16.D, Z27.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
FDIV Z26.D, P0/M, Z26.D, Z31.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7-15 | 1-0.50 |
FCMGE P2.D, P0/Z, Z25.D, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
FSUB Z23.D, Z23.D, Z24.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
LD1D {Z24.D}, P0/Z, [X2, X16,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
FADD Z24.D, Z31.D, Z24.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FDIV Z23.D, P0/M, Z23.D, Z24.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7-15 | 1-0.50 |
LD1D {Z24.D}, P0/Z, [X19, X14,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
FADD Z21.D, Z21.D, Z26.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
LD1D {Z26.D}, P0/Z, [X6, X14,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
ADD X14, X14, X13 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP X15, X14 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
FSUB Z24.D, Z24.D, Z26.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FDIV Z24.D, P0/M, Z24.D, Z17.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7-15 | 1-0.50 |
FMUL Z26.D, Z23.D, Z23.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
FMUL Z21.D, Z21.D, Z23.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
FMUL Z22.D, Z22.D, Z26.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
FMUL Z28.D, Z24.D, Z24.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
FMLA Z22.D, P0/M, Z28.D, Z29.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
FMAD Z21.D, P0/M, Z24.D, Z22.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
FADD Z22.D, Z26.D, Z28.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FCMGE P1.D, P0/Z, Z22.D, Z2.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
SEL Z22.D, P1, Z22.D, Z2.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FDIV Z21.D, P0/M, Z21.D, Z22.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7-15 | 1-0.50 |
MOVPRFX Z22, Z23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FABS Z22.D, P0/M, Z23.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FCMGE P3.D, P0/Z, Z22.D, Z2.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
SEL Z22.D, P3, Z22.D, Z2.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FCMLT P3.D, P0/Z, Z23.D, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
MOVPRFX Z23, Z22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FNEG Z23.D, P0/M, Z22.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
EOR P3.B, P0/Z, P3.B, P0.B | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
SEL Z22.D, P3, Z22.D, Z23.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
MOVPRFX Z23, Z24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FABS Z23.D, P0/M, Z24.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FCMLT P3.D, P0/Z, Z22.D, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
EOR P3.B, P0/Z, P3.B, P0.B | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
FCMGT P1.D, P0/Z, Z21.D, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
EOR P1.B, P0/Z, P1.B, P0.B | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
BIC P1.B, P1/Z, P1.B, P2.B | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
FCMGE P2.D, P0/Z, Z23.D, Z2.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
SEL Z23.D, P2, Z23.D, Z2.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
MOVPRFX Z24, Z23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FNEG Z24.D, P0/M, Z23.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
SEL Z23.D, P3, Z23.D, Z24.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FMUL Z24.D, Z23.D, Z23.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
FMLA Z24.D, P0/M, Z22.D, Z22.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
FSQRT Z24.D, P0/M, Z24.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7-16 | 1-0.50 |
FMUL Z25.D, Z31.D, Z24.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
FMUL Z24.D, Z16.D, Z24.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
FDIVR Z22.D, P0/M, Z22.D, Z25.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7-15 | 1-0.50 |
FDIVR Z23.D, P0/M, Z23.D, Z24.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7-15 | 1-0.50 |
FABS Z22.D, P0/M, Z22.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FABS Z23.D, P0/M, Z23.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FCMGE P2.D, P0/Z, Z22.D, Z23.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
SEL Z22.D, P2, Z23.D, Z22.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
LD1D {Z23.D}, P1/Z, [X12, Z4.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
FMUL Z22.D, Z22.D, Z22.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
ADR Z23.D, [Z23.D, Z20.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
LD1D {Z23.D}, P1/Z, [Z23.D] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.33 | 0.33 | 0.33 | 0 | 0 | 9 | 2 |
FADD Z23.D, Z23.D, Z23.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FMUL Z22.D, Z22.D, Z23.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
FMUL Z22.D, Z21.D, Z22.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
FMUL Z21.D, Z21.D, Z22.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
SEL Z21.D, P1, Z21.D, Z4.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ST1D {Z21.D}, P0, [X10, Z20.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
B.NE 44cbe0 <__nv_viscosity_kernel_module_viscosity_kernel__F1L50_1_+0x2f0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |