Loop Id: 354 | Module: libqmcwfs.so | Source: BsplineFunctor.h:305-336 | Coverage: 0.37% |
---|
Loop Id: 354 | Module: libqmcwfs.so | Source: BsplineFunctor.h:305-336 | Coverage: 0.37% |
---|
0x37250 VMOVAPD (%R12,%R8,2),%ZMM10 [10] |
0x37257 VMOVAPD 0x40(%R12,%R8,2),%ZMM9 [10] |
0x3725f KMOVB %K1,%K2 |
0x37263 KMOVB %K1,%K3 |
0x37267 KMOVB %K1,%K6 |
0x3726b VMOVAPD 0x9c0(%RSP),%ZMM8 [18] |
0x37273 KMOVB %K1,%K5 |
0x37277 KMOVB %K1,%K7 |
0x3727b VMULPD %ZMM18,%ZMM10,%ZMM0 |
0x37281 KMOVB %K1,%K4 |
0x37285 VMOVAPD 0x840(%RSP),%ZMM24 [18] |
0x3728d VMOVDQA32 (%R15,%R8,1),%ZMM7 [7] |
0x37294 VMULPD %ZMM18,%ZMM9,%ZMM1 |
0x3729a ADD $0x40,%R8 |
0x3729e VRNDSCALEPD $0xb,%ZMM0,%ZMM3 |
0x372a5 VCVTTPD2DQ %ZMM0,%YMM6 |
0x372ab VGATHERDPD (%RAX,%YMM6,8),%ZMM22{%K2} [17] |
0x372b2 KMOVB %K1,%K2 |
0x372b6 VRNDSCALEPD $0xb,%ZMM1,%ZMM11 |
0x372bd VCVTTPD2DQ %ZMM1,%YMM2 |
0x372c3 VINSERTI64X4 $0x1,%YMM2,%ZMM6,%ZMM13 |
0x372ca VPADDD 0x302ec(%RIP),%ZMM13,%ZMM14 [11] |
0x372d4 VPADDD 0x30322(%RIP),%ZMM13,%ZMM5 [11] |
0x372de VSHUFI32X4 $-0x12,%ZMM13,%ZMM13,%ZMM12 |
0x372e5 VPADDD 0x30351(%RIP),%ZMM13,%ZMM13 [11] |
0x372ef VGATHERDPD (%RAX,%YMM12,8),%ZMM21{%K3} [13] |
0x372f6 KMOVB %K1,%K3 |
0x372fa VGATHERDPD (%RAX,%YMM5,8),%ZMM12{%K6} [15] |
0x37301 VSHUFI32X4 $-0x12,%ZMM14,%ZMM14,%ZMM6 |
0x37308 VSHUFI32X4 $-0x12,%ZMM5,%ZMM5,%ZMM15 |
0x3730f VMOVAPD 0xa00(%RSP),%ZMM5 [18] |
0x37317 VGATHERDPD (%RAX,%YMM6,8),%ZMM4{%K5} [16] |
0x3731e VSHUFI32X4 $-0x12,%ZMM13,%ZMM13,%ZMM6 |
0x37325 VGATHERDPD (%RAX,%YMM13,8),%ZMM20{%K2} [1] |
0x3732c KMOVB %K1,%K5 |
0x37330 VSUBPD %ZMM3,%ZMM0,%ZMM0 |
0x37336 VGATHERDPD (%RAX,%YMM6,8),%ZMM13{%K3} [8] |
0x3733d VMOVAPD %ZMM5,%ZMM6 |
0x37343 KMOVB %K1,%K6 |
0x37347 VSUBPD %ZMM11,%ZMM1,%ZMM1 |
0x3734d VGATHERDPD (%RAX,%YMM15,8),%ZMM11{%K7} [2] |
0x37354 VMOVAPD 0x940(%RSP),%ZMM15 [18] |
0x3735c VGATHERDPD (%RAX,%YMM14,8),%ZMM23{%K4} [14] |
0x37363 KMOVB %K1,%K4 |
0x37367 KMOVB %K1,%K7 |
0x3736b KMOVB %K1,%K2 |
0x3736f KMOVB %K1,%K3 |
0x37373 VMULPD %ZMM0,%ZMM0,%ZMM2 |
0x37379 VFMADD132PD %ZMM0,%ZMM8,%ZMM6 |
0x3737f VMULPD %ZMM1,%ZMM1,%ZMM3 |
0x37385 VFMADD132PD %ZMM1,%ZMM8,%ZMM5 |
0x3738b VMOVAPD 0x980(%RSP),%ZMM8 [18] |
0x37393 VMOVAPD %ZMM8,%ZMM14 |
0x37399 VFMADD132PD %ZMM1,%ZMM15,%ZMM8 |
0x3739f VFMADD132PD %ZMM0,%ZMM15,%ZMM14 |
0x373a5 VMOVAPD 0x900(%RSP),%ZMM15 [18] |
0x373ad VMULPD %ZMM2,%ZMM0,%ZMM17 |
0x373b3 VMULPD %ZMM3,%ZMM1,%ZMM16 |
0x373b9 VMOVAPD %ZMM15,%ZMM19 |
0x373bf VMULPD %ZMM8,%ZMM4,%ZMM8 |
0x373c5 VMULPD %ZMM14,%ZMM23,%ZMM14 |
0x373cb VFMADD132PD %ZMM21,%ZMM8,%ZMM5 |
0x373d1 VMOVAPD 0x880(%RSP),%ZMM8 [18] |
0x373d9 VFMADD132PD %ZMM22,%ZMM14,%ZMM6 |
0x373df VMOVAPD 0x8c0(%RSP),%ZMM14 [18] |
0x373e7 VFMADD132PD %ZMM0,%ZMM14,%ZMM19 |
0x373ed VFMADD132PD %ZMM1,%ZMM14,%ZMM15 |
0x373f3 VMOVAPD %ZMM8,%ZMM14 |
0x373f9 VFMADD132PD %ZMM0,%ZMM24,%ZMM14 |
0x373ff VFMADD132PD %ZMM1,%ZMM24,%ZMM8 |
0x37405 VMULPD %ZMM14,%ZMM20,%ZMM14 |
0x3740b VMULPD %ZMM8,%ZMM13,%ZMM8 |
0x37411 VFMADD231PD %ZMM19,%ZMM12,%ZMM14 |
0x37417 VMOVDQA32 %YMM7,%YMM19 |
0x3741d VFMADD231PD %ZMM15,%ZMM11,%ZMM8 |
0x37423 VMOVAPD 0x800(%RSP),%ZMM15 [18] |
0x3742b VADDPD %ZMM14,%ZMM6,%ZMM6 |
0x37431 VADDPD %ZMM8,%ZMM5,%ZMM5 |
0x37437 VMULPD %ZMM15,%ZMM6,%ZMM14 |
0x3743d VBROADCASTSD 0x2fff9(%RIP),%ZMM6 [11] |
0x37447 VMULPD %ZMM15,%ZMM5,%ZMM8 |
0x3744d VDIVPD %ZMM10,%ZMM6,%ZMM10 |
0x37453 VDIVPD %ZMM9,%ZMM6,%ZMM9 |
0x37459 VMULPD %ZMM18,%ZMM10,%ZMM10 |
0x3745f VMULPD %ZMM18,%ZMM9,%ZMM9 |
0x37465 VSCATTERDPD %ZMM14,(%R9,%YMM7,8){%K4} [9] |
0x3746c VSHUFI32X4 $-0x12,%ZMM7,%ZMM7,%ZMM7 |
0x37473 VSCATTERDPD %ZMM8,(%R9,%YMM7,8){%K5} [12] |
0x3747a VMOVAPD 0x780(%RSP),%ZMM5 [18] |
0x37482 VMOVAPD 0x740(%RSP),%ZMM15 [18] |
0x3748a VMOVAPD 0x7c0(%RSP),%ZMM14 [18] |
0x37492 VMOVAPD 0x6c0(%RSP),%ZMM8 [18] |
0x3749a VMOVAPD %ZMM5,%ZMM6 |
0x374a0 VFMADD132PD %ZMM1,%ZMM15,%ZMM5 |
0x374a6 VFMADD132PD %ZMM0,%ZMM15,%ZMM6 |
0x374ac VMOVAPD 0x680(%RSP),%ZMM15 [18] |
0x374b4 VFMADD231PD %ZMM14,%ZMM3,%ZMM5 |
0x374ba VFMADD231PD %ZMM14,%ZMM2,%ZMM6 |
0x374c0 VMOVAPD %ZMM8,%ZMM14 |
0x374c6 VFMADD132PD %ZMM0,%ZMM15,%ZMM14 |
0x374cc VFMADD132PD %ZMM1,%ZMM15,%ZMM8 |
0x374d2 VMOVAPD 0x700(%RSP),%ZMM15 [18] |
0x374da VFMADD231PD %ZMM15,%ZMM2,%ZMM14 |
0x374e0 VFMADD231PD %ZMM15,%ZMM3,%ZMM8 |
0x374e6 VMULPD %ZMM14,%ZMM23,%ZMM14 |
0x374ec VMULPD %ZMM8,%ZMM4,%ZMM8 |
0x374f2 VFMADD132PD %ZMM22,%ZMM14,%ZMM6 |
0x374f8 VMOVAPD 0x600(%RSP),%ZMM14 [18] |
0x37500 VMOVAPD 0x5c0(%RSP),%ZMM15 [18] |
0x37508 VFMADD132PD %ZMM21,%ZMM8,%ZMM5 |
0x3750e VMOVAPD 0x640(%RSP),%ZMM8 [18] |
0x37516 VMOVAPD %ZMM14,%ZMM24 |
0x3751c VFMADD132PD %ZMM1,%ZMM15,%ZMM14 |
0x37522 VFMADD132PD %ZMM0,%ZMM15,%ZMM24 |
0x37528 VMOVAPD 0x580(%RSP),%ZMM15 [18] |
0x37530 VFMADD231PD %ZMM8,%ZMM3,%ZMM14 |
0x37536 VFMADD231PD %ZMM8,%ZMM2,%ZMM24 |
0x3753c VMOVAPD %ZMM15,%ZMM8 |
0x37542 VFMADD213PD 0xa40(%RSP),%ZMM0,%ZMM8 [18] |
0x3754a VFMADD213PD 0xa40(%RSP),%ZMM1,%ZMM15 [18] |
0x37552 VFMADD231PD 0xa80(%RSP),%ZMM2,%ZMM8 [18] |
0x3755a VFMADD231PD 0xa80(%RSP),%ZMM3,%ZMM15 [18] |
0x37562 VMULPD %ZMM8,%ZMM20,%ZMM8 |
0x37568 VMULPD %ZMM15,%ZMM13,%ZMM15 |
0x3756e VFMADD231PD %ZMM24,%ZMM12,%ZMM8 |
0x37574 VFMADD132PD %ZMM11,%ZMM15,%ZMM14 |
0x3757a VADDPD %ZMM8,%ZMM6,%ZMM6 |
0x37580 VADDPD %ZMM14,%ZMM5,%ZMM5 |
0x37586 VMULPD %ZMM10,%ZMM6,%ZMM10 |
0x3758c VMULPD %ZMM9,%ZMM5,%ZMM9 |
0x37592 VSCATTERDPD %ZMM10,(%R13,%YMM19,8){%K6} [6] |
0x3759a VSCATTERDPD %ZMM9,(%R13,%YMM7,8){%K7} [4] |
0x375a2 VMOVAPD 0x500(%RSP),%ZMM14 [18] |
0x375aa VMOVAPD 0x4c0(%RSP),%ZMM6 [18] |
0x375b2 VMOVAPD 0x540(%RSP),%ZMM15 [18] |
0x375ba VMOVAPD 0x480(%RSP),%ZMM10 [18] |
0x375c2 VMULPD %ZMM14,%ZMM2,%ZMM8 |
0x375c8 VMOVAPD %ZMM6,%ZMM9 |
0x375ce VMULPD %ZMM14,%ZMM3,%ZMM5 |
0x375d4 VFMADD132PD %ZMM0,%ZMM10,%ZMM9 |
0x375da VMOVAPD 0x400(%RSP),%ZMM14 [18] |
0x375e2 VFMADD132PD %ZMM1,%ZMM10,%ZMM6 |
0x375e8 VMULPD %ZMM14,%ZMM2,%ZMM10 |
0x375ee VMULPD %ZMM14,%ZMM3,%ZMM14 |
0x375f4 VFMADD231PD %ZMM15,%ZMM17,%ZMM8 |
0x375fa VFMADD231PD %ZMM15,%ZMM16,%ZMM5 |
0x37600 VMOVAPD 0x3c0(%RSP),%ZMM15 [18] |
0x37608 VADDPD %ZMM8,%ZMM9,%ZMM9 |
0x3760e VMOVAPD 0x440(%RSP),%ZMM8 [18] |
0x37616 VADDPD %ZMM5,%ZMM6,%ZMM6 |
0x3761c VFMADD231PD %ZMM8,%ZMM17,%ZMM10 |
0x37622 VFMADD231PD %ZMM8,%ZMM16,%ZMM14 |
0x37628 VMOVAPD 0x340(%RSP),%ZMM8 [18] |
0x37630 VMOVAPD %ZMM8,%ZMM5 |
0x37636 VFMADD132PD %ZMM1,%ZMM15,%ZMM8 |
0x3763c VFMADD132PD %ZMM0,%ZMM15,%ZMM5 |
0x37642 VADDPD %ZMM14,%ZMM8,%ZMM14 |
0x37648 VMOVAPD %ZMM0,%ZMM8 |
0x3764e VFMADD132PD %ZMM26,%ZMM25,%ZMM0 |
0x37654 VFMADD132PD %ZMM30,%ZMM29,%ZMM8 |
0x3765a VADDPD %ZMM10,%ZMM5,%ZMM10 |
0x37660 VMULPD %ZMM14,%ZMM4,%ZMM4 |
0x37666 VMULPD %ZMM23,%ZMM10,%ZMM5 |
0x3766c VFMADD231PD %ZMM6,%ZMM21,%ZMM4 |
0x37672 VMOVAPD %ZMM1,%ZMM6 |
0x37678 VFMADD231PD %ZMM9,%ZMM22,%ZMM5 |
0x3767e VMOVAPD 0x380(%RSP),%ZMM9 [18] |
0x37686 VFMADD132PD %ZMM26,%ZMM25,%ZMM1 |
0x3768c VFMADD132PD %ZMM30,%ZMM29,%ZMM6 |
0x37692 VMULPD %ZMM9,%ZMM2,%ZMM15 |
0x37698 VMULPD %ZMM9,%ZMM3,%ZMM14 |
0x3769e VMULPD %ZMM27,%ZMM2,%ZMM2 |
0x376a4 VMULPD %ZMM27,%ZMM3,%ZMM3 |
0x376aa VFMADD231PD %ZMM31,%ZMM17,%ZMM15 |
0x376b0 VFMADD231PD %ZMM31,%ZMM16,%ZMM14 |
0x376b6 VFMADD231PD %ZMM28,%ZMM17,%ZMM2 |
0x376bc VFMADD231PD %ZMM28,%ZMM16,%ZMM3 |
0x376c2 VADDPD %ZMM15,%ZMM8,%ZMM10 |
0x376c8 VADDPD %ZMM6,%ZMM14,%ZMM9 |
0x376ce VADDPD %ZMM0,%ZMM2,%ZMM0 |
0x376d4 VADDPD %ZMM1,%ZMM3,%ZMM1 |
0x376da VMULPD %ZMM20,%ZMM0,%ZMM15 |
0x376e0 VMULPD %ZMM1,%ZMM13,%ZMM13 |
0x376e6 VFMADD132PD %ZMM10,%ZMM15,%ZMM12 |
0x376ec VFMADD132PD %ZMM9,%ZMM13,%ZMM11 |
0x376f2 VADDPD %ZMM12,%ZMM5,%ZMM12 |
0x376f8 VADDPD %ZMM11,%ZMM4,%ZMM11 |
0x376fe VSCATTERDPD %ZMM12,(%RSI,%YMM19,8){%K2} [3] |
0x37705 VSCATTERDPD %ZMM11,(%RSI,%YMM7,8){%K3} [5] |
0x3770c CMP %R8,%R10 |
0x3770f JNE 37250 |
/home/kcamus/qaas_runs/169-390-4082/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/Jastrow/BsplineFunctor.h: 305 - 336 |
-------------------------------------------------------------------------------- |
305: real_type r = distArrayCompressed[j]; |
306: int iScatter = distIndices[j]; |
307: real_type rinv = cOne / r; |
308: r *= DeltaRInv; |
309: int iGather = (int)r; |
310: real_type t = r - real_type(iGather); |
311: real_type tp0 = t * t * t; |
312: real_type tp1 = t * t; |
313: real_type tp2 = t; |
314: |
315: real_type sCoef0 = SplineCoefs[iGather + 0]; |
316: real_type sCoef1 = SplineCoefs[iGather + 1]; |
317: real_type sCoef2 = SplineCoefs[iGather + 2]; |
318: real_type sCoef3 = SplineCoefs[iGather + 3]; |
319: |
320: // clang-format off |
321: laplArray[iScatter] = dSquareDeltaRinv * |
322: (sCoef0*( d2A[ 2]*tp2 + d2A[ 3])+ |
323: sCoef1*( d2A[ 6]*tp2 + d2A[ 7])+ |
324: sCoef2*( d2A[10]*tp2 + d2A[11])+ |
325: sCoef3*( d2A[14]*tp2 + d2A[15])); |
326: |
327: gradArray[iScatter] = DeltaRInv * rinv * |
328: (sCoef0*( dA[ 1]*tp1 + dA[ 2]*tp2 + dA[ 3])+ |
329: sCoef1*( dA[ 5]*tp1 + dA[ 6]*tp2 + dA[ 7])+ |
330: sCoef2*( dA[ 9]*tp1 + dA[10]*tp2 + dA[11])+ |
331: sCoef3*( dA[13]*tp1 + dA[14]*tp2 + dA[15])); |
332: |
333: valArray[iScatter] = (sCoef0*(A[ 0]*tp0 + A[ 1]*tp1 + A[ 2]*tp2 + A[ 3])+ |
334: sCoef1*(A[ 4]*tp0 + A[ 5]*tp1 + A[ 6]*tp2 + A[ 7])+ |
335: sCoef2*(A[ 8]*tp0 + A[ 9]*tp1 + A[10]*tp2 + A[11])+ |
336: sCoef3*(A[12]*tp0 + A[13]*tp1 + A[14]*tp2 + A[15])); |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | miniqmcreference::TwoBodyJastr[...] | TwoBodyJastrowRef.h:409 | libqmcwfs.so |
○ | qmcplusplus::WaveFunction::eva[...] | WaveFunction.cpp:181 | libqmcwfs.so |
○ | main._omp_fn.0 | miniqmc.cpp:390 | exec |
○ | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.04 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 1.02 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.19 |
Bottlenecks | micro-operation queue, |
Function | miniqmcreference::TwoBodyJastrowRef |
Source | BsplineFunctor.h:305-336 |
Source loop unroll info | unrolled by 16 |
Source loop unroll confidence level | high |
Unroll/vectorization loop type | main |
Unroll factor | 16 |
CQA cycles | 94.50 |
CQA cycles if no scalar integer | 91.00 |
CQA cycles if FP arith vectorized | 94.50 |
CQA cycles if fully vectorized | 92.58 |
Front-end cycles | 94.50 |
DIV/SQRT cycles | 32.00 |
P0 cycles | 79.50 |
P1 cycles | 50.00 |
P2 cycles | 75.50 |
P3 cycles | 75.50 |
P4 cycles | 48.00 |
P5 cycles | 79.50 |
P6 cycles | 1.00 |
P7 cycles | 0.00 |
Inter-iter dependencies cycles | 29 |
FE+BE cycles (UFS) | 89.26 |
Stall cycles (UFS) | 35.76 |
Nb insns | 191.00 |
Nb uops | 378.00 |
Nb loads | 47.00 |
Nb stores | 6.00 |
Nb stack references | 30.00 |
FLOP/cycle | 13.04 |
Nb FLOP add-sub | 128.00 |
Nb FLOP mul | 256.00 |
Nb FLOP fma | 416.00 |
Nb FLOP div | 16.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 35.30 |
Bytes prefetched | 0.00 |
Bytes loaded | 2952.00 |
Bytes stored | 384.00 |
Stride 0 | 10.00 |
Stride 1 | 2.00 |
Stride n | 0.00 |
Stride unknown | 0.00 |
Stride indirect | 3.00 |
Vectorization ratio all | 99.43 |
Vectorization ratio load | 97.87 |
Vectorization ratio store | 100.00 |
Vectorization ratio mul | 100.00 |
Vectorization ratio add_sub | 100.00 |
Vectorization ratio fma | 100.00 |
Vectorization ratio div_sqrt | 100.00 |
Vectorization ratio other | 96.88 |
Vector-efficiency ratio all | 97.49 |
Vector-efficiency ratio load | 98.14 |
Vector-efficiency ratio store | 100.00 |
Vector-efficiency ratio mul | 100.00 |
Vector-efficiency ratio add_sub | 100.00 |
Vector-efficiency ratio fma | 100.00 |
Vector-efficiency ratio div_sqrt | 100.00 |
Vector-efficiency ratio other | 86.33 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.04 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 1.02 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.19 |
Bottlenecks | micro-operation queue, |
Function | miniqmcreference::TwoBodyJastrowRef |
Source | BsplineFunctor.h:305-336 |
Source loop unroll info | unrolled by 16 |
Source loop unroll confidence level | high |
Unroll/vectorization loop type | main |
Unroll factor | 16 |
CQA cycles | 94.50 |
CQA cycles if no scalar integer | 91.00 |
CQA cycles if FP arith vectorized | 94.50 |
CQA cycles if fully vectorized | 92.58 |
Front-end cycles | 94.50 |
DIV/SQRT cycles | 32.00 |
P0 cycles | 79.50 |
P1 cycles | 50.00 |
P2 cycles | 75.50 |
P3 cycles | 75.50 |
P4 cycles | 48.00 |
P5 cycles | 79.50 |
P6 cycles | 1.00 |
P7 cycles | 0.00 |
Inter-iter dependencies cycles | 29 |
FE+BE cycles (UFS) | 89.26 |
Stall cycles (UFS) | 35.76 |
Nb insns | 191.00 |
Nb uops | 378.00 |
Nb loads | 47.00 |
Nb stores | 6.00 |
Nb stack references | 30.00 |
FLOP/cycle | 13.04 |
Nb FLOP add-sub | 128.00 |
Nb FLOP mul | 256.00 |
Nb FLOP fma | 416.00 |
Nb FLOP div | 16.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 35.30 |
Bytes prefetched | 0.00 |
Bytes loaded | 2952.00 |
Bytes stored | 384.00 |
Stride 0 | 10.00 |
Stride 1 | 2.00 |
Stride n | 0.00 |
Stride unknown | 0.00 |
Stride indirect | 3.00 |
Vectorization ratio all | 99.43 |
Vectorization ratio load | 97.87 |
Vectorization ratio store | 100.00 |
Vectorization ratio mul | 100.00 |
Vectorization ratio add_sub | 100.00 |
Vectorization ratio fma | 100.00 |
Vectorization ratio div_sqrt | 100.00 |
Vectorization ratio other | 96.88 |
Vector-efficiency ratio all | 97.49 |
Vector-efficiency ratio load | 98.14 |
Vector-efficiency ratio store | 100.00 |
Vector-efficiency ratio mul | 100.00 |
Vector-efficiency ratio add_sub | 100.00 |
Vector-efficiency ratio fma | 100.00 |
Vector-efficiency ratio div_sqrt | 100.00 |
Vector-efficiency ratio other | 86.33 |
Path / |
Function | miniqmcreference::TwoBodyJastrowRef |
Source file and lines | BsplineFunctor.h:305-336 |
Module | libqmcwfs.so |
nb instructions | 191 |
nb uops | 378 |
loop length | 1221 |
used x86 registers | 9 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 9 |
used zmm registers | 32 |
nb stack references | 30 |
ADD-SUB / MUL ratio | 0.50 |
micro-operation queue | 94.50 cycles |
front end | 94.50 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 |
---|---|---|---|---|---|---|---|---|
uops | 79.50 | 1.00 | 75.50 | 75.50 | 48.00 | 79.50 | 1.00 | 0.00 |
cycles | 79.50 | 50.00 | 75.50 | 75.50 | 48.00 | 79.50 | 1.00 | 0.00 |
Cycles executing div or sqrt instructions | 32.00 |
Longest recurrence chain latency (RecMII) | 29.00 |
FE+BE cycles | 89.26 |
Stall cycles | 35.76 |
RS full (events) | 0.06 |
PRF_FLOAT full (events) | 42.72 |
Front-end | 94.50 |
Dispatch | 79.50 |
DIV/SQRT | 32.00 |
Data deps. | 29.00 |
Overall L1 | 94.50 |
all | 100% |
load | 100% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 100% |
all | 99% |
load | 97% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 96% |
all | 99% |
load | 97% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 96% |
all | 68% |
load | 100% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 50% |
all | 99% |
load | 97% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 96% |
all | 97% |
load | 98% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 86% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
VMOVAPD (%R12,%R8,2),%ZMM10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x40(%R12,%R8,2),%ZMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
KMOVB %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KMOVB %K1,%K3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KMOVB %K1,%K6 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVAPD 0x9c0(%RSP),%ZMM8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
KMOVB %K1,%K5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KMOVB %K1,%K7 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMULPD %ZMM18,%ZMM10,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
KMOVB %K1,%K4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVAPD 0x840(%RSP),%ZMM24 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVDQA32 (%R15,%R8,1),%ZMM7 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMULPD %ZMM18,%ZMM9,%ZMM1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
ADD $0x40,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VRNDSCALEPD $0xb,%ZMM0,%ZMM3 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 8 | 1 |
VCVTTPD2DQ %ZMM0,%YMM6 | 2 | 0.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 7 | 1 |
VGATHERDPD (%RAX,%YMM6,8),%ZMM22{%K2} | 4 | 1 | 0 | 4 | 4 | 0 | 1 | 0 | 0 | 21 | 5 |
KMOVB %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VRNDSCALEPD $0xb,%ZMM1,%ZMM11 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 8 | 1 |
VCVTTPD2DQ %ZMM1,%YMM2 | 2 | 0.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 7 | 1 |
VINSERTI64X4 $0x1,%YMM2,%ZMM6,%ZMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VPADDD 0x302ec(%RIP),%ZMM13,%ZMM14 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VPADDD 0x30322(%RIP),%ZMM13,%ZMM5 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VSHUFI32X4 $-0x12,%ZMM13,%ZMM13,%ZMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VPADDD 0x30351(%RIP),%ZMM13,%ZMM13 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VGATHERDPD (%RAX,%YMM12,8),%ZMM21{%K3} | 4 | 1 | 0 | 4 | 4 | 0 | 1 | 0 | 0 | 21 | 5 |
KMOVB %K1,%K3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VGATHERDPD (%RAX,%YMM5,8),%ZMM12{%K6} | 4 | 1 | 0 | 4 | 4 | 0 | 1 | 0 | 0 | 21 | 5 |
VSHUFI32X4 $-0x12,%ZMM14,%ZMM14,%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VSHUFI32X4 $-0x12,%ZMM5,%ZMM5,%ZMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVAPD 0xa00(%RSP),%ZMM5 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VGATHERDPD (%RAX,%YMM6,8),%ZMM4{%K5} | 4 | 1 | 0 | 4 | 4 | 0 | 1 | 0 | 0 | 21 | 5 |
VSHUFI32X4 $-0x12,%ZMM13,%ZMM13,%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VGATHERDPD (%RAX,%YMM13,8),%ZMM20{%K2} | 4 | 1 | 0 | 4 | 4 | 0 | 1 | 0 | 0 | 21 | 5 |
KMOVB %K1,%K5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSUBPD %ZMM3,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VGATHERDPD (%RAX,%YMM6,8),%ZMM13{%K3} | 4 | 1 | 0 | 4 | 4 | 0 | 1 | 0 | 0 | 21 | 5 |
VMOVAPD %ZMM5,%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
KMOVB %K1,%K6 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSUBPD %ZMM11,%ZMM1,%ZMM1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VGATHERDPD (%RAX,%YMM15,8),%ZMM11{%K7} | 4 | 1 | 0 | 4 | 4 | 0 | 1 | 0 | 0 | 21 | 5 |
VMOVAPD 0x940(%RSP),%ZMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VGATHERDPD (%RAX,%YMM14,8),%ZMM23{%K4} | 4 | 1 | 0 | 4 | 4 | 0 | 1 | 0 | 0 | 21 | 5 |
KMOVB %K1,%K4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KMOVB %K1,%K7 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KMOVB %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KMOVB %K1,%K3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMULPD %ZMM0,%ZMM0,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM0,%ZMM8,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM1,%ZMM1,%ZMM3 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM1,%ZMM8,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x980(%RSP),%ZMM8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD %ZMM8,%ZMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD132PD %ZMM1,%ZMM15,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM0,%ZMM15,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x900(%RSP),%ZMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMULPD %ZMM2,%ZMM0,%ZMM17 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM3,%ZMM1,%ZMM16 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD %ZMM15,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMULPD %ZMM8,%ZMM4,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM14,%ZMM23,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM21,%ZMM8,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x880(%RSP),%ZMM8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD132PD %ZMM22,%ZMM14,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x8c0(%RSP),%ZMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD132PD %ZMM0,%ZMM14,%ZMM19 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM1,%ZMM14,%ZMM15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD %ZMM8,%ZMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD132PD %ZMM0,%ZMM24,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM1,%ZMM24,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM14,%ZMM20,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM8,%ZMM13,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM19,%ZMM12,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVDQA32 %YMM7,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD231PD %ZMM15,%ZMM11,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x800(%RSP),%ZMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VADDPD %ZMM14,%ZMM6,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VADDPD %ZMM8,%ZMM5,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM15,%ZMM6,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VBROADCASTSD 0x2fff9(%RIP),%ZMM6 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMULPD %ZMM15,%ZMM5,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VDIVPD %ZMM10,%ZMM6,%ZMM10 | 3 | 2.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 24 | 16 |
VDIVPD %ZMM9,%ZMM6,%ZMM9 | 3 | 2.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 24 | 16 |
VMULPD %ZMM18,%ZMM10,%ZMM10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM18,%ZMM9,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VSCATTERDPD %ZMM14,(%R9,%YMM7,8){%K4} | 27 | 0 | 0 | 4 | 4 | 8 | 1 | 0 | 0 | 15 | 11 |
VSHUFI32X4 $-0x12,%ZMM7,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VSCATTERDPD %ZMM8,(%R9,%YMM7,8){%K5} | 27 | 0 | 0 | 4 | 4 | 8 | 1 | 0 | 0 | 15 | 11 |
VMOVAPD 0x780(%RSP),%ZMM5 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x740(%RSP),%ZMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x7c0(%RSP),%ZMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x6c0(%RSP),%ZMM8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD %ZMM5,%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD132PD %ZMM1,%ZMM15,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM0,%ZMM15,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x680(%RSP),%ZMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD231PD %ZMM14,%ZMM3,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM14,%ZMM2,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD %ZMM8,%ZMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD132PD %ZMM0,%ZMM15,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM1,%ZMM15,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x700(%RSP),%ZMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD231PD %ZMM15,%ZMM2,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM15,%ZMM3,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM14,%ZMM23,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM8,%ZMM4,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM22,%ZMM14,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x600(%RSP),%ZMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x5c0(%RSP),%ZMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD132PD %ZMM21,%ZMM8,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x640(%RSP),%ZMM8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD %ZMM14,%ZMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD132PD %ZMM1,%ZMM15,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM0,%ZMM15,%ZMM24 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x580(%RSP),%ZMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD231PD %ZMM8,%ZMM3,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM8,%ZMM2,%ZMM24 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD %ZMM15,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD213PD 0xa40(%RSP),%ZMM0,%ZMM8 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD213PD 0xa40(%RSP),%ZMM1,%ZMM15 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0xa80(%RSP),%ZMM2,%ZMM8 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0xa80(%RSP),%ZMM3,%ZMM15 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM8,%ZMM20,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM15,%ZMM13,%ZMM15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM24,%ZMM12,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM11,%ZMM15,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VADDPD %ZMM8,%ZMM6,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VADDPD %ZMM14,%ZMM5,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM10,%ZMM6,%ZMM10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM9,%ZMM5,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VSCATTERDPD %ZMM10,(%R13,%YMM19,8){%K6} | 27 | 0 | 0 | 4 | 4 | 8 | 1 | 0 | 0 | 15 | 11 |
VSCATTERDPD %ZMM9,(%R13,%YMM7,8){%K7} | 27 | 0 | 0 | 4 | 4 | 8 | 1 | 0 | 0 | 15 | 11 |
VMOVAPD 0x500(%RSP),%ZMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x4c0(%RSP),%ZMM6 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x540(%RSP),%ZMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x480(%RSP),%ZMM10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMULPD %ZMM14,%ZMM2,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD %ZMM6,%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMULPD %ZMM14,%ZMM3,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM0,%ZMM10,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x400(%RSP),%ZMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD132PD %ZMM1,%ZMM10,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM14,%ZMM2,%ZMM10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM14,%ZMM3,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM15,%ZMM17,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM15,%ZMM16,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x3c0(%RSP),%ZMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VADDPD %ZMM8,%ZMM9,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x440(%RSP),%ZMM8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VADDPD %ZMM5,%ZMM6,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM8,%ZMM17,%ZMM10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM8,%ZMM16,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x340(%RSP),%ZMM8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD %ZMM8,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD132PD %ZMM1,%ZMM15,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM0,%ZMM15,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VADDPD %ZMM14,%ZMM8,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD %ZMM0,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD132PD %ZMM26,%ZMM25,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM30,%ZMM29,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VADDPD %ZMM10,%ZMM5,%ZMM10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM14,%ZMM4,%ZMM4 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM23,%ZMM10,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM6,%ZMM21,%ZMM4 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD %ZMM1,%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD231PD %ZMM9,%ZMM22,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x380(%RSP),%ZMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD132PD %ZMM26,%ZMM25,%ZMM1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM30,%ZMM29,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM9,%ZMM2,%ZMM15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM9,%ZMM3,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM27,%ZMM2,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM27,%ZMM3,%ZMM3 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM31,%ZMM17,%ZMM15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM31,%ZMM16,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM28,%ZMM17,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM28,%ZMM16,%ZMM3 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VADDPD %ZMM15,%ZMM8,%ZMM10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VADDPD %ZMM6,%ZMM14,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VADDPD %ZMM0,%ZMM2,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VADDPD %ZMM1,%ZMM3,%ZMM1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM20,%ZMM0,%ZMM15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM1,%ZMM13,%ZMM13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM10,%ZMM15,%ZMM12 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM9,%ZMM13,%ZMM11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VADDPD %ZMM12,%ZMM5,%ZMM12 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VADDPD %ZMM11,%ZMM4,%ZMM11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VSCATTERDPD %ZMM12,(%RSI,%YMM19,8){%K2} | 27 | 0 | 0 | 4 | 4 | 8 | 1 | 0 | 0 | 15 | 11 |
VSCATTERDPD %ZMM11,(%RSI,%YMM7,8){%K3} | 27 | 0 | 0 | 4 | 4 | 8 | 1 | 0 | 0 | 15 | 11 |
CMP %R8,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JNE 37250 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
Function | miniqmcreference::TwoBodyJastrowRef |
Source file and lines | BsplineFunctor.h:305-336 |
Module | libqmcwfs.so |
nb instructions | 191 |
nb uops | 378 |
loop length | 1221 |
used x86 registers | 9 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 9 |
used zmm registers | 32 |
nb stack references | 30 |
ADD-SUB / MUL ratio | 0.50 |
micro-operation queue | 94.50 cycles |
front end | 94.50 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 |
---|---|---|---|---|---|---|---|---|
uops | 79.50 | 1.00 | 75.50 | 75.50 | 48.00 | 79.50 | 1.00 | 0.00 |
cycles | 79.50 | 50.00 | 75.50 | 75.50 | 48.00 | 79.50 | 1.00 | 0.00 |
Cycles executing div or sqrt instructions | 32.00 |
Longest recurrence chain latency (RecMII) | 29.00 |
FE+BE cycles | 89.26 |
Stall cycles | 35.76 |
RS full (events) | 0.06 |
PRF_FLOAT full (events) | 42.72 |
Front-end | 94.50 |
Dispatch | 79.50 |
DIV/SQRT | 32.00 |
Data deps. | 29.00 |
Overall L1 | 94.50 |
all | 100% |
load | 100% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 100% |
all | 99% |
load | 97% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 96% |
all | 99% |
load | 97% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 96% |
all | 68% |
load | 100% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 50% |
all | 99% |
load | 97% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 96% |
all | 97% |
load | 98% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 86% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
VMOVAPD (%R12,%R8,2),%ZMM10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x40(%R12,%R8,2),%ZMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
KMOVB %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KMOVB %K1,%K3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KMOVB %K1,%K6 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVAPD 0x9c0(%RSP),%ZMM8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
KMOVB %K1,%K5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KMOVB %K1,%K7 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMULPD %ZMM18,%ZMM10,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
KMOVB %K1,%K4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVAPD 0x840(%RSP),%ZMM24 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVDQA32 (%R15,%R8,1),%ZMM7 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMULPD %ZMM18,%ZMM9,%ZMM1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
ADD $0x40,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VRNDSCALEPD $0xb,%ZMM0,%ZMM3 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 8 | 1 |
VCVTTPD2DQ %ZMM0,%YMM6 | 2 | 0.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 7 | 1 |
VGATHERDPD (%RAX,%YMM6,8),%ZMM22{%K2} | 4 | 1 | 0 | 4 | 4 | 0 | 1 | 0 | 0 | 21 | 5 |
KMOVB %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VRNDSCALEPD $0xb,%ZMM1,%ZMM11 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 8 | 1 |
VCVTTPD2DQ %ZMM1,%YMM2 | 2 | 0.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 7 | 1 |
VINSERTI64X4 $0x1,%YMM2,%ZMM6,%ZMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VPADDD 0x302ec(%RIP),%ZMM13,%ZMM14 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VPADDD 0x30322(%RIP),%ZMM13,%ZMM5 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VSHUFI32X4 $-0x12,%ZMM13,%ZMM13,%ZMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VPADDD 0x30351(%RIP),%ZMM13,%ZMM13 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VGATHERDPD (%RAX,%YMM12,8),%ZMM21{%K3} | 4 | 1 | 0 | 4 | 4 | 0 | 1 | 0 | 0 | 21 | 5 |
KMOVB %K1,%K3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VGATHERDPD (%RAX,%YMM5,8),%ZMM12{%K6} | 4 | 1 | 0 | 4 | 4 | 0 | 1 | 0 | 0 | 21 | 5 |
VSHUFI32X4 $-0x12,%ZMM14,%ZMM14,%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VSHUFI32X4 $-0x12,%ZMM5,%ZMM5,%ZMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVAPD 0xa00(%RSP),%ZMM5 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VGATHERDPD (%RAX,%YMM6,8),%ZMM4{%K5} | 4 | 1 | 0 | 4 | 4 | 0 | 1 | 0 | 0 | 21 | 5 |
VSHUFI32X4 $-0x12,%ZMM13,%ZMM13,%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VGATHERDPD (%RAX,%YMM13,8),%ZMM20{%K2} | 4 | 1 | 0 | 4 | 4 | 0 | 1 | 0 | 0 | 21 | 5 |
KMOVB %K1,%K5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSUBPD %ZMM3,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VGATHERDPD (%RAX,%YMM6,8),%ZMM13{%K3} | 4 | 1 | 0 | 4 | 4 | 0 | 1 | 0 | 0 | 21 | 5 |
VMOVAPD %ZMM5,%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
KMOVB %K1,%K6 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSUBPD %ZMM11,%ZMM1,%ZMM1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VGATHERDPD (%RAX,%YMM15,8),%ZMM11{%K7} | 4 | 1 | 0 | 4 | 4 | 0 | 1 | 0 | 0 | 21 | 5 |
VMOVAPD 0x940(%RSP),%ZMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VGATHERDPD (%RAX,%YMM14,8),%ZMM23{%K4} | 4 | 1 | 0 | 4 | 4 | 0 | 1 | 0 | 0 | 21 | 5 |
KMOVB %K1,%K4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KMOVB %K1,%K7 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KMOVB %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KMOVB %K1,%K3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMULPD %ZMM0,%ZMM0,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM0,%ZMM8,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM1,%ZMM1,%ZMM3 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM1,%ZMM8,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x980(%RSP),%ZMM8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD %ZMM8,%ZMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD132PD %ZMM1,%ZMM15,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM0,%ZMM15,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x900(%RSP),%ZMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMULPD %ZMM2,%ZMM0,%ZMM17 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM3,%ZMM1,%ZMM16 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD %ZMM15,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMULPD %ZMM8,%ZMM4,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM14,%ZMM23,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM21,%ZMM8,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x880(%RSP),%ZMM8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD132PD %ZMM22,%ZMM14,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x8c0(%RSP),%ZMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD132PD %ZMM0,%ZMM14,%ZMM19 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM1,%ZMM14,%ZMM15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD %ZMM8,%ZMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD132PD %ZMM0,%ZMM24,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM1,%ZMM24,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM14,%ZMM20,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM8,%ZMM13,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM19,%ZMM12,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVDQA32 %YMM7,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD231PD %ZMM15,%ZMM11,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x800(%RSP),%ZMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VADDPD %ZMM14,%ZMM6,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VADDPD %ZMM8,%ZMM5,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM15,%ZMM6,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VBROADCASTSD 0x2fff9(%RIP),%ZMM6 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMULPD %ZMM15,%ZMM5,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VDIVPD %ZMM10,%ZMM6,%ZMM10 | 3 | 2.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 24 | 16 |
VDIVPD %ZMM9,%ZMM6,%ZMM9 | 3 | 2.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 24 | 16 |
VMULPD %ZMM18,%ZMM10,%ZMM10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM18,%ZMM9,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VSCATTERDPD %ZMM14,(%R9,%YMM7,8){%K4} | 27 | 0 | 0 | 4 | 4 | 8 | 1 | 0 | 0 | 15 | 11 |
VSHUFI32X4 $-0x12,%ZMM7,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VSCATTERDPD %ZMM8,(%R9,%YMM7,8){%K5} | 27 | 0 | 0 | 4 | 4 | 8 | 1 | 0 | 0 | 15 | 11 |
VMOVAPD 0x780(%RSP),%ZMM5 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x740(%RSP),%ZMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x7c0(%RSP),%ZMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x6c0(%RSP),%ZMM8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD %ZMM5,%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD132PD %ZMM1,%ZMM15,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM0,%ZMM15,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x680(%RSP),%ZMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD231PD %ZMM14,%ZMM3,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM14,%ZMM2,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD %ZMM8,%ZMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD132PD %ZMM0,%ZMM15,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM1,%ZMM15,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x700(%RSP),%ZMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD231PD %ZMM15,%ZMM2,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM15,%ZMM3,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM14,%ZMM23,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM8,%ZMM4,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM22,%ZMM14,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x600(%RSP),%ZMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x5c0(%RSP),%ZMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD132PD %ZMM21,%ZMM8,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x640(%RSP),%ZMM8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD %ZMM14,%ZMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD132PD %ZMM1,%ZMM15,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM0,%ZMM15,%ZMM24 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x580(%RSP),%ZMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD231PD %ZMM8,%ZMM3,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM8,%ZMM2,%ZMM24 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD %ZMM15,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD213PD 0xa40(%RSP),%ZMM0,%ZMM8 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD213PD 0xa40(%RSP),%ZMM1,%ZMM15 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0xa80(%RSP),%ZMM2,%ZMM8 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0xa80(%RSP),%ZMM3,%ZMM15 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM8,%ZMM20,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM15,%ZMM13,%ZMM15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM24,%ZMM12,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM11,%ZMM15,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VADDPD %ZMM8,%ZMM6,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VADDPD %ZMM14,%ZMM5,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM10,%ZMM6,%ZMM10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM9,%ZMM5,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VSCATTERDPD %ZMM10,(%R13,%YMM19,8){%K6} | 27 | 0 | 0 | 4 | 4 | 8 | 1 | 0 | 0 | 15 | 11 |
VSCATTERDPD %ZMM9,(%R13,%YMM7,8){%K7} | 27 | 0 | 0 | 4 | 4 | 8 | 1 | 0 | 0 | 15 | 11 |
VMOVAPD 0x500(%RSP),%ZMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x4c0(%RSP),%ZMM6 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x540(%RSP),%ZMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x480(%RSP),%ZMM10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMULPD %ZMM14,%ZMM2,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD %ZMM6,%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMULPD %ZMM14,%ZMM3,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM0,%ZMM10,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x400(%RSP),%ZMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD132PD %ZMM1,%ZMM10,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM14,%ZMM2,%ZMM10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM14,%ZMM3,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM15,%ZMM17,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM15,%ZMM16,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x3c0(%RSP),%ZMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VADDPD %ZMM8,%ZMM9,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x440(%RSP),%ZMM8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VADDPD %ZMM5,%ZMM6,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM8,%ZMM17,%ZMM10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM8,%ZMM16,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x340(%RSP),%ZMM8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD %ZMM8,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD132PD %ZMM1,%ZMM15,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM0,%ZMM15,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VADDPD %ZMM14,%ZMM8,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD %ZMM0,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD132PD %ZMM26,%ZMM25,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM30,%ZMM29,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VADDPD %ZMM10,%ZMM5,%ZMM10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM14,%ZMM4,%ZMM4 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM23,%ZMM10,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM6,%ZMM21,%ZMM4 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD %ZMM1,%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD231PD %ZMM9,%ZMM22,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x380(%RSP),%ZMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD132PD %ZMM26,%ZMM25,%ZMM1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM30,%ZMM29,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM9,%ZMM2,%ZMM15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM9,%ZMM3,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM27,%ZMM2,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM27,%ZMM3,%ZMM3 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM31,%ZMM17,%ZMM15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM31,%ZMM16,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM28,%ZMM17,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM28,%ZMM16,%ZMM3 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VADDPD %ZMM15,%ZMM8,%ZMM10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VADDPD %ZMM6,%ZMM14,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VADDPD %ZMM0,%ZMM2,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VADDPD %ZMM1,%ZMM3,%ZMM1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM20,%ZMM0,%ZMM15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM1,%ZMM13,%ZMM13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM10,%ZMM15,%ZMM12 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM9,%ZMM13,%ZMM11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VADDPD %ZMM12,%ZMM5,%ZMM12 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VADDPD %ZMM11,%ZMM4,%ZMM11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VSCATTERDPD %ZMM12,(%RSI,%YMM19,8){%K2} | 27 | 0 | 0 | 4 | 4 | 8 | 1 | 0 | 0 | 15 | 11 |
VSCATTERDPD %ZMM11,(%RSI,%YMM7,8){%K3} | 27 | 0 | 0 | 4 | 4 | 8 | 1 | 0 | 0 | 15 | 11 |
CMP %R8,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JNE 37250 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |