Function: Step10_orig | Module: exec | Source: Step10_orig.c:10-41 [...] | Coverage: 99.95% |
---|
Function: Step10_orig | Module: exec | Source: Step10_orig.c:10-41 [...] | Coverage: 99.95% |
---|
/home/kcamus/qaas_runs/169-401-3406/intel/HACCmk/build/HACCmk/src/Step10_orig.c: 10 - 41 |
-------------------------------------------------------------------------------- |
10: { |
[...] |
19: for ( j = 0; j < count1; j++ ) |
20: { |
21: dxc = xx1[j] - xxi; |
22: dyc = yy1[j] - yyi; |
23: dzc = zz1[j] - zzi; |
24: |
25: r2 = dxc * dxc + dyc * dyc + dzc * dzc; |
26: |
27: m = ( r2 < fsrrmax2 ) ? mass1[j] : 0.0f; |
28: |
29: f = pow( r2 + mp_rsm2, -1.5 ) - ( ma0 + r2*(ma1 + r2*(ma2 + r2*(ma3 + r2*(ma4 + r2*ma5))))); |
30: |
31: f = ( r2 > 0.0f ) ? m * f : 0.0f; |
32: |
33: xi = xi + f * dxc; |
34: yi = yi + f * dyc; |
35: zi = zi + f * dzc; |
36: } |
37: |
38: *dxi = xi; |
39: *dyi = yi; |
40: *dzi = zi; |
41: } |
0x401b90 PUSH %RBP |
0x401b91 MOV %RSP,%RBP |
0x401b94 PUSH %RBX |
0x401b95 AND $-0x20,%RSP |
0x401b99 SUB $0xc0,%RSP |
0x401ba0 MOV 0x18(%RBP),%R10 |
0x401ba4 MOV 0x10(%RBP),%R11 |
0x401ba8 TEST %EDI,%EDI |
0x401baa JLE 401daa |
0x401bb0 MOV %EDI,%EAX |
0x401bb2 MOV $-0x8,%EDI |
0x401bb7 AND %RAX,%RDI |
0x401bba VMOVUPS %XMM3,0x90(%RSP) |
0x401bc3 JE 401dbb |
0x401bc9 VMOVUPS %XMM0,0x60(%RSP) |
0x401bcf VBROADCASTSS %XMM0,%YMM0 |
0x401bd4 VMOVUPS %YMM0,0x40(%RSP) |
0x401bda VMOVUPS %XMM1,0x70(%RSP) |
0x401be0 VBROADCASTSS %XMM1,%YMM0 |
0x401be5 VMOVUPS %YMM0,0x20(%RSP) |
0x401beb VMOVUPS %XMM2,0x80(%RSP) |
0x401bf4 VBROADCASTSS %XMM2,%YMM8 |
0x401bf9 VBROADCASTSS %XMM3,%YMM9 |
0x401bfe VMOVUPS %XMM4,(%RSP) |
0x401c03 VBROADCASTSS %XMM4,%YMM10 |
0x401c08 XOR %EBX,%EBX |
0x401c0a VBROADCASTSD 0x7625(%RIP),%YMM13 |
0x401c13 VXORPS %XMM4,%XMM4,%XMM4 |
0x401c17 VXORPS %XMM12,%XMM12,%XMM12 |
0x401c1c VXORPS %XMM6,%XMM6,%XMM6 |
(5) 0x401c20 VMOVUPS (%RSI,%RBX,4),%YMM0 |
(5) 0x401c25 VSUBPS 0x40(%RSP),%YMM0,%YMM0 |
(5) 0x401c2b VMOVUPS (%RDX,%RBX,4),%YMM11 |
(5) 0x401c30 VSUBPS 0x20(%RSP),%YMM11,%YMM15 |
(5) 0x401c36 VMOVUPS (%RCX,%RBX,4),%YMM14 |
(5) 0x401c3b VMULPS %YMM0,%YMM0,%YMM11 |
(5) 0x401c3f VFMADD231PS %YMM15,%YMM15,%YMM11 |
(5) 0x401c44 VSUBPS %YMM8,%YMM14,%YMM14 |
(5) 0x401c49 VFMADD231PS %YMM14,%YMM14,%YMM11 |
(5) 0x401c4e VADDPS %YMM10,%YMM11,%YMM3 |
(5) 0x401c53 VCVTPS2PD %XMM3,%YMM2 |
(5) 0x401c57 VSQRTPD %YMM2,%YMM1 |
(5) 0x401c5b VBROADCASTSS 0x75e4(%RIP),%YMM5 |
(5) 0x401c64 VBROADCASTSS 0x75df(%RIP),%YMM7 |
(5) 0x401c6d VFMADD213PS %YMM7,%YMM11,%YMM5 |
(5) 0x401c72 VBROADCASTSS 0x75d5(%RIP),%YMM7 |
(5) 0x401c7b VFMADD213PS %YMM7,%YMM11,%YMM5 |
(5) 0x401c80 VMULPD %YMM2,%YMM2,%YMM2 |
(5) 0x401c84 VBROADCASTSS 0x75c7(%RIP),%YMM7 |
(5) 0x401c8d VFMADD213PS %YMM7,%YMM11,%YMM5 |
(5) 0x401c92 VDIVPD %YMM2,%YMM13,%YMM2 |
(5) 0x401c96 VBROADCASTSS 0x75b9(%RIP),%YMM7 |
(5) 0x401c9f VFMADD213PS %YMM7,%YMM11,%YMM5 |
(5) 0x401ca4 VBROADCASTSS 0x75af(%RIP),%YMM7 |
(5) 0x401cad VFMADD213PS %YMM7,%YMM11,%YMM5 |
(5) 0x401cb2 VCVTPS2PD %XMM5,%YMM7 |
(5) 0x401cb6 VFMADD231PD %YMM2,%YMM1,%YMM7 |
(5) 0x401cbb VEXTRACTF128 $0x1,%YMM3,%XMM1 |
(5) 0x401cc1 VCVTPS2PD %XMM1,%YMM1 |
(5) 0x401cc5 VMULPD %YMM1,%YMM1,%YMM2 |
(5) 0x401cc9 VSQRTPD %YMM1,%YMM1 |
(5) 0x401ccd VDIVPD %YMM2,%YMM13,%YMM2 |
(5) 0x401cd1 VEXTRACTF128 $0x1,%YMM5,%XMM3 |
(5) 0x401cd7 VCVTPS2PD %XMM3,%YMM3 |
(5) 0x401cdb VCVTPD2PS %YMM7,%XMM5 |
(5) 0x401cdf VFMADD231PD %YMM2,%YMM1,%YMM3 |
(5) 0x401ce4 VCVTPD2PS %YMM3,%XMM1 |
(5) 0x401ce8 VINSERTF128 $0x1,%XMM1,%YMM5,%YMM1 |
(5) 0x401cee VCMPPS $0x1,%YMM9,%YMM11,%YMM2 |
(5) 0x401cf4 VMASKMOVPS (%R8,%RBX,4),%YMM2,%YMM2 |
(5) 0x401cfa VMULPS %YMM1,%YMM2,%YMM1 |
(5) 0x401cfe VXORPS %XMM2,%XMM2,%XMM2 |
(5) 0x401d02 VCMPPS $0x1,%YMM11,%YMM2,%YMM2 |
(5) 0x401d08 VANDPS %YMM1,%YMM2,%YMM1 |
(5) 0x401d0c VFMADD231PS %YMM0,%YMM1,%YMM4 |
(5) 0x401d11 VFMADD231PS %YMM15,%YMM1,%YMM12 |
(5) 0x401d16 VFMADD231PS %YMM14,%YMM1,%YMM6 |
(5) 0x401d1b ADD $0x8,%RBX |
(5) 0x401d1f CMP %RDI,%RBX |
(5) 0x401d22 JB 401c20 |
0x401d28 VEXTRACTF128 $0x1,%YMM4,%XMM0 |
0x401d2e VADDPS %XMM0,%XMM4,%XMM0 |
0x401d32 VPERMILPD $0x1,%XMM0,%XMM1 |
0x401d38 VADDPS %XMM1,%XMM0,%XMM0 |
0x401d3c VMOVSHDUP %XMM0,%XMM1 |
0x401d40 VADDSS %XMM1,%XMM0,%XMM5 |
0x401d44 VEXTRACTF128 $0x1,%YMM12,%XMM0 |
0x401d4a VADDPS %XMM0,%XMM12,%XMM0 |
0x401d4e VPERMILPD $0x1,%XMM0,%XMM1 |
0x401d54 VADDPS %XMM1,%XMM0,%XMM0 |
0x401d58 VMOVSHDUP %XMM0,%XMM1 |
0x401d5c VADDSS %XMM1,%XMM0,%XMM7 |
0x401d60 VEXTRACTF128 $0x1,%YMM6,%XMM0 |
0x401d66 VADDPS %XMM0,%XMM6,%XMM0 |
0x401d6a VPERMILPD $0x1,%XMM0,%XMM1 |
0x401d70 VADDPS %XMM1,%XMM0,%XMM0 |
0x401d74 VMOVSHDUP %XMM0,%XMM1 |
0x401d78 VADDSS %XMM1,%XMM0,%XMM6 |
0x401d7c VMOVQ %RAX,%XMM0 |
0x401d81 VPBROADCASTQ %XMM0,%YMM11 |
0x401d86 CMP %RAX,%RDI |
0x401d89 VMOVUPS (%RSP),%XMM4 |
0x401d8e VMOVUPS 0x80(%RSP),%XMM2 |
0x401d97 VMOVUPS 0x70(%RSP),%XMM1 |
0x401d9d VMOVUPS 0x60(%RSP),%XMM0 |
0x401da3 JNE 401dd3 |
0x401da5 JMP 402024 |
0x401daa VXORPS %XMM5,%XMM5,%XMM5 |
0x401dae VXORPS %XMM7,%XMM7,%XMM7 |
0x401db2 VXORPS %XMM6,%XMM6,%XMM6 |
0x401db6 JMP 402024 |
0x401dbb VMOVQ %RAX,%XMM3 |
0x401dc0 VPBROADCASTQ %XMM3,%YMM11 |
0x401dc5 VXORPS %XMM6,%XMM6,%XMM6 |
0x401dc9 XOR %EDI,%EDI |
0x401dcb VXORPS %XMM7,%XMM7,%XMM7 |
0x401dcf VXORPS %XMM5,%XMM5,%XMM5 |
0x401dd3 VMOVQ %RDI,%XMM3 |
0x401dd8 VPBROADCASTQ %XMM3,%YMM8 |
0x401ddd VPADDQ 0x747b(%RIP),%YMM8,%YMM13 |
0x401de5 VPADDQ 0x7493(%RIP),%YMM8,%YMM14 |
0x401ded VPBROADCASTQ 0x744a(%RIP),%YMM9 |
0x401df6 VPXOR %YMM9,%YMM11,%YMM10 |
0x401dfb VPADDQ 0x749d(%RIP),%YMM8,%YMM3 |
0x401e03 VPCMPGTQ %YMM3,%YMM10,%YMM3 |
0x401e08 VPXOR %YMM9,%YMM11,%YMM12 |
0x401e0d VPADDQ 0x74ab(%RIP),%YMM8,%YMM8 |
0x401e15 VPCMPGTQ %YMM8,%YMM12,%YMM8 |
0x401e1a VPOR %YMM3,%YMM8,%YMM3 |
0x401e1e VMOVMSKPD %YMM3,%EAX |
0x401e22 TEST %EAX,%EAX |
0x401e24 JE 401f8e |
0x401e2a VPOR %YMM9,%YMM14,%YMM3 |
0x401e2f VPCMPGTQ %YMM3,%YMM12,%YMM8 |
0x401e34 VPOR %YMM9,%YMM13,%YMM3 |
0x401e39 VPCMPGTQ %YMM3,%YMM10,%YMM3 |
0x401e3e VPACKSSDW %YMM3,%YMM8,%YMM3 |
0x401e42 VPERMQ $-0x28,%YMM3,%YMM15 |
0x401e48 VMASKMOVPS (%RSI,%RDI,4),%YMM15,%YMM8 |
0x401e4e VBROADCASTSS %XMM0,%YMM3 |
0x401e53 VSUBPS %YMM3,%YMM8,%YMM8 |
0x401e57 VMOVUPS %YMM8,(%RSP) |
0x401e5c VMASKMOVPS (%RDX,%RDI,4),%YMM15,%YMM0 |
0x401e62 VBROADCASTSS %XMM1,%YMM3 |
0x401e67 VMOVDQU %YMM14,0x20(%RSP) |
0x401e6d VSUBPS %YMM3,%YMM0,%YMM14 |
0x401e71 VMASKMOVPS (%RCX,%RDI,4),%YMM15,%YMM3 |
0x401e77 VBROADCASTSS %XMM2,%YMM0 |
0x401e7c VSUBPS %YMM0,%YMM3,%YMM3 |
0x401e80 VMULPS %YMM8,%YMM8,%YMM8 |
0x401e85 VFMADD231PS %YMM14,%YMM14,%YMM8 |
0x401e8a VFMADD231PS %YMM3,%YMM3,%YMM8 |
0x401e8f VMOVDQU %YMM13,0x40(%RSP) |
0x401e95 VBROADCASTSS %XMM4,%YMM13 |
0x401e9a VADDPS %YMM13,%YMM8,%YMM13 |
0x401e9f VEXTRACTF128 $0x1,%YMM13,%XMM0 |
0x401ea5 VCVTPS2PD %XMM0,%YMM0 |
0x401ea9 VCVTPS2PD %XMM13,%YMM13 |
0x401eae VSQRTPD %YMM13,%YMM2 |
0x401eb3 VSQRTPD %YMM0,%YMM4 |
0x401eb7 VMULPD %YMM13,%YMM13,%YMM13 |
0x401ebc VBROADCASTSD 0x7373(%RIP),%YMM11 |
0x401ec5 VDIVPD %YMM13,%YMM11,%YMM13 |
0x401eca VMULPD %YMM0,%YMM0,%YMM0 |
0x401ece VDIVPD %YMM0,%YMM11,%YMM1 |
0x401ed2 VBROADCASTSS 0x736d(%RIP),%YMM11 |
0x401edb VBROADCASTSS 0x7368(%RIP),%YMM0 |
0x401ee4 VFMADD231PS %YMM11,%YMM8,%YMM0 |
0x401ee9 VBROADCASTSS 0x735e(%RIP),%YMM11 |
0x401ef2 VFMADD231PS %YMM0,%YMM8,%YMM11 |
0x401ef7 VBROADCASTSS 0x7354(%RIP),%YMM0 |
0x401f00 VFMADD231PS %YMM11,%YMM8,%YMM0 |
0x401f05 VBROADCASTSS 0x734a(%RIP),%YMM11 |
0x401f0e VFMADD231PS %YMM0,%YMM8,%YMM11 |
0x401f13 VBROADCASTSS 0x7340(%RIP),%YMM0 |
0x401f1c VFMADD231PS %YMM11,%YMM8,%YMM0 |
0x401f21 VCVTPS2PD %XMM0,%YMM11 |
0x401f25 VEXTRACTF128 $0x1,%YMM0,%XMM0 |
0x401f2b VCVTPS2PD %XMM0,%YMM0 |
0x401f2f VFMADD231PD %YMM13,%YMM2,%YMM11 |
0x401f34 VMOVDQU 0x40(%RSP),%YMM13 |
0x401f3a VFMADD231PD %YMM1,%YMM4,%YMM0 |
0x401f3f VBROADCASTSS 0x90(%RSP),%YMM1 |
0x401f49 VCMPPS $0x1,%YMM1,%YMM8,%YMM1 |
0x401f4e VANDPS %YMM1,%YMM15,%YMM1 |
0x401f52 VMASKMOVPS (%R8,%RDI,4),%YMM1,%YMM1 |
0x401f58 VCVTPD2PS %YMM11,%XMM2 |
0x401f5d VCVTPD2PS %YMM0,%XMM0 |
0x401f61 VINSERTF128 $0x1,%XMM0,%YMM2,%YMM0 |
0x401f67 VMULPS %YMM0,%YMM1,%YMM0 |
0x401f6b VXORPS %XMM1,%XMM1,%XMM1 |
0x401f6f VCMPPS $0x1,%YMM8,%YMM1,%YMM1 |
0x401f75 VANDPS %YMM0,%YMM1,%YMM2 |
0x401f79 VMULPS (%RSP),%YMM2,%YMM0 |
0x401f7e VMULPS %YMM2,%YMM14,%YMM1 |
0x401f82 VMOVDQU 0x20(%RSP),%YMM14 |
0x401f88 VMULPS %YMM3,%YMM2,%YMM2 |
0x401f8c JMP 401f9a |
0x401f8e VXORPS %XMM0,%XMM0,%XMM0 |
0x401f92 VXORPS %XMM1,%XMM1,%XMM1 |
0x401f96 VXORPS %XMM2,%XMM2,%XMM2 |
0x401f9a VPOR %YMM9,%YMM14,%YMM3 |
0x401f9f VPCMPGTQ %YMM3,%YMM12,%YMM3 |
0x401fa4 VPOR %YMM9,%YMM13,%YMM4 |
0x401fa9 VPCMPGTQ %YMM4,%YMM10,%YMM4 |
0x401fae VPACKSSDW %YMM4,%YMM3,%YMM3 |
0x401fb2 VPERMQ $-0x28,%YMM3,%YMM3 |
0x401fb8 VPAND %YMM0,%YMM3,%YMM0 |
0x401fbc VPAND %YMM1,%YMM3,%YMM1 |
0x401fc0 VPAND %YMM2,%YMM3,%YMM2 |
0x401fc4 VEXTRACTI128 $0x1,%YMM0,%XMM3 |
0x401fca VADDPS %XMM3,%XMM0,%XMM0 |
0x401fce VPERMILPD $0x1,%XMM0,%XMM3 |
0x401fd4 VADDPS %XMM3,%XMM0,%XMM0 |
0x401fd8 VMOVSHDUP %XMM0,%XMM3 |
0x401fdc VADDSS %XMM3,%XMM0,%XMM0 |
0x401fe0 VADDSS %XMM0,%XMM5,%XMM5 |
0x401fe4 VEXTRACTI128 $0x1,%YMM1,%XMM0 |
0x401fea VADDPS %XMM0,%XMM1,%XMM0 |
0x401fee VPERMILPD $0x1,%XMM0,%XMM1 |
0x401ff4 VADDPS %XMM1,%XMM0,%XMM0 |
0x401ff8 VMOVSHDUP %XMM0,%XMM1 |
0x401ffc VADDSS %XMM1,%XMM0,%XMM0 |
0x402000 VADDSS %XMM0,%XMM7,%XMM7 |
0x402004 VEXTRACTI128 $0x1,%YMM2,%XMM0 |
0x40200a VADDPS %XMM0,%XMM2,%XMM0 |
0x40200e VPERMILPD $0x1,%XMM0,%XMM1 |
0x402014 VADDPS %XMM1,%XMM0,%XMM0 |
0x402018 VMOVSHDUP %XMM0,%XMM1 |
0x40201c VADDSS %XMM1,%XMM0,%XMM0 |
0x402020 VADDSS %XMM0,%XMM6,%XMM6 |
0x402024 VMOVSS %XMM5,(%R9) |
0x402029 VMOVSS %XMM7,(%R11) |
0x40202e VMOVSS %XMM6,(%R10) |
0x402033 LEA -0x8(%RBP),%RSP |
0x402037 POP %RBX |
0x402038 POP %RBP |
0x402039 VZEROUPPER |
0x40203c RET |
0x40203d NOPL (%RAX) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | main.extracted.8 | main.c:142 | exec |
○ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_fork_call | libiomp5.so | |
○ | __kmpc_fork_call | libiomp5.so | |
○ | main | main.c:139 | exec |
○ | __libc_init_first | libc.so.6 |
Path / |
Source file and lines | Step10_orig.c:10-41 |
Module | exec |
nb instructions | 190 |
nb uops | 203 |
loop length | 936 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 10 |
used ymm registers | 14 |
used zmm registers | 0 |
nb stack references | 10 |
ADD-SUB / MUL ratio | 3.57 |
micro-operation queue | 50.75 cycles |
front end | 50.75 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | |
---|---|---|---|---|---|---|---|---|
uops | 37.83 | 37.67 | 15.17 | 14.83 | 15.00 | 52.00 | 14.50 | 15.00 |
cycles | 37.83 | 37.67 | 15.17 | 14.83 | 15.00 | 52.00 | 14.50 | 15.00 |
Cycles executing div or sqrt instructions | 34.00-40.00 |
FE+BE cycles | 90.03-93.98 |
Stall cycles | 39.47-43.42 |
ROB full (events) | 39.97-43.44 |
RS full (events) | 9.11-10.91 |
Front-end | 50.75 |
Dispatch | 52.00 |
DIV/SQRT | 34.00-40.00 |
Overall L1 | 52.00 |
all | 76% |
load | 85% |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 70% |
all | 75% |
load | 50% |
store | 72% |
mul | 100% |
add-sub | 64% |
fma | 100% |
div/sqrt | 100% |
other | 70% |
all | 75% |
load | 60% |
store | 76% |
mul | 100% |
add-sub | 68% |
fma | 100% |
div/sqrt | 100% |
other | 70% |
all | 38% |
load | 44% |
store | 50% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 35% |
all | 27% |
load | 23% |
store | 26% |
mul | 50% |
add-sub | 22% |
fma | 50% |
div/sqrt | 50% |
other | 21% |
all | 30% |
load | 29% |
store | 30% |
mul | 50% |
add-sub | 26% |
fma | 50% |
div/sqrt | 50% |
other | 26% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
AND $-0x20,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
SUB $0xc0,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV 0x18(%RBP),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x10(%RBP),%R11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
TEST %EDI,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 401daa | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %EDI,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV $-0x8,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
AND %RAX,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVUPS %XMM3,0x90(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
JE 401dbb | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VMOVUPS %XMM0,0x60(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSS %XMM0,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVUPS %YMM0,0x40(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVUPS %XMM1,0x70(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSS %XMM1,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVUPS %YMM0,0x20(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVUPS %XMM2,0x80(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSS %XMM2,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSS %XMM3,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVUPS %XMM4,(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSS %XMM4,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VBROADCASTSD 0x7625(%RIP),%YMM13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VXORPS %XMM4,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VXORPS %XMM12,%XMM12,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VXORPS %XMM6,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VEXTRACTF128 $0x1,%YMM4,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VADDPS %XMM0,%XMM4,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPERMILPD $0x1,%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDPS %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSHDUP %XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDSS %XMM1,%XMM0,%XMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VEXTRACTF128 $0x1,%YMM12,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VADDPS %XMM0,%XMM12,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPERMILPD $0x1,%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDPS %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSHDUP %XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDSS %XMM1,%XMM0,%XMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VEXTRACTF128 $0x1,%YMM6,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VADDPS %XMM0,%XMM6,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPERMILPD $0x1,%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDPS %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSHDUP %XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDSS %XMM1,%XMM0,%XMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVQ %RAX,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %XMM0,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
CMP %RAX,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVUPS (%RSP),%XMM4 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVUPS 0x80(%RSP),%XMM2 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVUPS 0x70(%RSP),%XMM1 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVUPS 0x60(%RSP),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JNE 401dd3 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
JMP 402024 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
VXORPS %XMM5,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VXORPS %XMM7,%XMM7,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VXORPS %XMM6,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 402024 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
VMOVQ %RAX,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %XMM3,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VXORPS %XMM6,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VXORPS %XMM7,%XMM7,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VXORPS %XMM5,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVQ %RDI,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %XMM3,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VPADDQ 0x747b(%RIP),%YMM8,%YMM13 | 1 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 1 | 0.50 |
VPADDQ 0x7493(%RIP),%YMM8,%YMM14 | 1 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ 0x744a(%RIP),%YMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPXOR %YMM9,%YMM11,%YMM10 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VPADDQ 0x749d(%RIP),%YMM8,%YMM3 | 1 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 1 | 0.50 |
VPCMPGTQ %YMM3,%YMM10,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VPXOR %YMM9,%YMM11,%YMM12 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VPADDQ 0x74ab(%RIP),%YMM8,%YMM8 | 1 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 1 | 0.50 |
VPCMPGTQ %YMM8,%YMM12,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VPOR %YMM3,%YMM8,%YMM3 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VMOVMSKPD %YMM3,%EAX | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 1 |
TEST %EAX,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 401f8e | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VPOR %YMM9,%YMM14,%YMM3 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VPCMPGTQ %YMM3,%YMM12,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VPOR %YMM9,%YMM13,%YMM3 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VPCMPGTQ %YMM3,%YMM10,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VPACKSSDW %YMM3,%YMM8,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VPERMQ $-0x28,%YMM3,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMASKMOVPS (%RSI,%RDI,4),%YMM15,%YMM8 | 2 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 3 | 0.50 |
VBROADCASTSS %XMM0,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VSUBPS %YMM3,%YMM8,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVUPS %YMM8,(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMASKMOVPS (%RDX,%RDI,4),%YMM15,%YMM0 | 2 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 3 | 0.50 |
VBROADCASTSS %XMM1,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM14,0x20(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 4 | 1 |
VSUBPS %YMM3,%YMM0,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMASKMOVPS (%RCX,%RDI,4),%YMM15,%YMM3 | 2 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 3 | 0.50 |
VBROADCASTSS %XMM2,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VSUBPS %YMM0,%YMM3,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPS %YMM8,%YMM8,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PS %YMM14,%YMM14,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PS %YMM3,%YMM3,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVDQU %YMM13,0x40(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 4 | 1 |
VBROADCASTSS %XMM4,%YMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VADDPS %YMM13,%YMM8,%YMM13 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VEXTRACTF128 $0x1,%YMM13,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VCVTPS2PD %XMM0,%YMM0 | 2 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0 | 0 | 7 | 1 |
VCVTPS2PD %XMM13,%YMM13 | 2 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0 | 0 | 7 | 1 |
VSQRTPD %YMM13,%YMM2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-19 | 9-12 |
VSQRTPD %YMM0,%YMM4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-19 | 9-12 |
VMULPD %YMM13,%YMM13,%YMM13 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VBROADCASTSD 0x7373(%RIP),%YMM11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VDIVPD %YMM13,%YMM11,%YMM13 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-14 | 8 |
VMULPD %YMM0,%YMM0,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VDIVPD %YMM0,%YMM11,%YMM1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-14 | 8 |
VBROADCASTSS 0x736d(%RIP),%YMM11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VBROADCASTSS 0x7368(%RIP),%YMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VFMADD231PS %YMM11,%YMM8,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VBROADCASTSS 0x735e(%RIP),%YMM11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VFMADD231PS %YMM0,%YMM8,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VBROADCASTSS 0x7354(%RIP),%YMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VFMADD231PS %YMM11,%YMM8,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VBROADCASTSS 0x734a(%RIP),%YMM11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VFMADD231PS %YMM0,%YMM8,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VBROADCASTSS 0x7340(%RIP),%YMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VFMADD231PS %YMM11,%YMM8,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCVTPS2PD %XMM0,%YMM11 | 2 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0 | 0 | 7 | 1 |
VEXTRACTF128 $0x1,%YMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VCVTPS2PD %XMM0,%YMM0 | 2 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0 | 0 | 7 | 1 |
VFMADD231PD %YMM13,%YMM2,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVDQU 0x40(%RSP),%YMM13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VFMADD231PD %YMM1,%YMM4,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VBROADCASTSS 0x90(%RSP),%YMM1 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VCMPPS $0x1,%YMM1,%YMM8,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VANDPS %YMM1,%YMM15,%YMM1 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VMASKMOVPS (%R8,%RDI,4),%YMM1,%YMM1 | 2 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 3 | 0.50 |
VCVTPD2PS %YMM11,%XMM2 | 2 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0 | 0 | 7 | 1 |
VCVTPD2PS %YMM0,%XMM0 | 2 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0 | 0 | 7 | 1 |
VINSERTF128 $0x1,%XMM0,%YMM2,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMULPS %YMM0,%YMM1,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VXORPS %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VCMPPS $0x1,%YMM8,%YMM1,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VANDPS %YMM0,%YMM1,%YMM2 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VMULPS (%RSP),%YMM2,%YMM0 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPS %YMM2,%YMM14,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVDQU 0x20(%RSP),%YMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMULPS %YMM3,%YMM2,%YMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
JMP 401f9a | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
VXORPS %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VXORPS %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VXORPS %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPOR %YMM9,%YMM14,%YMM3 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VPCMPGTQ %YMM3,%YMM12,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VPOR %YMM9,%YMM13,%YMM4 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VPCMPGTQ %YMM4,%YMM10,%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VPACKSSDW %YMM4,%YMM3,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VPERMQ $-0x28,%YMM3,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VPAND %YMM0,%YMM3,%YMM0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VPAND %YMM1,%YMM3,%YMM1 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VPAND %YMM2,%YMM3,%YMM2 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VEXTRACTI128 $0x1,%YMM0,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VADDPS %XMM3,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPERMILPD $0x1,%XMM0,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDPS %XMM3,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSHDUP %XMM0,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDSS %XMM3,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSS %XMM0,%XMM5,%XMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VEXTRACTI128 $0x1,%YMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VADDPS %XMM0,%XMM1,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPERMILPD $0x1,%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDPS %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSHDUP %XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDSS %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSS %XMM0,%XMM7,%XMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VEXTRACTI128 $0x1,%YMM2,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VADDPS %XMM0,%XMM2,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPERMILPD $0x1,%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDPS %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSHDUP %XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDSS %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSS %XMM0,%XMM6,%XMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSS %XMM5,(%R9) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVSS %XMM7,(%R11) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVSS %XMM6,(%R10) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
LEA -0x8(%RBP),%RSP | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
VZEROUPPER | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
Source file and lines | Step10_orig.c:10-41 |
Module | exec |
nb instructions | 190 |
nb uops | 203 |
loop length | 936 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 10 |
used ymm registers | 14 |
used zmm registers | 0 |
nb stack references | 10 |
ADD-SUB / MUL ratio | 3.57 |
micro-operation queue | 50.75 cycles |
front end | 50.75 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | |
---|---|---|---|---|---|---|---|---|
uops | 37.83 | 37.67 | 15.17 | 14.83 | 15.00 | 52.00 | 14.50 | 15.00 |
cycles | 37.83 | 37.67 | 15.17 | 14.83 | 15.00 | 52.00 | 14.50 | 15.00 |
Cycles executing div or sqrt instructions | 34.00-40.00 |
FE+BE cycles | 90.03-93.98 |
Stall cycles | 39.47-43.42 |
ROB full (events) | 39.97-43.44 |
RS full (events) | 9.11-10.91 |
Front-end | 50.75 |
Dispatch | 52.00 |
DIV/SQRT | 34.00-40.00 |
Overall L1 | 52.00 |
all | 76% |
load | 85% |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 70% |
all | 75% |
load | 50% |
store | 72% |
mul | 100% |
add-sub | 64% |
fma | 100% |
div/sqrt | 100% |
other | 70% |
all | 75% |
load | 60% |
store | 76% |
mul | 100% |
add-sub | 68% |
fma | 100% |
div/sqrt | 100% |
other | 70% |
all | 38% |
load | 44% |
store | 50% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 35% |
all | 27% |
load | 23% |
store | 26% |
mul | 50% |
add-sub | 22% |
fma | 50% |
div/sqrt | 50% |
other | 21% |
all | 30% |
load | 29% |
store | 30% |
mul | 50% |
add-sub | 26% |
fma | 50% |
div/sqrt | 50% |
other | 26% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
AND $-0x20,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
SUB $0xc0,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV 0x18(%RBP),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x10(%RBP),%R11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
TEST %EDI,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 401daa | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %EDI,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV $-0x8,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
AND %RAX,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVUPS %XMM3,0x90(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
JE 401dbb | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VMOVUPS %XMM0,0x60(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSS %XMM0,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVUPS %YMM0,0x40(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVUPS %XMM1,0x70(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSS %XMM1,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVUPS %YMM0,0x20(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVUPS %XMM2,0x80(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSS %XMM2,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSS %XMM3,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVUPS %XMM4,(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VBROADCASTSS %XMM4,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VBROADCASTSD 0x7625(%RIP),%YMM13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VXORPS %XMM4,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VXORPS %XMM12,%XMM12,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VXORPS %XMM6,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VEXTRACTF128 $0x1,%YMM4,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VADDPS %XMM0,%XMM4,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPERMILPD $0x1,%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDPS %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSHDUP %XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDSS %XMM1,%XMM0,%XMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VEXTRACTF128 $0x1,%YMM12,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VADDPS %XMM0,%XMM12,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPERMILPD $0x1,%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDPS %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSHDUP %XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDSS %XMM1,%XMM0,%XMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VEXTRACTF128 $0x1,%YMM6,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VADDPS %XMM0,%XMM6,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPERMILPD $0x1,%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDPS %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSHDUP %XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDSS %XMM1,%XMM0,%XMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVQ %RAX,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %XMM0,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
CMP %RAX,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVUPS (%RSP),%XMM4 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVUPS 0x80(%RSP),%XMM2 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVUPS 0x70(%RSP),%XMM1 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVUPS 0x60(%RSP),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JNE 401dd3 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
JMP 402024 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
VXORPS %XMM5,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VXORPS %XMM7,%XMM7,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VXORPS %XMM6,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 402024 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
VMOVQ %RAX,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %XMM3,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VXORPS %XMM6,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VXORPS %XMM7,%XMM7,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VXORPS %XMM5,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVQ %RDI,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %XMM3,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VPADDQ 0x747b(%RIP),%YMM8,%YMM13 | 1 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 1 | 0.50 |
VPADDQ 0x7493(%RIP),%YMM8,%YMM14 | 1 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ 0x744a(%RIP),%YMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPXOR %YMM9,%YMM11,%YMM10 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VPADDQ 0x749d(%RIP),%YMM8,%YMM3 | 1 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 1 | 0.50 |
VPCMPGTQ %YMM3,%YMM10,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VPXOR %YMM9,%YMM11,%YMM12 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VPADDQ 0x74ab(%RIP),%YMM8,%YMM8 | 1 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 1 | 0.50 |
VPCMPGTQ %YMM8,%YMM12,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VPOR %YMM3,%YMM8,%YMM3 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VMOVMSKPD %YMM3,%EAX | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 1 |
TEST %EAX,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 401f8e | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VPOR %YMM9,%YMM14,%YMM3 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VPCMPGTQ %YMM3,%YMM12,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VPOR %YMM9,%YMM13,%YMM3 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VPCMPGTQ %YMM3,%YMM10,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VPACKSSDW %YMM3,%YMM8,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VPERMQ $-0x28,%YMM3,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMASKMOVPS (%RSI,%RDI,4),%YMM15,%YMM8 | 2 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 3 | 0.50 |
VBROADCASTSS %XMM0,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VSUBPS %YMM3,%YMM8,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVUPS %YMM8,(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMASKMOVPS (%RDX,%RDI,4),%YMM15,%YMM0 | 2 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 3 | 0.50 |
VBROADCASTSS %XMM1,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM14,0x20(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 4 | 1 |
VSUBPS %YMM3,%YMM0,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMASKMOVPS (%RCX,%RDI,4),%YMM15,%YMM3 | 2 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 3 | 0.50 |
VBROADCASTSS %XMM2,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VSUBPS %YMM0,%YMM3,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPS %YMM8,%YMM8,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PS %YMM14,%YMM14,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PS %YMM3,%YMM3,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVDQU %YMM13,0x40(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 4 | 1 |
VBROADCASTSS %XMM4,%YMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VADDPS %YMM13,%YMM8,%YMM13 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VEXTRACTF128 $0x1,%YMM13,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VCVTPS2PD %XMM0,%YMM0 | 2 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0 | 0 | 7 | 1 |
VCVTPS2PD %XMM13,%YMM13 | 2 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0 | 0 | 7 | 1 |
VSQRTPD %YMM13,%YMM2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-19 | 9-12 |
VSQRTPD %YMM0,%YMM4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-19 | 9-12 |
VMULPD %YMM13,%YMM13,%YMM13 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VBROADCASTSD 0x7373(%RIP),%YMM11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VDIVPD %YMM13,%YMM11,%YMM13 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-14 | 8 |
VMULPD %YMM0,%YMM0,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VDIVPD %YMM0,%YMM11,%YMM1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-14 | 8 |
VBROADCASTSS 0x736d(%RIP),%YMM11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VBROADCASTSS 0x7368(%RIP),%YMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VFMADD231PS %YMM11,%YMM8,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VBROADCASTSS 0x735e(%RIP),%YMM11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VFMADD231PS %YMM0,%YMM8,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VBROADCASTSS 0x7354(%RIP),%YMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VFMADD231PS %YMM11,%YMM8,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VBROADCASTSS 0x734a(%RIP),%YMM11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VFMADD231PS %YMM0,%YMM8,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VBROADCASTSS 0x7340(%RIP),%YMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VFMADD231PS %YMM11,%YMM8,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCVTPS2PD %XMM0,%YMM11 | 2 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0 | 0 | 7 | 1 |
VEXTRACTF128 $0x1,%YMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VCVTPS2PD %XMM0,%YMM0 | 2 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0 | 0 | 7 | 1 |
VFMADD231PD %YMM13,%YMM2,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVDQU 0x40(%RSP),%YMM13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VFMADD231PD %YMM1,%YMM4,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VBROADCASTSS 0x90(%RSP),%YMM1 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VCMPPS $0x1,%YMM1,%YMM8,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VANDPS %YMM1,%YMM15,%YMM1 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VMASKMOVPS (%R8,%RDI,4),%YMM1,%YMM1 | 2 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 3 | 0.50 |
VCVTPD2PS %YMM11,%XMM2 | 2 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0 | 0 | 7 | 1 |
VCVTPD2PS %YMM0,%XMM0 | 2 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0 | 0 | 7 | 1 |
VINSERTF128 $0x1,%XMM0,%YMM2,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMULPS %YMM0,%YMM1,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VXORPS %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VCMPPS $0x1,%YMM8,%YMM1,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VANDPS %YMM0,%YMM1,%YMM2 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VMULPS (%RSP),%YMM2,%YMM0 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPS %YMM2,%YMM14,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVDQU 0x20(%RSP),%YMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMULPS %YMM3,%YMM2,%YMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
JMP 401f9a | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
VXORPS %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VXORPS %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VXORPS %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPOR %YMM9,%YMM14,%YMM3 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VPCMPGTQ %YMM3,%YMM12,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VPOR %YMM9,%YMM13,%YMM4 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VPCMPGTQ %YMM4,%YMM10,%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VPACKSSDW %YMM4,%YMM3,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VPERMQ $-0x28,%YMM3,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VPAND %YMM0,%YMM3,%YMM0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VPAND %YMM1,%YMM3,%YMM1 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VPAND %YMM2,%YMM3,%YMM2 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VEXTRACTI128 $0x1,%YMM0,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VADDPS %XMM3,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPERMILPD $0x1,%XMM0,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDPS %XMM3,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSHDUP %XMM0,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDSS %XMM3,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSS %XMM0,%XMM5,%XMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VEXTRACTI128 $0x1,%YMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VADDPS %XMM0,%XMM1,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPERMILPD $0x1,%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDPS %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSHDUP %XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDSS %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSS %XMM0,%XMM7,%XMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VEXTRACTI128 $0x1,%YMM2,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VADDPS %XMM0,%XMM2,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPERMILPD $0x1,%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDPS %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSHDUP %XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDSS %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSS %XMM0,%XMM6,%XMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSS %XMM5,(%R9) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVSS %XMM7,(%R11) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVSS %XMM6,(%R10) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
LEA -0x8(%RBP),%RSP | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
VZEROUPPER | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼Step10_orig– | 99.95 | 40.42 |
○Loop 5 - Step10_orig.c:19-35 - exec | 99.8 | 40.36 |