Function: Step10_orig | Module: exec | Source: Step10_orig.c:10-41 [...] | Coverage: 99.83% |
---|---|---|---|
Function: Step10_orig | Module: exec | Source: Step10_orig.c:10-41 [...] | Coverage: 99.83% |
---|---|---|---|
/home/kcamus/qaas_runs/169-401-3406/intel/HACCmk/build/HACCmk/src/Step10_orig.c: 10 - 41 |
-------------------------------------------------------------------------------- |
10: { |
[...] |
17: xi = 0.; yi = 0.; zi = 0.; |
18: |
19: for ( j = 0; j < count1; j++ ) |
20: { |
21: dxc = xx1[j] - xxi; |
22: dyc = yy1[j] - yyi; |
23: dzc = zz1[j] - zzi; |
24: |
25: r2 = dxc * dxc + dyc * dyc + dzc * dzc; |
26: |
27: m = ( r2 < fsrrmax2 ) ? mass1[j] : 0.0f; |
28: |
29: f = pow( r2 + mp_rsm2, -1.5 ) - ( ma0 + r2*(ma1 + r2*(ma2 + r2*(ma3 + r2*(ma4 + r2*ma5))))); |
30: |
31: f = ( r2 > 0.0f ) ? m * f : 0.0f; |
32: |
33: xi = xi + f * dxc; |
34: yi = yi + f * dyc; |
35: zi = zi + f * dzc; |
36: } |
37: |
38: *dxi = xi; |
39: *dyi = yi; |
40: *dzi = zi; |
41: } |
0x4019e0 PUSH %RBP |
0x4019e1 MOV %RSP,%RBP |
0x4019e4 PUSH %RBX |
0x4019e5 TEST %EDI,%EDI |
0x4019e7 JLE 4020c8 |
0x4019ed MOV %EDI,%EAX |
0x4019ef LEA -0x1(%RDI),%EDI |
0x4019f2 VMOVAPS %XMM0,%XMM7 |
0x4019f6 CMP $0xe,%EDI |
0x4019f9 JBE 4020dc |
0x4019ff MOV %EAX,%EBX |
0x401a01 VXORPS %XMM12,%XMM12,%XMM12 |
0x401a06 VBROADCASTSS 0x1690(%RIP),%ZMM21 |
0x401a10 VBROADCASTSS 0x168a(%RIP),%ZMM20 |
0x401a1a SHR $0x4,%EBX |
0x401a1d VBROADCASTSS 0x1681(%RIP),%ZMM19 |
0x401a27 VBROADCASTSS 0x167b(%RIP),%ZMM18 |
0x401a31 VBROADCASTSS %XMM0,%ZMM26 |
0x401a37 VBROADCASTSS 0x166f(%RIP),%ZMM17 |
0x401a41 VBROADCASTSS 0x1669(%RIP),%ZMM16 |
0x401a4b VBROADCASTSS %XMM1,%ZMM25 |
0x401a51 VBROADCASTSS %XMM2,%ZMM24 |
0x401a57 VBROADCASTSD 0x1657(%RIP),%ZMM14 |
0x401a61 VBROADCASTSS %XMM3,%ZMM23 |
0x401a67 VBROADCASTSS %XMM4,%ZMM22 |
0x401a6d SAL $0x6,%RBX |
0x401a71 VMOVAPS %ZMM12,%ZMM11 |
0x401a77 VMOVAPS %ZMM12,%ZMM10 |
0x401a7d VMOVAPS %ZMM12,%ZMM15 |
0x401a83 XOR %R11D,%R11D |
0x401a86 NOPW %CS:(%RAX,%RAX,1) |
(4) 0x401a90 VMOVUPS (%RDX,%R11,1),%ZMM0 |
(4) 0x401a97 VMOVUPS (%RSI,%R11,1),%ZMM6 |
(4) 0x401a9e VMOVUPS (%RCX,%R11,1),%ZMM5 |
(4) 0x401aa5 VSUBPS %ZMM25,%ZMM0,%ZMM8 |
(4) 0x401aab VSUBPS %ZMM26,%ZMM6,%ZMM9 |
(4) 0x401ab1 VSUBPS %ZMM24,%ZMM5,%ZMM30 |
(4) 0x401ab7 VMULPS %ZMM8,%ZMM8,%ZMM28 |
(4) 0x401abd VFMADD231PS %ZMM9,%ZMM9,%ZMM28 |
(4) 0x401ac3 VFMADD231PS %ZMM30,%ZMM30,%ZMM28 |
(4) 0x401ac9 VCMPPS $0xe,%ZMM15,%ZMM28,%K2 |
(4) 0x401ad0 VCMPPS $0x1,%ZMM23,%ZMM28,%K1 |
(4) 0x401ad7 VADDPS %ZMM22,%ZMM28,%ZMM0 |
(4) 0x401add VCVTPS2PD %YMM0,%ZMM6 |
(4) 0x401ae3 VEXTRACTF32X8 $0x1,%ZMM0,%YMM5 |
(4) 0x401aea VMOVAPS %ZMM28,%ZMM0 |
(4) 0x401af0 VSQRTPD %ZMM6,%ZMM29 |
(4) 0x401af6 VCVTPS2PD %YMM5,%ZMM5 |
(4) 0x401afc VMULPD %ZMM29,%ZMM6,%ZMM6 |
(4) 0x401b02 VFMADD132PS %ZMM21,%ZMM20,%ZMM0 |
(4) 0x401b08 VSQRTPD %ZMM5,%ZMM31 |
(4) 0x401b0e VMULPD %ZMM31,%ZMM5,%ZMM5 |
(4) 0x401b14 VMOVUPS (%R8,%R11,1),%ZMM13{%K1} |
(4) 0x401b1b ADD $0x40,%R11 |
(4) 0x401b1f VMOVAPS %ZMM13,%ZMM27{%K1}{z} |
(4) 0x401b25 VFMADD132PS %ZMM28,%ZMM19,%ZMM0 |
(4) 0x401b2b VDIVPD %ZMM6,%ZMM14,%ZMM6 |
(4) 0x401b31 VDIVPD %ZMM5,%ZMM14,%ZMM5 |
(4) 0x401b37 VFMADD132PS %ZMM28,%ZMM18,%ZMM0 |
(4) 0x401b3d VFMADD132PS %ZMM28,%ZMM17,%ZMM0 |
(4) 0x401b43 VFMADD132PS %ZMM28,%ZMM16,%ZMM0 |
(4) 0x401b49 VCVTPS2PD %YMM0,%ZMM28 |
(4) 0x401b4f VEXTRACTF32X8 $0x1,%ZMM0,%YMM0 |
(4) 0x401b56 VADDPD %ZMM28,%ZMM6,%ZMM6 |
(4) 0x401b5c VCVTPS2PD %YMM0,%ZMM0 |
(4) 0x401b62 VADDPD %ZMM0,%ZMM5,%ZMM5 |
(4) 0x401b68 VCVTPD2PS %ZMM6,%YMM6 |
(4) 0x401b6e VCVTPD2PS %ZMM5,%YMM0 |
(4) 0x401b74 VINSERTF64X4 $0x1,%YMM0,%ZMM6,%ZMM5 |
(4) 0x401b7b VMULPS %ZMM27,%ZMM5,%ZMM6 |
(4) 0x401b81 VMULPS %ZMM6,%ZMM9,%ZMM0{%K2}{z} |
(4) 0x401b87 VMULPS %ZMM6,%ZMM8,%ZMM9{%K2}{z} |
(4) 0x401b8d VMULPS %ZMM6,%ZMM30,%ZMM8{%K2}{z} |
(4) 0x401b93 VADDPS %ZMM0,%ZMM10,%ZMM10 |
(4) 0x401b99 VADDPS %ZMM9,%ZMM11,%ZMM11 |
(4) 0x401b9f VADDPS %ZMM8,%ZMM12,%ZMM12 |
(4) 0x401ba5 CMP %R11,%RBX |
(4) 0x401ba8 JNE 401a90 |
0x401bae VEXTRACTF32X8 $0x1,%ZMM12,%YMM13 |
0x401bb5 VEXTRACTF32X8 $0x1,%ZMM11,%YMM8 |
0x401bbc MOV %EAX,%EDI |
0x401bbe VADDPS %YMM12,%YMM13,%YMM14 |
0x401bc3 VADDPS %YMM11,%YMM8,%YMM9 |
0x401bc8 AND $-0x10,%EDI |
0x401bcb VADDPS %YMM8,%YMM11,%YMM11 |
0x401bd0 VADDPS %YMM13,%YMM12,%YMM12 |
0x401bd5 MOV %EDI,%R10D |
0x401bd8 VEXTRACTF128 $0x1,%YMM14,%XMM15 |
0x401bde VADDPS %XMM14,%XMM15,%XMM5 |
0x401be3 VEXTRACTF128 $0x1,%YMM9,%XMM14 |
0x401be9 VADDPS %XMM9,%XMM14,%XMM15 |
0x401bee VEXTRACTF32X8 $0x1,%ZMM10,%YMM9 |
0x401bf5 VMOVHLPS %XMM5,%XMM5,%XMM6 |
0x401bf9 VADDPS %XMM5,%XMM6,%XMM0 |
0x401bfd VMOVHLPS %XMM15,%XMM15,%XMM5 |
0x401c02 VADDPS %XMM15,%XMM5,%XMM6 |
0x401c07 VSHUFPS $0x55,%XMM0,%XMM0,%XMM26 |
0x401c0e VADDPS %XMM0,%XMM26,%XMM17 |
0x401c14 VADDPS %YMM10,%YMM9,%YMM0 |
0x401c19 VSHUFPS $0x55,%XMM6,%XMM6,%XMM25 |
0x401c20 VADDPS %XMM6,%XMM25,%XMM16 |
0x401c26 VADDPS %YMM9,%YMM10,%YMM10 |
0x401c2b VEXTRACTF128 $0x1,%YMM0,%XMM14 |
0x401c31 VADDPS %XMM0,%XMM14,%XMM15 |
0x401c35 VMOVHLPS %XMM15,%XMM15,%XMM5 |
0x401c3a VADDPS %XMM15,%XMM5,%XMM6 |
0x401c3f VSHUFPS $0x55,%XMM6,%XMM6,%XMM0 |
0x401c44 VADDPS %XMM6,%XMM0,%XMM15 |
0x401c48 CMP %EDI,%EAX |
0x401c4a JE 402092 |
0x401c50 MOV %EAX,%R11D |
0x401c53 SUB %R10D,%R11D |
0x401c56 LEA -0x1(%R11),%EBX |
0x401c5a CMP $0x6,%EBX |
0x401c5d JBE 401e07 |
0x401c63 VMOVUPS (%RSI,%R10,4),%YMM14 |
0x401c69 VMOVUPS (%RDX,%R10,4),%YMM5 |
0x401c6f VBROADCASTSS %XMM7,%YMM13 |
0x401c74 VBROADCASTSS %XMM1,%YMM6 |
0x401c79 VBROADCASTSS %XMM2,%YMM0 |
0x401c7e VBROADCASTSS %XMM3,%YMM8 |
0x401c83 VBROADCASTSS %XMM4,%YMM9 |
0x401c88 VSUBPS %YMM13,%YMM14,%YMM15 |
0x401c8d VSUBPS %YMM6,%YMM5,%YMM14 |
0x401c91 VMOVUPS (%RCX,%R10,4),%YMM13 |
0x401c97 VXORPS %XMM5,%XMM5,%XMM5 |
0x401c9b VSUBPS %YMM0,%YMM13,%YMM13 |
0x401c9f VMULPS %YMM14,%YMM14,%YMM0 |
0x401ca4 VFMADD231PS %YMM15,%YMM15,%YMM0 |
0x401ca9 VFMADD231PS %YMM13,%YMM13,%YMM0 |
0x401cae VCMPPS $0x1,%YMM8,%YMM0,%K3 |
0x401cb5 VCMPPS $0x1,%YMM8,%YMM0,%YMM8 |
0x401cbb VADDPS %YMM9,%YMM0,%YMM9 |
0x401cc0 VCMPPS $0xe,%YMM5,%YMM0,%K4 |
0x401cc7 VMOVUPS (%R8,%R10,4),%YMM6{%K3}{z} |
0x401cce VCVTPS2PD %XMM9,%YMM5 |
0x401cd3 MOV %R11D,%R10D |
0x401cd6 VANDPS %YMM6,%YMM8,%YMM8 |
0x401cda VEXTRACTF128 $0x1,%YMM9,%XMM6 |
0x401ce0 VBROADCASTSS 0x13b7(%RIP),%YMM9 |
0x401ce9 AND $-0x8,%R10D |
0x401ced VCVTPS2PD %XMM6,%YMM6 |
0x401cf1 VSQRTPD %YMM6,%YMM24 |
0x401cf7 ADD %R10D,%EDI |
0x401cfa AND $0x7,%R11D |
0x401cfe VFMADD213PS 0x139c(%RIP){1to8},%YMM0,%YMM9 |
0x401d08 VMULPD %YMM24,%YMM6,%YMM6 |
0x401d0e VFMADD213PS 0x1390(%RIP){1to8},%YMM0,%YMM9 |
0x401d18 VFMADD213PS 0x138a(%RIP){1to8},%YMM0,%YMM9 |
0x401d22 VFMADD213PS 0x1384(%RIP){1to8},%YMM0,%YMM9 |
0x401d2c VFMADD213PS 0x137e(%RIP){1to8},%YMM9,%YMM0 |
0x401d36 VSQRTPD %YMM5,%YMM9 |
0x401d3a VMULPD %YMM9,%YMM5,%YMM5 |
0x401d3f VBROADCASTSD 0x1370(%RIP),%YMM9 |
0x401d48 VCVTPS2PD %XMM0,%YMM23 |
0x401d4e VEXTRACTF128 $0x1,%YMM0,%XMM0 |
0x401d54 VDIVPD %YMM5,%YMM9,%YMM5 |
0x401d58 VDIVPD %YMM6,%YMM9,%YMM9 |
0x401d5c VCVTPS2PD %XMM0,%YMM6 |
0x401d60 VADDPD %YMM23,%YMM5,%YMM5 |
0x401d66 VCVTPD2PS %YMM5,%XMM5 |
0x401d6a VADDPD %YMM6,%YMM9,%YMM9 |
0x401d6e VCVTPD2PS %YMM9,%XMM0 |
0x401d73 VINSERTF128 $0x1,%XMM0,%YMM5,%YMM6 |
0x401d79 VMULPS %YMM8,%YMM6,%YMM8 |
0x401d7e VMULPS %YMM8,%YMM15,%YMM9{%K4}{z} |
0x401d84 VMULPS %YMM8,%YMM14,%YMM15{%K4}{z} |
0x401d8a VMULPS %YMM8,%YMM13,%YMM14{%K4}{z} |
0x401d90 VADDPS %YMM9,%YMM10,%YMM10 |
0x401d95 VADDPS %YMM15,%YMM11,%YMM11 |
0x401d9a VADDPS %YMM14,%YMM12,%YMM12 |
0x401d9f VEXTRACTF128 $0x1,%YMM10,%XMM14 |
0x401da5 VEXTRACTF128 $0x1,%YMM11,%XMM8 |
0x401dab VADDPS %XMM10,%XMM14,%XMM10 |
0x401db0 VEXTRACTF128 $0x1,%YMM12,%XMM13 |
0x401db6 VADDPS %XMM11,%XMM8,%XMM9 |
0x401dbb VADDPS %XMM12,%XMM13,%XMM5 |
0x401dc0 VMOVHLPS %XMM10,%XMM10,%XMM12 |
0x401dc5 VMOVHLPS %XMM9,%XMM9,%XMM15 |
0x401dca VADDPS %XMM10,%XMM12,%XMM13 |
0x401dcf VMOVHLPS %XMM5,%XMM5,%XMM0 |
0x401dd3 VADDPS %XMM9,%XMM15,%XMM11 |
0x401dd8 VADDPS %XMM5,%XMM0,%XMM6 |
0x401ddc VSHUFPS $0x55,%XMM13,%XMM13,%XMM5 |
0x401de2 VADDPS %XMM13,%XMM5,%XMM15 |
0x401de7 VSHUFPS $0x55,%XMM11,%XMM11,%XMM21 |
0x401dee VADDPS %XMM11,%XMM21,%XMM16 |
0x401df4 VSHUFPS $0x55,%XMM6,%XMM6,%XMM22 |
0x401dfb VADDPS %XMM6,%XMM22,%XMM17 |
0x401e01 JE 402092 |
0x401e07 MOVSXD %EDI,%RDI |
0x401e0a VMOVSS 0x128e(%RIP),%XMM8 |
0x401e12 VMOVSS 0x128a(%RIP),%XMM9 |
0x401e1a VXORPS %XMM6,%XMM6,%XMM6 |
0x401e1e VMOVSS (%RDX,%RDI,4),%XMM19 |
0x401e25 VMOVSS (%RSI,%RDI,4),%XMM20 |
0x401e2c MOV %RDI,%R11 |
0x401e2f VMOVSS (%RCX,%RDI,4),%XMM18 |
0x401e36 NOT %R11 |
0x401e39 VMOVSS 0x1267(%RIP),%XMM10 |
0x401e41 VSUBSS %XMM1,%XMM19,%XMM27 |
0x401e47 VSUBSS %XMM7,%XMM20,%XMM30 |
0x401e4d ADD %EAX,%R11D |
0x401e50 VMOVSS 0x1254(%RIP),%XMM11 |
0x401e58 VSUBSS %XMM2,%XMM18,%XMM29 |
0x401e5e AND $0x1,%R11D |
0x401e62 VMOVSS 0x1246(%RIP),%XMM12 |
0x401e6a VMOVSS 0x1242(%RIP),%XMM13 |
0x401e72 VMOVSD 0x123e(%RIP),%XMM14 |
0x401e7a VMULSS %XMM27,%XMM27,%XMM0 |
0x401e80 VFMADD231SS %XMM30,%XMM30,%XMM0 |
0x401e86 VFMADD231SS %XMM29,%XMM29,%XMM0 |
0x401e8c VCOMISS %XMM0,%XMM3 |
0x401e90 JA 4020b8 |
0x401e96 VXORPS %XMM31,%XMM31,%XMM31 |
0x401e9c VCOMISS %XMM6,%XMM0 |
0x401ea0 JBE 401f02 |
0x401ea2 VADDSS %XMM0,%XMM4,%XMM5 |
0x401ea6 VMOVAPS %XMM0,%XMM28 |
0x401eac VFMADD132SS %XMM8,%XMM9,%XMM28 |
0x401eb2 VCVTSS2SD %XMM5,%XMM5,%XMM5 |
0x401eb6 VSQRTSD %XMM5,%XMM5,%XMM26 |
0x401ebc VMULSD %XMM26,%XMM5,%XMM5 |
0x401ec2 VFMADD132SS %XMM0,%XMM10,%XMM28 |
0x401ec8 VDIVSD %XMM5,%XMM14,%XMM5 |
0x401ecc VFMADD132SS %XMM0,%XMM11,%XMM28 |
0x401ed2 VFMADD132SS %XMM0,%XMM12,%XMM28 |
0x401ed8 VFMADD132SS %XMM28,%XMM13,%XMM0 |
0x401ede VCVTSS2SD %XMM0,%XMM0,%XMM0 |
0x401ee2 VADDSD %XMM0,%XMM5,%XMM5 |
0x401ee6 VCVTSD2SS %XMM5,%XMM5,%XMM0 |
0x401eea VMULSS %XMM31,%XMM0,%XMM5 |
0x401ef0 VFMADD231SS %XMM5,%XMM30,%XMM15 |
0x401ef6 VFMADD231SS %XMM5,%XMM27,%XMM16 |
0x401efc VFMADD231SS %XMM5,%XMM29,%XMM17 |
0x401f02 INC %RDI |
0x401f05 CMP %EDI,%EAX |
0x401f07 JLE 402092 |
0x401f0d TEST %R11D,%R11D |
0x401f10 JNE 401fd5 |
0x401f16 NOPW %CS:(%RAX,%RAX,1) |
(3) 0x401f20 VMOVSS (%RDX,%RDI,4),%XMM31 |
(3) 0x401f27 VMOVSS (%RSI,%RDI,4),%XMM18 |
(3) 0x401f2e VXORPS %XMM24,%XMM24,%XMM24 |
(3) 0x401f34 VMOVSS (%RCX,%RDI,4),%XMM26 |
(3) 0x401f3b VSUBSS %XMM1,%XMM31,%XMM28 |
(3) 0x401f41 VSUBSS %XMM7,%XMM18,%XMM29 |
(3) 0x401f47 VSUBSS %XMM2,%XMM26,%XMM25 |
(3) 0x401f4d VMULSS %XMM28,%XMM28,%XMM0 |
(3) 0x401f53 VFMADD231SS %XMM29,%XMM29,%XMM0 |
(3) 0x401f59 VFMADD231SS %XMM25,%XMM25,%XMM0 |
(3) 0x401f5f VCOMISS %XMM0,%XMM3 |
(3) 0x401f63 JBE 401f6c |
(3) 0x401f65 VMOVSS (%R8,%RDI,4),%XMM24 |
(3) 0x401f6c VCOMISS %XMM6,%XMM0 |
(3) 0x401f70 JBE 401fd2 |
(3) 0x401f72 VMOVAPS %XMM0,%XMM23 |
(3) 0x401f78 VADDSS %XMM0,%XMM4,%XMM5 |
(3) 0x401f7c VFMADD132SS %XMM8,%XMM9,%XMM23 |
(3) 0x401f82 VCVTSS2SD %XMM5,%XMM5,%XMM5 |
(3) 0x401f86 VFMADD132SS %XMM0,%XMM10,%XMM23 |
(3) 0x401f8c VFMADD132SS %XMM0,%XMM11,%XMM23 |
(3) 0x401f92 VFMADD132SS %XMM0,%XMM12,%XMM23 |
(3) 0x401f98 VFMADD132SS %XMM0,%XMM13,%XMM23 |
(3) 0x401f9e VSQRTSD %XMM5,%XMM5,%XMM0 |
(3) 0x401fa2 VMULSD %XMM0,%XMM5,%XMM5 |
(3) 0x401fa6 VCVTSS2SD %XMM23,%XMM23,%XMM20 |
(3) 0x401fac VDIVSD %XMM5,%XMM14,%XMM0 |
(3) 0x401fb0 VADDSD %XMM20,%XMM0,%XMM5 |
(3) 0x401fb6 VCVTSD2SS %XMM5,%XMM5,%XMM0 |
(3) 0x401fba VMULSS %XMM24,%XMM0,%XMM5 |
(3) 0x401fc0 VFMADD231SS %XMM5,%XMM29,%XMM15 |
(3) 0x401fc6 VFMADD231SS %XMM5,%XMM28,%XMM16 |
(3) 0x401fcc VFMADD231SS %XMM5,%XMM25,%XMM17 |
(3) 0x401fd2 INC %RDI |
(3) 0x401fd5 VMOVSS (%RDX,%RDI,4),%XMM23 |
(3) 0x401fdc VMOVSS (%RSI,%RDI,4),%XMM25 |
(3) 0x401fe3 VXORPS %XMM19,%XMM19,%XMM19 |
(3) 0x401fe9 VMOVSS (%RCX,%RDI,4),%XMM22 |
(3) 0x401ff0 VSUBSS %XMM1,%XMM23,%XMM20 |
(3) 0x401ff6 VSUBSS %XMM7,%XMM25,%XMM24 |
(3) 0x401ffc VSUBSS %XMM2,%XMM22,%XMM30 |
(3) 0x402002 VMULSS %XMM20,%XMM20,%XMM0 |
(3) 0x402008 VFMADD231SS %XMM24,%XMM24,%XMM0 |
(3) 0x40200e VFMADD231SS %XMM30,%XMM30,%XMM0 |
(3) 0x402014 VCOMISS %XMM0,%XMM3 |
(3) 0x402018 JBE 402021 |
(3) 0x40201a VMOVSS (%R8,%RDI,4),%XMM19 |
(3) 0x402021 VCOMISS %XMM6,%XMM0 |
(3) 0x402025 JBE 402087 |
(3) 0x402027 VADDSS %XMM0,%XMM4,%XMM5 |
(3) 0x40202b VMOVAPS %XMM0,%XMM21 |
(3) 0x402031 VFMADD132SS %XMM8,%XMM9,%XMM21 |
(3) 0x402037 VCVTSS2SD %XMM5,%XMM5,%XMM5 |
(3) 0x40203b VSQRTSD %XMM5,%XMM5,%XMM27 |
(3) 0x402041 VMULSD %XMM27,%XMM5,%XMM5 |
(3) 0x402047 VFMADD132SS %XMM0,%XMM10,%XMM21 |
(3) 0x40204d VDIVSD %XMM5,%XMM14,%XMM5 |
(3) 0x402051 VFMADD132SS %XMM0,%XMM11,%XMM21 |
(3) 0x402057 VFMADD132SS %XMM0,%XMM12,%XMM21 |
(3) 0x40205d VFMADD132SS %XMM21,%XMM13,%XMM0 |
(3) 0x402063 VCVTSS2SD %XMM0,%XMM0,%XMM0 |
(3) 0x402067 VADDSD %XMM0,%XMM5,%XMM5 |
(3) 0x40206b VCVTSD2SS %XMM5,%XMM5,%XMM0 |
(3) 0x40206f VMULSS %XMM19,%XMM0,%XMM5 |
(3) 0x402075 VFMADD231SS %XMM5,%XMM24,%XMM15 |
(3) 0x40207b VFMADD231SS %XMM5,%XMM20,%XMM16 |
(3) 0x402081 VFMADD231SS %XMM5,%XMM30,%XMM17 |
(3) 0x402087 INC %RDI |
(3) 0x40208a CMP %EDI,%EAX |
(3) 0x40208c JG 401f20 |
0x402092 VZEROUPPER |
0x402095 MOV 0x10(%RBP),%RDX |
0x402099 MOV 0x18(%RBP),%RCX |
0x40209d VMOVSS %XMM15,(%R9) |
0x4020a2 MOV -0x8(%RBP),%RBX |
0x4020a6 VMOVSS %XMM16,(%RDX) |
0x4020ac VMOVSS %XMM17,(%RCX) |
0x4020b2 LEAVE |
0x4020b3 RET |
0x4020b4 NOPL (%RAX) |
0x4020b8 VMOVSS (%R8,%RDI,4),%XMM31 |
0x4020bf JMP 401e9c |
0x4020c4 NOPL (%RAX) |
0x4020c8 VXORPS %XMM17,%XMM17,%XMM17 |
0x4020ce VMOVAPS %XMM17,%XMM16 |
0x4020d4 VMOVAPS %XMM17,%XMM15 |
0x4020da JMP 402095 |
0x4020dc VXORPS %XMM12,%XMM12,%XMM12 |
0x4020e1 VXORPS %XMM17,%XMM17,%XMM17 |
0x4020e7 XOR %R10D,%R10D |
0x4020ea XOR %EDI,%EDI |
0x4020ec VMOVAPS %YMM12,%YMM11 |
0x4020f1 VMOVAPS %YMM12,%YMM10 |
0x4020f6 VMOVAPS %XMM17,%XMM16 |
0x4020fc VMOVAPS %XMM17,%XMM15 |
0x402102 JMP 401c50 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►50.23+ | main._omp_fn.1 | main.c:144 | exec |
○ | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
►49.77+ | main._omp_fn.1 | main.c:144 | exec |
○ | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
Path / |
Source file and lines | Step10_orig.c:10-41 |
Module | exec |
nb instructions | 220 |
nb uops | 234 |
loop length | 1175 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 30 |
used ymm registers | 13 |
used zmm registers | 16 |
nb stack references | 3 |
ADD-SUB / MUL ratio | 3.80 |
micro-operation queue | 58.50 cycles |
front end | 58.50 cycles |
| | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 |
---|---|---|---|---|---|---|---|---|
uops | 43.17 | 42.83 | 16.50 | 16.50 | 5.00 | 45.00 | 33.00 | 6.00 |
cycles | 43.17 | 42.83 | 16.50 | 16.50 | 5.00 | 45.00 | 33.00 | 6.00 |
Cycles executing div or sqrt instructions | 42.50-50.00 |
FE+BE cycles | 118.51-122.22 |
Stall cycles | 60.46-64.17 |
ROB full (events) | 68.96-72.67 |
Front-end | 58.50 |
Dispatch | 45.00 |
DIV/SQRT | 42.50-50.00 |
Overall L1 | 58.50 |
all | 10% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 64% |
load | 31% |
store | 0% |
mul | 70% |
add-sub | 86% |
fma | 41% |
div/sqrt | 66% |
other | 68% |
all | 61% |
load | 30% |
store | 0% |
mul | 70% |
add-sub | 86% |
fma | 41% |
div/sqrt | 66% |
other | 62% |
all | 9% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 9% |
all | 26% |
load | 20% |
store | 6% |
mul | 37% |
add-sub | 32% |
fma | 24% |
div/sqrt | 37% |
other | 24% |
all | 25% |
load | 20% |
store | 6% |
mul | 37% |
add-sub | 32% |
fma | 24% |
div/sqrt | 37% |
other | 23% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
TEST %EDI,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 4020c8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %EDI,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
LEA -0x1(%RDI),%EDI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVAPS %XMM0,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CMP $0xe,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JBE 4020dc | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %EAX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VXORPS %XMM12,%XMM12,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VBROADCASTSS 0x1690(%RIP),%ZMM21 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VBROADCASTSS 0x168a(%RIP),%ZMM20 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
SHR $0x4,%EBX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
VBROADCASTSS 0x1681(%RIP),%ZMM19 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VBROADCASTSS 0x167b(%RIP),%ZMM18 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VBROADCASTSS %XMM0,%ZMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSS 0x166f(%RIP),%ZMM17 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VBROADCASTSS 0x1669(%RIP),%ZMM16 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VBROADCASTSS %XMM1,%ZMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSS %XMM2,%ZMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD 0x1657(%RIP),%ZMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VBROADCASTSS %XMM3,%ZMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSS %XMM4,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
SAL $0x6,%RBX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
VMOVAPS %ZMM12,%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPS %ZMM12,%ZMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPS %ZMM12,%ZMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %R11D,%R11D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VEXTRACTF32X8 $0x1,%ZMM12,%YMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VEXTRACTF32X8 $0x1,%ZMM11,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
MOV %EAX,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VADDPS %YMM12,%YMM13,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPS %YMM11,%YMM8,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
AND $-0x10,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VADDPS %YMM8,%YMM11,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPS %YMM13,%YMM12,%YMM12 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %EDI,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VEXTRACTF128 $0x1,%YMM14,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VADDPS %XMM14,%XMM15,%XMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VEXTRACTF128 $0x1,%YMM9,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VADDPS %XMM9,%XMM14,%XMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VEXTRACTF32X8 $0x1,%ZMM10,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVHLPS %XMM5,%XMM5,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDPS %XMM5,%XMM6,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVHLPS %XMM15,%XMM15,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDPS %XMM15,%XMM5,%XMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSHUFPS $0x55,%XMM0,%XMM0,%XMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDPS %XMM0,%XMM26,%XMM17 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPS %YMM10,%YMM9,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSHUFPS $0x55,%XMM6,%XMM6,%XMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDPS %XMM6,%XMM25,%XMM16 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPS %YMM9,%YMM10,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VEXTRACTF128 $0x1,%YMM0,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VADDPS %XMM0,%XMM14,%XMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVHLPS %XMM15,%XMM15,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDPS %XMM15,%XMM5,%XMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSHUFPS $0x55,%XMM6,%XMM6,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDPS %XMM6,%XMM0,%XMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %EDI,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 402092 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %EAX,%R11D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SUB %R10D,%R11D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA -0x1(%R11),%EBX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP $0x6,%EBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JBE 401e07 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VMOVUPS (%RSI,%R10,4),%YMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVUPS (%RDX,%R10,4),%YMM5 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VBROADCASTSS %XMM7,%YMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSS %XMM1,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSS %XMM2,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSS %XMM3,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSS %XMM4,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VSUBPS %YMM13,%YMM14,%YMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBPS %YMM6,%YMM5,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVUPS (%RCX,%R10,4),%YMM13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VXORPS %XMM5,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VSUBPS %YMM0,%YMM13,%YMM13 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPS %YMM14,%YMM14,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PS %YMM15,%YMM15,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PS %YMM13,%YMM13,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCMPPS $0x1,%YMM8,%YMM0,%K3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VCMPPS $0x1,%YMM8,%YMM0,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPS %YMM9,%YMM0,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCMPPS $0xe,%YMM5,%YMM0,%K4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVUPS (%R8,%R10,4),%YMM6{%K3}{z} | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VCVTPS2PD %XMM9,%YMM5 | 2 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0 | 0 | 7 | 1 |
MOV %R11D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VANDPS %YMM6,%YMM8,%YMM8 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VEXTRACTF128 $0x1,%YMM9,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSS 0x13b7(%RIP),%YMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
AND $-0x8,%R10D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VCVTPS2PD %XMM6,%YMM6 | 2 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0 | 0 | 7 | 1 |
VSQRTPD %YMM6,%YMM24 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-19 | 9-12 |
ADD %R10D,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
AND $0x7,%R11D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VFMADD213PS 0x139c(%RIP){1to8},%YMM0,%YMM9 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM24,%YMM6,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PS 0x1390(%RIP){1to8},%YMM0,%YMM9 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PS 0x138a(%RIP){1to8},%YMM0,%YMM9 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PS 0x1384(%RIP){1to8},%YMM0,%YMM9 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PS 0x137e(%RIP){1to8},%YMM9,%YMM0 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSQRTPD %YMM5,%YMM9 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-19 | 9-12 |
VMULPD %YMM9,%YMM5,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VBROADCASTSD 0x1370(%RIP),%YMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VCVTPS2PD %XMM0,%YMM23 | 2 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0 | 0 | 7 | 1 |
VEXTRACTF128 $0x1,%YMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VDIVPD %YMM5,%YMM9,%YMM5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-14 | 8 |
VDIVPD %YMM6,%YMM9,%YMM9 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-14 | 8 |
VCVTPS2PD %XMM0,%YMM6 | 2 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0 | 0 | 7 | 1 |
VADDPD %YMM23,%YMM5,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCVTPD2PS %YMM5,%XMM5 | 2 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0 | 0 | 7 | 1 |
VADDPD %YMM6,%YMM9,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCVTPD2PS %YMM9,%XMM0 | 2 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0 | 0 | 7 | 1 |
VINSERTF128 $0x1,%XMM0,%YMM5,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMULPS %YMM8,%YMM6,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPS %YMM8,%YMM15,%YMM9{%K4}{z} | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPS %YMM8,%YMM14,%YMM15{%K4}{z} | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPS %YMM8,%YMM13,%YMM14{%K4}{z} | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPS %YMM9,%YMM10,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPS %YMM15,%YMM11,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPS %YMM14,%YMM12,%YMM12 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VEXTRACTF128 $0x1,%YMM10,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VEXTRACTF128 $0x1,%YMM11,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VADDPS %XMM10,%XMM14,%XMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VEXTRACTF128 $0x1,%YMM12,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VADDPS %XMM11,%XMM8,%XMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPS %XMM12,%XMM13,%XMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVHLPS %XMM10,%XMM10,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VMOVHLPS %XMM9,%XMM9,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDPS %XMM10,%XMM12,%XMM13 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVHLPS %XMM5,%XMM5,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDPS %XMM9,%XMM15,%XMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPS %XMM5,%XMM0,%XMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSHUFPS $0x55,%XMM13,%XMM13,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDPS %XMM13,%XMM5,%XMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSHUFPS $0x55,%XMM11,%XMM11,%XMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDPS %XMM11,%XMM21,%XMM16 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSHUFPS $0x55,%XMM6,%XMM6,%XMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDPS %XMM6,%XMM22,%XMM17 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
JE 402092 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOVSXD %EDI,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSS 0x128e(%RIP),%XMM8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSS 0x128a(%RIP),%XMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VXORPS %XMM6,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVSS (%RDX,%RDI,4),%XMM19 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSS (%RSI,%RDI,4),%XMM20 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RDI,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVSS (%RCX,%RDI,4),%XMM18 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
NOT %R11 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSS 0x1267(%RIP),%XMM10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VSUBSS %XMM1,%XMM19,%XMM27 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBSS %XMM7,%XMM20,%XMM30 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
ADD %EAX,%R11D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSS 0x1254(%RIP),%XMM11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VSUBSS %XMM2,%XMM18,%XMM29 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
AND $0x1,%R11D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSS 0x1246(%RIP),%XMM12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSS 0x1242(%RIP),%XMM13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x123e(%RIP),%XMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMULSS %XMM27,%XMM27,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231SS %XMM30,%XMM30,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231SS %XMM29,%XMM29,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCOMISS %XMM0,%XMM3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JA 4020b8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VXORPS %XMM31,%XMM31,%XMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VCOMISS %XMM6,%XMM0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JBE 401f02 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VADDSS %XMM0,%XMM4,%XMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPS %XMM0,%XMM28 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD132SS %XMM8,%XMM9,%XMM28 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCVTSS2SD %XMM5,%XMM5,%XMM5 | 2 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0 | 0 | 5 | 1 |
VSQRTSD %XMM5,%XMM5,%XMM26 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-19 | 4.50-6 |
VMULSD %XMM26,%XMM5,%XMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132SS %XMM0,%XMM10,%XMM28 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VDIVSD %XMM5,%XMM14,%XMM5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-14 | 4 |
VFMADD132SS %XMM0,%XMM11,%XMM28 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132SS %XMM0,%XMM12,%XMM28 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132SS %XMM28,%XMM13,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCVTSS2SD %XMM0,%XMM0,%XMM0 | 2 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0 | 0 | 5 | 1 |
VADDSD %XMM0,%XMM5,%XMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCVTSD2SS %XMM5,%XMM5,%XMM0 | 2 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0 | 0 | 5 | 1 |
VMULSS %XMM31,%XMM0,%XMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231SS %XMM5,%XMM30,%XMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231SS %XMM5,%XMM27,%XMM16 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231SS %XMM5,%XMM29,%XMM17 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
INC %RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP %EDI,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 402092 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
TEST %R11D,%R11D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JNE 401fd5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VZEROUPPER | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
MOV 0x10(%RBP),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x18(%RBP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSS %XMM15,(%R9) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV -0x8(%RBP),%RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSS %XMM16,(%RDX) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVSS %XMM17,(%RCX) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
LEAVE | 3 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 0 | 2-6 | 2 |
RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVSS (%R8,%RDI,4),%XMM31 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JMP 401e9c | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VXORPS %XMM17,%XMM17,%XMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPS %XMM17,%XMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPS %XMM17,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 402095 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
VXORPS %XMM12,%XMM12,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VXORPS %XMM17,%XMM17,%XMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %R10D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPS %YMM12,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPS %YMM12,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPS %XMM17,%XMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPS %XMM17,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 401c50 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
Source file and lines | Step10_orig.c:10-41 |
Module | exec |
nb instructions | 220 |
nb uops | 234 |
loop length | 1175 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 30 |
used ymm registers | 13 |
used zmm registers | 16 |
nb stack references | 3 |
ADD-SUB / MUL ratio | 3.80 |
micro-operation queue | 58.50 cycles |
front end | 58.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | |
---|---|---|---|---|---|---|---|---|
uops | 43.17 | 42.83 | 16.50 | 16.50 | 5.00 | 45.00 | 33.00 | 6.00 |
cycles | 43.17 | 42.83 | 16.50 | 16.50 | 5.00 | 45.00 | 33.00 | 6.00 |
Cycles executing div or sqrt instructions | 42.50-50.00 |
FE+BE cycles | 118.51-122.22 |
Stall cycles | 60.46-64.17 |
ROB full (events) | 68.96-72.67 |
Front-end | 58.50 |
Dispatch | 45.00 |
DIV/SQRT | 42.50-50.00 |
Overall L1 | 58.50 |
all | 10% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 64% |
load | 31% |
store | 0% |
mul | 70% |
add-sub | 86% |
fma | 41% |
div/sqrt | 66% |
other | 68% |
all | 61% |
load | 30% |
store | 0% |
mul | 70% |
add-sub | 86% |
fma | 41% |
div/sqrt | 66% |
other | 62% |
all | 9% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 9% |
all | 26% |
load | 20% |
store | 6% |
mul | 37% |
add-sub | 32% |
fma | 24% |
div/sqrt | 37% |
other | 24% |
all | 25% |
load | 20% |
store | 6% |
mul | 37% |
add-sub | 32% |
fma | 24% |
div/sqrt | 37% |
other | 23% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
TEST %EDI,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 4020c8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %EDI,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
LEA -0x1(%RDI),%EDI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVAPS %XMM0,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CMP $0xe,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JBE 4020dc | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %EAX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VXORPS %XMM12,%XMM12,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VBROADCASTSS 0x1690(%RIP),%ZMM21 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VBROADCASTSS 0x168a(%RIP),%ZMM20 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
SHR $0x4,%EBX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
VBROADCASTSS 0x1681(%RIP),%ZMM19 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VBROADCASTSS 0x167b(%RIP),%ZMM18 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VBROADCASTSS %XMM0,%ZMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSS 0x166f(%RIP),%ZMM17 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VBROADCASTSS 0x1669(%RIP),%ZMM16 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VBROADCASTSS %XMM1,%ZMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSS %XMM2,%ZMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD 0x1657(%RIP),%ZMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VBROADCASTSS %XMM3,%ZMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSS %XMM4,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
SAL $0x6,%RBX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
VMOVAPS %ZMM12,%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPS %ZMM12,%ZMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPS %ZMM12,%ZMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %R11D,%R11D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VEXTRACTF32X8 $0x1,%ZMM12,%YMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VEXTRACTF32X8 $0x1,%ZMM11,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
MOV %EAX,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VADDPS %YMM12,%YMM13,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPS %YMM11,%YMM8,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
AND $-0x10,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VADDPS %YMM8,%YMM11,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPS %YMM13,%YMM12,%YMM12 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %EDI,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VEXTRACTF128 $0x1,%YMM14,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VADDPS %XMM14,%XMM15,%XMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VEXTRACTF128 $0x1,%YMM9,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VADDPS %XMM9,%XMM14,%XMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VEXTRACTF32X8 $0x1,%ZMM10,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVHLPS %XMM5,%XMM5,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDPS %XMM5,%XMM6,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVHLPS %XMM15,%XMM15,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDPS %XMM15,%XMM5,%XMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSHUFPS $0x55,%XMM0,%XMM0,%XMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDPS %XMM0,%XMM26,%XMM17 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPS %YMM10,%YMM9,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSHUFPS $0x55,%XMM6,%XMM6,%XMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDPS %XMM6,%XMM25,%XMM16 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPS %YMM9,%YMM10,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VEXTRACTF128 $0x1,%YMM0,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VADDPS %XMM0,%XMM14,%XMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVHLPS %XMM15,%XMM15,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDPS %XMM15,%XMM5,%XMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSHUFPS $0x55,%XMM6,%XMM6,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDPS %XMM6,%XMM0,%XMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %EDI,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 402092 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %EAX,%R11D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SUB %R10D,%R11D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA -0x1(%R11),%EBX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP $0x6,%EBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JBE 401e07 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VMOVUPS (%RSI,%R10,4),%YMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVUPS (%RDX,%R10,4),%YMM5 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VBROADCASTSS %XMM7,%YMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSS %XMM1,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSS %XMM2,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSS %XMM3,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSS %XMM4,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VSUBPS %YMM13,%YMM14,%YMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBPS %YMM6,%YMM5,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVUPS (%RCX,%R10,4),%YMM13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VXORPS %XMM5,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VSUBPS %YMM0,%YMM13,%YMM13 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPS %YMM14,%YMM14,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PS %YMM15,%YMM15,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PS %YMM13,%YMM13,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCMPPS $0x1,%YMM8,%YMM0,%K3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VCMPPS $0x1,%YMM8,%YMM0,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPS %YMM9,%YMM0,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCMPPS $0xe,%YMM5,%YMM0,%K4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVUPS (%R8,%R10,4),%YMM6{%K3}{z} | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VCVTPS2PD %XMM9,%YMM5 | 2 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0 | 0 | 7 | 1 |
MOV %R11D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VANDPS %YMM6,%YMM8,%YMM8 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VEXTRACTF128 $0x1,%YMM9,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSS 0x13b7(%RIP),%YMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
AND $-0x8,%R10D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VCVTPS2PD %XMM6,%YMM6 | 2 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0 | 0 | 7 | 1 |
VSQRTPD %YMM6,%YMM24 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-19 | 9-12 |
ADD %R10D,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
AND $0x7,%R11D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VFMADD213PS 0x139c(%RIP){1to8},%YMM0,%YMM9 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM24,%YMM6,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PS 0x1390(%RIP){1to8},%YMM0,%YMM9 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PS 0x138a(%RIP){1to8},%YMM0,%YMM9 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PS 0x1384(%RIP){1to8},%YMM0,%YMM9 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PS 0x137e(%RIP){1to8},%YMM9,%YMM0 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSQRTPD %YMM5,%YMM9 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-19 | 9-12 |
VMULPD %YMM9,%YMM5,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VBROADCASTSD 0x1370(%RIP),%YMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VCVTPS2PD %XMM0,%YMM23 | 2 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0 | 0 | 7 | 1 |
VEXTRACTF128 $0x1,%YMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VDIVPD %YMM5,%YMM9,%YMM5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-14 | 8 |
VDIVPD %YMM6,%YMM9,%YMM9 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-14 | 8 |
VCVTPS2PD %XMM0,%YMM6 | 2 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0 | 0 | 7 | 1 |
VADDPD %YMM23,%YMM5,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCVTPD2PS %YMM5,%XMM5 | 2 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0 | 0 | 7 | 1 |
VADDPD %YMM6,%YMM9,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCVTPD2PS %YMM9,%XMM0 | 2 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0 | 0 | 7 | 1 |
VINSERTF128 $0x1,%XMM0,%YMM5,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMULPS %YMM8,%YMM6,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPS %YMM8,%YMM15,%YMM9{%K4}{z} | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPS %YMM8,%YMM14,%YMM15{%K4}{z} | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPS %YMM8,%YMM13,%YMM14{%K4}{z} | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPS %YMM9,%YMM10,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPS %YMM15,%YMM11,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPS %YMM14,%YMM12,%YMM12 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VEXTRACTF128 $0x1,%YMM10,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VEXTRACTF128 $0x1,%YMM11,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VADDPS %XMM10,%XMM14,%XMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VEXTRACTF128 $0x1,%YMM12,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VADDPS %XMM11,%XMM8,%XMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPS %XMM12,%XMM13,%XMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVHLPS %XMM10,%XMM10,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VMOVHLPS %XMM9,%XMM9,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDPS %XMM10,%XMM12,%XMM13 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVHLPS %XMM5,%XMM5,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDPS %XMM9,%XMM15,%XMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPS %XMM5,%XMM0,%XMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSHUFPS $0x55,%XMM13,%XMM13,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDPS %XMM13,%XMM5,%XMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSHUFPS $0x55,%XMM11,%XMM11,%XMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDPS %XMM11,%XMM21,%XMM16 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSHUFPS $0x55,%XMM6,%XMM6,%XMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDPS %XMM6,%XMM22,%XMM17 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
JE 402092 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOVSXD %EDI,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSS 0x128e(%RIP),%XMM8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSS 0x128a(%RIP),%XMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VXORPS %XMM6,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVSS (%RDX,%RDI,4),%XMM19 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSS (%RSI,%RDI,4),%XMM20 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RDI,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVSS (%RCX,%RDI,4),%XMM18 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
NOT %R11 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSS 0x1267(%RIP),%XMM10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VSUBSS %XMM1,%XMM19,%XMM27 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBSS %XMM7,%XMM20,%XMM30 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
ADD %EAX,%R11D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSS 0x1254(%RIP),%XMM11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VSUBSS %XMM2,%XMM18,%XMM29 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
AND $0x1,%R11D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSS 0x1246(%RIP),%XMM12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSS 0x1242(%RIP),%XMM13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x123e(%RIP),%XMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMULSS %XMM27,%XMM27,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231SS %XMM30,%XMM30,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231SS %XMM29,%XMM29,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCOMISS %XMM0,%XMM3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JA 4020b8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VXORPS %XMM31,%XMM31,%XMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VCOMISS %XMM6,%XMM0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JBE 401f02 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VADDSS %XMM0,%XMM4,%XMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPS %XMM0,%XMM28 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD132SS %XMM8,%XMM9,%XMM28 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCVTSS2SD %XMM5,%XMM5,%XMM5 | 2 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0 | 0 | 5 | 1 |
VSQRTSD %XMM5,%XMM5,%XMM26 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-19 | 4.50-6 |
VMULSD %XMM26,%XMM5,%XMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132SS %XMM0,%XMM10,%XMM28 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VDIVSD %XMM5,%XMM14,%XMM5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-14 | 4 |
VFMADD132SS %XMM0,%XMM11,%XMM28 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132SS %XMM0,%XMM12,%XMM28 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132SS %XMM28,%XMM13,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCVTSS2SD %XMM0,%XMM0,%XMM0 | 2 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0 | 0 | 5 | 1 |
VADDSD %XMM0,%XMM5,%XMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCVTSD2SS %XMM5,%XMM5,%XMM0 | 2 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0 | 0 | 5 | 1 |
VMULSS %XMM31,%XMM0,%XMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231SS %XMM5,%XMM30,%XMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231SS %XMM5,%XMM27,%XMM16 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231SS %XMM5,%XMM29,%XMM17 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
INC %RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP %EDI,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 402092 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
TEST %R11D,%R11D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JNE 401fd5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VZEROUPPER | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
MOV 0x10(%RBP),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x18(%RBP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSS %XMM15,(%R9) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV -0x8(%RBP),%RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSS %XMM16,(%RDX) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVSS %XMM17,(%RCX) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
LEAVE | 3 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 0 | 2-6 | 2 |
RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVSS (%R8,%RDI,4),%XMM31 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JMP 401e9c | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VXORPS %XMM17,%XMM17,%XMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPS %XMM17,%XMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPS %XMM17,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 402095 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
VXORPS %XMM12,%XMM12,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VXORPS %XMM17,%XMM17,%XMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %R10D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPS %YMM12,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPS %YMM12,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPS %XMM17,%XMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPS %XMM17,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 401c50 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼Step10_orig– | 99.83 | 40.61 |
○Loop 4 - Step10_orig.c:19-31 - exec | 99.42 | 40.44 |
○Loop 3 - Step10_orig.c:19-35 - exec | 0.11 | 0.04 |