Function: viscosity_kernel_.DIR.OMP.PARALLEL.2 | Module: exec | Source: viscosity_kernel.f90:50-94 | Coverage: 2.06% |
---|
Function: viscosity_kernel_.DIR.OMP.PARALLEL.2 | Module: exec | Source: viscosity_kernel.f90:50-94 | Coverage: 2.06% |
---|
/scratch_na/users/xoserete/qaas_runs/171-419-3245/intel/CloverLeafFC/build/CloverLeafFC/CloverLeaf_ref/kernels/viscosity_kernel.f90: 50 - 94 |
-------------------------------------------------------------------------------- |
! Listing of the OpenMP loop nest (source lines 50-94): the k loop is work-shared with !$OMP DO, the j loop is marked !$OMP SIMD, |
! and every iteration writes exactly one element, viscosity(j,k). The enclosing subroutine header is not part of this listing. |
50: !$OMP PARALLEL |
51: |
52: !$OMP DO PRIVATE(ugrad,vgrad,div,strain2,pgradx,pgrady,pgradx2,pgrady2,limiter,pgrad,xgrad,ygrad,grad,grad2,dirx,diry) |
53: DO k=y_min,y_max |
54: !$OMP SIMD |
55: DO j=x_min,x_max |
! ugrad: xvel0 at j+1 (rows k and k+1) minus xvel0 at j (rows k and k+1). |
56: ugrad=(xvel0(j+1,k )+xvel0(j+1,k+1))-(xvel0(j ,k )+xvel0(j ,k+1)) |
57: |
! vgrad: yvel0 at k+1 (columns j and j+1) minus yvel0 at k (columns j and j+1). |
58: vgrad=(yvel0(j ,k+1)+yvel0(j+1,k+1))-(yvel0(j ,k )+yvel0(j+1,k )) |
59: |
! div: the two differences weighted by celldx(j) and celldy(k); its sign is tested at line 74. |
60: div = (celldx(j)*(ugrad)+ celldy(k)*(vgrad)) |
61: |
! strain2: half the k-difference of xvel0 over celldy(k) plus half the j-difference of yvel0 over celldx(j). |
62: strain2 = 0.5_8*(xvel0(j, k+1) + xvel0(j+1,k+1)-xvel0(j ,k )-xvel0(j+1,k ))/celldy(k) & |
63: + 0.5_8*(yvel0(j+1,k ) + yvel0(j+1,k+1)-yvel0(j ,k )-yvel0(j ,k+1))/celldx(j) |
64: |
! Pressure differences across the neighbouring cells (j-1..j+1 and k-1..k+1), divided by the sum of two adjacent cell sizes. |
65: pgradx=(pressure(j+1,k)-pressure(j-1,k))/(celldx(j)+celldx(j+1)) |
66: pgrady=(pressure(j,k+1)-pressure(j,k-1))/(celldy(k)+celldy(k+1)) |
67: |
68: pgradx2 = pgradx*pgradx |
69: pgrady2 = pgrady*pgrady |
70: |
! limiter: the denominator is clamped to at least 1.0e-16 so a zero pressure gradient does not divide by zero. |
71: limiter = ((0.5_8*(ugrad)/celldx(j))*pgradx2+(0.5_8*(vgrad)/celldy(k))*pgrady2+strain2*pgradx*pgrady) & |
72: /MAX(pgradx2+pgrady2,1.0e-16_8) |
73: |
! viscosity is zero unless limiter <= 0 and div < 0. |
74: IF ((limiter.GT.0.0).OR.(div.GE.0.0))THEN |
75: viscosity(j,k) = 0.0 |
76: ELSE |
! Keep the sign of pgradx but force its magnitude to at least 1.0e-16 (it is a divisor at line 84). |
77: dirx=1.0_8 |
78: IF(pgradx.LT.0.0) dirx=-1.0_8 |
79: pgradx = dirx*MAX(1.0e-16_8,ABS(pgradx)) |
80: diry=1.0_8 |
! NOTE(review): the next test reads pgradx although it sets diry; by symmetry with lines 77-79, pgrady may have been intended -- confirm. |
! As written, the sign given to pgrady cannot change viscosity(j,k): pgrady is only squared (line 83) or used inside ABS (line 85). |
! The disassembly in this report appears to match the source as written (the second VFPCLASSPD tests the updated pgradx register). |
81: IF(pgradx.LT.0.0) diry=-1.0_8 |
82: pgrady = diry*MAX(1.0e-16_8,ABS(pgrady)) |
83: pgrad = SQRT(pgradx**2+pgrady**2) |
! xgrad / ygrad: cell size scaled by pgrad over each clamped component; grad is the smaller of the two. |
84: xgrad = ABS(celldx(j)*pgrad/pgradx) |
85: ygrad = ABS(celldy(k)*pgrad/pgrady) |
86: grad = MIN(xgrad,ygrad) |
87: grad2 = grad*grad |
88: |
89: viscosity(j,k)=2.0_8*density0(j,k)*grad2*limiter*limiter |
90: ENDIF |
91: |
92: ENDDO |
93: ENDDO |
94: !$OMP END DO |
# Entry of the outlined OpenMP region (report header: viscosity_kernel.f90:50-94). AT&T operand order: source first, destination last. |
# Prologue: save RBP and the callee-saved registers, align RSP down to 64 bytes, reserve 0x2c0 bytes of locals. |
0x47a740 PUSH %RBP |
0x47a741 MOV %RSP,%RBP |
0x47a744 PUSH %R15 |
0x47a746 PUSH %R14 |
0x47a748 PUSH %R13 |
0x47a74a PUSH %R12 |
0x47a74c PUSH %RBX |
0x47a74d AND $-0x40,%RSP |
0x47a751 SUB $0x2c0,%RSP |
# Preserve two incoming register arguments: R9 goes to a stack slot, RCX is kept in RBX. |
0x47a758 MOV %R9,0x48(%RSP) |
0x47a75d MOV %RCX,%RBX |
# EAX = 0x48(%RBP) - 0x50(%RBP); presumably (upper - lower) bound of the k loop -- TODO confirm against the caller. |
# A negative difference skips all loop work and jumps to the barrier at 0x47a7e8. |
0x47a760 MOV 0x50(%RBP),%ECX |
0x47a763 MOV 0x48(%RBP),%EAX |
0x47a766 SUB %ECX,%EAX |
0x47a768 MOVL $0,0x44(%RSP) |
0x47a770 JS 47a7e8 |
# Set up the by-reference arguments of __kmpc_for_static_init_4 in stack slots: 0x30 = 0 (lower), 0x2c = EAX (upper), 0x40 = 1 (stride). |
# RDI (first incoming argument) is saved; the 32-bit value it points to is passed in ESI (presumably the global thread id). |
0x47a772 MOV %RDI,0x68(%RSP) |
0x47a777 MOV (%RDI),%ESI |
0x47a779 MOVL $0,0x30(%RSP) |
0x47a781 MOV %EAX,0x2c(%RSP) |
0x47a785 MOVL $0x1,0x40(%RSP) |
# RSP drops by 8 here, so the RSP-relative offsets below are 8 larger than the slots written above (0x48 -> slot 0x40, and so on). |
0x47a78d SUB $0x8,%RSP |
0x47a791 LEA 0x48(%RSP),%RAX |
0x47a796 LEA 0x4c(%RSP),%RCX |
0x47a79b LEA 0x38(%RSP),%R8 |
0x47a7a0 LEA 0x34(%RSP),%R9 |
0x47a7a5 MOV $0x751310,%EDI |
0x47a7aa MOV %ESI,0x3c(%RSP) |
# EDX = 0x22 is the schedule-type argument (presumably kmp_sch_static -- confirm in the OpenMP runtime headers). |
0x47a7ae MOV $0x22,%EDX |
# Three stack arguments: 1, 1 and the address held in RAX (the stride slot). |
0x47a7b3 PUSH $0x1 |
0x47a7b5 PUSH $0x1 |
0x47a7b7 PUSH %RAX |
0x47a7b8 CALL 4044c0 <__kmpc_for_static_init_4@plt> |
# Drop the three pushed arguments plus the 8-byte pad. |
0x47a7bd ADD $0x20,%RSP |
# Read back the lower (0x30) and upper (0x2c) slots; ESI = upper - lower. |
# If the subtraction does not borrow, continue to the loop setup at 0x47a840; otherwise fall through to the exit path. |
0x47a7c1 MOV 0x30(%RSP),%EAX |
0x47a7c5 MOV 0x2c(%RSP),%ESI |
0x47a7c9 MOV %RAX,0x50(%RSP) |
0x47a7ce SUB %EAX,%ESI |
0x47a7d0 JAE 47a840 |
# Exit path (also the target of the outer-loop exit branch): end the static loop, using the ESI value saved before the init call. |
0x47a7d2 MOV $0x751330,%EDI |
0x47a7d7 MOV 0x34(%RSP),%ESI |
# Clear the upper vector-register state before calling external code. |
0x47a7db VZEROUPPER |
0x47a7de CALL 4040b0 <__kmpc_for_static_fini@plt> |
0x47a7e3 MOV 0x68(%RSP),%RDI |
# Barrier, reached both after the loop and directly from the early-out above; ESI is reloaded through the saved first argument. |
0x47a7e8 MOV (%RDI),%ESI |
0x47a7ea MOV $0x751350,%EDI |
0x47a7ef CALL 404580 <__kmpc_barrier@plt> |
# Epilogue: RSP is rebuilt from RBP (undoing the alignment), then the registers are restored in reverse push order. |
0x47a7f4 LEA -0x28(%RBP),%RSP |
0x47a7f8 POP %RBX |
0x47a7f9 POP %R12 |
0x47a7fb POP %R13 |
0x47a7fd POP %R14 |
0x47a7ff POP %R15 |
0x47a801 POP %RBP |
0x47a802 RET |
# Padding NOPs up to the next aligned branch target (0x47a840). |
0x47a803 NOPW %CS:(%RAX,%RAX,1) |
0x47a812 NOPW %CS:(%RAX,%RAX,1) |
0x47a821 NOPW %CS:(%RAX,%RAX,1) |
0x47a830 NOPW %CS:(%RAX,%RAX,1) |
0x47a83f NOP |
# Loop-invariant setup, reached from 0x47a7d0 when this thread has iterations to run. |
# Load five pointers from the caller frame. Judging by how they are used in the loops below, they appear to be: |
# 0x40(%RBP) celldx, 0x30(%RBP) density0, 0x28(%RBP) pressure, 0x20(%RBP) viscosity, 0x10(%RBP) yvel0 -- TODO confirm. |
0x47a840 MOV 0x40(%RBP),%RDX |
0x47a844 MOV 0x30(%RBP),%RDI |
0x47a848 MOV 0x28(%RBP),%R8 |
0x47a84c MOV 0x20(%RBP),%R9 |
0x47a850 MOV 0x10(%RBP),%R10 |
# RAX = 8 * (sign-extended 32-bit value saved from R9 at entry); R11 = 0x18 - RAX. |
0x47a854 MOVSXD 0x48(%RSP),%RAX |
0x47a859 SAL $0x3,%RAX |
0x47a85d MOV $0x18,%R11D |
0x47a863 SUB %RAX,%R11 |
# RCX = sign-extended low half of RBX (the RCX argument saved at entry). |
# Slots 0xb0, 0x98 and 0x90 receive 2-RCX, 1-RCX and 3-RCX; they are later added to the row counter to form row indices. |
0x47a866 MOVSXD %EBX,%RCX |
0x47a869 MOV $0x2,%R14D |
0x47a86f SUB %RCX,%R14 |
0x47a872 MOV %R14,0xb0(%RSP) |
# R14 = 0x10 - RAX: a second byte bias applied to several base pointers. |
0x47a87a MOV $0x10,%R14D |
0x47a880 SUB %RAX,%R14 |
0x47a883 LEA (%R9,%R14,1),%RAX |
0x47a887 MOV %RAX,0xa0(%RSP) |
0x47a88f MOV $0x1,%EAX |
0x47a894 SUB %RCX,%RAX |
0x47a897 MOV %RAX,0x98(%RSP) |
0x47a89f MOV $0x3,%EAX |
0x47a8a4 SUB %RCX,%RAX |
0x47a8a7 MOV %RAX,0x90(%RSP) |
# Biased base addresses are spilled to stack slots 0x88, 0xa8, 0x80, 0x78, 0x70 and 0xb8 for reuse on every row. |
0x47a8af LEA (%RDI,%R14,1),%RAX |
0x47a8b3 MOV %RBX,%RDI |
0x47a8b6 MOV %RAX,0x88(%RSP) |
0x47a8be ADD %R8,%R14 |
0x47a8c1 MOV %R14,0xa8(%RSP) |
0x47a8c9 LEA (%R8,%R11,1),%RAX |
0x47a8cd MOV %RAX,0x80(%RSP) |
0x47a8d5 LEA (%RDX,%R11,1),%RAX |
0x47a8d9 MOV %RAX,0x78(%RSP) |
0x47a8de LEA (%R10,%R11,1),%RAX |
0x47a8e2 MOV %RAX,0x70(%RSP) |
# 0x18(%RBP) is a sixth base pointer (appears to be xvel0 from its use in the loops -- TODO confirm). |
0x47a8e7 ADD 0x18(%RBP),%R11 |
0x47a8eb MOV %R11,0xb8(%RSP) |
# Five scalar constants broadcast to all lanes. Their values are not visible here; in the loops they are used as: |
# ZMM2 a multiplier, ZMM3 a dividend, ZMM4 a VMAXPD floor, ZMM5 a VANDPD mask, ZMM6 a VXORPD mask |
# (presumably 0.5, 1.0, 1.0e-16, the abs mask and the sign bit, matching the Fortran listing). |
0x47a8f3 VBROADCASTSD 0x8e4b3(%RIP),%ZMM2 |
0x47a8fd VBROADCASTSD 0xc5019(%RIP),%ZMM3 |
0x47a907 VBROADCASTSD 0x8e53f(%RIP),%ZMM4 |
0x47a911 VBROADCASTSD 0x8e465(%RIP),%ZMM5 |
0x47a91b VBROADCASTSD 0x8e4d3(%RIP),%ZMM6 |
# R8 and R13 hold pointers whose targets are read at the outer-loop head (presumably x_min and x_max). |
0x47a925 MOV 0x60(%RBP),%R8 |
0x47a929 MOV 0x58(%RBP),%R13 |
# R10D = 0x50(%RBP) + this thread's lower bound: the starting row counter. R11D = 0 counts outer iterations. |
0x47a92d MOV 0x50(%RBP),%EAX |
0x47a930 MOV 0x50(%RSP),%RCX |
0x47a935 LEA (%RCX,%RAX,1),%R10D |
0x47a939 XOR %R11D,%R11D |
# Keep RBX and the iteration span (ESI = upper - lower) in stack slots, then enter the outer loop at its head. |
0x47a93c MOV %RBX,0xc0(%RSP) |
0x47a944 MOV %ESI,0x38(%RSP) |
0x47a948 JMP 47a9b0 |
# Padding NOPs up to the aligned target 0x47a980. |
0x47a94a NOPW %CS:(%RAX,%RAX,1) |
0x47a959 NOPW %CS:(%RAX,%RAX,1) |
0x47a968 NOPW %CS:(%RAX,%RAX,1) |
0x47a977 NOPW (%RAX,%RAX,1) |
# Remainder store, entered from the masked remainder code (branches at 0x47b129 and 0x47b1ea). |
# ZMM0 = ZMM25 in lanes where K2 is set, zero elsewhere; it is stored under mask K1 at 0x20(%RBP) + 0x58(%RSP)*R15 + RAX*8. |
(919) 0x47a980 MOV 0x58(%RSP),%RCX |
(919) 0x47a985 VMOVAPD %ZMM25,%ZMM0{%K2}{z} |
(919) 0x47a98b IMUL %R15,%RCX |
(919) 0x47a98f ADD 0x20(%RBP),%RCX |
(919) 0x47a993 VMOVUPD %ZMM0,(%RCX,%RAX,8){%K1} |
(919) 0x47a99a MOV %RBX,%RDI |
# Outer-loop latch (0x47a99d): advance the iteration counter R11D and the row counter R10D. |
# The loop ends (jump to the exit path at 0x47a7d2) when the pre-increment R11D equals ESI. |
(919) 0x47a99d LEA 0x1(%R11),%EAX |
(919) 0x47a9a1 INC %R10D |
(919) 0x47a9a4 CMP %ESI,%R11D |
(919) 0x47a9a7 MOV %EAX,%R11D |
(919) 0x47a9aa JE 47a7d2 |
# Outer-loop head (0x47a9b0): reload the inner bounds through R8 and R13 (presumably x_min and x_max). |
# If (R13) - (R8) is negative the row has no work and control goes straight back to the latch. |
(919) 0x47a9b0 MOVSXD (%R8),%RCX |
(919) 0x47a9b3 MOV (%R13),%EBX |
(919) 0x47a9b7 CMP %ECX,%EBX |
(919) 0x47a9b9 JS 47a99d |
(919) 0x47a9bb MOV %R8,%R13 |
(919) 0x47a9be MOV %ESI,%R8D |
# ECX = 0x50(%RBP) + thread lower bound + R11: the current row index, sign-extended into R14 below. |
(919) 0x47a9c1 MOV 0x50(%RBP),%EAX |
(919) 0x47a9c4 MOV %RCX,0x60(%RSP) |
(919) 0x47a9c9 MOV 0x50(%RSP),%RCX |
(919) 0x47a9ce ADD %ECX,%EAX |
(919) 0x47a9d0 LEA (%RAX,%R11,1),%ECX |
# 64-bit values loaded through the pointers at 0x68/0x70/0x78/0x80(%RBP) into R12, R15, R9 and RDI. |
# They multiply row indices below, so they are presumably per-array row strides in bytes -- TODO confirm. |
(919) 0x47a9d4 MOV 0x68(%RBP),%RDX |
(919) 0x47a9d8 MOV (%RDX),%R12 |
(919) 0x47a9db MOV 0x70(%RBP),%RDX |
(919) 0x47a9df MOV (%RDX),%R15 |
(919) 0x47a9e2 MOVSXD %ECX,%R14 |
(919) 0x47a9e5 LEA -0x2(%RDI),%ECX |
(919) 0x47a9e8 MOVSXD %ECX,%RCX |
(919) 0x47a9eb MOV %R14,%RDX |
(919) 0x47a9ee SUB %RCX,%RDX |
(919) 0x47a9f1 MOV %RDI,%RSI |
# Row-invariant scalars from the array at 0x38(%RBP) (appears to be celldy): XMM22 = element for this row, XMM1 = it plus the next element. |
(919) 0x47a9f4 MOV 0x38(%RBP),%RDI |
(919) 0x47a9f8 VMOVSD (%RDI,%RDX,8),%XMM22 |
(919) 0x47a9ff MOV 0x78(%RBP),%RDX |
(919) 0x47aa03 MOV (%RDX),%R9 |
(919) 0x47aa06 LEA 0x1(%R11,%RAX,1),%EAX |
(919) 0x47aa0b CLTQ |
(919) 0x47aa0d SUB %RCX,%RAX |
(919) 0x47aa10 MOV 0x60(%RSP),%RCX |
(919) 0x47aa15 VADDSD (%RDI,%RAX,8),%XMM22,%XMM1 |
(919) 0x47aa1c MOV 0x80(%RBP),%RAX |
(919) 0x47aa23 MOV (%RAX),%RDI |
# Both divisions are hoisted out of the inner loop: XMM24 = c / XMM22 and XMM23 = c / XMM1, where c is the constant at 0xc4ef2(%RIP) (presumably 1.0). |
(919) 0x47aa26 VMOVSD 0xc4ef2(%RIP),%XMM0 |
(919) 0x47aa2e VDIVSD %XMM22,%XMM0,%XMM24 |
(919) 0x47aa34 VDIVSD %XMM1,%XMM0,%XMM23 |
# Inner trip count: EBX = upper - lower + 1, replaced by 1 when that is below 2. It is broadcast to ZMM25 for the remainder mask. |
(919) 0x47aa3a SUB %ECX,%EBX |
(919) 0x47aa3c INC %EBX |
(919) 0x47aa3e CMP $0x2,%EBX |
(919) 0x47aa41 MOV $0x1,%EAX |
(919) 0x47aa46 CMOVL %EAX,%EBX |
(919) 0x47aa49 MOV %RBX,%RDX |
(919) 0x47aa4c MOV %RBX,0xf0(%RSP) |
(919) 0x47aa54 VPBROADCASTQ %RBX,%ZMM25 |
# RDX = trip count rounded down to a multiple of 8; when it is zero only the masked remainder at 0x47ae40 runs. |
(919) 0x47aa5a AND $0x7ffffff8,%RDX |
(919) 0x47aa61 MOV %RDI,0x58(%RSP) |
(919) 0x47aa66 JE 47ae40 |
# Spill outer-loop state that the vector loop's register use will overwrite. |
(919) 0x47aa6c MOV %R14,0xe0(%RSP) |
(919) 0x47aa74 MOV %R11,0xe8(%RSP) |
(919) 0x47aa7c MOV %R10D,0x3c(%RSP) |
# Three row indices built from the row counter and the precomputed offsets: R8 (slot 0xb0), RSI (slot 0x98) and RAX (slot 0x90). |
(919) 0x47aa81 MOVSXD %R10D,%RAX |
(919) 0x47aa84 MOV %RCX,%RBX |
(919) 0x47aa87 MOV 0xb0(%RSP),%RCX |
(919) 0x47aa8f LEA (%RCX,%RAX,1),%R8 |
(919) 0x47aa93 MOV 0x98(%RSP),%RCX |
(919) 0x47aa9b LEA (%RCX,%RAX,1),%RSI |
(919) 0x47aa9f ADD 0x90(%RSP),%RAX |
# Broadcast the row scalars: ZMM26 = XMM22, ZMM27 = XMM24, ZMM28 = XMM23. |
(919) 0x47aaa7 VBROADCASTSD %XMM22,%ZMM26 |
(919) 0x47aaad VBROADCASTSD %XMM24,%ZMM27 |
(919) 0x47aab3 VBROADCASTSD %XMM23,%ZMM28 |
# Row base pointers, each = stride * row index + 8 * lower inner bound + biased base: |
# R11, R12, R10 from slots 0x80/0xa8 with stride R9; RCX from slot 0xa0 (the store target of the vector loop). |
(919) 0x47aab9 MOV %R9,%RCX |
(919) 0x47aabc IMUL %R8,%RCX |
(919) 0x47aac0 LEA (%RCX,%RBX,8),%R11 |
(919) 0x47aac4 ADD 0x80(%RSP),%R11 |
(919) 0x47aacc MOV %RDI,%RCX |
(919) 0x47aacf IMUL %R8,%RCX |
(919) 0x47aad3 LEA (%RCX,%RBX,8),%RCX |
(919) 0x47aad7 ADD 0xa0(%RSP),%RCX |
# Slot 0xf8 = slot 0x88 + 8 * lower inner bound; the vector loop adds a row offset to it for its masked load. |
(919) 0x47aadf MOV 0x88(%RSP),%RDI |
(919) 0x47aae7 LEA (%RDI,%RBX,8),%RDI |
(919) 0x47aaeb MOV %RDI,0xf8(%RSP) |
(919) 0x47aaf3 IMUL %R9,%RSI |
(919) 0x47aaf7 MOV %R12,%R13 |
(919) 0x47aafa LEA (%RSI,%RBX,8),%R12 |
(919) 0x47aafe MOV 0xa8(%RSP),%RDI |
(919) 0x47ab06 ADD %RDI,%R12 |
(919) 0x47ab09 MOV %R9,0xd8(%RSP) |
(919) 0x47ab11 IMUL %RAX,%R9 |
(919) 0x47ab15 LEA (%R9,%RBX,8),%R10 |
(919) 0x47ab19 ADD %RDI,%R10 |
# RDI = slot 0x78 + 8 * lower inner bound: a one-dimensional array indexed only by the inner counter. |
(919) 0x47ab1c MOV 0x78(%RSP),%RSI |
(919) 0x47ab21 LEA (%RSI,%RBX,8),%RDI |
# R14 and RSI: two rows (indices R8 and RAX) of the array based at slot 0x70, stride R15. |
(919) 0x47ab25 MOV %R15,%RSI |
(919) 0x47ab28 IMUL %R8,%RSI |
(919) 0x47ab2c LEA (%RSI,%RBX,8),%R14 |
(919) 0x47ab30 MOV 0x70(%RSP),%R9 |
(919) 0x47ab35 ADD %R9,%R14 |
(919) 0x47ab38 MOV %R15,0xd0(%RSP) |
(919) 0x47ab40 MOV %R15,%RSI |
(919) 0x47ab43 IMUL %RAX,%RSI |
(919) 0x47ab47 LEA (%RSI,%RBX,8),%RSI |
(919) 0x47ab4b ADD %R9,%RSI |
# RAX and R9: two rows (indices RAX and R8) of the array based at slot 0xb8, stride R13 (copied from R12 above). |
(919) 0x47ab4e IMUL %R13,%RAX |
(919) 0x47ab52 LEA (%RAX,%RBX,8),%RAX |
(919) 0x47ab56 MOV 0xb8(%RSP),%R15 |
(919) 0x47ab5e ADD %R15,%RAX |
(919) 0x47ab61 MOV %R13,0xc8(%RSP) |
(919) 0x47ab69 IMUL %R8,%R13 |
(919) 0x47ab6d LEA (%R13,%RBX,8),%R9 |
(919) 0x47ab72 ADD %R15,%R9 |
# R13 = 0 is the element index of the vector loop; enter it at the load block (0x47ab9a), skipping the store at its top. |
(919) 0x47ab75 XOR %R13D,%R13D |
(919) 0x47ab78 JMP 47ab9a |
0x47ab7a NOPW (%RAX,%RAX,1) |
# Vector main loop: 8 doubles per iteration, R13 = element index, RDX = trip count rounded down to a multiple of 8. |
# The loop is rotated: the store of the previous computation sits at the top (0x47ab80) and first entry is at 0x47ab9a. |
# Names such as ugrad/pgradx below refer to the Fortran listing in this report; the mapping is inferred from the arithmetic -- TODO confirm. |
# Store: ZMM21 in lanes where K1 is set, zero elsewhere (K1 lanes are those that took the ELSE branch). |
(920) 0x47ab80 VMOVAPD %ZMM21,%ZMM0{%K1}{z} |
(920) 0x47ab86 VMOVUPD %ZMM0,(%RCX,%R13,8) |
(920) 0x47ab8d ADD $0x8,%R13 |
(920) 0x47ab91 CMP %RDX,%R13 |
(920) 0x47ab94 JAE 47adc0 |
# Four overlapping loads (offsets -8 and 0) from two rows of the first array, then ZMM0 = (ZMM29+ZMM31) - (ZMM21+ZMM30); matches ugrad. |
(920) 0x47ab9a VMOVUPD -0x8(%R9,%R13,8),%ZMM21 |
(920) 0x47aba5 VMOVUPD (%R9,%R13,8),%ZMM29 |
(920) 0x47abac VMOVUPD -0x8(%RAX,%R13,8),%ZMM30 |
(920) 0x47abb7 VMOVUPD (%RAX,%R13,8),%ZMM31 |
(920) 0x47abbe VADDPD %ZMM29,%ZMM31,%ZMM1 |
(920) 0x47abc4 VADDPD %ZMM30,%ZMM21,%ZMM0 |
(920) 0x47abca VSUBPD %ZMM0,%ZMM1,%ZMM0 |
# Same pattern on the second array: ZMM20 = (ZMM16+ZMM17) - (ZMM18+ZMM19), i.e. one row minus the other; matches vgrad. |
(920) 0x47abd0 VMOVUPD -0x8(%RSI,%R13,8),%ZMM16 |
(920) 0x47abdb VMOVUPD (%RSI,%R13,8),%ZMM17 |
(920) 0x47abe2 VMOVUPD -0x8(%R14,%R13,8),%ZMM18 |
(920) 0x47abed VMOVUPD (%R14,%R13,8),%ZMM19 |
(920) 0x47abf4 VADDPD %ZMM16,%ZMM17,%ZMM1 |
(920) 0x47abfa VADDPD %ZMM19,%ZMM18,%ZMM20 |
(920) 0x47ac00 VSUBPD %ZMM20,%ZMM1,%ZMM20 |
# ZMM1 = per-column vector from the 1-D array (appears to be celldx(j)). |
(920) 0x47ac06 VMOVUPD -0x8(%RDI,%R13,8),%ZMM1 |
# strain2: the already-loaded values are recombined as a row difference (ZMM21) and a column difference (ZMM16), each scaled by ZMM2. |
(920) 0x47ac11 VADDPD %ZMM21,%ZMM29,%ZMM21 |
(920) 0x47ac17 VSUBPD %ZMM21,%ZMM31,%ZMM21 |
(920) 0x47ac1d VADDPD %ZMM30,%ZMM21,%ZMM21 |
(920) 0x47ac23 VMULPD %ZMM2,%ZMM21,%ZMM21 |
(920) 0x47ac29 VADDPD %ZMM18,%ZMM16,%ZMM16 |
(920) 0x47ac2f VSUBPD %ZMM16,%ZMM17,%ZMM16 |
(920) 0x47ac35 VADDPD %ZMM19,%ZMM16,%ZMM16 |
# ZMM17 = ZMM3 / ZMM1: one reciprocal reused for both later divisions by the same vector. |
(920) 0x47ac3b VDIVPD %ZMM1,%ZMM3,%ZMM17 |
(920) 0x47ac41 VMULPD %ZMM2,%ZMM16,%ZMM16 |
(920) 0x47ac47 VMULPD %ZMM17,%ZMM16,%ZMM16 |
# ZMM16 += ZMM21 * ZMM27 (ZMM27 is the hoisted row reciprocal); ZMM16 now matches strain2. |
(920) 0x47ac4d VFMADD231PD %ZMM21,%ZMM27,%ZMM16 |
# pgradx (ZMM31): elements two apart in one row of the third array, divided by the sum of two adjacent 1-D elements. |
(920) 0x47ac53 VMOVUPD (%R11,%R13,8),%ZMM18 |
(920) 0x47ac5a VSUBPD -0x10(%R11,%R13,8),%ZMM18,%ZMM18 |
(920) 0x47ac65 VADDPD (%RDI,%R13,8),%ZMM1,%ZMM19 |
(920) 0x47ac6c VDIVPD %ZMM19,%ZMM18,%ZMM31 |
# pgrady (ZMM30): difference between two other rows of the third array, times the hoisted reciprocal ZMM28. |
(920) 0x47ac72 VMOVUPD (%R10,%R13,8),%ZMM18 |
(920) 0x47ac79 VSUBPD (%R12,%R13,8),%ZMM18,%ZMM18 |
(920) 0x47ac80 VMULPD %ZMM28,%ZMM18,%ZMM30 |
# ZMM18 = pgradx^2, ZMM19 = pgrady^2. |
(920) 0x47ac86 VMULPD %ZMM31,%ZMM31,%ZMM18 |
(920) 0x47ac8c VMULPD %ZMM30,%ZMM30,%ZMM19 |
# limiter numerator: three products accumulated into ZMM16 with two FMAs. |
(920) 0x47ac92 VMULPD %ZMM2,%ZMM0,%ZMM21 |
(920) 0x47ac98 VMULPD %ZMM17,%ZMM21,%ZMM17 |
(920) 0x47ac9e VMULPD %ZMM18,%ZMM17,%ZMM17 |
(920) 0x47aca4 VMULPD %ZMM2,%ZMM20,%ZMM21 |
(920) 0x47acaa VMULPD %ZMM19,%ZMM21,%ZMM21 |
(920) 0x47acb0 VMULPD %ZMM16,%ZMM31,%ZMM16 |
(920) 0x47acb6 VFMADD213PD %ZMM17,%ZMM30,%ZMM16 |
(920) 0x47acbc VFMADD231PD %ZMM21,%ZMM27,%ZMM16 |
# limiter denominator clamped from below by ZMM4; ZMM29 = limiter. |
(920) 0x47acc2 VADDPD %ZMM18,%ZMM19,%ZMM17 |
(920) 0x47acc8 VMAXPD %ZMM4,%ZMM17,%ZMM17 |
(920) 0x47acce VDIVPD %ZMM17,%ZMM16,%ZMM29 |
# ZMM0 = ZMM1*ZMM0 + ZMM20*ZMM26; matches div. |
(920) 0x47acd4 VMULPD %ZMM0,%ZMM1,%ZMM0 |
(920) 0x47acda VFMADD231PD %ZMM20,%ZMM26,%ZMM0 |
# Branch mask against zero: predicate 6 gives "not (0 <= div)", predicate 5 under that mask gives "not (0 < limiter)". |
# K1 therefore marks the lanes that take the ELSE branch; if no lane does, jump to the store, which then writes zeros. |
(920) 0x47ace0 VXORPD %XMM21,%XMM21,%XMM21 |
(920) 0x47ace6 VCMPPD $0x6,%ZMM0,%ZMM21,%K1 |
(920) 0x47aced VCMPPD $0x5,%ZMM29,%ZMM21,%K1{%K1} |
(920) 0x47acf4 KORTESTB %K1,%K1 |
(920) 0x47acf8 JE 47ab80 |
# ELSE branch, computed for all lanes and filtered by K1 at the store. |
# VFPCLASSPD with imm 0x50 flags negative values; ZMM0 = max(|pgradx|, ZMM4) with the sign flipped in the flagged lanes. |
(920) 0x47acfe VFPCLASSPD $0x50,%ZMM31,%K2 |
(920) 0x47ad05 VANDPD %ZMM5,%ZMM31,%ZMM0 |
(920) 0x47ad0b VMAXPD %ZMM4,%ZMM0,%ZMM0 |
(920) 0x47ad11 VXORPD %ZMM6,%ZMM0,%ZMM0{%K2} |
# The second sign test classifies ZMM0 (the updated pgradx), not ZMM30 (pgrady), before clamping pgrady into ZMM16. |
# This mirrors listing line 81, which tests pgradx when setting diry. |
(920) 0x47ad17 VFPCLASSPD $0x50,%ZMM0,%K2 |
(920) 0x47ad1e VANDPD %ZMM5,%ZMM30,%ZMM16 |
(920) 0x47ad24 VMAXPD %ZMM4,%ZMM16,%ZMM16 |
(920) 0x47ad2a VXORPD %ZMM6,%ZMM16,%ZMM16{%K2} |
# ZMM17 = sqrt(ZMM0^2 + ZMM16^2); matches pgrad. |
(920) 0x47ad30 VMULPD %ZMM0,%ZMM0,%ZMM17 |
(920) 0x47ad36 VFMADD231PD %ZMM16,%ZMM16,%ZMM17 |
(920) 0x47ad3c VSQRTPD %ZMM17,%ZMM17 |
# xgrad and ygrad: two absolute quotients; ZMM0 = their minimum, then squared (grad2). |
(920) 0x47ad42 VMULPD %ZMM1,%ZMM17,%ZMM1 |
(920) 0x47ad48 VDIVPD %ZMM0,%ZMM1,%ZMM0 |
(920) 0x47ad4e VANDPD %ZMM5,%ZMM0,%ZMM0 |
(920) 0x47ad54 VMULPD %ZMM26,%ZMM17,%ZMM1 |
(920) 0x47ad5a VDIVPD %ZMM16,%ZMM1,%ZMM1 |
(920) 0x47ad60 VANDPD %ZMM5,%ZMM1,%ZMM1 |
(920) 0x47ad66 VMINPD %ZMM1,%ZMM0,%ZMM0 |
(920) 0x47ad6c VMULPD %ZMM0,%ZMM0,%ZMM0 |
# ZMM1 = limiter^2; ZMM0 doubled by adding it to itself (the factor 2.0). |
(920) 0x47ad72 VMULPD %ZMM29,%ZMM29,%ZMM1 |
(920) 0x47ad78 VADDPD %ZMM0,%ZMM0,%ZMM0 |
# Masked load from the array based at slot 0xf8 (appears to be density0); its row stride is re-read through 0x88(%RBP) on every pass. |
(920) 0x47ad7e MOV 0x88(%RBP),%R15 |
(920) 0x47ad85 MOV (%R15),%R15 |
(920) 0x47ad88 IMUL %R8,%R15 |
(920) 0x47ad8c ADD 0xf8(%RSP),%R15 |
(920) 0x47ad94 VMOVUPD (%R15,%R13,8),%ZMM16{%K1}{z} |
# ZMM21 = loaded value * limiter^2 * (2 * grad2): the value the store at 0x47ab80 writes in the K1 lanes. |
(920) 0x47ad9b VMULPD %ZMM16,%ZMM1,%ZMM1 |
(920) 0x47ada1 VMULPD %ZMM0,%ZMM1,%ZMM21 |
(920) 0x47ada7 JMP 47ab80 |
0x47adac NOPW %CS:(%RAX,%RAX,1) |
0x47adbb NOPL (%RAX,%RAX,1) |
# Exit of the vector main loop: compare the full trip count (slot 0xf0) with the vectorised count RDX and restore the spilled outer-loop registers. |
# The MOVs do not change flags, so the JE below still tests the CMP: equal means no leftover elements, go to the outer-loop latch. |
(919) 0x47adc0 CMP 0xf0(%RSP),%RDX |
(919) 0x47adc8 MOV 0xc0(%RSP),%RDI |
(919) 0x47add0 MOV 0x38(%RSP),%ESI |
(919) 0x47add4 MOV 0x60(%RBP),%R8 |
(919) 0x47add8 MOV 0x58(%RBP),%R13 |
(919) 0x47addc MOV 0x3c(%RSP),%R10D |
(919) 0x47ade1 MOV 0xe8(%RSP),%R11 |
(919) 0x47ade9 MOV 0xe0(%RSP),%R14 |
(919) 0x47adf1 MOV 0xd8(%RSP),%R9 |
(919) 0x47adf9 MOV 0xd0(%RSP),%R15 |
(919) 0x47ae01 MOV 0xc8(%RSP),%R12 |
(919) 0x47ae09 MOV 0x60(%RSP),%RCX |
(919) 0x47ae0e JE 47a99d |
(919) 0x47ae14 JMP 47ae4f |
0x47ae16 NOPW %CS:(%RAX,%RAX,1) |
0x47ae25 NOPW %CS:(%RAX,%RAX,1) |
0x47ae34 NOPW %CS:(%RAX,%RAX,1) |
# Entry used when the row has fewer than 8 elements (from 0x47aa66): nothing was vectorised, so RDX = 0; undo the register shuffles made at the outer-loop head. |
(919) 0x47ae40 XOR %EDX,%EDX |
(919) 0x47ae42 MOV %RSI,%RDI |
(919) 0x47ae45 MOV %R8D,%ESI |
(919) 0x47ae48 MOV %R13,%R8 |
(919) 0x47ae4b MOV 0x58(%RBP),%R13 |
# Remainder mask: ZMM1 = trip count - elements already done, compared (unsigned greater-than) with the per-lane constant at 0x8dc9a(%RIP) |
# (presumably the lane indices 0..7). K1 marks the lanes that still have an element; if none, go to the latch. |
(919) 0x47ae4f VPBROADCASTQ %RDX,%ZMM1 |
(919) 0x47ae55 VPSUBQ %ZMM1,%ZMM25,%ZMM1 |
(919) 0x47ae5b VPCMPNLEUQ 0x8dc9a(%RIP),%ZMM1,%K1 |
(919) 0x47ae66 KORTESTB %K1,%K1 |
(919) 0x47ae6a JE 47a99d |
# Row offsets are recomputed from R14 (current row index minus the sign-extended EDI); R15 = that + 2, RAX below = that + 3. |
(919) 0x47ae70 MOV %RDI,%RBX |
(919) 0x47ae73 MOVSXD %EDI,%RAX |
(919) 0x47ae76 SUB %RAX,%R14 |
(919) 0x47ae79 MOV %R12,%RDI |
(919) 0x47ae7c MOV %R15,%R12 |
(919) 0x47ae7f LEA 0x2(%R14),%R15 |
# RDX = done count + lower inner bound - (value saved from R9 at entry): the element index used by every load below. |
(919) 0x47ae83 ADD %RCX,%RDX |
(919) 0x47ae86 MOVSXD 0x48(%RSP),%RAX |
(919) 0x47ae8b SUB %RAX,%RDX |
# Zero-masked loads of the same operands as the main loop, straight from the base pointers in the caller frame. |
# Array at 0x18(%RBP), rows R15 and RAX, two adjacent columns each. |
(919) 0x47ae8e MOV %RDI,%RAX |
(919) 0x47ae91 IMUL %R15,%RAX |
(919) 0x47ae95 MOV 0x18(%RBP),%RCX |
(919) 0x47ae99 ADD %RCX,%RAX |
(919) 0x47ae9c VMOVUPD 0x18(%RAX,%RDX,8),%ZMM25{%K1}{z} |
(919) 0x47aea7 VMOVUPD 0x10(%RAX,%RDX,8),%ZMM26{%K1}{z} |
(919) 0x47aeb2 LEA 0x3(%R14),%RAX |
(919) 0x47aeb6 IMUL %RAX,%RDI |
(919) 0x47aeba ADD %RCX,%RDI |
(919) 0x47aebd VMOVUPD 0x18(%RDI,%RDX,8),%ZMM27{%K1}{z} |
(919) 0x47aec8 VMOVUPD 0x10(%RDI,%RDX,8),%ZMM28{%K1}{z} |
# Array at 0x10(%RBP), same two rows and columns. |
(919) 0x47aed3 MOV %R12,%RCX |
(919) 0x47aed6 IMUL %RAX,%RCX |
(919) 0x47aeda MOV 0x10(%RBP),%RDI |
(919) 0x47aede ADD %RDI,%RCX |
(919) 0x47aee1 VMOVUPD 0x10(%RCX,%RDX,8),%ZMM29{%K1}{z} |
(919) 0x47aeec VMOVUPD 0x18(%RCX,%RDX,8),%ZMM30{%K1}{z} |
(919) 0x47aef7 IMUL %R15,%R12 |
(919) 0x47aefb ADD %RDI,%R12 |
(919) 0x47aefe VMOVUPD 0x10(%R12,%RDX,8),%ZMM31{%K1}{z} |
(919) 0x47af09 VMOVUPD 0x18(%R12,%RDX,8),%ZMM1{%K1}{z} |
# Array at 0x28(%RBP): columns +0x18 and +0x8 of row R15, and column +0x10 of the rows above and below it. |
(919) 0x47af14 MOV %R9,%RCX |
(919) 0x47af17 IMUL %R15,%RCX |
(919) 0x47af1b MOV 0x28(%RBP),%RDI |
(919) 0x47af1f ADD %RDI,%RCX |
(919) 0x47af22 VMOVUPD 0x18(%RCX,%RDX,8),%ZMM0{%K1}{z} |
(919) 0x47af2d VMOVUPD 0x8(%RCX,%RDX,8),%ZMM16{%K1}{z} |
(919) 0x47af38 IMUL %R9,%RAX |
(919) 0x47af3c ADD %RDI,%RAX |
(919) 0x47af3f VMOVUPD 0x10(%RAX,%RDX,8),%ZMM17{%K1}{z} |
(919) 0x47af4a INC %R14 |
(919) 0x47af4d IMUL %R9,%R14 |
(919) 0x47af51 ADD %RDI,%R14 |
(919) 0x47af54 VMOVUPD 0x10(%R14,%RDX,8),%ZMM18{%K1}{z} |
# Merge-masked copies into ZMM7..ZMM15 (and, further down, through stack slots 0x100..0x240): |
# lanes outside K1 keep whatever the destination held before, so only the K1 lanes carry meaningful data. |
(919) 0x47af5f VMOVAPD %ZMM25,%ZMM7{%K1} |
(919) 0x47af65 VMOVAPD %ZMM27,%ZMM8{%K1} |
(919) 0x47af6b VMOVAPD %ZMM26,%ZMM9{%K1} |
(919) 0x47af71 VMOVAPD %ZMM28,%ZMM10{%K1} |
# ZMM19 = (ZMM7+ZMM8) - (ZMM9+ZMM10); the same form as ugrad in the main loop. |
(919) 0x47af77 VADDPD %ZMM7,%ZMM8,%ZMM19 |
(919) 0x47af7d VADDPD %ZMM10,%ZMM9,%ZMM20 |
(919) 0x47af83 VSUBPD %ZMM20,%ZMM19,%ZMM19 |
(919) 0x47af89 VMOVAPD %ZMM29,%ZMM11{%K1} |
(919) 0x47af8f VMOVAPD %ZMM30,%ZMM12{%K1} |
(919) 0x47af95 VMOVAPD %ZMM31,%ZMM13{%K1} |
(919) 0x47af9b VMOVAPD %ZMM1,%ZMM14{%K1} |
# ZMM20 = (ZMM11+ZMM12) - (ZMM13+ZMM14); the same form as vgrad. |
(919) 0x47afa1 VADDPD %ZMM11,%ZMM12,%ZMM1 |
(919) 0x47afa7 VADDPD %ZMM14,%ZMM13,%ZMM20 |
# ZMM15 = per-column vector from the 1-D array at 0x40(%RBP). |
(919) 0x47afad MOV 0x40(%RBP),%RAX |
(919) 0x47afb1 VMOVUPD 0x10(%RAX,%RDX,8),%ZMM21{%K1}{z} |
(919) 0x47afbc VSUBPD %ZMM20,%ZMM1,%ZMM20 |
(919) 0x47afc2 VMOVAPD %ZMM21,%ZMM15{%K1} |
# strain2 terms, as in the main loop; ZMM21 is the broadcast of the hoisted reciprocal XMM24, ZMM25 = ZMM3 / ZMM15. |
(919) 0x47afc8 VADDPD %ZMM9,%ZMM7,%ZMM1 |
(919) 0x47afce VSUBPD %ZMM1,%ZMM8,%ZMM1 |
(919) 0x47afd4 VADDPD %ZMM10,%ZMM1,%ZMM1 |
(919) 0x47afda VMULPD %ZMM2,%ZMM1,%ZMM1 |
(919) 0x47afe0 VBROADCASTSD %XMM24,%ZMM21 |
(919) 0x47afe6 VADDPD %ZMM13,%ZMM11,%ZMM24 |
(919) 0x47afec VSUBPD %ZMM24,%ZMM12,%ZMM24 |
(919) 0x47aff2 VDIVPD %ZMM15,%ZMM3,%ZMM25 |
(919) 0x47aff8 VADDPD %ZMM14,%ZMM24,%ZMM24 |
(919) 0x47affe VMULPD %ZMM2,%ZMM24,%ZMM24 |
(919) 0x47b004 VMULPD %ZMM25,%ZMM24,%ZMM26 |
(919) 0x47b00a VFMADD231PD %ZMM1,%ZMM21,%ZMM26 |
# pgradx (ZMM1): operands are merged through stack slots 0x240, 0x200 and 0x1c0, which are written back afterwards. |
(919) 0x47b010 VMOVUPD 0x18(%RAX,%RDX,8),%ZMM1{%K1}{z} |
(919) 0x47b01b VMOVAPD 0x240(%RSP),%ZMM24 |
(919) 0x47b023 VMOVAPD %ZMM0,%ZMM24{%K1} |
# RAX = RDX + 2: element index used by the masked load at 0x47b1c1 and by the store at 0x47a993. |
(919) 0x47b029 LEA 0x2(%RDX),%RAX |
(919) 0x47b02d VMOVAPD 0x200(%RSP),%ZMM0 |
(919) 0x47b035 VMOVAPD %ZMM16,%ZMM0{%K1} |
(919) 0x47b03b VMOVAPD %ZMM24,0x240(%RSP) |
(919) 0x47b043 VMOVAPD %ZMM0,0x200(%RSP) |
(919) 0x47b04b VSUBPD %ZMM0,%ZMM24,%ZMM0 |
(919) 0x47b051 VMOVAPD 0x1c0(%RSP),%ZMM16 |
(919) 0x47b059 VMOVAPD %ZMM1,%ZMM16{%K1} |
(919) 0x47b05f VMOVAPD %ZMM16,0x1c0(%RSP) |
(919) 0x47b067 VADDPD %ZMM15,%ZMM16,%ZMM1 |
(919) 0x47b06d VDIVPD %ZMM1,%ZMM0,%ZMM1 |
# ZMM0 = ZMM15 * ZMM19: first term of div; the second is added by the FMA at 0x47b10b. |
(919) 0x47b073 VMULPD %ZMM19,%ZMM15,%ZMM0 |
# pgrady (ZMM24): row difference merged through slots 0x180 and 0x140, times the broadcast of XMM23. |
(919) 0x47b079 VMOVAPD 0x180(%RSP),%ZMM16 |
(919) 0x47b081 VMOVAPD %ZMM17,%ZMM16{%K1} |
(919) 0x47b087 VMOVAPD 0x140(%RSP),%ZMM17 |
(919) 0x47b08f VMOVAPD %ZMM18,%ZMM17{%K1} |
(919) 0x47b095 VMOVAPD %ZMM16,0x180(%RSP) |
(919) 0x47b09d VMOVAPD %ZMM17,0x140(%RSP) |
(919) 0x47b0a5 VSUBPD %ZMM17,%ZMM16,%ZMM16 |
(919) 0x47b0ab VBROADCASTSD %XMM23,%ZMM17 |
(919) 0x47b0b1 VMULPD %ZMM17,%ZMM16,%ZMM24 |
# Squares, limiter numerator and clamped denominator, as in the main loop; ZMM23 = limiter. |
(919) 0x47b0b7 VMULPD %ZMM1,%ZMM1,%ZMM16 |
(919) 0x47b0bd VMULPD %ZMM24,%ZMM24,%ZMM17 |
(919) 0x47b0c3 VMULPD %ZMM2,%ZMM19,%ZMM18 |
(919) 0x47b0c9 VMULPD %ZMM25,%ZMM18,%ZMM18 |
(919) 0x47b0cf VMULPD %ZMM16,%ZMM18,%ZMM18 |
(919) 0x47b0d5 VMULPD %ZMM2,%ZMM20,%ZMM19 |
(919) 0x47b0db VMULPD %ZMM17,%ZMM19,%ZMM19 |
(919) 0x47b0e1 VMULPD %ZMM26,%ZMM1,%ZMM23 |
(919) 0x47b0e7 VFMADD213PD %ZMM18,%ZMM24,%ZMM23 |
(919) 0x47b0ed VFMADD231PD %ZMM19,%ZMM21,%ZMM23 |
(919) 0x47b0f3 VADDPD %ZMM16,%ZMM17,%ZMM16 |
(919) 0x47b0f9 VMAXPD %ZMM4,%ZMM16,%ZMM16 |
(919) 0x47b0ff VDIVPD %ZMM16,%ZMM23,%ZMM23 |
# div = ZMM0 + ZMM20 * broadcast(XMM22). |
(919) 0x47b105 VBROADCASTSD %XMM22,%ZMM22 |
(919) 0x47b10b VFMADD231PD %ZMM20,%ZMM22,%ZMM0 |
# ZMM25 is zeroed and compared as in the main loop, restricted to the K1 lanes: K2 = lanes taking the ELSE branch. |
# With K2 empty, jump to the store at 0x47a980, whose zero-masked move then produces zeros. |
(919) 0x47b111 VXORPD %XMM25,%XMM25,%XMM25 |
(919) 0x47b117 VCMPPD $0x6,%ZMM0,%ZMM25,%K2{%K1} |
(919) 0x47b11e VCMPPD $0x5,%ZMM23,%ZMM25,%K2{%K2} |
(919) 0x47b125 KORTESTB %K2,%K2 |
(919) 0x47b129 JE 47a980 |
# ELSE branch: sign-preserving clamp of pgradx into ZMM0. |
(919) 0x47b12f VFPCLASSPD $0x50,%ZMM1,%K3 |
(919) 0x47b136 VANDPD %ZMM5,%ZMM1,%ZMM0 |
(919) 0x47b13c VMAXPD %ZMM4,%ZMM0,%ZMM0 |
(919) 0x47b142 VXORPD %ZMM6,%ZMM0,%ZMM0{%K3} |
# As in the main loop, the second sign test classifies ZMM0 (the updated pgradx) before clamping pgrady (ZMM24) into ZMM1. |
(919) 0x47b148 VFPCLASSPD $0x50,%ZMM0,%K3 |
(919) 0x47b14f VANDPD %ZMM5,%ZMM24,%ZMM1 |
(919) 0x47b155 VMAXPD %ZMM4,%ZMM1,%ZMM1 |
(919) 0x47b15b VXORPD %ZMM6,%ZMM1,%ZMM1{%K3} |
# ZMM16 = sqrt(ZMM0^2 + ZMM1^2), then the two absolute quotients, their minimum, its square, and the doubling. |
(919) 0x47b161 VMULPD %ZMM0,%ZMM0,%ZMM16 |
(919) 0x47b167 VFMADD231PD %ZMM1,%ZMM1,%ZMM16 |
(919) 0x47b16d VSQRTPD %ZMM16,%ZMM16 |
(919) 0x47b173 VMULPD %ZMM15,%ZMM16,%ZMM17 |
(919) 0x47b179 VDIVPD %ZMM0,%ZMM17,%ZMM0 |
(919) 0x47b17f VANDPD %ZMM5,%ZMM0,%ZMM0 |
(919) 0x47b185 VMULPD %ZMM22,%ZMM16,%ZMM16 |
(919) 0x47b18b VDIVPD %ZMM1,%ZMM16,%ZMM1 |
(919) 0x47b191 VANDPD %ZMM5,%ZMM1,%ZMM1 |
(919) 0x47b197 VMINPD %ZMM1,%ZMM0,%ZMM0 |
(919) 0x47b19d VMULPD %ZMM0,%ZMM0,%ZMM0 |
(919) 0x47b1a3 VMULPD %ZMM23,%ZMM23,%ZMM1 |
(919) 0x47b1a9 VADDPD %ZMM0,%ZMM0,%ZMM0 |
# Masked load from the array at 0x30(%RBP) (row stride read through 0x88(%RBP)), merged through slot 0x100. |
(919) 0x47b1af MOV 0x88(%RBP),%RCX |
(919) 0x47b1b6 MOV (%RCX),%RCX |
(919) 0x47b1b9 IMUL %R15,%RCX |
(919) 0x47b1bd ADD 0x30(%RBP),%RCX |
(919) 0x47b1c1 VMOVUPD (%RCX,%RAX,8),%ZMM16{%K2}{z} |
(919) 0x47b1c8 VMOVAPD 0x100(%RSP),%ZMM17 |
(919) 0x47b1d0 VMOVAPD %ZMM16,%ZMM17{%K2} |
(919) 0x47b1d6 VMOVAPD %ZMM17,0x100(%RSP) |
# ZMM25 = loaded value * limiter^2 * (2 * grad2); the store at 0x47a980 keeps only its K2 lanes and writes under K1. |
(919) 0x47b1de VMULPD %ZMM17,%ZMM1,%ZMM1 |
(919) 0x47b1e4 VMULPD %ZMM0,%ZMM1,%ZMM25 |
(919) 0x47b1ea JMP 47a980 |
0x47b1ef NOP |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so |
Path / |
Source file and lines | viscosity_kernel.f90:50-94 |
Module | exec |
nb instructions | 121 |
nb uops | 125 |
loop length | 645 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 5 |
nb stack references | 33 |
micro-operation queue | 20.83 cycles |
front end | 20.83 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.20 | 5.33 | 10.33 | 10.33 | 16.00 | 5.13 | 5.20 | 16.00 | 16.00 | 16.00 | 5.13 | 10.33 |
cycles | 5.20 | 5.33 | 10.33 | 10.33 | 16.00 | 5.13 | 5.20 | 16.00 | 16.00 | 16.00 | 5.13 | 10.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 20.28-20.29 |
Stall cycles | 0.00 |
Front-end | 20.83 |
Dispatch | 16.00 |
Overall L1 | 20.83 |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 50% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 14% |
all | 10% |
load | 7% |
store | 10% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 18% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 10% |
load | 10% |
store | 10% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 14% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x2c0,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R9,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x50(%RBP),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x48(%RBP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %ECX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVL $0,0x44(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JS 47a7e8 <viscosity_kernel_module_mp_viscosity_kernel_.DIR.OMP.PARALLEL.2+0xa8> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDI,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVL $0,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EAX,0x2c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0x1,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x48(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x4c(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x38(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x34(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x751310,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 4044c0 <__kmpc_for_static_init_4@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x30(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2c(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %EAX,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 47a840 <viscosity_kernel_module_mp_viscosity_kernel_.DIR.OMP.PARALLEL.2+0x100> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x751330,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x34(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 4040b0 <__kmpc_for_static_fini@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x68(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV $0x751350,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL 404580 <__kmpc_barrier@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x40(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD 0x48(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SAL $0x3,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV $0x18,%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SUB %RAX,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD %EBX,%RCX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV $0x2,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SUB %RCX,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R14,0xb0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x10,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SUB %RAX,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA (%R9,%R14,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x1,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SUB %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RAX,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x3,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SUB %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RAX,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%RDI,%R14,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RBX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RAX,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %R8,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R14,0xa8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R8,%R11,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%RDX,%R11,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R10,%R11,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD 0x18(%RBP),%R11 | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV %R11,0xb8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD 0x8e4b3(%RIP),%ZMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0xc5019(%RIP),%ZMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x8e53f(%RIP),%ZMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x8e465(%RIP),%ZMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x8e4d3(%RIP),%ZMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
MOV 0x60(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x58(%RBP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x50(%RBP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x50(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RCX,%RAX,1),%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
XOR %R11D,%R11D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RBX,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ESI,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 47a9b0 <viscosity_kernel_module_mp_viscosity_kernel_.DIR.OMP.PARALLEL.2+0x270> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | viscosity_kernel.f90:50-94 |
Module | exec |
nb instructions | 121 |
nb uops | 125 |
loop length | 645 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 5 |
nb stack references | 33 |
micro-operation queue | 20.83 cycles |
front end | 20.83 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.20 | 5.33 | 10.33 | 10.33 | 16.00 | 5.13 | 5.20 | 16.00 | 16.00 | 16.00 | 5.13 | 10.33 |
cycles | 5.20 | 5.33 | 10.33 | 10.33 | 16.00 | 5.13 | 5.20 | 16.00 | 16.00 | 16.00 | 5.13 | 10.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 20.28-20.29 |
Stall cycles | 0.00 |
Front-end | 20.83 |
Dispatch | 16.00 |
Overall L1 | 20.83 |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 50% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 14% |
all | 10% |
load | 7% |
store | 10% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 18% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 10% |
load | 10% |
store | 10% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 14% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x2c0,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R9,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x50(%RBP),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x48(%RBP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %ECX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVL $0,0x44(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JS 47a7e8 <viscosity_kernel_module_mp_viscosity_kernel_.DIR.OMP.PARALLEL.2+0xa8> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDI,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVL $0,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EAX,0x2c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0x1,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x48(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x4c(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x38(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x34(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x751310,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 4044c0 <__kmpc_for_static_init_4@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x30(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2c(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %EAX,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 47a840 <viscosity_kernel_module_mp_viscosity_kernel_.DIR.OMP.PARALLEL.2+0x100> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x751330,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x34(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 4040b0 <__kmpc_for_static_fini@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x68(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV $0x751350,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL 404580 <__kmpc_barrier@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x40(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD 0x48(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SAL $0x3,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV $0x18,%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SUB %RAX,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD %EBX,%RCX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV $0x2,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SUB %RCX,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R14,0xb0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x10,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SUB %RAX,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA (%R9,%R14,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x1,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SUB %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RAX,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x3,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SUB %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RAX,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%RDI,%R14,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RBX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RAX,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %R8,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R14,0xa8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R8,%R11,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%RDX,%R11,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R10,%R11,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD 0x18(%RBP),%R11 | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV %R11,0xb8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD 0x8e4b3(%RIP),%ZMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0xc5019(%RIP),%ZMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x8e53f(%RIP),%ZMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x8e465(%RIP),%ZMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x8e4d3(%RIP),%ZMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
MOV 0x60(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x58(%RBP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x50(%RBP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x50(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RCX,%RAX,1),%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
XOR %R11D,%R11D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RBX,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ESI,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 47a9b0 <viscosity_kernel_module_mp_viscosity_kernel_.DIR.OMP.PARALLEL.2+0x270> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼viscosity_kernel_.DIR.OMP.PARALLEL.2– | 2.06 | 0.65 |
▼Loop 919 - viscosity_kernel.f90:53-89 - exec– | 0 | 0 |
○Loop 920 - viscosity_kernel.f90:53-89 - exec | 2.06 | 0.65 |