Function: accelerate_kernel_.DIR.OMP.PARALLEL.2 | Module: exec | Source: accelerate_kernel.f90:57-79 | Coverage: 4.76% |
---|
Function: accelerate_kernel_.DIR.OMP.PARALLEL.2 | Module: exec | Source: accelerate_kernel.f90:57-79 | Coverage: 4.76% |
---|
/scratch_na/users/xoserete/qaas_runs/171-322-0339/intel/CloverLeafFC/build/CloverLeafFC/CloverLeaf_ref/kernels/accelerate_kernel.f90: 57 - 79 |
-------------------------------------------------------------------------------- |
57: !$OMP PARALLEL |
58: |
59: !$OMP DO PRIVATE(j,k,stepbymass_s) |
60: DO k=y_min,y_max+1 |
61: !$OMP SIMD |
62: DO j=x_min,x_max+1 |
63: stepbymass_s=halfdt/((density0(j-1,k-1)*volume(j-1,k-1) & |
64: +density0(j ,k-1)*volume(j ,k-1) & |
65: +density0(j ,k )*volume(j ,k ) & |
66: +density0(j-1,k )*volume(j-1,k )) & |
67: *0.25_8) |
68: |
69: xvel1(j,k)=xvel0(j,k)-stepbymass_s*(xarea(j ,k )*(pressure(j ,k )-pressure(j-1,k )) & |
70: +xarea(j ,k-1)*(pressure(j ,k-1)-pressure(j-1,k-1))) |
71: yvel1(j,k)=yvel0(j,k)-stepbymass_s*(yarea(j ,k )*(pressure(j ,k )-pressure(j ,k-1)) & |
72: +yarea(j-1,k )*(pressure(j-1,k )-pressure(j-1,k-1))) |
73: xvel1(j,k)=xvel1(j,k)-stepbymass_s*(xarea(j ,k )*(viscosity(j ,k )-viscosity(j-1,k )) & |
74: +xarea(j ,k-1)*(viscosity(j ,k-1)-viscosity(j-1,k-1))) |
75: yvel1(j,k)=yvel1(j,k)-stepbymass_s*(yarea(j ,k )*(viscosity(j ,k )-viscosity(j ,k-1)) & |
76: +yarea(j-1,k )*(viscosity(j-1,k )-viscosity(j-1,k-1))) |
77: ENDDO |
78: ENDDO |
79: !$OMP END DO |
0x42db40 PUSH %RBP |
0x42db41 MOV %RSP,%RBP |
0x42db44 PUSH %R15 |
0x42db46 PUSH %R14 |
0x42db48 PUSH %R13 |
0x42db4a PUSH %R12 |
0x42db4c PUSH %RBX |
0x42db4d AND $-0x40,%RSP |
0x42db51 SUB $0x5c0,%RSP |
0x42db58 MOV %R8,0x58(%RSP) |
0x42db5d MOV 0x70(%RBP),%EBX |
0x42db60 MOV 0x68(%RBP),%EAX |
0x42db63 SUB %EBX,%EAX |
0x42db65 INC %EAX |
0x42db67 MOVL $0,0x54(%RSP) |
0x42db6f JS 42dbeb |
0x42db71 MOV %RDX,%R14 |
0x42db74 MOV %RDI,0x90(%RSP) |
0x42db7c MOV (%RDI),%ESI |
0x42db7e MOVL $0,0x24(%RSP) |
0x42db86 MOV %EAX,0x20(%RSP) |
0x42db8a MOVL $0x1,0x50(%RSP) |
0x42db92 SUB $0x8,%RSP |
0x42db96 LEA 0x58(%RSP),%RAX |
0x42db9b LEA 0x5c(%RSP),%RCX |
0x42dba0 LEA 0x2c(%RSP),%R8 |
0x42dba5 LEA 0x28(%RSP),%R9 |
0x42dbaa MOV $0x749f70,%EDI |
0x42dbaf MOV %ESI,0x4c(%RSP) |
0x42dbb3 MOV $0x22,%EDX |
0x42dbb8 PUSH $0x1 |
0x42dbba PUSH $0x1 |
0x42dbbc PUSH %RAX |
0x42dbbd CALL 4044c0 <__kmpc_for_static_init_4@plt> |
0x42dbc2 ADD $0x20,%RSP |
0x42dbc6 MOV 0x24(%RSP),%EAX |
0x42dbca MOV 0x20(%RSP),%EDX |
0x42dbce SUB %EAX,%EDX |
0x42dbd0 JAE 42dc40 |
0x42dbd2 MOV $0x749f90,%EDI |
0x42dbd7 MOV 0x44(%RSP),%ESI |
0x42dbdb VZEROUPPER |
0x42dbde CALL 4040b0 <__kmpc_for_static_fini@plt> |
0x42dbe3 MOV 0x90(%RSP),%RDI |
0x42dbeb MOV (%RDI),%ESI |
0x42dbed MOV $0x749fb0,%EDI |
0x42dbf2 CALL 404580 <__kmpc_barrier@plt> |
0x42dbf7 LEA -0x28(%RBP),%RSP |
0x42dbfb POP %RBX |
0x42dbfc POP %R12 |
0x42dbfe POP %R13 |
0x42dc00 POP %R14 |
0x42dc02 POP %R15 |
0x42dc04 POP %RBP |
0x42dc05 RET |
0x42dc06 NOPW %CS:(%RAX,%RAX,1) |
0x42dc15 NOPW %CS:(%RAX,%RAX,1) |
0x42dc24 NOPW %CS:(%RAX,%RAX,1) |
0x42dc33 NOPW %CS:(%RAX,%RAX,1) |
0x42dc40 MOV %RAX,%RCX |
0x42dc43 MOV 0x58(%RBP),%RSI |
0x42dc47 MOV 0x50(%RBP),%RDI |
0x42dc4b MOV 0x48(%RBP),%R8 |
0x42dc4f MOV 0x40(%RBP),%R9 |
0x42dc53 MOV 0x38(%RBP),%R10 |
0x42dc57 MOV 0x30(%RBP),%R11 |
0x42dc5b MOV 0x28(%RBP),%R15 |
0x42dc5f MOV 0x10(%RBP),%RAX |
0x42dc63 VMOVQ %R14,%XMM0 |
0x42dc68 CLTQ |
0x42dc6a SAL $0x3,%RAX |
0x42dc6e MOV $0x10,%R14D |
0x42dc74 SUB %RAX,%R14 |
0x42dc77 MOVSXD 0x58(%RSP),%RAX |
0x42dc7c MOV $0x1,%R12D |
0x42dc82 SUB %RAX,%R12 |
0x42dc85 MOV %R12,0xf0(%RSP) |
0x42dc8d ADD %EBX,%ECX |
0x42dc8f MOV $0x2,%EBX |
0x42dc94 SUB %RAX,%RBX |
0x42dc97 MOV %RBX,0xe8(%RSP) |
0x42dc9f MOV 0x20(%RBP),%RAX |
0x42dca3 MOV 0x18(%RBP),%RBX |
0x42dca7 ADD %R14,%R10 |
0x42dcaa MOV %R10,0xe0(%RSP) |
0x42dcb2 LEA (%RSI,%R14,1),%RSI |
0x42dcb6 MOV %RSI,0xd8(%RSP) |
0x42dcbe LEA (%R9,%R14,1),%RSI |
0x42dcc2 MOV %RSI,0xd0(%RSP) |
0x42dcca LEA (%R8,%R14,1),%RSI |
0x42dcce MOV %RSI,0xc8(%RSP) |
0x42dcd6 LEA (%RDI,%R14,1),%RSI |
0x42dcda MOV %EDX,%R9D |
0x42dcdd MOV %RSI,0xc0(%RSP) |
0x42dce5 LEA (%RBX,%R14,1),%RSI |
0x42dce9 MOV %RSI,0xb8(%RSP) |
0x42dcf1 LEA (%R15,%R14,1),%RSI |
0x42dcf5 MOV %RSI,0xb0(%RSP) |
0x42dcfd ADD %R14,%RAX |
0x42dd00 MOV %RAX,0xa8(%RSP) |
0x42dd08 LEA (%R11,%R14,1),%RAX |
0x42dd0c MOV %RAX,0xa0(%RSP) |
0x42dd14 ADD 0x60(%RBP),%R14 |
0x42dd18 MOV %R14,0x60(%RSP) |
0x42dd1d VBROADCASTSD 0xdb0b9(%RIP),%ZMM1 |
0x42dd27 VPBROADCASTQ %XMM0,%ZMM2 |
0x42dd2d XOR %R14D,%R14D |
0x42dd30 MOV %RCX,0x98(%RSP) |
0x42dd38 MOV %ECX,%ESI |
0x42dd3a MOV %EDX,0x48(%RSP) |
0x42dd3e JMP 42dd52 |
(223) 0x42dd40 LEA 0x1(%R14),%EAX |
(223) 0x42dd44 INC %ESI |
(223) 0x42dd46 CMP %R9D,%R14D |
(223) 0x42dd49 MOV %EAX,%R14D |
(223) 0x42dd4c JE 42dbd2 |
(223) 0x42dd52 MOV 0x80(%RBP),%RAX |
(223) 0x42dd59 MOVSXD (%RAX),%RDI |
(223) 0x42dd5c MOV 0x78(%RBP),%RAX |
(223) 0x42dd60 MOV (%RAX),%ECX |
(223) 0x42dd62 MOV %ECX,%EAX |
(223) 0x42dd64 SUB %EDI,%EAX |
(223) 0x42dd66 INC %EAX |
(223) 0x42dd68 JS 42dd40 |
(223) 0x42dd6a MOV %ESI,%EDX |
(223) 0x42dd6c MOV 0x88(%RBP),%RAX |
(223) 0x42dd73 MOV (%RAX),%R13 |
(223) 0x42dd76 MOV 0x90(%RBP),%RAX |
(223) 0x42dd7d MOV (%RAX),%R11 |
(223) 0x42dd80 MOV 0x98(%RBP),%RAX |
(223) 0x42dd87 MOV (%RAX),%RAX |
(223) 0x42dd8a MOV %RAX,0x30(%RSP) |
(223) 0x42dd8f MOV 0xa0(%RBP),%RAX |
(223) 0x42dd96 MOV (%RAX),%R12 |
(223) 0x42dd99 MOV 0xa8(%RBP),%RAX |
(223) 0x42dda0 MOV %RCX,%RSI |
(223) 0x42dda3 MOV (%RAX),%RCX |
(223) 0x42dda6 MOV 0xb0(%RBP),%RAX |
(223) 0x42ddad MOV (%RAX),%RAX |
(223) 0x42ddb0 MOV %RAX,0x70(%RSP) |
(223) 0x42ddb5 MOV 0xb8(%RBP),%RAX |
(223) 0x42ddbc MOV (%RAX),%R15 |
(223) 0x42ddbf MOV 0xc0(%RBP),%RAX |
(223) 0x42ddc6 MOV (%RAX),%RBX |
(223) 0x42ddc9 MOV 0xc8(%RBP),%RAX |
(223) 0x42ddd0 MOV (%RAX),%RAX |
(223) 0x42ddd3 MOV %RAX,0x38(%RSP) |
(223) 0x42ddd8 MOV 0xd0(%RBP),%RAX |
(223) 0x42dddf MOV (%RAX),%R8 |
(223) 0x42dde2 SUB %EDI,%ESI |
(223) 0x42dde4 ADD $0x2,%ESI |
(223) 0x42dde7 CMP $0x2,%ESI |
(223) 0x42ddea MOV $0x1,%EAX |
(223) 0x42ddef CMOVL %EAX,%ESI |
(223) 0x42ddf2 MOV %RSI,%R10 |
(223) 0x42ddf5 MOV %RSI,0x120(%RSP) |
(223) 0x42ddfd VPBROADCASTQ %RSI,%ZMM25 |
(223) 0x42de03 AND $0x7ffffff8,%R10 |
(223) 0x42de0a MOV %R14,0x80(%RSP) |
(223) 0x42de12 MOV %R15,0x78(%RSP) |
(223) 0x42de17 MOV %R8,0x118(%RSP) |
(223) 0x42de1f MOV %RBX,0x110(%RSP) |
(223) 0x42de27 MOV %RDI,0x108(%RSP) |
(223) 0x42de2f JE 42e180 |
(223) 0x42de35 MOV %EDX,0x4c(%RSP) |
(223) 0x42de39 MOVSXD %EDX,%RSI |
(223) 0x42de3c MOV 0xf0(%RSP),%RAX |
(223) 0x42de44 MOV %R12,0x68(%RSP) |
(223) 0x42de49 MOV %R13,%R9 |
(223) 0x42de4c MOV %R11,0x28(%RSP) |
(223) 0x42de51 LEA (%RAX,%RSI,1),%R14 |
(223) 0x42de55 ADD 0xe8(%RSP),%RSI |
(223) 0x42de5d MOV %R8,%RAX |
(223) 0x42de60 IMUL %R14,%RAX |
(223) 0x42de64 LEA (%RAX,%RDI,8),%R11 |
(223) 0x42de68 MOV %RCX,%RDX |
(223) 0x42de6b MOV 0xe0(%RSP),%RCX |
(223) 0x42de73 ADD %RCX,%R11 |
(223) 0x42de76 MOV %R8,%RAX |
(223) 0x42de79 IMUL %RSI,%RAX |
(223) 0x42de7d LEA (%RAX,%RDI,8),%R12 |
(223) 0x42de81 ADD %RCX,%R12 |
(223) 0x42de84 MOV %RBX,%RAX |
(223) 0x42de87 IMUL %RSI,%RAX |
(223) 0x42de8b MOV %R10,0x88(%RSP) |
(223) 0x42de93 LEA (%RAX,%RDI,8),%R15 |
(223) 0x42de97 ADD 0xd8(%RSP),%R15 |
(223) 0x42de9f MOV %RDX,%RAX |
(223) 0x42dea2 IMUL %R14,%RAX |
(223) 0x42dea6 LEA (%RAX,%RDI,8),%R13 |
(223) 0x42deaa MOV 0xd0(%RSP),%RCX |
(223) 0x42deb2 ADD %RCX,%R13 |
(223) 0x42deb5 MOV %RDX,0x100(%RSP) |
(223) 0x42debd MOV %RDX,%RAX |
(223) 0x42dec0 IMUL %RSI,%RAX |
(223) 0x42dec4 LEA (%RAX,%RDI,8),%R10 |
(223) 0x42dec8 ADD %RCX,%R10 |
(223) 0x42decb MOV %R9,%RBX |
(223) 0x42dece MOV %R9,%RAX |
(223) 0x42ded1 IMUL %RSI,%RAX |
(223) 0x42ded5 LEA (%RAX,%RDI,8),%R8 |
(223) 0x42ded9 MOV 0xc8(%RSP),%R9 |
(223) 0x42dee1 ADD %R9,%R8 |
(223) 0x42dee4 MOV 0x28(%RSP),%RAX |
(223) 0x42dee9 IMUL %RSI,%RAX |
(223) 0x42deed LEA (%RAX,%RDI,8),%RDX |
(223) 0x42def1 MOV 0xc0(%RSP),%RCX |
(223) 0x42def9 ADD %RCX,%RDX |
(223) 0x42defc MOV %RBX,0xf8(%RSP) |
(223) 0x42df04 MOV %RBX,%RAX |
(223) 0x42df07 IMUL %R14,%RAX |
(223) 0x42df0b LEA (%RAX,%RDI,8),%RBX |
(223) 0x42df0f ADD %R9,%RBX |
(223) 0x42df12 MOV 0x28(%RSP),%RAX |
(223) 0x42df17 IMUL %R14,%RAX |
(223) 0x42df1b LEA (%RAX,%RDI,8),%R9 |
(223) 0x42df1f ADD %RCX,%R9 |
(223) 0x42df22 MOV 0x38(%RSP),%RAX |
(223) 0x42df27 IMUL %RSI,%RAX |
(223) 0x42df2b LEA (%RAX,%RDI,8),%RAX |
(223) 0x42df2f ADD 0xb8(%RSP),%RAX |
(223) 0x42df37 MOV %RAX,0x138(%RSP) |
(223) 0x42df3f MOV 0x78(%RSP),%RAX |
(223) 0x42df44 IMUL %RSI,%RAX |
(223) 0x42df48 LEA (%RAX,%RDI,8),%RAX |
(223) 0x42df4c ADD 0xb0(%RSP),%RAX |
(223) 0x42df54 MOV %RAX,0x130(%RSP) |
(223) 0x42df5c MOV 0x70(%RSP),%RAX |
(223) 0x42df61 IMUL %RSI,%RAX |
(223) 0x42df65 LEA (%RAX,%RDI,8),%RAX |
(223) 0x42df69 ADD 0xa8(%RSP),%RAX |
(223) 0x42df71 MOV %RAX,0x128(%RSP) |
(223) 0x42df79 MOV 0x30(%RSP),%RAX |
(223) 0x42df7e IMUL %RSI,%RAX |
(223) 0x42df82 LEA (%RAX,%RDI,8),%RAX |
(223) 0x42df86 ADD 0xa0(%RSP),%RAX |
(223) 0x42df8e MOV 0x68(%RSP),%RCX |
(223) 0x42df93 IMUL %RCX,%R14 |
(223) 0x42df97 LEA (%R14,%RDI,8),%R14 |
(223) 0x42df9b ADD 0x60(%RSP),%R14 |
(223) 0x42dfa0 IMUL %RCX,%RSI |
(223) 0x42dfa4 LEA (%RSI,%RDI,8),%RDI |
(223) 0x42dfa8 ADD 0x60(%RSP),%RDI |
(223) 0x42dfad XOR %ECX,%ECX |
(223) 0x42dfaf NOP |
(224) 0x42dfb0 VMOVUPD -0x8(%R9,%RCX,8),%ZMM0 |
(224) 0x42dfbb VMOVUPD (%R9,%RCX,8),%ZMM3 |
(224) 0x42dfc2 VMULPD -0x8(%RBX,%RCX,8),%ZMM0,%ZMM0 |
(224) 0x42dfcd VFMADD231PD (%RBX,%RCX,8),%ZMM3,%ZMM0 |
(224) 0x42dfd4 VMOVUPD -0x8(%RDX,%RCX,8),%ZMM3 |
(224) 0x42dfdf VMOVUPD (%RDX,%RCX,8),%ZMM4 |
(224) 0x42dfe6 VFMADD132PD (%R8,%RCX,8),%ZMM0,%ZMM4 |
(224) 0x42dfed VFMADD231PD -0x8(%R8,%RCX,8),%ZMM3,%ZMM4 |
(224) 0x42dff8 VMULPD %ZMM1,%ZMM4,%ZMM0 |
(224) 0x42dffe VDIVPD %ZMM0,%ZMM2,%ZMM0 |
(224) 0x42e004 VMOVUPD (%RDI,%RCX,8),%ZMM3 |
(224) 0x42e00b VMOVUPD -0x8(%R10,%RCX,8),%ZMM4 |
(224) 0x42e016 VMOVUPD (%R10,%RCX,8),%ZMM17 |
(224) 0x42e01d VSUBPD %ZMM17,%ZMM4,%ZMM18 |
(224) 0x42e023 VMULPD %ZMM3,%ZMM18,%ZMM18 |
(224) 0x42e029 VMOVUPD (%R14,%RCX,8),%ZMM19 |
(224) 0x42e030 VMOVUPD -0x8(%R13,%RCX,8),%ZMM20 |
(224) 0x42e03b VMOVUPD (%R13,%RCX,8),%ZMM21 |
(224) 0x42e043 VSUBPD %ZMM21,%ZMM20,%ZMM22 |
(224) 0x42e049 VFMADD213PD %ZMM18,%ZMM19,%ZMM22 |
(224) 0x42e04f VMOVUPD -0x8(%R15,%RCX,8),%ZMM18 |
(224) 0x42e05a VMOVUPD (%R15,%RCX,8),%ZMM26 |
(224) 0x42e061 VSUBPD %ZMM17,%ZMM21,%ZMM17 |
(224) 0x42e067 VMULPD %ZMM17,%ZMM26,%ZMM17 |
(224) 0x42e06d VSUBPD %ZMM4,%ZMM20,%ZMM4 |
(224) 0x42e073 VFMADD213PD %ZMM17,%ZMM18,%ZMM4 |
(224) 0x42e079 VMOVUPD -0x8(%R12,%RCX,8),%ZMM17 |
(224) 0x42e084 VMOVUPD (%R12,%RCX,8),%ZMM20 |
(224) 0x42e08b VSUBPD %ZMM20,%ZMM17,%ZMM21 |
(224) 0x42e091 VMOVUPD -0x8(%R11,%RCX,8),%ZMM27 |
(224) 0x42e09c VMOVUPD (%R11,%RCX,8),%ZMM28 |
(224) 0x42e0a3 VSUBPD %ZMM28,%ZMM27,%ZMM29 |
(224) 0x42e0a9 VFMADD213PD %ZMM22,%ZMM3,%ZMM21 |
(224) 0x42e0af VFMADD231PD %ZMM29,%ZMM19,%ZMM21 |
(224) 0x42e0b5 VFMADD213PD (%RAX,%RCX,8),%ZMM0,%ZMM21 |
(224) 0x42e0bc MOV 0x128(%RSP),%RSI |
(224) 0x42e0c4 VMOVUPD %ZMM21,(%RSI,%RCX,8) |
(224) 0x42e0cb VSUBPD %ZMM20,%ZMM28,%ZMM3 |
(224) 0x42e0d1 VSUBPD %ZMM17,%ZMM27,%ZMM17 |
(224) 0x42e0d7 VFMADD213PD %ZMM4,%ZMM26,%ZMM3 |
(224) 0x42e0dd VFMADD231PD %ZMM17,%ZMM18,%ZMM3 |
(224) 0x42e0e3 MOV 0x130(%RSP),%RSI |
(224) 0x42e0eb VFMADD213PD (%RSI,%RCX,8),%ZMM0,%ZMM3 |
(224) 0x42e0f2 MOV 0x138(%RSP),%RSI |
(224) 0x42e0fa VMOVUPD %ZMM3,(%RSI,%RCX,8) |
(224) 0x42e101 ADD $0x8,%RCX |
(224) 0x42e105 CMP 0x88(%RSP),%RCX |
(224) 0x42e10d JB 42dfb0 |
(223) 0x42e113 MOV 0x88(%RSP),%R15 |
(223) 0x42e11b CMP 0x120(%RSP),%R15 |
(223) 0x42e123 MOV 0x48(%RSP),%R9D |
(223) 0x42e128 MOV 0x4c(%RSP),%ESI |
(223) 0x42e12c MOV 0x80(%RSP),%R14 |
(223) 0x42e134 MOV 0x38(%RSP),%RBX |
(223) 0x42e139 MOV 0x30(%RSP),%RDX |
(223) 0x42e13e MOV 0x100(%RSP),%RCX |
(223) 0x42e146 MOV 0x68(%RSP),%R12 |
(223) 0x42e14b MOV 0xf8(%RSP),%R13 |
(223) 0x42e153 MOV 0x28(%RSP),%R11 |
(223) 0x42e158 JE 42dd40 |
(223) 0x42e15e JMP 42e18f |
0x42e160 NOPW %CS:(%RAX,%RAX,1) |
0x42e16f NOPW %CS:(%RAX,%RAX,1) |
0x42e17e XCHG %AX,%AX |
(223) 0x42e180 XOR %R15D,%R15D |
(223) 0x42e183 MOV %EDX,%ESI |
(223) 0x42e185 MOV 0x38(%RSP),%RBX |
(223) 0x42e18a MOV 0x30(%RSP),%RDX |
(223) 0x42e18f VPBROADCASTQ %R15,%ZMM0 |
(223) 0x42e195 VPSUBQ %ZMM0,%ZMM25,%ZMM0 |
(223) 0x42e19b VPCMPNLEUQ 0xd97da(%RIP),%ZMM0,%K1 |
(223) 0x42e1a6 KORTESTB %K1,%K1 |
(223) 0x42e1aa JE 42dd40 |
(223) 0x42e1b0 MOV 0x98(%RSP),%RAX |
(223) 0x42e1b8 ADD %R14D,%EAX |
(223) 0x42e1bb MOVSXD %EAX,%R14 |
(223) 0x42e1be MOVSXD 0x58(%RSP),%RAX |
(223) 0x42e1c3 SUB %RAX,%R14 |
(223) 0x42e1c6 ADD 0x108(%RSP),%R15 |
(223) 0x42e1ce MOV 0x10(%RBP),%RAX |
(223) 0x42e1d2 CLTQ |
(223) 0x42e1d4 SUB %RAX,%R15 |
(223) 0x42e1d7 MOV %ESI,%R10D |
(223) 0x42e1da LEA 0x1(%R14),%RSI |
(223) 0x42e1de MOV %R11,%RAX |
(223) 0x42e1e1 IMUL %RSI,%RAX |
(223) 0x42e1e5 MOV %RCX,%R8 |
(223) 0x42e1e8 MOV 0x50(%RBP),%RCX |
(223) 0x42e1ec ADD %RCX,%RAX |
(223) 0x42e1ef VMOVUPD 0x8(%RAX,%R15,8),%ZMM25{%K1}{z} |
(223) 0x42e1fa VMOVUPD 0x10(%RAX,%R15,8),%ZMM26{%K1}{z} |
(223) 0x42e205 MOV %R13,%RAX |
(223) 0x42e208 IMUL %RSI,%RAX |
(223) 0x42e20c MOV 0x48(%RBP),%RDI |
(223) 0x42e210 ADD %RDI,%RAX |
(223) 0x42e213 VMOVUPD 0x8(%RAX,%R15,8),%ZMM27{%K1}{z} |
(223) 0x42e21e VMOVUPD 0x10(%RAX,%R15,8),%ZMM28{%K1}{z} |
(223) 0x42e229 ADD $0x2,%R14 |
(223) 0x42e22d IMUL %R14,%R11 |
(223) 0x42e231 ADD %RCX,%R11 |
(223) 0x42e234 VMOVUPD 0x10(%R11,%R15,8),%ZMM29{%K1}{z} |
(223) 0x42e23f VMOVUPD 0x8(%R11,%R15,8),%ZMM30{%K1}{z} |
(223) 0x42e24a IMUL %R14,%R13 |
(223) 0x42e24e ADD %RDI,%R13 |
(223) 0x42e251 VMOVUPD 0x10(%R13,%R15,8),%ZMM31{%K1}{z} |
(223) 0x42e25c VMOVUPD 0x8(%R13,%R15,8),%ZMM3{%K1}{z} |
(223) 0x42e267 MOV %R12,%RAX |
(223) 0x42e26a IMUL %R14,%RAX |
(223) 0x42e26e MOV 0x60(%RBP),%RCX |
(223) 0x42e272 ADD %RCX,%RAX |
(223) 0x42e275 VMOVUPD 0x10(%RAX,%R15,8),%ZMM17{%K1}{z} |
(223) 0x42e280 MOV %R8,%RAX |
(223) 0x42e283 IMUL %R14,%RAX |
(223) 0x42e287 MOV 0x40(%RBP),%RDI |
(223) 0x42e28b ADD %RDI,%RAX |
(223) 0x42e28e VMOVUPD 0x10(%RAX,%R15,8),%ZMM18{%K1}{z} |
(223) 0x42e299 VMOVUPD 0x8(%RAX,%R15,8),%ZMM19{%K1}{z} |
(223) 0x42e2a4 IMUL %RSI,%R12 |
(223) 0x42e2a8 ADD %RCX,%R12 |
(223) 0x42e2ab VMOVUPD 0x10(%R12,%R15,8),%ZMM20{%K1}{z} |
(223) 0x42e2b6 IMUL %RSI,%R8 |
(223) 0x42e2ba ADD %RDI,%R8 |
(223) 0x42e2bd VMOVUPD 0x10(%R8,%R15,8),%ZMM21{%K1}{z} |
(223) 0x42e2c8 VMOVUPD 0x8(%R8,%R15,8),%ZMM22{%K1}{z} |
(223) 0x42e2d3 MOV 0x110(%RSP),%RAX |
(223) 0x42e2db IMUL %R14,%RAX |
(223) 0x42e2df ADD 0x58(%RBP),%RAX |
(223) 0x42e2e3 VMOVUPD 0x10(%RAX,%R15,8),%ZMM0{%K1}{z} |
(223) 0x42e2ee VMOVUPD 0x8(%RAX,%R15,8),%ZMM4{%K1}{z} |
(223) 0x42e2f9 MOV 0x118(%RSP),%R8 |
(223) 0x42e301 MOV %R8,%RAX |
(223) 0x42e304 IMUL %R14,%RAX |
(223) 0x42e308 MOV 0x38(%RBP),%RCX |
(223) 0x42e30c ADD %RCX,%RAX |
(223) 0x42e30f VMOVUPD 0x10(%RAX,%R15,8),%ZMM23{%K1}{z} |
(223) 0x42e31a VMOVUPD 0x8(%RAX,%R15,8),%ZMM24{%K1}{z} |
(223) 0x42e325 IMUL %RSI,%R8 |
(223) 0x42e329 MOV %R10D,%ESI |
(223) 0x42e32c ADD %RCX,%R8 |
(223) 0x42e32f VMOVUPD 0x10(%R8,%R15,8),%ZMM5{%K1}{z} |
(223) 0x42e33a VMOVUPD 0x8(%R8,%R15,8),%ZMM6{%K1}{z} |
(223) 0x42e345 IMUL %R14,%RDX |
(223) 0x42e349 ADD 0x30(%RBP),%RDX |
(223) 0x42e34d VMOVUPD 0x10(%RDX,%R15,8),%ZMM7{%K1}{z} |
(223) 0x42e358 VMOVAPD 0x180(%RSP),%ZMM11 |
(223) 0x42e360 VMOVAPD %ZMM25,%ZMM11{%K1} |
(223) 0x42e366 VMOVAPD 0x200(%RSP),%ZMM10 |
(223) 0x42e36e VMOVAPD %ZMM27,%ZMM10{%K1} |
(223) 0x42e374 VMOVAPD 0x280(%RSP),%ZMM9 |
(223) 0x42e37c VMOVAPD %ZMM26,%ZMM9{%K1} |
(223) 0x42e382 VMOVAPD 0x300(%RSP),%ZMM8 |
(223) 0x42e38a VMOVAPD %ZMM28,%ZMM8{%K1} |
(223) 0x42e390 VMOVAPD 0x340(%RSP),%ZMM28 |
(223) 0x42e398 VMOVAPD %ZMM29,%ZMM28{%K1} |
(223) 0x42e39e VMOVAPD 0x380(%RSP),%ZMM27 |
(223) 0x42e3a6 VMOVAPD %ZMM31,%ZMM27{%K1} |
(223) 0x42e3ac VMOVAPD 0x3c0(%RSP),%ZMM26 |
(223) 0x42e3b4 VMOVAPD %ZMM30,%ZMM26{%K1} |
(223) 0x42e3ba VMOVAPD 0x400(%RSP),%ZMM25 |
(223) 0x42e3c2 VMOVAPD %ZMM3,%ZMM25{%K1} |
(223) 0x42e3c8 VMOVAPD %ZMM17,%ZMM16{%K1} |
(223) 0x42e3ce VMOVAPD %ZMM18,%ZMM15{%K1} |
(223) 0x42e3d4 VMOVAPD %ZMM19,%ZMM14{%K1} |
(223) 0x42e3da VMOVAPD %ZMM20,%ZMM13{%K1} |
(223) 0x42e3e0 VMOVAPD %ZMM21,%ZMM12{%K1} |
(223) 0x42e3e6 VSUBPD %ZMM15,%ZMM14,%ZMM3 |
(223) 0x42e3ec VMULPD %ZMM16,%ZMM3,%ZMM3 |
(223) 0x42e3f2 VMOVAPD 0x140(%RSP),%ZMM29 |
(223) 0x42e3fa VMOVAPD %ZMM22,%ZMM29{%K1} |
(223) 0x42e400 VSUBPD %ZMM12,%ZMM29,%ZMM17 |
(223) 0x42e406 VFMADD213PD %ZMM3,%ZMM13,%ZMM17 |
(223) 0x42e40c VMOVAPD 0x2c0(%RSP),%ZMM21 |
(223) 0x42e414 VMOVAPD %ZMM23,%ZMM21{%K1} |
(223) 0x42e41a VMOVAPD 0x440(%RSP),%ZMM20 |
(223) 0x42e422 VMOVAPD %ZMM24,%ZMM20{%K1} |
(223) 0x42e428 VMOVAPD 0x480(%RSP),%ZMM19 |
(223) 0x42e430 VMOVAPD %ZMM5,%ZMM19{%K1} |
(223) 0x42e436 VMOVAPD 0x4c0(%RSP),%ZMM18 |
(223) 0x42e43e VMOVAPD %ZMM6,%ZMM18{%K1} |
(223) 0x42e444 VSUBPD %ZMM21,%ZMM20,%ZMM3 |
(223) 0x42e44a VFMADD213PD %ZMM17,%ZMM16,%ZMM3 |
(223) 0x42e450 VSUBPD %ZMM19,%ZMM18,%ZMM5 |
(223) 0x42e456 VFMADD231PD %ZMM5,%ZMM13,%ZMM3 |
(223) 0x42e45c VMOVAPD %ZMM10,0x200(%RSP) |
(223) 0x42e464 VMOVAPD %ZMM11,0x180(%RSP) |
(223) 0x42e46c VMULPD %ZMM10,%ZMM11,%ZMM5 |
(223) 0x42e472 VMOVAPD %ZMM8,0x300(%RSP) |
(223) 0x42e47a VMOVAPD %ZMM9,0x280(%RSP) |
(223) 0x42e482 VFMADD231PD %ZMM8,%ZMM9,%ZMM5 |
(223) 0x42e488 VMOVAPD %ZMM27,0x380(%RSP) |
(223) 0x42e490 VMOVAPD %ZMM28,0x340(%RSP) |
(223) 0x42e498 VFMADD231PD %ZMM27,%ZMM28,%ZMM5 |
(223) 0x42e49e VMOVAPD %ZMM25,0x400(%RSP) |
(223) 0x42e4a6 VMOVAPD %ZMM26,0x3c0(%RSP) |
(223) 0x42e4ae VFMADD231PD %ZMM25,%ZMM26,%ZMM5 |
(223) 0x42e4b4 VMULPD %ZMM1,%ZMM5,%ZMM5 |
(223) 0x42e4ba VDIVPD %ZMM5,%ZMM2,%ZMM5 |
(223) 0x42e4c0 VMOVAPD 0x500(%RSP),%ZMM6 |
(223) 0x42e4c8 VMOVAPD %ZMM7,%ZMM6{%K1} |
(223) 0x42e4ce VMOVAPD %ZMM6,0x500(%RSP) |
(223) 0x42e4d6 VFMADD213PD %ZMM6,%ZMM5,%ZMM3 |
(223) 0x42e4dc MOV 0x70(%RSP),%RAX |
(223) 0x42e4e1 IMUL %R14,%RAX |
(223) 0x42e4e5 ADD 0x20(%RBP),%RAX |
(223) 0x42e4e9 VMOVUPD %ZMM3,0x10(%RAX,%R15,8){%K1} |
(223) 0x42e4f4 MOV 0x78(%RSP),%RAX |
(223) 0x42e4f9 IMUL %R14,%RAX |
(223) 0x42e4fd ADD 0x28(%RBP),%RAX |
(223) 0x42e501 VMOVUPD 0x10(%RAX,%R15,8),%ZMM3{%K1}{z} |
(223) 0x42e50c IMUL %R14,%RBX |
(223) 0x42e510 MOV 0x80(%RSP),%R14 |
(223) 0x42e518 VMOVAPD 0x1c0(%RSP),%ZMM8 |
(223) 0x42e520 VMOVAPD %ZMM0,%ZMM8{%K1} |
(223) 0x42e526 VSUBPD %ZMM15,%ZMM12,%ZMM0 |
(223) 0x42e52c VMULPD %ZMM0,%ZMM8,%ZMM0 |
(223) 0x42e532 VMOVAPD 0x240(%RSP),%ZMM7 |
(223) 0x42e53a VMOVAPD %ZMM4,%ZMM7{%K1} |
(223) 0x42e540 VMOVAPD %ZMM29,0x140(%RSP) |
(223) 0x42e548 VSUBPD %ZMM14,%ZMM29,%ZMM4 |
(223) 0x42e54e VFMADD213PD %ZMM0,%ZMM7,%ZMM4 |
(223) 0x42e554 VMOVAPD %ZMM19,0x480(%RSP) |
(223) 0x42e55c VMOVAPD %ZMM21,0x2c0(%RSP) |
(223) 0x42e564 VSUBPD %ZMM21,%ZMM19,%ZMM0 |
(223) 0x42e56a VMOVAPD %ZMM18,0x4c0(%RSP) |
(223) 0x42e572 VMOVAPD %ZMM20,0x440(%RSP) |
(223) 0x42e57a VSUBPD %ZMM20,%ZMM18,%ZMM6 |
(223) 0x42e580 VMOVAPD %ZMM8,0x1c0(%RSP) |
(223) 0x42e588 VFMADD213PD %ZMM4,%ZMM8,%ZMM0 |
(223) 0x42e58e VMOVAPD %ZMM7,0x240(%RSP) |
(223) 0x42e596 VFMADD231PD %ZMM6,%ZMM7,%ZMM0 |
(223) 0x42e59c VMOVAPD 0x540(%RSP),%ZMM4 |
(223) 0x42e5a4 VMOVAPD %ZMM3,%ZMM4{%K1} |
(223) 0x42e5aa VMOVAPD %ZMM4,0x540(%RSP) |
(223) 0x42e5b2 VFMADD213PD %ZMM4,%ZMM5,%ZMM0 |
(223) 0x42e5b8 ADD 0x18(%RBP),%RBX |
(223) 0x42e5bc VMOVUPD %ZMM0,0x10(%RBX,%R15,8){%K1} |
(223) 0x42e5c7 JMP 42dd40 |
0x42e5cc NOPL (%RAX) |
Path / |
Source file and lines | accelerate_kernel.f90:57-79 |
Module | exec |
nb instructions | 115 |
nb uops | 119 |
loop length | 548 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 0 |
used zmm registers | 2 |
nb stack references | 39 |
micro-operation queue | 19.83 cycles |
front end | 19.83 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.20 | 5.20 | 9.33 | 9.33 | 16.50 | 5.20 | 5.20 | 16.50 | 16.50 | 16.50 | 5.20 | 9.33 |
cycles | 5.20 | 5.20 | 9.33 | 9.33 | 16.50 | 5.20 | 5.20 | 16.50 | 16.50 | 16.50 | 5.20 | 9.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 19.29-19.34 |
Stall cycles | 0.00 |
Front-end | 19.83 |
Dispatch | 16.50 |
Overall L1 | 19.83 |
all | 2% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 14% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 2% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 10% |
load | 9% |
store | 10% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 9% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 10% |
load | 10% |
store | 10% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 9% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x5c0,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R8,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x70(%RBP),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x68(%RBP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %EBX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVL $0,0x54(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JS 42dbeb <accelerate_kernel_module_mp_accelerate_kernel_.DIR.OMP.PARALLEL.2+0xab> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVL $0,0x24(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EAX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0x1,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x58(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x5c(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x2c(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x28(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x749f70,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x4c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 4044c0 <__kmpc_for_static_init_4@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x24(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RSP),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %EAX,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 42dc40 <accelerate_kernel_module_mp_accelerate_kernel_.DIR.OMP.PARALLEL.2+0x100> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x749f90,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x44(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 4040b0 <__kmpc_for_static_fini@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x90(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV $0x749fb0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL 404580 <__kmpc_barrier@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x58(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x50(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x48(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x38(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVQ %R14,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
CLTQ | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
SAL $0x3,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV $0x10,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SUB %RAX,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD 0x58(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV $0x1,%R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SUB %RAX,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R12,0xf0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %EBX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $0x2,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SUB %RAX,%RBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RBX,0xe8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x20(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %R14,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R10,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%RSI,%R14,1),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RSI,0xd8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R9,%R14,1),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RSI,0xd0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R8,%R14,1),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RSI,0xc8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%RDI,%R14,1),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EDX,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RSI,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%RBX,%R14,1),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RSI,0xb8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R15,%R14,1),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RSI,0xb0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %R14,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RAX,0xa8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R11,%R14,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD 0x60(%RBP),%R14 | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV %R14,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD 0xdb0b9(%RIP),%ZMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ %XMM0,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RCX,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ECX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %EDX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 42dd52 <accelerate_kernel_module_mp_accelerate_kernel_.DIR.OMP.PARALLEL.2+0x212> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | accelerate_kernel.f90:57-79 |
Module | exec |
nb instructions | 115 |
nb uops | 119 |
loop length | 548 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 0 |
used zmm registers | 2 |
nb stack references | 39 |
micro-operation queue | 19.83 cycles |
front end | 19.83 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.20 | 5.20 | 9.33 | 9.33 | 16.50 | 5.20 | 5.20 | 16.50 | 16.50 | 16.50 | 5.20 | 9.33 |
cycles | 5.20 | 5.20 | 9.33 | 9.33 | 16.50 | 5.20 | 5.20 | 16.50 | 16.50 | 16.50 | 5.20 | 9.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 19.29-19.34 |
Stall cycles | 0.00 |
Front-end | 19.83 |
Dispatch | 16.50 |
Overall L1 | 19.83 |
all | 2% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 14% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 2% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 10% |
load | 9% |
store | 10% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 9% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 10% |
load | 10% |
store | 10% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 9% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x5c0,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R8,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x70(%RBP),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x68(%RBP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %EBX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVL $0,0x54(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JS 42dbeb <accelerate_kernel_module_mp_accelerate_kernel_.DIR.OMP.PARALLEL.2+0xab> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVL $0,0x24(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EAX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0x1,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x58(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x5c(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x2c(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x28(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x749f70,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x4c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 4044c0 <__kmpc_for_static_init_4@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x24(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RSP),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %EAX,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 42dc40 <accelerate_kernel_module_mp_accelerate_kernel_.DIR.OMP.PARALLEL.2+0x100> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x749f90,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x44(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 4040b0 <__kmpc_for_static_fini@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x90(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV $0x749fb0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL 404580 <__kmpc_barrier@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x58(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x50(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x48(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x38(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVQ %R14,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
CLTQ | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
SAL $0x3,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV $0x10,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SUB %RAX,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD 0x58(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV $0x1,%R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SUB %RAX,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R12,0xf0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %EBX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $0x2,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SUB %RAX,%RBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RBX,0xe8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x20(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %R14,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R10,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%RSI,%R14,1),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RSI,0xd8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R9,%R14,1),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RSI,0xd0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R8,%R14,1),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RSI,0xc8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%RDI,%R14,1),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EDX,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RSI,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%RBX,%R14,1),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RSI,0xb8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R15,%R14,1),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RSI,0xb0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %R14,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RAX,0xa8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R11,%R14,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD 0x60(%RBP),%R14 | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV %R14,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD 0xdb0b9(%RIP),%ZMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ %XMM0,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RCX,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ECX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %EDX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 42dd52 <accelerate_kernel_module_mp_accelerate_kernel_.DIR.OMP.PARALLEL.2+0x212> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼accelerate_kernel_.DIR.OMP.PARALLEL.2– | 4.76 | 1.51 |
▼Loop 223 - accelerate_kernel.f90:60-76 - exec– | 0.01 | 0 |
○Loop 224 - accelerate_kernel.f90:62-76 - exec | 4.75 | 1.5 |