Function: flux_calc_kernel_.DIR.OMP.PARALLEL.2 | Module: exec | Source: flux_calc_kernel.f90:49-63 | Coverage: 4.41% |
---|
Function: flux_calc_kernel_.DIR.OMP.PARALLEL.2 | Module: exec | Source: flux_calc_kernel.f90:49-63 | Coverage: 4.41% |
---|
/home/eoseret/qaas_runs_CPU_9468/171-152-3172/intel/CloverLeafFC/build/CloverLeafFC/CloverLeaf_ref/kernels/flux_calc_kernel.f90: 49 - 63 |
-------------------------------------------------------------------------------- |
49: !$OMP PARALLEL |
50: |
51: ! Note that the loops calculate one extra flux than required, but this |
52: ! allows loop fusion that improves performance |
53: !$OMP DO |
54: DO k=y_min,y_max+1 |
55: !$OMP SIMD |
56: DO j=x_min,x_max+1 |
57: vol_flux_x(j,k)=0.25_8*dt*xarea(j,k) & |
58: *(xvel0(j,k)+xvel0(j,k+1)+xvel1(j,k)+xvel1(j,k+1)) |
59: vol_flux_y(j,k)=0.25_8*dt*yarea(j,k) & |
60: *(yvel0(j,k)+yvel0(j+1,k)+yvel1(j,k)+yvel1(j+1,k)) |
61: ENDDO |
62: ENDDO |
63: !$OMP END DO |
0x441c00 PUSH %RBP |
0x441c01 MOV %RSP,%RBP |
0x441c04 PUSH %R15 |
0x441c06 PUSH %R14 |
0x441c08 PUSH %R13 |
0x441c0a PUSH %R12 |
0x441c0c PUSH %RBX |
0x441c0d SUB $0xf8,%RSP |
0x441c14 MOV %R9,-0x88(%RBP) |
0x441c1b MOV %R8,-0x80(%RBP) |
0x441c1f MOV 0x50(%RBP),%EBX |
0x441c22 MOV 0x48(%RBP),%EAX |
0x441c25 SUB %EBX,%EAX |
0x441c27 INC %EAX |
0x441c29 MOVL $0,-0x70(%RBP) |
0x441c30 JS 441ca4 |
0x441c32 MOV %RCX,%R15 |
0x441c35 MOV %RDX,%R14 |
0x441c38 MOV %RDI,-0x90(%RBP) |
0x441c3f MOV (%RDI),%ESI |
0x441c41 MOVL $0,-0x3c(%RBP) |
0x441c48 MOV %EAX,-0x38(%RBP) |
0x441c4b MOVL $0x1,-0x6c(%RBP) |
0x441c52 SUB $0x8,%RSP |
0x441c56 LEA -0x6c(%RBP),%RAX |
0x441c5a LEA -0x70(%RBP),%RCX |
0x441c5e LEA -0x3c(%RBP),%R8 |
0x441c62 LEA -0x38(%RBP),%R9 |
0x441c66 MOV $0x54d5f0,%EDI |
0x441c6b MOV %ESI,-0x64(%RBP) |
0x441c6e MOV $0x22,%EDX |
0x441c73 PUSH $0x1 |
0x441c75 PUSH $0x1 |
0x441c77 PUSH %RAX |
0x441c78 CALL 404670 <__kmpc_for_static_init_4@plt> |
0x441c7d ADD $0x20,%RSP |
0x441c81 MOV -0x3c(%RBP),%EAX |
0x441c84 MOV -0x38(%RBP),%R8D |
0x441c88 SUB %EAX,%R8D |
0x441c8b JAE 441d00 |
0x441c8d MOV $0x54d610,%EDI |
0x441c92 MOV -0x64(%RBP),%ESI |
0x441c95 VZEROUPPER |
0x441c98 CALL 404230 <__kmpc_for_static_fini@plt> |
0x441c9d MOV -0x90(%RBP),%RDI |
0x441ca4 MOV (%RDI),%ESI |
0x441ca6 MOV $0x54d630,%EDI |
0x441cab CALL 404740 <__kmpc_barrier@plt> |
0x441cb0 ADD $0xf8,%RSP |
0x441cb7 POP %RBX |
0x441cb8 POP %R12 |
0x441cba POP %R13 |
0x441cbc POP %R14 |
0x441cbe POP %R15 |
0x441cc0 POP %RBP |
0x441cc1 RET |
0x441cc2 NOPW %CS:(%RAX,%RAX,1) |
0x441cd1 NOPW %CS:(%RAX,%RAX,1) |
0x441ce0 NOPW %CS:(%RAX,%RAX,1) |
0x441cef NOPW %CS:(%RAX,%RAX,1) |
0x441cfe XCHG %AX,%AX |
0x441d00 MOV %RAX,%RDI |
0x441d03 MOV 0x30(%RBP),%R9 |
0x441d07 MOV 0x28(%RBP),%R10 |
0x441d0b SAL $0x20,%R15 |
0x441d0f MOV $-0x200000000,%RCX |
0x441d19 LEA (%R15,%RCX,1),%RDX |
0x441d1d MOV %RDX,%RAX |
0x441d20 SAR $0x20,%RAX |
0x441d24 MOV %RAX,-0xe8(%RBP) |
0x441d2b SAL $0x20,%R14 |
0x441d2f ADD %R14,%RCX |
0x441d32 MOV %RCX,%R11 |
0x441d35 SAR $0x20,%R11 |
0x441d39 TEST %RDX,%RDX |
0x441d3c MOV $-0x1,%RSI |
0x441d43 CMOVNS %RDX,%RSI |
0x441d47 TEST %RSI,%RSI |
0x441d4a MOV $0x1,%R12D |
0x441d50 CMOVG %R12,%RSI |
0x441d54 MOV $0x200000000,%R13 |
0x441d5e MOV %R13,%RAX |
0x441d61 SUB %R15,%RAX |
0x441d64 MOV 0x18(%RBP),%R15 |
0x441d68 CMP %RAX,%RDX |
0x441d6b CMOVG %RDX,%RAX |
0x441d6f MOV $-0x1,%RDX |
0x441d76 SHR $0x20,%RAX |
0x441d7a IMUL %RSI,%RAX |
0x441d7e SAL $0x3,%RAX |
0x441d82 MOV $0x8,%ESI |
0x441d87 SUB %RAX,%RSI |
0x441d8a TEST %RCX,%RCX |
0x441d8d CMOVNS %RCX,%RDX |
0x441d91 TEST %RDX,%RDX |
0x441d94 CMOVG %R12,%RDX |
0x441d98 SUB %R14,%R13 |
0x441d9b CMP %R13,%RCX |
0x441d9e CMOVG %RCX,%R13 |
0x441da2 SHR $0x20,%R13 |
0x441da6 IMUL %RDX,%R13 |
0x441daa ADD %EBX,%EDI |
0x441dac MOV -0x80(%RBP),%RCX |
0x441db0 SUB %RAX,%RCX |
0x441db3 MOV %RCX,-0xc8(%RBP) |
0x441dba SUB %RAX,%R9 |
0x441dbd MOV %R9,-0xc0(%RBP) |
0x441dc4 MOV -0x88(%RBP),%RCX |
0x441dcb SUB %RAX,%RCX |
0x441dce MOV %RCX,-0xb8(%RBP) |
0x441dd5 SUB %RAX,%R15 |
0x441dd8 MOV %R15,-0xb0(%RBP) |
0x441ddf SUB %RAX,%R10 |
0x441de2 MOV %R10,-0xa8(%RBP) |
0x441de9 MOV 0x38(%RBP),%RCX |
0x441ded SUB %RAX,%RCX |
0x441df0 MOV %RCX,-0xa0(%RBP) |
0x441df7 NEG %R13 |
0x441dfa MOV %R13,-0xd8(%RBP) |
0x441e01 MOV 0x10(%RBP),%RAX |
0x441e05 ADD %RSI,%RAX |
0x441e08 MOV %RAX,-0x98(%RBP) |
0x441e0f ADD 0x20(%RBP),%RSI |
0x441e13 MOV %RSI,-0xd0(%RBP) |
0x441e1a MOV %R11,-0x78(%RBP) |
0x441e1e SUB %R11,%R12 |
0x441e21 MOV %R12,-0xe0(%RBP) |
0x441e28 VMOVSD 0xc9870(%RIP),%XMM0 |
0x441e30 VMOVDQA64 0xc89c6(%RIP),%ZMM1 |
0x441e3a MOV 0x60(%RBP),%RDX |
0x441e3e MOV 0x58(%RBP),%R9 |
0x441e42 XOR %EBX,%EBX |
0x441e44 MOV %RDI,-0x110(%RBP) |
0x441e4b MOV %EDI,%R10D |
0x441e4e MOV %R8D,-0x34(%RBP) |
0x441e52 JMP 442031 |
0x441e57 NOPW %CS:(%RAX,%RAX,1) |
0x441e66 NOPW %CS:(%RAX,%RAX,1) |
0x441e75 NOPW %CS:(%RAX,%RAX,1) |
(343) 0x441e80 VPBROADCASTQ %RSI,%ZMM13 |
(343) 0x441e86 SUB -0x78(%RBP),%RAX |
(343) 0x441e8a IMUL %RAX,%R15 |
(343) 0x441e8e MOV %R15,-0x30(%RBP) |
(343) 0x441e92 VBROADCASTSD %XMM12,%ZMM12 |
(343) 0x441e98 LEA 0x1(%RAX),%RCX |
(343) 0x441e9c MOV %RAX,%RSI |
(343) 0x441e9f MOV %RDI,%RAX |
(343) 0x441ea2 IMUL %RCX,%RAX |
(343) 0x441ea6 IMUL %RSI,%RDI |
(343) 0x441eaa MOV %RDI,-0x58(%RBP) |
(343) 0x441eae MOV -0x60(%RBP),%R15 |
(343) 0x441eb2 MOV %R8,%RDI |
(343) 0x441eb5 MOV %R15,%R8 |
(343) 0x441eb8 IMUL %RSI,%R8 |
(343) 0x441ebc IMUL %RCX,%R15 |
(343) 0x441ec0 IMUL %RSI,%RDI |
(343) 0x441ec4 MOV %RDI,%RCX |
(343) 0x441ec7 IMUL %RSI,%RDX |
(343) 0x441ecb IMUL %RSI,%R13 |
(343) 0x441ecf IMUL %RSI,%R12 |
(343) 0x441ed3 IMUL %RSI,%R9 |
(343) 0x441ed7 MOV %R9,-0x48(%RBP) |
(343) 0x441edb XOR %R11D,%R11D |
(343) 0x441ede MOV %RDX,%RDI |
(343) 0x441ee1 MOV %RCX,%RDX |
(343) 0x441ee4 MOV %R15,%R9 |
(343) 0x441ee7 MOV -0x30(%RBP),%RCX |
(343) 0x441eeb MOV -0x58(%RBP),%RSI |
(343) 0x441eef VPBROADCASTQ %R11,%ZMM14 |
(343) 0x441ef5 ADD %R11,%R14 |
(343) 0x441ef8 VPSUBQ %ZMM14,%ZMM13,%ZMM13 |
(343) 0x441efe VPCMPNLEUQ %ZMM1,%ZMM13,%K1 |
(343) 0x441f05 ADD 0x38(%RBP),%RCX |
(343) 0x441f09 SUB -0xe8(%RBP),%R14 |
(343) 0x441f10 VMOVUPD (%RCX,%R14,8),%ZMM13{%K1}{z} |
(343) 0x441f17 MOV 0x28(%RBP),%RCX |
(343) 0x441f1b ADD %RCX,%RAX |
(343) 0x441f1e VMOVUPD (%RAX,%R14,8),%ZMM14{%K1}{z} |
(343) 0x441f25 ADD %RCX,%RSI |
(343) 0x441f28 VMOVUPD (%RSI,%R14,8),%ZMM15{%K1}{z} |
(343) 0x441f2f MOV 0x18(%RBP),%RAX |
(343) 0x441f33 ADD %RAX,%R8 |
(343) 0x441f36 VMOVUPD (%R8,%R14,8),%ZMM16{%K1}{z} |
(343) 0x441f3d ADD %RAX,%R9 |
(343) 0x441f40 VMOVUPD (%R9,%R14,8),%ZMM17{%K1}{z} |
(343) 0x441f47 VMOVAPD %ZMM13,%ZMM11{%K1} |
(343) 0x441f4d VMULPD %ZMM12,%ZMM11,%ZMM13 |
(343) 0x441f53 VMOVAPD %ZMM14,%ZMM10{%K1} |
(343) 0x441f59 VMOVAPD %ZMM15,%ZMM9{%K1} |
(343) 0x441f5f VADDPD %ZMM9,%ZMM10,%ZMM14 |
(343) 0x441f65 VMOVAPD %ZMM16,%ZMM8{%K1} |
(343) 0x441f6b VMOVAPD %ZMM17,%ZMM7{%K1} |
(343) 0x441f71 VADDPD %ZMM7,%ZMM8,%ZMM15 |
(343) 0x441f77 VADDPD %ZMM15,%ZMM14,%ZMM14 |
(343) 0x441f7d VMULPD %ZMM14,%ZMM13,%ZMM13 |
(343) 0x441f83 ADD -0x88(%RBP),%RDX |
(343) 0x441f8a VMOVUPD %ZMM13,(%RDX,%R14,8){%K1} |
(343) 0x441f91 ADD 0x30(%RBP),%RDI |
(343) 0x441f95 VMOVUPD (%RDI,%R14,8),%ZMM13{%K1}{z} |
(343) 0x441f9c ADD 0x20(%RBP),%R13 |
(343) 0x441fa0 VMOVUPD 0x8(%R13,%R14,8),%ZMM14{%K1}{z} |
(343) 0x441fab VMOVUPD (%R13,%R14,8),%ZMM15{%K1}{z} |
(343) 0x441fb3 ADD 0x10(%RBP),%R12 |
(343) 0x441fb7 VMOVUPD (%R12,%R14,8),%ZMM16{%K1}{z} |
(343) 0x441fbe VMOVUPD 0x8(%R12,%R14,8),%ZMM17{%K1}{z} |
(343) 0x441fc9 VMOVAPD %ZMM13,%ZMM6{%K1} |
(343) 0x441fcf VMOVAPD %ZMM14,%ZMM5{%K1} |
(343) 0x441fd5 VMULPD %ZMM12,%ZMM6,%ZMM12 |
(343) 0x441fdb VMOVAPD %ZMM15,%ZMM4{%K1} |
(343) 0x441fe1 VADDPD %ZMM4,%ZMM5,%ZMM13 |
(343) 0x441fe7 VMOVAPD %ZMM16,%ZMM3{%K1} |
(343) 0x441fed VMOVAPD %ZMM17,%ZMM2{%K1} |
(343) 0x441ff3 VADDPD %ZMM2,%ZMM3,%ZMM14 |
(343) 0x441ff9 VADDPD %ZMM14,%ZMM13,%ZMM13 |
(343) 0x441fff VMULPD %ZMM13,%ZMM12,%ZMM12 |
(343) 0x442005 MOV -0x48(%RBP),%RAX |
(343) 0x442009 ADD -0x80(%RBP),%RAX |
(343) 0x44200d VMOVUPD %ZMM12,(%RAX,%R14,8){%K1} |
(343) 0x442014 MOV -0x34(%RBP),%R8D |
(343) 0x442018 MOV 0x60(%RBP),%RDX |
(343) 0x44201c MOV 0x58(%RBP),%R9 |
(343) 0x442020 LEA 0x1(%RBX),%EAX |
(343) 0x442023 INC %R10D |
(343) 0x442026 CMP %R8D,%EBX |
(343) 0x442029 MOV %EAX,%EBX |
(343) 0x44202b JE 441c8d |
(343) 0x442031 MOVSXD (%RDX),%R14 |
(343) 0x442034 MOV (%R9),%ESI |
(343) 0x442037 MOV %ESI,%EAX |
(343) 0x442039 SUB %R14D,%EAX |
(343) 0x44203c INC %EAX |
(343) 0x44203e JS 442020 |
(343) 0x442040 MOV -0x110(%RBP),%RAX |
(343) 0x442047 ADD %EBX,%EAX |
(343) 0x442049 VMULSD 0x112ddf(%RIP),%XMM0,%XMM12 |
(343) 0x442051 MOV 0x68(%RBP),%RCX |
(343) 0x442055 MOV (%RCX),%R15 |
(343) 0x442058 MOV 0x70(%RBP),%RCX |
(343) 0x44205c MOV (%RCX),%RDI |
(343) 0x44205f MOV 0x78(%RBP),%RCX |
(343) 0x442063 MOV (%RCX),%RCX |
(343) 0x442066 MOV %RCX,-0x60(%RBP) |
(343) 0x44206a MOV 0x80(%RBP),%RCX |
(343) 0x442071 MOV (%RCX),%R8 |
(343) 0x442074 MOV 0x88(%RBP),%RCX |
(343) 0x44207b MOV (%RCX),%RDX |
(343) 0x44207e MOV 0x90(%RBP),%RCX |
(343) 0x442085 MOV (%RCX),%R13 |
(343) 0x442088 MOV 0x98(%RBP),%RCX |
(343) 0x44208f MOV (%RCX),%R12 |
(343) 0x442092 MOV 0xa0(%RBP),%RCX |
(343) 0x442099 MOV (%RCX),%R9 |
(343) 0x44209c SUB %R14D,%ESI |
(343) 0x44209f ADD $0x2,%ESI |
(343) 0x4420a2 CMP $0x2,%ESI |
(343) 0x4420a5 MOV $0x1,%ECX |
(343) 0x4420aa CMOVL %ECX,%ESI |
(343) 0x4420ad MOV %RSI,%R11 |
(343) 0x4420b0 CLTQ |
(343) 0x4420b2 AND $0x7ffffff8,%R11 |
(343) 0x4420b9 JE 441e80 |
(343) 0x4420bf MOV %RSI,-0x118(%RBP) |
(343) 0x4420c6 MOV %RAX,-0xf0(%RBP) |
(343) 0x4420cd MOV %RBX,-0x120(%RBP) |
(343) 0x4420d4 MOV %R10D,-0x68(%RBP) |
(343) 0x4420d8 MOV %R9,%RBX |
(343) 0x4420db MOVSXD %R10D,%R9 |
(343) 0x4420de MOV -0xd8(%RBP),%RAX |
(343) 0x4420e5 LEA (%RAX,%R9,1),%R10 |
(343) 0x4420e9 ADD -0xe0(%RBP),%R9 |
(343) 0x4420f0 MOV %R12,-0x108(%RBP) |
(343) 0x4420f7 MOV %R12,%RAX |
(343) 0x4420fa IMUL %R10,%RAX |
(343) 0x4420fe LEA (%RAX,%R14,8),%RAX |
(343) 0x442102 ADD -0x98(%RBP),%RAX |
(343) 0x442109 MOV %R13,-0x100(%RBP) |
(343) 0x442110 MOV %R13,%RCX |
(343) 0x442113 IMUL %R10,%RCX |
(343) 0x442117 MOV %R15,-0x30(%RBP) |
(343) 0x44211b MOV %R14,%RSI |
(343) 0x44211e MOV %RDI,%R14 |
(343) 0x442121 LEA (%RCX,%RSI,8),%RDI |
(343) 0x442125 ADD -0xd0(%RBP),%RDI |
(343) 0x44212c MOV %RBX,-0x48(%RBP) |
(343) 0x442130 MOV %RBX,%RCX |
(343) 0x442133 IMUL %R10,%RCX |
(343) 0x442137 LEA (%RCX,%RSI,8),%R12 |
(343) 0x44213b ADD -0xc8(%RBP),%R12 |
(343) 0x442142 MOV %RDX,-0xf8(%RBP) |
(343) 0x442149 MOV %RDX,%RCX |
(343) 0x44214c IMUL %R10,%RCX |
(343) 0x442150 MOV %R8,%RDX |
(343) 0x442153 LEA (%RCX,%RSI,8),%R8 |
(343) 0x442157 ADD -0xc0(%RBP),%R8 |
(343) 0x44215e MOV %RDX,-0x50(%RBP) |
(343) 0x442162 MOV %RDX,%RCX |
(343) 0x442165 IMUL %R10,%RCX |
(343) 0x442169 LEA (%RCX,%RSI,8),%RBX |
(343) 0x44216d ADD -0xb8(%RBP),%RBX |
(343) 0x442174 MOV -0x60(%RBP),%R13 |
(343) 0x442178 MOV %R13,%RCX |
(343) 0x44217b IMUL %R9,%RCX |
(343) 0x44217f LEA (%RCX,%RSI,8),%RDX |
(343) 0x442183 MOV -0xb0(%RBP),%R15 |
(343) 0x44218a ADD %R15,%RDX |
(343) 0x44218d MOV %R13,%RCX |
(343) 0x442190 IMUL %R10,%RCX |
(343) 0x442194 LEA (%RCX,%RSI,8),%R13 |
(343) 0x442198 ADD %R15,%R13 |
(343) 0x44219b MOV %R14,%RCX |
(343) 0x44219e IMUL %R10,%RCX |
(343) 0x4421a2 LEA (%RCX,%RSI,8),%R15 |
(343) 0x4421a6 MOV -0xa8(%RBP),%RCX |
(343) 0x4421ad ADD %RCX,%R15 |
(343) 0x4421b0 MOV %R14,-0x58(%RBP) |
(343) 0x4421b4 IMUL %R14,%R9 |
(343) 0x4421b8 MOV %RSI,%R14 |
(343) 0x4421bb LEA (%R9,%RSI,8),%R9 |
(343) 0x4421bf ADD %RCX,%R9 |
(343) 0x4421c2 IMUL -0x30(%RBP),%R10 |
(343) 0x4421c7 LEA (%R10,%RSI,8),%R10 |
(343) 0x4421cb ADD -0xa0(%RBP),%R10 |
(343) 0x4421d2 VBROADCASTSD %XMM12,%ZMM12 |
(343) 0x4421d8 XOR %ECX,%ECX |
(343) 0x4421da NOPW (%RAX,%RAX,1) |
(344) 0x4421e0 VMULPD (%R10,%RCX,8),%ZMM12,%ZMM13 |
(344) 0x4421e7 VMOVUPD (%R9,%RCX,8),%ZMM14 |
(344) 0x4421ee VADDPD (%R15,%RCX,8),%ZMM14,%ZMM14 |
(344) 0x4421f5 VADDPD (%R13,%RCX,8),%ZMM14,%ZMM14 |
(344) 0x4421fd VADDPD (%RDX,%RCX,8),%ZMM14,%ZMM14 |
(344) 0x442204 VMULPD %ZMM14,%ZMM13,%ZMM13 |
(344) 0x44220a VMOVUPD %ZMM13,(%RBX,%RCX,8) |
(344) 0x442211 VMULPD (%R8,%RCX,8),%ZMM12,%ZMM13 |
(344) 0x442218 VMOVUPD (%RDI,%RCX,8),%ZMM14 |
(344) 0x44221f VADDPD -0x8(%RDI,%RCX,8),%ZMM14,%ZMM14 |
(344) 0x44222a VADDPD -0x8(%RAX,%RCX,8),%ZMM14,%ZMM14 |
(344) 0x442235 VADDPD (%RAX,%RCX,8),%ZMM14,%ZMM14 |
(344) 0x44223c VMULPD %ZMM14,%ZMM13,%ZMM13 |
(344) 0x442242 VMOVUPD %ZMM13,(%R12,%RCX,8) |
(344) 0x442249 ADD $0x8,%RCX |
(344) 0x44224d CMP %R11,%RCX |
(344) 0x442250 JB 4421e0 |
(343) 0x442252 MOV -0x118(%RBP),%RSI |
(343) 0x442259 CMP %RSI,%R11 |
(343) 0x44225c MOV -0x34(%RBP),%R8D |
(343) 0x442260 MOV 0x60(%RBP),%RDX |
(343) 0x442264 MOV 0x58(%RBP),%R9 |
(343) 0x442268 MOV -0x68(%RBP),%R10D |
(343) 0x44226c MOV -0x120(%RBP),%RBX |
(343) 0x442273 JE 442020 |
(343) 0x442279 MOV -0xf0(%RBP),%RDI |
(343) 0x442280 SUB -0x78(%RBP),%RDI |
(343) 0x442284 LEA 0x1(%RDI),%RCX |
(343) 0x442288 MOV -0x58(%RBP),%RDX |
(343) 0x44228c MOV %RDX,%RAX |
(343) 0x44228f IMUL %RCX,%RAX |
(343) 0x442293 MOV -0x60(%RBP),%R9 |
(343) 0x442297 MOV %R9,%R8 |
(343) 0x44229a IMUL %RCX,%R9 |
(343) 0x44229e MOV -0x30(%RBP),%RCX |
(343) 0x4422a2 IMUL %RDI,%RCX |
(343) 0x4422a6 MOV %RCX,-0x30(%RBP) |
(343) 0x4422aa IMUL %RDI,%RDX |
(343) 0x4422ae IMUL %RDI,%R8 |
(343) 0x4422b2 MOV -0x50(%RBP),%RCX |
(343) 0x4422b6 IMUL %RDI,%RCX |
(343) 0x4422ba MOV %RCX,-0x50(%RBP) |
(343) 0x4422be MOV -0xf8(%RBP),%R15 |
(343) 0x4422c5 IMUL %RDI,%R15 |
(343) 0x4422c9 MOV -0x100(%RBP),%R13 |
(343) 0x4422d0 IMUL %RDI,%R13 |
(343) 0x4422d4 MOV -0x108(%RBP),%R12 |
(343) 0x4422db IMUL %RDI,%R12 |
(343) 0x4422df MOV -0x48(%RBP),%RCX |
(343) 0x4422e3 IMUL %RDI,%RCX |
(343) 0x4422e7 MOV %RCX,-0x48(%RBP) |
(343) 0x4422eb VPBROADCASTQ %RSI,%ZMM13 |
(343) 0x4422f1 MOV %RDX,%RSI |
(343) 0x4422f4 MOV -0x50(%RBP),%RDX |
(343) 0x4422f8 MOV %R15,%RDI |
(343) 0x4422fb MOV -0x30(%RBP),%RCX |
(343) 0x4422ff JMP 441eef |
0x442304 NOPW %CS:(%RAX,%RAX,1) |
0x44230e XCHG %AX,%AX |
Path / |
Source file and lines | flux_calc_kernel.f90:49-63 |
Module | exec |
nb instructions | 141 |
nb uops | 145 |
loop length | 652 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 0 |
used zmm registers | 1 |
nb stack references | 32 |
micro-operation queue | 24.17 cycles |
front end | 24.17 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 10.80 | 10.80 | 9.00 | 9.00 | 17.00 | 10.80 | 10.80 | 17.00 | 17.00 | 17.00 | 10.80 | 9.00 |
cycles | 10.80 | 10.80 | 9.00 | 9.00 | 17.00 | 10.80 | 10.80 | 17.00 | 17.00 | 17.00 | 10.80 | 9.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 23.63-23.66 |
Stall cycles | 0.00 |
Front-end | 24.17 |
Dispatch | 17.00 |
Overall L1 | 24.17 |
all | 3% |
load | 9% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 4% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 2% |
load | 8% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 4% |
all | 12% |
load | 17% |
store | 10% |
mul | 12% |
add-sub | 11% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 12% |
load | 17% |
store | 10% |
mul | 12% |
add-sub | 11% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 11% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0xf8,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R9,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x50(%RBP),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x48(%RBP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %EBX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVL $0,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JS 441ca4 <flux_calc_kernel_module_mp_flux_calc_kernel_.DIR.OMP.PARALLEL.2+0xa4> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RCX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVL $0,-0x3c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EAX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0x1,-0x6c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA -0x6c(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x70(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x3c(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x38(%RBP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x54d5f0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,-0x64(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 404670 <__kmpc_for_static_init_4@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x3c(%RBP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x38(%RBP),%R8D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %EAX,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 441d00 <flux_calc_kernel_module_mp_flux_calc_kernel_.DIR.OMP.PARALLEL.2+0x100> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x54d610,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x64(%RBP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 404230 <__kmpc_for_static_fini@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x90(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV $0x54d630,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL 404740 <__kmpc_barrier@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0xf8,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x30(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SAL $0x20,%R15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV $-0x200000000,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.28 |
LEA (%R15,%RCX,1),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDX,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SAR $0x20,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %RAX,-0xe8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SAL $0x20,%R14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ADD %R14,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SAR $0x20,%R11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
TEST %RDX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
MOV $-0x1,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVNS %RDX,%RSI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
TEST %RSI,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
MOV $0x1,%R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %R12,%RSI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV $0x200000000,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.28 |
MOV %R13,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R15,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x18(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RAX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RDX,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV $-0x1,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SHR $0x20,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
IMUL %RSI,%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
SAL $0x3,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV $0x8,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SUB %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
TEST %RCX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
CMOVNS %RCX,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
TEST %RDX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
CMOVG %R12,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB %R14,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %R13,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RCX,%R13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SHR $0x20,%R13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
IMUL %RDX,%R13 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %EBX,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x80(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,-0xc8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %RAX,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R9,-0xc0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV -0x88(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,-0xb8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %RAX,%R15 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R15,-0xb0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %RAX,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R10,-0xa8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x38(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,-0xa0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NEG %R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R13,-0xd8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %RSI,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RAX,-0x98(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD 0x20(%RBP),%RSI | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV %RSI,-0xd0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %R11,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R12,-0xe0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD 0xc9870(%RIP),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVDQA64 0xc89c6(%RIP),%ZMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.50 |
MOV 0x60(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x58(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDI,-0x110(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EDI,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R8D,-0x34(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 442031 <flux_calc_kernel_module_mp_flux_calc_kernel_.DIR.OMP.PARALLEL.2+0x431> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | flux_calc_kernel.f90:49-63 |
Module | exec |
nb instructions | 141 |
nb uops | 145 |
loop length | 652 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 0 |
used zmm registers | 1 |
nb stack references | 32 |
micro-operation queue | 24.17 cycles |
front end | 24.17 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 10.80 | 10.80 | 9.00 | 9.00 | 17.00 | 10.80 | 10.80 | 17.00 | 17.00 | 17.00 | 10.80 | 9.00 |
cycles | 10.80 | 10.80 | 9.00 | 9.00 | 17.00 | 10.80 | 10.80 | 17.00 | 17.00 | 17.00 | 10.80 | 9.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 23.63-23.66 |
Stall cycles | 0.00 |
Front-end | 24.17 |
Dispatch | 17.00 |
Overall L1 | 24.17 |
all | 3% |
load | 9% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 4% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 2% |
load | 8% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 4% |
all | 12% |
load | 17% |
store | 10% |
mul | 12% |
add-sub | 11% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 12% |
load | 17% |
store | 10% |
mul | 12% |
add-sub | 11% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 11% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0xf8,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R9,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x50(%RBP),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x48(%RBP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %EBX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVL $0,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JS 441ca4 <flux_calc_kernel_module_mp_flux_calc_kernel_.DIR.OMP.PARALLEL.2+0xa4> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RCX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVL $0,-0x3c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EAX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0x1,-0x6c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA -0x6c(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x70(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x3c(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x38(%RBP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x54d5f0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,-0x64(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 404670 <__kmpc_for_static_init_4@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x3c(%RBP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x38(%RBP),%R8D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %EAX,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 441d00 <flux_calc_kernel_module_mp_flux_calc_kernel_.DIR.OMP.PARALLEL.2+0x100> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x54d610,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x64(%RBP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 404230 <__kmpc_for_static_fini@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x90(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV $0x54d630,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL 404740 <__kmpc_barrier@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0xf8,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x30(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SAL $0x20,%R15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV $-0x200000000,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.28 |
LEA (%R15,%RCX,1),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDX,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SAR $0x20,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %RAX,-0xe8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SAL $0x20,%R14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ADD %R14,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SAR $0x20,%R11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
TEST %RDX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
MOV $-0x1,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVNS %RDX,%RSI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
TEST %RSI,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
MOV $0x1,%R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %R12,%RSI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV $0x200000000,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.28 |
MOV %R13,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R15,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x18(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RAX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RDX,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV $-0x1,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SHR $0x20,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
IMUL %RSI,%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
SAL $0x3,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV $0x8,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SUB %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
TEST %RCX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
CMOVNS %RCX,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
TEST %RDX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
CMOVG %R12,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB %R14,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %R13,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RCX,%R13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SHR $0x20,%R13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
IMUL %RDX,%R13 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %EBX,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x80(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,-0xc8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %RAX,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R9,-0xc0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV -0x88(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,-0xb8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %RAX,%R15 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R15,-0xb0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %RAX,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R10,-0xa8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x38(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,-0xa0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NEG %R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R13,-0xd8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %RSI,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RAX,-0x98(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD 0x20(%RBP),%RSI | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV %RSI,-0xd0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %R11,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R12,-0xe0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD 0xc9870(%RIP),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVDQA64 0xc89c6(%RIP),%ZMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.50 |
MOV 0x60(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x58(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDI,-0x110(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EDI,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R8D,-0x34(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 442031 <flux_calc_kernel_module_mp_flux_calc_kernel_.DIR.OMP.PARALLEL.2+0x431> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼flux_calc_kernel_.DIR.OMP.PARALLEL.2– | 4.41 | 3.31 |
▼Loop 343 - flux_calc_kernel.f90:54-60 - exec– | 0.01 | 0.01 |
○Loop 344 - flux_calc_kernel.f90:56-60 - exec | 4.41 | 3.3 |