Function: clover_pack_message_left_.DIR.OMP.PARALLEL.LOOP.2.split104 | Module: exec | Source: pack_kernel.f90:61-69 | Coverage: 0.02% |
---|
Function: clover_pack_message_left_.DIR.OMP.PARALLEL.LOOP.2.split104 | Module: exec | Source: pack_kernel.f90:61-69 | Coverage: 0.02% |
---|
/home/eoseret/qaas_runs_CPU_9468/171-152-3172/intel/CloverLeafFC/build/CloverLeafFC/CloverLeaf_ref/kernels/pack_kernel.f90: 61 - 69 |
-------------------------------------------------------------------------------- |
61: !$OMP PARALLEL DO PRIVATE(index) |
62: DO k=y_min-depth,y_max+y_inc+depth |
63: !$OMP SIMD |
64: DO j=1,depth |
65: index= buffer_offset + j+(k+depth-1)*depth |
66: left_snd_buffer(index)=field(x_min+x_inc-1+j,k) |
67: ENDDO |
68: ENDDO |
69: !$OMP END PARALLEL DO |
0x448cc0 PUSH %RBP |
0x448cc1 MOV %RSP,%RBP |
0x448cc4 PUSH %R15 |
0x448cc6 PUSH %R14 |
0x448cc8 PUSH %R13 |
0x448cca PUSH %R12 |
0x448ccc PUSH %RBX |
0x448ccd SUB $0x48,%RSP |
0x448cd1 MOV %R9,-0x50(%RBP) |
0x448cd5 MOV %R8,-0x70(%RBP) |
0x448cd9 MOV %RCX,-0x48(%RBP) |
0x448cdd MOV 0x30(%RBP),%EAX |
0x448ce0 MOVL $0,-0x40(%RBP) |
0x448ce7 TEST %EAX,%EAX |
0x448ce9 JS 448d4a |
0x448ceb MOV %RDX,%RBX |
0x448cee MOV (%RDI),%ESI |
0x448cf0 MOVL $0,-0x34(%RBP) |
0x448cf7 MOV %EAX,-0x30(%RBP) |
0x448cfa MOVL $0x1,-0x3c(%RBP) |
0x448d01 SUB $0x8,%RSP |
0x448d05 LEA -0x3c(%RBP),%RAX |
0x448d09 LEA -0x40(%RBP),%RCX |
0x448d0d LEA -0x34(%RBP),%R8 |
0x448d11 LEA -0x30(%RBP),%R9 |
0x448d15 MOV $0x54e4d0,%EDI |
0x448d1a MOV %ESI,-0x38(%RBP) |
0x448d1d MOV $0x22,%EDX |
0x448d22 PUSH $0x1 |
0x448d24 PUSH $0x1 |
0x448d26 PUSH %RAX |
0x448d27 CALL 404670 <__kmpc_for_static_init_4@plt> |
0x448d2c ADD $0x20,%RSP |
0x448d30 MOV -0x34(%RBP),%EAX |
0x448d33 MOV -0x30(%RBP),%ESI |
0x448d36 SUB %EAX,%ESI |
0x448d38 JAE 448d80 |
0x448d3a MOV $0x54e4f0,%EDI |
0x448d3f MOV -0x38(%RBP),%ESI |
0x448d42 VZEROUPPER |
0x448d45 CALL 404230 <__kmpc_for_static_fini@plt> |
0x448d4a ADD $0x48,%RSP |
0x448d4e POP %RBX |
0x448d4f POP %R12 |
0x448d51 POP %R13 |
0x448d53 POP %R14 |
0x448d55 POP %R15 |
0x448d57 POP %RBP |
0x448d58 RET |
0x448d59 NOPW %CS:(%RAX,%RAX,1) |
0x448d68 NOPW %CS:(%RAX,%RAX,1) |
0x448d77 NOPW (%RAX,%RAX,1) |
0x448d80 MOV %RAX,%RDX |
0x448d83 MOV -0x50(%RBP),%RAX |
0x448d87 MOV (%RAX),%ECX |
0x448d89 LEA -0x1(%RDX,%RBX,1),%EDI |
0x448d8d XOR %R8D,%R8D |
0x448d90 ADD %EBX,%EDX |
0x448d92 MOV %RDX,-0x68(%RBP) |
0x448d96 VMOVDQA64 0xc1c20(%RIP),%ZMM0 |
0x448da0 VMOVDQA 0xc2af8(%RIP),%YMM1 |
0x448da8 VPTERNLOGD $-0x1,%ZMM2,%ZMM2,%ZMM2 |
0x448daf VMOVDQA64 0xc1c07(%RIP),%ZMM3 |
0x448db9 MOV %ESI,-0x2c(%RBP) |
0x448dbc JMP 448dd8 |
0x448dbe XCHG %AX,%AX |
(436) 0x448dc0 MOV %ECX,%R10D |
(436) 0x448dc3 LEA 0x1(%R8),%EAX |
(436) 0x448dc7 INC %EDI |
(436) 0x448dc9 MOV %R10D,%ECX |
(436) 0x448dcc CMP %ESI,%R8D |
(436) 0x448dcf MOV %EAX,%R8D |
(436) 0x448dd2 JE 448d3a |
(436) 0x448dd8 TEST %ECX,%ECX |
(436) 0x448dda JLE 448dc0 |
(436) 0x448ddc MOV -0x68(%RBP),%RAX |
(436) 0x448de0 LEA (%RAX,%R8,1),%R13D |
(436) 0x448de4 MOV -0x70(%RBP),%RAX |
(436) 0x448de8 MOVSXD (%RAX),%RAX |
(436) 0x448deb MOV -0x50(%RBP),%RDX |
(436) 0x448def MOV (%RDX),%R10D |
(436) 0x448df2 MOV 0x18(%RBP),%R11 |
(436) 0x448df6 MOV (%R11),%RDX |
(436) 0x448df9 MOV 0x38(%R11),%RSI |
(436) 0x448dfd MOV 0x20(%RBP),%R9 |
(436) 0x448e01 MOV (%R9),%R9D |
(436) 0x448e04 MOV 0x50(%R11),%R15 |
(436) 0x448e08 MOV 0x10aea9(%RIP),%R12 |
(436) 0x448e0f MOV 0x10aeda(%RIP),%R11 |
(436) 0x448e16 MOV %R11,-0x60(%RBP) |
(436) 0x448e1a MOV %ECX,%R14D |
(436) 0x448e1d MOV %R14,%R11 |
(436) 0x448e20 MOVSXD %R13D,%RBX |
(436) 0x448e23 MOV $-0x8,%ECX |
(436) 0x448e28 AND %RCX,%R11 |
(436) 0x448e2b JE 448f00 |
(436) 0x448e31 LEA (%R10,%RDI,1),%ECX |
(436) 0x448e35 IMUL %R10D,%ECX |
(436) 0x448e39 MOVSXD %ECX,%RCX |
(436) 0x448e3c ADD %RAX,%RCX |
(436) 0x448e3f ADD -0x48(%RBP),%R9D |
(436) 0x448e43 VPBROADCASTQ %RSI,%ZMM5 |
(436) 0x448e49 LEA -0x1(%R13,%R10,1),%ESI |
(436) 0x448e4e MOV %R10,-0x58(%RBP) |
(436) 0x448e52 IMUL %R10D,%ESI |
(436) 0x448e56 MOVSXD %ESI,%R13 |
(436) 0x448e59 ADD %RAX,%R13 |
(436) 0x448e5c MOV -0x60(%RBP),%RAX |
(436) 0x448e60 VPBROADCASTQ %RAX,%ZMM6 |
(436) 0x448e66 XOR %ESI,%ESI |
(436) 0x448e68 NOPL (%RAX,%RAX,1) |
(437) 0x448e70 LEA 0x1(%RBX),%RAX |
(437) 0x448e74 IMUL %R15,%RAX |
(437) 0x448e78 LEA (%R9,%RSI,1),%R10D |
(437) 0x448e7c VPBROADCASTD %R10D,%YMM7 |
(437) 0x448e82 VPADDD %YMM1,%YMM7,%YMM7 |
(437) 0x448e86 VPMOVSXDQ %YMM7,%ZMM7 |
(437) 0x448e8c VPSUBQ %ZMM2,%ZMM7,%ZMM7 |
(437) 0x448e92 VPMULLQ %ZMM7,%ZMM5,%ZMM7 |
(437) 0x448e98 LEA (%RDX,%RAX,1),%R10 |
(437) 0x448e9c KXNORW %K0,%K0,%K1 |
(437) 0x448ea0 VXORPD %XMM8,%XMM8,%XMM8 |
(437) 0x448ea5 VGATHERQPD (%R10,%ZMM7,1),%ZMM8{%K1} |
(437) 0x448eac LEA (%RCX,%RSI,1),%R10 |
(437) 0x448eb0 VPBROADCASTQ %R10,%ZMM7 |
(437) 0x448eb6 VPADDQ %ZMM3,%ZMM7,%ZMM7 |
(437) 0x448ebc VPMULLQ %ZMM7,%ZMM6,%ZMM7 |
(437) 0x448ec2 KXNORW %K0,%K0,%K1 |
(437) 0x448ec6 VSCATTERQPD %ZMM8,(%R12,%ZMM7,1){%K1} |
(437) 0x448ecd ADD $0x8,%RSI |
(437) 0x448ed1 CMP %R11,%RSI |
(437) 0x448ed4 JB 448e70 |
(436) 0x448ed6 CMP %R14,%R11 |
(436) 0x448ed9 JNE 448f80 |
(436) 0x448edf MOV -0x2c(%RBP),%ESI |
(436) 0x448ee2 MOV -0x58(%RBP),%R10 |
(436) 0x448ee6 JMP 448dc3 |
0x448eeb NOPW %CS:(%RAX,%RAX,1) |
0x448efa NOPW (%RAX,%RAX,1) |
(436) 0x448f00 VPBROADCASTQ %R14,%ZMM9 |
(436) 0x448f06 ADD -0x48(%RBP),%R9D |
(436) 0x448f0a VPBROADCASTD %R9D,%YMM8 |
(436) 0x448f10 VPBROADCASTQ %RDX,%ZMM10 |
(436) 0x448f16 INC %RBX |
(436) 0x448f19 IMUL %RBX,%R15 |
(436) 0x448f1d VPBROADCASTQ %R15,%ZMM11 |
(436) 0x448f23 VPBROADCASTQ %RSI,%ZMM5 |
(436) 0x448f29 VPBROADCASTQ %R12,%ZMM7 |
(436) 0x448f2f LEA -0x1(%R13,%R10,1),%ECX |
(436) 0x448f34 IMUL %R10D,%ECX |
(436) 0x448f38 MOVSXD %ECX,%R13 |
(436) 0x448f3b ADD %RAX,%R13 |
(436) 0x448f3e MOV -0x60(%RBP),%RAX |
(436) 0x448f42 VPBROADCASTQ %RAX,%ZMM6 |
(436) 0x448f48 XOR %R11D,%R11D |
(436) 0x448f4b MOV -0x2c(%RBP),%ESI |
(436) 0x448f4e JMP 448fa5 |
0x448f50 NOPW %CS:(%RAX,%RAX,1) |
0x448f5f NOPW %CS:(%RAX,%RAX,1) |
0x448f6e NOPW %CS:(%RAX,%RAX,1) |
0x448f7d NOPL (%RAX) |
(436) 0x448f80 VPBROADCASTQ %RAX,%ZMM11 |
(436) 0x448f86 VPBROADCASTQ %R12,%ZMM7 |
(436) 0x448f8c VPBROADCASTQ %RDX,%ZMM10 |
(436) 0x448f92 VPBROADCASTD %R9D,%YMM8 |
(436) 0x448f98 VPBROADCASTQ %R14,%ZMM9 |
(436) 0x448f9e MOV -0x2c(%RBP),%ESI |
(436) 0x448fa1 MOV -0x58(%RBP),%R10 |
(436) 0x448fa5 VPADDQ %ZMM11,%ZMM10,%ZMM10 |
(436) 0x448fab VPBROADCASTQ %R11,%ZMM11 |
(436) 0x448fb1 VPSUBQ %ZMM11,%ZMM9,%ZMM9 |
(436) 0x448fb7 VPCMPNLEUQ %ZMM0,%ZMM9,%K1 |
(436) 0x448fbe VPBROADCASTD %R11D,%YMM9 |
(436) 0x448fc4 VPADDD %YMM8,%YMM9,%YMM8 |
(436) 0x448fc9 VPADDD %YMM1,%YMM8,%YMM8 |
(436) 0x448fcd VPMOVSXDQ %YMM8,%ZMM8 |
(436) 0x448fd3 VPSUBQ %ZMM2,%ZMM8,%ZMM8 |
(436) 0x448fd9 VPMULLQ %ZMM8,%ZMM5,%ZMM5 |
(436) 0x448fdf VPADDQ %ZMM5,%ZMM10,%ZMM5 |
(436) 0x448fe5 KMOVQ %K1,%K2 |
(436) 0x448fea VPXOR %XMM8,%XMM8,%XMM8 |
(436) 0x448fef VGATHERQPD (,%ZMM5,1),%ZMM8{%K2} |
(436) 0x448ffa VMOVAPD %ZMM8,%ZMM4{%K1} |
(436) 0x449000 ADD %R11,%R13 |
(436) 0x449003 VPBROADCASTQ %R13,%ZMM5 |
(436) 0x449009 VPADDQ %ZMM0,%ZMM5,%ZMM5 |
(436) 0x44900f VPMULLQ %ZMM5,%ZMM6,%ZMM5 |
(436) 0x449015 VPADDQ %ZMM5,%ZMM7,%ZMM5 |
(436) 0x44901b VSCATTERQPD %ZMM4,(,%ZMM5,1){%K1} |
(436) 0x449026 JMP 448dc3 |
0x44902b NOPW %CS:(%RAX,%RAX,1) |
0x449035 NOPW %CS:(%RAX,%RAX,1) |
0x44903f NOP |
Path / |
Source file and lines | pack_kernel.f90:61-69 |
Module | exec |
nb instructions | 75 |
nb uops | 78 |
loop length | 346 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 1 |
used zmm registers | 3 |
nb stack references | 11 |
micro-operation queue | 13.00 cycles |
front end | 13.00 cycles |
 | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.30 | 2.20 | 5.67 | 5.67 | 10.50 | 2.20 | 2.10 | 10.50 | 10.50 | 10.50 | 2.20 | 5.67 |
cycles | 2.30 | 2.20 | 5.67 | 5.67 | 10.50 | 2.20 | 2.10 | 10.50 | 10.50 | 10.50 | 2.20 | 5.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 12.62-12.68 |
Stall cycles | 0.00 |
Front-end | 13.00 |
Dispatch | 10.50 |
Overall L1 | 13.00 |
all | 19% |
load | 42% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 50% |
all | 21% |
load | 39% |
store | 8% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 11% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 34% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x48,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R9,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x30(%RBP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVL $0,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %EAX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JS 448d4a <pack_kernel_module_mp_clover_pack_message_left_.DIR.OMP.PARALLEL.LOOP.2.split104+0x8a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVL $0,-0x34(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EAX,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0x1,-0x3c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA -0x3c(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x40(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x34(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x30(%RBP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x54e4d0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 404670 <__kmpc_for_static_init_4@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x34(%RBP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x30(%RBP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %EAX,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 448d80 <pack_kernel_module_mp_clover_pack_message_left_.DIR.OMP.PARALLEL.LOOP.2.split104+0xc0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x54e4f0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x38(%RBP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 404230 <__kmpc_for_static_fini@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x48,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x50(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x1(%RDX,%RBX,1),%EDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %EBX,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDX,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVDQA64 0xc1c20(%RIP),%ZMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.50 |
VMOVDQA 0xc2af8(%RIP),%YMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VPTERNLOGD $-0x1,%ZMM2,%ZMM2,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVDQA64 0xc1c07(%RIP),%ZMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.50 |
MOV %ESI,-0x2c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 448dd8 <pack_kernel_module_mp_clover_pack_message_left_.DIR.OMP.PARALLEL.LOOP.2.split104+0x118> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | pack_kernel.f90:61-69 |
Module | exec |
nb instructions | 75 |
nb uops | 78 |
loop length | 346 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 1 |
used zmm registers | 3 |
nb stack references | 11 |
micro-operation queue | 13.00 cycles |
front end | 13.00 cycles |
 | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.30 | 2.20 | 5.67 | 5.67 | 10.50 | 2.20 | 2.10 | 10.50 | 10.50 | 10.50 | 2.20 | 5.67 |
cycles | 2.30 | 2.20 | 5.67 | 5.67 | 10.50 | 2.20 | 2.10 | 10.50 | 10.50 | 10.50 | 2.20 | 5.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 12.62-12.68 |
Stall cycles | 0.00 |
Front-end | 13.00 |
Dispatch | 10.50 |
Overall L1 | 13.00 |
all | 19% |
load | 42% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 50% |
all | 21% |
load | 39% |
store | 8% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 11% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 34% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x48,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R9,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x30(%RBP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVL $0,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %EAX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JS 448d4a <pack_kernel_module_mp_clover_pack_message_left_.DIR.OMP.PARALLEL.LOOP.2.split104+0x8a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVL $0,-0x34(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EAX,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0x1,-0x3c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA -0x3c(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x40(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x34(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x30(%RBP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x54e4d0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 404670 <__kmpc_for_static_init_4@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x34(%RBP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x30(%RBP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %EAX,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 448d80 <pack_kernel_module_mp_clover_pack_message_left_.DIR.OMP.PARALLEL.LOOP.2.split104+0xc0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x54e4f0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x38(%RBP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 404230 <__kmpc_for_static_fini@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x48,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x50(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x1(%RDX,%RBX,1),%EDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %EBX,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDX,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVDQA64 0xc1c20(%RIP),%ZMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.50 |
VMOVDQA 0xc2af8(%RIP),%YMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VPTERNLOGD $-0x1,%ZMM2,%ZMM2,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVDQA64 0xc1c07(%RIP),%ZMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.50 |
MOV %ESI,-0x2c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 448dd8 <pack_kernel_module_mp_clover_pack_message_left_.DIR.OMP.PARALLEL.LOOP.2.split104+0x118> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼clover_pack_message_left_.DIR.OMP.PARALLEL.LOOP.2.split104– | 0.02 | 0.02 |
▼Loop 436 - pack_kernel.f90:62-66 - exec– | 0.02 | 0.03 |
○Loop 437 - pack_kernel.f90:64-66 - exec | 0 | 0 |