Function: advanceVelocity.extracted | Module: exec | Source: timestep.c:71-78 | Coverage: 2.43% |
---|
Function: advanceVelocity.extracted | Module: exec | Source: timestep.c:71-78 | Coverage: 2.43% |
---|
/scratch_na/users/xoserete/qaas_runs/171-172-2581/intel/CoMD/build/CoMD/CoMD/src-openmp/timestep.c: 71 - 78 |
-------------------------------------------------------------------------------- |
71: #pragma omp parallel for |
72: for (int iBox=0; iBox<nBoxes; iBox++) |
73: { |
74: for (int iOff=MAXATOMS*iBox,ii=0; ii<s->boxes->nAtoms[iBox]; ii++,iOff++) |
75: { |
76: s->atoms->p[iOff][0] += dt*s->atoms->f[iOff][0]; |
77: s->atoms->p[iOff][1] += dt*s->atoms->f[iOff][1]; |
78: s->atoms->p[iOff][2] += dt*s->atoms->f[iOff][2]; |
0x410ea0 PUSH %RBP |
0x410ea1 MOV %RSP,%RBP |
0x410ea4 PUSH %R15 |
0x410ea6 PUSH %R14 |
0x410ea8 PUSH %R13 |
0x410eaa PUSH %R12 |
0x410eac PUSH %RBX |
0x410ead SUB $0x28,%RSP |
0x410eb1 MOV %RCX,%R15 |
0x410eb4 MOV %RDX,%RBX |
0x410eb7 MOVL $0,-0x3c(%RBP) |
0x410ebe MOV (%RDI),%ESI |
0x410ec0 MOVL $0,-0x30(%RBP) |
0x410ec7 MOV %R9D,-0x2c(%RBP) |
0x410ecb MOVL $0x1,-0x38(%RBP) |
0x410ed2 SUB $0x8,%RSP |
0x410ed6 LEA -0x38(%RBP),%RAX |
0x410eda LEA -0x3c(%RBP),%RCX |
0x410ede LEA -0x30(%RBP),%R8 |
0x410ee2 LEA -0x2c(%RBP),%R9 |
0x410ee6 MOV $0x62f8f0,%EDI |
0x410eeb MOV %ESI,-0x34(%RBP) |
0x410eee MOV $0x22,%EDX |
0x410ef3 PUSH $0x1 |
0x410ef5 PUSH $0x1 |
0x410ef7 PUSH %RAX |
0x410ef8 CALL 403120 <__kmpc_for_static_init_4@plt> |
0x410efd ADD $0x20,%RSP |
0x410f01 MOV -0x30(%RBP),%EAX |
0x410f04 MOV -0x2c(%RBP),%ECX |
0x410f07 CMP %ECX,%EAX |
0x410f09 JBE 410f29 |
0x410f0b MOV $0x62f910,%EDI |
0x410f10 MOV -0x34(%RBP),%ESI |
0x410f13 ADD $0x28,%RSP |
0x410f17 POP %RBX |
0x410f18 POP %R12 |
0x410f1a POP %R13 |
0x410f1c POP %R14 |
0x410f1e POP %R15 |
0x410f20 POP %RBP |
0x410f21 VZEROUPPER |
0x410f24 JMP 402fe0 |
0x410f29 VMOVQ %R15,%XMM0 |
0x410f2e MOV 0x18(%RBX),%RDX |
0x410f32 MOV 0x78(%RDX),%RDX |
0x410f36 SUB %RAX,%RCX |
0x410f39 MOV %EAX,%ESI |
0x410f3b SAL $0x6,%ESI |
0x410f3e XOR %EDI,%EDI |
0x410f40 VPBROADCASTQ %XMM0,%XMM3 |
0x410f45 VPBROADCASTQ %XMM0,%YMM2 |
0x410f4a VMOVDQU %XMM3,-0x50(%RBP) |
0x410f4f JMP 410f6c |
0x410f51 NOPW %CS:(%RAX,%RAX,1) |
(102) 0x410f60 ADD $0x40,%ESI |
(102) 0x410f63 CMP %RCX,%RDI |
(102) 0x410f66 LEA 0x1(%RDI),%RDI |
(102) 0x410f6a JE 410f0b |
(102) 0x410f6c LEA (%RDI,%RAX,1),%R8 |
(102) 0x410f70 MOV (%RDX,%R8,4),%R8D |
(102) 0x410f74 TEST %R8D,%R8D |
(102) 0x410f77 JLE 410f60 |
(102) 0x410f79 MOV %ESI,%R11D |
(102) 0x410f7c SAL $0x3,%R11 |
(102) 0x410f80 LEA (%RDI,%RAX,1),%R15D |
(102) 0x410f84 SAL $0x6,%R15D |
(102) 0x410f88 MOV 0x20(%RBX),%R10 |
(102) 0x410f8c MOV 0x20(%R10),%R9 |
(102) 0x410f90 MOV 0x28(%R10),%R10 |
(102) 0x410f94 LEA -0x1(%R8),%R14D |
(102) 0x410f98 MOVSXD %R14D,%R14 |
(102) 0x410f9b ADD %R15,%R14 |
(102) 0x410f9e SAL $0x3,%R14 |
(102) 0x410fa2 LEA (%R14,%R14,2),%R14 |
(102) 0x410fa6 LEA (%R10,%R14,1),%R12 |
(102) 0x410faa ADD $0x10,%R12 |
(102) 0x410fae SAL $0x3,%R15 |
(102) 0x410fb2 LEA (%R15,%R15,2),%R15 |
(102) 0x410fb6 LEA (%R9,%R15,1),%R13 |
(102) 0x410fba CMP %R13,%R12 |
(102) 0x410fbd JB 411030 |
(102) 0x410fbf ADD %R10,%R15 |
(102) 0x410fc2 ADD %R9,%R14 |
(102) 0x410fc5 ADD $0x10,%R14 |
(102) 0x410fc9 CMP %R15,%R14 |
(102) 0x410fcc JB 411030 |
(102) 0x410fce LEA (%R11,%R11,2),%R11 |
(102) 0x410fd2 ADD $0x10,%R11 |
(102) 0x410fd6 NOPW %CS:(%RAX,%RAX,1) |
(105) 0x410fe0 VMOVSD -0x10(%R10,%R11,1),%XMM1 |
(105) 0x410fe7 VFMADD213SD -0x10(%R9,%R11,1),%XMM0,%XMM1 |
(105) 0x410fee VMOVSD %XMM1,-0x10(%R9,%R11,1) |
(105) 0x410ff5 VMOVSD -0x8(%R10,%R11,1),%XMM1 |
(105) 0x410ffc VFMADD213SD -0x8(%R9,%R11,1),%XMM0,%XMM1 |
(105) 0x411003 VMOVSD %XMM1,-0x8(%R9,%R11,1) |
(105) 0x41100a VMOVSD (%R10,%R11,1),%XMM1 |
(105) 0x411010 VFMADD213SD (%R9,%R11,1),%XMM0,%XMM1 |
(105) 0x411016 VMOVSD %XMM1,(%R9,%R11,1) |
(105) 0x41101c ADD $0x18,%R11 |
(105) 0x411020 DEC %R8D |
(105) 0x411023 JNE 410fe0 |
(102) 0x411025 JMP 410f60 |
0x41102a NOPW (%RAX,%RAX,1) |
(102) 0x411030 LEA (%R11,%R11,2),%R14 |
(102) 0x411034 MOV %R8D,%R15D |
(102) 0x411037 AND $-0x8,%R15D |
(102) 0x41103b JE 411285 |
(102) 0x411041 LEA -0x1(%R15),%R12D |
(102) 0x411045 XOR %R13D,%R13D |
(102) 0x411048 MOV %R14,%R11 |
(102) 0x41104b NOPL (%RAX,%RAX,1) |
(104) 0x411050 VMOVUPD 0x20(%R10,%R11,1),%YMM3 |
(104) 0x411057 VMOVUPD 0x80(%R10,%R11,1),%YMM5 |
(104) 0x411061 VMOVUPD 0x10(%R10,%R11,1),%XMM6 |
(104) 0x411068 VMOVUPD 0x70(%R10,%R11,1),%XMM4 |
(104) 0x41106f VBLENDPD $0x3,0x60(%R10,%R11,1),%YMM5,%YMM7 |
(104) 0x411077 VINSERTF128 $0x1,0xa0(%R10,%R11,1),%YMM4,%YMM8 |
(104) 0x411082 VMOVUPD 0x20(%R10,%R11,1),%XMM12 |
(104) 0x411089 VBLENDPD $0xa,%YMM8,%YMM7,%YMM4 |
(104) 0x41108f VBLENDPD $0x3,(%R10,%R11,1),%YMM3,%YMM10 |
(104) 0x411096 VINSERTF128 $0x1,0x40(%R10,%R11,1),%YMM6,%YMM6 |
(104) 0x41109e VBLENDPD $0xa,%YMM6,%YMM10,%YMM9 |
(104) 0x4110a4 VSHUFPD $0x5,%YMM5,%YMM7,%YMM5 |
(104) 0x4110a9 VBROADCASTSD 0xb0(%R10,%R11,1),%YMM7 |
(104) 0x4110b3 VBLENDPD $0x8,%YMM7,%YMM5,%YMM5 |
(104) 0x4110b9 VSHUFPD $0x5,%YMM3,%YMM10,%YMM3 |
(104) 0x4110be VBROADCASTSD 0x50(%R10,%R11,1),%YMM7 |
(104) 0x4110c5 VBLENDPD $0x8,%YMM7,%YMM3,%YMM11 |
(104) 0x4110cb VMOVUPD 0x80(%R10,%R11,1),%XMM3 |
(104) 0x4110d5 VBLENDPD $0xc,0xa0(%R10,%R11,1),%YMM3,%YMM3 |
(104) 0x4110e0 VBLENDPD $0xa,%YMM3,%YMM8,%YMM10 |
(104) 0x4110e6 VBLENDPD $0xc,0x40(%R10,%R11,1),%YMM12,%YMM3 |
(104) 0x4110ee VBLENDPD $0xa,%YMM3,%YMM6,%YMM14 |
(104) 0x4110f4 VMOVUPD 0x20(%R9,%R11,1),%YMM6 |
(104) 0x4110fb VMOVUPD 0x80(%R9,%R11,1),%YMM7 |
(104) 0x411105 VMOVUPD 0x10(%R9,%R11,1),%XMM12 |
(104) 0x41110c VMOVUPD 0x70(%R9,%R11,1),%XMM3 |
(104) 0x411113 VBLENDPD $0x3,0x60(%R9,%R11,1),%YMM7,%YMM8 |
(104) 0x41111b VINSERTF128 $0x1,0xa0(%R9,%R11,1),%YMM3,%YMM13 |
(104) 0x411126 VBLENDPD $0xa,%YMM13,%YMM8,%YMM3 |
(104) 0x41112c VBLENDPD $0x3,(%R9,%R11,1),%YMM6,%YMM15 |
(104) 0x411133 VSHUFPD $0x5,%YMM7,%YMM8,%YMM7 |
(104) 0x411138 VBROADCASTSD 0xb0(%R9,%R11,1),%YMM8 |
(104) 0x411142 VBLENDPD $0x8,%YMM8,%YMM7,%YMM7 |
(104) 0x411148 VSHUFPD $0x5,%YMM6,%YMM15,%YMM6 |
(104) 0x41114d VBROADCASTSD 0x50(%R9,%R11,1),%YMM8 |
(104) 0x411154 VBLENDPD $0x8,%YMM8,%YMM6,%YMM6 |
(104) 0x41115a VMOVUPD 0x80(%R9,%R11,1),%XMM8 |
(104) 0x411164 VBLENDPD $0xc,0xa0(%R9,%R11,1),%YMM8,%YMM8 |
(104) 0x41116f VBLENDPD $0xa,%YMM8,%YMM13,%YMM8 |
(104) 0x411175 VMOVUPD 0x20(%R9,%R11,1),%XMM1 |
(104) 0x41117c VINSERTF128 $0x1,0x40(%R9,%R11,1),%YMM12,%YMM12 |
(104) 0x411184 VBLENDPD $0xa,%YMM12,%YMM15,%YMM13 |
(104) 0x41118a VBLENDPD $0xc,0x40(%R9,%R11,1),%YMM1,%YMM1 |
(104) 0x411192 VBLENDPD $0xa,%YMM1,%YMM12,%YMM12 |
(104) 0x411198 VFMADD231PD %YMM9,%YMM2,%YMM13 |
(104) 0x41119d VFMADD231PD %YMM4,%YMM2,%YMM3 |
(104) 0x4111a2 VFMADD231PD %YMM11,%YMM2,%YMM6 |
(104) 0x4111a7 VFMADD231PD %YMM5,%YMM2,%YMM7 |
(104) 0x4111ac VFMADD231PD %YMM14,%YMM2,%YMM12 |
(104) 0x4111b1 VFMADD231PD %YMM10,%YMM2,%YMM8 |
(104) 0x4111b6 VMOVDDUP %XMM6,%XMM1 |
(104) 0x4111ba VPERM2F128 $0x20,%YMM13,%YMM1,%YMM1 |
(104) 0x4111c0 VMOVDDUP %XMM7,%XMM4 |
(104) 0x4111c4 VPERM2F128 $0x20,%YMM3,%YMM4,%YMM4 |
(104) 0x4111ca VINSERTF128 $0x1,%XMM8,%YMM3,%YMM5 |
(104) 0x4111d0 VBLENDPD $0xa,%YMM4,%YMM5,%YMM4 |
(104) 0x4111d6 VINSERTF128 $0x1,%XMM12,%YMM13,%YMM5 |
(104) 0x4111dc VBLENDPD $0xa,%YMM1,%YMM5,%YMM1 |
(104) 0x4111e2 VSHUFPD $0x1,%YMM7,%YMM7,%YMM5 |
(104) 0x4111e7 VBLENDPD $0x4,%YMM3,%YMM5,%YMM5 |
(104) 0x4111ed VSHUFPD $0x4,%YMM7,%YMM7,%YMM7 |
(104) 0x4111f2 VPERM2F128 $0x31,%YMM8,%YMM3,%YMM3 |
(104) 0x4111f8 VPERM2F128 $0x31,%YMM7,%YMM8,%YMM7 |
(104) 0x4111fe VBLENDPD $0xa,%YMM3,%YMM7,%YMM3 |
(104) 0x411204 VSHUFPD $0x1,%YMM6,%YMM6,%YMM7 |
(104) 0x411209 VBLENDPD $0x4,%YMM13,%YMM7,%YMM7 |
(104) 0x41120f VSHUFPD $0x4,%YMM6,%YMM6,%YMM6 |
(104) 0x411214 VPERM2F128 $0x31,%YMM12,%YMM13,%YMM9 |
(104) 0x41121a VPERM2F128 $0x31,%YMM6,%YMM12,%YMM6 |
(104) 0x411220 VBLENDPD $0xa,%YMM9,%YMM6,%YMM6 |
(104) 0x411226 VBLENDPD $0x2,%YMM12,%YMM7,%YMM7 |
(104) 0x41122c VBLENDPD $0x2,%YMM8,%YMM5,%YMM5 |
(104) 0x411232 VMOVUPD %YMM6,0x40(%R9,%R11,1) |
(104) 0x411239 VMOVUPD %YMM3,0xa0(%R9,%R11,1) |
(104) 0x411243 VMOVUPD %YMM5,0x80(%R9,%R11,1) |
(104) 0x41124d VMOVUPD %YMM7,0x20(%R9,%R11,1) |
(104) 0x411254 VMOVUPD %YMM1,(%R9,%R11,1) |
(104) 0x41125a VMOVUPD %YMM4,0x60(%R9,%R11,1) |
(104) 0x411261 ADD $0x8,%R13D |
(104) 0x411265 ADD $0xc0,%R11 |
(104) 0x41126c CMP %R12D,%R13D |
(104) 0x41126f JLE 411050 |
(102) 0x411275 CMP %R15D,%R8D |
(102) 0x411278 VMOVDQU -0x50(%RBP),%XMM3 |
(102) 0x41127d JE 410f60 |
(102) 0x411283 JMP 411288 |
(102) 0x411285 XOR %R15D,%R15D |
(102) 0x411288 SUB %R15D,%R8D |
(102) 0x41128b MOVSXD %R15D,%R11 |
(102) 0x41128e SAL $0x3,%R11 |
(102) 0x411292 LEA (%R11,%R11,2),%R11 |
(102) 0x411296 ADD %R11,%R9 |
(102) 0x411299 ADD %R11,%R10 |
(102) 0x41129c NOPL (%RAX) |
(103) 0x4112a0 VMOVUPD (%R10,%R14,1),%XMM1 |
(103) 0x4112a6 VFMADD213PD (%R9,%R14,1),%XMM3,%XMM1 |
(103) 0x4112ac VMOVUPD %XMM1,(%R9,%R14,1) |
(103) 0x4112b2 VMOVSD 0x10(%R10,%R14,1),%XMM1 |
(103) 0x4112b9 VFMADD213SD 0x10(%R9,%R14,1),%XMM0,%XMM1 |
(103) 0x4112c0 VMOVSD %XMM1,0x10(%R9,%R14,1) |
(103) 0x4112c7 ADD $0x18,%R14 |
(103) 0x4112cb DEC %R8D |
(103) 0x4112ce JNE 4112a0 |
(102) 0x4112d0 JMP 410f60 |
0x4112d5 NOPW %CS:(%RAX,%RAX,1) |
0x4112df NOP |
Path / |
Source file and lines | timestep.c:71-78 |
Module | exec |
nb instructions | 58 |
nb uops | 60 |
loop length | 209 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 1 |
used zmm registers | 0 |
nb stack references | 6 |
micro-operation queue | 10.00 cycles |
front end | 10.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.80 | 1.80 | 4.00 | 4.00 | 8.00 | 3.00 | 1.80 | 8.00 | 8.00 | 8.00 | 1.60 | 4.00 |
cycles | 1.80 | 1.80 | 4.00 | 4.00 | 8.00 | 3.00 | 1.80 | 8.00 | 8.00 | 8.00 | 1.60 | 4.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 9.79-9.83 |
Stall cycles | 0.00 |
Front-end | 10.00 |
Dispatch | 8.00 |
Overall L1 | 10.00 |
all | 10% |
load | 0% |
store | 16% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 10% |
load | 6% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 11% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x28,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RCX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOVL $0,-0x3c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVL $0,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9D,-0x2c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0x1,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA -0x38(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x3c(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x30(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x2c(%RBP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x62f8f0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,-0x34(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 403120 <__kmpc_for_static_init_4@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x30(%RBP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x2c(%RBP),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %ECX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JBE 410f29 <advanceVelocity.extracted+0x89> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x62f910,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x34(%RBP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x28,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 402fe0 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
VMOVQ %R15,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV 0x18(%RBX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x78(%RDX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EAX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SAL $0x6,%ESI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPBROADCASTQ %XMM0,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %XMM0,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %XMM3,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
JMP 410f6c <advanceVelocity.extracted+0xcc> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | timestep.c:71-78 |
Module | exec |
nb instructions | 58 |
nb uops | 60 |
loop length | 209 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 1 |
used zmm registers | 0 |
nb stack references | 6 |
micro-operation queue | 10.00 cycles |
front end | 10.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.80 | 1.80 | 4.00 | 4.00 | 8.00 | 3.00 | 1.80 | 8.00 | 8.00 | 8.00 | 1.60 | 4.00 |
cycles | 1.80 | 1.80 | 4.00 | 4.00 | 8.00 | 3.00 | 1.80 | 8.00 | 8.00 | 8.00 | 1.60 | 4.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 9.79-9.83 |
Stall cycles | 0.00 |
Front-end | 10.00 |
Dispatch | 8.00 |
Overall L1 | 10.00 |
all | 10% |
load | 0% |
store | 16% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 10% |
load | 6% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 11% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x28,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RCX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOVL $0,-0x3c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVL $0,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9D,-0x2c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0x1,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA -0x38(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x3c(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x30(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x2c(%RBP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x62f8f0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,-0x34(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 403120 <__kmpc_for_static_init_4@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x30(%RBP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x2c(%RBP),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %ECX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JBE 410f29 <advanceVelocity.extracted+0x89> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x62f910,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x34(%RBP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x28,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 402fe0 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
VMOVQ %R15,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV 0x18(%RBX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x78(%RDX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EAX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SAL $0x6,%ESI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPBROADCASTQ %XMM0,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %XMM0,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %XMM3,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
JMP 410f6c <advanceVelocity.extracted+0xcc> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼advanceVelocity.extracted– | 2.43 | 0.63 |
▼Loop 102 - timestep.c:71-78 - exec– | 0.03 | 0.01 |
○Loop 104 - timestep.c:74-78 - exec | 2.13 | 0.55 |
○Loop 103 - timestep.c:74-78 - exec | 0.27 | 0.07 |
○Loop 105 - timestep.c:74-78 - exec | 0 | 0 |