Function: advancePosition.extracted | Module: exec | Source: timestep.c:85-94 | Coverage: 1.43% |
---|
Function: advancePosition.extracted | Module: exec | Source: timestep.c:85-94 | Coverage: 1.43% |
---|
/scratch_na/users/xoserete/qaas_runs/171-172-2581/intel/CoMD/build/CoMD/CoMD/src-openmp/timestep.c: 85 - 94 |
-------------------------------------------------------------------------------- |
85: #pragma omp parallel for |
86: for (int iBox=0; iBox<nBoxes; iBox++) |
87: { |
88: for (int iOff=MAXATOMS*iBox,ii=0; ii<s->boxes->nAtoms[iBox]; ii++,iOff++) |
89: { |
90: int iSpecies = s->atoms->iSpecies[iOff]; |
91: real_t invMass = 1.0/s->species[iSpecies].mass; |
92: s->atoms->r[iOff][0] += dt*s->atoms->p[iOff][0]*invMass; |
93: s->atoms->r[iOff][1] += dt*s->atoms->p[iOff][1]*invMass; |
94: s->atoms->r[iOff][2] += dt*s->atoms->p[iOff][2]*invMass; |
0x4112e0 PUSH %RBP |
0x4112e1 MOV %RSP,%RBP |
0x4112e4 PUSH %R15 |
0x4112e6 PUSH %R14 |
0x4112e8 PUSH %R13 |
0x4112ea PUSH %R12 |
0x4112ec PUSH %RBX |
0x4112ed SUB $0x58,%RSP |
0x4112f1 MOV %RCX,%R15 |
0x4112f4 MOV %RDX,%RBX |
0x4112f7 MOVL $0,-0x4c(%RBP) |
0x4112fe MOV (%RDI),%ESI |
0x411300 MOVL $0,-0x30(%RBP) |
0x411307 MOV %R9D,-0x2c(%RBP) |
0x41130b MOVL $0x1,-0x48(%RBP) |
0x411312 SUB $0x8,%RSP |
0x411316 LEA -0x48(%RBP),%RAX |
0x41131a LEA -0x4c(%RBP),%RCX |
0x41131e LEA -0x30(%RBP),%R8 |
0x411322 LEA -0x2c(%RBP),%R9 |
0x411326 MOV $0x62f950,%EDI |
0x41132b MOV %ESI,-0x44(%RBP) |
0x41132e MOV $0x22,%EDX |
0x411333 PUSH $0x1 |
0x411335 PUSH $0x1 |
0x411337 PUSH %RAX |
0x411338 CALL 403120 <__kmpc_for_static_init_4@plt> |
0x41133d ADD $0x20,%RSP |
0x411341 MOV -0x30(%RBP),%R14D |
0x411345 MOV -0x2c(%RBP),%EAX |
0x411348 MOV %RAX,-0x58(%RBP) |
0x41134c CMP %EAX,%R14D |
0x41134f JBE 41136f |
0x411351 MOV $0x62f970,%EDI |
0x411356 MOV -0x44(%RBP),%ESI |
0x411359 ADD $0x58,%RSP |
0x41135d POP %RBX |
0x41135e POP %R12 |
0x411360 POP %R13 |
0x411362 POP %R14 |
0x411364 POP %R15 |
0x411366 POP %RBP |
0x411367 VZEROUPPER |
0x41136a JMP 402fe0 |
0x41136f MOV %RBX,%RSI |
0x411372 VMOVQ %R15,%XMM0 |
0x411377 MOV 0x18(%RBX),%RCX |
0x41137b MOV 0x78(%RCX),%RAX |
0x41137f MOV %RAX,-0x78(%RBP) |
0x411383 SUB %R14,-0x58(%RBP) |
0x411387 MOV %R14D,%R12D |
0x41138a SAL $0x6,%R12D |
0x41138e XOR %EDI,%EDI |
0x411390 VPBROADCASTQ %XMM0,%YMM1 |
0x411395 MOV %RBX,-0x60(%RBP) |
0x411399 MOV %R14,-0x38(%RBP) |
0x41139d JMP 4113ae |
0x41139f NOP |
(106) 0x4113a0 ADD $0x40,%R12D |
(106) 0x4113a4 CMP -0x58(%RBP),%RDI |
(106) 0x4113a8 LEA 0x1(%RDI),%RDI |
(106) 0x4113ac JE 411351 |
(106) 0x4113ae MOV %R12D,%R12D |
(106) 0x4113b1 LEA (%RDI,%R14,1),%RCX |
(106) 0x4113b5 MOV -0x78(%RBP),%RAX |
(106) 0x4113b9 MOV (%RAX,%RCX,4),%R8D |
(106) 0x4113bd TEST %R8D,%R8D |
(106) 0x4113c0 JLE 4113a0 |
(106) 0x4113c2 LEA (,%R12,8),%RCX |
(106) 0x4113ca MOV %R12,-0x40(%RBP) |
(106) 0x4113ce LEA (,%R12,4),%R12 |
(106) 0x4113d6 LEA (%RDI,%R14,1),%EBX |
(106) 0x4113da SAL $0x6,%EBX |
(106) 0x4113dd MOV 0x20(%RSI),%RDX |
(106) 0x4113e1 MOV 0x28(%RSI),%R9 |
(106) 0x4113e5 MOV 0x10(%RDX),%R15 |
(106) 0x4113e9 MOV 0x18(%RDX),%R10 |
(106) 0x4113ed MOV 0x20(%RDX),%R11 |
(106) 0x4113f1 LEA -0x1(%R8),%EDX |
(106) 0x4113f5 MOVSXD %EDX,%RDX |
(106) 0x4113f8 ADD %RBX,%RDX |
(106) 0x4113fb SAL $0x3,%RDX |
(106) 0x4113ff LEA (%RDX,%RDX,2),%RDX |
(106) 0x411403 LEA (%R11,%RDX,1),%R14 |
(106) 0x411407 ADD $0x10,%R14 |
(106) 0x41140b SAL $0x3,%RBX |
(106) 0x41140f LEA (%RBX,%RBX,2),%RBX |
(106) 0x411413 LEA (%R10,%RBX,1),%R13 |
(106) 0x411417 CMP %R13,%R14 |
(106) 0x41141a JB 4114b0 |
(106) 0x411420 ADD %R11,%RBX |
(106) 0x411423 ADD %R10,%RDX |
(106) 0x411426 ADD $0x10,%RDX |
(106) 0x41142a CMP %RBX,%RDX |
(106) 0x41142d JB 4114b0 |
(106) 0x411433 LEA (%RCX,%RCX,2),%RCX |
(106) 0x411437 ADD $0x10,%RCX |
(106) 0x41143b ADD %R12,%R15 |
(106) 0x41143e XOR %EDX,%EDX |
(106) 0x411440 MOV -0x38(%RBP),%R14 |
(106) 0x411444 MOV -0x40(%RBP),%R12 |
(106) 0x411448 NOPL (%RAX,%RAX,1) |
(109) 0x411450 MOVSXD (%R15,%RDX,4),%RAX |
(109) 0x411454 SAL $0x4,%RAX |
(109) 0x411458 VDIVSD 0x8(%R9,%RAX,1),%XMM0,%XMM2 |
(109) 0x41145f VMOVSD -0x10(%R11,%RCX,1),%XMM3 |
(109) 0x411466 VFMADD213SD -0x10(%R10,%RCX,1),%XMM2,%XMM3 |
(109) 0x41146d VMOVSD %XMM3,-0x10(%R10,%RCX,1) |
(109) 0x411474 VMOVSD -0x8(%R11,%RCX,1),%XMM3 |
(109) 0x41147b VFMADD213SD -0x8(%R10,%RCX,1),%XMM2,%XMM3 |
(109) 0x411482 VMOVSD %XMM3,-0x8(%R10,%RCX,1) |
(109) 0x411489 VMOVSD (%R11,%RCX,1),%XMM3 |
(109) 0x41148f VFMADD213SD (%R10,%RCX,1),%XMM2,%XMM3 |
(109) 0x411495 VMOVSD %XMM3,(%R10,%RCX,1) |
(109) 0x41149b ADD $0x18,%RCX |
(109) 0x41149f INC %RDX |
(109) 0x4114a2 CMP %EDX,%R8D |
(109) 0x4114a5 JNE 411450 |
(106) 0x4114a7 JMP 4113a0 |
0x4114ac NOPL (%RAX) |
(106) 0x4114b0 LEA (%RCX,%RCX,2),%R13 |
(106) 0x4114b4 MOV %R8D,%EAX |
(106) 0x4114b7 AND $-0x4,%EAX |
(106) 0x4114ba JE 41163d |
(106) 0x4114c0 MOV %RAX,-0x68(%RBP) |
(106) 0x4114c4 LEA -0x1(%RAX),%EDX |
(106) 0x4114c7 ADD %R15,%R12 |
(106) 0x4114ca MOV %R13,-0x70(%RBP) |
(106) 0x4114ce XOR %EBX,%EBX |
(108) 0x4114d0 MOVSXD (%R12,%RBX,4),%RCX |
(108) 0x4114d4 MOVSXD 0x4(%R12,%RBX,4),%RAX |
(108) 0x4114d9 MOVSXD 0x8(%R12,%RBX,4),%R14 |
(108) 0x4114de MOVSXD 0xc(%R12,%RBX,4),%RSI |
(108) 0x4114e3 SAL $0x4,%RCX |
(108) 0x4114e7 SAL $0x4,%R14 |
(108) 0x4114eb SAL $0x4,%RSI |
(108) 0x4114ef VMOVSD 0x8(%R9,%R14,1),%XMM2 |
(108) 0x4114f6 VMOVHPD 0x8(%R9,%RSI,1),%XMM2,%XMM2 |
(108) 0x4114fd SAL $0x4,%RAX |
(108) 0x411501 VMOVSD 0x8(%R9,%RCX,1),%XMM3 |
(108) 0x411508 VMOVHPD 0x8(%R9,%RAX,1),%XMM3,%XMM3 |
(108) 0x41150f VINSERTF128 $0x1,%XMM2,%YMM3,%YMM2 |
(108) 0x411515 VDIVPD %YMM2,%YMM1,%YMM2 |
(108) 0x411519 VMOVUPD 0x20(%R11,%R13,1),%YMM3 |
(108) 0x411520 VBLENDPD $0x3,(%R11,%R13,1),%YMM3,%YMM4 |
(108) 0x411527 VMOVUPD 0x10(%R11,%R13,1),%XMM5 |
(108) 0x41152e VMOVUPD 0x20(%R11,%R13,1),%XMM6 |
(108) 0x411535 VINSERTF128 $0x1,0x40(%R11,%R13,1),%YMM5,%YMM5 |
(108) 0x41153d VBLENDPD $0xa,%YMM5,%YMM4,%YMM7 |
(108) 0x411543 VSHUFPD $0x5,%YMM3,%YMM4,%YMM3 |
(108) 0x411548 VBROADCASTSD 0x50(%R11,%R13,1),%YMM4 |
(108) 0x41154f VBLENDPD $0xc,0x40(%R11,%R13,1),%YMM6,%YMM6 |
(108) 0x411557 VBLENDPD $0x8,%YMM4,%YMM3,%YMM3 |
(108) 0x41155d VMOVUPD 0x20(%R10,%R13,1),%YMM4 |
(108) 0x411564 VBLENDPD $0x3,(%R10,%R13,1),%YMM4,%YMM8 |
(108) 0x41156b VBLENDPD $0xa,%YMM6,%YMM5,%YMM5 |
(108) 0x411571 VMOVUPD 0x10(%R10,%R13,1),%XMM6 |
(108) 0x411578 VINSERTF128 $0x1,0x40(%R10,%R13,1),%YMM6,%YMM6 |
(108) 0x411580 VMOVUPD 0x20(%R10,%R13,1),%XMM9 |
(108) 0x411587 VBLENDPD $0xa,%YMM6,%YMM8,%YMM10 |
(108) 0x41158d VSHUFPD $0x5,%YMM4,%YMM8,%YMM4 |
(108) 0x411592 VBROADCASTSD 0x50(%R10,%R13,1),%YMM8 |
(108) 0x411599 VBLENDPD $0x8,%YMM8,%YMM4,%YMM4 |
(108) 0x41159f VBLENDPD $0xc,0x40(%R10,%R13,1),%YMM9,%YMM8 |
(108) 0x4115a7 VBLENDPD $0xa,%YMM8,%YMM6,%YMM6 |
(108) 0x4115ad VFMADD231PD %YMM7,%YMM2,%YMM10 |
(108) 0x4115b2 VFMADD231PD %YMM3,%YMM2,%YMM4 |
(108) 0x4115b7 VFMADD231PD %YMM5,%YMM2,%YMM6 |
(108) 0x4115bc VSHUFPD $0x1,%YMM4,%YMM4,%YMM2 |
(108) 0x4115c1 VBLENDPD $0x4,%YMM10,%YMM2,%YMM2 |
(108) 0x4115c7 VMOVDDUP %XMM4,%XMM3 |
(108) 0x4115cb VPERM2F128 $0x20,%YMM10,%YMM3,%YMM3 |
(108) 0x4115d1 VSHUFPD $0x4,%YMM4,%YMM4,%YMM4 |
(108) 0x4115d6 VINSERTF128 $0x1,%XMM6,%YMM10,%YMM5 |
(108) 0x4115dc VBLENDPD $0xa,%YMM3,%YMM5,%YMM3 |
(108) 0x4115e2 VPERM2F128 $0x31,%YMM6,%YMM10,%YMM5 |
(108) 0x4115e8 VPERM2F128 $0x31,%YMM4,%YMM6,%YMM4 |
(108) 0x4115ee VBLENDPD $0xa,%YMM5,%YMM4,%YMM4 |
(108) 0x4115f4 VBLENDPD $0x2,%YMM6,%YMM2,%YMM2 |
(108) 0x4115fa VMOVUPD %YMM2,0x20(%R10,%R13,1) |
(108) 0x411601 VMOVUPD %YMM4,0x40(%R10,%R13,1) |
(108) 0x411608 VMOVUPD %YMM3,(%R10,%R13,1) |
(108) 0x41160e ADD $0x4,%RBX |
(108) 0x411612 ADD $0x60,%R13 |
(108) 0x411616 CMP %EDX,%EBX |
(108) 0x411618 JLE 4114d0 |
(106) 0x41161e MOV -0x68(%RBP),%RAX |
(106) 0x411622 CMP %EAX,%R8D |
(106) 0x411625 MOV -0x60(%RBP),%RSI |
(106) 0x411629 MOV -0x38(%RBP),%R14 |
(106) 0x41162d MOV -0x40(%RBP),%R12 |
(106) 0x411631 MOV -0x70(%RBP),%R13 |
(106) 0x411635 JE 4113a0 |
(106) 0x41163b JMP 411647 |
(106) 0x41163d XOR %EAX,%EAX |
(106) 0x41163f MOV -0x38(%RBP),%R14 |
(106) 0x411643 MOV -0x40(%RBP),%R12 |
(106) 0x411647 SUB %EAX,%R8D |
(106) 0x41164a MOVSXD %EAX,%RDX |
(106) 0x41164d LEA (%RDX,%RDX,2),%RBX |
(106) 0x411651 LEA (,%RBX,8),%RCX |
(106) 0x411659 ADD %R13,%RCX |
(106) 0x41165c ADD %RCX,%R10 |
(106) 0x41165f ADD %RCX,%R11 |
(106) 0x411662 ADD %R12,%RDX |
(106) 0x411665 LEA (%R15,%RDX,4),%RCX |
(106) 0x411669 XOR %EDX,%EDX |
(106) 0x41166b XOR %EBX,%EBX |
(106) 0x41166d NOPL (%RAX) |
(107) 0x411670 MOVSXD (%RCX,%RBX,4),%RAX |
(107) 0x411674 SAL $0x4,%RAX |
(107) 0x411678 VDIVSD 0x8(%R9,%RAX,1),%XMM0,%XMM2 |
(107) 0x41167f VMOVUPD (%R11,%RDX,1),%XMM3 |
(107) 0x411685 VMOVDDUP %XMM2,%XMM4 |
(107) 0x411689 VFMADD213PD (%R10,%RDX,1),%XMM3,%XMM4 |
(107) 0x41168f VMOVUPD %XMM4,(%R10,%RDX,1) |
(107) 0x411695 VMOVSD 0x10(%R11,%RDX,1),%XMM3 |
(107) 0x41169c VFMADD213SD 0x10(%R10,%RDX,1),%XMM2,%XMM3 |
(107) 0x4116a3 VMOVSD %XMM3,0x10(%R10,%RDX,1) |
(107) 0x4116aa INC %RBX |
(107) 0x4116ad ADD $0x18,%RDX |
(107) 0x4116b1 CMP %EBX,%R8D |
(107) 0x4116b4 JNE 411670 |
(106) 0x4116b6 JMP 4113a0 |
0x4116bb NOPL (%RAX,%RAX,1) |
Path / |
Source file and lines | timestep.c:85-94 |
Module | exec |
nb instructions | 60 |
nb uops | 63 |
loop length | 201 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 1 |
used zmm registers | 0 |
nb stack references | 9 |
micro-operation queue | 10.50 cycles |
front end | 10.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.80 | 1.80 | 4.33 | 4.33 | 10.00 | 2.00 | 1.80 | 10.00 | 10.00 | 10.00 | 1.60 | 4.33 |
cycles | 1.80 | 1.80 | 4.33 | 4.33 | 10.00 | 2.00 | 1.80 | 10.00 | 10.00 | 10.00 | 1.60 | 4.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 10.59-10.60 |
Stall cycles | 0.00 |
Front-end | 10.50 |
Dispatch | 10.00 |
Overall L1 | 10.50 |
Vectorization ratios |
all | 4% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 11% |
Vector efficiency ratios |
all | 10% |
load | 6% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 11% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x58,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RCX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOVL $0,-0x4c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVL $0,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9D,-0x2c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0x1,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA -0x48(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x4c(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x30(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x2c(%RBP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x62f950,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,-0x44(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 403120 <__kmpc_for_static_init_4@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x30(%RBP),%R14D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x2c(%RBP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %EAX,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JBE 41136f <advancePosition.extracted+0x8f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x62f970,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x44(%RBP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x58,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 402fe0 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV %RBX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVQ %R15,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV 0x18(%RBX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x78(%RCX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %R14,-0x58(%RBP) | 2 | 0.20 | 0.20 | 0.33 | 0.33 | 0.50 | 0.20 | 0.20 | 0.50 | 0.50 | 0.50 | 0.20 | 0.33 | 1 | 0.50 |
MOV %R14D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SAL $0x6,%R12D | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPBROADCASTQ %XMM0,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RBX,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 4113ae <advancePosition.extracted+0xce> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | timestep.c:85-94 |
Module | exec |
nb instructions | 60 |
nb uops | 63 |
loop length | 201 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 1 |
used zmm registers | 0 |
nb stack references | 9 |
micro-operation queue | 10.50 cycles |
front end | 10.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.80 | 1.80 | 4.33 | 4.33 | 10.00 | 2.00 | 1.80 | 10.00 | 10.00 | 10.00 | 1.60 | 4.33 |
cycles | 1.80 | 1.80 | 4.33 | 4.33 | 10.00 | 2.00 | 1.80 | 10.00 | 10.00 | 10.00 | 1.60 | 4.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 10.59-10.60 |
Stall cycles | 0.00 |
Front-end | 10.50 |
Dispatch | 10.00 |
Overall L1 | 10.50 |
Vectorization ratios |
all | 4% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 11% |
Vector efficiency ratios |
all | 10% |
load | 6% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 11% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x58,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RCX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOVL $0,-0x4c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVL $0,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9D,-0x2c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0x1,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA -0x48(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x4c(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x30(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x2c(%RBP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x62f950,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,-0x44(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 403120 <__kmpc_for_static_init_4@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x30(%RBP),%R14D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x2c(%RBP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %EAX,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JBE 41136f <advancePosition.extracted+0x8f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x62f970,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x44(%RBP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x58,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 402fe0 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV %RBX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVQ %R15,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV 0x18(%RBX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x78(%RCX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %R14,-0x58(%RBP) | 2 | 0.20 | 0.20 | 0.33 | 0.33 | 0.50 | 0.20 | 0.20 | 0.50 | 0.50 | 0.50 | 0.20 | 0.33 | 1 | 0.50 |
MOV %R14D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SAL $0x6,%R12D | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPBROADCASTQ %XMM0,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RBX,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 4113ae <advancePosition.extracted+0xce> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼advancePosition.extracted– | 1.43 | 0.37 |
▼Loop 106 - timestep.c:85-94 - exec– | 0.02 | 0 |
○Loop 108 - timestep.c:88-94 - exec | 1.35 | 0.35 |
○Loop 107 - timestep.c:88-94 - exec | 0.07 | 0.02 |
○Loop 109 - timestep.c:88-94 - exec | 0 | 0 |