Function: advancePosition._omp_fn.0 | Module: exec | Source: timestep.c:85-94 | Coverage: 2.83% |
---|
Function: advancePosition._omp_fn.0 | Module: exec | Source: timestep.c:85-94 | Coverage: 2.83% |
---|
/scratch_na/users/xoserete/qaas_runs/171-172-2581/intel/CoMD/build/CoMD/CoMD/src-openmp/timestep.c: 85 - 94 |
-------------------------------------------------------------------------------- |
85: #pragma omp parallel for |
86: for (int iBox=0; iBox<nBoxes; iBox++) |
87: { |
88: for (int iOff=MAXATOMS*iBox,ii=0; ii<s->boxes->nAtoms[iBox]; ii++,iOff++) |
89: { |
90: int iSpecies = s->atoms->iSpecies[iOff]; |
91: real_t invMass = 1.0/s->species[iSpecies].mass; |
92: s->atoms->r[iOff][0] += dt*s->atoms->p[iOff][0]*invMass; |
93: s->atoms->r[iOff][1] += dt*s->atoms->p[iOff][1]*invMass; |
94: s->atoms->r[iOff][2] += dt*s->atoms->p[iOff][2]*invMass; |
0x40dd50 PUSH %RBP |
0x40dd51 MOV %RSP,%RBP |
0x40dd54 PUSH %R14 |
0x40dd56 PUSH %R13 |
0x40dd58 PUSH %R12 |
0x40dd5a MOV %RDI,%R12 |
0x40dd5d PUSH %RBX |
0x40dd5e CALL 403070 <omp_get_num_threads@plt> |
0x40dd63 MOV %EAX,%EBX |
0x40dd65 CALL 403160 <omp_get_thread_num@plt> |
0x40dd6a MOV %EAX,%R9D |
0x40dd6d MOV 0x10(%R12),%EAX |
0x40dd72 CLTD |
0x40dd73 IDIV %EBX |
0x40dd75 CMP %EDX,%R9D |
0x40dd78 JL 40e03c |
0x40dd7e IMUL %EAX,%R9D |
0x40dd82 ADD %EDX,%R9D |
0x40dd85 LEA (%RAX,%R9,1),%EBX |
0x40dd89 CMP %EBX,%R9D |
0x40dd8c JGE 40e033 |
0x40dd92 VMOVSD 0x8(%R12),%XMM0 |
0x40dd99 MOV (%R12),%R12 |
0x40dd9d MOVSXD %R9D,%R10 |
0x40dda0 SAL $0x6,%R9D |
0x40dda4 LEA (%R10,%R10,2),%RAX |
0x40dda8 VMOVSD 0x1988(%RIP),%XMM2 |
0x40ddb0 MOV 0x18(%R12),%RCX |
0x40ddb5 SAL $0x9,%RAX |
0x40ddb9 MOV 0x78(%RCX),%R13 |
0x40ddbd NOPL (%RAX) |
(91) 0x40ddc0 MOVSXD (%R13,%R10,4),%R8 |
(91) 0x40ddc5 TEST %R8D,%R8D |
(91) 0x40ddc8 JLE 40e01d |
(91) 0x40ddce MOV 0x20(%R12),%R11 |
(91) 0x40ddd3 MOVSXD %R9D,%RSI |
(91) 0x40ddd6 MOV 0x28(%R12),%RDI |
(91) 0x40dddb MOV 0x10(%R11),%R14 |
(91) 0x40dddf MOV 0x18(%R11),%RDX |
(91) 0x40dde3 MOV 0x20(%R11),%RCX |
(91) 0x40dde7 MOV %R10,%R11 |
(91) 0x40ddea SAL $0x6,%R11 |
(91) 0x40ddee LEA (%R14,%RSI,4),%RSI |
(91) 0x40ddf2 ADD %RAX,%RDX |
(91) 0x40ddf5 ADD %R8,%R11 |
(91) 0x40ddf8 ADD %RAX,%RCX |
(91) 0x40ddfb LEA (%R14,%R11,4),%R11 |
(91) 0x40ddff MOV %R11,%R8 |
(91) 0x40de02 SUB %RSI,%R8 |
(91) 0x40de05 SUB $0x4,%R8 |
(91) 0x40de09 SHR $0x2,%R8 |
(91) 0x40de0d INC %R8 |
(91) 0x40de10 AND $0x3,%R8D |
(91) 0x40de14 JE 40df0e |
(91) 0x40de1a CMP $0x1,%R8 |
(91) 0x40de1e JE 40debc |
(91) 0x40de24 CMP $0x2,%R8 |
(91) 0x40de28 JE 40de73 |
(91) 0x40de2a VMULSD (%RCX),%XMM0,%XMM3 |
(91) 0x40de2e MOVSXD (%RSI),%R14 |
(91) 0x40de31 ADD $0x18,%RDX |
(91) 0x40de35 ADD $0x4,%RSI |
(91) 0x40de39 ADD $0x18,%RCX |
(91) 0x40de3d SAL $0x4,%R14 |
(91) 0x40de41 VDIVSD 0x8(%RDI,%R14,1),%XMM2,%XMM1 |
(91) 0x40de48 VFMADD213SD -0x18(%RDX),%XMM1,%XMM3 |
(91) 0x40de4e VMOVSD %XMM3,-0x18(%RDX) |
(91) 0x40de53 VMULSD -0x10(%RCX),%XMM0,%XMM4 |
(91) 0x40de58 VFMADD213SD -0x10(%RDX),%XMM1,%XMM4 |
(91) 0x40de5e VMOVSD %XMM4,-0x10(%RDX) |
(91) 0x40de63 VMULSD -0x8(%RCX),%XMM0,%XMM5 |
(91) 0x40de68 VFMADD213SD -0x8(%RDX),%XMM5,%XMM1 |
(91) 0x40de6e VMOVSD %XMM1,-0x8(%RDX) |
(91) 0x40de73 VMULSD (%RCX),%XMM0,%XMM7 |
(91) 0x40de77 MOVSXD (%RSI),%R8 |
(91) 0x40de7a ADD $0x18,%RDX |
(91) 0x40de7e ADD $0x4,%RSI |
(91) 0x40de82 ADD $0x18,%RCX |
(91) 0x40de86 SAL $0x4,%R8 |
(91) 0x40de8a VDIVSD 0x8(%RDI,%R8,1),%XMM2,%XMM6 |
(91) 0x40de91 VFMADD213SD -0x18(%RDX),%XMM6,%XMM7 |
(91) 0x40de97 VMOVSD %XMM7,-0x18(%RDX) |
(91) 0x40de9c VMULSD -0x10(%RCX),%XMM0,%XMM8 |
(91) 0x40dea1 VFMADD213SD -0x10(%RDX),%XMM6,%XMM8 |
(91) 0x40dea7 VMOVSD %XMM8,-0x10(%RDX) |
(91) 0x40deac VMULSD -0x8(%RCX),%XMM0,%XMM9 |
(91) 0x40deb1 VFMADD213SD -0x8(%RDX),%XMM9,%XMM6 |
(91) 0x40deb7 VMOVSD %XMM6,-0x8(%RDX) |
(91) 0x40debc VMULSD (%RCX),%XMM0,%XMM11 |
(91) 0x40dec0 MOVSXD (%RSI),%R14 |
(91) 0x40dec3 ADD $0x4,%RSI |
(91) 0x40dec7 ADD $0x18,%RDX |
(91) 0x40decb ADD $0x18,%RCX |
(91) 0x40decf SAL $0x4,%R14 |
(91) 0x40ded3 VDIVSD 0x8(%RDI,%R14,1),%XMM2,%XMM10 |
(91) 0x40deda VFMADD213SD -0x18(%RDX),%XMM10,%XMM11 |
(91) 0x40dee0 VMOVSD %XMM11,-0x18(%RDX) |
(91) 0x40dee5 VMULSD -0x10(%RCX),%XMM0,%XMM12 |
(91) 0x40deea VFMADD213SD -0x10(%RDX),%XMM10,%XMM12 |
(91) 0x40def0 VMOVSD %XMM12,-0x10(%RDX) |
(91) 0x40def5 VMULSD -0x8(%RCX),%XMM0,%XMM13 |
(91) 0x40defa VFMADD213SD -0x8(%RDX),%XMM13,%XMM10 |
(91) 0x40df00 VMOVSD %XMM10,-0x8(%RDX) |
(91) 0x40df05 CMP %RSI,%R11 |
(91) 0x40df08 JE 40e01d |
(92) 0x40df0e VMULSD (%RCX),%XMM0,%XMM15 |
(92) 0x40df12 MOVSXD (%RSI),%R8 |
(92) 0x40df15 ADD $0x10,%RSI |
(92) 0x40df19 ADD $0x60,%RDX |
(92) 0x40df1d MOVSXD -0xc(%RSI),%R14 |
(92) 0x40df21 ADD $0x60,%RCX |
(92) 0x40df25 SAL $0x4,%R8 |
(92) 0x40df29 VDIVSD 0x8(%RDI,%R8,1),%XMM2,%XMM14 |
(92) 0x40df30 SAL $0x4,%R14 |
(92) 0x40df34 MOVSXD -0x8(%RSI),%R8 |
(92) 0x40df38 VFMADD213SD -0x60(%RDX),%XMM14,%XMM15 |
(92) 0x40df3e SAL $0x4,%R8 |
(92) 0x40df42 VMOVSD %XMM15,-0x60(%RDX) |
(92) 0x40df47 VMULSD -0x58(%RCX),%XMM0,%XMM1 |
(92) 0x40df4c VFMADD213SD -0x58(%RDX),%XMM14,%XMM1 |
(92) 0x40df52 VMOVSD %XMM1,-0x58(%RDX) |
(92) 0x40df57 VMULSD -0x50(%RCX),%XMM0,%XMM3 |
(92) 0x40df5c VFMADD213SD -0x50(%RDX),%XMM3,%XMM14 |
(92) 0x40df62 VMOVSD %XMM14,-0x50(%RDX) |
(92) 0x40df67 VMULSD -0x48(%RCX),%XMM0,%XMM5 |
(92) 0x40df6c VDIVSD 0x8(%RDI,%R14,1),%XMM2,%XMM4 |
(92) 0x40df73 VFMADD213SD -0x48(%RDX),%XMM4,%XMM5 |
(92) 0x40df79 VMOVSD %XMM5,-0x48(%RDX) |
(92) 0x40df7e VMULSD -0x40(%RCX),%XMM0,%XMM6 |
(92) 0x40df83 VFMADD213SD -0x40(%RDX),%XMM4,%XMM6 |
(92) 0x40df89 VMOVSD %XMM6,-0x40(%RDX) |
(92) 0x40df8e VMULSD -0x38(%RCX),%XMM0,%XMM7 |
(92) 0x40df93 VFMADD213SD -0x38(%RDX),%XMM7,%XMM4 |
(92) 0x40df99 VMOVSD %XMM4,-0x38(%RDX) |
(92) 0x40df9e VMULSD -0x30(%RCX),%XMM0,%XMM9 |
(92) 0x40dfa3 VDIVSD 0x8(%RDI,%R8,1),%XMM2,%XMM8 |
(92) 0x40dfaa VFMADD213SD -0x30(%RDX),%XMM8,%XMM9 |
(92) 0x40dfb0 VMOVSD %XMM9,-0x30(%RDX) |
(92) 0x40dfb5 VMULSD -0x28(%RCX),%XMM0,%XMM10 |
(92) 0x40dfba VFMADD213SD -0x28(%RDX),%XMM8,%XMM10 |
(92) 0x40dfc0 VMOVSD %XMM10,-0x28(%RDX) |
(92) 0x40dfc5 VMULSD -0x20(%RCX),%XMM0,%XMM11 |
(92) 0x40dfca VFMADD213SD -0x20(%RDX),%XMM11,%XMM8 |
(92) 0x40dfd0 VMOVSD %XMM8,-0x20(%RDX) |
(92) 0x40dfd5 VMULSD -0x18(%RCX),%XMM0,%XMM13 |
(92) 0x40dfda MOVSXD -0x4(%RSI),%R14 |
(92) 0x40dfde SAL $0x4,%R14 |
(92) 0x40dfe2 VDIVSD 0x8(%RDI,%R14,1),%XMM2,%XMM12 |
(92) 0x40dfe9 VFMADD213SD -0x18(%RDX),%XMM12,%XMM13 |
(92) 0x40dfef VMOVSD %XMM13,-0x18(%RDX) |
(92) 0x40dff4 VMULSD -0x10(%RCX),%XMM0,%XMM14 |
(92) 0x40dff9 VFMADD213SD -0x10(%RDX),%XMM12,%XMM14 |
(92) 0x40dfff VMOVSD %XMM14,-0x10(%RDX) |
(92) 0x40e004 VMULSD -0x8(%RCX),%XMM0,%XMM15 |
(92) 0x40e009 VFMADD213SD -0x8(%RDX),%XMM15,%XMM12 |
(92) 0x40e00f VMOVSD %XMM12,-0x8(%RDX) |
(92) 0x40e014 CMP %RSI,%R11 |
(92) 0x40e017 JNE 40df0e |
(91) 0x40e01d INC %R10 |
(91) 0x40e020 ADD $0x40,%R9D |
(91) 0x40e024 ADD $0x600,%RAX |
(91) 0x40e02a CMP %R10D,%EBX |
(91) 0x40e02d JG 40ddc0 |
0x40e033 POP %RBX |
0x40e034 POP %R12 |
0x40e036 POP %R13 |
0x40e038 POP %R14 |
0x40e03a POP %RBP |
0x40e03b RET |
0x40e03c INC %EAX |
0x40e03e XOR %EDX,%EDX |
0x40e040 JMP 40dd7e |
0x40e045 NOPW %CS:(%RAX,%RAX,1) |
Path / |
Source file and lines | timestep.c:85-94 |
Module | exec |
nb instructions | 41 |
nb uops | 46 |
loop length | 141 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 0 |
micro-operation queue | 7.67 cycles |
front end | 7.67 cycles |
| | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.00 | 4.00 | 4.00 | 4.00 | 3.50 | 3.07 | 3.00 | 3.50 | 3.50 | 3.50 | 2.93 | 4.00 |
cycles | 3.00 | 5.33 | 4.00 | 4.00 | 3.50 | 3.07 | 3.00 | 3.50 | 3.50 | 3.50 | 2.93 | 4.00 |
Cycles executing div or sqrt instructions | 6.00 |
FE+BE cycles | 7.33-7.36 |
Stall cycles | 0.00 |
Front-end | 7.67 |
Dispatch | 5.33 |
DIV/SQRT | 6.00 |
Overall L1 | 7.67 |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 0% |
all | 7% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 7% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 8% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 7% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 403070 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 403160 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x10(%R12),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CLTD | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %EBX | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
CMP %EDX,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 40e03c <advancePosition._omp_fn.0+0x2ec> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EAX,%R9D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %EDX,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA (%RAX,%R9,1),%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %EBX,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 40e033 <advancePosition._omp_fn.0+0x2e3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVSD 0x8(%R12),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R12),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %R9D,%R10 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
SAL $0x6,%R9D | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA (%R10,%R10,2),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVSD 0x1988(%RIP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%R12),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SAL $0x9,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV 0x78(%RCX),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 40dd7e <advancePosition._omp_fn.0+0x2e> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | timestep.c:85-94 |
Module | exec |
nb instructions | 41 |
nb uops | 46 |
loop length | 141 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 0 |
micro-operation queue | 7.67 cycles |
front end | 7.67 cycles |
| | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.00 | 4.00 | 4.00 | 4.00 | 3.50 | 3.07 | 3.00 | 3.50 | 3.50 | 3.50 | 2.93 | 4.00 |
cycles | 3.00 | 5.33 | 4.00 | 4.00 | 3.50 | 3.07 | 3.00 | 3.50 | 3.50 | 3.50 | 2.93 | 4.00 |
Cycles executing div or sqrt instructions | 6.00 |
FE+BE cycles | 7.33-7.36 |
Stall cycles | 0.00 |
Front-end | 7.67 |
Dispatch | 5.33 |
DIV/SQRT | 6.00 |
Overall L1 | 7.67 |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 0% |
all | 7% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 7% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 8% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 7% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 403070 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 403160 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x10(%R12),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CLTD | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %EBX | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
CMP %EDX,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 40e03c <advancePosition._omp_fn.0+0x2ec> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EAX,%R9D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %EDX,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA (%RAX,%R9,1),%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %EBX,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 40e033 <advancePosition._omp_fn.0+0x2e3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVSD 0x8(%R12),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R12),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %R9D,%R10 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
SAL $0x6,%R9D | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA (%R10,%R10,2),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVSD 0x1988(%RIP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%R12),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SAL $0x9,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV 0x78(%RCX),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 40dd7e <advancePosition._omp_fn.0+0x2e> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼advancePosition._omp_fn.0– | 2.83 | 0.52 |
▼Loop 91 - timestep.c:88-94 - exec– | 0.97 | 0.13 |
○Loop 92 - timestep.c:88-94 - exec | 1.86 | 0.25 |