Function: advanceVelocity._omp_fn.0 | Module: exec | Source: timestep.c:71-78 | Coverage: 6.17% |
---|
Function: advanceVelocity._omp_fn.0 | Module: exec | Source: timestep.c:71-78 | Coverage: 6.17% |
---|
/scratch_na/users/xoserete/qaas_runs/171-416-1926/intel/CoMD/build/CoMD/CoMD/src-openmp/timestep.c: 71 - 78 |
-------------------------------------------------------------------------------- |
71: #pragma omp parallel for |
72: for (int iBox=0; iBox<nBoxes; iBox++) |
73: { |
74: for (int iOff=MAXATOMS*iBox,ii=0; ii<s->boxes->nAtoms[iBox]; ii++,iOff++) |
75: { |
76: s->atoms->p[iOff][0] += dt*s->atoms->f[iOff][0]; |
77: s->atoms->p[iOff][1] += dt*s->atoms->f[iOff][1]; |
78: s->atoms->p[iOff][2] += dt*s->atoms->f[iOff][2]; |
0x40eb10 PUSH %RBP |
0x40eb11 MOV %RSP,%RBP |
0x40eb14 PUSH %R12 |
0x40eb16 PUSH %RBX |
0x40eb17 MOV %RDI,%RBX |
0x40eb1a CALL 403070 <omp_get_num_threads@plt> |
0x40eb1f MOV %EAX,%R12D |
0x40eb22 CALL 403160 <omp_get_thread_num@plt> |
0x40eb27 MOV %EAX,%ESI |
0x40eb29 MOV 0x10(%RBX),%EAX |
0x40eb2c CLTD |
0x40eb2d IDIV %R12D |
0x40eb30 CMP %EDX,%ESI |
0x40eb32 JL 40ef5a |
0x40eb38 IMUL %EAX,%ESI |
0x40eb3b ADD %EDX,%ESI |
0x40eb3d ADD %ESI,%EAX |
0x40eb3f CMP %EAX,%ESI |
0x40eb41 JGE 40ef55 |
0x40eb47 MOV (%RBX),%R10 |
0x40eb4a MOVSXD %ESI,%R12 |
0x40eb4d VMOVSD 0x8(%RBX),%XMM0 |
0x40eb52 LEA (%R12,%R12,2),%R8 |
0x40eb56 MOV 0x18(%R10),%RCX |
0x40eb5a SAL $0x9,%R8 |
0x40eb5e MOV 0x78(%RCX),%R9 |
0x40eb62 NOPW (%RAX,%RAX,1) |
(93) 0x40eb68 MOVSXD (%R9,%R12,4),%RDI |
(93) 0x40eb6c TEST %EDI,%EDI |
(93) 0x40eb6e JLE 40ef42 |
(93) 0x40eb74 MOV 0x20(%R10),%R11 |
(93) 0x40eb78 LEA (%RDI,%RDI,2),%RBX |
(93) 0x40eb7c LEA -0x18(,%RBX,8),%RDI |
(93) 0x40eb84 MOV 0x20(%R11),%RSI |
(93) 0x40eb88 MOV 0x28(%R11),%RCX |
(93) 0x40eb8c SHR $0x3,%RDI |
(93) 0x40eb90 MOV $0xaaaaaaaaaaaaaab,%R11 |
(93) 0x40eb9a IMUL %R11,%RDI |
(93) 0x40eb9e ADD %R8,%RSI |
(93) 0x40eba1 ADD %R8,%RCX |
(93) 0x40eba4 LEA (%RSI,%RBX,8),%RDX |
(93) 0x40eba8 INC %RDI |
(93) 0x40ebab AND $0x7,%EDI |
(93) 0x40ebae JE 40ed6b |
(93) 0x40ebb4 CMP $0x1,%RDI |
(93) 0x40ebb8 JE 40ed2c |
(93) 0x40ebbe CMP $0x2,%RDI |
(93) 0x40ebc2 JE 40ecf6 |
(93) 0x40ebc8 CMP $0x3,%RDI |
(93) 0x40ebcc JE 40ecc0 |
(93) 0x40ebd2 CMP $0x4,%RDI |
(93) 0x40ebd6 JE 40ec8a |
(93) 0x40ebdc CMP $0x5,%RDI |
(93) 0x40ebe0 JE 40ec54 |
(93) 0x40ebe2 CMP $0x6,%RDI |
(93) 0x40ebe6 JE 40ec1e |
(93) 0x40ebe8 VMOVSD (%RCX),%XMM1 |
(93) 0x40ebec VFMADD213SD (%RSI),%XMM0,%XMM1 |
(93) 0x40ebf1 ADD $0x18,%RCX |
(93) 0x40ebf5 ADD $0x18,%RSI |
(93) 0x40ebf9 VMOVSD %XMM1,-0x18(%RSI) |
(93) 0x40ebfe VMOVSD -0x10(%RCX),%XMM2 |
(93) 0x40ec03 VFMADD213SD -0x10(%RSI),%XMM0,%XMM2 |
(93) 0x40ec09 VMOVSD %XMM2,-0x10(%RSI) |
(93) 0x40ec0e VMOVSD -0x8(%RCX),%XMM3 |
(93) 0x40ec13 VFMADD213SD -0x8(%RSI),%XMM0,%XMM3 |
(93) 0x40ec19 VMOVSD %XMM3,-0x8(%RSI) |
(93) 0x40ec1e VMOVSD (%RCX),%XMM4 |
(93) 0x40ec22 VFMADD213SD (%RSI),%XMM0,%XMM4 |
(93) 0x40ec27 ADD $0x18,%RCX |
(93) 0x40ec2b ADD $0x18,%RSI |
(93) 0x40ec2f VMOVSD %XMM4,-0x18(%RSI) |
(93) 0x40ec34 VMOVSD -0x10(%RCX),%XMM5 |
(93) 0x40ec39 VFMADD213SD -0x10(%RSI),%XMM0,%XMM5 |
(93) 0x40ec3f VMOVSD %XMM5,-0x10(%RSI) |
(93) 0x40ec44 VMOVSD -0x8(%RCX),%XMM6 |
(93) 0x40ec49 VFMADD213SD -0x8(%RSI),%XMM0,%XMM6 |
(93) 0x40ec4f VMOVSD %XMM6,-0x8(%RSI) |
(93) 0x40ec54 VMOVSD (%RCX),%XMM7 |
(93) 0x40ec58 VFMADD213SD (%RSI),%XMM0,%XMM7 |
(93) 0x40ec5d ADD $0x18,%RCX |
(93) 0x40ec61 ADD $0x18,%RSI |
(93) 0x40ec65 VMOVSD %XMM7,-0x18(%RSI) |
(93) 0x40ec6a VMOVSD -0x10(%RCX),%XMM8 |
(93) 0x40ec6f VFMADD213SD -0x10(%RSI),%XMM0,%XMM8 |
(93) 0x40ec75 VMOVSD %XMM8,-0x10(%RSI) |
(93) 0x40ec7a VMOVSD -0x8(%RCX),%XMM9 |
(93) 0x40ec7f VFMADD213SD -0x8(%RSI),%XMM0,%XMM9 |
(93) 0x40ec85 VMOVSD %XMM9,-0x8(%RSI) |
(93) 0x40ec8a VMOVSD (%RCX),%XMM10 |
(93) 0x40ec8e VFMADD213SD (%RSI),%XMM0,%XMM10 |
(93) 0x40ec93 ADD $0x18,%RCX |
(93) 0x40ec97 ADD $0x18,%RSI |
(93) 0x40ec9b VMOVSD %XMM10,-0x18(%RSI) |
(93) 0x40eca0 VMOVSD -0x10(%RCX),%XMM11 |
(93) 0x40eca5 VFMADD213SD -0x10(%RSI),%XMM0,%XMM11 |
(93) 0x40ecab VMOVSD %XMM11,-0x10(%RSI) |
(93) 0x40ecb0 VMOVSD -0x8(%RCX),%XMM12 |
(93) 0x40ecb5 VFMADD213SD -0x8(%RSI),%XMM0,%XMM12 |
(93) 0x40ecbb VMOVSD %XMM12,-0x8(%RSI) |
(93) 0x40ecc0 VMOVSD (%RCX),%XMM13 |
(93) 0x40ecc4 VFMADD213SD (%RSI),%XMM0,%XMM13 |
(93) 0x40ecc9 ADD $0x18,%RCX |
(93) 0x40eccd ADD $0x18,%RSI |
(93) 0x40ecd1 VMOVSD %XMM13,-0x18(%RSI) |
(93) 0x40ecd6 VMOVSD -0x10(%RCX),%XMM14 |
(93) 0x40ecdb VFMADD213SD -0x10(%RSI),%XMM0,%XMM14 |
(93) 0x40ece1 VMOVSD %XMM14,-0x10(%RSI) |
(93) 0x40ece6 VMOVSD -0x8(%RCX),%XMM15 |
(93) 0x40eceb VFMADD213SD -0x8(%RSI),%XMM0,%XMM15 |
(93) 0x40ecf1 VMOVSD %XMM15,-0x8(%RSI) |
(93) 0x40ecf6 VMOVSD (%RCX),%XMM1 |
(93) 0x40ecfa VFMADD213SD (%RSI),%XMM0,%XMM1 |
(93) 0x40ecff ADD $0x18,%RCX |
(93) 0x40ed03 ADD $0x18,%RSI |
(93) 0x40ed07 VMOVSD %XMM1,-0x18(%RSI) |
(93) 0x40ed0c VMOVSD -0x10(%RCX),%XMM2 |
(93) 0x40ed11 VFMADD213SD -0x10(%RSI),%XMM0,%XMM2 |
(93) 0x40ed17 VMOVSD %XMM2,-0x10(%RSI) |
(93) 0x40ed1c VMOVSD -0x8(%RCX),%XMM3 |
(93) 0x40ed21 VFMADD213SD -0x8(%RSI),%XMM0,%XMM3 |
(93) 0x40ed27 VMOVSD %XMM3,-0x8(%RSI) |
(93) 0x40ed2c VMOVSD (%RCX),%XMM4 |
(93) 0x40ed30 VFMADD213SD (%RSI),%XMM0,%XMM4 |
(93) 0x40ed35 ADD $0x18,%RSI |
(93) 0x40ed39 ADD $0x18,%RCX |
(93) 0x40ed3d VMOVSD %XMM4,-0x18(%RSI) |
(93) 0x40ed42 VMOVSD -0x10(%RCX),%XMM5 |
(93) 0x40ed47 VFMADD213SD -0x10(%RSI),%XMM0,%XMM5 |
(93) 0x40ed4d VMOVSD %XMM5,-0x10(%RSI) |
(93) 0x40ed52 VMOVSD -0x8(%RCX),%XMM6 |
(93) 0x40ed57 VFMADD213SD -0x8(%RSI),%XMM0,%XMM6 |
(93) 0x40ed5d VMOVSD %XMM6,-0x8(%RSI) |
(93) 0x40ed62 CMP %RDX,%RSI |
(93) 0x40ed65 JE 40ef42 |
(94) 0x40ed6b VMOVSD (%RCX),%XMM7 |
(94) 0x40ed6f VFMADD213SD (%RSI),%XMM0,%XMM7 |
(94) 0x40ed74 ADD $0xc0,%RSI |
(94) 0x40ed7b ADD $0xc0,%RCX |
(94) 0x40ed82 VMOVSD %XMM7,-0xc0(%RSI) |
(94) 0x40ed8a VMOVSD -0xb8(%RCX),%XMM8 |
(94) 0x40ed92 VFMADD213SD -0xb8(%RSI),%XMM0,%XMM8 |
(94) 0x40ed9b VMOVSD %XMM8,-0xb8(%RSI) |
(94) 0x40eda3 VMOVSD -0xb0(%RCX),%XMM9 |
(94) 0x40edab VFMADD213SD -0xb0(%RSI),%XMM0,%XMM9 |
(94) 0x40edb4 VMOVSD %XMM9,-0xb0(%RSI) |
(94) 0x40edbc VMOVSD -0xa8(%RCX),%XMM10 |
(94) 0x40edc4 VFMADD213SD -0xa8(%RSI),%XMM0,%XMM10 |
(94) 0x40edcd VMOVSD %XMM10,-0xa8(%RSI) |
(94) 0x40edd5 VMOVSD -0xa0(%RCX),%XMM11 |
(94) 0x40eddd VFMADD213SD -0xa0(%RSI),%XMM0,%XMM11 |
(94) 0x40ede6 VMOVSD %XMM11,-0xa0(%RSI) |
(94) 0x40edee VMOVSD -0x98(%RCX),%XMM12 |
(94) 0x40edf6 VFMADD213SD -0x98(%RSI),%XMM0,%XMM12 |
(94) 0x40edff VMOVSD %XMM12,-0x98(%RSI) |
(94) 0x40ee07 VMOVSD -0x90(%RCX),%XMM13 |
(94) 0x40ee0f VFMADD213SD -0x90(%RSI),%XMM0,%XMM13 |
(94) 0x40ee18 VMOVSD %XMM13,-0x90(%RSI) |
(94) 0x40ee20 VMOVSD -0x88(%RCX),%XMM14 |
(94) 0x40ee28 VFMADD213SD -0x88(%RSI),%XMM0,%XMM14 |
(94) 0x40ee31 VMOVSD %XMM14,-0x88(%RSI) |
(94) 0x40ee39 VMOVSD -0x80(%RCX),%XMM15 |
(94) 0x40ee3e VFMADD213SD -0x80(%RSI),%XMM0,%XMM15 |
(94) 0x40ee44 VMOVSD %XMM15,-0x80(%RSI) |
(94) 0x40ee49 VMOVSD -0x78(%RCX),%XMM1 |
(94) 0x40ee4e VFMADD213SD -0x78(%RSI),%XMM0,%XMM1 |
(94) 0x40ee54 VMOVSD %XMM1,-0x78(%RSI) |
(94) 0x40ee59 VMOVSD -0x70(%RCX),%XMM2 |
(94) 0x40ee5e VFMADD213SD -0x70(%RSI),%XMM0,%XMM2 |
(94) 0x40ee64 VMOVSD %XMM2,-0x70(%RSI) |
(94) 0x40ee69 VMOVSD -0x68(%RCX),%XMM3 |
(94) 0x40ee6e VFMADD213SD -0x68(%RSI),%XMM0,%XMM3 |
(94) 0x40ee74 VMOVSD %XMM3,-0x68(%RSI) |
(94) 0x40ee79 VMOVSD -0x60(%RCX),%XMM4 |
(94) 0x40ee7e VFMADD213SD -0x60(%RSI),%XMM0,%XMM4 |
(94) 0x40ee84 VMOVSD %XMM4,-0x60(%RSI) |
(94) 0x40ee89 VMOVSD -0x58(%RCX),%XMM5 |
(94) 0x40ee8e VFMADD213SD -0x58(%RSI),%XMM0,%XMM5 |
(94) 0x40ee94 VMOVSD %XMM5,-0x58(%RSI) |
(94) 0x40ee99 VMOVSD -0x50(%RCX),%XMM6 |
(94) 0x40ee9e VFMADD213SD -0x50(%RSI),%XMM0,%XMM6 |
(94) 0x40eea4 VMOVSD %XMM6,-0x50(%RSI) |
(94) 0x40eea9 VMOVSD -0x48(%RCX),%XMM7 |
(94) 0x40eeae VFMADD213SD -0x48(%RSI),%XMM0,%XMM7 |
(94) 0x40eeb4 VMOVSD %XMM7,-0x48(%RSI) |
(94) 0x40eeb9 VMOVSD -0x40(%RCX),%XMM8 |
(94) 0x40eebe VFMADD213SD -0x40(%RSI),%XMM0,%XMM8 |
(94) 0x40eec4 VMOVSD %XMM8,-0x40(%RSI) |
(94) 0x40eec9 VMOVSD -0x38(%RCX),%XMM9 |
(94) 0x40eece VFMADD213SD -0x38(%RSI),%XMM0,%XMM9 |
(94) 0x40eed4 VMOVSD %XMM9,-0x38(%RSI) |
(94) 0x40eed9 VMOVSD -0x30(%RCX),%XMM10 |
(94) 0x40eede VFMADD213SD -0x30(%RSI),%XMM0,%XMM10 |
(94) 0x40eee4 VMOVSD %XMM10,-0x30(%RSI) |
(94) 0x40eee9 VMOVSD -0x28(%RCX),%XMM11 |
(94) 0x40eeee VFMADD213SD -0x28(%RSI),%XMM0,%XMM11 |
(94) 0x40eef4 VMOVSD %XMM11,-0x28(%RSI) |
(94) 0x40eef9 VMOVSD -0x20(%RCX),%XMM12 |
(94) 0x40eefe VFMADD213SD -0x20(%RSI),%XMM0,%XMM12 |
(94) 0x40ef04 VMOVSD %XMM12,-0x20(%RSI) |
(94) 0x40ef09 VMOVSD -0x18(%RCX),%XMM13 |
(94) 0x40ef0e VFMADD213SD -0x18(%RSI),%XMM0,%XMM13 |
(94) 0x40ef14 VMOVSD %XMM13,-0x18(%RSI) |
(94) 0x40ef19 VMOVSD -0x10(%RCX),%XMM14 |
(94) 0x40ef1e VFMADD213SD -0x10(%RSI),%XMM0,%XMM14 |
(94) 0x40ef24 VMOVSD %XMM14,-0x10(%RSI) |
(94) 0x40ef29 VMOVSD -0x8(%RCX),%XMM15 |
(94) 0x40ef2e VFMADD213SD -0x8(%RSI),%XMM0,%XMM15 |
(94) 0x40ef34 VMOVSD %XMM15,-0x8(%RSI) |
(94) 0x40ef39 CMP %RDX,%RSI |
(94) 0x40ef3c JNE 40ed6b |
(93) 0x40ef42 INC %R12 |
(93) 0x40ef45 ADD $0x600,%R8 |
(93) 0x40ef4c CMP %R12D,%EAX |
(93) 0x40ef4f JG 40eb68 |
0x40ef55 POP %RBX |
0x40ef56 POP %R12 |
0x40ef58 POP %RBP |
0x40ef59 RET |
0x40ef5a INC %EAX |
0x40ef5c XOR %EDX,%EDX |
0x40ef5e JMP 40eb38 |
0x40ef63 NOPW %CS:(%RAX,%RAX,1) |
0x40ef6e XCHG %AX,%AX |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
○97.56 | gomp_thread_start | team.c:130 | libgomp.so.1.0.0 |
○2.42 | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
Path / |
Source file and lines | timestep.c:71-78 |
Module | exec |
nb instructions | 36 |
nb uops | 41 |
loop length | 115 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 0 |
micro-operation queue | 6.83 cycles |
front end | 6.83 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.70 | 4.00 | 3.00 | 3.00 | 2.50 | 2.87 | 2.70 | 2.50 | 2.50 | 2.50 | 2.73 | 3.00 |
cycles | 2.70 | 5.33 | 3.00 | 3.00 | 2.50 | 2.87 | 2.70 | 2.50 | 2.50 | 2.50 | 2.73 | 3.00 |
Cycles executing div or sqrt instructions | 6.00 |
FE+BE cycles | 6.52-6.58 |
Stall cycles | 0.00 |
Front-end | 6.83 |
Dispatch | 5.33 |
DIV/SQRT | 6.00 |
Overall L1 | 6.83 |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 0% |
all | 7% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 6% |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 7% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 8% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 6% |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 8% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 403070 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 403160 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x10(%RBX),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CLTD | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %R12D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
CMP %EDX,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 40ef5a <advanceVelocity._omp_fn.0+0x44a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EAX,%ESI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %EDX,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %ESI,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %EAX,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 40ef55 <advanceVelocity._omp_fn.0+0x445> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV (%RBX),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %ESI,%R12 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
VMOVSD 0x8(%RBX),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R12,%R12,2),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x18(%R10),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SAL $0x9,%R8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV 0x78(%RCX),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 40eb38 <advanceVelocity._omp_fn.0+0x28> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | timestep.c:71-78 |
Module | exec |
nb instructions | 36 |
nb uops | 41 |
loop length | 115 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 0 |
micro-operation queue | 6.83 cycles |
front end | 6.83 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.70 | 4.00 | 3.00 | 3.00 | 2.50 | 2.87 | 2.70 | 2.50 | 2.50 | 2.50 | 2.73 | 3.00 |
cycles | 2.70 | 5.33 | 3.00 | 3.00 | 2.50 | 2.87 | 2.70 | 2.50 | 2.50 | 2.50 | 2.73 | 3.00 |
Cycles executing div or sqrt instructions | 6.00 |
FE+BE cycles | 6.52-6.58 |
Stall cycles | 0.00 |
Front-end | 6.83 |
Dispatch | 5.33 |
DIV/SQRT | 6.00 |
Overall L1 | 6.83 |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 0% |
all | 7% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 6% |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 7% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 8% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 6% |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 8% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 403070 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 403160 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x10(%RBX),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CLTD | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %R12D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
CMP %EDX,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 40ef5a <advanceVelocity._omp_fn.0+0x44a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EAX,%ESI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %EDX,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %ESI,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %EAX,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 40ef55 <advanceVelocity._omp_fn.0+0x445> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV (%RBX),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %ESI,%R12 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
VMOVSD 0x8(%RBX),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R12,%R12,2),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x18(%R10),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SAL $0x9,%R8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV 0x78(%RCX),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 40eb38 <advanceVelocity._omp_fn.0+0x28> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼advanceVelocity._omp_fn.0– | 6.17 | 0.96 |
▼Loop 93 - timestep.c:74-78 - exec– | 2.82 | 0.3 |
○Loop 94 - timestep.c:74-78 - exec | 3.34 | 0.36 |