Function: advanceVelocity._omp_fn.0 | Module: exec | Source: timestep.c:71-78 | Coverage: 4.04% |
---|
Function: advanceVelocity._omp_fn.0 | Module: exec | Source: timestep.c:71-78 | Coverage: 4.04% |
---|
/scratch_na/users/xoserete/qaas_runs/171-322-9862/intel/CoMD/build/CoMD/CoMD/src-openmp/timestep.c: 71 - 78 |
-------------------------------------------------------------------------------- |
71: #pragma omp parallel for |
72: for (int iBox=0; iBox<nBoxes; iBox++) |
73: { |
74: for (int iOff=MAXATOMS*iBox,ii=0; ii<s->boxes->nAtoms[iBox]; ii++,iOff++) |
75: { |
76: s->atoms->p[iOff][0] += dt*s->atoms->f[iOff][0]; |
77: s->atoms->p[iOff][1] += dt*s->atoms->f[iOff][1]; |
78: s->atoms->p[iOff][2] += dt*s->atoms->f[iOff][2]; |
0x4059c0 PUSH %RBP |
0x4059c1 MOV %RSP,%RBP |
0x4059c4 PUSH %R12 |
0x4059c6 PUSH %RBX |
0x4059c7 MOV %RDI,%RBX |
0x4059ca CALL 403070 <omp_get_num_threads@plt> |
0x4059cf MOV %EAX,%R12D |
0x4059d2 CALL 403150 <omp_get_thread_num@plt> |
0x4059d7 MOV %EAX,%ESI |
0x4059d9 MOV 0x10(%RBX),%EAX |
0x4059dc CLTD |
0x4059dd IDIV %R12D |
0x4059e0 CMP %EDX,%ESI |
0x4059e2 JL 405e15 |
0x4059e8 IMUL %EAX,%ESI |
0x4059eb ADD %EDX,%ESI |
0x4059ed ADD %ESI,%EAX |
0x4059ef CMP %EAX,%ESI |
0x4059f1 JGE 405e10 |
0x4059f7 MOV (%RBX),%R10 |
0x4059fa MOVSXD %ESI,%R12 |
0x4059fd VMOVSD 0x8(%RBX),%XMM0 |
0x405a02 LEA (%R12,%R12,2),%R8 |
0x405a06 MOV 0x18(%R10),%RCX |
0x405a0a SAL $0x9,%R8 |
0x405a0e MOV 0x78(%RCX),%R9 |
0x405a12 NOPW (%RAX,%RAX,1) |
(17) 0x405a18 MOVSXD (%R9,%R12,4),%RDI |
(17) 0x405a1c TEST %EDI,%EDI |
(17) 0x405a1e JLE 405dfd |
(17) 0x405a24 MOV 0x20(%R10),%R11 |
(17) 0x405a28 LEA (%RDI,%RDI,2),%RBX |
(17) 0x405a2c LEA -0x18(,%RBX,8),%RDI |
(17) 0x405a34 MOV 0x20(%R11),%RSI |
(17) 0x405a38 MOV 0x28(%R11),%RCX |
(17) 0x405a3c SHR $0x3,%RDI |
(17) 0x405a40 MOV $0xaaaaaaaaaaaaaab,%R11 |
(17) 0x405a4a IMUL %R11,%RDI |
(17) 0x405a4e ADD %R8,%RSI |
(17) 0x405a51 ADD %R8,%RCX |
(17) 0x405a54 LEA (%RSI,%RBX,8),%RDX |
(17) 0x405a58 INC %RDI |
(17) 0x405a5b AND $0x7,%EDI |
(17) 0x405a5e JE 405c22 |
(17) 0x405a64 CMP $0x1,%RDI |
(17) 0x405a68 JE 405be2 |
(17) 0x405a6e CMP $0x2,%RDI |
(17) 0x405a72 JE 405bab |
(17) 0x405a78 CMP $0x3,%RDI |
(17) 0x405a7c JE 405b74 |
(17) 0x405a82 CMP $0x4,%RDI |
(17) 0x405a86 JE 405b3d |
(17) 0x405a8c CMP $0x5,%RDI |
(17) 0x405a90 JE 405b06 |
(17) 0x405a92 CMP $0x6,%RDI |
(17) 0x405a96 JE 405acf |
(17) 0x405a98 VMOVSD (%RCX),%XMM1 |
(17) 0x405a9c ADD $0x18,%RSI |
(17) 0x405aa0 ADD $0x18,%RCX |
(17) 0x405aa4 VFMADD213SD -0x18(%RSI),%XMM0,%XMM1 |
(17) 0x405aaa VMOVSD %XMM1,-0x18(%RSI) |
(17) 0x405aaf VMOVSD -0x10(%RCX),%XMM2 |
(17) 0x405ab4 VFMADD213SD -0x10(%RSI),%XMM0,%XMM2 |
(17) 0x405aba VMOVSD %XMM2,-0x10(%RSI) |
(17) 0x405abf VMOVSD -0x8(%RCX),%XMM3 |
(17) 0x405ac4 VFMADD213SD -0x8(%RSI),%XMM0,%XMM3 |
(17) 0x405aca VMOVSD %XMM3,-0x8(%RSI) |
(17) 0x405acf VMOVSD (%RCX),%XMM4 |
(17) 0x405ad3 ADD $0x18,%RSI |
(17) 0x405ad7 ADD $0x18,%RCX |
(17) 0x405adb VFMADD213SD -0x18(%RSI),%XMM0,%XMM4 |
(17) 0x405ae1 VMOVSD %XMM4,-0x18(%RSI) |
(17) 0x405ae6 VMOVSD -0x10(%RCX),%XMM5 |
(17) 0x405aeb VFMADD213SD -0x10(%RSI),%XMM0,%XMM5 |
(17) 0x405af1 VMOVSD %XMM5,-0x10(%RSI) |
(17) 0x405af6 VMOVSD -0x8(%RCX),%XMM6 |
(17) 0x405afb VFMADD213SD -0x8(%RSI),%XMM0,%XMM6 |
(17) 0x405b01 VMOVSD %XMM6,-0x8(%RSI) |
(17) 0x405b06 VMOVSD (%RCX),%XMM7 |
(17) 0x405b0a ADD $0x18,%RSI |
(17) 0x405b0e ADD $0x18,%RCX |
(17) 0x405b12 VFMADD213SD -0x18(%RSI),%XMM0,%XMM7 |
(17) 0x405b18 VMOVSD %XMM7,-0x18(%RSI) |
(17) 0x405b1d VMOVSD -0x10(%RCX),%XMM8 |
(17) 0x405b22 VFMADD213SD -0x10(%RSI),%XMM0,%XMM8 |
(17) 0x405b28 VMOVSD %XMM8,-0x10(%RSI) |
(17) 0x405b2d VMOVSD -0x8(%RCX),%XMM9 |
(17) 0x405b32 VFMADD213SD -0x8(%RSI),%XMM0,%XMM9 |
(17) 0x405b38 VMOVSD %XMM9,-0x8(%RSI) |
(17) 0x405b3d VMOVSD (%RCX),%XMM10 |
(17) 0x405b41 ADD $0x18,%RSI |
(17) 0x405b45 ADD $0x18,%RCX |
(17) 0x405b49 VFMADD213SD -0x18(%RSI),%XMM0,%XMM10 |
(17) 0x405b4f VMOVSD %XMM10,-0x18(%RSI) |
(17) 0x405b54 VMOVSD -0x10(%RCX),%XMM11 |
(17) 0x405b59 VFMADD213SD -0x10(%RSI),%XMM0,%XMM11 |
(17) 0x405b5f VMOVSD %XMM11,-0x10(%RSI) |
(17) 0x405b64 VMOVSD -0x8(%RCX),%XMM12 |
(17) 0x405b69 VFMADD213SD -0x8(%RSI),%XMM0,%XMM12 |
(17) 0x405b6f VMOVSD %XMM12,-0x8(%RSI) |
(17) 0x405b74 VMOVSD (%RCX),%XMM13 |
(17) 0x405b78 ADD $0x18,%RSI |
(17) 0x405b7c ADD $0x18,%RCX |
(17) 0x405b80 VFMADD213SD -0x18(%RSI),%XMM0,%XMM13 |
(17) 0x405b86 VMOVSD %XMM13,-0x18(%RSI) |
(17) 0x405b8b VMOVSD -0x10(%RCX),%XMM14 |
(17) 0x405b90 VFMADD213SD -0x10(%RSI),%XMM0,%XMM14 |
(17) 0x405b96 VMOVSD %XMM14,-0x10(%RSI) |
(17) 0x405b9b VMOVSD -0x8(%RCX),%XMM15 |
(17) 0x405ba0 VFMADD213SD -0x8(%RSI),%XMM0,%XMM15 |
(17) 0x405ba6 VMOVSD %XMM15,-0x8(%RSI) |
(17) 0x405bab VMOVSD (%RCX),%XMM1 |
(17) 0x405baf ADD $0x18,%RSI |
(17) 0x405bb3 ADD $0x18,%RCX |
(17) 0x405bb7 VFMADD213SD -0x18(%RSI),%XMM0,%XMM1 |
(17) 0x405bbd VMOVSD %XMM1,-0x18(%RSI) |
(17) 0x405bc2 VMOVSD -0x10(%RCX),%XMM2 |
(17) 0x405bc7 VFMADD213SD -0x10(%RSI),%XMM0,%XMM2 |
(17) 0x405bcd VMOVSD %XMM2,-0x10(%RSI) |
(17) 0x405bd2 VMOVSD -0x8(%RCX),%XMM3 |
(17) 0x405bd7 VFMADD213SD -0x8(%RSI),%XMM0,%XMM3 |
(17) 0x405bdd VMOVSD %XMM3,-0x8(%RSI) |
(17) 0x405be2 VMOVSD (%RCX),%XMM4 |
(17) 0x405be6 ADD $0x18,%RSI |
(17) 0x405bea ADD $0x18,%RCX |
(17) 0x405bee VFMADD213SD -0x18(%RSI),%XMM0,%XMM4 |
(17) 0x405bf4 VMOVSD %XMM4,-0x18(%RSI) |
(17) 0x405bf9 VMOVSD -0x10(%RCX),%XMM5 |
(17) 0x405bfe VFMADD213SD -0x10(%RSI),%XMM0,%XMM5 |
(17) 0x405c04 VMOVSD %XMM5,-0x10(%RSI) |
(17) 0x405c09 VMOVSD -0x8(%RCX),%XMM6 |
(17) 0x405c0e VFMADD213SD -0x8(%RSI),%XMM0,%XMM6 |
(17) 0x405c14 VMOVSD %XMM6,-0x8(%RSI) |
(17) 0x405c19 CMP %RDX,%RSI |
(17) 0x405c1c JE 405dfd |
(18) 0x405c22 VMOVSD (%RCX),%XMM7 |
(18) 0x405c26 ADD $0xc0,%RSI |
(18) 0x405c2d ADD $0xc0,%RCX |
(18) 0x405c34 VFMADD213SD -0xc0(%RSI),%XMM0,%XMM7 |
(18) 0x405c3d VMOVSD %XMM7,-0xc0(%RSI) |
(18) 0x405c45 VMOVSD -0xb8(%RCX),%XMM8 |
(18) 0x405c4d VFMADD213SD -0xb8(%RSI),%XMM0,%XMM8 |
(18) 0x405c56 VMOVSD %XMM8,-0xb8(%RSI) |
(18) 0x405c5e VMOVSD -0xb0(%RCX),%XMM9 |
(18) 0x405c66 VFMADD213SD -0xb0(%RSI),%XMM0,%XMM9 |
(18) 0x405c6f VMOVSD %XMM9,-0xb0(%RSI) |
(18) 0x405c77 VMOVSD -0xa8(%RCX),%XMM10 |
(18) 0x405c7f VFMADD213SD -0xa8(%RSI),%XMM0,%XMM10 |
(18) 0x405c88 VMOVSD %XMM10,-0xa8(%RSI) |
(18) 0x405c90 VMOVSD -0xa0(%RCX),%XMM11 |
(18) 0x405c98 VFMADD213SD -0xa0(%RSI),%XMM0,%XMM11 |
(18) 0x405ca1 VMOVSD %XMM11,-0xa0(%RSI) |
(18) 0x405ca9 VMOVSD -0x98(%RCX),%XMM12 |
(18) 0x405cb1 VFMADD213SD -0x98(%RSI),%XMM0,%XMM12 |
(18) 0x405cba VMOVSD %XMM12,-0x98(%RSI) |
(18) 0x405cc2 VMOVSD -0x90(%RCX),%XMM13 |
(18) 0x405cca VFMADD213SD -0x90(%RSI),%XMM0,%XMM13 |
(18) 0x405cd3 VMOVSD %XMM13,-0x90(%RSI) |
(18) 0x405cdb VMOVSD -0x88(%RCX),%XMM14 |
(18) 0x405ce3 VFMADD213SD -0x88(%RSI),%XMM0,%XMM14 |
(18) 0x405cec VMOVSD %XMM14,-0x88(%RSI) |
(18) 0x405cf4 VMOVSD -0x80(%RCX),%XMM15 |
(18) 0x405cf9 VFMADD213SD -0x80(%RSI),%XMM0,%XMM15 |
(18) 0x405cff VMOVSD %XMM15,-0x80(%RSI) |
(18) 0x405d04 VMOVSD -0x78(%RCX),%XMM1 |
(18) 0x405d09 VFMADD213SD -0x78(%RSI),%XMM0,%XMM1 |
(18) 0x405d0f VMOVSD %XMM1,-0x78(%RSI) |
(18) 0x405d14 VMOVSD -0x70(%RCX),%XMM2 |
(18) 0x405d19 VFMADD213SD -0x70(%RSI),%XMM0,%XMM2 |
(18) 0x405d1f VMOVSD %XMM2,-0x70(%RSI) |
(18) 0x405d24 VMOVSD -0x68(%RCX),%XMM3 |
(18) 0x405d29 VFMADD213SD -0x68(%RSI),%XMM0,%XMM3 |
(18) 0x405d2f VMOVSD %XMM3,-0x68(%RSI) |
(18) 0x405d34 VMOVSD -0x60(%RCX),%XMM4 |
(18) 0x405d39 VFMADD213SD -0x60(%RSI),%XMM0,%XMM4 |
(18) 0x405d3f VMOVSD %XMM4,-0x60(%RSI) |
(18) 0x405d44 VMOVSD -0x58(%RCX),%XMM5 |
(18) 0x405d49 VFMADD213SD -0x58(%RSI),%XMM0,%XMM5 |
(18) 0x405d4f VMOVSD %XMM5,-0x58(%RSI) |
(18) 0x405d54 VMOVSD -0x50(%RCX),%XMM6 |
(18) 0x405d59 VFMADD213SD -0x50(%RSI),%XMM0,%XMM6 |
(18) 0x405d5f VMOVSD %XMM6,-0x50(%RSI) |
(18) 0x405d64 VMOVSD -0x48(%RCX),%XMM7 |
(18) 0x405d69 VFMADD213SD -0x48(%RSI),%XMM0,%XMM7 |
(18) 0x405d6f VMOVSD %XMM7,-0x48(%RSI) |
(18) 0x405d74 VMOVSD -0x40(%RCX),%XMM8 |
(18) 0x405d79 VFMADD213SD -0x40(%RSI),%XMM0,%XMM8 |
(18) 0x405d7f VMOVSD %XMM8,-0x40(%RSI) |
(18) 0x405d84 VMOVSD -0x38(%RCX),%XMM9 |
(18) 0x405d89 VFMADD213SD -0x38(%RSI),%XMM0,%XMM9 |
(18) 0x405d8f VMOVSD %XMM9,-0x38(%RSI) |
(18) 0x405d94 VMOVSD -0x30(%RCX),%XMM10 |
(18) 0x405d99 VFMADD213SD -0x30(%RSI),%XMM0,%XMM10 |
(18) 0x405d9f VMOVSD %XMM10,-0x30(%RSI) |
(18) 0x405da4 VMOVSD -0x28(%RCX),%XMM11 |
(18) 0x405da9 VFMADD213SD -0x28(%RSI),%XMM0,%XMM11 |
(18) 0x405daf VMOVSD %XMM11,-0x28(%RSI) |
(18) 0x405db4 VMOVSD -0x20(%RCX),%XMM12 |
(18) 0x405db9 VFMADD213SD -0x20(%RSI),%XMM0,%XMM12 |
(18) 0x405dbf VMOVSD %XMM12,-0x20(%RSI) |
(18) 0x405dc4 VMOVSD -0x18(%RCX),%XMM13 |
(18) 0x405dc9 VFMADD213SD -0x18(%RSI),%XMM0,%XMM13 |
(18) 0x405dcf VMOVSD %XMM13,-0x18(%RSI) |
(18) 0x405dd4 VMOVSD -0x10(%RCX),%XMM14 |
(18) 0x405dd9 VFMADD213SD -0x10(%RSI),%XMM0,%XMM14 |
(18) 0x405ddf VMOVSD %XMM14,-0x10(%RSI) |
(18) 0x405de4 VMOVSD -0x8(%RCX),%XMM15 |
(18) 0x405de9 VFMADD213SD -0x8(%RSI),%XMM0,%XMM15 |
(18) 0x405def VMOVSD %XMM15,-0x8(%RSI) |
(18) 0x405df4 CMP %RDX,%RSI |
(18) 0x405df7 JNE 405c22 |
(17) 0x405dfd INC %R12 |
(17) 0x405e00 ADD $0x600,%R8 |
(17) 0x405e07 CMP %R12D,%EAX |
(17) 0x405e0a JG 405a18 |
0x405e10 POP %RBX |
0x405e11 POP %R12 |
0x405e13 POP %RBP |
0x405e14 RET |
0x405e15 INC %EAX |
0x405e17 XOR %EDX,%EDX |
0x405e19 JMP 4059e8 |
0x405e1e XCHG %AX,%AX |
Path / |
Source file and lines | timestep.c:71-78 |
Module | exec |
nb instructions | 35 |
nb uops | 40 |
loop length | 104 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 0 |
micro-operation queue | 6.67 cycles |
front end | 6.67 cycles |
Metric | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.70 | 4.00 | 3.00 | 3.00 | 2.50 | 2.87 | 2.70 | 2.50 | 2.50 | 2.50 | 2.73 | 3.00 |
cycles | 2.70 | 5.33 | 3.00 | 3.00 | 2.50 | 2.87 | 2.70 | 2.50 | 2.50 | 2.50 | 2.73 | 3.00 |
Cycles executing div or sqrt instructions | 6.00 |
FE+BE cycles | 6.54-6.55 |
Stall cycles | 0.00 |
Front-end | 6.67 |
Dispatch | 5.33 |
DIV/SQRT | 6.00 |
Overall L1 | 6.67 |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 0% |
all | 7% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 6% |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 7% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 8% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 6% |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 8% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 403070 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 403150 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x10(%RBX),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CLTD | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %R12D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
CMP %EDX,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 405e15 <advanceVelocity._omp_fn.0+0x455> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EAX,%ESI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %EDX,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %ESI,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %EAX,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 405e10 <advanceVelocity._omp_fn.0+0x450> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV (%RBX),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %ESI,%R12 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
VMOVSD 0x8(%RBX),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R12,%R12,2),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x18(%R10),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SAL $0x9,%R8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV 0x78(%RCX),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 4059e8 <advanceVelocity._omp_fn.0+0x28> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | timestep.c:71-78 |
Module | exec |
nb instructions | 35 |
nb uops | 40 |
loop length | 104 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 0 |
micro-operation queue | 6.67 cycles |
front end | 6.67 cycles |
Metric | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.70 | 4.00 | 3.00 | 3.00 | 2.50 | 2.87 | 2.70 | 2.50 | 2.50 | 2.50 | 2.73 | 3.00 |
cycles | 2.70 | 5.33 | 3.00 | 3.00 | 2.50 | 2.87 | 2.70 | 2.50 | 2.50 | 2.50 | 2.73 | 3.00 |
Cycles executing div or sqrt instructions | 6.00 |
FE+BE cycles | 6.54-6.55 |
Stall cycles | 0.00 |
Front-end | 6.67 |
Dispatch | 5.33 |
DIV/SQRT | 6.00 |
Overall L1 | 6.67 |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 0% |
all | 7% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 6% |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 7% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 8% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 6% |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 8% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 403070 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 403150 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x10(%RBX),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CLTD | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %R12D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
CMP %EDX,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 405e15 <advanceVelocity._omp_fn.0+0x455> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EAX,%ESI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %EDX,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %ESI,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %EAX,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 405e10 <advanceVelocity._omp_fn.0+0x450> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV (%RBX),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %ESI,%R12 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
VMOVSD 0x8(%RBX),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R12,%R12,2),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x18(%R10),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SAL $0x9,%R8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV 0x78(%RCX),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 4059e8 <advanceVelocity._omp_fn.0+0x28> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼advanceVelocity._omp_fn.0– | 4.04 | 0.74 |
▼Loop 17 - timestep.c:74-78 - exec– | 1.81 | 0.24 |
○Loop 18 - timestep.c:74-78 - exec | 2.22 | 0.3 |