Loop Id: 19 | Module: exec | Source: timestep.c:88-94 | Coverage: 0.96% |
---|
Loop Id: 19 | Module: exec | Source: timestep.c:88-94 | Coverage: 0.96% |
---|
0x405e90 MOVSXD (%R13,%R10,4),%R8 |
0x405e95 TEST %R8D,%R8D |
0x405e98 JLE 4060ed |
0x405e9e MOV 0x20(%R12),%R11 |
0x405ea3 MOVSXD %R9D,%RSI |
0x405ea6 MOV 0x28(%R12),%RDI |
0x405eab MOV 0x10(%R11),%R14 |
0x405eaf MOV 0x18(%R11),%RDX |
0x405eb3 MOV 0x20(%R11),%RCX |
0x405eb7 MOV %R10,%R11 |
0x405eba SAL $0x6,%R11 |
0x405ebe LEA (%R14,%RSI,4),%RSI |
0x405ec2 ADD %RAX,%RDX |
0x405ec5 ADD %R8,%R11 |
0x405ec8 ADD %RAX,%RCX |
0x405ecb LEA (%R14,%R11,4),%R11 |
0x405ecf MOV %R11,%R8 |
0x405ed2 SUB %RSI,%R8 |
0x405ed5 SUB $0x4,%R8 |
0x405ed9 SHR $0x2,%R8 |
0x405edd INC %R8 |
0x405ee0 AND $0x3,%R8D |
0x405ee4 JE 405fde |
0x405eea CMP $0x1,%R8 |
0x405eee JE 405f8c |
0x405ef4 CMP $0x2,%R8 |
0x405ef8 JE 405f43 |
0x405efa VMULSD (%RCX),%XMM0,%XMM3 |
0x405efe MOVSXD (%RSI),%R14 |
0x405f01 ADD $0x18,%RDX |
0x405f05 ADD $0x4,%RSI |
0x405f09 ADD $0x18,%RCX |
0x405f0d SAL $0x4,%R14 |
0x405f11 VDIVSD 0x8(%RDI,%R14,1),%XMM2,%XMM1 |
0x405f18 VFMADD213SD -0x18(%RDX),%XMM1,%XMM3 |
0x405f1e VMOVSD %XMM3,-0x18(%RDX) |
0x405f23 VMULSD -0x10(%RCX),%XMM0,%XMM4 |
0x405f28 VFMADD213SD -0x10(%RDX),%XMM1,%XMM4 |
0x405f2e VMOVSD %XMM4,-0x10(%RDX) |
0x405f33 VMULSD -0x8(%RCX),%XMM0,%XMM5 |
0x405f38 VFMADD213SD -0x8(%RDX),%XMM5,%XMM1 |
0x405f3e VMOVSD %XMM1,-0x8(%RDX) |
0x405f43 VMULSD (%RCX),%XMM0,%XMM7 |
0x405f47 MOVSXD (%RSI),%R8 |
0x405f4a ADD $0x18,%RDX |
0x405f4e ADD $0x4,%RSI |
0x405f52 ADD $0x18,%RCX |
0x405f56 SAL $0x4,%R8 |
0x405f5a VDIVSD 0x8(%RDI,%R8,1),%XMM2,%XMM6 |
0x405f61 VFMADD213SD -0x18(%RDX),%XMM6,%XMM7 |
0x405f67 VMOVSD %XMM7,-0x18(%RDX) |
0x405f6c VMULSD -0x10(%RCX),%XMM0,%XMM8 |
0x405f71 VFMADD213SD -0x10(%RDX),%XMM6,%XMM8 |
0x405f77 VMOVSD %XMM8,-0x10(%RDX) |
0x405f7c VMULSD -0x8(%RCX),%XMM0,%XMM9 |
0x405f81 VFMADD213SD -0x8(%RDX),%XMM9,%XMM6 |
0x405f87 VMOVSD %XMM6,-0x8(%RDX) |
0x405f8c VMULSD (%RCX),%XMM0,%XMM11 |
0x405f90 MOVSXD (%RSI),%R14 |
0x405f93 ADD $0x4,%RSI |
0x405f97 ADD $0x18,%RDX |
0x405f9b ADD $0x18,%RCX |
0x405f9f SAL $0x4,%R14 |
0x405fa3 VDIVSD 0x8(%RDI,%R14,1),%XMM2,%XMM10 |
0x405faa VFMADD213SD -0x18(%RDX),%XMM10,%XMM11 |
0x405fb0 VMOVSD %XMM11,-0x18(%RDX) |
0x405fb5 VMULSD -0x10(%RCX),%XMM0,%XMM12 |
0x405fba VFMADD213SD -0x10(%RDX),%XMM10,%XMM12 |
0x405fc0 VMOVSD %XMM12,-0x10(%RDX) |
0x405fc5 VMULSD -0x8(%RCX),%XMM0,%XMM13 |
0x405fca VFMADD213SD -0x8(%RDX),%XMM13,%XMM10 |
0x405fd0 VMOVSD %XMM10,-0x8(%RDX) |
0x405fd5 CMP %RSI,%R11 |
0x405fd8 JE 4060ed |
(20) 0x405fde VMULSD (%RCX),%XMM0,%XMM15 |
(20) 0x405fe2 MOVSXD (%RSI),%R8 |
(20) 0x405fe5 ADD $0x10,%RSI |
(20) 0x405fe9 ADD $0x60,%RDX |
(20) 0x405fed MOVSXD -0xc(%RSI),%R14 |
(20) 0x405ff1 ADD $0x60,%RCX |
(20) 0x405ff5 SAL $0x4,%R8 |
(20) 0x405ff9 VDIVSD 0x8(%RDI,%R8,1),%XMM2,%XMM14 |
(20) 0x406000 SAL $0x4,%R14 |
(20) 0x406004 MOVSXD -0x8(%RSI),%R8 |
(20) 0x406008 VFMADD213SD -0x60(%RDX),%XMM14,%XMM15 |
(20) 0x40600e SAL $0x4,%R8 |
(20) 0x406012 VMOVSD %XMM15,-0x60(%RDX) |
(20) 0x406017 VMULSD -0x58(%RCX),%XMM0,%XMM1 |
(20) 0x40601c VFMADD213SD -0x58(%RDX),%XMM14,%XMM1 |
(20) 0x406022 VMOVSD %XMM1,-0x58(%RDX) |
(20) 0x406027 VMULSD -0x50(%RCX),%XMM0,%XMM3 |
(20) 0x40602c VFMADD213SD -0x50(%RDX),%XMM3,%XMM14 |
(20) 0x406032 VMOVSD %XMM14,-0x50(%RDX) |
(20) 0x406037 VMULSD -0x48(%RCX),%XMM0,%XMM5 |
(20) 0x40603c VDIVSD 0x8(%RDI,%R14,1),%XMM2,%XMM4 |
(20) 0x406043 VFMADD213SD -0x48(%RDX),%XMM4,%XMM5 |
(20) 0x406049 VMOVSD %XMM5,-0x48(%RDX) |
(20) 0x40604e VMULSD -0x40(%RCX),%XMM0,%XMM6 |
(20) 0x406053 VFMADD213SD -0x40(%RDX),%XMM4,%XMM6 |
(20) 0x406059 VMOVSD %XMM6,-0x40(%RDX) |
(20) 0x40605e VMULSD -0x38(%RCX),%XMM0,%XMM7 |
(20) 0x406063 VFMADD213SD -0x38(%RDX),%XMM7,%XMM4 |
(20) 0x406069 VMOVSD %XMM4,-0x38(%RDX) |
(20) 0x40606e VMULSD -0x30(%RCX),%XMM0,%XMM9 |
(20) 0x406073 VDIVSD 0x8(%RDI,%R8,1),%XMM2,%XMM8 |
(20) 0x40607a VFMADD213SD -0x30(%RDX),%XMM8,%XMM9 |
(20) 0x406080 VMOVSD %XMM9,-0x30(%RDX) |
(20) 0x406085 VMULSD -0x28(%RCX),%XMM0,%XMM10 |
(20) 0x40608a VFMADD213SD -0x28(%RDX),%XMM8,%XMM10 |
(20) 0x406090 VMOVSD %XMM10,-0x28(%RDX) |
(20) 0x406095 VMULSD -0x20(%RCX),%XMM0,%XMM11 |
(20) 0x40609a VFMADD213SD -0x20(%RDX),%XMM11,%XMM8 |
(20) 0x4060a0 VMOVSD %XMM8,-0x20(%RDX) |
(20) 0x4060a5 VMULSD -0x18(%RCX),%XMM0,%XMM13 |
(20) 0x4060aa MOVSXD -0x4(%RSI),%R14 |
(20) 0x4060ae SAL $0x4,%R14 |
(20) 0x4060b2 VDIVSD 0x8(%RDI,%R14,1),%XMM2,%XMM12 |
(20) 0x4060b9 VFMADD213SD -0x18(%RDX),%XMM12,%XMM13 |
(20) 0x4060bf VMOVSD %XMM13,-0x18(%RDX) |
(20) 0x4060c4 VMULSD -0x10(%RCX),%XMM0,%XMM14 |
(20) 0x4060c9 VFMADD213SD -0x10(%RDX),%XMM12,%XMM14 |
(20) 0x4060cf VMOVSD %XMM14,-0x10(%RDX) |
(20) 0x4060d4 VMULSD -0x8(%RCX),%XMM0,%XMM15 |
(20) 0x4060d9 VFMADD213SD -0x8(%RDX),%XMM15,%XMM12 |
(20) 0x4060df VMOVSD %XMM12,-0x8(%RDX) |
(20) 0x4060e4 CMP %RSI,%R11 |
(20) 0x4060e7 JNE 405fde |
0x4060ed INC %R10 |
0x4060f0 ADD $0x40,%R9D |
0x4060f4 ADD $0x600,%RAX |
0x4060fa CMP %R10D,%EBX |
0x4060fd JG 405e90 |
/scratch_na/users/xoserete/qaas_runs/171-322-9862/intel/CoMD/build/CoMD/CoMD/src-openmp/timestep.c: 88 - 94 |
-------------------------------------------------------------------------------- |
88: for (int iOff=MAXATOMS*iBox,ii=0; ii<s->boxes->nAtoms[iBox]; ii++,iOff++) |
89: { |
90: int iSpecies = s->atoms->iSpecies[iOff]; |
91: real_t invMass = 1.0/s->species[iSpecies].mass; |
92: s->atoms->r[iOff][0] += dt*s->atoms->p[iOff][0]*invMass; |
93: s->atoms->r[iOff][1] += dt*s->atoms->p[iOff][1]*invMass; |
94: s->atoms->r[iOff][2] += dt*s->atoms->p[iOff][2]*invMass; |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.13 |
CQA speedup if FP arith vectorized | 1.83 |
CQA speedup if fully vectorized | 2.25 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | NA |
Bottlenecks | micro-operation queue, |
Function | advancePosition._omp_fn.0 |
Source | timestep.c:88-94 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 13.50 |
CQA cycles if no scalar integer | 12.00 |
CQA cycles if FP arith vectorized | 7.38 |
CQA cycles if fully vectorized | 6.00 |
Front-end cycles | 13.50 |
DIV/SQRT cycles | 11.50 |
P0 cycles | 11.50 |
P1 cycles | 10.00 |
P2 cycles | 10.00 |
P3 cycles | 4.50 |
P4 cycles | 6.07 |
P5 cycles | 9.00 |
P6 cycles | 4.50 |
P7 cycles | 4.50 |
P8 cycles | 4.50 |
P9 cycles | 5.93 |
P10 cycles | 10.00 |
P11 cycles | 12.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 15.89 - 15.88 |
Stall cycles (UFS) | 2.04 |
Nb insns | 79.00 |
Nb uops | 78.00 |
Nb loads | 30.00 |
Nb stores | 9.00 |
Nb stack references | 0.00 |
FLOP/cycle | 2.22 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 9.00 |
Nb FLOP fma | 9.00 |
Nb FLOP div | 3.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 21.93 |
Bytes prefetched | 0.00 |
Bytes loaded | 224.00 |
Bytes stored | 72.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 0.00 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | 0.00 |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | 0.00 |
Vectorization ratio div_sqrt | 0.00 |
Vectorization ratio other | 0.00 |
Vector-efficiency ratio all | 12.15 |
Vector-efficiency ratio load | 12.50 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | 12.50 |
Vector-efficiency ratio add_sub | 9.38 |
Vector-efficiency ratio fma | 12.50 |
Vector-efficiency ratio div_sqrt | 12.50 |
Vector-efficiency ratio other | 10.94 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.13 |
CQA speedup if FP arith vectorized | 1.83 |
CQA speedup if fully vectorized | 2.25 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | NA |
Bottlenecks | micro-operation queue, |
Function | advancePosition._omp_fn.0 |
Source | timestep.c:88-94 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 13.50 |
CQA cycles if no scalar integer | 12.00 |
CQA cycles if FP arith vectorized | 7.38 |
CQA cycles if fully vectorized | 6.00 |
Front-end cycles | 13.50 |
DIV/SQRT cycles | 11.50 |
P0 cycles | 11.50 |
P1 cycles | 10.00 |
P2 cycles | 10.00 |
P3 cycles | 4.50 |
P4 cycles | 6.07 |
P5 cycles | 9.00 |
P6 cycles | 4.50 |
P7 cycles | 4.50 |
P8 cycles | 4.50 |
P9 cycles | 5.93 |
P10 cycles | 10.00 |
P11 cycles | 12.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 15.89 - 15.88 |
Stall cycles (UFS) | 2.04 |
Nb insns | 79.00 |
Nb uops | 78.00 |
Nb loads | 30.00 |
Nb stores | 9.00 |
Nb stack references | 0.00 |
FLOP/cycle | 2.22 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 9.00 |
Nb FLOP fma | 9.00 |
Nb FLOP div | 3.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 21.93 |
Bytes prefetched | 0.00 |
Bytes loaded | 224.00 |
Bytes stored | 72.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 0.00 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | 0.00 |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | 0.00 |
Vectorization ratio div_sqrt | 0.00 |
Vectorization ratio other | 0.00 |
Vector-efficiency ratio all | 12.15 |
Vector-efficiency ratio load | 12.50 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | 12.50 |
Vector-efficiency ratio add_sub | 9.38 |
Vector-efficiency ratio fma | 12.50 |
Vector-efficiency ratio div_sqrt | 12.50 |
Vector-efficiency ratio other | 10.94 |
Path / |
Function | advancePosition._omp_fn.0 |
Source file and lines | timestep.c:88-94 |
Module | exec |
nb instructions | 79 |
nb uops | 78 |
loop length | 356 |
used x86 registers | 13 |
used mmx registers | 0 |
used xmm registers | 14 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 0 |
micro-operation queue | 13.50 cycles |
front end | 13.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 11.50 | 11.50 | 10.00 | 10.00 | 4.50 | 6.07 | 9.00 | 4.50 | 4.50 | 4.50 | 5.93 | 10.00 |
cycles | 11.50 | 11.50 | 10.00 | 10.00 | 4.50 | 6.07 | 9.00 | 4.50 | 4.50 | 4.50 | 5.93 | 10.00 |
Cycles executing div or sqrt instructions | 12.00 |
FE+BE cycles | 15.89-15.88 |
Stall cycles | 2.04 |
LM full (events) | 4.08 |
Front-end | 13.50 |
Dispatch | 11.50 |
DIV/SQRT | 12.00 |
Overall L1 | 13.50 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 0% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | 0% |
div/sqrt | 0% |
other | NA (no other vectorizable/vectorized instructions) |
all | 0% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | 0% |
div/sqrt | 0% |
other | 0% |
all | 10% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 9% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | 12% |
div/sqrt | 12% |
other | NA (no other vectorizable/vectorized instructions) |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 9% |
fma | 12% |
div/sqrt | 12% |
other | 10% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOVSXD (%R13,%R10,4),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %R8D,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 4060ed <advancePosition._omp_fn.0+0x2cd> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x20(%R12),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %R9D,%RSI | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV 0x28(%R12),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R11),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%R11),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%R11),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SAL $0x6,%R11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA (%R14,%RSI,4),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RAX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R8,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA (%R14,%R11,4),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R11,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %RSI,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SUB $0x4,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x2,%R8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
INC %R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $0x3,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 405fde <advancePosition._omp_fn.0+0x1be> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x1,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 405f8c <advancePosition._omp_fn.0+0x16c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x2,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 405f43 <advancePosition._omp_fn.0+0x123> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMULSD (%RCX),%XMM0,%XMM3 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
MOVSXD (%RSI),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x18,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADD $0x4,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADD $0x18,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SAL $0x4,%R14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
VDIVSD 0x8(%RDI,%R14,1),%XMM2,%XMM1 | 1 | 1 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 13-15 | 4 |
VFMADD213SD -0x18(%RDX),%XMM1,%XMM3 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM3,-0x18(%RDX) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMULSD -0x10(%RCX),%XMM0,%XMM4 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VFMADD213SD -0x10(%RDX),%XMM1,%XMM4 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM4,-0x10(%RDX) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMULSD -0x8(%RCX),%XMM0,%XMM5 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VFMADD213SD -0x8(%RDX),%XMM5,%XMM1 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM1,-0x8(%RDX) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMULSD (%RCX),%XMM0,%XMM7 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
MOVSXD (%RSI),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x18,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADD $0x4,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADD $0x18,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SAL $0x4,%R8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
VDIVSD 0x8(%RDI,%R8,1),%XMM2,%XMM6 | 1 | 1 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 13-15 | 4 |
VFMADD213SD -0x18(%RDX),%XMM6,%XMM7 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM7,-0x18(%RDX) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMULSD -0x10(%RCX),%XMM0,%XMM8 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VFMADD213SD -0x10(%RDX),%XMM6,%XMM8 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM8,-0x10(%RDX) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMULSD -0x8(%RCX),%XMM0,%XMM9 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VFMADD213SD -0x8(%RDX),%XMM9,%XMM6 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM6,-0x8(%RDX) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMULSD (%RCX),%XMM0,%XMM11 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
MOVSXD (%RSI),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x4,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADD $0x18,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADD $0x18,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SAL $0x4,%R14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
VDIVSD 0x8(%RDI,%R14,1),%XMM2,%XMM10 | 1 | 1 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 13-15 | 4 |
VFMADD213SD -0x18(%RDX),%XMM10,%XMM11 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM11,-0x18(%RDX) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMULSD -0x10(%RCX),%XMM0,%XMM12 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VFMADD213SD -0x10(%RDX),%XMM10,%XMM12 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM12,-0x10(%RDX) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMULSD -0x8(%RCX),%XMM0,%XMM13 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VFMADD213SD -0x8(%RDX),%XMM13,%XMM10 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM10,-0x8(%RDX) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RSI,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 4060ed <advancePosition._omp_fn.0+0x2cd> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
INC %R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADD $0x40,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD $0x600,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %R10D,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JG 405e90 <advancePosition._omp_fn.0+0x70> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
Function | advancePosition._omp_fn.0 |
Source file and lines | timestep.c:88-94 |
Module | exec |
nb instructions | 79 |
nb uops | 78 |
loop length | 356 |
used x86 registers | 13 |
used mmx registers | 0 |
used xmm registers | 14 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 0 |
micro-operation queue | 13.50 cycles |
front end | 13.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 11.50 | 11.50 | 10.00 | 10.00 | 4.50 | 6.07 | 9.00 | 4.50 | 4.50 | 4.50 | 5.93 | 10.00 |
cycles | 11.50 | 11.50 | 10.00 | 10.00 | 4.50 | 6.07 | 9.00 | 4.50 | 4.50 | 4.50 | 5.93 | 10.00 |
Cycles executing div or sqrt instructions | 12.00 |
FE+BE cycles | 15.89-15.88 |
Stall cycles | 2.04 |
LM full (events) | 4.08 |
Front-end | 13.50 |
Dispatch | 11.50 |
DIV/SQRT | 12.00 |
Overall L1 | 13.50 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 0% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | 0% |
div/sqrt | 0% |
other | NA (no other vectorizable/vectorized instructions) |
all | 0% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | 0% |
div/sqrt | 0% |
other | 0% |
all | 10% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 9% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | 12% |
div/sqrt | 12% |
other | NA (no other vectorizable/vectorized instructions) |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 9% |
fma | 12% |
div/sqrt | 12% |
other | 10% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOVSXD (%R13,%R10,4),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %R8D,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 4060ed <advancePosition._omp_fn.0+0x2cd> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x20(%R12),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %R9D,%RSI | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV 0x28(%R12),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R11),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%R11),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%R11),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SAL $0x6,%R11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA (%R14,%RSI,4),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RAX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R8,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA (%R14,%R11,4),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R11,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %RSI,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SUB $0x4,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x2,%R8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
INC %R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $0x3,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 405fde <advancePosition._omp_fn.0+0x1be> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x1,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 405f8c <advancePosition._omp_fn.0+0x16c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x2,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 405f43 <advancePosition._omp_fn.0+0x123> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMULSD (%RCX),%XMM0,%XMM3 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
MOVSXD (%RSI),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x18,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADD $0x4,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADD $0x18,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SAL $0x4,%R14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
VDIVSD 0x8(%RDI,%R14,1),%XMM2,%XMM1 | 1 | 1 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 13-15 | 4 |
VFMADD213SD -0x18(%RDX),%XMM1,%XMM3 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM3,-0x18(%RDX) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMULSD -0x10(%RCX),%XMM0,%XMM4 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VFMADD213SD -0x10(%RDX),%XMM1,%XMM4 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM4,-0x10(%RDX) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMULSD -0x8(%RCX),%XMM0,%XMM5 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VFMADD213SD -0x8(%RDX),%XMM5,%XMM1 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM1,-0x8(%RDX) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMULSD (%RCX),%XMM0,%XMM7 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
MOVSXD (%RSI),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x18,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADD $0x4,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADD $0x18,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SAL $0x4,%R8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
VDIVSD 0x8(%RDI,%R8,1),%XMM2,%XMM6 | 1 | 1 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 13-15 | 4 |
VFMADD213SD -0x18(%RDX),%XMM6,%XMM7 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM7,-0x18(%RDX) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMULSD -0x10(%RCX),%XMM0,%XMM8 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VFMADD213SD -0x10(%RDX),%XMM6,%XMM8 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM8,-0x10(%RDX) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMULSD -0x8(%RCX),%XMM0,%XMM9 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VFMADD213SD -0x8(%RDX),%XMM9,%XMM6 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM6,-0x8(%RDX) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMULSD (%RCX),%XMM0,%XMM11 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
MOVSXD (%RSI),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x4,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADD $0x18,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADD $0x18,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SAL $0x4,%R14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
VDIVSD 0x8(%RDI,%R14,1),%XMM2,%XMM10 | 1 | 1 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 13-15 | 4 |
VFMADD213SD -0x18(%RDX),%XMM10,%XMM11 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM11,-0x18(%RDX) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMULSD -0x10(%RCX),%XMM0,%XMM12 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VFMADD213SD -0x10(%RDX),%XMM10,%XMM12 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM12,-0x10(%RDX) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMULSD -0x8(%RCX),%XMM0,%XMM13 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VFMADD213SD -0x8(%RDX),%XMM13,%XMM10 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM10,-0x8(%RDX) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RSI,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 4060ed <advancePosition._omp_fn.0+0x2cd> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
INC %R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADD $0x40,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD $0x600,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %R10D,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JG 405e90 <advancePosition._omp_fn.0+0x70> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |