Function: setTemperature._omp_fn.1 | Module: exec | Source: initAtoms.c:174-181 | Coverage: 0.02% |
---|
Function: setTemperature._omp_fn.1 | Module: exec | Source: initAtoms.c:174-181 | Coverage: 0.02% |
---|
/scratch_na/users/xoserete/qaas_runs/171-172-2581/intel/CoMD/build/CoMD/CoMD/src-openmp/initAtoms.c: 174 - 181 |
-------------------------------------------------------------------------------- |
174: #pragma omp parallel for |
175: for (int iBox=0; iBox<s->boxes->nLocalBoxes; ++iBox) |
176: { |
177: for (int iOff=MAXATOMS*iBox, ii=0; ii<s->boxes->nAtoms[iBox]; ++ii, ++iOff) |
178: { |
179: s->atoms->p[iOff][0] *= scaleFactor; |
180: s->atoms->p[iOff][1] *= scaleFactor; |
181: s->atoms->p[iOff][2] *= scaleFactor; |
0x408de0 PUSH %RBP |
0x408de1 MOV %RSP,%RBP |
0x408de4 PUSH %R14 |
0x408de6 PUSH %R13 |
0x408de8 MOV %RDI,%R13 |
0x408deb PUSH %R12 |
0x408ded PUSH %RBX |
0x408dee MOV (%RDI),%RBX |
0x408df1 CALL 403070 <omp_get_num_threads@plt> |
0x408df6 MOV 0x18(%RBX),%R14 |
0x408dfa MOV %EAX,%R12D |
0x408dfd CALL 403160 <omp_get_thread_num@plt> |
0x408e02 MOV %EAX,%ECX |
0x408e04 MOV 0xc(%R14),%EAX |
0x408e08 CLTD |
0x408e09 IDIV %R12D |
0x408e0c CMP %EDX,%ECX |
0x408e0e JL 409063 |
0x408e14 IMUL %EAX,%ECX |
0x408e17 ADD %EDX,%ECX |
0x408e19 LEA (%RAX,%RCX,1),%R8D |
0x408e1d CMP %R8D,%ECX |
0x408e20 JGE 40905a |
0x408e26 MOVSXD %ECX,%RSI |
0x408e29 VMOVSD 0x8(%R13),%XMM0 |
0x408e2f MOV 0x78(%R14),%RDI |
0x408e33 SAL $0x6,%ECX |
0x408e36 LEA (%RSI,%RSI,2),%R13 |
0x408e3a SAL $0x6,%R8D |
0x408e3e VMOVDDUP %XMM0,%XMM1 |
0x408e42 LEA (%RDI,%RSI,4),%R11 |
0x408e46 SAL $0x9,%R13 |
0x408e4a NOPW (%RAX,%RAX,1) |
(60) 0x408e50 MOVSXD (%R11),%R10 |
(60) 0x408e53 TEST %R10D,%R10D |
(60) 0x408e56 JLE 409043 |
(60) 0x408e5c MOV 0x20(%RBX),%R9 |
(60) 0x408e60 MOVSXD %ECX,%R12 |
(60) 0x408e63 LEA (%R10,%R10,2),%RDX |
(60) 0x408e67 MOV $0xaaaaaaaaaaaaaab,%R10 |
(60) 0x408e71 LEA (%R12,%R12,2),%RAX |
(60) 0x408e75 MOV 0x20(%R9),%R14 |
(60) 0x408e79 LEA (%R14,%RAX,8),%RSI |
(60) 0x408e7d ADD %R13,%R14 |
(60) 0x408e80 LEA (%R14,%RDX,8),%RDI |
(60) 0x408e84 MOV %RDI,%R9 |
(60) 0x408e87 SUB %RSI,%R9 |
(60) 0x408e8a SUB $0x18,%R9 |
(60) 0x408e8e SHR $0x3,%R9 |
(60) 0x408e92 IMUL %R10,%R9 |
(60) 0x408e96 INC %R9 |
(60) 0x408e99 AND $0x7,%R9D |
(60) 0x408e9d JE 408f79 |
(60) 0x408ea3 CMP $0x1,%R9 |
(60) 0x408ea7 JE 408f59 |
(60) 0x408ead CMP $0x2,%R9 |
(60) 0x408eb1 JE 408f42 |
(60) 0x408eb7 CMP $0x3,%R9 |
(60) 0x408ebb JE 408f2b |
(60) 0x408ebd CMP $0x4,%R9 |
(60) 0x408ec1 JE 408f14 |
(60) 0x408ec3 CMP $0x5,%R9 |
(60) 0x408ec7 JE 408efd |
(60) 0x408ec9 CMP $0x6,%R9 |
(60) 0x408ecd JE 408ee6 |
(60) 0x408ecf VMULPD (%RSI),%XMM1,%XMM2 |
(60) 0x408ed3 ADD $0x18,%RSI |
(60) 0x408ed7 VMULSD -0x8(%RSI),%XMM0,%XMM3 |
(60) 0x408edc VMOVUPD %XMM2,-0x18(%RSI) |
(60) 0x408ee1 VMOVSD %XMM3,-0x8(%RSI) |
(60) 0x408ee6 VMULPD (%RSI),%XMM1,%XMM4 |
(60) 0x408eea ADD $0x18,%RSI |
(60) 0x408eee VMULSD -0x8(%RSI),%XMM0,%XMM5 |
(60) 0x408ef3 VMOVUPD %XMM4,-0x18(%RSI) |
(60) 0x408ef8 VMOVSD %XMM5,-0x8(%RSI) |
(60) 0x408efd VMULPD (%RSI),%XMM1,%XMM6 |
(60) 0x408f01 ADD $0x18,%RSI |
(60) 0x408f05 VMULSD -0x8(%RSI),%XMM0,%XMM7 |
(60) 0x408f0a VMOVUPD %XMM6,-0x18(%RSI) |
(60) 0x408f0f VMOVSD %XMM7,-0x8(%RSI) |
(60) 0x408f14 VMULPD (%RSI),%XMM1,%XMM8 |
(60) 0x408f18 ADD $0x18,%RSI |
(60) 0x408f1c VMULSD -0x8(%RSI),%XMM0,%XMM9 |
(60) 0x408f21 VMOVUPD %XMM8,-0x18(%RSI) |
(60) 0x408f26 VMOVSD %XMM9,-0x8(%RSI) |
(60) 0x408f2b VMULPD (%RSI),%XMM1,%XMM10 |
(60) 0x408f2f ADD $0x18,%RSI |
(60) 0x408f33 VMULSD -0x8(%RSI),%XMM0,%XMM11 |
(60) 0x408f38 VMOVUPD %XMM10,-0x18(%RSI) |
(60) 0x408f3d VMOVSD %XMM11,-0x8(%RSI) |
(60) 0x408f42 VMULPD (%RSI),%XMM1,%XMM12 |
(60) 0x408f46 ADD $0x18,%RSI |
(60) 0x408f4a VMULSD -0x8(%RSI),%XMM0,%XMM13 |
(60) 0x408f4f VMOVUPD %XMM12,-0x18(%RSI) |
(60) 0x408f54 VMOVSD %XMM13,-0x8(%RSI) |
(60) 0x408f59 VMULPD (%RSI),%XMM1,%XMM14 |
(60) 0x408f5d ADD $0x18,%RSI |
(60) 0x408f61 VMULSD -0x8(%RSI),%XMM0,%XMM15 |
(60) 0x408f66 VMOVUPD %XMM14,-0x18(%RSI) |
(60) 0x408f6b VMOVSD %XMM15,-0x8(%RSI) |
(60) 0x408f70 CMP %RDI,%RSI |
(60) 0x408f73 JE 409043 |
(61) 0x408f79 VMULPD (%RSI),%XMM1,%XMM2 |
(61) 0x408f7d ADD $0xc0,%RSI |
(61) 0x408f84 VMULSD -0xb0(%RSI),%XMM0,%XMM3 |
(61) 0x408f8c VMULPD -0xa8(%RSI),%XMM1,%XMM4 |
(61) 0x408f94 VMULSD -0x98(%RSI),%XMM0,%XMM5 |
(61) 0x408f9c VMULPD -0x90(%RSI),%XMM1,%XMM6 |
(61) 0x408fa4 VMOVUPD %XMM2,-0xc0(%RSI) |
(61) 0x408fac VMULSD -0x80(%RSI),%XMM0,%XMM7 |
(61) 0x408fb1 VMOVSD %XMM3,-0xb0(%RSI) |
(61) 0x408fb9 VMULPD -0x78(%RSI),%XMM1,%XMM8 |
(61) 0x408fbe VMULSD -0x68(%RSI),%XMM0,%XMM9 |
(61) 0x408fc3 VMOVUPD %XMM4,-0xa8(%RSI) |
(61) 0x408fcb VMULPD -0x60(%RSI),%XMM1,%XMM10 |
(61) 0x408fd0 VMOVSD %XMM5,-0x98(%RSI) |
(61) 0x408fd8 VMULSD -0x50(%RSI),%XMM0,%XMM11 |
(61) 0x408fdd VMOVUPD %XMM6,-0x90(%RSI) |
(61) 0x408fe5 VMULPD -0x48(%RSI),%XMM1,%XMM12 |
(61) 0x408fea VMOVSD %XMM7,-0x80(%RSI) |
(61) 0x408fef VMULSD -0x38(%RSI),%XMM0,%XMM13 |
(61) 0x408ff4 VMOVUPD %XMM8,-0x78(%RSI) |
(61) 0x408ff9 VMULPD -0x30(%RSI),%XMM1,%XMM14 |
(61) 0x408ffe VMOVSD %XMM9,-0x68(%RSI) |
(61) 0x409003 VMULSD -0x20(%RSI),%XMM0,%XMM15 |
(61) 0x409008 VMOVUPD %XMM10,-0x60(%RSI) |
(61) 0x40900d VMULPD -0x18(%RSI),%XMM1,%XMM2 |
(61) 0x409012 VMOVSD %XMM11,-0x50(%RSI) |
(61) 0x409017 VMULSD -0x8(%RSI),%XMM0,%XMM3 |
(61) 0x40901c VMOVUPD %XMM12,-0x48(%RSI) |
(61) 0x409021 VMOVSD %XMM13,-0x38(%RSI) |
(61) 0x409026 VMOVUPD %XMM14,-0x30(%RSI) |
(61) 0x40902b VMOVSD %XMM15,-0x20(%RSI) |
(61) 0x409030 VMOVUPD %XMM2,-0x18(%RSI) |
(61) 0x409035 VMOVSD %XMM3,-0x8(%RSI) |
(61) 0x40903a CMP %RDI,%RSI |
(61) 0x40903d JNE 408f79 |
(60) 0x409043 ADD $0x40,%ECX |
(60) 0x409046 ADD $0x4,%R11 |
(60) 0x40904a ADD $0x600,%R13 |
(60) 0x409051 CMP %R8D,%ECX |
(60) 0x409054 JNE 408e50 |
0x40905a POP %RBX |
0x40905b POP %R12 |
0x40905d POP %R13 |
0x40905f POP %R14 |
0x409061 POP %RBP |
0x409062 RET |
0x409063 INC %EAX |
0x409065 XOR %EDX,%EDX |
0x409067 JMP 408e14 |
0x40906c NOPL (%RAX) |
Path / |
Source file and lines | initAtoms.c:174-181 |
Module | exec |
nb instructions | 43 |
nb uops | 48 |
loop length | 134 |
used x86 registers | 13 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 0 |
micro-operation queue | 8.00 cycles |
front end | 8.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.50 | 4.00 | 3.67 | 3.67 | 3.50 | 3.60 | 3.50 | 3.50 | 3.50 | 3.50 | 3.40 | 3.67 |
cycles | 3.50 | 5.33 | 3.67 | 3.67 | 3.50 | 3.60 | 3.50 | 3.50 | 3.50 | 3.50 | 3.40 | 3.67 |
Cycles executing div or sqrt instructions | 6.00 |
FE+BE cycles | 15.64-15.65 |
Stall cycles | 7.64 |
LM full (events) | 9.09 |
Front-end | 8.00 |
Dispatch | 5.33 |
DIV/SQRT | 6.00 |
Overall L1 | 8.00 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 0% |
all | 7% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 7% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 8% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 7% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV (%RDI),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALL 403070 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x18(%RBX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 403160 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0xc(%R14),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CLTD | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %R12D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
CMP %EDX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 409063 <setTemperature._omp_fn.1+0x283> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EAX,%ECX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %EDX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA (%RAX,%RCX,1),%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %R8D,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 40905a <setTemperature._omp_fn.1+0x27a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD %ECX,%RSI | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
VMOVSD 0x8(%R13),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x78(%R14),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SAL $0x6,%ECX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA (%RSI,%RSI,2),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x6,%R8D | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
VMOVDDUP %XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
LEA (%RDI,%RSI,4),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x9,%R13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 408e14 <setTemperature._omp_fn.1+0x34> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | initAtoms.c:174-181 |
Module | exec |
nb instructions | 43 |
nb uops | 48 |
loop length | 134 |
used x86 registers | 13 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 0 |
micro-operation queue | 8.00 cycles |
front end | 8.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.50 | 4.00 | 3.67 | 3.67 | 3.50 | 3.60 | 3.50 | 3.50 | 3.50 | 3.50 | 3.40 | 3.67 |
cycles | 3.50 | 5.33 | 3.67 | 3.67 | 3.50 | 3.60 | 3.50 | 3.50 | 3.50 | 3.50 | 3.40 | 3.67 |
Cycles executing div or sqrt instructions | 6.00 |
FE+BE cycles | 15.64-15.65 |
Stall cycles | 7.64 |
LM full (events) | 9.09 |
Front-end | 8.00 |
Dispatch | 5.33 |
DIV/SQRT | 6.00 |
Overall L1 | 8.00 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 0% |
all | 7% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 7% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 8% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 7% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV (%RDI),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALL 403070 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x18(%RBX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 403160 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0xc(%R14),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CLTD | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %R12D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
CMP %EDX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 409063 <setTemperature._omp_fn.1+0x283> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EAX,%ECX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %EDX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA (%RAX,%RCX,1),%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %R8D,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 40905a <setTemperature._omp_fn.1+0x27a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD %ECX,%RSI | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
VMOVSD 0x8(%R13),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x78(%R14),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SAL $0x6,%ECX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA (%RSI,%RSI,2),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x6,%R8D | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
VMOVDDUP %XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
LEA (%RDI,%RSI,4),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x9,%R13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 408e14 <setTemperature._omp_fn.1+0x34> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼setTemperature._omp_fn.1– | 0.02 | 0 |
▼Loop 60 - initAtoms.c:177-181 - exec– | 0.01 | 0 |
○Loop 61 - initAtoms.c:177-181 - exec | 0.01 | 0 |