Function: setTemperature._omp_fn.1 | Module: exec | Source: initAtoms.c:174-181 | Coverage: 0.01% |
---|
Function: setTemperature._omp_fn.1 | Module: exec | Source: initAtoms.c:174-181 | Coverage: 0.01% |
---|
/scratch_na/users/xoserete/qaas_runs/171-419-7821/intel/CoMD/build/CoMD/CoMD/src-openmp/initAtoms.c: 174 - 181 |
-------------------------------------------------------------------------------- |
174: #pragma omp parallel for |
175: for (int iBox=0; iBox<s->boxes->nLocalBoxes; ++iBox) |
176: { |
177: for (int iOff=MAXATOMS*iBox, ii=0; ii<s->boxes->nAtoms[iBox]; ++ii, ++iOff) |
178: { |
179: s->atoms->p[iOff][0] *= scaleFactor; |
180: s->atoms->p[iOff][1] *= scaleFactor; |
181: s->atoms->p[iOff][2] *= scaleFactor; |
0x405150 PUSH %RBP |
0x405151 MOV %RSP,%RBP |
0x405154 PUSH %R14 |
0x405156 PUSH %R13 |
0x405158 MOV %RDI,%R13 |
0x40515b PUSH %R12 |
0x40515d PUSH %RBX |
0x40515e MOV (%RDI),%RBX |
0x405161 CALL 403070 <omp_get_num_threads@plt> |
0x405166 MOV 0x18(%RBX),%R14 |
0x40516a MOV %EAX,%R12D |
0x40516d CALL 403150 <omp_get_thread_num@plt> |
0x405172 MOV %EAX,%ECX |
0x405174 MOV 0xc(%R14),%EAX |
0x405178 CLTD |
0x405179 IDIV %R12D |
0x40517c CMP %EDX,%ECX |
0x40517e JL 4053d3 |
0x405184 IMUL %EAX,%ECX |
0x405187 ADD %EDX,%ECX |
0x405189 LEA (%RAX,%RCX,1),%R8D |
0x40518d CMP %R8D,%ECX |
0x405190 JGE 4053ca |
0x405196 MOVSXD %ECX,%RSI |
0x405199 VMOVSD 0x8(%R13),%XMM0 |
0x40519f MOV 0x78(%R14),%RDI |
0x4051a3 SAL $0x6,%ECX |
0x4051a6 LEA (%RSI,%RSI,2),%R13 |
0x4051aa SAL $0x6,%R8D |
0x4051ae VMOVDDUP %XMM0,%XMM1 |
0x4051b2 LEA (%RDI,%RSI,4),%R11 |
0x4051b6 SAL $0x9,%R13 |
0x4051ba NOPW (%RAX,%RAX,1) |
(12) 0x4051c0 MOVSXD (%R11),%R10 |
(12) 0x4051c3 TEST %R10D,%R10D |
(12) 0x4051c6 JLE 4053b3 |
(12) 0x4051cc MOV 0x20(%RBX),%R9 |
(12) 0x4051d0 MOVSXD %ECX,%R12 |
(12) 0x4051d3 LEA (%R10,%R10,2),%RDX |
(12) 0x4051d7 MOV $0xaaaaaaaaaaaaaab,%R10 |
(12) 0x4051e1 LEA (%R12,%R12,2),%RAX |
(12) 0x4051e5 MOV 0x20(%R9),%R14 |
(12) 0x4051e9 LEA (%R14,%RAX,8),%RSI |
(12) 0x4051ed ADD %R13,%R14 |
(12) 0x4051f0 LEA (%R14,%RDX,8),%RDI |
(12) 0x4051f4 MOV %RDI,%R9 |
(12) 0x4051f7 SUB %RSI,%R9 |
(12) 0x4051fa SUB $0x18,%R9 |
(12) 0x4051fe SHR $0x3,%R9 |
(12) 0x405202 IMUL %R10,%R9 |
(12) 0x405206 INC %R9 |
(12) 0x405209 AND $0x7,%R9D |
(12) 0x40520d JE 4052e9 |
(12) 0x405213 CMP $0x1,%R9 |
(12) 0x405217 JE 4052c9 |
(12) 0x40521d CMP $0x2,%R9 |
(12) 0x405221 JE 4052b2 |
(12) 0x405227 CMP $0x3,%R9 |
(12) 0x40522b JE 40529b |
(12) 0x40522d CMP $0x4,%R9 |
(12) 0x405231 JE 405284 |
(12) 0x405233 CMP $0x5,%R9 |
(12) 0x405237 JE 40526d |
(12) 0x405239 CMP $0x6,%R9 |
(12) 0x40523d JE 405256 |
(12) 0x40523f VMULPD (%RSI),%XMM1,%XMM2 |
(12) 0x405243 ADD $0x18,%RSI |
(12) 0x405247 VMULSD -0x8(%RSI),%XMM0,%XMM3 |
(12) 0x40524c VMOVUPD %XMM2,-0x18(%RSI) |
(12) 0x405251 VMOVSD %XMM3,-0x8(%RSI) |
(12) 0x405256 VMULPD (%RSI),%XMM1,%XMM4 |
(12) 0x40525a ADD $0x18,%RSI |
(12) 0x40525e VMULSD -0x8(%RSI),%XMM0,%XMM5 |
(12) 0x405263 VMOVUPD %XMM4,-0x18(%RSI) |
(12) 0x405268 VMOVSD %XMM5,-0x8(%RSI) |
(12) 0x40526d VMULPD (%RSI),%XMM1,%XMM6 |
(12) 0x405271 ADD $0x18,%RSI |
(12) 0x405275 VMULSD -0x8(%RSI),%XMM0,%XMM7 |
(12) 0x40527a VMOVUPD %XMM6,-0x18(%RSI) |
(12) 0x40527f VMOVSD %XMM7,-0x8(%RSI) |
(12) 0x405284 VMULPD (%RSI),%XMM1,%XMM8 |
(12) 0x405288 ADD $0x18,%RSI |
(12) 0x40528c VMULSD -0x8(%RSI),%XMM0,%XMM9 |
(12) 0x405291 VMOVUPD %XMM8,-0x18(%RSI) |
(12) 0x405296 VMOVSD %XMM9,-0x8(%RSI) |
(12) 0x40529b VMULPD (%RSI),%XMM1,%XMM10 |
(12) 0x40529f ADD $0x18,%RSI |
(12) 0x4052a3 VMULSD -0x8(%RSI),%XMM0,%XMM11 |
(12) 0x4052a8 VMOVUPD %XMM10,-0x18(%RSI) |
(12) 0x4052ad VMOVSD %XMM11,-0x8(%RSI) |
(12) 0x4052b2 VMULPD (%RSI),%XMM1,%XMM12 |
(12) 0x4052b6 ADD $0x18,%RSI |
(12) 0x4052ba VMULSD -0x8(%RSI),%XMM0,%XMM13 |
(12) 0x4052bf VMOVUPD %XMM12,-0x18(%RSI) |
(12) 0x4052c4 VMOVSD %XMM13,-0x8(%RSI) |
(12) 0x4052c9 VMULPD (%RSI),%XMM1,%XMM14 |
(12) 0x4052cd ADD $0x18,%RSI |
(12) 0x4052d1 VMULSD -0x8(%RSI),%XMM0,%XMM15 |
(12) 0x4052d6 VMOVUPD %XMM14,-0x18(%RSI) |
(12) 0x4052db VMOVSD %XMM15,-0x8(%RSI) |
(12) 0x4052e0 CMP %RDI,%RSI |
(12) 0x4052e3 JE 4053b3 |
(13) 0x4052e9 VMULPD (%RSI),%XMM1,%XMM2 |
(13) 0x4052ed ADD $0xc0,%RSI |
(13) 0x4052f4 VMULSD -0xb0(%RSI),%XMM0,%XMM3 |
(13) 0x4052fc VMULPD -0xa8(%RSI),%XMM1,%XMM4 |
(13) 0x405304 VMULSD -0x98(%RSI),%XMM0,%XMM5 |
(13) 0x40530c VMULPD -0x90(%RSI),%XMM1,%XMM6 |
(13) 0x405314 VMOVUPD %XMM2,-0xc0(%RSI) |
(13) 0x40531c VMULSD -0x80(%RSI),%XMM0,%XMM7 |
(13) 0x405321 VMOVSD %XMM3,-0xb0(%RSI) |
(13) 0x405329 VMULPD -0x78(%RSI),%XMM1,%XMM8 |
(13) 0x40532e VMULSD -0x68(%RSI),%XMM0,%XMM9 |
(13) 0x405333 VMOVUPD %XMM4,-0xa8(%RSI) |
(13) 0x40533b VMULPD -0x60(%RSI),%XMM1,%XMM10 |
(13) 0x405340 VMOVSD %XMM5,-0x98(%RSI) |
(13) 0x405348 VMULSD -0x50(%RSI),%XMM0,%XMM11 |
(13) 0x40534d VMOVUPD %XMM6,-0x90(%RSI) |
(13) 0x405355 VMULPD -0x48(%RSI),%XMM1,%XMM12 |
(13) 0x40535a VMOVSD %XMM7,-0x80(%RSI) |
(13) 0x40535f VMULSD -0x38(%RSI),%XMM0,%XMM13 |
(13) 0x405364 VMOVUPD %XMM8,-0x78(%RSI) |
(13) 0x405369 VMULPD -0x30(%RSI),%XMM1,%XMM14 |
(13) 0x40536e VMOVSD %XMM9,-0x68(%RSI) |
(13) 0x405373 VMULSD -0x20(%RSI),%XMM0,%XMM15 |
(13) 0x405378 VMOVUPD %XMM10,-0x60(%RSI) |
(13) 0x40537d VMULPD -0x18(%RSI),%XMM1,%XMM2 |
(13) 0x405382 VMOVSD %XMM11,-0x50(%RSI) |
(13) 0x405387 VMULSD -0x8(%RSI),%XMM0,%XMM3 |
(13) 0x40538c VMOVUPD %XMM12,-0x48(%RSI) |
(13) 0x405391 VMOVSD %XMM13,-0x38(%RSI) |
(13) 0x405396 VMOVUPD %XMM14,-0x30(%RSI) |
(13) 0x40539b VMOVSD %XMM15,-0x20(%RSI) |
(13) 0x4053a0 VMOVUPD %XMM2,-0x18(%RSI) |
(13) 0x4053a5 VMOVSD %XMM3,-0x8(%RSI) |
(13) 0x4053aa CMP %RDI,%RSI |
(13) 0x4053ad JNE 4052e9 |
(12) 0x4053b3 ADD $0x40,%ECX |
(12) 0x4053b6 ADD $0x4,%R11 |
(12) 0x4053ba ADD $0x600,%R13 |
(12) 0x4053c1 CMP %R8D,%ECX |
(12) 0x4053c4 JNE 4051c0 |
0x4053ca POP %RBX |
0x4053cb POP %R12 |
0x4053cd POP %R13 |
0x4053cf POP %R14 |
0x4053d1 POP %RBP |
0x4053d2 RET |
0x4053d3 INC %EAX |
0x4053d5 XOR %EDX,%EDX |
0x4053d7 JMP 405184 |
0x4053dc NOPL (%RAX) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
○96.67 | gomp_thread_start | team.c:130 | libgomp.so.1.0.0 |
○3.33 | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
Path / |
Source file and lines | initAtoms.c:174-181 |
Module | exec |
nb instructions | 43 |
nb uops | 48 |
loop length | 134 |
used x86 registers | 13 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 0 |
micro-operation queue | 8.00 cycles |
front end | 8.00 cycles |
| | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.50 | 4.00 | 3.67 | 3.67 | 3.50 | 3.60 | 3.50 | 3.50 | 3.50 | 3.50 | 3.40 | 3.67 |
cycles | 3.50 | 5.33 | 3.67 | 3.67 | 3.50 | 3.60 | 3.50 | 3.50 | 3.50 | 3.50 | 3.40 | 3.67 |
Cycles executing div or sqrt instructions | 6.00 |
FE+BE cycles | 15.64-15.65 |
Stall cycles | 7.64 |
LM full (events) | 9.09 |
Front-end | 8.00 |
Dispatch | 5.33 |
DIV/SQRT | 6.00 |
Overall L1 | 8.00 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 0% |
all | 7% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 7% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 8% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 7% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV (%RDI),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALL 403070 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x18(%RBX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 403150 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0xc(%R14),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CLTD | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %R12D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
CMP %EDX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 4053d3 <setTemperature._omp_fn.1+0x283> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EAX,%ECX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %EDX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA (%RAX,%RCX,1),%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %R8D,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 4053ca <setTemperature._omp_fn.1+0x27a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD %ECX,%RSI | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
VMOVSD 0x8(%R13),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x78(%R14),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SAL $0x6,%ECX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA (%RSI,%RSI,2),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x6,%R8D | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
VMOVDDUP %XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
LEA (%RDI,%RSI,4),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x9,%R13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 405184 <setTemperature._omp_fn.1+0x34> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | initAtoms.c:174-181 |
Module | exec |
nb instructions | 43 |
nb uops | 48 |
loop length | 134 |
used x86 registers | 13 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 0 |
micro-operation queue | 8.00 cycles |
front end | 8.00 cycles |
| | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.50 | 4.00 | 3.67 | 3.67 | 3.50 | 3.60 | 3.50 | 3.50 | 3.50 | 3.50 | 3.40 | 3.67 |
cycles | 3.50 | 5.33 | 3.67 | 3.67 | 3.50 | 3.60 | 3.50 | 3.50 | 3.50 | 3.50 | 3.40 | 3.67 |
Cycles executing div or sqrt instructions | 6.00 |
FE+BE cycles | 15.64-15.65 |
Stall cycles | 7.64 |
LM full (events) | 9.09 |
Front-end | 8.00 |
Dispatch | 5.33 |
DIV/SQRT | 6.00 |
Overall L1 | 8.00 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 0% |
all | 7% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 7% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 8% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 7% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV (%RDI),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALL 403070 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x18(%RBX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 403150 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0xc(%R14),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CLTD | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %R12D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
CMP %EDX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 4053d3 <setTemperature._omp_fn.1+0x283> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EAX,%ECX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %EDX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA (%RAX,%RCX,1),%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %R8D,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 4053ca <setTemperature._omp_fn.1+0x27a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD %ECX,%RSI | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
VMOVSD 0x8(%R13),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x78(%R14),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SAL $0x6,%ECX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA (%RSI,%RSI,2),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x6,%R8D | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
VMOVDDUP %XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
LEA (%RDI,%RSI,4),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x9,%R13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 405184 <setTemperature._omp_fn.1+0x34> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼setTemperature._omp_fn.1– | 0.01 | 0 |
▼Loop 12 - initAtoms.c:177-181 - exec– | 0 | 0 |
○Loop 13 - initAtoms.c:177-181 - exec | 0.01 | 0 |