Function: setTemperature.extracted | Module: exec | Source: initAtoms.c:174-183 | Coverage: 0.01% |
---|
Function: setTemperature.extracted | Module: exec | Source: initAtoms.c:174-183 | Coverage: 0.01% |
---|
/scratch_na/users/xoserete/qaas_runs/171-419-7821/intel/CoMD/build/CoMD/CoMD/src-openmp/initAtoms.c: 174 - 183 |
-------------------------------------------------------------------------------- |
174: #pragma omp parallel for |
175: for (int iBox=0; iBox<s->boxes->nLocalBoxes; ++iBox) |
176: { |
177: for (int iOff=MAXATOMS*iBox, ii=0; ii<s->boxes->nAtoms[iBox]; ++ii, ++iOff) |
178: { |
179: s->atoms->p[iOff][0] *= scaleFactor; |
180: s->atoms->p[iOff][1] *= scaleFactor; |
181: s->atoms->p[iOff][2] *= scaleFactor; |
182: } |
183: } |
0x40a190 PUSH %RBP |
0x40a191 MOV %RSP,%RBP |
0x40a194 PUSH %R15 |
0x40a196 PUSH %R14 |
0x40a198 PUSH %R12 |
0x40a19a PUSH %RBX |
0x40a19b SUB $0x10,%RSP |
0x40a19f MOV %RCX,%R15 |
0x40a1a2 MOV %RDX,%RBX |
0x40a1a5 MOVL $0,-0x30(%RBP) |
0x40a1ac MOV (%RDI),%R14D |
0x40a1af MOVL $0,-0x28(%RBP) |
0x40a1b6 MOV %R9D,-0x24(%RBP) |
0x40a1ba MOVL $0x1,-0x2c(%RBP) |
0x40a1c1 SUB $0x8,%RSP |
0x40a1c5 LEA -0x2c(%RBP),%RAX |
0x40a1c9 LEA -0x30(%RBP),%RCX |
0x40a1cd LEA -0x28(%RBP),%R8 |
0x40a1d1 LEA -0x24(%RBP),%R9 |
0x40a1d5 MOV $0x62e630,%EDI |
0x40a1da MOV %R14D,%ESI |
0x40a1dd MOV $0x22,%EDX |
0x40a1e2 PUSH $0x1 |
0x40a1e4 PUSH $0x1 |
0x40a1e6 PUSH %RAX |
0x40a1e7 CALL 403130 <__kmpc_for_static_init_4@plt> |
0x40a1ec ADD $0x20,%RSP |
0x40a1f0 MOV -0x28(%RBP),%EAX |
0x40a1f3 MOV -0x24(%RBP),%ECX |
0x40a1f6 CMP %ECX,%EAX |
0x40a1f8 JBE 40a216 |
0x40a1fa MOV $0x62e650,%EDI |
0x40a1ff MOV %R14D,%ESI |
0x40a202 ADD $0x10,%RSP |
0x40a206 POP %RBX |
0x40a207 POP %R12 |
0x40a209 POP %R14 |
0x40a20b POP %R15 |
0x40a20d POP %RBP |
0x40a20e VZEROUPPER |
0x40a211 JMP 402fe0 |
0x40a216 VMOVQ %R15,%XMM0 |
0x40a21b MOV 0x18(%RBX),%RDX |
0x40a21f MOV 0x78(%RDX),%RDX |
0x40a223 SUB %RAX,%RCX |
0x40a226 VPBROADCASTQ %XMM0,%YMM1 |
0x40a22b MOV %EAX,%ESI |
0x40a22d SAL $0x6,%ESI |
0x40a230 XOR %EDI,%EDI |
0x40a232 JMP 40a24f |
0x40a234 NOPW %CS:(%RAX,%RAX,1) |
(75) 0x40a240 LEA 0x1(%RDI),%R8 |
(75) 0x40a244 ADD $0x40,%ESI |
(75) 0x40a247 CMP %RCX,%RDI |
(75) 0x40a24a MOV %R8,%RDI |
(75) 0x40a24d JE 40a1fa |
(75) 0x40a24f MOV %ESI,%ESI |
(75) 0x40a251 LEA (%RDI,%RAX,1),%R8 |
(75) 0x40a255 MOV (%RDX,%R8,4),%R8D |
(75) 0x40a259 TEST %R8D,%R8D |
(75) 0x40a25c JLE 40a240 |
(75) 0x40a25e MOV 0x20(%RBX),%R9 |
(75) 0x40a262 MOV 0x20(%R9),%R9 |
(75) 0x40a266 MOV %R8D,%R10D |
(75) 0x40a269 AND $-0x8,%R10D |
(75) 0x40a26d JE 40a400 |
(75) 0x40a273 LEA (,%RSI,8),%R11 |
(75) 0x40a27b LEA (%R11,%R11,2),%R11 |
(75) 0x40a27f LEA -0x1(%R10),%R15D |
(75) 0x40a283 ADD %R9,%R11 |
(75) 0x40a286 XOR %R12D,%R12D |
(75) 0x40a289 NOPL (%RAX) |
(77) 0x40a290 VMOVUPD 0x20(%R11),%YMM2 |
(77) 0x40a296 VMOVUPD 0x10(%R11),%XMM3 |
(77) 0x40a29c VBLENDPD $0x3,(%R11),%YMM2,%YMM4 |
(77) 0x40a2a2 VINSERTF128 $0x1,0x40(%R11),%YMM3,%YMM3 |
(77) 0x40a2a9 VMOVUPD 0x80(%R11),%YMM5 |
(77) 0x40a2b2 VBLENDPD $0xa,%YMM3,%YMM4,%YMM6 |
(77) 0x40a2b8 VBLENDPD $0x3,0x60(%R11),%YMM5,%YMM7 |
(77) 0x40a2bf VMOVUPD 0x70(%R11),%XMM8 |
(77) 0x40a2c5 VINSERTF128 $0x1,0xa0(%R11),%YMM8,%YMM8 |
(77) 0x40a2cf VBLENDPD $0xa,%YMM8,%YMM7,%YMM9 |
(77) 0x40a2d5 VSHUFPD $0x5,%YMM2,%YMM4,%YMM4 |
(77) 0x40a2da VBROADCASTSD 0x50(%R11),%YMM10 |
(77) 0x40a2e0 VBLENDPD $0x8,%YMM10,%YMM4,%YMM4 |
(77) 0x40a2e6 VSHUFPD $0x5,%YMM5,%YMM7,%YMM7 |
(77) 0x40a2eb VBROADCASTSD 0xb0(%R11),%YMM10 |
(77) 0x40a2f4 VBLENDPD $0x8,%YMM10,%YMM7,%YMM7 |
(77) 0x40a2fa VBLENDPD $0xc,0xa0(%R11),%YMM5,%YMM5 |
(77) 0x40a304 VBLENDPD $0xa,%YMM5,%YMM8,%YMM5 |
(77) 0x40a30a VBLENDPD $0xc,0x40(%R11),%YMM2,%YMM2 |
(77) 0x40a311 VBLENDPD $0xa,%YMM2,%YMM3,%YMM2 |
(77) 0x40a317 VMULPD %YMM1,%YMM9,%YMM3 |
(77) 0x40a31b VMULPD %YMM1,%YMM6,%YMM6 |
(77) 0x40a31f VMULPD %YMM1,%YMM7,%YMM7 |
(77) 0x40a323 VMULPD %YMM1,%YMM4,%YMM4 |
(77) 0x40a327 VMULPD %YMM1,%YMM2,%YMM2 |
(77) 0x40a32b VMULPD %YMM1,%YMM5,%YMM5 |
(77) 0x40a32f VSHUFPD $0x1,%YMM4,%YMM4,%YMM8 |
(77) 0x40a334 VBLENDPD $0x4,%YMM6,%YMM8,%YMM8 |
(77) 0x40a33a VSHUFPD $0x1,%YMM7,%YMM7,%YMM9 |
(77) 0x40a33f VBLENDPD $0x4,%YMM3,%YMM9,%YMM9 |
(77) 0x40a345 VMOVDDUP %XMM4,%XMM10 |
(77) 0x40a349 VPERM2F128 $0x20,%YMM6,%YMM10,%YMM10 |
(77) 0x40a34f VMOVDDUP %XMM7,%XMM11 |
(77) 0x40a353 VPERM2F128 $0x20,%YMM3,%YMM11,%YMM11 |
(77) 0x40a359 VSHUFPD $0x4,%YMM7,%YMM7,%YMM7 |
(77) 0x40a35e VSHUFPD $0x4,%YMM4,%YMM4,%YMM4 |
(77) 0x40a363 VINSERTF128 $0x1,%XMM5,%YMM3,%YMM12 |
(77) 0x40a369 VBLENDPD $0xa,%YMM11,%YMM12,%YMM11 |
(77) 0x40a36f VINSERTF128 $0x1,%XMM2,%YMM6,%YMM12 |
(77) 0x40a375 VBLENDPD $0xa,%YMM10,%YMM12,%YMM10 |
(77) 0x40a37b VBLENDPD $0x2,%YMM5,%YMM9,%YMM9 |
(77) 0x40a381 VPERM2F128 $0x31,%YMM2,%YMM6,%YMM6 |
(77) 0x40a387 VPERM2F128 $0x31,%YMM4,%YMM2,%YMM4 |
(77) 0x40a38d VBLENDPD $0xa,%YMM6,%YMM4,%YMM4 |
(77) 0x40a393 VPERM2F128 $0x31,%YMM5,%YMM3,%YMM3 |
(77) 0x40a399 VPERM2F128 $0x31,%YMM7,%YMM5,%YMM5 |
(77) 0x40a39f VBLENDPD $0xa,%YMM3,%YMM5,%YMM3 |
(77) 0x40a3a5 VBLENDPD $0x2,%YMM2,%YMM8,%YMM2 |
(77) 0x40a3ab VMOVUPD %YMM2,0x20(%R11) |
(77) 0x40a3b1 VMOVUPD %YMM3,0xa0(%R11) |
(77) 0x40a3ba VMOVUPD %YMM4,0x40(%R11) |
(77) 0x40a3c0 VMOVUPD %YMM9,0x80(%R11) |
(77) 0x40a3c9 VMOVUPD %YMM10,(%R11) |
(77) 0x40a3ce VMOVUPD %YMM11,0x60(%R11) |
(77) 0x40a3d4 ADD $0x8,%R12D |
(77) 0x40a3d8 ADD $0xc0,%R11 |
(77) 0x40a3df CMP %R15D,%R12D |
(77) 0x40a3e2 JLE 40a290 |
(75) 0x40a3e8 CMP %R10D,%R8D |
(75) 0x40a3eb JE 40a240 |
(75) 0x40a3f1 JMP 40a403 |
0x40a3f3 NOPW %CS:(%RAX,%RAX,1) |
(75) 0x40a400 XOR %R10D,%R10D |
(75) 0x40a403 VPBROADCASTQ %XMM0,%XMM2 |
(75) 0x40a408 SUB %R10D,%R8D |
(75) 0x40a40b MOVSXD %R10D,%R10 |
(75) 0x40a40e ADD %RSI,%R10 |
(75) 0x40a411 LEA (%R10,%R10,2),%R10 |
(75) 0x40a415 LEA (%R9,%R10,8),%R9 |
(75) 0x40a419 ADD $0x10,%R9 |
(75) 0x40a41d NOPL (%RAX) |
(76) 0x40a420 VMULPD -0x10(%R9),%XMM2,%XMM3 |
(76) 0x40a426 VMOVUPD %XMM3,-0x10(%R9) |
(76) 0x40a42c VMULSD (%R9),%XMM0,%XMM3 |
(76) 0x40a431 VMOVSD %XMM3,(%R9) |
(76) 0x40a436 ADD $0x18,%R9 |
(76) 0x40a43a DEC %R8D |
(76) 0x40a43d JNE 40a420 |
(75) 0x40a43f JMP 40a240 |
0x40a444 NOPW %CS:(%RAX,%RAX,1) |
0x40a44e XCHG %AX,%AX |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so |
Path / |
Source file and lines | initAtoms.c:174-183 |
Module | exec |
nb instructions | 54 |
nb uops | 56 |
loop length | 201 |
used x86 registers | 13 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 1 |
used zmm registers | 0 |
nb stack references | 4 |
micro-operation queue | 9.33 cycles |
front end | 9.33 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.80 | 1.80 | 3.33 | 3.33 | 6.50 | 2.00 | 1.80 | 6.50 | 6.50 | 6.50 | 1.60 | 3.33 |
cycles | 1.80 | 1.80 | 3.33 | 3.33 | 6.50 | 2.00 | 1.80 | 6.50 | 6.50 | 6.50 | 1.60 | 3.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 9.13-9.17 |
Stall cycles | 0.00 |
Front-end | 9.33 |
Dispatch | 6.50 |
Overall L1 | 9.33 |
all | 5% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 11% |
all | 9% |
load | 6% |
store | 6% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 10% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x10,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RCX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOVL $0,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%R14D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVL $0,-0x28(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9D,-0x24(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0x1,-0x2c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA -0x2c(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x30(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x28(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x24(%RBP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x62e630,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R14D,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 403130 <__kmpc_for_static_init_4@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x28(%RBP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x24(%RBP),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %ECX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JBE 40a216 <setTemperature.extracted+0x86> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x62e650,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R14D,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADD $0x10,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 402fe0 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
VMOVQ %R15,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV 0x18(%RBX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x78(%RDX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPBROADCASTQ %XMM0,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %EAX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SAL $0x6,%ESI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 40a24f <setTemperature.extracted+0xbf> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | initAtoms.c:174-183 |
Module | exec |
nb instructions | 54 |
nb uops | 56 |
loop length | 201 |
used x86 registers | 13 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 1 |
used zmm registers | 0 |
nb stack references | 4 |
micro-operation queue | 9.33 cycles |
front end | 9.33 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.80 | 1.80 | 3.33 | 3.33 | 6.50 | 2.00 | 1.80 | 6.50 | 6.50 | 6.50 | 1.60 | 3.33 |
cycles | 1.80 | 1.80 | 3.33 | 3.33 | 6.50 | 2.00 | 1.80 | 6.50 | 6.50 | 6.50 | 1.60 | 3.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 9.13-9.17 |
Stall cycles | 0.00 |
Front-end | 9.33 |
Dispatch | 6.50 |
Overall L1 | 9.33 |
all | 5% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 11% |
all | 9% |
load | 6% |
store | 6% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 10% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x10,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RCX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOVL $0,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%R14D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVL $0,-0x28(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9D,-0x24(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0x1,-0x2c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA -0x2c(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x30(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x28(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x24(%RBP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x62e630,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R14D,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 403130 <__kmpc_for_static_init_4@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x28(%RBP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x24(%RBP),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %ECX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JBE 40a216 <setTemperature.extracted+0x86> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x62e650,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R14D,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADD $0x10,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 402fe0 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
VMOVQ %R15,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV 0x18(%RBX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x78(%RDX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPBROADCASTQ %XMM0,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %EAX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SAL $0x6,%ESI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 40a24f <setTemperature.extracted+0xbf> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼setTemperature.extracted– | 0.01 | 0 |
▼Loop 75 - initAtoms.c:174-183 - exec– | 0 | 0 |
○Loop 77 - initAtoms.c:177-181 - exec | 0.01 | 0 |
○Loop 76 - initAtoms.c:177-181 - exec | 0 | 0 |