Function: setTemperature.extracted | Module: exec | Source: initAtoms.c:174-183 | Coverage: 0.01% |
---|
Function: setTemperature.extracted | Module: exec | Source: initAtoms.c:174-183 | Coverage: 0.01% |
---|
/scratch_na/users/xoserete/qaas_runs/171-419-7821/intel/CoMD/build/CoMD/CoMD/src-openmp/initAtoms.c: 174 - 183 |
-------------------------------------------------------------------------------- |
174: #pragma omp parallel for |
175: for (int iBox=0; iBox<s->boxes->nLocalBoxes; ++iBox) |
176: { |
177: for (int iOff=MAXATOMS*iBox, ii=0; ii<s->boxes->nAtoms[iBox]; ++ii, ++iOff) |
178: { |
179: s->atoms->p[iOff][0] *= scaleFactor; |
180: s->atoms->p[iOff][1] *= scaleFactor; |
181: s->atoms->p[iOff][2] *= scaleFactor; |
182: } |
183: } |
0x4091e0 PUSH %RBP |
0x4091e1 MOV %RSP,%RBP |
0x4091e4 PUSH %R15 |
0x4091e6 PUSH %R14 |
0x4091e8 PUSH %R12 |
0x4091ea PUSH %RBX |
0x4091eb SUB $0x10,%RSP |
0x4091ef MOV %RCX,%R15 |
0x4091f2 MOV %RDX,%RBX |
0x4091f5 MOVL $0,-0x30(%RBP) |
0x4091fc MOV (%RDI),%R14D |
0x4091ff MOVL $0,-0x28(%RBP) |
0x409206 MOV %R9D,-0x24(%RBP) |
0x40920a MOVL $0x1,-0x2c(%RBP) |
0x409211 SUB $0x8,%RSP |
0x409215 LEA -0x2c(%RBP),%RAX |
0x409219 LEA -0x30(%RBP),%RCX |
0x40921d LEA -0x28(%RBP),%R8 |
0x409221 LEA -0x24(%RBP),%R9 |
0x409225 MOV $0x62c630,%EDI |
0x40922a MOV %R14D,%ESI |
0x40922d MOV $0x22,%EDX |
0x409232 PUSH $0x1 |
0x409234 PUSH $0x1 |
0x409236 PUSH %RAX |
0x409237 CALL 402d60 <__kmpc_for_static_init_4@plt> |
0x40923c ADD $0x20,%RSP |
0x409240 MOV -0x28(%RBP),%EAX |
0x409243 MOV -0x24(%RBP),%ECX |
0x409246 CMP %ECX,%EAX |
0x409248 JBE 409266 |
0x40924a MOV $0x62c650,%EDI |
0x40924f MOV %R14D,%ESI |
0x409252 ADD $0x10,%RSP |
0x409256 POP %RBX |
0x409257 POP %R12 |
0x409259 POP %R14 |
0x40925b POP %R15 |
0x40925d POP %RBP |
0x40925e VZEROUPPER |
0x409261 JMP 402c10 |
0x409266 VMOVQ %R15,%XMM0 |
0x40926b MOV 0x18(%RBX),%RDX |
0x40926f MOV 0x78(%RDX),%RDX |
0x409273 SUB %RAX,%RCX |
0x409276 VPBROADCASTQ %XMM0,%YMM1 |
0x40927b MOV %EAX,%ESI |
0x40927d SAL $0x6,%ESI |
0x409280 XOR %EDI,%EDI |
0x409282 VMOVUPD 0x193b6(%RIP),%YMM2 |
0x40928a VMOVUPD 0x193ce(%RIP),%YMM3 |
0x409292 VMOVUPD 0x19446(%RIP),%YMM4 |
0x40929a VMOVUPD 0x193fe(%RIP),%YMM5 |
0x4092a2 VMOVUPD 0x19416(%RIP),%YMM6 |
0x4092aa JMP 4092bf |
0x4092ac NOPL (%RAX) |
(69) 0x4092b0 LEA 0x1(%RDI),%R8 |
(69) 0x4092b4 ADD $0x40,%ESI |
(69) 0x4092b7 CMP %RCX,%RDI |
(69) 0x4092ba MOV %R8,%RDI |
(69) 0x4092bd JE 40924a |
(69) 0x4092bf MOV %ESI,%ESI |
(69) 0x4092c1 LEA (%RDI,%RAX,1),%R8 |
(69) 0x4092c5 MOV (%RDX,%R8,4),%R8D |
(69) 0x4092c9 TEST %R8D,%R8D |
(69) 0x4092cc JLE 4092b0 |
(69) 0x4092ce MOV 0x20(%RBX),%R9 |
(69) 0x4092d2 MOV 0x20(%R9),%R9 |
(69) 0x4092d6 MOV %R8D,%R10D |
(69) 0x4092d9 AND $-0x8,%R10D |
(69) 0x4092dd JE 409450 |
(69) 0x4092e3 LEA (,%RSI,8),%R11 |
(69) 0x4092eb LEA (%R11,%R11,2),%R11 |
(69) 0x4092ef LEA -0x1(%R10),%R15D |
(69) 0x4092f3 ADD %R9,%R11 |
(69) 0x4092f6 XOR %R12D,%R12D |
(69) 0x4092f9 NOPL (%RAX) |
(71) 0x409300 VMOVUPD 0x20(%R11),%YMM7 |
(71) 0x409306 VMOVUPD 0x80(%R11),%YMM8 |
(71) 0x40930f VMOVUPD 0x70(%R11),%XMM9 |
(71) 0x409315 VMOVUPD 0x10(%R11),%XMM10 |
(71) 0x40931b VBLENDPD $0x3,0x60(%R11),%YMM8,%YMM11 |
(71) 0x409322 VBLENDPD $0x3,(%R11),%YMM7,%YMM12 |
(71) 0x409328 VINSERTF128 $0x1,0xa0(%R11),%YMM9,%YMM9 |
(71) 0x409332 VINSERTF128 $0x1,0x40(%R11),%YMM10,%YMM10 |
(71) 0x409339 VBROADCASTSD 0xb0(%R11),%YMM13 |
(71) 0x409342 VBROADCASTSD 0x50(%R11),%YMM14 |
(71) 0x409348 VSHUFPD $0x5,%YMM8,%YMM11,%YMM15 |
(71) 0x40934e VBLENDPD $0x8,%YMM13,%YMM15,%YMM13 |
(71) 0x409354 VBLENDPD $0xc,0x40(%R11),%YMM7,%YMM15 |
(71) 0x40935b VBLENDPD $0xc,0xa0(%R11),%YMM8,%YMM8 |
(71) 0x409365 VSHUFPD $0x5,%YMM7,%YMM12,%YMM7 |
(71) 0x40936a VBLENDPD $0xa,%YMM9,%YMM11,%YMM11 |
(71) 0x409370 VBLENDPD $0xa,%YMM10,%YMM12,%YMM12 |
(71) 0x409376 VBLENDPD $0x8,%YMM14,%YMM7,%YMM7 |
(71) 0x40937c VMULPD %YMM1,%YMM12,%YMM12 |
(71) 0x409380 VMULPD %YMM1,%YMM11,%YMM11 |
(71) 0x409384 VMULPD %YMM1,%YMM7,%YMM7 |
(71) 0x409388 VMULPD %YMM1,%YMM13,%YMM13 |
(71) 0x40938c VBLENDPD $0xa,%YMM15,%YMM10,%YMM10 |
(71) 0x409392 VBLENDPD $0xa,%YMM8,%YMM9,%YMM8 |
(71) 0x409398 VMULPD %YMM1,%YMM8,%YMM8 |
(71) 0x40939c VMULPD %YMM1,%YMM10,%YMM9 |
(71) 0x4093a0 VMOVAPD %YMM13,%YMM10 |
(71) 0x4093a5 VPERMT2PD %YMM11,%YMM2,%YMM10 |
(71) 0x4093ab VMOVAPD %YMM13,%YMM14 |
(71) 0x4093b0 VPERMT2PD %YMM11,%YMM3,%YMM14 |
(71) 0x4093b6 VPERMT2PD %YMM13,%YMM4,%YMM11 |
(71) 0x4093bc VMOVAPD %YMM7,%YMM13 |
(71) 0x4093c0 VPERMT2PD %YMM12,%YMM2,%YMM13 |
(71) 0x4093c6 VMOVAPD %YMM7,%YMM15 |
(71) 0x4093ca VPERMT2PD %YMM12,%YMM3,%YMM15 |
(71) 0x4093d0 VPERMT2PD %YMM7,%YMM4,%YMM12 |
(71) 0x4093d6 VPERMT2PD %YMM9,%YMM5,%YMM12 |
(71) 0x4093dc VBLENDPD $0x2,%YMM9,%YMM15,%YMM7 |
(71) 0x4093e2 VPERMT2PD %YMM13,%YMM6,%YMM9 |
(71) 0x4093e8 VPERMT2PD %YMM8,%YMM5,%YMM11 |
(71) 0x4093ee VBLENDPD $0x2,%YMM8,%YMM14,%YMM13 |
(71) 0x4093f4 VPERMT2PD %YMM10,%YMM6,%YMM8 |
(71) 0x4093fa VMOVUPD %YMM13,0x80(%R11) |
(71) 0x409403 VMOVUPD %YMM7,0x20(%R11) |
(71) 0x409409 VMOVUPD %YMM8,0xa0(%R11) |
(71) 0x409412 VMOVUPD %YMM11,0x60(%R11) |
(71) 0x409418 VMOVUPD %YMM9,0x40(%R11) |
(71) 0x40941e VMOVUPD %YMM12,(%R11) |
(71) 0x409423 ADD $0x8,%R12D |
(71) 0x409427 ADD $0xc0,%R11 |
(71) 0x40942e CMP %R15D,%R12D |
(71) 0x409431 JLE 409300 |
(69) 0x409437 CMP %R10D,%R8D |
(69) 0x40943a JE 4092b0 |
(69) 0x409440 JMP 409453 |
0x409442 NOPW %CS:(%RAX,%RAX,1) |
(69) 0x409450 XOR %R10D,%R10D |
(69) 0x409453 SUB %R10D,%R8D |
(69) 0x409456 MOVSXD %R10D,%R10 |
(69) 0x409459 ADD %RSI,%R10 |
(69) 0x40945c LEA (%R10,%R10,2),%R10 |
(69) 0x409460 LEA 0x10(%R9,%R10,8),%R9 |
(69) 0x409465 NOPW %CS:(%RAX,%RAX,1) |
(70) 0x409470 VMULPD -0x10(%R9),%XMM1,%XMM7 |
(70) 0x409476 VMOVUPD %XMM7,-0x10(%R9) |
(70) 0x40947c VMULSD (%R9),%XMM0,%XMM7 |
(70) 0x409481 VMOVSD %XMM7,(%R9) |
(70) 0x409486 ADD $0x18,%R9 |
(70) 0x40948a DEC %R8D |
(70) 0x40948d JNE 409470 |
(69) 0x40948f JMP 4092b0 |
0x409494 NOPW %CS:(%RAX,%RAX,1) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so |
Path / |
Source file and lines | initAtoms.c:174-183 |
Module | exec |
nb instructions | 58 |
nb uops | 60 |
loop length | 234 |
used x86 registers | 13 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 6 |
used zmm registers | 0 |
nb stack references | 4 |
micro-operation queue | 10.00 cycles |
front end | 10.00 cycles |
| | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.80 | 1.80 | 5.00 | 5.00 | 6.50 | 2.00 | 1.80 | 6.50 | 6.50 | 6.50 | 1.60 | 5.00 |
cycles | 1.80 | 1.80 | 5.00 | 5.00 | 6.50 | 2.00 | 1.80 | 6.50 | 6.50 | 6.50 | 1.60 | 5.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 9.79-9.83 |
Stall cycles | 0.00 |
Front-end | 10.00 |
Dispatch | 6.50 |
Overall L1 | 10.00 |
all | 5% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 100% |
load | 100% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 26% |
load | 83% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 11% |
all | 9% |
load | 6% |
store | 6% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 50% |
load | 50% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 18% |
load | 42% |
store | 6% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 10% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x10,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RCX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOVL $0,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%R14D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVL $0,-0x28(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9D,-0x24(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0x1,-0x2c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA -0x2c(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x30(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x28(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x24(%RBP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x62c630,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R14D,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 402d60 <__kmpc_for_static_init_4@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x28(%RBP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x24(%RBP),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %ECX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JBE 409266 <setTemperature.extracted+0x86> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x62c650,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R14D,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADD $0x10,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 402c10 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
VMOVQ %R15,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV 0x18(%RBX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x78(%RDX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPBROADCASTQ %XMM0,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %EAX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SAL $0x6,%ESI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVUPD 0x193b6(%RIP),%YMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x193ce(%RIP),%YMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x19446(%RIP),%YMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x193fe(%RIP),%YMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x19416(%RIP),%YMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
JMP 4092bf <setTemperature.extracted+0xdf> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | initAtoms.c:174-183 |
Module | exec |
nb instructions | 58 |
nb uops | 60 |
loop length | 234 |
used x86 registers | 13 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 6 |
used zmm registers | 0 |
nb stack references | 4 |
micro-operation queue | 10.00 cycles |
front end | 10.00 cycles |
| | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.80 | 1.80 | 5.00 | 5.00 | 6.50 | 2.00 | 1.80 | 6.50 | 6.50 | 6.50 | 1.60 | 5.00 |
cycles | 1.80 | 1.80 | 5.00 | 5.00 | 6.50 | 2.00 | 1.80 | 6.50 | 6.50 | 6.50 | 1.60 | 5.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 9.79-9.83 |
Stall cycles | 0.00 |
Front-end | 10.00 |
Dispatch | 6.50 |
Overall L1 | 10.00 |
all | 5% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 100% |
load | 100% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 26% |
load | 83% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 11% |
all | 9% |
load | 6% |
store | 6% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 50% |
load | 50% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 18% |
load | 42% |
store | 6% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 10% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x10,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RCX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOVL $0,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%R14D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVL $0,-0x28(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9D,-0x24(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0x1,-0x2c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA -0x2c(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x30(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x28(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x24(%RBP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x62c630,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R14D,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 402d60 <__kmpc_for_static_init_4@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x28(%RBP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x24(%RBP),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %ECX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JBE 409266 <setTemperature.extracted+0x86> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x62c650,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R14D,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADD $0x10,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 402c10 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
VMOVQ %R15,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV 0x18(%RBX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x78(%RDX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPBROADCASTQ %XMM0,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %EAX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SAL $0x6,%ESI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVUPD 0x193b6(%RIP),%YMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x193ce(%RIP),%YMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x19446(%RIP),%YMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x193fe(%RIP),%YMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x19416(%RIP),%YMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
JMP 4092bf <setTemperature.extracted+0xdf> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼setTemperature.extracted– | 0.01 | 0 |
▼Loop 69 - initAtoms.c:175-183 - exec– | 0 | 0 |
○Loop 71 - initAtoms.c:177-181 - exec | 0.01 | 0 |
○Loop 70 - initAtoms.c:177-181 - exec | 0 | 0 |