Function: setVcm.extracted | Module: exec | Source: initAtoms.c:123-133 | Coverage: 0.01% |
---|
Function: setVcm.extracted | Module: exec | Source: initAtoms.c:123-133 | Coverage: 0.01% |
---|
/scratch_na/users/xoserete/qaas_runs/171-172-2581/intel/CoMD/build/CoMD/CoMD/src-openmp/initAtoms.c: 123 - 133 |
-------------------------------------------------------------------------------- |
123: #pragma omp parallel for |
124: for (int iBox=0; iBox<s->boxes->nLocalBoxes; ++iBox) |
125: { |
126: for (int iOff=MAXATOMS*iBox, ii=0; ii<s->boxes->nAtoms[iBox]; ++ii, ++iOff) |
127: { |
128: int iSpecies = s->atoms->iSpecies[iOff]; |
129: real_t mass = s->species[iSpecies].mass; |
130: |
131: s->atoms->p[iOff][0] += mass * vShift[0]; |
132: s->atoms->p[iOff][1] += mass * vShift[1]; |
133: s->atoms->p[iOff][2] += mass * vShift[2]; |
0x40a120 PUSH %RBP |
0x40a121 MOV %RSP,%RBP |
0x40a124 PUSH %R15 |
0x40a126 PUSH %R14 |
0x40a128 PUSH %R13 |
0x40a12a PUSH %R12 |
0x40a12c PUSH %RBX |
0x40a12d SUB $0x48,%RSP |
0x40a131 MOV %RCX,-0x68(%RBP) |
0x40a135 MOV %RDX,-0x48(%RBP) |
0x40a139 MOVL $0,-0x3c(%RBP) |
0x40a140 MOV (%RDI),%ESI |
0x40a142 MOVL $0,-0x30(%RBP) |
0x40a149 MOV %R9D,-0x2c(%RBP) |
0x40a14d MOVL $0x1,-0x38(%RBP) |
0x40a154 SUB $0x8,%RSP |
0x40a158 LEA -0x38(%RBP),%RAX |
0x40a15c LEA -0x3c(%RBP),%RCX |
0x40a160 LEA -0x30(%RBP),%R8 |
0x40a164 LEA -0x2c(%RBP),%R9 |
0x40a168 MOV $0x62f490,%EDI |
0x40a16d MOV %ESI,-0x34(%RBP) |
0x40a170 MOV $0x22,%EDX |
0x40a175 PUSH $0x1 |
0x40a177 PUSH $0x1 |
0x40a179 PUSH %RAX |
0x40a17a CALL 403120 <__kmpc_for_static_init_4@plt> |
0x40a17f ADD $0x20,%RSP |
0x40a183 MOV -0x30(%RBP),%ECX |
0x40a186 MOV -0x2c(%RBP),%R14D |
0x40a18a CMP %R14D,%ECX |
0x40a18d JBE 40a1ad |
0x40a18f MOV $0x62f4b0,%EDI |
0x40a194 MOV -0x34(%RBP),%ESI |
0x40a197 ADD $0x48,%RSP |
0x40a19b POP %RBX |
0x40a19c POP %R12 |
0x40a19e POP %R13 |
0x40a1a0 POP %R14 |
0x40a1a2 POP %R15 |
0x40a1a4 POP %RBP |
0x40a1a5 VZEROUPPER |
0x40a1a8 JMP 402fe0 |
0x40a1ad MOV -0x48(%RBP),%RAX |
0x40a1b1 MOV 0x18(%RAX),%RDX |
0x40a1b5 MOV 0x78(%RDX),%R15 |
0x40a1b9 SUB %RCX,%R14 |
0x40a1bc MOV %ECX,%ESI |
0x40a1be SAL $0x6,%ESI |
0x40a1c1 XOR %EDI,%EDI |
0x40a1c3 MOV %RCX,-0x60(%RBP) |
0x40a1c7 MOV %R14,-0x58(%RBP) |
0x40a1cb MOV %R15,-0x50(%RBP) |
0x40a1cf JMP 40a1ec |
0x40a1d1 NOPW %CS:(%RAX,%RAX,1) |
(67) 0x40a1e0 ADD $0x40,%ESI |
(67) 0x40a1e3 CMP %R14,%RDI |
(67) 0x40a1e6 LEA 0x1(%RDI),%RDI |
(67) 0x40a1ea JE 40a18f |
(67) 0x40a1ec MOV %ESI,%ESI |
(67) 0x40a1ee LEA (%RDI,%RCX,1),%RDX |
(67) 0x40a1f2 MOV (%R15,%RDX,4),%R8D |
(67) 0x40a1f6 TEST %R8D,%R8D |
(67) 0x40a1f9 JLE 40a1e0 |
(67) 0x40a1fb MOV -0x48(%RBP),%RAX |
(67) 0x40a1ff MOV 0x20(%RAX),%RDX |
(67) 0x40a203 MOV 0x28(%RAX),%R9 |
(67) 0x40a207 MOV 0x10(%RDX),%R10 |
(67) 0x40a20b MOV 0x20(%RDX),%R11 |
(67) 0x40a20f MOV -0x68(%RBP),%RAX |
(67) 0x40a213 VMOVUPD (%RAX),%XMM0 |
(67) 0x40a217 VMOVSD 0x10(%RAX),%XMM1 |
(67) 0x40a21c MOV %R8D,%R12D |
(67) 0x40a21f AND $-0x4,%R12D |
(67) 0x40a223 JE 40a380 |
(67) 0x40a229 LEA (,%RSI,8),%RDX |
(67) 0x40a231 LEA (%RDX,%RDX,2),%R13 |
(67) 0x40a235 LEA (,%RSI,4),%R15 |
(67) 0x40a23d LEA -0x1(%R12),%EDX |
(67) 0x40a242 ADD %R11,%R13 |
(67) 0x40a245 MOV %R10,-0x70(%RBP) |
(67) 0x40a249 ADD %R10,%R15 |
(67) 0x40a24c VBROADCASTSD %XMM0,%YMM2 |
(67) 0x40a251 VPERMPD $0x55,%YMM0,%YMM3 |
(67) 0x40a257 VBROADCASTSD %XMM1,%YMM4 |
(67) 0x40a25c XOR %EBX,%EBX |
(67) 0x40a25e XCHG %AX,%AX |
(69) 0x40a260 MOVSXD (%R15,%RBX,4),%RAX |
(69) 0x40a264 MOVSXD 0x4(%R15,%RBX,4),%R14 |
(69) 0x40a269 MOVSXD 0x8(%R15,%RBX,4),%RCX |
(69) 0x40a26e MOVSXD 0xc(%R15,%RBX,4),%R10 |
(69) 0x40a273 SAL $0x4,%RAX |
(69) 0x40a277 SAL $0x4,%RCX |
(69) 0x40a27b SAL $0x4,%R10 |
(69) 0x40a27f VMOVSD 0x8(%R9,%RCX,1),%XMM5 |
(69) 0x40a286 VMOVHPD 0x8(%R9,%R10,1),%XMM5,%XMM5 |
(69) 0x40a28d SAL $0x4,%R14 |
(69) 0x40a291 VMOVSD 0x8(%R9,%RAX,1),%XMM6 |
(69) 0x40a298 VMOVHPD 0x8(%R9,%R14,1),%XMM6,%XMM6 |
(69) 0x40a29f VMOVUPD 0x20(%R13),%YMM7 |
(69) 0x40a2a5 VBLENDPD $0x3,(%R13),%YMM7,%YMM8 |
(69) 0x40a2ac VINSERTF128 $0x1,%XMM5,%YMM6,%YMM5 |
(69) 0x40a2b2 VMOVUPD 0x10(%R13),%XMM6 |
(69) 0x40a2b8 VINSERTF128 $0x1,0x40(%R13),%YMM6,%YMM6 |
(69) 0x40a2bf VMOVUPD 0x20(%R13),%XMM9 |
(69) 0x40a2c5 VBLENDPD $0xa,%YMM6,%YMM8,%YMM10 |
(69) 0x40a2cb VSHUFPD $0x5,%YMM7,%YMM8,%YMM7 |
(69) 0x40a2d0 VBROADCASTSD 0x50(%R13),%YMM8 |
(69) 0x40a2d6 VBLENDPD $0x8,%YMM8,%YMM7,%YMM7 |
(69) 0x40a2dc VBLENDPD $0xc,0x40(%R13),%YMM9,%YMM8 |
(69) 0x40a2e3 VBLENDPD $0xa,%YMM8,%YMM6,%YMM6 |
(69) 0x40a2e9 VFMADD231PD %YMM5,%YMM2,%YMM10 |
(69) 0x40a2ee VFMADD231PD %YMM5,%YMM3,%YMM7 |
(69) 0x40a2f3 VFMADD231PD %YMM5,%YMM4,%YMM6 |
(69) 0x40a2f8 VSHUFPD $0x1,%YMM7,%YMM7,%YMM5 |
(69) 0x40a2fd VBLENDPD $0x4,%YMM10,%YMM5,%YMM5 |
(69) 0x40a303 VMOVDDUP %XMM7,%XMM8 |
(69) 0x40a307 VPERM2F128 $0x20,%YMM10,%YMM8,%YMM8 |
(69) 0x40a30d VSHUFPD $0x4,%YMM7,%YMM7,%YMM7 |
(69) 0x40a312 VINSERTF128 $0x1,%XMM6,%YMM10,%YMM9 |
(69) 0x40a318 VBLENDPD $0xa,%YMM8,%YMM9,%YMM8 |
(69) 0x40a31e VPERM2F128 $0x31,%YMM6,%YMM10,%YMM9 |
(69) 0x40a324 VPERM2F128 $0x31,%YMM7,%YMM6,%YMM7 |
(69) 0x40a32a VBLENDPD $0xa,%YMM9,%YMM7,%YMM7 |
(69) 0x40a330 VBLENDPD $0x2,%YMM6,%YMM5,%YMM5 |
(69) 0x40a336 VMOVUPD %YMM5,0x20(%R13) |
(69) 0x40a33c VMOVUPD %YMM7,0x40(%R13) |
(69) 0x40a342 VMOVUPD %YMM8,(%R13) |
(69) 0x40a348 ADD $0x60,%R13 |
(69) 0x40a34c ADD $0x4,%RBX |
(69) 0x40a350 CMP %EDX,%EBX |
(69) 0x40a352 JLE 40a260 |
(67) 0x40a358 CMP %R12D,%R8D |
(67) 0x40a35b MOV -0x60(%RBP),%RCX |
(67) 0x40a35f MOV -0x58(%RBP),%R14 |
(67) 0x40a363 MOV -0x50(%RBP),%R15 |
(67) 0x40a367 MOV -0x70(%RBP),%R10 |
(67) 0x40a36b JE 40a1e0 |
(67) 0x40a371 JMP 40a383 |
0x40a373 NOPW %CS:(%RAX,%RAX,1) |
(67) 0x40a380 XOR %R12D,%R12D |
(67) 0x40a383 SUB %R12D,%R8D |
(67) 0x40a386 MOVSXD %R12D,%RBX |
(67) 0x40a389 ADD %RSI,%RBX |
(67) 0x40a38c LEA (%RBX,%RBX,2),%RDX |
(67) 0x40a390 LEA (%R11,%RDX,8),%RDX |
(67) 0x40a394 ADD $0x10,%RDX |
(67) 0x40a398 LEA (%R10,%RBX,4),%R10 |
(67) 0x40a39c XOR %R11D,%R11D |
(67) 0x40a39f NOP |
(68) 0x40a3a0 MOVSXD (%R10,%R11,4),%RAX |
(68) 0x40a3a4 SAL $0x4,%RAX |
(68) 0x40a3a8 VMOVDDUP 0x8(%R9,%RAX,1),%XMM2 |
(68) 0x40a3af VMOVAPD %XMM2,%XMM3 |
(68) 0x40a3b3 VFMADD213PD -0x10(%RDX),%XMM0,%XMM3 |
(68) 0x40a3b9 VMOVUPD %XMM3,-0x10(%RDX) |
(68) 0x40a3be VMOVSD (%RDX),%XMM3 |
(68) 0x40a3c2 VFMADD231SD %XMM2,%XMM1,%XMM3 |
(68) 0x40a3c7 VMOVSD %XMM3,(%RDX) |
(68) 0x40a3cb INC %R11 |
(68) 0x40a3ce ADD $0x18,%RDX |
(68) 0x40a3d2 CMP %R11D,%R8D |
(68) 0x40a3d5 JNE 40a3a0 |
(67) 0x40a3d7 JMP 40a1e0 |
0x40a3dc NOPL (%RAX) |
Path / |
Source file and lines | initAtoms.c:123-133 |
Module | exec |
nb instructions | 57 |
nb uops | 59 |
loop length | 209 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 10 |
micro-operation queue | 9.83 cycles |
front end | 9.83 cycles |
 | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.40 | 1.40 | 4.33 | 4.33 | 10.00 | 1.40 | 1.40 | 10.00 | 10.00 | 10.00 | 1.40 | 4.33 |
cycles | 1.40 | 1.40 | 4.33 | 4.33 | 10.00 | 1.40 | 1.40 | 10.00 | 10.00 | 10.00 | 1.40 | 4.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 10.09 |
Stall cycles | 0.08-0.10 |
RS full (events) | 0.36-0.28 |
Front-end | 9.83 |
Dispatch | 10.00 |
Overall L1 | 10.00 |
Vectorization ratio |
all | 4% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
Vector efficiency ratio |
all | 10% |
load | 8% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 10% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x48,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RCX,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0,-0x3c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVL $0,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9D,-0x2c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0x1,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA -0x38(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x3c(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x30(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x2c(%RBP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x62f490,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,-0x34(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 403120 <__kmpc_for_static_init_4@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x30(%RBP),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x2c(%RBP),%R14D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %R14D,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JBE 40a1ad <setVcm.extracted+0x8d> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x62f4b0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x34(%RBP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x48,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 402fe0 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV -0x48(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RAX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x78(%RDX),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RCX,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ECX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SAL $0x6,%ESI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RCX,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 40a1ec <setVcm.extracted+0xcc> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | initAtoms.c:123-133 |
Module | exec |
nb instructions | 57 |
nb uops | 59 |
loop length | 209 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 10 |
micro-operation queue | 9.83 cycles |
front end | 9.83 cycles |
 | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.40 | 1.40 | 4.33 | 4.33 | 10.00 | 1.40 | 1.40 | 10.00 | 10.00 | 10.00 | 1.40 | 4.33 |
cycles | 1.40 | 1.40 | 4.33 | 4.33 | 10.00 | 1.40 | 1.40 | 10.00 | 10.00 | 10.00 | 1.40 | 4.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 10.09 |
Stall cycles | 0.08-0.10 |
RS full (events) | 0.36-0.28 |
Front-end | 9.83 |
Dispatch | 10.00 |
Overall L1 | 10.00 |
Vectorization ratio |
all | 4% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
Vector efficiency ratio |
all | 10% |
load | 8% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 10% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x48,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RCX,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0,-0x3c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVL $0,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9D,-0x2c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0x1,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA -0x38(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x3c(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x30(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x2c(%RBP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x62f490,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,-0x34(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 403120 <__kmpc_for_static_init_4@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x30(%RBP),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x2c(%RBP),%R14D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %R14D,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JBE 40a1ad <setVcm.extracted+0x8d> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x62f4b0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x34(%RBP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x48,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 402fe0 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV -0x48(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RAX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x78(%RDX),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RCX,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ECX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SAL $0x6,%ESI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RCX,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 40a1ec <setVcm.extracted+0xcc> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼setVcm.extracted– | 0.01 | 0 |
▼Loop 67 - initAtoms.c:123-133 - exec– | 0 | 0 |
○Loop 69 - initAtoms.c:126-133 - exec | 0.01 | 0 |
○Loop 68 - initAtoms.c:126-133 - exec | 0 | 0 |