Function: setVcm.extracted | Module: exec | Source: initAtoms.c:123-135 | Coverage: 0.01% |
---|
Function: setVcm.extracted | Module: exec | Source: initAtoms.c:123-135 | Coverage: 0.01% |
---|
/scratch_na/users/xoserete/qaas_runs/171-419-7821/intel/CoMD/build/CoMD/CoMD/src-openmp/initAtoms.c: 123 - 135 |
-------------------------------------------------------------------------------- |
123: #pragma omp parallel for |
124: for (int iBox=0; iBox<s->boxes->nLocalBoxes; ++iBox) |
125: { |
126: for (int iOff=MAXATOMS*iBox, ii=0; ii<s->boxes->nAtoms[iBox]; ++ii, ++iOff) |
127: { |
128: int iSpecies = s->atoms->iSpecies[iOff]; |
129: real_t mass = s->species[iSpecies].mass; |
130: |
131: s->atoms->p[iOff][0] += mass * vShift[0]; |
132: s->atoms->p[iOff][1] += mass * vShift[1]; |
133: s->atoms->p[iOff][2] += mass * vShift[2]; |
134: } |
135: } |
0x4097c0 PUSH %RBP |
0x4097c1 MOV %RSP,%RBP |
0x4097c4 PUSH %R15 |
0x4097c6 PUSH %R14 |
0x4097c8 PUSH %R13 |
0x4097ca PUSH %R12 |
0x4097cc PUSH %RBX |
0x4097cd SUB $0x48,%RSP |
0x4097d1 MOV %RCX,-0x68(%RBP) |
0x4097d5 MOV %RDX,-0x48(%RBP) |
0x4097d9 MOVL $0,-0x3c(%RBP) |
0x4097e0 MOV (%RDI),%ESI |
0x4097e2 MOVL $0,-0x30(%RBP) |
0x4097e9 MOV %R9D,-0x2c(%RBP) |
0x4097ed MOVL $0x1,-0x38(%RBP) |
0x4097f4 SUB $0x8,%RSP |
0x4097f8 LEA -0x38(%RBP),%RAX |
0x4097fc LEA -0x3c(%RBP),%RCX |
0x409800 LEA -0x30(%RBP),%R8 |
0x409804 LEA -0x2c(%RBP),%R9 |
0x409808 MOV $0x62e490,%EDI |
0x40980d MOV %ESI,-0x34(%RBP) |
0x409810 MOV $0x22,%EDX |
0x409815 PUSH $0x1 |
0x409817 PUSH $0x1 |
0x409819 PUSH %RAX |
0x40981a CALL 403130 <__kmpc_for_static_init_4@plt> |
0x40981f ADD $0x20,%RSP |
0x409823 MOV -0x30(%RBP),%ECX |
0x409826 MOV -0x2c(%RBP),%R14D |
0x40982a CMP %R14D,%ECX |
0x40982d JBE 40984d |
0x40982f MOV $0x62e4b0,%EDI |
0x409834 MOV -0x34(%RBP),%ESI |
0x409837 ADD $0x48,%RSP |
0x40983b POP %RBX |
0x40983c POP %R12 |
0x40983e POP %R13 |
0x409840 POP %R14 |
0x409842 POP %R15 |
0x409844 POP %RBP |
0x409845 VZEROUPPER |
0x409848 JMP 402fe0 |
0x40984d MOV -0x48(%RBP),%RAX |
0x409851 MOV 0x18(%RAX),%RDX |
0x409855 MOV 0x78(%RDX),%R15 |
0x409859 SUB %RCX,%R14 |
0x40985c MOV %ECX,%ESI |
0x40985e SAL $0x6,%ESI |
0x409861 XOR %EDI,%EDI |
0x409863 MOV %RCX,-0x60(%RBP) |
0x409867 MOV %R14,-0x58(%RBP) |
0x40986b MOV %R15,-0x50(%RBP) |
0x40986f JMP 40988f |
0x409871 NOPW %CS:(%RAX,%RAX,1) |
(67) 0x409880 LEA 0x1(%RDI),%RAX |
(67) 0x409884 ADD $0x40,%ESI |
(67) 0x409887 CMP %R14,%RDI |
(67) 0x40988a MOV %RAX,%RDI |
(67) 0x40988d JE 40982f |
(67) 0x40988f MOV %ESI,%ESI |
(67) 0x409891 LEA (%RDI,%RCX,1),%RDX |
(67) 0x409895 MOV (%R15,%RDX,4),%R8D |
(67) 0x409899 TEST %R8D,%R8D |
(67) 0x40989c JLE 409880 |
(67) 0x40989e MOV -0x48(%RBP),%RAX |
(67) 0x4098a2 MOV 0x20(%RAX),%RDX |
(67) 0x4098a6 MOV 0x28(%RAX),%R9 |
(67) 0x4098aa MOV 0x10(%RDX),%R10 |
(67) 0x4098ae MOV 0x20(%RDX),%R11 |
(67) 0x4098b2 MOV -0x68(%RBP),%RAX |
(67) 0x4098b6 VMOVUPD (%RAX),%XMM0 |
(67) 0x4098ba VMOVSD 0x10(%RAX),%XMM1 |
(67) 0x4098bf MOV %R8D,%R12D |
(67) 0x4098c2 AND $-0x4,%R12D |
(67) 0x4098c6 JE 409a30 |
(67) 0x4098cc LEA (,%RSI,8),%RDX |
(67) 0x4098d4 LEA (%RDX,%RDX,2),%R13 |
(67) 0x4098d8 LEA (,%RSI,4),%R15 |
(67) 0x4098e0 LEA -0x1(%R12),%EDX |
(67) 0x4098e5 VBROADCASTSD %XMM0,%YMM2 |
(67) 0x4098ea VPERMPD $0x55,%YMM0,%YMM3 |
(67) 0x4098f0 VBROADCASTSD %XMM1,%YMM4 |
(67) 0x4098f5 ADD %R11,%R13 |
(67) 0x4098f8 MOV %R10,-0x70(%RBP) |
(67) 0x4098fc ADD %R10,%R15 |
(67) 0x4098ff XOR %EBX,%EBX |
(67) 0x409901 NOPW %CS:(%RAX,%RAX,1) |
(69) 0x409910 MOVSXD (%R15,%RBX,4),%RAX |
(69) 0x409914 MOVSXD 0x4(%R15,%RBX,4),%RCX |
(69) 0x409919 MOVSXD 0x8(%R15,%RBX,4),%R14 |
(69) 0x40991e MOVSXD 0xc(%R15,%RBX,4),%R10 |
(69) 0x409923 SAL $0x4,%RAX |
(69) 0x409927 SAL $0x4,%R14 |
(69) 0x40992b SAL $0x4,%R10 |
(69) 0x40992f VMOVSD 0x8(%R9,%R14,1),%XMM5 |
(69) 0x409936 VMOVHPD 0x8(%R9,%R10,1),%XMM5,%XMM5 |
(69) 0x40993d SAL $0x4,%RCX |
(69) 0x409941 VMOVSD 0x8(%R9,%RAX,1),%XMM6 |
(69) 0x409948 VMOVHPD 0x8(%R9,%RCX,1),%XMM6,%XMM6 |
(69) 0x40994f VMOVUPD 0x20(%R13),%YMM7 |
(69) 0x409955 VBLENDPD $0x3,(%R13),%YMM7,%YMM8 |
(69) 0x40995c VINSERTF128 $0x1,%XMM5,%YMM6,%YMM5 |
(69) 0x409962 VMOVUPD 0x10(%R13),%XMM6 |
(69) 0x409968 VINSERTF128 $0x1,0x40(%R13),%YMM6,%YMM6 |
(69) 0x40996f VMOVUPD 0x20(%R13),%XMM9 |
(69) 0x409975 VBLENDPD $0xa,%YMM6,%YMM8,%YMM10 |
(69) 0x40997b VSHUFPD $0x5,%YMM7,%YMM8,%YMM7 |
(69) 0x409980 VBROADCASTSD 0x50(%R13),%YMM8 |
(69) 0x409986 VBLENDPD $0x8,%YMM8,%YMM7,%YMM7 |
(69) 0x40998c VBLENDPD $0xc,0x40(%R13),%YMM9,%YMM8 |
(69) 0x409993 VBLENDPD $0xa,%YMM8,%YMM6,%YMM6 |
(69) 0x409999 VFMADD231PD %YMM5,%YMM2,%YMM10 |
(69) 0x40999e VFMADD231PD %YMM5,%YMM3,%YMM7 |
(69) 0x4099a3 VFMADD231PD %YMM5,%YMM4,%YMM6 |
(69) 0x4099a8 VSHUFPD $0x1,%YMM7,%YMM7,%YMM5 |
(69) 0x4099ad VBLENDPD $0x4,%YMM10,%YMM5,%YMM5 |
(69) 0x4099b3 VMOVDDUP %XMM7,%XMM8 |
(69) 0x4099b7 VPERM2F128 $0x20,%YMM10,%YMM8,%YMM8 |
(69) 0x4099bd VSHUFPD $0x4,%YMM7,%YMM7,%YMM7 |
(69) 0x4099c2 VINSERTF128 $0x1,%XMM6,%YMM10,%YMM9 |
(69) 0x4099c8 VBLENDPD $0xa,%YMM8,%YMM9,%YMM8 |
(69) 0x4099ce VPERM2F128 $0x31,%YMM6,%YMM10,%YMM9 |
(69) 0x4099d4 VPERM2F128 $0x31,%YMM7,%YMM6,%YMM7 |
(69) 0x4099da VBLENDPD $0xa,%YMM9,%YMM7,%YMM7 |
(69) 0x4099e0 VBLENDPD $0x2,%YMM6,%YMM5,%YMM5 |
(69) 0x4099e6 VMOVUPD %YMM5,0x20(%R13) |
(69) 0x4099ec VMOVUPD %YMM7,0x40(%R13) |
(69) 0x4099f2 VMOVUPD %YMM8,(%R13) |
(69) 0x4099f8 ADD $0x60,%R13 |
(69) 0x4099fc ADD $0x4,%RBX |
(69) 0x409a00 CMP %EDX,%EBX |
(69) 0x409a02 JLE 409910 |
(67) 0x409a08 CMP %R12D,%R8D |
(67) 0x409a0b MOV -0x60(%RBP),%RCX |
(67) 0x409a0f MOV -0x58(%RBP),%R14 |
(67) 0x409a13 MOV -0x50(%RBP),%R15 |
(67) 0x409a17 MOV -0x70(%RBP),%R10 |
(67) 0x409a1b JE 409880 |
(67) 0x409a21 JMP 409a33 |
0x409a23 NOPW %CS:(%RAX,%RAX,1) |
(67) 0x409a30 XOR %R12D,%R12D |
(67) 0x409a33 SUB %R12D,%R8D |
(67) 0x409a36 MOVSXD %R12D,%RBX |
(67) 0x409a39 ADD %RSI,%RBX |
(67) 0x409a3c LEA (%RBX,%RBX,2),%RDX |
(67) 0x409a40 LEA (%R11,%RDX,8),%RDX |
(67) 0x409a44 ADD $0x10,%RDX |
(67) 0x409a48 LEA (%R10,%RBX,4),%R10 |
(67) 0x409a4c XOR %R11D,%R11D |
(67) 0x409a4f NOP |
(68) 0x409a50 MOVSXD (%R10,%R11,4),%RAX |
(68) 0x409a54 SAL $0x4,%RAX |
(68) 0x409a58 VMOVDDUP 0x8(%R9,%RAX,1),%XMM2 |
(68) 0x409a5f VMOVAPD %XMM2,%XMM3 |
(68) 0x409a63 VFMADD213PD -0x10(%RDX),%XMM0,%XMM3 |
(68) 0x409a69 VMOVUPD %XMM3,-0x10(%RDX) |
(68) 0x409a6e VMOVSD (%RDX),%XMM3 |
(68) 0x409a72 VFMADD231SD %XMM2,%XMM1,%XMM3 |
(68) 0x409a77 VMOVSD %XMM3,(%RDX) |
(68) 0x409a7b INC %R11 |
(68) 0x409a7e ADD $0x18,%RDX |
(68) 0x409a82 CMP %R11D,%R8D |
(68) 0x409a85 JNE 409a50 |
(67) 0x409a87 JMP 409880 |
0x409a8c NOPL (%RAX) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so |
Path / |
Source file and lines | initAtoms.c:123-135 |
Module | exec |
nb instructions | 57 |
nb uops | 59 |
loop length | 209 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 10 |
micro-operation queue | 9.83 cycles |
front end | 9.83 cycles |
| | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.40 | 1.40 | 4.33 | 4.33 | 10.00 | 1.40 | 1.40 | 10.00 | 10.00 | 10.00 | 1.40 | 4.33 |
cycles | 1.40 | 1.40 | 4.33 | 4.33 | 10.00 | 1.40 | 1.40 | 10.00 | 10.00 | 10.00 | 1.40 | 4.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 10.09 |
Stall cycles | 0.08-0.10 |
RS full (events) | 0.36-0.28 |
Front-end | 9.83 |
Dispatch | 10.00 |
Overall L1 | 10.00 |
Vectorization ratios |
all | 4% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
Vector efficiency ratios |
all | 10% |
load | 8% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 10% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x48,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RCX,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0,-0x3c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVL $0,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9D,-0x2c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0x1,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA -0x38(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x3c(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x30(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x2c(%RBP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x62e490,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,-0x34(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 403130 <__kmpc_for_static_init_4@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x30(%RBP),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x2c(%RBP),%R14D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %R14D,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JBE 40984d <setVcm.extracted+0x8d> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x62e4b0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x34(%RBP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x48,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 402fe0 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV -0x48(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RAX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x78(%RDX),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RCX,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ECX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SAL $0x6,%ESI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RCX,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 40988f <setVcm.extracted+0xcf> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | initAtoms.c:123-135 |
Module | exec |
nb instructions | 57 |
nb uops | 59 |
loop length | 209 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 10 |
micro-operation queue | 9.83 cycles |
front end | 9.83 cycles |
| | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.40 | 1.40 | 4.33 | 4.33 | 10.00 | 1.40 | 1.40 | 10.00 | 10.00 | 10.00 | 1.40 | 4.33 |
cycles | 1.40 | 1.40 | 4.33 | 4.33 | 10.00 | 1.40 | 1.40 | 10.00 | 10.00 | 10.00 | 1.40 | 4.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 10.09 |
Stall cycles | 0.08-0.10 |
RS full (events) | 0.36-0.28 |
Front-end | 9.83 |
Dispatch | 10.00 |
Overall L1 | 10.00 |
Vectorization ratios |
all | 4% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
Vector efficiency ratios |
all | 10% |
load | 8% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 10% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x48,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RCX,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0,-0x3c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVL $0,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9D,-0x2c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0x1,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA -0x38(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x3c(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x30(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x2c(%RBP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x62e490,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,-0x34(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 403130 <__kmpc_for_static_init_4@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x30(%RBP),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x2c(%RBP),%R14D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %R14D,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JBE 40984d <setVcm.extracted+0x8d> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x62e4b0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x34(%RBP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x48,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 402fe0 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV -0x48(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RAX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x78(%RDX),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RCX,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ECX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SAL $0x6,%ESI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RCX,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 40988f <setVcm.extracted+0xcf> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼setVcm.extracted– | 0.01 | 0 |
▼Loop 67 - initAtoms.c:123-135 - exec– | 0 | 0 |
○Loop 69 - initAtoms.c:126-133 - exec | 0.01 | 0 |
○Loop 68 - initAtoms.c:126-133 - exec | 0 | 0 |