Function: putAtomInBox | Module: exec | Source: linkCells.c:173-195 | Coverage: 0.07% |
---|
Function: putAtomInBox | Module: exec | Source: linkCells.c:173-195 | Coverage: 0.07% |
---|
/scratch_na/users/xoserete/qaas_runs/171-419-7821/intel/CoMD/build/CoMD/CoMD/src-openmp/linkCells.c: 173 - 195 |
-------------------------------------------------------------------------------- |
173: { |
174: real_t xyz[3] = {x,y,z}; |
175: |
176: // Find correct box. |
177: int iBox = getBoxFromCoord(boxes, xyz); |
178: int iOff = iBox*MAXATOMS; |
179: iOff += boxes->nAtoms[iBox]; |
180: |
181: // assign values to array elements |
182: if (iBox < boxes->nLocalBoxes) |
183: atoms->nLocal++; |
184: boxes->nAtoms[iBox]++; |
185: atoms->gid[iOff] = gid; |
186: atoms->iSpecies[iOff] = iType; |
187: |
188: atoms->r[iOff][0] = x; |
189: atoms->r[iOff][1] = y; |
190: atoms->r[iOff][2] = z; |
191: |
192: atoms->p[iOff][0] = px; |
193: atoms->p[iOff][1] = py; |
194: atoms->p[iOff][2] = pz; |
195: } |
0x40bdc0 PUSH %RBP |
0x40bdc1 VUNPCKLPD %XMM1,%XMM0,%XMM1 |
0x40bdc5 VUNPCKLPD %XMM4,%XMM3,%XMM3 |
0x40bdc9 MOV %RSP,%RBP |
0x40bdcc PUSH %R15 |
0x40bdce MOV %RDI,%R15 |
0x40bdd1 PUSH %R14 |
0x40bdd3 MOV %RSI,%R14 |
0x40bdd6 LEA -0x50(%RBP),%RSI |
0x40bdda PUSH %R13 |
0x40bddc MOV %EDX,%R13D |
0x40bddf PUSH %R12 |
0x40bde1 MOV %ECX,%R12D |
0x40bde4 PUSH %RBX |
0x40bde5 VMOVQ %XMM5,%RBX |
0x40bdea SUB $0x58,%RSP |
0x40bdee VMOVSD %XMM2,-0x40(%RBP) |
0x40bdf3 VMOVSD %XMM2,-0x58(%RBP) |
0x40bdf8 VMOVAPD %XMM3,-0x80(%RBP) |
0x40bdfd VMOVAPD %XMM1,-0x50(%RBP) |
0x40be02 VMOVAPD %XMM1,-0x70(%RBP) |
0x40be07 CALL 40bd20 <getBoxFromCoord> |
0x40be0c MOV 0x78(%R15),%RCX |
0x40be10 VMOVSD -0x58(%RBP),%XMM2 |
0x40be15 MOVSXD %EAX,%RSI |
0x40be18 MOV %EAX,%EDX |
0x40be1a VMOVAPD -0x70(%RBP),%XMM0 |
0x40be1f VMOVAPD -0x80(%RBP),%XMM4 |
0x40be24 LEA (%RCX,%RSI,4),%RDI |
0x40be28 SAL $0x6,%EDX |
0x40be2b MOV (%RDI),%R8D |
0x40be2e ADD %R8D,%EDX |
0x40be31 CMP %EAX,0xc(%R15) |
0x40be35 JLE 40be3d |
0x40be37 INCL (%R14) |
0x40be3a MOV (%RDI),%R8D |
0x40be3d MOVSXD %EDX,%RAX |
0x40be40 MOV 0x18(%R14),%R15 |
0x40be44 MOV 0x8(%R14),%R9 |
0x40be48 INC %R8D |
0x40be4b LEA (%RAX,%RAX,2),%R11 |
0x40be4f MOV 0x10(%R14),%R10 |
0x40be53 MOV %R8D,(%RDI) |
0x40be56 SAL $0x3,%R11 |
0x40be5a MOV %R13D,(%R9,%RAX,4) |
0x40be5e ADD %R11,%R15 |
0x40be61 ADD 0x20(%R14),%R11 |
0x40be65 MOV %R12D,(%R10,%RAX,4) |
0x40be69 VMOVUPD %XMM0,(%R15) |
0x40be6e VMOVSD %XMM2,0x10(%R15) |
0x40be74 MOV %RBX,0x10(%R11) |
0x40be78 VMOVUPD %XMM4,(%R11) |
0x40be7d ADD $0x58,%RSP |
0x40be81 POP %RBX |
0x40be82 POP %R12 |
0x40be84 POP %R13 |
0x40be86 POP %R14 |
0x40be88 POP %R15 |
0x40be8a POP %RBP |
0x40be8b RET |
0x40be8c NOPL (%RAX) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►14.88+ | unloadAtomsBuffer | haloExchange.c:421 | exec |
○ | exchangeData | haloExchange.c:302 | exec |
○ | redistributeAtoms | haloExchange.c:250 | exec |
○ | timestep | timestep.c:47 | exec |
○ | main | CoMD.c:125 | exec |
○ | __libc_start_main | libc-2.28.so | |
►13.69+ | unloadAtomsBuffer | haloExchange.c:414 | exec |
○ | exchangeData | memUtils.h:28 | exec |
○ | redistributeAtoms | haloExchange.c:250 | exec |
○ | timestep | timestep.c:47 | exec |
○ | main | CoMD.c:125 | exec |
○ | __libc_start_main | libc-2.28.so | |
►9.52+ | unloadAtomsBuffer | haloExchange.c:421 | exec |
○ | exchangeData | haloExchange.c:302 | exec |
○ | redistributeAtoms | haloExchange.c:250 | exec |
○ | timestep | timestep.c:47 | exec |
○ | main | CoMD.c:125 | exec |
○ | __libc_start_main | libc-2.28.so | |
►7.14+ | unloadAtomsBuffer | haloExchange.c:414 | exec |
○ | exchangeData | haloExchange.c:302 | exec |
○ | redistributeAtoms | haloExchange.c:250 | exec |
○ | timestep | timestep.c:47 | exec |
○ | main | CoMD.c:125 | exec |
○ | __libc_start_main | libc-2.28.so | |
►7.14+ | unloadAtomsBuffer | haloExchange.c:414 | exec |
○ | exchangeData | memUtils.h:28 | exec |
○ | redistributeAtoms | haloExchange.c:250 | exec |
○ | timestep | timestep.c:47 | exec |
○ | main | CoMD.c:125 | exec |
○ | __libc_start_main | libc-2.28.so | |
►7.14+ | unloadAtomsBuffer | haloExchange.c:421 | exec |
○ | exchangeData | memUtils.h:28 | exec |
○ | redistributeAtoms | haloExchange.c:250 | exec |
○ | timestep | timestep.c:47 | exec |
○ | main | CoMD.c:125 | exec |
○ | __libc_start_main | libc-2.28.so | |
►7.14+ | unloadAtomsBuffer | haloExchange.c:421 | exec |
○ | exchangeData | memUtils.h:28 | exec |
○ | redistributeAtoms | haloExchange.c:250 | exec |
○ | timestep | timestep.c:47 | exec |
○ | main | CoMD.c:125 | exec |
○ | __libc_start_main | libc-2.28.so | |
►4.76+ | unloadAtomsBuffer | haloExchange.c:414 | exec |
○ | exchangeData | memUtils.h:28 | exec |
○ | redistributeAtoms | haloExchange.c:250 | exec |
○ | timestep | timestep.c:47 | exec |
○ | main | CoMD.c:125 | exec |
○ | __libc_start_main | libc-2.28.so | |
►4.76+ | unloadAtomsBuffer | haloExchange.c:414 | exec |
○ | exchangeData | haloExchange.c:302 | exec |
○ | redistributeAtoms | haloExchange.c:250 | exec |
○ | timestep | timestep.c:47 | exec |
○ | main | CoMD.c:125 | exec |
○ | __libc_start_main | libc-2.28.so | |
►4.76+ | unloadAtomsBuffer | haloExchange.c:421 | exec |
○ | exchangeData | haloExchange.c:302 | exec |
○ | redistributeAtoms | haloExchange.c:250 | exec |
○ | timestep | timestep.c:47 | exec |
○ | main | CoMD.c:125 | exec |
○ | __libc_start_main | libc-2.28.so | |
►4.17+ | createFccLattice | initAtoms.c:100 | exec |
○ | main | CoMD.c:199 | exec |
○ | __libc_start_main | libc-2.28.so | |
►4.17+ | unloadAtomsBuffer | haloExchange.c:421 | exec |
○ | exchangeData | memUtils.h:28 | exec |
○ | redistributeAtoms | haloExchange.c:250 | exec |
○ | timestep | timestep.c:47 | exec |
○ | main | CoMD.c:125 | exec |
○ | __libc_start_main | libc-2.28.so | |
►2.98+ | unloadAtomsBuffer | haloExchange.c:414 | exec |
○ | exchangeData | haloExchange.c:302 | exec |
○ | redistributeAtoms | haloExchange.c:250 | exec |
○ | timestep | timestep.c:47 | exec |
○ | main | CoMD.c:125 | exec |
○ | __libc_start_main | libc-2.28.so | |
►1.19+ | createFccLattice | initAtoms.c:100 | exec |
○ | main | CoMD.c:199 | exec |
○ | __libc_start_main | libc-2.28.so | |
►1.19+ | exchangeData | haloExchange.c:302 | exec |
○ | redistributeAtoms | haloExchange.c:250 | exec |
○ | timestep | timestep.c:47 | exec |
○ | main | CoMD.c:125 | exec |
○ | __libc_start_main | libc-2.28.so | |
►1.19+ | exchangeData | memUtils.h:28 | exec |
○ | redistributeAtoms | haloExchange.c:250 | exec |
○ | timestep | timestep.c:47 | exec |
○ | main | CoMD.c:125 | exec |
○ | __libc_start_main | libc-2.28.so |
Path / |
Source file and lines | linkCells.c:173-195 |
Module | exec |
nb instructions | 59 |
nb uops | 61 |
loop length | 201 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 6 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 5 |
micro-operation queue | 10.17 cycles |
front end | 10.17 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.00 | 2.90 | 6.00 | 6.00 | 9.75 | 2.90 | 2.80 | 9.75 | 9.75 | 9.75 | 2.90 | 6.00 |
cycles | 3.00 | 2.90 | 6.00 | 6.00 | 9.75 | 2.90 | 2.80 | 9.75 | 9.75 | 9.75 | 2.90 | 6.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 10.21 |
Stall cycles | 0.00 |
Front-end | 10.17 |
Dispatch | 9.75 |
Overall L1 | 10.17 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 53% |
load | 66% |
store | 62% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 25% |
load | 34% |
store | 40% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 7% |
load | 6% |
store | 7% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 7% |
all | 19% |
load | 20% |
store | 20% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 12% |
load | 13% |
store | 15% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 8% |
Source file and lines | linkCells.c:173-195 |
Module | exec |
nb instructions | 60 |
nb uops | 63 |
loop length | 204 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 6 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 5 |
micro-operation queue | 10.50 cycles |
front end | 10.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.10 | 3.00 | 6.33 | 6.33 | 10.00 | 3.00 | 2.90 | 10.00 | 10.00 | 10.00 | 3.00 | 6.33 |
cycles | 3.10 | 3.00 | 6.33 | 6.33 | 10.00 | 3.00 | 2.90 | 10.00 | 10.00 | 10.00 | 3.00 | 6.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 10.62 |
Stall cycles | 0.00 |
Front-end | 10.50 |
Dispatch | 10.00 |
Overall L1 | 10.50 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 53% |
load | 66% |
store | 62% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 25% |
load | 28% |
store | 38% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 7% |
load | 6% |
store | 7% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 7% |
all | 19% |
load | 20% |
store | 20% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 12% |
load | 12% |
store | 15% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 8% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
VUNPCKLPD %XMM1,%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VUNPCKLPD %XMM4,%XMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA -0x50(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %EDX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %ECX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
VMOVQ %XMM5,%RBX | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
SUB $0x58,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVSD %XMM2,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD %XMM2,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVAPD %XMM3,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVAPD %XMM1,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVAPD %XMM1,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
CALL 40bd20 <getBoxFromCoord> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x78(%R15),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD -0x58(%RBP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %EAX,%RSI | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %EAX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVAPD -0x70(%RBP),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD -0x80(%RBP),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
LEA (%RCX,%RSI,4),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x6,%EDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV (%RDI),%R8D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %R8D,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %EAX,0xc(%R15) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JLE 40be3d <putAtomInBox+0x7d> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
INCL (%R14) | 3 | 0.20 | 0.20 | 0.33 | 0.33 | 0.50 | 0.20 | 0.20 | 0.50 | 0.50 | 0.50 | 0.20 | 0.33 | 1 | 0.50 |
MOV (%RDI),%R8D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %EDX,%RAX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV 0x18(%R14),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R14),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
INC %R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA (%RAX,%RAX,2),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x10(%R14),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R8D,(%RDI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SAL $0x3,%R11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %R13D,(%R9,%RAX,4) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %R11,%R15 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD 0x20(%R14),%R11 | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV %R12D,(%R10,%RAX,4) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVUPD %XMM0,(%R15) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVSD %XMM2,0x10(%R15) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,0x10(%R11) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVUPD %XMM4,(%R11) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
ADD $0x58,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
Source file and lines | linkCells.c:173-195 |
Module | exec |
nb instructions | 58 |
nb uops | 59 |
loop length | 198 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 6 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 5 |
micro-operation queue | 9.83 cycles |
front end | 9.83 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.90 | 2.80 | 5.67 | 5.67 | 9.50 | 2.80 | 2.70 | 9.50 | 9.50 | 9.50 | 2.80 | 5.67 |
cycles | 2.90 | 2.80 | 5.67 | 5.67 | 9.50 | 2.80 | 2.70 | 9.50 | 9.50 | 9.50 | 2.80 | 5.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 9.80 |
Stall cycles | 0.00 |
Front-end | 9.83 |
Dispatch | 9.50 |
Overall L1 | 9.83 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 53% |
load | 66% |
store | 62% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 26% |
load | 40% |
store | 41% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 7% |
load | 6% |
store | 7% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 7% |
all | 19% |
load | 20% |
store | 20% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 13% |
load | 15% |
store | 16% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 8% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
VUNPCKLPD %XMM1,%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VUNPCKLPD %XMM4,%XMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA -0x50(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %EDX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %ECX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
VMOVQ %XMM5,%RBX | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
SUB $0x58,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVSD %XMM2,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD %XMM2,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVAPD %XMM3,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVAPD %XMM1,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVAPD %XMM1,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
CALL 40bd20 <getBoxFromCoord> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x78(%R15),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD -0x58(%RBP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %EAX,%RSI | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %EAX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVAPD -0x70(%RBP),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD -0x80(%RBP),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
LEA (%RCX,%RSI,4),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x6,%EDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV (%RDI),%R8D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %R8D,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %EAX,0xc(%R15) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JLE 40be3d <putAtomInBox+0x7d> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD %EDX,%RAX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV 0x18(%R14),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R14),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
INC %R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA (%RAX,%RAX,2),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x10(%R14),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R8D,(%RDI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SAL $0x3,%R11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %R13D,(%R9,%RAX,4) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %R11,%R15 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD 0x20(%R14),%R11 | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV %R12D,(%R10,%RAX,4) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVUPD %XMM0,(%R15) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVSD %XMM2,0x10(%R15) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,0x10(%R11) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVUPD %XMM4,(%R11) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
ADD $0x58,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
Name | Coverage (%) | Time (s) |
---|---|---|
○putAtomInBox | 0.07 | 0.01 |