Function: computeVcm._omp_fn.0 | Module: exec | Source: initAtoms.c:218-228 | Coverage: 0.03% |
---|
Function: computeVcm._omp_fn.0 | Module: exec | Source: initAtoms.c:218-228 | Coverage: 0.03% |
---|
/scratch_na/users/xoserete/qaas_runs/171-172-4338/intel/CoMD/build/CoMD/CoMD/src-openmp/initAtoms.c: 218 - 228 |
-------------------------------------------------------------------------------- |
218: #pragma omp parallel for reduction(+:v0) reduction(+:v1) reduction(+:v2) reduction(+:v3) |
219: for (int iBox=0; iBox<s->boxes->nLocalBoxes; ++iBox) |
220: { |
221: for (int iOff=MAXATOMS*iBox, ii=0; ii<s->boxes->nAtoms[iBox]; ++ii, ++iOff) |
222: { |
223: v0 += s->atoms->p[iOff][0]; |
224: v1 += s->atoms->p[iOff][1]; |
225: v2 += s->atoms->p[iOff][2]; |
226: |
227: int iSpecies = s->atoms->iSpecies[iOff]; |
228: v3 += s->species[iSpecies].mass; |
0x408d70 PUSH %RBP |
0x408d71 MOV %RSP,%RBP |
0x408d74 PUSH %R14 |
0x408d76 PUSH %R13 |
0x408d78 PUSH %R12 |
0x408d7a PUSH %RBX |
0x408d7b MOV %RDI,%RBX |
0x408d7e AND $-0x20,%RSP |
0x408d82 SUB $0x20,%RSP |
0x408d86 MOV (%RDI),%R12 |
0x408d89 CALL 403070 <omp_get_num_threads@plt> |
0x408d8e MOV 0x18(%R12),%R14 |
0x408d93 MOV %EAX,%R13D |
0x408d96 CALL 403150 <omp_get_thread_num@plt> |
0x408d9b MOV %EAX,%EDI |
0x408d9d MOV 0xc(%R14),%EAX |
0x408da1 CLTD |
0x408da2 IDIV %R13D |
0x408da5 CMP %EDX,%EDI |
0x408da7 JL 408fc7 |
0x408dad IMUL %EAX,%EDI |
0x408db0 VXORPD %XMM0,%XMM0,%XMM0 |
0x408db4 ADD %EDX,%EDI |
0x408db6 ADD %EDI,%EAX |
0x408db8 CMP %EAX,%EDI |
0x408dba JGE 408f97 |
0x408dc0 MOVSXD %EDI,%R8 |
0x408dc3 MOV 0x78(%R14),%R11 |
0x408dc7 SAL $0x6,%EDI |
0x408dca LEA (%R8,%R8,2),%R10 |
0x408dce SAL $0x9,%R10 |
0x408dd2 NOPW (%RAX,%RAX,1) |
(51) 0x408dd8 MOVSXD (%R11,%R8,4),%R9 |
(51) 0x408ddc TEST %R9D,%R9D |
(51) 0x408ddf JLE 408f81 |
(51) 0x408de5 MOV 0x20(%R12),%RCX |
(51) 0x408dea MOVSXD %EDI,%R14 |
(51) 0x408ded MOV 0x28(%R12),%RSI |
(51) 0x408df2 MOV 0x10(%RCX),%R13 |
(51) 0x408df6 MOV 0x20(%RCX),%RDX |
(51) 0x408dfa LEA (%R13,%R14,4),%RCX |
(51) 0x408dff MOV %R8,%R14 |
(51) 0x408e02 ADD %R10,%RDX |
(51) 0x408e05 SAL $0x6,%R14 |
(51) 0x408e09 ADD %R14,%R9 |
(51) 0x408e0c LEA (%R13,%R9,4),%R9 |
(51) 0x408e11 MOV %R9,%R13 |
(51) 0x408e14 SUB %RCX,%R13 |
(51) 0x408e17 SUB $0x4,%R13 |
(51) 0x408e1b SHR $0x2,%R13 |
(51) 0x408e1f INC %R13 |
(51) 0x408e22 AND $0x3,%R13D |
(51) 0x408e26 JE 408ed0 |
(51) 0x408e2c CMP $0x1,%R13 |
(51) 0x408e30 JE 408e97 |
(51) 0x408e32 CMP $0x2,%R13 |
(51) 0x408e36 JE 408e67 |
(51) 0x408e38 MOVSXD (%RCX),%R14 |
(51) 0x408e3b VMOVSD 0x8(%RDX),%XMM2 |
(51) 0x408e40 ADD $0x4,%RCX |
(51) 0x408e44 ADD $0x18,%RDX |
(51) 0x408e48 SAL $0x4,%R14 |
(51) 0x408e4c VMOVHPD -0x18(%RDX),%XMM2,%XMM3 |
(51) 0x408e51 VMOVSD 0x8(%RSI,%R14,1),%XMM1 |
(51) 0x408e58 VMOVHPD -0x8(%RDX),%XMM1,%XMM4 |
(51) 0x408e5d VINSERTF128 $0x1,%XMM3,%YMM4,%YMM5 |
(51) 0x408e63 VADDPD %YMM5,%YMM0,%YMM0 |
(51) 0x408e67 MOVSXD (%RCX),%R13 |
(51) 0x408e6a VMOVSD 0x8(%RDX),%XMM6 |
(51) 0x408e6f ADD $0x4,%RCX |
(51) 0x408e73 ADD $0x18,%RDX |
(51) 0x408e77 SAL $0x4,%R13 |
(51) 0x408e7b VMOVHPD -0x18(%RDX),%XMM6,%XMM7 |
(51) 0x408e80 VMOVSD 0x8(%RSI,%R13,1),%XMM8 |
(51) 0x408e87 VMOVHPD -0x8(%RDX),%XMM8,%XMM9 |
(51) 0x408e8c VINSERTF128 $0x1,%XMM7,%YMM9,%YMM10 |
(51) 0x408e92 VADDPD %YMM10,%YMM0,%YMM0 |
(51) 0x408e97 MOVSXD (%RCX),%R14 |
(51) 0x408e9a VMOVSD 0x8(%RDX),%XMM11 |
(51) 0x408e9f ADD $0x4,%RCX |
(51) 0x408ea3 ADD $0x18,%RDX |
(51) 0x408ea7 SAL $0x4,%R14 |
(51) 0x408eab VMOVHPD -0x18(%RDX),%XMM11,%XMM12 |
(51) 0x408eb0 VMOVSD 0x8(%RSI,%R14,1),%XMM13 |
(51) 0x408eb7 VMOVHPD -0x8(%RDX),%XMM13,%XMM14 |
(51) 0x408ebc VINSERTF128 $0x1,%XMM12,%YMM14,%YMM15 |
(51) 0x408ec2 VADDPD %YMM15,%YMM0,%YMM0 |
(51) 0x408ec7 CMP %R9,%RCX |
(51) 0x408eca JE 408f81 |
(52) 0x408ed0 MOVSXD (%RCX),%R13 |
(52) 0x408ed3 VMOVSD 0x8(%RDX),%XMM2 |
(52) 0x408ed8 ADD $0x10,%RCX |
(52) 0x408edc ADD $0x60,%RDX |
(52) 0x408ee0 MOVSXD -0xc(%RCX),%R14 |
(52) 0x408ee4 VMOVSD -0x40(%RDX),%XMM7 |
(52) 0x408ee9 SAL $0x4,%R13 |
(52) 0x408eed VMOVHPD -0x60(%RDX),%XMM2,%XMM3 |
(52) 0x408ef2 VMOVSD -0x28(%RDX),%XMM13 |
(52) 0x408ef7 VMOVSD 0x8(%RSI,%R13,1),%XMM1 |
(52) 0x408efe SAL $0x4,%R14 |
(52) 0x408f02 MOVSXD -0x8(%RCX),%R13 |
(52) 0x408f06 VMOVHPD -0x48(%RDX),%XMM7,%XMM8 |
(52) 0x408f0b VMOVSD 0x8(%RSI,%R14,1),%XMM9 |
(52) 0x408f12 MOVSXD -0x4(%RCX),%R14 |
(52) 0x408f16 VMOVHPD -0x30(%RDX),%XMM13,%XMM14 |
(52) 0x408f1b VMOVHPD -0x50(%RDX),%XMM1,%XMM4 |
(52) 0x408f20 SAL $0x4,%R13 |
(52) 0x408f24 VMOVSD -0x10(%RDX),%XMM1 |
(52) 0x408f29 VINSERTF128 $0x1,%XMM3,%YMM4,%YMM5 |
(52) 0x408f2f VMOVHPD -0x38(%RDX),%XMM9,%XMM10 |
(52) 0x408f34 VMOVSD 0x8(%RSI,%R13,1),%XMM15 |
(52) 0x408f3b SAL $0x4,%R14 |
(52) 0x408f3f VADDPD %YMM5,%YMM0,%YMM6 |
(52) 0x408f43 VINSERTF128 $0x1,%XMM8,%YMM10,%YMM11 |
(52) 0x408f49 VMOVSD 0x8(%RSI,%R14,1),%XMM5 |
(52) 0x408f50 VMOVHPD -0x18(%RDX),%XMM1,%XMM4 |
(52) 0x408f55 VMOVHPD -0x20(%RDX),%XMM15,%XMM0 |
(52) 0x408f5a VINSERTF128 $0x1,%XMM14,%YMM0,%YMM2 |
(52) 0x408f60 VADDPD %YMM11,%YMM6,%YMM12 |
(52) 0x408f65 VMOVHPD -0x8(%RDX),%XMM5,%XMM6 |
(52) 0x408f6a VINSERTF128 $0x1,%XMM4,%YMM6,%YMM7 |
(52) 0x408f70 VADDPD %YMM2,%YMM12,%YMM3 |
(52) 0x408f74 VADDPD %YMM7,%YMM3,%YMM0 |
(52) 0x408f78 CMP %R9,%RCX |
(52) 0x408f7b JNE 408ed0 |
(51) 0x408f81 INC %R8 |
(51) 0x408f84 ADD $0x40,%EDI |
(51) 0x408f87 ADD $0x600,%R10 |
(51) 0x408f8e CMP %R8D,%EAX |
(51) 0x408f91 JG 408dd8 |
0x408f97 VMOVAPD %YMM0,(%RSP) |
0x408f9c VZEROUPPER |
0x408f9f CALL 4031b0 <GOMP_atomic_start@plt> |
0x408fa4 VMOVAPD (%RSP),%YMM8 |
0x408fa9 VADDPD 0x8(%RBX),%YMM8,%YMM9 |
0x408fae VMOVUPD %YMM9,0x8(%RBX) |
0x408fb3 VZEROUPPER |
0x408fb6 LEA -0x20(%RBP),%RSP |
0x408fba POP %RBX |
0x408fbb POP %R12 |
0x408fbd POP %R13 |
0x408fbf POP %R14 |
0x408fc1 POP %RBP |
0x408fc2 JMP 403180 |
0x408fc7 INC %EAX |
0x408fc9 XOR %EDX,%EDX |
0x408fcb JMP 408dad |
Path / |
Source file and lines | initAtoms.c:218-228 |
Module | exec |
nb instructions | 49 |
nb uops | 57 |
loop length | 161 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 3 |
used zmm registers | 0 |
nb stack references | 2 |
micro-operation queue | 9.50 cycles |
front end | 9.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.30 | 4.00 | 3.67 | 3.67 | 5.00 | 3.20 | 3.30 | 5.00 | 5.00 | 5.00 | 3.20 | 3.67 |
cycles | 3.30 | 5.53 | 3.67 | 3.67 | 5.00 | 3.20 | 3.30 | 5.00 | 5.00 | 5.00 | 3.20 | 3.67 |
Cycles executing div or sqrt instructions | 6.00 |
FE+BE cycles | 20.14-21.59 |
Stall cycles | 11.11-12.55 |
LM full (events) | 12.56-14.01 |
Front-end | 9.50 |
Dispatch | 5.53 |
DIV/SQRT | 6.00 |
Overall L1 | 9.50 |
all | 20% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 22% |
all | 100% |
load | 100% |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 46% |
load | 66% |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 33% |
all | 11% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 45% |
load | 50% |
store | 50% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 22% |
load | 37% |
store | 50% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 13% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV (%RDI),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALL 403070 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x18(%R12),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EAX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 403150 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0xc(%R14),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CLTD | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %R13D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
CMP %EDX,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 408fc7 <computeVcm._omp_fn.0+0x257> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EAX,%EDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %EDX,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %EDI,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %EAX,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 408f97 <computeVcm._omp_fn.0+0x227> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD %EDI,%R8 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV 0x78(%R14),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SAL $0x6,%EDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA (%R8,%R8,2),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x9,%R10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVAPD %YMM0,(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 4031b0 <GOMP_atomic_start@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
VMOVAPD (%RSP),%YMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VADDPD 0x8(%RBX),%YMM8,%YMM9 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
VMOVUPD %YMM9,0x8(%RBX) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x20(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
JMP 403180 <GOMP_atomic_end@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 408dad <computeVcm._omp_fn.0+0x3d> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
Source file and lines | initAtoms.c:218-228 |
Module | exec |
nb instructions | 49 |
nb uops | 57 |
loop length | 161 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 3 |
used zmm registers | 0 |
nb stack references | 2 |
micro-operation queue | 9.50 cycles |
front end | 9.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.30 | 4.00 | 3.67 | 3.67 | 5.00 | 3.20 | 3.30 | 5.00 | 5.00 | 5.00 | 3.20 | 3.67 |
cycles | 3.30 | 5.53 | 3.67 | 3.67 | 5.00 | 3.20 | 3.30 | 5.00 | 5.00 | 5.00 | 3.20 | 3.67 |
Cycles executing div or sqrt instructions | 6.00 |
FE+BE cycles | 20.14-21.59 |
Stall cycles | 11.11-12.55 |
LM full (events) | 12.56-14.01 |
Front-end | 9.50 |
Dispatch | 5.53 |
DIV/SQRT | 6.00 |
Overall L1 | 9.50 |
all | 20% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 22% |
all | 100% |
load | 100% |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 46% |
load | 66% |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 33% |
all | 11% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 45% |
load | 50% |
store | 50% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 22% |
load | 37% |
store | 50% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 13% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV (%RDI),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALL 403070 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x18(%R12),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EAX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 403150 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0xc(%R14),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CLTD | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %R13D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
CMP %EDX,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 408fc7 <computeVcm._omp_fn.0+0x257> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EAX,%EDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %EDX,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %EDI,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %EAX,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 408f97 <computeVcm._omp_fn.0+0x227> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD %EDI,%R8 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV 0x78(%R14),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SAL $0x6,%EDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA (%R8,%R8,2),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x9,%R10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVAPD %YMM0,(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 4031b0 <GOMP_atomic_start@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
VMOVAPD (%RSP),%YMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VADDPD 0x8(%RBX),%YMM8,%YMM9 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
VMOVUPD %YMM9,0x8(%RBX) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x20(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
JMP 403180 <GOMP_atomic_end@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 408dad <computeVcm._omp_fn.0+0x3d> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼computeVcm._omp_fn.0– | 0.03 | 0.01 |
▼Loop 51 - initAtoms.c:221-228 - exec– | 0.01 | 0 |
○Loop 52 - initAtoms.c:221-228 - exec | 0.02 | 0 |