Function: computeVcm._omp_fn.0 | Module: exec | Source: initAtoms.c:218-228 | Coverage: 0.02% |
---|
Function: computeVcm._omp_fn.0 | Module: exec | Source: initAtoms.c:218-228 | Coverage: 0.02% |
---|
/scratch_na/users/xoserete/qaas_runs/171-172-2581/intel/CoMD/build/CoMD/CoMD/src-openmp/initAtoms.c: 218 - 228 |
-------------------------------------------------------------------------------- |
218: #pragma omp parallel for reduction(+:v0) reduction(+:v1) reduction(+:v2) reduction(+:v3) |
219: for (int iBox=0; iBox<s->boxes->nLocalBoxes; ++iBox) |
220: { |
221: for (int iOff=MAXATOMS*iBox, ii=0; ii<s->boxes->nAtoms[iBox]; ++ii, ++iOff) |
222: { |
223: v0 += s->atoms->p[iOff][0]; |
224: v1 += s->atoms->p[iOff][1]; |
225: v2 += s->atoms->p[iOff][2]; |
226: |
227: int iSpecies = s->atoms->iSpecies[iOff]; |
228: v3 += s->species[iSpecies].mass; |
0x408b80 PUSH %RBP |
0x408b81 MOV %RSP,%RBP |
0x408b84 PUSH %R14 |
0x408b86 PUSH %R13 |
0x408b88 PUSH %R12 |
0x408b8a PUSH %RBX |
0x408b8b MOV %RDI,%RBX |
0x408b8e AND $-0x20,%RSP |
0x408b92 SUB $0x20,%RSP |
0x408b96 MOV (%RDI),%R12 |
0x408b99 CALL 403070 <omp_get_num_threads@plt> |
0x408b9e MOV 0x18(%R12),%R14 |
0x408ba3 MOV %EAX,%R13D |
0x408ba6 CALL 403160 <omp_get_thread_num@plt> |
0x408bab MOV %EAX,%EDI |
0x408bad MOV 0xc(%R14),%EAX |
0x408bb1 CLTD |
0x408bb2 IDIV %R13D |
0x408bb5 CMP %EDX,%EDI |
0x408bb7 JL 408dd7 |
0x408bbd IMUL %EAX,%EDI |
0x408bc0 VXORPD %XMM0,%XMM0,%XMM0 |
0x408bc4 ADD %EDX,%EDI |
0x408bc6 ADD %EDI,%EAX |
0x408bc8 CMP %EAX,%EDI |
0x408bca JGE 408da7 |
0x408bd0 MOVSXD %EDI,%R8 |
0x408bd3 MOV 0x78(%R14),%R11 |
0x408bd7 SAL $0x6,%EDI |
0x408bda LEA (%R8,%R8,2),%R10 |
0x408bde SAL $0x9,%R10 |
0x408be2 NOPW (%RAX,%RAX,1) |
(58) 0x408be8 MOVSXD (%R11,%R8,4),%R9 |
(58) 0x408bec TEST %R9D,%R9D |
(58) 0x408bef JLE 408d91 |
(58) 0x408bf5 MOV 0x20(%R12),%RCX |
(58) 0x408bfa MOVSXD %EDI,%R14 |
(58) 0x408bfd MOV 0x28(%R12),%RSI |
(58) 0x408c02 MOV 0x10(%RCX),%R13 |
(58) 0x408c06 MOV 0x20(%RCX),%RDX |
(58) 0x408c0a LEA (%R13,%R14,4),%RCX |
(58) 0x408c0f MOV %R8,%R14 |
(58) 0x408c12 ADD %R10,%RDX |
(58) 0x408c15 SAL $0x6,%R14 |
(58) 0x408c19 ADD %R14,%R9 |
(58) 0x408c1c LEA (%R13,%R9,4),%R9 |
(58) 0x408c21 MOV %R9,%R13 |
(58) 0x408c24 SUB %RCX,%R13 |
(58) 0x408c27 SUB $0x4,%R13 |
(58) 0x408c2b SHR $0x2,%R13 |
(58) 0x408c2f INC %R13 |
(58) 0x408c32 AND $0x3,%R13D |
(58) 0x408c36 JE 408ce0 |
(58) 0x408c3c CMP $0x1,%R13 |
(58) 0x408c40 JE 408ca7 |
(58) 0x408c42 CMP $0x2,%R13 |
(58) 0x408c46 JE 408c77 |
(58) 0x408c48 MOVSXD (%RCX),%R14 |
(58) 0x408c4b VMOVSD 0x8(%RDX),%XMM2 |
(58) 0x408c50 ADD $0x4,%RCX |
(58) 0x408c54 ADD $0x18,%RDX |
(58) 0x408c58 SAL $0x4,%R14 |
(58) 0x408c5c VMOVHPD -0x18(%RDX),%XMM2,%XMM3 |
(58) 0x408c61 VMOVSD 0x8(%RSI,%R14,1),%XMM1 |
(58) 0x408c68 VMOVHPD -0x8(%RDX),%XMM1,%XMM4 |
(58) 0x408c6d VINSERTF128 $0x1,%XMM3,%YMM4,%YMM5 |
(58) 0x408c73 VADDPD %YMM5,%YMM0,%YMM0 |
(58) 0x408c77 MOVSXD (%RCX),%R13 |
(58) 0x408c7a VMOVSD 0x8(%RDX),%XMM6 |
(58) 0x408c7f ADD $0x4,%RCX |
(58) 0x408c83 ADD $0x18,%RDX |
(58) 0x408c87 SAL $0x4,%R13 |
(58) 0x408c8b VMOVHPD -0x18(%RDX),%XMM6,%XMM7 |
(58) 0x408c90 VMOVSD 0x8(%RSI,%R13,1),%XMM8 |
(58) 0x408c97 VMOVHPD -0x8(%RDX),%XMM8,%XMM9 |
(58) 0x408c9c VINSERTF128 $0x1,%XMM7,%YMM9,%YMM10 |
(58) 0x408ca2 VADDPD %YMM10,%YMM0,%YMM0 |
(58) 0x408ca7 MOVSXD (%RCX),%R14 |
(58) 0x408caa VMOVSD 0x8(%RDX),%XMM11 |
(58) 0x408caf ADD $0x4,%RCX |
(58) 0x408cb3 ADD $0x18,%RDX |
(58) 0x408cb7 SAL $0x4,%R14 |
(58) 0x408cbb VMOVHPD -0x18(%RDX),%XMM11,%XMM12 |
(58) 0x408cc0 VMOVSD 0x8(%RSI,%R14,1),%XMM13 |
(58) 0x408cc7 VMOVHPD -0x8(%RDX),%XMM13,%XMM14 |
(58) 0x408ccc VINSERTF128 $0x1,%XMM12,%YMM14,%YMM15 |
(58) 0x408cd2 VADDPD %YMM15,%YMM0,%YMM0 |
(58) 0x408cd7 CMP %R9,%RCX |
(58) 0x408cda JE 408d91 |
(59) 0x408ce0 MOVSXD (%RCX),%R13 |
(59) 0x408ce3 VMOVSD 0x8(%RDX),%XMM2 |
(59) 0x408ce8 ADD $0x10,%RCX |
(59) 0x408cec ADD $0x60,%RDX |
(59) 0x408cf0 MOVSXD -0xc(%RCX),%R14 |
(59) 0x408cf4 VMOVSD -0x40(%RDX),%XMM7 |
(59) 0x408cf9 SAL $0x4,%R13 |
(59) 0x408cfd VMOVHPD -0x60(%RDX),%XMM2,%XMM3 |
(59) 0x408d02 VMOVSD -0x28(%RDX),%XMM13 |
(59) 0x408d07 VMOVSD 0x8(%RSI,%R13,1),%XMM1 |
(59) 0x408d0e SAL $0x4,%R14 |
(59) 0x408d12 MOVSXD -0x8(%RCX),%R13 |
(59) 0x408d16 VMOVHPD -0x48(%RDX),%XMM7,%XMM8 |
(59) 0x408d1b VMOVSD 0x8(%RSI,%R14,1),%XMM9 |
(59) 0x408d22 MOVSXD -0x4(%RCX),%R14 |
(59) 0x408d26 VMOVHPD -0x30(%RDX),%XMM13,%XMM14 |
(59) 0x408d2b VMOVHPD -0x50(%RDX),%XMM1,%XMM4 |
(59) 0x408d30 SAL $0x4,%R13 |
(59) 0x408d34 VMOVSD -0x10(%RDX),%XMM1 |
(59) 0x408d39 VINSERTF128 $0x1,%XMM3,%YMM4,%YMM5 |
(59) 0x408d3f VMOVHPD -0x38(%RDX),%XMM9,%XMM10 |
(59) 0x408d44 VMOVSD 0x8(%RSI,%R13,1),%XMM15 |
(59) 0x408d4b SAL $0x4,%R14 |
(59) 0x408d4f VADDPD %YMM5,%YMM0,%YMM6 |
(59) 0x408d53 VINSERTF128 $0x1,%XMM8,%YMM10,%YMM11 |
(59) 0x408d59 VMOVSD 0x8(%RSI,%R14,1),%XMM5 |
(59) 0x408d60 VMOVHPD -0x18(%RDX),%XMM1,%XMM4 |
(59) 0x408d65 VMOVHPD -0x20(%RDX),%XMM15,%XMM0 |
(59) 0x408d6a VINSERTF128 $0x1,%XMM14,%YMM0,%YMM2 |
(59) 0x408d70 VADDPD %YMM11,%YMM6,%YMM12 |
(59) 0x408d75 VMOVHPD -0x8(%RDX),%XMM5,%XMM6 |
(59) 0x408d7a VINSERTF128 $0x1,%XMM4,%YMM6,%YMM7 |
(59) 0x408d80 VADDPD %YMM2,%YMM12,%YMM3 |
(59) 0x408d84 VADDPD %YMM7,%YMM3,%YMM0 |
(59) 0x408d88 CMP %R9,%RCX |
(59) 0x408d8b JNE 408ce0 |
(58) 0x408d91 INC %R8 |
(58) 0x408d94 ADD $0x40,%EDI |
(58) 0x408d97 ADD $0x600,%R10 |
(58) 0x408d9e CMP %R8D,%EAX |
(58) 0x408da1 JG 408be8 |
0x408da7 VMOVAPD %YMM0,(%RSP) |
0x408dac VZEROUPPER |
0x408daf CALL 4031c0 <GOMP_atomic_start@plt> |
0x408db4 VMOVAPD (%RSP),%YMM8 |
0x408db9 VADDPD 0x8(%RBX),%YMM8,%YMM9 |
0x408dbe VMOVUPD %YMM9,0x8(%RBX) |
0x408dc3 VZEROUPPER |
0x408dc6 LEA -0x20(%RBP),%RSP |
0x408dca POP %RBX |
0x408dcb POP %R12 |
0x408dcd POP %R13 |
0x408dcf POP %R14 |
0x408dd1 POP %RBP |
0x408dd2 JMP 403190 |
0x408dd7 INC %EAX |
0x408dd9 XOR %EDX,%EDX |
0x408ddb JMP 408bbd |
Path / |
Source file and lines | initAtoms.c:218-228 |
Module | exec |
nb instructions | 49 |
nb uops | 57 |
loop length | 161 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 3 |
used zmm registers | 0 |
nb stack references | 2 |
micro-operation queue | 9.50 cycles |
front end | 9.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.30 | 4.00 | 3.67 | 3.67 | 5.00 | 3.20 | 3.30 | 5.00 | 5.00 | 5.00 | 3.20 | 3.67 |
cycles | 3.30 | 5.53 | 3.67 | 3.67 | 5.00 | 3.20 | 3.30 | 5.00 | 5.00 | 5.00 | 3.20 | 3.67 |
Cycles executing div or sqrt instructions | 6.00 |
FE+BE cycles | 20.14-21.59 |
Stall cycles | 11.11-12.55 |
LM full (events) | 12.56-14.01 |
Front-end | 9.50 |
Dispatch | 5.53 |
DIV/SQRT | 6.00 |
Overall L1 | 9.50 |
all | 20% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 22% |
all | 100% |
load | 100% |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 46% |
load | 66% |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 33% |
all | 11% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 45% |
load | 50% |
store | 50% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 22% |
load | 37% |
store | 50% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 13% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV (%RDI),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALL 403070 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x18(%R12),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EAX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 403160 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0xc(%R14),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CLTD | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %R13D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
CMP %EDX,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 408dd7 <computeVcm._omp_fn.0+0x257> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EAX,%EDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %EDX,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %EDI,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %EAX,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 408da7 <computeVcm._omp_fn.0+0x227> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD %EDI,%R8 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV 0x78(%R14),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SAL $0x6,%EDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA (%R8,%R8,2),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x9,%R10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVAPD %YMM0,(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 4031c0 <GOMP_atomic_start@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
VMOVAPD (%RSP),%YMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VADDPD 0x8(%RBX),%YMM8,%YMM9 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
VMOVUPD %YMM9,0x8(%RBX) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x20(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
JMP 403190 <GOMP_atomic_end@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 408bbd <computeVcm._omp_fn.0+0x3d> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
Source file and lines | initAtoms.c:218-228 |
Module | exec |
nb instructions | 49 |
nb uops | 57 |
loop length | 161 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 3 |
used zmm registers | 0 |
nb stack references | 2 |
micro-operation queue | 9.50 cycles |
front end | 9.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.30 | 4.00 | 3.67 | 3.67 | 5.00 | 3.20 | 3.30 | 5.00 | 5.00 | 5.00 | 3.20 | 3.67 |
cycles | 3.30 | 5.53 | 3.67 | 3.67 | 5.00 | 3.20 | 3.30 | 5.00 | 5.00 | 5.00 | 3.20 | 3.67 |
Cycles executing div or sqrt instructions | 6.00 |
FE+BE cycles | 20.14-21.59 |
Stall cycles | 11.11-12.55 |
LM full (events) | 12.56-14.01 |
Front-end | 9.50 |
Dispatch | 5.53 |
DIV/SQRT | 6.00 |
Overall L1 | 9.50 |
all | 20% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 22% |
all | 100% |
load | 100% |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 46% |
load | 66% |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 33% |
all | 11% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 45% |
load | 50% |
store | 50% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 22% |
load | 37% |
store | 50% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 13% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV (%RDI),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALL 403070 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x18(%R12),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EAX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 403160 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0xc(%R14),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CLTD | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %R13D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
CMP %EDX,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 408dd7 <computeVcm._omp_fn.0+0x257> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EAX,%EDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %EDX,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %EDI,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %EAX,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 408da7 <computeVcm._omp_fn.0+0x227> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD %EDI,%R8 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV 0x78(%R14),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SAL $0x6,%EDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA (%R8,%R8,2),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x9,%R10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVAPD %YMM0,(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 4031c0 <GOMP_atomic_start@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
VMOVAPD (%RSP),%YMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VADDPD 0x8(%RBX),%YMM8,%YMM9 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
VMOVUPD %YMM9,0x8(%RBX) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x20(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
JMP 403190 <GOMP_atomic_end@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 408bbd <computeVcm._omp_fn.0+0x3d> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼computeVcm._omp_fn.0– | 0.02 | 0 |
▼Loop 58 - initAtoms.c:221-228 - exec– | 0.01 | 0 |
○Loop 59 - initAtoms.c:221-228 - exec | 0.01 | 0 |