Function: setTemperature._omp_fn.0 | Module: exec | Source: initAtoms.c:151-162 [...] | Coverage: 0.01% |
---|
Function: setTemperature._omp_fn.0 | Module: exec | Source: initAtoms.c:151-162 [...] | Coverage: 0.01% |
---|
/scratch_na/users/xoserete/qaas_runs/171-172-4338/intel/CoMD/build/CoMD/CoMD/src-openmp/initAtoms.c: 151 - 162 |
-------------------------------------------------------------------------------- |
151: #pragma omp parallel for |
152: for (int iBox=0; iBox<s->boxes->nLocalBoxes; ++iBox) |
153: { |
154: for (int iOff=MAXATOMS*iBox, ii=0; ii<s->boxes->nAtoms[iBox]; ++ii, ++iOff) |
155: { |
156: int iType = s->atoms->iSpecies[iOff]; |
157: real_t mass = s->species[iType].mass; |
158: real_t sigma = sqrt(kB_eV * temperature/mass); |
159: uint64_t seed = mkSeed(s->atoms->gid[iOff], 123); |
160: s->atoms->p[iOff][0] = mass * sigma * gasdev(&seed); |
161: s->atoms->p[iOff][1] = mass * sigma * gasdev(&seed); |
162: s->atoms->p[iOff][2] = mass * sigma * gasdev(&seed); |
/scratch_na/users/xoserete/qaas_runs/171-172-4338/intel/CoMD/build/CoMD/CoMD/src-openmp/random.c: 45 - 70 |
-------------------------------------------------------------------------------- |
45: *seed *= UINT64_C(437799614237992725); |
46: *seed %= UINT64_C(2305843009213693951); |
[...] |
68: uint32_t s2 = (id+callSite) * UINT32_C(2654435761); |
69: |
70: uint64_t iSeed = (UINT64_C(0x100000000) * s1) + s2; |
0x40edc0 PUSH %RBP |
0x40edc1 MOV %RSP,%RBP |
0x40edc4 PUSH %R15 |
0x40edc6 PUSH %R14 |
0x40edc8 PUSH %R13 |
0x40edca MOV %RDI,%R13 |
0x40edcd PUSH %R12 |
0x40edcf PUSH %RBX |
0x40edd0 SUB $0x68,%RSP |
0x40edd4 MOV (%RDI),%RBX |
0x40edd7 CALL 403070 <omp_get_num_threads@plt> |
0x40eddc MOV 0x18(%RBX),%R14 |
0x40ede0 MOV %EAX,%R12D |
0x40ede3 CALL 403150 <omp_get_thread_num@plt> |
0x40ede8 MOV %EAX,%R10D |
0x40edeb MOV 0xc(%R14),%EAX |
0x40edef CLTD |
0x40edf0 IDIV %R12D |
0x40edf3 CMP %EDX,%R10D |
0x40edf6 JL 40f0fd |
0x40edfc IMUL %EAX,%R10D |
0x40ee00 ADD %EDX,%R10D |
0x40ee03 LEA (%RAX,%R10,1),%R11D |
0x40ee07 CMP %R11D,%R10D |
0x40ee0a JGE 40f0ee |
0x40ee10 VMOVSD 0x2e48(%RIP),%XMM0 |
0x40ee18 MOV 0x78(%R14),%RCX |
0x40ee1c MOV %RBX,-0x88(%RBP) |
0x40ee23 MOV $0x613606df9756715,%R14 |
0x40ee2d MOV %R11D,-0x7c(%RBP) |
0x40ee31 VMULSD 0x8(%R13),%XMM0,%XMM4 |
0x40ee37 MOV %RCX,-0x78(%RBP) |
0x40ee3b MOVSXD %R10D,%RCX |
0x40ee3e MOV %RCX,%R11 |
0x40ee41 VMOVSD %XMM4,-0x68(%RBP) |
0x40ee46 NOPW %CS:(%RAX,%RAX,1) |
(93) 0x40ee50 MOV -0x78(%RBP),%RSI |
(93) 0x40ee54 MOV %R11D,%EDI |
(93) 0x40ee57 SAL $0x6,%EDI |
(93) 0x40ee5a MOVSXD (%RSI,%R11,4),%R8 |
(93) 0x40ee5e TEST %R8D,%R8D |
(93) 0x40ee61 JLE 40f0e1 |
(93) 0x40ee67 MOV -0x88(%RBP),%R9 |
(93) 0x40ee6e MOV %R11,%RAX |
(93) 0x40ee71 MOV %R11,-0x70(%RBP) |
(93) 0x40ee75 MOVSXD %EDI,%R10 |
(93) 0x40ee78 SAL $0x6,%RAX |
(93) 0x40ee7c MOV 0x20(%R9),%R13 |
(93) 0x40ee80 ADD %RAX,%R8 |
(93) 0x40ee83 MOV 0x28(%R9),%RBX |
(93) 0x40ee87 SAL $0x2,%R8 |
(93) 0x40ee8b MOV 0x10(%R13),%R15 |
(93) 0x40ee8f MOV 0x8(%R13),%R12 |
(93) 0x40ee93 MOV %R8,-0x50(%RBP) |
(93) 0x40ee97 MOV %RBX,-0x60(%RBP) |
(93) 0x40ee9b LEA (,%R10,4),%RBX |
(93) 0x40eea3 MOV %R15,-0x58(%RBP) |
(93) 0x40eea7 NOPW (%RAX,%RAX,1) |
(94) 0x40eeb0 MOV (%R12,%RBX,1),%EDI |
(94) 0x40eeb4 MOV -0x58(%RBP),%RDX |
(94) 0x40eeb8 MOV $0x9,%R9D |
(94) 0x40eebe MOV -0x60(%RBP),%RCX |
(94) 0x40eec2 VMOVSD -0x68(%RBP),%XMM3 |
(94) 0x40eec7 IMUL $-0x61c8864f,%EDI,%ESI |
(94) 0x40eecd ADD $0x7b,%EDI |
(94) 0x40eed0 MOVSXD (%RDX,%RBX,1),%R11 |
(94) 0x40eed4 IMUL $-0x61c8864f,%EDI,%R8D |
(94) 0x40eedb SAL $0x4,%R11 |
(94) 0x40eedf SAL $0x20,%RSI |
(94) 0x40eee3 VMOVSD 0x8(%RCX,%R11,1),%XMM1 |
(94) 0x40eeea ADD %R8,%RSI |
(94) 0x40eeed IMUL %R14,%RSI |
(94) 0x40eef1 VDIVSD %XMM1,%XMM3,%XMM2 |
(94) 0x40eef5 MOV %RSI,%RAX |
(94) 0x40eef8 MOV %RSI,%R15 |
(94) 0x40eefb MUL %R9 |
(94) 0x40eefe SUB %RDX,%R15 |
(94) 0x40ef01 SHR $0x1,%R15 |
(94) 0x40ef04 ADD %R15,%RDX |
(94) 0x40ef07 SHR $0x3c,%RDX |
(94) 0x40ef0b MOV %RDX,%R10 |
(94) 0x40ef0e SAL $0x3d,%R10 |
(94) 0x40ef12 SUB %RDX,%R10 |
(94) 0x40ef15 SUB %R10,%RSI |
(94) 0x40ef18 IMUL %R14,%RSI |
(94) 0x40ef1c MOV %RSI,%RAX |
(94) 0x40ef1f MOV %RSI,%R11 |
(94) 0x40ef22 MUL %R9 |
(94) 0x40ef25 SUB %RDX,%R11 |
(94) 0x40ef28 SHR $0x1,%R11 |
(94) 0x40ef2b ADD %R11,%RDX |
(94) 0x40ef2e SHR $0x3c,%RDX |
(94) 0x40ef32 MOV %RDX,%RCX |
(94) 0x40ef35 SAL $0x3d,%RCX |
(94) 0x40ef39 SUB %RDX,%RCX |
(94) 0x40ef3c VSQRTSD %XMM2,%XMM2,%XMM2 |
(94) 0x40ef40 SUB %RCX,%RSI |
(94) 0x40ef43 IMUL %R14,%RSI |
(94) 0x40ef47 VMULSD %XMM2,%XMM1,%XMM5 |
(94) 0x40ef4b MOV %RSI,%RAX |
(94) 0x40ef4e MOV %RSI,%RDI |
(94) 0x40ef51 MUL %R9 |
(94) 0x40ef54 VMOVSD %XMM5,-0x48(%RBP) |
(94) 0x40ef59 SUB %RDX,%RDI |
(94) 0x40ef5c SHR $0x1,%RDI |
(94) 0x40ef5f ADD %RDI,%RDX |
(94) 0x40ef62 SHR $0x3c,%RDX |
(94) 0x40ef66 MOV %RDX,%R8 |
(94) 0x40ef69 SAL $0x3d,%R8 |
(94) 0x40ef6d SUB %RDX,%R8 |
(94) 0x40ef70 SUB %R8,%RSI |
(94) 0x40ef73 IMUL %R14,%RSI |
(94) 0x40ef77 MOV %RSI,%RAX |
(94) 0x40ef7a MOV %RSI,%R15 |
(94) 0x40ef7d MUL %R9 |
(94) 0x40ef80 SUB %RDX,%R15 |
(94) 0x40ef83 SHR $0x1,%R15 |
(94) 0x40ef86 ADD %R15,%RDX |
(94) 0x40ef89 SHR $0x3c,%RDX |
(94) 0x40ef8d MOV %RDX,%R10 |
(94) 0x40ef90 SAL $0x3d,%R10 |
(94) 0x40ef94 SUB %RDX,%R10 |
(94) 0x40ef97 SUB %R10,%RSI |
(94) 0x40ef9a IMUL %R14,%RSI |
(94) 0x40ef9e MOV %RSI,%RAX |
(94) 0x40efa1 MOV %RSI,%R11 |
(94) 0x40efa4 MUL %R9 |
(94) 0x40efa7 SUB %RDX,%R11 |
(94) 0x40efaa SHR $0x1,%R11 |
(94) 0x40efad ADD %R11,%RDX |
(94) 0x40efb0 SHR $0x3c,%RDX |
(94) 0x40efb4 MOV %RDX,%RCX |
(94) 0x40efb7 SAL $0x3d,%RCX |
(94) 0x40efbb SUB %RDX,%RCX |
(94) 0x40efbe SUB %RCX,%RSI |
(94) 0x40efc1 IMUL %R14,%RSI |
(94) 0x40efc5 MOV %RSI,%RAX |
(94) 0x40efc8 MOV %RSI,%RDI |
(94) 0x40efcb MUL %R9 |
(94) 0x40efce SUB %RDX,%RDI |
(94) 0x40efd1 SHR $0x1,%RDI |
(94) 0x40efd4 ADD %RDI,%RDX |
(94) 0x40efd7 SHR $0x3c,%RDX |
(94) 0x40efdb MOV %RDX,%R8 |
(94) 0x40efde SAL $0x3d,%R8 |
(94) 0x40efe2 SUB %RDX,%R8 |
(94) 0x40efe5 SUB %R8,%RSI |
(94) 0x40efe8 IMUL %R14,%RSI |
(94) 0x40efec MOV %RSI,%RAX |
(94) 0x40efef MOV %RSI,%R15 |
(94) 0x40eff2 MUL %R9 |
(94) 0x40eff5 SUB %RDX,%R15 |
(94) 0x40eff8 SHR $0x1,%R15 |
(94) 0x40effb ADD %R15,%RDX |
(94) 0x40effe SHR $0x3c,%RDX |
(94) 0x40f002 MOV %RDX,%R10 |
(94) 0x40f005 SAL $0x3d,%R10 |
(94) 0x40f009 SUB %RDX,%R10 |
(94) 0x40f00c SUB %R10,%RSI |
(94) 0x40f00f IMUL %R14,%RSI |
(94) 0x40f013 MOV %RSI,%RAX |
(94) 0x40f016 MOV %RSI,%R11 |
(94) 0x40f019 MUL %R9 |
(94) 0x40f01c SUB %RDX,%R11 |
(94) 0x40f01f SHR $0x1,%R11 |
(94) 0x40f022 ADD %R11,%RDX |
(94) 0x40f025 SHR $0x3c,%RDX |
(94) 0x40f029 MOV %RDX,%RCX |
(94) 0x40f02c SAL $0x3d,%RCX |
(94) 0x40f030 SUB %RDX,%RCX |
(94) 0x40f033 SUB %RCX,%RSI |
(94) 0x40f036 IMUL %R14,%RSI |
(94) 0x40f03a MOV %RSI,%RAX |
(94) 0x40f03d MOV %RSI,%RDI |
(94) 0x40f040 MUL %R9 |
(94) 0x40f043 SUB %RDX,%RDI |
(94) 0x40f046 SHR $0x1,%RDI |
(94) 0x40f049 ADD %RDI,%RDX |
(94) 0x40f04c LEA -0x38(%RBP),%RDI |
(94) 0x40f050 SHR $0x3c,%RDX |
(94) 0x40f054 MOV %RDX,%R8 |
(94) 0x40f057 SAL $0x3d,%R8 |
(94) 0x40f05b SUB %RDX,%R8 |
(94) 0x40f05e SUB %R8,%RSI |
(94) 0x40f061 IMUL %R14,%RSI |
(94) 0x40f065 MOV %RSI,%RAX |
(94) 0x40f068 MUL %R9 |
(94) 0x40f06b MOV %RSI,%R9 |
(94) 0x40f06e SUB %RDX,%R9 |
(94) 0x40f071 SHR $0x1,%R9 |
(94) 0x40f074 ADD %R9,%RDX |
(94) 0x40f077 SHR $0x3c,%RDX |
(94) 0x40f07b MOV %RDX,%R15 |
(94) 0x40f07e SAL $0x3d,%R15 |
(94) 0x40f082 SUB %RDX,%R15 |
(94) 0x40f085 SUB %R15,%RSI |
(94) 0x40f088 MOV %RSI,-0x38(%RBP) |
(94) 0x40f08c CALL 40ecd0 <gasdev> |
(94) 0x40f091 MOV 0x20(%R13),%R10 |
(94) 0x40f095 LEA (%RBX,%RBX,2),%RSI |
(94) 0x40f099 LEA -0x38(%RBP),%RDI |
(94) 0x40f09d VMULSD -0x48(%RBP),%XMM0,%XMM6 |
(94) 0x40f0a2 ADD $0x4,%RBX |
(94) 0x40f0a6 LEA (%R10,%RSI,2),%R15 |
(94) 0x40f0aa VMOVSD %XMM6,(%R15) |
(94) 0x40f0af CALL 40ecd0 <gasdev> |
(94) 0x40f0b4 LEA -0x38(%RBP),%RDI |
(94) 0x40f0b8 VMULSD -0x48(%RBP),%XMM0,%XMM7 |
(94) 0x40f0bd VMOVSD %XMM7,0x8(%R15) |
(94) 0x40f0c3 CALL 40ecd0 <gasdev> |
(94) 0x40f0c8 VMULSD -0x48(%RBP),%XMM0,%XMM8 |
(94) 0x40f0cd VMOVSD %XMM8,0x10(%R15) |
(94) 0x40f0d3 CMP %RBX,-0x50(%RBP) |
(94) 0x40f0d7 JNE 40eeb0 |
(93) 0x40f0dd MOV -0x70(%RBP),%R11 |
(93) 0x40f0e1 INC %R11 |
(93) 0x40f0e4 CMP %R11D,-0x7c(%RBP) |
(93) 0x40f0e8 JG 40ee50 |
0x40f0ee ADD $0x68,%RSP |
0x40f0f2 POP %RBX |
0x40f0f3 POP %R12 |
0x40f0f5 POP %R13 |
0x40f0f7 POP %R14 |
0x40f0f9 POP %R15 |
0x40f0fb POP %RBP |
0x40f0fc RET |
0x40f0fd INC %EAX |
0x40f0ff XOR %EDX,%EDX |
0x40f101 JMP 40edfc |
0x40f106 NOPW %CS:(%RAX,%RAX,1) |
Path / |
Source file and lines | initAtoms.c:151-162 |
Module | exec |
nb instructions | 48 |
nb uops | 53 |
loop length | 178 |
used x86 registers | 13 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 4 |
micro-operation queue | 8.83 cycles |
front end | 8.83 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.10 | 4.00 | 4.33 | 4.33 | 6.00 | 3.07 | 2.90 | 6.00 | 6.00 | 6.00 | 2.93 | 4.33 |
cycles | 3.10 | 5.53 | 4.33 | 4.33 | 6.00 | 3.07 | 2.90 | 6.00 | 6.00 | 6.00 | 2.93 | 4.33 |
Cycles executing div or sqrt instructions | 6.00 |
FE+BE cycles | 8.53-8.57 |
Stall cycles | 0.00 |
Front-end | 8.83 |
Dispatch | 6.00 |
DIV/SQRT | 6.00 |
Overall L1 | 8.83 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 0% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 0% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 0% |
all | 9% |
load | 12% |
store | 10% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 7% |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 9% |
load | 12% |
store | 10% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 8% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x68,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV (%RDI),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALL 403070 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x18(%RBX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 403150 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0xc(%R14),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CLTD | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %R12D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
CMP %EDX,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 40f0fd <setTemperature._omp_fn.0+0x33d> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EAX,%R10D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %EDX,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA (%RAX,%R10,1),%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %R11D,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 40f0ee <setTemperature._omp_fn.0+0x32e> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVSD 0x2e48(%RIP),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x78(%R14),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RBX,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x613606df9756715,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.28 |
MOV %R11D,-0x7c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMULSD 0x8(%R13),%XMM0,%XMM4 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
MOV %RCX,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVSXD %R10D,%RCX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %RCX,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVSD %XMM4,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x68,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 40edfc <setTemperature._omp_fn.0+0x3c> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | initAtoms.c:151-162 |
Module | exec |
nb instructions | 48 |
nb uops | 53 |
loop length | 178 |
used x86 registers | 13 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 4 |
micro-operation queue | 8.83 cycles |
front end | 8.83 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.10 | 4.00 | 4.33 | 4.33 | 6.00 | 3.07 | 2.90 | 6.00 | 6.00 | 6.00 | 2.93 | 4.33 |
cycles | 3.10 | 5.53 | 4.33 | 4.33 | 6.00 | 3.07 | 2.90 | 6.00 | 6.00 | 6.00 | 2.93 | 4.33 |
Cycles executing div or sqrt instructions | 6.00 |
FE+BE cycles | 8.53-8.57 |
Stall cycles | 0.00 |
Front-end | 8.83 |
Dispatch | 6.00 |
DIV/SQRT | 6.00 |
Overall L1 | 8.83 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 0% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 0% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 0% |
all | 9% |
load | 12% |
store | 10% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 7% |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 9% |
load | 12% |
store | 10% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 8% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x68,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV (%RDI),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALL 403070 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x18(%RBX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 403150 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0xc(%R14),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CLTD | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %R12D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
CMP %EDX,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 40f0fd <setTemperature._omp_fn.0+0x33d> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EAX,%R10D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %EDX,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA (%RAX,%R10,1),%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %R11D,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 40f0ee <setTemperature._omp_fn.0+0x32e> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVSD 0x2e48(%RIP),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x78(%R14),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RBX,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x613606df9756715,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.28 |
MOV %R11D,-0x7c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMULSD 0x8(%R13),%XMM0,%XMM4 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
MOV %RCX,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVSXD %R10D,%RCX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %RCX,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVSD %XMM4,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x68,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 40edfc <setTemperature._omp_fn.0+0x3c> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼setTemperature._omp_fn.0– | 0.01 | 0 |
▼Loop 93 - initAtoms.c:154-162 - exec– | 0 | 0 |
○Loop 94 - initAtoms.c:154-162 - exec | 0.01 | 0 |