Function: setTemperature._omp_fn.0 | Module: exec | Source: initAtoms.c:151-162 [...] | Coverage: 0.01% |
---|
Function: setTemperature._omp_fn.0 | Module: exec | Source: initAtoms.c:151-162 [...] | Coverage: 0.01% |
---|
/scratch_na/users/xoserete/qaas_runs/171-419-7821/intel/CoMD/build/CoMD/CoMD/src-openmp/random.c: 45 - 70 |
-------------------------------------------------------------------------------- |
45: *seed *= UINT64_C(437799614237992725); |
46: *seed %= UINT64_C(2305843009213693951); |
[...] |
68: uint32_t s2 = (id+callSite) * UINT32_C(2654435761); |
69: |
70: uint64_t iSeed = (UINT64_C(0x100000000) * s1) + s2; |
/scratch_na/users/xoserete/qaas_runs/171-419-7821/intel/CoMD/build/CoMD/CoMD/src-openmp/initAtoms.c: 151 - 162 |
-------------------------------------------------------------------------------- |
151: #pragma omp parallel for |
152: for (int iBox=0; iBox<s->boxes->nLocalBoxes; ++iBox) |
153: { |
154: for (int iOff=MAXATOMS*iBox, ii=0; ii<s->boxes->nAtoms[iBox]; ++ii, ++iOff) |
155: { |
156: int iType = s->atoms->iSpecies[iOff]; |
157: real_t mass = s->species[iType].mass; |
158: real_t sigma = sqrt(kB_eV * temperature/mass); |
159: uint64_t seed = mkSeed(s->atoms->gid[iOff], 123); |
160: s->atoms->p[iOff][0] = mass * sigma * gasdev(&seed); |
161: s->atoms->p[iOff][1] = mass * sigma * gasdev(&seed); |
162: s->atoms->p[iOff][2] = mass * sigma * gasdev(&seed); |
0x40eda0 PUSH %RBP |
0x40eda1 MOV %RSP,%RBP |
0x40eda4 PUSH %R15 |
0x40eda6 PUSH %R14 |
0x40eda8 PUSH %R13 |
0x40edaa MOV %RDI,%R13 |
0x40edad PUSH %R12 |
0x40edaf PUSH %RBX |
0x40edb0 SUB $0x68,%RSP |
0x40edb4 MOV (%RDI),%RBX |
0x40edb7 CALL 403070 <omp_get_num_threads@plt> |
0x40edbc MOV 0x18(%RBX),%R14 |
0x40edc0 MOV %EAX,%R12D |
0x40edc3 CALL 403150 <omp_get_thread_num@plt> |
0x40edc8 MOV %EAX,%R10D |
0x40edcb MOV 0xc(%R14),%EAX |
0x40edcf CLTD |
0x40edd0 IDIV %R12D |
0x40edd3 CMP %EDX,%R10D |
0x40edd6 JL 40f0dd |
0x40eddc IMUL %EAX,%R10D |
0x40ede0 ADD %EDX,%R10D |
0x40ede3 LEA (%RAX,%R10,1),%R11D |
0x40ede7 CMP %R11D,%R10D |
0x40edea JGE 40f0ce |
0x40edf0 VMOVSD 0x2e48(%RIP),%XMM0 |
0x40edf8 MOV 0x78(%R14),%RCX |
0x40edfc MOV %RBX,-0x88(%RBP) |
0x40ee03 MOV $0x613606df9756715,%R14 |
0x40ee0d MOV %R11D,-0x7c(%RBP) |
0x40ee11 VMULSD 0x8(%R13),%XMM0,%XMM4 |
0x40ee17 MOV %RCX,-0x78(%RBP) |
0x40ee1b MOVSXD %R10D,%RCX |
0x40ee1e MOV %RCX,%R11 |
0x40ee21 VMOVSD %XMM4,-0x68(%RBP) |
0x40ee26 NOPW %CS:(%RAX,%RAX,1) |
(93) 0x40ee30 MOV -0x78(%RBP),%RSI |
(93) 0x40ee34 MOV %R11D,%EDI |
(93) 0x40ee37 SAL $0x6,%EDI |
(93) 0x40ee3a MOVSXD (%RSI,%R11,4),%R8 |
(93) 0x40ee3e TEST %R8D,%R8D |
(93) 0x40ee41 JLE 40f0c1 |
(93) 0x40ee47 MOV -0x88(%RBP),%R9 |
(93) 0x40ee4e MOV %R11,%RAX |
(93) 0x40ee51 MOV %R11,-0x70(%RBP) |
(93) 0x40ee55 MOVSXD %EDI,%R10 |
(93) 0x40ee58 SAL $0x6,%RAX |
(93) 0x40ee5c MOV 0x20(%R9),%R13 |
(93) 0x40ee60 ADD %RAX,%R8 |
(93) 0x40ee63 MOV 0x28(%R9),%RBX |
(93) 0x40ee67 SAL $0x2,%R8 |
(93) 0x40ee6b MOV 0x10(%R13),%R15 |
(93) 0x40ee6f MOV 0x8(%R13),%R12 |
(93) 0x40ee73 MOV %R8,-0x50(%RBP) |
(93) 0x40ee77 MOV %RBX,-0x60(%RBP) |
(93) 0x40ee7b LEA (,%R10,4),%RBX |
(93) 0x40ee83 MOV %R15,-0x58(%RBP) |
(93) 0x40ee87 NOPW (%RAX,%RAX,1) |
(94) 0x40ee90 MOV (%R12,%RBX,1),%EDI |
(94) 0x40ee94 MOV -0x58(%RBP),%RDX |
(94) 0x40ee98 MOV $0x9,%R9D |
(94) 0x40ee9e MOV -0x60(%RBP),%RCX |
(94) 0x40eea2 VMOVSD -0x68(%RBP),%XMM3 |
(94) 0x40eea7 IMUL $-0x61c8864f,%EDI,%ESI |
(94) 0x40eead ADD $0x7b,%EDI |
(94) 0x40eeb0 MOVSXD (%RDX,%RBX,1),%R11 |
(94) 0x40eeb4 IMUL $-0x61c8864f,%EDI,%R8D |
(94) 0x40eebb SAL $0x4,%R11 |
(94) 0x40eebf SAL $0x20,%RSI |
(94) 0x40eec3 VMOVSD 0x8(%RCX,%R11,1),%XMM1 |
(94) 0x40eeca ADD %R8,%RSI |
(94) 0x40eecd IMUL %R14,%RSI |
(94) 0x40eed1 VDIVSD %XMM1,%XMM3,%XMM2 |
(94) 0x40eed5 MOV %RSI,%RAX |
(94) 0x40eed8 MOV %RSI,%R15 |
(94) 0x40eedb MUL %R9 |
(94) 0x40eede SUB %RDX,%R15 |
(94) 0x40eee1 SHR $0x1,%R15 |
(94) 0x40eee4 ADD %R15,%RDX |
(94) 0x40eee7 SHR $0x3c,%RDX |
(94) 0x40eeeb MOV %RDX,%R10 |
(94) 0x40eeee SAL $0x3d,%R10 |
(94) 0x40eef2 SUB %RDX,%R10 |
(94) 0x40eef5 SUB %R10,%RSI |
(94) 0x40eef8 IMUL %R14,%RSI |
(94) 0x40eefc MOV %RSI,%RAX |
(94) 0x40eeff MOV %RSI,%R11 |
(94) 0x40ef02 MUL %R9 |
(94) 0x40ef05 SUB %RDX,%R11 |
(94) 0x40ef08 SHR $0x1,%R11 |
(94) 0x40ef0b ADD %R11,%RDX |
(94) 0x40ef0e SHR $0x3c,%RDX |
(94) 0x40ef12 MOV %RDX,%RCX |
(94) 0x40ef15 SAL $0x3d,%RCX |
(94) 0x40ef19 SUB %RDX,%RCX |
(94) 0x40ef1c VSQRTSD %XMM2,%XMM2,%XMM2 |
(94) 0x40ef20 SUB %RCX,%RSI |
(94) 0x40ef23 IMUL %R14,%RSI |
(94) 0x40ef27 VMULSD %XMM2,%XMM1,%XMM5 |
(94) 0x40ef2b MOV %RSI,%RAX |
(94) 0x40ef2e MOV %RSI,%RDI |
(94) 0x40ef31 MUL %R9 |
(94) 0x40ef34 VMOVSD %XMM5,-0x48(%RBP) |
(94) 0x40ef39 SUB %RDX,%RDI |
(94) 0x40ef3c SHR $0x1,%RDI |
(94) 0x40ef3f ADD %RDI,%RDX |
(94) 0x40ef42 SHR $0x3c,%RDX |
(94) 0x40ef46 MOV %RDX,%R8 |
(94) 0x40ef49 SAL $0x3d,%R8 |
(94) 0x40ef4d SUB %RDX,%R8 |
(94) 0x40ef50 SUB %R8,%RSI |
(94) 0x40ef53 IMUL %R14,%RSI |
(94) 0x40ef57 MOV %RSI,%RAX |
(94) 0x40ef5a MOV %RSI,%R15 |
(94) 0x40ef5d MUL %R9 |
(94) 0x40ef60 SUB %RDX,%R15 |
(94) 0x40ef63 SHR $0x1,%R15 |
(94) 0x40ef66 ADD %R15,%RDX |
(94) 0x40ef69 SHR $0x3c,%RDX |
(94) 0x40ef6d MOV %RDX,%R10 |
(94) 0x40ef70 SAL $0x3d,%R10 |
(94) 0x40ef74 SUB %RDX,%R10 |
(94) 0x40ef77 SUB %R10,%RSI |
(94) 0x40ef7a IMUL %R14,%RSI |
(94) 0x40ef7e MOV %RSI,%RAX |
(94) 0x40ef81 MOV %RSI,%R11 |
(94) 0x40ef84 MUL %R9 |
(94) 0x40ef87 SUB %RDX,%R11 |
(94) 0x40ef8a SHR $0x1,%R11 |
(94) 0x40ef8d ADD %R11,%RDX |
(94) 0x40ef90 SHR $0x3c,%RDX |
(94) 0x40ef94 MOV %RDX,%RCX |
(94) 0x40ef97 SAL $0x3d,%RCX |
(94) 0x40ef9b SUB %RDX,%RCX |
(94) 0x40ef9e SUB %RCX,%RSI |
(94) 0x40efa1 IMUL %R14,%RSI |
(94) 0x40efa5 MOV %RSI,%RAX |
(94) 0x40efa8 MOV %RSI,%RDI |
(94) 0x40efab MUL %R9 |
(94) 0x40efae SUB %RDX,%RDI |
(94) 0x40efb1 SHR $0x1,%RDI |
(94) 0x40efb4 ADD %RDI,%RDX |
(94) 0x40efb7 SHR $0x3c,%RDX |
(94) 0x40efbb MOV %RDX,%R8 |
(94) 0x40efbe SAL $0x3d,%R8 |
(94) 0x40efc2 SUB %RDX,%R8 |
(94) 0x40efc5 SUB %R8,%RSI |
(94) 0x40efc8 IMUL %R14,%RSI |
(94) 0x40efcc MOV %RSI,%RAX |
(94) 0x40efcf MOV %RSI,%R15 |
(94) 0x40efd2 MUL %R9 |
(94) 0x40efd5 SUB %RDX,%R15 |
(94) 0x40efd8 SHR $0x1,%R15 |
(94) 0x40efdb ADD %R15,%RDX |
(94) 0x40efde SHR $0x3c,%RDX |
(94) 0x40efe2 MOV %RDX,%R10 |
(94) 0x40efe5 SAL $0x3d,%R10 |
(94) 0x40efe9 SUB %RDX,%R10 |
(94) 0x40efec SUB %R10,%RSI |
(94) 0x40efef IMUL %R14,%RSI |
(94) 0x40eff3 MOV %RSI,%RAX |
(94) 0x40eff6 MOV %RSI,%R11 |
(94) 0x40eff9 MUL %R9 |
(94) 0x40effc SUB %RDX,%R11 |
(94) 0x40efff SHR $0x1,%R11 |
(94) 0x40f002 ADD %R11,%RDX |
(94) 0x40f005 SHR $0x3c,%RDX |
(94) 0x40f009 MOV %RDX,%RCX |
(94) 0x40f00c SAL $0x3d,%RCX |
(94) 0x40f010 SUB %RDX,%RCX |
(94) 0x40f013 SUB %RCX,%RSI |
(94) 0x40f016 IMUL %R14,%RSI |
(94) 0x40f01a MOV %RSI,%RAX |
(94) 0x40f01d MOV %RSI,%RDI |
(94) 0x40f020 MUL %R9 |
(94) 0x40f023 SUB %RDX,%RDI |
(94) 0x40f026 SHR $0x1,%RDI |
(94) 0x40f029 ADD %RDI,%RDX |
(94) 0x40f02c LEA -0x38(%RBP),%RDI |
(94) 0x40f030 SHR $0x3c,%RDX |
(94) 0x40f034 MOV %RDX,%R8 |
(94) 0x40f037 SAL $0x3d,%R8 |
(94) 0x40f03b SUB %RDX,%R8 |
(94) 0x40f03e SUB %R8,%RSI |
(94) 0x40f041 IMUL %R14,%RSI |
(94) 0x40f045 MOV %RSI,%RAX |
(94) 0x40f048 MUL %R9 |
(94) 0x40f04b MOV %RSI,%R9 |
(94) 0x40f04e SUB %RDX,%R9 |
(94) 0x40f051 SHR $0x1,%R9 |
(94) 0x40f054 ADD %R9,%RDX |
(94) 0x40f057 SHR $0x3c,%RDX |
(94) 0x40f05b MOV %RDX,%R15 |
(94) 0x40f05e SAL $0x3d,%R15 |
(94) 0x40f062 SUB %RDX,%R15 |
(94) 0x40f065 SUB %R15,%RSI |
(94) 0x40f068 MOV %RSI,-0x38(%RBP) |
(94) 0x40f06c CALL 40ecb0 <gasdev> |
(94) 0x40f071 MOV 0x20(%R13),%R10 |
(94) 0x40f075 LEA (%RBX,%RBX,2),%RSI |
(94) 0x40f079 LEA -0x38(%RBP),%RDI |
(94) 0x40f07d VMULSD -0x48(%RBP),%XMM0,%XMM6 |
(94) 0x40f082 ADD $0x4,%RBX |
(94) 0x40f086 LEA (%R10,%RSI,2),%R15 |
(94) 0x40f08a VMOVSD %XMM6,(%R15) |
(94) 0x40f08f CALL 40ecb0 <gasdev> |
(94) 0x40f094 LEA -0x38(%RBP),%RDI |
(94) 0x40f098 VMULSD -0x48(%RBP),%XMM0,%XMM7 |
(94) 0x40f09d VMOVSD %XMM7,0x8(%R15) |
(94) 0x40f0a3 CALL 40ecb0 <gasdev> |
(94) 0x40f0a8 VMULSD -0x48(%RBP),%XMM0,%XMM8 |
(94) 0x40f0ad VMOVSD %XMM8,0x10(%R15) |
(94) 0x40f0b3 CMP %RBX,-0x50(%RBP) |
(94) 0x40f0b7 JNE 40ee90 |
(93) 0x40f0bd MOV -0x70(%RBP),%R11 |
(93) 0x40f0c1 INC %R11 |
(93) 0x40f0c4 CMP %R11D,-0x7c(%RBP) |
(93) 0x40f0c8 JG 40ee30 |
0x40f0ce ADD $0x68,%RSP |
0x40f0d2 POP %RBX |
0x40f0d3 POP %R12 |
0x40f0d5 POP %R13 |
0x40f0d7 POP %R14 |
0x40f0d9 POP %R15 |
0x40f0db POP %RBP |
0x40f0dc RET |
0x40f0dd INC %EAX |
0x40f0df XOR %EDX,%EDX |
0x40f0e1 JMP 40eddc |
0x40f0e6 NOPW %CS:(%RAX,%RAX,1) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
○96.43 | gomp_thread_start | team.c:130 | libgomp.so.1.0.0 |
○3.57 | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
Path / |
Source file and lines | initAtoms.c:151-162 |
Module | exec |
nb instructions | 48 |
nb uops | 53 |
loop length | 178 |
used x86 registers | 13 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 4 |
micro-operation queue | 8.83 cycles |
front end | 8.83 cycles |
Metric | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.10 | 4.00 | 4.33 | 4.33 | 6.00 | 3.07 | 2.90 | 6.00 | 6.00 | 6.00 | 2.93 | 4.33 |
cycles | 3.10 | 5.53 | 4.33 | 4.33 | 6.00 | 3.07 | 2.90 | 6.00 | 6.00 | 6.00 | 2.93 | 4.33 |
Cycles executing div or sqrt instructions | 6.00 |
FE+BE cycles | 8.53-8.57 |
Stall cycles | 0.00 |
Front-end | 8.83 |
Dispatch | 6.00 |
DIV/SQRT | 6.00 |
Overall L1 | 8.83 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 0% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 0% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 0% |
all | 9% |
load | 12% |
store | 10% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 7% |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 9% |
load | 12% |
store | 10% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 8% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x68,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV (%RDI),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALL 403070 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x18(%RBX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 403150 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0xc(%R14),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CLTD | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %R12D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
CMP %EDX,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 40f0dd <setTemperature._omp_fn.0+0x33d> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EAX,%R10D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %EDX,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA (%RAX,%R10,1),%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %R11D,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 40f0ce <setTemperature._omp_fn.0+0x32e> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVSD 0x2e48(%RIP),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x78(%R14),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RBX,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x613606df9756715,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.28 |
MOV %R11D,-0x7c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMULSD 0x8(%R13),%XMM0,%XMM4 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
MOV %RCX,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVSXD %R10D,%RCX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %RCX,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVSD %XMM4,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x68,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 40eddc <setTemperature._omp_fn.0+0x3c> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | initAtoms.c:151-162 |
Module | exec |
nb instructions | 48 |
nb uops | 53 |
loop length | 178 |
used x86 registers | 13 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 4 |
micro-operation queue | 8.83 cycles |
front end | 8.83 cycles |
Metric | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.10 | 4.00 | 4.33 | 4.33 | 6.00 | 3.07 | 2.90 | 6.00 | 6.00 | 6.00 | 2.93 | 4.33 |
cycles | 3.10 | 5.53 | 4.33 | 4.33 | 6.00 | 3.07 | 2.90 | 6.00 | 6.00 | 6.00 | 2.93 | 4.33 |
Cycles executing div or sqrt instructions | 6.00 |
FE+BE cycles | 8.53-8.57 |
Stall cycles | 0.00 |
Front-end | 8.83 |
Dispatch | 6.00 |
DIV/SQRT | 6.00 |
Overall L1 | 8.83 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 0% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 0% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 0% |
all | 9% |
load | 12% |
store | 10% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 7% |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 9% |
load | 12% |
store | 10% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 8% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x68,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV (%RDI),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALL 403070 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x18(%RBX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 403150 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0xc(%R14),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CLTD | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %R12D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
CMP %EDX,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 40f0dd <setTemperature._omp_fn.0+0x33d> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EAX,%R10D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %EDX,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA (%RAX,%R10,1),%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %R11D,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 40f0ce <setTemperature._omp_fn.0+0x32e> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVSD 0x2e48(%RIP),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x78(%R14),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RBX,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x613606df9756715,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.28 |
MOV %R11D,-0x7c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMULSD 0x8(%R13),%XMM0,%XMM4 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
MOV %RCX,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVSXD %R10D,%RCX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %RCX,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVSD %XMM4,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x68,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 40eddc <setTemperature._omp_fn.0+0x3c> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼setTemperature._omp_fn.0– | 0.01 | 0 |
▼Loop 93 - initAtoms.c:154-162 - exec– | 0 | 0 |
○Loop 94 - initAtoms.c:154-162 - exec | 0.01 | 0 |