Function: miniqmcreference::TwoBodyJastrowRef<qmcplusplus::BsplineFunctor<double> >::ratioGrad(qmcpl ... | Module: libqmcwfs.so | Source: TwoBodyJastrowRef.h:295-304 [...] | Coverage: 2.56% |
---|
Function: miniqmcreference::TwoBodyJastrowRef<qmcplusplus::BsplineFunctor<double> >::ratioGrad(qmcpl ... | Module: libqmcwfs.so | Source: TwoBodyJastrowRef.h:295-304 [...] | Coverage: 2.56% |
---|
/home/kcamus/qaas_runs/169-390-4082/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/Jastrow/TwoBodyJastrowRef.h: 295 - 304 |
-------------------------------------------------------------------------------- |
295: TwoBodyJastrowRef<FT>::ratioGrad(ParticleSet& P, int iat, GradType& grad_iat) |
296: { |
297: UpdateMode = ORB_PBYP_PARTIAL; |
298: |
299: computeU3(P, iat, P.DistTables[0]->Temp_r.data(), cur_u.data(), cur_du.data(), cur_d2u.data()); |
300: cur_Uat = std::accumulate(cur_u.begin(), cur_u.begin() + N, valT()); |
301: DiffVal = Uat[iat] - cur_Uat; |
302: grad_iat += accumulateG(cur_du.data(), P.DistTables[0]->Temp_dr); |
303: return std::exp(DiffVal); |
304: } |
/home/kcamus/qaas_runs/169-390-4082/intel/miniqmc/build/miniqmc/src/Numerics/PETE/OperatorTags.h: 94 - 183 |
-------------------------------------------------------------------------------- |
94: (const_cast<T1&>(a) += b); |
[...] |
183: return (const_cast<T1&>(a) = b); |
/home/kcamus/qaas_runs/169-390-4082/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/Jastrow/TwoBodyJastrow.h: 148 - 155 |
-------------------------------------------------------------------------------- |
148: for (int idim = 0; idim < OHMMS_DIM; ++idim) |
149: { |
150: const valT* restrict dX = displ.data(idim); |
151: valT s = valT(); |
152: |
153: for (int jat = 0; jat < N; ++jat) |
154: s += du[jat] * dX[jat]; |
155: grad[idim] = s; |
/usr/include/c++/13.1.1/bits/stl_vector.h: 1258 - 1258 |
-------------------------------------------------------------------------------- |
1258: { return _M_data_ptr(this->_M_impl._M_start); } |
/home/kcamus/qaas_runs/169-390-4082/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsVector.h: 221 - 221 |
-------------------------------------------------------------------------------- |
221: inline Type_t& operator[](size_t i) |
/usr/include/c++/13.1.1/bits/stl_iterator.h: 1148 - 1148 |
-------------------------------------------------------------------------------- |
1148: { return __normal_iterator(_M_current + __n); } |
/usr/include/c++/13.1.1/bits/stl_numeric.h: 140 - 141 |
-------------------------------------------------------------------------------- |
140: for (; __first != __last; ++__first) |
141: __init = _GLIBCXX_MOVE_IF_20(__init) + *__first; |
0x3c9b0 PUSH %RBP |
0x3c9b1 MOV %RSP,%RBP |
0x3c9b4 PUSH %R15 |
0x3c9b6 MOVSXD %EDX,%R15 |
0x3c9b9 MOV %R15D,%EDX |
0x3c9bc PUSH %R14 |
0x3c9be PUSH %R13 |
0x3c9c0 MOV %RDI,%R13 |
0x3c9c3 PUSH %R12 |
0x3c9c5 MOV %RCX,%R12 |
0x3c9c8 PUSH %RBX |
0x3c9c9 SUB $0x38,%RSP |
0x3c9cd MOV %FS:0x28,%RAX |
0x3c9d6 MOV %RAX,-0x38(%RBP) |
0x3c9da XOR %EAX,%EAX |
0x3c9dc MOV 0xa10(%RSI),%RAX |
0x3c9e3 MOV 0x140(%RDI),%R8 |
0x3c9ea MOVL $0x2,0xc(%RDI) |
0x3c9f1 MOV 0x158(%RDI),%RBX |
0x3c9f8 MOV (%RAX),%R14 |
0x3c9fb MOV %RBX,%R9 |
0x3c9fe MOV 0x68(%R14),%RCX |
0x3ca02 PUSH $0 |
0x3ca04 PUSHQ 0x170(%RDI) |
0x3ca0a MOV %R8,-0x58(%RBP) |
0x3ca0e CALL 3b260 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE9computeU3ERKNS1_11ParticleSetEiPKdPdSA_SA_b> |
0x3ca13 MOV 0x90(%R13),%R9 |
0x3ca1a MOV -0x58(%RBP),%R8 |
0x3ca1e VXORPD %XMM0,%XMM0,%XMM0 |
0x3ca22 POP %RDX |
0x3ca23 LEA (,%R9,8),%RSI |
0x3ca2b POP %RCX |
0x3ca2c LEA (%R8,%RSI,1),%RDI |
0x3ca30 CMP %R8,%RDI |
0x3ca33 JE 3caea |
0x3ca39 MOV %RDI,%RDX |
0x3ca3c SUB %R8,%RDX |
0x3ca3f SUB $0x8,%RDX |
0x3ca43 SHR $0x3,%RDX |
0x3ca47 INC %RDX |
0x3ca4a AND $0x7,%EDX |
0x3ca4d JE 3cab2 |
0x3ca4f CMP $0x1,%RDX |
0x3ca53 JE 3caa4 |
0x3ca55 CMP $0x2,%RDX |
0x3ca59 JE 3ca9b |
0x3ca5b CMP $0x3,%RDX |
0x3ca5f JE 3ca92 |
0x3ca61 CMP $0x4,%RDX |
0x3ca65 JE 3ca89 |
0x3ca67 CMP $0x5,%RDX |
0x3ca6b JE 3ca80 |
0x3ca6d CMP $0x6,%RDX |
0x3ca71 JNE 3cce2 |
0x3ca77 VADDSD (%R8),%XMM0,%XMM0 |
0x3ca7c ADD $0x8,%R8 |
0x3ca80 VADDSD (%R8),%XMM0,%XMM0 |
0x3ca85 ADD $0x8,%R8 |
0x3ca89 VADDSD (%R8),%XMM0,%XMM0 |
0x3ca8e ADD $0x8,%R8 |
0x3ca92 VADDSD (%R8),%XMM0,%XMM0 |
0x3ca97 ADD $0x8,%R8 |
0x3ca9b VADDSD (%R8),%XMM0,%XMM0 |
0x3caa0 ADD $0x8,%R8 |
0x3caa4 VADDSD (%R8),%XMM0,%XMM0 |
0x3caa9 ADD $0x8,%R8 |
0x3caad CMP %RDI,%R8 |
0x3cab0 JE 3caea |
(374) 0x3cab2 VADDSD (%R8),%XMM0,%XMM1 |
(374) 0x3cab7 ADD $0x40,%R8 |
(374) 0x3cabb VADDSD -0x38(%R8),%XMM1,%XMM2 |
(374) 0x3cac1 VADDSD -0x30(%R8),%XMM2,%XMM3 |
(374) 0x3cac7 VADDSD -0x28(%R8),%XMM3,%XMM4 |
(374) 0x3cacd VADDSD -0x20(%R8),%XMM4,%XMM5 |
(374) 0x3cad3 VADDSD -0x18(%R8),%XMM5,%XMM6 |
(374) 0x3cad9 VADDSD -0x10(%R8),%XMM6,%XMM7 |
(374) 0x3cadf VADDSD -0x8(%R8),%XMM7,%XMM0 |
(374) 0x3cae5 CMP %RDI,%R8 |
(374) 0x3cae8 JNE 3cab2 |
0x3caea MOV 0xd8(%R13),%RCX |
0x3caf1 VMOVSD %XMM0,0x138(%R13) |
0x3cafa MOV 0x88(%R14),%R10 |
0x3cb01 VXORPD %XMM10,%XMM10,%XMM10 |
0x3cb06 MOVQ $0,-0x40(%RBP) |
0x3cb0e MOV 0x98(%R14),%R14 |
0x3cb15 LEA -0x50(%RBP),%R11 |
0x3cb19 LEA -0x38(%RBP),%R8 |
0x3cb1d VMOVSD (%RCX,%R15,8),%XMM8 |
0x3cb23 VMOVAPD %XMM10,-0x50(%RBP) |
0x3cb28 LEA (,%R10,8),%R15 |
0x3cb30 VSUBSD %XMM0,%XMM8,%XMM9 |
0x3cb34 VMOVSD %XMM9,0xb0(%R13) |
(372) 0x3cb3d TEST %R9,%R9 |
(372) 0x3cb40 JE 3ccd8 |
(372) 0x3cb46 LEA -0x8(%RSI),%RDI |
(372) 0x3cb4a XOR %EAX,%EAX |
(372) 0x3cb4c VXORPD %XMM11,%XMM11,%XMM11 |
(372) 0x3cb51 SHR $0x3,%RDI |
(372) 0x3cb55 INC %RDI |
(372) 0x3cb58 AND $0x7,%EDI |
(372) 0x3cb5b JE 3cbe8 |
(372) 0x3cb61 CMP $0x1,%RDI |
(372) 0x3cb65 JE 3cbd4 |
(372) 0x3cb67 CMP $0x2,%RDI |
(372) 0x3cb6b JE 3cbc5 |
(372) 0x3cb6d CMP $0x3,%RDI |
(372) 0x3cb71 JE 3cbb6 |
(372) 0x3cb73 CMP $0x4,%RDI |
(372) 0x3cb77 JE 3cba7 |
(372) 0x3cb79 CMP $0x5,%RDI |
(372) 0x3cb7d JE 3cb98 |
(372) 0x3cb7f CMP $0x6,%RDI |
(372) 0x3cb83 JNE 3ccc0 |
(372) 0x3cb89 VMOVSD (%RBX,%RAX,1),%XMM13 |
(372) 0x3cb8e VFMADD231SD (%R14,%RAX,1),%XMM13,%XMM11 |
(372) 0x3cb94 ADD $0x8,%RAX |
(372) 0x3cb98 VMOVSD (%RBX,%RAX,1),%XMM14 |
(372) 0x3cb9d VFMADD231SD (%R14,%RAX,1),%XMM14,%XMM11 |
(372) 0x3cba3 ADD $0x8,%RAX |
(372) 0x3cba7 VMOVSD (%RBX,%RAX,1),%XMM15 |
(372) 0x3cbac VFMADD231SD (%R14,%RAX,1),%XMM15,%XMM11 |
(372) 0x3cbb2 ADD $0x8,%RAX |
(372) 0x3cbb6 VMOVSD (%RBX,%RAX,1),%XMM0 |
(372) 0x3cbbb VFMADD231SD (%R14,%RAX,1),%XMM0,%XMM11 |
(372) 0x3cbc1 ADD $0x8,%RAX |
(372) 0x3cbc5 VMOVSD (%RBX,%RAX,1),%XMM1 |
(372) 0x3cbca VFMADD231SD (%R14,%RAX,1),%XMM1,%XMM11 |
(372) 0x3cbd0 ADD $0x8,%RAX |
(372) 0x3cbd4 VMOVSD (%RBX,%RAX,1),%XMM2 |
(372) 0x3cbd9 VFMADD231SD (%R14,%RAX,1),%XMM2,%XMM11 |
(372) 0x3cbdf ADD $0x8,%RAX |
(372) 0x3cbe3 CMP %RAX,%RSI |
(372) 0x3cbe6 JE 3cc57 |
(373) 0x3cbe8 VMOVSD (%RBX,%RAX,1),%XMM3 |
(373) 0x3cbed VMOVSD 0x8(%RBX,%RAX,1),%XMM4 |
(373) 0x3cbf3 VMOVSD 0x10(%RBX,%RAX,1),%XMM5 |
(373) 0x3cbf9 VMOVSD 0x18(%RBX,%RAX,1),%XMM6 |
(373) 0x3cbff VFMADD231SD (%R14,%RAX,1),%XMM3,%XMM11 |
(373) 0x3cc05 VMOVSD 0x20(%RBX,%RAX,1),%XMM7 |
(373) 0x3cc0b VMOVSD 0x28(%RBX,%RAX,1),%XMM8 |
(373) 0x3cc11 VMOVSD 0x30(%RBX,%RAX,1),%XMM9 |
(373) 0x3cc17 VMOVSD 0x38(%RBX,%RAX,1),%XMM10 |
(373) 0x3cc1d ADD $0x40,%RAX |
(373) 0x3cc21 VFMADD231SD -0x38(%RAX,%R14,1),%XMM4,%XMM11 |
(373) 0x3cc28 VFMADD231SD -0x30(%RAX,%R14,1),%XMM5,%XMM11 |
(373) 0x3cc2f VFMADD231SD -0x28(%RAX,%R14,1),%XMM6,%XMM11 |
(373) 0x3cc36 VFMADD231SD -0x20(%RAX,%R14,1),%XMM7,%XMM11 |
(373) 0x3cc3d VFMADD231SD -0x18(%RAX,%R14,1),%XMM8,%XMM11 |
(373) 0x3cc44 VFMADD231SD -0x10(%RAX,%R14,1),%XMM9,%XMM11 |
(373) 0x3cc4b VFMADD231SD -0x8(%RAX,%R14,1),%XMM10,%XMM11 |
(373) 0x3cc52 CMP %RAX,%RSI |
(373) 0x3cc55 JNE 3cbe8 |
(372) 0x3cc57 VMOVSD %XMM11,(%R11) |
(372) 0x3cc5c ADD $0x8,%R11 |
(372) 0x3cc60 ADD %R15,%R14 |
(372) 0x3cc63 CMP %R8,%R11 |
(372) 0x3cc66 JNE 3cb3d |
0x3cc6c VMOVUPD (%R12),%XMM11 |
0x3cc72 VMOVSD 0x10(%R12),%XMM13 |
0x3cc79 VADDPD -0x50(%RBP),%XMM11,%XMM12 |
0x3cc7e VADDSD -0x40(%RBP),%XMM13,%XMM14 |
0x3cc83 VMOVUPD %XMM12,(%R12) |
0x3cc89 VMOVSD %XMM14,0x10(%R12) |
0x3cc90 MOV -0x38(%RBP),%RAX |
0x3cc94 SUB %FS:0x28,%RAX |
0x3cc9d JNE 3ccf0 |
0x3cc9f VMOVSD 0xb0(%R13),%XMM0 |
0x3cca8 LEA -0x28(%RBP),%RSP |
0x3ccac POP %RBX |
0x3ccad POP %R12 |
0x3ccaf POP %R13 |
0x3ccb1 POP %R14 |
0x3ccb3 POP %R15 |
0x3ccb5 POP %RBP |
0x3ccb6 JMP 8060 |
0x3ccbb NOPL (%RAX,%RAX,1) |
(372) 0x3ccc0 VMOVSD (%RBX),%XMM12 |
(372) 0x3ccc4 MOV $0x8,%EAX |
(372) 0x3ccc9 VFMADD231SD (%R14),%XMM12,%XMM11 |
(372) 0x3ccce JMP 3cb89 |
0x3ccd3 NOPL (%RAX,%RAX,1) |
(372) 0x3ccd8 VXORPD %XMM11,%XMM11,%XMM11 |
(372) 0x3ccdd JMP 3cc57 |
0x3cce2 VMOVSD (%R8),%XMM0 |
0x3cce7 ADD $0x8,%R8 |
0x3cceb JMP 3ca77 |
0x3ccf0 CALL 80d0 <__stack_chk_fail@plt> |
0x3ccf5 NOP |
0x3ccf6 NOPW %CS:(%RAX,%RAX,1) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | qmcplusplus::WaveFunction::rat[...] | WaveFunction.cpp:207 | libqmcwfs.so |
○ | main._omp_fn.1 | stl_vector.h:1123 | exec |
○ | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
Path / |
Source file and lines | TwoBodyJastrowRef.h:295-304 |
Module | libqmcwfs.so |
nb instructions | 107 |
nb uops | 110 |
loop length | 460 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 8 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 5 |
micro-operation queue | 27.50 cycles |
front end | 27.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | |
---|---|---|---|---|---|---|---|---|
uops | 13.00 | 13.00 | 18.17 | 17.83 | 19.00 | 13.00 | 13.00 | 18.00 |
cycles | 13.00 | 13.00 | 18.17 | 17.83 | 19.00 | 13.00 | 13.00 | 18.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 27.36 |
Stall cycles | 0.00 |
Front-end | 27.50 |
Dispatch | 19.00 |
Overall L1 | 27.50 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 28% |
load | 15% |
store | 40% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 11% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 14% |
load | 14% |
store | 22% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 7% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 14% |
all | 11% |
load | 12% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 16% |
load | 14% |
store | 17% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 13% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 13% |
load | 14% |
store | 13% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 13% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 13% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOVSXD %EDX,%R15 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %R15D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RCX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
SUB $0x38,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %FS:0x28,%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0xa10(%RSI),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x140(%RDI),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOVL $0x2,0xc(%RDI) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 2 | 1 |
MOV 0x158(%RDI),%RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV (%RAX),%R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RBX,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x68(%R14),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
PUSH $0 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSHQ 0x170(%RDI) | 2 | 0 | 0 | 0.83 | 0.83 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %R8,-0x58(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CALL 3b260 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE9computeU3ERKNS1_11ParticleSetEiPKdPdSA_SA_b> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV 0x90(%R13),%R9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x58(%RBP),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
POP %RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
LEA (,%R9,8),%RSI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
POP %RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
LEA (%R8,%RSI,1),%RDI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R8,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 3caea | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %RDI,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SUB %R8,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
SUB $0x8,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
SHR $0x3,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
INC %RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
AND $0x7,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 3cab2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x1,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 3caa4 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x2,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 3ca9b | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x3,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 3ca92 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x4,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 3ca89 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x5,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 3ca80 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x6,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JNE 3cce2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VADDSD (%R8),%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
ADD $0x8,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VADDSD (%R8),%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
ADD $0x8,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VADDSD (%R8),%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
ADD $0x8,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VADDSD (%R8),%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
ADD $0x8,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VADDSD (%R8),%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
ADD $0x8,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VADDSD (%R8),%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
ADD $0x8,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP %RDI,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 3caea | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV 0xd8(%R13),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD %XMM0,0x138(%R13) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x88(%R14),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VXORPD %XMM10,%XMM10,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOVQ $0,-0x40(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 2 | 1 |
MOV 0x98(%R14),%R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA -0x50(%RBP),%R11 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA -0x38(%RBP),%R8 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD (%RCX,%R15,8),%XMM8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVAPD %XMM10,-0x50(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
LEA (,%R10,8),%R15 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VSUBSD %XMM0,%XMM8,%XMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM9,0xb0(%R13) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVUPD (%R12),%XMM11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x10(%R12),%XMM13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VADDPD -0x50(%RBP),%XMM11,%XMM12 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD -0x40(%RBP),%XMM13,%XMM14 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVUPD %XMM12,(%R12) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVSD %XMM14,0x10(%R12) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV -0x38(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
SUB %FS:0x28,%RAX | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
JNE 3ccf0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VMOVSD 0xb0(%R13),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
JMP 8060 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVSD (%R8),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
ADD $0x8,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JMP 3ca77 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
CALL 80d0 <__stack_chk_fail@plt> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
Source file and lines | TwoBodyJastrowRef.h:295-304 |
Module | libqmcwfs.so |
nb instructions | 107 |
nb uops | 110 |
loop length | 460 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 8 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 5 |
micro-operation queue | 27.50 cycles |
front end | 27.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | |
---|---|---|---|---|---|---|---|---|
uops | 13.00 | 13.00 | 18.17 | 17.83 | 19.00 | 13.00 | 13.00 | 18.00 |
cycles | 13.00 | 13.00 | 18.17 | 17.83 | 19.00 | 13.00 | 13.00 | 18.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 27.36 |
Stall cycles | 0.00 |
Front-end | 27.50 |
Dispatch | 19.00 |
Overall L1 | 27.50 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 28% |
load | 15% |
store | 40% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 11% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 14% |
load | 14% |
store | 22% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 7% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 14% |
all | 11% |
load | 12% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 16% |
load | 14% |
store | 17% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 13% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 13% |
load | 14% |
store | 13% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 13% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 13% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOVSXD %EDX,%R15 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %R15D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RCX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
SUB $0x38,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %FS:0x28,%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0xa10(%RSI),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x140(%RDI),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOVL $0x2,0xc(%RDI) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 2 | 1 |
MOV 0x158(%RDI),%RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV (%RAX),%R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RBX,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x68(%R14),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
PUSH $0 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSHQ 0x170(%RDI) | 2 | 0 | 0 | 0.83 | 0.83 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %R8,-0x58(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CALL 3b260 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE9computeU3ERKNS1_11ParticleSetEiPKdPdSA_SA_b> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV 0x90(%R13),%R9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x58(%RBP),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
POP %RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
LEA (,%R9,8),%RSI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
POP %RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
LEA (%R8,%RSI,1),%RDI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R8,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 3caea | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %RDI,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SUB %R8,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
SUB $0x8,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
SHR $0x3,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
INC %RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
AND $0x7,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 3cab2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x1,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 3caa4 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x2,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 3ca9b | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x3,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 3ca92 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x4,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 3ca89 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x5,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 3ca80 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x6,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JNE 3cce2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VADDSD (%R8),%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
ADD $0x8,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VADDSD (%R8),%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
ADD $0x8,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VADDSD (%R8),%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
ADD $0x8,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VADDSD (%R8),%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
ADD $0x8,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VADDSD (%R8),%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
ADD $0x8,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VADDSD (%R8),%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
ADD $0x8,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP %RDI,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 3caea | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV 0xd8(%R13),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD %XMM0,0x138(%R13) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x88(%R14),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VXORPD %XMM10,%XMM10,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOVQ $0,-0x40(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 2 | 1 |
MOV 0x98(%R14),%R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA -0x50(%RBP),%R11 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA -0x38(%RBP),%R8 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD (%RCX,%R15,8),%XMM8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVAPD %XMM10,-0x50(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
LEA (,%R10,8),%R15 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VSUBSD %XMM0,%XMM8,%XMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM9,0xb0(%R13) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVUPD (%R12),%XMM11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x10(%R12),%XMM13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VADDPD -0x50(%RBP),%XMM11,%XMM12 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD -0x40(%RBP),%XMM13,%XMM14 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVUPD %XMM12,(%R12) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVSD %XMM14,0x10(%R12) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV -0x38(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
SUB %FS:0x28,%RAX | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
JNE 3ccf0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VMOVSD 0xb0(%R13),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
JMP 8060 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVSD (%R8),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
ADD $0x8,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JMP 3ca77 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
CALL 80d0 <__stack_chk_fail@plt> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼miniqmcreference::TwoBodyJastrowRef | 2.56 | 0.03 |
○Loop 374 - stl_numeric.h:140-141 - libqmcwfs.so | 0.37 | 0 |
▼Loop 372 - TwoBodyJastrow.h:148-155 - libqmcwfs.so– | 0 | 0 |
○Loop 373 - TwoBodyJastrow.h:153-154 - libqmcwfs.so | 2.2 | 0.03 |