| Function: miniqmcreference::OneBodyJastrowRef<qmcplusplus::BsplineFunctor<double> >::recompute(qmcpl ... | Module: exec | Source: OneBodyJastrowRef.h:107-194 [...] | Coverage (incl. loops): NA% | (excl. loops): NA% |
|---|
| Function: miniqmcreference::OneBodyJastrowRef<qmcplusplus::BsplineFunctor<double> >::recompute(qmcpl ... | Module: exec | Source: OneBodyJastrowRef.h:107-194 [...] | Coverage (incl. loops): NA% | (excl. loops): NA% |
|---|
/home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/Jastrow/OneBodyJastrowRef.h: 107 - 194 |
-------------------------------------------------------------------------------- |
107: { |
108: const auto& d_ie = P.getDistTableAB(myTableID); |
109: for (int iat = 0; iat < Nelec; ++iat) |
110: { |
111: computeU3(P, iat, d_ie.getDistRow(iat).data()); |
112: Vat[iat] = std::accumulate(U.begin(), U.begin() + Nions, valT()); |
113: Lap[iat] = accumulateGL(dU.data(), d2U.data(), d_ie.getDisplRow(iat), Grad[iat]); |
114: } |
115: } |
[...] |
186: for (int jat = 0; jat < Nions; ++jat) |
187: lap += d2u[jat] + lapfac * du[jat]; |
188: for (int idim = 0; idim < OHMMS_DIM; ++idim) |
189: { |
190: const valT* restrict dX = displ.data(idim); |
191: valT s = valT(); |
192: for (int jat = 0; jat < Nions; ++jat) |
193: s += du[jat] * dX[jat]; |
194: grad[idim] = s; |
/home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsVector.h: 223 - 249 |
-------------------------------------------------------------------------------- |
223: return X[i]; |
[...] |
249: inline const_pointer data() const { return X; } |
/usr/lib/gcc/aarch64-amazon-linux/14/../../../../include/c++/14/bits/stl_vector.h: 1150 - 1150 |
-------------------------------------------------------------------------------- |
1150: return *(this->_M_impl._M_start + __n); |
/usr/lib/gcc/aarch64-amazon-linux/14/../../../../include/c++/14/bits/stl_numeric.h: 140 - 141 |
-------------------------------------------------------------------------------- |
140: for (; __first != __last; ++__first) |
141: __init = _GLIBCXX_MOVE_IF_20(__init) + *__first; |
0x423020 STP X29, X30, [SP, #-96]! |
0x423024 STP X28, X27, [SP, #16] |
0x423028 STP X26, X25, [SP, #32] |
0x42302c STP X24, X23, [SP, #48] |
0x423030 STP X22, X21, [SP, #64] |
0x423034 STP X20, X19, [SP, #80] |
0x423038 ADD X29, SP, #0 |
0x42303c ORR X19, XZR, X1 |
0x423040 LDR W1, [X0, #168] |
0x423044 ORR X20, XZR, X0 |
0x423048 ORR X0, XZR, X19 |
0x42304c BL 452bb0 |
0x423050 LDR W8, [X20, #148] |
0x423054 CMP W8, #1 |
0x423058 B.LT 42343c |
0x42305c CNTW X23, ALL |
0x423060 MOVZ W25, #40 |
0x423064 MOVZ W26, #24 |
0x423068 ORR X21, XZR, X0 |
0x42306c ORR X22, XZR, XZR |
0x423070 SUB X24, XZR, X23 |
0x423074 RDVL X27, #2 |
0x423078 RDVL X28, #1 |
0x42307c B 4230a4 |
(279) 0x423080 MOVI D0, #0 |
(279) 0x423084 STP XZR, XZR, [X9] |
(279) 0x423088 STR XZR, [X9, #16] |
(279) 0x42308c LDR X8, [X20, #440] |
(279) 0x423090 STR D0, [X8, X22,LSL #3] |
(279) 0x423094 LDRSW X8, [X20, #148] |
(279) 0x423098 ADD X22, X22, #1 |
(279) 0x42309c CMP X22, X8 |
(279) 0x4230a0 B.GE 42343c |
(279) 0x4230a4 LDR X8, [X21, #72] |
(279) 0x4230a8 ORR X0, XZR, X20 |
(279) 0x4230ac ORR X1, XZR, X19 |
(279) 0x4230b0 ORR W2, WZR, W22 |
(279) 0x4230b4 MADD X8, X22, X25, X8 |
(279) 0x4230b8 LDR X3, [X8, #24] |
(279) 0x4230bc BL 4224e0 |
(279) 0x4230c0 LDRSW X8, [X20, #144] |
(279) 0x4230c4 CBZ W8, 423100 |
(279) 0x4230c8 UBFM X9, X8, #61, #60 |
(279) 0x4230cc LDR X10, [X20, #256] |
(279) 0x4230d0 MOVI D0, #0 |
(279) 0x4230d4 SUB X9, X9, #8 |
(279) 0x4230d8 UBFM X9, X9, #3, #63 |
(279) 0x4230dc ADD X11, X9, #1 |
(279) 0x4230e0 CMP X11, X23 |
(279) 0x4230e4 B.CS 423120 |
(279) 0x4230e8 ORR X9, XZR, X10 |
(279) 0x4230ec PTRUE P0.D, ALL |
(279) 0x4230f0 FDUP Z6.D, #0 |
(279) 0x4230f4 B 42316c |
0x4230f8 HINT #0 |
0x4230fc HINT #0 |
(279) 0x423100 MOVI D0, #0 |
(279) 0x423104 PTRUE P0.D, ALL |
(279) 0x423108 FDUP Z6.D, #0 |
(279) 0x42310c B 423190 |
0x423110 HINT #0 |
0x423114 HINT #0 |
0x423118 HINT #0 |
0x42311c HINT #0 |
(279) 0x423120 AND X12, X11, X24 |
(279) 0x423124 MOVI V0.2D, #0 |
(279) 0x423128 MOVI V1.2D, #0 |
(279) 0x42312c PTRUE P0.D, ALL |
(279) 0x423130 FDUP Z6.D, #0 |
(279) 0x423134 AND X13, X11, X24 |
(279) 0x423138 ORR X14, XZR, X10 |
(279) 0x42313c ADD X9, X10, X12,LSL #3 |
(289) 0x423140 LDR Z2, [X14, MUL VL] |
(289) 0x423144 LDR Z3, [X14, #1, MUL VL] |
(289) 0x423148 ADD X14, X14, X27 |
(289) 0x42314c SUBS X13, X13, X23 |
(289) 0x423150 FADD Z0.D, Z2.D, Z0.D |
(289) 0x423154 FADD Z1.D, Z3.D, Z1.D |
(289) 0x423158 B.NE 423140 |
(279) 0x42315c FADD Z0.D, Z1.D, Z0.D |
(279) 0x423160 CMP X11, X12 |
(279) 0x423164 FADDV D0, P0, Z0.D |
(279) 0x423168 B.EQ 423190 |
(279) 0x42316c ADD X10, X10, X8,LSL #3 |
(279) 0x423170 HINT #0 |
(279) 0x423174 HINT #0 |
(279) 0x423178 HINT #0 |
(279) 0x42317c HINT #0 |
(290) 0x423180 LDR D1, [X9], #8 |
(290) 0x423184 CMP X9, X10 |
(290) 0x423188 FADD D0, D1, D0 |
(290) 0x42318c B.NE 423180 |
(279) 0x423190 LDR X9, [X20, #240] |
(279) 0x423194 CMP W8, #1 |
(279) 0x423198 STR D0, [X9, X22,LSL #3] |
(279) 0x42319c LDR X9, [X20, #400] |
(279) 0x4231a0 MADD X9, X22, X26, X9 |
(279) 0x4231a4 B.LT 423080 |
(285) 0x4231a8 LDR X10, [X20, #280] |
(285) 0x4231ac LDR X11, [X20, #304] |
(285) 0x4231b0 CMP X23, X8 |
(285) 0x4231b4 B.LS 4231c4 |
(285) 0x4231b8 ORR X12, XZR, XZR |
(285) 0x4231bc MOVI D0, #0 |
(285) 0x4231c0 B 423220 |
(285) 0x4231c4 MOVI V0.2D, #0 |
(285) 0x4231c8 MOVI V1.2D, #0 |
(285) 0x4231cc AND X12, X24, X8 |
(285) 0x4231d0 AND X15, X24, X8 |
(285) 0x4231d4 ORR X13, XZR, X11 |
(285) 0x4231d8 ORR X14, XZR, X10 |
(285) 0x4231dc HINT #0 |
(280) 0x4231e0 LDR Z2, [X13, MUL VL] |
(280) 0x4231e4 LDR Z3, [X13, #1, MUL VL] |
(280) 0x4231e8 SUBS X15, X15, X23 |
(280) 0x4231ec ADD X13, X13, X27 |
(280) 0x4231f0 FADD Z0.D, Z2.D, Z0.D |
(280) 0x4231f4 LDR Z4, [X14, MUL VL] |
(280) 0x4231f8 LDR Z5, [X14, #1, MUL VL] |
(280) 0x4231fc FADD Z1.D, Z3.D, Z1.D |
(280) 0x423200 ADD X14, X14, X27 |
(280) 0x423204 FMLA Z0.D, P0/M, Z4.D, Z6.D |
(280) 0x423208 FMLA Z1.D, P0/M, Z5.D, Z6.D |
(280) 0x42320c B.NE 4231e0 |
(285) 0x423210 FADD Z0.D, Z1.D, Z0.D |
(285) 0x423214 CMP X12, X8 |
(285) 0x423218 FADDV D0, P0, Z0.D |
(285) 0x42321c B.EQ 423248 |
(285) 0x423220 SUB X13, X8, X12 |
(285) 0x423224 ADD X14, X10, X12,LSL #3 |
(285) 0x423228 ADD X11, X11, X12,LSL #3 |
(288) 0x42322c LDR D1, [X11], #8 |
(288) 0x423230 LDR D2, [X14], #8 |
(288) 0x423234 SUBS X13, X13, #1 |
(288) 0x423238 FADD D2, D2, D2 |
(288) 0x42323c FADD D0, D1, D0 |
(288) 0x423240 FADD D0, D0, D2 |
(288) 0x423244 B.NE 42322c |
(285) 0x423248 LDR X11, [X21, #96] |
(285) 0x42324c CMP X23, X8 |
(285) 0x423250 MADD X12, X22, X25, X11 |
(285) 0x423254 LDR X11, [X12, #24] |
(285) 0x423258 B.LS 423268 |
(285) 0x42325c ORR X13, XZR, XZR |
(285) 0x423260 MOVI D1, #0 |
(285) 0x423264 B 4232b8 |
(285) 0x423268 MOVI V1.2D, #0 |
(285) 0x42326c MOVI V2.2D, #0 |
(285) 0x423270 AND X13, X24, X8 |
(285) 0x423274 AND X16, X24, X8 |
(285) 0x423278 ORR X14, XZR, X10 |
(285) 0x42327c ORR X15, XZR, X11 |
(281) 0x423280 LDR Z3, [X14, MUL VL] |
(281) 0x423284 LDR Z4, [X14, #1, MUL VL] |
(281) 0x423288 SUBS X16, X16, X23 |
(281) 0x42328c ADD X14, X14, X27 |
(281) 0x423290 LDR Z5, [X15, MUL VL] |
(281) 0x423294 FMLA Z1.D, P0/M, Z5.D, Z3.D |
(281) 0x423298 LDR Z6, [X15, #1, MUL VL] |
(281) 0x42329c FMLA Z2.D, P0/M, Z6.D, Z4.D |
(281) 0x4232a0 ADD X15, X15, X27 |
(281) 0x4232a4 B.NE 423280 |
(285) 0x4232a8 FADD Z1.D, Z2.D, Z1.D |
(285) 0x4232ac CMP X13, X8 |
(285) 0x4232b0 FADDV D1, P0, Z1.D |
(285) 0x4232b4 B.EQ 4232d8 |
(285) 0x4232b8 SUB X14, X8, X13 |
(285) 0x4232bc ADD X15, X11, X13,LSL #3 |
(285) 0x4232c0 ADD X13, X10, X13,LSL #3 |
(287) 0x4232c4 LDR D2, [X13], #8 |
(287) 0x4232c8 LDR D3, [X15], #8 |
(287) 0x4232cc SUBS X14, X14, #1 |
(287) 0x4232d0 FMADD D1, D3, D2, D1 |
(287) 0x4232d4 B.NE 4232c4 |
(285) 0x4232d8 LDR X12, [X12, #8] |
(285) 0x4232dc CMP X23, X8 |
(285) 0x4232e0 STR D1, [X9] |
(285) 0x4232e4 B.LS 423300 |
(285) 0x4232e8 ORR X13, XZR, XZR |
(285) 0x4232ec MOVI D1, #0 |
(285) 0x4232f0 B 423358 |
0x4232f4 HINT #0 |
0x4232f8 HINT #0 |
0x4232fc HINT #0 |
(285) 0x423300 ADD X15, X11, X12,LSL #3 |
(285) 0x423304 MOVI V1.2D, #0 |
(285) 0x423308 MOVI V2.2D, #0 |
(285) 0x42330c AND X13, X24, X8 |
(285) 0x423310 ORR X14, XZR, XZR |
(285) 0x423314 ORR X17, XZR, X10 |
(285) 0x423318 ADD X16, X15, X28 |
(285) 0x42331c HINT #0 |
(282) 0x423320 LDR Z3, [X17, MUL VL] |
(282) 0x423324 LDR Z4, [X17, #1, MUL VL] |
(282) 0x423328 ADD X17, X17, X27 |
(282) 0x42332c LD1D {Z5.D}, P0/Z, [X15, X14,LSL #3] |
(282) 0x423330 FMLA Z1.D, P0/M, Z5.D, Z3.D |
(282) 0x423334 LD1D {Z6.D}, P0/Z, [X16, X14,LSL #3] |
(282) 0x423338 FMLA Z2.D, P0/M, Z6.D, Z4.D |
(282) 0x42333c ADD X14, X14, X23 |
(282) 0x423340 CMP X13, X14 |
(282) 0x423344 B.NE 423320 |
(285) 0x423348 FADD Z1.D, Z2.D, Z1.D |
(285) 0x42334c CMP X13, X8 |
(285) 0x423350 FADDV D1, P0, Z1.D |
(285) 0x423354 B.EQ 423380 |
(285) 0x423358 UBFM X15, X13, #61, #60 |
(285) 0x42335c SUB X14, X8, X13 |
(285) 0x423360 ADD X13, X10, X13,LSL #3 |
(285) 0x423364 ADD X15, X15, X12,LSL #3 |
(285) 0x423368 ADD X15, X11, X15 |
(286) 0x42336c LDR D2, [X13], #8 |
(286) 0x423370 LDR D3, [X15], #8 |
(286) 0x423374 SUBS X14, X14, #1 |
(286) 0x423378 FMADD D1, D3, D2, D1 |
(286) 0x42337c B.NE 42336c |
(285) 0x423380 CMP X23, X8 |
(285) 0x423384 STR D1, [X9, #8] |
(285) 0x423388 B.LS 4233a0 |
(285) 0x42338c ORR X13, XZR, XZR |
(285) 0x423390 MOVI D1, #0 |
(285) 0x423394 B 4233f8 |
0x423398 HINT #0 |
0x42339c HINT #0 |
(283) 0x4233a0 MOVI V1.2D, #0 |
(283) 0x4233a4 MOVI V2.2D, #0 |
(283) 0x4233a8 ADD X14, X11, X12,LSL #4 |
(283) 0x4233ac AND X13, X24, X8 |
(283) 0x4233b0 AND X16, X24, X8 |
(283) 0x4233b4 ORR X15, XZR, X10 |
(283) 0x4233b8 HINT #0 |
(283) 0x4233bc HINT #0 |
(278) 0x4233c0 LDR Z3, [X15, MUL VL] |
(278) 0x4233c4 LDR Z4, [X15, #1, MUL VL] |
(278) 0x4233c8 SUBS X16, X16, X23 |
(278) 0x4233cc ADD X15, X15, X27 |
(278) 0x4233d0 LDR Z5, [X14, MUL VL] |
(278) 0x4233d4 FMLA Z1.D, P0/M, Z5.D, Z3.D |
(278) 0x4233d8 LDR Z6, [X14, #1, MUL VL] |
(278) 0x4233dc FMLA Z2.D, P0/M, Z6.D, Z4.D |
(278) 0x4233e0 ADD X14, X14, X27 |
(278) 0x4233e4 B.NE 4233c0 |
(283) 0x4233e8 FADD Z1.D, Z2.D, Z1.D |
(283) 0x4233ec CMP X13, X8 |
(283) 0x4233f0 FADDV D1, P0, Z1.D |
(283) 0x4233f4 B.EQ 423420 |
(285) 0x4233f8 UBFM X12, X12, #60, #59 |
(285) 0x4233fc SUB X8, X8, X13 |
(285) 0x423400 ADD X10, X10, X13,LSL #3 |
(285) 0x423404 ADD X12, X12, X13,LSL #3 |
(285) 0x423408 ADD X11, X11, X12 |
(284) 0x42340c LDR D2, [X10], #8 |
(284) 0x423410 LDR D3, [X11], #8 |
(284) 0x423414 SUBS X8, X8, #1 |
(284) 0x423418 FMADD D1, D3, D2, D1 |
(284) 0x42341c B.NE 42340c |
(285) 0x423420 STR D1, [X9, #16] |
(285) 0x423424 LDR X8, [X20, #440] |
(285) 0x423428 STR D0, [X8, X22,LSL #3] |
(285) 0x42342c LDRSW X8, [X20, #148] |
(285) 0x423430 ADD X22, X22, #1 |
(285) 0x423434 CMP X22, X8 |
(285) 0x423438 B.LT 4230a4 |
0x42343c LDP X20, X19, [SP, #80] |
0x423440 LDP X22, X21, [SP, #64] |
0x423444 LDP X24, X23, [SP, #48] |
0x423448 LDP X26, X25, [SP, #32] |
0x42344c LDP X28, X27, [SP, #16] |
0x423450 LDP X29, X30, [SP], #96 |
0x423454 RET |
0x423458 HINT #0 |
0x42345c HINT #0 |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| Path / |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run 1x1
| Source file and lines | OneBodyJastrowRef.h:107-194 |
| Module | exec |
| nb instructions | 44 |
| nb uops | 31 |
| loop length | 176 |
| used w registers | 4 |
| used x registers | 15 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 0 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 12 |
| micro-operation queue | 3.88 cycles |
| front end | 3.88 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 2.00 | 2.00 | 3.75 | 3.75 | 3.75 | 3.75 | 0.00 | 0.00 | 0.00 | 0.00 | 4.67 | 4.67 | 4.67 | 3.00 | 3.00 |
| cycles | 2.00 | 2.00 | 3.75 | 3.75 | 3.75 | 3.75 | 0.00 | 0.00 | 0.00 | 0.00 | 4.67 | 4.67 | 4.67 | 3.00 | 3.00 |
| Cycles executing div or sqrt instructions | NA |
| Front-end | 3.88 |
| Dispatch | 4.67 |
| Overall L1 | 4.67 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 33% |
| load | 37% |
| store | 50% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 25% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 20% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #-96]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X28, X27, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X26, X25, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X24, X23, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X22, X21, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X20, X19, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ORR X19, XZR, X1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| LDR W1, [X0, #168] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| ORR X20, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ORR X0, XZR, X19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| BL 452bb0 <_ZNK11qmcplusplus11ParticleSet14getDistTableABEi> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR W8, [X20, #148] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| CMP W8, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.LT 42343c <_ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE9recomputeERNS1_11ParticleSetE+0x41c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| CNTW X23, ALL | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | N/A |
| MOVZ W25, #40 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MOVZ W26, #24 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ORR X21, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ORR X22, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| SUB X24, XZR, X23 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| RDVL X27, #2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (25.0%) |
| RDVL X28, #1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (25.0%) |
| B 4230a4 <_ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE9recomputeERNS1_11ParticleSetE+0x84> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| LDP X20, X19, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X22, X21, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X24, X23, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X26, X25, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X28, X27, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #96 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run 1x1
| Source file and lines | OneBodyJastrowRef.h:107-194 |
| Module | exec |
| nb instructions | 44 |
| nb uops | 31 |
| loop length | 176 |
| used w registers | 4 |
| used x registers | 15 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 0 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 12 |
| micro-operation queue | 3.88 cycles |
| front end | 3.88 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 2.00 | 2.00 | 3.75 | 3.75 | 3.75 | 3.75 | 0.00 | 0.00 | 0.00 | 0.00 | 4.67 | 4.67 | 4.67 | 3.00 | 3.00 |
| cycles | 2.00 | 2.00 | 3.75 | 3.75 | 3.75 | 3.75 | 0.00 | 0.00 | 0.00 | 0.00 | 4.67 | 4.67 | 4.67 | 3.00 | 3.00 |
| Cycles executing div or sqrt instructions | NA |
| Front-end | 3.88 |
| Dispatch | 4.67 |
| Overall L1 | 4.67 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 33% |
| load | 37% |
| store | 50% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 25% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 20% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #-96]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X28, X27, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X26, X25, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X24, X23, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X22, X21, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X20, X19, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ORR X19, XZR, X1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| LDR W1, [X0, #168] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| ORR X20, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ORR X0, XZR, X19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| BL 452bb0 <_ZNK11qmcplusplus11ParticleSet14getDistTableABEi> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR W8, [X20, #148] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| CMP W8, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.LT 42343c <_ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE9recomputeERNS1_11ParticleSetE+0x41c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| CNTW X23, ALL | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | N/A |
| MOVZ W25, #40 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MOVZ W26, #24 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ORR X21, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ORR X22, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| SUB X24, XZR, X23 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| RDVL X27, #2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (25.0%) |
| RDVL X28, #1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (25.0%) |
| B 4230a4 <_ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE9recomputeERNS1_11ParticleSetE+0x84> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| LDP X20, X19, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X22, X21, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X24, X23, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X26, X25, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X28, X27, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #96 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A |
| Run 1x1 | Number processes: 1; Number nodes: NA; Run Command: <executable> -g "4 2 2" -b; MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings; Dataset: ; Run Directory: /home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/run/oneview_runs/multicore/armclang/oneview_run_1782144418; OMP_PROC_BIND: spread; OMP_DISPLAY_AFFINITY: TRUE; OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'; OMP_DISPLAY_ENV: TRUE; OMP_NUM_THREADS: 1; OMP_PLACES: threads |
|---|---|
| Run 1x2 | Number processes: 1; Run Command: <executable> -g "4 2 2" -b; MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings; Dataset: ; Run Directory: /home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/run/oneview_runs/multicore/armclang/oneview_run_1782144418; OMP_NUM_THREADS: 2; OMP_PROC_BIND: spread; OMP_DISPLAY_AFFINITY: TRUE; OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'; OMP_DISPLAY_ENV: TRUE; OMP_PLACES: threads |
| Run 1x4 | Number processes: 1; Run Command: <executable> -g "4 2 2" -b; MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings; Dataset: ; Run Directory: /home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/run/oneview_runs/multicore/armclang/oneview_run_1782144418; OMP_NUM_THREADS: 4; OMP_PROC_BIND: spread; OMP_DISPLAY_AFFINITY: TRUE; OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'; OMP_DISPLAY_ENV: TRUE; OMP_PLACES: threads |
| Run 1x8 | Number processes: 1; Run Command: <executable> -g "4 2 2" -b; MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings; Dataset: ; Run Directory: /home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/run/oneview_runs/multicore/armclang/oneview_run_1782144418; OMP_NUM_THREADS: 8; OMP_PROC_BIND: spread; OMP_DISPLAY_AFFINITY: TRUE; OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'; OMP_DISPLAY_ENV: TRUE; OMP_PLACES: threads |
| Run 1x16 | Number processes: 1; Run Command: <executable> -g "4 2 2" -b; MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings; Dataset: ; Run Directory: /home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/run/oneview_runs/multicore/armclang/oneview_run_1782144418; OMP_NUM_THREADS: 16; OMP_PROC_BIND: spread; OMP_DISPLAY_AFFINITY: TRUE; OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'; OMP_DISPLAY_ENV: TRUE; OMP_PLACES: threads |
| Run 1x24 | Number processes: 1; Run Command: <executable> -g "4 2 2" -b; MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings; Dataset: ; Run Directory: /home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/run/oneview_runs/multicore/armclang/oneview_run_1782144418; OMP_NUM_THREADS: 24; OMP_PROC_BIND: spread; OMP_DISPLAY_AFFINITY: TRUE; OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'; OMP_DISPLAY_ENV: TRUE; OMP_PLACES: threads |
| Run 1x32 | Number processes: 1; Run Command: <executable> -g "4 2 2" -b; MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings; Dataset: ; Run Directory: /home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/run/oneview_runs/multicore/armclang/oneview_run_1782144418; OMP_NUM_THREADS: 32; OMP_PROC_BIND: spread; OMP_DISPLAY_AFFINITY: TRUE; OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'; OMP_DISPLAY_ENV: TRUE; OMP_PLACES: threads |
| Run 1x40 | Number processes: 1; Run Command: <executable> -g "4 2 2" -b; MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings; Dataset: ; Run Directory: /home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/run/oneview_runs/multicore/armclang/oneview_run_1782144418; OMP_NUM_THREADS: 40; OMP_PROC_BIND: spread; OMP_DISPLAY_AFFINITY: TRUE; OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'; OMP_DISPLAY_ENV: TRUE; OMP_PLACES: threads |
| Run 1x48 | Number processes: 1; Run Command: <executable> -g "4 2 2" -b; MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings; Dataset: ; Run Directory: /home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/run/oneview_runs/multicore/armclang/oneview_run_1782144418; OMP_NUM_THREADS: 48; OMP_PROC_BIND: spread; OMP_DISPLAY_AFFINITY: TRUE; OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'; OMP_DISPLAY_ENV: TRUE; OMP_PLACES: threads |
| Run 1x56 | Number processes: 1; Run Command: <executable> -g "4 2 2" -b; MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings; Dataset: ; Run Directory: /home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/run/oneview_runs/multicore/armclang/oneview_run_1782144418; OMP_NUM_THREADS: 56; OMP_PROC_BIND: spread; OMP_DISPLAY_AFFINITY: TRUE; OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'; OMP_DISPLAY_ENV: TRUE; OMP_PLACES: threads |
| Run 1x64 | Number processes: 1; Run Command: <executable> -g "4 2 2" -b; MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings; Dataset: ; Run Directory: /home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/run/oneview_runs/multicore/armclang/oneview_run_1782144418; OMP_NUM_THREADS: 64; OMP_PROC_BIND: spread; OMP_DISPLAY_AFFINITY: TRUE; OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'; OMP_DISPLAY_ENV: TRUE; OMP_PLACES: threads |
| (1x1) Efficiency | (1x1) Potential Speed-Up (%) | (1x2) Efficiency | (1x2) Potential Speed-Up (%) | (1x4) Efficiency | (1x4) Potential Speed-Up (%) | (1x8) Efficiency | (1x8) Potential Speed-Up (%) | (1x16) Efficiency | (1x16) Potential Speed-Up (%) | (1x24) Efficiency | (1x24) Potential Speed-Up (%) | (1x32) Efficiency | (1x32) Potential Speed-Up (%) | (1x40) Efficiency | (1x40) Potential Speed-Up (%) | (1x48) Efficiency | (1x48) Potential Speed-Up (%) | (1x56) Efficiency | (1x56) Potential Speed-Up (%) | (1x64) Efficiency | (1x64) Potential Speed-Up (%) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 0 | 1 | 0 | 1 | 0 | 1 | 0 | 1 | 0 | 1 | 0 | 1 | 0 | 1 | 0 | 1 | 0 |
| Run | Number of threads | Efficiency (ideal is 1) | Speedup | Ideal Speedup | Time (s) | Coverage (%) |
|---|---|---|---|---|---|---|
| 1x1 | ||||||
| 1x2 | 2 | 1 | 1 | 2 | 0.0049999998882413 | 0.011293084360659 |
| 1x4 | 4 | 1 | 1 | 4 | 0.0049999998882413 | 0.011058918200433 |
| 1x8 | 7 | 1 | 1 | 8 | 0.0050000003539026 | 0.0089964028447866 |
| 1x16 | 2 | 1 | 1 | 16 | 0.0099999997764826 | 0.0019579634536058 |
| 1x24 | 1 | 1 | 1 | 24 | 0.010000000707805 | 0.00053075782489032 |
| 1x32 | 10 | 1 | 1 | 32 | 0.010000000707805 | 0.0033945313189179 |
| 1x40 | ||||||
| 1x48 | 4 | 1 | 1 | 48 | 0.015000000596046 | 0.0010277985129505 |
| 1x56 | 11 | 1 | 1 | 56 | 0.019999999552965 | 0.0022279203403741 |
| 1x64 | 4 | 1 | 1 | 64 | 0.02000000141561 | 0.00072514929343015 |
| Name | Coverage (%) | Time (s) |
|---|---|---|
| ▼miniqmcreference::OneBodyJastrowRef | 0.00 | 0.00 |
| ▼Loop 283 - OneBodyJastrowRef.h:109-194 - exec– | 0.00 | 0.00 |
| ▼Loop 285 - OneBodyJastrowRef.h:109-194 - exec– | 0.00 | 0.00 |
| ○Loop 287 - OneBodyJastrowRef.h:192-193 - exec | 0.00 | 0.00 |
| ▼Loop 279 - OneBodyJastrowRef.h:109-194 - exec– | 0.00 | 0.00 |
| ○Loop 289 - stl_numeric.h:140-141 - exec | 0.00 | 0.00 |
| ○Loop 290 - stl_numeric.h:140-141 - exec | 0.00 | 0.00 |
| ○Loop 284 - OneBodyJastrowRef.h:192-193 - exec | 0.00 | 0.00 |
| ○Loop 280 - OneBodyJastrowRef.h:186-187 - exec | 0.00 | 0.00 |
| ○Loop 281 - OneBodyJastrowRef.h:192-193 - exec | 0.00 | 0.00 |
| ○Loop 288 - OneBodyJastrowRef.h:186-187 - exec | 0.00 | 0.00 |
| ○Loop 286 - OneBodyJastrowRef.h:192-193 - exec | 0.00 | 0.00 |
| ○Loop 282 - OneBodyJastrowRef.h:192-193 - exec | 0.00 | 0.00 |
| ○Loop 278 - OneBodyJastrowRef.h:192-193 - exec | 0.00 | 0.00 |
