| Function: qmcplusplus::SoaDistanceTableAAOMPTarget<double, 3u, 40>::evaluate(qmcplusplus::ParticleSe ... | Module: exec | Source: SoaDistanceTableAAOMPTarget.h:179-187 [...] | Coverage (incl. loops): 0.07% | (excl. loops): 0.00% |
|---|
| Function: qmcplusplus::SoaDistanceTableAAOMPTarget<double, 3u, 40>::evaluate(qmcplusplus::ParticleSe ... | Module: exec | Source: SoaDistanceTableAAOMPTarget.h:179-187 [...] | Coverage (incl. loops): 0.07% | (excl. loops): 0.00% |
|---|
/home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/build/miniqmc/src/Particle/SoaDistanceTableAAOMPTarget.h: 179 - 187 |
-------------------------------------------------------------------------------- |
179: inline void evaluate(ParticleSet& P) override |
180: { |
181: ScopedTimer local_timer(evaluate_timer_); |
182: |
183: constexpr T BigR = std::numeric_limits<T>::max(); |
184: for (int iat = 1; iat < num_targets_; ++iat) |
185: DTD_BConds<T, D, SC>::computeDistances(P.R[iat], P.getCoordinates().getAllParticlePos(), distances_[iat].data(), |
186: displacements_[iat], 0, iat, iat); |
187: } |
/home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/build/miniqmc/src/Particle/RealSpacePositionsOMPTarget.h: 163 - 163 |
-------------------------------------------------------------------------------- |
163: const PosVectorSoa& getAllParticlePos() const override { return RSoA_hostview; } |
/usr/include/c++/14/bits/stl_vector.h: 1131 - 1131 |
-------------------------------------------------------------------------------- |
1131: return *(this->_M_impl._M_start + __n); |
/home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsVector.h: 223 - 248 |
-------------------------------------------------------------------------------- |
223: return X[i]; |
[...] |
248: inline pointer data() { return X; } |
/home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/VectorSoAContainer.h: 271 - 273 |
-------------------------------------------------------------------------------- |
271: inline T* restrict data(size_t i) { return myData + i * nGhosts; } |
272: ///return the const pointer of the i-th components |
273: inline const T* restrict data(size_t i) const { return myData + i * nGhosts; } |
/home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/build/miniqmc/src/Particle/Lattice/ParticleBConds3DSoa.h: 234 - 255 |
-------------------------------------------------------------------------------- |
234: #pragma omp simd aligned(temp_r, px, py, pz, dx, dy, dz: QMC_SIMD_ALIGNMENT) |
235: for (int iat = first; iat < last; ++iat) |
236: { |
237: T displ_0 = px[iat] - x0; |
238: T displ_1 = py[iat] - y0; |
239: T displ_2 = pz[iat] - z0; |
240: |
241: T ar_0 = displ_0 * g00 + displ_1 * g10 + displ_2 * g20; |
242: T ar_1 = displ_0 * g01 + displ_1 * g11 + displ_2 * g21; |
243: T ar_2 = displ_0 * g02 + displ_1 * g12 + displ_2 * g22; |
244: |
245: //put them in the box |
246: ar_0 -= round(ar_0); |
247: ar_1 -= round(ar_1); |
248: ar_2 -= round(ar_2); |
249: |
250: //unit2cart |
251: dx[iat] = ar_0 * r00 + ar_1 * r10 + ar_2 * r20; |
252: dy[iat] = ar_0 * r01 + ar_1 * r11 + ar_2 * r21; |
253: dz[iat] = ar_0 * r02 + ar_1 * r12 + ar_2 * r22; |
254: |
255: temp_r[iat] = std::sqrt(dx[iat] * dx[iat] + dy[iat] * dy[iat] + dz[iat] * dz[iat]); |
/home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/build/miniqmc/src/Utilities/NewTimer.h: 242 - 249 |
-------------------------------------------------------------------------------- |
242: ScopeGuard(TIMER& t) : timer(t) { timer.start(); } |
[...] |
249: ~ScopeGuard() { timer.stop(); } |
/usr/include/c++/14/bits/unique_ptr.h: 193 - 193 |
-------------------------------------------------------------------------------- |
193: pointer _M_ptr() const noexcept { return std::get<0>(_M_t); } |
0x4ffa80 STP X29, X30, [SP, #912]! |
0x4ffa84 ADD X29, SP, #0 |
0x4ffa88 STP X23, X24, [SP, #48] |
0x4ffa8c ORR X24, XZR, X0 |
0x4ffa90 STP X25, X26, [SP, #64] |
0x4ffa94 ORR X25, XZR, X1 |
0x4ffa98 LDR X0, [X0, #632] |
0x4ffa9c STR X0, [SP, #104] |
0x4ffaa0 BL 4cf640 |
0x4ffaa4 LDR X13, [X24, #24] |
0x4ffaa8 CMP X13, #1 |
0x4ffaac B.LS 4ffc74 |
0x4ffab0 ADRP X26, |
0x4ffab4 MOVZ X23, #24 |
0x4ffab8 STP X19, X20, [SP, #16] |
0x4ffabc ADD X26, X26, #2848 |
0x4ffac0 ADD X19, X24, #280 |
0x4ffac4 PTRUE P7.B, ALL |
0x4ffac8 STP X21, X22, [SP, #32] |
0x4ffacc MOVZ X22, #1 |
0x4ffad0 CNTD X21, ALL |
0x4ffad4 STP X27, X28, [SP, #80] |
0x4ffad8 ADD X28, X25, #40 |
0x4ffadc MOVZ W27, #40 |
(2532) 0x4ffae0 LDR X0, [X25, #632] |
(2532) 0x4ffae4 LDR X4, [X28, #24] |
(2532) 0x4ffae8 LDR X1, [X0] |
(2532) 0x4ffaec ADD X20, X4, X23 |
(2532) 0x4ffaf0 LDR X2, [X1, #72] |
(2532) 0x4ffaf4 CMP X2, X26 |
(2532) 0x4ffaf8 B.NE 4ffc8c |
(2532) 0x4ffafc ADD X0, X0, #64 |
(2532) 0x4ffb00 LDR X5, [X24, #96] |
(2532) 0x4ffb04 UMULL X3, W22, W27 |
(2532) 0x4ffb08 MOVZ X2, #0 |
(2532) 0x4ffb0c ORR W7, WZR, W22 |
(2532) 0x4ffb10 LD1RD {Z31.D}, P7/Z, [X20] |
(2532) 0x4ffb14 LD1RD {Z30.D}, P7/Z, [X20, #1] |
(2532) 0x4ffb18 LDR X6, [X24, #72] |
(2532) 0x4ffb1c LD1RD {Z29.D}, P7/Z, [X20, #2] |
(2532) 0x4ffb20 WHILELO P6.D, WZR, W22 |
(2532) 0x4ffb24 LDR X8, [X0, #8] |
(2532) 0x4ffb28 ADD X10, X5, X3 |
(2532) 0x4ffb2c LDR X15, [X10, #8] |
(2532) 0x4ffb30 ADD X14, X6, X3 |
(2532) 0x4ffb34 LDR X9, [X0, #24] |
(2532) 0x4ffb38 LDR X16, [X10, #24] |
(2532) 0x4ffb3c LDR X18, [X14, #24] |
(2532) 0x4ffb40 ADD X11, X9, X8,LSL #3 |
(2532) 0x4ffb44 ADD X0, X9, X8,LSL #4 |
(2532) 0x4ffb48 ADD X12, X16, X15,LSL #3 |
(2532) 0x4ffb4c ADD X17, X16, X15,LSL #4 |
(2531) 0x4ffb50 LD1D {Z27.D}, P6/Z, [X11, X2,LSL #3] |
(2531) 0x4ffb54 LD1D {Z28.D}, P6/Z, [X9, X2,LSL #3] |
(2531) 0x4ffb58 LD1D {Z26.D}, P6/Z, [X0, X2,LSL #3] |
(2531) 0x4ffb5c LD1RD {Z1.D}, P7/Z, [X19, #13] |
(2531) 0x4ffb60 LD1RD {Z2.D}, P7/Z, [X19, #12] |
(2531) 0x4ffb64 LD1RD {Z24.D}, P7/Z, [X19, #10] |
(2531) 0x4ffb68 LD1RD {Z25.D}, P7/Z, [X19, #9] |
(2531) 0x4ffb6c LD1RD {Z17.D}, P7/Z, [X19, #16] |
(2531) 0x4ffb70 LD1RD {Z18.D}, P7/Z, [X19, #15] |
(2531) 0x4ffb74 FSUB Z3.D, Z27.D, Z30.D |
(2531) 0x4ffb78 FSUB Z5.D, Z28.D, Z31.D |
(2531) 0x4ffb7c FSUB Z6.D, Z26.D, Z29.D |
(2531) 0x4ffb80 LD1RD {Z0.D}, P7/Z, [X19, #14] |
(2531) 0x4ffb84 LD1RD {Z23.D}, P7/Z, [X19, #11] |
(2531) 0x4ffb88 LD1RD {Z16.D}, P7/Z, [X19, #17] |
(2531) 0x4ffb8c FMUL Z4.D, Z1.D, Z3.D |
(2531) 0x4ffb90 FMUL Z7.D, Z24.D, Z3.D |
(2531) 0x4ffb94 FMLA Z4.D, P7/M, Z2.D, Z5.D |
(2531) 0x4ffb98 FMLA Z7.D, P7/M, Z25.D, Z5.D |
(2531) 0x4ffb9c FMAD Z0.D, P7/M, Z6.D, Z4.D |
(2531) 0x4ffba0 FMAD Z23.D, P7/M, Z6.D, Z7.D |
(2531) 0x4ffba4 FMUL Z19.D, Z17.D, Z3.D |
(2531) 0x4ffba8 FMLA Z19.D, P7/M, Z18.D, Z5.D |
(2531) 0x4ffbac FMAD Z16.D, P7/M, Z6.D, Z19.D |
(2531) 0x4ffbb0 LD1RD {Z20.D}, P7/Z, [X19, #1] |
(2531) 0x4ffbb4 LD1RD {Z3.D}, P7/Z, [X19] |
(2531) 0x4ffbb8 LD1RD {Z5.D}, P7/Z, [X19, #2] |
(2531) 0x4ffbbc ADD X30, X16, X2,LSL #3 |
(2531) 0x4ffbc0 MOVPRFX Z21, Z0 |
(2531) 0x4ffbc4 FRINTA Z21.D, P7/M, Z0.D |
(2531) 0x4ffbc8 MOVPRFX Z22, Z23 |
(2531) 0x4ffbcc FRINTA Z22.D, P7/M, Z23.D |
(2531) 0x4ffbd0 FSUB Z27.D, Z0.D, Z21.D |
(2531) 0x4ffbd4 FSUB Z28.D, Z23.D, Z22.D |
(2531) 0x4ffbd8 ADD X1, X12, X2,LSL #3 |
(2531) 0x4ffbdc MOVPRFX Z4, Z16 |
(2531) 0x4ffbe0 FRINTA Z4.D, P7/M, Z16.D |
(2531) 0x4ffbe4 FMUL Z1.D, Z20.D, Z27.D |
(2531) 0x4ffbe8 FSUB Z26.D, Z16.D, Z4.D |
(2531) 0x4ffbec FMLA Z1.D, P7/M, Z3.D, Z28.D |
(2531) 0x4ffbf0 FMAD Z5.D, P7/M, Z26.D, Z1.D |
(2531) 0x4ffbf4 ST1D {Z5.D}, P6, [X30, MUL VL] |
(2531) 0x4ffbf8 LD1RD {Z6.D}, P7/Z, [X19, #3] |
(2531) 0x4ffbfc LD1RD {Z0.D}, P7/Z, [X19, #4] |
(2531) 0x4ffc00 LD1RD {Z2.D}, P7/Z, [X19, #5] |
(2531) 0x4ffc04 FMUL Z24.D, Z0.D, Z27.D |
(2531) 0x4ffc08 FMLA Z24.D, P7/M, Z6.D, Z28.D |
(2531) 0x4ffc0c FMAD Z2.D, P7/M, Z26.D, Z24.D |
(2531) 0x4ffc10 ST1D {Z2.D}, P6, [X1, MUL VL] |
(2531) 0x4ffc14 LD1RD {Z7.D}, P7/Z, [X19, #7] |
(2531) 0x4ffc18 LD1RD {Z25.D}, P7/Z, [X19, #6] |
(2531) 0x4ffc1c LD1RD {Z17.D}, P7/Z, [X19, #8] |
(2531) 0x4ffc20 FMUL Z23.D, Z7.D, Z27.D |
(2531) 0x4ffc24 FMLA Z23.D, P7/M, Z25.D, Z28.D |
(2531) 0x4ffc28 FMAD Z17.D, P7/M, Z26.D, Z23.D |
(2531) 0x4ffc2c ST1D {Z17.D}, P6, [X17, X2,LSL #3] |
(2531) 0x4ffc30 LD1D {Z18.D}, P6/Z, [X1, MUL VL] |
(2531) 0x4ffc34 LD1D {Z19.D}, P6/Z, [X30, MUL VL] |
(2531) 0x4ffc38 FMUL Z16.D, Z18.D, Z18.D |
(2531) 0x4ffc3c FMLA Z16.D, P7/M, Z19.D, Z19.D |
(2531) 0x4ffc40 FMLA Z16.D, P7/M, Z17.D, Z17.D |
(2531) 0x4ffc44 FSQRT Z16.D, P7/M, Z16.D |
(2531) 0x4ffc48 ST1D {Z16.D}, P6, [X18, X2,LSL #3] |
(2531) 0x4ffc4c ADD X2, X2, X21 |
(2531) 0x4ffc50 WHILELO P6.D, W2, W7 |
(2531) 0x4ffc54 B.NE 4ffb50 |
(2532) 0x4ffc58 ADD X22, X22, #1 |
(2532) 0x4ffc5c ADD X23, X23, #24 |
(2532) 0x4ffc60 CMP X22, X13 |
(2532) 0x4ffc64 B.CC 4ffae0 |
0x4ffc68 LDP X19, X20, [SP, #16] |
0x4ffc6c LDP X21, X22, [SP, #32] |
0x4ffc70 LDP X27, X28, [SP, #80] |
0x4ffc74 LDR X0, [SP, #104] |
0x4ffc78 BL 4cf8e0 |
0x4ffc7c LDP X23, X24, [SP, #48] |
0x4ffc80 LDP X25, X26, [SP, #64] |
0x4ffc84 LDP X29, X30, [SP], #112 |
0x4ffc88 RET |
(2532) 0x4ffc8c BLR X2 |
(2532) 0x4ffc90 LDR X13, [X24, #24] |
(2532) 0x4ffc94 PTRUE P7.B, ALL |
(2532) 0x4ffc98 B 4ffb00 |
0x4ffc9c ORR X22, XZR, X0 |
0x4ffca0 LDR X0, [SP, #104] |
0x4ffca4 BL 4cf8e0 |
0x4ffca8 ORR X0, XZR, X22 |
0x4ffcac BL 4104a0 |
0x4ffcb0 HINT #0 |
0x4ffcb4 HINT #0 |
0x4ffcb8 HINT #0 |
0x4ffcbc HINT #0 |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►98.69+ | qmcplusplus::ParticleSet::upda[...] | stl_vector.h:993 | exec |
| ○ | main._omp_fn.0 | miniqmc.cpp:397 | exec |
| ○ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►1.31+ | qmcplusplus::ParticleSet::upda[...] | stl_vector.h:993 | exec |
| ○ | main._omp_fn.0 | miniqmc.cpp:397 | exec |
| ○ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | main | refwrap.h:350 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | miniqmc.cpp:529 | exec |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| Path / |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run gcc_0
| Source file and lines | SoaDistanceTableAAOMPTarget.h:179-187 |
| Module | exec |
| nb instructions | 42 |
| nb uops | 38 |
| loop length | 168 |
| used w registers | 1 |
| used x registers | 16 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 0 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 15 |
| micro-operation queue | 4.75 cycles |
| front end | 4.75 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 3.00 | 3.00 | 5.00 | 5.00 | 5.00 | 5.00 | 0.00 | 0.00 | 0.00 | 0.00 | 5.83 | 5.50 | 5.67 | 3.50 | 3.50 |
| cycles | 3.00 | 3.00 | 5.00 | 5.00 | 5.00 | 5.00 | 0.00 | 0.00 | 0.00 | 0.00 | 5.83 | 5.50 | 5.67 | 3.50 | 3.50 |
| Cycles executing div or sqrt instructions | NA |
| Front-end | 4.75 |
| Dispatch | 5.83 |
| Overall L1 | 5.83 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 38% |
| load | 43% |
| store | 46% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 25% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 33% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #912]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X24, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X25, XZR, X1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| LDR X0, [X0, #632] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| STR X0, [SP, #104] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| BL 4cf640 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X13, [X24, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| CMP X13, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (25.0%) |
| B.LS 4ffc74 <_ZN11qmcplusplus27SoaDistanceTableAAOMPTargetIdLj3ELi40EE8evaluateERNS_11ParticleSetE+0x1f4> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADRP X26, <2004ccab0> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| MOVZ X23, #24 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X26, X26, #2848 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X19, X24, #280 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| PTRUE P7.B, ALL | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (100.0%) |
| STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| MOVZ X22, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| CNTD X21, ALL | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (25.0%) |
| STP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X28, X25, #40 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W27, #40 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDR X0, [SP, #104] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| BL 4cf8e0 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X29, X30, [SP], #112 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR X22, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| LDR X0, [SP, #104] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| BL 4cf8e0 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR X0, XZR, X22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| BL 4104a0 <@plt_start@+0x480> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run gcc_0
| Source file and lines | SoaDistanceTableAAOMPTarget.h:179-187 |
| Module | exec |
| nb instructions | 42 |
| nb uops | 38 |
| loop length | 168 |
| used w registers | 1 |
| used x registers | 16 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 0 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 15 |
| micro-operation queue | 4.75 cycles |
| front end | 4.75 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 3.00 | 3.00 | 5.00 | 5.00 | 5.00 | 5.00 | 0.00 | 0.00 | 0.00 | 0.00 | 5.83 | 5.50 | 5.67 | 3.50 | 3.50 |
| cycles | 3.00 | 3.00 | 5.00 | 5.00 | 5.00 | 5.00 | 0.00 | 0.00 | 0.00 | 0.00 | 5.83 | 5.50 | 5.67 | 3.50 | 3.50 |
| Cycles executing div or sqrt instructions | NA |
| Front-end | 4.75 |
| Dispatch | 5.83 |
| Overall L1 | 5.83 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 38% |
| load | 43% |
| store | 46% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 25% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 33% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #912]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X24, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X25, XZR, X1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| LDR X0, [X0, #632] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| STR X0, [SP, #104] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| BL 4cf640 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X13, [X24, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| CMP X13, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (25.0%) |
| B.LS 4ffc74 <_ZN11qmcplusplus27SoaDistanceTableAAOMPTargetIdLj3ELi40EE8evaluateERNS_11ParticleSetE+0x1f4> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADRP X26, <2004ccab0> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| MOVZ X23, #24 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X26, X26, #2848 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X19, X24, #280 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| PTRUE P7.B, ALL | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (100.0%) |
| STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| MOVZ X22, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| CNTD X21, ALL | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (25.0%) |
| STP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X28, X25, #40 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W27, #40 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDR X0, [SP, #104] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| BL 4cf8e0 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X29, X30, [SP], #112 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR X22, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| LDR X0, [SP, #104] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| BL 4cf8e0 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR X0, XZR, X22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| BL 4104a0 <@plt_start@+0x480> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A |
| Name | Coverage (%) | Time (s) |
|---|---|---|
| ▼qmcplusplus::SoaDistanceTableAAOMPTarget | 0.07 | 0.10 |
| ▼Loop 2532 - SoaDistanceTableAAOMPTarget.h:184-185 - exec– | 0.00 | 0.00 |
| ○Loop 2531 - ParticleBConds3DSoa.h:237-255 - exec | 0.07 | 0.10 |
