| Function: qmcplusplus::SoaDistanceTableAAOMPTarget<double, 3u, 40>::evaluate(qmcplusplus::ParticleSe ... | Module: exec | Source: SoaDistanceTableAAOMPTarget.h:179-187 [...] | Coverage (incl. loops): 0.06% | (excl. loops): 0.00% |
|---|
| Function: qmcplusplus::SoaDistanceTableAAOMPTarget<double, 3u, 40>::evaluate(qmcplusplus::ParticleSe ... | Module: exec | Source: SoaDistanceTableAAOMPTarget.h:179-187 [...] | Coverage (incl. loops): 0.06% | (excl. loops): 0.00% |
|---|
/home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/build/miniqmc/src/Particle/SoaDistanceTableAAOMPTarget.h: 179 - 187 |
-------------------------------------------------------------------------------- |
179: inline void evaluate(ParticleSet& P) override |
180: { |
181: ScopedTimer local_timer(evaluate_timer_); |
182: |
183: constexpr T BigR = std::numeric_limits<T>::max(); |
184: for (int iat = 1; iat < num_targets_; ++iat) |
185: DTD_BConds<T, D, SC>::computeDistances(P.R[iat], P.getCoordinates().getAllParticlePos(), distances_[iat].data(), |
186: displacements_[iat], 0, iat, iat); |
187: } |
/home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/build/miniqmc/src/Particle/RealSpacePositionsOMPTarget.h: 163 - 163 |
-------------------------------------------------------------------------------- |
163: const PosVectorSoa& getAllParticlePos() const override { return RSoA_hostview; } |
/usr/include/c++/14/bits/stl_vector.h: 1131 - 1131 |
-------------------------------------------------------------------------------- |
1131: return *(this->_M_impl._M_start + __n); |
/home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsVector.h: 223 - 248 |
-------------------------------------------------------------------------------- |
223: return X[i]; |
[...] |
248: inline pointer data() { return X; } |
/home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/VectorSoAContainer.h: 271 - 273 |
-------------------------------------------------------------------------------- |
271: inline T* restrict data(size_t i) { return myData + i * nGhosts; } |
272: ///return the const pointer of the i-th components |
273: inline const T* restrict data(size_t i) const { return myData + i * nGhosts; } |
/home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/build/miniqmc/src/Particle/Lattice/ParticleBConds3DSoa.h: 234 - 255 |
-------------------------------------------------------------------------------- |
234: #pragma omp simd aligned(temp_r, px, py, pz, dx, dy, dz: QMC_SIMD_ALIGNMENT) |
235: for (int iat = first; iat < last; ++iat) |
236: { |
237: T displ_0 = px[iat] - x0; |
238: T displ_1 = py[iat] - y0; |
239: T displ_2 = pz[iat] - z0; |
240: |
241: T ar_0 = displ_0 * g00 + displ_1 * g10 + displ_2 * g20; |
242: T ar_1 = displ_0 * g01 + displ_1 * g11 + displ_2 * g21; |
243: T ar_2 = displ_0 * g02 + displ_1 * g12 + displ_2 * g22; |
244: |
245: //put them in the box |
246: ar_0 -= round(ar_0); |
247: ar_1 -= round(ar_1); |
248: ar_2 -= round(ar_2); |
249: |
250: //unit2cart |
251: dx[iat] = ar_0 * r00 + ar_1 * r10 + ar_2 * r20; |
252: dy[iat] = ar_0 * r01 + ar_1 * r11 + ar_2 * r21; |
253: dz[iat] = ar_0 * r02 + ar_1 * r12 + ar_2 * r22; |
254: |
255: temp_r[iat] = std::sqrt(dx[iat] * dx[iat] + dy[iat] * dy[iat] + dz[iat] * dz[iat]); |
/home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/build/miniqmc/src/Utilities/NewTimer.h: 242 - 249 |
-------------------------------------------------------------------------------- |
242: ScopeGuard(TIMER& t) : timer(t) { timer.start(); } |
[...] |
249: ~ScopeGuard() { timer.stop(); } |
/usr/include/c++/14/bits/unique_ptr.h: 193 - 193 |
-------------------------------------------------------------------------------- |
193: pointer _M_ptr() const noexcept { return std::get<0>(_M_t); } |
0x4ffc40 STP X29, X30, [SP, #-112]!
0x4ffc44 ADD X29, SP, #0 |
0x4ffc48 STP X23, X24, [SP, #48] |
0x4ffc4c ORR X24, XZR, X0 |
0x4ffc50 STP X25, X26, [SP, #64] |
0x4ffc54 ORR X25, XZR, X1 |
0x4ffc58 LDR X0, [X0, #632] |
0x4ffc5c STR X0, [SP, #104] |
0x4ffc60 BL 4cf800 |
0x4ffc64 LDR X13, [X24, #24] |
0x4ffc68 CMP X13, #1 |
0x4ffc6c B.LS 4ffe34 |
0x4ffc70 ADRP X26, <2004ccc70>
0x4ffc74 MOVZ X23, #24 |
0x4ffc78 STP X19, X20, [SP, #16] |
0x4ffc7c ADD X26, X26, #3296 |
0x4ffc80 ADD X19, X24, #280 |
0x4ffc84 PTRUE P7.B, ALL |
0x4ffc88 STP X21, X22, [SP, #32] |
0x4ffc8c MOVZ X22, #1 |
0x4ffc90 CNTD X21, ALL |
0x4ffc94 STP X27, X28, [SP, #80] |
0x4ffc98 ADD X28, X25, #40 |
0x4ffc9c MOVZ W27, #40 |
(2532) 0x4ffca0 LDR X0, [X25, #632] |
(2532) 0x4ffca4 LDR X4, [X28, #24] |
(2532) 0x4ffca8 LDR X1, [X0] |
(2532) 0x4ffcac ADD X20, X4, X23 |
(2532) 0x4ffcb0 LDR X2, [X1, #72] |
(2532) 0x4ffcb4 CMP X2, X26 |
(2532) 0x4ffcb8 B.NE 4ffe4c |
(2532) 0x4ffcbc ADD X0, X0, #64 |
(2532) 0x4ffcc0 LDR X5, [X24, #96] |
(2532) 0x4ffcc4 UMULL X3, W22, W27 |
(2532) 0x4ffcc8 MOVZ X2, #0 |
(2532) 0x4ffccc ORR W7, WZR, W22 |
(2532) 0x4ffcd0 LD1RD {Z31.D}, P7/Z, [X20] |
(2532) 0x4ffcd4 LD1RD {Z30.D}, P7/Z, [X20, #1] |
(2532) 0x4ffcd8 LDR X6, [X24, #72] |
(2532) 0x4ffcdc LD1RD {Z29.D}, P7/Z, [X20, #2] |
(2532) 0x4ffce0 WHILELO P6.D, WZR, W22 |
(2532) 0x4ffce4 LDR X8, [X0, #8] |
(2532) 0x4ffce8 ADD X10, X5, X3 |
(2532) 0x4ffcec LDR X15, [X10, #8] |
(2532) 0x4ffcf0 ADD X14, X6, X3 |
(2532) 0x4ffcf4 LDR X9, [X0, #24] |
(2532) 0x4ffcf8 LDR X16, [X10, #24] |
(2532) 0x4ffcfc LDR X18, [X14, #24] |
(2532) 0x4ffd00 ADD X11, X9, X8,LSL #3 |
(2532) 0x4ffd04 ADD X0, X9, X8,LSL #4 |
(2532) 0x4ffd08 ADD X12, X16, X15,LSL #3 |
(2532) 0x4ffd0c ADD X17, X16, X15,LSL #4 |
(2531) 0x4ffd10 LD1D {Z27.D}, P6/Z, [X11, X2,LSL #3] |
(2531) 0x4ffd14 LD1D {Z28.D}, P6/Z, [X9, X2,LSL #3] |
(2531) 0x4ffd18 LD1D {Z26.D}, P6/Z, [X0, X2,LSL #3] |
(2531) 0x4ffd1c LD1RD {Z1.D}, P7/Z, [X19, #13] |
(2531) 0x4ffd20 LD1RD {Z2.D}, P7/Z, [X19, #12] |
(2531) 0x4ffd24 LD1RD {Z24.D}, P7/Z, [X19, #10] |
(2531) 0x4ffd28 LD1RD {Z25.D}, P7/Z, [X19, #9] |
(2531) 0x4ffd2c LD1RD {Z17.D}, P7/Z, [X19, #16] |
(2531) 0x4ffd30 LD1RD {Z18.D}, P7/Z, [X19, #15] |
(2531) 0x4ffd34 FSUB Z3.D, Z27.D, Z30.D |
(2531) 0x4ffd38 FSUB Z5.D, Z28.D, Z31.D |
(2531) 0x4ffd3c FSUB Z6.D, Z26.D, Z29.D |
(2531) 0x4ffd40 LD1RD {Z0.D}, P7/Z, [X19, #14] |
(2531) 0x4ffd44 LD1RD {Z23.D}, P7/Z, [X19, #11] |
(2531) 0x4ffd48 LD1RD {Z16.D}, P7/Z, [X19, #17] |
(2531) 0x4ffd4c FMUL Z4.D, Z1.D, Z3.D |
(2531) 0x4ffd50 FMUL Z7.D, Z24.D, Z3.D |
(2531) 0x4ffd54 FMLA Z4.D, P7/M, Z2.D, Z5.D |
(2531) 0x4ffd58 FMLA Z7.D, P7/M, Z25.D, Z5.D |
(2531) 0x4ffd5c FMAD Z0.D, P7/M, Z6.D, Z4.D |
(2531) 0x4ffd60 FMAD Z23.D, P7/M, Z6.D, Z7.D |
(2531) 0x4ffd64 FMUL Z19.D, Z17.D, Z3.D |
(2531) 0x4ffd68 FMLA Z19.D, P7/M, Z18.D, Z5.D |
(2531) 0x4ffd6c FMAD Z16.D, P7/M, Z6.D, Z19.D |
(2531) 0x4ffd70 LD1RD {Z20.D}, P7/Z, [X19, #1] |
(2531) 0x4ffd74 LD1RD {Z3.D}, P7/Z, [X19] |
(2531) 0x4ffd78 LD1RD {Z5.D}, P7/Z, [X19, #2] |
(2531) 0x4ffd7c ADD X30, X16, X2,LSL #3 |
(2531) 0x4ffd80 MOVPRFX Z21, Z0 |
(2531) 0x4ffd84 FRINTA Z21.D, P7/M, Z0.D |
(2531) 0x4ffd88 MOVPRFX Z22, Z23 |
(2531) 0x4ffd8c FRINTA Z22.D, P7/M, Z23.D |
(2531) 0x4ffd90 FSUB Z27.D, Z0.D, Z21.D |
(2531) 0x4ffd94 FSUB Z28.D, Z23.D, Z22.D |
(2531) 0x4ffd98 ADD X1, X12, X2,LSL #3 |
(2531) 0x4ffd9c MOVPRFX Z4, Z16 |
(2531) 0x4ffda0 FRINTA Z4.D, P7/M, Z16.D |
(2531) 0x4ffda4 FMUL Z1.D, Z20.D, Z27.D |
(2531) 0x4ffda8 FSUB Z26.D, Z16.D, Z4.D |
(2531) 0x4ffdac FMLA Z1.D, P7/M, Z3.D, Z28.D |
(2531) 0x4ffdb0 FMAD Z5.D, P7/M, Z26.D, Z1.D |
(2531) 0x4ffdb4 ST1D {Z5.D}, P6, [X30, MUL VL] |
(2531) 0x4ffdb8 LD1RD {Z6.D}, P7/Z, [X19, #3] |
(2531) 0x4ffdbc LD1RD {Z0.D}, P7/Z, [X19, #4] |
(2531) 0x4ffdc0 LD1RD {Z2.D}, P7/Z, [X19, #5] |
(2531) 0x4ffdc4 FMUL Z24.D, Z0.D, Z27.D |
(2531) 0x4ffdc8 FMLA Z24.D, P7/M, Z6.D, Z28.D |
(2531) 0x4ffdcc FMAD Z2.D, P7/M, Z26.D, Z24.D |
(2531) 0x4ffdd0 ST1D {Z2.D}, P6, [X1, MUL VL] |
(2531) 0x4ffdd4 LD1RD {Z7.D}, P7/Z, [X19, #7] |
(2531) 0x4ffdd8 LD1RD {Z25.D}, P7/Z, [X19, #6] |
(2531) 0x4ffddc LD1RD {Z17.D}, P7/Z, [X19, #8] |
(2531) 0x4ffde0 FMUL Z23.D, Z7.D, Z27.D |
(2531) 0x4ffde4 FMLA Z23.D, P7/M, Z25.D, Z28.D |
(2531) 0x4ffde8 FMAD Z17.D, P7/M, Z26.D, Z23.D |
(2531) 0x4ffdec ST1D {Z17.D}, P6, [X17, X2,LSL #3] |
(2531) 0x4ffdf0 LD1D {Z18.D}, P6/Z, [X1, MUL VL] |
(2531) 0x4ffdf4 LD1D {Z19.D}, P6/Z, [X30, MUL VL] |
(2531) 0x4ffdf8 FMUL Z16.D, Z18.D, Z18.D |
(2531) 0x4ffdfc FMLA Z16.D, P7/M, Z19.D, Z19.D |
(2531) 0x4ffe00 FMLA Z16.D, P7/M, Z17.D, Z17.D |
(2531) 0x4ffe04 FSQRT Z16.D, P7/M, Z16.D |
(2531) 0x4ffe08 ST1D {Z16.D}, P6, [X18, X2,LSL #3] |
(2531) 0x4ffe0c ADD X2, X2, X21 |
(2531) 0x4ffe10 WHILELO P6.D, W2, W7 |
(2531) 0x4ffe14 B.NE 4ffd10 |
(2532) 0x4ffe18 ADD X22, X22, #1 |
(2532) 0x4ffe1c ADD X23, X23, #24 |
(2532) 0x4ffe20 CMP X22, X13 |
(2532) 0x4ffe24 B.CC 4ffca0 |
0x4ffe28 LDP X19, X20, [SP, #16] |
0x4ffe2c LDP X21, X22, [SP, #32] |
0x4ffe30 LDP X27, X28, [SP, #80] |
0x4ffe34 LDR X0, [SP, #104] |
0x4ffe38 BL 4cfaa0 |
0x4ffe3c LDP X23, X24, [SP, #48] |
0x4ffe40 LDP X25, X26, [SP, #64] |
0x4ffe44 LDP X29, X30, [SP], #112 |
0x4ffe48 RET |
(2532) 0x4ffe4c BLR X2 |
(2532) 0x4ffe50 LDR X13, [X24, #24] |
(2532) 0x4ffe54 PTRUE P7.B, ALL |
(2532) 0x4ffe58 B 4ffcc0 |
0x4ffe5c ORR X22, XZR, X0 |
0x4ffe60 LDR X0, [SP, #104] |
0x4ffe64 BL 4cfaa0 |
0x4ffe68 ORR X0, XZR, X22 |
0x4ffe6c BL 410460 |
0x4ffe70 HINT #0 |
0x4ffe74 HINT #0 |
0x4ffe78 HINT #0 |
0x4ffe7c HINT #0 |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►99.00+ | qmcplusplus::ParticleSet::upda[...] | stl_vector.h:993 | exec |
| ○ | main._omp_fn.0 | miniqmc.cpp:397 | exec |
| ○ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| Path / |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run gcc_1
| Source file and lines | SoaDistanceTableAAOMPTarget.h:179-187 |
| Module | exec |
| nb instructions | 42 |
| nb uops | 38 |
| loop length | 168 |
| used w registers | 1 |
| used x registers | 16 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 0 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 15 |
| micro-operation queue | 4.75 cycles |
| front end | 4.75 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 3.00 | 3.00 | 5.00 | 5.00 | 5.00 | 5.00 | 0.00 | 0.00 | 0.00 | 0.00 | 5.83 | 5.50 | 5.67 | 3.50 | 3.50 |
| cycles | 3.00 | 3.00 | 5.00 | 5.00 | 5.00 | 5.00 | 0.00 | 0.00 | 0.00 | 0.00 | 5.83 | 5.50 | 5.67 | 3.50 | 3.50 |
| Cycles executing div or sqrt instructions | NA |
| Front-end | 4.75 |
| Dispatch | 5.83 |
| Overall L1 | 5.83 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 38% |
| load | 43% |
| store | 46% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 25% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 33% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #-112]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X24, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X25, XZR, X1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| LDR X0, [X0, #632] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| STR X0, [SP, #104] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| BL 4cf800 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X13, [X24, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| CMP X13, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (25.0%) |
| B.LS 4ffe34 <_ZN11qmcplusplus27SoaDistanceTableAAOMPTargetIdLj3ELi40EE8evaluateERNS_11ParticleSetE+0x1f4> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADRP X26, <2004ccc70> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| MOVZ X23, #24 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X26, X26, #3296 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X19, X24, #280 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| PTRUE P7.B, ALL | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (100.0%) |
| STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| MOVZ X22, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| CNTD X21, ALL | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (25.0%) |
| STP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X28, X25, #40 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W27, #40 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDR X0, [SP, #104] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| BL 4cfaa0 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X29, X30, [SP], #112 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR X22, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| LDR X0, [SP, #104] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| BL 4cfaa0 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR X0, XZR, X22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| BL 410460 <@plt_start@+0x440> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run gcc_1
| Source file and lines | SoaDistanceTableAAOMPTarget.h:179-187 |
| Module | exec |
| nb instructions | 42 |
| nb uops | 38 |
| loop length | 168 |
| used w registers | 1 |
| used x registers | 16 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 0 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 15 |
| micro-operation queue | 4.75 cycles |
| front end | 4.75 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 3.00 | 3.00 | 5.00 | 5.00 | 5.00 | 5.00 | 0.00 | 0.00 | 0.00 | 0.00 | 5.83 | 5.50 | 5.67 | 3.50 | 3.50 |
| cycles | 3.00 | 3.00 | 5.00 | 5.00 | 5.00 | 5.00 | 0.00 | 0.00 | 0.00 | 0.00 | 5.83 | 5.50 | 5.67 | 3.50 | 3.50 |
| Cycles executing div or sqrt instructions | NA |
| Front-end | 4.75 |
| Dispatch | 5.83 |
| Overall L1 | 5.83 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 38% |
| load | 43% |
| store | 46% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 25% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 33% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #-112]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X24, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X25, XZR, X1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| LDR X0, [X0, #632] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| STR X0, [SP, #104] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| BL 4cf800 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X13, [X24, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| CMP X13, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (25.0%) |
| B.LS 4ffe34 <_ZN11qmcplusplus27SoaDistanceTableAAOMPTargetIdLj3ELi40EE8evaluateERNS_11ParticleSetE+0x1f4> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADRP X26, <2004ccc70> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| MOVZ X23, #24 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X26, X26, #3296 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X19, X24, #280 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| PTRUE P7.B, ALL | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (100.0%) |
| STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| MOVZ X22, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| CNTD X21, ALL | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (25.0%) |
| STP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X28, X25, #40 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W27, #40 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDR X0, [SP, #104] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| BL 4cfaa0 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X29, X30, [SP], #112 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR X22, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| LDR X0, [SP, #104] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| BL 4cfaa0 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR X0, XZR, X22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| BL 410460 <@plt_start@+0x440> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A |
| Name | Coverage (%) | Time (s) |
|---|---|---|
| ▼qmcplusplus::SoaDistanceTableAAOMPTarget | 0.06 | 0.10 |
| ▼Loop 2532 - SoaDistanceTableAAOMPTarget.h:184-185 - exec– | 0.00 | 0.00 |
| ○Loop 2531 - ParticleBConds3DSoa.h:237-255 - exec | 0.06 | 0.09 |
