| Loop Id: 271 | Module: exec | Source: BsplineFunctor.h:303-336 [...] | Coverage: 0.08% |
|---|
| Loop Id: 271 | Module: exec | Source: BsplineFunctor.h:303-336 [...] | Coverage: 0.08% |
|---|
0x422840 LD1D {Z16.D}, P0/Z, [X9, X13,LSL #3] [4] |
0x422844 DUP Z20.D, X14 |
0x422848 LDR Z24, [SP, #14, MUL VL] [5] |
0x42284c LDR Z27, [SP, #4, MUL VL] [5] |
0x422850 FMUL Z18.D, Z16.D, Z6.D |
0x422854 MOVPRFX Z17, Z18 |
0x422858 FCVTZS Z17.D, P1/M, Z18.D |
0x42285c MOVPRFX Z19, Z17 |
0x422860 SCVTF Z19.D, P1/M, Z17.D |
0x422864 ADR Z20.D, [Z20, Z17.D,LSL #3] [2] |
0x422868 LD1D {Z17.D}, P0/Z, [X14, Z17.D,LSL #3] [9] |
0x42286c LD1D {Z21.D}, P0/Z, [V20, #1.D] [1] |
0x422870 LD1D {Z22.D}, P0/Z, [V20, #2.D] [1] |
0x422874 LD1D {Z20.D}, P0/Z, [V20, #3.D] [1] |
0x422878 FSUB Z25.D, Z18.D, Z19.D |
0x42287c LDR Z18, [SP, #17, MUL VL] [5] |
0x422880 LDR Z19, [SP, #16, MUL VL] [5] |
0x422884 FMUL Z26.D, Z25.D, Z25.D |
0x422888 FMUL Z23.D, Z26.D, Z25.D |
0x42288c FMAD Z18.D, P1/M, Z25.D, Z19.D |
0x422890 LDR Z19, [SP, #15, MUL VL] [5] |
0x422894 FMUL Z18.D, Z18.D, Z17.D |
0x422898 FMLA Z24.D, P1/M, Z19.D, Z25.D |
0x42289c LDR Z19, [SP, #9, MUL VL] [5] |
0x4228a0 FMLA Z18.D, P1/M, Z24.D, Z21.D |
0x4228a4 LDR Z24, [SP, #7, MUL VL] [5] |
0x4228a8 FMLA Z24.D, P1/M, Z19.D, Z26.D |
0x4228ac LDR Z19, [SP, #8, MUL VL] [5] |
0x4228b0 FMLA Z24.D, P1/M, Z19.D, Z25.D |
0x4228b4 LDR Z19, [SP, #6, MUL VL] [5] |
0x4228b8 FMUL Z24.D, Z24.D, Z17.D |
0x4228bc FMAD Z19.D, P1/M, Z26.D, Z27.D |
0x4228c0 LDR Z27, [SP, #5, MUL VL] [5] |
0x4228c4 FMLA Z19.D, P1/M, Z27.D, Z25.D |
0x4228c8 FMAD Z19.D, P1/M, Z21.D, Z24.D |
0x4228cc MOVPRFX Z24, Z9 |
0x4228d0 FMLA Z24.D, P1/M, Z30.D, Z23.D |
0x4228d4 FMLA Z24.D, P1/M, Z31.D, Z26.D |
0x4228d8 FMLA Z24.D, P1/M, Z8.D, Z25.D |
0x4228dc FMUL Z17.D, Z24.D, Z17.D |
0x4228e0 MOVPRFX Z24, Z13 |
0x4228e4 FMLA Z24.D, P1/M, Z10.D, Z23.D |
0x4228e8 FMLA Z24.D, P1/M, Z11.D, Z26.D |
0x4228ec FMLA Z24.D, P1/M, Z12.D, Z25.D |
0x4228f0 FMLA Z17.D, P1/M, Z24.D, Z21.D |
0x4228f4 LDR Z21, [SP, #13, MUL VL] [5] |
0x4228f8 LDR Z24, [SP, #12, MUL VL] [5] |
0x4228fc FMAD Z21.D, P1/M, Z25.D, Z24.D |
0x422900 LDR Z24, [SP, #1, MUL VL] [5] |
0x422904 FMLA Z18.D, P1/M, Z21.D, Z22.D |
0x422908 LDR Z21, [SP, #3, MUL VL] [5] |
0x42290c FMAD Z21.D, P1/M, Z26.D, Z24.D |
0x422910 LDR Z24, [SP, #2, MUL VL] [5] |
0x422914 FMLA Z21.D, P1/M, Z24.D, Z25.D |
0x422918 FMLA Z19.D, P1/M, Z21.D, Z22.D |
0x42291c MOVPRFX Z21, Z1 |
0x422920 FMLA Z21.D, P1/M, Z14.D, Z23.D |
0x422924 FMAD Z23.D, P1/M, Z2.D, Z5.D |
0x422928 FMLA Z21.D, P1/M, Z15.D, Z26.D |
0x42292c FMLA Z23.D, P1/M, Z3.D, Z26.D |
0x422930 FMLA Z21.D, P1/M, Z0.D, Z25.D |
0x422934 FMLA Z23.D, P1/M, Z4.D, Z25.D |
0x422938 FMLA Z17.D, P1/M, Z21.D, Z22.D |
0x42293c LDR Z21, [SP, #11, MUL VL] [5] |
0x422940 LDR Z22, [SP, #10, MUL VL] [5] |
0x422944 FMLA Z17.D, P1/M, Z23.D, Z20.D |
0x422948 FMAD Z21.D, P1/M, Z25.D, Z22.D |
0x42294c FMLA Z18.D, P1/M, Z21.D, Z20.D |
0x422950 LDR Z21, [SP, MUL VL] [5] |
0x422954 FMUL Z18.D, Z7.D, Z18.D |
0x422958 FMAD Z21.D, P1/M, Z26.D, Z29.D |
0x42295c FMLA Z21.D, P1/M, Z28.D, Z25.D |
0x422960 FMLA Z19.D, P1/M, Z21.D, Z20.D |
0x422964 LD1SW {Z20.D}, P0/Z, [X8, X13,LSL #2] [3] |
0x422968 ADD X13, X13, X16 |
0x42296c ST1D {Z18.D}, P0, [X12, Z20.D,LSL #3] [8] |
0x422970 FMUL Z18.D, Z19.D, Z6.D |
0x422974 FDIVR Z16.D, P1/M, Z16.D, Z18.D |
0x422978 ST1D {Z16.D}, P0, [X11, Z20.D,LSL #3] [7] |
0x42297c ST1D {Z17.D}, P0, [X10, Z20.D,LSL #3] [6] |
0x422980 WHILELO P0.D, X13, X15 |
0x422984 B.MI 422840 |
/usr/lib/gcc/aarch64-amazon-linux/14/../../../../include/c++/14/bits/stl_vector.h: 1150 - 1150 |
-------------------------------------------------------------------------------- |
1150: return *(this->_M_impl._M_start + __n); |
/home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/Jastrow/BsplineFunctor.h: 303 - 336 |
-------------------------------------------------------------------------------- |
303: for (int j = 0; j < iCount; j++) |
304: { |
305: real_type r = distArrayCompressed[j]; |
306: int iScatter = distIndices[j]; |
307: real_type rinv = cOne / r; |
308: r *= DeltaRInv; |
309: int iGather = (int)r; |
310: real_type t = r - real_type(iGather); |
311: real_type tp0 = t * t * t; |
312: real_type tp1 = t * t; |
313: real_type tp2 = t; |
314: |
315: real_type sCoef0 = SplineCoefs[iGather + 0]; |
316: real_type sCoef1 = SplineCoefs[iGather + 1]; |
317: real_type sCoef2 = SplineCoefs[iGather + 2]; |
318: real_type sCoef3 = SplineCoefs[iGather + 3]; |
319: |
320: // clang-format off |
321: laplArray[iScatter] = dSquareDeltaRinv * |
322: (sCoef0*( d2A[ 2]*tp2 + d2A[ 3])+ |
323: sCoef1*( d2A[ 6]*tp2 + d2A[ 7])+ |
324: sCoef2*( d2A[10]*tp2 + d2A[11])+ |
325: sCoef3*( d2A[14]*tp2 + d2A[15])); |
326: |
327: gradArray[iScatter] = DeltaRInv * rinv * |
328: (sCoef0*( dA[ 1]*tp1 + dA[ 2]*tp2 + dA[ 3])+ |
329: sCoef1*( dA[ 5]*tp1 + dA[ 6]*tp2 + dA[ 7])+ |
330: sCoef2*( dA[ 9]*tp1 + dA[10]*tp2 + dA[11])+ |
331: sCoef3*( dA[13]*tp1 + dA[14]*tp2 + dA[15])); |
332: |
333: valArray[iScatter] = (sCoef0*(A[ 0]*tp0 + A[ 1]*tp1 + A[ 2]*tp2 + A[ 3])+ |
334: sCoef1*(A[ 4]*tp0 + A[ 5]*tp1 + A[ 6]*tp2 + A[ 7])+ |
335: sCoef2*(A[ 8]*tp0 + A[ 9]*tp1 + A[10]*tp2 + A[11])+ |
336: sCoef3*(A[12]*tp0 + A[13]*tp1 + A[14]*tp2 + A[15])); |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►71.43+ | qmcplusplus::WaveFunction::rat[...] | WaveFunction.cpp:201 | exec |
| ○ | main.omp_outlined.62 | miniqmc.cpp:442 | exec |
| ○ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_fork_call | libomp.so | |
| ○ | __kmpc_fork_call | libomp.so | |
| ○ | main | miniqmc.cpp:409 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | new_allocator.h:172 | exec |
| ►28.57+ | qmcplusplus::WaveFunction::acc[...] | NewTimer.h:249 | exec |
| ○ | main.omp_outlined.62 | miniqmc.cpp:450 | exec |
| ○ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_fork_call | libomp.so | |
| ○ | __kmpc_fork_call | libomp.so | |
| ○ | main | miniqmc.cpp:409 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | new_allocator.h:172 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►50.00+ | qmcplusplus::WaveFunction::rat[...] | WaveFunction.cpp:201 | exec |
| ○ | main.omp_outlined.62 | miniqmc.cpp:442 | exec |
| ○ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_fork_call | libomp.so | |
| ○ | __kmpc_fork_call | libomp.so | |
| ○ | main | miniqmc.cpp:409 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | new_allocator.h:172 | exec |
| ►22.22+ | qmcplusplus::WaveFunction::rat[...] | WaveFunction.cpp:201 | exec |
| ○ | main.omp_outlined.62 | miniqmc.cpp:442 | exec |
| ○ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_launch_thread | libomp.so | |
| ○ | __kmp_launch_worker(void*) | libomp.so | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►16.67+ | qmcplusplus::WaveFunction::acc[...] | NewTimer.h:249 | exec |
| ○ | main.omp_outlined.62 | miniqmc.cpp:450 | exec |
| ○ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_launch_thread | libomp.so | |
| ○ | __kmp_launch_worker(void*) | libomp.so | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►11.11+ | qmcplusplus::WaveFunction::acc[...] | NewTimer.h:249 | exec |
| ○ | main.omp_outlined.62 | miniqmc.cpp:450 | exec |
| ○ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_fork_call | libomp.so | |
| ○ | __kmpc_fork_call | libomp.so | |
| ○ | main | miniqmc.cpp:409 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | new_allocator.h:172 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►44.83+ | qmcplusplus::WaveFunction::rat[...] | WaveFunction.cpp:201 | exec |
| ○ | main.omp_outlined.62 | miniqmc.cpp:442 | exec |
| ○ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_launch_thread | libomp.so | |
| ○ | __kmp_launch_worker(void*) | libomp.so | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►20.69+ | qmcplusplus::WaveFunction::acc[...] | NewTimer.h:249 | exec |
| ○ | main.omp_outlined.62 | miniqmc.cpp:450 | exec |
| ○ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_launch_thread | libomp.so | |
| ○ | __kmp_launch_worker(void*) | libomp.so | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►17.24+ | qmcplusplus::WaveFunction::rat[...] | WaveFunction.cpp:201 | exec |
| ○ | main.omp_outlined.62 | miniqmc.cpp:442 | exec |
| ○ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_fork_call | libomp.so | |
| ○ | __kmpc_fork_call | libomp.so | |
| ○ | main | miniqmc.cpp:409 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | new_allocator.h:172 | exec |
| ►13.79+ | qmcplusplus::WaveFunction::acc[...] | NewTimer.h:249 | exec |
| ○ | main.omp_outlined.62 | miniqmc.cpp:450 | exec |
| ○ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_fork_call | libomp.so | |
| ○ | __kmpc_fork_call | libomp.so | |
| ○ | main | miniqmc.cpp:409 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | new_allocator.h:172 | exec |
| ►3.45+ | miniqmcreference::TwoBodyJastr[...] | TwoBodyJastrowRef.h:421 | exec |
| ○ | miniqmcreference::TwoBodyJastr[...] | TwoBodyJastrowRef.h:411 | exec |
| ○ | qmcplusplus::WaveFunction::eva[...] | WaveFunction.cpp:175 | exec |
| ○ | main.omp_outlined | miniqmc.cpp:379 | exec |
| ○ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_fork_call | libomp.so | |
| ○ | __kmpc_fork_call | libomp.so | |
| ○ | main | stl_vector.h:1131 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | new_allocator.h:172 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►48.05+ | qmcplusplus::WaveFunction::rat[...] | WaveFunction.cpp:201 | exec |
| ○ | main.omp_outlined.62 | miniqmc.cpp:442 | exec |
| ○ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_launch_thread | libomp.so | |
| ○ | __kmp_launch_worker(void*) | libomp.so | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►33.77+ | qmcplusplus::WaveFunction::acc[...] | NewTimer.h:249 | exec |
| ○ | main.omp_outlined.62 | miniqmc.cpp:450 | exec |
| ○ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_launch_thread | libomp.so | |
| ○ | __kmp_launch_worker(void*) | libomp.so | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►7.79+ | miniqmcreference::TwoBodyJastr[...] | TwoBodyJastrowRef.h:421 | exec |
| ○ | miniqmcreference::TwoBodyJastr[...] | TwoBodyJastrowRef.h:411 | exec |
| ○ | qmcplusplus::WaveFunction::eva[...] | WaveFunction.cpp:175 | exec |
| ○ | main.omp_outlined | miniqmc.cpp:379 | exec |
| ○ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_launch_thread | libomp.so | |
| ○ | __kmp_launch_worker(void*) | libomp.so | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►5.19+ | qmcplusplus::WaveFunction::rat[...] | WaveFunction.cpp:201 | exec |
| ○ | main.omp_outlined.62 | miniqmc.cpp:442 | exec |
| ○ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_fork_call | libomp.so | |
| ○ | __kmpc_fork_call | libomp.so | |
| ○ | main | miniqmc.cpp:409 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | new_allocator.h:172 | exec |
| ►3.90+ | qmcplusplus::WaveFunction::acc[...] | NewTimer.h:249 | exec |
| ○ | main.omp_outlined.62 | miniqmc.cpp:450 | exec |
| ○ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_fork_call | libomp.so | |
| ○ | __kmpc_fork_call | libomp.so | |
| ○ | main | miniqmc.cpp:409 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | new_allocator.h:172 | exec |
| ►1.30+ | miniqmcreference::OneBodyJastr[...] | OneBodyJastrowRef.h:260 | exec |
| ○ | qmcplusplus::WaveFunction::rat[...] | WaveFunction.cpp:201 | exec |
| ○ | main.omp_outlined.62 | miniqmc.cpp:442 | exec |
| ○ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_launch_thread | libomp.so | |
| ○ | __kmp_launch_worker(void*) | libomp.so | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►57.34+ | qmcplusplus::WaveFunction::rat[...] | WaveFunction.cpp:201 | exec |
| ○ | main.omp_outlined.62 | miniqmc.cpp:442 | exec |
| ○ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_launch_thread | libomp.so | |
| ○ | __kmp_launch_worker(void*) | libomp.so | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►34.27+ | qmcplusplus::WaveFunction::acc[...] | NewTimer.h:249 | exec |
| ○ | main.omp_outlined.62 | miniqmc.cpp:450 | exec |
| ○ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_launch_thread | libomp.so | |
| ○ | __kmp_launch_worker(void*) | libomp.so | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►5.59+ | qmcplusplus::WaveFunction::rat[...] | WaveFunction.cpp:201 | exec |
| ○ | main.omp_outlined.62 | miniqmc.cpp:442 | exec |
| ○ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_fork_call | libomp.so | |
| ○ | __kmpc_fork_call | libomp.so | |
| ○ | main | miniqmc.cpp:409 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | new_allocator.h:172 | exec |
| ►2.10+ | miniqmcreference::TwoBodyJastr[...] | TwoBodyJastrowRef.h:421 | exec |
| ○ | miniqmcreference::TwoBodyJastr[...] | TwoBodyJastrowRef.h:411 | exec |
| ○ | qmcplusplus::WaveFunction::eva[...] | WaveFunction.cpp:175 | exec |
| ○ | main.omp_outlined | miniqmc.cpp:379 | exec |
| ○ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_launch_thread | libomp.so | |
| ○ | __kmp_launch_worker(void*) | libomp.so | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►54.07+ | qmcplusplus::WaveFunction::rat[...] | WaveFunction.cpp:201 | exec |
| ○ | main.omp_outlined.62 | miniqmc.cpp:442 | exec |
| ○ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_launch_thread | libomp.so | |
| ○ | __kmp_launch_worker(void*) | libomp.so | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►36.18+ | qmcplusplus::WaveFunction::acc[...] | NewTimer.h:249 | exec |
| ○ | main.omp_outlined.62 | miniqmc.cpp:450 | exec |
| ○ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_launch_thread | libomp.so | |
| ○ | __kmp_launch_worker(void*) | libomp.so | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►5.28+ | miniqmcreference::TwoBodyJastr[...] | TwoBodyJastrowRef.h:421 | exec |
| ○ | miniqmcreference::TwoBodyJastr[...] | TwoBodyJastrowRef.h:411 | exec |
| ○ | qmcplusplus::WaveFunction::eva[...] | WaveFunction.cpp:175 | exec |
| ○ | main.omp_outlined | miniqmc.cpp:379 | exec |
| ○ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_launch_thread | libomp.so | |
| ○ | __kmp_launch_worker(void*) | libomp.so | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►2.44+ | qmcplusplus::WaveFunction::rat[...] | WaveFunction.cpp:201 | exec |
| ○ | main.omp_outlined.62 | miniqmc.cpp:442 | exec |
| ○ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_fork_call | libomp.so | |
| ○ | __kmpc_fork_call | libomp.so | |
| ○ | main | miniqmc.cpp:409 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | new_allocator.h:172 | exec |
| ►1.63+ | qmcplusplus::WaveFunction::acc[...] | NewTimer.h:249 | exec |
| ○ | main.omp_outlined.62 | miniqmc.cpp:450 | exec |
| ○ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_fork_call | libomp.so | |
| ○ | __kmpc_fork_call | libomp.so | |
| ○ | main | miniqmc.cpp:409 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | new_allocator.h:172 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►55.33+ | qmcplusplus::WaveFunction::rat[...] | WaveFunction.cpp:201 | exec |
| ○ | main.omp_outlined.62 | miniqmc.cpp:442 | exec |
| ○ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_launch_thread | libomp.so | |
| ○ | __kmp_launch_worker(void*) | libomp.so | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►36.43+ | qmcplusplus::WaveFunction::acc[...] | NewTimer.h:249 | exec |
| ○ | main.omp_outlined.62 | miniqmc.cpp:450 | exec |
| ○ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_launch_thread | libomp.so | |
| ○ | __kmp_launch_worker(void*) | libomp.so | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►3.09+ | miniqmcreference::TwoBodyJastr[...] | TwoBodyJastrowRef.h:421 | exec |
| ○ | miniqmcreference::TwoBodyJastr[...] | TwoBodyJastrowRef.h:411 | exec |
| ○ | qmcplusplus::WaveFunction::eva[...] | WaveFunction.cpp:175 | exec |
| ○ | main.omp_outlined | miniqmc.cpp:379 | exec |
| ○ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_launch_thread | libomp.so | |
| ○ | __kmp_launch_worker(void*) | libomp.so | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►2.75+ | qmcplusplus::WaveFunction::rat[...] | WaveFunction.cpp:201 | exec |
| ○ | main.omp_outlined.62 | miniqmc.cpp:442 | exec |
| ○ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_fork_call | libomp.so | |
| ○ | __kmpc_fork_call | libomp.so | |
| ○ | main | miniqmc.cpp:409 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | new_allocator.h:172 | exec |
| ►2.06+ | qmcplusplus::WaveFunction::acc[...] | NewTimer.h:249 | exec |
| ○ | main.omp_outlined.62 | miniqmc.cpp:450 | exec |
| ○ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_fork_call | libomp.so | |
| ○ | __kmpc_fork_call | libomp.so | |
| ○ | main | miniqmc.cpp:409 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | new_allocator.h:172 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►59.55+ | qmcplusplus::WaveFunction::rat[...] | WaveFunction.cpp:201 | exec |
| ○ | main.omp_outlined.62 | miniqmc.cpp:442 | exec |
| ○ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_launch_thread | libomp.so | |
| ○ | __kmp_launch_worker(void*) | libomp.so | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►32.81+ | qmcplusplus::WaveFunction::acc[...] | NewTimer.h:249 | exec |
| ○ | main.omp_outlined.62 | miniqmc.cpp:450 | exec |
| ○ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_launch_thread | libomp.so | |
| ○ | __kmp_launch_worker(void*) | libomp.so | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►4.04+ | miniqmcreference::TwoBodyJastr[...] | TwoBodyJastrowRef.h:421 | exec |
| ○ | miniqmcreference::TwoBodyJastr[...] | TwoBodyJastrowRef.h:411 | exec |
| ○ | qmcplusplus::WaveFunction::eva[...] | WaveFunction.cpp:175 | exec |
| ○ | main.omp_outlined | miniqmc.cpp:379 | exec |
| ○ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_launch_thread | libomp.so | |
| ○ | __kmp_launch_worker(void*) | libomp.so | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►1.35+ | miniqmcreference::OneBodyJastr[...] | OneBodyJastrowRef.h:260 | exec |
| ○ | qmcplusplus::WaveFunction::rat[...] | WaveFunction.cpp:201 | exec |
| ○ | main.omp_outlined.62 | miniqmc.cpp:442 | exec |
| ○ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_launch_thread | libomp.so | |
| ○ | __kmp_launch_worker(void*) | libomp.so | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►1.35+ | qmcplusplus::WaveFunction::rat[...] | WaveFunction.cpp:201 | exec |
| ○ | main.omp_outlined.62 | miniqmc.cpp:442 | exec |
| ○ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_fork_call | libomp.so | |
| ○ | __kmpc_fork_call | libomp.so | |
| ○ | main | miniqmc.cpp:409 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | new_allocator.h:172 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►60.97+ | qmcplusplus::WaveFunction::rat[...] | WaveFunction.cpp:201 | exec |
| ○ | main.omp_outlined.62 | miniqmc.cpp:442 | exec |
| ○ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_launch_thread | libomp.so | |
| ○ | __kmp_launch_worker(void*) | libomp.so | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►28.48+ | qmcplusplus::WaveFunction::acc[...] | NewTimer.h:249 | exec |
| ○ | main.omp_outlined.62 | miniqmc.cpp:450 | exec |
| ○ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_launch_thread | libomp.so | |
| ○ | __kmp_launch_worker(void*) | libomp.so | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►4.85+ | miniqmcreference::TwoBodyJastr[...] | TwoBodyJastrowRef.h:421 | exec |
| ○ | miniqmcreference::TwoBodyJastr[...] | TwoBodyJastrowRef.h:411 | exec |
| ○ | qmcplusplus::WaveFunction::eva[...] | WaveFunction.cpp:175 | exec |
| ○ | main.omp_outlined | miniqmc.cpp:379 | exec |
| ○ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_launch_thread | libomp.so | |
| ○ | __kmp_launch_worker(void*) | libomp.so | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►3.38+ | miniqmcreference::OneBodyJastr[...] | OneBodyJastrowRef.h:260 | exec |
| ○ | qmcplusplus::WaveFunction::rat[...] | WaveFunction.cpp:201 | exec |
| ○ | main.omp_outlined.62 | miniqmc.cpp:442 | exec |
| ○ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_launch_thread | libomp.so | |
| ○ | __kmp_launch_worker(void*) | libomp.so | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►1.27+ | qmcplusplus::WaveFunction::acc[...] | NewTimer.h:249 | exec |
| ○ | main.omp_outlined.62 | miniqmc.cpp:450 | exec |
| ○ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_fork_call | libomp.so | |
| ○ | __kmpc_fork_call | libomp.so | |
| ○ | main | miniqmc.cpp:409 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | new_allocator.h:172 | exec |
| ►1.05+ | qmcplusplus::WaveFunction::rat[...] | WaveFunction.cpp:201 | exec |
| ○ | main.omp_outlined.62 | miniqmc.cpp:442 | exec |
| ○ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_fork_call | libomp.so | |
| ○ | __kmpc_fork_call | libomp.so | |
| ○ | main | miniqmc.cpp:409 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | new_allocator.h:172 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►58.97+ | qmcplusplus::WaveFunction::rat[...] | WaveFunction.cpp:201 | exec |
| ○ | main.omp_outlined.62 | miniqmc.cpp:442 | exec |
| ○ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_launch_thread | libomp.so | |
| ○ | __kmp_launch_worker(void*) | libomp.so | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►30.47+ | qmcplusplus::WaveFunction::acc[...] | NewTimer.h:249 | exec |
| ○ | main.omp_outlined.62 | miniqmc.cpp:450 | exec |
| ○ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_launch_thread | libomp.so | |
| ○ | __kmp_launch_worker(void*) | libomp.so | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►4.68+ | miniqmcreference::TwoBodyJastr[...] | TwoBodyJastrowRef.h:421 | exec |
| ○ | miniqmcreference::TwoBodyJastr[...] | TwoBodyJastrowRef.h:411 | exec |
| ○ | qmcplusplus::WaveFunction::eva[...] | WaveFunction.cpp:175 | exec |
| ○ | main.omp_outlined | miniqmc.cpp:379 | exec |
| ○ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_launch_thread | libomp.so | |
| ○ | __kmp_launch_worker(void*) | libomp.so | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►3.47+ | miniqmcreference::OneBodyJastr[...] | OneBodyJastrowRef.h:260 | exec |
| ○ | qmcplusplus::WaveFunction::rat[...] | WaveFunction.cpp:201 | exec |
| ○ | main.omp_outlined.62 | miniqmc.cpp:442 | exec |
| ○ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_launch_thread | libomp.so | |
| ○ | __kmp_launch_worker(void*) | libomp.so | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►1.51+ | qmcplusplus::WaveFunction::rat[...] | WaveFunction.cpp:201 | exec |
| ○ | main.omp_outlined.62 | miniqmc.cpp:442 | exec |
| ○ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_fork_call | libomp.so | |
| ○ | __kmpc_fork_call | libomp.so | |
| ○ | main | miniqmc.cpp:409 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | new_allocator.h:172 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►62.26+ | qmcplusplus::WaveFunction::rat[...] | WaveFunction.cpp:201 | exec |
| ○ | main.omp_outlined.62 | miniqmc.cpp:442 | exec |
| ○ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_launch_thread | libomp.so | |
| ○ | __kmp_launch_worker(void*) | libomp.so | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►28.75+ | qmcplusplus::WaveFunction::acc[...] | NewTimer.h:249 | exec |
| ○ | main.omp_outlined.62 | miniqmc.cpp:450 | exec |
| ○ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_launch_thread | libomp.so | |
| ○ | __kmp_launch_worker(void*) | libomp.so | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►4.36+ | miniqmcreference::TwoBodyJastr[...] | TwoBodyJastrowRef.h:421 | exec |
| ○ | miniqmcreference::TwoBodyJastr[...] | TwoBodyJastrowRef.h:411 | exec |
| ○ | qmcplusplus::WaveFunction::eva[...] | WaveFunction.cpp:175 | exec |
| ○ | main.omp_outlined | miniqmc.cpp:379 | exec |
| ○ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_launch_thread | libomp.so | |
| ○ | __kmp_launch_worker(void*) | libomp.so | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►3.27+ | miniqmcreference::OneBodyJastr[...] | OneBodyJastrowRef.h:260 | exec |
| ○ | qmcplusplus::WaveFunction::rat[...] | WaveFunction.cpp:201 | exec |
| ○ | main.omp_outlined.62 | miniqmc.cpp:442 | exec |
| ○ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_launch_thread | libomp.so | |
| ○ | __kmp_launch_worker(void*) | libomp.so | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| Path / |
| Metric | Value |
|---|---|
| CQA speedup if no scalar integer | 1.00 |
| CQA speedup if FP arith vectorized | 1.00 |
| CQA speedup if fully vectorized | 1.01 |
| CQA speedup if no inter-iteration dependency | NA |
| CQA speedup if next bottleneck killed | 1.21 |
| Bottlenecks | P6, P7, |
| Function | qmcplusplus::BsplineFunctor |
| Source | stl_vector.h:1150-1150,BsplineFunctor.h:303-336 |
| Source loop unroll info | not unrolled or unrolled with no peel/tail loop |
| Source loop unroll confidence level | max |
| Unroll/vectorization loop type | NA |
| Unroll factor | NA |
| CQA cycles | 25.50 |
| CQA cycles if no scalar integer | 25.50 |
| CQA cycles if FP arith vectorized | 25.50 |
| CQA cycles if fully vectorized | 25.13 |
| Front-end cycles | 10.25 |
| P0 cycles | 0.50 |
| P1 cycles | 0.50 |
| P2 cycles | 0.50 |
| P3 cycles | 0.25 |
| P4 cycles | 2.00 |
| P5 cycles | 0.25 |
| P6 cycles | 25.50 |
| P7 cycles | 25.50 |
| P8 cycles | 21.00 |
| P9 cycles | 21.00 |
| P10 cycles | 12.00 |
| P11 cycles | 12.00 |
| P12 cycles | 3.00 |
| P13 cycles | 0.00 |
| P14 cycles | 0.00 |
| DIV/SQRT cycles | 6.99 - 14.08 |
| Inter-iter dependencies cycles | 1 |
| FE+BE cycles (UFS) | NA |
| Stall cycles (UFS) | NA |
| Nb insns | 82.00 |
| Nb uops | 82.00 |
| Nb loads | NA |
| Nb stores | 3.00 |
| Nb stack references | 18.00 |
| FLOP/cycle | 11.92 |
| Nb FLOP add-sub | 4.00 |
| Nb FLOP mul | 32.00 |
| Nb FLOP fma | 132.00 |
| Nb FLOP div | 4.00 |
| Nb FLOP rcp | 0.00 |
| Nb FLOP sqrt | 0.00 |
| Nb FLOP rsqrt | 0.00 |
| Bytes/cycle | 33.89 |
| Bytes prefetched | 0.00 |
| Bytes loaded | 768.13 |
| Bytes stored | 96.00 |
| Stride 0 | 1.00 |
| Stride 1 | 0.00 |
| Stride n | 0.00 |
| Stride unknown | 3.00 |
| Stride indirect | 5.00 |
| Vectorization ratio all | 91.14 |
| Vectorization ratio load | 96.00 |
| Vectorization ratio store | 100.00 |
| Vectorization ratio mul | 100.00 |
| Vectorization ratio add_sub | 50.00 |
| Vectorization ratio fma | 100.00 |
| Vectorization ratio div_sqrt | 100.00 |
| Vectorization ratio other | 25.00 |
| Vector-efficiency ratio all | 98.10 |
| Vector-efficiency ratio load | 97.00 |
| Vector-efficiency ratio store | 100.00 |
| Vector-efficiency ratio mul | 100.00 |
| Vector-efficiency ratio add_sub | 62.50 |
| Vector-efficiency ratio fma | 100.00 |
| Vector-efficiency ratio div_sqrt | 100.00 |
| Vector-efficiency ratio other | 90.63 |
| Metric | Value |
|---|---|
| CQA speedup if no scalar integer | 1.00 |
| CQA speedup if FP arith vectorized | 1.00 |
| CQA speedup if fully vectorized | 1.01 |
| CQA speedup if no inter-iteration dependency | NA |
| CQA speedup if next bottleneck killed | 1.21 |
| Bottlenecks | P6, P7, |
| Function | qmcplusplus::BsplineFunctor |
| Source | stl_vector.h:1150-1150,BsplineFunctor.h:303-336 |
| Source loop unroll info | not unrolled or unrolled with no peel/tail loop |
| Source loop unroll confidence level | max |
| Unroll/vectorization loop type | NA |
| Unroll factor | NA |
| CQA cycles | 25.50 |
| CQA cycles if no scalar integer | 25.50 |
| CQA cycles if FP arith vectorized | 25.50 |
| CQA cycles if fully vectorized | 25.13 |
| Front-end cycles | 10.25 |
| P0 cycles | 0.50 |
| P1 cycles | 0.50 |
| P2 cycles | 0.50 |
| P3 cycles | 0.25 |
| P4 cycles | 2.00 |
| P5 cycles | 0.25 |
| P6 cycles | 25.50 |
| P7 cycles | 25.50 |
| P8 cycles | 21.00 |
| P9 cycles | 21.00 |
| P10 cycles | 12.00 |
| P11 cycles | 12.00 |
| P12 cycles | 3.00 |
| P13 cycles | 0.00 |
| P14 cycles | 0.00 |
| DIV/SQRT cycles | 6.99 - 14.08 |
| Inter-iter dependencies cycles | 1 |
| FE+BE cycles (UFS) | NA |
| Stall cycles (UFS) | NA |
| Nb insns | 82.00 |
| Nb uops | 82.00 |
| Nb loads | NA |
| Nb stores | 3.00 |
| Nb stack references | 18.00 |
| FLOP/cycle | 11.92 |
| Nb FLOP add-sub | 4.00 |
| Nb FLOP mul | 32.00 |
| Nb FLOP fma | 132.00 |
| Nb FLOP div | 4.00 |
| Nb FLOP rcp | 0.00 |
| Nb FLOP sqrt | 0.00 |
| Nb FLOP rsqrt | 0.00 |
| Bytes/cycle | 33.89 |
| Bytes prefetched | 0.00 |
| Bytes loaded | 768.13 |
| Bytes stored | 96.00 |
| Stride 0 | 1.00 |
| Stride 1 | 0.00 |
| Stride n | 0.00 |
| Stride unknown | 3.00 |
| Stride indirect | 5.00 |
| Vectorization ratio all | 91.14 |
| Vectorization ratio load | 96.00 |
| Vectorization ratio store | 100.00 |
| Vectorization ratio mul | 100.00 |
| Vectorization ratio add_sub | 50.00 |
| Vectorization ratio fma | 100.00 |
| Vectorization ratio div_sqrt | 100.00 |
| Vectorization ratio other | 25.00 |
| Vector-efficiency ratio all | 98.10 |
| Vector-efficiency ratio load | 97.00 |
| Vector-efficiency ratio store | 100.00 |
| Vector-efficiency ratio mul | 100.00 |
| Vector-efficiency ratio add_sub | 62.50 |
| Vector-efficiency ratio fma | 100.00 |
| Vector-efficiency ratio div_sqrt | 100.00 |
| Vector-efficiency ratio other | 90.63 |
| Path / |
| Function | qmcplusplus::BsplineFunctor |
| Source file and lines | BsplineFunctor.h:303-336 |
| Module | exec |
| nb instructions | 82 |
| nb uops | 82 |
| loop length | 328 |
| used w registers | 0 |
| used x registers | 9 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 0 |
| used q registers | 0 |
| used v registers | 1 |
| used z registers | 32 |
| nb stack references | 18 |
| ADD-SUB / MUL ratio | 0.13 |
| micro-operation queue | 10.25 cycles |
| front end | 10.25 cycles |
| | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 0.50 | 0.50 | 0.50 | 0.25 | 2.00 | 0.25 | 25.50 | 25.50 | 3.00 | 3.00 | 12.00 | 12.00 | 3.00 | 0.00 | 0.00 |
| cycles | 0.50 | 0.50 | 0.50 | 0.25 | 2.00 | 0.25 | 25.50 | 25.50 | 21.00 | 21.00 | 12.00 | 12.00 | 3.00 | 0.00 | 0.00 |
| Cycles executing div or sqrt instructions | 6.99-14.08 |
| Longest recurrence chain latency (RecMII) | 1.00 |
| Front-end | 10.25 |
| Dispatch | 25.50 |
| DIV/SQRT | 6.99-14.08 |
| Data deps. | 1.00 |
| Overall L1 | 25.50 |
| all | 80% |
| load | 96% |
| store | 100% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 14% |
| all | 100% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | 100% |
| add-sub | 100% |
| fma | 100% |
| div/sqrt | 100% |
| other | 100% |
| all | 91% |
| load | 96% |
| store | 100% |
| mul | 100% |
| add-sub | 50% |
| fma | 100% |
| div/sqrt | 100% |
| other | 25% |
| all | 95% |
| load | 97% |
| store | 100% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 25% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 89% |
| all | 100% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | 100% |
| add-sub | 100% |
| fma | 100% |
| div/sqrt | 100% |
| other | 100% |
| all | 98% |
| load | 97% |
| store | 100% |
| mul | 100% |
| add-sub | 62% |
| fma | 100% |
| div/sqrt | 100% |
| other | 90% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| LD1D {Z16.D}, P0/Z, [X9, X13,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 | vect (100.0%) |
| DUP Z20.D, X14 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (25.0%) |
| LDR Z24, [SP, #14, MUL VL] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 | vect (100.0%) |
| LDR Z27, [SP, #4, MUL VL] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 | vect (100.0%) |
| FMUL Z18.D, Z16.D, Z6.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (100.0%) |
| MOVPRFX Z17, Z18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (100.0%) |
| FCVTZS Z17.D, P1/M, Z18.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
| MOVPRFX Z19, Z17 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (100.0%) |
| SCVTF Z19.D, P1/M, Z17.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
| ADR Z20.D, [Z20, Z17.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (25.0%) |
| LD1D {Z17.D}, P0/Z, [X14, Z17.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 | vect (100.0%) |
| LD1D {Z21.D}, P0/Z, [V20, #1.D] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.33 | 0.33 | 0.33 | 0 | 0 | 9 | 2 | vect (100.0%) |
| LD1D {Z22.D}, P0/Z, [V20, #2.D] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.33 | 0.33 | 0.33 | 0 | 0 | 9 | 2 | vect (100.0%) |
| LD1D {Z20.D}, P0/Z, [V20, #3.D] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.33 | 0.33 | 0.33 | 0 | 0 | 9 | 2 | vect (100.0%) |
| FSUB Z25.D, Z18.D, Z19.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | vect (100.0%) |
| LDR Z18, [SP, #17, MUL VL] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 | vect (100.0%) |
| LDR Z19, [SP, #16, MUL VL] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 | vect (100.0%) |
| FMUL Z26.D, Z25.D, Z25.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (100.0%) |
| FMUL Z23.D, Z26.D, Z25.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (100.0%) |
| FMAD Z18.D, P1/M, Z25.D, Z19.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| LDR Z19, [SP, #15, MUL VL] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 | vect (100.0%) |
| FMUL Z18.D, Z18.D, Z17.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (100.0%) |
| FMLA Z24.D, P1/M, Z19.D, Z25.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| LDR Z19, [SP, #9, MUL VL] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 | vect (100.0%) |
| FMLA Z18.D, P1/M, Z24.D, Z21.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| LDR Z24, [SP, #7, MUL VL] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 | vect (100.0%) |
| FMLA Z24.D, P1/M, Z19.D, Z26.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| LDR Z19, [SP, #8, MUL VL] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 | vect (100.0%) |
| FMLA Z24.D, P1/M, Z19.D, Z25.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| LDR Z19, [SP, #6, MUL VL] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 | vect (100.0%) |
| FMUL Z24.D, Z24.D, Z17.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (100.0%) |
| FMAD Z19.D, P1/M, Z26.D, Z27.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| LDR Z27, [SP, #5, MUL VL] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 | vect (100.0%) |
| FMLA Z19.D, P1/M, Z27.D, Z25.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| FMAD Z19.D, P1/M, Z21.D, Z24.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| MOVPRFX Z24, Z9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (100.0%) |
| FMLA Z24.D, P1/M, Z30.D, Z23.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| FMLA Z24.D, P1/M, Z31.D, Z26.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| FMLA Z24.D, P1/M, Z8.D, Z25.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| FMUL Z17.D, Z24.D, Z17.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (100.0%) |
| MOVPRFX Z24, Z13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (100.0%) |
| FMLA Z24.D, P1/M, Z10.D, Z23.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| FMLA Z24.D, P1/M, Z11.D, Z26.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| FMLA Z24.D, P1/M, Z12.D, Z25.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| FMLA Z17.D, P1/M, Z24.D, Z21.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| LDR Z21, [SP, #13, MUL VL] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 | vect (100.0%) |
| LDR Z24, [SP, #12, MUL VL] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 | vect (100.0%) |
| FMAD Z21.D, P1/M, Z25.D, Z24.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| LDR Z24, [SP, #1, MUL VL] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 | vect (100.0%) |
| FMLA Z18.D, P1/M, Z21.D, Z22.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| LDR Z21, [SP, #3, MUL VL] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 | vect (100.0%) |
| FMAD Z21.D, P1/M, Z26.D, Z24.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| LDR Z24, [SP, #2, MUL VL] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 | vect (100.0%) |
| FMLA Z21.D, P1/M, Z24.D, Z25.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| FMLA Z19.D, P1/M, Z21.D, Z22.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| MOVPRFX Z21, Z1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (100.0%) |
| FMLA Z21.D, P1/M, Z14.D, Z23.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| FMAD Z23.D, P1/M, Z2.D, Z5.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| FMLA Z21.D, P1/M, Z15.D, Z26.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| FMLA Z23.D, P1/M, Z3.D, Z26.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| FMLA Z21.D, P1/M, Z0.D, Z25.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| FMLA Z23.D, P1/M, Z4.D, Z25.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| FMLA Z17.D, P1/M, Z21.D, Z22.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| LDR Z21, [SP, #11, MUL VL] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 | vect (100.0%) |
| LDR Z22, [SP, #10, MUL VL] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 | vect (100.0%) |
| FMLA Z17.D, P1/M, Z23.D, Z20.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| FMAD Z21.D, P1/M, Z25.D, Z22.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| FMLA Z18.D, P1/M, Z21.D, Z20.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| LDR Z21, [SP, MUL VL] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 | vect (100.0%) |
| FMUL Z18.D, Z7.D, Z18.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (100.0%) |
| FMAD Z21.D, P1/M, Z26.D, Z29.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| FMLA Z21.D, P1/M, Z28.D, Z25.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| FMLA Z19.D, P1/M, Z21.D, Z20.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| LD1SW {Z20.D}, P0/Z, [X8, X13,LSL #2] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 | vect (100.0%) |
| ADD X13, X13, X16 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ST1D {Z18.D}, P0, [X12, Z20.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 | vect (100.0%) |
| FMUL Z18.D, Z19.D, Z6.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (100.0%) |
| FDIVR Z16.D, P1/M, Z16.D, Z18.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7-15 | 6.99-14.08 | vect (100.0%) |
| ST1D {Z16.D}, P0, [X11, Z20.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 | vect (100.0%) |
| ST1D {Z17.D}, P0, [X10, Z20.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 | vect (100.0%) |
| WHILELO P0.D, X13, X15 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 2 | N/A |
| B.MI 422840 <_ZNK11qmcplusplus14BsplineFunctorIdE11evaluateVGLEiiiPKdPdS4_S4_S4_Pi+0x1e0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| Function | qmcplusplus::BsplineFunctor |
| Source file and lines | BsplineFunctor.h:303-336 |
| Module | exec |
| nb instructions | 82 |
| nb uops | 82 |
| loop length | 328 |
| used w registers | 0 |
| used x registers | 9 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 0 |
| used q registers | 0 |
| used v registers | 1 |
| used z registers | 32 |
| nb stack references | 18 |
| ADD-SUB / MUL ratio | 0.13 |
| micro-operation queue | 10.25 cycles |
| front end | 10.25 cycles |
| | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 0.50 | 0.50 | 0.50 | 0.25 | 2.00 | 0.25 | 25.50 | 25.50 | 3.00 | 3.00 | 12.00 | 12.00 | 3.00 | 0.00 | 0.00 |
| cycles | 0.50 | 0.50 | 0.50 | 0.25 | 2.00 | 0.25 | 25.50 | 25.50 | 21.00 | 21.00 | 12.00 | 12.00 | 3.00 | 0.00 | 0.00 |
| Cycles executing div or sqrt instructions | 6.99-14.08 |
| Longest recurrence chain latency (RecMII) | 1.00 |
| Front-end | 10.25 |
| Dispatch | 25.50 |
| DIV/SQRT | 6.99-14.08 |
| Data deps. | 1.00 |
| Overall L1 | 25.50 |
| all | 80% |
| load | 96% |
| store | 100% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 14% |
| all | 100% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | 100% |
| add-sub | 100% |
| fma | 100% |
| div/sqrt | 100% |
| other | 100% |
| all | 91% |
| load | 96% |
| store | 100% |
| mul | 100% |
| add-sub | 50% |
| fma | 100% |
| div/sqrt | 100% |
| other | 25% |
| all | 95% |
| load | 97% |
| store | 100% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 25% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 89% |
| all | 100% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | 100% |
| add-sub | 100% |
| fma | 100% |
| div/sqrt | 100% |
| other | 100% |
| all | 98% |
| load | 97% |
| store | 100% |
| mul | 100% |
| add-sub | 62% |
| fma | 100% |
| div/sqrt | 100% |
| other | 90% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| LD1D {Z16.D}, P0/Z, [X9, X13,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 | vect (100.0%) |
| DUP Z20.D, X14 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (25.0%) |
| LDR Z24, [SP, #14, MUL VL] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 | vect (100.0%) |
| LDR Z27, [SP, #4, MUL VL] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 | vect (100.0%) |
| FMUL Z18.D, Z16.D, Z6.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (100.0%) |
| MOVPRFX Z17, Z18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (100.0%) |
| FCVTZS Z17.D, P1/M, Z18.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
| MOVPRFX Z19, Z17 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (100.0%) |
| SCVTF Z19.D, P1/M, Z17.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
| ADR Z20.D, [Z20, Z17.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (25.0%) |
| LD1D {Z17.D}, P0/Z, [X14, Z17.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 | vect (100.0%) |
| LD1D {Z21.D}, P0/Z, [V20, #1.D] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.33 | 0.33 | 0.33 | 0 | 0 | 9 | 2 | vect (100.0%) |
| LD1D {Z22.D}, P0/Z, [V20, #2.D] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.33 | 0.33 | 0.33 | 0 | 0 | 9 | 2 | vect (100.0%) |
| LD1D {Z20.D}, P0/Z, [V20, #3.D] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.33 | 0.33 | 0.33 | 0 | 0 | 9 | 2 | vect (100.0%) |
| FSUB Z25.D, Z18.D, Z19.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | vect (100.0%) |
| LDR Z18, [SP, #17, MUL VL] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 | vect (100.0%) |
| LDR Z19, [SP, #16, MUL VL] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 | vect (100.0%) |
| FMUL Z26.D, Z25.D, Z25.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (100.0%) |
| FMUL Z23.D, Z26.D, Z25.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (100.0%) |
| FMAD Z18.D, P1/M, Z25.D, Z19.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| LDR Z19, [SP, #15, MUL VL] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 | vect (100.0%) |
| FMUL Z18.D, Z18.D, Z17.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (100.0%) |
| FMLA Z24.D, P1/M, Z19.D, Z25.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| LDR Z19, [SP, #9, MUL VL] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 | vect (100.0%) |
| FMLA Z18.D, P1/M, Z24.D, Z21.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| LDR Z24, [SP, #7, MUL VL] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 | vect (100.0%) |
| FMLA Z24.D, P1/M, Z19.D, Z26.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| LDR Z19, [SP, #8, MUL VL] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 | vect (100.0%) |
| FMLA Z24.D, P1/M, Z19.D, Z25.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| LDR Z19, [SP, #6, MUL VL] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 | vect (100.0%) |
| FMUL Z24.D, Z24.D, Z17.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (100.0%) |
| FMAD Z19.D, P1/M, Z26.D, Z27.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| LDR Z27, [SP, #5, MUL VL] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 | vect (100.0%) |
| FMLA Z19.D, P1/M, Z27.D, Z25.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| FMAD Z19.D, P1/M, Z21.D, Z24.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| MOVPRFX Z24, Z9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (100.0%) |
| FMLA Z24.D, P1/M, Z30.D, Z23.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| FMLA Z24.D, P1/M, Z31.D, Z26.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| FMLA Z24.D, P1/M, Z8.D, Z25.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| FMUL Z17.D, Z24.D, Z17.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (100.0%) |
| MOVPRFX Z24, Z13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (100.0%) |
| FMLA Z24.D, P1/M, Z10.D, Z23.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| FMLA Z24.D, P1/M, Z11.D, Z26.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| FMLA Z24.D, P1/M, Z12.D, Z25.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| FMLA Z17.D, P1/M, Z24.D, Z21.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| LDR Z21, [SP, #13, MUL VL] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 | vect (100.0%) |
| LDR Z24, [SP, #12, MUL VL] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 | vect (100.0%) |
| FMAD Z21.D, P1/M, Z25.D, Z24.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| LDR Z24, [SP, #1, MUL VL] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 | vect (100.0%) |
| FMLA Z18.D, P1/M, Z21.D, Z22.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| LDR Z21, [SP, #3, MUL VL] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 | vect (100.0%) |
| FMAD Z21.D, P1/M, Z26.D, Z24.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| LDR Z24, [SP, #2, MUL VL] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 | vect (100.0%) |
| FMLA Z21.D, P1/M, Z24.D, Z25.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| FMLA Z19.D, P1/M, Z21.D, Z22.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| MOVPRFX Z21, Z1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (100.0%) |
| FMLA Z21.D, P1/M, Z14.D, Z23.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| FMAD Z23.D, P1/M, Z2.D, Z5.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| FMLA Z21.D, P1/M, Z15.D, Z26.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| FMLA Z23.D, P1/M, Z3.D, Z26.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| FMLA Z21.D, P1/M, Z0.D, Z25.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| FMLA Z23.D, P1/M, Z4.D, Z25.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| FMLA Z17.D, P1/M, Z21.D, Z22.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| LDR Z21, [SP, #11, MUL VL] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 | vect (100.0%) |
| LDR Z22, [SP, #10, MUL VL] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 | vect (100.0%) |
| FMLA Z17.D, P1/M, Z23.D, Z20.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| FMAD Z21.D, P1/M, Z25.D, Z22.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| FMLA Z18.D, P1/M, Z21.D, Z20.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| LDR Z21, [SP, MUL VL] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 | vect (100.0%) |
| FMUL Z18.D, Z7.D, Z18.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (100.0%) |
| FMAD Z21.D, P1/M, Z26.D, Z29.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| FMLA Z21.D, P1/M, Z28.D, Z25.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| FMLA Z19.D, P1/M, Z21.D, Z20.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| LD1SW {Z20.D}, P0/Z, [X8, X13,LSL #2] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 | vect (100.0%) |
| ADD X13, X13, X16 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ST1D {Z18.D}, P0, [X12, Z20.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 | vect (100.0%) |
| FMUL Z18.D, Z19.D, Z6.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (100.0%) |
| FDIVR Z16.D, P1/M, Z16.D, Z18.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7-15 | 6.99-14.08 | vect (100.0%) |
| ST1D {Z16.D}, P0, [X11, Z20.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 | vect (100.0%) |
| ST1D {Z17.D}, P0, [X10, Z20.D,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 | vect (100.0%) |
| WHILELO P0.D, X13, X15 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 2 | N/A |
| B.MI 422840 <_ZNK11qmcplusplus14BsplineFunctorIdE11evaluateVGLEiiiPKdPdS4_S4_S4_Pi+0x1e0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| Run 1x1 | Number processes: 1; Number nodes: NA; Run Command: <executable> -g "4 2 2" -b; MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings; Dataset: ; Run Directory: /home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/run/oneview_runs/multicore/armclang/oneview_run_1782144418; OMP_PROC_BIND: spread; OMP_DISPLAY_AFFINITY: TRUE; OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'; OMP_DISPLAY_ENV: TRUE; OMP_NUM_THREADS: 1; OMP_PLACES: threads |
|---|---|
| Run 1x2 | Number processes: 1; Run Command: <executable> -g "4 2 2" -b; MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings; Dataset: ; Run Directory: /home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/run/oneview_runs/multicore/armclang/oneview_run_1782144418; OMP_NUM_THREADS: 2; OMP_PROC_BIND: spread; OMP_DISPLAY_AFFINITY: TRUE; OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'; OMP_DISPLAY_ENV: TRUE; OMP_PLACES: threads |
| Run 1x4 | Number processes: 1; Run Command: <executable> -g "4 2 2" -b; MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings; Dataset: ; Run Directory: /home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/run/oneview_runs/multicore/armclang/oneview_run_1782144418; OMP_NUM_THREADS: 4; OMP_PROC_BIND: spread; OMP_DISPLAY_AFFINITY: TRUE; OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'; OMP_DISPLAY_ENV: TRUE; OMP_PLACES: threads |
| Run 1x8 | Number processes: 1; Run Command: <executable> -g "4 2 2" -b; MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings; Dataset: ; Run Directory: /home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/run/oneview_runs/multicore/armclang/oneview_run_1782144418; OMP_NUM_THREADS: 8; OMP_PROC_BIND: spread; OMP_DISPLAY_AFFINITY: TRUE; OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'; OMP_DISPLAY_ENV: TRUE; OMP_PLACES: threads |
| Run 1x16 | Number processes: 1; Run Command: <executable> -g "4 2 2" -b; MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings; Dataset: ; Run Directory: /home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/run/oneview_runs/multicore/armclang/oneview_run_1782144418; OMP_NUM_THREADS: 16; OMP_PROC_BIND: spread; OMP_DISPLAY_AFFINITY: TRUE; OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'; OMP_DISPLAY_ENV: TRUE; OMP_PLACES: threads |
| Run 1x24 | Number processes: 1; Run Command: <executable> -g "4 2 2" -b; MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings; Dataset: ; Run Directory: /home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/run/oneview_runs/multicore/armclang/oneview_run_1782144418; OMP_NUM_THREADS: 24; OMP_PROC_BIND: spread; OMP_DISPLAY_AFFINITY: TRUE; OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'; OMP_DISPLAY_ENV: TRUE; OMP_PLACES: threads |
| Run 1x32 | Number processes: 1; Run Command: <executable> -g "4 2 2" -b; MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings; Dataset: ; Run Directory: /home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/run/oneview_runs/multicore/armclang/oneview_run_1782144418; OMP_NUM_THREADS: 32; OMP_PROC_BIND: spread; OMP_DISPLAY_AFFINITY: TRUE; OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'; OMP_DISPLAY_ENV: TRUE; OMP_PLACES: threads |
| Run 1x40 | Number processes: 1; Run Command: <executable> -g "4 2 2" -b; MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings; Dataset: ; Run Directory: /home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/run/oneview_runs/multicore/armclang/oneview_run_1782144418; OMP_NUM_THREADS: 40; OMP_PROC_BIND: spread; OMP_DISPLAY_AFFINITY: TRUE; OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'; OMP_DISPLAY_ENV: TRUE; OMP_PLACES: threads |
| Run 1x48 | Number processes: 1<br>Run Command: <executable> -g "4 2 2" -b<br>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings<br>Dataset:<br>Run Directory: /home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/run/oneview_runs/multicore/armclang/oneview_run_1782144418<br>OMP_NUM_THREADS: 48<br>OMP_PROC_BIND: spread<br>OMP_DISPLAY_AFFINITY: TRUE<br>OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'<br>OMP_DISPLAY_ENV: TRUE<br>OMP_PLACES: threads |
| Run 1x56 | Number processes: 1<br>Run Command: <executable> -g "4 2 2" -b<br>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings<br>Dataset:<br>Run Directory: /home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/run/oneview_runs/multicore/armclang/oneview_run_1782144418<br>OMP_NUM_THREADS: 56<br>OMP_PROC_BIND: spread<br>OMP_DISPLAY_AFFINITY: TRUE<br>OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'<br>OMP_DISPLAY_ENV: TRUE<br>OMP_PLACES: threads |
| Run 1x64 | Number processes: 1<br>Run Command: <executable> -g "4 2 2" -b<br>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings<br>Dataset:<br>Run Directory: /home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/run/oneview_runs/multicore/armclang/oneview_run_1782144418<br>OMP_NUM_THREADS: 64<br>OMP_PROC_BIND: spread<br>OMP_DISPLAY_AFFINITY: TRUE<br>OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'<br>OMP_DISPLAY_ENV: TRUE<br>OMP_PLACES: threads |
| (1x1) Efficiency | (1x1) Potential Speed-Up (%) | (1x2) Efficiency | (1x2) Potential Speed-Up (%) | (1x4) Efficiency | (1x4) Potential Speed-Up (%) | (1x8) Efficiency | (1x8) Potential Speed-Up (%) | (1x16) Efficiency | (1x16) Potential Speed-Up (%) | (1x24) Efficiency | (1x24) Potential Speed-Up (%) | (1x32) Efficiency | (1x32) Potential Speed-Up (%) | (1x40) Efficiency | (1x40) Potential Speed-Up (%) | (1x48) Efficiency | (1x48) Potential Speed-Up (%) | (1x56) Efficiency | (1x56) Potential Speed-Up (%) | (1x64) Efficiency | (1x64) Potential Speed-Up (%) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 0 | 0.78 | 0.02 | 0.96 | 0 | 0.72 | 0.03 | 0.78 | 0.02 | 0.68 | 0.02 | 0.77 | 0.01 | 0.63 | 0.02 | 0.7 | 0.01 | 0.59 | 0.02 | 0.61 | 0.01 |
| Run | Number of threads | Efficiency (ideal is 1) | Speedup | Ideal Speedup | Time (s) | Coverage (%) |
|---|---|---|---|---|---|---|
| 1x1 | 1 | 1 | 1 | 1 | 0.035000000149012 | 0.079672403633595 |
| 1x2 | 2 | 0.78 | 0.78 | 2 | 0.054999999701977 | 0.10163775086403 |
| 1x4 | 4 | 0.96 | 0.96 | 4 | 0.049999997019768 | 0.080177158117294 |
| 1x8 | 8 | 0.72 | 0.72 | 8 | 0.079999998211861 | 0.098960436880589 |
| 1x16 | 16 | 0.78 | 0.78 | 16 | 0.060000002384186 | 0.069997191429138 |
| 1x24 | 24 | 0.68 | 0.68 | 24 | 0.074999995529652 | 0.065283209085464 |
| 1x32 | 32 | 0.77 | 0.77 | 32 | 0.080000005662441 | 0.049390435218811 |
| 1x40 | 40 | 0.63 | 0.63 | 40 | 0.10000000149012 | 0.052253510802984 |
| 1x48 | 48 | 0.7 | 0.7 | 48 | 0.075000002980232 | 0.040598038583994 |
| 1x56 | 56 | 0.59 | 0.59 | 56 | 0.10499999672174 | 0.043444447219372 |
| 1x64 | 64 | 0.61 | 0.61 | 64 | 0.10999999940395 | 0.038018546998501 |
