| Function: advanceVelocity.omp_outlined | Module: exec | Source: timestep.c:71-78 | Coverage (incl. loops): 1.05% | (excl. loops): 0.00% |
|---|
| Function: advanceVelocity.omp_outlined | Module: exec | Source: timestep.c:71-78 | Coverage (incl. loops): 1.05% | (excl. loops): 0.00% |
|---|
/home/eoseret/qaas/qaas_runs/178-176-0594/intel/CoMD/build/CoMD/CoMD/src-openmp/timestep.c: 71 - 78 |
-------------------------------------------------------------------------------- |
71: #pragma omp parallel for |
72: for (int iBox=0; iBox<nBoxes; iBox++) |
73: { |
74: for (int iOff=MAXATOMS*iBox,ii=0; ii<s->boxes->nAtoms[iBox]; ii++,iOff++) |
75: { |
76: s->atoms->p[iOff][0] += dt*s->atoms->f[iOff][0]; |
77: s->atoms->p[iOff][1] += dt*s->atoms->f[iOff][1]; |
78: s->atoms->p[iOff][2] += dt*s->atoms->f[iOff][2]; |
0x4193b0 SUB SP, SP, #80 |
0x4193b4 STP X29, X30, [SP, #32] |
0x4193b8 STP X22, X21, [SP, #48] |
0x4193bc STP X20, X19, [SP, #64] |
0x4193c0 ADD X29, SP, #32 |
0x4193c4 LDR W8, [X2] |
0x4193c8 SUBS W22, W8, #1 |
0x4193cc B.LT 419444 |
0x4193d0 LDR W20, [X0] |
0x4193d4 ORR X19, XZR, X4 |
0x4193d8 ORR X21, XZR, X3 |
0x4193dc MOVZ W8, #1 |
0x4193e0 ADRP X0, |
0x4193e4 ADD X0, X0, #3096 |
0x4193e8 ADD X3, SP, #16 |
0x4193ec ORR W1, WZR, W20 |
0x4193f0 SUB X4, X29, #4 |
0x4193f4 SUB X5, X29, #8 |
0x4193f8 SUB X6, X29, #12 |
0x4193fc MOVZ W2, #34 |
0x419400 MOVZ W7, #1 |
0x419404 STP W22, WZR, [X29, #504] |
0x419408 STUR W8, [X29, #500] |
0x41940c STR WZR, [SP, #16] |
0x419410 STR W8, [SP] |
0x419414 BL 410220 |
0x419418 LDP W9, W8, [X29, #504] |
0x41941c SBFM X8, X8, #0, #31 |
0x419420 CMP W9, W22 |
0x419424 CSEL W11, W9, W22, #11 |
0x419428 CMP W8, W11 |
0x41942c STUR W11, [X29, #504] |
0x419430 B.LE 419458 |
0x419434 ADRP X0, |
0x419438 ADD X0, X0, #3120 |
0x41943c ORR W1, WZR, W20 |
0x419440 BL 4100d0 |
0x419444 LDP X20, X19, [SP, #64] |
0x419448 LDP X22, X21, [SP, #48] |
0x41944c LDP X29, X30, [SP, #32] |
0x419450 ADD SP, SP, #80 |
0x419454 RET |
0x419458 LDR X9, [X21] |
0x41945c UBFM W18, W8, #26, #25 |
0x419460 ADD W11, W11, #1 |
0x419464 ADD X12, X19, #8 |
0x419468 LDR X10, [X9, #24] |
0x41946c LDR X10, [X10, #120] |
0x419470 B 419490 |
0x419474 HINT #0 |
0x419478 HINT #0 |
0x41947c HINT #0 |
(111) 0x419480 ADD X8, X8, #1 |
(111) 0x419484 ADD W18, W13, #64 |
(111) 0x419488 CMP W11, W8 |
(111) 0x41948c B.EQ 419434 |
(111) 0x419490 LDR W14, [X10, X8,LSL #2] |
(111) 0x419494 SBFM X13, X18, #0, #31 |
(111) 0x419498 CMP W14, #1 |
(111) 0x41949c B.LT 419480 |
(111) 0x4194a0 LDR X16, [X9, #32] |
(111) 0x4194a4 CMP W14, #1 |
(111) 0x4194a8 LDP X16, X15, [X16, #32] |
(111) 0x4194ac B.NE 419524 |
(111) 0x4194b0 ORR W17, WZR, WZR |
(111) 0x4194b4 ORR X18, XZR, X13 |
(111) 0x4194b8 SUB W14, W14, W17 |
(111) 0x4194bc ADD X17, X18, X18,LSL #1 |
(111) 0x4194c0 ADD X16, X16, X17,LSL #3 |
(111) 0x4194c4 ADD X15, X15, X17,LSL #3 |
(111) 0x4194c8 ADD X16, X16, #16 |
(111) 0x4194cc ADD X15, X15, #16 |
(111) 0x4194d0 HINT #0 |
(111) 0x4194d4 HINT #0 |
(111) 0x4194d8 HINT #0 |
(111) 0x4194dc HINT #0 |
(112) 0x4194e0 LDR D0, [X19] |
(112) 0x4194e4 LDUR D1, [X15, #496] |
(112) 0x4194e8 LDP D2, D3, [X16, #1008] |
(112) 0x4194ec SUBS W14, W14, #1 |
(112) 0x4194f0 FMADD D0, D0, D1, D2 |
(112) 0x4194f4 LDR D2, [X16] |
(112) 0x4194f8 STUR D0, [X16, #496] |
(112) 0x4194fc LDR D0, [X19] |
(112) 0x419500 LDUR D1, [X15, #504] |
(112) 0x419504 FMADD D0, D0, D1, D3 |
(112) 0x419508 STUR D0, [X16, #504] |
(112) 0x41950c LDR D0, [X19] |
(112) 0x419510 LDR D1, [X15], #24 |
(112) 0x419514 FMADD D0, D0, D1, D2 |
(112) 0x419518 STR D0, [X16], #24 |
(112) 0x41951c B.NE 4194e0 |
(111) 0x419520 B 419480 |
(111) 0x419524 SUB W0, W14, #1 |
(111) 0x419528 ADD X18, X13, W18,SXTW #1 |
(111) 0x41952c ORR W17, WZR, WZR |
(111) 0x419530 ADD X0, X13, X0 |
(111) 0x419534 ADD X0, X0, X0,LSL #1 |
(111) 0x419538 UBFM X1, X18, #61, #60 |
(111) 0x41953c ADD X2, X16, X0,LSL #3 |
(111) 0x419540 ADD X0, X15, X0,LSL #3 |
(111) 0x419544 ADD X18, X16, X1 |
(111) 0x419548 ADD X0, X0, #24 |
(111) 0x41954c ADD X2, X2, #24 |
(111) 0x419550 CMP X18, X0 |
(111) 0x419554 ADD X0, X15, X1 |
(111) 0x419558 CCMP X0, X2, #2, #3 |
(111) 0x41955c CSINC W0, WZR, WZR, #2 |
(111) 0x419560 CMP X19, X2 |
(111) 0x419564 CCMP X18, X12, #2, #3 |
(111) 0x419568 B.CC 4194b4 |
(111) 0x41956c ORR X18, XZR, X13 |
(111) 0x419570 TBNZ W0, #0, 4194b8 |
(111) 0x419574 LD1R {V0.2D}, [X19] |
(111) 0x419578 AND X17, X14, #0x0 |
(111) 0x41957c ADD X0, X16, X1 |
(111) 0x419580 ADD X1, X15, X1 |
(111) 0x419584 AND X2, X14, #0x0 |
(111) 0x419588 ADD X18, X17, X13 |
(113) 0x41958c LDP Q3, Q1, [X1, #16] |
(113) 0x419590 LDR Q2, [X1], #48 |
(113) 0x419594 SUBS X2, X2, #2 |
(113) 0x419598 EXT V4.16B, V2.16B, V1.16B, #8 |
(113) 0x41959c MOV V2.D[1], V3.D[1] |
(113) 0x4195a0 MOV V3.D[1], V1.D[1] |
(113) 0x4195a4 LDP Q16, Q1, [X0, #16] |
(113) 0x4195a8 LDR Q5, [X0] |
(113) 0x4195ac ORR V7.16B, V16.16B, V16.16B |
(113) 0x4195b0 MOV V7.D[1], V1.D[1] |
(113) 0x4195b4 FMLA V7.2D, V3.2D, V0.D[0] |
(113) 0x4195b8 EXT V6.16B, V5.16B, V1.16B, #8 |
(113) 0x4195bc MOV V5.D[1], V16.D[1] |
(113) 0x4195c0 FMLA V6.2D, V4.2D, V0.2D |
(113) 0x4195c4 FMLA V5.2D, V2.2D, V0.2D |
(113) 0x4195c8 ST3 {V5.2D, V6.2D, V7.2D}, [X0], #48 |
(113) 0x4195cc B.NE 41958c |
(111) 0x4195d0 CMP X17, X14 |
(111) 0x4195d4 B.EQ 419480 |
(111) 0x4195d8 B 4194b8 |
0x4195dc HINT #0 |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►97.91+ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_launch_thread | libomp.so | |
| ○ | __kmp_launch_worker(void*) | libomp.so | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►1.14+ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_fork_call | libomp.so | |
| ○ | __kmpc_fork_call | libomp.so | |
| ○ | timestep | timestep.c:39 | exec |
| ○ | main | CoMD.c:125 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | eam.c:831 | exec |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| Path / |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run orig_0
| Source file and lines | timestep.c:71-78 |
| Module | exec |
| nb instructions | 53 |
| nb uops | 49 |
| loop length | 212 |
| used w registers | 10 |
| used x registers | 17 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 0 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 8 |
| micro-operation queue | 6.13 cycles |
| front end | 6.13 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 3.00 | 3.00 | 7.00 | 7.00 | 7.00 | 7.00 | 0.00 | 0.00 | 0.00 | 0.00 | 5.67 | 5.67 | 5.67 | 4.00 | 4.00 |
| cycles | 3.00 | 3.00 | 7.00 | 7.00 | 7.00 | 7.00 | 0.00 | 0.00 | 0.00 | 0.00 | 5.67 | 5.67 | 5.67 | 4.00 | 4.00 |
| Cycles executing div or sqrt instructions | NA |
| Front-end | 6.13 |
| Dispatch | 7.00 |
| Overall L1 | 7.00 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 23% |
| load | 12% |
| store | 28% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 20% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 22% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| SUB SP, SP, #80 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STP X29, X30, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X22, X21, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X20, X19, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #32 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| LDR W8, [X2] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| SUBS W22, W8, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.LT 419444 <advanceVelocity.omp_outlined+0x94> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR W20, [X0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| ORR X19, XZR, X4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ORR X21, XZR, X3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W8, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADRP X0, <43f3e0> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADD X0, X0, #3096 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADD X3, SP, #16 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ORR W1, WZR, W20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB X4, X29, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| SUB X5, X29, #8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| SUB X6, X29, #12 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| MOVZ W2, #34 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W7, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STP W22, WZR, [X29, #504] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| STUR W8, [X29, #500] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| STR WZR, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| STR W8, [SP] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| BL 410220 <@plt_start@+0x200> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP W9, W8, [X29, #504] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| SBFM X8, X8, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| CMP W9, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| CSEL W11, W9, W22, #11 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W8, W11 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| STUR W11, [X29, #504] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| B.LE 419458 <advanceVelocity.omp_outlined+0xa8> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADRP X0, <43f434> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADD X0, X0, #3120 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ORR W1, WZR, W20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 4100d0 <@plt_start@+0xb0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP X20, X19, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X22, X21, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X29, X30, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| ADD SP, SP, #80 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X9, [X21] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| UBFM W18, W8, #26, #25 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W11, W11, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD X12, X19, #8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| LDR X10, [X9, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X10, [X10, #120] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| B 419490 <advanceVelocity.omp_outlined+0xe0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run orig_0
| Source file and lines | timestep.c:71-78 |
| Module | exec |
| nb instructions | 53 |
| nb uops | 49 |
| loop length | 212 |
| used w registers | 10 |
| used x registers | 17 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 0 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 8 |
| micro-operation queue | 6.13 cycles |
| front end | 6.13 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 3.00 | 3.00 | 7.00 | 7.00 | 7.00 | 7.00 | 0.00 | 0.00 | 0.00 | 0.00 | 5.67 | 5.67 | 5.67 | 4.00 | 4.00 |
| cycles | 3.00 | 3.00 | 7.00 | 7.00 | 7.00 | 7.00 | 0.00 | 0.00 | 0.00 | 0.00 | 5.67 | 5.67 | 5.67 | 4.00 | 4.00 |
| Cycles executing div or sqrt instructions | NA |
| Front-end | 6.13 |
| Dispatch | 7.00 |
| Overall L1 | 7.00 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 23% |
| load | 12% |
| store | 28% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 20% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 22% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| SUB SP, SP, #80 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STP X29, X30, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X22, X21, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X20, X19, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #32 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| LDR W8, [X2] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| SUBS W22, W8, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.LT 419444 <advanceVelocity.omp_outlined+0x94> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR W20, [X0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| ORR X19, XZR, X4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ORR X21, XZR, X3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W8, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADRP X0, <43f3e0> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADD X0, X0, #3096 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADD X3, SP, #16 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ORR W1, WZR, W20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB X4, X29, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| SUB X5, X29, #8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| SUB X6, X29, #12 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| MOVZ W2, #34 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W7, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STP W22, WZR, [X29, #504] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| STUR W8, [X29, #500] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| STR WZR, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| STR W8, [SP] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| BL 410220 <@plt_start@+0x200> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP W9, W8, [X29, #504] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| SBFM X8, X8, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| CMP W9, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| CSEL W11, W9, W22, #11 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W8, W11 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| STUR W11, [X29, #504] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| B.LE 419458 <advanceVelocity.omp_outlined+0xa8> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADRP X0, <43f434> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADD X0, X0, #3120 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ORR W1, WZR, W20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 4100d0 <@plt_start@+0xb0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP X20, X19, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X22, X21, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X29, X30, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| ADD SP, SP, #80 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X9, [X21] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| UBFM W18, W8, #26, #25 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W11, W11, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD X12, X19, #8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| LDR X10, [X9, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X10, [X10, #120] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| B 419490 <advanceVelocity.omp_outlined+0xe0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A |
| Name | Coverage (%) | Time (s) |
|---|---|---|
| ▼advanceVelocity.omp_outlined– | 1.05 | 0.21 |
| ▼Loop 111 - timestep.c:72-78 - exec– | 0.01 | 0.01 |
| ○Loop 113 - timestep.c:74-78 - exec | 1.00 | 0.20 |
| ○Loop 112 - timestep.c:74-78 - exec | 0.04 | 0.01 |
