| Function: advancePosition._omp_fn.0 | Module: exec | Source: timestep.c:85-94 | Coverage (incl. loops): 1.24% | (excl. loops): 0.00% |
|---|---|---|---|---|
| Function: advancePosition._omp_fn.0 | Module: exec | Source: timestep.c:85-94 | Coverage (incl. loops): 1.24% | (excl. loops): 0.00% |
|---|---|---|---|---|
/home/eoseret/qaas/qaas_runs/178-176-0594/intel/CoMD/build/CoMD/CoMD/src-openmp/timestep.c: 85 - 94 |
-------------------------------------------------------------------------------- |
85: #pragma omp parallel for |
86: for (int iBox=0; iBox<nBoxes; iBox++) |
87: { |
88: for (int iOff=MAXATOMS*iBox,ii=0; ii<s->boxes->nAtoms[iBox]; ii++,iOff++) |
89: { |
90: int iSpecies = s->atoms->iSpecies[iOff]; |
91: real_t invMass = 1.0/s->species[iSpecies].mass; |
92: s->atoms->r[iOff][0] += dt*s->atoms->p[iOff][0]*invMass; |
93: s->atoms->r[iOff][1] += dt*s->atoms->p[iOff][1]*invMass; |
94: s->atoms->r[iOff][2] += dt*s->atoms->p[iOff][2]*invMass; |
0x41a200 STP X29, X30, [SP, #-48]! |
0x41a204 ADD X29, SP, #0 |
0x41a208 STR X21, [SP, #32] |
0x41a20c LDR W21, [X0, #16] |
0x41a210 STP X19, X20, [SP, #16] |
0x41a214 ORR X20, XZR, X0 |
0x41a218 BL 410120 |
0x41a21c ORR W19, WZR, W0 |
0x41a220 BL 410140 |
0x41a224 SDIV W1, W21, W19 |
0x41a228 ORR W14, WZR, W0 |
0x41a22c MSUB W2, W1, W19, W21 |
0x41a230 CMP W0, W2 |
0x41a234 B.LT 41a3f0 |
0x41a238 MADD W3, W1, W14, W2 |
0x41a23c ADD W16, W1, W3 |
0x41a240 CMP W3, W16 |
0x41a244 B.GE 41a3e0 |
0x41a248 LDR X17, [X20] |
0x41a24c UBFM W15, W3, #26, #25 |
0x41a250 MOVZ W0, #24 |
0x41a254 FMOV D4, #1.0000000 |
0x41a258 SBFM X14, X3, #0, #31 |
0x41a25c LDR D0, [X20, #8] |
0x41a260 SMULL X15, W15, W0 |
0x41a264 LDR X4, [X17, #24] |
0x41a268 LDR X18, [X4, #120] |
(101) 0x41a26c LDR W5, [X18, X14,LSL #2] |
(101) 0x41a270 CMP W5, #0 |
(101) 0x41a274 B.LE 41a3d0 |
(101) 0x41a278 LDP X10, X13, [X17, #32] |
(101) 0x41a27c SUB W9, W5, #1 |
(101) 0x41a280 UBFM X7, X14, #56, #55 |
(101) 0x41a284 ADD X8, X9, X14,LSL #6 |
(101) 0x41a288 ORR X0, XZR, X15 |
(101) 0x41a28c LDP X6, X30, [X10, #16] |
(101) 0x41a290 LDR X20, [X10, #32] |
(101) 0x41a294 ADD X11, X6, #4 |
(101) 0x41a298 ADD X3, X6, X7 |
(101) 0x41a29c ADD X21, X11, X8,LSL #2 |
(101) 0x41a2a0 ADD X19, X30, #8 |
(101) 0x41a2a4 ADD X2, X30, #16 |
(101) 0x41a2a8 SUB X1, X21, X3 |
(101) 0x41a2ac ADD X12, X20, #8 |
(101) 0x41a2b0 ADD X4, X20, #16 |
(101) 0x41a2b4 TBZ W1, #2, 41a320 |
(101) 0x41a2b8 LDRSW X5, [X6, X7] |
(101) 0x41a2bc ADD X3, X3, #4 |
(101) 0x41a2c0 ADD X0, X15, #24 |
(101) 0x41a2c4 LDR D3, [X20, X15] |
(101) 0x41a2c8 LDR D2, [X30, X15] |
(101) 0x41a2cc ADD X9, X13, X5,LSL #4 |
(101) 0x41a2d0 LDR D1, [X9, #8] |
(101) 0x41a2d4 FMUL D5, D0, D3 |
(101) 0x41a2d8 FDIV D6, D4, D1 |
(101) 0x41a2dc FMADD D7, D6, D5, D2 |
(101) 0x41a2e0 STR D7, [X30, X15] |
(101) 0x41a2e4 LDR D16, [X12, X15] |
(101) 0x41a2e8 LDR D17, [X19, X15] |
(101) 0x41a2ec FMUL D18, D0, D16 |
(101) 0x41a2f0 FMADD D19, D6, D18, D17 |
(101) 0x41a2f4 STR D19, [X19, X15] |
(101) 0x41a2f8 LDR D20, [X4, X15] |
(101) 0x41a2fc LDR D21, [X2, X15] |
(101) 0x41a300 FMUL D22, D0, D20 |
(101) 0x41a304 FMADD D23, D6, D22, D21 |
(101) 0x41a308 STR D23, [X2, X15] |
(101) 0x41a30c CMP X21, X3 |
(101) 0x41a310 B.EQ 41a3d0 |
(101) 0x41a314 HINT #0 |
(101) 0x41a318 HINT #0 |
(101) 0x41a31c HINT #0 |
(102) 0x41a320 ORR X8, XZR, X3 |
(102) 0x41a324 LDR D24, [X20, X0] |
(102) 0x41a328 ADD X10, X0, #24 |
(102) 0x41a32c LDRSW X7, [X8], #4 |
(102) 0x41a330 LDR D25, [X30, X0] |
(102) 0x41a334 FMUL D26, D0, D24 |
(102) 0x41a338 LDRSW X6, [X3, #4] |
(102) 0x41a33c ADD X11, X13, X7,LSL #4 |
(102) 0x41a340 ADD X3, X8, #4 |
(102) 0x41a344 LDR D27, [X11, #8] |
(102) 0x41a348 ADD X1, X13, X6,LSL #4 |
(102) 0x41a34c FDIV D28, D4, D27 |
(102) 0x41a350 FMADD D29, D28, D26, D25 |
(102) 0x41a354 STR D29, [X30, X0] |
(102) 0x41a358 LDR D30, [X12, X0] |
(102) 0x41a35c LDR D31, [X19, X0] |
(102) 0x41a360 FMUL D3, D0, D30 |
(102) 0x41a364 FMADD D2, D28, D3, D31 |
(102) 0x41a368 STR D2, [X19, X0] |
(102) 0x41a36c LDR D5, [X4, X0] |
(102) 0x41a370 LDR D1, [X2, X0] |
(102) 0x41a374 FMUL D6, D0, D5 |
(102) 0x41a378 FMADD D7, D28, D6, D1 |
(102) 0x41a37c STR D7, [X2, X0] |
(102) 0x41a380 ADD X0, X0, #48 |
(102) 0x41a384 LDR D19, [X1, #8] |
(102) 0x41a388 LDR D16, [X20, X10] |
(102) 0x41a38c LDR D17, [X30, X10] |
(102) 0x41a390 FDIV D20, D4, D19 |
(102) 0x41a394 FMUL D18, D0, D16 |
(102) 0x41a398 FMADD D21, D20, D18, D17 |
(102) 0x41a39c STR D21, [X30, X10] |
(102) 0x41a3a0 LDR D22, [X12, X10] |
(102) 0x41a3a4 LDR D23, [X19, X10] |
(102) 0x41a3a8 FMUL D24, D0, D22 |
(102) 0x41a3ac FMADD D25, D20, D24, D23 |
(102) 0x41a3b0 STR D25, [X19, X10] |
(102) 0x41a3b4 LDR D26, [X4, X10] |
(102) 0x41a3b8 LDR D27, [X2, X10] |
(102) 0x41a3bc FMUL D28, D0, D26 |
(102) 0x41a3c0 FMADD D29, D20, D28, D27 |
(102) 0x41a3c4 STR D29, [X2, X10] |
(102) 0x41a3c8 CMP X21, X3 |
(102) 0x41a3cc B.NE 41a320 |
(101) 0x41a3d0 ADD X14, X14, #1 |
(101) 0x41a3d4 ADD X15, X15, #1536 |
(101) 0x41a3d8 CMP W16, W14 |
(101) 0x41a3dc B.GT 41a26c |
0x41a3e0 LDP X19, X20, [SP, #16] |
0x41a3e4 LDR X21, [SP, #32] |
0x41a3e8 LDP X29, X30, [SP], #48 |
0x41a3ec RET |
0x41a3f0 ADD W1, W1, #1 |
0x41a3f4 MOVZ W2, #0 |
0x41a3f8 B 41a238 |
0x41a3fc HINT #0 |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►98.59+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►1.36+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | timestep | timestep.c:43 | exec |
| ○ | main | CoMD.c:125 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | CoMD.c:266 | exec |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| Path / |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run gcc_4
| Source file and lines | timestep.c:85-94 |
| Module | exec |
| nb instructions | 35 |
| nb uops | 34 |
| loop length | 140 |
| used w registers | 10 |
| used x registers | 13 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 2 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 6 |
| micro-operation queue | 4.25 cycles |
| front end | 4.25 cycles |
| | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 3.00 | 3.00 | 4.75 | 4.75 | 4.75 | 4.75 | 0.25 | 0.25 | 0.25 | 0.25 | 3.83 | 3.50 | 3.67 | 1.50 | 1.50 |
| cycles | 3.00 | 3.00 | 4.75 | 4.75 | 4.75 | 4.75 | 0.25 | 0.25 | 0.25 | 0.25 | 3.83 | 3.50 | 3.67 | 1.50 | 1.50 |
| Cycles executing div or sqrt instructions | 5.00-12.50 |
| Front-end | 4.25 |
| Dispatch | 4.75 |
| DIV/SQRT | 5.00-12.50 |
| Overall L1 | 5.00-12.50 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| other | 0% |
| all | 0% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 25% |
| load | 27% |
| store | 41% |
| mul | 12% |
| add-sub | 18% |
| fma | 12% |
| other | 25% |
| all | 25% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 25% |
| all | 25% |
| load | 27% |
| store | 41% |
| mul | 12% |
| add-sub | 18% |
| fma | 12% |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 25% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #-48]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STR X21, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| LDR W21, [X0, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X20, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| BL 410120 <@plt_start@+0x100> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W19, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410140 <@plt_start@+0x120> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| SDIV W1, W21, W19 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| ORR W14, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MSUB W2, W1, W19, W21 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| CMP W0, W2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.LT 41a3f0 <advancePosition._omp_fn.0+0x1f0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W3, W1, W14, W2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W16, W1, W3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W3, W16 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 41a3e0 <advancePosition._omp_fn.0+0x1e0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X17, [X20] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| UBFM W15, W3, #26, #25 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MOVZ W0, #24 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| FMOV D4, #1.0000000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| SBFM X14, X3, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| LDR D0, [X20, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| SMULL X15, W15, W0 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| LDR X4, [X17, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X18, [X4, #120] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDR X21, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDP X29, X30, [SP], #48 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W1, W1, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W2, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| B 41a238 <advancePosition._omp_fn.0+0x38> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| HINT #0 | N/A |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run gcc_4
| Source file and lines | timestep.c:85-94 |
| Module | exec |
| nb instructions | 35 |
| nb uops | 34 |
| loop length | 140 |
| used w registers | 10 |
| used x registers | 13 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 2 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 6 |
| micro-operation queue | 4.25 cycles |
| front end | 4.25 cycles |
| | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 3.00 | 3.00 | 4.75 | 4.75 | 4.75 | 4.75 | 0.25 | 0.25 | 0.25 | 0.25 | 3.83 | 3.50 | 3.67 | 1.50 | 1.50 |
| cycles | 3.00 | 3.00 | 4.75 | 4.75 | 4.75 | 4.75 | 0.25 | 0.25 | 0.25 | 0.25 | 3.83 | 3.50 | 3.67 | 1.50 | 1.50 |
| Cycles executing div or sqrt instructions | 5.00-12.50 |
| Front-end | 4.25 |
| Dispatch | 4.75 |
| DIV/SQRT | 5.00-12.50 |
| Overall L1 | 5.00-12.50 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| other | 0% |
| all | 0% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 25% |
| load | 27% |
| store | 41% |
| mul | 12% |
| add-sub | 18% |
| fma | 12% |
| other | 25% |
| all | 25% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 25% |
| all | 25% |
| load | 27% |
| store | 41% |
| mul | 12% |
| add-sub | 18% |
| fma | 12% |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 25% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #-48]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STR X21, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| LDR W21, [X0, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X20, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| BL 410120 <@plt_start@+0x100> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W19, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410140 <@plt_start@+0x120> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| SDIV W1, W21, W19 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| ORR W14, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MSUB W2, W1, W19, W21 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| CMP W0, W2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.LT 41a3f0 <advancePosition._omp_fn.0+0x1f0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W3, W1, W14, W2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W16, W1, W3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W3, W16 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 41a3e0 <advancePosition._omp_fn.0+0x1e0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X17, [X20] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| UBFM W15, W3, #26, #25 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MOVZ W0, #24 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| FMOV D4, #1.0000000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| SBFM X14, X3, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| LDR D0, [X20, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| SMULL X15, W15, W0 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| LDR X4, [X17, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X18, [X4, #120] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDR X21, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDP X29, X30, [SP], #48 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W1, W1, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W2, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| B 41a238 <advancePosition._omp_fn.0+0x38> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| HINT #0 | N/A |
| Name | Coverage (%) | Time (s) |
|---|---|---|
| ▼advancePosition._omp_fn.0– | 1.24 | 0.18 |
| ▼Loop 101 - timestep.c:88-94 - exec– | 0.10 | 0.01 |
| ○Loop 102 - timestep.c:88-94 - exec | 1.13 | 0.13 |
