| Function: updateLinkCells | Module: exec | Source: linkCells.c:211-385 [...] | Coverage (incl. loops): 0.46% | (excl. loops): 0.00% |
|---|
| Function: updateLinkCells | Module: exec | Source: linkCells.c:211-385 [...] | Coverage (incl. loops): 0.46% | (excl. loops): 0.00% |
|---|
/home/eoseret/qaas/qaas_runs/178-176-0594/intel/CoMD/build/CoMD/CoMD/src-openmp/linkCells.c: 211 - 385 |
-------------------------------------------------------------------------------- |
211: iBox = boxes->nLocalBoxes + 2*gridSize[2]*gridSize[1] + 2*gridSize[2]*(gridSize[0]+2) + |
212: (gridSize[0]+2)*(gridSize[1]+2) + (gridSize[0]+2)*(iy+1) + (ix+1); |
213: } |
214: // Halo in Z- |
215: else if (iz == -1) |
216: { |
217: iBox = boxes->nLocalBoxes + 2*gridSize[2]*gridSize[1] + 2*gridSize[2]*(gridSize[0]+2) + |
218: (gridSize[0]+2)*(iy+1) + (ix+1); |
219: } |
220: // Halo in Y+ |
221: else if (iy == gridSize[1]) |
222: { |
223: iBox = boxes->nLocalBoxes + 2*gridSize[2]*gridSize[1] + gridSize[2]*(gridSize[0]+2) + |
224: (gridSize[0]+2)*iz + (ix+1); |
225: } |
226: // Halo in Y- |
227: else if (iy == -1) |
228: { |
229: iBox = boxes->nLocalBoxes + 2*gridSize[2]*gridSize[1] + iz*(gridSize[0]+2) + (ix+1); |
230: } |
231: // Halo in X+ |
232: else if (ix == gridSize[0]) |
233: { |
234: iBox = boxes->nLocalBoxes + gridSize[1]*gridSize[2] + iz*gridSize[1] + iy; |
235: } |
236: // Halo in X- |
237: else if (ix == -1) |
238: { |
239: iBox = boxes->nLocalBoxes + iz*gridSize[1] + iy; |
240: } |
241: // local link cell. |
242: else |
243: { |
244: iBox = ix + gridSize[0]*iy + gridSize[0]*gridSize[1]*iz; |
245: } |
246: assert(iBox >= 0); |
247: assert(iBox < boxes->nTotalBoxes); |
[...] |
288: { |
289: emptyHaloCells(boxes); |
290: |
291: for (int iBox=0; iBox<boxes->nLocalBoxes; ++iBox) |
292: { |
293: int iOff = iBox*MAXATOMS; |
294: int ii=0; |
295: while (ii < boxes->nAtoms[iBox]) |
296: { |
297: int jBox = getBoxFromCoord(boxes, atoms->r[iOff+ii]); |
298: if (jBox != iBox) |
299: moveAtom(boxes, atoms, ii, iBox, jBox); |
300: else |
301: ++ii; |
302: } |
303: } |
304: } |
[...] |
352: int ix = (int)(floor((rr[0] - localMin[0])*boxes->invBoxSize[0])); |
353: int iy = (int)(floor((rr[1] - localMin[1])*boxes->invBoxSize[1])); |
354: int iz = (int)(floor((rr[2] - localMin[2])*boxes->invBoxSize[2])); |
355: |
356: |
357: // For each axis, if we are inside the local domain, make sure we get |
358: // a local link cell. Otherwise, make sure we get a halo link cell. |
359: if (rr[0] < localMax[0]) |
360: { |
361: if (ix == gridSize[0]) ix = gridSize[0] - 1; |
362: } |
363: else |
364: ix = gridSize[0]; // assign to halo cell |
365: if (rr[1] < localMax[1]) |
366: { |
367: if (iy == gridSize[1]) iy = gridSize[1] - 1; |
368: } |
369: else |
370: iy = gridSize[1]; |
371: if (rr[2] < localMax[2]) |
372: { |
373: if (iz == gridSize[2]) iz = gridSize[2] - 1; |
[...] |
384: for (int ii=boxes->nLocalBoxes; ii<boxes->nTotalBoxes; ++ii) |
385: boxes->nAtoms[ii] = 0; |
0x4175c0 STP X29, X30, [SP, #-96]! |
0x4175c4 ADD X29, SP, #0 |
0x4175c8 LDR W2, [X0, #12] |
0x4175cc STP X25, X26, [SP, #64] |
0x4175d0 ORR X25, XZR, X1 |
0x4175d4 LDR W1, [X0, #20] |
0x4175d8 STP X19, X20, [SP, #16] |
0x4175dc ORR X20, XZR, X0 |
0x4175e0 CMP W2, W1 |
0x4175e4 B.GE 417608 |
0x4175e8 SBFM X0, X2, #0, #31 |
0x4175ec LDR X2, [X20, #120] |
(87) 0x4175f0 STR WZR, [X2, X0,LSL #2] |
(87) 0x4175f4 ADD X0, X0, #1 |
(87) 0x4175f8 LDR W3, [X20, #20] |
(87) 0x4175fc CMP W3, W0 |
(87) 0x417600 B.GT 4175f0 |
0x417604 LDR W2, [X20, #12] |
0x417608 CMP W2, #0 |
0x41760c B.LE 417834 |
0x417610 MOVZ W26, #24 |
0x417614 STP X21, X22, [SP, #32] |
0x417618 MOVZ X22, #0 |
0x41761c STP X23, X24, [SP, #48] |
0x417620 LDR X5, [X20, #120] |
0x417624 STR X27, [SP, #80] |
(85) 0x417628 LDR W4, [X5, X22,LSL #2] |
(85) 0x41762c ORR W27, WZR, W22 |
(85) 0x417630 UBFM W19, W22, #26, #25 |
(85) 0x417634 UBFM X24, X22, #62, #61 |
(85) 0x417638 CMP W4, #0 |
(85) 0x41763c B.LE 41781c |
(85) 0x417640 LDP D19, D18, [X20, #24] |
(85) 0x417644 MOVZ W21, #0 |
(85) 0x417648 ORR W23, WZR, W19 |
(85) 0x41764c LDP D17, D5, [X20, #40] |
(85) 0x417650 LDP D4, D3, [X20, #56] |
(85) 0x417654 LDP D16, D7, [X20, #96] |
(85) 0x417658 LDR D6, [X20, #112] |
(85) 0x41765c LDP W2, W0, [X20] |
(85) 0x417660 LDR W1, [X20, #8] |
(85) 0x417664 LDR X3, [X25, #24] |
(86) 0x417668 SMULL X6, W23, W26 |
(86) 0x41766c LDR D2, [X3, X6] |
(86) 0x417670 ADD X7, X3, X6 |
(86) 0x417674 LDP D1, D0, [X7, #8] |
(86) 0x417678 FCMPE D2, D5 |
(86) 0x41767c B.MI 4177ac |
(86) 0x417680 ORR W9, WZR, W2 |
(86) 0x417684 FCMPE D1, D4 |
(86) 0x417688 B.MI 417790 |
(86) 0x41768c ORR W12, WZR, W0 |
(86) 0x417690 FCMPE D0, D3 |
(86) 0x417694 B.MI 417740 |
(86) 0x417698 LDR W4, [X20, #12] |
(86) 0x41769c ADD W15, W2, #2 |
(86) 0x4176a0 MUL W14, W1, W0 |
(86) 0x4176a4 ADD W16, W0, #2 |
(86) 0x4176a8 ADD W17, W9, #1 |
(86) 0x4176ac MUL W18, W15, W1 |
(86) 0x4176b0 MADD W30, W12, W15, W15 |
(86) 0x4176b4 ADD W6, W4, W14,LSL #1 |
(86) 0x4176b8 ADD W7, W6, W18,LSL #1 |
(86) 0x4176bc MADD W9, W16, W15, W7 |
(86) 0x4176c0 ADD W8, W9, W30 |
(86) 0x4176c4 ADD W4, W8, W17 |
(86) 0x4176c8 TBNZ W4, #31, 417888 |
(86) 0x4176cc LDR W12, [X20, #20] |
(86) 0x4176d0 CMP W4, W12 |
(86) 0x4176d4 B.GE 4178a8 |
(86) 0x4176d8 CMP W4, W22 |
(86) 0x4176dc B.EQ 417728 |
(86) 0x4176e0 ORR X1, XZR, X25 |
(86) 0x4176e4 ORR W3, WZR, W27 |
(86) 0x4176e8 ORR W2, WZR, W21 |
(86) 0x4176ec ORR X0, XZR, X20 |
(86) 0x4176f0 BL 417428 |
(86) 0x4176f4 LDR X5, [X20, #120] |
(86) 0x4176f8 LDR W1, [X5, X24] |
(86) 0x4176fc CMP W1, W21 |
(86) 0x417700 B.LE 417818 |
(86) 0x417704 LDP W2, W0, [X20] |
(86) 0x417708 LDR W1, [X20, #8] |
(86) 0x41770c LDP D19, D18, [X20, #24] |
(86) 0x417710 LDR X3, [X25, #24] |
(86) 0x417714 LDP D17, D5, [X20, #40] |
(86) 0x417718 LDP D4, D3, [X20, #56] |
(86) 0x41771c LDP D16, D7, [X20, #96] |
(86) 0x417720 LDR D6, [X20, #112] |
(86) 0x417724 B 417668 |
(86) 0x417728 LDR W14, [X5, X24] |
(86) 0x41772c ADD W21, W21, #1 |
(86) 0x417730 CMP W14, W21 |
(86) 0x417734 B.LE 417818 |
(86) 0x417738 ADD W23, W19, W21 |
(86) 0x41773c B 417668 |
(86) 0x417740 FSUB D24, D0, D17 |
(86) 0x417744 FMUL D25, D24, D6 |
(86) 0x417748 FCVTMS W10, D25 |
(86) 0x41774c CMP W10, W1 |
(86) 0x417750 CSINC W11, WZR, WZR, NE |
(86) 0x417754 SUB W13, W10, W11 |
(86) 0x417758 CMN W13, #1 |
(86) 0x41775c B.EQ 4177c8 |
(86) 0x417760 CMP W12, W0 |
(86) 0x417764 B.EQ 4177f4 |
(86) 0x417768 CMN W12, #1 |
(86) 0x41776c B.EQ 417844 |
(86) 0x417770 CMP W9, W2 |
(86) 0x417774 B.EQ 417864 |
(86) 0x417778 CMN W9, #1 |
(86) 0x41777c B.EQ 417878 |
(86) 0x417780 MADD W12, W12, W2, W9 |
(86) 0x417784 MUL W14, W0, W2 |
(86) 0x417788 MADD W4, W14, W13, W12 |
(86) 0x41778c B 4176c8 |
(86) 0x417790 FSUB D22, D1, D18 |
(86) 0x417794 FMUL D23, D22, D7 |
(86) 0x417798 FCVTMS W11, D23 |
(86) 0x41779c CMP W11, W0 |
(86) 0x4177a0 CSINC W13, WZR, WZR, NE |
(86) 0x4177a4 SUB W12, W11, W13 |
(86) 0x4177a8 B 417690 |
(86) 0x4177ac FSUB D20, D2, D19 |
(86) 0x4177b0 FMUL D21, D20, D16 |
(86) 0x4177b4 FCVTMS W8, D21 |
(86) 0x4177b8 CMP W8, W2 |
(86) 0x4177bc CSINC W10, WZR, WZR, NE |
(86) 0x4177c0 SUB W9, W8, W10 |
(86) 0x4177c4 B 417684 |
(86) 0x4177c8 ADD W6, W9, #1 |
(86) 0x4177cc LDR W9, [X20, #12] |
(86) 0x4177d0 ADD W30, W2, #2 |
(86) 0x4177d4 MUL W18, W1, W0 |
(86) 0x4177d8 MUL W4, W30, W1 |
(86) 0x4177dc MADD W7, W12, W30, W30 |
(86) 0x4177e0 ADD W8, W9, W18,LSL #1 |
(86) 0x4177e4 ADD W10, W8, W4,LSL #1 |
(86) 0x4177e8 ADD W11, W10, W7 |
(86) 0x4177ec ADD W4, W11, W6 |
(86) 0x4177f0 B 4176c8 |
(86) 0x4177f4 LDR W15, [X20, #12] |
(86) 0x4177f8 MUL W11, W1, W0 |
(86) 0x4177fc ADD W12, W2, #2 |
(86) 0x417800 ADD W14, W9, #1 |
(86) 0x417804 ADD W16, W15, W11,LSL #1 |
(86) 0x417808 MADD W17, W12, W1, W16 |
(86) 0x41780c MADD W13, W13, W12, W17 |
(86) 0x417810 ADD W4, W13, W14 |
(86) 0x417814 B 4176c8 |
(85) 0x417818 LDR W2, [X20, #12] |
(85) 0x41781c ADD X22, X22, #1 |
(85) 0x417820 CMP W2, W22 |
(85) 0x417824 B.GT 417628 |
0x417828 LDP X21, X22, [SP, #32] |
0x41782c LDP X23, X24, [SP, #48] |
0x417830 LDR X27, [SP, #80] |
0x417834 LDP X19, X20, [SP, #16] |
0x417838 LDP X25, X26, [SP, #64] |
0x41783c LDP X29, X30, [SP], #96 |
0x417840 RET |
(86) 0x417844 ADD W6, W9, #1 |
(86) 0x417848 LDR W9, [X20, #12] |
(86) 0x41784c MUL W7, W1, W0 |
(86) 0x417850 ADD W4, W2, #2 |
(86) 0x417854 ADD W8, W9, W7,LSL #1 |
(86) 0x417858 MADD W10, W4, W13, W8 |
(86) 0x41785c ADD W4, W10, W6 |
(86) 0x417860 B 4176c8 |
(86) 0x417864 LDR W17, [X20, #12] |
(86) 0x417868 MADD W18, W1, W0, W17 |
(86) 0x41786c MADD W30, W13, W0, W18 |
(86) 0x417870 ADD W4, W30, W12 |
(86) 0x417874 B 4176c8 |
(86) 0x417878 LDR W15, [X20, #12] |
(86) 0x41787c MADD W16, W13, W0, W15 |
(86) 0x417880 ADD W4, W16, W12 |
(86) 0x417884 B 4176c8 |
0x417888 ADRP X5, |
0x41788c ADRP X21, |
0x417890 ADRP X22, |
0x417894 ADD X3, X5, #3240 |
0x417898 ADD X1, X21, #2960 |
0x41789c ADD X0, X22, #3056 |
0x4178a0 MOVZ W2, #246 |
0x4178a4 BL 4101e0 |
0x4178a8 ADRP X23, |
0x4178ac ADRP X20, |
0x4178b0 ADRP X25, |
0x4178b4 ADD X3, X23, #3240 |
0x4178b8 ADD X1, X20, #2960 |
0x4178bc ADD X0, X25, #3072 |
0x4178c0 MOVZ W2, #247 |
0x4178c4 BL 4101e0 |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►98.92+ | timestep | timestep.c:148 | exec |
| ○ | main | CoMD.c:125 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | CoMD.c:266 | exec |
| ►1.08+ | redistributeAtoms | timestep.c:148 | exec |
| ○ | main | CoMD.c:207 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | CoMD.c:266 | exec |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| Path / |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run gcc_0
| Source file and lines | linkCells.c:211-385 |
| Module | exec |
| nb instructions | 44 |
| nb uops | 44 |
| loop length | 176 |
| used w registers | 3 |
| used x registers | 17 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 0 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 12 |
| micro-operation queue | 5.50 cycles |
| front end | 5.50 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 2.50 | 2.50 | 6.25 | 6.25 | 6.25 | 6.25 | 0.00 | 0.00 | 0.00 | 0.00 | 5.67 | 5.67 | 5.67 | 3.00 | 3.00 |
| cycles | 2.50 | 2.50 | 6.25 | 6.25 | 6.25 | 6.25 | 0.00 | 0.00 | 0.00 | 0.00 | 5.67 | 5.67 | 5.67 | 3.00 | 3.00 |
| Cycles executing div or sqrt instructions | NA |
| Front-end | 5.50 |
| Dispatch | 6.25 |
| Overall L1 | 6.25 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 28% |
| load | 29% |
| store | 45% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 25% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 14% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #-96]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| LDR W2, [X0, #12] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X25, XZR, X1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| LDR W1, [X0, #20] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X20, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| CMP W2, W1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 417608 <updateLinkCells+0x48> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| SBFM X0, X2, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| LDR X2, [X20, #120] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDR W2, [X20, #12] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| CMP W2, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.LE 417834 <updateLinkCells+0x274> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MOVZ W26, #24 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| MOVZ X22, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDR X5, [X20, #120] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| STR X27, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDR X27, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X29, X30, [SP], #96 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADRP X5, <421888> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADRP X21, <42088c> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADRP X22, <420890> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X3, X5, #3240 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X1, X21, #2960 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X0, X22, #3056 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W2, #246 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 4101e0 <@plt_start@+0x1c0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADRP X23, <4218a8> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADRP X20, <4208ac> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADRP X25, <4208b0> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADD X3, X23, #3240 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X1, X20, #2960 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X0, X25, #3072 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W2, #247 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 4101e0 <@plt_start@+0x1c0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run gcc_0
| Source file and lines | linkCells.c:211-385 |
| Module | exec |
| nb instructions | 44 |
| nb uops | 44 |
| loop length | 176 |
| used w registers | 3 |
| used x registers | 17 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 0 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 12 |
| micro-operation queue | 5.50 cycles |
| front end | 5.50 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 2.50 | 2.50 | 6.25 | 6.25 | 6.25 | 6.25 | 0.00 | 0.00 | 0.00 | 0.00 | 5.67 | 5.67 | 5.67 | 3.00 | 3.00 |
| cycles | 2.50 | 2.50 | 6.25 | 6.25 | 6.25 | 6.25 | 0.00 | 0.00 | 0.00 | 0.00 | 5.67 | 5.67 | 5.67 | 3.00 | 3.00 |
| Cycles executing div or sqrt instructions | NA |
| Front-end | 5.50 |
| Dispatch | 6.25 |
| Overall L1 | 6.25 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 28% |
| load | 29% |
| store | 45% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 25% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 14% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #-96]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| LDR W2, [X0, #12] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X25, XZR, X1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| LDR W1, [X0, #20] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X20, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| CMP W2, W1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 417608 <updateLinkCells+0x48> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| SBFM X0, X2, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| LDR X2, [X20, #120] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDR W2, [X20, #12] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| CMP W2, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.LE 417834 <updateLinkCells+0x274> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MOVZ W26, #24 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| MOVZ X22, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDR X5, [X20, #120] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| STR X27, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDR X27, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X29, X30, [SP], #96 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADRP X5, <421888> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADRP X21, <42088c> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADRP X22, <420890> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X3, X5, #3240 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X1, X21, #2960 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X0, X22, #3056 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W2, #246 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 4101e0 <@plt_start@+0x1c0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADRP X23, <4218a8> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADRP X20, <4208ac> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADRP X25, <4208b0> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADD X3, X23, #3240 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X1, X20, #2960 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X0, X25, #3072 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W2, #247 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 4101e0 <@plt_start@+0x1c0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| Name | Coverage (%) | Time (s) |
|---|---|---|
| ▼updateLinkCells– | 0.46 | 0.07 |
| ▼Loop 85 - linkCells.c:211-373 - exec– | 0.00 | 0.02 |
| ○Loop 86 - linkCells.c:211-373 - exec | 0.46 | 3.20 |
| ○Loop 87 - linkCells.c:384-385 - exec | 0.00 | 0.00 |
