| Function: loadAtomsBuffer | Module: exec | Source: haloExchange.c:373-393 | Coverage (incl. loops): 0.05% | (excl. loops): 0.00% |
|---|
| Function: loadAtomsBuffer | Module: exec | Source: haloExchange.c:373-393 | Coverage (incl. loops): 0.05% | (excl. loops): 0.00% |
|---|
/home/eoseret/qaas/qaas_runs/178-176-0594/intel/CoMD/build/CoMD/CoMD/src-openmp/haloExchange.c: 373 - 393 |
-------------------------------------------------------------------------------- |
373: int nCells = parms->nCells[face]; |
374: int* cellList = parms->cellList[face]; |
375: int nBuf = 0; |
376: for (int iCell=0; iCell<nCells; ++iCell) |
377: { |
378: int iBox = cellList[iCell]; |
379: int iOff = iBox*MAXATOMS; |
380: for (int ii=iOff; ii<iOff+s->boxes->nAtoms[iBox]; ++ii) |
381: { |
382: buf[nBuf].gid = s->atoms->gid[ii]; |
383: buf[nBuf].type = s->atoms->iSpecies[ii]; |
384: buf[nBuf].rx = s->atoms->r[ii][0] + shift[0]; |
385: buf[nBuf].ry = s->atoms->r[ii][1] + shift[1]; |
386: buf[nBuf].rz = s->atoms->r[ii][2] + shift[2]; |
387: buf[nBuf].px = s->atoms->p[ii][0]; |
388: buf[nBuf].py = s->atoms->p[ii][1]; |
389: buf[nBuf].pz = s->atoms->p[ii][2]; |
390: ++nBuf; |
391: } |
392: } |
393: return nBuf*sizeof(AtomMsg); |
0x4133e0 LDR W8, [X0, W2,SXTW #2] |
0x4133e4 CMP W8, #1 |
0x4133e8 B.LT 413518 |
0x4133ec ADD X11, X0, W2,SXTW #3 |
0x4133f0 LDP X13, X14, [X1, #16] |
0x4133f4 ORR X10, XZR, XZR |
0x4133f8 ORR W9, WZR, WZR |
0x4133fc LDR X12, [X11, #72] |
0x413400 LDR X11, [X11, #24] |
0x413404 LDP D2, D3, [X13, #72] |
0x413408 LDP D0, D1, [X12] |
0x41340c FMUL D1, D3, D1 |
0x413410 LDR D3, [X13, #88] |
0x413414 ADD X13, X3, #24 |
0x413418 FMUL D0, D2, D0 |
0x41341c LDR D2, [X12, #16] |
0x413420 LDR X12, [X14, #120] |
0x413424 MOVZ W14, #56 |
0x413428 FMUL D2, D3, D2 |
0x41342c B 413450 |
0x413430 HINT #0 |
0x413434 HINT #0 |
0x413438 HINT #0 |
0x41343c HINT #0 |
(46) 0x413440 ADD W9, W9, W16 |
(44) 0x413444 ADD X10, X10, #1 |
(44) 0x413448 CMP X10, X8 |
(44) 0x41344c B.EQ 41350c |
(44) 0x413450 LDRSW X15, [X11, X10,LSL #2] |
(44) 0x413454 LDR W16, [X12, X15,LSL #2] |
(44) 0x413458 CMP W16, #1 |
(44) 0x41345c B.LT 413444 |
(46) 0x413460 LDR X0, [X1, #32] |
(46) 0x413464 UBFM W17, W15, #26, #25 |
(46) 0x413468 ORR X16, XZR, XZR |
(46) 0x41346c SBFM X18, X17, #0, #31 |
(46) 0x413470 ADD X2, X18, W17,SXTW #1 |
(46) 0x413474 LDP X4, X3, [X0, #8] |
(46) 0x413478 LDP X5, X0, [X0, #24] |
(46) 0x41347c ADD X0, X0, X2,LSL #3 |
(46) 0x413480 ADD X2, X5, X2,LSL #3 |
(46) 0x413484 SBFM X5, X17, #62, #31 |
(46) 0x413488 ADD X3, X3, X5 |
(46) 0x41348c ADD X4, X4, X5 |
(46) 0x413490 SMADDL X5, W9, W14, X13 |
(46) 0x413494 ADD X0, X0, #16 |
(46) 0x413498 ADD X2, X2, #16 |
(46) 0x41349c HINT #0 |
(45) 0x4134a0 LDUR D3, [X2, #496] |
(45) 0x4134a4 LDR W6, [X4, X16,LSL #2] |
(45) 0x4134a8 STUR W6, [X5, #488] |
(45) 0x4134ac LDR W6, [X3, X16,LSL #2] |
(45) 0x4134b0 ADD X16, X16, #1 |
(45) 0x4134b4 FADD D3, D3, D0 |
(45) 0x4134b8 ADD X7, X18, X16 |
(45) 0x4134bc STUR D3, [X5, #496] |
(45) 0x4134c0 LDUR D3, [X2, #504] |
(45) 0x4134c4 STUR W6, [X5, #492] |
(45) 0x4134c8 LDRSW X6, [X12, X15,LSL #2] |
(45) 0x4134cc ADD X6, X6, W17,SXTW |
(45) 0x4134d0 FADD D3, D3, D1 |
(45) 0x4134d4 CMP X7, X6 |
(45) 0x4134d8 STUR D3, [X5, #504] |
(45) 0x4134dc LDR D3, [X2], #24 |
(45) 0x4134e0 FADD D3, D3, D2 |
(45) 0x4134e4 STR D3, [X5] |
(45) 0x4134e8 LDUR D3, [X0, #496] |
(45) 0x4134ec STR D3, [X5, #8] |
(45) 0x4134f0 LDUR D3, [X0, #504] |
(45) 0x4134f4 STR D3, [X5, #16] |
(45) 0x4134f8 LDR D3, [X0], #24 |
(45) 0x4134fc STR D3, [X5, #24] |
(45) 0x413500 ADD X5, X5, #56 |
(45) 0x413504 B.LT 4134a0 |
(46) 0x413508 B 413440 |
0x41350c UBFM W8, W9, #26, #25 |
0x413510 SUB W0, W8, W9,LSL #3 |
0x413514 RET |
0x413518 ORR W0, WZR, WZR |
0x41351c RET |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►99.15+ | timestep | timestep.c:150 | exec |
| ○ | main | CoMD.c:125 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | eam.c:831 | exec |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| Path / |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run armclang_9
| Source file and lines | haloExchange.c:373-393 |
| Module | exec |
| nb instructions | 29 |
| nb uops | 25 |
| loop length | 116 |
| used w registers | 6 |
| used x registers | 9 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 0 |
| used d registers | 4 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 0 |
| micro-operation queue | 3.13 cycles |
| front end | 3.13 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 2.00 | 2.00 | 2.25 | 2.25 | 2.25 | 2.25 | 0.75 | 0.75 | 0.75 | 0.75 | 3.00 | 3.00 | 3.00 | 0.00 | 0.00 |
| cycles | 2.00 | 2.00 | 2.25 | 2.25 | 2.25 | 2.25 | 0.75 | 0.75 | 0.75 | 0.75 | 3.00 | 3.00 | 3.00 | 0.00 | 0.00 |
| Cycles executing div or sqrt instructions | NA |
| Front-end | 3.13 |
| Dispatch | 3.00 |
| Overall L1 | 3.13 |
| all | 33% |
| load | 50% |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 0% |
| all | 0% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | 0% |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | NA (no other vectorizable/vectorized instructions) |
| all | 22% |
| load | 50% |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | 0% |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 31% |
| load | 37% |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 18% |
| all | 25% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | 25% |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | NA (no other vectorizable/vectorized instructions) |
| all | 29% |
| load | 37% |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | 25% |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 18% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| LDR W8, [X0, W2,SXTW #2] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| CMP W8, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.LT 413518 <loadAtomsBuffer+0x138> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD X11, X0, W2,SXTW #3 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
| LDP X13, X14, [X1, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| ORR X10, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ORR W9, WZR, WZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| LDR X12, [X11, #72] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X11, [X11, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDP D2, D3, [X13, #72] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | vect (50.0%) |
| LDP D0, D1, [X12] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | vect (50.0%) |
| FMUL D1, D3, D1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (25.0%) |
| LDR D3, [X13, #88] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| ADD X13, X3, #24 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| FMUL D0, D2, D0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (25.0%) |
| LDR D2, [X12, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| LDR X12, [X14, #120] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| MOVZ W14, #56 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| FMUL D2, D3, D2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (25.0%) |
| B 413450 <loadAtomsBuffer+0x70> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| UBFM W8, W9, #26, #25 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| SUB W0, W8, W9,LSL #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W0, WZR, WZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run armclang_9
| Source file and lines | haloExchange.c:373-393 |
| Module | exec |
| nb instructions | 29 |
| nb uops | 25 |
| loop length | 116 |
| used w registers | 6 |
| used x registers | 9 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 0 |
| used d registers | 4 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 0 |
| micro-operation queue | 3.13 cycles |
| front end | 3.13 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 2.00 | 2.00 | 2.25 | 2.25 | 2.25 | 2.25 | 0.75 | 0.75 | 0.75 | 0.75 | 3.00 | 3.00 | 3.00 | 0.00 | 0.00 |
| cycles | 2.00 | 2.00 | 2.25 | 2.25 | 2.25 | 2.25 | 0.75 | 0.75 | 0.75 | 0.75 | 3.00 | 3.00 | 3.00 | 0.00 | 0.00 |
| Cycles executing div or sqrt instructions | NA |
| Front-end | 3.13 |
| Dispatch | 3.00 |
| Overall L1 | 3.13 |
| all | 33% |
| load | 50% |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 0% |
| all | 0% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | 0% |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | NA (no other vectorizable/vectorized instructions) |
| all | 22% |
| load | 50% |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | 0% |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 31% |
| load | 37% |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 18% |
| all | 25% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | 25% |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | NA (no other vectorizable/vectorized instructions) |
| all | 29% |
| load | 37% |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | 25% |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 18% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| LDR W8, [X0, W2,SXTW #2] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| CMP W8, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.LT 413518 <loadAtomsBuffer+0x138> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD X11, X0, W2,SXTW #3 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
| LDP X13, X14, [X1, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| ORR X10, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ORR W9, WZR, WZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| LDR X12, [X11, #72] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X11, [X11, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDP D2, D3, [X13, #72] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | vect (50.0%) |
| LDP D0, D1, [X12] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | vect (50.0%) |
| FMUL D1, D3, D1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (25.0%) |
| LDR D3, [X13, #88] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| ADD X13, X3, #24 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| FMUL D0, D2, D0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (25.0%) |
| LDR D2, [X12, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| LDR X12, [X14, #120] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| MOVZ W14, #56 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| FMUL D2, D3, D2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (25.0%) |
| B 413450 <loadAtomsBuffer+0x70> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| UBFM W8, W9, #26, #25 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| SUB W0, W8, W9,LSL #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W0, WZR, WZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| Name | Coverage (%) | Time (s) |
|---|---|---|
| ▼loadAtomsBuffer– | 0.05 | 0.01 |
| ▼Loop 46 - haloExchange.c:376-389 - exec– | 0.00 | 0.00 |
| ○Loop 45 - haloExchange.c:380-389 - exec | 0.05 | 0.57 |
| ○Loop 44 - haloExchange.c:376-380 - exec | 0.00 | 0.01 |
