Loop Id: 94 | Module: exec | Source: linkCells.c:179-378 [...] | Coverage: 0.03% |
---|
Loop Id: 94 | Module: exec | Source: linkCells.c:179-378 [...] | Coverage: 0.03% |
---|
0x409434 ORR X0, XZR, X19 |
0x409438 BL 409240 |
0x40943c LDR X9, [X19, #120] [10] |
0x409440 SBFM X7, X0, #0, #31 |
0x409444 LDR W8, [X19, #12] [10] |
0x409448 LDR W12, [X9, X7,LSL #2] [9] |
0x40944c ADD W10, W12, W0,LSL #6 |
0x409450 CMP W8, W0 |
0x409454 B.LE 409468 |
0x409458 LDR W1, [X20] [4] |
0x40945c ADD W11, W1, #1 |
0x409460 STR W11, [X20] [4] |
0x409464 LDR W12, [X9, X7,LSL #2] [9] |
0x409468 MOVZ W13, #24 |
0x40946c LDP X18, X30, [X20, #16] [4] |
0x409470 ADD W14, W12, #1 |
0x409474 SBFM X15, X10, #0, #31 |
0x409478 SMADDL X16, W10, W13, XZR |
0x40947c LDR X17, [X20, #8] [4] |
0x409480 STR W14, [X9, X7,LSL #2] [9] |
0x409484 ADD X0, X30, X16 |
0x409488 LDR X2, [X20, #32] [4] |
0x40948c LDP X19, X20, [SP, #16] [3] |
0x409490 STR W22, [X17, X15,LSL #2] [7] |
0x409494 STR W21, [X18, X15,LSL #2] [1] |
0x409498 ADD X3, X2, X16 |
0x40949c STR D10, [X30, X16] [2] |
0x4094a0 LDP X21, X22, [SP, #32] [3] |
0x4094a4 STP D9, D8, [X0, #8] [8] |
0x4094a8 STR D13, [X2, X16] [6] |
0x4094ac LDP D8, D9, [SP, #48] [3] |
0x4094b0 STP D12, D11, [X3, #8] [5] |
0x4094b4 LDP D10, D11, [SP, #64] [3] |
0x4094b8 LDP D12, D13, [SP, #80] [3] |
0x4094bc LDP X29, X30, [SP], #96 [3] |
0x4094c0 RET |
0x4094c4 FSUB D21, D8, S6 |
0x4094c8 SUB W5, W3, #1 |
0x4094cc FMUL D22, D21, D3 |
0x4094d0 FCVTMS W6, D22 |
0x4094d4 CMP W3, W6 |
0x4094d8 CSEL W3, W5, W6, #0 |
0x4094dc B 409434 |
/home/hbollore/qaas-runs/170-265-5545/intel/CoMD/build/CoMD/CoMD/src-openmp/linkCells.c: 179 - 378 |
-------------------------------------------------------------------------------- |
179: iOff += boxes->nAtoms[iBox]; |
180: |
181: // assign values to array elements |
182: if (iBox < boxes->nLocalBoxes) |
183: atoms->nLocal++; |
184: boxes->nAtoms[iBox]++; |
185: atoms->gid[iOff] = gid; |
186: atoms->iSpecies[iOff] = iType; |
187: |
188: atoms->r[iOff][0] = x; |
189: atoms->r[iOff][1] = y; |
190: atoms->r[iOff][2] = z; |
191: |
192: atoms->p[iOff][0] = px; |
193: atoms->p[iOff][1] = py; |
194: atoms->p[iOff][2] = pz; |
195: } |
[...] |
354: int iz = (int)(floor((rr[2] - localMin[2])*boxes->invBoxSize[2])); |
[...] |
373: if (iz == gridSize[2]) iz = gridSize[2] - 1; |
374: } |
375: else |
376: iz = gridSize[2]; |
377: |
378: return getBoxFromTuple(boxes, ix, iy, iz); |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►24.59+ | unloadAtomsBuffer | haloExchange.c:418 | exec |
○ | haloExchange | memUtils.h:28 | exec |
○ | timestep | timestep.c:150 | exec |
○ | main | CoMD.c:125 | exec |
○ | __libc_start_main | libc-2.31.so | |
○ | _start | CoMD.c:266 | exec |
►19.67+ | unloadAtomsBuffer | haloExchange.c:424 | exec |
○ | haloExchange | memUtils.h:28 | exec |
○ | timestep | timestep.c:150 | exec |
○ | main | CoMD.c:125 | exec |
○ | __libc_start_main | libc-2.31.so | |
○ | _start | CoMD.c:266 | exec |
►19.67+ | unloadAtomsBuffer | haloExchange.c:418 | exec |
○ | haloExchange | haloExchange.c:302 | exec |
○ | timestep | timestep.c:150 | exec |
○ | main | CoMD.c:125 | exec |
○ | __libc_start_main | libc-2.31.so | |
○ | _start | CoMD.c:266 | exec |
►16.39+ | unloadAtomsBuffer | haloExchange.c:424 | exec |
○ | haloExchange | haloExchange.c:302 | exec |
○ | timestep | timestep.c:150 | exec |
○ | main | CoMD.c:125 | exec |
○ | __libc_start_main | libc-2.31.so | |
○ | _start | CoMD.c:266 | exec |
►6.56+ | createFccLattice | initAtoms.c:96 | exec |
○ | main | CoMD.c:199 | exec |
○ | __libc_start_main | libc-2.31.so | |
○ | _start | CoMD.c:266 | exec |
►4.92+ | createFccLattice | initAtoms.c:96 | exec |
○ | main | CoMD.c:199 | exec |
○ | __libc_start_main | libc-2.31.so | |
○ | _start | CoMD.c:266 | exec |
►4.92+ | createFccLattice | initAtoms.c:100 | exec |
○ | main | CoMD.c:199 | exec |
○ | __libc_start_main | libc-2.31.so | |
○ | _start | CoMD.c:266 | exec |
►1.64+ | unloadAtomsBuffer | haloExchange.c:424 | exec |
○ | haloExchange | haloExchange.c:302 | exec |
○ | redistributeAtoms | timestep.c:150 | exec |
○ | main | CoMD.c:207 | exec |
○ | __libc_start_main | libc-2.31.so | |
○ | _start | CoMD.c:266 | exec |
►1.64+ | haloExchange | memUtils.h:28 | exec |
○ | timestep | timestep.c:150 | exec |
○ | main | CoMD.c:125 | exec |
○ | __libc_start_main | libc-2.31.so | |
○ | _start | CoMD.c:266 | exec |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 2.93 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 4.51 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.36 |
Bottlenecks | P10, P11, P12, |
Function | putAtomInBox |
Source | linkCells.c:179-195,linkCells.c:354-354,linkCells.c:373-378 |
Source loop unroll info | not unrolled or unrolled with no peel/tail loop |
Source loop unroll confidence level | max |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 7.33 |
CQA cycles if no scalar integer | 2.50 |
CQA cycles if FP arith vectorized | 7.33 |
CQA cycles if fully vectorized | 1.63 |
Front-end cycles | 5.38 |
DIV/SQRT cycles | 2.00 |
P0 cycles | 2.00 |
P1 cycles | 3.75 |
P2 cycles | 3.75 |
P3 cycles | 3.75 |
P4 cycles | 3.75 |
P5 cycles | 2.50 |
P6 cycles | 2.50 |
P7 cycles | 1.00 |
P8 cycles | 1.00 |
P9 cycles | 7.33 |
P10 cycles | 7.33 |
P11 cycles | 7.33 |
P12 cycles | 2.00 |
P13 cycles | 2.00 |
P14 cycles | 0.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 43.00 |
Nb uops | 43.00 |
Nb loads | NA |
Nb stores | 8.00 |
Nb stack references | 0.00 |
FLOP/cycle | 0.27 |
Nb FLOP add-sub | 1.00 |
Nb FLOP mul | 1.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 29.45 |
Bytes prefetched | 0.00 |
Bytes loaded | 152.00 |
Bytes stored | 64.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 38.46 |
Vectorization ratio load | 100.00 |
Vectorization ratio store | 50.00 |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 0.00 |
Vector-efficiency ratio all | 28.85 |
Vector-efficiency ratio load | 50.00 |
Vector-efficiency ratio store | 37.50 |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 12.50 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 2.93 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 4.51 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.36 |
Bottlenecks | P10, P11, P12, |
Function | putAtomInBox |
Source | linkCells.c:179-195,linkCells.c:354-354,linkCells.c:373-378 |
Source loop unroll info | not unrolled or unrolled with no peel/tail loop |
Source loop unroll confidence level | max |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 7.33 |
CQA cycles if no scalar integer | 2.50 |
CQA cycles if FP arith vectorized | 7.33 |
CQA cycles if fully vectorized | 1.63 |
Front-end cycles | 5.38 |
DIV/SQRT cycles | 2.00 |
P0 cycles | 2.00 |
P1 cycles | 3.75 |
P2 cycles | 3.75 |
P3 cycles | 3.75 |
P4 cycles | 3.75 |
P5 cycles | 2.50 |
P6 cycles | 2.50 |
P7 cycles | 1.00 |
P8 cycles | 1.00 |
P9 cycles | 7.33 |
P10 cycles | 7.33 |
P11 cycles | 7.33 |
P12 cycles | 2.00 |
P13 cycles | 2.00 |
P14 cycles | 0.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 43.00 |
Nb uops | 43.00 |
Nb loads | NA |
Nb stores | 8.00 |
Nb stack references | 0.00 |
FLOP/cycle | 0.27 |
Nb FLOP add-sub | 1.00 |
Nb FLOP mul | 1.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 29.45 |
Bytes prefetched | 0.00 |
Bytes loaded | 152.00 |
Bytes stored | 64.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 38.46 |
Vectorization ratio load | 100.00 |
Vectorization ratio store | 50.00 |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 0.00 |
Vector-efficiency ratio all | 28.85 |
Vector-efficiency ratio load | 50.00 |
Vector-efficiency ratio store | 37.50 |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 12.50 |
Path / |
Function | putAtomInBox |
Source file and lines | linkCells.c:179-378 |
Module | exec |
nb instructions | 43 |
loop length | 172 |
nb stack references | 0 |
front end | 5.38 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.00 | 2.00 | 3.75 | 3.75 | 3.75 | 3.75 | 2.50 | 2.50 | 1.00 | 1.00 | 7.33 | 7.33 | 7.33 | 2.00 | 2.00 |
cycles | 2.00 | 2.00 | 3.75 | 3.75 | 3.75 | 3.75 | 2.50 | 2.50 | 1.00 | 1.00 | 7.33 | 7.33 | 7.33 | 2.00 | 2.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 5.38 |
Overall L1 | 7.33 |
all | 38% |
load | 100% |
store | 50% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
ORR X0, XZR, X19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
BL 409240 <getBoxFromTuple> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LDR X9, [X19, #120] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
SBFM X7, X0, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR W8, [X19, #12] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR W12, [X9, X7,LSL #2] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADD W10, W12, W0,LSL #6 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
CMP W8, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LE 409468 <putAtomInBox+0xb8> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LDR W1, [X20] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADD W11, W1, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STR W11, [X20] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
LDR W12, [X9, X7,LSL #2] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
MOVZ W13, #24 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDP X18, X30, [X20, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 |
ADD W14, W12, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SBFM X15, X10, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SMADDL X16, W10, W13, XZR | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
LDR X17, [X20, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
STR W14, [X9, X7,LSL #2] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ADD X0, X30, X16 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR X2, [X20, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 |
STR W22, [X17, X15,LSL #2] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STR W21, [X18, X15,LSL #2] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ADD X3, X2, X16 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STR D10, [X30, X16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 |
STP D9, D8, [X0, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
STR D13, [X2, X16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
LDP D8, D9, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 |
STP D12, D11, [X3, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
LDP D10, D11, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 |
LDP D12, D13, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 |
LDP X29, X30, [SP], #96 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 |
RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
FSUB D21, D8, S6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 |
SUB W5, W3, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
FMUL D22, D21, D3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 |
FCVTMS W6, D22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
CMP W3, W6 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
CSEL W3, W5, W6, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
B 409434 <putAtomInBox+0x84> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
Function | putAtomInBox |
Source file and lines | linkCells.c:179-378 |
Module | exec |
nb instructions | 43 |
loop length | 172 |
nb stack references | 0 |
front end | 5.38 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.00 | 2.00 | 3.75 | 3.75 | 3.75 | 3.75 | 2.50 | 2.50 | 1.00 | 1.00 | 7.33 | 7.33 | 7.33 | 2.00 | 2.00 |
cycles | 2.00 | 2.00 | 3.75 | 3.75 | 3.75 | 3.75 | 2.50 | 2.50 | 1.00 | 1.00 | 7.33 | 7.33 | 7.33 | 2.00 | 2.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 5.38 |
Overall L1 | 7.33 |
all | 38% |
load | 100% |
store | 50% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
ORR X0, XZR, X19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
BL 409240 <getBoxFromTuple> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LDR X9, [X19, #120] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
SBFM X7, X0, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR W8, [X19, #12] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR W12, [X9, X7,LSL #2] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADD W10, W12, W0,LSL #6 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
CMP W8, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LE 409468 <putAtomInBox+0xb8> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LDR W1, [X20] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADD W11, W1, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STR W11, [X20] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
LDR W12, [X9, X7,LSL #2] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
MOVZ W13, #24 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDP X18, X30, [X20, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 |
ADD W14, W12, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SBFM X15, X10, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SMADDL X16, W10, W13, XZR | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
LDR X17, [X20, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
STR W14, [X9, X7,LSL #2] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ADD X0, X30, X16 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR X2, [X20, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 |
STR W22, [X17, X15,LSL #2] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STR W21, [X18, X15,LSL #2] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ADD X3, X2, X16 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STR D10, [X30, X16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 |
STP D9, D8, [X0, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
STR D13, [X2, X16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
LDP D8, D9, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 |
STP D12, D11, [X3, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
LDP D10, D11, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 |
LDP D12, D13, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 |
LDP X29, X30, [SP], #96 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 |
RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
FSUB D21, D8, S6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 |
SUB W5, W3, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
FMUL D22, D21, D3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 |
FCVTMS W6, D22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
CMP W3, W6 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
CSEL W3, W5, W6, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
B 409434 <putAtomInBox+0x84> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |