Loop Id: 121 | Module: exec | Source: timestep.c:110-116 | Coverage: 0.01% |
---|
Loop Id: 121 | Module: exec | Source: timestep.c:110-116 | Coverage: 0.01% |
---|
0x40cc2c LDR W5, [X13, X8,LSL #2] |
0x40cc30 CMP W5, #0 |
0x40cc34 B.LE 40cdd8 |
0x40cc38 LDP X15, X7, [X19, #32] |
0x40cc3c SUB W6, W5, #1 |
0x40cc40 UBFM X9, X8, #56, #55 |
0x40cc44 ADD X14, X6, X8,LSL #6 |
0x40cc48 LDR X16, [X15, #16] |
0x40cc4c LDR X17, [X15, #32] |
0x40cc50 ADD X18, X16, #4 |
0x40cc54 ADD X2, X16, X9 |
0x40cc58 ADD X30, X18, X14,LSL #2 |
0x40cc5c ADD X1, X17, X11 |
0x40cc60 SUB X21, X30, X2 |
0x40cc64 SUB X0, X21, #4 |
0x40cc68 UBFM X12, X0, #2, #63 |
0x40cc6c ADD X3, X12, #1 |
0x40cc70 ANDS X4, X3, #4160 |
0x40cc74 B.EQ 40cd18 |
0x40cc78 CMP X4, #1 |
0x40cc7c B.EQ 40cce4 |
0x40cc80 CMP X4, #2 |
0x40cc84 B.EQ 40ccb8 |
0x40cc88 LDRSW X5, [X16, X9] |
0x40cc8c ADD X2, X2, #4 |
0x40cc90 LDP D2, D3, [X1] |
0x40cc94 ADD X1, X1, #24 |
0x40cc98 LDUR D0, [X1, #504] |
0x40cc9c ADD X6, X7, X5,LSL #4 |
0x40cca0 LDR D4, [X6, #8] |
0x40cca4 FMUL D5, D3, D3 |
0x40cca8 FMADD D6, D2, D2, D5 |
0x40ccac FDIV D7, D17, D4 |
0x40ccb0 FMADD D16, D0, D0, D6 |
0x40ccb4 FMADD D1, D7, D16, D1 |
0x40ccb8 LDRSW X9, [X2], #4 |
0x40ccbc LDP D19, D20, [X1] |
0x40ccc0 ADD X1, X1, #24 |
0x40ccc4 LDUR D21, [X1, #504] |
0x40ccc8 ADD X14, X7, X9,LSL #4 |
0x40cccc LDR D18, [X14, #8] |
0x40ccd0 FMUL D22, D20, D20 |
0x40ccd4 FMADD D23, D19, D19, D22 |
0x40ccd8 FDIV D24, D17, D18 |
0x40ccdc FMADD D25, D21, D21, D23 |
0x40cce0 FMADD D1, D24, D25, D1 |
0x40cce4 LDRSW X15, [X2], #4 |
0x40cce8 LDP D27, D28, [X1] |
0x40ccec ADD X1, X1, #24 |
0x40ccf0 LDUR D29, [X1, #504] |
0x40ccf4 ADD X16, X7, X15,LSL #4 |
0x40ccf8 LDR D26, [X16, #8] |
0x40ccfc FMUL D30, D28, D28 |
0x40cd00 FMADD D31, D27, D27, D30 |
0x40cd04 FDIV D4, D17, D26 |
0x40cd08 FMADD D2, D29, D29, D31 |
0x40cd0c FMADD D1, D4, D2, D1 |
0x40cd10 CMP X30, X2 |
0x40cd14 B.EQ 40cdd8 |
(122) 0x40cd18 LDP D0, D3, [X1] |
(122) 0x40cd1c ORR X17, XZR, X2 |
(122) 0x40cd20 ADD X18, X1, #72 |
(122) 0x40cd24 ADD X2, X2, #16 |
(122) 0x40cd28 LDRSW X21, [X17], #4 |
(122) 0x40cd2c LDR D5, [X1, #16] |
(122) 0x40cd30 FMUL D6, D3, D3 |
(122) 0x40cd34 LDURSW X12, [X2, #500] |
(122) 0x40cd38 ADD X0, X7, X21,LSL #4 |
(122) 0x40cd3c LDRSW X4, [X17, #4] |
(122) 0x40cd40 LDP D18, D16, [X1, #24] |
(122) 0x40cd44 ADD X1, X1, #96 |
(122) 0x40cd48 FMADD D7, D0, D0, D6 |
(122) 0x40cd4c ADD X3, X7, X12,LSL #4 |
(122) 0x40cd50 LDURSW X6, [X2, #508] |
(122) 0x40cd54 ADD X5, X7, X4,LSL #4 |
(122) 0x40cd58 LDR D31, [X3, #8] |
(122) 0x40cd5c FMADD D21, D5, D5, D7 |
(122) 0x40cd60 LDR D5, [X0, #8] |
(122) 0x40cd64 FMUL D22, D16, D16 |
(122) 0x40cd68 ADD X9, X7, X6,LSL #4 |
(122) 0x40cd6c LDR D0, [X5, #8] |
(122) 0x40cd70 FDIV D4, D17, D31 |
(122) 0x40cd74 LDUR D20, [X1, #472] |
(122) 0x40cd78 FMADD D24, D18, D18, D22 |
(122) 0x40cd7c LDUR D19, [X1, #456] |
(122) 0x40cd80 LDR D2, [X9, #8] |
(122) 0x40cd84 FDIV D7, D17, D5 |
(122) 0x40cd88 LDUR D26, [X1, #464] |
(122) 0x40cd8c FMUL D23, D20, D20 |
(122) 0x40cd90 LDUR D28, [X1, #496] |
(122) 0x40cd94 FMADD D25, D19, D19, D24 |
(122) 0x40cd98 LDUR D27, [X1, #480] |
(122) 0x40cd9c FDIV D3, D17, D0 |
(122) 0x40cda0 LDUR D19, [X1, #488] |
(122) 0x40cda4 FMADD D29, D26, D26, D23 |
(122) 0x40cda8 LDR D22, [X18, #16] |
(122) 0x40cdac FMADD D30, D27, D27, D29 |
(122) 0x40cdb0 FDIV D6, D17, D2 |
(122) 0x40cdb4 FMADD D1, D7, D21, D1 |
(122) 0x40cdb8 FMUL D21, D28, D28 |
(122) 0x40cdbc FMADD D18, D4, D25, D1 |
(122) 0x40cdc0 FMADD D20, D19, D19, D21 |
(122) 0x40cdc4 FMADD D16, D3, D30, D18 |
(122) 0x40cdc8 FMADD D23, D22, D22, D20 |
(122) 0x40cdcc FMADD D1, D6, D23, D16 |
(122) 0x40cdd0 CMP X30, X2 |
(122) 0x40cdd4 B.NE 40cd18 |
0x40cdd8 ADD X8, X8, #1 |
0x40cddc ADD X11, X11, #1536 |
0x40cde0 CMP W10, W8 |
0x40cde4 B.GT 40cc2c |
/home/hbollore/qaas-runs/170-265-5545/intel/CoMD/build/CoMD/CoMD/src-openmp/timestep.c: 110 - 116 |
-------------------------------------------------------------------------------- |
110: for (int iOff=MAXATOMS*iBox,ii=0; ii<s->boxes->nAtoms[iBox]; ii++,iOff++) |
111: { |
112: int iSpecies = s->atoms->iSpecies[iOff]; |
113: real_t invMass = 0.5/s->species[iSpecies].mass; |
114: kenergy += ( s->atoms->p[iOff][0] * s->atoms->p[iOff][0] + |
115: s->atoms->p[iOff][1] * s->atoms->p[iOff][1] + |
116: s->atoms->p[iOff][2] * s->atoms->p[iOff][2] )*invMass; |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | __kmp_GOMP_microtask_wrapper(i[...] | libomp.so | |
○ | __kmp_invoke_microtask | libomp.so |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 2.63 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 5.25 - 5.73 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.13 |
Bottlenecks | micro-operation queue, |
Function | kineticEnergy._omp_fn.0 |
Source | timestep.c:110-116 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 7.88 |
CQA cycles if no scalar integer | 3.00 |
CQA cycles if FP arith vectorized | 7.88 |
CQA cycles if fully vectorized | 1.50 - 1.38 |
Front-end cycles | 7.88 |
DIV/SQRT cycles | 3.00 |
P0 cycles | 3.00 |
P1 cycles | 7.00 |
P2 cycles | 7.00 |
P3 cycles | 7.00 |
P4 cycles | 7.00 |
P5 cycles | 3.75 |
P6 cycles | 3.75 |
P7 cycles | 3.75 |
P8 cycles | 3.75 |
P9 cycles | 5.33 |
P10 cycles | 5.33 |
P11 cycles | 5.33 |
P12 cycles | 0.00 |
P13 cycles | 0.00 |
P14 cycles | 3.00 - 1.50 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 63.00 |
Nb uops | 63.00 |
Nb loads | NA |
Nb stores | 0.00 |
Nb stack references | 0.00 |
FLOP/cycle | 3.05 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 3.00 |
Nb FLOP fma | 9.00 |
Nb FLOP div | 3.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 18.29 |
Bytes prefetched | 0.00 |
Bytes loaded | 144.00 |
Bytes stored | 0.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 15.00 |
Vectorization ratio load | 33.33 |
Vectorization ratio store | NA |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 0.00 |
Vector-efficiency ratio all | 28.13 |
Vector-efficiency ratio load | 33.33 |
Vector-efficiency ratio store | NA |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | 25.00 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 22.50 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 2.63 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 5.25 - 5.73 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.13 |
Bottlenecks | micro-operation queue, |
Function | kineticEnergy._omp_fn.0 |
Source | timestep.c:110-116 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 7.88 |
CQA cycles if no scalar integer | 3.00 |
CQA cycles if FP arith vectorized | 7.88 |
CQA cycles if fully vectorized | 1.50 - 1.38 |
Front-end cycles | 7.88 |
DIV/SQRT cycles | 3.00 |
P0 cycles | 3.00 |
P1 cycles | 7.00 |
P2 cycles | 7.00 |
P3 cycles | 7.00 |
P4 cycles | 7.00 |
P5 cycles | 3.75 |
P6 cycles | 3.75 |
P7 cycles | 3.75 |
P8 cycles | 3.75 |
P9 cycles | 5.33 |
P10 cycles | 5.33 |
P11 cycles | 5.33 |
P12 cycles | 0.00 |
P13 cycles | 0.00 |
P14 cycles | 3.00 - 1.50 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 63.00 |
Nb uops | 63.00 |
Nb loads | NA |
Nb stores | 0.00 |
Nb stack references | 0.00 |
FLOP/cycle | 3.05 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 3.00 |
Nb FLOP fma | 9.00 |
Nb FLOP div | 3.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 18.29 |
Bytes prefetched | 0.00 |
Bytes loaded | 144.00 |
Bytes stored | 0.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 15.00 |
Vectorization ratio load | 33.33 |
Vectorization ratio store | NA |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 0.00 |
Vector-efficiency ratio all | 28.13 |
Vector-efficiency ratio load | 33.33 |
Vector-efficiency ratio store | NA |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | 25.00 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 22.50 |
Path / |
Function | kineticEnergy._omp_fn.0 |
Source file and lines | timestep.c:110-116 |
Module | exec |
nb instructions | 63 |
loop length | 252 |
nb stack references | 0 |
front end | 7.88 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.00 | 3.00 | 7.00 | 7.00 | 7.00 | 7.00 | 3.75 | 3.75 | 3.75 | 3.75 | 5.33 | 5.33 | 5.33 | 0.00 | 0.00 |
cycles | 3.00 | 3.00 | 7.00 | 7.00 | 7.00 | 7.00 | 3.75 | 3.75 | 3.75 | 3.75 | 5.33 | 5.33 | 5.33 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 3.00-1.50 |
Front-end | 7.88 |
Overall L1 | 7.88 |
all | 15% |
load | 33% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
LDR W5, [X13, X8,LSL #2] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
CMP W5, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LE 40cdd8 <kineticEnergy._omp_fn.0+0x218> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LDP X15, X7, [X19, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 |
SUB W6, W5, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
UBFM X9, X8, #56, #55 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X14, X6, X8,LSL #6 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
LDR X16, [X15, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X17, [X15, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADD X18, X16, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X2, X16, X9 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X30, X18, X14,LSL #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X1, X17, X11 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB X21, X30, X2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB X0, X21, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
UBFM X12, X0, #2, #63 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X3, X12, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ANDS X4, X3, #4160 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.EQ 40cd18 <kineticEnergy._omp_fn.0+0x158> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
CMP X4, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.EQ 40cce4 <kineticEnergy._omp_fn.0+0x124> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
CMP X4, #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.EQ 40ccb8 <kineticEnergy._omp_fn.0+0xf8> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LDRSW X5, [X16, X9] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADD X2, X2, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDP D2, D3, [X1] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 |
ADD X1, X1, #24 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDUR D0, [X1, #504] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 |
ADD X6, X7, X5,LSL #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR D4, [X6, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 |
FMUL D5, D3, D3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 |
FMADD D6, D2, D2, D5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 4 | 0.25 |
FDIV D7, D17, D4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 7-15 | 1-0.50 |
FMADD D16, D0, D0, D6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 4 | 0.25 |
FMADD D1, D7, D16, D1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 4 | 0.25 |
LDRSW X9, [X2], #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDP D19, D20, [X1] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 |
ADD X1, X1, #24 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDUR D21, [X1, #504] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 |
ADD X14, X7, X9,LSL #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR D18, [X14, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 |
FMUL D22, D20, D20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 |
FMADD D23, D19, D19, D22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 4 | 0.25 |
FDIV D24, D17, D18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 7-15 | 1-0.50 |
FMADD D25, D21, D21, D23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 4 | 0.25 |
FMADD D1, D24, D25, D1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 4 | 0.25 |
LDRSW X15, [X2], #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDP D27, D28, [X1] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 |
ADD X1, X1, #24 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDUR D29, [X1, #504] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 |
ADD X16, X7, X15,LSL #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR D26, [X16, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 |
FMUL D30, D28, D28 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 |
FMADD D31, D27, D27, D30 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 4 | 0.25 |
FDIV D4, D17, D26 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 7-15 | 1-0.50 |
FMADD D2, D29, D29, D31 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 4 | 0.25 |
FMADD D1, D4, D2, D1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 4 | 0.25 |
CMP X30, X2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.EQ 40cdd8 <kineticEnergy._omp_fn.0+0x218> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD X8, X8, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X11, X11, #1536 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP W10, W8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.GT 40cc2c <kineticEnergy._omp_fn.0+0x6c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
Function | kineticEnergy._omp_fn.0 |
Source file and lines | timestep.c:110-116 |
Module | exec |
nb instructions | 63 |
loop length | 252 |
nb stack references | 0 |
front end | 7.88 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.00 | 3.00 | 7.00 | 7.00 | 7.00 | 7.00 | 3.75 | 3.75 | 3.75 | 3.75 | 5.33 | 5.33 | 5.33 | 0.00 | 0.00 |
cycles | 3.00 | 3.00 | 7.00 | 7.00 | 7.00 | 7.00 | 3.75 | 3.75 | 3.75 | 3.75 | 5.33 | 5.33 | 5.33 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 3.00-1.50 |
Front-end | 7.88 |
Overall L1 | 7.88 |
all | 15% |
load | 33% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
LDR W5, [X13, X8,LSL #2] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
CMP W5, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LE 40cdd8 <kineticEnergy._omp_fn.0+0x218> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LDP X15, X7, [X19, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 |
SUB W6, W5, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
UBFM X9, X8, #56, #55 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X14, X6, X8,LSL #6 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
LDR X16, [X15, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X17, [X15, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADD X18, X16, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X2, X16, X9 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X30, X18, X14,LSL #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X1, X17, X11 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB X21, X30, X2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB X0, X21, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
UBFM X12, X0, #2, #63 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X3, X12, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ANDS X4, X3, #4160 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.EQ 40cd18 <kineticEnergy._omp_fn.0+0x158> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
CMP X4, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.EQ 40cce4 <kineticEnergy._omp_fn.0+0x124> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
CMP X4, #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.EQ 40ccb8 <kineticEnergy._omp_fn.0+0xf8> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LDRSW X5, [X16, X9] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADD X2, X2, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDP D2, D3, [X1] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 |
ADD X1, X1, #24 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDUR D0, [X1, #504] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 |
ADD X6, X7, X5,LSL #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR D4, [X6, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 |
FMUL D5, D3, D3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 |
FMADD D6, D2, D2, D5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 4 | 0.25 |
FDIV D7, D17, D4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 7-15 | 1-0.50 |
FMADD D16, D0, D0, D6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 4 | 0.25 |
FMADD D1, D7, D16, D1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 4 | 0.25 |
LDRSW X9, [X2], #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDP D19, D20, [X1] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 |
ADD X1, X1, #24 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDUR D21, [X1, #504] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 |
ADD X14, X7, X9,LSL #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR D18, [X14, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 |
FMUL D22, D20, D20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 |
FMADD D23, D19, D19, D22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 4 | 0.25 |
FDIV D24, D17, D18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 7-15 | 1-0.50 |
FMADD D25, D21, D21, D23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 4 | 0.25 |
FMADD D1, D24, D25, D1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 4 | 0.25 |
LDRSW X15, [X2], #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDP D27, D28, [X1] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 |
ADD X1, X1, #24 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDUR D29, [X1, #504] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 |
ADD X16, X7, X15,LSL #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR D26, [X16, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 |
FMUL D30, D28, D28 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 |
FMADD D31, D27, D27, D30 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 4 | 0.25 |
FDIV D4, D17, D26 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 7-15 | 1-0.50 |
FMADD D2, D29, D29, D31 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 4 | 0.25 |
FMADD D1, D4, D2, D1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 4 | 0.25 |
CMP X30, X2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.EQ 40cdd8 <kineticEnergy._omp_fn.0+0x218> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD X8, X8, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X11, X11, #1536 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP W10, W8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.GT 40cc2c <kineticEnergy._omp_fn.0+0x6c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |