Function: kineticEnergy._omp_fn.0 | Module: exec | Source: timestep.c:107-116 | Coverage: 0.04% |
---|
Function: kineticEnergy._omp_fn.0 | Module: exec | Source: timestep.c:107-116 | Coverage: 0.04% |
---|
/home/hbollore/qaas/qaas-runs/169-814-5713/intel/CoMD/build/CoMD/CoMD/src-openmp/timestep.c: 107 - 116 |
-------------------------------------------------------------------------------- |
107: #pragma omp parallel for reduction(+:kenergy) |
108: for (int iBox=0; iBox<s->boxes->nLocalBoxes; iBox++) |
109: { |
110: for (int iOff=MAXATOMS*iBox,ii=0; ii<s->boxes->nAtoms[iBox]; ii++,iOff++) |
111: { |
112: int iSpecies = s->atoms->iSpecies[iOff]; |
113: real_t invMass = 0.5/s->species[iSpecies].mass; |
114: kenergy += ( s->atoms->p[iOff][0] * s->atoms->p[iOff][0] + |
115: s->atoms->p[iOff][1] * s->atoms->p[iOff][1] + |
116: s->atoms->p[iOff][2] * s->atoms->p[iOff][2] )*invMass; |
0x40c940 STP X29, X30, [SP, #-48]! |
0x40c944 ADD X29, SP, #0 |
0x40c948 STP X19, X20, [SP, #16] |
0x40c94c ORR X20, XZR, X0 |
0x40c950 LDR X19, [X0] |
0x40c954 STR X21, [SP, #32] |
0x40c958 BL 401f30 |
0x40c95c ORR W21, WZR, W0 |
0x40c960 BL 401e10 |
0x40c964 LDR X1, [X19, #24] |
0x40c968 ORR W8, WZR, W0 |
0x40c96c LDR W0, [X1, #12] |
0x40c970 SDIV W12, W0, W21 |
0x40c974 MSUB W2, W12, W21, W0 |
0x40c978 CMP W8, W2 |
0x40c97c B.LT 40cb9c |
(128) 0x40c980 MADD W3, W12, W8, W2 |
(128) 0x40c984 MOVI D1, #0 |
(128) 0x40c988 ADD W10, W12, W3 |
(128) 0x40c98c CMP W3, W10 |
(128) 0x40c990 B.GE 40cb68 |
(128) 0x40c994 UBFM W11, W3, #26, #25 |
(128) 0x40c998 MOVZ W4, #24 |
(128) 0x40c99c LDR X13, [X1, #120] |
(128) 0x40c9a0 FMOV D17, #0.5000000 |
(128) 0x40c9a4 SBFM X8, X3, #0, #31 |
(128) 0x40c9a8 SMADDL X11, W11, W4, XZR |
(129) 0x40c9ac LDR W5, [X13, X8,LSL #2] |
(129) 0x40c9b0 CMP W5, #0 |
(129) 0x40c9b4 B.LE 40cb58 |
(129) 0x40c9b8 LDP X15, X7, [X19, #32] |
(129) 0x40c9bc SUB W6, W5, #1 |
(129) 0x40c9c0 UBFM X9, X8, #56, #55 |
(129) 0x40c9c4 ADD X14, X6, X8,LSL #6 |
(129) 0x40c9c8 LDR X16, [X15, #16] |
(129) 0x40c9cc LDR X17, [X15, #32] |
(129) 0x40c9d0 ADD X18, X16, #4 |
(129) 0x40c9d4 ADD X2, X16, X9 |
(129) 0x40c9d8 ADD X30, X18, X14,LSL #2 |
(129) 0x40c9dc ADD X1, X17, X11 |
(129) 0x40c9e0 SUB X21, X30, X2 |
(129) 0x40c9e4 SUB X0, X21, #4 |
(129) 0x40c9e8 UBFM X12, X0, #2, #63 |
(129) 0x40c9ec ADD X3, X12, #1 |
(129) 0x40c9f0 ANDS X4, X3, #4160 |
(129) 0x40c9f4 B.EQ 40ca98 |
(129) 0x40c9f8 CMP X4, #1 |
(129) 0x40c9fc B.EQ 40ca64 |
(129) 0x40ca00 CMP X4, #2 |
(129) 0x40ca04 B.EQ 40ca38 |
(129) 0x40ca08 LDRSW X5, [X16, X9] |
(129) 0x40ca0c ADD X2, X2, #4 |
(129) 0x40ca10 LDP D2, D3, [X1] |
(129) 0x40ca14 ADD X1, X1, #24 |
(129) 0x40ca18 LDUR D0, [X1, #-8] |
(129) 0x40ca1c ADD X6, X7, X5,LSL #4 |
(129) 0x40ca20 LDR D4, [X6, #8] |
(129) 0x40ca24 FMUL D5, D3, D3 |
(129) 0x40ca28 FMADD D6, D2, D2, D5 |
(129) 0x40ca2c FDIV D7, D17, D4 |
(129) 0x40ca30 FMADD D16, D0, D0, D6 |
(129) 0x40ca34 FMADD D1, D7, D16, D1 |
(129) 0x40ca38 LDRSW X9, [X2], #4 |
(129) 0x40ca3c LDP D19, D20, [X1] |
(129) 0x40ca40 ADD X1, X1, #24 |
(129) 0x40ca44 LDUR D21, [X1, #-8] |
(129) 0x40ca48 ADD X14, X7, X9,LSL #4 |
(129) 0x40ca4c LDR D18, [X14, #8] |
(129) 0x40ca50 FMUL D22, D20, D20 |
(129) 0x40ca54 FMADD D23, D19, D19, D22 |
(129) 0x40ca58 FDIV D24, D17, D18 |
(129) 0x40ca5c FMADD D25, D21, D21, D23 |
(129) 0x40ca60 FMADD D1, D24, D25, D1 |
(129) 0x40ca64 LDRSW X15, [X2], #4 |
(129) 0x40ca68 LDP D27, D28, [X1] |
(129) 0x40ca6c ADD X1, X1, #24 |
(129) 0x40ca70 LDUR D29, [X1, #-8] |
(129) 0x40ca74 ADD X16, X7, X15,LSL #4 |
(129) 0x40ca78 LDR D26, [X16, #8] |
(129) 0x40ca7c FMUL D30, D28, D28 |
(129) 0x40ca80 FMADD D31, D27, D27, D30 |
(129) 0x40ca84 FDIV D4, D17, D26 |
(129) 0x40ca88 FMADD D2, D29, D29, D31 |
(129) 0x40ca8c FMADD D1, D4, D2, D1 |
(129) 0x40ca90 CMP X30, X2 |
(129) 0x40ca94 B.EQ 40cb58 |
(130) 0x40ca98 LDP D0, D3, [X1] |
(130) 0x40ca9c ORR X17, XZR, X2 |
(130) 0x40caa0 ADD X18, X1, #72 |
(130) 0x40caa4 ADD X2, X2, #16 |
(130) 0x40caa8 LDRSW X21, [X17], #4 |
(130) 0x40caac LDR D5, [X1, #16] |
(130) 0x40cab0 FMUL D6, D3, D3 |
(130) 0x40cab4 LDURSW X12, [X2, #-12] |
(130) 0x40cab8 ADD X0, X7, X21,LSL #4 |
(130) 0x40cabc LDRSW X4, [X17, #4] |
(130) 0x40cac0 LDP D18, D16, [X1, #24] |
(130) 0x40cac4 ADD X1, X1, #96 |
(130) 0x40cac8 FMADD D7, D0, D0, D6 |
(130) 0x40cacc ADD X3, X7, X12,LSL #4 |
(130) 0x40cad0 LDURSW X6, [X2, #-4] |
(130) 0x40cad4 ADD X5, X7, X4,LSL #4 |
(130) 0x40cad8 LDR D31, [X3, #8] |
(130) 0x40cadc FMADD D21, D5, D5, D7 |
(130) 0x40cae0 LDR D5, [X0, #8] |
(130) 0x40cae4 FMUL D22, D16, D16 |
(130) 0x40cae8 ADD X9, X7, X6,LSL #4 |
(130) 0x40caec LDR D0, [X5, #8] |
(130) 0x40caf0 FDIV D4, D17, D31 |
(130) 0x40caf4 LDUR D20, [X1, #-40] |
(130) 0x40caf8 FMADD D24, D18, D18, D22 |
(130) 0x40cafc LDUR D19, [X1, #-56] |
(130) 0x40cb00 LDR D2, [X9, #8] |
(130) 0x40cb04 FDIV D7, D17, D5 |
(130) 0x40cb08 LDUR D26, [X1, #-48] |
(130) 0x40cb0c FMUL D23, D20, D20 |
(130) 0x40cb10 LDUR D28, [X1, #-16] |
(130) 0x40cb14 FMADD D25, D19, D19, D24 |
(130) 0x40cb18 LDUR D27, [X1, #-32] |
(130) 0x40cb1c FDIV D3, D17, D0 |
(130) 0x40cb20 LDUR D19, [X1, #-24] |
(130) 0x40cb24 FMADD D29, D26, D26, D23 |
(130) 0x40cb28 LDR D22, [X18, #16] |
(130) 0x40cb2c FMADD D30, D27, D27, D29 |
(130) 0x40cb30 FDIV D6, D17, D2 |
(130) 0x40cb34 FMADD D1, D7, D21, D1 |
(130) 0x40cb38 FMUL D21, D28, D28 |
(130) 0x40cb3c FMADD D18, D4, D25, D1 |
(130) 0x40cb40 FMADD D20, D19, D19, D21 |
(130) 0x40cb44 FMADD D16, D3, D30, D18 |
(130) 0x40cb48 FMADD D23, D22, D22, D20 |
(130) 0x40cb4c FMADD D1, D6, D23, D16 |
(130) 0x40cb50 CMP X30, X2 |
(130) 0x40cb54 B.NE 40ca98 |
(129) 0x40cb58 ADD X8, X8, #1 |
(129) 0x40cb5c ADD X11, X11, #1536 |
(129) 0x40cb60 CMP W10, W8 |
(129) 0x40cb64 B.GT 40c9ac |
(128) 0x40cb68 ADD X20, X20, #8 |
(128) 0x40cb6c LDR X1, [X20] |
(127) 0x40cb70 FMOV D17, X1 |
(127) 0x40cb74 ORR X10, XZR, X1 |
(127) 0x40cb78 FADD D24, D1, D17 |
(127) 0x40cb7c FMOV X19, D24 |
(127) 0x40cb80 CASAL X10, X19, [X20] |
(127) 0x40cb84 CMP X1, X10 |
(127) 0x40cb88 B.NE 40cba8 |
(128) 0x40cb8c LDP X19, X20, [SP, #16] |
(128) 0x40cb90 LDR X21, [SP, #32] |
(128) 0x40cb94 LDP X29, X30, [SP], #48 |
(128) 0x40cb98 RET |
(128) 0x40cb9c ADD W12, W12, #1 |
(128) 0x40cba0 MOVZ W2, #0 |
(128) 0x40cba4 B 40c980 |
(127) 0x40cba8 ORR X1, XZR, X10 |
(127) 0x40cbac B 40cb70 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►66.67+ | GOMP_parallel | libomp.so | |
○ | kineticEnergy | timestep.c:107 | exec |
○ | timestep | timestep.c:60 | exec |
○ | main | CoMD.c:125 | exec |
○ | __libc_start_main | libc-2.31.so | |
○ | _start | CoMD.c:150 | exec |
►33.33+ | GOMP_parallel | libomp.so | |
○ | kineticEnergy | timestep.c:107 | exec |
○ | main | CoMD.c:200 | exec |
○ | __libc_start_main | libc-2.31.so | |
○ | _start | CoMD.c:150 | exec |
Path / |
Source file and lines | timestep.c:107-116 |
Module | exec |
nb instructions | 16 |
loop length | 64 |
nb stack references | 0 |
front end | 2.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.50 | 1.50 | 2.50 | 2.50 | 2.50 | 2.50 | 0.00 | 0.00 | 0.00 | 0.00 | 2.17 | 1.83 | 2.00 | 1.50 | 1.50 |
cycles | 1.50 | 1.50 | 2.50 | 2.50 | 2.50 | 2.50 | 0.00 | 0.00 | 0.00 | 0.00 | 2.17 | 1.83 | 2.00 | 1.50 | 1.50 |
Cycles executing div or sqrt instructions | 1.00-0.50 |
Front-end | 2.00 |
Overall L1 | 2.50 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 0% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
STP X29, X30, [SP, #-48]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ORR X20, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR X19, [X0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
STR X21, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
BL 401f30 <@plt_start@+0x1d0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ORR W21, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
BL 401e10 <@plt_start@+0xb0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LDR X1, [X19, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ORR W8, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR W0, [X1, #12] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
SDIV W12, W0, W21 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 1-0.50 |
MSUB W2, W12, W21, W0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
CMP W8, W2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LT 40cb9c <kineticEnergy._omp_fn.0+0x25c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
Source file and lines | timestep.c:107-116 |
Module | exec |
nb instructions | 16 |
loop length | 64 |
nb stack references | 0 |
front end | 2.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.50 | 1.50 | 2.50 | 2.50 | 2.50 | 2.50 | 0.00 | 0.00 | 0.00 | 0.00 | 2.17 | 1.83 | 2.00 | 1.50 | 1.50 |
cycles | 1.50 | 1.50 | 2.50 | 2.50 | 2.50 | 2.50 | 0.00 | 0.00 | 0.00 | 0.00 | 2.17 | 1.83 | 2.00 | 1.50 | 1.50 |
Cycles executing div or sqrt instructions | 1.00-0.50 |
Front-end | 2.00 |
Overall L1 | 2.50 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 0% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
STP X29, X30, [SP, #-48]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ORR X20, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR X19, [X0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
STR X21, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
BL 401f30 <@plt_start@+0x1d0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ORR W21, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
BL 401e10 <@plt_start@+0xb0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LDR X1, [X19, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ORR W8, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR W0, [X1, #12] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
SDIV W12, W0, W21 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 1-0.50 |
MSUB W2, W12, W21, W0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
CMP W8, W2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LT 40cb9c <kineticEnergy._omp_fn.0+0x25c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼kineticEnergy._omp_fn.0– | 0.04 | 0.02 |
▼Loop 128 - timestep.c:107-116 - exec– | 0 | 0 |
▼Loop 129 - timestep.c:110-116 - exec– | 0 | 0 |
○Loop 130 - timestep.c:110-116 - exec | 0.04 | 0.01 |
○Loop 127 - timestep.c:107-107 - exec | 0 | 0 |