| Function: kineticEnergy._omp_fn.0 | Module: exec | Source: timestep.c:107-116 | Coverage (incl. loops): 0.06% | (excl. loops): 0.00% |
|---|
| Function: kineticEnergy._omp_fn.0 | Module: exec | Source: timestep.c:107-116 | Coverage (incl. loops): 0.06% | (excl. loops): 0.00% |
|---|
/home/eoseret/qaas/qaas_runs/178-176-0594/intel/CoMD/build/CoMD/CoMD/src-openmp/timestep.c: 107 - 116 |
-------------------------------------------------------------------------------- |
107: #pragma omp parallel for reduction(+:kenergy) |
108: for (int iBox=0; iBox<s->boxes->nLocalBoxes; iBox++) |
109: { |
110: for (int iOff=MAXATOMS*iBox,ii=0; ii<s->boxes->nAtoms[iBox]; ii++,iOff++) |
111: { |
112: int iSpecies = s->atoms->iSpecies[iOff]; |
113: real_t invMass = 0.5/s->species[iSpecies].mass; |
114: kenergy += ( s->atoms->p[iOff][0] * s->atoms->p[iOff][0] + |
115: s->atoms->p[iOff][1] * s->atoms->p[iOff][1] + |
116: s->atoms->p[iOff][2] * s->atoms->p[iOff][2] )*invMass; |
0x41a880 STP X29, X30, [SP, #-48]! |
0x41a884 ADD X29, SP, #0 |
0x41a888 STP X19, X20, [SP, #16] |
0x41a88c ORR X20, XZR, X0 |
0x41a890 LDR X19, [X0] |
0x41a894 STR X21, [SP, #32] |
0x41a898 BL 410120 |
0x41a89c ORR W21, WZR, W0 |
0x41a8a0 BL 410140 |
0x41a8a4 LDR X1, [X19, #24] |
0x41a8a8 ORR W8, WZR, W0 |
0x41a8ac LDR W0, [X1, #12] |
0x41a8b0 SDIV W12, W0, W21 |
0x41a8b4 MSUB W2, W12, W21, W0 |
0x41a8b8 CMP W8, W2 |
0x41a8bc B.LT 41aadc |
0x41a8c0 MADD W3, W12, W8, W2 |
0x41a8c4 MOVI D1, #0 |
0x41a8c8 ADD W10, W12, W3 |
0x41a8cc CMP W3, W10 |
0x41a8d0 B.GE 41aaa8 |
0x41a8d4 UBFM W11, W3, #26, #25 |
0x41a8d8 MOVZ W4, #24 |
0x41a8dc LDR X13, [X1, #120] |
0x41a8e0 FMOV D17, #0.5000000 |
0x41a8e4 SBFM X8, X3, #0, #31 |
0x41a8e8 SMULL X11, W11, W4 |
(107) 0x41a8ec LDR W5, [X13, X8,LSL #2] |
(107) 0x41a8f0 CMP W5, #0 |
(107) 0x41a8f4 B.LE 41aa98 |
(107) 0x41a8f8 LDP X15, X7, [X19, #32] |
(107) 0x41a8fc SUB W6, W5, #1 |
(107) 0x41a900 UBFM X9, X8, #56, #55 |
(107) 0x41a904 ADD X14, X6, X8,LSL #6 |
(107) 0x41a908 LDR X16, [X15, #16] |
(107) 0x41a90c LDR X17, [X15, #32] |
(107) 0x41a910 ADD X18, X16, #4 |
(107) 0x41a914 ADD X2, X16, X9 |
(107) 0x41a918 ADD X30, X18, X14,LSL #2 |
(107) 0x41a91c ADD X1, X17, X11 |
(107) 0x41a920 SUB X21, X30, X2 |
(107) 0x41a924 SUB X0, X21, #4 |
(107) 0x41a928 UBFM X12, X0, #2, #63 |
(107) 0x41a92c ADD X3, X12, #1 |
(107) 0x41a930 ANDS X4, X3, #0x3 |
(107) 0x41a934 B.EQ 41a9d8 |
(107) 0x41a938 CMP X4, #1 |
(107) 0x41a93c B.EQ 41a9a4 |
(107) 0x41a940 CMP X4, #2 |
(107) 0x41a944 B.EQ 41a978 |
(107) 0x41a948 LDRSW X5, [X16, X9] |
(107) 0x41a94c ADD X2, X2, #4 |
(107) 0x41a950 LDP D2, D3, [X1] |
(107) 0x41a954 ADD X1, X1, #24 |
(107) 0x41a958 LDUR D0, [X1, #-8] |
(107) 0x41a95c ADD X6, X7, X5,LSL #4 |
(107) 0x41a960 LDR D4, [X6, #8] |
(107) 0x41a964 FMUL D5, D3, D3 |
(107) 0x41a968 FMADD D6, D2, D2, D5 |
(107) 0x41a96c FDIV D7, D17, D4 |
(107) 0x41a970 FMADD D16, D0, D0, D6 |
(107) 0x41a974 FMADD D1, D7, D16, D1 |
(107) 0x41a978 LDRSW X9, [X2], #4 |
(107) 0x41a97c LDP D19, D20, [X1] |
(107) 0x41a980 ADD X1, X1, #24 |
(107) 0x41a984 LDUR D21, [X1, #-8] |
(107) 0x41a988 ADD X14, X7, X9,LSL #4 |
(107) 0x41a98c LDR D18, [X14, #8] |
(107) 0x41a990 FMUL D22, D20, D20 |
(107) 0x41a994 FMADD D23, D19, D19, D22 |
(107) 0x41a998 FDIV D24, D17, D18 |
(107) 0x41a99c FMADD D25, D21, D21, D23 |
(107) 0x41a9a0 FMADD D1, D24, D25, D1 |
(107) 0x41a9a4 LDRSW X15, [X2], #4 |
(107) 0x41a9a8 LDP D27, D28, [X1] |
(107) 0x41a9ac ADD X1, X1, #24 |
(107) 0x41a9b0 LDUR D29, [X1, #-8] |
(107) 0x41a9b4 ADD X16, X7, X15,LSL #4 |
(107) 0x41a9b8 LDR D26, [X16, #8] |
(107) 0x41a9bc FMUL D30, D28, D28 |
(107) 0x41a9c0 FMADD D31, D27, D27, D30 |
(107) 0x41a9c4 FDIV D4, D17, D26 |
(107) 0x41a9c8 FMADD D2, D29, D29, D31 |
(107) 0x41a9cc FMADD D1, D4, D2, D1 |
(107) 0x41a9d0 CMP X30, X2 |
(107) 0x41a9d4 B.EQ 41aa98 |
(108) 0x41a9d8 LDP D0, D3, [X1] |
(108) 0x41a9dc ORR X17, XZR, X2 |
(108) 0x41a9e0 ADD X18, X1, #72 |
(108) 0x41a9e4 ADD X2, X2, #16 |
(108) 0x41a9e8 LDRSW X21, [X17], #4 |
(108) 0x41a9ec LDR D5, [X1, #16] |
(108) 0x41a9f0 FMUL D6, D3, D3 |
(108) 0x41a9f4 LDURSW X12, [X2, #-12] |
(108) 0x41a9f8 ADD X0, X7, X21,LSL #4 |
(108) 0x41a9fc LDRSW X4, [X17, #4] |
(108) 0x41aa00 LDP D18, D16, [X1, #24] |
(108) 0x41aa04 ADD X1, X1, #96 |
(108) 0x41aa08 FMADD D7, D0, D0, D6 |
(108) 0x41aa0c ADD X3, X7, X12,LSL #4 |
(108) 0x41aa10 LDURSW X6, [X2, #-4] |
(108) 0x41aa14 ADD X5, X7, X4,LSL #4 |
(108) 0x41aa18 LDR D31, [X3, #8] |
(108) 0x41aa1c FMADD D21, D5, D5, D7 |
(108) 0x41aa20 LDR D5, [X0, #8] |
(108) 0x41aa24 FMUL D22, D16, D16 |
(108) 0x41aa28 ADD X9, X7, X6,LSL #4 |
(108) 0x41aa2c LDR D0, [X5, #8] |
(108) 0x41aa30 FDIV D4, D17, D31 |
(108) 0x41aa34 LDUR D20, [X1, #-40] |
(108) 0x41aa38 FMADD D24, D18, D18, D22 |
(108) 0x41aa3c LDUR D19, [X1, #-56] |
(108) 0x41aa40 LDR D2, [X9, #8] |
(108) 0x41aa44 FDIV D7, D17, D5 |
(108) 0x41aa48 LDUR D26, [X1, #-48] |
(108) 0x41aa4c FMUL D23, D20, D20 |
(108) 0x41aa50 LDUR D28, [X1, #-16] |
(108) 0x41aa54 FMADD D25, D19, D19, D24 |
(108) 0x41aa58 LDUR D27, [X1, #-32] |
(108) 0x41aa5c FDIV D3, D17, D0 |
(108) 0x41aa60 LDUR D19, [X1, #-24] |
(108) 0x41aa64 FMADD D29, D26, D26, D23 |
(108) 0x41aa68 LDR D22, [X18, #16] |
(108) 0x41aa6c FMADD D30, D27, D27, D29 |
(108) 0x41aa70 FDIV D6, D17, D2 |
(108) 0x41aa74 FMADD D1, D7, D21, D1 |
(108) 0x41aa78 FMUL D21, D28, D28 |
(108) 0x41aa7c FMADD D18, D4, D25, D1 |
(108) 0x41aa80 FMADD D20, D19, D19, D21 |
(108) 0x41aa84 FMADD D16, D3, D30, D18 |
(108) 0x41aa88 FMADD D23, D22, D22, D20 |
(108) 0x41aa8c FMADD D1, D6, D23, D16 |
(108) 0x41aa90 CMP X30, X2 |
(108) 0x41aa94 B.NE 41a9d8 |
(107) 0x41aa98 ADD X8, X8, #1 |
(107) 0x41aa9c ADD X11, X11, #1536 |
(107) 0x41aaa0 CMP W10, W8 |
(107) 0x41aaa4 B.GT 41a8ec |
0x41aaa8 ADD X20, X20, #8 |
0x41aaac LDR X1, [X20] |
(106) 0x41aab0 FMOV D17, X1 |
(106) 0x41aab4 ORR X10, XZR, X1 |
(106) 0x41aab8 FADD D24, D1, D17 |
(106) 0x41aabc FMOV X19, D24 |
(106) 0x41aac0 CASAL X10, X19, [X20] |
(106) 0x41aac4 CMP X1, X10 |
(106) 0x41aac8 B.NE 41aae8 |
0x41aacc LDP X19, X20, [SP, #16] |
0x41aad0 LDR X21, [SP, #32] |
0x41aad4 LDP X29, X30, [SP], #48 |
0x41aad8 RET |
0x41aadc ADD W12, W12, #1 |
0x41aae0 MOVZ W2, #0 |
0x41aae4 B 41a8c0 |
(106) 0x41aae8 ORR X1, XZR, X10 |
(106) 0x41aaec B 41aab0 |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►100.00+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| Path / |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run gcc_4
| Source file and lines | timestep.c:107-116 |
| Module | exec |
| nb instructions | 36 |
| nb uops | 36 |
| loop length | 144 |
| used w registers | 10 |
| used x registers | 12 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 2 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 6 |
| micro-operation queue | 4.50 cycles |
| front end | 4.50 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 3.00 | 3.00 | 5.00 | 5.00 | 5.00 | 5.00 | 0.50 | 0.50 | 0.50 | 0.50 | 3.83 | 3.50 | 3.67 | 1.50 | 1.50 |
| cycles | 3.00 | 3.00 | 5.00 | 5.00 | 5.00 | 5.00 | 0.50 | 0.50 | 0.50 | 0.50 | 3.83 | 3.50 | 3.67 | 1.50 | 1.50 |
| Cycles executing div or sqrt instructions | 5.00-12.50 |
| Front-end | 4.50 |
| Dispatch | 5.00 |
| DIV/SQRT | 5.00-12.50 |
| Overall L1 | 5.00-12.50 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| other | 0% |
| all | 0% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 25% |
| load | 33% |
| store | 41% |
| mul | 12% |
| add-sub | 18% |
| fma | 12% |
| other | 23% |
| all | 25% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 25% |
| all | 25% |
| load | 33% |
| store | 41% |
| mul | 12% |
| add-sub | 18% |
| fma | 12% |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 23% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #-48]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X20, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| LDR X19, [X0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| STR X21, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| BL 410120 <@plt_start@+0x100> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W21, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410140 <@plt_start@+0x120> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X1, [X19, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ORR W8, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| LDR W0, [X1, #12] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| SDIV W12, W0, W21 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| MSUB W2, W12, W21, W0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| CMP W8, W2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.LT 41aadc <kineticEnergy._omp_fn.0+0x25c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W3, W12, W8, W2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| MOVI D1, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| ADD W10, W12, W3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W3, W10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 41aaa8 <kineticEnergy._omp_fn.0+0x228> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| UBFM W11, W3, #26, #25 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MOVZ W4, #24 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| LDR X13, [X1, #120] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| FMOV D17, #0.5000000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| SBFM X8, X3, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| SMULL X11, W11, W4 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| ADD X20, X20, #8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| LDR X1, [X20] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDR X21, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDP X29, X30, [SP], #48 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W12, W12, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W2, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| B 41a8c0 <kineticEnergy._omp_fn.0+0x40> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run gcc_4
| Source file and lines | timestep.c:107-116 |
| Module | exec |
| nb instructions | 36 |
| nb uops | 36 |
| loop length | 144 |
| used w registers | 10 |
| used x registers | 12 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 2 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 6 |
| micro-operation queue | 4.50 cycles |
| front end | 4.50 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 3.00 | 3.00 | 5.00 | 5.00 | 5.00 | 5.00 | 0.50 | 0.50 | 0.50 | 0.50 | 3.83 | 3.50 | 3.67 | 1.50 | 1.50 |
| cycles | 3.00 | 3.00 | 5.00 | 5.00 | 5.00 | 5.00 | 0.50 | 0.50 | 0.50 | 0.50 | 3.83 | 3.50 | 3.67 | 1.50 | 1.50 |
| Cycles executing div or sqrt instructions | 5.00-12.50 |
| Front-end | 4.50 |
| Dispatch | 5.00 |
| DIV/SQRT | 5.00-12.50 |
| Overall L1 | 5.00-12.50 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| other | 0% |
| all | 0% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 25% |
| load | 33% |
| store | 41% |
| mul | 12% |
| add-sub | 18% |
| fma | 12% |
| other | 23% |
| all | 25% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 25% |
| all | 25% |
| load | 33% |
| store | 41% |
| mul | 12% |
| add-sub | 18% |
| fma | 12% |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 23% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #-48]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X20, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| LDR X19, [X0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| STR X21, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| BL 410120 <@plt_start@+0x100> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W21, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410140 <@plt_start@+0x120> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X1, [X19, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ORR W8, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| LDR W0, [X1, #12] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| SDIV W12, W0, W21 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| MSUB W2, W12, W21, W0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| CMP W8, W2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.LT 41aadc <kineticEnergy._omp_fn.0+0x25c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W3, W12, W8, W2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| MOVI D1, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| ADD W10, W12, W3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W3, W10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 41aaa8 <kineticEnergy._omp_fn.0+0x228> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| UBFM W11, W3, #26, #25 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MOVZ W4, #24 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| LDR X13, [X1, #120] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| FMOV D17, #0.5000000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| SBFM X8, X3, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| SMULL X11, W11, W4 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| ADD X20, X20, #8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| LDR X1, [X20] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDR X21, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDP X29, X30, [SP], #48 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W12, W12, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W2, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| B 41a8c0 <kineticEnergy._omp_fn.0+0x40> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| Name | Coverage (%) | Time (s) |
|---|---|---|
| ▼kineticEnergy._omp_fn.0– | 0.06 | 0.01 |
| ▼Loop 107 - timestep.c:110-116 - exec– | 0.01 | 0.01 |
| ○Loop 108 - timestep.c:110-116 - exec | 0.05 | 0.01 |
| ○Loop 106 - timestep.c:107-107 - exec | 0.00 | 0.00 |
