| Function: setVcm._omp_fn.0 | Module: exec | Source: initAtoms.c:123-133 | Coverage (incl. loops): NA% | (excl. loops): NA% |
|---|
| Function: setVcm._omp_fn.0 | Module: exec | Source: initAtoms.c:123-133 | Coverage (incl. loops): NA% | (excl. loops): NA% |
|---|
/home/eoseret/qaas/qaas_runs/178-176-0594/intel/CoMD/build/CoMD/CoMD/src-openmp/initAtoms.c: 123 - 133 |
-------------------------------------------------------------------------------- |
123: #pragma omp parallel for |
124: for (int iBox=0; iBox<s->boxes->nLocalBoxes; ++iBox) |
125: { |
126: for (int iOff=MAXATOMS*iBox, ii=0; ii<s->boxes->nAtoms[iBox]; ++ii, ++iOff) |
127: { |
128: int iSpecies = s->atoms->iSpecies[iOff]; |
129: real_t mass = s->species[iSpecies].mass; |
130: |
131: s->atoms->p[iOff][0] += mass * vShift[0]; |
132: s->atoms->p[iOff][1] += mass * vShift[1]; |
133: s->atoms->p[iOff][2] += mass * vShift[2]; |
0x415640 STP X29, X30, [SP, #976]! |
0x415644 ADD X29, SP, #0 |
0x415648 STP X19, X20, [SP, #16] |
0x41564c LDR X19, [X0] |
0x415650 STR X21, [SP, #32] |
0x415654 ORR X21, XZR, X0 |
0x415658 BL 410050 |
0x41565c ORR W20, WZR, W0 |
0x415660 BL 410150 |
0x415664 LDR X2, [X19, #24] |
0x415668 ORR W10, WZR, W0 |
0x41566c LDR W0, [X2, #12] |
0x415670 SDIV W12, W0, W20 |
0x415674 MSUB W1, W12, W20, W0 |
0x415678 CMP W10, W1 |
0x41567c B.LT 4158e0 |
0x415680 MADD W3, W12, W10, W1 |
0x415684 ADD W14, W12, W3 |
0x415688 CMP W3, W14 |
0x41568c B.GE 4158d0 |
0x415690 UBFM W11, W3, #26, #25 |
0x415694 MOVZ W4, #24 |
0x415698 LDR X15, [X21, #8] |
0x41569c SBFM X10, X3, #0, #31 |
0x4156a0 LDR X13, [X2, #120] |
0x4156a4 SMULL X11, W11, W4 |
(64) 0x4156a8 LDR W5, [X13, X10,LSL #2] |
(64) 0x4156ac CMP W5, #0 |
(64) 0x4156b0 B.LE 4158c0 |
(64) 0x4156b4 LDP X16, X9, [X19, #32] |
(64) 0x4156b8 SUB W7, W5, #1 |
(64) 0x4156bc UBFM X6, X10, #56, #55 |
(64) 0x4156c0 ADD X8, X7, X10,LSL #6 |
(64) 0x4156c4 LDR X17, [X16, #16] |
(64) 0x4156c8 LDR X18, [X16, #32] |
(64) 0x4156cc ADD X30, X17, #4 |
(64) 0x4156d0 ADD X2, X17, X6 |
(64) 0x4156d4 ADD X21, X30, X8,LSL #2 |
(64) 0x4156d8 ADD X0, X18, X11 |
(64) 0x4156dc SUB X20, X21, X2 |
(64) 0x4156e0 SUB X12, X20, #4 |
(64) 0x4156e4 UBFM X1, X12, #2, #63 |
(64) 0x4156e8 ADD X3, X1, #1 |
(64) 0x4156ec ANDS X4, X3, #0x3 |
(64) 0x4156f0 B.EQ 4157c4 |
(64) 0x4156f4 CMP X4, #1 |
(64) 0x4156f8 B.EQ 415780 |
(64) 0x4156fc CMP X4, #2 |
(64) 0x415700 B.EQ 415744 |
(64) 0x415704 LDRSW X5, [X17, X6] |
(64) 0x415708 ADD X2, X2, #4 |
(64) 0x41570c LDP D3, D2, [X0] |
(64) 0x415710 ADD X0, X0, #24 |
(64) 0x415714 LDR D4, [X15] |
(64) 0x415718 ADD X7, X9, X5,LSL #4 |
(64) 0x41571c LDUR D0, [X0, #504] |
(64) 0x415720 LDR D1, [X7, #8] |
(64) 0x415724 FMADD D5, D1, D4, D3 |
(64) 0x415728 STUR D5, [X0, #488] |
(64) 0x41572c LDR D6, [X15, #8] |
(64) 0x415730 FMADD D7, D1, D6, D2 |
(64) 0x415734 STUR D7, [X0, #496] |
(64) 0x415738 LDR D16, [X15, #16] |
(64) 0x41573c FMADD D17, D1, D16, D0 |
(64) 0x415740 STUR D17, [X0, #504] |
(64) 0x415744 LDRSW X6, [X2], #4 |
(64) 0x415748 LDP D18, D19, [X0] |
(64) 0x41574c ADD X0, X0, #24 |
(64) 0x415750 LDR D21, [X15] |
(64) 0x415754 ADD X8, X9, X6,LSL #4 |
(64) 0x415758 LDUR D20, [X0, #504] |
(64) 0x41575c LDR D22, [X8, #8] |
(64) 0x415760 FMADD D23, D22, D21, D18 |
(64) 0x415764 STUR D23, [X0, #488] |
(64) 0x415768 LDR D24, [X15, #8] |
(64) 0x41576c FMADD D25, D22, D24, D19 |
(64) 0x415770 STUR D25, [X0, #496] |
(64) 0x415774 LDR D26, [X15, #16] |
(64) 0x415778 FMADD D27, D22, D26, D20 |
(64) 0x41577c STUR D27, [X0, #504] |
(64) 0x415780 LDRSW X16, [X2], #4 |
(64) 0x415784 LDP D28, D29, [X0] |
(64) 0x415788 ADD X0, X0, #24 |
(64) 0x41578c LDR D31, [X15] |
(64) 0x415790 ADD X17, X9, X16,LSL #4 |
(64) 0x415794 LDUR D30, [X0, #504] |
(64) 0x415798 LDR D0, [X17, #8] |
(64) 0x41579c FMADD D3, D0, D31, D28 |
(64) 0x4157a0 STUR D3, [X0, #488] |
(64) 0x4157a4 LDR D2, [X15, #8] |
(64) 0x4157a8 FMADD D4, D0, D2, D29 |
(64) 0x4157ac STUR D4, [X0, #496] |
(64) 0x4157b0 LDR D1, [X15, #16] |
(64) 0x4157b4 FMADD D5, D0, D1, D30 |
(64) 0x4157b8 STUR D5, [X0, #504] |
(64) 0x4157bc CMP X21, X2 |
(64) 0x4157c0 B.EQ 4158c0 |
(65) 0x4157c4 ORR X18, XZR, X2 |
(65) 0x4157c8 LDR D18, [X15] |
(65) 0x4157cc ADD X30, X0, #72 |
(65) 0x4157d0 ADD X2, X2, #16 |
(65) 0x4157d4 LDRSW X20, [X18], #4 |
(65) 0x4157d8 LDP D6, D7, [X0] |
(65) 0x4157dc LDURSW X4, [X2, #500] |
(65) 0x4157e0 ADD X12, X9, X20,LSL #4 |
(65) 0x4157e4 LDRSW X1, [X18, #4] |
(65) 0x4157e8 LDR D19, [X12, #8] |
(65) 0x4157ec ADD X5, X9, X4,LSL #4 |
(65) 0x4157f0 LDP D16, D17, [X0, #16] |
(65) 0x4157f4 ADD X0, X0, #96 |
(65) 0x4157f8 ADD X3, X9, X1,LSL #4 |
(65) 0x4157fc LDUR D28, [X0, #448] |
(65) 0x415800 FMADD D20, D19, D18, D6 |
(65) 0x415804 LDUR D29, [X0, #456] |
(65) 0x415808 LDUR D4, [X0, #464] |
(65) 0x41580c LDUR D6, [X0, #472] |
(65) 0x415810 STUR D20, [X0, #416] |
(65) 0x415814 LDR D21, [X15, #8] |
(65) 0x415818 FMADD D22, D19, D21, D7 |
(65) 0x41581c LDUR D7, [X0, #480] |
(65) 0x415820 STUR D22, [X0, #424] |
(65) 0x415824 LDR D23, [X15, #16] |
(65) 0x415828 FMADD D24, D19, D23, D16 |
(65) 0x41582c STUR D24, [X0, #432] |
(65) 0x415830 LDR D25, [X15] |
(65) 0x415834 LDR D26, [X5, #8] |
(65) 0x415838 FMADD D27, D26, D25, D17 |
(65) 0x41583c STUR D27, [X0, #440] |
(65) 0x415840 LDR D30, [X15, #8] |
(65) 0x415844 FMADD D31, D26, D30, D28 |
(65) 0x415848 STUR D31, [X0, #448] |
(65) 0x41584c LDR D0, [X15, #16] |
(65) 0x415850 FMADD D3, D26, D0, D29 |
(65) 0x415854 STUR D3, [X0, #456] |
(65) 0x415858 LDR D2, [X15] |
(65) 0x41585c LDR D1, [X3, #8] |
(65) 0x415860 FMADD D5, D1, D2, D4 |
(65) 0x415864 STUR D5, [X0, #464] |
(65) 0x415868 LDR D16, [X15, #8] |
(65) 0x41586c FMADD D17, D1, D16, D6 |
(65) 0x415870 STUR D17, [X0, #472] |
(65) 0x415874 LDR D18, [X15, #16] |
(65) 0x415878 FMADD D19, D1, D18, D7 |
(65) 0x41587c STUR D19, [X0, #480] |
(65) 0x415880 LDURSW X7, [X2, #508] |
(65) 0x415884 LDUR D20, [X0, #488] |
(65) 0x415888 LDR D21, [X15] |
(65) 0x41588c ADD X6, X9, X7,LSL #4 |
(65) 0x415890 LDR D22, [X6, #8] |
(65) 0x415894 FMADD D23, D22, D21, D20 |
(65) 0x415898 STUR D23, [X0, #488] |
(65) 0x41589c LDP D25, D26, [X30, #8] |
(65) 0x4158a0 LDR D24, [X15, #8] |
(65) 0x4158a4 FMADD D27, D22, D24, D25 |
(65) 0x4158a8 STR D27, [X30, #8] |
(65) 0x4158ac LDR D28, [X15, #16] |
(65) 0x4158b0 FMADD D29, D22, D28, D26 |
(65) 0x4158b4 STR D29, [X30, #16] |
(65) 0x4158b8 CMP X21, X2 |
(65) 0x4158bc B.NE 4157c4 |
(64) 0x4158c0 ADD X10, X10, #1 |
(64) 0x4158c4 ADD X11, X11, #1536 |
(64) 0x4158c8 CMP W14, W10 |
(64) 0x4158cc B.GT 4156a8 |
0x4158d0 LDP X19, X20, [SP, #16] |
0x4158d4 LDR X21, [SP, #32] |
0x4158d8 LDP X29, X30, [SP], #48 |
0x4158dc RET |
0x4158e0 ADD W12, W12, #1 |
0x4158e4 MOVZ W1, #0 |
0x4158e8 B 415680 |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| Path / |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run 1x1
| Source file and lines | initAtoms.c:123-133 |
| Module | exec |
| nb instructions | 33 |
| nb uops | 33 |
| loop length | 132 |
| used w registers | 10 |
| used x registers | 13 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 0 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 6 |
| micro-operation queue | 4.13 cycles |
| front end | 4.13 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 3.00 | 3.00 | 4.75 | 4.75 | 4.75 | 4.75 | 0.00 | 0.00 | 0.00 | 0.00 | 3.83 | 3.50 | 3.67 | 1.50 | 1.50 |
| cycles | 3.00 | 3.00 | 4.75 | 4.75 | 4.75 | 4.75 | 0.00 | 0.00 | 0.00 | 0.00 | 3.83 | 3.50 | 3.67 | 1.50 | 1.50 |
| Cycles executing div or sqrt instructions | 5.00-12.50 |
| Front-end | 4.13 |
| Dispatch | 4.75 |
| DIV/SQRT | 5.00-12.50 |
| Overall L1 | 5.00-12.50 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 25% |
| load | 33% |
| store | 41% |
| mul | 12% |
| add-sub | 18% |
| fma | 12% |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 23% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #976]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDR X19, [X0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| STR X21, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| ORR X21, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| BL 410050 <@plt_start@+0x30> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W20, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410150 <@plt_start@+0x130> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X2, [X19, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ORR W10, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| LDR W0, [X2, #12] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| SDIV W12, W0, W20 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| MSUB W1, W12, W20, W0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| CMP W10, W1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.LT 4158e0 <setVcm._omp_fn.0+0x2a0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W3, W12, W10, W1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W14, W12, W3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W3, W14 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 4158d0 <setVcm._omp_fn.0+0x290> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| UBFM W11, W3, #26, #25 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MOVZ W4, #24 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| LDR X15, [X21, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| SBFM X10, X3, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| LDR X13, [X2, #120] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| SMULL X11, W11, W4 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDR X21, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDP X29, X30, [SP], #48 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W12, W12, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W1, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| B 415680 <setVcm._omp_fn.0+0x40> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run 1x1
| Source file and lines | initAtoms.c:123-133 |
| Module | exec |
| nb instructions | 33 |
| nb uops | 33 |
| loop length | 132 |
| used w registers | 10 |
| used x registers | 13 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 0 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 6 |
| micro-operation queue | 4.13 cycles |
| front end | 4.13 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 3.00 | 3.00 | 4.75 | 4.75 | 4.75 | 4.75 | 0.00 | 0.00 | 0.00 | 0.00 | 3.83 | 3.50 | 3.67 | 1.50 | 1.50 |
| cycles | 3.00 | 3.00 | 4.75 | 4.75 | 4.75 | 4.75 | 0.00 | 0.00 | 0.00 | 0.00 | 3.83 | 3.50 | 3.67 | 1.50 | 1.50 |
| Cycles executing div or sqrt instructions | 5.00-12.50 |
| Front-end | 4.13 |
| Dispatch | 4.75 |
| DIV/SQRT | 5.00-12.50 |
| Overall L1 | 5.00-12.50 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 25% |
| load | 33% |
| store | 41% |
| mul | 12% |
| add-sub | 18% |
| fma | 12% |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 23% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #976]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDR X19, [X0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| STR X21, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| ORR X21, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| BL 410050 <@plt_start@+0x30> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W20, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410150 <@plt_start@+0x130> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X2, [X19, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ORR W10, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| LDR W0, [X2, #12] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| SDIV W12, W0, W20 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| MSUB W1, W12, W20, W0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| CMP W10, W1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.LT 4158e0 <setVcm._omp_fn.0+0x2a0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W3, W12, W10, W1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W14, W12, W3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W3, W14 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 4158d0 <setVcm._omp_fn.0+0x290> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| UBFM W11, W3, #26, #25 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MOVZ W4, #24 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| LDR X15, [X21, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| SBFM X10, X3, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| LDR X13, [X2, #120] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| SMULL X11, W11, W4 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDR X21, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDP X29, X30, [SP], #48 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W12, W12, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W1, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| B 415680 <setVcm._omp_fn.0+0x40> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| Run 1x1 | Number processes: 1Number nodes: NARun Command: <executable> -x 100 -y 100 -z 100MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-176-0594/intel/CoMD/run/oneview_runs/multicore/gcc/oneview_run_1781767694OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_NUM_THREADS: 1OMP_PLACES: threads |
|---|---|
| Run 1x2 | Number processes: 1Run Command: <executable> -x 100 -y 100 -z 100MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-176-0594/intel/CoMD/run/oneview_runs/multicore/gcc/oneview_run_1781767694OMP_NUM_THREADS: 2OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| Run 1x4 | Number processes: 1Run Command: <executable> -x 100 -y 100 -z 100MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-176-0594/intel/CoMD/run/oneview_runs/multicore/gcc/oneview_run_1781767694OMP_NUM_THREADS: 4OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| Run 1x8 | Number processes: 1Run Command: <executable> -x 100 -y 100 -z 100MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-176-0594/intel/CoMD/run/oneview_runs/multicore/gcc/oneview_run_1781767694OMP_NUM_THREADS: 8OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| Run 1x16 | Number processes: 1Run Command: <executable> -x 100 -y 100 -z 100MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-176-0594/intel/CoMD/run/oneview_runs/multicore/gcc/oneview_run_1781767694OMP_NUM_THREADS: 16OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| Run 1x24 | Number processes: 1Run Command: <executable> -x 100 -y 100 -z 100MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-176-0594/intel/CoMD/run/oneview_runs/multicore/gcc/oneview_run_1781767694OMP_NUM_THREADS: 24OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| Run 1x32 | Number processes: 1Run Command: <executable> -x 100 -y 100 -z 100MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-176-0594/intel/CoMD/run/oneview_runs/multicore/gcc/oneview_run_1781767694OMP_NUM_THREADS: 32OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| Run 1x40 | Number processes: 1Run Command: <executable> -x 100 -y 100 -z 100MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-176-0594/intel/CoMD/run/oneview_runs/multicore/gcc/oneview_run_1781767694OMP_NUM_THREADS: 40OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| Run 1x48 | Number processes: 1Run Command: <executable> -x 100 -y 100 -z 100MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-176-0594/intel/CoMD/run/oneview_runs/multicore/gcc/oneview_run_1781767694OMP_NUM_THREADS: 48OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| Run 1x56 | Number processes: 1Run Command: <executable> -x 100 -y 100 -z 100MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-176-0594/intel/CoMD/run/oneview_runs/multicore/gcc/oneview_run_1781767694OMP_NUM_THREADS: 56OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| Run 1x64 | Number processes: 1Run Command: <executable> -x 100 -y 100 -z 100MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-176-0594/intel/CoMD/run/oneview_runs/multicore/gcc/oneview_run_1781767694OMP_NUM_THREADS: 64OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| (1x1) Efficiency | (1x1) Potential Speed-Up (%) | (1x2) Efficiency | (1x2) Potential Speed-Up (%) | (1x4) Efficiency | (1x4) Potential Speed-Up (%) | (1x8) Efficiency | (1x8) Potential Speed-Up (%) | (1x16) Efficiency | (1x16) Potential Speed-Up (%) | (1x24) Efficiency | (1x24) Potential Speed-Up (%) | (1x32) Efficiency | (1x32) Potential Speed-Up (%) | (1x40) Efficiency | (1x40) Potential Speed-Up (%) | (1x48) Efficiency | (1x48) Potential Speed-Up (%) | (1x56) Efficiency | (1x56) Potential Speed-Up (%) | (1x64) Efficiency | (1x64) Potential Speed-Up (%) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 0 | 1 | 0 | 1 | 0 | 1 | 0 | 1 | 0 | 1 | 0 |
| Run | Number of threads | Efficiency (ideal is 1) | Speedup | Ideal Speedup | Time (s) | Coverage (%) |
|---|---|---|---|---|---|---|
| 1x1 | ||||||
| 1x2 | ||||||
| 1x4 | ||||||
| 1x8 | ||||||
| 1x16 | 15 | 1 | 1 | 16 | 0.0050000003539026 | 0.011283439584076 |
| 1x24 | 12 | 1 | 1 | 24 | 0.0050000003539026 | 0.0089460927993059 |
| 1x32 | 16 | 1 | 1 | 32 | 0.0050000003539026 | 0.011858812533319 |
| 1x40 | ||||||
| 1x48 | 19 | 1 | 1 | 48 | 0.0050000003539026 | 0.013762221671641 |
| 1x56 | 12 | 1 | 1 | 56 | 0.0050000003539026 | 0.0086597502231598 |
| 1x64 | 8 | 1 | 1 | 64 | 0.0050000003539026 | 0.0057314746081829 |
| Name | Coverage (%) | Time (s) |
|---|---|---|
| ▼setVcm._omp_fn.0– | 0.00 | 0.00 |
| ▼Loop 64 - initAtoms.c:126-133 - exec– | 0.00 | 0.00 |
| ○Loop 65 - initAtoms.c:126-133 - exec | 0.00 | 0.00 |
