| Function: advec_mom_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, ... | Module: exec | Source: advec_mom.cpp:95-100 [...] | Coverage (incl. loops): 2.87% | (excl. loops): 0.00% |
|---|
| Function: advec_mom_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, ... | Module: exec | Source: advec_mom.cpp:95-100 [...] | Coverage (incl. loops): 2.87% | (excl. loops): 0.00% |
|---|
/home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/build/CloverLeaf2.0-CXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
/home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/build/CloverLeaf2.0-CXX/src/omp/advec_mom.cpp: 95 - 100 |
-------------------------------------------------------------------------------- |
95: #pragma omp parallel for simd collapse(2) |
96: for (int j = (y_min + 1); j < (y_max + 1 + 2); j++) { |
97: for (int i = (x_min - 1 + 1); i < (x_max + 2 + 2); i++) { |
98: node_mass_post(i, j) = 0.25 * (density1(i + 0, j - 1) * post_vol(i + 0, j - 1) + density1(i, j) * post_vol(i, j) + |
99: density1(i - 1, j - 1) * post_vol(i - 1, j - 1) + density1(i - 1, j + 0) * post_vol(i - 1, j + 0)); |
100: node_mass_pre(i, j) = node_mass_post(i, j) - node_flux(i - 1, j + 0) + node_flux(i, j); |
0x42f7c0 STP X29, X30, [SP, #-256]! |
0x42f7c4 ADD X29, SP, #0 |
0x42f7c8 STP X19, X20, [SP, #16] |
0x42f7cc ORR X20, XZR, X0 |
0x42f7d0 STP X21, X22, [SP, #32] |
0x42f7d4 LDP W22, W1, [X0, #48] |
0x42f7d8 LDR W2, [X20, #40] |
0x42f7dc LDR W0, [X0, #44] |
0x42f7e0 ADD W3, W1, #3 |
0x42f7e4 ADD W22, W22, #1 |
0x42f7e8 STP W3, W2, [SP, #176] |
0x42f7ec CMP W22, W3 |
0x42f7f0 B.GE 42fbd4 |
0x42f7f4 ADD W19, W0, #4 |
0x42f7f8 STP X23, X24, [SP, #48] |
0x42f7fc SUB W23, W3, W22 |
0x42f800 CMP W2, W19 |
0x42f804 B.GE 42fbd0 |
0x42f808 SUB W4, W19, W2 |
0x42f80c MUL W24, W23, W4 |
0x42f810 STR W4, [SP, #248] |
0x42f814 BL 410210 |
0x42f818 ORR W21, WZR, W0 |
0x42f81c BL 410240 |
0x42f820 UDIV W6, W24, W21 |
0x42f824 ORR W5, WZR, W0 |
0x42f828 MSUB W7, W6, W21, W24 |
0x42f82c CMP W0, W7 |
0x42f830 B.CC 42fc00 |
0x42f834 MADD W16, W6, W5, W7 |
0x42f838 ADD W8, W6, W16 |
0x42f83c STR W8, [SP, #252] |
0x42f840 CMP W16, W8 |
0x42f844 B.CS 42fbd0 |
0x42f848 LDR W9, [SP, #248] |
0x42f84c FMOV D31, #0.2500000 |
0x42f850 FMOV V30.2D, #0.2500000 |
0x42f854 STP X25, X26, [SP, #64] |
0x42f858 LDR W13, [SP, #180] |
0x42f85c UDIV W10, W16, W9 |
0x42f860 STP X27, X28, [SP, #80] |
0x42f864 LDR X23, [X20, #32] |
0x42f868 LDP X24, X27, [X20, #16] |
0x42f86c ADD W12, W10, W22 |
0x42f870 LDP X22, X14, [X20] |
0x42f874 MSUB W11, W10, W9, W16 |
0x42f878 SBFM X25, X12, #0, #31 |
0x42f87c ADD W28, W11, W13 |
0x42f880 SUB W1, W19, W28 |
0x42f884 ORR X26, XZR, X14 |
(189) 0x42f888 CMP W6, W1 |
(189) 0x42f88c CSEL W2, W6, W1, LS |
(189) 0x42f890 ADD W15, W16, W2 |
(189) 0x42f894 STR W15, [SP, #116] |
(189) 0x42f898 CMP W16, W15 |
(189) 0x42f89c B.CS 42fbac |
(189) 0x42f8a0 LDR X30, [X23] |
(189) 0x42f8a4 SUB W16, W25, #1 |
(189) 0x42f8a8 SBFM X17, X16, #0, #31 |
(189) 0x42f8ac LDR X18, [X22] |
(189) 0x42f8b0 LDR X20, [X24] |
(189) 0x42f8b4 MUL X21, X17, X30 |
(189) 0x42f8b8 LDR X6, [X27] |
(189) 0x42f8bc MUL X1, X30, X25 |
(189) 0x42f8c0 MUL X19, X17, X18 |
(189) 0x42f8c4 LDR X7, [X27, #16] |
(189) 0x42f8c8 MUL X3, X18, X25 |
(189) 0x42f8cc MUL X8, X25, X20 |
(189) 0x42f8d0 LDR X10, [X24, #16] |
(189) 0x42f8d4 MUL X9, X25, X6 |
(189) 0x42f8d8 STR X19, [SP, #168] |
(189) 0x42f8dc LDR X0, [X26] |
(189) 0x42f8e0 STR X7, [SP, #96] |
(189) 0x42f8e4 LDR X5, [X22, #16] |
(189) 0x42f8e8 LDR X16, [X26, #16] |
(189) 0x42f8ec MUL X30, X25, X0 |
(189) 0x42f8f0 LDR X4, [X23, #16] |
(189) 0x42f8f4 STP X10, X9, [SP, #120] |
(189) 0x42f8f8 STP X8, X1, [SP, #136] |
(189) 0x42f8fc STP X3, X21, [SP, #152] |
(189) 0x42f900 CMP W2, #1 |
(189) 0x42f904 B.EQ 42fafc |
(189) 0x42f908 SBFM X20, X28, #0, #31 |
(189) 0x42f90c MOVZ X0, #0 |
(189) 0x42f910 ADD X11, X19, X20 |
(189) 0x42f914 ADD X12, X21, X20 |
(189) 0x42f918 ADD X13, X3, X20 |
(189) 0x42f91c ADD X21, X1, X20 |
(189) 0x42f920 ADD X15, X5, X11,LSL #3 |
(189) 0x42f924 UBFM X7, X13, #61, #60 |
(189) 0x42f928 ADD X17, X30, X20 |
(189) 0x42f92c ADD X14, X4, X12,LSL #3 |
(189) 0x42f930 UBFM X18, X11, #61, #60 |
(189) 0x42f934 UBFM X11, X21, #61, #60 |
(189) 0x42f938 ADD X3, X5, X13,LSL #3 |
(189) 0x42f93c ADD X8, X8, X20 |
(189) 0x42f940 UBFM X17, X17, #61, #60 |
(189) 0x42f944 ADD X1, X4, X21,LSL #3 |
(189) 0x42f948 UBFM X19, X12, #61, #60 |
(189) 0x42f94c ADD X20, X9, X20 |
(189) 0x42f950 ADD X9, X10, X8,LSL #3 |
(189) 0x42f954 SUB X12, X18, #8 |
(189) 0x42f958 SUB X6, X19, #8 |
(189) 0x42f95c STP X11, X7, [SP, #184] |
(189) 0x42f960 UBFM X10, X8, #61, #60 |
(189) 0x42f964 SUB X13, X17, #8 |
(189) 0x42f968 SUB X11, X11, #8 |
(189) 0x42f96c SUB X7, X7, #8 |
(189) 0x42f970 STP X12, X6, [SP, #200] |
(189) 0x42f974 UBFM X8, X20, #61, #60 |
(189) 0x42f978 UBFM W21, W2, #1, #31 |
(189) 0x42f97c STR X11, [SP, #104] |
(189) 0x42f980 ADD X11, X5, X7 |
(189) 0x42f984 STP X10, X13, [SP, #224] |
(189) 0x42f988 ADD X13, X5, X12 |
(189) 0x42f98c ADD X12, X4, X6 |
(189) 0x42f990 LDR X6, [SP, #96] |
(189) 0x42f994 STR X7, [SP, #216] |
(189) 0x42f998 SUB X7, X17, #8 |
(189) 0x42f99c LDR X10, [SP, #104] |
(189) 0x42f9a0 STR X8, [SP, #240] |
(189) 0x42f9a4 ADD X8, X16, X7 |
(189) 0x42f9a8 ADD X7, X16, X17 |
(189) 0x42f9ac ADD X6, X6, X20,LSL #3 |
(189) 0x42f9b0 UBFM X20, X21, #60, #59 |
(189) 0x42f9b4 ADD X10, X4, X10 |
(189) 0x42f9b8 TBZ W21, #0, 42fa40 |
(189) 0x42f9bc LDP X0, X21, [SP, #184] |
(189) 0x42f9c0 LDR Q20, [X5, X18] |
(189) 0x42f9c4 LDR Q18, [X5, X21] |
(189) 0x42f9c8 LDR X21, [SP, #104] |
(189) 0x42f9cc LDR X18, [SP, #208] |
(189) 0x42f9d0 LDR Q19, [X4, X0] |
(189) 0x42f9d4 MOVZ X0, #16 |
(189) 0x42f9d8 LDR Q7, [X4, X21] |
(189) 0x42f9dc LDR X21, [SP, #216] |
(189) 0x42f9e0 LDR Q17, [X4, X18] |
(189) 0x42f9e4 FMUL V0.2D, V19.2D, V18.2D |
(189) 0x42f9e8 LDR X18, [SP, #200] |
(189) 0x42f9ec LDR Q6, [X5, X21] |
(189) 0x42f9f0 LDR Q21, [X4, X19] |
(189) 0x42f9f4 LDR Q16, [X5, X18] |
(189) 0x42f9f8 FMUL V1.2D, V7.2D, V6.2D |
(189) 0x42f9fc LDR X18, [SP, #120] |
(189) 0x42fa00 FMLA V0.2D, V21.2D, V20.2D |
(189) 0x42fa04 LDR X19, [SP, #224] |
(189) 0x42fa08 FMLA V1.2D, V17.2D, V16.2D |
(189) 0x42fa0c FADD V2.2D, V0.2D, V1.2D |
(189) 0x42fa10 FMUL V3.2D, V2.2D, V30.2D |
(189) 0x42fa14 STR Q3, [X18, X19] |
(189) 0x42fa18 LDR Q5, [X16, X17] |
(189) 0x42fa1c LDP X17, X19, [SP, #232] |
(189) 0x42fa20 LDR X18, [SP, #96] |
(189) 0x42fa24 LDR Q4, [X16, X17] |
(189) 0x42fa28 FSUB V22.2D, V5.2D, V4.2D |
(189) 0x42fa2c FADD V23.2D, V22.2D, V3.2D |
(189) 0x42fa30 STR Q23, [X18, X19] |
(189) 0x42fa34 CMP X0, X20 |
(189) 0x42fa38 B.EQ 42faf0 |
(189) 0x42fa3c HINT #0 |
(190) 0x42fa40 LDR Q24, [X1, X0] |
(190) 0x42fa44 ADD X21, X0, #16 |
(190) 0x42fa48 LDR Q25, [X3, X0] |
(190) 0x42fa4c LDR Q28, [X10, X0] |
(190) 0x42fa50 LDR Q27, [X11, X0] |
(190) 0x42fa54 LDR Q29, [X14, X0] |
(190) 0x42fa58 FMUL V26.2D, V24.2D, V25.2D |
(190) 0x42fa5c LDR Q21, [X15, X0] |
(190) 0x42fa60 LDR Q20, [X12, X0] |
(190) 0x42fa64 FMUL V19.2D, V28.2D, V27.2D |
(190) 0x42fa68 LDR Q18, [X13, X0] |
(190) 0x42fa6c FMLA V26.2D, V29.2D, V21.2D |
(190) 0x42fa70 FMLA V19.2D, V20.2D, V18.2D |
(190) 0x42fa74 FADD V0.2D, V26.2D, V19.2D |
(190) 0x42fa78 FMUL V17.2D, V0.2D, V30.2D |
(190) 0x42fa7c STR Q17, [X9, X0] |
(190) 0x42fa80 LDR Q7, [X7, X0] |
(190) 0x42fa84 LDR Q16, [X8, X0] |
(190) 0x42fa88 FSUB V6.2D, V7.2D, V16.2D |
(190) 0x42fa8c FADD V1.2D, V6.2D, V17.2D |
(190) 0x42fa90 STR Q1, [X6, X0] |
(190) 0x42fa94 ADD X0, X0, #32 |
(190) 0x42fa98 LDR Q2, [X1, X21] |
(190) 0x42fa9c LDR Q3, [X3, X21] |
(190) 0x42faa0 LDR Q5, [X10, X21] |
(190) 0x42faa4 LDR Q4, [X11, X21] |
(190) 0x42faa8 LDR Q22, [X14, X21] |
(190) 0x42faac FMUL V23.2D, V2.2D, V3.2D |
(190) 0x42fab0 LDR Q24, [X15, X21] |
(190) 0x42fab4 LDR Q25, [X12, X21] |
(190) 0x42fab8 FMUL V28.2D, V5.2D, V4.2D |
(190) 0x42fabc LDR Q27, [X13, X21] |
(190) 0x42fac0 FMLA V23.2D, V22.2D, V24.2D |
(190) 0x42fac4 FMLA V28.2D, V25.2D, V27.2D |
(190) 0x42fac8 FADD V29.2D, V23.2D, V28.2D |
(190) 0x42facc FMUL V26.2D, V29.2D, V30.2D |
(190) 0x42fad0 STR Q26, [X9, X21] |
(190) 0x42fad4 LDR Q21, [X7, X21] |
(190) 0x42fad8 LDR Q20, [X8, X21] |
(190) 0x42fadc FSUB V19.2D, V21.2D, V20.2D |
(190) 0x42fae0 FADD V18.2D, V19.2D, V26.2D |
(190) 0x42fae4 STR Q18, [X6, X21] |
(190) 0x42fae8 CMP X0, X20 |
(190) 0x42faec B.NE 42fa40 |
(189) 0x42faf0 TBZ W2, #0, 42fba8 |
(189) 0x42faf4 AND W2, W2, #0xfffffffe |
(189) 0x42faf8 ADD W28, W28, W2 |
(189) 0x42fafc LDR X3, [SP, #144] |
(189) 0x42fb00 SUB W15, W28, #1 |
(189) 0x42fb04 SBFM X14, X28, #0, #31 |
(189) 0x42fb08 SBFM X1, X15, #0, #31 |
(189) 0x42fb0c LDR X10, [SP, #152] |
(189) 0x42fb10 ADD X21, X30, X1 |
(189) 0x42fb14 ADD X30, X30, X14 |
(189) 0x42fb18 LDR X9, [SP, #160] |
(189) 0x42fb1c ADD X13, X3, X14 |
(189) 0x42fb20 ADD X12, X3, X1 |
(189) 0x42fb24 LDR X0, [SP, #168] |
(189) 0x42fb28 ADD X8, X10, X1 |
(189) 0x42fb2c ADD X7, X10, X14 |
(189) 0x42fb30 LDR D17, [X4, X13,LSL #3] |
(189) 0x42fb34 ADD X6, X9, X1 |
(189) 0x42fb38 ADD X11, X9, X14 |
(189) 0x42fb3c LDR D7, [X4, X12,LSL #3] |
(189) 0x42fb40 ADD X19, X0, X1 |
(189) 0x42fb44 ADD X18, X0, X14 |
(189) 0x42fb48 LDR D6, [X5, X8,LSL #3] |
(189) 0x42fb4c LDR D2, [X5, X7,LSL #3] |
(189) 0x42fb50 LDR D0, [X4, X6,LSL #3] |
(189) 0x42fb54 LDR D16, [X4, X11,LSL #3] |
(189) 0x42fb58 FMUL D1, D7, D6 |
(189) 0x42fb5c LDR D3, [X5, X19,LSL #3] |
(189) 0x42fb60 FMUL D5, D17, D2 |
(189) 0x42fb64 LDR D4, [X5, X18,LSL #3] |
(189) 0x42fb68 LDR X20, [SP, #136] |
(189) 0x42fb6c FMADD D22, D0, D3, D1 |
(189) 0x42fb70 LDR X4, [SP, #120] |
(189) 0x42fb74 FMADD D23, D16, D4, D5 |
(189) 0x42fb78 LDR X2, [SP, #128] |
(189) 0x42fb7c ADD X17, X20, X14 |
(189) 0x42fb80 LDR X5, [SP, #96] |
(189) 0x42fb84 ADD X28, X2, X14 |
(189) 0x42fb88 FADD D24, D23, D22 |
(189) 0x42fb8c FMUL D28, D24, D31 |
(189) 0x42fb90 STR D28, [X4, X17,LSL #3] |
(189) 0x42fb94 LDR D25, [X16, X21,LSL #3] |
(189) 0x42fb98 LDR D27, [X16, X30,LSL #3] |
(189) 0x42fb9c FSUB D29, D27, D25 |
(189) 0x42fba0 FADD D26, D29, D28 |
(189) 0x42fba4 STR D26, [X5, X28,LSL #3] |
(189) 0x42fba8 LDR W16, [SP, #116] |
(189) 0x42fbac ADD X25, X25, #1 |
(189) 0x42fbb0 LDR W15, [SP, #176] |
(189) 0x42fbb4 CMP W15, W25 |
(189) 0x42fbb8 B.LE 42fbe4 |
(189) 0x42fbbc LDR W14, [SP, #252] |
(189) 0x42fbc0 LDR W28, [SP, #180] |
(189) 0x42fbc4 LDR W1, [SP, #248] |
(189) 0x42fbc8 SUB W6, W14, W16 |
(189) 0x42fbcc B 42f888 |
0x42fbd0 LDP X23, X24, [SP, #48] |
0x42fbd4 LDP X19, X20, [SP, #16] |
0x42fbd8 LDP X21, X22, [SP, #32] |
0x42fbdc LDP X29, X30, [SP], #256 |
0x42fbe0 RET |
0x42fbe4 LDP X23, X24, [SP, #48] |
0x42fbe8 LDP X25, X26, [SP, #64] |
0x42fbec LDP X27, X28, [SP, #80] |
0x42fbf0 LDP X19, X20, [SP, #16] |
0x42fbf4 LDP X21, X22, [SP, #32] |
0x42fbf8 LDP X29, X30, [SP], #256 |
0x42fbfc RET |
0x42fc00 ADD W6, W6, #1 |
0x42fc04 MOVZ W7, #0 |
0x42fc08 B 42f834 |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►98.43+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| Path / |
The code analyzed by CQA in this panel excludes loops and represents 0.00% of application time for run gcc_4.
| Source file and lines | advec_mom.cpp:95-100 |
| Module | exec |
| nb instructions | 65 |
| nb uops | 65 |
| loop length | 260 |
| used w registers | 22 |
| used x registers | 16 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 1 |
| used q registers | 0 |
| used v registers | 1 |
| used z registers | 0 |
| nb stack references | 21 |
| micro-operation queue | 8.13 cycles |
| front end | 8.13 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 4.50 | 4.50 | 7.50 | 7.50 | 7.50 | 7.50 | 0.50 | 0.50 | 0.50 | 0.50 | 9.17 | 8.83 | 9.00 | 4.50 | 4.50 |
| cycles | 4.50 | 4.50 | 7.50 | 7.50 | 7.50 | 7.50 | 0.50 | 0.50 | 0.50 | 0.50 | 9.17 | 8.83 | 9.00 | 4.50 | 4.50 |
| Cycles executing div or sqrt instructions | 10.00-25.00 |
| Front-end | 8.13 |
| Dispatch | 9.17 |
| DIV/SQRT | 10.00-25.00 |
| Overall L1 | 10.00-25.00 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| other | 0% |
| all | 50% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 50% |
| all | 2% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | 0% |
| other | 9% |
| all | 28% |
| load | 39% |
| store | 38% |
| mul | 12% |
| add-sub | 13% |
| fma | 12% |
| other | 22% |
| all | 37% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 37% |
| all | 28% |
| load | 39% |
| store | 38% |
| mul | 12% |
| add-sub | 13% |
| fma | 12% |
| div/sqrt | 12% |
| other | 26% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #-256]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X20, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDP W22, W1, [X0, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDR W2, [X20, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| LDR W0, [X0, #44] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ADD W3, W1, #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W22, W22, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STP W3, W2, [SP, #176] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| CMP W22, W3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 42fbd4 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.5+0x414> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W19, W0, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| SUB W23, W3, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W2, W19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 42fbd0 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.5+0x410> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| SUB W4, W19, W2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MUL W24, W23, W4 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| STR W4, [SP, #248] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| BL 410210 <@plt_start@+0x1f0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W21, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410240 <@plt_start@+0x220> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| UDIV W6, W24, W21 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| ORR W5, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MSUB W7, W6, W21, W24 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| CMP W0, W7 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CC 42fc00 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.5+0x440> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W16, W6, W5, W7 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W8, W6, W16 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR W8, [SP, #252] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| CMP W16, W8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 42fbd0 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.5+0x410> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR W9, [SP, #248] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| FMOV D31, #0.2500000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| FMOV V30.2D, #0.2500000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | vect (50.0%) |
| STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDR W13, [SP, #180] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| UDIV W10, W16, W9 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | scal (12.5%) |
| STP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDR X23, [X20, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDP X24, X27, [X20, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| ADD W12, W10, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| LDP X22, X14, [X20] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| MSUB W11, W10, W9, W16 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| SBFM X25, X12, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| ADD W28, W11, W13 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W1, W19, W28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ORR X26, XZR, X14 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #256 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #256 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W6, W6, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W7, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| B 42f834 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.5+0x74> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
The code analyzed by CQA in this panel excludes loops and represents 0.00% of application time for run gcc_4.
| Source file and lines | advec_mom.cpp:95-100 |
| Module | exec |
| nb instructions | 65 |
| nb uops | 65 |
| loop length | 260 |
| used w registers | 22 |
| used x registers | 16 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 1 |
| used q registers | 0 |
| used v registers | 1 |
| used z registers | 0 |
| nb stack references | 21 |
| micro-operation queue | 8.13 cycles |
| front end | 8.13 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 4.50 | 4.50 | 7.50 | 7.50 | 7.50 | 7.50 | 0.50 | 0.50 | 0.50 | 0.50 | 9.17 | 8.83 | 9.00 | 4.50 | 4.50 |
| cycles | 4.50 | 4.50 | 7.50 | 7.50 | 7.50 | 7.50 | 0.50 | 0.50 | 0.50 | 0.50 | 9.17 | 8.83 | 9.00 | 4.50 | 4.50 |
| Cycles executing div or sqrt instructions | 10.00-25.00 |
| Front-end | 8.13 |
| Dispatch | 9.17 |
| DIV/SQRT | 10.00-25.00 |
| Overall L1 | 10.00-25.00 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| other | 0% |
| all | 50% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 50% |
| all | 2% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | 0% |
| other | 9% |
| all | 28% |
| load | 39% |
| store | 38% |
| mul | 12% |
| add-sub | 13% |
| fma | 12% |
| other | 22% |
| all | 37% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 37% |
| all | 28% |
| load | 39% |
| store | 38% |
| mul | 12% |
| add-sub | 13% |
| fma | 12% |
| div/sqrt | 12% |
| other | 26% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #-256]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X20, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDP W22, W1, [X0, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDR W2, [X20, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| LDR W0, [X0, #44] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ADD W3, W1, #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W22, W22, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STP W3, W2, [SP, #176] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| CMP W22, W3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 42fbd4 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.5+0x414> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W19, W0, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| SUB W23, W3, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W2, W19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 42fbd0 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.5+0x410> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| SUB W4, W19, W2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MUL W24, W23, W4 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| STR W4, [SP, #248] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| BL 410210 <@plt_start@+0x1f0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W21, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410240 <@plt_start@+0x220> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| UDIV W6, W24, W21 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| ORR W5, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MSUB W7, W6, W21, W24 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| CMP W0, W7 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CC 42fc00 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.5+0x440> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W16, W6, W5, W7 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W8, W6, W16 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR W8, [SP, #252] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| CMP W16, W8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 42fbd0 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.5+0x410> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR W9, [SP, #248] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| FMOV D31, #0.2500000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| FMOV V30.2D, #0.2500000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | vect (50.0%) |
| STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDR W13, [SP, #180] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| UDIV W10, W16, W9 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | scal (12.5%) |
| STP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDR X23, [X20, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDP X24, X27, [X20, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| ADD W12, W10, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| LDP X22, X14, [X20] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| MSUB W11, W10, W9, W16 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| SBFM X25, X12, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| ADD W28, W11, W13 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W1, W19, W28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ORR X26, XZR, X14 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #256 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #256 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W6, W6, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W7, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| B 42f834 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.5+0x74> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| Name | Coverage (%) | Time (s) |
|---|---|---|
| ▼advec_mom_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, ... | 2.87 | 3.86 |
| ▼Loop 189 - advec_mom.cpp:97-100 - exec | 0.01 | 0.02 |
| ○Loop 190 - advec_mom.cpp:98-100 - exec | 2.86 | 3.74 |
