| Function: advec_mom_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, ... | Module: exec | Source: advec_mom.cpp:157-160 [...] | Coverage (incl. loops): 1.10% | (excl. loops): 0.00% |
|---|
| Function: advec_mom_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, ... | Module: exec | Source: advec_mom.cpp:157-160 [...] | Coverage (incl. loops): 1.10% | (excl. loops): 0.00% |
|---|
/home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/build/CloverLeaf2.0-CXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
/home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/build/CloverLeaf2.0-CXX/src/omp/advec_mom.cpp: 157 - 160 |
-------------------------------------------------------------------------------- |
157: #pragma omp parallel for simd collapse(2) |
158: for (int j = (y_min - 2 + 1); j < (y_max + 2 + 2); j++) { |
159: for (int i = (x_min + 1); i < (x_max + 1 + 2); i++) { |
160: node_flux(i, j) = 0.25 * (mass_flux_y(i - 1, j + 0) + mass_flux_y(i, j) + mass_flux_y(i - 1, j + 1) + mass_flux_y(i + 0, j + 1)); |
0x4306e0 STP X29, X30, [SP, #944]! |
0x4306e4 ADD X29, SP, #0 |
0x4306e8 STP X19, X20, [SP, #16] |
0x4306ec STP X23, X24, [SP, #48] |
0x4306f0 STP X25, X26, [SP, #64] |
0x4306f4 LDP W25, W19, [X0, #24] |
0x4306f8 LDP W20, W23, [X0, #16] |
0x4306fc SUB W25, W25, #1 |
0x430700 ADD W19, W19, #4 |
0x430704 CMP W25, W19 |
0x430708 B.GE 4309c0 |
0x43070c ADD W20, W20, #1 |
0x430710 ADD W23, W23, #3 |
0x430714 SUB W26, W19, W25 |
0x430718 CMP W20, W23 |
0x43071c B.GE 4309c0 |
0x430720 STP X21, X22, [SP, #32] |
0x430724 SUB W21, W23, W20 |
0x430728 ORR X24, XZR, X0 |
0x43072c MUL W26, W26, W21 |
0x430730 BL 410210 |
0x430734 ORR W22, WZR, W0 |
0x430738 BL 410240 |
0x43073c UDIV W1, W26, W22 |
0x430740 ORR W2, WZR, W0 |
0x430744 MSUB W3, W1, W22, W26 |
0x430748 CMP W0, W3 |
0x43074c B.CC 4309d4 |
0x430750 MADD W11, W1, W2, W3 |
0x430754 ADD W22, W1, W11 |
0x430758 CMP W11, W22 |
0x43075c B.CS 4309bc |
0x430760 UDIV W0, W11, W21 |
0x430764 LDP X30, X18, [X24] |
0x430768 FMOV D31, #0.2500000 |
0x43076c FMOV V30.2D, #0.2500000 |
0x430770 MSUB W5, W0, W21, W11 |
0x430774 ADD W4, W0, W25 |
0x430778 SBFM X14, X4, #0, #31 |
0x43077c ADD W5, W5, W20 |
0x430780 SUB W17, W23, W5 |
(197) 0x430784 CMP W1, W17 |
(197) 0x430788 CSEL W1, W1, W17, #9 |
(197) 0x43078c ADD W17, W11, W1 |
(197) 0x430790 CMP W11, W17 |
(197) 0x430794 B.CS 4309a0 |
(197) 0x430798 LDR X12, [X30] |
(197) 0x43079c LDR X16, [X18] |
(197) 0x4307a0 LDR X23, [X18, #16] |
(197) 0x4307a4 MUL X15, X12, X14 |
(197) 0x4307a8 LDR X11, [X30, #16] |
(197) 0x4307ac MUL X16, X14, X16 |
(197) 0x4307b0 ADD X12, X12, X15 |
(197) 0x4307b4 CMP W1, #1 |
(197) 0x4307b8 B.EQ 430958 |
(197) 0x4307bc UBFM W13, W1, #1, #31 |
(197) 0x4307c0 SBFM X6, X5, #0, #31 |
(197) 0x4307c4 UBFM X13, X13, #60, #59 |
(197) 0x4307c8 ADD X7, X15, X6 |
(197) 0x4307cc SUB X8, X13, #16 |
(197) 0x4307d0 ADD X9, X12, X6 |
(197) 0x4307d4 UBFM X10, X8, #4, #63 |
(197) 0x4307d8 UBFM X24, X7, #61, #60 |
(197) 0x4307dc ADD X25, X10, #1 |
(197) 0x4307e0 UBFM X4, X9, #61, #60 |
(197) 0x4307e4 ADD X2, X16, X6 |
(197) 0x4307e8 ANDS X3, X25, #0x3 |
(197) 0x4307ec SUB X26, X24, #8 |
(197) 0x4307f0 SUB X25, X4, #8 |
(197) 0x4307f4 ADD X6, X23, X2,LSL #3 |
(197) 0x4307f8 MOVZ X0, #0 |
(197) 0x4307fc ADD X9, X11, X26 |
(197) 0x430800 ADD X7, X11, X25 |
(197) 0x430804 ADD X10, X11, X24 |
(197) 0x430808 ADD X8, X11, X4 |
(197) 0x43080c UBFM X2, X2, #61, #60 |
(197) 0x430810 B.EQ 4308a4 |
(197) 0x430814 CMP X3, #1 |
(197) 0x430818 B.EQ 430874 |
(197) 0x43081c CMP X3, #2 |
(197) 0x430820 B.EQ 43084c |
(197) 0x430824 LDR Q6, [X11, X24] |
(197) 0x430828 MOVZ X0, #16 |
(197) 0x43082c LDR Q5, [X11, X26] |
(197) 0x430830 LDR Q4, [X11, X4] |
(197) 0x430834 LDR Q3, [X11, X25] |
(197) 0x430838 FADD V0.2D, V6.2D, V5.2D |
(197) 0x43083c FADD V1.2D, V4.2D, V3.2D |
(197) 0x430840 FADD V2.2D, V0.2D, V1.2D |
(197) 0x430844 FMUL V7.2D, V2.2D, V30.2D |
(197) 0x430848 STR Q7, [X23, X2] |
(197) 0x43084c LDR Q16, [X10, X0] |
(197) 0x430850 LDR Q17, [X9, X0] |
(197) 0x430854 LDR Q18, [X8, X0] |
(197) 0x430858 LDR Q28, [X7, X0] |
(197) 0x43085c FADD V19.2D, V16.2D, V17.2D |
(197) 0x430860 FADD V20.2D, V18.2D, V28.2D |
(197) 0x430864 FADD V21.2D, V19.2D, V20.2D |
(197) 0x430868 FMUL V22.2D, V21.2D, V30.2D |
(197) 0x43086c STR Q22, [X6, X0] |
(197) 0x430870 ADD X0, X0, #16 |
(197) 0x430874 LDR Q27, [X10, X0] |
(197) 0x430878 LDR Q29, [X9, X0] |
(197) 0x43087c LDR Q26, [X8, X0] |
(197) 0x430880 LDR Q25, [X7, X0] |
(197) 0x430884 FADD V23.2D, V27.2D, V29.2D |
(197) 0x430888 FADD V24.2D, V26.2D, V25.2D |
(197) 0x43088c FADD V6.2D, V23.2D, V24.2D |
(197) 0x430890 FMUL V5.2D, V6.2D, V30.2D |
(197) 0x430894 STR Q5, [X6, X0] |
(197) 0x430898 ADD X0, X0, #16 |
(197) 0x43089c CMP X0, X13 |
(197) 0x4308a0 B.EQ 43094c |
(198) 0x4308a4 LDR Q4, [X10, X0] |
(198) 0x4308a8 ADD X24, X0, #16 |
(198) 0x4308ac ADD X4, X0, #32 |
(198) 0x4308b0 ADD X3, X0, #48 |
(198) 0x4308b4 LDR Q3, [X9, X0] |
(198) 0x4308b8 LDR Q0, [X8, X0] |
(198) 0x4308bc LDR Q1, [X7, X0] |
(198) 0x4308c0 FADD V2.2D, V4.2D, V3.2D |
(198) 0x4308c4 FADD V7.2D, V0.2D, V1.2D |
(198) 0x4308c8 FADD V16.2D, V2.2D, V7.2D |
(198) 0x4308cc FMUL V17.2D, V16.2D, V30.2D |
(198) 0x4308d0 STR Q17, [X6, X0] |
(198) 0x4308d4 ADD X0, X0, #64 |
(198) 0x4308d8 LDR Q28, [X10, X24] |
(198) 0x4308dc LDR Q19, [X9, X24] |
(198) 0x4308e0 LDR Q18, [X8, X24] |
(198) 0x4308e4 LDR Q20, [X7, X24] |
(198) 0x4308e8 FADD V21.2D, V28.2D, V19.2D |
(198) 0x4308ec FADD V22.2D, V18.2D, V20.2D |
(198) 0x4308f0 FADD V27.2D, V21.2D, V22.2D |
(198) 0x4308f4 FMUL V29.2D, V27.2D, V30.2D |
(198) 0x4308f8 STR Q29, [X6, X24] |
(198) 0x4308fc LDR Q26, [X10, X4] |
(198) 0x430900 LDR Q25, [X9, X4] |
(198) 0x430904 LDR Q23, [X8, X4] |
(198) 0x430908 LDR Q24, [X7, X4] |
(198) 0x43090c FADD V6.2D, V26.2D, V25.2D |
(198) 0x430910 FADD V5.2D, V23.2D, V24.2D |
(198) 0x430914 FADD V4.2D, V6.2D, V5.2D |
(198) 0x430918 FMUL V3.2D, V4.2D, V30.2D |
(198) 0x43091c STR Q3, [X6, X4] |
(198) 0x430920 LDR Q0, [X10, X3] |
(198) 0x430924 LDR Q7, [X9, X3] |
(198) 0x430928 LDR Q2, [X8, X3] |
(198) 0x43092c LDR Q1, [X7, X3] |
(198) 0x430930 FADD V16.2D, V0.2D, V7.2D |
(198) 0x430934 FADD V17.2D, V2.2D, V1.2D |
(198) 0x430938 FADD V28.2D, V16.2D, V17.2D |
(198) 0x43093c FMUL V19.2D, V28.2D, V30.2D |
(198) 0x430940 STR Q19, [X6, X3] |
(198) 0x430944 CMP X0, X13 |
(198) 0x430948 B.NE 4308a4 |
(197) 0x43094c TBZ W1, #0, 43099c |
(197) 0x430950 AND W1, W1, #0xfffffffe |
(197) 0x430954 ADD W5, W5, W1 |
(197) 0x430958 SUB W13, W5, #1 |
(197) 0x43095c SBFM X26, X5, #0, #31 |
(197) 0x430960 SBFM X25, X13, #0, #31 |
(197) 0x430964 ADD X9, X12, X26 |
(197) 0x430968 ADD X7, X15, X25 |
(197) 0x43096c ADD X12, X12, X25 |
(197) 0x430970 ADD X15, X15, X26 |
(197) 0x430974 LDR D18, [X11, X9,LSL #3] |
(197) 0x430978 ADD X16, X16, X26 |
(197) 0x43097c LDR D20, [X11, X12,LSL #3] |
(197) 0x430980 LDR D21, [X11, X7,LSL #3] |
(197) 0x430984 LDR D22, [X11, X15,LSL #3] |
(197) 0x430988 FADD D29, D20, D21 |
(197) 0x43098c FADD D27, D18, D22 |
(197) 0x430990 FADD D26, D27, D29 |
(197) 0x430994 FMUL D25, D26, D31 |
(197) 0x430998 STR D25, [X23, X16,LSL #3] |
(197) 0x43099c ORR W11, WZR, W17 |
(197) 0x4309a0 ADD X14, X14, #1 |
(197) 0x4309a4 CMP W19, W14 |
(197) 0x4309a8 B.LE 4309bc |
(197) 0x4309ac SUB W1, W22, W11 |
(197) 0x4309b0 ORR W17, WZR, W21 |
(197) 0x4309b4 ORR W5, WZR, W20 |
(197) 0x4309b8 B 430784 |
0x4309bc LDP X21, X22, [SP, #32] |
0x4309c0 LDP X19, X20, [SP, #16] |
0x4309c4 LDP X23, X24, [SP, #48] |
0x4309c8 LDP X25, X26, [SP, #64] |
0x4309cc LDP X29, X30, [SP], #80 |
0x4309d0 RET |
0x4309d4 ADD W1, W1, #1 |
0x4309d8 MOVZ W3, #0 |
0x4309dc B 430750 |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►98.44+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| Path / |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run gcc_4
| Source file and lines | advec_mom.cpp:157-160 |
| Module | exec |
| nb instructions | 50 |
| nb uops | 50 |
| loop length | 200 |
| used w registers | 16 |
| used x registers | 15 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 1 |
| used q registers | 0 |
| used v registers | 1 |
| used z registers | 0 |
| nb stack references | 10 |
| micro-operation queue | 6.25 cycles |
| front end | 6.25 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 4.00 | 4.00 | 7.50 | 7.50 | 7.50 | 7.50 | 0.50 | 0.50 | 0.50 | 0.50 | 4.50 | 4.17 | 4.33 | 2.50 | 2.50 |
| cycles | 4.00 | 4.00 | 7.50 | 7.50 | 7.50 | 7.50 | 0.50 | 0.50 | 0.50 | 0.50 | 4.50 | 4.17 | 4.33 | 2.50 | 2.50 |
| Cycles executing div or sqrt instructions | 10.00-25.00 |
| Front-end | 6.25 |
| Dispatch | 7.50 |
| DIV/SQRT | 10.00-25.00 |
| Overall L1 | 10.00-25.00 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| other | 0% |
| all | 50% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 50% |
| all | 2% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 10% |
| all | 26% |
| load | 42% |
| store | 50% |
| mul | 12% |
| add-sub | 13% |
| fma | 12% |
| other | 23% |
| all | 37% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 37% |
| all | 27% |
| load | 42% |
| store | 50% |
| mul | 12% |
| add-sub | 13% |
| fma | 12% |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 26% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #944]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDP W25, W19, [X0, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDP W20, W23, [X0, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| SUB W25, W25, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W19, W19, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W25, W19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 4309c0 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.8+0x2e0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W20, W20, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W23, W23, #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W26, W19, W25 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W20, W23 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 4309c0 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.8+0x2e0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| SUB W21, W23, W20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ORR X24, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MUL W26, W26, W21 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| BL 410210 <@plt_start@+0x1f0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W22, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410240 <@plt_start@+0x220> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| UDIV W1, W26, W22 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| ORR W2, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MSUB W3, W1, W22, W26 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| CMP W0, W3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CC 4309d4 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.8+0x2f4> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W11, W1, W2, W3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W22, W1, W11 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W11, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 4309bc <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.8+0x2dc> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| UDIV W0, W11, W21 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| LDP X30, X18, [X24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| FMOV D31, #0.2500000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| FMOV V30.2D, #0.2500000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | vect (50.0%) |
| MSUB W5, W0, W21, W11 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W4, W0, W25 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SBFM X14, X4, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| ADD W5, W5, W20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W17, W23, W5 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #80 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W1, W1, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W3, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| B 430750 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.8+0x70> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run gcc_4
| Source file and lines | advec_mom.cpp:157-160 |
| Module | exec |
| nb instructions | 50 |
| nb uops | 50 |
| loop length | 200 |
| used w registers | 16 |
| used x registers | 15 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 1 |
| used q registers | 0 |
| used v registers | 1 |
| used z registers | 0 |
| nb stack references | 10 |
| micro-operation queue | 6.25 cycles |
| front end | 6.25 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 4.00 | 4.00 | 7.50 | 7.50 | 7.50 | 7.50 | 0.50 | 0.50 | 0.50 | 0.50 | 4.50 | 4.17 | 4.33 | 2.50 | 2.50 |
| cycles | 4.00 | 4.00 | 7.50 | 7.50 | 7.50 | 7.50 | 0.50 | 0.50 | 0.50 | 0.50 | 4.50 | 4.17 | 4.33 | 2.50 | 2.50 |
| Cycles executing div or sqrt instructions | 10.00-25.00 |
| Front-end | 6.25 |
| Dispatch | 7.50 |
| DIV/SQRT | 10.00-25.00 |
| Overall L1 | 10.00-25.00 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| other | 0% |
| all | 50% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 50% |
| all | 2% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 10% |
| all | 26% |
| load | 42% |
| store | 50% |
| mul | 12% |
| add-sub | 13% |
| fma | 12% |
| other | 23% |
| all | 37% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 37% |
| all | 27% |
| load | 42% |
| store | 50% |
| mul | 12% |
| add-sub | 13% |
| fma | 12% |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 26% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #944]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDP W25, W19, [X0, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDP W20, W23, [X0, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| SUB W25, W25, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W19, W19, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W25, W19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 4309c0 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.8+0x2e0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W20, W20, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W23, W23, #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W26, W19, W25 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W20, W23 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 4309c0 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.8+0x2e0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| SUB W21, W23, W20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ORR X24, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MUL W26, W26, W21 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| BL 410210 <@plt_start@+0x1f0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W22, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410240 <@plt_start@+0x220> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| UDIV W1, W26, W22 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| ORR W2, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MSUB W3, W1, W22, W26 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| CMP W0, W3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CC 4309d4 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.8+0x2f4> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W11, W1, W2, W3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W22, W1, W11 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W11, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 4309bc <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.8+0x2dc> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| UDIV W0, W11, W21 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| LDP X30, X18, [X24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| FMOV D31, #0.2500000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| FMOV V30.2D, #0.2500000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | vect (50.0%) |
| MSUB W5, W0, W21, W11 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W4, W0, W25 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SBFM X14, X4, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| ADD W5, W5, W20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W17, W23, W5 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #80 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W1, W1, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W3, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| B 430750 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.8+0x70> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| Name | Coverage (%) | Time (s) |
|---|---|---|
| ▼advec_mom_kernel(int, int, int, int, clover::Buffer2D | 1.10 | 1.47 |
| ▼Loop 197 - advec_mom.cpp:159-160 - exec– | 0.00 | 0.01 |
| ○Loop 198 - advec_mom.cpp:160-160 - exec | 1.09 | 1.43 |
