| Function: cg_calc_ur(int, int, int, double, double*, double*, double const*, double*, double const*) ... | Module: exec | Source: cg.cpp:105-113 | Coverage (incl. loops): 43.33% | (excl. loops): 0.03% |
|---|
| Function: cg_calc_ur(int, int, int, double, double*, double*, double const*, double*, double const*) ... | Module: exec | Source: cg.cpp:105-113 | Coverage (incl. loops): 43.33% | (excl. loops): 0.03% |
|---|
/home/eoseret/qaas/qaas_runs/178-237-4322/intel/TeaLeaf/build/TeaLeaf/src/omp/cg.cpp: 105 - 113 |
-------------------------------------------------------------------------------- |
105: #pragma omp parallel for reduction(+ : rrn_temp) |
106: #endif |
107: for (int jj = halo_depth; jj < y - halo_depth; ++jj) { |
108: for (int kk = halo_depth; kk < x - halo_depth; ++kk) { |
109: const int index = kk + jj * x; |
110: |
111: u[index] += alpha * p[index]; |
112: r[index] -= alpha * w[index]; |
113: rrn_temp += r[index] * r[index]; |
0x41162c STP X29, X30, [SP, #-80]! |
0x411630 ADD X29, SP, #0 |
0x411634 STP X19, X20, [SP, #16] |
0x411638 ORR X19, XZR, X0 |
0x41163c STP X21, X22, [SP, #32] |
0x411640 LDR W20, [X0, #56] |
0x411644 BL 410320 |
0x411648 ORR W21, WZR, W0 |
0x41164c BL 4102b0 |
0x411650 LDR W1, [X19, #52] |
0x411654 ORR W10, WZR, W0 |
0x411658 SUB W0, W1, W20,LSL #1 |
0x41165c SDIV W1, W0, W21 |
0x411660 MSUB W0, W1, W21, W0 |
0x411664 CMP W10, W0 |
0x411668 B.LT 411870 |
0x41166c MADD W10, W1, W10, W0 |
0x411670 MOVI D30, #0 |
0x411674 ADD W1, W1, W10 |
0x411678 CMP W10, W1 |
0x41167c B.GE 4117e4 |
0x411680 STP X23, X24, [SP, #48] |
0x411684 ADD W10, W20, W10 |
0x411688 ADD W0, W20, W1 |
0x41168c SBFM X17, X20, #0, #31 |
0x411690 STP X25, X26, [SP, #64] |
0x411694 LDR D27, [X19] |
0x411698 LDP X9, X13, [X19, #8] |
0x41169c LDP X8, X12, [X19, #24] |
0x4116a0 DUP V28.2D, V27.D[0] |
0x4116a4 LDR W16, [X19, #48] |
0x4116a8 SUB W22, W16, W20,LSL #1 |
0x4116ac SUB W30, W16, W20 |
0x4116b0 MUL W14, W10, W16 |
0x4116b4 UBFM W7, W22, #1, #31 |
0x4116b8 AND W18, W22, #0xfffffffe |
0x4116bc UBFM X7, X7, #60, #59 |
0x4116c0 ADD W18, W18, W20 |
0x4116c4 SUB W21, W22, #1 |
0x4116c8 AND W15, W22, #0x1 |
(24) 0x4116cc CMP W20, W30 |
(24) 0x4116d0 B.GE 41185c |
(24) 0x4116d4 SBFM X24, X14, #0, #31 |
(24) 0x4116d8 SBFM X4, X20, #0, #31 |
(24) 0x4116dc CMP W21, #3 |
(24) 0x4116e0 B.LS 411818 |
(24) 0x4116e4 ORR X4, XZR, X17 |
(27) 0x4116e8 SBFM X24, X14, #0, #31 |
(27) 0x4116ec ADD X2, X24, X17 |
(27) 0x4116f0 UBFM X5, X2, #61, #60 |
(27) 0x4116f4 ADD X2, X8, X2,LSL #3 |
(27) 0x4116f8 ADD X1, X5, #16 |
(27) 0x4116fc ADD X3, X9, X5 |
(27) 0x411700 ADD X6, X12, X1 |
(27) 0x411704 ADD X25, X9, X1 |
(27) 0x411708 CMP X6, X3 |
(27) 0x41170c ADD X6, X12, X5 |
(27) 0x411710 ADD X11, X8, X1 |
(27) 0x411714 CCMP X25, X6, #0, #8 |
(27) 0x411718 CSINC W23, WZR, WZR, #8 |
(27) 0x41171c ADD X1, X5, #8 |
(27) 0x411720 CMP X3, X11 |
(27) 0x411724 ADD X26, X12, X1 |
(27) 0x411728 CCMP X2, X25, #2, #3 |
(27) 0x41172c CSINC W11, WZR, WZR, #3 |
(27) 0x411730 ADD X1, X13, X1 |
(27) 0x411734 CMP X26, X2 |
(27) 0x411738 CCMP X2, X1, #4, #1 |
(27) 0x41173c AND W11, W23, W11 |
(27) 0x411740 CCMP X3, X1, #4, #1 |
(27) 0x411744 CSINC W1, WZR, WZR, #0 |
(27) 0x411748 ANDS WZR, W1, W11 |
(27) 0x41174c B.EQ 411818 |
(27) 0x411750 MOVZ X1, #0 |
(27) 0x411754 ADD X5, X13, X5 |
(27) 0x411758 HINT #0 |
(27) 0x41175c HINT #0 |
(26) 0x411760 LDR Q29, [X5, X1] |
(26) 0x411764 LDR Q31, [X3, X1] |
(26) 0x411768 FMLA V31.2D, V29.2D, V28.2D |
(26) 0x41176c STR Q31, [X3, X1] |
(26) 0x411770 LDR Q29, [X6, X1] |
(26) 0x411774 LDR Q31, [X2, X1] |
(26) 0x411778 FMLS V31.2D, V28.2D, V29.2D |
(26) 0x41177c FMUL V29.2D, V31.2D, V31.2D |
(26) 0x411780 STR Q31, [X2, X1] |
(26) 0x411784 ADD X1, X1, #16 |
(26) 0x411788 FADD D30, D30, D29 |
(26) 0x41178c MOV D31, V29.D[1] |
(26) 0x411790 FADD D30, D31, D30 |
(26) 0x411794 CMP X7, X1 |
(26) 0x411798 B.NE 411760 |
(27) 0x41179c CBZ W15, 4117cc |
(27) 0x4117a0 ADD W1, W18, W14 |
(27) 0x4117a4 SBFM X1, X1, #61, #31 |
(27) 0x4117a8 LDR D29, [X13, X1] |
(27) 0x4117ac LDR D31, [X9, X1] |
(27) 0x4117b0 FMADD D31, D27, D29, D31 |
(27) 0x4117b4 STR D31, [X9, X1] |
(27) 0x4117b8 LDR D29, [X12, X1] |
(27) 0x4117bc LDR D31, [X8, X1] |
(27) 0x4117c0 FMSUB D31, D27, D29, D31 |
(27) 0x4117c4 FMADD D30, D31, D31, D30 |
(27) 0x4117c8 STR D31, [X8, X1] |
(27) 0x4117cc ADD W10, W10, #1 |
(27) 0x4117d0 ADD W14, W14, W16 |
(27) 0x4117d4 CMP W0, W10 |
(27) 0x4117d8 B.GT 4116e8 |
0x4117dc LDP X23, X24, [SP, #48] |
0x4117e0 LDP X25, X26, [SP, #64] |
0x4117e4 ADD X19, X19, #40 |
0x4117e8 LDR X0, [X19] |
(23) 0x4117ec ORR X1, XZR, X0 |
(23) 0x4117f0 FMOV D31, X0 |
(23) 0x4117f4 FADD D31, D30, D31 |
(23) 0x4117f8 FMOV X2, D31 |
(23) 0x4117fc CAS X1, X2, [X19] |
(23) 0x411800 CMP X0, X1 |
(23) 0x411804 B.NE 41187c |
0x411808 LDP X19, X20, [SP, #16] |
0x41180c LDP X21, X22, [SP, #32] |
0x411810 LDP X29, X30, [SP], #80 |
0x411814 RET |
(24) 0x411818 ADD X2, X22, X4 |
(24) 0x41181c ADD X1, X24, X4 |
(24) 0x411820 ADD X2, X2, X24 |
(24) 0x411824 UBFM X1, X1, #61, #60 |
(24) 0x411828 UBFM X2, X2, #61, #60 |
(25) 0x41182c LDR D29, [X13, X1] |
(25) 0x411830 LDR D31, [X9, X1] |
(25) 0x411834 FMADD D31, D27, D29, D31 |
(25) 0x411838 STR D31, [X9, X1] |
(25) 0x41183c LDR D29, [X12, X1] |
(25) 0x411840 LDR D31, [X8, X1] |
(25) 0x411844 FMSUB D31, D27, D29, D31 |
(25) 0x411848 FMADD D30, D31, D31, D30 |
(25) 0x41184c STR D31, [X8, X1] |
(25) 0x411850 ADD X1, X1, #8 |
(25) 0x411854 CMP X2, X1 |
(25) 0x411858 B.NE 41182c |
(24) 0x41185c ADD W10, W10, #1 |
(24) 0x411860 ADD W14, W14, W16 |
(24) 0x411864 CMP W0, W10 |
(24) 0x411868 B.GT 4116cc |
0x41186c B 4117dc |
0x411870 ADD W1, W1, #1 |
0x411874 MOVZ W0, #0 |
0x411878 B 41166c |
(23) 0x41187c ORR X0, XZR, X1 |
(23) 0x411880 B 4117ec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►98.42+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►1.58+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | run_cg_calc_ur(Chunk*, Setting[...] | cg.cpp:117 | exec |
| ○ | cg_main_step_driver(Chunk*, Se[...] | cg_driver.cpp:69 | exec |
| ○ | cg_driver(Chunk*, Settings&, d[...] | cg_driver.cpp:18 | exec |
| ○ | solve(Chunk*, Settings&, int, [...] | diffuse.cpp:51 | exec |
| ○ | diffuse(Chunk*, Settings&) | diffuse.cpp:12 | exec |
| ○ | main | main.cpp:179 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | new_allocator.h:104 | exec |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| Path / |
The code analyzed by CQA in that panel excludes loops and represents 0.03% of application time for run gcc_0
| Source file and lines | cg.cpp:105-113 |
| Module | exec |
| nb instructions | 52 |
| nb uops | 52 |
| loop length | 208 |
| used w registers | 13 |
| used x registers | 18 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 2 |
| used q registers | 0 |
| used v registers | 2 |
| used z registers | 0 |
| nb stack references | 10 |
| micro-operation queue | 6.50 cycles |
| front end | 6.50 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 3.50 | 3.50 | 7.25 | 7.25 | 7.25 | 7.25 | 0.50 | 0.50 | 0.50 | 0.50 | 5.83 | 5.50 | 5.67 | 2.50 | 2.50 |
| cycles | 3.50 | 3.50 | 7.25 | 7.25 | 7.25 | 7.25 | 0.50 | 0.50 | 0.50 | 0.50 | 5.83 | 5.50 | 5.67 | 2.50 | 2.50 |
| Cycles executing div or sqrt instructions | 5.00-12.50 |
| Front-end | 6.50 |
| Dispatch | 7.25 |
| DIV/SQRT | 5.00-12.50 |
| Overall L1 | 7.25-12.50 |
| all | 3% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 9% |
| all | 30% |
| load | 38% |
| store | 50% |
| mul | 12% |
| add-sub | 14% |
| fma | 12% |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 26% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #-80]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X19, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDR W20, [X0, #56] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| BL 410320 <@plt_start@+0x300> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W21, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 4102b0 <@plt_start@+0x290> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR W1, [X19, #52] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ORR W10, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W0, W1, W20,LSL #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| SDIV W1, W0, W21 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| MSUB W0, W1, W21, W0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | N/A |
| CMP W10, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.LT 411870 <_Z10cg_calc_uriiidPdS_PKdS_S1_._omp_fn.0+0x244> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W10, W1, W10, W0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| MOVI D30, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| ADD W1, W1, W10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| CMP W10, W1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 4117e4 <_Z10cg_calc_uriiidPdS_PKdS_S1_._omp_fn.0+0x1b8> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD W10, W20, W10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W0, W20, W1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| SBFM X17, X20, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDR D27, [X19] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| LDP X9, X13, [X19, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X8, X12, [X19, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| DUP V28.2D, V27.D[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | vect (50.0%) |
| LDR W16, [X19, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| SUB W22, W16, W20,LSL #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W30, W16, W20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MUL W14, W10, W16 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| UBFM W7, W22, #1, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| AND W18, W22, #0xfffffffe | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| UBFM X7, X7, #60, #59 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD W18, W18, W20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W21, W22, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| AND W15, W22, #0x1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| ADD X19, X19, #40 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| LDR X0, [X19] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #80 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| B 4117dc <_Z10cg_calc_uriiidPdS_PKdS_S1_._omp_fn.0+0x1b0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W1, W1, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W0, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| B 41166c <_Z10cg_calc_uriiidPdS_PKdS_S1_._omp_fn.0+0x40> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
The code analyzed by CQA in that panel excludes loops and represents 0.03% of application time for run gcc_0
| Source file and lines | cg.cpp:105-113 |
| Module | exec |
| nb instructions | 52 |
| nb uops | 52 |
| loop length | 208 |
| used w registers | 13 |
| used x registers | 18 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 2 |
| used q registers | 0 |
| used v registers | 2 |
| used z registers | 0 |
| nb stack references | 10 |
| micro-operation queue | 6.50 cycles |
| front end | 6.50 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 3.50 | 3.50 | 7.25 | 7.25 | 7.25 | 7.25 | 0.50 | 0.50 | 0.50 | 0.50 | 5.83 | 5.50 | 5.67 | 2.50 | 2.50 |
| cycles | 3.50 | 3.50 | 7.25 | 7.25 | 7.25 | 7.25 | 0.50 | 0.50 | 0.50 | 0.50 | 5.83 | 5.50 | 5.67 | 2.50 | 2.50 |
| Cycles executing div or sqrt instructions | 5.00-12.50 |
| Front-end | 6.50 |
| Dispatch | 7.25 |
| DIV/SQRT | 5.00-12.50 |
| Overall L1 | 7.25-12.50 |
| all | 3% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 9% |
| all | 30% |
| load | 38% |
| store | 50% |
| mul | 12% |
| add-sub | 14% |
| fma | 12% |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 26% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #-80]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X19, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDR W20, [X0, #56] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| BL 410320 <@plt_start@+0x300> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W21, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 4102b0 <@plt_start@+0x290> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR W1, [X19, #52] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ORR W10, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W0, W1, W20,LSL #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| SDIV W1, W0, W21 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| MSUB W0, W1, W21, W0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | N/A |
| CMP W10, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.LT 411870 <_Z10cg_calc_uriiidPdS_PKdS_S1_._omp_fn.0+0x244> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W10, W1, W10, W0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| MOVI D30, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| ADD W1, W1, W10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| CMP W10, W1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 4117e4 <_Z10cg_calc_uriiidPdS_PKdS_S1_._omp_fn.0+0x1b8> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD W10, W20, W10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W0, W20, W1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| SBFM X17, X20, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDR D27, [X19] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| LDP X9, X13, [X19, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X8, X12, [X19, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| DUP V28.2D, V27.D[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | vect (50.0%) |
| LDR W16, [X19, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| SUB W22, W16, W20,LSL #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W30, W16, W20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MUL W14, W10, W16 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| UBFM W7, W22, #1, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| AND W18, W22, #0xfffffffe | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| UBFM X7, X7, #60, #59 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD W18, W18, W20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W21, W22, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| AND W15, W22, #0x1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| ADD X19, X19, #40 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| LDR X0, [X19] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #80 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| B 4117dc <_Z10cg_calc_uriiidPdS_PKdS_S1_._omp_fn.0+0x1b0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W1, W1, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W0, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| B 41166c <_Z10cg_calc_uriiidPdS_PKdS_S1_._omp_fn.0+0x40> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| Name | Coverage (%) | Time (s) |
|---|---|---|
| ▼cg_calc_ur(int, int, int, double, double*, double*, double const*, double*, double const*) [clone ._omp_fn.0]– | 43.33 | 115.54 |
| ○Loop 23 - cg.cpp:105-105 - exec | 0.00 | 0.03 |
| ▼Loop 24 - cg.cpp:105-113 - exec– | 0.00 | 0.00 |
| ▼Loop 27 - cg.cpp:105-113 - exec– | 0.08 | 0.22 |
| ○Loop 26 - cg.cpp:108-113 - exec | 43.21 | 115.17 |
| ○Loop 25 - cg.cpp:108-113 - exec | 0.00 | 0.00 |
