| Function: cg_calc_p(int, int, int, double, double*, double const*) [clone ._omp_fn.0] | Module: exec | Source: cg.cpp:125-131 | Coverage (incl. loops): 20.79% | (excl. loops): 0.01% |
|---|
| Function: cg_calc_p(int, int, int, double, double*, double const*) [clone ._omp_fn.0] | Module: exec | Source: cg.cpp:125-131 | Coverage (incl. loops): 20.79% | (excl. loops): 0.01% |
|---|
/home/eoseret/qaas/qaas_runs/178-237-4322/intel/TeaLeaf/build/TeaLeaf/src/omp/cg.cpp: 125 - 131 |
-------------------------------------------------------------------------------- |
125: #pragma omp parallel for |
126: #endif |
127: for (int jj = halo_depth; jj < y - halo_depth; ++jj) { |
128: for (int kk = halo_depth; kk < x - halo_depth; ++kk) { |
129: const int index = kk + jj * x; |
130: |
131: p[index] = beta * p[index] + r[index]; |
0x411ac4 STP X29, X30, [SP, #976]! |
0x411ac8 ADD X29, SP, #0 |
0x411acc STP X19, X20, [SP, #16] |
0x411ad0 ORR X20, XZR, X0 |
0x411ad4 STR X21, [SP, #32] |
0x411ad8 LDR W21, [X0, #32] |
0x411adc BL 410100 |
0x411ae0 ORR W19, WZR, W0 |
0x411ae4 BL 410110 |
0x411ae8 LDR W1, [X20, #28] |
0x411aec ORR W5, WZR, W0 |
0x411af0 SUB W0, W1, W21,LSL #1 |
0x411af4 SDIV W3, W0, W19 |
0x411af8 MSUB W2, W3, W19, W0 |
0x411afc CMP W5, W2 |
0x411b00 B.LT 411cf8 |
0x411b04 MADD W4, W3, W5, W2 |
0x411b08 ADD W6, W3, W4 |
0x411b0c CMP W4, W6 |
0x411b10 B.GE 411ce8 |
0x411b14 LDR W14, [X20, #24] |
0x411b18 ADD W5, W21, W4 |
0x411b1c ADD W12, W21, W6 |
0x411b20 LDP X15, X16, [X20, #8] |
0x411b24 SUB W7, W14, W21 |
0x411b28 LDR D31, [X20] |
0x411b2c CMP W21, W7 |
0x411b30 B.GE 411ce8 |
0x411b34 SUB W8, W7, W21 |
0x411b38 MUL W9, W14, W5 |
0x411b3c ADD X10, X8, W21,SXTW |
0x411b40 SUB X13, XZR, X8,LSL #3 |
0x411b44 ADD X11, X10, W9,SXTW |
0x411b48 SBFM X17, X14, #61, #31 |
0x411b4c UBFM X4, X11, #61, #60 |
(21) 0x411b50 SUB X18, XZR, X13 |
(21) 0x411b54 ADD X1, X4, X13 |
(21) 0x411b58 SUB X30, X18, #8 |
(21) 0x411b5c UBFM X20, X30, #3, #63 |
(21) 0x411b60 ADD X21, X20, #1 |
(21) 0x411b64 ANDS X19, X21, #0x7 |
(21) 0x411b68 B.EQ 411c30 |
(21) 0x411b6c CMP X19, #1 |
(21) 0x411b70 B.EQ 411c14 |
(21) 0x411b74 CMP X19, #2 |
(21) 0x411b78 B.EQ 411c00 |
(21) 0x411b7c CMP X19, #3 |
(21) 0x411b80 B.EQ 411bec |
(21) 0x411b84 CMP X19, #4 |
(21) 0x411b88 B.EQ 411bd8 |
(21) 0x411b8c CMP X19, #5 |
(21) 0x411b90 B.EQ 411bc4 |
(21) 0x411b94 CMP X19, #6 |
(21) 0x411b98 B.EQ 411bb0 |
(21) 0x411b9c LDR D4, [X15, X1] |
(21) 0x411ba0 LDR D3, [X16, X1] |
(21) 0x411ba4 FMADD D0, D31, D4, D3 |
(21) 0x411ba8 STR D0, [X15, X1] |
(21) 0x411bac ADD X1, X1, #8 |
(21) 0x411bb0 LDR D2, [X15, X1] |
(21) 0x411bb4 LDR D1, [X16, X1] |
(21) 0x411bb8 FMADD D5, D31, D2, D1 |
(21) 0x411bbc STR D5, [X15, X1] |
(21) 0x411bc0 ADD X1, X1, #8 |
(21) 0x411bc4 LDR D6, [X15, X1] |
(21) 0x411bc8 LDR D29, [X16, X1] |
(21) 0x411bcc FMADD D7, D31, D6, D29 |
(21) 0x411bd0 STR D7, [X15, X1] |
(21) 0x411bd4 ADD X1, X1, #8 |
(21) 0x411bd8 LDR D30, [X15, X1] |
(21) 0x411bdc LDR D28, [X16, X1] |
(21) 0x411be0 FMADD D16, D31, D30, D28 |
(21) 0x411be4 STR D16, [X15, X1] |
(21) 0x411be8 ADD X1, X1, #8 |
(21) 0x411bec LDR D27, [X15, X1] |
(21) 0x411bf0 LDR D26, [X16, X1] |
(21) 0x411bf4 FMADD D17, D31, D27, D26 |
(21) 0x411bf8 STR D17, [X15, X1] |
(21) 0x411bfc ADD X1, X1, #8 |
(21) 0x411c00 LDR D25, [X15, X1] |
(21) 0x411c04 LDR D24, [X16, X1] |
(21) 0x411c08 FMADD D18, D31, D25, D24 |
(21) 0x411c0c STR D18, [X15, X1] |
(21) 0x411c10 ADD X1, X1, #8 |
(21) 0x411c14 LDR D23, [X15, X1] |
(21) 0x411c18 LDR D22, [X16, X1] |
(21) 0x411c1c FMADD D19, D31, D23, D22 |
(21) 0x411c20 STR D19, [X15, X1] |
(21) 0x411c24 ADD X1, X1, #8 |
(21) 0x411c28 CMP X4, X1 |
(21) 0x411c2c B.EQ 411cd8 |
(20) 0x411c30 LDR D21, [X15, X1] |
(20) 0x411c34 ADD X0, X1, #8 |
(20) 0x411c38 ADD X2, X1, #16 |
(20) 0x411c3c ADD X3, X1, #24 |
(20) 0x411c40 ADD X14, X1, #32 |
(20) 0x411c44 LDR D20, [X16, X1] |
(20) 0x411c48 ADD X8, X1, #40 |
(20) 0x411c4c ADD X7, X1, #48 |
(20) 0x411c50 ADD X6, X1, #56 |
(20) 0x411c54 FMADD D4, D31, D21, D20 |
(20) 0x411c58 STR D4, [X15, X1] |
(20) 0x411c5c ADD X1, X1, #64 |
(20) 0x411c60 LDR D3, [X15, X0] |
(20) 0x411c64 LDR D0, [X16, X0] |
(20) 0x411c68 FMADD D2, D31, D3, D0 |
(20) 0x411c6c STR D2, [X15, X0] |
(20) 0x411c70 LDR D1, [X15, X2] |
(20) 0x411c74 LDR D5, [X16, X2] |
(20) 0x411c78 FMADD D6, D31, D1, D5 |
(20) 0x411c7c STR D6, [X15, X2] |
(20) 0x411c80 LDR D29, [X15, X3] |
(20) 0x411c84 LDR D7, [X16, X3] |
(20) 0x411c88 FMADD D30, D31, D29, D7 |
(20) 0x411c8c STR D30, [X15, X3] |
(20) 0x411c90 LDR D28, [X15, X14] |
(20) 0x411c94 LDR D16, [X16, X14] |
(20) 0x411c98 FMADD D27, D31, D28, D16 |
(20) 0x411c9c STR D27, [X15, X14] |
(20) 0x411ca0 LDR D26, [X15, X8] |
(20) 0x411ca4 LDR D17, [X16, X8] |
(20) 0x411ca8 FMADD D25, D31, D26, D17 |
(20) 0x411cac STR D25, [X15, X8] |
(20) 0x411cb0 LDR D24, [X15, X7] |
(20) 0x411cb4 LDR D18, [X16, X7] |
(20) 0x411cb8 FMADD D23, D31, D24, D18 |
(20) 0x411cbc STR D23, [X15, X7] |
(20) 0x411cc0 LDR D22, [X15, X6] |
(20) 0x411cc4 LDR D19, [X16, X6] |
(20) 0x411cc8 FMADD D21, D31, D22, D19 |
(20) 0x411ccc STR D21, [X15, X6] |
(20) 0x411cd0 CMP X4, X1 |
(20) 0x411cd4 B.NE 411c30 |
(21) 0x411cd8 ADD W5, W5, #1 |
(21) 0x411cdc ADD X4, X4, X17 |
(21) 0x411ce0 CMP W12, W5 |
(21) 0x411ce4 B.GT 411b50 |
0x411ce8 LDR X21, [SP, #32] |
0x411cec LDP X19, X20, [SP, #16] |
0x411cf0 LDP X29, X30, [SP], #48 |
0x411cf4 RET |
0x411cf8 ADD W3, W3, #1 |
0x411cfc MOVZ W2, #0 |
0x411d00 B 411b04 |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►98.42+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►1.57+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | run_cg_calc_p(Chunk*, Settings[...] | cg.cpp:159 | exec |
| ○ | cg_main_step_driver(Chunk*, Se[...] | cg_driver.cpp:83 | exec |
| ○ | cg_driver(Chunk*, Settings&, d[...] | cg_driver.cpp:18 | exec |
| ○ | solve(Chunk*, Settings&, int, [...] | diffuse.cpp:51 | exec |
| ○ | diffuse(Chunk*, Settings&) | diffuse.cpp:12 | exec |
| ○ | main | main.cpp:179 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | basic_string.h:809 | exec |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| Path / |
The code analyzed by CQA in that panel excludes loops and represents 0.01% of application time for run gcc_5
| Source file and lines | cg.cpp:125-131 |
| Module | exec |
| nb instructions | 42 |
| nb uops | 42 |
| loop length | 168 |
| used w registers | 15 |
| used x registers | 16 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 1 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 6 |
| micro-operation queue | 5.25 cycles |
| front end | 5.25 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 3.50 | 3.50 | 6.75 | 6.75 | 6.75 | 6.75 | 0.00 | 0.00 | 0.00 | 0.00 | 3.83 | 3.50 | 3.67 | 1.50 | 1.50 |
| cycles | 3.50 | 3.50 | 6.75 | 6.75 | 6.75 | 6.75 | 0.00 | 0.00 | 0.00 | 0.00 | 3.83 | 3.50 | 3.67 | 1.50 | 1.50 |
| Cycles executing div or sqrt instructions | 5.00-12.50 |
| Front-end | 5.25 |
| Dispatch | 6.75 |
| DIV/SQRT | 5.00-12.50 |
| Overall L1 | 6.75-12.50 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 23% |
| load | 26% |
| store | 41% |
| mul | 12% |
| add-sub | 18% |
| fma | 12% |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 25% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #976]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X20, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STR X21, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| LDR W21, [X0, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| BL 410100 <@plt_start@+0xe0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W19, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410110 <@plt_start@+0xf0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR W1, [X20, #28] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| ORR W5, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W0, W1, W21,LSL #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| SDIV W3, W0, W19 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| MSUB W2, W3, W19, W0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| CMP W5, W2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.LT 411cf8 <_Z9cg_calc_piiidPdPKd._omp_fn.0+0x234> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W4, W3, W5, W2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W6, W3, W4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W4, W6 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 411ce8 <_Z9cg_calc_piiidPdPKd._omp_fn.0+0x224> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR W14, [X20, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| ADD W5, W21, W4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W12, W21, W6 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| LDP X15, X16, [X20, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| SUB W7, W14, W21 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| LDR D31, [X20] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| CMP W21, W7 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 411ce8 <_Z9cg_calc_piiidPdPKd._omp_fn.0+0x224> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| SUB W8, W7, W21 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MUL W9, W14, W5 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| ADD X10, X8, W21,SXTW | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (25.0%) |
| SUB X13, XZR, X8,LSL #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X11, X10, W9,SXTW | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (25.0%) |
| SBFM X17, X14, #61, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| UBFM X4, X11, #61, #60 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| LDR X21, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X29, X30, [SP], #48 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W3, W3, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W2, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| B 411b04 <_Z9cg_calc_piiidPdPKd._omp_fn.0+0x40> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
The code analyzed by CQA in that panel excludes loops and represents 0.01% of application time for run gcc_5
| Source file and lines | cg.cpp:125-131 |
| Module | exec |
| nb instructions | 42 |
| nb uops | 42 |
| loop length | 168 |
| used w registers | 15 |
| used x registers | 16 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 1 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 6 |
| micro-operation queue | 5.25 cycles |
| front end | 5.25 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 3.50 | 3.50 | 6.75 | 6.75 | 6.75 | 6.75 | 0.00 | 0.00 | 0.00 | 0.00 | 3.83 | 3.50 | 3.67 | 1.50 | 1.50 |
| cycles | 3.50 | 3.50 | 6.75 | 6.75 | 6.75 | 6.75 | 0.00 | 0.00 | 0.00 | 0.00 | 3.83 | 3.50 | 3.67 | 1.50 | 1.50 |
| Cycles executing div or sqrt instructions | 5.00-12.50 |
| Front-end | 5.25 |
| Dispatch | 6.75 |
| DIV/SQRT | 5.00-12.50 |
| Overall L1 | 6.75-12.50 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 23% |
| load | 26% |
| store | 41% |
| mul | 12% |
| add-sub | 18% |
| fma | 12% |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 25% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #976]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X20, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STR X21, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| LDR W21, [X0, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| BL 410100 <@plt_start@+0xe0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W19, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410110 <@plt_start@+0xf0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR W1, [X20, #28] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| ORR W5, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W0, W1, W21,LSL #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| SDIV W3, W0, W19 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| MSUB W2, W3, W19, W0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| CMP W5, W2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.LT 411cf8 <_Z9cg_calc_piiidPdPKd._omp_fn.0+0x234> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W4, W3, W5, W2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W6, W3, W4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W4, W6 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 411ce8 <_Z9cg_calc_piiidPdPKd._omp_fn.0+0x224> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR W14, [X20, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| ADD W5, W21, W4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W12, W21, W6 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| LDP X15, X16, [X20, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| SUB W7, W14, W21 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| LDR D31, [X20] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| CMP W21, W7 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 411ce8 <_Z9cg_calc_piiidPdPKd._omp_fn.0+0x224> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| SUB W8, W7, W21 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MUL W9, W14, W5 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| ADD X10, X8, W21,SXTW | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (25.0%) |
| SUB X13, XZR, X8,LSL #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X11, X10, W9,SXTW | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (25.0%) |
| SBFM X17, X14, #61, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| UBFM X4, X11, #61, #60 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| LDR X21, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X29, X30, [SP], #48 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W3, W3, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W2, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| B 411b04 <_Z9cg_calc_piiidPdPKd._omp_fn.0+0x40> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| Name | Coverage (%) | Time (s) |
|---|---|---|
| ▼cg_calc_p(int, int, int, double, double*, double const*) [clone ._omp_fn.0]– | 20.79 | 55.40 |
| ▼Loop 21 - cg.cpp:128-131 - exec– | 0.00 | 0.00 |
| ○Loop 20 - cg.cpp:128-131 - exec | 20.78 | 55.35 |
