| Function: cg_init(int, int, int, int, double, double, double*, double const*, double const*, double* ... | Module: exec | Source: cg.cpp:59-68 | Coverage (incl. loops): 0.01% | (excl. loops): 0.00% |
|---|
| Function: cg_init(int, int, int, int, double, double, double*, double const*, double const*, double* ... | Module: exec | Source: cg.cpp:59-68 | Coverage (incl. loops): 0.01% | (excl. loops): 0.00% |
|---|
/home/eoseret/qaas/qaas_runs/178-237-4322/intel/TeaLeaf/build/TeaLeaf/src/omp/cg.cpp: 59 - 68 |
-------------------------------------------------------------------------------- |
59: #pragma omp parallel for reduction(+ : rro_temp) |
60: #endif |
61: for (int jj = halo_depth; jj < y - halo_depth; ++jj) { |
62: for (int kk = halo_depth; kk < x - halo_depth; ++kk) { |
63: const int index = kk + jj * x; |
64: const double smvp = tealeaf_SMVP(u); |
65: w[index] = smvp; |
66: r[index] = u[index] - w[index]; |
67: p[index] = r[index]; |
68: rro_temp += r[index] * p[index]; |
0x4113c8 STP X29, X30, [SP, #-48]! |
0x4113cc ADD X29, SP, #0 |
0x4113d0 STP X19, X20, [SP, #16] |
0x4113d4 ORR X19, XZR, X0 |
0x4113d8 STP X21, X22, [SP, #32] |
0x4113dc LDR W20, [X0, #64] |
0x4113e0 BL 410100 |
0x4113e4 ORR W21, WZR, W0 |
0x4113e8 BL 410110 |
0x4113ec LDR W1, [X19, #60] |
0x4113f0 ORR W16, WZR, W0 |
0x4113f4 SUB W0, W1, W20,LSL #1 |
0x4113f8 SDIV W2, W0, W21 |
0x4113fc MSUB W3, W2, W21, W0 |
0x411400 CMP W16, W3 |
0x411404 B.LT 411624 |
0x411408 MADD W4, W2, W16, W3 |
0x41140c MOVI D27, #0 |
0x411410 ADD W5, W2, W4 |
0x411414 CMP W4, W5 |
0x411418 B.GE 4115f0 |
0x41141c LDR W18, [X19, #56] |
0x411420 SBFM X30, X20, #0, #31 |
0x411424 ADD W0, W20, W5 |
0x411428 LDP X3, X13, [X19] |
0x41142c ADD W16, W20, W4 |
0x411430 FMOV D26, #1.0000000 |
0x411434 LDP X8, X7, [X19, #16] |
0x411438 SUB W6, W18, W20,LSL #1 |
0x41143c SBFM X15, X18, #61, #31 |
0x411440 SUB W22, W18, W20 |
0x411444 MUL W17, W16, W18 |
0x411448 ADD X14, X3, X15 |
0x41144c SUB X12, X3, X15 |
0x411450 ADD X21, X6, X30 |
0x411454 LDP X6, X5, [X19, #32] |
0x411458 ADD X11, X3, #8 |
0x41145c SUB X10, X3, #8 |
0x411460 ADD X9, X6, #8 |
0x411464 ADD X4, X5, X15 |
(12) 0x411468 CMP W20, W22 |
(12) 0x41146c B.GE 4115e0 |
(12) 0x411470 SBFM X1, X17, #0, #31 |
(12) 0x411474 ADD X15, X21, X1 |
(12) 0x411478 ADD X2, X1, X30 |
(12) 0x41147c UBFM X15, X15, #61, #60 |
(12) 0x411480 UBFM X1, X2, #61, #60 |
(12) 0x411484 SUB X2, X15, X2,LSL #3 |
(12) 0x411488 TBZ W2, #3, 411500 |
(12) 0x41148c LDR D30, [X6, X1] |
(12) 0x411490 LDR D28, [X5, X1] |
(12) 0x411494 LDR D31, [X10, X1] |
(12) 0x411498 LDR D29, [X12, X1] |
(12) 0x41149c LDR D25, [X9, X1] |
(12) 0x4114a0 LDR D24, [X4, X1] |
(12) 0x4114a4 FMUL D0, D30, D31 |
(12) 0x4114a8 LDR D22, [X11, X1] |
(12) 0x4114ac FMUL D1, D28, D29 |
(12) 0x4114b0 LDR D23, [X14, X1] |
(12) 0x4114b4 FADD D2, D25, D30 |
(12) 0x4114b8 FADD D5, D24, D28 |
(12) 0x4114bc LDR D4, [X3, X1] |
(12) 0x4114c0 FMADD D3, D25, D22, D0 |
(12) 0x4114c4 FMADD D6, D24, D23, D1 |
(12) 0x4114c8 FADD D7, D2, D26 |
(12) 0x4114cc FADD D16, D7, D5 |
(12) 0x4114d0 FADD D17, D3, D6 |
(12) 0x4114d4 FNMSUB D18, D16, D4, D17 |
(12) 0x4114d8 STR D18, [X7, X1] |
(12) 0x4114dc LDR D19, [X3, X1] |
(12) 0x4114e0 FADD D20, D17, D19 |
(12) 0x4114e4 FMSUB D21, D16, D4, D20 |
(12) 0x4114e8 FMADD D27, D21, D21, D27 |
(12) 0x4114ec STR D21, [X8, X1] |
(12) 0x4114f0 STR D21, [X13, X1] |
(12) 0x4114f4 ADD X1, X1, #8 |
(12) 0x4114f8 CMP X15, X1 |
(12) 0x4114fc B.EQ 4115e0 |
(13) 0x411500 LDR D30, [X6, X1] |
(13) 0x411504 ADD X2, X1, #8 |
(13) 0x411508 LDR D28, [X5, X1] |
(13) 0x41150c LDR D31, [X10, X1] |
(13) 0x411510 LDR D29, [X12, X1] |
(13) 0x411514 LDR D25, [X9, X1] |
(13) 0x411518 LDR D24, [X4, X1] |
(13) 0x41151c FMUL D0, D30, D31 |
(13) 0x411520 LDR D22, [X11, X1] |
(13) 0x411524 FMUL D1, D28, D29 |
(13) 0x411528 LDR D23, [X14, X1] |
(13) 0x41152c FADD D2, D25, D30 |
(13) 0x411530 FADD D5, D24, D28 |
(13) 0x411534 LDR D4, [X3, X1] |
(13) 0x411538 FMADD D3, D25, D22, D0 |
(13) 0x41153c FMADD D6, D24, D23, D1 |
(13) 0x411540 FADD D7, D2, D26 |
(13) 0x411544 FADD D16, D7, D5 |
(13) 0x411548 FADD D17, D3, D6 |
(13) 0x41154c FNMSUB D18, D16, D4, D17 |
(13) 0x411550 STR D18, [X7, X1] |
(13) 0x411554 LDR D19, [X3, X1] |
(13) 0x411558 FADD D20, D17, D19 |
(13) 0x41155c FMSUB D21, D16, D4, D20 |
(13) 0x411560 FMADD D27, D21, D21, D27 |
(13) 0x411564 STR D21, [X8, X1] |
(13) 0x411568 STR D21, [X13, X1] |
(13) 0x41156c ADD X1, X1, #16 |
(13) 0x411570 LDR D30, [X6, X2] |
(13) 0x411574 LDR D28, [X5, X2] |
(13) 0x411578 LDR D31, [X10, X2] |
(13) 0x41157c LDR D29, [X12, X2] |
(13) 0x411580 LDR D25, [X9, X2] |
(13) 0x411584 LDR D24, [X4, X2] |
(13) 0x411588 FMUL D0, D30, D31 |
(13) 0x41158c LDR D22, [X11, X2] |
(13) 0x411590 FMUL D1, D28, D29 |
(13) 0x411594 LDR D23, [X14, X2] |
(13) 0x411598 FADD D2, D25, D30 |
(13) 0x41159c FADD D5, D24, D28 |
(13) 0x4115a0 LDR D4, [X3, X2] |
(13) 0x4115a4 FMADD D3, D25, D22, D0 |
(13) 0x4115a8 FMADD D6, D24, D23, D1 |
(13) 0x4115ac FADD D7, D2, D26 |
(13) 0x4115b0 FADD D16, D7, D5 |
(13) 0x4115b4 FADD D17, D3, D6 |
(13) 0x4115b8 FNMSUB D18, D16, D4, D17 |
(13) 0x4115bc STR D18, [X7, X2] |
(13) 0x4115c0 LDR D19, [X3, X2] |
(13) 0x4115c4 FADD D20, D17, D19 |
(13) 0x4115c8 FMSUB D21, D16, D4, D20 |
(13) 0x4115cc FMADD D27, D21, D21, D27 |
(13) 0x4115d0 STR D21, [X8, X2] |
(13) 0x4115d4 STR D21, [X13, X2] |
(13) 0x4115d8 CMP X15, X1 |
(13) 0x4115dc B.NE 411500 |
(12) 0x4115e0 ADD W16, W16, #1 |
(12) 0x4115e4 ADD W17, W17, W18 |
(12) 0x4115e8 CMP W0, W16 |
(12) 0x4115ec B.GT 411468 |
0x4115f0 ADD X19, X19, #48 |
0x4115f4 LDR X1, [X19] |
(11) 0x4115f8 FMOV D26, X1 |
(11) 0x4115fc ORR X20, XZR, X1 |
(11) 0x411600 FADD D30, D27, D26 |
(11) 0x411604 FMOV X30, D30 |
(11) 0x411608 CAS X20, X30, [X19] |
(11) 0x41160c CMP X1, X20 |
(11) 0x411610 B.NE 411630 |
0x411614 LDP X19, X20, [SP, #16] |
0x411618 LDP X21, X22, [SP, #32] |
0x41161c LDP X29, X30, [SP], #48 |
0x411620 RET |
0x411624 ADD W2, W2, #1 |
0x411628 MOVZ W3, #0 |
0x41162c B 411408 |
(11) 0x411630 ORR X1, XZR, X20 |
(11) 0x411634 B 4115f8 |
0x411638 HINT #0 |
0x41163c HINT #0 |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►98.24+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►1.76+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | run_cg_init(Chunk*, Settings&,[...] | cg.cpp:73 | exec |
| ○ | cg_init_driver(Chunk*, Setting[...] | cg_driver.cpp:30 | exec |
| ○ | cg_driver(Chunk*, Settings&, d[...] | cg_driver.cpp:15 | exec |
| ○ | solve(Chunk*, Settings&, int, [...] | diffuse.cpp:51 | exec |
| ○ | diffuse(Chunk*, Settings&) | diffuse.cpp:12 | exec |
| ○ | main | main.cpp:179 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | basic_string.h:809 | exec |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| Path / |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run gcc_5
| Source file and lines | cg.cpp:59-68 |
| Module | exec |
| nb instructions | 51 |
| nb uops | 49 |
| loop length | 204 |
| used w registers | 14 |
| used x registers | 23 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 2 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 6 |
| micro-operation queue | 6.13 cycles |
| front end | 6.13 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 3.00 | 3.00 | 7.75 | 7.75 | 7.75 | 7.75 | 0.50 | 0.50 | 0.50 | 0.50 | 4.50 | 4.17 | 4.33 | 1.50 | 1.50 |
| cycles | 3.00 | 3.00 | 7.75 | 7.75 | 7.75 | 7.75 | 0.50 | 0.50 | 0.50 | 0.50 | 4.50 | 4.17 | 4.33 | 1.50 | 1.50 |
| Cycles executing div or sqrt instructions | 5.00-12.50 |
| Front-end | 6.13 |
| Dispatch | 7.75 |
| DIV/SQRT | 5.00-12.50 |
| Overall L1 | 7.75-12.50 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| other | 0% |
| all | 0% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 29% |
| load | 34% |
| store | 50% |
| mul | 12% |
| add-sub | 20% |
| fma | 12% |
| other | 35% |
| all | 25% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 25% |
| all | 29% |
| load | 34% |
| store | 50% |
| mul | 12% |
| add-sub | 20% |
| fma | 12% |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 34% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #-48]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X19, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDR W20, [X0, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| BL 410100 <@plt_start@+0xe0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W21, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410110 <@plt_start@+0xf0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR W1, [X19, #60] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| ORR W16, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W0, W1, W20,LSL #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| SDIV W2, W0, W21 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| MSUB W3, W2, W21, W0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| CMP W16, W3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.LT 411624 <_Z7cg_initiiiiddPdPKdS1_S_S_S_S_S_S_._omp_fn.3+0x25c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W4, W2, W16, W3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| MOVI D27, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| ADD W5, W2, W4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W4, W5 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 4115f0 <_Z7cg_initiiiiddPdPKdS1_S_S_S_S_S_S_._omp_fn.3+0x228> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR W18, [X19, #56] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| SBFM X30, X20, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| ADD W0, W20, W5 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| LDP X3, X13, [X19] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| ADD W16, W20, W4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| FMOV D26, #1.0000000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| LDP X8, X7, [X19, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| SUB W6, W18, W20,LSL #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SBFM X15, X18, #61, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| SUB W22, W18, W20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MUL W17, W16, W18 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| ADD X14, X3, X15 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| SUB X12, X3, X15 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X21, X6, X30 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| LDP X6, X5, [X19, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| ADD X11, X3, #8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| SUB X10, X3, #8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X9, X6, #8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X4, X5, X15 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X19, X19, #48 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| LDR X1, [X19] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #48 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W2, W2, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W3, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| B 411408 <_Z7cg_initiiiiddPdPKdS1_S_S_S_S_S_S_._omp_fn.3+0x40> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run gcc_5
| Source file and lines | cg.cpp:59-68 |
| Module | exec |
| nb instructions | 51 |
| nb uops | 49 |
| loop length | 204 |
| used w registers | 14 |
| used x registers | 23 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 2 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 6 |
| micro-operation queue | 6.13 cycles |
| front end | 6.13 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 3.00 | 3.00 | 7.75 | 7.75 | 7.75 | 7.75 | 0.50 | 0.50 | 0.50 | 0.50 | 4.50 | 4.17 | 4.33 | 1.50 | 1.50 |
| cycles | 3.00 | 3.00 | 7.75 | 7.75 | 7.75 | 7.75 | 0.50 | 0.50 | 0.50 | 0.50 | 4.50 | 4.17 | 4.33 | 1.50 | 1.50 |
| Cycles executing div or sqrt instructions | 5.00-12.50 |
| Front-end | 6.13 |
| Dispatch | 7.75 |
| DIV/SQRT | 5.00-12.50 |
| Overall L1 | 7.75-12.50 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| other | 0% |
| all | 0% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 29% |
| load | 34% |
| store | 50% |
| mul | 12% |
| add-sub | 20% |
| fma | 12% |
| other | 35% |
| all | 25% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 25% |
| all | 29% |
| load | 34% |
| store | 50% |
| mul | 12% |
| add-sub | 20% |
| fma | 12% |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 34% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #-48]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X19, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDR W20, [X0, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| BL 410100 <@plt_start@+0xe0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W21, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410110 <@plt_start@+0xf0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR W1, [X19, #60] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| ORR W16, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W0, W1, W20,LSL #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| SDIV W2, W0, W21 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| MSUB W3, W2, W21, W0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| CMP W16, W3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.LT 411624 <_Z7cg_initiiiiddPdPKdS1_S_S_S_S_S_S_._omp_fn.3+0x25c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W4, W2, W16, W3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| MOVI D27, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| ADD W5, W2, W4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W4, W5 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 4115f0 <_Z7cg_initiiiiddPdPKdS1_S_S_S_S_S_S_._omp_fn.3+0x228> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR W18, [X19, #56] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| SBFM X30, X20, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| ADD W0, W20, W5 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| LDP X3, X13, [X19] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| ADD W16, W20, W4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| FMOV D26, #1.0000000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| LDP X8, X7, [X19, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| SUB W6, W18, W20,LSL #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SBFM X15, X18, #61, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| SUB W22, W18, W20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MUL W17, W16, W18 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| ADD X14, X3, X15 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| SUB X12, X3, X15 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X21, X6, X30 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| LDP X6, X5, [X19, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| ADD X11, X3, #8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| SUB X10, X3, #8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X9, X6, #8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X4, X5, X15 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X19, X19, #48 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| LDR X1, [X19] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #48 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W2, W2, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W3, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| B 411408 <_Z7cg_initiiiiddPdPKdS1_S_S_S_S_S_S_._omp_fn.3+0x40> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A |
| Name | Coverage (%) | Time (s) |
|---|---|---|
| ▼cg_init(int, int, int, int, double, double, double*, double const*, double const*, double*, double*, double*, double*, double*, double*) [clone ._omp_fn.3]– | 0.01 | 0.03 |
| ○Loop 11 - cg.cpp:59-59 - exec | 0.00 | 0.00 |
| ▼Loop 12 - cg.cpp:62-68 - exec– | 0.00 | 0.00 |
| ○Loop 13 - cg.cpp:62-68 - exec | 0.01 | 0.04 |
