| Function: cg_calc_w(int, int, int, double*, double const*, double*, double const*, double const*) [c ... | Module: exec | Source: cg.cpp:83-90 | Coverage (incl. loops): 33.08% | (excl. loops): 0.02% |
|---|
| Function: cg_calc_w(int, int, int, double*, double const*, double*, double const*, double const*) [c ... | Module: exec | Source: cg.cpp:83-90 | Coverage (incl. loops): 33.08% | (excl. loops): 0.02% |
|---|
/home/eoseret/qaas/qaas_runs/178-237-4322/intel/TeaLeaf/build/TeaLeaf/src/omp/cg.cpp: 83 - 90 |
-------------------------------------------------------------------------------- |
83: #pragma omp parallel for reduction(+ : pw_temp) |
84: #endif |
85: for (int jj = halo_depth; jj < y - halo_depth; ++jj) { |
86: for (int kk = halo_depth; kk < x - halo_depth; ++kk) { |
87: const int index = kk + jj * x; |
88: const double smvp = tealeaf_SMVP(p); |
89: w[index] = smvp; |
90: pw_temp += w[index] * p[index]; |
0x411288 STP X29, X30, [SP, #-128]! |
0x41128c ADD X29, SP, #0 |
0x411290 STP X19, X20, [SP, #16] |
0x411294 ORR X19, XZR, X0 |
0x411298 STP X21, X22, [SP, #32] |
0x41129c LDR W21, [X0, #48] |
0x4112a0 BL 410320 |
0x4112a4 ORR W20, WZR, W0 |
0x4112a8 BL 4102b0 |
0x4112ac LDR W1, [X19, #44] |
0x4112b0 ORR W16, WZR, W0 |
0x4112b4 SUB W0, W1, W21,LSL #1 |
0x4112b8 SDIV W1, W0, W20 |
0x4112bc MSUB W0, W1, W20, W0 |
0x4112c0 CMP W16, W0 |
0x4112c4 B.LT 411618 |
0x4112c8 MADD W16, W1, W16, W0 |
0x4112cc MOVI D30, #0 |
0x4112d0 ADD W1, W1, W16 |
0x4112d4 CMP W16, W1 |
0x4112d8 B.GE 411544 |
0x4112dc STP X23, X24, [SP, #48] |
0x4112e0 ADD W16, W21, W16 |
0x4112e4 FMOV D21, #1.0000000 |
0x4112e8 FMOV V22.2D, #1.0000000 |
0x4112ec ADD W20, W21, W1 |
0x4112f0 STP X25, X26, [SP, #64] |
0x4112f4 STP X27, X28, [SP, #80] |
0x4112f8 SBFM X27, X21, #0, #31 |
0x4112fc LDR W0, [X19, #40] |
0x411300 LDP X6, X18, [X19] |
0x411304 LDP X17, X15, [X19, #16] |
0x411308 SUB W8, W0, W21,LSL #1 |
0x41130c SBFM X1, X0, #61, #31 |
0x411310 SUB W4, W0, W21 |
0x411314 MUL W30, W16, W0 |
0x411318 ADD X13, X6, X1 |
0x41131c SUB X12, X6, X1 |
0x411320 UBFM W14, W8, #1, #31 |
0x411324 AND W2, W8, #0xfffffffe |
0x411328 SUB W5, W0, W21,LSL #1 |
0x41132c ADD W2, W2, W21 |
0x411330 SBFM X22, X0, #0, #31 |
0x411334 UBFM X14, X14, #60, #59 |
0x411338 AND W28, W8, #0x1 |
0x41133c ORR W9, WZR, W4 |
0x411340 ORR W24, WZR, W8 |
0x411344 STR W2, [SP, #96] |
0x411348 ADD X11, X6, #8 |
0x41134c ADD X1, X15, X1 |
0x411350 SUB X10, X6, #8 |
0x411354 ADD X7, X17, #8 |
(19) 0x411358 CMP W21, W9 |
(19) 0x41135c B.GE 411604 |
(19) 0x411360 SBFM X2, X30, #0, #31 |
(19) 0x411364 SBFM X8, X21, #0, #31 |
(19) 0x411368 CMP W24, #1 |
(19) 0x41136c B.EQ 411588 |
(19) 0x411370 ORR X8, XZR, X27 |
(19) 0x411374 STP W21, W9, [SP, #100] |
(19) 0x411378 STR W24, [SP, #108] |
(19) 0x41137c STP X1, X12, [SP, #112] |
(22) 0x411380 SBFM X1, X30, #0, #31 |
(22) 0x411384 ADD X3, X1, X27 |
(22) 0x411388 UBFM X2, X3, #61, #60 |
(22) 0x41138c ADD X9, X18, X3,LSL #3 |
(22) 0x411390 SUB X4, X3, X22 |
(22) 0x411394 ADD X12, X2, #24 |
(22) 0x411398 ADD X4, X4, #1 |
(22) 0x41139c ADD X12, X6, X12 |
(22) 0x4113a0 SUB X21, X2, #8 |
(22) 0x4113a4 ADD X24, X6, X4,LSL #3 |
(22) 0x4113a8 ADD X23, X2, #16 |
(22) 0x4113ac CMP X12, X9 |
(22) 0x4113b0 ADD X23, X18, X23 |
(22) 0x4113b4 ADD X12, X2, #8 |
(22) 0x4113b8 ADD X21, X6, X21 |
(22) 0x4113bc ADD X3, X22, X3 |
(22) 0x4113c0 ADD X3, X3, #1 |
(22) 0x4113c4 CCMP X21, X23, #2, #8 |
(22) 0x4113c8 ADD X23, X17, X12 |
(22) 0x4113cc ADD X26, X6, X3,LSL #3 |
(22) 0x4113d0 CCMP X24, X9, #4, #2 |
(22) 0x4113d4 SUB X24, X9, X23 |
(22) 0x4113d8 ADD X25, X15, X3,LSL #3 |
(22) 0x4113dc UBFM X4, X4, #61, #60 |
(22) 0x4113e0 CCMP X24, #8, #0, #1 |
(22) 0x4113e4 UBFM X3, X3, #61, #60 |
(22) 0x4113e8 CSINC W24, WZR, WZR, #9 |
(22) 0x4113ec CMP X26, X9 |
(22) 0x4113f0 ADD X26, X15, X12 |
(22) 0x4113f4 CCMP X25, X9, #4, #1 |
(22) 0x4113f8 CCMP X26, X9, #4, #1 |
(22) 0x4113fc CSINC W25, WZR, WZR, #0 |
(22) 0x411400 ANDS WZR, W24, W25 |
(22) 0x411404 B.EQ 411578 |
(22) 0x411408 SUB X3, X3, #8 |
(22) 0x41140c SUB X4, X4, #8 |
(22) 0x411410 ADD X26, X15, X3 |
(22) 0x411414 ADD X25, X17, X2 |
(22) 0x411418 ADD X24, X15, X2 |
(22) 0x41141c MOVZ X1, #0 |
(22) 0x411420 ADD X4, X6, X4 |
(22) 0x411424 ADD X12, X6, X12 |
(22) 0x411428 ADD X3, X6, X3 |
(22) 0x41142c ADD X2, X6, X2 |
(21) 0x411430 LDR Q25, [X23, X1] |
(21) 0x411434 LDR Q29, [X25, X1] |
(21) 0x411438 LDR Q31, [X21, X1] |
(21) 0x41143c LDR Q23, [X12, X1] |
(21) 0x411440 LDR Q27, [X26, X1] |
(21) 0x411444 FADD V28.2D, V25.2D, V29.2D |
(21) 0x411448 LDR Q26, [X24, X1] |
(21) 0x41144c FMUL V31.2D, V31.2D, V29.2D |
(21) 0x411450 LDR Q29, [X4, X1] |
(21) 0x411454 FADD V28.2D, V28.2D, V22.2D |
(21) 0x411458 LDR Q24, [X2, X1] |
(21) 0x41145c FMLA V31.2D, V25.2D, V23.2D |
(21) 0x411460 LDR Q25, [X3, X1] |
(21) 0x411464 FADD V23.2D, V27.2D, V26.2D |
(21) 0x411468 FMUL V29.2D, V29.2D, V26.2D |
(21) 0x41146c FADD V28.2D, V28.2D, V23.2D |
(21) 0x411470 FNEG V31.2D, V31.2D |
(21) 0x411474 FMLA V29.2D, V27.2D, V25.2D |
(21) 0x411478 FMLA V31.2D, V28.2D, V24.2D |
(21) 0x41147c FSUB V31.2D, V31.2D, V29.2D |
(21) 0x411480 STR Q31, [X9, X1] |
(21) 0x411484 LDR Q29, [X2, X1] |
(21) 0x411488 ADD X1, X1, #16 |
(21) 0x41148c FMUL V31.2D, V29.2D, V31.2D |
(21) 0x411490 FADD D30, D30, D31 |
(21) 0x411494 MOV D29, V31.D[1] |
(21) 0x411498 FADD D30, D29, D30 |
(21) 0x41149c CMP X1, X14 |
(21) 0x4114a0 B.NE 411430 |
(22) 0x4114a4 CBZ W28, 411528 |
(22) 0x4114a8 LDR W1, [SP, #96] |
(22) 0x4114ac ADD W2, W1, W30 |
(22) 0x4114b0 SBFM X1, X2, #0, #31 |
(22) 0x4114b4 ADD W4, W0, W2 |
(22) 0x4114b8 ADD X1, X1, #1 |
(22) 0x4114bc SUB W3, W2, W0 |
(22) 0x4114c0 UBFM X1, X1, #61, #60 |
(22) 0x4114c4 SBFM X4, X4, #61, #31 |
(22) 0x4114c8 SUB X2, X1, #8 |
(22) 0x4114cc ADD X9, X6, X1 |
(22) 0x4114d0 LDR D19, [X6, W3,SXTW #3] |
(22) 0x4114d4 LDR D31, [X17, X2] |
(22) 0x4114d8 LDR D26, [X17, X1] |
(22) 0x4114dc LDR D27, [X15, X4] |
(22) 0x4114e0 LDR D29, [X15, X2] |
(22) 0x4114e4 FADD D28, D26, D31 |
(22) 0x4114e8 LDUR D25, [X9, #-16] |
(22) 0x4114ec LDR D20, [X6, X1] |
(22) 0x4114f0 FADD D23, D27, D29 |
(22) 0x4114f4 LDR D24, [X6, X4] |
(22) 0x4114f8 FMUL D29, D29, D19 |
(22) 0x4114fc FADD D28, D28, D22 |
(22) 0x411500 FMUL D31, D31, D25 |
(22) 0x411504 LDR D25, [X6, X2] |
(22) 0x411508 FMADD D29, D27, D24, D29 |
(22) 0x41150c FMADD D31, D26, D20, D31 |
(22) 0x411510 FADD D28, D28, D23 |
(22) 0x411514 FNMSUB D31, D28, D25, D31 |
(22) 0x411518 FSUB D31, D31, D29 |
(22) 0x41151c STR D31, [X18, X2] |
(22) 0x411520 LDR D29, [X6, X2] |
(22) 0x411524 FMADD D30, D31, D29, D30 |
(22) 0x411528 ADD W16, W16, #1 |
(22) 0x41152c ADD W30, W30, W0 |
(22) 0x411530 CMP W20, W16 |
(22) 0x411534 B.GT 411380 |
0x411538 LDP X23, X24, [SP, #48] |
0x41153c LDP X25, X26, [SP, #64] |
0x411540 LDP X27, X28, [SP, #80] |
0x411544 ADD X19, X19, #32 |
0x411548 LDR X0, [X19] |
(18) 0x41154c ORR X1, XZR, X0 |
(18) 0x411550 FMOV D31, X0 |
(18) 0x411554 FADD D31, D30, D31 |
(18) 0x411558 FMOV X2, D31 |
(18) 0x41155c CAS X1, X2, [X19] |
(18) 0x411560 CMP X0, X1 |
(18) 0x411564 B.NE 411624 |
0x411568 LDP X19, X20, [SP, #16] |
0x41156c LDP X21, X22, [SP, #32] |
0x411570 LDP X29, X30, [SP], #128 |
0x411574 RET |
(19) 0x411578 ORR X2, XZR, X1 |
(19) 0x41157c LDP W21, W9, [SP, #100] |
(19) 0x411580 LDP X1, X12, [SP, #112] |
(19) 0x411584 LDR W24, [SP, #108] |
(19) 0x411588 ADD X3, X5, X8 |
(19) 0x41158c ADD X8, X2, X8 |
(19) 0x411590 ADD X3, X3, X2 |
(19) 0x411594 UBFM X2, X8, #61, #60 |
(19) 0x411598 UBFM X3, X3, #61, #60 |
(19) 0x41159c HINT #0 |
(20) 0x4115a0 LDR D26, [X7, X2] |
(20) 0x4115a4 LDR D31, [X17, X2] |
(20) 0x4115a8 LDR D27, [X1, X2] |
(20) 0x4115ac LDR D29, [X15, X2] |
(20) 0x4115b0 FADD D28, D26, D31 |
(20) 0x4115b4 LDR D25, [X10, X2] |
(20) 0x4115b8 LDR D20, [X11, X2] |
(20) 0x4115bc FADD D23, D27, D29 |
(20) 0x4115c0 LDR D19, [X12, X2] |
(20) 0x4115c4 FADD D28, D28, D21 |
(20) 0x4115c8 FMUL D31, D31, D25 |
(20) 0x4115cc LDR D24, [X13, X2] |
(20) 0x4115d0 LDR D25, [X6, X2] |
(20) 0x4115d4 FMUL D29, D29, D19 |
(20) 0x4115d8 FMADD D31, D26, D20, D31 |
(20) 0x4115dc FADD D28, D28, D23 |
(20) 0x4115e0 FMADD D29, D27, D24, D29 |
(20) 0x4115e4 FNMSUB D31, D28, D25, D31 |
(20) 0x4115e8 FSUB D31, D31, D29 |
(20) 0x4115ec STR D31, [X18, X2] |
(20) 0x4115f0 LDR D29, [X6, X2] |
(20) 0x4115f4 ADD X2, X2, #8 |
(20) 0x4115f8 FMADD D30, D31, D29, D30 |
(20) 0x4115fc CMP X2, X3 |
(20) 0x411600 B.NE 4115a0 |
(19) 0x411604 ADD W16, W16, #1 |
(19) 0x411608 ADD W30, W30, W0 |
(19) 0x41160c CMP W20, W16 |
(19) 0x411610 B.GT 411358 |
0x411614 B 411538 |
0x411618 ADD W1, W1, #1 |
0x41161c MOVZ W0, #0 |
0x411620 B 4112c8 |
(18) 0x411624 ORR X0, XZR, X1 |
(18) 0x411628 B 41154c |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►98.43+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►1.57+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | run_cg_calc_w(Chunk*, Settings[...] | cg.cpp:83 | exec |
| ○ | cg_main_step_driver(Chunk*, Se[...] | cg_driver.cpp:57 | exec |
| ○ | cg_driver(Chunk*, Settings&, d[...] | cg_driver.cpp:18 | exec |
| ○ | solve(Chunk*, Settings&, int, [...] | diffuse.cpp:51 | exec |
| ○ | diffuse(Chunk*, Settings&) | diffuse.cpp:12 | exec |
| ○ | main | main.cpp:179 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | new_allocator.h:104 | exec |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| Path / |
The code analyzed by CQA in that panel excludes loops and represents 0.02% of application time for run gcc_0
| Source file and lines | cg.cpp:83-90 |
| Module | exec |
| nb instructions | 65 |
| nb uops | 65 |
| loop length | 260 |
| used w registers | 15 |
| used x registers | 25 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 2 |
| used q registers | 0 |
| used v registers | 1 |
| used z registers | 0 |
| nb stack references | 13 |
| micro-operation queue | 8.13 cycles |
| front end | 8.13 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 3.50 | 3.50 | 9.75 | 9.75 | 9.75 | 9.75 | 0.75 | 0.75 | 0.75 | 0.75 | 6.50 | 6.17 | 6.33 | 3.50 | 3.50 |
| cycles | 3.50 | 3.50 | 9.75 | 9.75 | 9.75 | 9.75 | 0.75 | 0.75 | 0.75 | 0.75 | 6.50 | 6.17 | 6.33 | 3.50 | 3.50 |
| Cycles executing div or sqrt instructions | 5.00-12.50 |
| Front-end | 8.13 |
| Dispatch | 9.75 |
| DIV/SQRT | 5.00-12.50 |
| Overall L1 | 9.75-12.50 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| other | 0% |
| all | 50% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 50% |
| all | 2% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 6% |
| all | 30% |
| load | 45% |
| store | 44% |
| mul | 12% |
| add-sub | 18% |
| fma | 12% |
| other | 27% |
| all | 37% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 37% |
| all | 30% |
| load | 45% |
| store | 44% |
| mul | 12% |
| add-sub | 18% |
| fma | 12% |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 29% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #-128]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X19, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDR W21, [X0, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| BL 410320 <@plt_start@+0x300> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W20, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 4102b0 <@plt_start@+0x290> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR W1, [X19, #44] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ORR W16, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W0, W1, W21,LSL #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| SDIV W1, W0, W20 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| MSUB W0, W1, W20, W0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | N/A |
| CMP W16, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.LT 411618 <_Z9cg_calc_wiiiPdPKdS_S1_S1_._omp_fn.0+0x390> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W16, W1, W16, W0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| MOVI D30, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| ADD W1, W1, W16 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| CMP W16, W1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 411544 <_Z9cg_calc_wiiiPdPKdS_S1_S1_._omp_fn.0+0x2bc> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD W16, W21, W16 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| FMOV D21, #1.0000000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| FMOV V22.2D, #1.0000000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | vect (50.0%) |
| ADD W20, W21, W1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| SBFM X27, X21, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| LDR W0, [X19, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDP X6, X18, [X19] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X17, X15, [X19, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| SUB W8, W0, W21,LSL #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SBFM X1, X0, #61, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| SUB W4, W0, W21 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MUL W30, W16, W0 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| ADD X13, X6, X1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| SUB X12, X6, X1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| UBFM W14, W8, #1, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| AND W2, W8, #0xfffffffe | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W5, W0, W21,LSL #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W2, W2, W21 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SBFM X22, X0, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| UBFM X14, X14, #60, #59 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| AND W28, W8, #0x1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ORR W9, WZR, W4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ORR W24, WZR, W8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR W2, [SP, #96] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| ADD X11, X6, #8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X1, X15, X1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| SUB X10, X6, #8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X7, X17, #8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| ADD X19, X19, #32 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| LDR X0, [X19] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #128 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| B 411538 <_Z9cg_calc_wiiiPdPKdS_S1_S1_._omp_fn.0+0x2b0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W1, W1, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W0, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| B 4112c8 <_Z9cg_calc_wiiiPdPKdS_S1_S1_._omp_fn.0+0x40> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
The code analyzed by CQA in that panel excludes loops and represents 0.02% of application time for run gcc_0
| Source file and lines | cg.cpp:83-90 |
| Module | exec |
| nb instructions | 65 |
| nb uops | 65 |
| loop length | 260 |
| used w registers | 15 |
| used x registers | 25 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 2 |
| used q registers | 0 |
| used v registers | 1 |
| used z registers | 0 |
| nb stack references | 13 |
| micro-operation queue | 8.13 cycles |
| front end | 8.13 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 3.50 | 3.50 | 9.75 | 9.75 | 9.75 | 9.75 | 0.75 | 0.75 | 0.75 | 0.75 | 6.50 | 6.17 | 6.33 | 3.50 | 3.50 |
| cycles | 3.50 | 3.50 | 9.75 | 9.75 | 9.75 | 9.75 | 0.75 | 0.75 | 0.75 | 0.75 | 6.50 | 6.17 | 6.33 | 3.50 | 3.50 |
| Cycles executing div or sqrt instructions | 5.00-12.50 |
| Front-end | 8.13 |
| Dispatch | 9.75 |
| DIV/SQRT | 5.00-12.50 |
| Overall L1 | 9.75-12.50 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| other | 0% |
| all | 50% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 50% |
| all | 2% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 6% |
| all | 30% |
| load | 45% |
| store | 44% |
| mul | 12% |
| add-sub | 18% |
| fma | 12% |
| other | 27% |
| all | 37% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 37% |
| all | 30% |
| load | 45% |
| store | 44% |
| mul | 12% |
| add-sub | 18% |
| fma | 12% |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 29% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #-128]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X19, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDR W21, [X0, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| BL 410320 <@plt_start@+0x300> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W20, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 4102b0 <@plt_start@+0x290> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR W1, [X19, #44] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ORR W16, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W0, W1, W21,LSL #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| SDIV W1, W0, W20 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| MSUB W0, W1, W20, W0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | N/A |
| CMP W16, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.LT 411618 <_Z9cg_calc_wiiiPdPKdS_S1_S1_._omp_fn.0+0x390> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W16, W1, W16, W0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| MOVI D30, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| ADD W1, W1, W16 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| CMP W16, W1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 411544 <_Z9cg_calc_wiiiPdPKdS_S1_S1_._omp_fn.0+0x2bc> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD W16, W21, W16 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| FMOV D21, #1.0000000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| FMOV V22.2D, #1.0000000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | vect (50.0%) |
| ADD W20, W21, W1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| SBFM X27, X21, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| LDR W0, [X19, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDP X6, X18, [X19] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X17, X15, [X19, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| SUB W8, W0, W21,LSL #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SBFM X1, X0, #61, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| SUB W4, W0, W21 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MUL W30, W16, W0 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| ADD X13, X6, X1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| SUB X12, X6, X1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| UBFM W14, W8, #1, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| AND W2, W8, #0xfffffffe | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W5, W0, W21,LSL #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W2, W2, W21 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SBFM X22, X0, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| UBFM X14, X14, #60, #59 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| AND W28, W8, #0x1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ORR W9, WZR, W4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ORR W24, WZR, W8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR W2, [SP, #96] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| ADD X11, X6, #8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X1, X15, X1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| SUB X10, X6, #8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X7, X17, #8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| ADD X19, X19, #32 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| LDR X0, [X19] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #128 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| B 411538 <_Z9cg_calc_wiiiPdPKdS_S1_S1_._omp_fn.0+0x2b0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W1, W1, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W0, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| B 4112c8 <_Z9cg_calc_wiiiPdPKdS_S1_S1_._omp_fn.0+0x40> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| Name | Coverage (%) | Time (s) |
|---|---|---|
| ▼cg_calc_w(int, int, int, double*, double const*, double*, double const*, double const*) [clone ._omp_fn.0]– | 33.08 | 88.21 |
| ○Loop 18 - cg.cpp:83-83 - exec | 0.00 | 0.00 |
| ▼Loop 19 - cg.cpp:83-90 - exec– | 0.00 | 0.00 |
| ▼Loop 22 - cg.cpp:83-90 - exec– | 0.11 | 0.31 |
| ○Loop 21 - cg.cpp:86-90 - exec | 32.94 | 87.80 |
| ○Loop 20 - cg.cpp:86-90 - exec | 0.00 | 0.00 |
