| Function: _QMaccelerate_kernel_modulePaccelerate_kernel..omp_par | Module: exec | Source: :0-0 [...] | Coverage (incl. loops): 7.04% | (excl. loops): 0.00% |
|---|
| Function: _QMaccelerate_kernel_modulePaccelerate_kernel..omp_par | Module: exec | Source: :0-0 [...] | Coverage (incl. loops): 7.04% | (excl. loops): 0.00% |
|---|
/home/eoseret/qaas/qaas_runs/178-231-1255/intel/CloverLeaf1.3-FC/build/CloverLeaf1.3-FC/CloverLeaf_ref/kernels/accelerate_kernel.f90: 55 - 75 |
-------------------------------------------------------------------------------- |
55: halfdt=0.5_8*dt |
56: |
57: !$OMP PARALLEL |
58: |
59: !$OMP DO PRIVATE(j,k,stepbymass_s) |
60: DO k=y_min,y_max+1 |
61: !$OMP SIMD |
62: DO j=x_min,x_max+1 |
63: stepbymass_s=halfdt/((density0(j-1,k-1)*volume(j-1,k-1) & |
[...] |
69: xvel1(j,k)=xvel0(j,k)-stepbymass_s*(xarea(j ,k )*(pressure(j ,k )-pressure(j-1,k )) & |
70: +xarea(j ,k-1)*(pressure(j ,k-1)-pressure(j-1,k-1))) |
71: yvel1(j,k)=yvel0(j,k)-stepbymass_s*(yarea(j ,k )*(pressure(j ,k )-pressure(j ,k-1)) & |
72: +yarea(j-1,k )*(pressure(j-1,k )-pressure(j-1,k-1))) |
73: xvel1(j,k)=xvel1(j,k)-stepbymass_s*(xarea(j ,k )*(viscosity(j ,k )-viscosity(j-1,k )) & |
74: +xarea(j ,k-1)*(viscosity(j ,k-1)-viscosity(j-1,k-1))) |
75: yvel1(j,k)=yvel1(j,k)-stepbymass_s*(yarea(j ,k )*(viscosity(j ,k )-viscosity(j ,k-1)) & |
0x418160 SUB SP, SP, #192 |
0x418164 STP X29, X30, [SP, #96] |
0x418168 STP X28, X27, [SP, #112] |
0x41816c STP X26, X25, [SP, #128] |
0x418170 STP X24, X23, [SP, #144] |
0x418174 STP X22, X21, [SP, #160] |
0x418178 STP X20, X19, [SP, #176] |
0x41817c ADD X29, SP, #96 |
0x418180 LDP X15, X14, [X2, #64] |
0x418184 LDP X8, X9, [X2] |
0x418188 ADRP X0, <48a188>
0x41818c ADD X0, X0, #3632 |
0x418190 LDR X10, [X2, #16] |
0x418194 LDR X11, [X2, #32] |
0x418198 LDR X20, [X2, #160] |
0x41819c STP X14, X15, [X29, #992] |
0x4181a0 LDP X14, X21, [X2, #80] |
0x4181a4 LDR X23, [X8] |
0x4181a8 LDR X8, [X9] |
0x4181ac LDR X9, [X10] |
0x4181b0 LDP X12, X13, [X2, #48] |
0x4181b4 STUR X14, [X29, #472] |
0x4181b8 LDP X22, X14, [X2, #96] |
0x4181bc LDP X24, X25, [X2, #112] |
0x4181c0 LDP X26, X27, [X2, #128] |
0x4181c4 LDP X28, X19, [X2, #144] |
0x4181c8 STP X8, X14, [SP, #40] |
0x4181cc LDR X8, [X11] |
0x4181d0 STP X8, X9, [SP, #24] |
0x4181d4 LDR W8, [X13] |
0x4181d8 LDR W9, [X12] |
0x4181dc ADD W8, W8, #1 |
0x4181e0 STR W9, [SP, #16] |
0x4181e4 SUBS W8, W8, W9 |
0x4181e8 CSINV W8, W8, WZR, #10 |
0x4181ec STP W8, WZR, [X29, #500] |
0x4181f0 MOVZ W8, #1 |
0x4181f4 STUR W8, [X29, #496] |
0x4181f8 BL 410140 |
0x4181fc ORR W1, WZR, W0 |
0x418200 SUB X3, X29, #4 |
0x418204 SUB X4, X29, #8 |
0x418208 SUB X5, X29, #12 |
0x41820c SUB X6, X29, #16 |
0x418210 STR WZR, [SP] |
0x418214 ADRP X0, <48a214>
0x418218 ADD X0, X0, #3632 |
0x41821c MOVZ W2, #34 |
0x418220 MOVZ W7, #1 |
0x418224 STR W1, [SP, #8] |
0x418228 BL 4101b0 |
0x41822c LDP W8, W12, [X29, #500] |
0x418230 SUB W8, W8, W12 |
0x418234 CMN W8, #1 |
0x418238 STR W8, [SP, #20] |
0x41823c B.EQ 418588 |
0x418240 LDP X9, X8, [X29, #992] |
0x418244 LDR W13, [X9] |
0x418248 LDR W8, [X8] |
0x41824c ADD W10, W13, #1 |
0x418250 STUR W8, [X29, #488] |
0x418254 CMP W10, W8 |
0x418258 B.LT 418588 |
0x41825c LDUR W8, [X29, #488] |
0x418260 SUB W10, W13, W8 |
0x418264 ADD W8, W10, #2 |
0x418268 STUR W8, [X29, #480] |
0x41826c CBZ W8, 418588 |
0x418270 LDR W8, [SP, #16] |
0x418274 ADD W14, W13, #1 |
0x418278 FMOV D2, #0.2500000 |
0x41827c PTRUE P1.D, ALL |
0x418280 FDUP Z3.D, #80 |
0x418284 ORR W9, WZR, WZR |
0x418288 ADD W8, W12, W8 |
0x41828c STR W8, [SP, #16] |
0x418290 LDUR X8, [X29, #472] |
0x418294 LDR D0, [X8] |
0x418298 LDUR W8, [X29, #488] |
0x41829c SUB W15, W8, #1 |
0x4182a0 CMP W13, W15 |
0x4182a4 CCMP W14, W8, #8, #10 |
0x4182a8 CNTD X14, ALL |
0x4182ac DUP Z1.D, Z0.D[0] |
0x4182b0 CSINC W8, WZR, WZR, #10 |
0x4182b4 STR W8, [SP, #12] |
0x4182b8 LDUR W8, [X29, #480] |
0x4182bc SUBS W15, W8, W14 |
0x4182c0 CSEL W15, WZR, W15, #3 |
0x4182c4 WHILELO P0.D, WZR, W8 |
0x4182c8 B 4182e0 |
(63) 0x4182cc LDR W8, [SP, #20] |
(63) 0x4182d0 LDUR W9, [X29, #472] |
(63) 0x4182d4 CMP W9, W8 |
(63) 0x4182d8 ADD W9, W9, #1 |
(63) 0x4182dc B.EQ 418588 |
(63) 0x4182e0 LDR W8, [SP, #16] |
(63) 0x4182e4 STUR W9, [X29, #472] |
(63) 0x4182e8 LDUR W5, [X29, #488] |
(63) 0x4182ec SUB X6, XZR, X14 |
(63) 0x4182f0 ORR P2.B, P0/Z, P0.B, P0.B |
(63) 0x4182f4 ADD W17, W8, W9 |
(63) 0x4182f8 SUB W16, W17, #1 |
(63) 0x4182fc LDP X9, X8, [SP, #32] |
(63) 0x418300 SBFM X17, X17, #0, #31 |
(63) 0x418304 SBFM X16, X16, #0, #31 |
(63) 0x418308 SUB X0, X16, X9 |
(63) 0x41830c SUB X18, X17, X9 |
(63) 0x418310 LDR X9, [SP, #48] |
(63) 0x418314 MUL X16, X0, X8 |
(63) 0x418318 MUL X17, X18, X8 |
(63) 0x41831c LDR X8, [SP, #24] |
(63) 0x418320 ADD X1, X25, X16,LSL #3 |
(63) 0x418324 ADD X2, X20, X17,LSL #3 |
(63) 0x418328 ADD X3, X20, X16,LSL #3 |
(63) 0x41832c MUL X18, X18, X8 |
(63) 0x418330 MUL X0, X0, X8 |
(63) 0x418334 LDR W8, [SP, #12] |
(63) 0x418338 ADD X0, X24, X0,LSL #3 |
(63) 0x41833c ADD X4, X19, X18,LSL #3 |
(63) 0x418340 TBZ W8, #0, 41846c |
(63) 0x418344 ORR W5, WZR, WZR |
(64) 0x418348 LDUR W8, [X29, #488] |
(64) 0x41834c ADD W8, W8, W5 |
(64) 0x418350 ADD W5, W5, #1 |
(64) 0x418354 SUB W10, W8, #1 |
(64) 0x418358 SBFM X8, X8, #0, #31 |
(64) 0x41835c SBFM X10, X10, #0, #31 |
(64) 0x418360 SUB X6, X8, X23 |
(64) 0x418364 SUB X10, X10, X23 |
(64) 0x418368 ADD X8, X6, X16 |
(64) 0x41836c ADD X12, X6, X17 |
(64) 0x418370 ADD X7, X6, X18 |
(64) 0x418374 LDR D17, [X0, X6,LSL #3] |
(64) 0x418378 LDR D21, [X1, X6,LSL #3] |
(64) 0x41837c ADD X11, X10, X16 |
(64) 0x418380 LDR D6, [X21, X8,LSL #3] |
(64) 0x418384 LDR D7, [X22, X8,LSL #3] |
(64) 0x418388 ADD X13, X10, X17 |
(64) 0x41838c LDR D18, [X25, X8,LSL #3] |
(64) 0x418390 LDUR W8, [X29, #480] |
(64) 0x418394 LDR D4, [X21, X11,LSL #3] |
(64) 0x418398 LDR D5, [X22, X11,LSL #3] |
(64) 0x41839c LDR D20, [X25, X11,LSL #3] |
(64) 0x4183a0 LDR D16, [X25, X13,LSL #3] |
(64) 0x4183a4 CMP W8, W5 |
(64) 0x4183a8 FMUL D6, D6, D7 |
(64) 0x4183ac LDR D7, [X21, X12,LSL #3] |
(64) 0x4183b0 FMADD D4, D4, D5, D6 |
(64) 0x4183b4 LDR D5, [X22, X12,LSL #3] |
(64) 0x4183b8 LDR D6, [X22, X13,LSL #3] |
(64) 0x4183bc FSUB D18, D18, D20
(64) 0x4183c0 LDR D20, [X28, X12,LSL #3] |
(64) 0x4183c4 FMUL D18, D17, D18 |
(64) 0x4183c8 FMADD D4, D7, D5, D4 |
(64) 0x4183cc LDR D5, [X21, X13,LSL #3] |
(64) 0x4183d0 LDR D7, [X25, X12,LSL #3] |
(64) 0x4183d4 FMADD D4, D5, D6, D4 |
(64) 0x4183d8 LDR D6, [X24, X7,LSL #3] |
(64) 0x4183dc LDR D5, [X9, X7,LSL #3] |
(64) 0x4183e0 FSUB D19, D7, D16
(64) 0x4183e4 FSUB D7, D7, D21
(64) 0x4183e8 LDR D21, [X1, X10,LSL #3] |
(64) 0x4183ec FMUL D4, D4, D2 |
(64) 0x4183f0 FMADD D18, D6, D19, D18 |
(64) 0x4183f4 FSUB D16, D16, D21
(64) 0x4183f8 LDR D19, [X27, X7,LSL #3] |
(64) 0x4183fc LDR D21, [X3, X10,LSL #3] |
(64) 0x418400 FDIV D4, D0, D4 |
(64) 0x418404 FMSUB D5, D4, D18, D5 |
(64) 0x418408 LDR D18, [X28, X13,LSL #3] |
(64) 0x41840c FMUL D16, D18, D16 |
(64) 0x418410 FMADD D7, D20, D7, D16 |
(64) 0x418414 LDR D16, [X2, X6,LSL #3] |
(64) 0x418418 FMSUB D7, D4, D7, D19 |
(64) 0x41841c LDR D19, [X2, X10,LSL #3] |
(64) 0x418420 STR D7, [X19, X7,LSL #3] |
(64) 0x418424 LDR D7, [X20, X12,LSL #3] |
(64) 0x418428 FSUB D16, D16, D19
(64) 0x41842c LDR D19, [X3, X6,LSL #3] |
(64) 0x418430 FSUB D22, D19, D21
(64) 0x418434 FSUB D7, D7, D19
(64) 0x418438 FMUL D17, D17, D22 |
(64) 0x41843c FMADD D6, D6, D16, D17 |
(64) 0x418440 LDR D16, [X20, X13,LSL #3] |
(64) 0x418444 FMSUB D5, D4, D6, D5 |
(64) 0x418448 LDR D6, [X4, X6,LSL #3] |
(64) 0x41844c STR D5, [X26, X7,LSL #3] |
(64) 0x418450 FSUB D5, D16, D21
(64) 0x418454 FMUL D5, D18, D5 |
(64) 0x418458 FMADD D5, D20, D7, D5 |
(64) 0x41845c FMSUB D4, D4, D5, D6 |
(64) 0x418460 STR D4, [X4, X6,LSL #3] |
(64) 0x418464 B.HI 418348 |
(63) 0x418468 B 4182cc |
(62) 0x41846c SBFM X12, X5, #0, #31 |
(62) 0x418470 SUB W7, W5, #1 |
(62) 0x418474 ADD W6, W6, W14 |
(62) 0x418478 ADD W5, W5, W14 |
(62) 0x41847c SUB X12, X12, X23 |
(62) 0x418480 SBFM X7, X7, #0, #31 |
(62) 0x418484 ADD X13, X12, X16 |
(62) 0x418488 SUB X7, X7, X23 |
(62) 0x41848c ADD X8, X12, X17 |
(62) 0x418490 LD1D {Z18.D}, P2/Z, [X0, X12,LSL #3] |
(62) 0x418494 ADD X10, X12, X18 |
(62) 0x418498 LD1D {Z6.D}, P2/Z, [X21, X13,LSL #3] |
(62) 0x41849c LD1D {Z7.D}, P2/Z, [X22, X13,LSL #3] |
(62) 0x4184a0 ADD X30, X7, X16 |
(62) 0x4184a4 ADD X11, X7, X17 |
(62) 0x4184a8 LD1D {Z19.D}, P2/Z, [X25, X13,LSL #3] |
(62) 0x4184ac LD1D {Z21.D}, P2/Z, [X1, X7,LSL #3] |
(62) 0x4184b0 LD1D {Z4.D}, P2/Z, [X21, X30,LSL #3] |
(62) 0x4184b4 LD1D {Z5.D}, P2/Z, [X22, X30,LSL #3] |
(62) 0x4184b8 LD1D {Z20.D}, P2/Z, [X25, X30,LSL #3] |
(62) 0x4184bc LD1D {Z16.D}, P2/Z, [X25, X11,LSL #3] |
(62) 0x4184c0 FMUL Z6.D, Z6.D, Z7.D |
(62) 0x4184c4 LD1D {Z7.D}, P2/Z, [X25, X8,LSL #3] |
(62) 0x4184c8 FSUB Z19.D, Z19.D, Z20.D |
(62) 0x4184cc LD1D {Z20.D}, P2/Z, [X1, X12,LSL #3] |
(62) 0x4184d0 FMAD Z4.D, P1/M, Z5.D, Z6.D |
(62) 0x4184d4 LD1D {Z5.D}, P2/Z, [X21, X8,LSL #3] |
(62) 0x4184d8 LD1D {Z6.D}, P2/Z, [X22, X8,LSL #3] |
(62) 0x4184dc FMUL Z19.D, Z18.D, Z19.D |
(62) 0x4184e0 FSUB Z17.D, Z7.D, Z16.D |
(62) 0x4184e4 FSUB Z16.D, Z16.D, Z21.D |
(62) 0x4184e8 FSUB Z7.D, Z7.D, Z20.D |
(62) 0x4184ec LD1D {Z20.D}, P2/Z, [X28, X11,LSL #3] |
(62) 0x4184f0 FMLA Z4.D, P1/M, Z5.D, Z6.D |
(62) 0x4184f4 LD1D {Z5.D}, P2/Z, [X21, X11,LSL #3] |
(62) 0x4184f8 LD1D {Z6.D}, P2/Z, [X22, X11,LSL #3] |
(62) 0x4184fc FMUL Z16.D, Z20.D, Z16.D |
(62) 0x418500 FMLA Z4.D, P1/M, Z5.D, Z6.D |
(62) 0x418504 LD1D {Z6.D}, P2/Z, [X24, X10,LSL #3] |
(62) 0x418508 LD1D {Z5.D}, P2/Z, [X9, X10,LSL #3] |
(62) 0x41850c FMUL Z4.D, Z4.D, Z3.D |
(62) 0x418510 FMAD Z17.D, P1/M, Z6.D, Z19.D |
(62) 0x418514 LD1D {Z19.D}, P2/Z, [X28, X8,LSL #3] |
(62) 0x418518 FDIVR Z4.D, P1/M, Z4.D, Z1.D |
(62) 0x41851c FMAD Z7.D, P1/M, Z19.D, Z16.D |
(62) 0x418520 LD1D {Z16.D}, P2/Z, [X2, X7,LSL #3] |
(62) 0x418524 FMLS Z5.D, P1/M, Z4.D, Z17.D |
(62) 0x418528 LD1D {Z17.D}, P2/Z, [X27, X10,LSL #3] |
(62) 0x41852c FMSB Z7.D, P1/M, Z4.D, Z17.D |
(62) 0x418530 LD1D {Z17.D}, P2/Z, [X3, X7,LSL #3] |
(62) 0x418534 ST1D {Z7.D}, P2, [X19, X10,LSL #3] |
(62) 0x418538 LD1D {Z7.D}, P2/Z, [X2, X12,LSL #3] |
(62) 0x41853c FSUB Z7.D, Z7.D, Z16.D |
(62) 0x418540 LD1D {Z16.D}, P2/Z, [X3, X12,LSL #3] |
(62) 0x418544 FSUB Z21.D, Z16.D, Z17.D |
(62) 0x418548 FMUL Z18.D, Z18.D, Z21.D |
(62) 0x41854c FMAD Z6.D, P1/M, Z7.D, Z18.D |
(62) 0x418550 LD1D {Z7.D}, P2/Z, [X20, X11,LSL #3] |
(62) 0x418554 FMLS Z5.D, P1/M, Z4.D, Z6.D |
(62) 0x418558 LD1D {Z6.D}, P2/Z, [X20, X8,LSL #3] |
(62) 0x41855c FSUB Z7.D, Z7.D, Z17.D |
(62) 0x418560 FMUL Z7.D, Z20.D, Z7.D |
(62) 0x418564 FSUB Z6.D, Z6.D, Z16.D |
(62) 0x418568 ST1D {Z5.D}, P2, [X26, X10,LSL #3] |
(62) 0x41856c LD1D {Z5.D}, P2/Z, [X4, X12,LSL #3] |
(62) 0x418570 FMAD Z6.D, P1/M, Z19.D, Z7.D |
(62) 0x418574 FMSB Z4.D, P1/M, Z6.D, Z5.D |
(62) 0x418578 ST1D {Z4.D}, P2, [X4, X12,LSL #3] |
(62) 0x41857c WHILELO P2.D, W6, W15 |
(62) 0x418580 B.MI 41846c |
(63) 0x418584 B 4182cc |
0x418588 LDR W19, [SP, #8] |
0x41858c ADRP X0, <48a58c>
0x418590 ADD X0, X0, #3632 |
0x418594 ORR W1, WZR, W19 |
0x418598 BL 410040 |
0x41859c ADRP X0, <48a59c>
0x4185a0 ADD X0, X0, #3656 |
0x4185a4 ORR W1, WZR, W19 |
0x4185a8 BL 410330 |
0x4185ac LDP X20, X19, [SP, #176] |
0x4185b0 LDP X22, X21, [SP, #160] |
0x4185b4 LDP X24, X23, [SP, #144] |
0x4185b8 LDP X26, X25, [SP, #128] |
0x4185bc LDP X28, X27, [SP, #112] |
0x4185c0 LDP X29, X30, [SP, #96] |
0x4185c4 ADD SP, SP, #192 |
0x4185c8 RET |
0x4185cc HINT #0 |
0x4185d0 HINT #0 |
0x4185d4 HINT #0 |
0x4185d8 HINT #0 |
0x4185dc HINT #0 |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►98.42+ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_launch_thread | libomp.so | |
| ○ | __kmp_launch_worker(void*) | libomp.so | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►1.58+ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_fork_call | libomp.so | |
| ○ | __kmpc_fork_call | libomp.so | |
| ○ | accelerate_kernel | accelerate_kernel.f90:83 | exec |
| ○ | accelerate | accelerate.f90:41 | exec |
| ○ | hydro | hydro.f90:58 | exec |
| ○ | main | clover_leaf.f90:76 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | exec |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| Path / |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run orig_0
| Source file and lines | accelerate_kernel.f90:55-63,accelerate_kernel.f90:69-75 |
| Module | exec |
| nb instructions | 113 |
| nb uops | 108 |
| loop length | 452 |
| used w registers | 13 |
| used x registers | 26 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 2 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 3 |
| nb stack references | 22 |
| micro-operation queue | 13.50 cycles |
| front end | 13.50 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 4.50 | 4.50 | 11.50 | 11.50 | 11.50 | 11.50 | 1.00 | 1.00 | 0.50 | 0.50 | 18.00 | 18.00 | 18.00 | 10.00 | 10.00 |
| cycles | 4.50 | 4.50 | 11.50 | 11.50 | 11.50 | 11.50 | 1.00 | 1.00 | 0.50 | 0.50 | 18.00 | 18.00 | 18.00 | 10.00 | 10.00 |
| Cycles executing div or sqrt instructions | NA |
| Front-end | 13.50 |
| Dispatch | 18.00 |
| Overall L1 | 18.00 |
| all | 1% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 8% |
| all | 0% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 1% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 7% |
| all | 31% |
| load | 42% |
| store | 30% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 22% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 28% |
| all | 14% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 14% |
| all | 30% |
| load | 42% |
| store | 30% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 22% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 26% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| SUB SP, SP, #192 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STP X29, X30, [SP, #96] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X28, X27, [SP, #112] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X26, X25, [SP, #128] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X24, X23, [SP, #144] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X22, X21, [SP, #160] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X20, X19, [SP, #176] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #96 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| LDP X15, X14, [X2, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X8, X9, [X2] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| ADRP X0, <48a188> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X0, X0, #3632 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| LDR X10, [X2, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X11, [X2, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X20, [X2, #160] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| STP X14, X15, [X29, #992] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDP X14, X21, [X2, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDR X23, [X8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDR X8, [X9] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X9, [X10] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDP X12, X13, [X2, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| STUR X14, [X29, #472] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| LDP X22, X14, [X2, #96] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X24, X25, [X2, #112] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X26, X27, [X2, #128] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X28, X19, [X2, #144] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| STP X8, X14, [SP, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDR X8, [X11] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| STP X8, X9, [SP, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDR W8, [X13] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR W9, [X12] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ADD W8, W8, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STR W9, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| SUBS W8, W8, W9 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
| CSINV W8, W8, WZR, #10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STP W8, WZR, [X29, #500] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| MOVZ W8, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STUR W8, [X29, #496] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| BL 410140 <@plt_start@+0x120> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W1, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB X3, X29, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| SUB X4, X29, #8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| SUB X5, X29, #12 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| SUB X6, X29, #16 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STR WZR, [SP] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| ADRP X0, <48a214> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X0, X0, #3632 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| MOVZ W2, #34 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W7, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR W1, [SP, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| BL 4101b0 <@plt_start@+0x190> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP W8, W12, [X29, #500] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| SUB W8, W8, W12 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| CMN W8, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| STR W8, [SP, #20] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| B.EQ 418588 <_QMaccelerate_kernel_modulePaccelerate_kernel..omp_par+0x428> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP X9, X8, [X29, #992] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDR W13, [X9] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR W8, [X8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ADD W10, W13, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STUR W8, [X29, #488] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| CMP W10, W8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.LT 418588 <_QMaccelerate_kernel_modulePaccelerate_kernel..omp_par+0x428> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDUR W8, [X29, #488] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| SUB W10, W13, W8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADD W8, W10, #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STUR W8, [X29, #480] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| CBZ W8, 418588 <_QMaccelerate_kernel_modulePaccelerate_kernel..omp_par+0x428> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR W8, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ADD W14, W13, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| FMOV D2, #0.2500000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| PTRUE P1.D, ALL | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (100.0%) |
| FDUP Z3.D, #80 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (3.1%) |
| ORR W9, WZR, WZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADD W8, W12, W8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STR W8, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| LDUR X8, [X29, #472] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR D0, [X8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| LDUR W8, [X29, #488] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| SUB W15, W8, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W13, W15 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| CCMP W14, W8, #8, #10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CNTD X14, ALL | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (25.0%) |
| DUP Z1.D, Z0.D[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | vect (100.0%) |
| CSINC W8, WZR, WZR, #10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STR W8, [SP, #12] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| LDUR W8, [X29, #480] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| SUBS W15, W8, W14 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| CSEL W15, WZR, W15, #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| WHILELO P0.D, WZR, W8 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 2 | N/A |
| B 4182e0 <_QMaccelerate_kernel_modulePaccelerate_kernel..omp_par+0x180> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR W19, [SP, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| ADRP X0, <48a58c> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X0, X0, #3632 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ORR W1, WZR, W19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410040 <@plt_start@+0x20> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADRP X0, <48a59c> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X0, X0, #3656 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ORR W1, WZR, W19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410330 <@plt_start@+0x310> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP X20, X19, [SP, #176] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X22, X21, [SP, #160] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X24, X23, [SP, #144] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X26, X25, [SP, #128] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X28, X27, [SP, #112] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP, #96] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| ADD SP, SP, #192 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run orig_0
| Source file and lines | accelerate_kernel.f90:55-63,accelerate_kernel.f90:69-75 |
| Module | exec |
| nb instructions | 113 |
| nb uops | 108 |
| loop length | 452 |
| used w registers | 13 |
| used x registers | 26 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 2 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 3 |
| nb stack references | 22 |
| micro-operation queue | 13.50 cycles |
| front end | 13.50 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 4.50 | 4.50 | 11.50 | 11.50 | 11.50 | 11.50 | 1.00 | 1.00 | 0.50 | 0.50 | 18.00 | 18.00 | 18.00 | 10.00 | 10.00 |
| cycles | 4.50 | 4.50 | 11.50 | 11.50 | 11.50 | 11.50 | 1.00 | 1.00 | 0.50 | 0.50 | 18.00 | 18.00 | 18.00 | 10.00 | 10.00 |
| Cycles executing div or sqrt instructions | NA |
| Front-end | 13.50 |
| Dispatch | 18.00 |
| Overall L1 | 18.00 |
| all | 1% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 8% |
| all | 0% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 1% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 7% |
| all | 31% |
| load | 42% |
| store | 30% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 22% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 28% |
| all | 14% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 14% |
| all | 30% |
| load | 42% |
| store | 30% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 22% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 26% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| SUB SP, SP, #192 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STP X29, X30, [SP, #96] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X28, X27, [SP, #112] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X26, X25, [SP, #128] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X24, X23, [SP, #144] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X22, X21, [SP, #160] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X20, X19, [SP, #176] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #96 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| LDP X15, X14, [X2, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X8, X9, [X2] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| ADRP X0, <48a188> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X0, X0, #3632 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| LDR X10, [X2, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X11, [X2, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X20, [X2, #160] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| STP X14, X15, [X29, #992] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDP X14, X21, [X2, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDR X23, [X8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDR X8, [X9] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X9, [X10] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDP X12, X13, [X2, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| STUR X14, [X29, #472] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| LDP X22, X14, [X2, #96] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X24, X25, [X2, #112] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X26, X27, [X2, #128] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X28, X19, [X2, #144] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| STP X8, X14, [SP, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDR X8, [X11] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| STP X8, X9, [SP, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDR W8, [X13] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR W9, [X12] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ADD W8, W8, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STR W9, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| SUBS W8, W8, W9 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
| CSINV W8, W8, WZR, #10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STP W8, WZR, [X29, #500] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| MOVZ W8, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STUR W8, [X29, #496] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| BL 410140 <@plt_start@+0x120> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W1, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB X3, X29, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| SUB X4, X29, #8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| SUB X5, X29, #12 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| SUB X6, X29, #16 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STR WZR, [SP] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| ADRP X0, <48a214> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X0, X0, #3632 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| MOVZ W2, #34 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W7, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR W1, [SP, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| BL 4101b0 <@plt_start@+0x190> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP W8, W12, [X29, #500] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| SUB W8, W8, W12 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| CMN W8, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| STR W8, [SP, #20] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| B.EQ 418588 <_QMaccelerate_kernel_modulePaccelerate_kernel..omp_par+0x428> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP X9, X8, [X29, #992] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDR W13, [X9] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR W8, [X8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ADD W10, W13, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STUR W8, [X29, #488] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| CMP W10, W8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.LT 418588 <_QMaccelerate_kernel_modulePaccelerate_kernel..omp_par+0x428> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDUR W8, [X29, #488] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| SUB W10, W13, W8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADD W8, W10, #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STUR W8, [X29, #480] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| CBZ W8, 418588 <_QMaccelerate_kernel_modulePaccelerate_kernel..omp_par+0x428> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR W8, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ADD W14, W13, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| FMOV D2, #0.2500000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| PTRUE P1.D, ALL | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (100.0%) |
| FDUP Z3.D, #80 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (3.1%) |
| ORR W9, WZR, WZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADD W8, W12, W8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STR W8, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| LDUR X8, [X29, #472] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR D0, [X8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| LDUR W8, [X29, #488] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| SUB W15, W8, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W13, W15 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| CCMP W14, W8, #8, #10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CNTD X14, ALL | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (25.0%) |
| DUP Z1.D, Z0.D[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | vect (100.0%) |
| CSINC W8, WZR, WZR, #10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STR W8, [SP, #12] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| LDUR W8, [X29, #480] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| SUBS W15, W8, W14 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| CSEL W15, WZR, W15, #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| WHILELO P0.D, WZR, W8 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 2 | N/A |
| B 4182e0 <_QMaccelerate_kernel_modulePaccelerate_kernel..omp_par+0x180> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR W19, [SP, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| ADRP X0, <48a58c> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X0, X0, #3632 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ORR W1, WZR, W19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410040 <@plt_start@+0x20> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADRP X0, <48a59c> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X0, X0, #3656 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ORR W1, WZR, W19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410330 <@plt_start@+0x310> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP X20, X19, [SP, #176] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X22, X21, [SP, #160] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X24, X23, [SP, #144] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X26, X25, [SP, #128] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X28, X27, [SP, #112] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP, #96] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| ADD SP, SP, #192 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A |
| Name | Coverage (%) | Time (s) |
|---|---|---|
| ▼_QMaccelerate_kernel_modulePaccelerate_kernel..omp_par– | 7.04 | 9.62 |
| ▼Loop 63 - - exec– | 0.00 | 0.02 |
| ○Loop 62 - - exec | 7.03 | 9.61 |
| ○Loop 64 - - exec | 0.00 | 0.00 |
