Function: Step10_orig | Module: exec | Source: Step10_orig.c:10-41 [...] | Coverage: 97.38% |
---|
Function: Step10_orig | Module: exec | Source: Step10_orig.c:10-41 [...] | Coverage: 97.38% |
---|
/home/hbollore/qaas-runs/170-256-3563/intel/HACCmk/build/HACCmk/src/Step10_orig.c: 10 - 41 |
-------------------------------------------------------------------------------- |
10: { |
[...] |
19: for ( j = 0; j < count1; j++ ) |
20: { |
21: dxc = xx1[j] - xxi; |
22: dyc = yy1[j] - yyi; |
23: dzc = zz1[j] - zzi; |
24: |
25: r2 = dxc * dxc + dyc * dyc + dzc * dzc; |
26: |
27: m = ( r2 < fsrrmax2 ) ? mass1[j] : 0.0f; |
28: |
29: f = pow( r2 + mp_rsm2, -1.5 ) - ( ma0 + r2*(ma1 + r2*(ma2 + r2*(ma3 + r2*(ma4 + r2*ma5))))); |
30: |
31: f = ( r2 > 0.0f ) ? m * f : 0.0f; |
32: |
33: xi = xi + f * dxc; |
34: yi = yi + f * dyc; |
35: zi = zi + f * dzc; |
36: } |
37: |
38: *dxi = xi; |
39: *dyi = yi; |
40: *dzi = zi; |
41: } |
0x401460 SUB SP, SP, #96 |
0x401464 STP D15, D14, [SP, #16] |
0x401468 STP D13, D12, [SP, #32] |
0x40146c STP D11, D10, [SP, #48] |
0x401470 STP D9, D8, [SP, #64] |
0x401474 STP X20, X19, [SP, #80] |
0x401478 CMP W0, #1 |
0x40147c B.LT 4014a8 |
0x401480 ORR W8, WZR, W0 |
0x401484 CNTW X10, ALL |
0x401488 FMOV S24, S1 |
0x40148c CMP X10, X8 |
0x401490 B.LS 4014b8 |
0x401494 ORR X9, XZR, XZR |
0x401498 MOVI D6, #0 |
0x40149c MOVI D7, #0 |
0x4014a0 MOVI D5, #0 |
0x4014a4 B 40170c |
0x4014a8 MOVI D5, #0 |
0x4014ac MOVI D7, #0 |
0x4014b0 MOVI D6, #0 |
0x4014b4 B 401818 |
0x4014b8 UDIV X9, X8, X10 |
0x4014bc PTRUE P0.D, ALL |
0x4014c0 PFALSE P1.B |
0x4014c4 DUP Z20.S, #0 |
0x4014c8 ORR X11, XZR, XZR |
0x4014cc CNTD X13, ALL |
0x4014d0 DUP Z5.S, Z0.S[0] |
0x4014d4 STP S24, S2, [SP] |
0x4014d8 DUP Z6.S, Z24.S[0] |
0x4014dc DUP Z7.S, Z2.S[0] |
0x4014e0 DUP Z17.S, Z3.S[0] |
0x4014e4 DUP Z18.S, Z4.S[0] |
0x4014e8 MOVZ W14, #35234 |
0x4014ec MOVZ W15, #63002 |
0x4014f0 MOVZ W16, #18267 |
0x4014f4 MOVZ W17, #6647 |
0x4014f8 DUP Z1.D, #0 |
0x4014fc MOVZ W18, #52448 |
0x401500 MOVZ W0, #58683 |
0x401504 PNEXT P1.D, P0, P1.D |
0x401508 ORR Z16.D, Z20.D, Z20.D |
0x40150c ORR Z19.D, Z20.D, Z20.D |
0x401510 FMOV D24, #1.0000000 |
0x401514 ORR Z22.D, Z20.D, Z20.D |
0x401518 ORR Z23.D, Z20.D, Z20.D |
0x40151c ORR Z25.D, Z20.D, Z20.D |
0x401520 ORR Z26.D, Z20.D, Z20.D |
0x401524 MOVK W14, #13765 |
0x401528 MOVK W15, #14461 |
0x40152c MOVK W16, #47759 |
0x401530 MOVK W17, #15420 |
0x401534 MOVK W18, #48537 |
0x401538 MOVK W0, #16009 |
0x40153c STP S3, S4, [SP, #8] |
0x401540 UMSUBL X12, W9, W10, X8 |
0x401544 UMADDL X9, W9, W10, XZR |
(12) 0x401548 UBFM X19, X11, #62, #61 |
(12) 0x40154c ORR Z15.D, Z1.D, Z1.D |
(12) 0x401550 ADD X20, X1, X19 |
(12) 0x401554 LD1W {Z4.D}, P0/Z, [X20, MUL VL] |
(12) 0x401558 LD1W {Z21.D}, P0/Z, [X20, X13,LSL #2] |
(12) 0x40155c ADD X20, X2, X19 |
(12) 0x401560 MOVPRFX Z27, Z4 |
(12) 0x401564 FSUB Z27.S, P0/M, Z27.S, Z5.S |
(12) 0x401568 MOVPRFX Z28, Z21 |
(12) 0x40156c FSUB Z28.S, P0/M, Z28.S, Z5.S |
(12) 0x401570 LD1W {Z4.D}, P0/Z, [X20, MUL VL] |
(12) 0x401574 LD1W {Z21.D}, P0/Z, [X20, X13,LSL #2] |
(12) 0x401578 ADD X20, X3, X19 |
(12) 0x40157c MOVPRFX Z29, Z4 |
(12) 0x401580 FSUB Z29.S, P0/M, Z29.S, Z6.S |
(12) 0x401584 MOVPRFX Z30, Z21 |
(12) 0x401588 FSUB Z30.S, P0/M, Z30.S, Z6.S |
(12) 0x40158c ADD X19, X4, X19 |
(12) 0x401590 LD1W {Z4.D}, P0/Z, [X20, MUL VL] |
(12) 0x401594 LD1W {Z21.D}, P0/Z, [X20, X13,LSL #2] |
(12) 0x401598 MOVPRFX Z31, Z4 |
(12) 0x40159c FSUB Z31.S, P0/M, Z31.S, Z7.S |
(12) 0x4015a0 MOVPRFX Z8, Z21 |
(12) 0x4015a4 FSUB Z8.S, P0/M, Z8.S, Z7.S |
(12) 0x4015a8 MOVPRFX Z4, Z27 |
(12) 0x4015ac FMUL Z4.S, P0/M, Z4.S, Z27.S |
(12) 0x4015b0 MOVPRFX Z21, Z28 |
(12) 0x4015b4 FMUL Z21.S, P0/M, Z21.S, Z28.S |
(12) 0x4015b8 FMLA Z4.S, P0/M, Z29.S, Z29.S |
(12) 0x4015bc FMLA Z21.S, P0/M, Z30.S, Z30.S |
(12) 0x4015c0 MOVPRFX Z9, Z4 |
(12) 0x4015c4 FMLA Z9.S, P0/M, Z31.S, Z31.S |
(12) 0x4015c8 MOVPRFX Z10, Z21 |
(12) 0x4015cc FMLA Z10.S, P0/M, Z8.S, Z8.S |
(12) 0x4015d0 FCMGT P2.S, P0/Z, Z17.S, Z9.S |
(12) 0x4015d4 FCMGT P3.S, P0/Z, Z17.S, Z10.S |
(12) 0x4015d8 MOVPRFX Z4, Z9 |
(12) 0x4015dc FADD Z4.S, P0/M, Z4.S, Z18.S |
(12) 0x4015e0 MOVPRFX Z21, Z10 |
(12) 0x4015e4 FADD Z21.S, P0/M, Z21.S, Z18.S |
(12) 0x4015e8 FCVT Z4.D, P0/M, Z4.S |
(12) 0x4015ec FCVT Z21.D, P0/M, Z21.S |
(12) 0x4015f0 MOVPRFX Z13, Z4 |
(12) 0x4015f4 FSQRT Z13.D, P0/M, Z4.D |
(12) 0x4015f8 MOVPRFX Z14, Z21 |
(12) 0x4015fc FSQRT Z14.D, P0/M, Z21.D |
(12) 0x401600 LD1W {Z11.D}, P2/Z, [X19, MUL VL] |
(12) 0x401604 LD1W {Z12.D}, P3/Z, [X19, X13,LSL #2] |
(12) 0x401608 ORR P2.B, P1/Z, P1.B, P1.B |
(10) 0x40160c LASTB D3, P2, Z4.D |
(10) 0x401610 FMUL D3, D3, D3 |
(10) 0x401614 FDIV D3, D24, D3 |
(10) 0x401618 CPY Z15.D, P2/M, D3 |
(10) 0x40161c PNEXT P2.D, P0, P2.D |
(10) 0x401620 B.NE 40160c |
(12) 0x401624 ORR Z4.D, Z1.D, Z1.D |
(12) 0x401628 ORR P2.B, P1/Z, P1.B, P1.B |
(11) 0x40162c LASTB D3, P2, Z21.D |
(11) 0x401630 FMUL D3, D3, D3 |
(11) 0x401634 FDIV D3, D24, D3 |
(11) 0x401638 CPY Z4.D, P2/M, D3 |
(11) 0x40163c PNEXT P2.D, P0, P2.D |
(11) 0x401640 B.NE 40162c |
(12) 0x401644 DUP Z3.S, W14 |
(12) 0x401648 FCMGT P2.S, P0/Z, Z9.S, #0 |
(12) 0x40164c FCMGT P3.S, P0/Z, Z10.S, #0 |
(12) 0x401650 ADD X11, X11, X10 |
(12) 0x401654 CMP X11, X9 |
(12) 0x401658 DUP Z21.S, W15 |
(12) 0x40165c MOVPRFX Z2, Z21 |
(12) 0x401660 FMLS Z2.S, P0/M, Z9.S, Z3.S |
(12) 0x401664 FMSB Z3.S, P0/M, Z10.S, Z21.S |
(12) 0x401668 DUP Z21.S, W16 |
(12) 0x40166c FMAD Z2.S, P0/M, Z9.S, Z21.S |
(12) 0x401670 FMAD Z3.S, P0/M, Z10.S, Z21.S |
(12) 0x401674 DUP Z21.S, W17 |
(12) 0x401678 FMAD Z2.S, P0/M, Z9.S, Z21.S |
(12) 0x40167c FMAD Z3.S, P0/M, Z10.S, Z21.S |
(12) 0x401680 DUP Z21.S, W18 |
(12) 0x401684 FMAD Z2.S, P0/M, Z9.S, Z21.S |
(12) 0x401688 FMAD Z3.S, P0/M, Z10.S, Z21.S |
(12) 0x40168c DUP Z21.S, W0 |
(12) 0x401690 FMAD Z2.S, P0/M, Z9.S, Z21.S |
(12) 0x401694 FMAD Z3.S, P0/M, Z10.S, Z21.S |
(12) 0x401698 FCVT Z2.D, P0/M, Z2.S |
(12) 0x40169c FCVT Z3.D, P0/M, Z3.S |
(12) 0x4016a0 FNMLS Z2.D, P0/M, Z15.D, Z13.D |
(12) 0x4016a4 FNMLS Z3.D, P0/M, Z4.D, Z14.D |
(12) 0x4016a8 FCVT Z2.S, P0/M, Z2.D |
(12) 0x4016ac FCVT Z3.S, P0/M, Z3.D |
(12) 0x4016b0 FMUL Z2.S, P0/M, Z2.S, Z11.S |
(12) 0x4016b4 FMUL Z3.S, P0/M, Z3.S, Z12.S |
(12) 0x4016b8 SEL Z2.D, P2, Z2.D, Z20.D |
(12) 0x4016bc SEL Z3.D, P3, Z3.D, Z20.D |
(12) 0x4016c0 FMLA Z25.S, P0/M, Z2.S, Z27.S |
(12) 0x4016c4 FMLA Z22.S, P0/M, Z2.S, Z29.S |
(12) 0x4016c8 FMLA Z16.S, P0/M, Z2.S, Z31.S |
(12) 0x4016cc FMLA Z26.S, P0/M, Z3.S, Z28.S |
(12) 0x4016d0 FMLA Z23.S, P0/M, Z3.S, Z30.S |
(12) 0x4016d4 FMLA Z19.S, P0/M, Z3.S, Z8.S |
(12) 0x4016d8 B.NE 401548 |
0x4016dc LDP S3, S4, [SP, #8] |
0x4016e0 LDP S24, S2, [SP] |
0x4016e4 MOVPRFX Z1, Z26 |
0x4016e8 FADD Z1.S, P0/M, Z1.S, Z25.S |
0x4016ec FADDV S5, P0, Z1.S |
0x4016f0 MOVPRFX Z1, Z23 |
0x4016f4 FADD Z1.S, P0/M, Z1.S, Z22.S |
0x4016f8 FADDV S7, P0, Z1.S |
0x4016fc MOVPRFX Z1, Z19 |
0x401700 FADD Z1.S, P0/M, Z1.S, Z16.S |
0x401704 FADDV S6, P0, Z1.S |
0x401708 CBZ X12, 401818 |
0x40170c UBFM X13, X9, #62, #61 |
0x401710 SUB X8, X8, X9 |
0x401714 MOVZ W9, #35234 |
0x401718 MOVZ W14, #63002 |
0x40171c FMOV D16, #1.0000000 |
0x401720 MOVZ W15, #18267 |
0x401724 MOVZ W16, #6647 |
0x401728 MOVZ W17, #52448 |
0x40172c MOVZ W18, #58683 |
0x401730 ADD X10, X4, X13 |
0x401734 ADD X11, X3, X13 |
0x401738 ADD X12, X2, X13 |
0x40173c ADD X13, X1, X13 |
0x401740 MOVK W9, #13765 |
0x401744 MOVK W14, #47229 |
0x401748 MOVK W15, #14991 |
0x40174c MOVK W16, #48188 |
0x401750 MOVK W17, #15769 |
0x401754 MOVK W18, #48777 |
0x401758 B 401784 |
0x40175c HINT #0 |
(9) 0x401760 FMADD S5, S1, S18, S5 |
(9) 0x401764 FMADD S7, S1, S19, S7 |
(9) 0x401768 ADD X10, X10, #4 |
(9) 0x40176c ADD X11, X11, #4 |
(9) 0x401770 ADD X12, X12, #4 |
(9) 0x401774 ADD X13, X13, #4 |
(9) 0x401778 SUBS X8, X8, #1 |
(9) 0x40177c FMADD S6, S1, S17, S6 |
(9) 0x401780 B.EQ 401818 |
(9) 0x401784 LDR S1, [X13] |
(9) 0x401788 MOVI D22, #0 |
(9) 0x40178c FSUB S18, S1, S0 |
(9) 0x401790 LDR S1, [X12] |
(9) 0x401794 FSUB S19, S1, S24 |
(9) 0x401798 LDR S1, [X11] |
(9) 0x40179c FSUB S17, S1, S2 |
(9) 0x4017a0 FMUL S1, S18, S18 |
(9) 0x4017a4 FMADD S1, S19, S19, S1 |
(9) 0x4017a8 FMADD S20, S17, S17, S1 |
(9) 0x4017ac MOVI D1, #0 |
(9) 0x4017b0 FCMP S20, S3 |
(9) 0x4017b4 B.GE 4017bc |
(9) 0x4017b8 LDR S22, [X10] |
(9) 0x4017bc FCMP S20, #0 |
(9) 0x4017c0 B.LE 401760 |
(9) 0x4017c4 FMOV S23, W9 |
(9) 0x4017c8 FADD S1, S20, S4 |
(9) 0x4017cc FCVT D1, S1 |
(9) 0x4017d0 FMOV S21, W14 |
(9) 0x4017d4 FMADD S23, S20, S23, S21 |
(9) 0x4017d8 FMOV S21, W15 |
(9) 0x4017dc FMADD S23, S23, S20, S21 |
(9) 0x4017e0 FMOV S21, W16 |
(9) 0x4017e4 FMADD S23, S23, S20, S21 |
(9) 0x4017e8 FMOV S21, W17 |
(9) 0x4017ec FMADD S23, S23, S20, S21 |
(9) 0x4017f0 FMOV S21, W18 |
(9) 0x4017f4 FMADD S21, S23, S20, S21 |
(9) 0x4017f8 FSQRT D20, D1 |
(9) 0x4017fc FMUL D1, D1, D1 |
(9) 0x401800 FDIV D1, D16, D1 |
(9) 0x401804 FCVT D21, S21 |
(9) 0x401808 FMADD D1, D1, D20, D21 |
(9) 0x40180c FCVT S1, D1 |
(9) 0x401810 FMUL S1, S22, S1 |
(9) 0x401814 B 401760 |
0x401818 LDP D9, D8, [SP, #64] |
0x40181c LDP D11, D10, [SP, #48] |
0x401820 LDP D13, D12, [SP, #32] |
0x401824 LDP D15, D14, [SP, #16] |
0x401828 LDP X20, X19, [SP, #80] |
0x40182c STR S5, [X5] |
0x401830 STR S7, [X6] |
0x401834 STR S6, [X7] |
0x401838 ADD SP, SP, #96 |
0x40183c RET |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
○100.00 | __kmp_invoke_microtask | libomp.so |
Path / |
Source file and lines | Step10_orig.c:10-41 |
Module | exec |
nb instructions | 101 |
loop length | 404 |
nb stack references | 0 |
front end | 12.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.50 | 3.50 | 11.25 | 11.25 | 11.25 | 11.25 | 15.50 | 15.50 | 6.00 | 6.00 | 5.67 | 5.67 | 5.67 | 0.50 | 0.50 |
cycles | 3.50 | 3.50 | 11.25 | 11.25 | 11.25 | 11.25 | 15.50 | 15.50 | 6.00 | 6.00 | 5.67 | 5.67 | 5.67 | 0.50 | 0.50 |
Cycles executing div or sqrt instructions | 1.00-0.50 |
Front-end | 12.50 |
Overall L1 | 15.50 |
all | 29% |
load | 100% |
store | 66% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | 0% |
other | 19% |
all | 100% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 34% |
load | 100% |
store | 66% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 54% |
fma | 0% |
div/sqrt | 0% |
other | 19% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
SUB SP, SP, #96 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP D15, D14, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
STP D13, D12, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
STP D11, D10, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
STP D9, D8, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
STP X20, X19, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
CMP W0, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LT 4014a8 <Step10_orig+0x48> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ORR W8, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CNTW X10, ALL | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
FMOV S24, S1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 |
CMP X10, X8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LS 4014b8 <Step10_orig+0x58> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ORR X9, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVI D6, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 |
MOVI D7, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 |
MOVI D5, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 |
B 40170c <Step10_orig+0x2ac> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOVI D5, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 |
MOVI D7, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 |
MOVI D6, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 |
B 401818 <Step10_orig+0x3b8> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
UDIV X9, X8, X10 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-20 | 1-0.50 |
PTRUE P0.D, ALL | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
PFALSE P1.B | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
DUP Z20.S, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ORR X11, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CNTD X13, ALL | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
DUP Z5.S, Z0.S[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
STP S24, S2, [SP] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
DUP Z6.S, Z24.S[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
DUP Z7.S, Z2.S[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
DUP Z17.S, Z3.S[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
DUP Z18.S, Z4.S[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
MOVZ W14, #35234 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVZ W15, #63002 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVZ W16, #18267 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVZ W17, #6647 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
DUP Z1.D, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
MOVZ W18, #52448 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVZ W0, #58683 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
PNEXT P1.D, P0, P1.D | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
ORR Z16.D, Z20.D, Z20.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ORR Z19.D, Z20.D, Z20.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FMOV D24, #1.0000000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 |
ORR Z22.D, Z20.D, Z20.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ORR Z23.D, Z20.D, Z20.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ORR Z25.D, Z20.D, Z20.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ORR Z26.D, Z20.D, Z20.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
MOVK W14, #13765 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVK W15, #14461 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVK W16, #47759 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVK W17, #15420 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVK W18, #48537 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVK W0, #16009 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP S3, S4, [SP, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
UMSUBL X12, W9, W10, X8 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
UMADDL X9, W9, W10, XZR | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
LDP S3, S4, [SP, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 |
LDP S24, S2, [SP] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 |
MOVPRFX Z1, Z26 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FADD Z1.S, P0/M, Z1.S, Z25.S | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FADDV S5, P0, Z1.S | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 0.75 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 11 | 2.50 |
MOVPRFX Z1, Z23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FADD Z1.S, P0/M, Z1.S, Z22.S | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FADDV S7, P0, Z1.S | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 0.75 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 11 | 2.50 |
MOVPRFX Z1, Z19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FADD Z1.S, P0/M, Z1.S, Z16.S | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FADDV S6, P0, Z1.S | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 0.75 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 11 | 2.50 |
CBZ X12, 401818 <Step10_orig+0x3b8> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
UBFM X13, X9, #62, #61 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB X8, X8, X9 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVZ W9, #35234 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVZ W14, #63002 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
FMOV D16, #1.0000000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 |
MOVZ W15, #18267 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVZ W16, #6647 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVZ W17, #52448 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVZ W18, #58683 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X10, X4, X13 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X11, X3, X13 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X12, X2, X13 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X13, X1, X13 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVK W9, #13765 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVK W14, #47229 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVK W15, #14991 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVK W16, #48188 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVK W17, #15769 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVK W18, #48777 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
B 401784 <Step10_orig+0x324> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
HINT #0 | ||||||||||||||||||
LDP D9, D8, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 |
LDP D11, D10, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 |
LDP D13, D12, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 |
LDP D15, D14, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 |
LDP X20, X19, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 |
STR S5, [X5] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
STR S7, [X6] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
STR S6, [X7] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
ADD SP, SP, #96 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
Source file and lines | Step10_orig.c:10-41 |
Module | exec |
nb instructions | 101 |
loop length | 404 |
nb stack references | 0 |
front end | 12.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.50 | 3.50 | 11.25 | 11.25 | 11.25 | 11.25 | 15.50 | 15.50 | 6.00 | 6.00 | 5.67 | 5.67 | 5.67 | 0.50 | 0.50 |
cycles | 3.50 | 3.50 | 11.25 | 11.25 | 11.25 | 11.25 | 15.50 | 15.50 | 6.00 | 6.00 | 5.67 | 5.67 | 5.67 | 0.50 | 0.50 |
Cycles executing div or sqrt instructions | 1.00-0.50 |
Front-end | 12.50 |
Overall L1 | 15.50 |
all | 29% |
load | 100% |
store | 66% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | 0% |
other | 19% |
all | 100% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 34% |
load | 100% |
store | 66% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 54% |
fma | 0% |
div/sqrt | 0% |
other | 19% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
SUB SP, SP, #96 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP D15, D14, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
STP D13, D12, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
STP D11, D10, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
STP D9, D8, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
STP X20, X19, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
CMP W0, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LT 4014a8 <Step10_orig+0x48> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ORR W8, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CNTW X10, ALL | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
FMOV S24, S1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 |
CMP X10, X8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LS 4014b8 <Step10_orig+0x58> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ORR X9, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVI D6, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 |
MOVI D7, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 |
MOVI D5, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 |
B 40170c <Step10_orig+0x2ac> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOVI D5, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 |
MOVI D7, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 |
MOVI D6, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 |
B 401818 <Step10_orig+0x3b8> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
UDIV X9, X8, X10 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-20 | 1-0.50 |
PTRUE P0.D, ALL | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
PFALSE P1.B | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
DUP Z20.S, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ORR X11, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CNTD X13, ALL | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
DUP Z5.S, Z0.S[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
STP S24, S2, [SP] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
DUP Z6.S, Z24.S[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
DUP Z7.S, Z2.S[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
DUP Z17.S, Z3.S[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
DUP Z18.S, Z4.S[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
MOVZ W14, #35234 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVZ W15, #63002 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVZ W16, #18267 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVZ W17, #6647 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
DUP Z1.D, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
MOVZ W18, #52448 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVZ W0, #58683 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
PNEXT P1.D, P0, P1.D | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
ORR Z16.D, Z20.D, Z20.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ORR Z19.D, Z20.D, Z20.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FMOV D24, #1.0000000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 |
ORR Z22.D, Z20.D, Z20.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ORR Z23.D, Z20.D, Z20.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ORR Z25.D, Z20.D, Z20.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ORR Z26.D, Z20.D, Z20.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
MOVK W14, #13765 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVK W15, #14461 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVK W16, #47759 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVK W17, #15420 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVK W18, #48537 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVK W0, #16009 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP S3, S4, [SP, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
UMSUBL X12, W9, W10, X8 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
UMADDL X9, W9, W10, XZR | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
LDP S3, S4, [SP, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 |
LDP S24, S2, [SP] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 |
MOVPRFX Z1, Z26 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FADD Z1.S, P0/M, Z1.S, Z25.S | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FADDV S5, P0, Z1.S | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 0.75 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 11 | 2.50 |
MOVPRFX Z1, Z23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FADD Z1.S, P0/M, Z1.S, Z22.S | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FADDV S7, P0, Z1.S | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 0.75 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 11 | 2.50 |
MOVPRFX Z1, Z19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FADD Z1.S, P0/M, Z1.S, Z16.S | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FADDV S6, P0, Z1.S | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 0.75 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 11 | 2.50 |
CBZ X12, 401818 <Step10_orig+0x3b8> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
UBFM X13, X9, #62, #61 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB X8, X8, X9 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVZ W9, #35234 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVZ W14, #63002 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
FMOV D16, #1.0000000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 |
MOVZ W15, #18267 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVZ W16, #6647 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVZ W17, #52448 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVZ W18, #58683 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X10, X4, X13 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X11, X3, X13 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X12, X2, X13 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X13, X1, X13 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVK W9, #13765 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVK W14, #47229 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVK W15, #14991 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVK W16, #48188 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVK W17, #15769 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVK W18, #48777 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
B 401784 <Step10_orig+0x324> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
HINT #0 | ||||||||||||||||||
LDP D9, D8, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 |
LDP D11, D10, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 |
LDP D13, D12, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 |
LDP D15, D14, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 |
LDP X20, X19, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 |
STR S5, [X5] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
STR S7, [X6] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
STR S6, [X7] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
ADD SP, SP, #96 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼Step10_orig– | 97.38 | 40.4 |
▼Loop 12 - Step10_orig.c:19-35 - exec– | 86.85 | 35.88 |
○Loop 10 - Step10_orig.c:29-29 - exec | 6.24 | 2.58 |
○Loop 11 - Step10_orig.c:29-29 - exec | 4.25 | 1.76 |
○Loop 9 - Step10_orig.c:19-35 - exec | 0.01 | 0.01 |