Function: Step10_orig | Module: exec | Source: Step10_orig.c:10-41 [...] | Coverage: 96.48% |
---|
Function: Step10_orig | Module: exec | Source: Step10_orig.c:10-41 [...] | Coverage: 96.48% |
---|
/home/hbollore/qaas-runs/170-256-3563/intel/HACCmk/build/HACCmk/src/Step10_orig.c: 10 - 41 |
-------------------------------------------------------------------------------- |
10: { |
[...] |
17: xi = 0.; yi = 0.; zi = 0.; |
18: |
19: for ( j = 0; j < count1; j++ ) |
20: { |
21: dxc = xx1[j] - xxi; |
22: dyc = yy1[j] - yyi; |
23: dzc = zz1[j] - zzi; |
24: |
25: r2 = dxc * dxc + dyc * dyc + dzc * dzc; |
26: |
27: m = ( r2 < fsrrmax2 ) ? mass1[j] : 0.0f; |
28: |
29: f = pow( r2 + mp_rsm2, -1.5 ) - ( ma0 + r2*(ma1 + r2*(ma2 + r2*(ma3 + r2*(ma4 + r2*ma5))))); |
30: |
31: f = ( r2 > 0.0f ) ? m * f : 0.0f; |
[...] |
38: *dxi = xi; |
39: *dyi = yi; |
40: *dzi = zi; |
41: } |
0x401280 CMP W0, #0 |
0x401284 B.LE 401404 |
0x401288 MOVZ X8, #0 |
0x40128c CNTW X9, ALL |
0x401290 PTRUE P0.B, ALL |
0x401294 ADRP X15, 401294 |
0x401298 ADRP X14, 401298 |
0x40129c DUP Z5.S, #0 |
0x4012a0 ADRP X13, 4012a0 |
0x4012a4 ADRP X12, 4012a4 |
0x4012a8 STP D8, D9, [SP, #-16]! |
0x4012ac ADRP X11, 4012ac |
0x4012b0 ADRP X10, 4012b0 |
0x4012b4 DUP Z21.S, Z0.S[0] |
0x4012b8 ADD X16, X15, #1376 |
0x4012bc ADD X17, X14, #1380 |
0x4012c0 DUP Z20.S, Z1.S[0] |
0x4012c4 ADD X18, X13, #1384 |
0x4012c8 ADD X15, X12, #1388 |
0x4012cc DUP Z19.S, Z2.S[0] |
0x4012d0 ADD X14, X11, #1392 |
0x4012d4 ADD X13, X10, #1396 |
0x4012d8 DUP Z18.S, Z3.S[0] |
0x4012dc DUP Z17.S, Z4.S[0] |
0x4012e0 LD1RW {Z28.S}, P0/Z, [X16] |
0x4012e4 LD1RW {Z27.S}, P0/Z, [X17] |
0x4012e8 LD1RW {Z26.S}, P0/Z, [X18] |
0x4012ec LD1RW {Z25.S}, P0/Z, [X15] |
0x4012f0 LD1RW {Z24.S}, P0/Z, [X14] |
0x4012f4 LD1RW {Z23.S}, P0/Z, [X13] |
0x4012f8 WHILELO P1.S, WZR, W0 |
0x4012fc ORR Z6.D, Z5.D, Z5.D |
0x401300 ORR Z7.D, Z5.D, Z5.D |
0x401304 ORR Z22.D, Z5.D, Z5.D |
0x401308 FDUP Z16.D, #112 |
(4) 0x40130c LD1W {Z3.S}, P1/Z, [X2, X8,LSL #2] |
(4) 0x401310 LD1W {Z4.S}, P1/Z, [X1, X8,LSL #2] |
(4) 0x401314 LD1W {Z2.S}, P1/Z, [X3, X8,LSL #2] |
(4) 0x401318 FSUB Z3.S, Z3.S, Z20.S |
(4) 0x40131c FSUB Z4.S, Z4.S, Z21.S |
(4) 0x401320 FMUL Z29.S, Z3.S, Z3.S |
(4) 0x401324 FSUB Z2.S, Z2.S, Z19.S |
(4) 0x401328 FMLA Z29.S, P0/M, Z4.S, Z4.S |
(4) 0x40132c FMLA Z29.S, P0/M, Z2.S, Z2.S |
(4) 0x401330 FADD Z0.S, Z29.S, Z17.S |
(4) 0x401334 ZIP2 Z1.S, Z0.S, Z0.S |
(4) 0x401338 ZIP1 Z0.S, Z0.S, Z0.S |
(4) 0x40133c FCVT Z1.D, P0/M, Z1.S |
(4) 0x401340 FCVT Z0.D, P0/M, Z0.S |
(4) 0x401344 MOVPRFX Z9, Z1 |
(4) 0x401348 FSQRT Z9.D, P0/M, Z1.D |
(4) 0x40134c MOVPRFX Z8, Z0 |
(4) 0x401350 FSQRT Z8.D, P0/M, Z0.D |
(4) 0x401354 FMUL Z1.D, Z1.D, Z9.D |
(4) 0x401358 FDIVR Z1.D, P0/M, Z1.D, Z16.D |
(4) 0x40135c FCMGT P2.S, P0/Z, Z29.S, #0 |
(4) 0x401360 FCMGT P3.S, P1/Z, Z18.S, Z29.S |
(4) 0x401364 MOVPRFX Z30, Z27 |
(4) 0x401368 FMLA Z30.S, P0/M, Z29.S, Z28.S |
(4) 0x40136c FMUL Z0.D, Z0.D, Z8.D |
(4) 0x401370 FMAD Z30.S, P0/M, Z29.S, Z26.S |
(4) 0x401374 LD1W {Z8.S}, P3/Z, [X4, X8,LSL #2] |
(4) 0x401378 FMAD Z30.S, P0/M, Z29.S, Z25.S |
(4) 0x40137c ADD X8, X8, X9 |
(4) 0x401380 FMAD Z30.S, P0/M, Z29.S, Z24.S |
(4) 0x401384 FDIVR Z0.D, P0/M, Z0.D, Z16.D |
(4) 0x401388 FMAD Z30.S, P0/M, Z29.S, Z23.S |
(4) 0x40138c SEL Z29.S, P3, Z8.S, Z22.S |
(4) 0x401390 ZIP2 Z31.S, Z30.S, Z30.S |
(4) 0x401394 ZIP1 Z30.S, Z30.S, Z30.S |
(4) 0x401398 FCVT Z31.D, P0/M, Z31.S |
(4) 0x40139c FCVT Z30.D, P0/M, Z30.S |
(4) 0x4013a0 FADD Z31.D, Z1.D, Z31.D |
(4) 0x4013a4 FADD Z30.D, Z0.D, Z30.D |
(4) 0x4013a8 FCVT Z31.S, P0/M, Z31.D |
(4) 0x4013ac FCVT Z30.S, P0/M, Z30.D |
(4) 0x4013b0 UZP1 Z9.S, Z31.S, Z30.S |
(4) 0x4013b4 FMUL Z1.S, Z9.S, Z29.S |
(4) 0x4013b8 MOVPRFX Z4.S, P2/Z, Z4.S |
(4) 0x4013bc FMUL Z4.S, P2/M, Z4.S, Z1.S |
(4) 0x4013c0 MOVPRFX Z3.S, P2/Z, Z3.S |
(4) 0x4013c4 FMUL Z3.S, P2/M, Z3.S, Z1.S |
(4) 0x4013c8 FADD Z7.S, P1/M, Z7.S, Z4.S |
(4) 0x4013cc FADD Z6.S, P1/M, Z6.S, Z3.S |
(4) 0x4013d0 MOVPRFX Z2.S, P2/Z, Z2.S |
(4) 0x4013d4 FMUL Z2.S, P2/M, Z2.S, Z1.S |
(4) 0x4013d8 FADD Z5.S, P1/M, Z5.S, Z2.S |
(4) 0x4013dc WHILELO P1.S, W8, W0 |
(4) 0x4013e0 B.NE 40130c |
0x4013e4 FADDV S19, P0, Z7.S |
0x4013e8 STR S19, [X5] |
0x4013ec FADDV S20, P0, Z6.S |
0x4013f0 STR S20, [X6] |
0x4013f4 FADDV S21, P0, Z5.S |
0x4013f8 LDP D8, D9, [SP], #16 |
0x4013fc STR S21, [X7] |
0x401400 RET |
0x401404 MOVI V5.2S, #0 |
0x401408 STR S5, [X5] |
0x40140c STR S5, [X6] |
0x401410 STR S5, [X7] |
0x401414 RET |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►98.43+ | __kmp_GOMP_microtask_wrapper(i[...] | libomp.so | |
○ | __kmp_invoke_microtask | libomp.so | |
►1.57+ | GOMP_parallel | libomp.so | |
○ | main | main.c:152 | exec |
○ | __libc_start_main | libc-2.31.so | |
○ | _start | main.c:192 | exec |
Path / |
Source file and lines | Step10_orig.c:10-41 |
Module | exec |
nb instructions | 48 |
loop length | 192 |
nb stack references | 0 |
front end | 6.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.50 | 1.50 | 4.75 | 4.75 | 4.75 | 4.75 | 10.00 | 10.00 | 2.00 | 2.00 | 6.50 | 6.50 | 1.00 | 0.00 | 0.00 |
cycles | 1.50 | 1.50 | 4.75 | 4.75 | 4.75 | 4.75 | 10.00 | 10.00 | 3.00 | 3.00 | 6.50 | 6.50 | 1.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 6.00 |
Overall L1 | 10.00 |
all | 32% |
load | 14% |
store | 14% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 57% |
all | 75% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 37% |
load | 14% |
store | 14% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 53% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
CMP W0, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LE 401404 <Step10_orig+0x184> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOVZ X8, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CNTW X9, ALL | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
PTRUE P0.B, ALL | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
ADRP X15, 401294 <Step10_orig+0x14> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADRP X14, 401298 <Step10_orig+0x18> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
DUP Z5.S, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ADRP X13, 4012a0 <Step10_orig+0x20> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADRP X12, 4012a4 <Step10_orig+0x24> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP D8, D9, [SP, #-16]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
ADRP X11, 4012ac <Step10_orig+0x2c> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADRP X10, 4012b0 <Step10_orig+0x30> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
DUP Z21.S, Z0.S[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ADD X16, X15, #1376 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X17, X14, #1380 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
DUP Z20.S, Z1.S[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ADD X18, X13, #1384 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X15, X12, #1388 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
DUP Z19.S, Z2.S[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ADD X14, X11, #1392 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X13, X10, #1396 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
DUP Z18.S, Z3.S[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
DUP Z17.S, Z4.S[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
LD1RW {Z28.S}, P0/Z, [X16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1RW {Z27.S}, P0/Z, [X17] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1RW {Z26.S}, P0/Z, [X18] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1RW {Z25.S}, P0/Z, [X15] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1RW {Z24.S}, P0/Z, [X14] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1RW {Z23.S}, P0/Z, [X13] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
WHILELO P1.S, WZR, W0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 2 |
ORR Z6.D, Z5.D, Z5.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ORR Z7.D, Z5.D, Z5.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ORR Z22.D, Z5.D, Z5.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FDUP Z16.D, #112 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FADDV S19, P0, Z7.S | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 0.75 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 11 | 2.50 |
STR S19, [X5] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
FADDV S20, P0, Z6.S | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 0.75 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 11 | 2.50 |
STR S20, [X6] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
FADDV S21, P0, Z5.S | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 0.75 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 11 | 2.50 |
LDP D8, D9, [SP], #16 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 |
STR S21, [X7] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOVI V5.2S, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 |
STR S5, [X5] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
STR S5, [X6] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
STR S5, [X7] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
Source file and lines | Step10_orig.c:10-41 |
Module | exec |
nb instructions | 48 |
loop length | 192 |
nb stack references | 0 |
front end | 6.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.50 | 1.50 | 4.75 | 4.75 | 4.75 | 4.75 | 10.00 | 10.00 | 2.00 | 2.00 | 6.50 | 6.50 | 1.00 | 0.00 | 0.00 |
cycles | 1.50 | 1.50 | 4.75 | 4.75 | 4.75 | 4.75 | 10.00 | 10.00 | 3.00 | 3.00 | 6.50 | 6.50 | 1.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 6.00 |
Overall L1 | 10.00 |
all | 32% |
load | 14% |
store | 14% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 57% |
all | 75% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 37% |
load | 14% |
store | 14% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 53% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
CMP W0, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LE 401404 <Step10_orig+0x184> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOVZ X8, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CNTW X9, ALL | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
PTRUE P0.B, ALL | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
ADRP X15, 401294 <Step10_orig+0x14> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADRP X14, 401298 <Step10_orig+0x18> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
DUP Z5.S, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ADRP X13, 4012a0 <Step10_orig+0x20> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADRP X12, 4012a4 <Step10_orig+0x24> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP D8, D9, [SP, #-16]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
ADRP X11, 4012ac <Step10_orig+0x2c> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADRP X10, 4012b0 <Step10_orig+0x30> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
DUP Z21.S, Z0.S[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ADD X16, X15, #1376 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X17, X14, #1380 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
DUP Z20.S, Z1.S[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ADD X18, X13, #1384 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X15, X12, #1388 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
DUP Z19.S, Z2.S[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ADD X14, X11, #1392 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X13, X10, #1396 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
DUP Z18.S, Z3.S[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
DUP Z17.S, Z4.S[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
LD1RW {Z28.S}, P0/Z, [X16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1RW {Z27.S}, P0/Z, [X17] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1RW {Z26.S}, P0/Z, [X18] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1RW {Z25.S}, P0/Z, [X15] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1RW {Z24.S}, P0/Z, [X14] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
LD1RW {Z23.S}, P0/Z, [X13] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 |
WHILELO P1.S, WZR, W0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 2 |
ORR Z6.D, Z5.D, Z5.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ORR Z7.D, Z5.D, Z5.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ORR Z22.D, Z5.D, Z5.D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FDUP Z16.D, #112 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
FADDV S19, P0, Z7.S | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 0.75 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 11 | 2.50 |
STR S19, [X5] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
FADDV S20, P0, Z6.S | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 0.75 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 11 | 2.50 |
STR S20, [X6] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
FADDV S21, P0, Z5.S | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 0.75 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 11 | 2.50 |
LDP D8, D9, [SP], #16 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 |
STR S21, [X7] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOVI V5.2S, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 |
STR S5, [X5] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
STR S5, [X6] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
STR S5, [X7] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼Step10_orig– | 96.48 | 27.73 |
○Loop 4 - Step10_orig.c:19-31 - exec | 96.47 | 27.53 |