Loop Id: 45 | Module: exec | Source: LPlusTimes.cpp:57-57 [...] | Coverage: 0.04% |
---|
Loop Id: 45 | Module: exec | Source: LPlusTimes.cpp:57-57 [...] | Coverage: 0.04% |
---|
(43) 0x4383ac LDR W1, [SP, #20] |
(43) 0x4383b0 ADRP X0, |
(43) 0x4383b4 ADD X0, X0, #4000 |
(43) 0x4383b8 BL 402bb0 |
(43) 0x4383bc LDP X20, X19, [SP, #336] |
(43) 0x4383c0 LDP X22, X21, [SP, #320] |
(43) 0x4383c4 LDP X24, X23, [SP, #304] |
(43) 0x4383c8 LDP X26, X25, [SP, #288] |
(43) 0x4383cc LDP X28, X27, [SP, #272] |
(43) 0x4383d0 LDP X29, X30, [SP, #256] |
(43) 0x4383d4 ADD SP, SP, #352 |
(43) 0x4383d8 RET |
(43) 0x4383dc MADD X14, X19, X22, XZR |
(43) 0x4383e0 LDUR X3, [X29, #432] |
(43) 0x4383e4 LDUR X8, [X29, #464] |
(43) 0x4383e8 LDP X2, X0, [X29, #952] |
(43) 0x4383ec UBFM X12, X22, #61, #60 |
(43) 0x4383f0 SUB X11, X25, X24 |
(43) 0x4383f4 ADD X1, X26, X9 |
(43) 0x4383f8 STUR X27, [X29, #392] |
(43) 0x4383fc PTRUE P0.D, ALL |
(43) 0x438400 UBFM X16, X19, #61, #60 |
(43) 0x438404 STR X28, [SP, #128] |
(43) 0x438408 STP X26, X23, [X29, #912] |
(43) 0x43840c STR X20, [SP, #120] |
(43) 0x438410 ADD X15, X14, X24 |
(43) 0x438414 UBFM X15, X15, #61, #60 |
(43) 0x438418 SUB X10, X8, X22 |
(43) 0x43841c UBFM X8, X24, #61, #60 |
(43) 0x438420 UBFM X13, X2, #61, #60 |
(43) 0x438424 STR X15, [SP, #96] |
(43) 0x438428 UBFM X15, X3, #61, #60 |
(43) 0x43842c ADD X17, X0, X8 |
(43) 0x438430 ADD X8, X8, X14,LSL #3 |
(43) 0x438434 STR X13, [SP, #104] |
(43) 0x438438 ADD X13, X12, #8 |
(43) 0x43843c STR X15, [SP, #88] |
(43) 0x438440 ADD X15, X25, X14 |
(43) 0x438444 UBFM X14, X27, #61, #60 |
(43) 0x438448 CNTW X27, ALL |
(43) 0x43844c STR X14, [SP, #64] |
(43) 0x438450 UBFM X14, X28, #61, #60 |
(43) 0x438454 MOVZ W28, #8 |
(43) 0x438458 STR X17, [SP, #112] |
(43) 0x43845c STR X15, [SP, #80] |
(43) 0x438460 ORN X15, XZR, X24 |
(43) 0x438464 STR X14, [SP, #56] |
(43) 0x438468 SUB X14, X24, X25 |
(43) 0x43846c ADD X18, X25, X15 |
(43) 0x438470 LDUR X15, [X29, #424] |
(43) 0x438474 STUR X14, [X29, #464] |
(43) 0x438478 ORR X14, XZR, X2 |
(43) 0x43847c ORR X2, XZR, X3 |
(43) 0x438480 ADD X8, X15, X8 |
(43) 0x438484 ORR X3, XZR, X15 |
(43) 0x438488 STR X8, [SP, #72] |
(43) 0x43848c ADD X8, X8, #16 |
(43) 0x438490 STR X8, [SP, #48] |
(43) 0x438494 ADD X8, X17, #16 |
(43) 0x438498 STR X8, [SP, #40] |
(43) 0x43849c ADD X8, X20, X12 |
(43) 0x4384a0 STR X8, [SP, #32] |
(43) 0x4384a4 ADD X8, X20, X13 |
(43) 0x4384a8 STR X8, [SP, #24] |
(43) 0x4384ac B 4384e4 |
(44) 0x4384c0 LDP X26, X23, [X29, #912] |
(44) 0x4384c4 LDP X14, X0, [X29, #952] |
(44) 0x4384c8 LDR X20, [SP, #120] |
(44) 0x4384cc LDP X3, X2, [X29, #936] |
(43) 0x4384d0 LDUR X8, [X29, #456] |
(43) 0x4384d4 ADD X1, X1, #1 |
(43) 0x4384d8 CMP X9, X8 |
(43) 0x4384dc ADD X9, X9, #1 |
(43) 0x4384e0 B.EQ 4383ac |
(43) 0x4384e4 CMP X10, #1 |
(43) 0x4384e8 B.LT 4384d0 |
(44) 0x4384ec SDIV X8, X9, X23 |
(44) 0x4384f0 LDUR X13, [X29, #416] |
(44) 0x4384f4 LDR X4, [SP, #112] |
(44) 0x4384f8 ORR X19, XZR, XZR |
(44) 0x4384fc MADD X12, X8, X23, XZR |
(44) 0x438500 ADD X8, X8, X13 |
(44) 0x438504 MADD X13, X14, X8, XZR |
(44) 0x438508 LDUR X14, [X29, #392] |
(44) 0x43850c ADD X23, X20, X13,LSL #3 |
(44) 0x438510 SUB X13, X9, X12 |
(44) 0x438514 SUB X12, X1, X12 |
(44) 0x438518 ADD X15, X13, X26 |
(44) 0x43851c LDR X13, [SP, #128] |
(44) 0x438520 MADD X14, X14, X8, XZR |
(44) 0x438524 MADD X17, X13, X15, XZR |
(44) 0x438528 ADD X13, X4, X17,LSL #3 |
(44) 0x43852c ADD X13, X13, X14,LSL #3 |
(44) 0x438530 ADD X14, X25, X14 |
(44) 0x438534 ADD X14, X14, X17 |
(44) 0x438538 ADD X21, X0, X14,LSL #3 |
(44) 0x43853c LDR X14, [SP, #104] |
(44) 0x438540 MADD X17, X14, X8, XZR |
(44) 0x438544 LDP X20, X14, [SP, #88] |
(44) 0x438548 MADD X0, X20, X15, X14 |
(44) 0x43854c LDR X14, [SP, #80] |
(44) 0x438550 ADD X5, X3, X0 |
(44) 0x438554 MADD X15, X2, X15, X14 |
(44) 0x438558 LDP X2, X14, [SP, #24] |
(44) 0x43855c ADD X6, X3, X15,LSL #3 |
(44) 0x438560 LDP X0, X15, [SP, #64] |
(44) 0x438564 ADD X14, X14, X17 |
(44) 0x438568 ADD X17, X2, X17 |
(44) 0x43856c LDR X2, [SP, #56] |
(44) 0x438570 MADD X7, X20, X12, X15 |
(44) 0x438574 MADD X15, X2, X12, XZR |
(44) 0x438578 MADD X15, X0, X8, X15 |
(44) 0x43857c MADD X8, X0, X8, XZR |
(44) 0x438580 ADD X30, X4, X15 |
(44) 0x438584 MADD X15, X2, X12, X8 |
(44) 0x438588 LDR X8, [SP, #48] |
(44) 0x43858c ADD X0, X4, X15 |
(44) 0x438590 MADD X8, X20, X12, X8 |
(44) 0x438594 LDR X12, [SP, #40] |
(44) 0x438598 ADD X26, X12, X15 |
(44) 0x43859c B 4385b4 |
(44) 0x4385a0 ADD X19, X19, #1 |
(44) 0x4385a4 ADD X7, X7, X16 |
(44) 0x4385a8 ADD X8, X8, X16 |
(44) 0x4385ac CMP X19, X10 |
(44) 0x4385b0 B.EQ 4384c0 |
(44) 0x4385b4 CMP X11, #1 |
(44) 0x4385b8 B.LT 4385a0 |
0x4385bc CMP X27, #8 |
0x4385c0 ADD X20, X19, X22 |
0x4385c4 CSEL X12, X27, X28, #8 |
0x4385c8 CMP X11, X12 |
0x4385cc B.CS 4385e0 |
0x4385d0 ORR X2, XZR, XZR |
0x4385d4 B 438680 |
0x4385e0 UBFM X12, X19, #61, #60 |
0x4385e4 ORR X2, XZR, XZR |
0x4385e8 ADD X15, X14, X12 |
0x4385ec ADD X3, X17, X12 |
0x4385f0 MADD X12, X16, X19, XZR |
0x4385f4 ADD X4, X5, X12 |
0x4385f8 ADD X12, X6, X12 |
0x4385fc CMP X13, X12 |
0x438600 CCMP X4, X21, #2, #3 |
0x438604 CSINC W12, WZR, WZR, #2 |
0x438608 CMP X15, X21 |
0x43860c CCMP X13, X3, #2, #3 |
0x438610 B.CC 438680 |
0x438614 TBNZ W12, #0, 438680 |
0x438680 ADD W12, W24, W2 |
0x438684 SUB W15, W25, W12 |
0x438688 SUB X12, X18, X2 |
0x43868c ANDS X3, X15, #4160 |
0x438690 B.EQ 4386c0 |
0x438694 ADD X15, X0, X2,LSL #3 |
0x438698 HINT #0 |
0x43869c HINT #0 |
(47) 0x4386a0 LDR D0, [X23, X20,LSL #3] |
(47) 0x4386a4 LDR D1, [X7, X2,LSL #3] |
(47) 0x4386a8 LDR D2, [X15] |
(47) 0x4386ac ADD X2, X2, #1 |
(47) 0x4386b0 SUBS X3, X3, #1 |
(47) 0x4386b4 FMADD D0, D0, D1, D2 |
(47) 0x4386b8 STR D0, [X15], #8 |
(47) 0x4386bc B.NE 4386a0 |
0x4386c0 CMP X12, #3 |
0x4386c4 B.CC 4385a0 |
0x4386c8 LDUR X12, [X29, #464] |
0x4386cc ADD X3, X12, X2 |
0x4386d0 UBFM X12, X2, #61, #60 |
0x4386d4 ADD X2, X8, X12 |
0x4386d8 ADD X12, X26, X12 |
0x4386dc HINT #0 |
(46) 0x4386e0 LDR D0, [X23, X20,LSL #3] |
(46) 0x4386e4 LDUR D1, [X2, #496] |
(46) 0x4386e8 LDP D2, D3, [X12, #1008] |
(46) 0x4386ec ADDS X3, X3, #4 |
(46) 0x4386f0 FMADD D0, D0, D1, D2 |
(46) 0x4386f4 STUR D0, [X12, #496] |
(46) 0x4386f8 LDR D0, [X23, X20,LSL #3] |
(46) 0x4386fc LDUR D1, [X2, #504] |
(46) 0x438700 FMADD D0, D0, D1, D3 |
(46) 0x438704 LDP D2, D3, [X12] |
(46) 0x438708 STUR D0, [X12, #504] |
(46) 0x43870c LDR D0, [X23, X20,LSL #3] |
(46) 0x438710 LDR D1, [X2] |
(46) 0x438714 FMADD D0, D0, D1, D2 |
(46) 0x438718 STR D0, [X12] |
(46) 0x43871c LDR D0, [X23, X20,LSL #3] |
(46) 0x438720 LDR D1, [X2, #8] |
(46) 0x438724 ADD X2, X2, #32 |
(46) 0x438728 FMADD D0, D0, D1, D3 |
(46) 0x43872c STR D0, [X12, #8] |
(46) 0x438730 ADD X12, X12, #32 |
(46) 0x438734 B.NE 4386e0 |
0x438738 B 4385a0 |
/home/hbollore/qaas/qaas-runs/169-817-5851/intel/Kripke/build/Kripke/tpl/raja/include/RAJA/policy/openmp/kernel/Collapse.hpp: 81 - 83 |
-------------------------------------------------------------------------------- |
81: #pragma omp parallel for private(i0, i1) firstprivate(privatizer) \ |
82: RAJA_COLLAPSE(2) |
83: for (i0 = 0; i0 < l0; ++i0) { |
/home/hbollore/qaas/qaas-runs/169-817-5851/intel/Kripke/build/Kripke/src/Kripke/Kernel/LPlusTimes.cpp: 57 - 57 |
-------------------------------------------------------------------------------- |
57: rhs(d,g,z) += ell_plus(d, nm) * phi_out(nm, g, z); |
/home/hbollore/qaas/qaas-runs/169-817-5851/intel/Kripke/build/Kripke/tpl/raja/include/RAJA/policy/loop/forall.hpp: 59 - 59 |
-------------------------------------------------------------------------------- |
59: for (decltype(distance_it) i = 0; i < distance_it; ++i) { |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
○100.00 | __kmp_invoke_microtask | | libomp.so |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.00 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 4.91 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.54 |
Bottlenecks | P2, P3, P4, P5, |
Function | .omp_outlined.#0x4382e0 |
Source | forall.hpp:59-59 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 6.75 |
CQA cycles if no scalar integer | 6.75 |
CQA cycles if FP arith vectorized | 6.75 |
CQA cycles if fully vectorized | 1.38 |
Front-end cycles | 4.38 |
DIV/SQRT cycles | 0.00 |
P0 cycles | 3.50 |
P1 cycles | 3.50 |
P2 cycles | 6.75 |
P3 cycles | 6.75 |
P4 cycles | 6.75 |
P5 cycles | 6.75 |
P6 cycles | 0.00 |
P7 cycles | 0.00 |
P8 cycles | 0.00 |
P9 cycles | 0.00 |
P10 cycles | 0.33 |
P11 cycles | 0.33 |
P12 cycles | 0.33 |
P13 cycles | 0.00 |
P14 cycles | 0.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 38.00 |
Nb uops | 35.00 |
Nb loads | NA |
Nb stores | 0.00 |
Nb stack references | 0.00 |
FLOP/cycle | 0.00 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 1.19 |
Bytes prefetched | 0.00 |
Bytes loaded | 8.00 |
Bytes stored | 0.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 0.00 |
Vectorization ratio load | NA |
Vectorization ratio store | NA |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 0.00 |
Vector-efficiency ratio all | 24.31 |
Vector-efficiency ratio load | NA |
Vector-efficiency ratio store | NA |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | 23.44 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 25.00 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.00 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 4.91 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.54 |
Bottlenecks | P2, P3, P4, P5, |
Function | .omp_outlined.#0x4382e0 |
Source | forall.hpp:59-59 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 6.75 |
CQA cycles if no scalar integer | 6.75 |
CQA cycles if FP arith vectorized | 6.75 |
CQA cycles if fully vectorized | 1.38 |
Front-end cycles | 4.38 |
DIV/SQRT cycles | 0.00 |
P0 cycles | 3.50 |
P1 cycles | 3.50 |
P2 cycles | 6.75 |
P3 cycles | 6.75 |
P4 cycles | 6.75 |
P5 cycles | 6.75 |
P6 cycles | 0.00 |
P7 cycles | 0.00 |
P8 cycles | 0.00 |
P9 cycles | 0.00 |
P10 cycles | 0.33 |
P11 cycles | 0.33 |
P12 cycles | 0.33 |
P13 cycles | 0.00 |
P14 cycles | 0.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 38.00 |
Nb uops | 35.00 |
Nb loads | NA |
Nb stores | 0.00 |
Nb stack references | 0.00 |
FLOP/cycle | 0.00 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 1.19 |
Bytes prefetched | 0.00 |
Bytes loaded | 8.00 |
Bytes stored | 0.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 0.00 |
Vectorization ratio load | NA |
Vectorization ratio store | NA |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 0.00 |
Vector-efficiency ratio all | 24.31 |
Vector-efficiency ratio load | NA |
Vector-efficiency ratio store | NA |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | 23.44 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 25.00 |
Path / |
Function | .omp_outlined.#0x4382e0 |
Source file and lines | LPlusTimes.cpp:57-57 |
Module | exec |
nb instructions | 38 |
loop length | 152 |
nb stack references | 0 |
front end | 4.38 cycles |
 | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.50 | 3.50 | 6.75 | 6.75 | 6.75 | 6.75 | 0.00 | 0.00 | 0.00 | 0.00 | 0.33 | 0.33 | 0.33 | 0.00 | 0.00 |
cycles | 3.50 | 3.50 | 6.75 | 6.75 | 6.75 | 6.75 | 0.00 | 0.00 | 0.00 | 0.00 | 0.33 | 0.33 | 0.33 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 4.38 |
Overall L1 | 6.75 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
CMP X27, #8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
ADD X20, X19, X22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CSEL X12, X27, X28, #8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP X11, X12 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.CS 4385e0 <.omp_outlined.+0x300> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ORR X2, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
B 438680 <.omp_outlined.+0x3a0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
UBFM X12, X19, #61, #60 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ORR X2, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X15, X14, X12 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X3, X17, X12 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MADD X12, X16, X19, XZR | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
ADD X4, X5, X12 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X12, X6, X12 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP X13, X12 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
CCMP X4, X21, #2, #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CSINC W12, WZR, WZR, #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP X15, X21 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
CCMP X13, X3, #2, #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
B.CC 438680 <.omp_outlined.+0x3a0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
TBNZ W12, #0, 438680 <.omp_outlined.+0x3a0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD W12, W24, W2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB W15, W25, W12 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB X12, X18, X2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ANDS X3, X15, #4160 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.EQ 4386c0 <.omp_outlined.+0x3e0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD X15, X0, X2,LSL #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
CMP X12, #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.CC 4385a0 <.omp_outlined.+0x2c0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LDUR X12, [X29, #464] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADD X3, X12, X2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
UBFM X12, X2, #61, #60 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X2, X8, X12 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X12, X26, X12 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
HINT #0 | ||||||||||||||||||
B 4385a0 <.omp_outlined.+0x2c0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
Function | .omp_outlined.#0x4382e0 |
Source file and lines | LPlusTimes.cpp:57-57 |
Module | exec |
nb instructions | 38 |
loop length | 152 |
nb stack references | 0 |
front end | 4.38 cycles |
 | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.50 | 3.50 | 6.75 | 6.75 | 6.75 | 6.75 | 0.00 | 0.00 | 0.00 | 0.00 | 0.33 | 0.33 | 0.33 | 0.00 | 0.00 |
cycles | 3.50 | 3.50 | 6.75 | 6.75 | 6.75 | 6.75 | 0.00 | 0.00 | 0.00 | 0.00 | 0.33 | 0.33 | 0.33 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 4.38 |
Overall L1 | 6.75 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
CMP X27, #8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
ADD X20, X19, X22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CSEL X12, X27, X28, #8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP X11, X12 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.CS 4385e0 <.omp_outlined.+0x300> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ORR X2, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
B 438680 <.omp_outlined.+0x3a0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
UBFM X12, X19, #61, #60 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ORR X2, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X15, X14, X12 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X3, X17, X12 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MADD X12, X16, X19, XZR | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
ADD X4, X5, X12 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X12, X6, X12 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP X13, X12 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
CCMP X4, X21, #2, #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CSINC W12, WZR, WZR, #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP X15, X21 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
CCMP X13, X3, #2, #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
B.CC 438680 <.omp_outlined.+0x3a0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
TBNZ W12, #0, 438680 <.omp_outlined.+0x3a0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD W12, W24, W2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB W15, W25, W12 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB X12, X18, X2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ANDS X3, X15, #4160 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.EQ 4386c0 <.omp_outlined.+0x3e0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD X15, X0, X2,LSL #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
CMP X12, #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.CC 4385a0 <.omp_outlined.+0x2c0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LDUR X12, [X29, #464] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADD X3, X12, X2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
UBFM X12, X2, #61, #60 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X2, X8, X12 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X12, X26, X12 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
HINT #0 | ||||||||||||||||||
B 4385a0 <.omp_outlined.+0x2c0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |