Loop Id: 25 | Module: exec | Source: forall.hpp:59-59 [...] | Coverage: 0.01% |
---|
Loop Id: 25 | Module: exec | Source: forall.hpp:59-59 [...] | Coverage: 0.01% |
---|
(23) 0x453ddc LDR W1, [SP, #36] |
(23) 0x453de0 ADRP X0, |
(23) 0x453de4 ADD X0, X0, #1816 |
(23) 0x453de8 BL 402bc0 |
(23) 0x453dec LDP X20, X19, [SP, #448] |
(23) 0x453df0 LDP X22, X21, [SP, #432] |
(23) 0x453df4 LDP X24, X23, [SP, #416] |
(23) 0x453df8 LDP X26, X25, [SP, #400] |
(23) 0x453dfc LDP X28, X27, [SP, #384] |
(23) 0x453e00 LDP X29, X30, [SP, #368] |
(23) 0x453e04 ADD SP, SP, #464 |
(23) 0x453e08 RET |
(23) 0x453e0c LDR X1, [SP, #16] |
(23) 0x453e10 LDUR X15, [X29, #432] |
(23) 0x453e14 LDR X10, [SP, #152] |
(23) 0x453e18 MADD X11, X26, X23, XZR |
(23) 0x453e1c UBFM X9, X24, #61, #60 |
(23) 0x453e20 FMOV D0, #2.0000000 |
(23) 0x453e24 MADD X8, X28, X1, XZR |
(23) 0x453e28 LDUR X16, [X29, #368] |
(23) 0x453e2c MADD X12, X15, X1, XZR |
(23) 0x453e30 ADD X10, X10, X9 |
(23) 0x453e34 LDR X14, [SP, #88] |
(23) 0x453e38 LDR X18, [SP, #24] |
(23) 0x453e3c LDUR X17, [X29, #352] |
(23) 0x453e40 ADD X0, X17, X13 |
(23) 0x453e44 STUR X23, [X29, #392] |
(23) 0x453e48 ADD X8, X9, X8,LSL #3 |
(23) 0x453e4c ADD X9, X9, X11,LSL #3 |
(23) 0x453e50 MADD X11, X16, X23, XZR |
(23) 0x453e54 UBFM X12, X12, #61, #60 |
(23) 0x453e58 ADD X11, X12, X11,LSL #3 |
(23) 0x453e5c LDR X12, [SP, #136] |
(23) 0x453e60 ADD X8, X12, X8 |
(23) 0x453e64 LDR X12, [SP, #144] |
(23) 0x453e68 STR X8, [SP, #152] |
(23) 0x453e6c LDUR X8, [X29, #440] |
(23) 0x453e70 ADD X9, X12, X9 |
(23) 0x453e74 LDUR X12, [X29, #448] |
(23) 0x453e78 SUB X8, X8, X23 |
(23) 0x453e7c STR X9, [SP, #144] |
(23) 0x453e80 SDIV X9, X8, X12 |
(23) 0x453e84 MSUB X8, X9, X12, X8 |
(23) 0x453e88 LDUR X12, [X29, #384] |
(23) 0x453e8c CMP X8, #0 |
(23) 0x453e90 MADD X8, X28, X18, XZR |
(23) 0x453e94 SUB X12, X12, X24 |
(23) 0x453e98 UBFM X6, X8, #61, #60 |
(23) 0x453e9c STUR X12, [X29, #384] |
(23) 0x453ea0 MADD X12, X14, X24, XZR |
(23) 0x453ea4 LDUR X24, [X29, #416] |
(23) 0x453ea8 ADD X11, X11, X12,LSL #3 |
(23) 0x453eac LDR X12, [SP, #112] |
(23) 0x453eb0 ADD X12, X12, X11 |
(23) 0x453eb4 STR X12, [SP, #136] |
(23) 0x453eb8 LDR X12, [SP, #128] |
(23) 0x453ebc ADD X12, X12, X11 |
(23) 0x453ec0 STR X12, [SP, #128] |
(23) 0x453ec4 LDR X12, [SP, #120] |
(23) 0x453ec8 ADD X11, X12, X11 |
(23) 0x453ecc LDUR X12, [X29, #464] |
(23) 0x453ed0 STR X11, [SP, #120] |
(23) 0x453ed4 LDUR X11, [X29, #456] |
(23) 0x453ed8 UBFM X2, X12, #61, #60 |
(23) 0x453edc SUB X11, X11, X1 |
(23) 0x453ee0 STR X11, [SP, #112] |
(23) 0x453ee4 LDR X11, [SP, #104] |
(23) 0x453ee8 UBFM X11, X11, #61, #60 |
(23) 0x453eec STR X11, [SP, #104] |
(23) 0x453ef0 LDR X11, [SP, #96] |
(23) 0x453ef4 UBFM X11, X11, #61, #60 |
(23) 0x453ef8 STR X11, [SP, #96] |
(23) 0x453efc MADD X11, X14, X12, XZR |
(23) 0x453f00 LDR X12, [SP, #80] |
(23) 0x453f04 UBFM X14, X12, #61, #60 |
(23) 0x453f08 LDR X12, [SP, #72] |
(23) 0x453f0c UBFM X12, X12, #61, #60 |
(23) 0x453f10 STP X12, X14, [SP, #80] |
(23) 0x453f14 MADD X12, X26, X24, XZR |
(23) 0x453f18 LDR X14, [SP, #48] |
(23) 0x453f1c UBFM X8, X12, #61, #60 |
(23) 0x453f20 STUR X8, [X29, #448] |
(23) 0x453f24 CSINC X8, X9, X9, #0 |
(23) 0x453f28 STUR X8, [X29, #456] |
(23) 0x453f2c LDR X8, [SP, #64] |
(23) 0x453f30 UBFM X14, X14, #61, #60 |
(23) 0x453f34 UBFM X8, X8, #61, #60 |
(23) 0x453f38 STR X14, [SP, #72] |
(23) 0x453f3c STR X8, [SP, #64] |
(23) 0x453f40 MADD X8, X16, X24, XZR |
(23) 0x453f44 ORR X16, XZR, X18 |
(23) 0x453f48 UBFM X8, X8, #61, #60 |
(23) 0x453f4c STUR X8, [X29, #440] |
(23) 0x453f50 MADD X8, X15, X18, XZR |
(23) 0x453f54 UBFM X15, X11, #61, #60 |
(23) 0x453f58 UBFM X14, X8, #61, #60 |
(23) 0x453f5c LDR X8, [SP, #56] |
(23) 0x453f60 UBFM X8, X8, #61, #60 |
(23) 0x453f64 STR X8, [SP, #56] |
(23) 0x453f68 LDUR X8, [X29, #424] |
(23) 0x453f6c UBFM X8, X8, #61, #60 |
(23) 0x453f70 STR X8, [SP, #48] |
(23) 0x453f74 LDUR X8, [X29, #376] |
(23) 0x453f78 UBFM X8, X8, #61, #60 |
(23) 0x453f7c STR X8, [SP, #40] |
(23) 0x453f80 B 453fa0 |
(24) 0x453f84 LDUR X17, [X29, #352] |
(24) 0x453f88 LDP X0, X13, [X29, #880] |
(23) 0x453f8c LDUR X8, [X29, #360] |
(23) 0x453f90 ADD X0, X0, #1 |
(23) 0x453f94 CMP X13, X8 |
(23) 0x453f98 ADD X13, X13, #1 |
(23) 0x453f9c B.EQ 453ddc |
(23) 0x453fa0 LDUR X8, [X29, #456] |
(23) 0x453fa4 CMP X8, #1 |
(23) 0x453fa8 B.LT 453f8c |
(24) 0x453fac LDUR X9, [X29, #344] |
(24) 0x453fb0 STUR X13, [X29, #376] |
(24) 0x453fb4 LDR X18, [SP, #112] |
(24) 0x453fb8 ORR X3, XZR, XZR |
(24) 0x453fbc SDIV X8, X13, X9 |
(24) 0x453fc0 STUR X0, [X29, #368] |
(24) 0x453fc4 MADD X9, X8, X9, XZR |
(24) 0x453fc8 SUB X11, X13, X9 |
(24) 0x453fcc LDR X13, [SP, #184] |
(24) 0x453fd0 SDIV X12, X18, X13 |
(24) 0x453fd4 MSUB X13, X12, X13, X18 |
(24) 0x453fd8 CMP X13, #0 |
(24) 0x453fdc CSINC X4, X12, X12, #0 |
(24) 0x453fe0 LDUR X12, [X29, #336] |
(24) 0x453fe4 ADD X5, X8, X12 |
(24) 0x453fe8 ADD X8, X11, X17 |
(24) 0x453fec LDP X12, X11, [SP, #168] |
(24) 0x453ff0 MADD X11, X5, X11, XZR |
(24) 0x453ff4 MADD X8, X8, X12, X11 |
(24) 0x453ff8 LDR X11, [SP, #160] |
(24) 0x453ffc ADD X8, X11, X8,LSL #3 |
(24) 0x454000 STUR X8, [X29, #432] |
(24) 0x454004 SUB X8, X0, X9 |
(24) 0x454008 LDP X9, X11, [SP, #96] |
(24) 0x45400c MADD X9, X9, X8, XZR |
(24) 0x454010 MADD X9, X11, X5, X9 |
(24) 0x454014 LDR X11, [SP, #152] |
(24) 0x454018 ADD X9, X11, X9 |
(24) 0x45401c STUR X9, [X29, #424] |
(24) 0x454020 LDP X9, X11, [SP, #80] |
(24) 0x454024 MADD X9, X9, X8, XZR |
(24) 0x454028 MADD X9, X11, X5, X9 |
(24) 0x45402c LDR X11, [SP, #144] |
(24) 0x454030 ADD X18, X11, X9 |
(24) 0x454034 LDP X9, X11, [SP, #64] |
(24) 0x454038 MADD X9, X9, X8, XZR |
(24) 0x45403c MADD X9, X11, X5, X9 |
(24) 0x454040 LDR X11, [SP, #136] |
(24) 0x454044 ADD X0, X11, X9 |
(24) 0x454048 LDR X9, [SP, #128] |
(24) 0x45404c LDR X11, [SP, #56] |
(24) 0x454050 MADD X28, X11, X8, X9 |
(24) 0x454054 LDR X9, [SP, #40] |
(24) 0x454058 MADD X8, X9, X8, XZR |
(24) 0x45405c LDR X9, [SP, #48] |
(24) 0x454060 MADD X8, X9, X5, X8 |
(24) 0x454064 LDR X9, [SP, #120] |
(24) 0x454068 ADD X11, X9, X8 |
(24) 0x45406c B 4540b4 |
0x454080 LDUR X23, [X29, #392] |
0x454084 LDUR X24, [X29, #416] |
0x454088 LDUR X0, [X29, #464] |
(24) 0x45408c LDUR X8, [X29, #448] |
(24) 0x454090 ADD X3, X3, #1 |
(24) 0x454094 ADD X18, X18, X8 |
(24) 0x454098 LDUR X8, [X29, #440] |
(24) 0x45409c ADD X0, X0, X8 |
(24) 0x4540a0 ADD X28, X28, X8 |
(24) 0x4540a4 ADD X11, X11, X8 |
(24) 0x4540a8 LDUR X8, [X29, #456] |
(24) 0x4540ac CMP X3, X8 |
(24) 0x4540b0 B.EQ 453f84 |
(24) 0x4540b4 CMP X4, #1 |
(24) 0x4540b8 B.LT 45408c |
0x4540bc LDUR X12, [X29, #408] |
0x4540c0 LDUR X13, [X29, #384] |
0x4540c4 MADD X24, X3, X24, X23 |
0x4540c8 ORR X9, XZR, XZR |
0x4540cc STUR X0, [X29, #464] |
0x4540d0 ORR X17, XZR, X11 |
0x4540d4 SDIV X8, X13, X12 |
0x4540d8 MSUB X12, X8, X12, X13 |
0x4540dc CMP X12, #0 |
0x4540e0 LDP X7, X12, [X29, #936] |
0x4540e4 CSINC X25, X8, X8, #0 |
0x4540e8 LDUR X8, [X29, #400] |
0x4540ec MADD X8, X8, X24, XZR |
0x4540f0 ADD X23, X12, X8,LSL #3 |
0x4540f4 ORR X8, XZR, X28 |
0x4540f8 B 45411c |
0x454100 ADD X9, X9, #1 |
0x454104 ADD X7, X7, X6 |
0x454108 ADD X0, X0, X14 |
0x45410c ADD X8, X8, X14 |
0x454110 ADD X17, X17, X14 |
0x454114 CMP X9, X4 |
0x454118 B.EQ 454080 |
0x45411c CMP X25, #1 |
0x454120 B.LT 454100 |
0x454124 ORR X30, XZR, XZR |
0x454128 ORR X13, XZR, XZR |
0x45412c ORR X26, XZR, X1 |
0x454130 MADD X12, X9, X16, X1 |
0x454134 ORR X1, XZR, X25 |
0x454138 HINT #0 |
0x45413c HINT #0 |
(26) 0x454140 LDR D1, [X19, X5,LSL #3] |
(26) 0x454144 LDR D2, [X10, X13] |
(26) 0x454148 LDR D3, [X22, X12,LSL #3] |
(26) 0x45414c SUBS X1, X1, #1 |
(26) 0x454150 FADD D1, D1, D1 |
(26) 0x454154 LDR D4, [X20, X24,LSL #3] |
(26) 0x454158 LDR D5, [X23, X12,LSL #3] |
(26) 0x45415c FDIV D1, D1, D2 |
(26) 0x454160 LDR D2, [X21, X5,LSL #3] |
(26) 0x454164 FADD D2, D2, D2 |
(26) 0x454168 FDIV D2, D2, D3 |
(26) 0x45416c LDR D3, [X27, X5,LSL #3] |
(26) 0x454170 FADD D3, D3, D3 |
(26) 0x454174 FDIV D3, D3, D4 |
(26) 0x454178 LDR D4, [X17, X30] |
(26) 0x45417c FMADD D4, D5, D1, D4 |
(26) 0x454180 LDR D5, [X18, X13] |
(26) 0x454184 FADD D1, D1, D2 |
(26) 0x454188 FMADD D4, D5, D2, D4 |
(26) 0x45418c LDR D5, [X7, X13] |
(26) 0x454190 LDR D2, [X8, X30] |
(26) 0x454194 FADD D1, D1, D3 |
(26) 0x454198 FMADD D4, D5, D3, D4 |
(26) 0x45419c FADD D1, D1, D2 |
(26) 0x4541a0 FDIV D1, D4, D1 |
(26) 0x4541a4 STR D1, [X0, X30] |
(26) 0x4541a8 ADD X30, X30, X15 |
(26) 0x4541ac LDR D2, [X23, X12,LSL #3] |
(26) 0x4541b0 FNMSUB D2, D1, D0, D2 |
(26) 0x4541b4 STR D2, [X23, X12,LSL #3] |
(26) 0x4541b8 LDR D2, [X18, X13] |
(26) 0x4541bc FNMSUB D2, D1, D0, D2 |
(26) 0x4541c0 STR D2, [X18, X13] |
(26) 0x4541c4 LDR D2, [X7, X13] |
(26) 0x4541c8 FNMSUB D1, D1, D0, D2 |
(26) 0x4541cc STR D1, [X7, X13] |
(26) 0x4541d0 ADD X13, X13, X2 |
(26) 0x4541d4 B.NE 454140 |
0x4541d8 ORR X1, XZR, X26 |
0x4541dc B 454100 |
/home/hbollore/qaas-runs/170-289-7893/intel/Kripke/build/Kripke/src/Kripke/Kernel/SweepSubdomain.cpp: 87 - 105 |
-------------------------------------------------------------------------------- |
87: double xcos_dxi = 2.0 * xcos(d) / dx(i); |
88: double ycos_dyj = 2.0 * ycos(d) / dy(j); |
89: double zcos_dzk = 2.0 * zcos(d) / dz(k); |
90: |
91: Zone z(zone_layout(*i, *j, *k)); |
92: |
93: /* Calculate new zonal flux */ |
94: double psi_d_g_z = (rhs(d,g,z) |
95: + psi_lf(d, g, j, k) * xcos_dxi |
96: + psi_fr(d, g, i, k) * ycos_dyj |
97: + psi_bo(d, g, i, j) * zcos_dzk) |
98: / (xcos_dxi + ycos_dyj + zcos_dzk + sigt(g, z)); |
99: |
100: psi(d, g, z) = psi_d_g_z; |
101: |
102: /* Apply diamond-difference relationships */ |
103: psi_lf(d, g, j, k) = 2.0 * psi_d_g_z - psi_lf(d, g, j, k); |
104: psi_fr(d, g, i, k) = 2.0 * psi_d_g_z - psi_fr(d, g, i, k); |
105: psi_bo(d, g, i, j) = 2.0 * psi_d_g_z - psi_bo(d, g, i, j); |
/home/hbollore/qaas-runs/170-289-7893/intel/Kripke/build/Kripke/tpl/raja/include/RAJA/policy/loop/forall.hpp: 59 - 59 |
-------------------------------------------------------------------------------- |
59: for (decltype(distance_it) i = 0; i < distance_it; ++i) { |
/home/hbollore/qaas-runs/170-289-7893/intel/Kripke/build/Kripke/tpl/raja/include/RAJA/policy/openmp/kernel/Collapse.hpp: 81 - 83 |
-------------------------------------------------------------------------------- |
81: #pragma omp parallel for private(i0, i1) firstprivate(privatizer) \ |
82: RAJA_COLLAPSE(2) |
83: for (i0 = 0; i0 < l0; ++i0) { |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
○100.00 | __kmp_invoke_microtask | | libomp.so |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.00 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 4.00 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.31 |
Bottlenecks | P2, P3, P4, P5 |
Function | .omp_outlined.#0x453c40 |
Source | forall.hpp:59-59 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 5.75 |
CQA cycles if no scalar integer | 5.75 |
CQA cycles if FP arith vectorized | 5.75 |
CQA cycles if fully vectorized | 1.44 |
Front-end cycles | 4.38 |
DIV/SQRT cycles | 1.00 - 0.50 |
P0 cycles | 2.00 |
P1 cycles | 2.00 |
P2 cycles | 5.75 |
P3 cycles | 5.75 |
P4 cycles | 5.75 |
P5 cycles | 5.75 |
P6 cycles | 0.00 |
P7 cycles | 0.00 |
P8 cycles | 0.00 |
P9 cycles | 0.00 |
P10 cycles | 2.83 |
P11 cycles | 2.50 |
P12 cycles | 2.67 |
P13 cycles | 0.50 |
P14 cycles | 0.50 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 37.00 |
Nb uops | 35.00 |
Nb loads | NA |
Nb stores | 1.00 |
Nb stack references | 0.00 |
FLOP/cycle | 0.00 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 12.52 |
Bytes prefetched | 0.00 |
Bytes loaded | 64.00 |
Bytes stored | 8.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 0.00 |
Vectorization ratio load | NA |
Vectorization ratio store | NA |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | 0.00 |
Vectorization ratio div_sqrt | 0.00 |
Vectorization ratio other | 0.00 |
Vector-efficiency ratio all | 25.00 |
Vector-efficiency ratio load | NA |
Vector-efficiency ratio store | NA |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | 25.00 |
Vector-efficiency ratio fma | 25.00 |
Vector-efficiency ratio div_sqrt | 25.00 |
Vector-efficiency ratio other | 25.00 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.00 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 4.00 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.31 |
Bottlenecks | P2, P3, P4, P5 |
Function | .omp_outlined.#0x453c40 |
Source | forall.hpp:59-59 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 5.75 |
CQA cycles if no scalar integer | 5.75 |
CQA cycles if FP arith vectorized | 5.75 |
CQA cycles if fully vectorized | 1.44 |
Front-end cycles | 4.38 |
DIV/SQRT cycles | 1.00 - 0.50 |
P0 cycles | 2.00 |
P1 cycles | 2.00 |
P2 cycles | 5.75 |
P3 cycles | 5.75 |
P4 cycles | 5.75 |
P5 cycles | 5.75 |
P6 cycles | 0.00 |
P7 cycles | 0.00 |
P8 cycles | 0.00 |
P9 cycles | 0.00 |
P10 cycles | 2.83 |
P11 cycles | 2.50 |
P12 cycles | 2.67 |
P13 cycles | 0.50 |
P14 cycles | 0.50 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 37.00 |
Nb uops | 35.00 |
Nb loads | NA |
Nb stores | 1.00 |
Nb stack references | 0.00 |
FLOP/cycle | 0.00 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 12.52 |
Bytes prefetched | 0.00 |
Bytes loaded | 64.00 |
Bytes stored | 8.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 0.00 |
Vectorization ratio load | NA |
Vectorization ratio store | NA |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | 0.00 |
Vectorization ratio div_sqrt | 0.00 |
Vectorization ratio other | 0.00 |
Vector-efficiency ratio all | 25.00 |
Vector-efficiency ratio load | NA |
Vector-efficiency ratio store | NA |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | 25.00 |
Vector-efficiency ratio fma | 25.00 |
Vector-efficiency ratio div_sqrt | 25.00 |
Vector-efficiency ratio other | 25.00 |
Path / |
Function | .omp_outlined.#0x453c40 |
Source file and lines | forall.hpp:59-59 |
Module | exec |
nb instructions | 37 |
loop length | 148 |
nb stack references | 0 |
front end | 4.38 cycles |
Port | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.00 | 2.00 | 5.75 | 5.75 | 5.75 | 5.75 | 0.00 | 0.00 | 0.00 | 0.00 | 2.83 | 2.50 | 2.67 | 0.50 | 0.50 |
cycles | 2.00 | 2.00 | 5.75 | 5.75 | 5.75 | 5.75 | 0.00 | 0.00 | 0.00 | 0.00 | 2.83 | 2.50 | 2.67 | 0.50 | 0.50 |
Cycles executing div or sqrt instructions | 1.00-0.50 |
Front-end | 4.38 |
Overall L1 | 5.75 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | 0% |
div/sqrt | 0% |
other | 0% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
LDUR X23, [X29, #392] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDUR X24, [X29, #416] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDUR X0, [X29, #464] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDUR X12, [X29, #408] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDUR X13, [X29, #384] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
MADD X24, X3, X24, X23 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
ORR X9, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STUR X0, [X29, #464] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ORR X17, XZR, X11 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SDIV X8, X13, X12 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-20 | 1-0.50 |
MSUB X12, X8, X12, X13 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
CMP X12, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
LDP X7, X12, [X29, #936] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 |
CSINC X25, X8, X8, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDUR X8, [X29, #400] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
MADD X8, X8, X24, XZR | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
ADD X23, X12, X8,LSL #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ORR X8, XZR, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
B 45411c <.omp_outlined.+0x4dc> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD X9, X9, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X7, X7, X6 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X0, X0, X14 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X8, X8, X14 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X17, X17, X14 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP X9, X4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.EQ 454080 <.omp_outlined.+0x440> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
CMP X25, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LT 454100 <.omp_outlined.+0x4c0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ORR X30, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ORR X13, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ORR X26, XZR, X1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MADD X12, X9, X16, X1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
ORR X1, XZR, X25 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
ORR X1, XZR, X26 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
B 454100 <.omp_outlined.+0x4c0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
Function | .omp_outlined.#0x453c40 |
Source file and lines | forall.hpp:59-59 |
Module | exec |
nb instructions | 37 |
loop length | 148 |
nb stack references | 0 |
front end | 4.38 cycles |
Port | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.00 | 2.00 | 5.75 | 5.75 | 5.75 | 5.75 | 0.00 | 0.00 | 0.00 | 0.00 | 2.83 | 2.50 | 2.67 | 0.50 | 0.50 |
cycles | 2.00 | 2.00 | 5.75 | 5.75 | 5.75 | 5.75 | 0.00 | 0.00 | 0.00 | 0.00 | 2.83 | 2.50 | 2.67 | 0.50 | 0.50 |
Cycles executing div or sqrt instructions | 1.00-0.50 |
Front-end | 4.38 |
Overall L1 | 5.75 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | 0% |
div/sqrt | 0% |
other | 0% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
LDUR X23, [X29, #392] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDUR X24, [X29, #416] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDUR X0, [X29, #464] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDUR X12, [X29, #408] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDUR X13, [X29, #384] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
MADD X24, X3, X24, X23 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
ORR X9, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STUR X0, [X29, #464] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ORR X17, XZR, X11 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SDIV X8, X13, X12 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-20 | 1-0.50 |
MSUB X12, X8, X12, X13 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
CMP X12, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
LDP X7, X12, [X29, #936] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 |
CSINC X25, X8, X8, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDUR X8, [X29, #400] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
MADD X8, X8, X24, XZR | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
ADD X23, X12, X8,LSL #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ORR X8, XZR, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
B 45411c <.omp_outlined.+0x4dc> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD X9, X9, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X7, X7, X6 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X0, X0, X14 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X8, X8, X14 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X17, X17, X14 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP X9, X4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.EQ 454080 <.omp_outlined.+0x440> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
CMP X25, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LT 454100 <.omp_outlined.+0x4c0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ORR X30, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ORR X13, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ORR X26, XZR, X1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MADD X12, X9, X16, X1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
ORR X1, XZR, X25 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
ORR X1, XZR, X26 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
B 454100 <.omp_outlined.+0x4c0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |