Function: .omp_outlined..20 | Module: exec | Source: advec_mom.cpp:180-211 [...] | Coverage: 3.99% |
---|
Function: .omp_outlined..20 | Module: exec | Source: advec_mom.cpp:180-211 [...] | Coverage: 3.99% |
---|
/home/hbollore/qaas-runs/170-290-5445/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_mom.cpp: 180 - 211 |
-------------------------------------------------------------------------------- |
180: #pragma omp parallel for simd collapse(2) |
181: for (int j = (y_min - 1 + 1); j < (y_max + 1 + 2); j++) { |
182: for (int i = (x_min + 1); i < (x_max + 1 + 2); i++) |
183: ({ |
184: int upwind, donor, downwind, dif; |
185: double sigma, width, limiter, vdiffuw, vdiffdw, auw, adw, wind, advec_vel_s; |
186: if (node_flux(i, j) < 0.0) { |
187: upwind = j + 2; |
188: donor = j + 1; |
189: downwind = j; |
190: dif = donor; |
191: } else { |
192: upwind = j - 1; |
193: donor = j; |
194: downwind = j + 1; |
195: dif = upwind; |
196: } |
197: sigma = std::fabs(node_flux(i, j)) / (node_mass_pre(i, donor)); |
198: width = celldy[j]; |
199: vdiffuw = vel1(i, donor) - vel1(i, upwind); |
200: vdiffdw = vel1(i, downwind) - vel1(i, donor); |
201: limiter = 0.0; |
202: if (vdiffuw * vdiffdw > 0.0) { |
203: auw = std::fabs(vdiffuw); |
204: adw = std::fabs(vdiffdw); |
205: wind = 1.0; |
206: if (vdiffdw <= 0.0) wind = -1.0; |
207: limiter = |
208: wind * std::fmin(std::fmin(width * ((2.0 - sigma) * adw / width + (1.0 + sigma) * auw / celldy[dif]) / 6.0, auw), adw); |
209: } |
210: advec_vel_s = vel1(i, donor) + (1.0 - sigma) * limiter; |
211: mom_flux(i, j) = advec_vel_s * node_flux(i, j); |
/home/hbollore/qaas-runs/170-290-5445/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x41e0b0 SUB SP, SP, #176 |
0x41e0b4 STP D9, D8, [SP, #64] |
0x41e0b8 STP X29, X30, [SP, #80] |
0x41e0bc STP X28, X27, [SP, #96] |
0x41e0c0 STP X26, X25, [SP, #112] |
0x41e0c4 STP X24, X23, [SP, #128] |
0x41e0c8 STP X22, X21, [SP, #144] |
0x41e0cc STP X20, X19, [SP, #160] |
0x41e0d0 ADD X29, SP, #80 |
0x41e0d4 LDR W8, [X3] |
0x41e0d8 LDR W20, [X2] |
0x41e0dc LDR W9, [X4] |
0x41e0e0 ADD W8, W8, #3 |
0x41e0e4 LDR W10, [X5] |
0x41e0e8 ADD W21, W9, #1 |
0x41e0ec ADD W9, W10, #3 |
0x41e0f0 SUBS W8, W8, W20 |
0x41e0f4 CCMP W9, W21, #4, #12 |
0x41e0f8 B.LE 41e198 |
0x41e0fc LDP X11, X10, [X29, #104] |
0x41e100 LDR X12, [X29, #96] |
0x41e104 LDR X14, [X6] |
0x41e108 SUB W24, W9, W21 |
0x41e10c MOVN X9, #0 |
0x41e110 ADD X3, SP, #36 |
0x41e114 SUB X4, X29, #24 |
0x41e118 SUB X5, X29, #32 |
0x41e11c LDR X13, [X7] |
0x41e120 LDR W1, [X0] |
0x41e124 LDR X23, [X6, #16] |
0x41e128 ADRP X0, <45f128> |
0x41e12c ADD X0, X0, #176 |
0x41e130 ADD X6, SP, #40 |
0x41e134 MOVZ W2, #34 |
0x41e138 LDR X25, [X7, #16] |
0x41e13c MOVZ W7, #1 |
0x41e140 LDR X27, [X11] |
0x41e144 LDR X28, [X11, #16] |
0x41e148 LDR X11, [X10] |
0x41e14c LDR X26, [X12, #8] |
0x41e150 LDR X22, [X10, #16] |
0x41e154 UMADDL X19, W24, W8, X9 |
0x41e158 MOVZ W8, #1 |
0x41e15c STP X13, X14, [SP, #16] |
0x41e160 STP X19, XZR, [X29, #992] |
0x41e164 STR X8, [SP, #40] |
0x41e168 STP W1, WZR, [SP, #32] |
0x41e16c STP X8, X11, [SP] |
0x41e170 BL 4033a0 |
0x41e174 LDP X8, X13, [X29, #992] |
0x41e178 CMP X8, X19 |
0x41e17c CSEL X8, X8, X19, #11 |
0x41e180 CMP X13, X8 |
0x41e184 B.LE 41e1bc |
(352) 0x41e188 LDR W1, [SP, #32] |
(352) 0x41e18c ADRP X0, |
(352) 0x41e190 ADD X0, X0, #200 |
(352) 0x41e194 BL 403260 |
(352) 0x41e198 LDP D9, D8, [SP, #64] |
(352) 0x41e19c LDP X20, X19, [SP, #160] |
(352) 0x41e1a0 LDP X22, X21, [SP, #144] |
(352) 0x41e1a4 LDP X24, X23, [SP, #128] |
(352) 0x41e1a8 LDP X26, X25, [SP, #112] |
(352) 0x41e1ac LDP X28, X27, [SP, #96] |
(352) 0x41e1b0 LDP X29, X30, [SP, #80] |
(352) 0x41e1b4 ADD SP, SP, #176 |
(352) 0x41e1b8 RET |
(352) 0x41e1bc ADD X8, X8, #1 |
(352) 0x41e1c0 CNTD X10, ALL |
(352) 0x41e1c4 ORR X9, XZR, X13 |
(352) 0x41e1c8 SUB X11, X8, X13 |
(352) 0x41e1cc CMP X11, X10 |
(352) 0x41e1d0 B.CC 41e360 |
(352) 0x41e1d4 UDIV X9, X11, X10 |
(352) 0x41e1d8 PTRUE P0.D, ALL |
(352) 0x41e1dc FDUP Z20.D, #0 |
(352) 0x41e1e0 DUP Z21.D, #0 |
(352) 0x41e1e4 MADD X12, X9, X10, XZR |
(352) 0x41e1e8 INDEX Z0.D, X13, #1 |
(352) 0x41e1ec ADD X9, X13, X12 |
(352) 0x41e1f0 LDR X13, [SP, #24] |
(352) 0x41e1f4 DUP Z1.D, X10 |
(352) 0x41e1f8 SUB X11, X11, X12 |
(352) 0x41e1fc DUP Z2.D, X24 |
(352) 0x41e200 DUP Z3.D, X20 |
(352) 0x41e204 DUP Z4.D, X21 |
(352) 0x41e208 DUP Z7.D, X27 |
(352) 0x41e20c DUP Z5.D, X13 |
(352) 0x41e210 LDR X13, [SP, #16] |
(352) 0x41e214 DUP Z6.D, X13 |
(352) 0x41e218 LDR X13, [SP, #8] |
(352) 0x41e21c DUP Z16.D, X13 |
(352) 0x41e220 ADD W13, W20, #1 |
(352) 0x41e224 DUP Z17.D, X13 |
(352) 0x41e228 SUB W13, W20, #1 |
(352) 0x41e22c DUP Z18.D, X13 |
(352) 0x41e230 ADD W13, W20, #2 |
(352) 0x41e234 DUP Z19.D, X13 |
(352) 0x41e238 ORR X13, XZR, #3840 |
(352) 0x41e23c MOVK X13, #16325 |
(352) 0x41e240 DUP Z22.D, X13 |
(351) 0x41e244 MOVPRFX Z23, Z0 |
(351) 0x41e248 SDIV Z23.D, P0/M, Z23.D, Z2.D |
(351) 0x41e24c MOVPRFX Z25, Z0 |
(351) 0x41e250 MLS Z25.D, P0/M, Z23.D, Z2.D |
(351) 0x41e254 ADD Z25.D, Z4.D, Z25.D |
(351) 0x41e258 ADD Z24.D, Z3.D, Z23.D |
(351) 0x41e25c MOVPRFX Z26, Z24 |
(351) 0x41e260 SXTW Z26.D, P0/M, Z24.D |
(351) 0x41e264 SUBS X12, X12, X10 |
(351) 0x41e268 ADD Z28.D, Z17.D, Z23.D |
(351) 0x41e26c MOVPRFX Z29, Z28 |
(351) 0x41e270 SXTW Z29.D, P0/M, Z28.D |
(351) 0x41e274 ADD Z30.D, Z18.D, Z23.D |
(351) 0x41e278 ADD Z23.D, Z19.D, Z23.D |
(351) 0x41e27c ADD Z0.D, Z0.D, Z1.D |
(351) 0x41e280 SXTW Z25.D, P0/M, Z25.D |
(351) 0x41e284 MOVPRFX Z27, Z25 |
(351) 0x41e288 MLA Z27.D, P0/M, Z5.D, Z26.D |
(351) 0x41e28c LD1D {Z27.D}, P0/Z, [X23, Z27.D,LSL #3] |
(351) 0x41e290 MOVPRFX Z8, Z27 |
(351) 0x41e294 FABS Z8.D, P0/M, Z27.D |
(351) 0x41e298 FCMLT P1.D, P0/Z, Z27.D, #0 |
(351) 0x41e29c SEL Z23.D, P1, Z23.D, Z30.D |
(351) 0x41e2a0 SEL Z31.D, P1, Z26.D, Z29.D |
(351) 0x41e2a4 SEL Z29.D, P1, Z29.D, Z26.D |
(351) 0x41e2a8 SEL Z28.D, P1, Z28.D, Z30.D |
(351) 0x41e2ac MOVPRFX Z30, Z25 |
(351) 0x41e2b0 MLA Z30.D, P0/M, Z29.D, Z6.D |
(351) 0x41e2b4 LD1D {Z30.D}, P0/Z, [X25, Z30.D,LSL #3] |
(351) 0x41e2b8 SXTW Z23.D, P0/M, Z23.D |
(351) 0x41e2bc MAD Z29.D, P0/M, Z7.D, Z25.D |
(351) 0x41e2c0 MAD Z31.D, P0/M, Z7.D, Z25.D |
(351) 0x41e2c4 MAD Z23.D, P0/M, Z7.D, Z25.D |
(351) 0x41e2c8 FDIVR Z30.D, P0/M, Z30.D, Z8.D |
(351) 0x41e2cc LD1D {Z29.D}, P0/Z, [X28, Z29.D,LSL #3] |
(351) 0x41e2d0 LD1D {Z23.D}, P0/Z, [X28, Z23.D,LSL #3] |
(351) 0x41e2d4 LD1D {Z31.D}, P0/Z, [X28, Z31.D,LSL #3] |
(351) 0x41e2d8 FSUB Z9.D, Z29.D, Z23.D |
(351) 0x41e2dc FSUB Z8.D, Z31.D, Z29.D |
(351) 0x41e2e0 FABD Z31.D, P0/M, Z31.D, Z29.D |
(351) 0x41e2e4 FABD Z23.D, P0/M, Z23.D, Z29.D |
(351) 0x41e2e8 FMUL Z9.D, Z8.D, Z9.D |
(351) 0x41e2ec FCMGT P2.D, P0/Z, Z8.D, #0 |
(351) 0x41e2f0 FCMGT P1.D, P0/Z, Z9.D, #0 |
(351) 0x41e2f4 FSUB Z9.D, Z20.D, Z30.D |
(351) 0x41e2f8 LD1D {Z24.D}, P1/Z, [X26, Z24.D,SXTW #3] |
(351) 0x41e2fc LD1D {Z28.D}, P1/Z, [X26, Z28.D,SXTW #3] |
(351) 0x41e300 FMUL Z9.D, Z31.D, Z9.D |
(351) 0x41e304 MOVPRFX Z8, Z9 |
(351) 0x41e308 FDIV Z8.D, P0/M, Z8.D, Z24.D |
(351) 0x41e30c MOVPRFX Z9, Z23 |
(351) 0x41e310 FMLA Z9.D, P0/M, Z30.D, Z23.D |
(351) 0x41e314 FDIVR Z28.D, P0/M, Z28.D, Z9.D |
(351) 0x41e318 FMUL Z24.D, Z24.D, Z22.D |
(351) 0x41e31c FADD Z28.D, Z28.D, Z8.D |
(351) 0x41e320 FMUL Z24.D, Z24.D, Z28.D |
(351) 0x41e324 FMINNM Z23.D, P0/M, Z23.D, Z24.D |
(351) 0x41e328 FMINNM Z23.D, P0/M, Z23.D, Z31.D |
(351) 0x41e32c MOVPRFX Z24, Z23 |
(351) 0x41e330 FNEG Z24.D, P0/M, Z23.D |
(351) 0x41e334 SEL Z23.D, P2, Z23.D, Z24.D |
(351) 0x41e338 MOVPRFX Z24, Z30 |
(351) 0x41e33c FSUBR Z24.D, P0/M, Z24.D, #1 |
(351) 0x41e340 SEL Z23.D, P1, Z23.D, Z21.D |
(351) 0x41e344 FMAD Z23.D, P0/M, Z24.D, Z29.D |
(351) 0x41e348 MOVPRFX Z24, Z25 |
(351) 0x41e34c MLA Z24.D, P0/M, Z16.D, Z26.D |
(351) 0x41e350 FMUL Z23.D, Z23.D, Z27.D |
(351) 0x41e354 ST1D {Z23.D}, P0, [X22, Z24.D,LSL #3] |
(351) 0x41e358 B.NE 41e244 |
(352) 0x41e35c CBZ X11, 41e188 |
(350) 0x41e360 LDP X1, X0, [SP, #16] |
(350) 0x41e364 LDR X2, [SP, #8] |
(350) 0x41e368 SUB W10, WZR, W24 |
(350) 0x41e36c FMOV D0, #2.0000000 |
(350) 0x41e370 ORR X11, XZR, #3840 |
(350) 0x41e374 MOVK X11, #16325 |
(350) 0x41e378 B 41e3a0 |
0x41e37c HINT #0 |
(350) 0x41e380 FMSUB D2, D2, D6, D6 |
(350) 0x41e384 MADD X12, X2, X13, X12 |
(350) 0x41e388 ADD X9, X9, #1 |
(350) 0x41e38c CMP X8, X9 |
(350) 0x41e390 FADD D2, D2, D3 |
(350) 0x41e394 FMUL D1, D2, D1 |
(350) 0x41e398 STR D1, [X22, X12,LSL #3] |
(350) 0x41e39c B.EQ 41e188 |
(350) 0x41e3a0 SDIV X12, X9, X24 |
(350) 0x41e3a4 ADD W13, W21, W9 |
(350) 0x41e3a8 ADD W14, W20, W12 |
(350) 0x41e3ac MADD W12, W10, W12, W13 |
(350) 0x41e3b0 SBFM X12, X12, #0, #31 |
(350) 0x41e3b4 SBFM X13, X14, #0, #31 |
(350) 0x41e3b8 ADD W14, W14, #1 |
(350) 0x41e3bc MADD X15, X0, X13, X12 |
(350) 0x41e3c0 LDR D1, [X23, X15,LSL #3] |
(350) 0x41e3c4 SBFM X15, X14, #0, #31 |
(350) 0x41e3c8 FCMP D1, #0 |
(350) 0x41e3cc B.GE 41e3e0 |
(350) 0x41e3d0 ADD W17, W13, #2 |
(350) 0x41e3d4 ORR X16, XZR, X13 |
(350) 0x41e3d8 B 41e3f0 |
0x41e3dc HINT #0 |
(349) 0x41e3e0 SUB W14, W13, #1 |
(349) 0x41e3e4 ORR X16, XZR, X15 |
(349) 0x41e3e8 ORR X15, XZR, X13 |
(349) 0x41e3ec ORR W17, WZR, W14 |
(350) 0x41e3f0 MADD X18, X15, X1, X12 |
(350) 0x41e3f4 FABS D2, D1 |
(350) 0x41e3f8 MADD X15, X15, X27, X12 |
(350) 0x41e3fc MOVI D6, #0 |
(350) 0x41e400 LDR D3, [X25, X18,LSL #3] |
(350) 0x41e404 FDIV D2, D2, D3 |
(350) 0x41e408 LDR D3, [X28, X15,LSL #3] |
(350) 0x41e40c SBFM X15, X17, #0, #31 |
(350) 0x41e410 MADD X15, X27, X15, X12 |
(350) 0x41e414 LDR D4, [X28, X15,LSL #3] |
(350) 0x41e418 MADD X15, X16, X27, X12 |
(350) 0x41e41c FSUB D5, D3, D4 |
(350) 0x41e420 LDR D4, [X28, X15,LSL #3] |
(350) 0x41e424 FSUB D4, D4, D3 |
(350) 0x41e428 FMUL D7, D4, D5 |
(350) 0x41e42c FCMP D7, #0 |
(350) 0x41e430 B.LE 41e380 |
(350) 0x41e434 LDR D6, [X26, X13,LSL #3] |
(350) 0x41e438 FABS D5, D5 |
(350) 0x41e43c LDR D18, [X26, X14,SXTW #3] |
(350) 0x41e440 FABS D7, D4 |
(350) 0x41e444 FSUB D16, D0, D2 |
(350) 0x41e448 FCMP D4, #0 |
(350) 0x41e44c FMADD D17, D2, D5, D5 |
(350) 0x41e450 FMUL D16, D7, D16 |
(350) 0x41e454 FDIV D16, D16, D6 |
(350) 0x41e458 FDIV D17, D17, D18 |
(350) 0x41e45c FADD D16, D17, D16 |
(350) 0x41e460 FMOV D17, X11 |
(350) 0x41e464 FMUL D6, D6, D17 |
(350) 0x41e468 FMUL D6, D6, D16 |
(350) 0x41e46c FMINNM D5, D6, D5 |
(350) 0x41e470 FMINNM D5, D5, D7 |
(350) 0x41e474 FNEG D6, D5 |
(350) 0x41e478 FCSEL D6, D5, D6, #12 |
(350) 0x41e47c B 41e380 |
0x42b274 HINT #0 |
0x42b278 HINT #0 |
0x42b27c HINT #0 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
○100.00 | __kmp_invoke_microtask | | libomp.so |
Path / |
Source file and lines | advec_mom.cpp:180-211 |
Module | exec |
nb instructions | 59 |
loop length | 236 |
nb stack references | 0 |
front end | 6.75 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.50 | 1.50 | 5.75 | 5.75 | 5.75 | 5.75 | 0.50 | 0.50 | 0.00 | 0.00 | 9.67 | 9.67 | 9.67 | 5.50 | 5.50 |
cycles | 1.50 | 1.50 | 5.75 | 5.75 | 5.75 | 5.75 | 0.50 | 0.50 | 0.00 | 0.00 | 9.67 | 9.67 | 9.67 | 5.50 | 5.50 |
Cycles executing div or sqrt instructions | NA |
Front-end | 6.75 |
Overall L1 | 9.67 |
all | 12% |
load | NA (no load vectorizable/vectorized instructions) |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
SUB SP, SP, #176 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP D9, D8, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
STP X29, X30, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X28, X27, [SP, #96] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X26, X25, [SP, #112] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X24, X23, [SP, #128] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X22, X21, [SP, #144] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X20, X19, [SP, #160] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ADD X29, SP, #80 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR W8, [X3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR W20, [X2] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR W9, [X4] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADD W8, W8, #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR W10, [X5] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADD W21, W9, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD W9, W10, #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUBS W8, W8, W20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
CCMP W9, W21, #4, #12 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
B.LE 41e198 <.omp_outlined..20+0xe8> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LDP X11, X10, [X29, #104] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 |
LDR X12, [X29, #96] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X14, [X6] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
SUB W24, W9, W21 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVN X9, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X3, SP, #36 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB X4, X29, #24 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB X5, X29, #32 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR X13, [X7] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR W1, [X0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X23, [X6, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADRP X0, <45f128> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X0, X0, #176 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X6, SP, #40 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVZ W2, #34 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR X25, [X7, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
MOVZ W7, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR X27, [X11] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X28, [X11, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X11, [X10] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X26, [X12, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X22, [X10, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
UMADDL X19, W24, W8, X9 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
MOVZ W8, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP X13, X14, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X19, XZR, [X29, #992] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STR X8, [SP, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP W1, WZR, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X8, X11, [SP] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
BL 4033a0 <@plt_start@+0x670> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LDP X8, X13, [X29, #992] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 |
CMP X8, X19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
CSEL X8, X8, X19, #11 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP X13, X8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LE 41e1bc <.omp_outlined..20+0x10c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 |
Source file and lines | advec_mom.cpp:180-211 |
Module | exec |
nb instructions | 59 |
loop length | 236 |
nb stack references | 0 |
front end | 6.75 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.50 | 1.50 | 5.75 | 5.75 | 5.75 | 5.75 | 0.50 | 0.50 | 0.00 | 0.00 | 9.67 | 9.67 | 9.67 | 5.50 | 5.50 |
cycles | 1.50 | 1.50 | 5.75 | 5.75 | 5.75 | 5.75 | 0.50 | 0.50 | 0.00 | 0.00 | 9.67 | 9.67 | 9.67 | 5.50 | 5.50 |
Cycles executing div or sqrt instructions | NA |
Front-end | 6.75 |
Overall L1 | 9.67 |
all | 12% |
load | NA (no load vectorizable/vectorized instructions) |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
SUB SP, SP, #176 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP D9, D8, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
STP X29, X30, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X28, X27, [SP, #96] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X26, X25, [SP, #112] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X24, X23, [SP, #128] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X22, X21, [SP, #144] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X20, X19, [SP, #160] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ADD X29, SP, #80 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR W8, [X3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR W20, [X2] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR W9, [X4] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADD W8, W8, #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR W10, [X5] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADD W21, W9, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD W9, W10, #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUBS W8, W8, W20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
CCMP W9, W21, #4, #12 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
B.LE 41e198 <.omp_outlined..20+0xe8> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LDP X11, X10, [X29, #104] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 |
LDR X12, [X29, #96] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X14, [X6] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
SUB W24, W9, W21 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVN X9, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X3, SP, #36 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB X4, X29, #24 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB X5, X29, #32 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR X13, [X7] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR W1, [X0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X23, [X6, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADRP X0, <45f128> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X0, X0, #176 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X6, SP, #40 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVZ W2, #34 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR X25, [X7, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
MOVZ W7, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR X27, [X11] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X28, [X11, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X11, [X10] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X26, [X12, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X22, [X10, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
UMADDL X19, W24, W8, X9 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
MOVZ W8, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP X13, X14, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X19, XZR, [X29, #992] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STR X8, [SP, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP W1, WZR, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X8, X11, [SP] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
BL 4033a0 <@plt_start@+0x670> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LDP X8, X13, [X29, #992] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 |
CMP X8, X19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
CSEL X8, X8, X19, #11 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP X13, X8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LE 41e1bc <.omp_outlined..20+0x10c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼.omp_outlined..20– | 3.99 | 5.32 |
▼Loop 349 - context.h:69-69 - exec– | 0 | 0 |
▼Loop 350 - advec_mom.cpp:180-211 - exec– | 0 | 0 |
▼Loop 352 - advec_mom.cpp:180-211 - exec– | 0 | 0 |
○Loop 351 - advec_mom.cpp:181-211 - exec | 3.99 | 5.3 |