Function: .omp_outlined..12 | Module: exec | Source: advec_cell.cpp:157-202 [...] | Coverage: 3.76% |
---|
Function: .omp_outlined..12 | Module: exec | Source: advec_cell.cpp:157-202 [...] | Coverage: 3.76% |
---|
/home/hbollore/qaas-runs/170-290-5445/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_cell.cpp: 157 - 202 |
-------------------------------------------------------------------------------- |
157: #pragma omp parallel for simd collapse(2) |
158: for (int j = (y_min + 1); j < (y_max + 2 + 2); j++) { |
159: for (int i = (x_min + 1); i < (x_max + 2); i++) |
160: ({ |
161: int upwind, donor, downwind, dif; |
162: double sigmat, sigma3, sigma4, sigmav, sigmam, diffuw, diffdw, limiter, wind; |
163: if (vol_flux_y(i, j) > 0.0) { |
164: upwind = j - 2; |
165: donor = j - 1; |
166: downwind = j; |
167: dif = donor; |
168: } else { |
169: upwind = std::min(j + 1, y_max + 2); |
170: donor = j; |
171: downwind = j - 1; |
172: dif = upwind; |
173: } |
174: sigmat = std::fabs(vol_flux_y(i, j)) / pre_vol(i, donor); |
175: sigma3 = (1.0 + sigmat) * (vertexdy[j] / vertexdy[dif]); |
176: sigma4 = 2.0 - sigmat; |
177: sigmav = sigmat; |
178: diffuw = density1(i, donor) - density1(i, upwind); |
179: diffdw = density1(i, downwind) - density1(i, donor); |
180: wind = 1.0; |
181: if (diffdw <= 0.0) wind = -1.0; |
182: if (diffuw * diffdw > 0.0) { |
183: limiter = (1.0 - sigmav) * wind * |
184: std::fmin(std::fmin(std::fabs(diffuw), std::fabs(diffdw)), |
185: one_by_six * (sigma3 * std::fabs(diffuw) + sigma4 * std::fabs(diffdw))); |
186: } else { |
187: limiter = 0.0; |
188: } |
189: mass_flux_y(i, j) = vol_flux_y(i, j) * (density1(i, donor) + limiter); |
190: sigmam = std::fabs(mass_flux_y(i, j)) / (density1(i, donor) * pre_vol(i, donor)); |
191: diffuw = energy1(i, donor) - energy1(i, upwind); |
192: diffdw = energy1(i, downwind) - energy1(i, donor); |
193: wind = 1.0; |
194: if (diffdw <= 0.0) wind = -1.0; |
195: if (diffuw * diffdw > 0.0) { |
196: limiter = (1.0 - sigmam) * wind * |
197: std::fmin(std::fmin(std::fabs(diffuw), std::fabs(diffdw)), |
198: one_by_six * (sigma3 * std::fabs(diffuw) + sigma4 * std::fabs(diffdw))); |
199: } else { |
200: limiter = 0.0; |
201: } |
202: ener_flux(i, j) = mass_flux_y(i, j) * (energy1(i, donor) + limiter); |
/home/hbollore/qaas-runs/170-290-5445/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x41b8b0 SUB SP, SP, #288 |
0x41b8b4 STP D15, D14, [SP, #128] |
0x41b8b8 STP D13, D12, [SP, #144] |
0x41b8bc STP D11, D10, [SP, #160] |
0x41b8c0 STP D9, D8, [SP, #176] |
0x41b8c4 STP X29, X30, [SP, #192] |
0x41b8c8 STP X28, X27, [SP, #208] |
0x41b8cc STP X26, X25, [SP, #224] |
0x41b8d0 STP X24, X23, [SP, #240] |
0x41b8d4 STP X22, X21, [SP, #256] |
0x41b8d8 STP X20, X19, [SP, #272] |
0x41b8dc ADD X29, SP, #192 |
0x41b8e0 LDR W11, [X2] |
0x41b8e4 LDR W13, [X3] |
0x41b8e8 ADD W12, W11, #1 |
0x41b8ec ADD W8, W13, #4 |
0x41b8f0 SUBS W8, W8, W12 |
0x41b8f4 B.LE 41b9d4 |
0x41b8f8 LDR W9, [X4] |
0x41b8fc LDR W10, [X5] |
0x41b900 ADD W15, W9, #1 |
0x41b904 ADD W9, W10, #2 |
0x41b908 CMP W9, W15 |
0x41b90c B.LE 41b9d4 |
0x41b910 STP X12, X11, [SP, #72] |
0x41b914 LDP X11, X10, [X29, #128] |
0x41b918 STR W13, [SP, #20] |
0x41b91c LDR X12, [X29, #120] |
0x41b920 ADD X3, SP, #92 |
0x41b924 SUB X4, X29, #80 |
0x41b928 SUB X5, X29, #88 |
0x41b92c LDP X14, X13, [X29, #96] |
0x41b930 LDR X17, [X6] |
0x41b934 MOVZ W2, #34 |
0x41b938 LDR X16, [X7] |
0x41b93c LDR W1, [X0] |
0x41b940 ADRP X0, <462940> |
0x41b944 ADD X0, X0, #3456 |
0x41b948 LDR X24, [X6, #16] |
0x41b94c ADD X6, SP, #96 |
0x41b950 LDR X26, [X7, #16] |
0x41b954 MOVZ W7, #1 |
0x41b958 STR X15, [SP, #64] |
0x41b95c LDR X23, [X11] |
0x41b960 LDR X21, [X11, #16] |
0x41b964 SUB W11, W9, W15 |
0x41b968 MOVN X9, #0 |
0x41b96c LDR X28, [X13] |
0x41b970 LDR X19, [X13, #16] |
0x41b974 UMADDL X20, W11, W8, X9 |
0x41b978 LDR X13, [X12] |
0x41b97c LDR X9, [X10] |
0x41b980 LDR X27, [X14, #8] |
0x41b984 LDR X25, [X12, #16] |
0x41b988 LDR X22, [X10, #16] |
0x41b98c MOVZ W8, #1 |
0x41b990 STP X16, X17, [SP, #48] |
0x41b994 STP W1, WZR, [SP, #88] |
0x41b998 STR X8, [SP, #96] |
0x41b99c STR X8, [SP] |
0x41b9a0 STP X11, X13, [SP, #32] |
0x41b9a4 STP X20, XZR, [X29, #936] |
0x41b9a8 STR X9, [SP, #24] |
0x41b9ac BL 402ee0 |
0x41b9b0 LDP X8, X14, [X29, #936] |
0x41b9b4 CMP X8, X20 |
0x41b9b8 CSEL X9, X8, X20, #11 |
0x41b9bc CMP X14, X9 |
0x41b9c0 B.LE 41ba04 |
(311) 0x41b9c4 LDR W1, [SP, #88] |
(311) 0x41b9c8 ADRP X0, |
(311) 0x41b9cc ADD X0, X0, #3480 |
(311) 0x41b9d0 BL 402d80 |
(311) 0x41b9d4 LDP D9, D8, [SP, #176] |
(311) 0x41b9d8 LDP D11, D10, [SP, #160] |
(311) 0x41b9dc LDP D13, D12, [SP, #144] |
(311) 0x41b9e0 LDP D15, D14, [SP, #128] |
(311) 0x41b9e4 LDP X20, X19, [SP, #272] |
(311) 0x41b9e8 LDP X22, X21, [SP, #256] |
(311) 0x41b9ec LDP X24, X23, [SP, #240] |
(311) 0x41b9f0 LDP X26, X25, [SP, #224] |
(311) 0x41b9f4 LDP X28, X27, [SP, #208] |
(311) 0x41b9f8 LDP X29, X30, [SP, #192] |
(311) 0x41b9fc ADD SP, SP, #288 |
(311) 0x41ba00 RET |
(311) 0x41ba04 LDR W8, [SP, #20] |
(311) 0x41ba08 ADD X9, X9, #1 |
(311) 0x41ba0c CNTD X11, ALL |
(311) 0x41ba10 ORR X10, XZR, X14 |
(311) 0x41ba14 ADD W8, W8, #2 |
(311) 0x41ba18 SUB X12, X9, X14 |
(311) 0x41ba1c CMP X12, X11 |
(311) 0x41ba20 B.CC 41bc84 |
(311) 0x41ba24 UDIV X10, X12, X11 |
(311) 0x41ba28 PTRUE P0.D, ALL |
(311) 0x41ba2c MADD X13, X10, X11, XZR |
(311) 0x41ba30 INDEX Z0.D, X14, #1 |
(311) 0x41ba34 ADD X10, X14, X13 |
(311) 0x41ba38 LDR X14, [SP, #32] |
(311) 0x41ba3c DUP Z22.D, X8 |
(311) 0x41ba40 SUB X12, X12, X13 |
(311) 0x41ba44 SXTW Z22.D, P0/M, Z22.D |
(311) 0x41ba48 DUP Z1.D, X11 |
(311) 0x41ba4c DUP Z16.D, X28 |
(311) 0x41ba50 DUP Z18.D, X23 |
(311) 0x41ba54 DUP Z2.D, X14 |
(311) 0x41ba58 LDP X14, X15, [SP, #72] |
(311) 0x41ba5c DUP Z3.D, X14 |
(311) 0x41ba60 LDR X14, [SP, #64] |
(311) 0x41ba64 DUP Z6.D, X15 |
(311) 0x41ba68 DUP Z4.D, X14 |
(311) 0x41ba6c LDR X14, [SP, #56] |
(311) 0x41ba70 DUP Z5.D, X14 |
(311) 0x41ba74 LDR X14, [SP, #48] |
(311) 0x41ba78 DUP Z7.D, X14 |
(311) 0x41ba7c LDR X14, [SP, #40] |
(311) 0x41ba80 DUP Z17.D, X14 |
(311) 0x41ba84 LDR X14, [SP, #24] |
(311) 0x41ba88 DUP Z19.D, X14 |
(311) 0x41ba8c ADD W14, W15, #2 |
(311) 0x41ba90 DUP Z20.D, X14 |
(311) 0x41ba94 SUB W14, W15, #1 |
(311) 0x41ba98 DUP Z21.D, X14 |
(311) 0x41ba9c ORR X14, XZR, #3840 |
(311) 0x41baa0 MOVK X14, #16325 |
(311) 0x41baa4 DUP Z25.D, X14 |
(310) 0x41baa8 MOVPRFX Z28, Z0 |
(310) 0x41baac SDIV Z28.D, P0/M, Z28.D, Z2.D |
(310) 0x41bab0 MOVPRFX Z26, Z0 |
(310) 0x41bab4 MLS Z26.D, P0/M, Z28.D, Z2.D |
(310) 0x41bab8 ADD Z27.D, Z4.D, Z26.D |
(310) 0x41babc ADD Z29.D, Z3.D, Z28.D |
(310) 0x41bac0 MOVPRFX Z26, Z29 |
(310) 0x41bac4 SXTW Z26.D, P0/M, Z29.D |
(310) 0x41bac8 SUBS X13, X13, X11 |
(310) 0x41bacc ADD Z31.D, Z20.D, Z28.D |
(310) 0x41bad0 ADD Z8.D, Z6.D, Z28.D |
(310) 0x41bad4 ADD Z28.D, Z21.D, Z28.D |
(310) 0x41bad8 ADD Z0.D, Z0.D, Z1.D |
(310) 0x41badc SXTW Z27.D, P0/M, Z27.D |
(310) 0x41bae0 MOVPRFX Z30, Z27 |
(310) 0x41bae4 MLA Z30.D, P0/M, Z5.D, Z26.D |
(310) 0x41bae8 LD1D {Z30.D}, P0/Z, [X24, Z30.D,LSL #3] |
(310) 0x41baec MOVPRFX Z10, Z30 |
(310) 0x41baf0 FABS Z10.D, P0/M, Z30.D |
(310) 0x41baf4 SXTW Z31.D, P0/M, Z31.D |
(310) 0x41baf8 SXTW Z8.D, P0/M, Z8.D |
(310) 0x41bafc SXTW Z28.D, P0/M, Z28.D |
(310) 0x41bb00 SMIN Z31.D, P0/M, Z31.D, Z22.D |
(310) 0x41bb04 FCMGT P1.D, P0/Z, Z30.D, #0 |
(310) 0x41bb08 SEL Z28.D, P1, Z28.D, Z31.D |
(310) 0x41bb0c SEL Z31.D, P1, Z8.D, Z31.D |
(310) 0x41bb10 SEL Z9.D, P1, Z26.D, Z8.D |
(310) 0x41bb14 SEL Z8.D, P1, Z8.D, Z26.D |
(310) 0x41bb18 MOVPRFX Z11, Z27 |
(310) 0x41bb1c MLA Z11.D, P0/M, Z8.D, Z7.D |
(310) 0x41bb20 LD1D {Z12.D}, P0/Z, [X26, Z11.D,LSL #3] |
(310) 0x41bb24 LD1D {Z29.D}, P0/Z, [X27, Z29.D,SXTW #3] |
(310) 0x41bb28 MOVPRFX Z13, Z27 |
(310) 0x41bb2c MLA Z13.D, P0/M, Z16.D, Z28.D |
(310) 0x41bb30 MOVPRFX Z14, Z27 |
(310) 0x41bb34 MLA Z14.D, P0/M, Z9.D, Z16.D |
(310) 0x41bb38 LD1D {Z31.D}, P0/Z, [X27, Z31.D,LSL #3] |
(310) 0x41bb3c FDIV Z10.D, P0/M, Z10.D, Z12.D |
(310) 0x41bb40 MOVPRFX Z12, Z10 |
(310) 0x41bb44 FADD Z12.D, P0/M, Z12.D, #1 |
(310) 0x41bb48 MAD Z28.D, P0/M, Z18.D, Z27.D |
(310) 0x41bb4c FDIV Z29.D, P0/M, Z29.D, Z31.D |
(310) 0x41bb50 MOVPRFX Z31, Z27 |
(310) 0x41bb54 MLA Z31.D, P0/M, Z8.D, Z16.D |
(310) 0x41bb58 FMUL Z29.D, Z12.D, Z29.D |
(310) 0x41bb5c LD1D {Z12.D}, P0/Z, [X19, Z31.D,LSL #3] |
(310) 0x41bb60 LD1D {Z13.D}, P0/Z, [X19, Z13.D,LSL #3] |
(310) 0x41bb64 LD1D {Z14.D}, P0/Z, [X19, Z14.D,LSL #3] |
(310) 0x41bb68 FSUB Z23.D, Z14.D, Z12.D |
(310) 0x41bb6c FSUB Z15.D, Z12.D, Z13.D |
(310) 0x41bb70 FCMLE P1.D, P0/Z, Z23.D, #0 |
(310) 0x41bb74 FMUL Z15.D, Z15.D, Z23.D |
(310) 0x41bb78 MOVPRFX Z23, Z10 |
(310) 0x41bb7c FSUBR Z23.D, P0/M, Z23.D, #1 |
(310) 0x41bb80 MOVPRFX Z24, Z23 |
(310) 0x41bb84 FNEG Z24.D, P0/M, Z23.D |
(310) 0x41bb88 EOR P1.B, P0/Z, P1.B, P0.B |
(310) 0x41bb8c SEL Z23.D, P1, Z23.D, Z24.D |
(310) 0x41bb90 FDUP Z24.D, #0 |
(310) 0x41bb94 FCMGT P1.D, P0/Z, Z15.D, #0 |
(310) 0x41bb98 FSUB Z24.D, Z24.D, Z10.D |
(310) 0x41bb9c MOVPRFX Z10, Z12 |
(310) 0x41bba0 FABD Z10.D, P0/M, Z10.D, Z13.D |
(310) 0x41bba4 MOVPRFX Z13, Z14 |
(310) 0x41bba8 FABD Z13.D, P0/M, Z13.D, Z12.D |
(310) 0x41bbac MOVPRFX Z14, Z10 |
(310) 0x41bbb0 FMINNM Z14.D, P0/M, Z14.D, Z13.D |
(310) 0x41bbb4 FMUL Z13.D, Z24.D, Z13.D |
(310) 0x41bbb8 FMAD Z10.D, P0/M, Z29.D, Z13.D |
(310) 0x41bbbc FMUL Z10.D, Z10.D, Z25.D |
(310) 0x41bbc0 FMINNM Z10.D, P0/M, Z10.D, Z14.D |
(310) 0x41bbc4 FMUL Z23.D, Z23.D, Z10.D |
(310) 0x41bbc8 DUP Z10.D, #0 |
(310) 0x41bbcc SEL Z23.D, P1, Z23.D, Z10.D |
(310) 0x41bbd0 FADD Z23.D, Z12.D, Z23.D |
(310) 0x41bbd4 FMUL Z23.D, Z30.D, Z23.D |
(310) 0x41bbd8 MOVPRFX Z30, Z27 |
(310) 0x41bbdc MLA Z30.D, P0/M, Z17.D, Z26.D |
(310) 0x41bbe0 ST1D {Z23.D}, P0, [X25, Z30.D,LSL #3] |
(310) 0x41bbe4 MOVPRFX Z30, Z27 |
(310) 0x41bbe8 MLA Z30.D, P0/M, Z8.D, Z18.D |
(310) 0x41bbec MOVPRFX Z8, Z27 |
(310) 0x41bbf0 MLA Z8.D, P0/M, Z9.D, Z18.D |
(310) 0x41bbf4 LD1D {Z30.D}, P0/Z, [X21, Z30.D,LSL #3] |
(310) 0x41bbf8 LD1D {Z28.D}, P0/Z, [X21, Z28.D,LSL #3] |
(310) 0x41bbfc LD1D {Z8.D}, P0/Z, [X21, Z8.D,LSL #3] |
(310) 0x41bc00 LD1D {Z31.D}, P0/Z, [X19, Z31.D,LSL #3] |
(310) 0x41bc04 FSUB Z9.D, Z30.D, Z28.D |
(310) 0x41bc08 FSUB Z10.D, Z8.D, Z30.D |
(310) 0x41bc0c FABD Z8.D, P0/M, Z8.D, Z30.D |
(310) 0x41bc10 FABD Z28.D, P0/M, Z28.D, Z30.D |
(310) 0x41bc14 FMUL Z9.D, Z9.D, Z10.D |
(310) 0x41bc18 FCMLE P1.D, P0/Z, Z10.D, #0 |
(310) 0x41bc1c LD1D {Z10.D}, P0/Z, [X26, Z11.D,LSL #3] |
(310) 0x41bc20 FMUL Z24.D, Z24.D, Z8.D |
(310) 0x41bc24 EOR P1.B, P0/Z, P1.B, P0.B |
(310) 0x41bc28 FMLA Z24.D, P0/M, Z29.D, Z28.D |
(310) 0x41bc2c FMINNM Z28.D, P0/M, Z28.D, Z8.D |
(310) 0x41bc30 FMUL Z31.D, Z31.D, Z10.D |
(310) 0x41bc34 MOVPRFX Z10, Z23 |
(310) 0x41bc38 FABS Z10.D, P0/M, Z23.D |
(310) 0x41bc3c FDIVR Z31.D, P0/M, Z31.D, Z10.D |
(310) 0x41bc40 FMUL Z24.D, Z24.D, Z25.D |
(310) 0x41bc44 FMINNM Z24.D, P0/M, Z24.D, Z28.D |
(310) 0x41bc48 FSUBR Z31.D, P0/M, Z31.D, #1 |
(310) 0x41bc4c MOVPRFX Z10, Z31 |
(310) 0x41bc50 FNEG Z10.D, P0/M, Z31.D |
(310) 0x41bc54 SEL Z31.D, P1, Z31.D, Z10.D |
(310) 0x41bc58 FCMGT P1.D, P0/Z, Z9.D, #0 |
(310) 0x41bc5c FMUL Z24.D, Z24.D, Z31.D |
(310) 0x41bc60 EOR P1.B, P0/Z, P1.B, P0.B |
(310) 0x41bc64 CPY Z24.D, P1/M, #0 |
(310) 0x41bc68 FADD Z24.D, Z30.D, Z24.D |
(310) 0x41bc6c FMUL Z23.D, Z23.D, Z24.D |
(310) 0x41bc70 MOVPRFX Z24, Z27 |
(310) 0x41bc74 MLA Z24.D, P0/M, Z19.D, Z26.D |
(310) 0x41bc78 ST1D {Z23.D}, P0, [X22, Z24.D,LSL #3] |
(310) 0x41bc7c B.NE 41baa8 |
(311) 0x41bc80 CBZ X12, 41b9c4 |
(309) 0x41bc84 LDP X20, X7, [SP, #32] |
(309) 0x41bc88 LDP X3, X2, [SP, #72] |
(309) 0x41bc8c LDP X5, X4, [SP, #56] |
(309) 0x41bc90 FMOV D0, #1.0000000 |
(309) 0x41bc94 FMOV D1, #2.0000000 |
(309) 0x41bc98 ORR X12, XZR, #3840 |
(309) 0x41bc9c MOVK X12, #16325 |
(309) 0x41bca0 LDR X6, [SP, #48] |
(309) 0x41bca4 LDR X30, [SP, #24] |
(309) 0x41bca8 SUB W11, WZR, W20 |
(309) 0x41bcac B 41bcdc |
0x41bcb0 HINT #0 |
0x41bcb4 HINT #0 |
0x41bcb8 HINT #0 |
0x41bcbc HINT #0 |
(309) 0x41bcc0 FADD D3, D6, D5 |
(309) 0x41bcc4 MADD X13, X30, X14, X13 |
(309) 0x41bcc8 ADD X10, X10, #1 |
(309) 0x41bccc CMP X9, X10 |
(309) 0x41bcd0 FMUL D2, D2, D3 |
(309) 0x41bcd4 STR D2, [X22, X13,LSL #3] |
(309) 0x41bcd8 B.EQ 41b9c4 |
(309) 0x41bcdc SDIV X15, X10, X20 |
(309) 0x41bce0 ADD W13, W4, W10 |
(309) 0x41bce4 ADD W14, W3, W15 |
(309) 0x41bce8 MADD W13, W11, W15, W13 |
(309) 0x41bcec SBFM X13, X13, #0, #31 |
(309) 0x41bcf0 SBFM X14, X14, #0, #31 |
(309) 0x41bcf4 MADD X16, X5, X14, X13 |
(309) 0x41bcf8 LDR D2, [X24, X16,LSL #3] |
(309) 0x41bcfc FCMP D2, #0 |
(309) 0x41bd00 B.LE 41bd20 |
(309) 0x41bd04 ADD W15, W2, W15 |
(309) 0x41bd08 SUB W16, W14, #2 |
(309) 0x41bd0c SBFM X16, X16, #0, #31 |
(309) 0x41bd10 SBFM X0, X15, #0, #31 |
(309) 0x41bd14 ORR X15, XZR, X14 |
(309) 0x41bd18 ORR X18, XZR, X0 |
(309) 0x41bd1c B 41bd40 |
(309) 0x41bd20 ADD W16, W14, #1 |
(309) 0x41bd24 ADD W15, W2, W15 |
(309) 0x41bd28 ORR X18, XZR, X14 |
(309) 0x41bd2c SBFM X15, X15, #0, #31 |
(309) 0x41bd30 CMP W8, W16 |
(309) 0x41bd34 CSINC W16, W8, W14, #11 |
(309) 0x41bd38 SBFM X16, X16, #0, #31 |
(309) 0x41bd3c ORR X0, XZR, X16 |
(309) 0x41bd40 MADD X17, X18, X6, X13 |
(309) 0x41bd44 FABS D3, D2 |
(309) 0x41bd48 LDR D5, [X27, X0,LSL #3] |
(309) 0x41bd4c MADD X0, X18, X28, X13 |
(309) 0x41bd50 LDR D4, [X26, X17,LSL #3] |
(309) 0x41bd54 MADD X1, X28, X16, X13 |
(309) 0x41bd58 LDR D6, [X19, X0,LSL #3] |
(309) 0x41bd5c FDIV D7, D3, D4 |
(309) 0x41bd60 LDR D4, [X27, X14,LSL #3] |
(309) 0x41bd64 FDIV D4, D4, D5 |
(309) 0x41bd68 LDR D5, [X19, X1,LSL #3] |
(309) 0x41bd6c MADD X1, X15, X28, X13 |
(309) 0x41bd70 FSUB D16, D6, S5 |
(309) 0x41bd74 LDR D5, [X19, X1,LSL #3] |
(309) 0x41bd78 FADD D3, D7, D0 |
(309) 0x41bd7c FSUB D17, D5, S6 |
(309) 0x41bd80 MOVI D5, #0 |
(309) 0x41bd84 FMUL D18, D16, D17 |
(309) 0x41bd88 FMUL D3, D3, D4 |
(309) 0x41bd8c FSUB D4, D1, S7 |
(309) 0x41bd90 FCMP D18, #0 |
(309) 0x41bd94 MOVI D18, #0 |
(309) 0x41bd98 B.LE 41bdd0 |
(309) 0x41bd9c FSUB D7, D0, S7 |
(309) 0x41bda0 FCMP D17, #0 |
(309) 0x41bda4 FABS D16, D16 |
(309) 0x41bda8 FABS D17, D17 |
(309) 0x41bdac FNEG D18, D7 |
(309) 0x41bdb0 FCSEL D7, D7, D18, #8 |
(309) 0x41bdb4 FMINNM D18, D16, D17 |
(309) 0x41bdb8 FMUL D17, D4, D17 |
(309) 0x41bdbc FMADD D16, D3, D16, D17 |
(309) 0x41bdc0 FMOV D17, X12 |
(309) 0x41bdc4 FMUL D16, D16, D17 |
(309) 0x41bdc8 FMINNM D16, D18, D16 |
(309) 0x41bdcc FMUL D18, D7, D16 |
(309) 0x41bdd0 FADD D6, D6, D18 |
(309) 0x41bdd4 MADD X1, X7, X14, X13 |
(309) 0x41bdd8 MADD X18, X18, X23, X13 |
(309) 0x41bddc FMUL D2, D2, D6 |
(309) 0x41bde0 MADD X16, X23, X16, X13 |
(309) 0x41bde4 MADD X15, X15, X23, X13 |
(309) 0x41bde8 STR D2, [X25, X1,LSL #3] |
(309) 0x41bdec LDR D6, [X21, X18,LSL #3] |
(309) 0x41bdf0 LDR D7, [X21, X16,LSL #3] |
(309) 0x41bdf4 LDR D16, [X21, X15,LSL #3] |
(309) 0x41bdf8 FSUB D7, D6, S7 |
(309) 0x41bdfc FSUB D16, D16, S6 |
(309) 0x41be00 FMUL D17, D7, D16 |
(309) 0x41be04 FCMP D17, #0 |
(309) 0x41be08 B.LE 41bcc0 |
(309) 0x41be0c LDR D17, [X19, X0,LSL #3] |
(309) 0x41be10 LDR D18, [X26, X17,LSL #3] |
(309) 0x41be14 FABS D5, D2 |
(309) 0x41be18 FCMP D16, #0 |
(309) 0x41be1c FABS D16, D16 |
(309) 0x41be20 FABS D7, D7 |
(309) 0x41be24 FMUL D4, D4, D16 |
(309) 0x41be28 FMADD D3, D3, D7, D4 |
(309) 0x41be2c FMOV D4, X12 |
(309) 0x41be30 FMUL D17, D17, D18 |
(309) 0x41be34 FDIV D5, D5, D17 |
(309) 0x41be38 FMUL D3, D3, D4 |
(309) 0x41be3c FSUB D5, D0, S5 |
(309) 0x41be40 FNEG D17, D5 |
(309) 0x41be44 FCSEL D5, D5, D17, #8 |
(309) 0x41be48 FMINNM D17, D7, D16 |
(309) 0x41be4c FMINNM D3, D17, D3 |
(309) 0x41be50 FMUL D5, D3, D5 |
(309) 0x41be54 B 41bcc0 |
0x41be58 HINT #0 |
0x41be5c HINT #0 |
0x41d944 HINT #0 |
0x41d948 HINT #0 |
0x41d94c HINT #0 |
0x42b258 HINT #0 |
0x42b25c HINT #0 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
○100.00 | __kmp_invoke_microtask | | libomp.so |
Path / |
Source file and lines | advec_cell.cpp:157-202 |
Module | exec |
nb instructions | 80 |
loop length | 320 |
nb stack references | 0 |
front end | 8.63 cycles |
| | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.00 | 2.00 | 6.00 | 6.00 | 6.00 | 6.00 | 2.00 | 2.00 | 0.00 | 0.00 | 14.00 | 14.00 | 14.00 | 8.00 | 8.00 |
cycles | 2.00 | 2.00 | 6.00 | 6.00 | 6.00 | 6.00 | 2.00 | 2.00 | 0.00 | 0.00 | 14.00 | 14.00 | 14.00 | 8.00 | 8.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 8.63 |
Overall L1 | 14.00 |
all | 36% |
load | NA (no load vectorizable/vectorized instructions) |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
SUB SP, SP, #288 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP D15, D14, [SP, #128] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
STP D13, D12, [SP, #144] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
STP D11, D10, [SP, #160] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
STP D9, D8, [SP, #176] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
STP X29, X30, [SP, #192] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X28, X27, [SP, #208] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X26, X25, [SP, #224] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X24, X23, [SP, #240] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X22, X21, [SP, #256] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X20, X19, [SP, #272] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ADD X29, SP, #192 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR W11, [X2] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR W13, [X3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADD W12, W11, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD W8, W13, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUBS W8, W8, W12 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LE 41b9d4 <.omp_outlined..12+0x124> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LDR W9, [X4] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR W10, [X5] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADD W15, W9, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD W9, W10, #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP W9, W15 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LE 41b9d4 <.omp_outlined..12+0x124> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
STP X12, X11, [SP, #72] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
LDP X11, X10, [X29, #128] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 |
STR W13, [SP, #20] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
LDR X12, [X29, #120] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADD X3, SP, #92 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB X4, X29, #80 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB X5, X29, #88 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDP X14, X13, [X29, #96] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 |
LDR X17, [X6] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
MOVZ W2, #34 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR X16, [X7] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR W1, [X0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADRP X0, <462940> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X0, X0, #3456 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR X24, [X6, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADD X6, SP, #96 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR X26, [X7, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
MOVZ W7, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STR X15, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
LDR X23, [X11] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X21, [X11, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
SUB W11, W9, W15 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVN X9, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR X28, [X13] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X19, [X13, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
UMADDL X20, W11, W8, X9 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
LDR X13, [X12] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X9, [X10] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X27, [X14, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X25, [X12, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X22, [X10, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
MOVZ W8, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP X16, X17, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP W1, WZR, [SP, #88] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STR X8, [SP, #96] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STR X8, [SP] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X11, X13, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X20, XZR, [X29, #936] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STR X9, [SP, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
BL 402ee0 <@plt_start@+0x190> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LDP X8, X14, [X29, #936] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 |
CMP X8, X20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
CSEL X9, X8, X20, #11 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP X14, X9 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LE 41ba04 <.omp_outlined..12+0x154> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 |
Source file and lines | advec_cell.cpp:157-202 |
Module | exec |
nb instructions | 80 |
loop length | 320 |
nb stack references | 0 |
front end | 8.63 cycles |
| | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.00 | 2.00 | 6.00 | 6.00 | 6.00 | 6.00 | 2.00 | 2.00 | 0.00 | 0.00 | 14.00 | 14.00 | 14.00 | 8.00 | 8.00 |
cycles | 2.00 | 2.00 | 6.00 | 6.00 | 6.00 | 6.00 | 2.00 | 2.00 | 0.00 | 0.00 | 14.00 | 14.00 | 14.00 | 8.00 | 8.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 8.63 |
Overall L1 | 14.00 |
all | 36% |
load | NA (no load vectorizable/vectorized instructions) |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
SUB SP, SP, #288 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP D15, D14, [SP, #128] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
STP D13, D12, [SP, #144] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
STP D11, D10, [SP, #160] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
STP D9, D8, [SP, #176] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
STP X29, X30, [SP, #192] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X28, X27, [SP, #208] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X26, X25, [SP, #224] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X24, X23, [SP, #240] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X22, X21, [SP, #256] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X20, X19, [SP, #272] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ADD X29, SP, #192 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR W11, [X2] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR W13, [X3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADD W12, W11, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD W8, W13, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUBS W8, W8, W12 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LE 41b9d4 <.omp_outlined..12+0x124> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LDR W9, [X4] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR W10, [X5] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADD W15, W9, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD W9, W10, #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP W9, W15 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LE 41b9d4 <.omp_outlined..12+0x124> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
STP X12, X11, [SP, #72] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
LDP X11, X10, [X29, #128] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 |
STR W13, [SP, #20] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
LDR X12, [X29, #120] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADD X3, SP, #92 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB X4, X29, #80 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB X5, X29, #88 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDP X14, X13, [X29, #96] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 |
LDR X17, [X6] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
MOVZ W2, #34 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR X16, [X7] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR W1, [X0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADRP X0, <462940> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X0, X0, #3456 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR X24, [X6, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADD X6, SP, #96 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR X26, [X7, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
MOVZ W7, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STR X15, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
LDR X23, [X11] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X21, [X11, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
SUB W11, W9, W15 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVN X9, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR X28, [X13] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X19, [X13, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
UMADDL X20, W11, W8, X9 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
LDR X13, [X12] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X9, [X10] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X27, [X14, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X25, [X12, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X22, [X10, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
MOVZ W8, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP X16, X17, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP W1, WZR, [SP, #88] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STR X8, [SP, #96] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STR X8, [SP] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X11, X13, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X20, XZR, [X29, #936] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STR X9, [SP, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
BL 402ee0 <@plt_start@+0x190> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LDP X8, X14, [X29, #936] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 |
CMP X8, X20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
CSEL X9, X8, X20, #11 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP X14, X9 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LE 41ba04 <.omp_outlined..12+0x154> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼.omp_outlined..12– | 3.76 | 5.01 |
▼Loop 309 - advec_cell.cpp:157-202 - exec– | 0 | 0 |
▼Loop 311 - advec_cell.cpp:157-202 - exec– | 0 | 0 |
○Loop 310 - advec_cell.cpp:158-202 - exec | 3.76 | 4.99 |