Function: .omp_outlined..4 | Module: exec | Source: advec_cell.cpp:65-110 [...] | Coverage: 3.01% |
---|
Function: .omp_outlined..4 | Module: exec | Source: advec_cell.cpp:65-110 [...] | Coverage: 3.01% |
---|
/home/hbollore/qaas-runs/170-290-5445/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_cell.cpp: 65 - 110 |
-------------------------------------------------------------------------------- |
65: #pragma omp parallel for simd collapse(2) |
66: for (int j = (y_min + 1); j < (y_max + 2); j++) { |
67: for (int i = (x_min + 1); i < (x_max + 2 + 2); i++) |
68: ({ |
69: int upwind, donor, downwind, dif; |
70: double sigmat, sigma3, sigma4, sigmav, sigmam, diffuw, diffdw, limiter, wind; |
71: if (vol_flux_x(i, j) > 0.0) { |
72: upwind = i - 2; |
73: donor = i - 1; |
74: downwind = i; |
75: dif = donor; |
76: } else { |
77: upwind = std::min(i + 1, x_max + 2); |
78: donor = i; |
79: downwind = i - 1; |
80: dif = upwind; |
81: } |
82: sigmat = std::fabs(vol_flux_x(i, j)) / pre_vol(donor, j); |
83: sigma3 = (1.0 + sigmat) * (vertexdx[i] / vertexdx[dif]); |
84: sigma4 = 2.0 - sigmat; |
85: sigmav = sigmat; |
86: diffuw = density1(donor, j) - density1(upwind, j); |
87: diffdw = density1(downwind, j) - density1(donor, j); |
88: wind = 1.0; |
89: if (diffdw <= 0.0) wind = -1.0; |
90: if (diffuw * diffdw > 0.0) { |
91: limiter = (1.0 - sigmav) * wind * |
92: std::fmin(std::fmin(std::fabs(diffuw), std::fabs(diffdw)), |
93: one_by_six * (sigma3 * std::fabs(diffuw) + sigma4 * std::fabs(diffdw))); |
94: } else { |
95: limiter = 0.0; |
96: } |
97: mass_flux_x(i, j) = vol_flux_x(i, j) * (density1(donor, j) + limiter); |
98: sigmam = std::fabs(mass_flux_x(i, j)) / (density1(donor, j) * pre_vol(donor, j)); |
99: diffuw = energy1(donor, j) - energy1(upwind, j); |
100: diffdw = energy1(downwind, j) - energy1(donor, j); |
101: wind = 1.0; |
102: if (diffdw <= 0.0) wind = -1.0; |
103: if (diffuw * diffdw > 0.0) { |
104: limiter = (1.0 - sigmam) * wind * |
105: std::fmin(std::fmin(std::fabs(diffuw), std::fabs(diffdw)), |
106: one_by_six * (sigma3 * std::fabs(diffuw) + sigma4 * std::fabs(diffdw))); |
107: } else { |
108: limiter = 0.0; |
109: } |
110: ener_flux(i, j) = mass_flux_x(i, j) * (energy1(donor, j) + limiter); |
/home/hbollore/qaas-runs/170-290-5445/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x41aa00 SUB SP, SP, #288 |
0x41aa04 STP D15, D14, [SP, #128] |
0x41aa08 STP D13, D12, [SP, #144] |
0x41aa0c STP D11, D10, [SP, #160] |
0x41aa10 STP D9, D8, [SP, #176] |
0x41aa14 STP X29, X30, [SP, #192] |
0x41aa18 STP X28, X27, [SP, #208] |
0x41aa1c STP X26, X25, [SP, #224] |
0x41aa20 STP X24, X23, [SP, #240] |
0x41aa24 STP X22, X21, [SP, #256] |
0x41aa28 STP X20, X19, [SP, #272] |
0x41aa2c ADD X29, SP, #192 |
0x41aa30 LDR W8, [X2] |
0x41aa34 LDR W9, [X3] |
0x41aa38 ADD W20, W8, #1 |
0x41aa3c ADD W8, W9, #2 |
0x41aa40 SUBS W8, W8, W20 |
0x41aa44 B.LE 41ab28 |
0x41aa48 LDR W21, [X4] |
0x41aa4c LDR W10, [X5] |
0x41aa50 ADD W16, W21, #1 |
0x41aa54 ADD W9, W10, #4 |
0x41aa58 CMP W9, W16 |
0x41aa5c B.LE 41ab28 |
0x41aa60 STR W10, [SP, #20] |
0x41aa64 LDP X11, X10, [X29, #128] |
0x41aa68 LDR X12, [X29, #120] |
0x41aa6c ADD X3, SP, #92 |
0x41aa70 SUB X4, X29, #80 |
0x41aa74 SUB X5, X29, #88 |
0x41aa78 MOVZ W2, #34 |
0x41aa7c LDR X15, [X6] |
0x41aa80 LDP X14, X13, [X29, #96] |
0x41aa84 LDR W1, [X0] |
0x41aa88 ADRP X0, <45ea88> |
0x41aa8c ADD X0, X0, #3120 |
0x41aa90 LDR X23, [X6, #16] |
0x41aa94 ADD X6, SP, #96 |
0x41aa98 LDR X25, [X7, #16] |
0x41aa9c STR WZR, [SP, #92] |
0x41aaa0 LDR X26, [X14, #8] |
0x41aaa4 LDR X14, [X13] |
0x41aaa8 LDR X28, [X13, #16] |
0x41aaac LDR X13, [X12] |
0x41aab0 LDR X22, [X12, #16] |
0x41aab4 LDR X12, [X11] |
0x41aab8 LDR X27, [X11, #16] |
0x41aabc SUB W11, W9, W16 |
0x41aac0 MOVN X9, #0 |
0x41aac4 UMADDL X19, W11, W8, X9 |
0x41aac8 STR X15, [SP, #64] |
0x41aacc LDR X15, [X7] |
0x41aad0 LDR X9, [X10] |
0x41aad4 LDR X24, [X10, #16] |
0x41aad8 MOVZ W8, #1 |
0x41aadc MOVZ W7, #1 |
0x41aae0 STR X11, [SP, #24] |
0x41aae4 STR W1, [SP, #76] |
0x41aae8 STR X8, [SP, #96] |
0x41aaec STP X8, X16, [SP] |
0x41aaf0 STP X14, X15, [SP, #48] |
0x41aaf4 STP X12, X13, [SP, #32] |
0x41aaf8 STP X19, XZR, [X29, #936] |
0x41aafc STR X9, [SP, #80] |
0x41ab00 BL 4033a0 |
0x41ab04 LDP X8, X14, [X29, #936] |
0x41ab08 CMP X8, X19 |
0x41ab0c CSEL X9, X8, X19, #11 |
0x41ab10 CMP X14, X9 |
0x41ab14 B.LE 41ab58 |
(240) 0x41ab18 LDR W1, [SP, #76] |
(240) 0x41ab1c ADRP X0, |
(240) 0x41ab20 ADD X0, X0, #3144 |
(240) 0x41ab24 BL 403260 |
(240) 0x41ab28 LDP D9, D8, [SP, #176] |
(240) 0x41ab2c LDP D11, D10, [SP, #160] |
(240) 0x41ab30 LDP D13, D12, [SP, #144] |
(240) 0x41ab34 LDP D15, D14, [SP, #128] |
(240) 0x41ab38 LDP X20, X19, [SP, #272] |
(240) 0x41ab3c LDP X22, X21, [SP, #256] |
(240) 0x41ab40 LDP X24, X23, [SP, #240] |
(240) 0x41ab44 LDP X26, X25, [SP, #224] |
(240) 0x41ab48 LDP X28, X27, [SP, #208] |
(240) 0x41ab4c LDP X29, X30, [SP, #192] |
(240) 0x41ab50 ADD SP, SP, #288 |
(240) 0x41ab54 RET |
(240) 0x41ab58 LDR W8, [SP, #20] |
(240) 0x41ab5c ADD X9, X9, #1 |
(240) 0x41ab60 CNTD X11, ALL |
(240) 0x41ab64 ORR X10, XZR, X14 |
(240) 0x41ab68 ADD W8, W8, #2 |
(240) 0x41ab6c SUB X12, X9, X14 |
(240) 0x41ab70 CMP X12, X11 |
(240) 0x41ab74 B.CC 41adc0 |
(240) 0x41ab78 UDIV X10, X12, X11 |
(240) 0x41ab7c PTRUE P0.D, ALL |
(240) 0x41ab80 MADD X13, X10, X11, XZR |
(240) 0x41ab84 INDEX Z0.D, X14, #1 |
(240) 0x41ab88 ADD X10, X14, X13 |
(240) 0x41ab8c LDR X14, [SP, #24] |
(240) 0x41ab90 DUP Z22.D, X8 |
(240) 0x41ab94 SUB X12, X12, X13 |
(240) 0x41ab98 SXTW Z22.D, P0/M, Z22.D |
(240) 0x41ab9c DUP Z1.D, X11 |
(240) 0x41aba0 DUP Z3.D, X20 |
(240) 0x41aba4 DUP Z6.D, X21 |
(240) 0x41aba8 DUP Z2.D, X14 |
(240) 0x41abac LDR X14, [SP, #8] |
(240) 0x41abb0 DUP Z4.D, X14 |
(240) 0x41abb4 LDR X14, [SP, #64] |
(240) 0x41abb8 DUP Z5.D, X14 |
(240) 0x41abbc LDR X14, [SP, #56] |
(240) 0x41abc0 DUP Z7.D, X14 |
(240) 0x41abc4 LDR X14, [SP, #48] |
(240) 0x41abc8 DUP Z16.D, X14 |
(240) 0x41abcc LDR X14, [SP, #40] |
(240) 0x41abd0 DUP Z17.D, X14 |
(240) 0x41abd4 LDR X14, [SP, #32] |
(240) 0x41abd8 DUP Z18.D, X14 |
(240) 0x41abdc LDR X14, [SP, #80] |
(240) 0x41abe0 DUP Z19.D, X14 |
(240) 0x41abe4 ADD W14, W21, #2 |
(240) 0x41abe8 DUP Z20.D, X14 |
(240) 0x41abec SUB W14, W21, #1 |
(240) 0x41abf0 DUP Z21.D, X14 |
(240) 0x41abf4 ORR X14, XZR, #3840 |
(240) 0x41abf8 MOVK X14, #16325 |
(240) 0x41abfc DUP Z25.D, X14 |
(239) 0x41ac00 MOVPRFX Z26, Z0 |
(239) 0x41ac04 SDIV Z26.D, P0/M, Z26.D, Z2.D |
(239) 0x41ac08 ADD Z27.D, Z3.D, Z26.D |
(239) 0x41ac0c MOVPRFX Z28, Z0 |
(239) 0x41ac10 MLS Z28.D, P0/M, Z26.D, Z2.D |
(239) 0x41ac14 ADD Z29.D, Z4.D, Z28.D |
(239) 0x41ac18 MOVPRFX Z26, Z29 |
(239) 0x41ac1c SXTW Z26.D, P0/M, Z29.D |
(239) 0x41ac20 SUBS X13, X13, X11 |
(239) 0x41ac24 ADD Z31.D, Z20.D, Z28.D |
(239) 0x41ac28 ADD Z8.D, Z6.D, Z28.D |
(239) 0x41ac2c ADD Z28.D, Z21.D, Z28.D |
(239) 0x41ac30 ADD Z0.D, Z0.D, Z1.D |
(239) 0x41ac34 SXTW Z27.D, P0/M, Z27.D |
(239) 0x41ac38 MOVPRFX Z30, Z26 |
(239) 0x41ac3c MLA Z30.D, P0/M, Z5.D, Z27.D |
(239) 0x41ac40 LD1D {Z30.D}, P0/Z, [X23, Z30.D,LSL #3] |
(239) 0x41ac44 MOVPRFX Z10, Z30 |
(239) 0x41ac48 FABS Z10.D, P0/M, Z30.D |
(239) 0x41ac4c SXTW Z31.D, P0/M, Z31.D |
(239) 0x41ac50 SXTW Z8.D, P0/M, Z8.D |
(239) 0x41ac54 SXTW Z28.D, P0/M, Z28.D |
(239) 0x41ac58 SMIN Z31.D, P0/M, Z31.D, Z22.D |
(239) 0x41ac5c FCMGT P1.D, P0/Z, Z30.D, #0 |
(239) 0x41ac60 SEL Z28.D, P1, Z28.D, Z31.D |
(239) 0x41ac64 SEL Z31.D, P1, Z8.D, Z31.D |
(239) 0x41ac68 SEL Z9.D, P1, Z26.D, Z8.D |
(239) 0x41ac6c SEL Z8.D, P1, Z8.D, Z26.D |
(239) 0x41ac70 MOVPRFX Z11, Z8 |
(239) 0x41ac74 MLA Z11.D, P0/M, Z7.D, Z27.D |
(239) 0x41ac78 LD1D {Z12.D}, P0/Z, [X25, Z11.D,LSL #3] |
(239) 0x41ac7c LD1D {Z29.D}, P0/Z, [X26, Z29.D,SXTW #3] |
(239) 0x41ac80 LD1D {Z31.D}, P0/Z, [X26, Z31.D,LSL #3] |
(239) 0x41ac84 FDIV Z10.D, P0/M, Z10.D, Z12.D |
(239) 0x41ac88 FMAD Z29.D, P0/M, Z10.D, Z29.D |
(239) 0x41ac8c FDIV Z29.D, P0/M, Z29.D, Z31.D |
(239) 0x41ac90 MOVPRFX Z31, Z16 |
(239) 0x41ac94 MUL Z31.D, P0/M, Z31.D, Z27.D |
(239) 0x41ac98 ADD Z12.D, Z8.D, Z31.D |
(239) 0x41ac9c ADD Z14.D, Z31.D, Z28.D |
(239) 0x41aca0 ADD Z31.D, Z9.D, Z31.D |
(239) 0x41aca4 LD1D {Z13.D}, P0/Z, [X28, Z12.D,LSL #3] |
(239) 0x41aca8 LD1D {Z14.D}, P0/Z, [X28, Z14.D,LSL #3] |
(239) 0x41acac LD1D {Z31.D}, P0/Z, [X28, Z31.D,LSL #3] |
(239) 0x41acb0 FSUB Z23.D, Z31.D, Z13.D |
(239) 0x41acb4 FSUB Z15.D, Z13.D, Z14.D |
(239) 0x41acb8 FABD Z31.D, P0/M, Z31.D, Z13.D |
(239) 0x41acbc FCMGT P1.D, P0/Z, Z23.D, #0 |
(239) 0x41acc0 FMUL Z15.D, Z23.D, Z15.D |
(239) 0x41acc4 MOVPRFX Z23, Z10 |
(239) 0x41acc8 FSUBR Z23.D, P0/M, Z23.D, #1 |
(239) 0x41accc MOVPRFX Z24, Z23 |
(239) 0x41acd0 FNEG Z24.D, P0/M, Z23.D |
(239) 0x41acd4 SEL Z23.D, P1, Z23.D, Z24.D |
(239) 0x41acd8 FDUP Z24.D, #0 |
(239) 0x41acdc FCMGT P1.D, P0/Z, Z15.D, #0 |
(239) 0x41ace0 FSUB Z24.D, Z24.D, Z10.D |
(239) 0x41ace4 MOVPRFX Z10, Z13 |
(239) 0x41ace8 FABD Z10.D, P0/M, Z10.D, Z14.D |
(239) 0x41acec MOVPRFX Z14, Z10 |
(239) 0x41acf0 FMINNM Z14.D, P0/M, Z14.D, Z31.D |
(239) 0x41acf4 FMUL Z10.D, Z10.D, Z29.D |
(239) 0x41acf8 FMAD Z31.D, P0/M, Z24.D, Z10.D |
(239) 0x41acfc FMUL Z31.D, Z31.D, Z25.D |
(239) 0x41ad00 FMINNM Z31.D, P0/M, Z31.D, Z14.D |
(239) 0x41ad04 FMUL Z23.D, Z31.D, Z23.D |
(239) 0x41ad08 DUP Z31.D, #0 |
(239) 0x41ad0c SEL Z23.D, P1, Z23.D, Z31.D |
(239) 0x41ad10 FADD Z23.D, Z23.D, Z13.D |
(239) 0x41ad14 FMUL Z23.D, Z23.D, Z30.D |
(239) 0x41ad18 MOVPRFX Z30, Z26 |
(239) 0x41ad1c MLA Z30.D, P0/M, Z17.D, Z27.D |
(239) 0x41ad20 ST1D {Z23.D}, P0, [X22, Z30.D,LSL #3] |
(239) 0x41ad24 MOVPRFX Z30, Z18 |
(239) 0x41ad28 MUL Z30.D, P0/M, Z30.D, Z27.D |
(239) 0x41ad2c ADD Z31.D, Z8.D, Z30.D |
(239) 0x41ad30 ADD Z28.D, Z30.D, Z28.D |
(239) 0x41ad34 ADD Z30.D, Z9.D, Z30.D |
(239) 0x41ad38 LD1D {Z31.D}, P0/Z, [X27, Z31.D,LSL #3] |
(239) 0x41ad3c LD1D {Z28.D}, P0/Z, [X27, Z28.D,LSL #3] |
(239) 0x41ad40 LD1D {Z30.D}, P0/Z, [X27, Z30.D,LSL #3] |
(239) 0x41ad44 LD1D {Z9.D}, P0/Z, [X28, Z12.D,LSL #3] |
(239) 0x41ad48 LD1D {Z10.D}, P0/Z, [X25, Z11.D,LSL #3] |
(239) 0x41ad4c FMUL Z9.D, Z10.D, Z9.D |
(239) 0x41ad50 MOVPRFX Z10, Z23 |
(239) 0x41ad54 FABS Z10.D, P0/M, Z23.D |
(239) 0x41ad58 FSUB Z8.D, Z31.D, Z28.D |
(239) 0x41ad5c FABD Z28.D, P0/M, Z28.D, Z31.D |
(239) 0x41ad60 FMUL Z29.D, Z28.D, Z29.D |
(239) 0x41ad64 FDIVR Z9.D, P0/M, Z9.D, Z10.D |
(239) 0x41ad68 FSUB Z10.D, Z30.D, Z31.D |
(239) 0x41ad6c FABD Z30.D, P0/M, Z30.D, Z31.D |
(239) 0x41ad70 FMAD Z24.D, P0/M, Z30.D, Z29.D |
(239) 0x41ad74 FCMGT P1.D, P0/Z, Z10.D, #0 |
(239) 0x41ad78 FMINNM Z28.D, P0/M, Z28.D, Z30.D |
(239) 0x41ad7c FMUL Z8.D, Z10.D, Z8.D |
(239) 0x41ad80 FMUL Z24.D, Z24.D, Z25.D |
(239) 0x41ad84 FMINNM Z24.D, P0/M, Z24.D, Z28.D |
(239) 0x41ad88 FSUBR Z9.D, P0/M, Z9.D, #1 |
(239) 0x41ad8c MOVPRFX Z10, Z9 |
(239) 0x41ad90 FNEG Z10.D, P0/M, Z9.D |
(239) 0x41ad94 SEL Z9.D, P1, Z9.D, Z10.D |
(239) 0x41ad98 FCMLE P1.D, P0/Z, Z8.D, #0 |
(239) 0x41ad9c FMUL Z24.D, Z9.D, Z24.D |
(239) 0x41ada0 CPY Z24.D, P1/M, #0 |
(239) 0x41ada4 FADD Z24.D, Z24.D, Z31.D |
(239) 0x41ada8 FMUL Z23.D, Z24.D, Z23.D |
(239) 0x41adac MOVPRFX Z24, Z26 |
(239) 0x41adb0 MLA Z24.D, P0/M, Z19.D, Z27.D |
(239) 0x41adb4 ST1D {Z23.D}, P0, [X24, Z24.D,LSL #3] |
(239) 0x41adb8 B.NE 41ac00 |
(240) 0x41adbc CBZ X12, 41ab18 |
(238) 0x41adc0 LDP X30, X19, [SP, #24] |
(238) 0x41adc4 LDP X5, X4, [SP, #56] |
(238) 0x41adc8 LDP X7, X6, [SP, #40] |
(238) 0x41adcc FMOV D0, #2.0000000 |
(238) 0x41add0 FMOV D1, #1.0000000 |
(238) 0x41add4 ORR X13, XZR, #3840 |
(238) 0x41add8 SUB W12, W21, #1 |
(238) 0x41addc MOVK X13, #16325 |
(238) 0x41ade0 SUB W11, WZR, W30 |
(238) 0x41ade4 B 41ae08 |
(238) 0x41ade8 LDR X16, [SP, #80] |
(238) 0x41adec FADD D3, D5, D6 |
(238) 0x41adf0 ADD X10, X10, #1 |
(238) 0x41adf4 CMP X9, X10 |
(238) 0x41adf8 FMUL D2, D3, D2 |
(238) 0x41adfc MADD X14, X16, X15, X14 |
(238) 0x41ae00 STR D2, [X24, X14,LSL #3] |
(238) 0x41ae04 B.EQ 41ab18 |
(238) 0x41ae08 SDIV X16, X10, X30 |
(238) 0x41ae0c ADD W14, W21, W10 |
(238) 0x41ae10 MADD W14, W11, W16, W14 |
(238) 0x41ae14 ADD W15, W20, W16 |
(238) 0x41ae18 ADD W14, W14, #1 |
(238) 0x41ae1c SBFM X15, X15, #0, #31 |
(238) 0x41ae20 SBFM X14, X14, #0, #31 |
(238) 0x41ae24 MADD X17, X4, X15, X14 |
(238) 0x41ae28 LDR D2, [X23, X17,LSL #3] |
(238) 0x41ae2c FCMP D2, #0 |
(238) 0x41ae30 B.LE 41ae60 |
(238) 0x41ae34 ADD W17, W12, W10 |
(238) 0x41ae38 ADD W18, W21, W10 |
(238) 0x41ae3c MADD W17, W11, W16, W17 |
(238) 0x41ae40 MADD W16, W11, W16, W18 |
(238) 0x41ae44 SBFM X1, X16, #0, #31 |
(238) 0x41ae48 SBFM X17, X17, #0, #31 |
(238) 0x41ae4c ORR X16, XZR, X14 |
(238) 0x41ae50 ORR X0, XZR, X1 |
(238) 0x41ae54 B 41ae84 |
0x41ae58 HINT #0 |
0x41ae5c HINT #0 |
(237) 0x41ae60 ADD W17, W21, W10 |
(237) 0x41ae64 ORR X0, XZR, X14 |
(237) 0x41ae68 MADD W16, W11, W16, W17 |
(237) 0x41ae6c ADD W17, W16, #2 |
(237) 0x41ae70 SBFM X16, X16, #0, #31 |
(237) 0x41ae74 CMP W8, W17 |
(237) 0x41ae78 CSEL W17, W8, W17, #11 |
(237) 0x41ae7c SBFM X17, X17, #0, #31 |
(237) 0x41ae80 ORR X1, XZR, X17 |
(238) 0x41ae84 MADD X18, X5, X15, X0 |
(238) 0x41ae88 FABS D3, D2 |
(238) 0x41ae8c MADD X2, X6, X15, XZR |
(238) 0x41ae90 LDR D4, [X25, X18,LSL #3] |
(238) 0x41ae94 ADD X3, X2, X17 |
(238) 0x41ae98 LDR D5, [X28, X3,LSL #3] |
(238) 0x41ae9c FDIV D7, D3, D4 |
(238) 0x41aea0 LDR D3, [X26, X14,LSL #3] |
(238) 0x41aea4 LDR D4, [X26, X1,LSL #3] |
(238) 0x41aea8 ADD X1, X0, X2 |
(238) 0x41aeac ADD X2, X16, X2 |
(238) 0x41aeb0 LDR D6, [X28, X1,LSL #3] |
(238) 0x41aeb4 FSUB D16, D6, D5 |
(238) 0x41aeb8 LDR D5, [X28, X2,LSL #3] |
(238) 0x41aebc FSUB D17, D5, D6 |
(238) 0x41aec0 MOVI D5, #0 |
(238) 0x41aec4 FMADD D3, D7, D3, D3 |
(238) 0x41aec8 FMUL D18, D17, D16 |
(238) 0x41aecc FDIV D3, D3, D4 |
(238) 0x41aed0 FSUB D4, D0, D7 |
(238) 0x41aed4 FCMP D18, #0 |
(238) 0x41aed8 MOVI D18, #0 |
(238) 0x41aedc B.LE 41af14 |
(238) 0x41aee0 FSUB D7, D1, D7 |
(238) 0x41aee4 FCMP D17, #0 |
(238) 0x41aee8 FABS D16, D16 |
(238) 0x41aeec FABS D17, D17 |
(238) 0x41aef0 FNEG D18, D7 |
(238) 0x41aef4 FCSEL D7, D7, D18, #12 |
(238) 0x41aef8 FMINNM D18, D16, D17 |
(238) 0x41aefc FMUL D16, D16, D3 |
(238) 0x41af00 FMADD D16, D17, D4, D16 |
(238) 0x41af04 FMOV D17, X13 |
(238) 0x41af08 FMUL D16, D16, D17 |
(238) 0x41af0c FMINNM D16, D18, D16 |
(238) 0x41af10 FMUL D18, D16, D7 |
(238) 0x41af14 FADD D6, D18, D6 |
(238) 0x41af18 MADD X2, X7, X15, X14 |
(238) 0x41af1c FMUL D2, D6, D2 |
(238) 0x41af20 STR D2, [X22, X2,LSL #3] |
(238) 0x41af24 MADD X2, X19, X15, XZR |
(238) 0x41af28 ADD X0, X0, X2 |
(238) 0x41af2c ADD X17, X2, X17 |
(238) 0x41af30 ADD X16, X16, X2 |
(238) 0x41af34 LDR D6, [X27, X0,LSL #3] |
(238) 0x41af38 LDR D7, [X27, X17,LSL #3] |
(238) 0x41af3c LDR D16, [X27, X16,LSL #3] |
(238) 0x41af40 FSUB D7, D6, D7 |
(238) 0x41af44 FSUB D16, D16, D6 |
(238) 0x41af48 FMUL D17, D16, D7 |
(238) 0x41af4c FCMP D17, #0 |
(238) 0x41af50 B.LE 41ade8 |
(238) 0x41af54 LDR D17, [X28, X1,LSL #3] |
(238) 0x41af58 LDR D18, [X25, X18,LSL #3] |
(238) 0x41af5c FABS D5, D2 |
(238) 0x41af60 FABS D7, D7 |
(238) 0x41af64 FCMP D16, #0 |
(238) 0x41af68 FABS D16, D16 |
(238) 0x41af6c FMUL D3, D7, D3 |
(238) 0x41af70 FMADD D3, D16, D4, D3 |
(238) 0x41af74 FMOV D4, X13 |
(238) 0x41af78 FMUL D17, D18, D17 |
(238) 0x41af7c FDIV D5, D5, D17 |
(238) 0x41af80 FMUL D3, D3, D4 |
(238) 0x41af84 FSUB D5, D1, D5 |
(238) 0x41af88 FNEG D17, D5 |
(238) 0x41af8c FCSEL D5, D5, D17, #12 |
(238) 0x41af90 FMINNM D17, D7, D16 |
(238) 0x41af94 FMINNM D3, D17, D3 |
(238) 0x41af98 FMUL D5, D5, D3 |
(238) 0x41af9c B 41ade8 |
0x4263b8 HINT #0 |
0x4263bc HINT #0 |
0x42a434 HINT #0 |
0x42a438 HINT #0 |
0x42a43c HINT #0 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
○100.00 | __kmp_invoke_microtask | libomp.so |
Path / |
Source file and lines | advec_cell.cpp:65-110 |
Module | exec |
nb instructions | 77 |
loop length | 308 |
nb stack references | 0 |
front end | 8.75 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.00 | 2.00 | 6.00 | 6.00 | 6.00 | 6.00 | 2.00 | 2.00 | 0.00 | 0.00 | 14.50 | 14.17 | 14.33 | 8.50 | 8.50 |
cycles | 2.00 | 2.00 | 6.00 | 6.00 | 6.00 | 6.00 | 2.00 | 2.00 | 0.00 | 0.00 | 14.50 | 14.17 | 14.33 | 8.50 | 8.50 |
Cycles executing div or sqrt instructions | NA |
Front-end | 8.75 |
Overall L1 | 14.50 |
all | 33% |
load | NA (no load vectorizable/vectorized instructions) |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
SUB SP, SP, #288 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP D15, D14, [SP, #128] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
STP D13, D12, [SP, #144] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
STP D11, D10, [SP, #160] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
STP D9, D8, [SP, #176] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
STP X29, X30, [SP, #192] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X28, X27, [SP, #208] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X26, X25, [SP, #224] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X24, X23, [SP, #240] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X22, X21, [SP, #256] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X20, X19, [SP, #272] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ADD X29, SP, #192 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR W8, [X2] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR W9, [X3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADD W20, W8, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD W8, W9, #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUBS W8, W8, W20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LE 41ab28 <.omp_outlined..4+0x128> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LDR W21, [X4] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR W10, [X5] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADD W16, W21, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD W9, W10, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP W9, W16 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LE 41ab28 <.omp_outlined..4+0x128> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
STR W10, [SP, #20] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
LDP X11, X10, [X29, #128] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 |
LDR X12, [X29, #120] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADD X3, SP, #92 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB X4, X29, #80 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB X5, X29, #88 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVZ W2, #34 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR X15, [X6] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDP X14, X13, [X29, #96] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 |
LDR W1, [X0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADRP X0, <45ea88> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X0, X0, #3120 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR X23, [X6, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADD X6, SP, #96 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR X25, [X7, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
STR WZR, [SP, #92] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
LDR X26, [X14, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X14, [X13] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X28, [X13, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X13, [X12] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X22, [X12, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X12, [X11] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X27, [X11, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
SUB W11, W9, W16 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVN X9, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
UMADDL X19, W11, W8, X9 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
STR X15, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
LDR X15, [X7] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X9, [X10] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X24, [X10, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
MOVZ W8, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVZ W7, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STR X11, [SP, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STR W1, [SP, #76] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STR X8, [SP, #96] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X8, X16, [SP] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X14, X15, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X12, X13, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X19, XZR, [X29, #936] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STR X9, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
BL 4033a0 <@plt_start@+0x670> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LDP X8, X14, [X29, #936] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 |
CMP X8, X19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
CSEL X9, X8, X19, #11 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP X14, X9 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LE 41ab58 <.omp_outlined..4+0x158> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 |
Source file and lines | advec_cell.cpp:65-110 |
Module | exec |
nb instructions | 77 |
loop length | 308 |
nb stack references | 0 |
front end | 8.75 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.00 | 2.00 | 6.00 | 6.00 | 6.00 | 6.00 | 2.00 | 2.00 | 0.00 | 0.00 | 14.50 | 14.17 | 14.33 | 8.50 | 8.50 |
cycles | 2.00 | 2.00 | 6.00 | 6.00 | 6.00 | 6.00 | 2.00 | 2.00 | 0.00 | 0.00 | 14.50 | 14.17 | 14.33 | 8.50 | 8.50 |
Cycles executing div or sqrt instructions | NA |
Front-end | 8.75 |
Overall L1 | 14.50 |
all | 33% |
load | NA (no load vectorizable/vectorized instructions) |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
SUB SP, SP, #288 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP D15, D14, [SP, #128] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
STP D13, D12, [SP, #144] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
STP D11, D10, [SP, #160] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
STP D9, D8, [SP, #176] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
STP X29, X30, [SP, #192] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X28, X27, [SP, #208] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X26, X25, [SP, #224] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X24, X23, [SP, #240] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X22, X21, [SP, #256] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X20, X19, [SP, #272] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ADD X29, SP, #192 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR W8, [X2] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR W9, [X3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADD W20, W8, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD W8, W9, #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUBS W8, W8, W20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LE 41ab28 <.omp_outlined..4+0x128> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LDR W21, [X4] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR W10, [X5] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADD W16, W21, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD W9, W10, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP W9, W16 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LE 41ab28 <.omp_outlined..4+0x128> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
STR W10, [SP, #20] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
LDP X11, X10, [X29, #128] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 |
LDR X12, [X29, #120] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADD X3, SP, #92 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB X4, X29, #80 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB X5, X29, #88 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVZ W2, #34 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR X15, [X6] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDP X14, X13, [X29, #96] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 |
LDR W1, [X0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADRP X0, <45ea88> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X0, X0, #3120 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR X23, [X6, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADD X6, SP, #96 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR X25, [X7, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
STR WZR, [SP, #92] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
LDR X26, [X14, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X14, [X13] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X28, [X13, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X13, [X12] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X22, [X12, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X12, [X11] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X27, [X11, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
SUB W11, W9, W16 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVN X9, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
UMADDL X19, W11, W8, X9 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
STR X15, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
LDR X15, [X7] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X9, [X10] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X24, [X10, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
MOVZ W8, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVZ W7, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STR X11, [SP, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STR W1, [SP, #76] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STR X8, [SP, #96] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X8, X16, [SP] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X14, X15, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X12, X13, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X19, XZR, [X29, #936] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STR X9, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
BL 4033a0 <@plt_start@+0x670> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LDP X8, X14, [X29, #936] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 |
CMP X8, X19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
CSEL X9, X8, X19, #11 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP X14, X9 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LE 41ab58 <.omp_outlined..4+0x158> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼.omp_outlined..4– | 3.01 | 4.02 |
▼Loop 237 - context.h:69-69 - exec– | 0 | 0 |
▼Loop 238 - advec_cell.cpp:65-110 - exec– | 0 | 0 |
▼Loop 240 - advec_cell.cpp:65-110 - exec– | 0 | 0 |
○Loop 239 - advec_cell.cpp:66-110 - exec | 3.01 | 4 |