Function: advec_cell_kernel(int, int, int, int, int, int, clover::Buffer1D<double>&, clover::Buffer1 ... | Module: exec | Source: advec_cell.cpp:157-202 [...] | Coverage: 3.69% |
---|
Function: advec_cell_kernel(int, int, int, int, int, int, clover::Buffer1D<double>&, clover::Buffer1 ... | Module: exec | Source: advec_cell.cpp:157-202 [...] | Coverage: 3.69% |
---|
/home/hbollore/qaas-runs/170-290-5445/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_cell.cpp: 157 - 202 |
-------------------------------------------------------------------------------- |
157: #pragma omp parallel for simd collapse(2) |
158: for (int j = (y_min + 1); j < (y_max + 2 + 2); j++) { |
159: for (int i = (x_min + 1); i < (x_max + 2); i++) |
160: ({ |
161: int upwind, donor, downwind, dif; |
162: double sigmat, sigma3, sigma4, sigmav, sigmam, diffuw, diffdw, limiter, wind; |
163: if (vol_flux_y(i, j) > 0.0) { |
[...] |
169: upwind = std::min(j + 1, y_max + 2); |
170: donor = j; |
171: downwind = j - 1; |
172: dif = upwind; |
173: } |
174: sigmat = std::fabs(vol_flux_y(i, j)) / pre_vol(i, donor); |
175: sigma3 = (1.0 + sigmat) * (vertexdy[j] / vertexdy[dif]); |
176: sigma4 = 2.0 - sigmat; |
177: sigmav = sigmat; |
178: diffuw = density1(i, donor) - density1(i, upwind); |
179: diffdw = density1(i, downwind) - density1(i, donor); |
180: wind = 1.0; |
181: if (diffdw <= 0.0) wind = -1.0; |
182: if (diffuw * diffdw > 0.0) { |
183: limiter = (1.0 - sigmav) * wind * |
184: std::fmin(std::fmin(std::fabs(diffuw), std::fabs(diffdw)), |
185: one_by_six * (sigma3 * std::fabs(diffuw) + sigma4 * std::fabs(diffdw))); |
186: } else { |
187: limiter = 0.0; |
188: } |
189: mass_flux_y(i, j) = vol_flux_y(i, j) * (density1(i, donor) + limiter); |
190: sigmam = std::fabs(mass_flux_y(i, j)) / (density1(i, donor) * pre_vol(i, donor)); |
191: diffuw = energy1(i, donor) - energy1(i, upwind); |
192: diffdw = energy1(i, downwind) - energy1(i, donor); |
193: wind = 1.0; |
194: if (diffdw <= 0.0) wind = -1.0; |
195: if (diffuw * diffdw > 0.0) { |
196: limiter = (1.0 - sigmam) * wind * |
197: std::fmin(std::fmin(std::fabs(diffuw), std::fabs(diffdw)), |
198: one_by_six * (sigma3 * std::fabs(diffuw) + sigma4 * std::fabs(diffdw))); |
199: } else { |
200: limiter = 0.0; |
201: } |
202: ener_flux(i, j) = mass_flux_y(i, j) * (energy1(i, donor) + limiter); |
/home/hbollore/qaas-runs/170-290-5445/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 46 - 69 |
-------------------------------------------------------------------------------- |
46: T &operator[](size_t i) const { return data[i]; } |
[...] |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x41ffb0 STP X29, X30, [SP, #-160]!
0x41ffb4 ADD X29, SP, #0 |
0x41ffb8 STP X21, X22, [SP, #32] |
0x41ffbc ORR X21, XZR, X0 |
0x41ffc0 STP X23, X24, [SP, #48] |
0x41ffc4 LDP W22, W24, [X0, #64] |
0x41ffc8 LDR W0, [X0, #56] |
0x41ffcc LDR W1, [X21, #60] |
0x41ffd0 ADD W2, W24, #4 |
0x41ffd4 ADD W22, W22, #1 |
0x41ffd8 ADD W3, W0, #1 |
0x41ffdc STP W2, W3, [SP, #112] |
0x41ffe0 CMP W22, W2 |
0x41ffe4 B.GE 420294 |
0x41ffe8 STP X19, X20, [SP, #16] |
0x41ffec ADD W20, W1, #2 |
0x41fff0 SUB W23, W2, W22 |
0x41fff4 CMP W3, W20 |
0x41fff8 B.GE 420290 |
0x41fffc SUB W4, W20, W3 |
0x420000 MADD W23, W23, W4, WZR |
0x420004 STR W4, [SP, #120] |
0x420008 BL 403530 |
0x42000c ORR W19, WZR, W0 |
0x420010 BL 4033c0 |
0x420014 UDIV W6, W23, W19 |
0x420018 ORR W5, WZR, W0 |
0x42001c MSUB W7, W6, W19, W23 |
0x420020 CMP W0, W7 |
0x420024 B.CC 4202fc |
(209) 0x420028 MADD W5, W6, W5, W7 |
(209) 0x42002c ADD W8, W6, W5 |
(209) 0x420030 STR W8, [SP, #124] |
(209) 0x420034 CMP W5, W8 |
(209) 0x420038 B.CS 420290 |
(209) 0x42003c LDR W10, [SP, #120] |
(209) 0x420040 ORR X9, XZR, #3840 |
(209) 0x420044 FMOV D7, #1.0000000 |
(209) 0x420048 MOVK X9, #16325 |
(209) 0x42004c LDP X14, X15, [X21] |
(209) 0x420050 FMOV D18, #2.0000000 |
(209) 0x420054 FMOV D16, #-1.0000000 |
(209) 0x420058 ADD W24, W24, #2 |
(209) 0x42005c FMOV D17, X9 |
(209) 0x420060 UDIV W11, W5, W10 |
(209) 0x420064 LDR W18, [SP, #116] |
(209) 0x420068 STP X25, X26, [SP, #64] |
(209) 0x42006c STP X27, X28, [SP, #80] |
(209) 0x420070 LDR X16, [X21, #16] |
(209) 0x420074 LDP X28, X27, [X21, #24] |
(209) 0x420078 MSUB W12, W11, W10, W5 |
(209) 0x42007c ADD W19, W11, W22 |
(209) 0x420080 SBFM X13, X19, #0, #31 |
(209) 0x420084 SUB W25, W19, #2 |
(209) 0x420088 STP X14, X15, [SP, #136] |
(209) 0x42008c SUB X11, X13, #1 |
(209) 0x420090 ADD W18, W12, W18 |
(209) 0x420094 LDR X17, [X21, #40] |
(209) 0x420098 SUB W14, W20, W18 |
(209) 0x42009c STR X16, [SP, #152] |
(209) 0x4200a0 LDR X26, [X21, #48] |
(209) 0x4200a4 STR X17, [SP, #128] |
(209) 0x4200a8 CMP W6, W14 |
(209) 0x4200ac CSEL W30, W6, W14, #9 |
(209) 0x4200b0 ADD W21, W5, W30 |
(209) 0x4200b4 ADD W19, W19, #1 |
(209) 0x4200b8 STR W21, [SP, #108] |
(209) 0x4200bc CMP W5, W21 |
(209) 0x4200c0 B.CS 4202b8 |
(210) 0x4200c4 LDR X23, [X28] |
(210) 0x4200c8 SBFM X0, X18, #0, #31 |
(210) 0x4200cc SUB W22, W30, #1 |
(210) 0x4200d0 LDP X9, X8, [SP, #128] |
(210) 0x4200d4 ADD X5, X0, #1 |
(210) 0x4200d8 SBFM X30, X25, #0, #31 |
(210) 0x4200dc ADD X12, X5, X22 |
(210) 0x4200e0 LDR X1, [X27] |
(210) 0x4200e4 MADD X7, X13, X23, XZR |
(210) 0x4200e8 LDR X2, [X26] |
(210) 0x4200ec LDR X6, [X28, #16] |
(210) 0x4200f0 MADD X4, X13, X1, XZR |
(210) 0x4200f4 LDR X3, [X27, #16] |
(210) 0x4200f8 MADD X20, X13, X2, XZR |
(210) 0x4200fc LDR X10, [SP, #144] |
(210) 0x420100 ADD X17, X6, X7,LSL #3 |
(210) 0x420104 LDR X16, [SP, #152] |
(210) 0x420108 ADD X18, X3, X4,LSL #3 |
(210) 0x42010c LDR X21, [X8, #8] |
(210) 0x420110 LDR X15, [X26, #16] |
(210) 0x420114 LDR X8, [X10] |
(210) 0x420118 ADD X23, X21, X13,LSL #3 |
(210) 0x42011c LDR X22, [X9] |
(210) 0x420120 ADD X20, X15, X20,LSL #3 |
(210) 0x420124 LDR X7, [X16] |
(210) 0x420128 LDR X4, [X10, #16] |
(210) 0x42012c LDR X14, [X9, #16] |
(210) 0x420130 LDR X6, [X16, #16] |
(210) 0x420134 HINT #0 |
(210) 0x420138 HINT #0 |
(210) 0x42013c HINT #0 |
(210) 0x420140 ORR X1, XZR, X11 |
(210) 0x420144 ORR X2, XZR, X30 |
(210) 0x420148 LDR D6, [X18, X0,LSL #3] |
(210) 0x42014c ORR X3, XZR, X13 |
(210) 0x420150 ORR X10, XZR, X11 |
(210) 0x420154 FCMPE D6, #0 |
(210) 0x420158 B.GT 420174 |
(211) 0x42015c CMP W24, W19 |
(211) 0x420160 CSEL W2, W24, W19, #13 |
(211) 0x420164 SBFM X10, X2, #0, #31 |
(211) 0x420168 ORR X3, XZR, X11 |
(211) 0x42016c ORR X1, XZR, X13 |
(211) 0x420170 ORR X2, XZR, X10 |
(211) 0x420174 MADD X9, X22, X1, X0 |
(211) 0x420178 FABS D2, D6 |
(211) 0x42017c LDR D4, [X21, X10,LSL #3] |
(211) 0x420180 MADD X16, X8, X3, X0 |
(211) 0x420184 MADD X10, X8, X1, X0 |
(211) 0x420188 LDR D5, [X23] |
(211) 0x42018c MADD X15, X8, X2, X0 |
(211) 0x420190 LDR D1, [X14, X9,LSL #3] |
(211) 0x420194 MADD X1, X7, X1, X0 |
(211) 0x420198 MADD X3, X7, X3, X0 |
(211) 0x42019c LDR D0, [X4, X10,LSL #3] |
(211) 0x4201a0 MADD X2, X7, X2, X0 |
(211) 0x4201a4 LDR D3, [X4, X16,LSL #3] |
(211) 0x4201a8 FDIV D26, D5, D4 |
(211) 0x4201ac LDR D19, [X4, X15,LSL #3] |
(211) 0x4201b0 FDIV D23, D2, D1 |
(211) 0x4201b4 FSUB D24, D3, D0
(211) 0x4201b8 FSUB D27, D0, D19
(211) 0x4201bc FCMPE D24, #0 |
(211) 0x4201c0 FABS D25, D24 |
(211) 0x4201c4 FMUL D29, D24, D27 |
(211) 0x4201c8 FABS D28, D27 |
(211) 0x4201cc FMINNM D22, D25, D28 |
(211) 0x4201d0 FCSEL D20, D16, D7, #9 |
(211) 0x4201d4 FCMPE D29, #0 |
(211) 0x4201d8 FADD D30, D23, D7 |
(211) 0x4201dc FSUB D21, D7, D23
(211) 0x4201e0 FSUB D31, D18, D23
(211) 0x4201e4 FMUL D24, D26, D30 |
(211) 0x4201e8 B.LS 420204 |
(211) 0x4201ec FMUL D2, D25, D31 |
(211) 0x4201f0 FMADD D4, D28, D24, D2 |
(211) 0x4201f4 FMUL D1, D4, D17 |
(211) 0x4201f8 FMINNM D3, D1, D22 |
(211) 0x4201fc FMUL D23, D3, D21 |
(211) 0x420200 FMADD D0, D23, D20, D0 |
(211) 0x420204 FMUL D25, D0, D6 |
(211) 0x420208 STR D25, [X17, X0,LSL #3] |
(211) 0x42020c FABS D6, D25 |
(211) 0x420210 LDR D5, [X6, X1,LSL #3] |
(211) 0x420214 LDR D21, [X6, X3,LSL #3] |
(211) 0x420218 LDR D26, [X6, X2,LSL #3] |
(211) 0x42021c LDR D20, [X4, X10,LSL #3] |
(211) 0x420220 FMUL D27, D5, D25 |
(211) 0x420224 FSUB D28, D21, D5
(211) 0x420228 LDR D22, [X14, X9,LSL #3] |
(211) 0x42022c FSUB D19, D5, D26
(211) 0x420230 FABS D30, D28 |
(211) 0x420234 FCMPE D28, #0 |
(211) 0x420238 FMUL D29, D22, D20 |
(211) 0x42023c FMUL D2, D28, D19 |
(211) 0x420240 FABS D4, D19 |
(211) 0x420244 FMINNM D23, D30, D4 |
(211) 0x420248 FMUL D31, D30, D31 |
(211) 0x42024c FCSEL D3, D16, D7, #9 |
(211) 0x420250 FCMPE D2, #0 |
(211) 0x420254 FMADD D24, D4, D24, D31 |
(211) 0x420258 B.LS 4202a4 |
(211) 0x42025c FDIV D0, D6, D29 |
(211) 0x420260 FMUL D1, D24, D17 |
(211) 0x420264 FMINNM D21, D1, D23 |
(211) 0x420268 FSUB D6, D7, D0
(211) 0x42026c FMUL D26, D6, D21 |
(211) 0x420270 FMADD D5, D26, D3, D5 |
(211) 0x420274 FMUL D25, D5, D25 |
(211) 0x420278 STR D25, [X20, X0,LSL #3] |
(211) 0x42027c ORR X0, XZR, X5 |
(211) 0x420280 CMP X12, X5 |
(211) 0x420284 B.EQ 4202b4 |
(211) 0x420288 ADD X5, X5, #1 |
(211) 0x42028c B 420140 |
(208) 0x420290 LDP X19, X20, [SP, #16] |
(208) 0x420294 LDP X21, X22, [SP, #32] |
(208) 0x420298 LDP X23, X24, [SP, #48] |
(208) 0x42029c LDP X29, X30, [SP], #160 |
(208) 0x4202a0 RET |
(207) 0x4202a4 STR D27, [X20, X0,LSL #3] |
(207) 0x4202a8 ORR X0, XZR, X5 |
(207) 0x4202ac CMP X12, X5 |
(207) 0x4202b0 B.NE 420288 |
(208) 0x4202b4 LDR W5, [SP, #108] |
(209) 0x4202b8 ADD X11, X11, #1 |
(209) 0x4202bc ADD X13, X13, #1 |
(209) 0x4202c0 LDR W12, [SP, #112] |
(209) 0x4202c4 ADD W25, W25, #1 |
(209) 0x4202c8 CMP W12, W19 |
(209) 0x4202cc B.LE 4202e0 |
(209) 0x4202d0 LDP W14, W17, [SP, #120] |
(209) 0x4202d4 LDR W18, [SP, #116] |
(209) 0x4202d8 SUB W6, W17, W5 |
(209) 0x4202dc B 4200a8 |
(209) 0x4202e0 LDP X19, X20, [SP, #16] |
(209) 0x4202e4 LDP X21, X22, [SP, #32] |
(209) 0x4202e8 LDP X23, X24, [SP, #48] |
(209) 0x4202ec LDP X25, X26, [SP, #64] |
(209) 0x4202f0 LDP X27, X28, [SP, #80] |
(209) 0x4202f4 LDP X29, X30, [SP], #160 |
(209) 0x4202f8 RET |
(209) 0x4202fc ADD W6, W6, #1 |
(209) 0x420300 MOVZ W7, #0 |
(209) 0x420304 B 420028 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►98.42+ | __kmp_GOMP_microtask_wrapper(i[...] | libomp.so | |
○ | __kmp_invoke_microtask | libomp.so |
Path / |
Source file and lines | advec_cell.cpp:157-202 |
Module | exec |
nb instructions | 30 |
loop length | 120 |
nb stack references | 0 |
front end | 3.75 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.50 | 2.50 | 4.75 | 4.75 | 4.75 | 4.75 | 0.00 | 0.00 | 0.00 | 0.00 | 3.00 | 3.00 | 3.00 | 3.00 | 3.00 |
cycles | 2.50 | 2.50 | 4.75 | 4.75 | 4.75 | 4.75 | 0.00 | 0.00 | 0.00 | 0.00 | 3.00 | 3.00 | 3.00 | 3.00 | 3.00 |
Cycles executing div or sqrt instructions | 1.00-0.50 |
Front-end | 3.75 |
Overall L1 | 4.75 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | 0% |
div/sqrt | 0% |
other | 0% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
STP X29, X30, [SP, #-160]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ORR X21, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
LDP W22, W24, [X0, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR W0, [X0, #56] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR W1, [X21, #60] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADD W2, W24, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD W22, W22, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD W3, W0, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP W2, W3, [SP, #112] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
CMP W22, W2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.GE 420294 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.6+0x2e4> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ADD W20, W1, #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB W23, W2, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP W3, W20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.GE 420290 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.6+0x2e0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB W4, W20, W3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MADD W23, W23, W4, WZR | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
STR W4, [SP, #120] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
BL 403530 <@plt_start@+0x4b0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ORR W19, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
BL 4033c0 <@plt_start@+0x340> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
UDIV W6, W23, W19 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 1-0.50 |
ORR W5, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MSUB W7, W6, W19, W23 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
CMP W0, W7 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.CC 4202fc <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.6+0x34c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
Source file and lines | advec_cell.cpp:157-202 |
Module | exec |
nb instructions | 30 |
loop length | 120 |
nb stack references | 0 |
front end | 3.75 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.50 | 2.50 | 4.75 | 4.75 | 4.75 | 4.75 | 0.00 | 0.00 | 0.00 | 0.00 | 3.00 | 3.00 | 3.00 | 3.00 | 3.00 |
cycles | 2.50 | 2.50 | 4.75 | 4.75 | 4.75 | 4.75 | 0.00 | 0.00 | 0.00 | 0.00 | 3.00 | 3.00 | 3.00 | 3.00 | 3.00 |
Cycles executing div or sqrt instructions | 1.00-0.50 |
Front-end | 3.75 |
Overall L1 | 4.75 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | 0% |
div/sqrt | 0% |
other | 0% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
STP X29, X30, [SP, #-160]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ORR X21, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
LDP W22, W24, [X0, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR W0, [X0, #56] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR W1, [X21, #60] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADD W2, W24, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD W22, W22, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD W3, W0, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP W2, W3, [SP, #112] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
CMP W22, W2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.GE 420294 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.6+0x2e4> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ADD W20, W1, #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB W23, W2, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP W3, W20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.GE 420290 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.6+0x2e0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB W4, W20, W3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MADD W23, W23, W4, WZR | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
STR W4, [SP, #120] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
BL 403530 <@plt_start@+0x4b0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ORR W19, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
BL 4033c0 <@plt_start@+0x340> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
UDIV W6, W23, W19 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 1-0.50 |
ORR W5, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MSUB W7, W6, W19, W23 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
CMP W0, W7 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.CC 4202fc <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.6+0x34c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼advec_cell_kernel(int, int, int, int, int, int, clover::Buffer1D | 3.69 | 4.91 |
▼Loop 208 - advec_cell.cpp:157-202 - exec– | 0 | 0 |
▼Loop 207 - advec_cell.cpp:163-202 - exec– | 0.56 | 0.74 |
▼Loop 211 - advec_cell.cpp:163-202 - exec– | 2.78 | 3.68 |
○Loop 210 - advec_cell.cpp:163-179 - exec | 0.34 | 0.45 |
○Loop 209 - advec_cell.cpp:157-202 - exec | 0.01 | 0.02 |