| Function: advec_mom_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, ... | Module: exec | Source: advec_mom.cpp:180-211 [...] | Coverage (incl. loops): 3.91% | (excl. loops): 0.00% |
|---|
| Function: advec_mom_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, ... | Module: exec | Source: advec_mom.cpp:180-211 [...] | Coverage (incl. loops): 3.91% | (excl. loops): 0.00% |
|---|
/home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/build/CloverLeaf2.0-CXX/src/omp/context.h: 46 - 69 |
-------------------------------------------------------------------------------- |
46: T &operator[](size_t i) const { return data[i]; } |
[...] |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
/home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/build/CloverLeaf2.0-CXX/src/omp/advec_mom.cpp: 180 - 211 |
-------------------------------------------------------------------------------- |
180: #pragma omp parallel for simd collapse(2) |
181: for (int j = (y_min - 1 + 1); j < (y_max + 1 + 2); j++) { |
182: for (int i = (x_min + 1); i < (x_max + 1 + 2); i++) |
183: ({ |
184: int upwind, donor, downwind, dif; |
185: double sigma, width, limiter, vdiffuw, vdiffdw, auw, adw, wind, advec_vel_s; |
186: if (node_flux(i, j) < 0.0) { |
[...] |
197: sigma = std::fabs(node_flux(i, j)) / (node_mass_pre(i, donor)); |
198: width = celldy[j]; |
199: vdiffuw = vel1(i, donor) - vel1(i, upwind); |
200: vdiffdw = vel1(i, downwind) - vel1(i, donor); |
201: limiter = 0.0; |
202: if (vdiffuw * vdiffdw > 0.0) { |
203: auw = std::fabs(vdiffuw); |
204: adw = std::fabs(vdiffdw); |
205: wind = 1.0; |
206: if (vdiffdw <= 0.0) wind = -1.0; |
207: limiter = |
208: wind * std::fmin(std::fmin(width * ((2.0 - sigma) * adw / width + (1.0 + sigma) * auw / celldy[dif]) / 6.0, auw), adw); |
209: } |
210: advec_vel_s = vel1(i, donor) + (1.0 - sigma) * limiter; |
211: mom_flux(i, j) = advec_vel_s * node_flux(i, j); |
0x42c024 STP X29, X30, [SP, #-112]! |
0x42c028 ADD X29, SP, #0 |
0x42c02c STP X19, X20, [SP, #16] |
0x42c030 STP X23, X24, [SP, #48] |
0x42c034 ORR X23, XZR, X0 |
0x42c038 STP X25, X26, [SP, #64] |
0x42c03c LDR W19, [X0, #52] |
0x42c040 LDR W26, [X23, #48] |
0x42c044 LDR W20, [X0, #40] |
0x42c048 ADD W19, W19, #3 |
0x42c04c LDR W0, [X0, #44] |
0x42c050 CMP W26, W19 |
0x42c054 B.GE 42c3bc |
0x42c058 ADD W20, W20, #1 |
0x42c05c STP X21, X22, [SP, #32] |
0x42c060 ADD W22, W0, #3 |
0x42c064 SUB W25, W19, W26 |
0x42c068 CMP W20, W22 |
0x42c06c B.GE 42c3d0 |
0x42c070 SUB W21, W22, W20 |
0x42c074 BL 410670 |
0x42c078 MUL W25, W25, W21 |
0x42c07c ORR W24, WZR, W0 |
0x42c080 BL 410550 |
0x42c084 ORR W1, WZR, W0 |
0x42c088 UDIV W2, W25, W24 |
0x42c08c MSUB W0, W2, W24, W25 |
0x42c090 CMP W1, W0 |
0x42c094 B.CC 42c3e8 |
0x42c098 MADD W1, W2, W1, W0 |
0x42c09c ADD W17, W2, W1 |
0x42c0a0 CMP W1, W17 |
0x42c0a4 B.CS 42c3d0 |
0x42c0a8 UDIV W0, W1, W21 |
0x42c0ac STP D13, D14, [SP, #80] |
0x42c0b0 CNTW X11, ALL |
0x42c0b4 LDR X18, [X23, #32] |
0x42c0b8 PTRUE P4.B, ALL |
0x42c0bc FDUP Z16.D, #112 |
0x42c0c0 LDP X16, X15, [X23] |
0x42c0c4 FDUP Z17.D, #240 |
0x42c0c8 FDUP Z22.D, #0 |
0x42c0cc FDUP Z18.D, #24 |
0x42c0d0 LDP X14, X13, [X23, #16] |
0x42c0d4 STR D15, [SP, #96] |
0x42c0d8 MSUB W4, W0, W21, W1 |
0x42c0dc ADD W8, W0, W26 |
0x42c0e0 SBFM X8, X8, #0, #31 |
0x42c0e4 ADD W5, W8, #1 |
0x42c0e8 ADD W4, W4, W20 |
0x42c0ec SUB W0, W22, W4 |
0x42c0f0 CMP W2, W0 |
0x42c0f4 CSEL X2, X2, X0, #9 |
0x42c0f8 ADD W12, W1, W2 |
0x42c0fc CMP W1, W12 |
0x42c100 B.CS 42c39c |
(213) 0x42c104 LDR X7, [X15] |
(213) 0x42c108 ADD W6, W8, #2 |
(213) 0x42c10c SBFM X1, X4, #0, #31 |
(213) 0x42c110 DUP Z6.S, W6 |
(213) 0x42c114 DUP Z7.S, W5 |
(213) 0x42c118 INDEX Z21.S, W4, #1 |
(213) 0x42c11c LDR X3, [X13] |
(213) 0x42c120 SUB W9, W8, #1 |
(213) 0x42c124 ADD X22, X8, #1 |
(213) 0x42c128 DUP Z24.D, X8 |
(213) 0x42c12c DUP Z20.S, W9 |
(213) 0x42c130 MOVZ X0, #0 |
(213) 0x42c134 LDR X6, [X15, #16] |
(213) 0x42c138 ORR X10, XZR, X2 |
(213) 0x42c13c WHILELO P6.D, XZR, X2 |
(213) 0x42c140 MADD X7, X8, X7, X1 |
(213) 0x42c144 UQDECD X10, ALL |
(213) 0x42c148 DUP Z23.D, X22 |
(213) 0x42c14c LDR X5, [X13, #16] |
(213) 0x42c150 WHILELO P5.D, XZR, X10 |
(213) 0x42c154 LD1RD {Z19.D}, P4/Z, [X14] |
(213) 0x42c158 MADD X3, X8, X3, X1 |
(213) 0x42c15c LD1RD {Z25.D}, P4/Z, [X16] |
(213) 0x42c160 LDR X4, [X18, #8] |
(213) 0x42c164 ADD X7, X6, X7,LSL #3 |
(213) 0x42c168 LDR X1, [X16, #16] |
(213) 0x42c16c ADD X3, X5, X3,LSL #3 |
(213) 0x42c170 ADDVL X9, X7, #1 |
(213) 0x42c174 LDR X6, [X14, #16] |
(213) 0x42c178 ADD X8, X4, X8,LSL #3 |
(213) 0x42c17c ADDVL X5, X3, #1 |
(212) 0x42c180 LD1D {Z26.D}, P6/Z, [X7, X0,LSL #3] |
(212) 0x42c184 LD1D {Z27.D}, P5/Z, [X9, X0,LSL #3] |
(212) 0x42c188 SUNPKHI Z29.D, Z21 |
(212) 0x42c18c FCMLT P14.D, P4/Z, Z26.D, #0.0000000 |
(212) 0x42c190 FCMLT P15.D, P4/Z, Z27.D, #0.0000000 |
(212) 0x42c194 SUNPKLO Z15.D, Z21 |
(212) 0x42c198 UZP1 P13.S, P14.S, P15.S |
(212) 0x42c19c SEL Z31.S, P13, Z6.S, Z20.S |
(212) 0x42c1a0 SUNPKLO Z30.D, Z31 |
(212) 0x42c1a4 SUNPKHI Z31.D, Z31 |
(212) 0x42c1a8 MAD Z30.D, P4/M, Z25.D, Z15.D |
(212) 0x42c1ac MAD Z31.D, P4/M, Z25.D, Z29.D |
(212) 0x42c1b0 LD1D {Z5.D}, P6/Z, [X1, Z30.D,LSL #3] |
(212) 0x42c1b4 LD1D {Z2.D}, P5/Z, [X1, Z31.D,LSL #3] |
(212) 0x42c1b8 SEL Z14.D, P14, Z24.D, Z23.D |
(212) 0x42c1bc SEL Z31.D, P14, Z23.D, Z24.D |
(212) 0x42c1c0 MOVPRFX Z30, Z15 |
(212) 0x42c1c4 MLA Z30.D, P4/M, Z25.D, Z31.D |
(212) 0x42c1c8 MAD Z31.D, P4/M, Z19.D, Z15.D |
(212) 0x42c1cc LD1D {Z28.D}, P6/Z, [X1, Z30.D,LSL #3] |
(212) 0x42c1d0 MLA Z15.D, P4/M, Z25.D, Z14.D |
(212) 0x42c1d4 MOVPRFX Z30, Z28 |
(212) 0x42c1d8 FSUB Z30.D, P6/M, Z30.D, Z5.D |
(212) 0x42c1dc LD1D {Z14.D}, P6/Z, [X1, Z15.D,LSL #3] |
(212) 0x42c1e0 MOVPRFX Z4, Z30 |
(212) 0x42c1e4 FABS Z4.D, P4/M, Z30.D |
(212) 0x42c1e8 FSUB Z14.D, P6/M, Z14.D, Z28.D |
(212) 0x42c1ec FMUL Z30.D, P6/M, Z30.D, Z14.D |
(212) 0x42c1f0 LD1D {Z13.D}, P6/Z, [X6, Z31.D,LSL #3] |
(212) 0x42c1f4 FCMGT P7.D, P6/Z, Z30.D, #0.0000000 |
(212) 0x42c1f8 SEL Z3.S, P13, Z7.S, Z20.S |
(212) 0x42c1fc SUNPKLO Z30.D, Z3 |
(212) 0x42c200 MOVPRFX Z31, Z26 |
(212) 0x42c204 FABS Z31.D, P4/M, Z26.D |
(212) 0x42c208 LD1D {Z15.D}, P7/Z, [X4, Z30.D,LSL #3] |
(212) 0x42c20c FDIV Z31.D, P6/M, Z31.D, Z13.D |
(212) 0x42c210 MOVPRFX Z13, Z31 |
(212) 0x42c214 FADD Z13.D, P7/M, Z13.D, #0.0000000 |
(212) 0x42c218 MOVPRFX Z30, Z4 |
(212) 0x42c21c FMUL Z30.D, P7/M, Z30.D, Z13.D |
(212) 0x42c220 MOVPRFX Z5, Z14 |
(212) 0x42c224 FABS Z5.D, P4/M, Z14.D |
(212) 0x42c228 MOVPRFX Z1, Z30 |
(212) 0x42c22c FDIV Z1.D, P7/M, Z1.D, Z15.D |
(212) 0x42c230 MOVPRFX Z13, Z22 |
(212) 0x42c234 FSUB Z13.D, P7/M, Z13.D, Z31.D |
(212) 0x42c238 SEL Z15.D, P6, Z31.D, Z16.D |
(212) 0x42c23c MOVPRFX Z30, Z5 |
(212) 0x42c240 FMUL Z30.D, P7/M, Z30.D, Z13.D |
(212) 0x42c244 LD1RD {Z31.D}, P4/Z, [X8] |
(212) 0x42c248 FSUBR Z15.D, P6/M, Z15.D, #0.0000000 |
(212) 0x42c24c FCMLE P14.D, P7/Z, Z14.D, #0.0000000 |
(212) 0x42c250 FDIV Z30.D, P7/M, Z30.D, Z31.D |
(212) 0x42c254 EOR P14.B, P7/Z, P14.B, P7.B |
(212) 0x42c258 FADD Z30.D, P7/M, Z30.D, Z1.D |
(212) 0x42c25c MOVPRFX Z14, Z17 |
(212) 0x42c260 FCPY Z14.D, P14/M, #1.0000000 |
(212) 0x42c264 MOVPRFX Z0, Z31 |
(212) 0x42c268 FMUL Z0.D, P7/M, Z0.D, Z30.D |
(212) 0x42c26c FDIV Z0.D, P7/M, Z0.D, Z18.D |
(212) 0x42c270 FMINNM Z4.D, P4/M, Z4.D, Z0.D |
(212) 0x42c274 FMINNM Z5.D, P4/M, Z5.D, Z4.D |
(212) 0x42c278 MOVPRFX Z5.D, P7/Z, Z5.D |
(212) 0x42c27c FMUL Z5.D, P7/M, Z5.D, Z14.D |
(212) 0x42c280 FMLA Z28.D, P6/M, Z5.D, Z15.D |
(212) 0x42c284 SEL Z1.D, P15, Z23.D, Z24.D |
(212) 0x42c288 FMUL Z26.D, P6/M, Z26.D, Z28.D |
(212) 0x42c28c MOVPRFX Z13, Z29 |
(212) 0x42c290 MLA Z13.D, P4/M, Z25.D, Z1.D |
(212) 0x42c294 SEL Z15.D, P15, Z24.D, Z23.D |
(212) 0x42c298 LD1D {Z30.D}, P5/Z, [X1, Z13.D,LSL #3] |
(212) 0x42c29c MAD Z15.D, P4/M, Z25.D, Z29.D |
(212) 0x42c2a0 MOVPRFX Z4, Z30 |
(212) 0x42c2a4 FSUB Z4.D, P5/M, Z4.D, Z2.D |
(212) 0x42c2a8 LD1D {Z28.D}, P5/Z, [X1, Z15.D,LSL #3] |
(212) 0x42c2ac MOVPRFX Z5, Z4 |
(212) 0x42c2b0 FABS Z5.D, P4/M, Z4.D |
(212) 0x42c2b4 FSUB Z28.D, P5/M, Z28.D, Z30.D |
(212) 0x42c2b8 FMUL Z4.D, P5/M, Z4.D, Z28.D |
(212) 0x42c2bc MAD Z1.D, P4/M, Z19.D, Z29.D |
(212) 0x42c2c0 FCMGT P7.D, P5/Z, Z4.D, #0.0000000 |
(212) 0x42c2c4 LD1D {Z0.D}, P5/Z, [X6, Z1.D,LSL #3] |
(212) 0x42c2c8 MOVPRFX Z29, Z27 |
(212) 0x42c2cc FABS Z29.D, P4/M, Z27.D |
(212) 0x42c2d0 SUNPKHI Z3.D, Z3 |
(212) 0x42c2d4 FDIV Z29.D, P5/M, Z29.D, Z0.D |
(212) 0x42c2d8 LD1D {Z4.D}, P7/Z, [X4, Z3.D,LSL #3] |
(212) 0x42c2dc SEL Z2.D, P5, Z29.D, Z16.D |
(212) 0x42c2e0 MOVPRFX Z14, Z28 |
(212) 0x42c2e4 FABS Z14.D, P4/M, Z28.D |
(212) 0x42c2e8 MOVPRFX Z15, Z29 |
(212) 0x42c2ec FADD Z15.D, P7/M, Z15.D, #0.0000000 |
(212) 0x42c2f0 FCMLE P15.D, P7/Z, Z28.D, #0.0000000 |
(212) 0x42c2f4 MOVPRFX Z28, Z5 |
(212) 0x42c2f8 FMUL Z28.D, P7/M, Z28.D, Z15.D |
(212) 0x42c2fc FDIV Z28.D, P7/M, Z28.D, Z4.D |
(212) 0x42c300 MOVPRFX Z4, Z22 |
(212) 0x42c304 FSUB Z4.D, P7/M, Z4.D, Z29.D |
(212) 0x42c308 MOVPRFX Z29, Z14 |
(212) 0x42c30c FMUL Z29.D, P7/M, Z29.D, Z4.D |
(212) 0x42c310 FDIV Z29.D, P7/M, Z29.D, Z31.D |
(212) 0x42c314 FADD Z29.D, P7/M, Z29.D, Z28.D |
(212) 0x42c318 FMUL Z31.D, P7/M, Z31.D, Z29.D |
(212) 0x42c31c FDIV Z31.D, P7/M, Z31.D, Z18.D |
(212) 0x42c320 FSUBR Z2.D, P5/M, Z2.D, #0.0000000 |
(212) 0x42c324 EOR P15.B, P7/Z, P15.B, P7.B |
(212) 0x42c328 FMINNM Z5.D, P4/M, Z5.D, Z31.D |
(212) 0x42c32c MOVPRFX Z15, Z17 |
(212) 0x42c330 FCPY Z15.D, P15/M, #1.0000000 |
(212) 0x42c334 MOVPRFX Z31, Z14 |
(212) 0x42c338 FMINNM Z31.D, P4/M, Z31.D, Z5.D |
(212) 0x42c33c MOVPRFX Z31.D, P7/Z, Z31.D |
(212) 0x42c340 FMUL Z31.D, P7/M, Z31.D, Z15.D |
(212) 0x42c344 FMLA Z30.D, P5/M, Z31.D, Z2.D |
(212) 0x42c348 FMUL Z27.D, P5/M, Z27.D, Z30.D |
(212) 0x42c34c ST1D {Z26.D}, P6, [X3, X0,LSL #3] |
(212) 0x42c350 ST1D {Z27.D}, P5, [X5, X0,LSL #3] |
(212) 0x42c354 ADD X0, X0, X11 |
(212) 0x42c358 WHILELO P5.D, X0, X10 |
(212) 0x42c35c INCW Z21.S, ALL |
(212) 0x42c360 WHILELO P6.D, X0, X2 |
(212) 0x42c364 B.NE 42c180 |
(213) 0x42c368 ORR X8, XZR, X22 |
(213) 0x42c36c CMP W19, W22 |
(213) 0x42c370 B.LE 42c3b0 |
(213) 0x42c374 SUB W2, W17, W12 |
(213) 0x42c378 ORR W0, WZR, W21 |
(213) 0x42c37c ORR W1, WZR, W12 |
(213) 0x42c380 CMP W2, W0 |
(213) 0x42c384 CSEL X2, X2, X0, #9 |
(213) 0x42c388 ADD W12, W1, W2 |
(213) 0x42c38c ORR W4, WZR, W20 |
(213) 0x42c390 ADD W5, W8, #1 |
(213) 0x42c394 CMP W1, W12 |
(213) 0x42c398 B.CC 42c104 |
(214) 0x42c39c ADD X22, X8, #1 |
(214) 0x42c3a0 ORR W12, WZR, W1 |
(214) 0x42c3a4 ORR X8, XZR, X22 |
(214) 0x42c3a8 CMP W19, W22 |
(214) 0x42c3ac B.GT 42c374 |
0x42c3b0 LDR D15, [SP, #96] |
0x42c3b4 LDP X21, X22, [SP, #32] |
0x42c3b8 LDP D13, D14, [SP, #80] |
0x42c3bc LDP X19, X20, [SP, #16] |
0x42c3c0 LDP X23, X24, [SP, #48] |
0x42c3c4 LDP X25, X26, [SP, #64] |
0x42c3c8 LDP X29, X30, [SP], #112 |
0x42c3cc RET |
0x42c3d0 LDP X21, X22, [SP, #32] |
0x42c3d4 LDP X19, X20, [SP, #16] |
0x42c3d8 LDP X23, X24, [SP, #48] |
0x42c3dc LDP X25, X26, [SP, #64] |
0x42c3e0 LDP X29, X30, [SP], #112 |
0x42c3e4 RET |
0x42c3e8 ADD W2, W2, #1 |
0x42c3ec MOVZ W0, #0 |
0x42c3f0 B 42c098 |
0x42c3f4 HINT #0 |
0x42c3f8 HINT #0 |
0x42c3fc HINT #0 |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►98.42+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| Path / |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run gcc_0
| Source file and lines | advec_mom.cpp:180-211 |
| Module | exec |
| nb instructions | 76 |
| nb uops | 73 |
| loop length | 304 |
| used w registers | 16 |
| used x registers | 20 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 3 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 4 |
| nb stack references | 19 |
| micro-operation queue | 9.13 cycles |
| front end | 9.13 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 5.00 | 5.00 | 9.00 | 9.00 | 9.00 | 9.00 | 3.00 | 3.00 | 0.00 | 0.00 | 8.83 | 8.50 | 8.67 | 2.50 | 2.50 |
| cycles | 5.00 | 5.00 | 9.00 | 9.00 | 9.00 | 9.00 | 3.00 | 3.00 | 0.00 | 0.00 | 8.83 | 8.50 | 8.67 | 2.50 | 2.50 |
| Cycles executing div or sqrt instructions | 10.00-25.00 |
| Front-end | 9.13 |
| Dispatch | 9.00 |
| DIV/SQRT | 10.00-25.00 |
| Overall L1 | 10.00-25.00 |
| all | 4% |
| load | 6% |
| store | 14% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| other | 0% |
| all | 0% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 3% |
| load | 6% |
| store | 14% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 30% |
| load | 39% |
| store | 46% |
| mul | 12% |
| add-sub | 13% |
| fma | 12% |
| other | 29% |
| all | 3% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 3% |
| all | 28% |
| load | 39% |
| store | 46% |
| mul | 12% |
| add-sub | 13% |
| fma | 12% |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 22% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #-112]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X23, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDR W19, [X0, #52] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| LDR W26, [X23, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| LDR W20, [X0, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| ADD W19, W19, #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| LDR W0, [X0, #44] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| CMP W26, W19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 42c3bc <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.10+0x398> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W20, W20, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD W22, W0, #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W25, W19, W26 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W20, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 42c3d0 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.10+0x3ac> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| SUB W21, W22, W20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410670 <@plt_start@+0x650> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MUL W25, W25, W21 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| ORR W24, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410550 <@plt_start@+0x530> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W1, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| UDIV W2, W25, W24 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| MSUB W0, W2, W24, W25 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | N/A |
| CMP W1, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CC 42c3e8 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.10+0x3c4> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W1, W2, W1, W0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W17, W2, W1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W1, W17 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 42c3d0 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.10+0x3ac> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| UDIV W0, W1, W21 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| STP D13, D14, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| CNTW X11, ALL | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (25.0%) |
| LDR X18, [X23, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| PTRUE P4.B, ALL | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (100.0%) |
| FDUP Z16.D, #112 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (3.1%) |
| LDP X16, X15, [X23] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| FDUP Z17.D, #240 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (3.1%) |
| FDUP Z22.D, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (3.1%) |
| FDUP Z18.D, #24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (3.1%) |
| LDP X14, X13, [X23, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| STR D15, [SP, #96] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 | scal (25.0%) |
| MSUB W4, W0, W21, W1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W8, W0, W26 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SBFM X8, X8, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| ADD W5, W8, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W4, W4, W20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W0, W22, W4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| CMP W2, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| CSEL X2, X2, X0, #9 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADD W12, W1, W2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W1, W12 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 42c39c <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.10+0x378> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR D15, [SP, #96] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP D13, D14, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | vect (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #112 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #112 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W2, W2, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W0, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| B 42c098 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.10+0x74> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run gcc_0
| Source file and lines | advec_mom.cpp:180-211 |
| Module | exec |
| nb instructions | 76 |
| nb uops | 73 |
| loop length | 304 |
| used w registers | 16 |
| used x registers | 20 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 3 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 4 |
| nb stack references | 19 |
| micro-operation queue | 9.13 cycles |
| front end | 9.13 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 5.00 | 5.00 | 9.00 | 9.00 | 9.00 | 9.00 | 3.00 | 3.00 | 0.00 | 0.00 | 8.83 | 8.50 | 8.67 | 2.50 | 2.50 |
| cycles | 5.00 | 5.00 | 9.00 | 9.00 | 9.00 | 9.00 | 3.00 | 3.00 | 0.00 | 0.00 | 8.83 | 8.50 | 8.67 | 2.50 | 2.50 |
| Cycles executing div or sqrt instructions | 10.00-25.00 |
| Front-end | 9.13 |
| Dispatch | 9.00 |
| DIV/SQRT | 10.00-25.00 |
| Overall L1 | 10.00-25.00 |
| all | 4% |
| load | 6% |
| store | 14% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| other | 0% |
| all | 0% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 3% |
| load | 6% |
| store | 14% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 30% |
| load | 39% |
| store | 46% |
| mul | 12% |
| add-sub | 13% |
| fma | 12% |
| other | 29% |
| all | 3% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 3% |
| all | 28% |
| load | 39% |
| store | 46% |
| mul | 12% |
| add-sub | 13% |
| fma | 12% |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 22% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #-112]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X23, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDR W19, [X0, #52] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| LDR W26, [X23, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| LDR W20, [X0, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| ADD W19, W19, #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| LDR W0, [X0, #44] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| CMP W26, W19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 42c3bc <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.10+0x398> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W20, W20, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD W22, W0, #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W25, W19, W26 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W20, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 42c3d0 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.10+0x3ac> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| SUB W21, W22, W20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410670 <@plt_start@+0x650> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MUL W25, W25, W21 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| ORR W24, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410550 <@plt_start@+0x530> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W1, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| UDIV W2, W25, W24 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| MSUB W0, W2, W24, W25 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | N/A |
| CMP W1, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CC 42c3e8 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.10+0x3c4> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W1, W2, W1, W0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W17, W2, W1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W1, W17 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 42c3d0 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.10+0x3ac> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| UDIV W0, W1, W21 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| STP D13, D14, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| CNTW X11, ALL | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (25.0%) |
| LDR X18, [X23, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| PTRUE P4.B, ALL | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (100.0%) |
| FDUP Z16.D, #112 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (3.1%) |
| LDP X16, X15, [X23] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| FDUP Z17.D, #240 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (3.1%) |
| FDUP Z22.D, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (3.1%) |
| FDUP Z18.D, #24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (3.1%) |
| LDP X14, X13, [X23, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| STR D15, [SP, #96] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 | scal (25.0%) |
| MSUB W4, W0, W21, W1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W8, W0, W26 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SBFM X8, X8, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| ADD W5, W8, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W4, W4, W20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W0, W22, W4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| CMP W2, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| CSEL X2, X2, X0, #9 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADD W12, W1, W2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W1, W12 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 42c39c <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.10+0x378> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR D15, [SP, #96] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP D13, D14, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | vect (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #112 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #112 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W2, W2, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W0, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| B 42c098 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.10+0x74> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A |
| Name | Coverage (%) | Time (s) |
|---|---|---|
| ▼advec_mom_kernel(int, int, int, int, clover::Buffer2D | 3.91 | 5.34 |
| ▼Loop 214 - context.h:46-69 - exec– | 0.00 | 0.00 |
| ▼Loop 213 - advec_mom.cpp:182-211 - exec– | 0.01 | 0.02 |
| ○Loop 212 - context.h:69-69 - exec | 3.90 | 5.18 |
