| Function: advec_mom_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, ... | Module: exec | Source: advec_mom.cpp:108-139 [...] | Coverage (incl. loops): 3.67% | (excl. loops): 0.00% |
|---|
| Function: advec_mom_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, ... | Module: exec | Source: advec_mom.cpp:108-139 [...] | Coverage (incl. loops): 3.67% | (excl. loops): 0.00% |
|---|
/home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/build/CloverLeaf2.0-CXX/src/omp/context.h: 46 - 69 |
-------------------------------------------------------------------------------- |
46: T &operator[](size_t i) const { return data[i]; } |
[...] |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
/home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/build/CloverLeaf2.0-CXX/src/omp/advec_mom.cpp: 108 - 139 |
-------------------------------------------------------------------------------- |
108: #pragma omp parallel for simd collapse(2) |
109: for (int j = (y_min + 1); j < (y_max + 1 + 2); j++) { |
110: for (int i = (x_min - 1 + 1); i < (x_max + 1 + 2); i++) |
111: ({ |
112: int upwind, donor, downwind, dif; |
113: double sigma, width, limiter, vdiffuw, vdiffdw, auw, adw, wind, advec_vel_s; |
114: if (node_flux(i, j) < 0.0) { |
[...] |
120: upwind = i - 1; |
121: donor = i; |
122: downwind = i + 1; |
123: dif = upwind; |
124: } |
125: sigma = std::fabs(node_flux(i, j)) / (node_mass_pre(donor, j)); |
126: width = celldx[i]; |
127: vdiffuw = vel1(donor, j) - vel1(upwind, j); |
128: vdiffdw = vel1(downwind, j) - vel1(donor, j); |
129: limiter = 0.0; |
130: if (vdiffuw * vdiffdw > 0.0) { |
131: auw = std::fabs(vdiffuw); |
132: adw = std::fabs(vdiffdw); |
133: wind = 1.0; |
134: if (vdiffdw <= 0.0) wind = -1.0; |
135: limiter = |
136: wind * std::fmin(std::fmin(width * ((2.0 - sigma) * adw / width + (1.0 + sigma) * auw / celldx[dif]) / 6.0, auw), adw); |
137: } |
138: advec_vel_s = vel1(donor, j) + (1.0 - sigma) * limiter; |
139: mom_flux(i, j) = advec_vel_s * node_flux(i, j); |
0x42bc64 STP X29, X30, [SP, #-112]! |
0x42bc68 ADD X29, SP, #0 |
0x42bc6c STP X19, X20, [SP, #16] |
0x42bc70 STP X23, X24, [SP, #48] |
0x42bc74 ORR X23, XZR, X0 |
0x42bc78 STP X25, X26, [SP, #64] |
0x42bc7c LDP W25, W19, [X0, #48] |
0x42bc80 LDR W20, [X23, #40] |
0x42bc84 LDR W0, [X0, #44] |
0x42bc88 ADD W25, W25, #1 |
0x42bc8c ADD W19, W19, #3 |
0x42bc90 CMP W25, W19 |
0x42bc94 B.GE 42bfec |
0x42bc98 STP X21, X22, [SP, #32] |
0x42bc9c ADD W22, W0, #3 |
0x42bca0 SUB W26, W19, W25 |
0x42bca4 CMP W20, W22 |
0x42bca8 B.GE 42c000 |
0x42bcac SUB W21, W22, W20 |
0x42bcb0 BL 410670 |
0x42bcb4 MUL W26, W26, W21 |
0x42bcb8 ORR W24, WZR, W0 |
0x42bcbc BL 410550 |
0x42bcc0 ORR W1, WZR, W0 |
0x42bcc4 UDIV W2, W26, W24 |
0x42bcc8 MSUB W0, W2, W24, W26 |
0x42bccc CMP W1, W0 |
0x42bcd0 B.CC 42c018 |
0x42bcd4 MADD W1, W2, W1, W0 |
0x42bcd8 ADD W18, W2, W1 |
0x42bcdc CMP W1, W18 |
0x42bce0 B.CS 42c000 |
0x42bce4 UDIV W0, W1, W21 |
0x42bce8 STP D12, D13, [SP, #80] |
0x42bcec CNTW X12, ALL |
0x42bcf0 STP D14, D15, [SP, #96] |
0x42bcf4 PTRUE P3.B, ALL |
0x42bcf8 DUP Z31.S, #2 |
0x42bcfc LDR X30, [X23, #32] |
0x42bd00 DUP Z30.B, #255 |
0x42bd04 FDUP Z29.D, #112 |
0x42bd08 LDP X17, X16, [X23] |
0x42bd0c FDUP Z28.D, #240 |
0x42bd10 FDUP Z27.D, #0 |
0x42bd14 FDUP Z26.D, #24 |
0x42bd18 LDP X15, X14, [X23, #16] |
0x42bd1c MSUB W3, W0, W21, W1 |
0x42bd20 ADD W0, W0, W25 |
0x42bd24 SBFM X11, X0, #0, #31 |
0x42bd28 ADD W3, W3, W20 |
0x42bd2c SUB W0, W22, W3 |
0x42bd30 CMP W2, W0 |
0x42bd34 CSEL X2, X2, X0, #9 |
0x42bd38 ADD W13, W1, W2 |
0x42bd3c CMP W1, W13 |
0x42bd40 B.CS 42bfcc |
(210) 0x42bd44 LDR X7, [X16] |
(210) 0x42bd48 SBFM X8, X3, #0, #31 |
(210) 0x42bd4c INDEX Z25.S, W3, #1 |
(210) 0x42bd50 MOVZ X0, #0 |
(210) 0x42bd54 ORR X22, XZR, X2 |
(210) 0x42bd58 WHILELO P5.D, XZR, X2 |
(210) 0x42bd5c LDR X3, [X14] |
(210) 0x42bd60 UQDECD X22, ALL |
(210) 0x42bd64 WHILELO P4.D, XZR, X22 |
(210) 0x42bd68 LDR X1, [X17] |
(210) 0x42bd6c MADD X7, X11, X7, X8 |
(210) 0x42bd70 LDR X4, [X15] |
(210) 0x42bd74 MADD X3, X11, X3, X8 |
(210) 0x42bd78 LDR X5, [X30, #8] |
(210) 0x42bd7c MUL X1, X11, X1 |
(210) 0x42bd80 LDR X9, [X14, #16] |
(210) 0x42bd84 MUL X4, X11, X4 |
(210) 0x42bd88 LDR X10, [X16, #16] |
(210) 0x42bd8c ADD X8, X5, X8,LSL #3 |
(210) 0x42bd90 LDR X23, [X17, #16] |
(210) 0x42bd94 ADD X3, X9, X3,LSL #3 |
(210) 0x42bd98 LDR X6, [X15, #16] |
(210) 0x42bd9c ADD X7, X10, X7,LSL #3 |
(210) 0x42bda0 ADDVL X10, X8, #1 |
(210) 0x42bda4 ADD X1, X23, X1,LSL #3 |
(210) 0x42bda8 ADDVL X9, X7, #1 |
(210) 0x42bdac ADD X4, X6, X4,LSL #3 |
(210) 0x42bdb0 ADDVL X6, X3, #1 |
(210) 0x42bdb4 HINT #0 |
(210) 0x42bdb8 HINT #0 |
(210) 0x42bdbc HINT #0 |
(209) 0x42bdc0 LD1D {Z21.D}, P5/Z, [X7, X0,LSL #3] |
(209) 0x42bdc4 MOVPRFX Z19, Z25 |
(209) 0x42bdc8 ADD Z19.S, Z19.S, #1 |
(209) 0x42bdcc FCMLT P15.D, P3/Z, Z21.D, #0.0000000 |
(209) 0x42bdd0 SUNPKLO Z23.D, Z25 |
(209) 0x42bdd4 SUNPKLO Z18.D, Z19 |
(209) 0x42bdd8 SEL Z7.D, P15, Z18.D, Z23.D |
(209) 0x42bddc SEL Z18.D, P15, Z23.D, Z18.D |
(209) 0x42bde0 LD1D {Z15.D}, P5/Z, [X1, Z7.D,LSL #3] |
(209) 0x42bde4 LD1D {Z20.D}, P4/Z, [X9, X0,LSL #3] |
(209) 0x42bde8 LD1D {Z3.D}, P5/Z, [X4, Z7.D,LSL #3] |
(209) 0x42bdec FCMLT P14.D, P3/Z, Z20.D, #0.0000000 |
(209) 0x42bdf0 LD1D {Z7.D}, P5/Z, [X1, Z18.D,LSL #3] |
(209) 0x42bdf4 UZP1 P7.S, P15.S, P14.S |
(209) 0x42bdf8 FSUB Z7.D, P5/M, Z7.D, Z15.D |
(209) 0x42bdfc SUNPKHI Z22.D, Z25 |
(209) 0x42be00 SUNPKHI Z17.D, Z19 |
(209) 0x42be04 MOVPRFX Z16, Z25 |
(209) 0x42be08 SUB Z16.S, Z16.S, #1 |
(209) 0x42be0c SEL Z6.D, P14, Z17.D, Z22.D |
(209) 0x42be10 MOVPRFX Z16.S, P7/M, Z25.S |
(209) 0x42be14 ADD Z16.S, P7/M, Z16.S, Z31.S |
(209) 0x42be18 SEL Z17.D, P14, Z22.D, Z17.D |
(209) 0x42be1c SUNPKLO Z13.D, Z16 |
(209) 0x42be20 SUNPKHI Z16.D, Z16 |
(209) 0x42be24 LD1D {Z12.D}, P5/Z, [X1, Z13.D,LSL #3] |
(209) 0x42be28 MOVPRFX Z23, Z15 |
(209) 0x42be2c FSUB Z23.D, P5/M, Z23.D, Z12.D |
(209) 0x42be30 MOVPRFX Z18, Z23 |
(209) 0x42be34 FMUL Z18.D, P5/M, Z18.D, Z7.D |
(209) 0x42be38 LD1D {Z14.D}, P4/Z, [X1, Z6.D,LSL #3] |
(209) 0x42be3c FCMGT P6.D, P5/Z, Z18.D, #0.0000000 |
(209) 0x42be40 LD1D {Z24.D}, P4/Z, [X1, Z16.D,LSL #3] |
(209) 0x42be44 LD1D {Z2.D}, P4/Z, [X4, Z6.D,LSL #3] |
(209) 0x42be48 MOVPRFX Z22, Z14 |
(209) 0x42be4c FSUB Z22.D, P4/M, Z22.D, Z24.D |
(209) 0x42be50 LD1D {Z6.D}, P4/Z, [X1, Z17.D,LSL #3] |
(209) 0x42be54 MOVPRFX Z5, Z21 |
(209) 0x42be58 FABS Z5.D, P3/M, Z21.D |
(209) 0x42be5c FSUB Z6.D, P4/M, Z6.D, Z14.D |
(209) 0x42be60 FDIV Z5.D, P5/M, Z5.D, Z3.D |
(209) 0x42be64 MOVPRFX Z17, Z22 |
(209) 0x42be68 FMUL Z17.D, P4/M, Z17.D, Z6.D |
(209) 0x42be6c EOR P7.B, P3/Z, P7.B, P3.B |
(209) 0x42be70 MOVPRFX Z18, Z5 |
(209) 0x42be74 FADD Z18.D, P6/M, Z18.D, #0.0000000 |
(209) 0x42be78 MOVPRFX Z16, Z27 |
(209) 0x42be7c FSUB Z16.D, P6/M, Z16.D, Z5.D |
(209) 0x42be80 MOVPRFX Z19.S, P7/M, Z25.S |
(209) 0x42be84 ADD Z19.S, P7/M, Z19.S, Z30.S |
(209) 0x42be88 FABS Z23.D, P3/M, Z23.D |
(209) 0x42be8c FCMGT P7.D, P4/Z, Z17.D, #0.0000000 |
(209) 0x42be90 FCMLE P14.D, P6/Z, Z7.D, #0.0000000 |
(209) 0x42be94 MOVPRFX Z3, Z7 |
(209) 0x42be98 FABS Z3.D, P3/M, Z7.D |
(209) 0x42be9c MOVPRFX Z4, Z20 |
(209) 0x42bea0 FABS Z4.D, P3/M, Z20.D |
(209) 0x42bea4 MOVPRFX Z7, Z3 |
(209) 0x42bea8 FMUL Z7.D, P6/M, Z7.D, Z16.D |
(209) 0x42beac FDIV Z4.D, P4/M, Z4.D, Z2.D |
(209) 0x42beb0 MOVPRFX Z16, Z23 |
(209) 0x42beb4 FMUL Z16.D, P6/M, Z16.D, Z18.D |
(209) 0x42beb8 MOVPRFX Z24, Z27 |
(209) 0x42bebc FSUB Z24.D, P7/M, Z24.D, Z4.D |
(209) 0x42bec0 SUNPKLO Z18.D, Z19 |
(209) 0x42bec4 MOVPRFX Z17, Z4 |
(209) 0x42bec8 FADD Z17.D, P7/M, Z17.D, #0.0000000 |
(209) 0x42becc LD1D {Z1.D}, P5/Z, [X8, X0,LSL #3] |
(209) 0x42bed0 LD1D {Z0.D}, P4/Z, [X10, X0,LSL #3] |
(209) 0x42bed4 FABS Z22.D, P3/M, Z22.D |
(209) 0x42bed8 FCMLE P15.D, P7/Z, Z6.D, #0.0000000 |
(209) 0x42bedc MOVPRFX Z2, Z6 |
(209) 0x42bee0 FABS Z2.D, P3/M, Z6.D |
(209) 0x42bee4 FDIV Z7.D, P6/M, Z7.D, Z1.D |
(209) 0x42bee8 MOVPRFX Z6, Z2 |
(209) 0x42beec FMUL Z6.D, P7/M, Z6.D, Z24.D |
(209) 0x42bef0 SUNPKHI Z19.D, Z19 |
(209) 0x42bef4 MOVPRFX Z24, Z22 |
(209) 0x42bef8 FMUL Z24.D, P7/M, Z24.D, Z17.D |
(209) 0x42befc FDIV Z6.D, P7/M, Z6.D, Z0.D |
(209) 0x42bf00 LD1D {Z17.D}, P6/Z, [X5, Z18.D,LSL #3] |
(209) 0x42bf04 LD1D {Z18.D}, P7/Z, [X5, Z19.D,LSL #3] |
(209) 0x42bf08 FDIV Z16.D, P6/M, Z16.D, Z17.D |
(209) 0x42bf0c FDIV Z24.D, P7/M, Z24.D, Z18.D |
(209) 0x42bf10 FADD Z7.D, P6/M, Z7.D, Z16.D |
(209) 0x42bf14 FADD Z6.D, P7/M, Z6.D, Z24.D |
(209) 0x42bf18 FMUL Z1.D, P6/M, Z1.D, Z7.D |
(209) 0x42bf1c FMUL Z0.D, P7/M, Z0.D, Z6.D |
(209) 0x42bf20 SEL Z7.D, P5, Z5.D, Z29.D |
(209) 0x42bf24 SEL Z6.D, P4, Z4.D, Z29.D |
(209) 0x42bf28 FSUBR Z7.D, P5/M, Z7.D, #0.0000000 |
(209) 0x42bf2c FSUBR Z6.D, P4/M, Z6.D, #0.0000000 |
(209) 0x42bf30 EOR P14.B, P6/Z, P14.B, P6.B |
(209) 0x42bf34 EOR P15.B, P7/Z, P15.B, P7.B |
(209) 0x42bf38 MOVPRFX Z13, Z28 |
(209) 0x42bf3c FCPY Z13.D, P14/M, #1.0000000 |
(209) 0x42bf40 FDIV Z1.D, P6/M, Z1.D, Z26.D |
(209) 0x42bf44 FDIV Z0.D, P7/M, Z0.D, Z26.D |
(209) 0x42bf48 FMINNM Z1.D, P3/M, Z1.D, Z23.D |
(209) 0x42bf4c FMINNM Z0.D, P3/M, Z0.D, Z22.D |
(209) 0x42bf50 FMINNM Z1.D, P3/M, Z1.D, Z3.D |
(209) 0x42bf54 FMINNM Z0.D, P3/M, Z0.D, Z2.D |
(209) 0x42bf58 MOVPRFX Z1.D, P6/Z, Z1.D |
(209) 0x42bf5c FMUL Z1.D, P6/M, Z1.D, Z13.D |
(209) 0x42bf60 MOVPRFX Z12, Z28 |
(209) 0x42bf64 FCPY Z12.D, P15/M, #1.0000000 |
(209) 0x42bf68 FMLA Z15.D, P5/M, Z1.D, Z7.D |
(209) 0x42bf6c MOVPRFX Z0.D, P7/Z, Z0.D |
(209) 0x42bf70 FMUL Z0.D, P7/M, Z0.D, Z12.D |
(209) 0x42bf74 FMUL Z21.D, P5/M, Z21.D, Z15.D |
(209) 0x42bf78 FMLA Z14.D, P4/M, Z0.D, Z6.D |
(209) 0x42bf7c FMUL Z20.D, P4/M, Z20.D, Z14.D |
(209) 0x42bf80 ST1D {Z21.D}, P5, [X3, X0,LSL #3] |
(209) 0x42bf84 ST1D {Z20.D}, P4, [X6, X0,LSL #3] |
(209) 0x42bf88 ADD X0, X0, X12 |
(209) 0x42bf8c WHILELO P4.D, X0, X22 |
(209) 0x42bf90 INCW Z25.S, ALL |
(209) 0x42bf94 WHILELO P5.D, X0, X2 |
(209) 0x42bf98 B.NE 42bdc0 |
(210) 0x42bf9c ADD X11, X11, #1 |
(210) 0x42bfa0 CMP W19, W11 |
(210) 0x42bfa4 B.LE 42bfe0 |
(210) 0x42bfa8 SUB W2, W18, W13 |
(210) 0x42bfac ORR W0, WZR, W21 |
(210) 0x42bfb0 ORR W1, WZR, W13 |
(210) 0x42bfb4 CMP W2, W0 |
(210) 0x42bfb8 CSEL X2, X2, X0, #9 |
(210) 0x42bfbc ADD W13, W1, W2 |
(210) 0x42bfc0 ORR W3, WZR, W20 |
(210) 0x42bfc4 CMP W1, W13 |
(210) 0x42bfc8 B.CC 42bd44 |
(211) 0x42bfcc ADD X11, X11, #1 |
(211) 0x42bfd0 ORR W13, WZR, W1 |
(211) 0x42bfd4 CMP W19, W11 |
(211) 0x42bfd8 B.GT 42bfa8 |
0x42bfdc HINT #0 |
0x42bfe0 LDP D12, D13, [SP, #80] |
0x42bfe4 LDP X21, X22, [SP, #32] |
0x42bfe8 LDP D14, D15, [SP, #96] |
0x42bfec LDP X19, X20, [SP, #16] |
0x42bff0 LDP X23, X24, [SP, #48] |
0x42bff4 LDP X25, X26, [SP, #64] |
0x42bff8 LDP X29, X30, [SP], #112 |
0x42bffc RET |
0x42c000 LDP X21, X22, [SP, #32] |
0x42c004 LDP X19, X20, [SP, #16] |
0x42c008 LDP X23, X24, [SP, #48] |
0x42c00c LDP X25, X26, [SP, #64] |
0x42c010 LDP X29, X30, [SP], #112 |
0x42c014 RET |
0x42c018 ADD W2, W2, #1 |
0x42c01c MOVZ W0, #0 |
0x42c020 B 42bcd4 |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►98.42+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| Path / |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run gcc_0
| Source file and lines | advec_mom.cpp:108-139 |
| Module | exec |
| nb instructions | 74 |
| nb uops | 73 |
| loop length | 296 |
| used w registers | 14 |
| used x registers | 19 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 4 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 6 |
| nb stack references | 19 |
| micro-operation queue | 9.13 cycles |
| front end | 9.13 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 5.00 | 5.00 | 8.75 | 8.75 | 8.75 | 8.75 | 4.00 | 4.00 | 0.00 | 0.00 | 8.50 | 8.17 | 8.33 | 2.50 | 2.50 |
| cycles | 5.00 | 5.00 | 8.75 | 8.75 | 8.75 | 8.75 | 4.00 | 4.00 | 0.00 | 0.00 | 8.50 | 8.17 | 8.33 | 2.50 | 2.50 |
| Cycles executing div or sqrt instructions | 10.00-25.00 |
| Front-end | 9.13 |
| Dispatch | 8.75 |
| DIV/SQRT | 10.00-25.00 |
| Overall L1 | 10.00-25.00 |
| all | 8% |
| load | 13% |
| store | 28% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| other | 0% |
| all | 0% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 7% |
| load | 13% |
| store | 28% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 32% |
| load | 44% |
| store | 50% |
| mul | 12% |
| add-sub | 13% |
| fma | 12% |
| other | 25% |
| all | 3% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 3% |
| all | 29% |
| load | 44% |
| store | 50% |
| mul | 12% |
| add-sub | 13% |
| fma | 12% |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 20% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #-112]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X23, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDP W25, W19, [X0, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDR W20, [X23, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| LDR W0, [X0, #44] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ADD W25, W25, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W19, W19, #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W25, W19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 42bfec <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.6+0x388> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD W22, W0, #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W26, W19, W25 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W20, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 42c000 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.6+0x39c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| SUB W21, W22, W20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410670 <@plt_start@+0x650> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MUL W26, W26, W21 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| ORR W24, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410550 <@plt_start@+0x530> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W1, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| UDIV W2, W26, W24 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| MSUB W0, W2, W24, W26 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | N/A |
| CMP W1, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CC 42c018 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.6+0x3b4> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W1, W2, W1, W0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W18, W2, W1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W1, W18 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 42c000 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.6+0x39c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| UDIV W0, W1, W21 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| STP D12, D13, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| CNTW X12, ALL | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (25.0%) |
| STP D14, D15, [SP, #96] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| PTRUE P3.B, ALL | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (100.0%) |
| DUP Z31.S, #2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (3.1%) |
| LDR X30, [X23, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| DUP Z30.B, #255 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (3.1%) |
| FDUP Z29.D, #112 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (3.1%) |
| LDP X17, X16, [X23] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| FDUP Z28.D, #240 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (3.1%) |
| FDUP Z27.D, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (3.1%) |
| FDUP Z26.D, #24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (3.1%) |
| LDP X15, X14, [X23, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| MSUB W3, W0, W21, W1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W0, W0, W25 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| SBFM X11, X0, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| ADD W3, W3, W20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W0, W22, W3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| CMP W2, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| CSEL X2, X2, X0, #9 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADD W13, W1, W2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W1, W13 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 42bfcc <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.6+0x368> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| HINT #0 | N/A | ||||||||||||||||||
| LDP D12, D13, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | vect (50.0%) |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP D14, D15, [SP, #96] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | vect (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #112 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #112 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W2, W2, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W0, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| B 42bcd4 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.6+0x70> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run gcc_0
| Source file and lines | advec_mom.cpp:108-139 |
| Module | exec |
| nb instructions | 74 |
| nb uops | 73 |
| loop length | 296 |
| used w registers | 14 |
| used x registers | 19 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 4 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 6 |
| nb stack references | 19 |
| micro-operation queue | 9.13 cycles |
| front end | 9.13 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 5.00 | 5.00 | 8.75 | 8.75 | 8.75 | 8.75 | 4.00 | 4.00 | 0.00 | 0.00 | 8.50 | 8.17 | 8.33 | 2.50 | 2.50 |
| cycles | 5.00 | 5.00 | 8.75 | 8.75 | 8.75 | 8.75 | 4.00 | 4.00 | 0.00 | 0.00 | 8.50 | 8.17 | 8.33 | 2.50 | 2.50 |
| Cycles executing div or sqrt instructions | 10.00-25.00 |
| Front-end | 9.13 |
| Dispatch | 8.75 |
| DIV/SQRT | 10.00-25.00 |
| Overall L1 | 10.00-25.00 |
| all | 8% |
| load | 13% |
| store | 28% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| other | 0% |
| all | 0% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 7% |
| load | 13% |
| store | 28% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 32% |
| load | 44% |
| store | 50% |
| mul | 12% |
| add-sub | 13% |
| fma | 12% |
| other | 25% |
| all | 3% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 3% |
| all | 29% |
| load | 44% |
| store | 50% |
| mul | 12% |
| add-sub | 13% |
| fma | 12% |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 20% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #-112]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X23, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDP W25, W19, [X0, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDR W20, [X23, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| LDR W0, [X0, #44] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ADD W25, W25, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W19, W19, #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W25, W19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 42bfec <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.6+0x388> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD W22, W0, #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W26, W19, W25 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W20, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 42c000 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.6+0x39c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| SUB W21, W22, W20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410670 <@plt_start@+0x650> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MUL W26, W26, W21 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| ORR W24, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410550 <@plt_start@+0x530> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W1, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| UDIV W2, W26, W24 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| MSUB W0, W2, W24, W26 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | N/A |
| CMP W1, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CC 42c018 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.6+0x3b4> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W1, W2, W1, W0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W18, W2, W1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W1, W18 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 42c000 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.6+0x39c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| UDIV W0, W1, W21 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| STP D12, D13, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| CNTW X12, ALL | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (25.0%) |
| STP D14, D15, [SP, #96] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| PTRUE P3.B, ALL | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (100.0%) |
| DUP Z31.S, #2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (3.1%) |
| LDR X30, [X23, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| DUP Z30.B, #255 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (3.1%) |
| FDUP Z29.D, #112 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (3.1%) |
| LDP X17, X16, [X23] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| FDUP Z28.D, #240 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (3.1%) |
| FDUP Z27.D, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (3.1%) |
| FDUP Z26.D, #24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (3.1%) |
| LDP X15, X14, [X23, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| MSUB W3, W0, W21, W1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W0, W0, W25 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| SBFM X11, X0, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| ADD W3, W3, W20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W0, W22, W3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| CMP W2, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| CSEL X2, X2, X0, #9 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADD W13, W1, W2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W1, W13 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 42bfcc <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.6+0x368> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| HINT #0 | N/A | ||||||||||||||||||
| LDP D12, D13, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | vect (50.0%) |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP D14, D15, [SP, #96] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | vect (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #112 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #112 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W2, W2, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W0, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| B 42bcd4 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.6+0x70> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| Name | Coverage (%) | Time (s) |
|---|---|---|
| ▼advec_mom_kernel(int, int, int, int, clover::Buffer2D | 3.67 | 5.01 |
| ▼Loop 211 - context.h:46-69 - exec– | 0.00 | 0.00 |
| ▼Loop 210 - advec_mom.cpp:110-139 - exec– | 0.01 | 0.02 |
| ○Loop 209 - context.h:69-69 - exec | 3.66 | 4.86 |
