| Function: advec_mom_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, ... | Module: exec | Source: advec_mom.cpp:62-66 [...] | Coverage (incl. loops): 2.19% | (excl. loops): 0.00% |
|---|
| Function: advec_mom_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, ... | Module: exec | Source: advec_mom.cpp:62-66 [...] | Coverage (incl. loops): 2.19% | (excl. loops): 0.00% |
|---|
/home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/build/CloverLeaf2.0-CXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
/home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/build/CloverLeaf2.0-CXX/src/omp/advec_mom.cpp: 62 - 66 |
-------------------------------------------------------------------------------- |
62: #pragma omp parallel for simd collapse(2) |
63: for (int j = (y_min - 2 + 1); j < (y_max + 2 + 2); j++) { |
64: for (int i = (x_min - 2 + 1); i < (x_max + 2 + 2); i++) { |
65: post_vol(i, j) = volume(i, j); |
66: pre_vol(i, j) = post_vol(i, j) + vol_flux_y(i + 0, j + 1) - vol_flux_y(i, j); |
0x42ef28 STP X29, X30, [SP, #-128]! |
0x42ef2c ADD X29, SP, #0 |
0x42ef30 STP X19, X20, [SP, #16] |
0x42ef34 ORR X19, XZR, X0 |
0x42ef38 STP X23, X24, [SP, #48] |
0x42ef3c LDP W23, W1, [X0, #40] |
0x42ef40 LDR W0, [X0, #32] |
0x42ef44 LDR W3, [X19, #36] |
0x42ef48 ADD W2, W1, #4 |
0x42ef4c SUB W23, W23, #1 |
0x42ef50 SUB W4, W0, #1 |
0x42ef54 STP W2, W4, [SP, #108] |
0x42ef58 CMP W23, W2 |
0x42ef5c B.GE 42f35c |
0x42ef60 ADD W24, W3, #4 |
0x42ef64 STP X21, X22, [SP, #32] |
0x42ef68 SUB W21, W2, W23 |
0x42ef6c CMP W4, W24 |
0x42ef70 B.GE 42f36c |
0x42ef74 SUB W5, W24, W4 |
0x42ef78 MUL W22, W21, W5 |
0x42ef7c STR W5, [SP, #116] |
0x42ef80 BL 410210 |
0x42ef84 ORR W20, WZR, W0 |
0x42ef88 BL 410240 |
0x42ef8c UDIV W7, W22, W20 |
0x42ef90 ORR W6, WZR, W0 |
0x42ef94 MSUB W8, W7, W20, W22 |
0x42ef98 CMP W0, W8 |
0x42ef9c B.CC 42f380 |
0x42efa0 MADD W17, W7, W6, W8 |
0x42efa4 ADD W9, W7, W17 |
0x42efa8 STR W9, [SP, #120] |
0x42efac CMP W17, W9 |
0x42efb0 B.CS 42f36c |
0x42efb4 LDP W10, W11, [SP, #112] |
0x42efb8 STP X25, X26, [SP, #64] |
0x42efbc STP X27, X28, [SP, #80] |
0x42efc0 UDIV W12, W17, W11 |
0x42efc4 LDP X22, X21, [X19] |
0x42efc8 LDP X20, X19, [X19, #16] |
0x42efcc MSUB W13, W12, W11, W17 |
0x42efd0 ADD W14, W12, W23 |
0x42efd4 SBFM X4, X14, #0, #31 |
0x42efd8 ADD W23, W13, W10 |
0x42efdc SUB W24, W24, W23 |
0x42efe0 CMP W7, W24 |
0x42efe4 CSEL W6, W7, W24, #9 |
0x42efe8 ADD W15, W17, W6 |
0x42efec CMP W17, W15 |
0x42eff0 B.CS 42f33c |
0x42eff4 HINT #0 |
0x42eff8 HINT #0 |
0x42effc HINT #0 |
(184) 0x42f000 LDR X26, [X22] |
(184) 0x42f004 ADD X24, X4, #1 |
(184) 0x42f008 LDR X16, [X19] |
(184) 0x42f00c LDR X18, [X20] |
(184) 0x42f010 MADD X13, X4, X26, X26 |
(184) 0x42f014 LDR X25, [X21] |
(184) 0x42f018 MUL X10, X4, X16 |
(184) 0x42f01c LDR X17, [X19, #16] |
(184) 0x42f020 SUB X8, X13, X26 |
(184) 0x42f024 MUL X7, X4, X18 |
(184) 0x42f028 LDR X16, [X20, #16] |
(184) 0x42f02c MUL X11, X4, X25 |
(184) 0x42f030 LDR X18, [X21, #16] |
(184) 0x42f034 LDR X12, [X22, #16] |
(184) 0x42f038 CMP W6, #1 |
(184) 0x42f03c B.EQ 42f2d4 |
(184) 0x42f040 UBFM W27, W6, #1, #31 |
(184) 0x42f044 SBFM X1, X23, #0, #31 |
(184) 0x42f048 UBFM X9, X27, #60, #59 |
(184) 0x42f04c ADD X30, X11, X1 |
(184) 0x42f050 SUB X2, X9, #16 |
(184) 0x42f054 ADD X28, X10, X1 |
(184) 0x42f058 ADD X5, X18, X30,LSL #3 |
(184) 0x42f05c UBFM X3, X2, #4, #63 |
(184) 0x42f060 ADD X27, X13, X1 |
(184) 0x42f064 ADD X4, X17, X28,LSL #3 |
(184) 0x42f068 ADD X14, X3, #1 |
(184) 0x42f06c ADD X26, X8, X1 |
(184) 0x42f070 ADD X3, X12, X27,LSL #3 |
(184) 0x42f074 ANDS X25, X14, #0x7 |
(184) 0x42f078 ADD X14, X7, X1 |
(184) 0x42f07c ADD X2, X12, X26,LSL #3 |
(184) 0x42f080 ADD X1, X16, X14,LSL #3 |
(184) 0x42f084 MOVZ X0, #0 |
(184) 0x42f088 UBFM X30, X30, #61, #60 |
(184) 0x42f08c UBFM X28, X28, #61, #60 |
(184) 0x42f090 UBFM X27, X27, #61, #60 |
(184) 0x42f094 UBFM X26, X26, #61, #60 |
(184) 0x42f098 UBFM X14, X14, #61, #60 |
(184) 0x42f09c B.EQ 42f1b8 |
(184) 0x42f0a0 CMP X25, #1 |
(184) 0x42f0a4 B.EQ 42f190 |
(184) 0x42f0a8 CMP X25, #2 |
(184) 0x42f0ac B.EQ 42f170 |
(184) 0x42f0b0 CMP X25, #3 |
(184) 0x42f0b4 B.EQ 42f150 |
(184) 0x42f0b8 CMP X25, #4 |
(184) 0x42f0bc B.EQ 42f130 |
(184) 0x42f0c0 CMP X25, #5 |
(184) 0x42f0c4 B.EQ 42f110 |
(184) 0x42f0c8 CMP X25, #6 |
(184) 0x42f0cc B.EQ 42f0f0 |
(184) 0x42f0d0 LDR Q0, [X18, X30] |
(184) 0x42f0d4 MOVZ X0, #16 |
(184) 0x42f0d8 STR Q0, [X17, X28] |
(184) 0x42f0dc LDR Q30, [X12, X27] |
(184) 0x42f0e0 LDR Q31, [X12, X26] |
(184) 0x42f0e4 FADD V1.2D, V30.2D, V0.2D |
(184) 0x42f0e8 FSUB V2.2D, V1.2D, V31.2D |
(184) 0x42f0ec STR Q2, [X16, X14] |
(184) 0x42f0f0 LDR Q3, [X5, X0] |
(184) 0x42f0f4 STR Q3, [X4, X0] |
(184) 0x42f0f8 LDR Q28, [X3, X0] |
(184) 0x42f0fc LDR Q29, [X2, X0] |
(184) 0x42f100 FADD V4.2D, V28.2D, V3.2D |
(184) 0x42f104 FSUB V5.2D, V4.2D, V29.2D |
(184) 0x42f108 STR Q5, [X1, X0] |
(184) 0x42f10c ADD X0, X0, #16 |
(184) 0x42f110 LDR Q6, [X5, X0] |
(184) 0x42f114 STR Q6, [X4, X0] |
(184) 0x42f118 LDR Q26, [X3, X0] |
(184) 0x42f11c LDR Q27, [X2, X0] |
(184) 0x42f120 FADD V7.2D, V26.2D, V6.2D |
(184) 0x42f124 FSUB V16.2D, V7.2D, V27.2D |
(184) 0x42f128 STR Q16, [X1, X0] |
(184) 0x42f12c ADD X0, X0, #16 |
(184) 0x42f130 LDR Q17, [X5, X0] |
(184) 0x42f134 STR Q17, [X4, X0] |
(184) 0x42f138 LDR Q24, [X3, X0] |
(184) 0x42f13c LDR Q25, [X2, X0] |
(184) 0x42f140 FADD V18.2D, V24.2D, V17.2D |
(184) 0x42f144 FSUB V19.2D, V18.2D, V25.2D |
(184) 0x42f148 STR Q19, [X1, X0] |
(184) 0x42f14c ADD X0, X0, #16 |
(184) 0x42f150 LDR Q20, [X5, X0] |
(184) 0x42f154 STR Q20, [X4, X0] |
(184) 0x42f158 LDR Q22, [X3, X0] |
(184) 0x42f15c LDR Q23, [X2, X0] |
(184) 0x42f160 FADD V21.2D, V22.2D, V20.2D |
(184) 0x42f164 FSUB V0.2D, V21.2D, V23.2D |
(184) 0x42f168 STR Q0, [X1, X0] |
(184) 0x42f16c ADD X0, X0, #16 |
(184) 0x42f170 LDR Q30, [X5, X0] |
(184) 0x42f174 STR Q30, [X4, X0] |
(184) 0x42f178 LDR Q31, [X3, X0] |
(184) 0x42f17c LDR Q1, [X2, X0] |
(184) 0x42f180 FADD V2.2D, V31.2D, V30.2D |
(184) 0x42f184 FSUB V3.2D, V2.2D, V1.2D |
(184) 0x42f188 STR Q3, [X1, X0] |
(184) 0x42f18c ADD X0, X0, #16 |
(184) 0x42f190 LDR Q28, [X5, X0] |
(184) 0x42f194 STR Q28, [X4, X0] |
(184) 0x42f198 LDR Q29, [X3, X0] |
(184) 0x42f19c LDR Q4, [X2, X0] |
(184) 0x42f1a0 FADD V5.2D, V29.2D, V28.2D |
(184) 0x42f1a4 FSUB V6.2D, V5.2D, V4.2D |
(184) 0x42f1a8 STR Q6, [X1, X0] |
(184) 0x42f1ac ADD X0, X0, #16 |
(184) 0x42f1b0 CMP X9, X0 |
(184) 0x42f1b4 B.EQ 42f2c8 |
(184) 0x42f1b8 STR W6, [SP, #124] |
(185) 0x42f1bc LDR Q26, [X5, X0] |
(185) 0x42f1c0 ADD X6, X0, #16 |
(185) 0x42f1c4 ADD X30, X0, #32 |
(185) 0x42f1c8 ADD X28, X0, #48 |
(185) 0x42f1cc ADD X27, X0, #64 |
(185) 0x42f1d0 ADD X26, X0, #80 |
(185) 0x42f1d4 ADD X25, X0, #96 |
(185) 0x42f1d8 ADD X14, X0, #112 |
(185) 0x42f1dc STR Q26, [X4, X0] |
(185) 0x42f1e0 LDR Q27, [X3, X0] |
(185) 0x42f1e4 LDR Q7, [X2, X0] |
(185) 0x42f1e8 FADD V16.2D, V27.2D, V26.2D |
(185) 0x42f1ec FSUB V17.2D, V16.2D, V7.2D |
(185) 0x42f1f0 STR Q17, [X1, X0] |
(185) 0x42f1f4 ADD X0, X0, #128 |
(185) 0x42f1f8 LDR Q24, [X5, X6] |
(185) 0x42f1fc STR Q24, [X4, X6] |
(185) 0x42f200 LDR Q25, [X3, X6] |
(185) 0x42f204 LDR Q18, [X2, X6] |
(185) 0x42f208 FADD V19.2D, V25.2D, V24.2D |
(185) 0x42f20c FSUB V20.2D, V19.2D, V18.2D |
(185) 0x42f210 STR Q20, [X1, X6] |
(185) 0x42f214 LDR Q21, [X5, X30] |
(185) 0x42f218 STR Q21, [X4, X30] |
(185) 0x42f21c LDR Q22, [X3, X30] |
(185) 0x42f220 LDR Q23, [X2, X30] |
(185) 0x42f224 FADD V0.2D, V22.2D, V21.2D |
(185) 0x42f228 FSUB V30.2D, V0.2D, V23.2D |
(185) 0x42f22c STR Q30, [X1, X30] |
(185) 0x42f230 LDR Q31, [X5, X28] |
(185) 0x42f234 STR Q31, [X4, X28] |
(185) 0x42f238 LDR Q1, [X3, X28] |
(185) 0x42f23c LDR Q2, [X2, X28] |
(185) 0x42f240 FADD V3.2D, V1.2D, V31.2D |
(185) 0x42f244 FSUB V28.2D, V3.2D, V2.2D |
(185) 0x42f248 STR Q28, [X1, X28] |
(185) 0x42f24c LDR Q29, [X5, X27] |
(185) 0x42f250 STR Q29, [X4, X27] |
(185) 0x42f254 LDR Q4, [X3, X27] |
(185) 0x42f258 LDR Q5, [X2, X27] |
(185) 0x42f25c FADD V6.2D, V4.2D, V29.2D |
(185) 0x42f260 FSUB V26.2D, V6.2D, V5.2D |
(185) 0x42f264 STR Q26, [X1, X27] |
(185) 0x42f268 LDR Q27, [X5, X26] |
(185) 0x42f26c STR Q27, [X4, X26] |
(185) 0x42f270 LDR Q7, [X3, X26] |
(185) 0x42f274 LDR Q16, [X2, X26] |
(185) 0x42f278 FADD V17.2D, V7.2D, V27.2D |
(185) 0x42f27c FSUB V24.2D, V17.2D, V16.2D |
(185) 0x42f280 STR Q24, [X1, X26] |
(185) 0x42f284 LDR Q25, [X5, X25] |
(185) 0x42f288 STR Q25, [X4, X25] |
(185) 0x42f28c LDR Q18, [X3, X25] |
(185) 0x42f290 LDR Q19, [X2, X25] |
(185) 0x42f294 FADD V20.2D, V18.2D, V25.2D |
(185) 0x42f298 FSUB V21.2D, V20.2D, V19.2D |
(185) 0x42f29c STR Q21, [X1, X25] |
(185) 0x42f2a0 LDR Q22, [X5, X14] |
(185) 0x42f2a4 STR Q22, [X4, X14] |
(185) 0x42f2a8 LDR Q23, [X3, X14] |
(185) 0x42f2ac LDR Q0, [X2, X14] |
(185) 0x42f2b0 FADD V30.2D, V23.2D, V22.2D |
(185) 0x42f2b4 FSUB V31.2D, V30.2D, V0.2D |
(185) 0x42f2b8 STR Q31, [X1, X14] |
(185) 0x42f2bc CMP X9, X0 |
(185) 0x42f2c0 B.NE 42f1bc |
(184) 0x42f2c4 LDR W6, [SP, #124] |
(184) 0x42f2c8 TBZ W6, #0, 42f308 |
(184) 0x42f2cc AND W9, W6, #0xfffffffe |
(184) 0x42f2d0 ADD W23, W23, W9 |
(184) 0x42f2d4 SBFM X5, X23, #0, #31 |
(184) 0x42f2d8 ADD X11, X11, X5 |
(184) 0x42f2dc ADD X10, X10, X5 |
(184) 0x42f2e0 ADD X13, X13, X5 |
(184) 0x42f2e4 ADD X8, X8, X5 |
(184) 0x42f2e8 LDR D1, [X18, X11,LSL #3] |
(184) 0x42f2ec ADD X7, X7, X5 |
(184) 0x42f2f0 STR D1, [X17, X10,LSL #3] |
(184) 0x42f2f4 LDR D2, [X12, X13,LSL #3] |
(184) 0x42f2f8 LDR D3, [X12, X8,LSL #3] |
(184) 0x42f2fc FADD D28, D1, D2 |
(184) 0x42f300 FSUB D29, D28, D3 |
(184) 0x42f304 STR D29, [X16, X7,LSL #3] |
(184) 0x42f308 ORR W17, WZR, W15 |
(184) 0x42f30c LDR W15, [SP, #108] |
(184) 0x42f310 ORR X4, XZR, X24 |
(184) 0x42f314 CMP W15, W24 |
(184) 0x42f318 B.LE 42f350 |
(184) 0x42f31c LDR W16, [SP, #120] |
(184) 0x42f320 LDP W23, W24, [SP, #112] |
(184) 0x42f324 SUB W7, W16, W17 |
(184) 0x42f328 CMP W7, W24 |
(184) 0x42f32c CSEL W6, W7, W24, #9 |
(184) 0x42f330 ADD W15, W17, W6 |
(184) 0x42f334 CMP W17, W15 |
(184) 0x42f338 B.CC 42f000 |
(186) 0x42f33c LDR W15, [SP, #108] |
(186) 0x42f340 ADD X24, X4, #1 |
(186) 0x42f344 ORR X4, XZR, X24 |
(186) 0x42f348 CMP W15, W24 |
(186) 0x42f34c B.GT 42f31c |
0x42f350 LDP X21, X22, [SP, #32] |
0x42f354 LDP X25, X26, [SP, #64] |
0x42f358 LDP X27, X28, [SP, #80] |
0x42f35c LDP X19, X20, [SP, #16] |
0x42f360 LDP X23, X24, [SP, #48] |
0x42f364 LDP X29, X30, [SP], #128 |
0x42f368 RET |
0x42f36c LDP X21, X22, [SP, #32] |
0x42f370 LDP X19, X20, [SP, #16] |
0x42f374 LDP X23, X24, [SP, #48] |
0x42f378 LDP X29, X30, [SP], #128 |
0x42f37c RET |
0x42f380 ADD W7, W7, #1 |
0x42f384 MOVZ W8, #0 |
0x42f388 B 42efa0 |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►98.45+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| Path / |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run gcc_4
| Source file and lines | advec_mom.cpp:62-66 |
| Module | exec |
| nb instructions | 69 |
| nb uops | 66 |
| loop length | 276 |
| used w registers | 23 |
| used x registers | 16 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 0 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 20 |
| micro-operation queue | 8.25 cycles |
| front end | 8.25 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 5.00 | 5.00 | 8.50 | 8.50 | 8.50 | 8.50 | 0.00 | 0.00 | 0.00 | 0.00 | 8.50 | 8.17 | 8.33 | 4.50 | 4.50 |
| cycles | 5.00 | 5.00 | 8.50 | 8.50 | 8.50 | 8.50 | 0.00 | 0.00 | 0.00 | 0.00 | 8.50 | 8.17 | 8.33 | 4.50 | 4.50 |
| Cycles executing div or sqrt instructions | 10.00-25.00 |
| Front-end | 8.25 |
| Dispatch | 8.50 |
| DIV/SQRT | 10.00-25.00 |
| Overall L1 | 10.00-25.00 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | 0% |
| other | 0% |
| all | 26% |
| load | 42% |
| store | 38% |
| mul | 12% |
| add-sub | 13% |
| fma | 12% |
| div/sqrt | 12% |
| other | 20% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #-128]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X19, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDP W23, W1, [X0, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDR W0, [X0, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR W3, [X19, #36] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| ADD W2, W1, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W23, W23, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W4, W0, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STP W2, W4, [SP, #108] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| CMP W23, W2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 42f35c <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.2+0x434> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W24, W3, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| SUB W21, W2, W23 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W4, W24 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 42f36c <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.2+0x444> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| SUB W5, W24, W4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MUL W22, W21, W5 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| STR W5, [SP, #116] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| BL 410210 <@plt_start@+0x1f0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W20, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410240 <@plt_start@+0x220> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| UDIV W7, W22, W20 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| ORR W6, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MSUB W8, W7, W20, W22 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| CMP W0, W8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CC 42f380 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.2+0x458> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W17, W7, W6, W8 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W9, W7, W17 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR W9, [SP, #120] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| CMP W17, W9 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 42f36c <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.2+0x444> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP W10, W11, [SP, #112] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| UDIV W12, W17, W11 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | scal (12.5%) |
| LDP X22, X21, [X19] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X20, X19, [X19, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| MSUB W13, W12, W11, W17 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W14, W12, W23 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SBFM X4, X14, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| ADD W23, W13, W10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W24, W24, W23 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W7, W24 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| CSEL W6, W7, W24, #9 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W15, W17, W6 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W17, W15 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 42f33c <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.2+0x414> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #128 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #128 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W7, W7, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W8, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| B 42efa0 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.2+0x78> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run gcc_4
| Source file and lines | advec_mom.cpp:62-66 |
| Module | exec |
| nb instructions | 69 |
| nb uops | 66 |
| loop length | 276 |
| used w registers | 23 |
| used x registers | 16 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 0 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 20 |
| micro-operation queue | 8.25 cycles |
| front end | 8.25 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 5.00 | 5.00 | 8.50 | 8.50 | 8.50 | 8.50 | 0.00 | 0.00 | 0.00 | 0.00 | 8.50 | 8.17 | 8.33 | 4.50 | 4.50 |
| cycles | 5.00 | 5.00 | 8.50 | 8.50 | 8.50 | 8.50 | 0.00 | 0.00 | 0.00 | 0.00 | 8.50 | 8.17 | 8.33 | 4.50 | 4.50 |
| Cycles executing div or sqrt instructions | 10.00-25.00 |
| Front-end | 8.25 |
| Dispatch | 8.50 |
| DIV/SQRT | 10.00-25.00 |
| Overall L1 | 10.00-25.00 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | 0% |
| other | 0% |
| all | 26% |
| load | 42% |
| store | 38% |
| mul | 12% |
| add-sub | 13% |
| fma | 12% |
| div/sqrt | 12% |
| other | 20% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #-128]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X19, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDP W23, W1, [X0, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDR W0, [X0, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR W3, [X19, #36] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| ADD W2, W1, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W23, W23, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W4, W0, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STP W2, W4, [SP, #108] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| CMP W23, W2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 42f35c <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.2+0x434> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W24, W3, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| SUB W21, W2, W23 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W4, W24 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 42f36c <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.2+0x444> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| SUB W5, W24, W4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MUL W22, W21, W5 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| STR W5, [SP, #116] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| BL 410210 <@plt_start@+0x1f0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W20, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410240 <@plt_start@+0x220> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| UDIV W7, W22, W20 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| ORR W6, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MSUB W8, W7, W20, W22 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| CMP W0, W8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CC 42f380 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.2+0x458> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W17, W7, W6, W8 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W9, W7, W17 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR W9, [SP, #120] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| CMP W17, W9 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 42f36c <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.2+0x444> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP W10, W11, [SP, #112] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| UDIV W12, W17, W11 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | scal (12.5%) |
| LDP X22, X21, [X19] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X20, X19, [X19, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| MSUB W13, W12, W11, W17 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W14, W12, W23 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SBFM X4, X14, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| ADD W23, W13, W10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W24, W24, W23 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W7, W24 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| CSEL W6, W7, W24, #9 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W15, W17, W6 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W17, W15 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 42f33c <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.2+0x414> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #128 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #128 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W7, W7, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W8, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| B 42efa0 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.2+0x78> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| Name | Coverage (%) | Time (s) |
|---|---|---|
| ▼advec_mom_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, ... | 2.19 | 2.94 |
| ▼Loop 186 - advec_mom.cpp:64-66 - exec– | 0.00 | 0.00 |
| ▼Loop 184 - advec_mom.cpp:64-66 - exec– | 0.00 | 0.02 |
| ○Loop 185 - advec_mom.cpp:65-66 - exec | 2.18 | 2.85 |
