| Function: advec_cell_kernel(int, int, int, int, int, int, clover::Buffer1D<double>&, clover::Buffer1 ... | Module: exec | Source: advec_cell.cpp:208-216 [...] | Coverage (incl. loops): 4.13% | (excl. loops): 0.00% |
|---|
| Function: advec_cell_kernel(int, int, int, int, int, int, clover::Buffer1D<double>&, clover::Buffer1 ... | Module: exec | Source: advec_cell.cpp:208-216 [...] | Coverage (incl. loops): 4.13% | (excl. loops): 0.00% |
|---|
/home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/build/CloverLeaf2.0-CXX/src/omp/advec_cell.cpp: 208 - 216 |
-------------------------------------------------------------------------------- |
208: #pragma omp parallel for simd collapse(2) |
209: for (int j = (y_min + 1); j < (y_max + 2); j++) { |
210: for (int i = (x_min + 1); i < (x_max + 2); i++) { |
211: double pre_mass_s = density1(i, j) * pre_vol(i, j); |
212: double post_mass_s = pre_mass_s + mass_flux_y(i, j) - mass_flux_y(i + 0, j + 1); |
213: double post_ener_s = (energy1(i, j) * pre_mass_s + ener_flux(i, j) - ener_flux(i + 0, j + 1)) / post_mass_s; |
214: double advec_vol_s = pre_vol(i, j) + vol_flux_y(i, j) - vol_flux_y(i + 0, j + 1); |
215: density1(i, j) = post_mass_s / advec_vol_s; |
216: energy1(i, j) = post_ener_s; |
/home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/build/CloverLeaf2.0-CXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x42d220 STP X29, X30, [SP, #-240]! |
0x42d224 ADD X29, SP, #0 |
0x42d228 STP X19, X20, [SP, #16] |
0x42d22c ORR X20, XZR, X0 |
0x42d230 STP X21, X22, [SP, #32] |
0x42d234 LDP W22, W2, [X0, #56] |
0x42d238 LDR W1, [X0, #48] |
0x42d23c LDR W0, [X0, #52] |
0x42d240 ADD W3, W2, #2 |
0x42d244 ADD W22, W22, #1 |
0x42d248 ADD W4, W1, #1 |
0x42d24c STP W3, W4, [SP, #168] |
0x42d250 CMP W22, W3 |
0x42d254 B.GE 42d614 |
0x42d258 ADD W19, W0, #2 |
0x42d25c STP X23, X24, [SP, #48] |
0x42d260 SUB W23, W3, W22 |
0x42d264 CMP W4, W19 |
0x42d268 B.GE 42d610 |
0x42d26c SUB W5, W19, W4 |
0x42d270 MUL W24, W23, W5 |
0x42d274 STR W5, [SP, #200] |
0x42d278 BL 410210 |
0x42d27c ORR W21, WZR, W0 |
0x42d280 BL 410240 |
0x42d284 UDIV W7, W24, W21 |
0x42d288 ORR W6, WZR, W0 |
0x42d28c MSUB W8, W7, W21, W24 |
0x42d290 CMP W0, W8 |
0x42d294 B.CC 42d640 |
0x42d298 MADD W21, W7, W6, W8 |
0x42d29c ADD W9, W7, W21 |
0x42d2a0 STR W9, [SP, #204] |
0x42d2a4 CMP W21, W9 |
0x42d2a8 B.CS 42d610 |
0x42d2ac LDR W10, [SP, #200] |
0x42d2b0 STP X25, X26, [SP, #64] |
0x42d2b4 LDR W14, [SP, #172] |
0x42d2b8 UDIV W11, W21, W10 |
0x42d2bc STP X27, X28, [SP, #80] |
0x42d2c0 LDR X16, [X20] |
0x42d2c4 LDP X30, X23, [X20, #32] |
0x42d2c8 LDR X17, [X20, #8] |
0x42d2cc STR X16, [SP, #208] |
0x42d2d0 MSUB W12, W11, W10, W21 |
0x42d2d4 LDR X18, [X20, #16] |
0x42d2d8 ADD W13, W11, W22 |
0x42d2dc SBFM X22, X13, #0, #31 |
0x42d2e0 LDR X25, [X20, #24] |
0x42d2e4 ADD W15, W12, W14 |
0x42d2e8 STR X17, [SP, #224] |
0x42d2ec SUB W4, W19, W15 |
0x42d2f0 STR W15, [SP, #112] |
0x42d2f4 STR X18, [SP, #216] |
0x42d2f8 STR X25, [SP, #232] |
0x42d2fc HINT #0 |
(172) 0x42d300 CMP W7, W4 |
(172) 0x42d304 CSEL W4, W7, W4, LS |
(172) 0x42d308 ADD W26, W21, W4 |
(172) 0x42d30c STR W26, [SP, #116] |
(172) 0x42d310 CMP W21, W26 |
(172) 0x42d314 B.CS 42d5e8 |
(172) 0x42d318 LDP X1, X0, [SP, #208] |
(172) 0x42d31c LDP X19, X5, [SP, #224] |
(172) 0x42d320 LDR X27, [X23] |
(172) 0x42d324 LDR X2, [X0] |
(172) 0x42d328 LDR X21, [X5] |
(172) 0x42d32c MUL X28, X27, X22 |
(172) 0x42d330 LDR X20, [X30] |
(172) 0x42d334 MUL X14, X2, X22 |
(172) 0x42d338 LDR X9, [X19] |
(172) 0x42d33c ADD X15, X27, X28 |
(172) 0x42d340 MUL X27, X21, X22 |
(172) 0x42d344 LDR X6, [X1] |
(172) 0x42d348 ADD X24, X2, X14 |
(172) 0x42d34c MUL X3, X22, X20 |
(172) 0x42d350 LDR X16, [X30, #16] |
(172) 0x42d354 ADD X8, X21, X27 |
(172) 0x42d358 MUL X10, X22, X9 |
(172) 0x42d35c LDR X12, [X23, #16] |
(172) 0x42d360 MUL X7, X22, X6 |
(172) 0x42d364 LDR X13, [X0, #16] |
(172) 0x42d368 STR X16, [SP, #104] |
(172) 0x42d36c LDR X11, [X5, #16] |
(172) 0x42d370 LDR X20, [X1, #16] |
(172) 0x42d374 LDR X19, [X19, #16] |
(172) 0x42d378 STP X8, X15, [SP, #120] |
(172) 0x42d37c STP X10, X24, [SP, #136] |
(172) 0x42d380 STP X3, X7, [SP, #152] |
(172) 0x42d384 CMP W4, #1 |
(172) 0x42d388 B.EQ 42d548 |
(172) 0x42d38c UBFM W18, W4, #1, #31 |
(172) 0x42d390 MOVZ X0, #0 |
(172) 0x42d394 LDRSW X1, [SP, #112] |
(172) 0x42d398 ADD X6, X15, X1 |
(172) 0x42d39c ADD X5, X27, X1 |
(172) 0x42d3a0 UBFM X15, X6, #61, #60 |
(172) 0x42d3a4 ADD X17, X7, X1 |
(172) 0x42d3a8 ADD X6, X12, X6,LSL #3 |
(172) 0x42d3ac ADD X7, X10, X1 |
(172) 0x42d3b0 ADD X26, X3, X1 |
(172) 0x42d3b4 ADD X3, X20, X17,LSL #3 |
(172) 0x42d3b8 STR X15, [SP, #176] |
(172) 0x42d3bc UBFM X15, X5, #61, #60 |
(172) 0x42d3c0 ADD X2, X14, X1 |
(172) 0x42d3c4 ADD X24, X24, X1 |
(172) 0x42d3c8 ADD X25, X28, X1 |
(172) 0x42d3cc ADD X10, X16, X26,LSL #3 |
(172) 0x42d3d0 ADD X1, X8, X1 |
(172) 0x42d3d4 STR X15, [SP, #184] |
(172) 0x42d3d8 ADD X9, X13, X2,LSL #3 |
(172) 0x42d3dc UBFM X15, X1, #61, #60 |
(172) 0x42d3e0 ADD X8, X13, X24,LSL #3 |
(172) 0x42d3e4 UBFM X21, X2, #61, #60 |
(172) 0x42d3e8 UBFM X16, X7, #61, #60 |
(172) 0x42d3ec ADD X2, X19, X7,LSL #3 |
(172) 0x42d3f0 UBFM X17, X17, #61, #60 |
(172) 0x42d3f4 STR X15, [SP, #192] |
(172) 0x42d3f8 ADD X7, X12, X25,LSL #3 |
(172) 0x42d3fc UBFM X26, X26, #61, #60 |
(172) 0x42d400 ADD X5, X11, X5,LSL #3 |
(172) 0x42d404 UBFM X24, X24, #61, #60 |
(172) 0x42d408 UBFM X25, X25, #61, #60 |
(172) 0x42d40c ADD X1, X11, X1,LSL #3 |
(172) 0x42d410 UBFM X15, X18, #60, #59 |
(172) 0x42d414 TBZ W18, #0, 42d484 |
(172) 0x42d418 LDR X0, [SP, #104] |
(172) 0x42d41c LDR X18, [SP, #176] |
(172) 0x42d420 LDR Q30, [X20, X17] |
(172) 0x42d424 LDR Q31, [X0, X26] |
(172) 0x42d428 MOVZ X0, #16 |
(172) 0x42d42c LDR X26, [SP, #184] |
(172) 0x42d430 LDR Q0, [X12, X25] |
(172) 0x42d434 LDR Q29, [X12, X18] |
(172) 0x42d438 FMUL V3.2D, V30.2D, V31.2D |
(172) 0x42d43c LDR Q2, [X13, X24] |
(172) 0x42d440 LDR Q27, [X11, X26] |
(172) 0x42d444 LDR Q28, [X13, X21] |
(172) 0x42d448 FSUB V4.2D, V0.2D, V29.2D |
(172) 0x42d44c LDR X21, [SP, #192] |
(172) 0x42d450 FSUB V5.2D, V3.2D, V2.2D |
(172) 0x42d454 LDR Q1, [X19, X16] |
(172) 0x42d458 FADD V16.2D, V27.2D, V31.2D |
(172) 0x42d45c LDR Q26, [X11, X21] |
(172) 0x42d460 FADD V6.2D, V5.2D, V28.2D |
(172) 0x42d464 FMLA V4.2D, V1.2D, V3.2D |
(172) 0x42d468 FSUB V17.2D, V16.2D, V26.2D |
(172) 0x42d46c FDIV V18.2D, V6.2D, V17.2D |
(172) 0x42d470 FDIV V7.2D, V4.2D, V6.2D |
(172) 0x42d474 STR Q18, [X20, X17] |
(172) 0x42d478 STR Q7, [X19, X16] |
(172) 0x42d47c CMP X0, X15 |
(172) 0x42d480 B.EQ 42d534 |
(173) 0x42d484 LDR Q19, [X10, X0] |
(173) 0x42d488 ADD X17, X0, #16 |
(173) 0x42d48c LDR Q20, [X3, X0] |
(173) 0x42d490 LDR Q21, [X7, X0] |
(173) 0x42d494 LDR Q22, [X6, X0] |
(173) 0x42d498 LDR Q23, [X5, X0] |
(173) 0x42d49c FMUL V24.2D, V20.2D, V19.2D |
(173) 0x42d4a0 LDR Q25, [X8, X0] |
(173) 0x42d4a4 LDR Q30, [X2, X0] |
(173) 0x42d4a8 FSUB V0.2D, V21.2D, V22.2D |
(173) 0x42d4ac LDR Q1, [X1, X0] |
(173) 0x42d4b0 FADD V28.2D, V23.2D, V19.2D |
(173) 0x42d4b4 LDR Q31, [X9, X0] |
(173) 0x42d4b8 FSUB V2.2D, V24.2D, V25.2D |
(173) 0x42d4bc FMLA V0.2D, V30.2D, V24.2D |
(173) 0x42d4c0 FSUB V29.2D, V28.2D, V1.2D |
(173) 0x42d4c4 FADD V3.2D, V2.2D, V31.2D |
(173) 0x42d4c8 FDIV V4.2D, V3.2D, V29.2D |
(173) 0x42d4cc FDIV V5.2D, V0.2D, V3.2D |
(173) 0x42d4d0 STR Q4, [X3, X0] |
(173) 0x42d4d4 STR Q5, [X2, X0] |
(173) 0x42d4d8 ADD X0, X0, #32 |
(173) 0x42d4dc LDR Q6, [X10, X17] |
(173) 0x42d4e0 LDR Q7, [X3, X17] |
(173) 0x42d4e4 LDR Q27, [X7, X17] |
(173) 0x42d4e8 LDR Q16, [X6, X17] |
(173) 0x42d4ec LDR Q26, [X5, X17] |
(173) 0x42d4f0 FMUL V17.2D, V7.2D, V6.2D |
(173) 0x42d4f4 LDR Q18, [X8, X17] |
(173) 0x42d4f8 LDR Q19, [X2, X17] |
(173) 0x42d4fc FSUB V23.2D, V27.2D, V16.2D |
(173) 0x42d500 LDR Q20, [X1, X17] |
(173) 0x42d504 FADD V21.2D, V26.2D, V6.2D |
(173) 0x42d508 LDR Q22, [X9, X17] |
(173) 0x42d50c FSUB V24.2D, V17.2D, V18.2D |
(173) 0x42d510 FMLA V23.2D, V19.2D, V17.2D |
(173) 0x42d514 FSUB V25.2D, V21.2D, V20.2D |
(173) 0x42d518 FADD V30.2D, V24.2D, V22.2D |
(173) 0x42d51c FDIV V0.2D, V30.2D, V25.2D |
(173) 0x42d520 FDIV V1.2D, V23.2D, V30.2D |
(173) 0x42d524 STR Q0, [X3, X17] |
(173) 0x42d528 STR Q1, [X2, X17] |
(173) 0x42d52c CMP X0, X15 |
(173) 0x42d530 B.NE 42d484 |
(172) 0x42d534 TBZ W4, #0, 42d5e4 |
(172) 0x42d538 LDR W3, [SP, #112] |
(172) 0x42d53c AND W4, W4, #0xfffffffe |
(172) 0x42d540 ADD W10, W3, W4 |
(172) 0x42d544 STR W10, [SP, #112] |
(172) 0x42d548 LDR X5, [SP, #128] |
(172) 0x42d54c LDRSW X9, [SP, #112] |
(172) 0x42d550 LDR X2, [SP, #160] |
(172) 0x42d554 LDR X25, [SP, #152] |
(172) 0x42d558 ADD X28, X28, X9 |
(172) 0x42d55c ADD X1, X5, X9 |
(172) 0x42d560 LDR X8, [SP, #104] |
(172) 0x42d564 ADD X14, X14, X9 |
(172) 0x42d568 ADD X27, X27, X9 |
(172) 0x42d56c ADD X16, X2, X9 |
(172) 0x42d570 LDR D2, [X12, X28,LSL #3] |
(172) 0x42d574 UBFM X7, X16, #61, #60 |
(172) 0x42d578 ADD X6, X25, X9 |
(172) 0x42d57c LDR D5, [X12, X1,LSL #3] |
(172) 0x42d580 LDR X12, [SP, #144] |
(172) 0x42d584 LDR D31, [X8, X6,LSL #3] |
(172) 0x42d588 LDR D3, [X20, X7] |
(172) 0x42d58c FSUB D7, D2, D5 |
(172) 0x42d590 ADD X15, X12, X9 |
(172) 0x42d594 LDR D28, [X13, X14,LSL #3] |
(172) 0x42d598 LDR D27, [X13, X15,LSL #3] |
(172) 0x42d59c LDR X13, [SP, #136] |
(172) 0x42d5a0 FMUL D4, D31, D3 |
(172) 0x42d5a4 LDR X24, [SP, #120] |
(172) 0x42d5a8 LDR D29, [X11, X27,LSL #3] |
(172) 0x42d5ac ADD X18, X13, X9 |
(172) 0x42d5b0 UBFM X26, X18, #61, #60 |
(172) 0x42d5b4 FSUB D16, D4, D27 |
(172) 0x42d5b8 ADD X0, X24, X9 |
(172) 0x42d5bc LDR D18, [X19, X26] |
(172) 0x42d5c0 LDR D26, [X11, X0,LSL #3] |
(172) 0x42d5c4 FADD D6, D31, D29 |
(172) 0x42d5c8 FADD D17, D16, D28 |
(172) 0x42d5cc FMADD D19, D18, D4, D7 |
(172) 0x42d5d0 FSUB D23, D6, D26 |
(172) 0x42d5d4 FDIV D20, D17, D23 |
(172) 0x42d5d8 FDIV D21, D19, D17 |
(172) 0x42d5dc STR D20, [X20, X7] |
(172) 0x42d5e0 STR D21, [X19, X26] |
(172) 0x42d5e4 LDR W21, [SP, #116] |
(172) 0x42d5e8 ADD X22, X22, #1 |
(172) 0x42d5ec LDR W11, [SP, #168] |
(172) 0x42d5f0 CMP W11, W22 |
(172) 0x42d5f4 B.LE 42d624 |
(172) 0x42d5f8 LDR W20, [SP, #172] |
(172) 0x42d5fc LDR W19, [SP, #204] |
(172) 0x42d600 LDR W4, [SP, #200] |
(172) 0x42d604 STR W20, [SP, #112] |
(172) 0x42d608 SUB W7, W19, W21 |
(172) 0x42d60c B 42d300 |
0x42d610 LDP X23, X24, [SP, #48] |
0x42d614 LDP X19, X20, [SP, #16] |
0x42d618 LDP X21, X22, [SP, #32] |
0x42d61c LDP X29, X30, [SP], #240 |
0x42d620 RET |
0x42d624 LDP X23, X24, [SP, #48] |
0x42d628 LDP X25, X26, [SP, #64] |
0x42d62c LDP X27, X28, [SP, #80] |
0x42d630 LDP X19, X20, [SP, #16] |
0x42d634 LDP X21, X22, [SP, #32] |
0x42d638 LDP X29, X30, [SP], #240 |
0x42d63c RET |
0x42d640 ADD W7, W7, #1 |
0x42d644 MOVZ W8, #0 |
0x42d648 B 42d298 |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►98.44+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| Path / |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run gcc_4.
| Source file and lines | advec_cell.cpp:208-216 |
| Module | exec |
| nb instructions | 71 |
| nb uops | 70 |
| loop length | 284 |
| used w registers | 22 |
| used x registers | 18 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 0 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 26 |
| micro-operation queue | 8.75 cycles |
| front end | 8.75 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 4.50 | 4.50 | 7.50 | 7.50 | 7.50 | 7.50 | 0.00 | 0.00 | 0.00 | 0.00 | 11.33 | 11.33 | 11.33 | 7.00 | 7.00 |
| cycles | 4.50 | 4.50 | 7.50 | 7.50 | 7.50 | 7.50 | 0.00 | 0.00 | 0.00 | 0.00 | 11.33 | 11.33 | 11.33 | 7.00 | 7.00 |
| Cycles executing div or sqrt instructions | 10.00-25.00 |
| Front-end | 8.75 |
| Dispatch | 11.33 |
| DIV/SQRT | 10.00-25.00 |
| Overall L1 | 11.33-25.00 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | 0% |
| other | 0% |
| all | 26% |
| load | 36% |
| store | 33% |
| mul | 12% |
| add-sub | 13% |
| fma | 12% |
| div/sqrt | 12% |
| other | 23% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #-240]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X20, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDP W22, W2, [X0, #56] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDR W1, [X0, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| LDR W0, [X0, #52] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ADD W3, W2, #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W22, W22, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W4, W1, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STP W3, W4, [SP, #168] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| CMP W22, W3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 42d614 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.7+0x3f4> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W19, W0, #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| SUB W23, W3, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W4, W19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 42d610 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.7+0x3f0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| SUB W5, W19, W4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MUL W24, W23, W5 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| STR W5, [SP, #200] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| BL 410210 <@plt_start@+0x1f0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W21, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410240 <@plt_start@+0x220> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| UDIV W7, W24, W21 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| ORR W6, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MSUB W8, W7, W21, W24 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| CMP W0, W8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CC 42d640 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.7+0x420> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W21, W7, W6, W8 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W9, W7, W21 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR W9, [SP, #204] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| CMP W21, W9 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 42d610 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.7+0x3f0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR W10, [SP, #200] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDR W14, [SP, #172] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| UDIV W11, W21, W10 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | scal (12.5%) |
| STP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDR X16, [X20] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDP X30, X23, [X20, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDR X17, [X20, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| STR X16, [SP, #208] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| MSUB W12, W11, W10, W21 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| LDR X18, [X20, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| ADD W13, W11, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SBFM X22, X13, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| LDR X25, [X20, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| ADD W15, W12, W14 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR X17, [SP, #224] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| SUB W4, W19, W15 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR W15, [SP, #112] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| STR X18, [SP, #216] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| STR X25, [SP, #232] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| HINT #0 | N/A | ||||||||||||||||||
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #240 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #240 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W7, W7, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W8, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| B 42d298 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.7+0x78> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run gcc_4.
| Source file and lines | advec_cell.cpp:208-216 |
| Module | exec |
| nb instructions | 71 |
| nb uops | 70 |
| loop length | 284 |
| used w registers | 22 |
| used x registers | 18 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 0 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 26 |
| micro-operation queue | 8.75 cycles |
| front end | 8.75 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 4.50 | 4.50 | 7.50 | 7.50 | 7.50 | 7.50 | 0.00 | 0.00 | 0.00 | 0.00 | 11.33 | 11.33 | 11.33 | 7.00 | 7.00 |
| cycles | 4.50 | 4.50 | 7.50 | 7.50 | 7.50 | 7.50 | 0.00 | 0.00 | 0.00 | 0.00 | 11.33 | 11.33 | 11.33 | 7.00 | 7.00 |
| Cycles executing div or sqrt instructions | 10.00-25.00 |
| Front-end | 8.75 |
| Dispatch | 11.33 |
| DIV/SQRT | 10.00-25.00 |
| Overall L1 | 11.33-25.00 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | 0% |
| other | 0% |
| all | 26% |
| load | 36% |
| store | 33% |
| mul | 12% |
| add-sub | 13% |
| fma | 12% |
| div/sqrt | 12% |
| other | 23% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #-240]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X20, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDP W22, W2, [X0, #56] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDR W1, [X0, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| LDR W0, [X0, #52] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ADD W3, W2, #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W22, W22, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W4, W1, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STP W3, W4, [SP, #168] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| CMP W22, W3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 42d614 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.7+0x3f4> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W19, W0, #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| SUB W23, W3, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W4, W19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 42d610 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.7+0x3f0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| SUB W5, W19, W4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MUL W24, W23, W5 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| STR W5, [SP, #200] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| BL 410210 <@plt_start@+0x1f0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W21, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410240 <@plt_start@+0x220> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| UDIV W7, W24, W21 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| ORR W6, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MSUB W8, W7, W21, W24 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| CMP W0, W8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CC 42d640 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.7+0x420> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W21, W7, W6, W8 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W9, W7, W21 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR W9, [SP, #204] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| CMP W21, W9 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 42d610 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.7+0x3f0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR W10, [SP, #200] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDR W14, [SP, #172] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| UDIV W11, W21, W10 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | scal (12.5%) |
| STP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDR X16, [X20] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDP X30, X23, [X20, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDR X17, [X20, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| STR X16, [SP, #208] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| MSUB W12, W11, W10, W21 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| LDR X18, [X20, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| ADD W13, W11, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SBFM X22, X13, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| LDR X25, [X20, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| ADD W15, W12, W14 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR X17, [SP, #224] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| SUB W4, W19, W15 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR W15, [SP, #112] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| STR X18, [SP, #216] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| STR X25, [SP, #232] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| HINT #0 | N/A | ||||||||||||||||||
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #240 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #240 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W7, W7, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W8, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| B 42d298 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.7+0x78> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| Name | Coverage (%) | Time (s) |
|---|---|---|
| ▼advec_cell_kernel(int, int, int, int, int, int, clover::Buffer1D | 4.13 | 5.55 |
| ▼Loop 172 - advec_cell.cpp:210-216 - exec– | 0.01 | 0.02 |
| ○Loop 173 - advec_cell.cpp:211-216 - exec | 4.12 | 5.38 |
