| Function: advec_cell_kernel(int, int, int, int, int, int, clover::Buffer1D<double>&, clover::Buffer1 ... | Module: exec | Source: advec_cell.cpp:146-150 [...] | Coverage (incl. loops): 1.08% | (excl. loops): 0.00% |
|---|
| Function: advec_cell_kernel(int, int, int, int, int, int, clover::Buffer1D<double>&, clover::Buffer1 ... | Module: exec | Source: advec_cell.cpp:146-150 [...] | Coverage (incl. loops): 1.08% | (excl. loops): 0.00% |
|---|
/home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/build/CloverLeaf2.0-CXX/src/omp/advec_cell.cpp: 146 - 150 |
-------------------------------------------------------------------------------- |
146: #pragma omp parallel for simd collapse(2) |
147: for (int j = (y_min - 2 + 1); j < (y_max + 2 + 2); j++) { |
148: for (int i = (x_min - 2 + 1); i < (x_max + 2 + 2); i++) { |
149: pre_vol(i, j) = volume(i, j) + vol_flux_y(i + 0, j + 1) - vol_flux_y(i, j); |
150: post_vol(i, j) = volume(i, j); |
/home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/build/CloverLeaf2.0-CXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x42cec0 STP X29, X30, [SP, #912]! |
0x42cec4 ADD X29, SP, #0 |
0x42cec8 STP X19, X20, [SP, #16] |
0x42cecc ORR X20, XZR, X0 |
0x42ced0 STP X21, X22, [SP, #32] |
0x42ced4 STP X23, X24, [SP, #48] |
0x42ced8 LDP W22, W1, [X0, #40] |
0x42cedc LDR W0, [X0, #32] |
0x42cee0 LDR W2, [X20, #36] |
0x42cee4 SUB W22, W22, #1 |
0x42cee8 ADD W24, W1, #4 |
0x42ceec SUB W3, W0, #1 |
0x42cef0 STR W3, [SP, #96] |
0x42cef4 CMP W22, W24 |
0x42cef8 B.GE 42d1f8 |
0x42cefc ADD W19, W2, #4 |
0x42cf00 SUB W23, W24, W22 |
0x42cf04 CMP W3, W19 |
0x42cf08 B.GE 42d1f8 |
0x42cf0c SUB W4, W19, W3 |
0x42cf10 MUL W23, W23, W4 |
0x42cf14 STR W4, [SP, #100] |
0x42cf18 BL 410210 |
0x42cf1c ORR W21, WZR, W0 |
0x42cf20 BL 410240 |
0x42cf24 UDIV W6, W23, W21 |
0x42cf28 ORR W5, WZR, W0 |
0x42cf2c MSUB W7, W6, W21, W23 |
0x42cf30 CMP W0, W7 |
0x42cf34 B.CC 42d20c |
0x42cf38 MADD W23, W6, W5, W7 |
0x42cf3c ADD W8, W6, W23 |
0x42cf40 STR W8, [SP, #104] |
0x42cf44 CMP W23, W8 |
0x42cf48 B.CS 42d1f8 |
0x42cf4c LDR W9, [SP, #100] |
0x42cf50 STP X25, X26, [SP, #64] |
0x42cf54 LDR W13, [SP, #96] |
0x42cf58 UDIV W10, W23, W9 |
0x42cf5c STP X27, X28, [SP, #80] |
0x42cf60 LDP X28, X27, [X20] |
0x42cf64 LDP X26, X25, [X20, #16] |
0x42cf68 STR W24, [SP, #108] |
0x42cf6c MSUB W11, W10, W9, W23 |
0x42cf70 ADD W12, W10, W22 |
0x42cf74 SBFM X9, X12, #0, #31 |
0x42cf78 ADD W5, W11, W13 |
0x42cf7c SUB W16, W19, W5 |
0x42cf80 CMP W6, W16 |
0x42cf84 CSEL W3, W6, W16, #9 |
0x42cf88 ADD W22, W23, W3 |
0x42cf8c CMP W23, W22 |
0x42cf90 B.CS 42d1dc |
0x42cf94 HINT #0 |
0x42cf98 HINT #0 |
0x42cf9c HINT #0 |
(169) 0x42cfa0 LDR X14, [X25] |
(169) 0x42cfa4 ADD X30, X9, #1 |
(169) 0x42cfa8 LDR X15, [X26] |
(169) 0x42cfac LDR X17, [X27] |
(169) 0x42cfb0 MUL X4, X9, X14 |
(169) 0x42cfb4 LDR X18, [X28] |
(169) 0x42cfb8 MUL X14, X9, X15 |
(169) 0x42cfbc LDR X23, [X25, #16] |
(169) 0x42cfc0 MADD X15, X9, X17, X17 |
(169) 0x42cfc4 LDR X24, [X26, #16] |
(169) 0x42cfc8 MUL X11, X9, X18 |
(169) 0x42cfcc LDR X16, [X27, #16] |
(169) 0x42cfd0 SUB X12, X15, X17 |
(169) 0x42cfd4 LDR X21, [X28, #16] |
(169) 0x42cfd8 CMP W3, #1 |
(169) 0x42cfdc B.EQ 42d168 |
(169) 0x42cfe0 UBFM W20, W3, #1, #31 |
(169) 0x42cfe4 SBFM X2, X5, #0, #31 |
(169) 0x42cfe8 UBFM X13, X20, #60, #59 |
(169) 0x42cfec ADD X6, X11, X2 |
(169) 0x42cff0 SUB X19, X13, #16 |
(169) 0x42cff4 ADD X20, X15, X2 |
(169) 0x42cff8 ADD X1, X21, X6,LSL #3 |
(169) 0x42cffc UBFM X7, X19, #4, #63 |
(169) 0x42d000 ADD X18, X14, X2 |
(169) 0x42d004 ADD X9, X16, X20,LSL #3 |
(169) 0x42d008 ADD X10, X7, #1 |
(169) 0x42d00c ADD X19, X12, X2 |
(169) 0x42d010 ADD X7, X24, X18,LSL #3 |
(169) 0x42d014 ADD X2, X4, X2 |
(169) 0x42d018 ANDS X10, X10, #0x3 |
(169) 0x42d01c ADD X8, X16, X19,LSL #3 |
(169) 0x42d020 UBFM X17, X6, #61, #60 |
(169) 0x42d024 MOVZ X0, #0 |
(169) 0x42d028 ADD X6, X23, X2,LSL #3 |
(169) 0x42d02c UBFM X20, X20, #61, #60 |
(169) 0x42d030 UBFM X19, X19, #61, #60 |
(169) 0x42d034 UBFM X18, X18, #61, #60 |
(169) 0x42d038 UBFM X2, X2, #61, #60 |
(169) 0x42d03c B.EQ 42d0c4 |
(169) 0x42d040 CMP X10, #1 |
(169) 0x42d044 B.EQ 42d098 |
(169) 0x42d048 CMP X10, #2 |
(169) 0x42d04c B.EQ 42d074 |
(169) 0x42d050 LDR Q0, [X16, X20] |
(169) 0x42d054 MOVZ X0, #16 |
(169) 0x42d058 LDR Q30, [X21, X17] |
(169) 0x42d05c LDR Q31, [X16, X19] |
(169) 0x42d060 FADD V1.2D, V0.2D, V30.2D |
(169) 0x42d064 FSUB V2.2D, V1.2D, V31.2D |
(169) 0x42d068 STR Q2, [X24, X18] |
(169) 0x42d06c LDR Q3, [X21, X17] |
(169) 0x42d070 STR Q3, [X23, X2] |
(169) 0x42d074 LDR Q4, [X9, X0] |
(169) 0x42d078 LDR Q28, [X1, X0] |
(169) 0x42d07c LDR Q29, [X8, X0] |
(169) 0x42d080 FADD V5.2D, V4.2D, V28.2D |
(169) 0x42d084 FSUB V6.2D, V5.2D, V29.2D |
(169) 0x42d088 STR Q6, [X7, X0] |
(169) 0x42d08c LDR Q7, [X1, X0] |
(169) 0x42d090 STR Q7, [X6, X0] |
(169) 0x42d094 ADD X0, X0, #16 |
(169) 0x42d098 LDR Q16, [X9, X0] |
(169) 0x42d09c LDR Q26, [X1, X0] |
(169) 0x42d0a0 LDR Q27, [X8, X0] |
(169) 0x42d0a4 FADD V17.2D, V16.2D, V26.2D |
(169) 0x42d0a8 FSUB V18.2D, V17.2D, V27.2D |
(169) 0x42d0ac STR Q18, [X7, X0] |
(169) 0x42d0b0 LDR Q19, [X1, X0] |
(169) 0x42d0b4 STR Q19, [X6, X0] |
(169) 0x42d0b8 ADD X0, X0, #16 |
(169) 0x42d0bc CMP X0, X13 |
(169) 0x42d0c0 B.EQ 42d15c |
(170) 0x42d0c4 LDR Q20, [X9, X0] |
(170) 0x42d0c8 ADD X20, X0, #16 |
(170) 0x42d0cc ADD X17, X0, #32 |
(170) 0x42d0d0 ADD X10, X0, #48 |
(170) 0x42d0d4 LDR Q21, [X1, X0] |
(170) 0x42d0d8 LDR Q22, [X8, X0] |
(170) 0x42d0dc FADD V23.2D, V20.2D, V21.2D |
(170) 0x42d0e0 FSUB V24.2D, V23.2D, V22.2D |
(170) 0x42d0e4 STR Q24, [X7, X0] |
(170) 0x42d0e8 LDR Q25, [X1, X0] |
(170) 0x42d0ec STR Q25, [X6, X0] |
(170) 0x42d0f0 ADD X0, X0, #64 |
(170) 0x42d0f4 LDR Q0, [X9, X20] |
(170) 0x42d0f8 LDR Q30, [X1, X20] |
(170) 0x42d0fc LDR Q31, [X8, X20] |
(170) 0x42d100 FADD V1.2D, V0.2D, V30.2D |
(170) 0x42d104 FSUB V2.2D, V1.2D, V31.2D |
(170) 0x42d108 STR Q2, [X7, X20] |
(170) 0x42d10c LDR Q3, [X1, X20] |
(170) 0x42d110 STR Q3, [X6, X20] |
(170) 0x42d114 LDR Q4, [X9, X17] |
(170) 0x42d118 LDR Q28, [X1, X17] |
(170) 0x42d11c LDR Q29, [X8, X17] |
(170) 0x42d120 FADD V5.2D, V4.2D, V28.2D |
(170) 0x42d124 FSUB V6.2D, V5.2D, V29.2D |
(170) 0x42d128 STR Q6, [X7, X17] |
(170) 0x42d12c LDR Q7, [X1, X17] |
(170) 0x42d130 STR Q7, [X6, X17] |
(170) 0x42d134 LDR Q16, [X9, X10] |
(170) 0x42d138 LDR Q26, [X1, X10] |
(170) 0x42d13c LDR Q27, [X8, X10] |
(170) 0x42d140 FADD V17.2D, V16.2D, V26.2D |
(170) 0x42d144 FSUB V18.2D, V17.2D, V27.2D |
(170) 0x42d148 STR Q18, [X7, X10] |
(170) 0x42d14c LDR Q19, [X1, X10] |
(170) 0x42d150 STR Q19, [X6, X10] |
(170) 0x42d154 CMP X0, X13 |
(170) 0x42d158 B.NE 42d0c4 |
(169) 0x42d15c TBZ W3, #0, 42d1a4 |
(169) 0x42d160 AND W3, W3, #0xfffffffe |
(169) 0x42d164 ADD W5, W5, W3 |
(169) 0x42d168 SBFM X13, X5, #0, #31 |
(169) 0x42d16c ADD X11, X11, X13 |
(169) 0x42d170 ADD X15, X15, X13 |
(169) 0x42d174 UBFM X1, X11, #61, #60 |
(169) 0x42d178 ADD X12, X12, X13 |
(169) 0x42d17c LDR D20, [X16, X15,LSL #3] |
(169) 0x42d180 ADD X14, X14, X13 |
(169) 0x42d184 ADD X4, X4, X13 |
(169) 0x42d188 LDR D21, [X21, X1] |
(169) 0x42d18c LDR D22, [X16, X12,LSL #3] |
(169) 0x42d190 FADD D23, D20, D21 |
(169) 0x42d194 FSUB D24, D23, S22 |
(169) 0x42d198 STR D24, [X24, X14,LSL #3] |
(169) 0x42d19c LDR D25, [X21, X1] |
(169) 0x42d1a0 STR D25, [X23, X4,LSL #3] |
(169) 0x42d1a4 ORR W23, WZR, W22 |
(169) 0x42d1a8 LDR W22, [SP, #108] |
(169) 0x42d1ac ORR X9, XZR, X30 |
(169) 0x42d1b0 CMP W22, W30 |
(169) 0x42d1b4 B.LE 42d1f0 |
(169) 0x42d1b8 LDR W30, [SP, #104] |
(169) 0x42d1bc LDR W16, [SP, #100] |
(169) 0x42d1c0 LDR W5, [SP, #96] |
(169) 0x42d1c4 SUB W6, W30, W23 |
(169) 0x42d1c8 CMP W6, W16 |
(169) 0x42d1cc CSEL W3, W6, W16, #9 |
(169) 0x42d1d0 ADD W22, W23, W3 |
(169) 0x42d1d4 CMP W23, W22 |
(169) 0x42d1d8 B.CC 42cfa0 |
(171) 0x42d1dc LDR W22, [SP, #108] |
(171) 0x42d1e0 ADD X30, X9, #1 |
(171) 0x42d1e4 ORR X9, XZR, X30 |
(171) 0x42d1e8 CMP W22, W30 |
(171) 0x42d1ec B.GT 42d1b8 |
0x42d1f0 LDP X25, X26, [SP, #64] |
0x42d1f4 LDP X27, X28, [SP, #80] |
0x42d1f8 LDP X19, X20, [SP, #16] |
0x42d1fc LDP X21, X22, [SP, #32] |
0x42d200 LDP X23, X24, [SP, #48] |
0x42d204 LDP X29, X30, [SP], #112 |
0x42d208 RET |
0x42d20c ADD W6, W6, #1 |
0x42d210 MOVZ W7, #0 |
0x42d214 B 42cf38 |
0x42d218 HINT #0 |
0x42d21c HINT #0 |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►98.45+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►1.55+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_cell_kernel(int, int, in[...] | advec_cell.cpp:157 | exec |
| ○ | advec_cell_driver(global_varia[...] | advec_cell.cpp:232 | exec |
| ○ | advection(global_variables&) | advection.cpp:81 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| Path / |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run gcc_4
| Source file and lines | advec_cell.cpp:146-150 |
| Module | exec |
| nb instructions | 68 |
| nb uops | 63 |
| loop length | 272 |
| used w registers | 21 |
| used x registers | 16 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 0 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 18 |
| micro-operation queue | 7.88 cycles |
| front end | 7.88 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 4.50 | 4.50 | 8.50 | 8.50 | 8.50 | 8.50 | 0.00 | 0.00 | 0.00 | 0.00 | 7.67 | 7.67 | 7.67 | 5.00 | 5.00 |
| cycles | 4.50 | 4.50 | 8.50 | 8.50 | 8.50 | 8.50 | 0.00 | 0.00 | 0.00 | 0.00 | 7.67 | 7.67 | 7.67 | 5.00 | 5.00 |
| Cycles executing div or sqrt instructions | 10.00-25.00 |
| Front-end | 7.88 |
| Dispatch | 8.50 |
| DIV/SQRT | 10.00-25.00 |
| Overall L1 | 10.00-25.00 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | 0% |
| other | 0% |
| all | 24% |
| load | 37% |
| store | 35% |
| mul | 12% |
| add-sub | 13% |
| fma | 12% |
| div/sqrt | 12% |
| other | 20% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #912]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X20, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDP W22, W1, [X0, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDR W0, [X0, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR W2, [X20, #36] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| SUB W22, W22, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W24, W1, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W3, W0, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR W3, [SP, #96] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| CMP W22, W24 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 42d1f8 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.5+0x338> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W19, W2, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W23, W24, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W3, W19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 42d1f8 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.5+0x338> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| SUB W4, W19, W3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MUL W23, W23, W4 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| STR W4, [SP, #100] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| BL 410210 <@plt_start@+0x1f0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W21, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410240 <@plt_start@+0x220> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| UDIV W6, W23, W21 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| ORR W5, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MSUB W7, W6, W21, W23 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| CMP W0, W7 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CC 42d20c <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.5+0x34c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W23, W6, W5, W7 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W8, W6, W23 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR W8, [SP, #104] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| CMP W23, W8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 42d1f8 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.5+0x338> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR W9, [SP, #100] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDR W13, [SP, #96] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| UDIV W10, W23, W9 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | scal (12.5%) |
| STP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDP X28, X27, [X20] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X26, X25, [X20, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| STR W24, [SP, #108] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| MSUB W11, W10, W9, W23 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W12, W10, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SBFM X9, X12, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| ADD W5, W11, W13 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W16, W19, W5 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W6, W16 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| CSEL W3, W6, W16, #9 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W22, W23, W3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W23, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 42d1dc <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.5+0x31c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #112 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W6, W6, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W7, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| B 42cf38 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.5+0x78> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run gcc_4
| Source file and lines | advec_cell.cpp:146-150 |
| Module | exec |
| nb instructions | 68 |
| nb uops | 63 |
| loop length | 272 |
| used w registers | 21 |
| used x registers | 16 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 0 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 18 |
| micro-operation queue | 7.88 cycles |
| front end | 7.88 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 4.50 | 4.50 | 8.50 | 8.50 | 8.50 | 8.50 | 0.00 | 0.00 | 0.00 | 0.00 | 7.67 | 7.67 | 7.67 | 5.00 | 5.00 |
| cycles | 4.50 | 4.50 | 8.50 | 8.50 | 8.50 | 8.50 | 0.00 | 0.00 | 0.00 | 0.00 | 7.67 | 7.67 | 7.67 | 5.00 | 5.00 |
| Cycles executing div or sqrt instructions | 10.00-25.00 |
| Front-end | 7.88 |
| Dispatch | 8.50 |
| DIV/SQRT | 10.00-25.00 |
| Overall L1 | 10.00-25.00 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | 0% |
| other | 0% |
| all | 24% |
| load | 37% |
| store | 35% |
| mul | 12% |
| add-sub | 13% |
| fma | 12% |
| div/sqrt | 12% |
| other | 20% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #912]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X20, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDP W22, W1, [X0, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDR W0, [X0, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR W2, [X20, #36] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| SUB W22, W22, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W24, W1, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W3, W0, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR W3, [SP, #96] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| CMP W22, W24 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 42d1f8 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.5+0x338> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W19, W2, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W23, W24, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W3, W19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 42d1f8 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.5+0x338> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| SUB W4, W19, W3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MUL W23, W23, W4 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| STR W4, [SP, #100] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| BL 410210 <@plt_start@+0x1f0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W21, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410240 <@plt_start@+0x220> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| UDIV W6, W23, W21 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| ORR W5, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MSUB W7, W6, W21, W23 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| CMP W0, W7 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CC 42d20c <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.5+0x34c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W23, W6, W5, W7 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W8, W6, W23 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR W8, [SP, #104] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| CMP W23, W8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 42d1f8 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.5+0x338> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR W9, [SP, #100] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDR W13, [SP, #96] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| UDIV W10, W23, W9 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | scal (12.5%) |
| STP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDP X28, X27, [X20] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X26, X25, [X20, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| STR W24, [SP, #108] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| MSUB W11, W10, W9, W23 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W12, W10, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SBFM X9, X12, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| ADD W5, W11, W13 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W16, W19, W5 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W6, W16 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| CSEL W3, W6, W16, #9 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W22, W23, W3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W23, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 42d1dc <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.5+0x31c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #112 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W6, W6, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W7, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| B 42cf38 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.5+0x78> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A |
| Name | Coverage (%) | Time (s) |
|---|---|---|
| ▼advec_cell_kernel(int, int, int, int, int, int, clover::Buffer1D | 1.08 | 1.45 |
| ▼Loop 171 - advec_cell.cpp:148-150 - exec– | 0.00 | 0.00 |
| ▼Loop 169 - advec_cell.cpp:148-150 - exec– | 0.00 | 0.02 |
| ○Loop 170 - advec_cell.cpp:149-150 - exec | 1.07 | 1.40 |
