| Function: revert_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, cl ... | Module: exec | Source: revert.cpp:34-38 [...] | Coverage (incl. loops): 2.09% | (excl. loops): 0.00% |
|---|
| Function: revert_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, cl ... | Module: exec | Source: revert.cpp:34-38 [...] | Coverage (incl. loops): 2.09% | (excl. loops): 0.00% |
|---|
/home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/build/CloverLeaf2.0-CXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
/home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/build/CloverLeaf2.0-CXX/src/omp/revert.cpp: 34 - 38 |
-------------------------------------------------------------------------------- |
34: #pragma omp parallel for simd collapse(2) |
35: for (int j = (y_min + 1); j < (y_max + 2); j++) { |
36: for (int i = (x_min + 1); i < (x_max + 2); i++) { |
37: density1(i, j) = density0(i, j); |
38: energy1(i, j) = energy0(i, j); |
0x43ee60 STP X29, X30, [SP, #-112]! |
0x43ee64 ADD X29, SP, #0 |
0x43ee68 STP X19, X20, [SP, #16] |
0x43ee6c STP X25, X26, [SP, #64] |
0x43ee70 ORR X26, XZR, X0 |
0x43ee74 STP X27, X28, [SP, #80] |
0x43ee78 LDP W27, W19, [X0, #40] |
0x43ee7c LDR W25, [X26, #36] |
0x43ee80 LDR W0, [X0, #32] |
0x43ee84 ADD W28, W27, #1 |
0x43ee88 ADD W19, W19, #2 |
0x43ee8c CMP W28, W19 |
0x43ee90 B.GE 43f17c |
0x43ee94 STP X21, X22, [SP, #32] |
0x43ee98 ADD W22, W25, #2 |
0x43ee9c SUB W21, W19, W28 |
0x43eea0 STP X23, X24, [SP, #48] |
0x43eea4 ADD W23, W0, #1 |
0x43eea8 CMP W23, W22 |
0x43eeac B.GE 43f174 |
0x43eeb0 SUB W24, W22, W23 |
0x43eeb4 BL 410210 |
0x43eeb8 MUL W27, W21, W24 |
0x43eebc ORR W20, WZR, W0 |
0x43eec0 BL 410240 |
0x43eec4 ORR W2, WZR, W0 |
0x43eec8 UDIV W1, W27, W20 |
0x43eecc MSUB W3, W1, W20, W27 |
0x43eed0 CMP W0, W3 |
0x43eed4 B.CC 43f190 |
0x43eed8 MADD W16, W1, W2, W3 |
0x43eedc ADD W4, W1, W16 |
0x43eee0 STR W4, [SP, #100] |
0x43eee4 CMP W16, W4 |
0x43eee8 B.CS 43f174 |
0x43eeec UDIV W5, W16, W24 |
0x43eef0 LDP X20, X30, [X26, #16] |
0x43eef4 MSUB W6, W5, W24, W16 |
0x43eef8 ADD W7, W5, W28 |
0x43eefc SBFM X7, X7, #0, #31 |
0x43ef00 ADD W6, W6, W23 |
0x43ef04 SUB W17, W22, W6 |
0x43ef08 LDP X22, X21, [X26] |
(305) 0x43ef0c CMP W1, W17 |
(305) 0x43ef10 CSEL W5, W1, W17, #9 |
(305) 0x43ef14 ADD W12, W16, W5 |
(305) 0x43ef18 CMP W16, W12 |
(305) 0x43ef1c B.CS 43f154 |
(305) 0x43ef20 LDR X9, [X20] |
(305) 0x43ef24 LDR X10, [X21] |
(305) 0x43ef28 LDR X11, [X22] |
(305) 0x43ef2c MUL X9, X7, X9 |
(305) 0x43ef30 LDR X13, [X30] |
(305) 0x43ef34 MUL X10, X7, X10 |
(305) 0x43ef38 LDR X16, [X20, #16] |
(305) 0x43ef3c MUL X11, X7, X11 |
(305) 0x43ef40 LDR X17, [X21, #16] |
(305) 0x43ef44 MUL X13, X7, X13 |
(305) 0x43ef48 LDR X18, [X22, #16] |
(305) 0x43ef4c LDR X15, [X30, #16] |
(305) 0x43ef50 CMP W5, #1 |
(305) 0x43ef54 B.EQ 43f12c |
(305) 0x43ef58 UBFM W8, W5, #1, #31 |
(305) 0x43ef5c SBFM X25, X6, #0, #31 |
(305) 0x43ef60 UBFM X8, X8, #60, #59 |
(305) 0x43ef64 ADD X28, X11, X25 |
(305) 0x43ef68 SUB X14, X8, #16 |
(305) 0x43ef6c ADD X27, X10, X25 |
(305) 0x43ef70 ADD X4, X18, X28,LSL #3 |
(305) 0x43ef74 UBFM X2, X14, #4, #63 |
(305) 0x43ef78 ADD X26, X9, X25 |
(305) 0x43ef7c ADD X3, X17, X27,LSL #3 |
(305) 0x43ef80 ADD X1, X2, #1 |
(305) 0x43ef84 ADD X25, X13, X25 |
(305) 0x43ef88 ADD X2, X16, X26,LSL #3 |
(305) 0x43ef8c ANDS X14, X1, #0x7 |
(305) 0x43ef90 MOVZ X0, #0 |
(305) 0x43ef94 ADD X1, X15, X25,LSL #3 |
(305) 0x43ef98 UBFM X28, X28, #61, #60 |
(305) 0x43ef9c UBFM X27, X27, #61, #60 |
(305) 0x43efa0 UBFM X26, X26, #61, #60 |
(305) 0x43efa4 UBFM X25, X25, #61, #60 |
(305) 0x43efa8 B.EQ 43f070 |
(305) 0x43efac CMP X14, #1 |
(305) 0x43efb0 B.EQ 43f054 |
(305) 0x43efb4 CMP X14, #2 |
(305) 0x43efb8 B.EQ 43f040 |
(305) 0x43efbc CMP X14, #3 |
(305) 0x43efc0 B.EQ 43f02c |
(305) 0x43efc4 CMP X14, #4 |
(305) 0x43efc8 B.EQ 43f018 |
(305) 0x43efcc CMP X14, #5 |
(305) 0x43efd0 B.EQ 43f004 |
(305) 0x43efd4 CMP X14, #6 |
(305) 0x43efd8 B.EQ 43eff0 |
(305) 0x43efdc LDR Q31, [X18, X28] |
(305) 0x43efe0 MOVZ X0, #16 |
(305) 0x43efe4 STR Q31, [X17, X27] |
(305) 0x43efe8 LDR Q0, [X16, X26] |
(305) 0x43efec STR Q0, [X15, X25] |
(305) 0x43eff0 LDR Q1, [X4, X0] |
(305) 0x43eff4 STR Q1, [X3, X0] |
(305) 0x43eff8 LDR Q2, [X2, X0] |
(305) 0x43effc STR Q2, [X1, X0] |
(305) 0x43f000 ADD X0, X0, #16 |
(305) 0x43f004 LDR Q3, [X4, X0] |
(305) 0x43f008 STR Q3, [X3, X0] |
(305) 0x43f00c LDR Q4, [X2, X0] |
(305) 0x43f010 STR Q4, [X1, X0] |
(305) 0x43f014 ADD X0, X0, #16 |
(305) 0x43f018 LDR Q5, [X4, X0] |
(305) 0x43f01c STR Q5, [X3, X0] |
(305) 0x43f020 LDR Q6, [X2, X0] |
(305) 0x43f024 STR Q6, [X1, X0] |
(305) 0x43f028 ADD X0, X0, #16 |
(305) 0x43f02c LDR Q7, [X4, X0] |
(305) 0x43f030 STR Q7, [X3, X0] |
(305) 0x43f034 LDR Q16, [X2, X0] |
(305) 0x43f038 STR Q16, [X1, X0] |
(305) 0x43f03c ADD X0, X0, #16 |
(305) 0x43f040 LDR Q17, [X4, X0] |
(305) 0x43f044 STR Q17, [X3, X0] |
(305) 0x43f048 LDR Q18, [X2, X0] |
(305) 0x43f04c STR Q18, [X1, X0] |
(305) 0x43f050 ADD X0, X0, #16 |
(305) 0x43f054 LDR Q19, [X4, X0] |
(305) 0x43f058 STR Q19, [X3, X0] |
(305) 0x43f05c LDR Q20, [X2, X0] |
(305) 0x43f060 STR Q20, [X1, X0] |
(305) 0x43f064 ADD X0, X0, #16 |
(305) 0x43f068 CMP X8, X0 |
(305) 0x43f06c B.EQ 43f120 |
(305) 0x43f070 STP W19, W23, [SP, #104] |
(306) 0x43f074 LDR Q21, [X4, X0] |
(306) 0x43f078 ADD X14, X0, #16 |
(306) 0x43f07c ADD X28, X0, #32 |
(306) 0x43f080 ADD X27, X0, #48 |
(306) 0x43f084 ADD X26, X0, #64 |
(306) 0x43f088 ADD X25, X0, #80 |
(306) 0x43f08c ADD X23, X0, #96 |
(306) 0x43f090 ADD X19, X0, #112 |
(306) 0x43f094 STR Q21, [X3, X0] |
(306) 0x43f098 LDR Q22, [X2, X0] |
(306) 0x43f09c STR Q22, [X1, X0] |
(306) 0x43f0a0 ADD X0, X0, #128 |
(306) 0x43f0a4 LDR Q23, [X4, X14] |
(306) 0x43f0a8 STR Q23, [X3, X14] |
(306) 0x43f0ac LDR Q24, [X2, X14] |
(306) 0x43f0b0 STR Q24, [X1, X14] |
(306) 0x43f0b4 LDR Q25, [X4, X28] |
(306) 0x43f0b8 STR Q25, [X3, X28] |
(306) 0x43f0bc LDR Q26, [X2, X28] |
(306) 0x43f0c0 STR Q26, [X1, X28] |
(306) 0x43f0c4 LDR Q27, [X4, X27] |
(306) 0x43f0c8 STR Q27, [X3, X27] |
(306) 0x43f0cc LDR Q28, [X2, X27] |
(306) 0x43f0d0 STR Q28, [X1, X27] |
(306) 0x43f0d4 LDR Q29, [X4, X26] |
(306) 0x43f0d8 STR Q29, [X3, X26] |
(306) 0x43f0dc LDR Q30, [X2, X26] |
(306) 0x43f0e0 STR Q30, [X1, X26] |
(306) 0x43f0e4 LDR Q31, [X4, X25] |
(306) 0x43f0e8 STR Q31, [X3, X25] |
(306) 0x43f0ec LDR Q0, [X2, X25] |
(306) 0x43f0f0 STR Q0, [X1, X25] |
(306) 0x43f0f4 LDR Q1, [X4, X23] |
(306) 0x43f0f8 STR Q1, [X3, X23] |
(306) 0x43f0fc LDR Q2, [X2, X23] |
(306) 0x43f100 STR Q2, [X1, X23] |
(306) 0x43f104 LDR Q3, [X4, X19] |
(306) 0x43f108 STR Q3, [X3, X19] |
(306) 0x43f10c LDR Q4, [X2, X19] |
(306) 0x43f110 STR Q4, [X1, X19] |
(306) 0x43f114 CMP X8, X0 |
(306) 0x43f118 B.NE 43f074 |
(305) 0x43f11c LDP W19, W23, [SP, #104] |
(305) 0x43f120 TBZ W5, #0, 43f150 |
(305) 0x43f124 AND W5, W5, #0xfffffffe |
(305) 0x43f128 ADD W6, W6, W5 |
(305) 0x43f12c SBFM X8, X6, #0, #31 |
(305) 0x43f130 ADD X11, X11, X8 |
(305) 0x43f134 ADD X10, X10, X8 |
(305) 0x43f138 ADD X9, X9, X8 |
(305) 0x43f13c ADD X13, X13, X8 |
(305) 0x43f140 LDR D5, [X18, X11,LSL #3] |
(305) 0x43f144 STR D5, [X17, X10,LSL #3] |
(305) 0x43f148 LDR D6, [X16, X9,LSL #3] |
(305) 0x43f14c STR D6, [X15, X13,LSL #3] |
(305) 0x43f150 ORR W16, WZR, W12 |
(305) 0x43f154 ADD X7, X7, #1 |
(305) 0x43f158 CMP W19, W7 |
(305) 0x43f15c B.LE 43f174 |
(305) 0x43f160 LDR W12, [SP, #100] |
(305) 0x43f164 ORR W6, WZR, W23 |
(305) 0x43f168 ORR W17, WZR, W24 |
(305) 0x43f16c SUB W1, W12, W16 |
(305) 0x43f170 B 43ef0c |
0x43f174 LDP X21, X22, [SP, #32] |
0x43f178 LDP X23, X24, [SP, #48] |
0x43f17c LDP X19, X20, [SP, #16] |
0x43f180 LDP X25, X26, [SP, #64] |
0x43f184 LDP X27, X28, [SP, #80] |
0x43f188 LDP X29, X30, [SP], #112 |
0x43f18c RET |
0x43f190 ADD W1, W1, #1 |
0x43f194 MOVZ W3, #0 |
0x43f198 B 43eed8 |
0x43f19c HINT #0 |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►98.45+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►1.55+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | revert(global_variables&) | revert.cpp:48 | exec |
| ○ | PdV(global_variables&, bool) | PdV.cpp:131 | exec |
| ○ | hydro(global_variables&, paral[...] | hydro.cpp:64 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| Path / |
The code analyzed by CQA in this panel excludes loops and represents 0.00% of application time for run gcc_4.
| Source file and lines | revert.cpp:34-38 |
| Module | exec |
| nb instructions | 54 |
| nb uops | 53 |
| loop length | 216 |
| used w registers | 20 |
| used x registers | 15 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 0 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 13 |
| micro-operation queue | 6.63 cycles |
| front end | 6.63 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 4.00 | 4.00 | 7.50 | 7.50 | 7.50 | 7.50 | 0.00 | 0.00 | 0.00 | 0.00 | 6.17 | 5.83 | 6.00 | 3.50 | 3.50 |
| cycles | 4.00 | 4.00 | 7.50 | 7.50 | 7.50 | 7.50 | 0.00 | 0.00 | 0.00 | 0.00 | 6.17 | 5.83 | 6.00 | 3.50 | 3.50 |
| Cycles executing div or sqrt instructions | 10.00-25.00 |
| Front-end | 6.63 |
| Dispatch | 7.50 |
| DIV/SQRT | 10.00-25.00 |
| Overall L1 | 10.00-25.00 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | 0% |
| other | 0% |
| all | 27% |
| load | 43% |
| store | 44% |
| mul | 12% |
| add-sub | 13% |
| fma | 12% |
| div/sqrt | 12% |
| other | 23% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #-112]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X26, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDP W27, W19, [X0, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDR W25, [X26, #36] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| LDR W0, [X0, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ADD W28, W27, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W19, W19, #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W28, W19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 43f17c <_Z13revert_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_._omp_fn.0+0x31c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD W22, W25, #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W21, W19, W28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD W23, W0, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W23, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 43f174 <_Z13revert_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_._omp_fn.0+0x314> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| SUB W24, W22, W23 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410210 <@plt_start@+0x1f0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MUL W27, W21, W24 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| ORR W20, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410240 <@plt_start@+0x220> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W2, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| UDIV W1, W27, W20 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| MSUB W3, W1, W20, W27 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| CMP W0, W3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CC 43f190 <_Z13revert_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_._omp_fn.0+0x330> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W16, W1, W2, W3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W4, W1, W16 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR W4, [SP, #100] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| CMP W16, W4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 43f174 <_Z13revert_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_._omp_fn.0+0x314> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| UDIV W5, W16, W24 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | scal (12.5%) |
| LDP X20, X30, [X26, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| MSUB W6, W5, W24, W16 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W7, W5, W28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SBFM X7, X7, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| ADD W6, W6, W23 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W17, W22, W6 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| LDP X22, X21, [X26] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #112 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W1, W1, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W3, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| B 43eed8 <_Z13revert_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_._omp_fn.0+0x78> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| HINT #0 | N/A |
The code analyzed by CQA in this panel excludes loops and represents 0.00% of application time for run gcc_4.
| Source file and lines | revert.cpp:34-38 |
| Module | exec |
| nb instructions | 54 |
| nb uops | 53 |
| loop length | 216 |
| used w registers | 20 |
| used x registers | 15 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 0 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 13 |
| micro-operation queue | 6.63 cycles |
| front end | 6.63 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 4.00 | 4.00 | 7.50 | 7.50 | 7.50 | 7.50 | 0.00 | 0.00 | 0.00 | 0.00 | 6.17 | 5.83 | 6.00 | 3.50 | 3.50 |
| cycles | 4.00 | 4.00 | 7.50 | 7.50 | 7.50 | 7.50 | 0.00 | 0.00 | 0.00 | 0.00 | 6.17 | 5.83 | 6.00 | 3.50 | 3.50 |
| Cycles executing div or sqrt instructions | 10.00-25.00 |
| Front-end | 6.63 |
| Dispatch | 7.50 |
| DIV/SQRT | 10.00-25.00 |
| Overall L1 | 10.00-25.00 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | 0% |
| other | 0% |
| all | 27% |
| load | 43% |
| store | 44% |
| mul | 12% |
| add-sub | 13% |
| fma | 12% |
| div/sqrt | 12% |
| other | 23% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #-112]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X26, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDP W27, W19, [X0, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDR W25, [X26, #36] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| LDR W0, [X0, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ADD W28, W27, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W19, W19, #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W28, W19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 43f17c <_Z13revert_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_._omp_fn.0+0x31c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD W22, W25, #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W21, W19, W28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD W23, W0, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W23, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 43f174 <_Z13revert_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_._omp_fn.0+0x314> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| SUB W24, W22, W23 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410210 <@plt_start@+0x1f0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MUL W27, W21, W24 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| ORR W20, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410240 <@plt_start@+0x220> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W2, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| UDIV W1, W27, W20 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| MSUB W3, W1, W20, W27 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| CMP W0, W3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CC 43f190 <_Z13revert_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_._omp_fn.0+0x330> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W16, W1, W2, W3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W4, W1, W16 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR W4, [SP, #100] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| CMP W16, W4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 43f174 <_Z13revert_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_._omp_fn.0+0x314> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| UDIV W5, W16, W24 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | scal (12.5%) |
| LDP X20, X30, [X26, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| MSUB W6, W5, W24, W16 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W7, W5, W28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SBFM X7, X7, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| ADD W6, W6, W23 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W17, W22, W6 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| LDP X22, X21, [X26] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #112 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W1, W1, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W3, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| B 43eed8 <_Z13revert_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_._omp_fn.0+0x78> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| HINT #0 | N/A |
| Name | Coverage (%) | Time (s) |
|---|---|---|
| ▼revert_kernel(int, int, int, int, clover::Buffer2D | 2.09 | 2.80 |
| ▼Loop 305 - revert.cpp:36-38 - exec– | 0.00 | 0.02 |
| ○Loop 306 - revert.cpp:37-38 - exec | 2.08 | 2.72 |
