| Function: reset_field_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double> ... | Module: exec | Source: reset_field.cpp:34-38 [...] | Coverage (incl. loops): 2.06% | (excl. loops): 0.00% |
|---|
| Function: reset_field_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double> ... | Module: exec | Source: reset_field.cpp:34-38 [...] | Coverage (incl. loops): 2.06% | (excl. loops): 0.00% |
|---|
/home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/build/CloverLeaf2.0-CXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
/home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/build/CloverLeaf2.0-CXX/src/omp/reset_field.cpp: 34 - 38 |
-------------------------------------------------------------------------------- |
34: #pragma omp parallel for simd collapse(2) |
35: for (int j = (y_min + 1); j < (y_max + 2); j++) { |
36: for (int i = (x_min + 1); i < (x_max + 2); i++) { |
37: density0(i, j) = density1(i, j); |
38: energy0(i, j) = energy1(i, j); |
0x43e940 STP X29, X30, [SP, #-112]! |
0x43e944 ADD X29, SP, #0 |
0x43e948 STP X19, X20, [SP, #16] |
0x43e94c STP X25, X26, [SP, #64] |
0x43e950 ORR X26, XZR, X0 |
0x43e954 STP X27, X28, [SP, #80] |
0x43e958 LDP W27, W19, [X0, #40] |
0x43e95c LDR W25, [X26, #36] |
0x43e960 LDR W0, [X0, #32] |
0x43e964 ADD W28, W27, #1 |
0x43e968 ADD W19, W19, #2 |
0x43e96c CMP W28, W19 |
0x43e970 B.GE 43ec5c |
0x43e974 STP X21, X22, [SP, #32] |
0x43e978 ADD W22, W25, #2 |
0x43e97c SUB W21, W19, W28 |
0x43e980 STP X23, X24, [SP, #48] |
0x43e984 ADD W23, W0, #1 |
0x43e988 CMP W23, W22 |
0x43e98c B.GE 43ec54 |
0x43e990 SUB W24, W22, W23 |
0x43e994 BL 410210 |
0x43e998 MUL W27, W21, W24 |
0x43e99c ORR W20, WZR, W0 |
0x43e9a0 BL 410240 |
0x43e9a4 ORR W2, WZR, W0 |
0x43e9a8 UDIV W1, W27, W20 |
0x43e9ac MSUB W3, W1, W20, W27 |
0x43e9b0 CMP W0, W3 |
0x43e9b4 B.CC 43ec70 |
0x43e9b8 MADD W15, W1, W2, W3 |
0x43e9bc ADD W4, W1, W15 |
0x43e9c0 STR W4, [SP, #100] |
0x43e9c4 CMP W15, W4 |
0x43e9c8 B.CS 43ec54 |
0x43e9cc UDIV W5, W15, W24 |
0x43e9d0 LDP X20, X30, [X26, #16] |
0x43e9d4 MSUB W6, W5, W24, W15 |
0x43e9d8 ADD W7, W5, W28 |
0x43e9dc SBFM X7, X7, #0, #31 |
0x43e9e0 ADD W6, W6, W23 |
0x43e9e4 SUB W18, W22, W6 |
0x43e9e8 LDP X22, X21, [X26] |
(302) 0x43e9ec CMP W1, W18 |
(302) 0x43e9f0 CSEL W5, W1, W18, #9 |
(302) 0x43e9f4 ADD W12, W15, W5 |
(302) 0x43e9f8 CMP W15, W12 |
(302) 0x43e9fc B.CS 43ec34 |
(302) 0x43ea00 LDR X13, [X20] |
(302) 0x43ea04 LDR X11, [X21] |
(302) 0x43ea08 LDR X10, [X22] |
(302) 0x43ea0c MUL X13, X7, X13 |
(302) 0x43ea10 LDR X9, [X30] |
(302) 0x43ea14 MUL X11, X7, X11 |
(302) 0x43ea18 LDR X15, [X20, #16] |
(302) 0x43ea1c MUL X10, X7, X10 |
(302) 0x43ea20 LDR X18, [X21, #16] |
(302) 0x43ea24 MUL X9, X7, X9 |
(302) 0x43ea28 LDR X17, [X22, #16] |
(302) 0x43ea2c LDR X16, [X30, #16] |
(302) 0x43ea30 CMP W5, #1 |
(302) 0x43ea34 B.EQ 43ec0c |
(302) 0x43ea38 UBFM W8, W5, #1, #31 |
(302) 0x43ea3c SBFM X25, X6, #0, #31 |
(302) 0x43ea40 UBFM X8, X8, #60, #59 |
(302) 0x43ea44 ADD X28, X11, X25 |
(302) 0x43ea48 SUB X14, X8, #16 |
(302) 0x43ea4c ADD X27, X10, X25 |
(302) 0x43ea50 ADD X4, X18, X28,LSL #3 |
(302) 0x43ea54 UBFM X2, X14, #4, #63 |
(302) 0x43ea58 ADD X26, X9, X25 |
(302) 0x43ea5c ADD X3, X17, X27,LSL #3 |
(302) 0x43ea60 ADD X1, X2, #1 |
(302) 0x43ea64 ADD X25, X13, X25 |
(302) 0x43ea68 ADD X2, X16, X26,LSL #3 |
(302) 0x43ea6c ANDS X14, X1, #0x7 |
(302) 0x43ea70 MOVZ X0, #0 |
(302) 0x43ea74 ADD X1, X15, X25,LSL #3 |
(302) 0x43ea78 UBFM X28, X28, #61, #60 |
(302) 0x43ea7c UBFM X27, X27, #61, #60 |
(302) 0x43ea80 UBFM X26, X26, #61, #60 |
(302) 0x43ea84 UBFM X25, X25, #61, #60 |
(302) 0x43ea88 B.EQ 43eb50 |
(302) 0x43ea8c CMP X14, #1 |
(302) 0x43ea90 B.EQ 43eb34 |
(302) 0x43ea94 CMP X14, #2 |
(302) 0x43ea98 B.EQ 43eb20 |
(302) 0x43ea9c CMP X14, #3 |
(302) 0x43eaa0 B.EQ 43eb0c |
(302) 0x43eaa4 CMP X14, #4 |
(302) 0x43eaa8 B.EQ 43eaf8 |
(302) 0x43eaac CMP X14, #5 |
(302) 0x43eab0 B.EQ 43eae4 |
(302) 0x43eab4 CMP X14, #6 |
(302) 0x43eab8 B.EQ 43ead0 |
(302) 0x43eabc LDR Q31, [X18, X28] |
(302) 0x43eac0 MOVZ X0, #16 |
(302) 0x43eac4 STR Q31, [X17, X27] |
(302) 0x43eac8 LDR Q0, [X16, X26] |
(302) 0x43eacc STR Q0, [X15, X25] |
(302) 0x43ead0 LDR Q1, [X4, X0] |
(302) 0x43ead4 STR Q1, [X3, X0] |
(302) 0x43ead8 LDR Q2, [X2, X0] |
(302) 0x43eadc STR Q2, [X1, X0] |
(302) 0x43eae0 ADD X0, X0, #16 |
(302) 0x43eae4 LDR Q3, [X4, X0] |
(302) 0x43eae8 STR Q3, [X3, X0] |
(302) 0x43eaec LDR Q4, [X2, X0] |
(302) 0x43eaf0 STR Q4, [X1, X0] |
(302) 0x43eaf4 ADD X0, X0, #16 |
(302) 0x43eaf8 LDR Q5, [X4, X0] |
(302) 0x43eafc STR Q5, [X3, X0] |
(302) 0x43eb00 LDR Q6, [X2, X0] |
(302) 0x43eb04 STR Q6, [X1, X0] |
(302) 0x43eb08 ADD X0, X0, #16 |
(302) 0x43eb0c LDR Q7, [X4, X0] |
(302) 0x43eb10 STR Q7, [X3, X0] |
(302) 0x43eb14 LDR Q16, [X2, X0] |
(302) 0x43eb18 STR Q16, [X1, X0] |
(302) 0x43eb1c ADD X0, X0, #16 |
(302) 0x43eb20 LDR Q17, [X4, X0] |
(302) 0x43eb24 STR Q17, [X3, X0] |
(302) 0x43eb28 LDR Q18, [X2, X0] |
(302) 0x43eb2c STR Q18, [X1, X0] |
(302) 0x43eb30 ADD X0, X0, #16 |
(302) 0x43eb34 LDR Q19, [X4, X0] |
(302) 0x43eb38 STR Q19, [X3, X0] |
(302) 0x43eb3c LDR Q20, [X2, X0] |
(302) 0x43eb40 STR Q20, [X1, X0] |
(302) 0x43eb44 ADD X0, X0, #16 |
(302) 0x43eb48 CMP X8, X0 |
(302) 0x43eb4c B.EQ 43ec00 |
(302) 0x43eb50 STP W19, W23, [SP, #104] |
(303) 0x43eb54 LDR Q21, [X4, X0] |
(303) 0x43eb58 ADD X14, X0, #16 |
(303) 0x43eb5c ADD X28, X0, #32 |
(303) 0x43eb60 ADD X27, X0, #48 |
(303) 0x43eb64 ADD X26, X0, #64 |
(303) 0x43eb68 ADD X25, X0, #80 |
(303) 0x43eb6c ADD X23, X0, #96 |
(303) 0x43eb70 ADD X19, X0, #112 |
(303) 0x43eb74 STR Q21, [X3, X0] |
(303) 0x43eb78 LDR Q22, [X2, X0] |
(303) 0x43eb7c STR Q22, [X1, X0] |
(303) 0x43eb80 ADD X0, X0, #128 |
(303) 0x43eb84 LDR Q23, [X4, X14] |
(303) 0x43eb88 STR Q23, [X3, X14] |
(303) 0x43eb8c LDR Q24, [X2, X14] |
(303) 0x43eb90 STR Q24, [X1, X14] |
(303) 0x43eb94 LDR Q25, [X4, X28] |
(303) 0x43eb98 STR Q25, [X3, X28] |
(303) 0x43eb9c LDR Q26, [X2, X28] |
(303) 0x43eba0 STR Q26, [X1, X28] |
(303) 0x43eba4 LDR Q27, [X4, X27] |
(303) 0x43eba8 STR Q27, [X3, X27] |
(303) 0x43ebac LDR Q28, [X2, X27] |
(303) 0x43ebb0 STR Q28, [X1, X27] |
(303) 0x43ebb4 LDR Q29, [X4, X26] |
(303) 0x43ebb8 STR Q29, [X3, X26] |
(303) 0x43ebbc LDR Q30, [X2, X26] |
(303) 0x43ebc0 STR Q30, [X1, X26] |
(303) 0x43ebc4 LDR Q31, [X4, X25] |
(303) 0x43ebc8 STR Q31, [X3, X25] |
(303) 0x43ebcc LDR Q0, [X2, X25] |
(303) 0x43ebd0 STR Q0, [X1, X25] |
(303) 0x43ebd4 LDR Q1, [X4, X23] |
(303) 0x43ebd8 STR Q1, [X3, X23] |
(303) 0x43ebdc LDR Q2, [X2, X23] |
(303) 0x43ebe0 STR Q2, [X1, X23] |
(303) 0x43ebe4 LDR Q3, [X4, X19] |
(303) 0x43ebe8 STR Q3, [X3, X19] |
(303) 0x43ebec LDR Q4, [X2, X19] |
(303) 0x43ebf0 STR Q4, [X1, X19] |
(303) 0x43ebf4 CMP X8, X0 |
(303) 0x43ebf8 B.NE 43eb54 |
(302) 0x43ebfc LDP W19, W23, [SP, #104] |
(302) 0x43ec00 TBZ W5, #0, 43ec30 |
(302) 0x43ec04 AND W5, W5, #0xfffffffe |
(302) 0x43ec08 ADD W6, W6, W5 |
(302) 0x43ec0c SBFM X8, X6, #0, #31 |
(302) 0x43ec10 ADD X11, X11, X8 |
(302) 0x43ec14 ADD X10, X10, X8 |
(302) 0x43ec18 ADD X9, X9, X8 |
(302) 0x43ec1c ADD X13, X13, X8 |
(302) 0x43ec20 LDR D5, [X18, X11,LSL #3] |
(302) 0x43ec24 STR D5, [X17, X10,LSL #3] |
(302) 0x43ec28 LDR D6, [X16, X9,LSL #3] |
(302) 0x43ec2c STR D6, [X15, X13,LSL #3] |
(302) 0x43ec30 ORR W15, WZR, W12 |
(302) 0x43ec34 ADD X7, X7, #1 |
(302) 0x43ec38 CMP W19, W7 |
(302) 0x43ec3c B.LE 43ec54 |
(302) 0x43ec40 LDR W12, [SP, #100] |
(302) 0x43ec44 ORR W6, WZR, W23 |
(302) 0x43ec48 ORR W18, WZR, W24 |
(302) 0x43ec4c SUB W1, W12, W15 |
(302) 0x43ec50 B 43e9ec |
0x43ec54 LDP X21, X22, [SP, #32] |
0x43ec58 LDP X23, X24, [SP, #48] |
0x43ec5c LDP X19, X20, [SP, #16] |
0x43ec60 LDP X25, X26, [SP, #64] |
0x43ec64 LDP X27, X28, [SP, #80] |
0x43ec68 LDP X29, X30, [SP], #112 |
0x43ec6c RET |
0x43ec70 ADD W1, W1, #1 |
0x43ec74 MOVZ W3, #0 |
0x43ec78 B 43e9b8 |
0x43ec7c HINT #0 |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►98.44+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►1.56+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | reset_field(global_variables&) | reset_field.cpp:44 | exec |
| ○ | hydro(global_variables&, paral[...] | hydro.cpp:84 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| Path / |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run gcc_4
| Source file and lines | reset_field.cpp:34-38 |
| Module | exec |
| nb instructions | 54 |
| nb uops | 53 |
| loop length | 216 |
| used w registers | 20 |
| used x registers | 15 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 0 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 13 |
| micro-operation queue | 6.63 cycles |
| front end | 6.63 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 4.00 | 4.00 | 7.50 | 7.50 | 7.50 | 7.50 | 0.00 | 0.00 | 0.00 | 0.00 | 6.17 | 5.83 | 6.00 | 3.50 | 3.50 |
| cycles | 4.00 | 4.00 | 7.50 | 7.50 | 7.50 | 7.50 | 0.00 | 0.00 | 0.00 | 0.00 | 6.17 | 5.83 | 6.00 | 3.50 | 3.50 |
| Cycles executing div or sqrt instructions | 10.00-25.00 |
| Front-end | 6.63 |
| Dispatch | 7.50 |
| DIV/SQRT | 10.00-25.00 |
| Overall L1 | 10.00-25.00 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | 0% |
| other | 0% |
| all | 27% |
| load | 43% |
| store | 44% |
| mul | 12% |
| add-sub | 13% |
| fma | 12% |
| div/sqrt | 12% |
| other | 23% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #-112]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X26, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDP W27, W19, [X0, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDR W25, [X26, #36] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| LDR W0, [X0, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ADD W28, W27, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W19, W19, #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W28, W19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 43ec5c <_Z18reset_field_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x31c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD W22, W25, #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W21, W19, W28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD W23, W0, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W23, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 43ec54 <_Z18reset_field_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x314> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| SUB W24, W22, W23 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410210 <@plt_start@+0x1f0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MUL W27, W21, W24 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| ORR W20, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410240 <@plt_start@+0x220> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W2, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| UDIV W1, W27, W20 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| MSUB W3, W1, W20, W27 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| CMP W0, W3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CC 43ec70 <_Z18reset_field_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x330> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W15, W1, W2, W3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W4, W1, W15 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR W4, [SP, #100] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| CMP W15, W4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 43ec54 <_Z18reset_field_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x314> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| UDIV W5, W15, W24 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | scal (12.5%) |
| LDP X20, X30, [X26, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| MSUB W6, W5, W24, W15 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W7, W5, W28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SBFM X7, X7, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| ADD W6, W6, W23 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W18, W22, W6 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| LDP X22, X21, [X26] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #112 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W1, W1, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W3, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| B 43e9b8 <_Z18reset_field_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x78> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| HINT #0 | N/A |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run gcc_4
| Source file and lines | reset_field.cpp:34-38 |
| Module | exec |
| nb instructions | 54 |
| nb uops | 53 |
| loop length | 216 |
| used w registers | 20 |
| used x registers | 15 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 0 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 13 |
| micro-operation queue | 6.63 cycles |
| front end | 6.63 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 4.00 | 4.00 | 7.50 | 7.50 | 7.50 | 7.50 | 0.00 | 0.00 | 0.00 | 0.00 | 6.17 | 5.83 | 6.00 | 3.50 | 3.50 |
| cycles | 4.00 | 4.00 | 7.50 | 7.50 | 7.50 | 7.50 | 0.00 | 0.00 | 0.00 | 0.00 | 6.17 | 5.83 | 6.00 | 3.50 | 3.50 |
| Cycles executing div or sqrt instructions | 10.00-25.00 |
| Front-end | 6.63 |
| Dispatch | 7.50 |
| DIV/SQRT | 10.00-25.00 |
| Overall L1 | 10.00-25.00 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | 0% |
| other | 0% |
| all | 27% |
| load | 43% |
| store | 44% |
| mul | 12% |
| add-sub | 13% |
| fma | 12% |
| div/sqrt | 12% |
| other | 23% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #-112]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X26, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDP W27, W19, [X0, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDR W25, [X26, #36] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| LDR W0, [X0, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ADD W28, W27, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W19, W19, #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W28, W19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 43ec5c <_Z18reset_field_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x31c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD W22, W25, #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W21, W19, W28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD W23, W0, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W23, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 43ec54 <_Z18reset_field_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x314> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| SUB W24, W22, W23 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410210 <@plt_start@+0x1f0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MUL W27, W21, W24 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| ORR W20, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410240 <@plt_start@+0x220> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W2, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| UDIV W1, W27, W20 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| MSUB W3, W1, W20, W27 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| CMP W0, W3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CC 43ec70 <_Z18reset_field_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x330> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W15, W1, W2, W3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W4, W1, W15 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR W4, [SP, #100] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| CMP W15, W4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 43ec54 <_Z18reset_field_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x314> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| UDIV W5, W15, W24 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | scal (12.5%) |
| LDP X20, X30, [X26, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| MSUB W6, W5, W24, W15 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W7, W5, W28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SBFM X7, X7, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| ADD W6, W6, W23 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W18, W22, W6 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| LDP X22, X21, [X26] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #112 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W1, W1, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W3, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| B 43e9b8 <_Z18reset_field_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x78> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| HINT #0 | N/A |
| Name | Coverage (%) | Time (s) |
|---|---|---|
| ▼reset_field_kernel(int, int, int, int, clover::Buffer2D | 2.06 | 2.77 |
| ▼Loop 302 - reset_field.cpp:36-38 - exec– | 0.00 | 0.02 |
| ○Loop 303 - reset_field.cpp:37-38 - exec | 2.06 | 2.69 |
