| Function: PdV_kernel(bool, int, int, int, int, double, clover::Buffer2D<double>&, clover::Buffer2D<d ... | Module: exec | Source: PdV.cpp:48-63 [...] | Coverage (incl. loops): 6.41% | (excl. loops): 0.00% |
|---|
| Function: PdV_kernel(bool, int, int, int, int, double, clover::Buffer2D<double>&, clover::Buffer2D<d ... | Module: exec | Source: PdV.cpp:48-63 [...] | Coverage (incl. loops): 6.41% | (excl. loops): 0.00% |
|---|
/home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/build/CloverLeaf2.0-CXX/src/omp/PdV.cpp: 48 - 63 |
-------------------------------------------------------------------------------- |
48: #pragma omp parallel for simd collapse(2) |
49: for (int j = (y_min + 1); j < (y_max + 2); j++) { |
50: for (int i = (x_min + 1); i < (x_max + 2); i++) { |
51: double left_flux = (xarea(i, j) * (xvel0(i, j) + xvel0(i + 0, j + 1) + xvel0(i, j) + xvel0(i + 0, j + 1))) * 0.25 * dt * 0.5; |
52: double right_flux = |
53: (xarea(i + 1, j + 0) * (xvel0(i + 1, j + 0) + xvel0(i + 1, j + 1) + xvel0(i + 1, j + 0) + xvel0(i + 1, j + 1))) * 0.25 * dt * |
54: 0.5; |
55: double bottom_flux = (yarea(i, j) * (yvel0(i, j) + yvel0(i + 1, j + 0) + yvel0(i, j) + yvel0(i + 1, j + 0))) * 0.25 * dt * 0.5; |
56: double top_flux = (yarea(i + 0, j + 1) * (yvel0(i + 0, j + 1) + yvel0(i + 1, j + 1) + yvel0(i + 0, j + 1) + yvel0(i + 1, j + 1))) * |
57: 0.25 * dt * 0.5; |
58: double total_flux = right_flux - left_flux + top_flux - bottom_flux; |
59: double volume_change_s = volume(i, j) / (volume(i, j) + total_flux); |
60: double recip_volume = 1.0 / volume(i, j); |
61: double energy_change = (pressure(i, j) / density0(i, j) + viscosity(i, j) / density0(i, j)) * total_flux * recip_volume; |
62: energy1(i, j) = energy0(i, j) - energy_change; |
63: density1(i, j) = density0(i, j) * volume_change_s; |
/home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/build/CloverLeaf2.0-CXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x434ec0 STP X29, X30, [SP, #-192]!
0x434ec4 ADD X29, SP, #0 |
0x434ec8 STP X21, X22, [SP, #32] |
0x434ecc ORR X21, XZR, X0 |
0x434ed0 LDP W22, W1, [X0, #104] |
0x434ed4 LDR W0, [X0, #96] |
0x434ed8 LDR W2, [X21, #100] |
0x434edc ADD W1, W1, #2 |
0x434ee0 ADD W22, W22, #1 |
0x434ee4 ADD W0, W0, #1 |
0x434ee8 STP W1, W0, [SP, #168] |
0x434eec CMP W22, W1 |
0x434ef0 B.GE 4352a0 |
0x434ef4 STP X19, X20, [SP, #16] |
0x434ef8 ADD W20, W2, #2 |
0x434efc STP X23, X24, [SP, #48] |
0x434f00 SUB W23, W1, W22 |
0x434f04 CMP W0, W20 |
0x434f08 B.GE 4352ac |
0x434f0c SUB W0, W20, W0 |
0x434f10 MUL W23, W23, W0 |
0x434f14 STR W0, [SP, #176] |
0x434f18 BL 410670 |
0x434f1c ORR W19, WZR, W0 |
0x434f20 BL 410550 |
0x434f24 UDIV W4, W23, W19 |
0x434f28 ORR W3, WZR, W0 |
0x434f2c MSUB W0, W4, W19, W23 |
0x434f30 CMP W3, W0 |
0x434f34 B.CC 4352c0 |
0x434f38 MADD W3, W4, W3, W0 |
0x434f3c ADD W0, W4, W3 |
0x434f40 STR W0, [SP, #180] |
0x434f44 CMP W3, W0 |
0x434f48 B.CS 4352ac |
0x434f4c LDR W1, [SP, #176] |
0x434f50 PTRUE P7.B, ALL |
0x434f54 CNTD X19, ALL |
0x434f58 STP X25, X26, [SP, #64] |
0x434f5c FDUP Z30.D, #80 |
0x434f60 FDUP Z29.D, #96 |
0x434f64 FDUP Z28.D, #112 |
0x434f68 STP X27, X28, [SP, #80] |
0x434f6c LD1RD {Z31.D}, P7/Z, [X21] |
0x434f70 UDIV W0, W3, W1 |
0x434f74 LDR X2, [X21, #40] |
0x434f78 LDP X27, X26, [X21, #8] |
0x434f7c LDP X25, X24, [X21, #24] |
0x434f80 MSUB W1, W0, W1, W3 |
0x434f84 ADD W0, W0, W22 |
0x434f88 LDP X22, X23, [X21, #80] |
0x434f8c SBFM X8, X0, #0, #31 |
0x434f90 LDP X0, X28, [X21, #56] |
0x434f94 STP X0, X2, [SP, #112] |
0x434f98 LDR W0, [SP, #172] |
0x434f9c LDR X2, [X21, #48] |
0x434fa0 ADD W1, W1, W0 |
0x434fa4 LDR X0, [X21, #72] |
0x434fa8 STR X2, [SP, #184] |
0x434fac SUB W2, W20, W1 |
0x434fb0 CMP W4, W2 |
0x434fb4 CSEL W2, W4, W2, #9 |
0x434fb8 STR X0, [SP, #104] |
0x434fbc ADD W0, W3, W2 |
0x434fc0 STR W0, [SP, #100] |
0x434fc4 CMP W3, W0 |
0x434fc8 B.CS 43527c |
(309) 0x434fcc LDR X4, [SP, #104] |
(309) 0x434fd0 SBFM X1, X1, #0, #31 |
(309) 0x434fd4 ORR W2, WZR, W2 |
(309) 0x434fd8 WHILELO P7.D, XZR, X2 |
(309) 0x434fdc LDR X6, [X22] |
(309) 0x434fe0 LDR X5, [X23] |
(309) 0x434fe4 LDR X11, [X4] |
(309) 0x434fe8 MUL X9, X8, X6 |
(309) 0x434fec LDR X7, [X26] |
(309) 0x434ff0 MUL X20, X8, X5 |
(309) 0x434ff4 LDR X30, [X22, #16] |
(309) 0x434ff8 ADD X6, X6, X9 |
(309) 0x434ffc ADD X9, X9, X1 |
(309) 0x435000 MADD X11, X8, X11, X1 |
(309) 0x435004 ADD X6, X6, X1 |
(309) 0x435008 ADD X5, X5, X20 |
(309) 0x43500c ADD X20, X20, X1 |
(309) 0x435010 LDR X0, [SP, #184] |
(309) 0x435014 MUL X10, X8, X7 |
(309) 0x435018 ADD X5, X5, X1 |
(309) 0x43501c STR X11, [SP, #128] |
(309) 0x435020 LDR X11, [SP, #120] |
(309) 0x435024 ADD X18, X30, X6,LSL #3 |
(309) 0x435028 ADD X17, X30, X9,LSL #3 |
(309) 0x43502c ADD X7, X7, X10 |
(309) 0x435030 ADD X10, X10, X1 |
(309) 0x435034 ADD X6, X30, X6,LSL #3 |
(309) 0x435038 ADD X7, X7, X1 |
(309) 0x43503c LDR X14, [X0] |
(309) 0x435040 ADD X9, X30, X9,LSL #3 |
(309) 0x435044 ADD X18, X18, #8 |
(309) 0x435048 ADD X17, X17, #8 |
(309) 0x43504c LDR X21, [X27] |
(309) 0x435050 LDR X16, [X11] |
(309) 0x435054 MADD X15, X8, X14, X1 |
(309) 0x435058 LDR X11, [X23, #16] |
(309) 0x43505c MADD X21, X8, X21, X1 |
(309) 0x435060 LDR X14, [X27, #16] |
(309) 0x435064 LDR X3, [X24] |
(309) 0x435068 ADD X30, X11, X20,LSL #3 |
(309) 0x43506c LDR X20, [X26, #16] |
(309) 0x435070 LDR X0, [X0, #16] |
(309) 0x435074 MADD X4, X8, X3, X1 |
(309) 0x435078 LDR X3, [SP, #112] |
(309) 0x43507c ADD X7, X20, X7,LSL #3 |
(309) 0x435080 ADD X10, X20, X10,LSL #3 |
(309) 0x435084 LDR X20, [SP, #104] |
(309) 0x435088 LDR X13, [X25] |
(309) 0x43508c LDR X12, [X28] |
(309) 0x435090 LDR X20, [X20, #16] |
(309) 0x435094 MADD X13, X8, X13, X1 |
(309) 0x435098 LDR X3, [X3] |
(309) 0x43509c MADD X12, X8, X12, X1 |
(309) 0x4350a0 STR X20, [SP, #136] |
(309) 0x4350a4 ADD X20, X14, X21,LSL #3 |
(309) 0x4350a8 LDR X21, [SP, #112] |
(309) 0x4350ac MADD X3, X8, X3, X1 |
(309) 0x4350b0 ORR X14, XZR, X20 |
(309) 0x4350b4 ADD X20, X20, #8 |
(309) 0x4350b8 MADD X1, X8, X16, X1 |
(309) 0x4350bc ADD X16, X11, X5,LSL #3 |
(309) 0x4350c0 ORR X11, XZR, X30 |
(309) 0x4350c4 ADD X30, X30, #8 |
(309) 0x4350c8 LDR X21, [X21, #16] |
(309) 0x4350cc ORR X5, XZR, X16 |
(309) 0x4350d0 ADD X16, X16, #8 |
(309) 0x4350d4 STP X0, X21, [SP, #144] |
(309) 0x4350d8 LDR X21, [X24, #16] |
(309) 0x4350dc LDR X0, [X25, #16] |
(309) 0x4350e0 ADD X4, X21, X4,LSL #3 |
(309) 0x4350e4 LDR X21, [SP, #120] |
(309) 0x4350e8 ADD X13, X0, X13,LSL #3 |
(309) 0x4350ec LDR X0, [X28, #16] |
(309) 0x4350f0 LDR X21, [X21, #16] |
(309) 0x4350f4 ADD X12, X0, X12,LSL #3 |
(309) 0x4350f8 LDR X0, [SP, #136] |
(309) 0x4350fc STR X21, [SP, #160] |
(309) 0x435100 LDR X21, [SP, #128] |
(309) 0x435104 ADD X21, X0, X21,LSL #3 |
(309) 0x435108 LDR X0, [SP, #144] |
(309) 0x43510c ADD X15, X0, X15,LSL #3 |
(309) 0x435110 LDR X0, [SP, #152] |
(309) 0x435114 ADD X3, X0, X3,LSL #3 |
(309) 0x435118 LDR X0, [SP, #160] |
(309) 0x43511c ADD X1, X0, X1,LSL #3 |
(309) 0x435120 MOVZ X0, #0 |
(308) 0x435124 LD1D {Z26.D}, P7/Z, [X9, X0,LSL #3] |
(308) 0x435128 LD1D {Z3.D}, P7/Z, [X6, X0,LSL #3] |
(308) 0x43512c LD1D {Z21.D}, P7/Z, [X17, X0,LSL #3] |
(308) 0x435130 LD1D {Z2.D}, P7/Z, [X18, X0,LSL #3] |
(308) 0x435134 LD1D {Z19.D}, P7/Z, [X11, X0,LSL #3] |
(308) 0x435138 LD1D {Z18.D}, P7/Z, [X30, X0,LSL #3] |
(308) 0x43513c LD1D {Z27.D}, P7/Z, [X14, X0,LSL #3] |
(308) 0x435140 LD1D {Z1.D}, P7/Z, [X20, X0,LSL #3] |
(308) 0x435144 MOVPRFX Z16, Z26 |
(308) 0x435148 FADD Z16.D, P7/M, Z16.D, Z3.D |
(308) 0x43514c MOVPRFX Z24, Z21 |
(308) 0x435150 FADD Z24.D, P7/M, Z24.D, Z2.D |
(308) 0x435154 FADD Z26.D, P7/M, Z26.D, Z16.D |
(308) 0x435158 FADD Z21.D, P7/M, Z21.D, Z24.D |
(308) 0x43515c FADD Z3.D, P7/M, Z3.D, Z26.D |
(308) 0x435160 FADD Z2.D, P7/M, Z2.D, Z21.D |
(308) 0x435164 MOVPRFX Z0, Z19 |
(308) 0x435168 FADD Z0.D, P7/M, Z0.D, Z18.D |
(308) 0x43516c FMUL Z27.D, P7/M, Z27.D, Z3.D |
(308) 0x435170 FMUL Z1.D, P7/M, Z1.D, Z2.D |
(308) 0x435174 FMUL Z27.D, P7/M, Z27.D, Z30.D |
(308) 0x435178 FMUL Z1.D, P7/M, Z1.D, Z30.D |
(308) 0x43517c LD1D {Z3.D}, P7/Z, [X13, X0,LSL #3] |
(308) 0x435180 LD1D {Z7.D}, P7/Z, [X5, X0,LSL #3] |
(308) 0x435184 LD1D {Z6.D}, P7/Z, [X16, X0,LSL #3] |
(308) 0x435188 LD1D {Z24.D}, P7/Z, [X4, X0,LSL #3] |
(308) 0x43518c LD1D {Z5.D}, P7/Z, [X7, X0,LSL #3] |
(308) 0x435190 LD1D {Z17.D}, P7/Z, [X10, X0,LSL #3] |
(308) 0x435194 LD1D {Z21.D}, P7/Z, [X21, X0,LSL #3] |
(308) 0x435198 MOVPRFX Z23, Z31 |
(308) 0x43519c FMUL Z23.D, P7/M, Z23.D, Z27.D |
(308) 0x4351a0 MOVPRFX Z4, Z31 |
(308) 0x4351a4 FMUL Z4.D, P7/M, Z4.D, Z1.D |
(308) 0x4351a8 MOVPRFX Z16, Z7 |
(308) 0x4351ac FADD Z16.D, P7/M, Z16.D, Z6.D |
(308) 0x4351b0 LD1D {Z1.D}, P7/Z, [X12, X0,LSL #3] |
(308) 0x4351b4 FADD Z19.D, P7/M, Z19.D, Z0.D |
(308) 0x4351b8 FMUL Z4.D, P7/M, Z4.D, #0.5
(308) 0x4351bc LD1D {Z0.D}, P7/Z, [X15, X0,LSL #3] |
(308) 0x4351c0 FMLS Z4.D, P7/M, Z23.D, Z29.D |
(308) 0x4351c4 FADD Z7.D, P7/M, Z7.D, Z16.D |
(308) 0x4351c8 FADD Z18.D, P7/M, Z18.D, Z19.D |
(308) 0x4351cc FADD Z6.D, P7/M, Z6.D, Z7.D |
(308) 0x4351d0 FMUL Z17.D, P7/M, Z17.D, Z18.D |
(308) 0x4351d4 FMUL Z5.D, P7/M, Z5.D, Z6.D |
(308) 0x4351d8 FMUL Z17.D, P7/M, Z17.D, Z30.D |
(308) 0x4351dc FMUL Z5.D, P7/M, Z5.D, Z30.D |
(308) 0x4351e0 MOVPRFX Z20, Z31 |
(308) 0x4351e4 FMUL Z20.D, P7/M, Z20.D, Z17.D |
(308) 0x4351e8 MOVPRFX Z25, Z31 |
(308) 0x4351ec FMUL Z25.D, P7/M, Z25.D, Z5.D |
(308) 0x4351f0 MOVPRFX Z22, Z28 |
(308) 0x4351f4 FDIV Z22.D, P7/M, Z22.D, Z3.D |
(308) 0x4351f8 FMUL Z25.D, P7/M, Z25.D, #0.5
(308) 0x4351fc FDIV Z1.D, P7/M, Z1.D, Z24.D |
(308) 0x435200 FADD Z25.D, P7/M, Z25.D, Z4.D |
(308) 0x435204 FDIV Z21.D, P7/M, Z21.D, Z24.D |
(308) 0x435208 FMLS Z25.D, P7/M, Z20.D, Z29.D |
(308) 0x43520c FADD Z1.D, P7/M, Z1.D, Z21.D |
(308) 0x435210 MOVPRFX Z2, Z25 |
(308) 0x435214 FADD Z2.D, P7/M, Z2.D, Z3.D |
(308) 0x435218 FMUL Z25.D, P7/M, Z25.D, Z1.D |
(308) 0x43521c MOVPRFX Z27, Z3 |
(308) 0x435220 FDIV Z27.D, P7/M, Z27.D, Z2.D |
(308) 0x435224 FMLS Z0.D, P7/M, Z25.D, Z22.D |
(308) 0x435228 ST1D {Z0.D}, P7, [X3, X0,LSL #3] |
(308) 0x43522c LD1D {Z26.D}, P7/Z, [X4, X0,LSL #3] |
(308) 0x435230 FMUL Z27.D, P7/M, Z27.D, Z26.D |
(308) 0x435234 ST1D {Z27.D}, P7, [X1, X0,LSL #3] |
(308) 0x435238 ADD X0, X0, X19 |
(308) 0x43523c WHILELO P7.D, X0, X2 |
(308) 0x435240 B.NE 435124 |
(309) 0x435244 LDR W0, [SP, #168] |
(309) 0x435248 ADD X8, X8, #1 |
(309) 0x43524c CMP W0, W8 |
(309) 0x435250 B.LE 435290 |
(309) 0x435254 LDR W3, [SP, #100] |
(309) 0x435258 LDR W0, [SP, #180] |
(309) 0x43525c LDP W1, W2, [SP, #172] |
(309) 0x435260 SUB W4, W0, W3 |
(309) 0x435264 CMP W4, W2 |
(309) 0x435268 CSEL W2, W4, W2, #9 |
(309) 0x43526c ADD W0, W3, W2 |
(309) 0x435270 STR W0, [SP, #100] |
(309) 0x435274 CMP W3, W0 |
(309) 0x435278 B.CC 434fcc |
(310) 0x43527c LDR W0, [SP, #168] |
(310) 0x435280 ADD X8, X8, #1 |
(310) 0x435284 STR W3, [SP, #100] |
(310) 0x435288 CMP W0, W8 |
(310) 0x43528c B.GT 435254 |
0x435290 LDP X19, X20, [SP, #16] |
0x435294 LDP X23, X24, [SP, #48] |
0x435298 LDP X25, X26, [SP, #64] |
0x43529c LDP X27, X28, [SP, #80] |
0x4352a0 LDP X21, X22, [SP, #32] |
0x4352a4 LDP X29, X30, [SP], #192 |
0x4352a8 RET |
0x4352ac LDP X19, X20, [SP, #16] |
0x4352b0 LDP X23, X24, [SP, #48] |
0x4352b4 LDP X21, X22, [SP, #32] |
0x4352b8 LDP X29, X30, [SP], #192 |
0x4352bc RET |
0x4352c0 ADD W4, W4, #1 |
0x4352c4 MOVZ W0, #0 |
0x4352c8 B 434f38 |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►98.43+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►1.57+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | PdV(global_variables&, bool) | PdV.cpp:48 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1076 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| Path / |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run gcc_0
| Source file and lines | PdV.cpp:48-63 |
| Module | exec |
| nb instructions | 82 |
| nb uops | 82 |
| loop length | 328 |
| used w registers | 10 |
| used x registers | 16 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 0 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 4 |
| nb stack references | 25 |
| micro-operation queue | 10.25 cycles |
| front end | 10.25 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 5.00 | 5.00 | 9.00 | 9.00 | 9.00 | 9.00 | 1.50 | 1.50 | 0.00 | 0.00 | 12.00 | 12.00 | 12.00 | 6.50 | 6.50 |
| cycles | 5.00 | 5.00 | 9.00 | 9.00 | 9.00 | 9.00 | 1.50 | 1.50 | 0.00 | 0.00 | 12.00 | 12.00 | 12.00 | 6.50 | 6.50 |
| Cycles executing div or sqrt instructions | 10.00-25.00 |
| Front-end | 10.25 |
| Dispatch | 12.00 |
| DIV/SQRT | 10.00-25.00 |
| Overall L1 | 12.00-25.00 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| other | 0% |
| all | 0% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 31% |
| load | 39% |
| store | 35% |
| mul | 12% |
| add-sub | 14% |
| fma | 12% |
| other | 28% |
| all | 3% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 3% |
| all | 29% |
| load | 39% |
| store | 35% |
| mul | 12% |
| add-sub | 14% |
| fma | 12% |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 23% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #-192]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X21, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| LDP W22, W1, [X0, #104] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDR W0, [X0, #96] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR W2, [X21, #100] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| ADD W1, W1, #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W22, W22, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W0, W0, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STP W1, W0, [SP, #168] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| CMP W22, W1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 4352a0 <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x3e0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD W20, W2, #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| SUB W23, W1, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W0, W20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 4352ac <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x3ec> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| SUB W0, W20, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MUL W23, W23, W0 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| STR W0, [SP, #176] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| BL 410670 <@plt_start@+0x650> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W19, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410550 <@plt_start@+0x530> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| UDIV W4, W23, W19 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| ORR W3, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MSUB W0, W4, W19, W23 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | N/A |
| CMP W3, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CC 4352c0 <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x400> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W3, W4, W3, W0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W0, W4, W3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STR W0, [SP, #180] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| CMP W3, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 4352ac <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x3ec> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR W1, [SP, #176] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| PTRUE P7.B, ALL | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (100.0%) |
| CNTD X19, ALL | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (25.0%) |
| STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| FDUP Z30.D, #80 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (3.1%) |
| FDUP Z29.D, #96 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (3.1%) |
| FDUP Z28.D, #112 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (3.1%) |
| STP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LD1RD {Z31.D}, P7/Z, [X21] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 | scal (25.0%) |
| UDIV W0, W3, W1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| LDR X2, [X21, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDP X27, X26, [X21, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X25, X24, [X21, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| MSUB W1, W0, W1, W3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W0, W0, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| LDP X22, X23, [X21, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| SBFM X8, X0, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| LDP X0, X28, [X21, #56] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| STP X0, X2, [SP, #112] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDR W0, [SP, #172] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X2, [X21, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| ADD W1, W1, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| LDR X0, [X21, #72] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| STR X2, [SP, #184] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| SUB W2, W20, W1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W4, W2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| CSEL W2, W4, W2, #9 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR X0, [SP, #104] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| ADD W0, W3, W2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STR W0, [SP, #100] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| CMP W3, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 43527c <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x3bc> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X29, X30, [SP], #192 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X29, X30, [SP], #192 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W4, W4, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W0, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| B 434f38 <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x78> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run gcc_0
| Source file and lines | PdV.cpp:48-63 |
| Module | exec |
| nb instructions | 82 |
| nb uops | 82 |
| loop length | 328 |
| used w registers | 10 |
| used x registers | 16 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 0 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 4 |
| nb stack references | 25 |
| micro-operation queue | 10.25 cycles |
| front end | 10.25 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 5.00 | 5.00 | 9.00 | 9.00 | 9.00 | 9.00 | 1.50 | 1.50 | 0.00 | 0.00 | 12.00 | 12.00 | 12.00 | 6.50 | 6.50 |
| cycles | 5.00 | 5.00 | 9.00 | 9.00 | 9.00 | 9.00 | 1.50 | 1.50 | 0.00 | 0.00 | 12.00 | 12.00 | 12.00 | 6.50 | 6.50 |
| Cycles executing div or sqrt instructions | 10.00-25.00 |
| Front-end | 10.25 |
| Dispatch | 12.00 |
| DIV/SQRT | 10.00-25.00 |
| Overall L1 | 12.00-25.00 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| other | 0% |
| all | 0% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 31% |
| load | 39% |
| store | 35% |
| mul | 12% |
| add-sub | 14% |
| fma | 12% |
| other | 28% |
| all | 3% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 3% |
| all | 29% |
| load | 39% |
| store | 35% |
| mul | 12% |
| add-sub | 14% |
| fma | 12% |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 23% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #-192]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X21, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| LDP W22, W1, [X0, #104] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDR W0, [X0, #96] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR W2, [X21, #100] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| ADD W1, W1, #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W22, W22, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W0, W0, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STP W1, W0, [SP, #168] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| CMP W22, W1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 4352a0 <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x3e0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD W20, W2, #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| SUB W23, W1, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W0, W20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 4352ac <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x3ec> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| SUB W0, W20, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MUL W23, W23, W0 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| STR W0, [SP, #176] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| BL 410670 <@plt_start@+0x650> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W19, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410550 <@plt_start@+0x530> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| UDIV W4, W23, W19 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| ORR W3, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MSUB W0, W4, W19, W23 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | N/A |
| CMP W3, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CC 4352c0 <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x400> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W3, W4, W3, W0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W0, W4, W3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STR W0, [SP, #180] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| CMP W3, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 4352ac <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x3ec> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR W1, [SP, #176] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| PTRUE P7.B, ALL | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (100.0%) |
| CNTD X19, ALL | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (25.0%) |
| STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| FDUP Z30.D, #80 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (3.1%) |
| FDUP Z29.D, #96 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (3.1%) |
| FDUP Z28.D, #112 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (3.1%) |
| STP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LD1RD {Z31.D}, P7/Z, [X21] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 6 | 0.50 | scal (25.0%) |
| UDIV W0, W3, W1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| LDR X2, [X21, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDP X27, X26, [X21, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X25, X24, [X21, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| MSUB W1, W0, W1, W3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W0, W0, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| LDP X22, X23, [X21, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| SBFM X8, X0, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| LDP X0, X28, [X21, #56] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| STP X0, X2, [SP, #112] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDR W0, [SP, #172] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X2, [X21, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| ADD W1, W1, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| LDR X0, [X21, #72] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| STR X2, [SP, #184] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| SUB W2, W20, W1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W4, W2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| CSEL W2, W4, W2, #9 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR X0, [SP, #104] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| ADD W0, W3, W2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STR W0, [SP, #100] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| CMP W3, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 43527c <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x3bc> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X29, X30, [SP], #192 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X29, X30, [SP], #192 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W4, W4, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W0, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| B 434f38 <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x78> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| Name | Coverage (%) | Time (s) |
|---|---|---|
| ▼PdV_kernel(bool, int, int, int, int, double, clover::Buffer2D | 6.41 | 8.74 |
| ▼Loop 310 - PdV.cpp:50-63 - exec– | 0.00 | 0.00 |
| ▼Loop 309 - PdV.cpp:50-63 - exec– | 0.01 | 0.02 |
| ○Loop 308 - PdV.cpp:51-63 - exec | 6.39 | 8.49 |
