| Function: PdV_kernel(bool, int, int, int, int, double, clover::Buffer2D<double>&, clover::Buffer2D<d ... | Module: exec | Source: PdV.cpp:69-83 [...] | Coverage (incl. loops): 8.39% | (excl. loops): 0.00% |
|---|
| Function: PdV_kernel(bool, int, int, int, int, double, clover::Buffer2D<double>&, clover::Buffer2D<d ... | Module: exec | Source: PdV.cpp:69-83 [...] | Coverage (incl. loops): 8.39% | (excl. loops): 0.00% |
|---|
/home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/build/CloverLeaf2.0-CXX/src/omp/PdV.cpp: 69 - 83 |
-------------------------------------------------------------------------------- |
69: #pragma omp parallel for simd collapse(2) |
70: for (int j = (y_min + 1); j < (y_max + 2); j++) { |
71: for (int i = (x_min + 1); i < (x_max + 2); i++) { |
72: double left_flux = (xarea(i, j) * (xvel0(i, j) + xvel0(i + 0, j + 1) + xvel1(i, j) + xvel1(i + 0, j + 1))) * 0.25 * dt; |
73: double right_flux = |
74: (xarea(i + 1, j + 0) * (xvel0(i + 1, j + 0) + xvel0(i + 1, j + 1) + xvel1(i + 1, j + 0) + xvel1(i + 1, j + 1))) * 0.25 * dt; |
75: double bottom_flux = (yarea(i, j) * (yvel0(i, j) + yvel0(i + 1, j + 0) + yvel1(i, j) + yvel1(i + 1, j + 0))) * 0.25 * dt; |
76: double top_flux = |
77: (yarea(i + 0, j + 1) * (yvel0(i + 0, j + 1) + yvel0(i + 1, j + 1) + yvel1(i + 0, j + 1) + yvel1(i + 1, j + 1))) * 0.25 * dt; |
78: double total_flux = right_flux - left_flux + top_flux - bottom_flux; |
79: double volume_change_s = volume(i, j) / (volume(i, j) + total_flux); |
80: double recip_volume = 1.0 / volume(i, j); |
81: double energy_change = (pressure(i, j) / density0(i, j) + viscosity(i, j) / density0(i, j)) * total_flux * recip_volume; |
82: energy1(i, j) = energy0(i, j) - energy_change; |
83: density1(i, j) = density0(i, j) * volume_change_s; |
/home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/build/CloverLeaf2.0-CXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x43db24 STP X29, X30, [SP, #-496]! |
0x43db28 ADD X29, SP, #0 |
0x43db2c STP X19, X20, [SP, #16] |
0x43db30 ORR X20, XZR, X0 |
0x43db34 STP X21, X22, [SP, #32] |
0x43db38 LDP W22, W2, [X0, #120] |
0x43db3c LDR W0, [X0, #112] |
0x43db40 LDR W1, [X20, #116] |
0x43db44 ADD W3, W2, #2 |
0x43db48 ADD W22, W22, #1 |
0x43db4c ADD W4, W0, #1 |
0x43db50 STR W3, [SP, #376] |
0x43db54 STR W4, [SP, #380] |
0x43db58 CMP W22, W3 |
0x43db5c B.GE 43e1d4 |
0x43db60 ADD W19, W1, #2 |
0x43db64 STP X23, X24, [SP, #48] |
0x43db68 SUB W23, W3, W22 |
0x43db6c CMP W4, W19 |
0x43db70 B.GE 43e1d0 |
0x43db74 SUB W5, W19, W4 |
0x43db78 MUL W24, W23, W5 |
0x43db7c STR W5, [SP, #384] |
0x43db80 BL 410210 |
0x43db84 ORR W21, WZR, W0 |
0x43db88 BL 410240 |
0x43db8c UDIV W7, W24, W21 |
0x43db90 ORR W6, WZR, W0 |
0x43db94 MSUB W8, W7, W21, W24 |
0x43db98 CMP W0, W8 |
0x43db9c B.CC 43e200 |
0x43dba0 MADD W6, W7, W6, W8 |
0x43dba4 ADD W9, W7, W6 |
0x43dba8 STR W9, [SP, #388] |
0x43dbac CMP W6, W9 |
0x43dbb0 B.CS 43e1d0 |
0x43dbb4 LDR W10, [SP, #384] |
0x43dbb8 FMOV D30, #0.2500000 |
0x43dbbc STP X25, X26, [SP, #64] |
0x43dbc0 STP X27, X28, [SP, #80] |
0x43dbc4 UDIV W11, W6, W10 |
0x43dbc8 LDR D31, [X20] |
0x43dbcc LDR X26, [X20, #24] |
0x43dbd0 LDR X27, [X20, #64] |
0x43dbd4 LDR W16, [SP, #380] |
0x43dbd8 FMUL D30, D31, D30 |
0x43dbdc LDR X14, [X20, #8] |
0x43dbe0 MSUB W12, W11, W10, W6 |
0x43dbe4 ADD W13, W11, W22 |
0x43dbe8 SBFM X28, X13, #0, #31 |
0x43dbec LDR X18, [X20, #16] |
0x43dbf0 ADD W17, W12, W16 |
0x43dbf4 LDR X15, [X20, #48] |
0x43dbf8 DUP V29.2D, V30.D[0] |
0x43dbfc STR W17, [SP, #148] |
0x43dc00 LDR X25, [X20, #56] |
0x43dc04 STP X26, X27, [SP, #440] |
0x43dc08 LDR X30, [X20, #32] |
0x43dc0c STR X18, [SP, #416] |
0x43dc10 SUB W18, W19, W17 |
0x43dc14 LDR X0, [X20, #72] |
0x43dc18 STR X14, [SP, #392] |
0x43dc1c STR X15, [SP, #472] |
0x43dc20 STR X25, [SP, #480] |
0x43dc24 STP X30, X0, [SP, #456] |
0x43dc28 LDR X22, [X20, #40] |
0x43dc2c LDR X3, [X20, #80] |
0x43dc30 LDR X4, [X20, #88] |
0x43dc34 STR X22, [SP, #488] |
0x43dc38 LDR X19, [X20, #96] |
0x43dc3c STR X3, [SP, #400] |
0x43dc40 LDR X20, [X20, #104] |
0x43dc44 STR X4, [SP, #408] |
0x43dc48 STR X19, [SP, #424] |
0x43dc4c STR X20, [SP, #432] |
(295) 0x43dc50 CMP W7, W18 |
(295) 0x43dc54 CSEL W1, W7, W18, LS |
(295) 0x43dc58 ADD W23, W6, W1 |
(295) 0x43dc5c STR W1, [SP, #280] |
(295) 0x43dc60 STR W23, [SP, #284] |
(295) 0x43dc64 CMP W6, W23 |
(295) 0x43dc68 B.CS 43e1a8 |
(295) 0x43dc6c LDP X9, X10, [SP, #408] |
(295) 0x43dc70 LDP X21, X8, [SP, #392] |
(295) 0x43dc74 LDP X11, X12, [SP, #424] |
(295) 0x43dc78 LDP X18, X19, [SP, #440] |
(295) 0x43dc7c LDR X2, [X8] |
(295) 0x43dc80 LDR X7, [X9] |
(295) 0x43dc84 LDR X26, [X12] |
(295) 0x43dc88 MUL X5, X2, X28 |
(295) 0x43dc8c LDR X14, [X10] |
(295) 0x43dc90 MUL X6, X7, X28 |
(295) 0x43dc94 LDR X15, [X11] |
(295) 0x43dc98 ADD X24, X2, X5 |
(295) 0x43dc9c MUL X17, X26, X28 |
(295) 0x43dca0 LDR X3, [X18] |
(295) 0x43dca4 ADD X25, X7, X6 |
(295) 0x43dca8 MUL X16, X14, X28 |
(295) 0x43dcac LDR X22, [X21] |
(295) 0x43dcb0 ADD X0, X26, X17 |
(295) 0x43dcb4 MUL X7, X15, X28 |
(295) 0x43dcb8 LDR X2, [SP, #456] |
(295) 0x43dcbc ADD X27, X14, X16 |
(295) 0x43dcc0 MUL X4, X28, X3 |
(295) 0x43dcc4 LDR X13, [SP, #464] |
(295) 0x43dcc8 ADD X30, X15, X7 |
(295) 0x43dccc MUL X26, X28, X22 |
(295) 0x43dcd0 LDR X20, [X19] |
(295) 0x43dcd4 LDR X1, [X2] |
(295) 0x43dcd8 STP X4, X0, [SP, #256] |
(295) 0x43dcdc MUL X23, X28, X20 |
(295) 0x43dce0 STP X30, X27, [SP, #296] |
(295) 0x43dce4 MUL X14, X28, X1 |
(295) 0x43dce8 STP X17, X7, [SP, #312] |
(295) 0x43dcec STP X16, X25, [SP, #328] |
(295) 0x43dcf0 STP X6, X24, [SP, #344] |
(295) 0x43dcf4 STP X5, X26, [SP, #360] |
(295) 0x43dcf8 LDR X15, [X13] |
(295) 0x43dcfc STR X14, [SP, #224] |
(295) 0x43dd00 LDR X3, [X2, #16] |
(295) 0x43dd04 STR X23, [SP, #240] |
(295) 0x43dd08 LDR X14, [X11, #16] |
(295) 0x43dd0c MUL X0, X28, X15 |
(295) 0x43dd10 LDR X15, [X12, #16] |
(295) 0x43dd14 STR X0, [SP, #208] |
(295) 0x43dd18 LDR X4, [X21, #16] |
(295) 0x43dd1c LDP X21, X22, [SP, #472] |
(295) 0x43dd20 STR X3, [SP, #216] |
(295) 0x43dd24 LDR X11, [X13, #16] |
(295) 0x43dd28 STR X15, [SP, #104] |
(295) 0x43dd2c LDR X13, [X21] |
(295) 0x43dd30 STR X14, [SP, #112] |
(295) 0x43dd34 LDR X20, [SP, #488] |
(295) 0x43dd38 STR X4, [SP, #136] |
(295) 0x43dd3c LDR X2, [X22] |
(295) 0x43dd40 MUL X23, X28, X13 |
(295) 0x43dd44 LDR X12, [X21, #16] |
(295) 0x43dd48 LDR X0, [X20] |
(295) 0x43dd4c STP X11, X23, [SP, #152] |
(295) 0x43dd50 MUL X1, X28, X2 |
(295) 0x43dd54 LDR X23, [X8, #16] |
(295) 0x43dd58 MUL X8, X28, X0 |
(295) 0x43dd5c STR X12, [SP, #168] |
(295) 0x43dd60 ORR X0, XZR, X15 |
(295) 0x43dd64 LDR X13, [X9, #16] |
(295) 0x43dd68 STR X1, [SP, #176] |
(295) 0x43dd6c LDR X3, [X10, #16] |
(295) 0x43dd70 STR X8, [SP, #192] |
(295) 0x43dd74 LDR X9, [X18, #16] |
(295) 0x43dd78 LDR X10, [X19, #16] |
(295) 0x43dd7c STP X13, X23, [SP, #120] |
(295) 0x43dd80 LDR X19, [X22, #16] |
(295) 0x43dd84 STR X10, [SP, #232] |
(295) 0x43dd88 LDR X11, [X20, #16] |
(295) 0x43dd8c STR X9, [SP, #248] |
(295) 0x43dd90 LDR W18, [SP, #280] |
(295) 0x43dd94 STR X19, [SP, #184] |
(295) 0x43dd98 STR X3, [SP, #272] |
(295) 0x43dd9c STR X11, [SP, #200] |
(295) 0x43dda0 CMP W18, #1 |
(295) 0x43dda4 B.EQ 43dfe4 |
(295) 0x43dda8 LDRSW X1, [SP, #148] |
(295) 0x43ddac UBFM W21, W18, #1, #31 |
(295) 0x43ddb0 UBFM X3, X21, #60, #59 |
(295) 0x43ddb4 LDR X18, [SP, #192] |
(295) 0x43ddb8 ADD X9, X6, X1 |
(295) 0x43ddbc ADD X8, X25, X1 |
(295) 0x43ddc0 ADD X12, X26, X1 |
(295) 0x43ddc4 ADD X6, X17, X1 |
(295) 0x43ddc8 ADD X17, X13, X9,LSL #3 |
(295) 0x43ddcc ADD X26, X4, X12,LSL #3 |
(295) 0x43ddd0 ADD X11, X5, X1 |
(295) 0x43ddd4 ADD X10, X24, X1 |
(295) 0x43ddd8 ADD X4, X13, X8,LSL #3 |
(295) 0x43dddc LDR X13, [SP, #136] |
(295) 0x43dde0 ADD X7, X7, X1 |
(295) 0x43dde4 ADD X25, X23, X11,LSL #3 |
(295) 0x43dde8 ADD X5, X30, X1 |
(295) 0x43ddec ADD X30, X27, X1 |
(295) 0x43ddf0 ADD X24, X23, X10,LSL #3 |
(295) 0x43ddf4 ADD X23, X17, #8 |
(295) 0x43ddf8 LDR X17, [SP, #264] |
(295) 0x43ddfc ADD X27, X14, X7,LSL #3 |
(295) 0x43de00 ADD X22, X4, #8 |
(295) 0x43de04 ADD X26, X26, #8 |
(295) 0x43de08 ADD X2, X14, X5,LSL #3 |
(295) 0x43de0c LDR X14, [SP, #240] |
(295) 0x43de10 ADD X25, X25, #8 |
(295) 0x43de14 ADD X12, X13, X12,LSL #3 |
(295) 0x43de18 ADD X21, X27, #8 |
(295) 0x43de1c ADD X24, X24, #8 |
(295) 0x43de20 LDR X13, [SP, #224] |
(295) 0x43de24 ADD X15, X15, X6,LSL #3 |
(295) 0x43de28 ADD X19, X2, #8 |
(295) 0x43de2c ADD X4, X17, X1 |
(295) 0x43de30 ADD X17, X16, X1 |
(295) 0x43de34 LDR X27, [SP, #208] |
(295) 0x43de38 ADD X20, X15, #8 |
(295) 0x43de3c LDR X16, [SP, #256] |
(295) 0x43de40 ADD X13, X13, X1 |
(295) 0x43de44 LDR X2, [SP, #128] |
(295) 0x43de48 ADD X15, X27, X1 |
(295) 0x43de4c STR X13, [SP, #288] |
(295) 0x43de50 LDR X13, [SP, #176] |
(295) 0x43de54 ADD X27, X16, X1 |
(295) 0x43de58 ADD X16, X14, X1 |
(295) 0x43de5c LDR X14, [SP, #160] |
(295) 0x43de60 ADD X11, X2, X11,LSL #3 |
(295) 0x43de64 ADD X10, X2, X10,LSL #3 |
(295) 0x43de68 LDR X2, [SP, #104] |
(295) 0x43de6c ADD X13, X13, X1 |
(295) 0x43de70 ADD X14, X14, X1 |
(295) 0x43de74 ADD X1, X18, X1 |
(295) 0x43de78 ADD X18, X0, X4,LSL #3 |
(295) 0x43de7c LDR X0, [SP, #120] |
(295) 0x43de80 ADD X6, X2, X6,LSL #3 |
(295) 0x43de84 ADD X18, X18, #8 |
(295) 0x43de88 ADD X4, X2, X4,LSL #3 |
(295) 0x43de8c LDR X2, [SP, #288] |
(295) 0x43de90 ADD X9, X0, X9,LSL #3 |
(295) 0x43de94 ADD X8, X0, X8,LSL #3 |
(295) 0x43de98 LDR X0, [SP, #112] |
(295) 0x43de9c ADD X7, X0, X7,LSL #3 |
(295) 0x43dea0 ADD X5, X0, X5,LSL #3 |
(295) 0x43dea4 LDR X0, [SP, #272] |
(295) 0x43dea8 ADD X17, X0, X17,LSL #3 |
(295) 0x43deac ADD X30, X0, X30,LSL #3 |
(295) 0x43deb0 LDR X0, [SP, #248] |
(295) 0x43deb4 ADD X27, X0, X27,LSL #3 |
(295) 0x43deb8 LDR X0, [SP, #232] |
(295) 0x43debc ADD X16, X0, X16,LSL #3 |
(295) 0x43dec0 LDR X0, [SP, #216] |
(295) 0x43dec4 ADD X2, X0, X2,LSL #3 |
(295) 0x43dec8 LDR X0, [SP, #152] |
(295) 0x43decc ADD X15, X0, X15,LSL #3 |
(295) 0x43ded0 LDR X0, [SP, #168] |
(295) 0x43ded4 ADD X14, X0, X14,LSL #3 |
(295) 0x43ded8 LDR X0, [SP, #184] |
(295) 0x43dedc ADD X13, X0, X13,LSL #3 |
(295) 0x43dee0 LDR X0, [SP, #200] |
(295) 0x43dee4 ADD X1, X0, X1,LSL #3 |
(295) 0x43dee8 MOVZ X0, #0 |
(296) 0x43deec LDR Q23, [X21, X0] |
(296) 0x43def0 LDR Q5, [X7, X0] |
(296) 0x43def4 LDR Q21, [X20, X0] |
(296) 0x43def8 LDR Q20, [X6, X0] |
(296) 0x43defc LDR Q2, [X19, X0] |
(296) 0x43df00 FADD V3.2D, V23.2D, V5.2D |
(296) 0x43df04 LDR Q1, [X5, X0] |
(296) 0x43df08 LDR Q0, [X18, X0] |
(296) 0x43df0c FADD V4.2D, V21.2D, V20.2D |
(296) 0x43df10 LDR Q19, [X4, X0] |
(296) 0x43df14 LDR Q16, [X10, X0] |
(296) 0x43df18 FADD V17.2D, V2.2D, V1.2D |
(296) 0x43df1c LDR Q7, [X11, X0] |
(296) 0x43df20 FADD V31.2D, V3.2D, V4.2D |
(296) 0x43df24 LDR Q6, [X8, X0] |
(296) 0x43df28 FADD V18.2D, V0.2D, V19.2D |
(296) 0x43df2c LDR Q24, [X9, X0] |
(296) 0x43df30 LDR Q27, [X24, X0] |
(296) 0x43df34 FADD V22.2D, V16.2D, V7.2D |
(296) 0x43df38 LDR Q26, [X25, X0] |
(296) 0x43df3c FADD V23.2D, V17.2D, V18.2D |
(296) 0x43df40 LDR Q28, [X22, X0] |
(296) 0x43df44 FADD V5.2D, V6.2D, V24.2D |
(296) 0x43df48 LDR Q21, [X23, X0] |
(296) 0x43df4c LDR Q2, [X17, X0] |
(296) 0x43df50 FADD V20.2D, V27.2D, V26.2D |
(296) 0x43df54 LDR Q25, [X30, X0] |
(296) 0x43df58 FADD V1.2D, V22.2D, V5.2D |
(296) 0x43df5c FADD V3.2D, V28.2D, V21.2D |
(296) 0x43df60 LDR Q0, [X12, X0] |
(296) 0x43df64 LDR Q4, [X26, X0] |
(296) 0x43df68 FMUL V19.2D, V31.2D, V2.2D |
(296) 0x43df6c FMUL V16.2D, V23.2D, V25.2D |
(296) 0x43df70 LDR Q7, [X15, X0] |
(296) 0x43df74 FADD V31.2D, V20.2D, V3.2D |
(296) 0x43df78 LDR Q6, [X16, X0] |
(296) 0x43df7c FMLA V19.2D, V1.2D, V0.2D |
(296) 0x43df80 LDR Q24, [X2, X0] |
(296) 0x43df84 LDR Q17, [X27, X0] |
(296) 0x43df88 FMLA V16.2D, V31.2D, V4.2D |
(296) 0x43df8c LDR Q27, [X14, X0] |
(296) 0x43df90 FADD V22.2D, V7.2D, V6.2D |
(296) 0x43df94 FSUB V18.2D, V16.2D, V19.2D |
(296) 0x43df98 FDIV V26.2D, V22.2D, V24.2D |
(296) 0x43df9c FDIV V23.2D, V26.2D, V17.2D |
(296) 0x43dfa0 FMUL V28.2D, V18.2D, V29.2D |
(296) 0x43dfa4 FMLS V27.2D, V23.2D, V28.2D |
(296) 0x43dfa8 FADD V5.2D, V17.2D, V28.2D |
(296) 0x43dfac STR Q27, [X13, X0] |
(296) 0x43dfb0 FDIV V21.2D, V17.2D, V5.2D |
(296) 0x43dfb4 LDR Q20, [X2, X0] |
(296) 0x43dfb8 FMUL V2.2D, V20.2D, V21.2D |
(296) 0x43dfbc STR Q2, [X1, X0] |
(296) 0x43dfc0 ADD X0, X0, #16 |
(296) 0x43dfc4 CMP X0, X3 |
(296) 0x43dfc8 B.NE 43deec |
(295) 0x43dfcc LDR W3, [SP, #280] |
(295) 0x43dfd0 TBZ W3, #0, 43e1a4 |
(295) 0x43dfd4 LDR W24, [SP, #148] |
(295) 0x43dfd8 AND W25, W3, #0xfffffffe |
(295) 0x43dfdc ADD W23, W24, W25 |
(295) 0x43dfe0 STR W23, [SP, #148] |
(295) 0x43dfe4 LDR W22, [SP, #148] |
(295) 0x43dfe8 LDP X26, X25, [SP, #104] |
(295) 0x43dfec LDP X19, X20, [SP, #312] |
(295) 0x43dff0 ADD W21, W22, #1 |
(295) 0x43dff4 SBFM X30, X22, #0, #31 |
(295) 0x43dff8 SBFM X27, X21, #0, #31 |
(295) 0x43dffc LDP X2, X5, [SP, #352] |
(295) 0x43e000 ADD X4, X19, X27 |
(295) 0x43e004 LDP X24, X23, [SP, #120] |
(295) 0x43e008 ADD X15, X5, X27 |
(295) 0x43e00c ADD X21, X2, X30 |
(295) 0x43e010 LDR X12, [SP, #264] |
(295) 0x43e014 LDR X11, [SP, #296] |
(295) 0x43e018 LDR X16, [SP, #336] |
(295) 0x43e01c ADD X9, X12, X27 |
(295) 0x43e020 ADD X18, X12, X30 |
(295) 0x43e024 LDR D31, [X26, X4,LSL #3] |
(295) 0x43e028 ADD X4, X2, X27 |
(295) 0x43e02c ADD X12, X5, X30 |
(295) 0x43e030 ADD X8, X11, X27 |
(295) 0x43e034 ADD X6, X11, X30 |
(295) 0x43e038 LDR D1, [X26, X9,LSL #3] |
(295) 0x43e03c ADD X14, X16, X27 |
(295) 0x43e040 ADD X11, X16, X30 |
(295) 0x43e044 LDR X9, [SP, #344] |
(295) 0x43e048 LDR X10, [SP, #328] |
(295) 0x43e04c LDR D7, [X23, X15,LSL #3] |
(295) 0x43e050 ADD X16, X9, X27 |
(295) 0x43e054 LDR D16, [X23, X4,LSL #3] |
(295) 0x43e058 ADD X7, X10, X30 |
(295) 0x43e05c ADD X10, X19, X30 |
(295) 0x43e060 LDR D6, [X24, X14,LSL #3] |
(295) 0x43e064 ADD X19, X20, X27 |
(295) 0x43e068 ADD X20, X20, X30 |
(295) 0x43e06c LDR D27, [X24, X16,LSL #3] |
(295) 0x43e070 LDR X1, [SP, #256] |
(295) 0x43e074 FADD D17, D16, D7 |
(295) 0x43e078 LDR X17, [SP, #208] |
(295) 0x43e07c LDR D25, [X26, X18,LSL #3] |
(295) 0x43e080 FADD D23, D6, D27 |
(295) 0x43e084 LDR D3, [X25, X8,LSL #3] |
(295) 0x43e088 ADD X22, X17, X30 |
(295) 0x43e08c ADD X17, X9, X30 |
(295) 0x43e090 LDR D4, [X25, X6,LSL #3] |
(295) 0x43e094 ADD X6, X1, X30 |
(295) 0x43e098 LDR X13, [SP, #240] |
(295) 0x43e09c FADD D5, D17, D23 |
(295) 0x43e0a0 LDR X5, [SP, #160] |
(295) 0x43e0a4 FADD D0, D1, D3 |
(295) 0x43e0a8 LDR X14, [SP, #176] |
(295) 0x43e0ac FADD D19, D25, D4 |
(295) 0x43e0b0 ADD X18, X13, X30 |
(295) 0x43e0b4 LDR X1, [SP, #192] |
(295) 0x43e0b8 ADD X5, X5, X30 |
(295) 0x43e0bc LDR X2, [SP, #224] |
(295) 0x43e0c0 ADD X4, X14, X30 |
(295) 0x43e0c4 LDR X3, [SP, #304] |
(295) 0x43e0c8 FADD D24, D19, D0 |
(295) 0x43e0cc LDR X0, [SP, #368] |
(295) 0x43e0d0 ADD X15, X2, X30 |
(295) 0x43e0d4 LDR D22, [X25, X19,LSL #3] |
(295) 0x43e0d8 UBFM X13, X15, #61, #60 |
(295) 0x43e0dc ADD X8, X3, X30 |
(295) 0x43e0e0 ADD X3, X1, X30 |
(295) 0x43e0e4 LDR D26, [X26, X10,LSL #3] |
(295) 0x43e0e8 ADD X27, X0, X27 |
(295) 0x43e0ec ADD X30, X0, X30 |
(295) 0x43e0f0 LDR D21, [X23, X21,LSL #3] |
(295) 0x43e0f4 LDR D18, [X23, X12,LSL #3] |
(295) 0x43e0f8 FADD D20, D31, D22 |
(295) 0x43e0fc LDR D28, [X25, X20,LSL #3] |
(295) 0x43e100 LDR X26, [SP, #152] |
(295) 0x43e104 LDR X25, [SP, #232] |
(295) 0x43e108 FADD D3, D21, D18 |
(295) 0x43e10c LDR X23, [SP, #272] |
(295) 0x43e110 FADD D1, D26, D28 |
(295) 0x43e114 LDR D2, [X24, X11,LSL #3] |
(295) 0x43e118 LDR D25, [X24, X17,LSL #3] |
(295) 0x43e11c LDR D31, [X26, X22,LSL #3] |
(295) 0x43e120 FADD D4, D1, D20 |
(295) 0x43e124 LDR D17, [X23, X7,LSL #3] |
(295) 0x43e128 LDR D7, [X25, X18,LSL #3] |
(295) 0x43e12c FADD D0, D2, D25 |
(295) 0x43e130 LDR X7, [SP, #136] |
(295) 0x43e134 LDR X22, [SP, #216] |
(295) 0x43e138 LDR D16, [X23, X8,LSL #3] |
(295) 0x43e13c FADD D6, D31, D7 |
(295) 0x43e140 FADD D19, D3, D0 |
(295) 0x43e144 LDR D22, [X7, X27,LSL #3] |
(295) 0x43e148 LDR D18, [X22, X13] |
(295) 0x43e14c LDR D26, [X7, X30,LSL #3] |
(295) 0x43e150 FMUL D24, D24, D16 |
(295) 0x43e154 FMUL D23, D5, D22 |
(295) 0x43e158 LDR X21, [SP, #248] |
(295) 0x43e15c FDIV D5, D6, D18 |
(295) 0x43e160 LDR X12, [SP, #168] |
(295) 0x43e164 FMSUB D27, D4, D17, D24 |
(295) 0x43e168 LDR X11, [SP, #184] |
(295) 0x43e16c FMSUB D28, D19, D26, D23 |
(295) 0x43e170 LDR D2, [X21, X6,LSL #3] |
(295) 0x43e174 LDR D4, [X12, X5,LSL #3] |
(295) 0x43e178 LDR X10, [SP, #200] |
(295) 0x43e17c FADD D21, D28, D27 |
(295) 0x43e180 FDIV D1, D5, D2 |
(295) 0x43e184 FMUL D20, D21, D30 |
(295) 0x43e188 FADD D25, D2, D20 |
(295) 0x43e18c FDIV D3, D2, D25 |
(295) 0x43e190 FMSUB D0, D1, D20, D4 |
(295) 0x43e194 STR D0, [X11, X4,LSL #3] |
(295) 0x43e198 LDR D19, [X22, X13] |
(295) 0x43e19c FMUL D31, D19, D3 |
(295) 0x43e1a0 STR D31, [X10, X3,LSL #3] |
(295) 0x43e1a4 LDR W6, [SP, #284] |
(295) 0x43e1a8 ADD X28, X28, #1 |
(295) 0x43e1ac LDR W19, [SP, #376] |
(295) 0x43e1b0 CMP W19, W28 |
(295) 0x43e1b4 B.LE 43e1e4 |
(295) 0x43e1b8 LDR W20, [SP, #388] |
(295) 0x43e1bc LDR W0, [SP, #380] |
(295) 0x43e1c0 LDR W18, [SP, #384] |
(295) 0x43e1c4 SUB W7, W20, W6 |
(295) 0x43e1c8 STR W0, [SP, #148] |
(295) 0x43e1cc B 43dc50 |
0x43e1d0 LDP X23, X24, [SP, #48] |
0x43e1d4 LDP X19, X20, [SP, #16] |
0x43e1d8 LDP X21, X22, [SP, #32] |
0x43e1dc LDP X29, X30, [SP], #496 |
0x43e1e0 RET |
0x43e1e4 LDP X23, X24, [SP, #48] |
0x43e1e8 LDP X25, X26, [SP, #64] |
0x43e1ec LDP X27, X28, [SP, #80] |
0x43e1f0 LDP X19, X20, [SP, #16] |
0x43e1f4 LDP X21, X22, [SP, #32] |
0x43e1f8 LDP X29, X30, [SP], #496 |
0x43e1fc RET |
0x43e200 ADD W7, W7, #1 |
0x43e204 MOVZ W8, #0 |
0x43e208 B 43dba0 |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►98.42+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►1.58+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | PdV(global_variables&, bool) | PdV.cpp:99 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1076 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| Path / |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run gcc_4
| Source file and lines | PdV.cpp:69-83 |
| Module | exec |
| nb instructions | 90 |
| nb uops | 90 |
| loop length | 360 |
| used w registers | 23 |
| used x registers | 20 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 2 |
| used q registers | 0 |
| used v registers | 2 |
| used z registers | 0 |
| nb stack references | 34 |
| micro-operation queue | 11.25 cycles |
| front end | 11.25 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 4.50 | 4.50 | 7.50 | 7.50 | 7.50 | 7.50 | 0.75 | 0.75 | 0.75 | 0.75 | 17.00 | 17.00 | 17.00 | 11.00 | 11.00 |
| cycles | 4.50 | 4.50 | 7.50 | 7.50 | 7.50 | 7.50 | 0.75 | 0.75 | 0.75 | 0.75 | 17.00 | 17.00 | 17.00 | 11.00 | 11.00 |
| Cycles executing div or sqrt instructions | 10.00-25.00 |
| Front-end | 11.25 |
| Dispatch | 17.00 |
| DIV/SQRT | 10.00-25.00 |
| Overall L1 | 17.00-25.00 |
| all | 1% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| other | 10% |
| all | 0% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | 0% |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 1% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | 0% |
| other | 10% |
| all | 26% |
| load | 31% |
| store | 31% |
| mul | 12% |
| add-sub | 13% |
| fma | 12% |
| other | 25% |
| all | 25% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | 25% |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 25% |
| all | 26% |
| load | 31% |
| store | 31% |
| mul | 18% |
| add-sub | 13% |
| fma | 12% |
| div/sqrt | 12% |
| other | 26% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #-496]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X20, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDP W22, W2, [X0, #120] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDR W0, [X0, #112] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR W1, [X20, #116] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| ADD W3, W2, #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W22, W22, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W4, W0, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR W3, [SP, #376] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| STR W4, [SP, #380] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| CMP W22, W3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 43e1d4 <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.1+0x6b0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W19, W1, #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| SUB W23, W3, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W4, W19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 43e1d0 <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.1+0x6ac> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| SUB W5, W19, W4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MUL W24, W23, W5 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| STR W5, [SP, #384] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| BL 410210 <@plt_start@+0x1f0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W21, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410240 <@plt_start@+0x220> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| UDIV W7, W24, W21 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| ORR W6, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MSUB W8, W7, W21, W24 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| CMP W0, W8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CC 43e200 <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.1+0x6dc> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W6, W7, W6, W8 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W9, W7, W6 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR W9, [SP, #388] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| CMP W6, W9 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 43e1d0 <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.1+0x6ac> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR W10, [SP, #384] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| FMOV D30, #0.2500000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| UDIV W11, W6, W10 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | scal (12.5%) |
| LDR D31, [X20] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| LDR X26, [X20, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDR X27, [X20, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDR W16, [SP, #380] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| FMUL D30, D31, D30 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (25.0%) |
| LDR X14, [X20, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| MSUB W12, W11, W10, W6 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W13, W11, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SBFM X28, X13, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| LDR X18, [X20, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| ADD W17, W12, W16 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| LDR X15, [X20, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| DUP V29.2D, V30.D[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | vect (50.0%) |
| STR W17, [SP, #148] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| LDR X25, [X20, #56] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| STP X26, X27, [SP, #440] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDR X30, [X20, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| STR X18, [SP, #416] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| SUB W18, W19, W17 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| LDR X0, [X20, #72] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| STR X14, [SP, #392] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| STR X15, [SP, #472] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| STR X25, [SP, #480] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| STP X30, X0, [SP, #456] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDR X22, [X20, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDR X3, [X20, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDR X4, [X20, #88] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| STR X22, [SP, #488] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| LDR X19, [X20, #96] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| STR X3, [SP, #400] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| LDR X20, [X20, #104] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| STR X4, [SP, #408] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| STR X19, [SP, #424] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| STR X20, [SP, #432] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #496 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #496 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W7, W7, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W8, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| B 43dba0 <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.1+0x7c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run gcc_4
| Source file and lines | PdV.cpp:69-83 |
| Module | exec |
| nb instructions | 90 |
| nb uops | 90 |
| loop length | 360 |
| used w registers | 23 |
| used x registers | 20 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 2 |
| used q registers | 0 |
| used v registers | 2 |
| used z registers | 0 |
| nb stack references | 34 |
| micro-operation queue | 11.25 cycles |
| front end | 11.25 cycles |
| | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 4.50 | 4.50 | 7.50 | 7.50 | 7.50 | 7.50 | 0.75 | 0.75 | 0.75 | 0.75 | 17.00 | 17.00 | 17.00 | 11.00 | 11.00 |
| cycles | 4.50 | 4.50 | 7.50 | 7.50 | 7.50 | 7.50 | 0.75 | 0.75 | 0.75 | 0.75 | 17.00 | 17.00 | 17.00 | 11.00 | 11.00 |
| Cycles executing div or sqrt instructions | 10.00-25.00 |
| Front-end | 11.25 |
| Dispatch | 17.00 |
| DIV/SQRT | 10.00-25.00 |
| Overall L1 | 17.00-25.00 |
| all | 1% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| other | 10% |
| all | 0% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | 0% |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 1% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | 0% |
| other | 10% |
| all | 26% |
| load | 31% |
| store | 31% |
| mul | 12% |
| add-sub | 13% |
| fma | 12% |
| other | 25% |
| all | 25% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | 25% |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 25% |
| all | 26% |
| load | 31% |
| store | 31% |
| mul | 18% |
| add-sub | 13% |
| fma | 12% |
| div/sqrt | 12% |
| other | 26% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #528]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X20, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDP W22, W2, [X0, #120] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDR W0, [X0, #112] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR W1, [X20, #116] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| ADD W3, W2, #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W22, W22, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W4, W0, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR W3, [SP, #376] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| STR W4, [SP, #380] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| CMP W22, W3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 43e1d4 <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.1+0x6b0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W19, W1, #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| SUB W23, W3, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W4, W19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 43e1d0 <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.1+0x6ac> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| SUB W5, W19, W4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MUL W24, W23, W5 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| STR W5, [SP, #384] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| BL 410210 <@plt_start@+0x1f0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W21, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410240 <@plt_start@+0x220> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| UDIV W7, W24, W21 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| ORR W6, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MSUB W8, W7, W21, W24 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| CMP W0, W8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CC 43e200 <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.1+0x6dc> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W6, W7, W6, W8 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W9, W7, W6 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR W9, [SP, #388] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| CMP W6, W9 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 43e1d0 <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.1+0x6ac> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR W10, [SP, #384] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| FMOV D30, #0.2500000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| UDIV W11, W6, W10 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | scal (12.5%) |
| LDR D31, [X20] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| LDR X26, [X20, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDR X27, [X20, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDR W16, [SP, #380] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| FMUL D30, D31, D30 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (25.0%) |
| LDR X14, [X20, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| MSUB W12, W11, W10, W6 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W13, W11, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SBFM X28, X13, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| LDR X18, [X20, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| ADD W17, W12, W16 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| LDR X15, [X20, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| DUP V29.2D, V30.D[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | vect (50.0%) |
| STR W17, [SP, #148] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| LDR X25, [X20, #56] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| STP X26, X27, [SP, #440] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDR X30, [X20, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| STR X18, [SP, #416] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| SUB W18, W19, W17 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| LDR X0, [X20, #72] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| STR X14, [SP, #392] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| STR X15, [SP, #472] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| STR X25, [SP, #480] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| STP X30, X0, [SP, #456] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDR X22, [X20, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDR X3, [X20, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDR X4, [X20, #88] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| STR X22, [SP, #488] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| LDR X19, [X20, #96] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| STR X3, [SP, #400] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| LDR X20, [X20, #104] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| STR X4, [SP, #408] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| STR X19, [SP, #424] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| STR X20, [SP, #432] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #496 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #496 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W7, W7, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W8, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| B 43dba0 <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.1+0x7c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| Name | Coverage (%) | Time (s) |
|---|---|---|
| ▼PdV_kernel(bool, int, int, int, int, double, clover::Buffer2D | 8.39 | 11.28 |
| ▼Loop 295 - PdV.cpp:71-83 - exec– | 0.02 | 0.03 |
| ○Loop 296 - PdV.cpp:72-83 - exec | 8.37 | 10.94 |
