| Function: accelerate_kernel(int, int, int, int, double, clover::Buffer2D<double>&, clover::Buffer2D< ... | Module: exec | Source: accelerate.cpp:40-53 [...] | Coverage (incl. loops): 6.44% | (excl. loops): 0.01% |
|---|
| Function: accelerate_kernel(int, int, int, int, double, clover::Buffer2D<double>&, clover::Buffer2D< ... | Module: exec | Source: accelerate.cpp:40-53 [...] | Coverage (incl. loops): 6.44% | (excl. loops): 0.01% |
|---|
/home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/build/CloverLeaf2.0-CXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
/home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/build/CloverLeaf2.0-CXX/src/omp/accelerate.cpp: 40 - 53 |
-------------------------------------------------------------------------------- |
40: #pragma omp parallel for simd collapse(2) |
41: for (int j = (y_min + 1); j < (y_max + 1 + 2); j++) { |
42: for (int i = (x_min + 1); i < (x_max + 1 + 2); i++) { |
43: double stepbymass_s = halfdt / ((density0(i - 1, j - 1) * volume(i - 1, j - 1) + density0(i - 1, j + 0) * volume(i - 1, j + 0) + |
44: density0(i, j) * volume(i, j) + density0(i + 0, j - 1) * volume(i + 0, j - 1)) * |
45: 0.25); |
46: xvel1(i, j) = xvel0(i, j) - stepbymass_s * (xarea(i, j) * (pressure(i, j) - pressure(i - 1, j + 0)) + |
47: xarea(i + 0, j - 1) * (pressure(i + 0, j - 1) - pressure(i - 1, j - 1))); |
48: yvel1(i, j) = yvel0(i, j) - stepbymass_s * (yarea(i, j) * (pressure(i, j) - pressure(i + 0, j - 1)) + |
49: yarea(i - 1, j + 0) * (pressure(i - 1, j + 0) - pressure(i - 1, j - 1))); |
50: xvel1(i, j) = xvel1(i, j) - stepbymass_s * (xarea(i, j) * (viscosity(i, j) - viscosity(i - 1, j + 0)) + |
51: xarea(i + 0, j - 1) * (viscosity(i + 0, j - 1) - viscosity(i - 1, j - 1))); |
52: yvel1(i, j) = yvel1(i, j) - stepbymass_s * (yarea(i, j) * (viscosity(i, j) - viscosity(i + 0, j - 1)) + |
53: yarea(i - 1, j + 0) * (viscosity(i - 1, j + 0) - viscosity(i - 1, j - 1))); |
0x42b5c0 STP X29, X30, [SP, #640]! |
0x42b5c4 ADD X29, SP, #0 |
0x42b5c8 STP X19, X20, [SP, #16] |
0x42b5cc ORR X20, XZR, X0 |
0x42b5d0 STP X21, X22, [SP, #32] |
0x42b5d4 LDP W22, W2, [X0, #96] |
0x42b5d8 LDR W0, [X0, #88] |
0x42b5dc LDR W1, [X20, #92] |
0x42b5e0 ADD W3, W2, #3 |
0x42b5e4 ADD W22, W22, #1 |
0x42b5e8 ADD W4, W0, #1 |
0x42b5ec STR W3, [SP, #288] |
0x42b5f0 STR W4, [SP, #292] |
0x42b5f4 CMP W22, W3 |
0x42b5f8 B.GE 42bc30 |
0x42b5fc ADD W19, W1, #3 |
0x42b600 STP X23, X24, [SP, #48] |
0x42b604 SUB W23, W3, W22 |
0x42b608 CMP W4, W19 |
0x42b60c B.GE 42bc2c |
0x42b610 SUB W5, W19, W4 |
0x42b614 MUL W24, W23, W5 |
0x42b618 STR W5, [SP, #296] |
0x42b61c BL 410210 |
0x42b620 ORR W21, WZR, W0 |
0x42b624 BL 410240 |
0x42b628 UDIV W7, W24, W21 |
0x42b62c ORR W6, WZR, W0 |
0x42b630 MSUB W8, W7, W21, W24 |
0x42b634 CMP W0, W8 |
0x42b638 B.CC 42bc5c |
0x42b63c MADD W6, W7, W6, W8 |
0x42b640 ADD W9, W7, W6 |
0x42b644 STR W9, [SP, #300] |
0x42b648 CMP W6, W9 |
0x42b64c B.CS 42bc2c |
0x42b650 LDR W10, [SP, #296] |
0x42b654 FMOV D30, #4.0000000 |
0x42b658 STP X25, X26, [SP, #64] |
0x42b65c STP X27, X28, [SP, #80] |
0x42b660 UDIV W11, W6, W10 |
0x42b664 LDR D3, [X20, #80] |
0x42b668 LDR W16, [SP, #292] |
0x42b66c LDR X14, [X20] |
0x42b670 FMUL D30, D3, D30 |
0x42b674 LDR X18, [X20, #8] |
0x42b678 MSUB W12, W11, W10, W6 |
0x42b67c LDR X26, [X20, #16] |
0x42b680 ADD W13, W11, W22 |
0x42b684 SBFM X27, X13, #0, #31 |
0x42b688 STR X14, [SP, #328] |
0x42b68c LDR X30, [X20, #24] |
0x42b690 ADD W17, W12, W16 |
0x42b694 STR X18, [SP, #360] |
0x42b698 LDR X22, [X20, #32] |
0x42b69c DUP V29.2D, V30.D[0] |
0x42b6a0 STR W17, [SP, #100] |
0x42b6a4 SUB W17, W19, W17 |
0x42b6a8 LDR X15, [X20, #40] |
0x42b6ac STR X30, [SP, #304] |
0x42b6b0 LDR X25, [X20, #48] |
0x42b6b4 STR X26, [SP, #312] |
0x42b6b8 LDR X28, [X20, #56] |
0x42b6bc STR X22, [SP, #336] |
0x42b6c0 LDR X0, [X20, #64] |
0x42b6c4 STR X25, [SP, #320] |
0x42b6c8 LDR X20, [X20, #72] |
0x42b6cc STR X28, [SP, #352] |
0x42b6d0 STR X15, [SP, #376] |
0x42b6d4 STR X0, [SP, #344] |
0x42b6d8 STR X20, [SP, #368] |
0x42b6dc HINT #0 |
(156) 0x42b6e0 CMP W7, W17 |
(156) 0x42b6e4 CSEL W3, W7, W17, #9 |
(156) 0x42b6e8 ADD W4, W6, W3 |
(156) 0x42b6ec STR W3, [SP, #176] |
(156) 0x42b6f0 STR W4, [SP, #180] |
(156) 0x42b6f4 CMP W6, W4 |
(156) 0x42b6f8 B.CS 42bc04 |
(156) 0x42b6fc LDR X11, [SP, #376] |
(156) 0x42b700 SUB W2, W27, #1 |
(156) 0x42b704 LDP X5, X6, [SP, #304] |
(156) 0x42b708 SBFM X19, X2, #0, #31 |
(156) 0x42b70c LDP X9, X7, [SP, #320] |
(156) 0x42b710 LDP X10, X12, [SP, #336] |
(156) 0x42b714 LDP X13, X14, [SP, #360] |
(156) 0x42b718 LDR X1, [X5] |
(156) 0x42b71c LDR X23, [X6] |
(156) 0x42b720 LDR X24, [X7] |
(156) 0x42b724 MUL X16, X27, X1 |
(156) 0x42b728 LDR X25, [X10] |
(156) 0x42b72c MUL X15, X19, X1 |
(156) 0x42b730 MUL X17, X19, X23 |
(156) 0x42b734 LDR X8, [X9] |
(156) 0x42b738 MUL X20, X27, X23 |
(156) 0x42b73c MUL X21, X19, X24 |
(156) 0x42b740 STR X15, [SP, #280] |
(156) 0x42b744 LDR X26, [X11] |
(156) 0x42b748 MUL X22, X24, X27 |
(156) 0x42b74c MUL X23, X19, X25 |
(156) 0x42b750 LDR X4, [SP, #352] |
(156) 0x42b754 MUL X24, X25, X27 |
(156) 0x42b758 MUL X18, X27, X8 |
(156) 0x42b75c LDR X28, [X12] |
(156) 0x42b760 MUL X25, X19, X26 |
(156) 0x42b764 STR X23, [SP, #232] |
(156) 0x42b768 MUL X8, X26, X27 |
(156) 0x42b76c STR X18, [SP, #160] |
(156) 0x42b770 LDR X0, [X4] |
(156) 0x42b774 STP X24, X22, [SP, #216] |
(156) 0x42b778 MUL X30, X27, X28 |
(156) 0x42b77c STP X8, X25, [SP, #184] |
(156) 0x42b780 MUL X2, X27, X0 |
(156) 0x42b784 STP X21, X20, [SP, #248] |
(156) 0x42b788 STP X16, X17, [SP, #264] |
(156) 0x42b78c LDR X3, [X13] |
(156) 0x42b790 STR X2, [SP, #136] |
(156) 0x42b794 LDR X19, [X14] |
(156) 0x42b798 STR X30, [SP, #144] |
(156) 0x42b79c LDR X18, [X5, #16] |
(156) 0x42b7a0 MUL X3, X27, X3 |
(156) 0x42b7a4 LDR X1, [X4, #16] |
(156) 0x42b7a8 MUL X5, X27, X19 |
(156) 0x42b7ac STR X3, [SP, #200] |
(156) 0x42b7b0 LDR X30, [X6, #16] |
(156) 0x42b7b4 STR X18, [SP, #168] |
(156) 0x42b7b8 LDR X6, [X9, #16] |
(156) 0x42b7bc LDR X28, [X10, #16] |
(156) 0x42b7c0 LDR X9, [X7, #16] |
(156) 0x42b7c4 STR X6, [SP, #152] |
(156) 0x42b7c8 LDR X12, [X12, #16] |
(156) 0x42b7cc LDR X10, [X13, #16] |
(156) 0x42b7d0 STR X9, [SP, #240] |
(156) 0x42b7d4 LDR X7, [X14, #16] |
(156) 0x42b7d8 STR X12, [SP, #208] |
(156) 0x42b7dc LDR W13, [SP, #176] |
(156) 0x42b7e0 LDR X26, [X11, #16] |
(156) 0x42b7e4 STP X7, X5, [SP, #104] |
(156) 0x42b7e8 STP X10, X1, [SP, #120] |
(156) 0x42b7ec CMP W13, #1 |
(156) 0x42b7f0 B.EQ 42ba38 |
(156) 0x42b7f4 UBFM W14, W13, #1, #31 |
(156) 0x42b7f8 ORR X11, XZR, X6 |
(156) 0x42b7fc LDRSW X1, [SP, #100] |
(156) 0x42b800 UBFM X19, X14, #60, #59 |
(156) 0x42b804 MOVZ X0, #0 |
(156) 0x42b808 ADD X2, X15, X1 |
(156) 0x42b80c ADD X15, X20, X1 |
(156) 0x42b810 ADD X20, X23, X1 |
(156) 0x42b814 ADD X23, X24, X1 |
(156) 0x42b818 ADD X24, X3, X1 |
(156) 0x42b81c ADD X17, X17, X1 |
(156) 0x42b820 UBFM X5, X24, #61, #60 |
(156) 0x42b824 LDR X24, [SP, #160] |
(156) 0x42b828 ADD X25, X25, X1 |
(156) 0x42b82c UBFM X18, X2, #61, #60 |
(156) 0x42b830 UBFM X10, X17, #61, #60 |
(156) 0x42b834 UBFM X2, X15, #61, #60 |
(156) 0x42b838 ADD X21, X21, X1 |
(156) 0x42b83c ADD X13, X22, X1 |
(156) 0x42b840 UBFM X4, X25, #61, #60 |
(156) 0x42b844 ADD X14, X9, X21,LSL #3 |
(156) 0x42b848 SUB X22, X10, #8 |
(156) 0x42b84c ADD X13, X9, X13,LSL #3 |
(156) 0x42b850 ADD X17, X30, X10 |
(156) 0x42b854 LDR X10, [SP, #144] |
(156) 0x42b858 SUB X9, X2, #8 |
(156) 0x42b85c ADD X25, X24, X1 |
(156) 0x42b860 UBFM X7, X20, #61, #60 |
(156) 0x42b864 ADD X20, X30, X9 |
(156) 0x42b868 ADD X25, X11, X25,LSL #3 |
(156) 0x42b86c LDR X24, [SP, #136] |
(156) 0x42b870 ADD X15, X30, X2 |
(156) 0x42b874 ADD X3, X8, X1 |
(156) 0x42b878 ADD X16, X16, X1 |
(156) 0x42b87c UBFM X6, X23, #61, #60 |
(156) 0x42b880 LDR X9, [SP, #128] |
(156) 0x42b884 UBFM X16, X16, #61, #60 |
(156) 0x42b888 SUB X23, X18, #8 |
(156) 0x42b88c ADD X2, X10, X1 |
(156) 0x42b890 SUB X21, X16, #8 |
(156) 0x42b894 LDR X8, [SP, #104] |
(156) 0x42b898 UBFM X3, X3, #61, #60 |
(156) 0x42b89c SUB X11, X6, #8 |
(156) 0x42b8a0 ADD X10, X24, X1 |
(156) 0x42b8a4 ADD X2, X12, X2,LSL #3 |
(156) 0x42b8a8 SUB X12, X7, #8 |
(156) 0x42b8ac ADD X22, X30, X22 |
(156) 0x42b8b0 ADD X11, X28, X11 |
(156) 0x42b8b4 ADD X24, X9, X10,LSL #3 |
(156) 0x42b8b8 LDR X10, [SP, #112] |
(156) 0x42b8bc SUB X9, X4, #8 |
(156) 0x42b8c0 ADD X6, X28, X6 |
(156) 0x42b8c4 ADD X12, X28, X12 |
(156) 0x42b8c8 ADD X7, X28, X7 |
(156) 0x42b8cc ADD X9, X26, X9 |
(156) 0x42b8d0 ADD X4, X26, X4 |
(156) 0x42b8d4 ADD X1, X10, X1 |
(156) 0x42b8d8 SUB X10, X5, #8 |
(156) 0x42b8dc ADD X1, X8, X1,LSL #3 |
(156) 0x42b8e0 LDR X8, [SP, #168] |
(156) 0x42b8e4 ADD X23, X8, X23 |
(156) 0x42b8e8 ADD X18, X8, X18 |
(156) 0x42b8ec ADD X21, X8, X21 |
(156) 0x42b8f0 ADD X16, X8, X16 |
(156) 0x42b8f4 LDR X8, [SP, #120] |
(156) 0x42b8f8 ADD X10, X8, X10 |
(156) 0x42b8fc ADD X5, X8, X5 |
(156) 0x42b900 SUB X8, X3, #8 |
(156) 0x42b904 ADD X3, X26, X3 |
(156) 0x42b908 ADD X8, X26, X8 |
(157) 0x42b90c LDR Q19, [X20, X0] |
(157) 0x42b910 LDR Q25, [X21, X0] |
(157) 0x42b914 LDR Q22, [X17, X0] |
(157) 0x42b918 LDR Q31, [X18, X0] |
(157) 0x42b91c LDR Q28, [X22, X0] |
(157) 0x42b920 FMUL V1.2D, V19.2D, V25.2D |
(157) 0x42b924 LDR Q2, [X23, X0] |
(157) 0x42b928 LDR Q0, [X15, X0] |
(157) 0x42b92c FMUL V4.2D, V22.2D, V31.2D |
(157) 0x42b930 LDR Q21, [X16, X0] |
(157) 0x42b934 LDR Q18, [X11, X0] |
(157) 0x42b938 FMLA V1.2D, V28.2D, V2.2D |
(157) 0x42b93c LDR Q17, [X6, X0] |
(157) 0x42b940 FMLA V4.2D, V0.2D, V21.2D |
(157) 0x42b944 LDR Q27, [X12, X0] |
(157) 0x42b948 LDR Q20, [X7, X0] |
(157) 0x42b94c LDR Q16, [X13, X0] |
(157) 0x42b950 FSUB V5.2D, V18.2D, V17.2D |
(157) 0x42b954 LDR Q6, [X14, X0] |
(157) 0x42b958 FSUB V23.2D, V27.2D, V20.2D |
(157) 0x42b95c LDR Q7, [X25, X0] |
(157) 0x42b960 FADD V24.2D, V4.2D, V1.2D |
(157) 0x42b964 FMUL V26.2D, V5.2D, V16.2D |
(157) 0x42b968 FMLA V26.2D, V23.2D, V6.2D |
(157) 0x42b96c FDIV V31.2D, V29.2D, V24.2D |
(157) 0x42b970 FMLA V7.2D, V26.2D, V31.2D |
(157) 0x42b974 STR Q7, [X2, X0] |
(157) 0x42b978 LDR Q3, [X7, X0] |
(157) 0x42b97c LDR Q19, [X6, X0] |
(157) 0x42b980 LDR Q25, [X12, X0] |
(157) 0x42b984 LDR Q22, [X11, X0] |
(157) 0x42b988 LDR Q28, [X5, X0] |
(157) 0x42b98c FSUB V2.2D, V3.2D, V19.2D |
(157) 0x42b990 LDR Q0, [X10, X0] |
(157) 0x42b994 FSUB V4.2D, V25.2D, V22.2D |
(157) 0x42b998 LDR Q1, [X24, X0] |
(157) 0x42b99c FMUL V21.2D, V2.2D, V28.2D |
(157) 0x42b9a0 FMLA V21.2D, V0.2D, V4.2D |
(157) 0x42b9a4 FMLA V1.2D, V31.2D, V21.2D |
(157) 0x42b9a8 STR Q1, [X1, X0] |
(157) 0x42b9ac LDR Q18, [X8, X0] |
(157) 0x42b9b0 LDR Q17, [X3, X0] |
(157) 0x42b9b4 LDR Q27, [X9, X0] |
(157) 0x42b9b8 LDR Q16, [X4, X0] |
(157) 0x42b9bc LDR Q20, [X13, X0] |
(157) 0x42b9c0 FSUB V5.2D, V18.2D, V17.2D |
(157) 0x42b9c4 LDR Q6, [X14, X0] |
(157) 0x42b9c8 FSUB V23.2D, V27.2D, V16.2D |
(157) 0x42b9cc LDR Q7, [X2, X0] |
(157) 0x42b9d0 FMUL V24.2D, V5.2D, V20.2D |
(157) 0x42b9d4 FMLA V24.2D, V6.2D, V23.2D |
(157) 0x42b9d8 FMLA V7.2D, V31.2D, V24.2D |
(157) 0x42b9dc STR Q7, [X2, X0] |
(157) 0x42b9e0 LDR Q26, [X1, X0] |
(157) 0x42b9e4 LDR Q3, [X4, X0] |
(157) 0x42b9e8 LDR Q19, [X3, X0] |
(157) 0x42b9ec LDR Q25, [X9, X0] |
(157) 0x42b9f0 LDR Q22, [X8, X0] |
(157) 0x42b9f4 LDR Q28, [X5, X0] |
(157) 0x42b9f8 FSUB V2.2D, V3.2D, V19.2D |
(157) 0x42b9fc LDR Q0, [X10, X0] |
(157) 0x42ba00 FSUB V4.2D, V25.2D, V22.2D |
(157) 0x42ba04 FMUL V1.2D, V2.2D, V28.2D |
(157) 0x42ba08 FMLA V1.2D, V0.2D, V4.2D |
(157) 0x42ba0c FMLA V26.2D, V31.2D, V1.2D |
(157) 0x42ba10 STR Q26, [X1, X0] |
(157) 0x42ba14 ADD X0, X0, #16 |
(157) 0x42ba18 CMP X19, X0 |
(157) 0x42ba1c B.NE 42b90c |
(156) 0x42ba20 LDR W19, [SP, #176] |
(156) 0x42ba24 TBZ W19, #0, 42bc00 |
(156) 0x42ba28 LDR W22, [SP, #100] |
(156) 0x42ba2c AND W13, W19, #0xfffffffe |
(156) 0x42ba30 ADD W17, W22, W13 |
(156) 0x42ba34 STR W17, [SP, #100] |
(156) 0x42ba38 LDR W20, [SP, #100] |
(156) 0x42ba3c LDP X6, X12, [SP, #256] |
(156) 0x42ba40 LDP X5, X3, [SP, #224] |
(156) 0x42ba44 SUB W15, W20, #1 |
(156) 0x42ba48 SBFM X25, X20, #0, #31 |
(156) 0x42ba4c SBFM X24, X15, #0, #31 |
(156) 0x42ba50 ADD X18, X6, X25 |
(156) 0x42ba54 ADD X4, X6, X24 |
(156) 0x42ba58 ADD X2, X12, X24 |
(156) 0x42ba5c LDR X7, [SP, #272] |
(156) 0x42ba60 ADD X21, X12, X25 |
(156) 0x42ba64 ADD X14, X5, X25 |
(156) 0x42ba68 ADD X6, X3, X25 |
(156) 0x42ba6c LDR X0, [SP, #280] |
(156) 0x42ba70 UBFM X17, X14, #61, #60 |
(156) 0x42ba74 LDR X1, [SP, #168] |
(156) 0x42ba78 ADD X9, X7, X25 |
(156) 0x42ba7c ADD X16, X7, X24 |
(156) 0x42ba80 LDR D21, [X30, X4,LSL #3] |
(156) 0x42ba84 ADD X19, X0, X25 |
(156) 0x42ba88 ADD X22, X0, X24 |
(156) 0x42ba8c LDR D31, [X30, X9,LSL #3] |
(156) 0x42ba90 LDR D18, [X1, X2,LSL #3] |
(156) 0x42ba94 LDR D20, [X1, X19,LSL #3] |
(156) 0x42ba98 LDR D17, [X30, X16,LSL #3] |
(156) 0x42ba9c LDR D16, [X30, X18,LSL #3] |
(156) 0x42baa0 FMUL D27, D21, D18 |
(156) 0x42baa4 LDR D5, [X1, X21,LSL #3] |
(156) 0x42baa8 FMUL D23, D31, D20 |
(156) 0x42baac LDR D6, [X1, X22,LSL #3] |
(156) 0x42bab0 LDR X8, [SP, #160] |
(156) 0x42bab4 FMADD D24, D16, D5, D23 |
(156) 0x42bab8 LDR X11, [SP, #216] |
(156) 0x42babc FMADD D7, D17, D6, D27 |
(156) 0x42bac0 LDR X14, [SP, #112] |
(156) 0x42bac4 ADD X20, X8, X25 |
(156) 0x42bac8 LDR X19, [SP, #136] |
(156) 0x42bacc ADD X13, X11, X24 |
(156) 0x42bad0 ADD X15, X11, X25 |
(156) 0x42bad4 LDR X12, [SP, #144] |
(156) 0x42bad8 UBFM X7, X13, #61, #60 |
(156) 0x42badc ADD X11, X3, X24 |
(156) 0x42bae0 ADD X13, X14, X25 |
(156) 0x42bae4 UBFM X9, X11, #61, #60 |
(156) 0x42bae8 LDR X23, [SP, #184] |
(156) 0x42baec ADD X22, X19, X25 |
(156) 0x42baf0 LDR X10, [SP, #192] |
(156) 0x42baf4 ADD X21, X12, X25 |
(156) 0x42baf8 LDR X8, [SP, #200] |
(156) 0x42bafc FADD D2, D24, D7 |
(156) 0x42bb00 UBFM X16, X21, #61, #60 |
(156) 0x42bb04 ADD X30, X23, X25 |
(156) 0x42bb08 ADD X1, X23, X24 |
(156) 0x42bb0c LDR X4, [SP, #248] |
(156) 0x42bb10 UBFM X23, X1, #61, #60 |
(156) 0x42bb14 LDR D26, [X28, X7] |
(156) 0x42bb18 ADD X5, X8, X24 |
(156) 0x42bb1c ADD X24, X10, X24 |
(156) 0x42bb20 LDR D3, [X28, X15,LSL #3] |
(156) 0x42bb24 FDIV D31, D30, D2 |
(156) 0x42bb28 UBFM X3, X5, #61, #60 |
(156) 0x42bb2c ADD X2, X4, X25 |
(156) 0x42bb30 ADD X25, X10, X25 |
(156) 0x42bb34 LDR X10, [SP, #240] |
(156) 0x42bb38 UBFM X18, X2, #61, #60 |
(156) 0x42bb3c UBFM X15, X13, #61, #60 |
(156) 0x42bb40 UBFM X0, X24, #61, #60 |
(156) 0x42bb44 LDR D19, [X28, X9] |
(156) 0x42bb48 LDR D25, [X28, X6,LSL #3] |
(156) 0x42bb4c FSUB D22, D26, S3 |
(156) 0x42bb50 LDR D0, [X10, X17] |
(156) 0x42bb54 LDR D4, [X10, X18] |
(156) 0x42bb58 LDR X11, [SP, #152] |
(156) 0x42bb5c FSUB D28, D19, S25 |
(156) 0x42bb60 FMUL D1, D22, D0 |
(156) 0x42bb64 LDR X1, [SP, #208] |
(156) 0x42bb68 LDR D18, [X11, X20,LSL #3] |
(156) 0x42bb6c FMADD D21, D28, D4, D1 |
(156) 0x42bb70 LDR X20, [SP, #128] |
(156) 0x42bb74 FMADD D17, D21, D31, D18 |
(156) 0x42bb78 STR D17, [X1, X16] |
(156) 0x42bb7c LDR Q16, [X28, X7] |
(156) 0x42bb80 LDR Q27, [X28, X9] |
(156) 0x42bb84 LDR X7, [SP, #120] |
(156) 0x42bb88 LDR D5, [X20, X22,LSL #3] |
(156) 0x42bb8c FSUB V20.2D, V27.2D, V16.2D |
(156) 0x42bb90 LDR X28, [SP, #104] |
(156) 0x42bb94 LDR Q6, [X7, X3] |
(156) 0x42bb98 FMUL V23.2D, V20.2D, V6.2D |
(156) 0x42bb9c FADDP D7, V23.2D |
(156) 0x42bba0 FMADD D26, D7, D31, D5 |
(156) 0x42bba4 STR D26, [X28, X15] |
(156) 0x42bba8 LDR D24, [X26, X23] |
(156) 0x42bbac LDR D3, [X26, X30,LSL #3] |
(156) 0x42bbb0 LDR D19, [X26, X0] |
(156) 0x42bbb4 LDR D25, [X26, X25,LSL #3] |
(156) 0x42bbb8 LDR D28, [X10, X17] |
(156) 0x42bbbc FSUB D2, D24, S3 |
(156) 0x42bbc0 LDR D0, [X10, X18] |
(156) 0x42bbc4 FSUB D4, D19, S25 |
(156) 0x42bbc8 LDR D22, [X1, X16] |
(156) 0x42bbcc FMUL D1, D2, D28 |
(156) 0x42bbd0 FMADD D21, D4, D0, D1 |
(156) 0x42bbd4 FMADD D18, D21, D31, D22 |
(156) 0x42bbd8 STR D18, [X1, X16] |
(156) 0x42bbdc LDR Q17, [X26, X0] |
(156) 0x42bbe0 LDR Q27, [X26, X23] |
(156) 0x42bbe4 LDR Q5, [X7, X3] |
(156) 0x42bbe8 LDR D16, [X28, X15] |
(156) 0x42bbec FSUB V20.2D, V17.2D, V27.2D |
(156) 0x42bbf0 FMUL V6.2D, V20.2D, V5.2D |
(156) 0x42bbf4 FADDP D23, V6.2D |
(156) 0x42bbf8 FMADD D31, D23, D31, D16 |
(156) 0x42bbfc STR D31, [X28, X15] |
(156) 0x42bc00 LDR W6, [SP, #180] |
(156) 0x42bc04 ADD X27, X27, #1 |
(156) 0x42bc08 LDR W26, [SP, #288] |
(156) 0x42bc0c CMP W26, W27 |
(156) 0x42bc10 B.LE 42bc40 |
(156) 0x42bc14 LDR W30, [SP, #300] |
(156) 0x42bc18 LDR W9, [SP, #292] |
(156) 0x42bc1c LDR W17, [SP, #296] |
(156) 0x42bc20 SUB W7, W30, W6 |
(156) 0x42bc24 STR W9, [SP, #100] |
(156) 0x42bc28 B 42b6e0 |
0x42bc2c LDP X23, X24, [SP, #48] |
0x42bc30 LDP X19, X20, [SP, #16] |
0x42bc34 LDP X21, X22, [SP, #32] |
0x42bc38 LDP X29, X30, [SP], #384 |
0x42bc3c RET |
0x42bc40 LDP X23, X24, [SP, #48] |
0x42bc44 LDP X25, X26, [SP, #64] |
0x42bc48 LDP X27, X28, [SP, #80] |
0x42bc4c LDP X19, X20, [SP, #16] |
0x42bc50 LDP X21, X22, [SP, #32] |
0x42bc54 LDP X29, X30, [SP], #384 |
0x42bc58 RET |
0x42bc5c ADD W7, W7, #1 |
0x42bc60 MOVZ W8, #0 |
0x42bc64 B 42b63c |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►100.00+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | accelerate(global_variables&) | accelerate.cpp:66 | exec |
| ○ | hydro(global_variables&, paral[...] | hydro.cpp:68 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►50.01+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | accelerate(global_variables&) | accelerate.cpp:66 | exec |
| ○ | hydro(global_variables&, paral[...] | hydro.cpp:68 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| ►49.99+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►74.99+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►25.01+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | accelerate(global_variables&) | accelerate.cpp:66 | exec |
| ○ | hydro(global_variables&, paral[...] | hydro.cpp:68 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►87.44+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►12.56+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | accelerate(global_variables&) | accelerate.cpp:66 | exec |
| ○ | hydro(global_variables&, paral[...] | hydro.cpp:68 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►92.90+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►7.10+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | accelerate(global_variables&) | accelerate.cpp:66 | exec |
| ○ | hydro(global_variables&, paral[...] | hydro.cpp:68 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►95.74+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►4.26+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | accelerate(global_variables&) | accelerate.cpp:66 | exec |
| ○ | hydro(global_variables&, paral[...] | hydro.cpp:68 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►96.82+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►3.18+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | accelerate(global_variables&) | accelerate.cpp:66 | exec |
| ○ | hydro(global_variables&, paral[...] | hydro.cpp:68 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►97.46+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►2.54+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | accelerate(global_variables&) | accelerate.cpp:66 | exec |
| ○ | hydro(global_variables&, paral[...] | hydro.cpp:68 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►97.89+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►2.11+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | accelerate(global_variables&) | accelerate.cpp:66 | exec |
| ○ | hydro(global_variables&, paral[...] | hydro.cpp:68 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►98.19+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►1.81+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | accelerate(global_variables&) | accelerate.cpp:66 | exec |
| ○ | hydro(global_variables&, paral[...] | hydro.cpp:68 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►98.42+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►1.58+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | accelerate(global_variables&) | accelerate.cpp:66 | exec |
| ○ | hydro(global_variables&, paral[...] | hydro.cpp:68 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| Path / |
The code analyzed by CQA in that panel excludes loops and represents 0.01% of application time for run 1x1
| Source file and lines | accelerate.cpp:40-53 |
| Module | exec |
| nb instructions | 87 |
| nb uops | 86 |
| loop length | 348 |
| used w registers | 22 |
| used x registers | 18 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 2 |
| used q registers | 0 |
| used v registers | 2 |
| used z registers | 0 |
| nb stack references | 33 |
| micro-operation queue | 10.75 cycles |
| front end | 10.75 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 4.50 | 4.50 | 7.50 | 7.50 | 7.50 | 7.50 | 0.75 | 0.75 | 0.75 | 0.75 | 15.83 | 15.50 | 15.67 | 10.50 | 10.50 |
| cycles | 4.50 | 4.50 | 7.50 | 7.50 | 7.50 | 7.50 | 0.75 | 0.75 | 0.75 | 0.75 | 15.83 | 15.50 | 15.67 | 10.50 | 10.50 |
| Cycles executing div or sqrt instructions | 10.00-25.00 |
| Front-end | 10.75 |
| Dispatch | 15.83 |
| DIV/SQRT | 10.00-25.00 |
| Overall L1 | 15.83-25.00 |
| all | 1% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| other | 10% |
| all | 0% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | 0% |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 1% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | 0% |
| other | 10% |
| all | 26% |
| load | 32% |
| store | 29% |
| mul | 12% |
| add-sub | 13% |
| fma | 12% |
| other | 25% |
| all | 25% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | 25% |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 25% |
| all | 26% |
| load | 32% |
| store | 29% |
| mul | 18% |
| add-sub | 13% |
| fma | 12% |
| div/sqrt | 12% |
| other | 26% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #640]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X20, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDP W22, W2, [X0, #96] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDR W0, [X0, #88] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR W1, [X20, #92] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| ADD W3, W2, #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W22, W22, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W4, W0, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR W3, [SP, #288] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| STR W4, [SP, #292] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| CMP W22, W3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 42bc30 <_Z17accelerate_kerneliiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x670> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W19, W1, #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| SUB W23, W3, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W4, W19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 42bc2c <_Z17accelerate_kerneliiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x66c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| SUB W5, W19, W4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MUL W24, W23, W5 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| STR W5, [SP, #296] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| BL 410210 <@plt_start@+0x1f0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W21, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410240 <@plt_start@+0x220> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| UDIV W7, W24, W21 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| ORR W6, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MSUB W8, W7, W21, W24 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| CMP W0, W8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CC 42bc5c <_Z17accelerate_kerneliiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x69c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W6, W7, W6, W8 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W9, W7, W6 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR W9, [SP, #300] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| CMP W6, W9 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 42bc2c <_Z17accelerate_kerneliiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x66c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR W10, [SP, #296] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| FMOV D30, #4.0000000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| UDIV W11, W6, W10 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | scal (12.5%) |
| LDR D3, [X20, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| LDR W16, [SP, #292] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| LDR X14, [X20] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| FMUL D30, D3, D30 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (25.0%) |
| LDR X18, [X20, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| MSUB W12, W11, W10, W6 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| LDR X26, [X20, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| ADD W13, W11, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SBFM X27, X13, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| STR X14, [SP, #328] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| LDR X30, [X20, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| ADD W17, W12, W16 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR X18, [SP, #360] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| LDR X22, [X20, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| DUP V29.2D, V30.D[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | vect (50.0%) |
| STR W17, [SP, #100] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| SUB W17, W19, W17 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| LDR X15, [X20, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| STR X30, [SP, #304] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| LDR X25, [X20, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| STR X26, [SP, #312] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| LDR X28, [X20, #56] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| STR X22, [SP, #336] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| LDR X0, [X20, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| STR X25, [SP, #320] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| LDR X20, [X20, #72] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| STR X28, [SP, #352] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| STR X15, [SP, #376] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| STR X0, [SP, #344] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| STR X20, [SP, #368] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| HINT #0 | N/A | ||||||||||||||||||
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #384 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #384 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W7, W7, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W8, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| B 42b63c <_Z17accelerate_kerneliiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x7c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
The code analyzed by CQA in that panel excludes loops and represents 0.01% of application time for run 1x1
| Source file and lines | accelerate.cpp:40-53 |
| Module | exec |
| nb instructions | 87 |
| nb uops | 86 |
| loop length | 348 |
| used w registers | 22 |
| used x registers | 18 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 2 |
| used q registers | 0 |
| used v registers | 2 |
| used z registers | 0 |
| nb stack references | 33 |
| micro-operation queue | 10.75 cycles |
| front end | 10.75 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 4.50 | 4.50 | 7.50 | 7.50 | 7.50 | 7.50 | 0.75 | 0.75 | 0.75 | 0.75 | 15.83 | 15.50 | 15.67 | 10.50 | 10.50 |
| cycles | 4.50 | 4.50 | 7.50 | 7.50 | 7.50 | 7.50 | 0.75 | 0.75 | 0.75 | 0.75 | 15.83 | 15.50 | 15.67 | 10.50 | 10.50 |
| Cycles executing div or sqrt instructions | 10.00-25.00 |
| Front-end | 10.75 |
| Dispatch | 15.83 |
| DIV/SQRT | 10.00-25.00 |
| Overall L1 | 15.83-25.00 |
| all | 1% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| other | 10% |
| all | 0% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | 0% |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 1% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | 0% |
| other | 10% |
| all | 26% |
| load | 32% |
| store | 29% |
| mul | 12% |
| add-sub | 13% |
| fma | 12% |
| other | 25% |
| all | 25% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | 25% |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 25% |
| all | 26% |
| load | 32% |
| store | 29% |
| mul | 18% |
| add-sub | 13% |
| fma | 12% |
| div/sqrt | 12% |
| other | 26% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #640]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X20, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDP W22, W2, [X0, #96] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDR W0, [X0, #88] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR W1, [X20, #92] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| ADD W3, W2, #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W22, W22, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W4, W0, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR W3, [SP, #288] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| STR W4, [SP, #292] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| CMP W22, W3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 42bc30 <_Z17accelerate_kerneliiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x670> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W19, W1, #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| SUB W23, W3, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W4, W19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 42bc2c <_Z17accelerate_kerneliiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x66c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| SUB W5, W19, W4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MUL W24, W23, W5 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| STR W5, [SP, #296] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| BL 410210 <@plt_start@+0x1f0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W21, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410240 <@plt_start@+0x220> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| UDIV W7, W24, W21 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| ORR W6, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MSUB W8, W7, W21, W24 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| CMP W0, W8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CC 42bc5c <_Z17accelerate_kerneliiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x69c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W6, W7, W6, W8 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W9, W7, W6 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR W9, [SP, #300] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| CMP W6, W9 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 42bc2c <_Z17accelerate_kerneliiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x66c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR W10, [SP, #296] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| FMOV D30, #4.0000000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| UDIV W11, W6, W10 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | scal (12.5%) |
| LDR D3, [X20, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| LDR W16, [SP, #292] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| LDR X14, [X20] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| FMUL D30, D3, D30 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (25.0%) |
| LDR X18, [X20, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| MSUB W12, W11, W10, W6 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| LDR X26, [X20, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| ADD W13, W11, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SBFM X27, X13, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| STR X14, [SP, #328] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| LDR X30, [X20, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| ADD W17, W12, W16 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR X18, [SP, #360] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| LDR X22, [X20, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| DUP V29.2D, V30.D[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | vect (50.0%) |
| STR W17, [SP, #100] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| SUB W17, W19, W17 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| LDR X15, [X20, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| STR X30, [SP, #304] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| LDR X25, [X20, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| STR X26, [SP, #312] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| LDR X28, [X20, #56] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| STR X22, [SP, #336] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| LDR X0, [X20, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| STR X25, [SP, #320] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| LDR X20, [X20, #72] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| STR X28, [SP, #352] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| STR X15, [SP, #376] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| STR X0, [SP, #344] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| STR X20, [SP, #368] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| HINT #0 | N/A | ||||||||||||||||||
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #384 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #384 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W7, W7, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W8, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| B 42b63c <_Z17accelerate_kerneliiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x7c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| Run 1x1 | Number processes: 1Number nodes: NARun Command: <executable>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_NUM_THREADS: 1OMP_PLACES: threads |
|---|---|
| Run 1x2 | Number processes: 1Run Command: <executable>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431OMP_NUM_THREADS: 2OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| Run 1x4 | Number processes: 1Run Command: <executable>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431OMP_NUM_THREADS: 4OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| Run 1x8 | Number processes: 1Run Command: <executable>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431OMP_NUM_THREADS: 8OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| Run 1x16 | Number processes: 1Run Command: <executable>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431OMP_NUM_THREADS: 16OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| Run 1x24 | Number processes: 1Run Command: <executable>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431OMP_NUM_THREADS: 24OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| Run 1x32 | Number processes: 1Run Command: <executable>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431OMP_NUM_THREADS: 32OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| Run 1x40 | Number processes: 1Run Command: <executable>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431OMP_NUM_THREADS: 40OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| Run 1x48 | Number processes: 1Run Command: <executable>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431OMP_NUM_THREADS: 48OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| Run 1x56 | Number processes: 1Run Command: <executable>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431OMP_NUM_THREADS: 56OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| Run 1x64 | Number processes: 1Run Command: <executable>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431OMP_NUM_THREADS: 64OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| (1x1) Efficiency | (1x1) Potential Speed-Up (%) | (1x2) Efficiency | (1x2) Potential Speed-Up (%) | (1x4) Efficiency | (1x4) Potential Speed-Up (%) | (1x8) Efficiency | (1x8) Potential Speed-Up (%) | (1x16) Efficiency | (1x16) Potential Speed-Up (%) | (1x24) Efficiency | (1x24) Potential Speed-Up (%) | (1x32) Efficiency | (1x32) Potential Speed-Up (%) | (1x40) Efficiency | (1x40) Potential Speed-Up (%) | (1x48) Efficiency | (1x48) Potential Speed-Up (%) | (1x56) Efficiency | (1x56) Potential Speed-Up (%) | (1x64) Efficiency | (1x64) Potential Speed-Up (%) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 0 | 1 | 0.02 | 0.99 | 0.06 | 0.97 | 0.18 | 0.54 | 2.99 | 0.33 | 5.08 | 0.27 | 5.55 | 0.23 | 5.83 | 0.2 | 6.01 | 0.17 | 6.12 | 0.15 | 6.22 |
| Run | Number of threads | Efficiency (ideal is 1) | Speedup | Ideal Speedup | Time (s) | Coverage (%) |
|---|---|---|---|---|---|---|
| 1x1 | 1 | 1 | 1 | 1 | 95.710006713867 | 6.4394373893738 |
| 1x2 | 2 | 1 | 1.99 | 2 | 47.855018615723 | 6.4006934165955 |
| 1x4 | 4 | 0.99 | 3.96 | 4 | 23.960004806519 | 6.3012456893921 |
| 1x8 | 8 | 0.97 | 7.75 | 8 | 12.224998474121 | 5.6928009986877 |
| 1x16 | 16 | 0.54 | 8.69 | 16 | 11.995004653931 | 6.5446667671204 |
| 1x24 | 24 | 0.33 | 8.01 | 24 | 11.850003242493 | 7.6205520629883 |
| 1x32 | 32 | 0.27 | 8.69 | 32 | 10.88000202179 | 7.6259837150574 |
| 1x40 | 40 | 0.23 | 9.23 | 40 | 10.290000915527 | 7.5728163719177 |
| 1x48 | 48 | 0.2 | 9.5 | 48 | 9.9249992370605 | 7.4866557121277 |
| 1x56 | 56 | 0.17 | 9.64 | 56 | 9.7850036621094 | 7.3927454948425 |
| 1x64 | 64 | 0.15 | 9.71 | 64 | 9.730001449585 | 7.3295331001282 |
| Name | Coverage (%) | Time (s) |
|---|---|---|
| ▼accelerate_kernel(int, int, int, int, double, clover::Buffer2D | 6.44 | 95.71 |
| ▼Loop 156 - accelerate.cpp:42-53 - exec– | 0.00 | 0.00 |
| ○Loop 157 - accelerate.cpp:43-53 - exec | 6.43 | 95.63 |
