| Function: advec_cell_kernel(int, int, int, int, int, int, clover::Buffer1D<double>&, clover::Buffer1 ... | Module: exec | Source: advec_cell.cpp:65-110 [...] | Coverage (incl. loops): 3.00% | (excl. loops): 0.00% |
|---|
| Function: advec_cell_kernel(int, int, int, int, int, int, clover::Buffer1D<double>&, clover::Buffer1 ... | Module: exec | Source: advec_cell.cpp:65-110 [...] | Coverage (incl. loops): 3.00% | (excl. loops): 0.00% |
|---|
/usr/include/c++/14/bits/stl_algobase.h: 238 - 238 |
-------------------------------------------------------------------------------- |
238: if (__b < __a) |
/home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/build/CloverLeaf2.0-CXX/src/omp/advec_cell.cpp: 65 - 110 |
-------------------------------------------------------------------------------- |
65: #pragma omp parallel for simd collapse(2) |
66: for (int j = (y_min + 1); j < (y_max + 2); j++) { |
67: for (int i = (x_min + 1); i < (x_max + 2 + 2); i++) |
68: ({ |
69: int upwind, donor, downwind, dif; |
70: double sigmat, sigma3, sigma4, sigmav, sigmam, diffuw, diffdw, limiter, wind; |
71: if (vol_flux_x(i, j) > 0.0) { |
72: upwind = i - 2; |
73: donor = i - 1; |
74: downwind = i; |
75: dif = donor; |
76: } else { |
77: upwind = std::min(i + 1, x_max + 2); |
78: donor = i; |
79: downwind = i - 1; |
80: dif = upwind; |
81: } |
82: sigmat = std::fabs(vol_flux_x(i, j)) / pre_vol(donor, j); |
83: sigma3 = (1.0 + sigmat) * (vertexdx[i] / vertexdx[dif]); |
84: sigma4 = 2.0 - sigmat; |
85: sigmav = sigmat; |
86: diffuw = density1(donor, j) - density1(upwind, j); |
87: diffdw = density1(downwind, j) - density1(donor, j); |
88: wind = 1.0; |
89: if (diffdw <= 0.0) wind = -1.0; |
90: if (diffuw * diffdw > 0.0) { |
91: limiter = (1.0 - sigmav) * wind * |
92: std::fmin(std::fmin(std::fabs(diffuw), std::fabs(diffdw)), |
93: one_by_six * (sigma3 * std::fabs(diffuw) + sigma4 * std::fabs(diffdw))); |
94: } else { |
95: limiter = 0.0; |
96: } |
97: mass_flux_x(i, j) = vol_flux_x(i, j) * (density1(donor, j) + limiter); |
98: sigmam = std::fabs(mass_flux_x(i, j)) / (density1(donor, j) * pre_vol(donor, j)); |
99: diffuw = energy1(donor, j) - energy1(upwind, j); |
100: diffdw = energy1(downwind, j) - energy1(donor, j); |
101: wind = 1.0; |
102: if (diffdw <= 0.0) wind = -1.0; |
103: if (diffuw * diffdw > 0.0) { |
104: limiter = (1.0 - sigmam) * wind * |
105: std::fmin(std::fmin(std::fabs(diffuw), std::fabs(diffdw)), |
106: one_by_six * (sigma3 * std::fabs(diffuw) + sigma4 * std::fabs(diffdw))); |
107: } else { |
108: limiter = 0.0; |
109: } |
110: ener_flux(i, j) = mass_flux_x(i, j) * (energy1(donor, j) + limiter); |
/home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/build/CloverLeaf2.0-CXX/src/omp/context.h: 46 - 69 |
-------------------------------------------------------------------------------- |
46: T &operator[](size_t i) const { return data[i]; } |
[...] |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x42d64c STP X29, X30, [SP, #672]! |
0x42d650 ADD X29, SP, #0 |
0x42d654 STP X21, X22, [SP, #32] |
0x42d658 ORR X21, XZR, X0 |
0x42d65c STP X23, X24, [SP, #48] |
0x42d660 LDP W23, W1, [X0, #64] |
0x42d664 LDR W0, [X0, #56] |
0x42d668 LDR W22, [X21, #60] |
0x42d66c ADD W23, W23, #1 |
0x42d670 ADD W14, W1, #2 |
0x42d674 ADD W2, W0, #1 |
0x42d678 STR W2, [SP, #308] |
0x42d67c CMP W23, W14 |
0x42d680 B.GE 42dff8 |
0x42d684 STP X19, X20, [SP, #16] |
0x42d688 ADD W19, W22, #4 |
0x42d68c SUB W24, W14, W23 |
0x42d690 STR W14, [SP, #168] |
0x42d694 CMP W2, W19 |
0x42d698 B.GE 42dff4 |
0x42d69c SUB W3, W19, W2 |
0x42d6a0 MUL W24, W24, W3 |
0x42d6a4 STR W3, [SP, #312] |
0x42d6a8 BL 410210 |
0x42d6ac ORR W20, WZR, W0 |
0x42d6b0 BL 410240 |
0x42d6b4 UDIV W7, W24, W20 |
0x42d6b8 LDR W6, [SP, #168] |
0x42d6bc ORR W4, WZR, W0 |
0x42d6c0 MSUB W5, W7, W20, W24 |
0x42d6c4 CMP W0, W5 |
0x42d6c8 B.CC 42e064 |
0x42d6cc MADD W4, W7, W4, W5 |
0x42d6d0 ADD W8, W7, W4 |
0x42d6d4 STR W8, [SP, #316] |
0x42d6d8 CMP W4, W8 |
0x42d6dc B.CS 42dff4 |
0x42d6e0 LDR W12, [SP, #312] |
0x42d6e4 ADD W11, W22, #2 |
0x42d6e8 ORR X9, XZR, #0x55 |
0x42d6ec MOVK X9, #16325 |
0x42d6f0 STP X25, X26, [SP, #64] |
0x42d6f4 FMOV V29.2D, #1.0000000 |
0x42d6f8 FMOV V5.2D, #-1.0000000 |
0x42d6fc FMOV S30, W11 |
0x42d700 UDIV W16, W4, W12 |
0x42d704 FMOV D31, X9 |
0x42d708 LDR W26, [SP, #308] |
0x42d70c STP X27, X28, [SP, #80] |
0x42d710 STP D8, D9, [SP, #96] |
0x42d714 FMOV D9, #1.0000000 |
0x42d718 FMOV D8, #-1.0000000 |
0x42d71c STP D10, D11, [SP, #112] |
0x42d720 MOVI V11.4S, #4 |
0x42d724 STP D12, D13, [SP, #128] |
0x42d728 MVNI V12.4S, #0 |
0x42d72c MVNI V13.4S, #1 |
0x42d730 DUP V16.2S, V30.S[0] |
0x42d734 STP D14, D15, [SP, #144] |
0x42d738 DUP V10.4S, V30.S[0] |
0x42d73c LDR X13, [X21, #16] |
0x42d740 FMOV V15.2D, #2.0000000 |
0x42d744 MOVI V14.4S, #1 |
0x42d748 MSUB W18, W16, W12, W4 |
0x42d74c LDP X17, X27, [X21] |
0x42d750 DUP V6.2D, V31.D[0] |
0x42d754 ADD W25, W16, W23 |
0x42d758 SBFM X25, X25, #0, #31 |
0x42d75c LDP X15, X10, [X21, #32] |
0x42d760 ADD W26, W18, W26 |
0x42d764 SUB W19, W19, W26 |
0x42d768 STR X17, [SP, #320] |
0x42d76c LDR X28, [X21, #48] |
0x42d770 STP D16, D31, [SP, #328] |
0x42d774 LDR X14, [X21, #24] |
0x42d778 STR W6, [SP, #344] |
0x42d77c STR W11, [SP, #348] |
(174) 0x42d780 CMP W7, W19 |
(174) 0x42d784 CSEL W1, W7, W19, #9 |
(174) 0x42d788 ADD W30, W4, W1 |
(174) 0x42d78c STR W30, [SP, #304] |
(174) 0x42d790 CMP W4, W30 |
(174) 0x42d794 B.CS 42dfd0 |
(174) 0x42d798 LDR X19, [X14] |
(174) 0x42d79c SUB W21, W1, #1 |
(174) 0x42d7a0 LDR X23, [X13] |
(174) 0x42d7a4 LDR X4, [X28] |
(174) 0x42d7a8 MUL X24, X25, X19 |
(174) 0x42d7ac LDR X7, [X28, #16] |
(174) 0x42d7b0 MUL X3, X25, X23 |
(174) 0x42d7b4 LDR X8, [X14, #16] |
(174) 0x42d7b8 MUL X23, X25, X4 |
(174) 0x42d7bc LDR X11, [SP, #320] |
(174) 0x42d7c0 STR X7, [SP, #184] |
(174) 0x42d7c4 LDR X0, [X10] |
(174) 0x42d7c8 LDR X2, [X15] |
(174) 0x42d7cc LDR X20, [X27] |
(174) 0x42d7d0 MUL X30, X25, X0 |
(174) 0x42d7d4 LDR X16, [X11, #8] |
(174) 0x42d7d8 MUL X18, X25, X2 |
(174) 0x42d7dc LDR X9, [X10, #16] |
(174) 0x42d7e0 MUL X6, X25, X20 |
(174) 0x42d7e4 LDR X5, [X13, #16] |
(174) 0x42d7e8 LDR X22, [X15, #16] |
(174) 0x42d7ec LDR X4, [X27, #16] |
(174) 0x42d7f0 STP X8, X24, [SP, #168] |
(174) 0x42d7f4 CMP W21, #2 |
(174) 0x42d7f8 B.LS 42e058 |
(174) 0x42d7fc SBFM X12, X26, #0, #31 |
(174) 0x42d800 MOVZ X11, #0 |
(174) 0x42d804 ADD X2, X24, X12 |
(174) 0x42d808 DUP V2.4S, W26 |
(174) 0x42d80c ADD X0, X18, X12 |
(174) 0x42d810 ADD X19, X8, X2,LSL #3 |
(174) 0x42d814 ADRP X8, |
(174) 0x42d818 ADD X17, X23, X12 |
(174) 0x42d81c ADD X20, X22, X0,LSL #3 |
(174) 0x42d820 UBFM W24, W1, #2, #31 |
(174) 0x42d824 STR W26, [SP, #192] |
(174) 0x42d828 LDR Q0, [X8, #576] |
(174) 0x42d82c ADD X17, X7, X17,LSL #3 |
(174) 0x42d830 UBFM X24, X24, #59, #58 |
(174) 0x42d834 ADD X7, X9, X30,LSL #3 |
(174) 0x42d838 STR W1, [SP, #200] |
(174) 0x42d83c ADD X0, X4, X6,LSL #3 |
(174) 0x42d840 STP X15, X22, [SP, #208] |
(174) 0x42d844 ADD X2, X5, X3,LSL #3 |
(174) 0x42d848 ADD X21, X16, X12,LSL #3 |
(174) 0x42d84c MOVZ X12, #16 |
(174) 0x42d850 STP X10, X27, [SP, #224] |
(174) 0x42d854 ADD V2.4S, V2.4S, V0.4S |
(174) 0x42d858 STP X14, X13, [SP, #240] |
(174) 0x42d85c STP X18, X28, [SP, #256] |
(174) 0x42d860 STP X4, X6, [SP, #272] |
(174) 0x42d864 STP X9, X3, [SP, #288] |
(175) 0x42d868 ORR V1.16B, V2.16B, V2.16B |
(175) 0x42d86c ADD V2.4S, V2.4S, V11.4S |
(175) 0x42d870 LDR Q3, [X20, X11] |
(175) 0x42d874 ADD V24.4S, V1.4S, V12.4S |
(175) 0x42d878 SSHLL V28.2D, V1.2S, #32 |
(175) 0x42d87c LDR Q4, [X20, X12] |
(175) 0x42d880 ADD V7.4S, V1.4S, V14.4S |
(175) 0x42d884 ADD V26.4S, V1.4S, V13.4S |
(175) 0x42d888 SSHLL2 V31.2D, V1.4S, #32 |
(175) 0x42d88c LDR Q27, [X21, X11] |
(175) 0x42d890 FCMGT V25.2D, V3.2D, #0.0000000 |
(175) 0x42d894 SSHLL V20.2D, V24.2S, #32 |
(175) 0x42d898 SMIN V17.4S, V7.4S, V10.4S |
(175) 0x42d89c FCMGT V22.2D, V4.2D, #0.0000000 |
(175) 0x42d8a0 SSHLL V23.2D, V26.2S, #32 |
(175) 0x42d8a4 FABS V18.2D, V3.2D |
(175) 0x42d8a8 SSHLL2 V0.2D, V24.4S, #32 |
(175) 0x42d8ac SSHLL2 V1.2D, V26.4S, #32 |
(175) 0x42d8b0 ORR V19.16B, V25.16B, V25.16B |
(175) 0x42d8b4 SSHLL V21.2D, V17.2S, #32 |
(175) 0x42d8b8 SSHLL2 V30.2D, V17.4S, #32 |
(175) 0x42d8bc ORR V16.16B, V22.16B, V22.16B |
(175) 0x42d8c0 BSL V19.16B, V20.16B, V28.16B |
(175) 0x42d8c4 BIF V28.16B, V20.16B, V25.16B |
(175) 0x42d8c8 BIF V23.16B, V21.16B, V25.16B |
(175) 0x42d8cc BSL V16.16B, V0.16B, V31.16B |
(175) 0x42d8d0 FMOV X13, D19 |
(175) 0x42d8d4 MOV D7, V19.D[1] |
(175) 0x42d8d8 FMOV X15, D28 |
(175) 0x42d8dc MOV D28, V28.D[1] |
(175) 0x42d8e0 FMOV X3, D23 |
(175) 0x42d8e4 BIF V31.16B, V0.16B, V22.16B |
(175) 0x42d8e8 BSL V25.16B, V20.16B, V21.16B |
(175) 0x42d8ec FMOV X27, D16 |
(175) 0x42d8f0 FMOV X28, D7 |
(175) 0x42d8f4 MOV D17, V23.D[1] |
(175) 0x42d8f8 MOV D20, V16.D[1] |
(175) 0x42d8fc FMOV X10, D28 |
(175) 0x42d900 BIF V1.16B, V30.16B, V22.16B |
(175) 0x42d904 MOV D24, V31.D[1] |
(175) 0x42d908 FMOV X1, D31 |
(175) 0x42d90c UBFM X9, X13, #61, #60 |
(175) 0x42d910 BIT V30.16B, V0.16B, V22.16B |
(175) 0x42d914 UBFM X26, X15, #61, #60 |
(175) 0x42d918 FMOV X22, D20 |
(175) 0x42d91c FABS V0.2D, V4.2D |
(175) 0x42d920 LDR D26, [X7, X9] |
(175) 0x42d924 UBFM X13, X3, #61, #60 |
(175) 0x42d928 MOV D22, V1.D[1] |
(175) 0x42d92c UBFM X8, X27, #61, #60 |
(175) 0x42d930 FMOV X18, D24 |
(175) 0x42d934 LDR D21, [X9, X0] |
(175) 0x42d938 UBFM X14, X28, #61, #60 |
(175) 0x42d93c ADD X3, X7, X14 |
(175) 0x42d940 UBFM X4, X10, #61, #60 |
(175) 0x42d944 LDR D23, [X0, X26] |
(175) 0x42d948 UBFM X1, X1, #61, #60 |
(175) 0x42d94c MOV X28, V25.D[1] |
(175) 0x42d950 FMOV D31, D26 |
(175) 0x42d954 FMOV X6, D17 |
(175) 0x42d958 FMOV X15, D1 |
(175) 0x42d95c LDR D19, [X7, X8] |
(175) 0x42d960 UBFM X10, X22, #61, #60 |
(175) 0x42d964 UBFM X22, X18, #61, #60 |
(175) 0x42d968 MOV X27, V30.D[1] |
(175) 0x42d96c LD1 {V31.D[1]}, [X3] |
(175) 0x42d970 ADD X3, X14, X0 |
(175) 0x42d974 LD1 {V21.D[1]}, [X3] |
(175) 0x42d978 ADD X3, X0, X4 |
(175) 0x42d97c UBFM X18, X6, #61, #60 |
(175) 0x42d980 LD1 {V23.D[1]}, [X3] |
(175) 0x42d984 UBFM X6, X15, #61, #60 |
(175) 0x42d988 FMOV X3, D25 |
(175) 0x42d98c LDR D16, [X16, X28,LSL #3] |
(175) 0x42d990 FMOV X15, D22 |
(175) 0x42d994 LDR D22, [X8, X0] |
(175) 0x42d998 FMOV X28, D30 |
(175) 0x42d99c FDIV V18.2D, V18.2D, V31.2D |
(175) 0x42d9a0 LDR D25, [X0, X1] |
(175) 0x42d9a4 LDR D7, [X16, X3,LSL #3] |
(175) 0x42d9a8 ADD X3, X7, X10 |
(175) 0x42d9ac FSUB V28.2D, V23.2D, V21.2D |
(175) 0x42d9b0 LD1 {V19.D[1]}, [X3] |
(175) 0x42d9b4 ADD X3, X10, X0 |
(175) 0x42d9b8 UBFM X15, X15, #61, #60 |
(175) 0x42d9bc FADD V1.2D, V18.2D, V29.2D |
(175) 0x42d9c0 LDR D30, [X0, X13] |
(175) 0x42d9c4 FSUB V26.2D, V15.2D, V18.2D |
(175) 0x42d9c8 FSUB V18.2D, V29.2D, V18.2D |
(175) 0x42d9cc LD1 {V22.D[1]}, [X3] |
(175) 0x42d9d0 ADD X3, X0, X22 |
(175) 0x42d9d4 MOV V7.D[1], V16.D[0] |
(175) 0x42d9d8 LDR D20, [X16, X28,LSL #3] |
(175) 0x42d9dc ADD X28, X0, X15 |
(175) 0x42d9e0 LDR D31, [X0, X6] |
(175) 0x42d9e4 FDIV V23.2D, V27.2D, V7.2D |
(175) 0x42d9e8 FDIV V24.2D, V0.2D, V19.2D |
(175) 0x42d9ec FABS V0.2D, V28.2D |
(175) 0x42d9f0 LD1 {V25.D[1]}, [X3] |
(175) 0x42d9f4 ADD X3, X14, X0 |
(175) 0x42d9f8 LDR D19, [X16, X27,LSL #3] |
(175) 0x42d9fc ADD X27, X0, X18 |
(175) 0x42da00 FMOV D27, D20 |
(175) 0x42da04 FMUL V23.2D, V23.2D, V1.2D |
(175) 0x42da08 LD1 {V30.D[1]}, [X27] |
(175) 0x42da0c ADD X27, X10, X0 |
(175) 0x42da10 FADD V20.2D, V24.2D, V29.2D |
(175) 0x42da14 LD1 {V31.D[1]}, [X28] |
(175) 0x42da18 LDR Q17, [X21, X12] |
(175) 0x42da1c MOV V27.D[1], V19.D[0] |
(175) 0x42da20 FSUB V25.2D, V25.2D, V22.2D |
(175) 0x42da24 FMUL V19.2D, V26.2D, V0.2D |
(175) 0x42da28 FSUB V30.2D, V21.2D, V30.2D |
(175) 0x42da2c FDIV V1.2D, V17.2D, V27.2D |
(175) 0x42da30 FSUB V27.2D, V15.2D, V24.2D |
(175) 0x42da34 FSUB V31.2D, V22.2D, V31.2D |
(175) 0x42da38 FABS V16.2D, V25.2D |
(175) 0x42da3c FMUL V17.2D, V1.2D, V20.2D |
(175) 0x42da40 FABS V7.2D, V30.2D |
(175) 0x42da44 FMUL V30.2D, V30.2D, V28.2D |
(175) 0x42da48 FMUL V20.2D, V27.2D, V16.2D |
(175) 0x42da4c FABS V1.2D, V31.2D |
(175) 0x42da50 FMLA V19.2D, V23.2D, V7.2D |
(175) 0x42da54 FMLA V20.2D, V17.2D, V1.2D |
(175) 0x42da58 FCMLE V28.2D, V28.2D, #0.0000000 |
(175) 0x42da5c FMUL V31.2D, V31.2D, V25.2D |
(175) 0x42da60 FCMLE V25.2D, V25.2D, #0.0000000 |
(175) 0x42da64 FMINNM V0.2D, V7.2D, V0.2D |
(175) 0x42da68 FMINNM V16.2D, V1.2D, V16.2D |
(175) 0x42da6c FMUL V19.2D, V19.2D, V6.2D |
(175) 0x42da70 FCMGT V7.2D, V30.2D, #0.0000000 |
(175) 0x42da74 BSL V28.16B, V5.16B, V29.16B |
(175) 0x42da78 FMUL V30.2D, V20.2D, V6.2D |
(175) 0x42da7c FSUB V24.2D, V29.2D, V24.2D |
(175) 0x42da80 BSL V25.16B, V5.16B, V29.16B |
(175) 0x42da84 FMUL V18.2D, V18.2D, V28.2D |
(175) 0x42da88 FMINNM V28.2D, V19.2D, V0.2D |
(175) 0x42da8c FMINNM V20.2D, V30.2D, V16.2D |
(175) 0x42da90 ORR V0.16B, V21.16B, V21.16B |
(175) 0x42da94 ORR V16.16B, V22.16B, V22.16B |
(175) 0x42da98 FMUL V24.2D, V24.2D, V25.2D |
(175) 0x42da9c FCMGT V1.2D, V31.2D, #0.0000000 |
(175) 0x42daa0 FMLA V0.2D, V18.2D, V28.2D |
(175) 0x42daa4 FMLA V16.2D, V24.2D, V20.2D |
(175) 0x42daa8 BSL V7.16B, V0.16B, V21.16B |
(175) 0x42daac BSL V1.16B, V16.16B, V22.16B |
(175) 0x42dab0 FMUL V30.2D, V3.2D, V7.2D |
(175) 0x42dab4 FMUL V31.2D, V4.2D, V1.2D |
(175) 0x42dab8 STR Q30, [X19, X11] |
(175) 0x42dabc FABS V3.2D, V30.2D |
(175) 0x42dac0 STR Q31, [X19, X12] |
(175) 0x42dac4 LDR D4, [X9, X0] |
(175) 0x42dac8 LDR D7, [X8, X0] |
(175) 0x42dacc LDR D25, [X7, X9] |
(175) 0x42dad0 LDR D22, [X7, X14] |
(175) 0x42dad4 LD1 {V4.D[1]}, [X3] |
(175) 0x42dad8 LDR D18, [X7, X8] |
(175) 0x42dadc LD1 {V7.D[1]}, [X27] |
(175) 0x42dae0 MOV V25.D[1], V22.D[0] |
(175) 0x42dae4 LDR D21, [X14, X2] |
(175) 0x42dae8 LDR D20, [X9, X2] |
(175) 0x42daec ADD X9, X10, X2 |
(175) 0x42daf0 FMUL V19.2D, V4.2D, V25.2D |
(175) 0x42daf4 LDR D1, [X4, X2] |
(175) 0x42daf8 LDR D0, [X26, X2] |
(175) 0x42dafc MOV V20.D[1], V21.D[0] |
(175) 0x42db00 LDR D21, [X8, X2] |
(175) 0x42db04 FDIV V16.2D, V3.2D, V19.2D |
(175) 0x42db08 LDR D28, [X22, X2] |
(175) 0x42db0c LDR D24, [X1, X2] |
(175) 0x42db10 MOV V0.D[1], V1.D[0] |
(175) 0x42db14 FSUB V19.2D, V29.2D, V16.2D |
(175) 0x42db18 LDR D3, [X18, X2] |
(175) 0x42db1c LDR D25, [X13, X2] |
(175) 0x42db20 FSUB V22.2D, V0.2D, V20.2D |
(175) 0x42db24 LD1 {V21.D[1]}, [X9] |
(175) 0x42db28 MOV V24.D[1], V28.D[0] |
(175) 0x42db2c LDR D1, [X15, X2] |
(175) 0x42db30 LDR D0, [X6, X2] |
(175) 0x42db34 MOV V25.D[1], V3.D[0] |
(175) 0x42db38 FABS V4.2D, V22.2D |
(175) 0x42db3c FSUB V25.2D, V20.2D, V25.2D |
(175) 0x42db40 FSUB V24.2D, V24.2D, V21.2D |
(175) 0x42db44 MOV V0.D[1], V1.D[0] |
(175) 0x42db48 LDR D1, [X7, X10] |
(175) 0x42db4c FMUL V26.2D, V26.2D, V4.2D |
(175) 0x42db50 FSUB V28.2D, V21.2D, V0.2D |
(175) 0x42db54 FABS V3.2D, V24.2D |
(175) 0x42db58 FABS V16.2D, V25.2D |
(175) 0x42db5c MOV V18.D[1], V1.D[0] |
(175) 0x42db60 FMUL V0.2D, V25.2D, V22.2D |
(175) 0x42db64 FCMLE V22.2D, V22.2D, #0.0000000 |
(175) 0x42db68 FMUL V27.2D, V27.2D, V3.2D |
(175) 0x42db6c FMLA V26.2D, V23.2D, V16.2D |
(175) 0x42db70 FABS V23.2D, V28.2D |
(175) 0x42db74 FMINNM V4.2D, V16.2D, V4.2D |
(175) 0x42db78 FMUL V28.2D, V28.2D, V24.2D |
(175) 0x42db7c FMLA V27.2D, V17.2D, V23.2D |
(175) 0x42db80 FMINNM V17.2D, V23.2D, V3.2D |
(175) 0x42db84 FMUL V26.2D, V26.2D, V6.2D |
(175) 0x42db88 FMUL V7.2D, V7.2D, V18.2D |
(175) 0x42db8c FCMLE V18.2D, V24.2D, #0.0000000 |
(175) 0x42db90 FABS V3.2D, V31.2D |
(175) 0x42db94 BSL V22.16B, V5.16B, V29.16B |
(175) 0x42db98 FMUL V24.2D, V27.2D, V6.2D |
(175) 0x42db9c FMINNM V16.2D, V26.2D, V4.2D |
(175) 0x42dba0 FDIV V1.2D, V3.2D, V7.2D |
(175) 0x42dba4 BSL V18.16B, V5.16B, V29.16B |
(175) 0x42dba8 FCMGT V25.2D, V0.2D, #0.0000000 |
(175) 0x42dbac FMINNM V27.2D, V24.2D, V17.2D |
(175) 0x42dbb0 FSUB V23.2D, V29.2D, V1.2D |
(175) 0x42dbb4 FMUL V4.2D, V16.2D, V22.2D |
(175) 0x42dbb8 ORR V22.16B, V20.16B, V20.16B |
(175) 0x42dbbc FCMGT V0.2D, V28.2D, #0.0000000 |
(175) 0x42dbc0 FMUL V28.2D, V27.2D, V18.2D |
(175) 0x42dbc4 FMLA V22.2D, V4.2D, V19.2D |
(175) 0x42dbc8 ORR V19.16B, V21.16B, V21.16B |
(175) 0x42dbcc FMLA V19.2D, V23.2D, V28.2D |
(175) 0x42dbd0 BSL V25.16B, V22.16B, V20.16B |
(175) 0x42dbd4 FMUL V30.2D, V30.2D, V25.2D |
(175) 0x42dbd8 BSL V0.16B, V19.16B, V21.16B |
(175) 0x42dbdc FMUL V31.2D, V31.2D, V0.2D |
(175) 0x42dbe0 STR Q30, [X17, X11] |
(175) 0x42dbe4 ADD X11, X11, #32 |
(175) 0x42dbe8 STR Q31, [X17, X12] |
(175) 0x42dbec ADD X12, X12, #32 |
(175) 0x42dbf0 CMP X11, X24 |
(175) 0x42dbf4 B.NE 42d868 |
(174) 0x42dbf8 LDR W1, [SP, #200] |
(174) 0x42dbfc LDP X15, X22, [SP, #208] |
(174) 0x42dc00 LDP X10, X27, [SP, #224] |
(174) 0x42dc04 AND W8, W1, #0xfffffffc |
(174) 0x42dc08 LDP X14, X13, [SP, #240] |
(174) 0x42dc0c LDP X18, X28, [SP, #256] |
(174) 0x42dc10 LDP X4, X6, [SP, #272] |
(174) 0x42dc14 LDP X9, X3, [SP, #288] |
(174) 0x42dc18 LDR W26, [SP, #192] |
(174) 0x42dc1c ADD W7, W8, W26 |
(174) 0x42dc20 ANDS XZR, X1, #0x3 |
(174) 0x42dc24 B.EQ 42dfcc |
(174) 0x42dc28 SUB W1, W1, W8 |
(174) 0x42dc2c CMP W1, #1 |
(174) 0x42dc30 B.EQ 42de78 |
(174) 0x42dc34 FMOV S2, W7 |
(174) 0x42dc38 SBFM X2, X26, #0, #31 |
(174) 0x42dc3c ADD W24, W7, #1 |
(174) 0x42dc40 ADD X20, X18, X2 |
(174) 0x42dc44 MVNI V20.2S, #0 |
(174) 0x42dc48 LDR D23, [SP, #328] |
(174) 0x42dc4c ADD X0, X20, X8 |
(174) 0x42dc50 MOVI V26.2S, #1 |
(174) 0x42dc54 MVNI V24.2S, #1 |
(174) 0x42dc58 UBFM X26, X0, #61, #60 |
(174) 0x42dc5c ADD X17, X2, X8 |
(174) 0x42dc60 ADD X21, X9, X30,LSL #3 |
(174) 0x42dc64 UBFM X11, X17, #61, #60 |
(174) 0x42dc68 LDR X17, [SP, #176] |
(174) 0x42dc6c ADD X12, X4, X6,LSL #3 |
(174) 0x42dc70 ADD X19, X5, X3,LSL #3 |
(174) 0x42dc74 LDR Q17, [X22, X26] |
(174) 0x42dc78 LDR Q18, [X16, X11] |
(174) 0x42dc7c MOV V2.S[1], W24 |
(174) 0x42dc80 FCMGT V16.2D, V17.2D, #0.0000000 |
(174) 0x42dc84 FABS V19.2D, V17.2D |
(174) 0x42dc88 ADD V21.2S, V2.2S, V20.2S |
(174) 0x42dc8c SSHLL V25.2D, V2.2S, #32 |
(174) 0x42dc90 ORR V1.16B, V16.16B, V16.16B |
(174) 0x42dc94 ADD V7.2S, V2.2S, V26.2S |
(174) 0x42dc98 ADD V3.2S, V2.2S, V24.2S |
(174) 0x42dc9c SSHLL V0.2D, V21.2S, #32 |
(174) 0x42dca0 SMIN V22.2S, V7.2S, V23.2S |
(174) 0x42dca4 SSHLL V27.2D, V3.2S, #32 |
(174) 0x42dca8 BSL V1.16B, V0.16B, V25.16B |
(174) 0x42dcac BIF V25.16B, V0.16B, V16.16B |
(174) 0x42dcb0 SSHLL V30.2D, V22.2S, #32 |
(174) 0x42dcb4 FMOV X20, D1 |
(174) 0x42dcb8 MOV D4, V1.D[1] |
(174) 0x42dcbc MOV D28, V25.D[1] |
(174) 0x42dcc0 FMOV X0, D25 |
(174) 0x42dcc4 BIF V27.16B, V30.16B, V16.16B |
(174) 0x42dcc8 BSL V16.16B, V0.16B, V30.16B |
(174) 0x42dccc FMOV X24, D28 |
(174) 0x42dcd0 MOV D20, V27.D[1] |
(174) 0x42dcd4 UBFM X11, X20, #61, #60 |
(174) 0x42dcd8 ADD X20, X17, X2 |
(174) 0x42dcdc FMOV X17, D4 |
(174) 0x42dce0 ADD X2, X23, X2 |
(174) 0x42dce4 LDR D31, [X21, X11] |
(174) 0x42dce8 LDR D2, [X11, X12] |
(174) 0x42dcec UBFM X26, X24, #61, #60 |
(174) 0x42dcf0 ADD X24, X20, X8 |
(174) 0x42dcf4 ADD X8, X2, X8 |
(174) 0x42dcf8 UBFM X20, X24, #61, #60 |
(174) 0x42dcfc UBFM X2, X8, #61, #60 |
(174) 0x42dd00 UBFM X8, X0, #61, #60 |
(174) 0x42dd04 UBFM X17, X17, #61, #60 |
(174) 0x42dd08 FMOV X0, D27 |
(174) 0x42dd0c STR X2, [SP, #192] |
(174) 0x42dd10 ADD X2, SP, #200 |
(174) 0x42dd14 LDR D21, [X12, X8] |
(174) 0x42dd18 ST1 {V16.D[1]}, [X2] |
(174) 0x42dd1c ADD X2, X21, X17 |
(174) 0x42dd20 FMOV X24, D20 |
(174) 0x42dd24 LD1 {V31.D[1]}, [X2] |
(174) 0x42dd28 UBFM X0, X0, #61, #60 |
(174) 0x42dd2c FMOV X2, D16 |
(174) 0x42dd30 LDR D26, [X12, X0] |
(174) 0x42dd34 UBFM X24, X24, #61, #60 |
(174) 0x42dd38 LDR D7, [X16, X2,LSL #3] |
(174) 0x42dd3c ADD X2, X17, X12 |
(174) 0x42dd40 FDIV V25.2D, V19.2D, V31.2D |
(174) 0x42dd44 LD1 {V2.D[1]}, [X2] |
(174) 0x42dd48 ADD X2, X12, X26 |
(174) 0x42dd4c LD1 {V21.D[1]}, [X2] |
(174) 0x42dd50 FADD V16.2D, V25.2D, V29.2D |
(174) 0x42dd54 LDR X2, [SP, #200] |
(174) 0x42dd58 FSUB V0.2D, V15.2D, V25.2D |
(174) 0x42dd5c FSUB V27.2D, V29.2D, V25.2D |
(174) 0x42dd60 LDR D24, [X16, X2,LSL #3] |
(174) 0x42dd64 ADD X2, X12, X24 |
(174) 0x42dd68 LD1 {V26.D[1]}, [X2] |
(174) 0x42dd6c FSUB V3.2D, V21.2D, V2.2D |
(174) 0x42dd70 LDR X2, [SP, #168] |
(174) 0x42dd74 MOV V7.D[1], V24.D[0] |
(174) 0x42dd78 FABS V1.2D, V3.2D |
(174) 0x42dd7c FDIV V18.2D, V18.2D, V7.2D |
(174) 0x42dd80 FSUB V28.2D, V2.2D, V26.2D |
(174) 0x42dd84 FCMLE V4.2D, V3.2D, #0.0000000 |
(174) 0x42dd88 FMUL V23.2D, V0.2D, V1.2D |
(174) 0x42dd8c FMUL V7.2D, V18.2D, V16.2D |
(174) 0x42dd90 FABS V22.2D, V28.2D |
(174) 0x42dd94 ORR V30.16B, V4.16B, V4.16B |
(174) 0x42dd98 FMUL V19.2D, V28.2D, V3.2D |
(174) 0x42dd9c ORR V3.16B, V2.16B, V2.16B |
(174) 0x42dda0 FMLA V23.2D, V7.2D, V22.2D |
(174) 0x42dda4 FMINNM V20.2D, V22.2D, V1.2D |
(174) 0x42dda8 BSL V30.16B, V5.16B, V29.16B |
(174) 0x42ddac FCMGT V26.2D, V19.2D, #0.0000000 |
(174) 0x42ddb0 FMUL V21.2D, V27.2D, V30.2D |
(174) 0x42ddb4 FMUL V31.2D, V23.2D, V6.2D |
(174) 0x42ddb8 FMINNM V25.2D, V31.2D, V20.2D |
(174) 0x42ddbc FMLA V3.2D, V21.2D, V25.2D |
(174) 0x42ddc0 BSL V26.16B, V3.16B, V2.16B |
(174) 0x42ddc4 FMUL V17.2D, V17.2D, V26.2D |
(174) 0x42ddc8 STR Q17, [X2, X20] |
(174) 0x42ddcc FABS V2.2D, V17.2D |
(174) 0x42ddd0 LDR D16, [X11, X19] |
(174) 0x42ddd4 LDR D1, [X26, X19] |
(174) 0x42ddd8 ADD X26, X17, X19 |
(174) 0x42dddc LDR D23, [X8, X19] |
(174) 0x42dde0 LD1 {V16.D[1]}, [X26] |
(174) 0x42dde4 LDR D24, [X24, X19] |
(174) 0x42dde8 LDR D4, [X0, X19] |
(174) 0x42ddec MOV V23.D[1], V1.D[0] |
(174) 0x42ddf0 LDR D28, [X17, X12] |
(174) 0x42ddf4 LDR D27, [X11, X12] |
(174) 0x42ddf8 FSUB V18.2D, V23.2D, V16.2D |
(174) 0x42ddfc MOV V4.D[1], V24.D[0] |
(174) 0x42de00 LDR D20, [X21, X11] |
(174) 0x42de04 LDR D22, [X21, X17] |
(174) 0x42de08 LDP X21, X12, [SP, #184] |
(174) 0x42de0c FSUB V30.2D, V16.2D, V4.2D |
(174) 0x42de10 FABS V19.2D, V18.2D |
(174) 0x42de14 MOV V27.D[1], V28.D[0] |
(174) 0x42de18 FCMLE V21.2D, V18.2D, #0.0000000 |
(174) 0x42de1c ORR V28.16B, V16.16B, V16.16B |
(174) 0x42de20 MOV V20.D[1], V22.D[0] |
(174) 0x42de24 FMUL V0.2D, V0.2D, V19.2D |
(174) 0x42de28 FABS V31.2D, V30.2D |
(174) 0x42de2c FMUL V3.2D, V30.2D, V18.2D |
(174) 0x42de30 FMUL V26.2D, V27.2D, V20.2D |
(174) 0x42de34 ORR V25.16B, V21.16B, V21.16B |
(174) 0x42de38 FMLA V0.2D, V7.2D, V31.2D |
(174) 0x42de3c FDIV V7.2D, V2.2D, V26.2D |
(174) 0x42de40 FMINNM V2.2D, V31.2D, V19.2D |
(174) 0x42de44 BSL V25.16B, V5.16B, V29.16B |
(174) 0x42de48 FCMGT V1.2D, V3.2D, #0.0000000 |
(174) 0x42de4c FSUB V4.2D, V29.2D, V7.2D |
(174) 0x42de50 FMUL V24.2D, V0.2D, V6.2D |
(174) 0x42de54 FMINNM V23.2D, V24.2D, V2.2D |
(174) 0x42de58 FMUL V27.2D, V23.2D, V25.2D |
(174) 0x42de5c FMLA V28.2D, V27.2D, V4.2D |
(174) 0x42de60 BSL V1.16B, V28.16B, V16.16B |
(174) 0x42de64 FMUL V17.2D, V17.2D, V1.2D |
(174) 0x42de68 STR Q17, [X21, X12] |
(174) 0x42de6c TBZ W1, #0, 42dfcc |
(174) 0x42de70 AND W1, W1, #0xfffffffe |
(174) 0x42de74 ADD W7, W7, W1 |
(174) 0x42de78 SBFM X19, X7, #0, #31 |
(174) 0x42de7c SUB W11, W7, #1 |
(174) 0x42de80 ADD X18, X18, X19 |
(174) 0x42de84 LDR D16, [X22, X18,LSL #3] |
(174) 0x42de88 FCMPE D16, #0 |
(174) 0x42de8c B.LS 42e008 |
(174) 0x42de90 SBFM X8, X11, #0, #31 |
(174) 0x42de94 SUB W2, W7, #2 |
(174) 0x42de98 SBFM X26, X2, #0, #31 |
(174) 0x42de9c ORR X24, XZR, X8 |
(174) 0x42dea0 ORR X20, XZR, X19 |
(174) 0x42dea4 ADD X30, X30, X8 |
(174) 0x42dea8 FABS D18, D16 |
(174) 0x42deac ADD X21, X6, X8 |
(174) 0x42deb0 UBFM X1, X30, #61, #60 |
(174) 0x42deb4 LDR D20, [X16, X24,LSL #3] |
(174) 0x42deb8 ADD X12, X6, X20 |
(174) 0x42debc UBFM X11, X21, #61, #60 |
(174) 0x42dec0 ADD X6, X6, X26 |
(174) 0x42dec4 FMOV D1, #2.0000000 |
(174) 0x42dec8 LDR D22, [X9, X1] |
(174) 0x42decc LDR D19, [X16, X19,LSL #3] |
(174) 0x42ded0 LDR D0, [X4, X11] |
(174) 0x42ded4 FDIV D30, D18, D22 |
(174) 0x42ded8 LDR D21, [X4, X12,LSL #3] |
(174) 0x42dedc LDR D3, [X4, X6,LSL #3] |
(174) 0x42dee0 FDIV D26, D19, D20 |
(174) 0x42dee4 FSUB D31, D21, S0 |
(174) 0x42dee8 FSUB D7, D0, S3 |
(174) 0x42deec FCMPE D31, #0 |
(174) 0x42def0 FMUL D2, D7, D31 |
(174) 0x42def4 FADD D25, D30, D9 |
(174) 0x42def8 FSUB D4, D1, S30 |
(174) 0x42defc FCSEL D24, D8, D9, #9 |
(174) 0x42df00 FCMPE D2, #0 |
(174) 0x42df04 FMUL D17, D26, D25 |
(174) 0x42df08 B.LS 42df38 |
(174) 0x42df0c FABS D28, D31 |
(174) 0x42df10 FABS D27, D7 |
(174) 0x42df14 LDR D19, [SP, #336] |
(174) 0x42df18 FSUB D18, D9, S30 |
(174) 0x42df1c FMINNM D23, D27, D28 |
(174) 0x42df20 FMUL D20, D28, D4 |
(174) 0x42df24 FMADD D22, D27, D17, D20 |
(174) 0x42df28 FMUL D30, D22, D19 |
(174) 0x42df2c FMINNM D21, D30, D23 |
(174) 0x42df30 FMUL D3, D21, D18 |
(174) 0x42df34 FMADD D0, D24, D3, D0 |
(174) 0x42df38 ADD X18, X3, X20 |
(174) 0x42df3c ADD X16, X3, X8 |
(174) 0x42df40 FMUL D16, D16, D0 |
(174) 0x42df44 ADD X7, X3, X26 |
(174) 0x42df48 LDP X3, X22, [SP, #168] |
(174) 0x42df4c ADD X20, X22, X19 |
(174) 0x42df50 STR D16, [X3, X20,LSL #3] |
(174) 0x42df54 LDR D0, [X5, X16,LSL #3] |
(174) 0x42df58 LDR D26, [X5, X18,LSL #3] |
(174) 0x42df5c LDR D7, [X5, X7,LSL #3] |
(174) 0x42df60 FSUB D31, D26, S0 |
(174) 0x42df64 FSUB D2, D0, S7 |
(174) 0x42df68 FCMPE D31, #0 |
(174) 0x42df6c FMUL D25, D2, D31 |
(174) 0x42df70 FCSEL D1, D8, D9, #9 |
(174) 0x42df74 FCMPE D25, #0 |
(174) 0x42df78 B.LS 42dfbc |
(174) 0x42df7c LDR D24, [X4, X11] |
(174) 0x42df80 FABS D18, D16 |
(174) 0x42df84 FABS D23, D31 |
(174) 0x42df88 FABS D27, D2 |
(174) 0x42df8c FMINNM D22, D27, D23 |
(174) 0x42df90 LDR D20, [X9, X1] |
(174) 0x42df94 FMUL D4, D23, D4 |
(174) 0x42df98 LDR D21, [SP, #336] |
(174) 0x42df9c FMUL D19, D24, D20 |
(174) 0x42dfa0 FMADD D17, D27, D17, D4 |
(174) 0x42dfa4 FDIV D30, D18, D19 |
(174) 0x42dfa8 FMUL D3, D17, D21 |
(174) 0x42dfac FMINNM D26, D3, D22 |
(174) 0x42dfb0 FSUB D7, D9, S30 |
(174) 0x42dfb4 FMUL D31, D7, D26 |
(174) 0x42dfb8 FMADD D0, D1, D31, D0 |
(174) 0x42dfbc FMUL D16, D0, D16 |
(174) 0x42dfc0 ADD X5, X23, X19 |
(174) 0x42dfc4 LDR X23, [SP, #184] |
(174) 0x42dfc8 STR D16, [X23, X5,LSL #3] |
(174) 0x42dfcc LDR W4, [SP, #304] |
(174) 0x42dfd0 ADD X25, X25, #1 |
(174) 0x42dfd4 LDR W9, [SP, #344] |
(174) 0x42dfd8 CMP W9, W25 |
(174) 0x42dfdc B.LE 42e02c |
(174) 0x42dfe0 LDR W8, [SP, #316] |
(174) 0x42dfe4 LDR W26, [SP, #308] |
(174) 0x42dfe8 LDR W19, [SP, #312] |
(174) 0x42dfec SUB W7, W8, W4 |
(174) 0x42dff0 B 42d780 |
0x42dff4 LDP X19, X20, [SP, #16] |
0x42dff8 LDP X21, X22, [SP, #32] |
0x42dffc LDP X23, X24, [SP, #48] |
0x42e000 LDP X29, X30, [SP], #352 |
0x42e004 RET |
(174) 0x42e008 LDR W0, [SP, #348] |
(174) 0x42e00c ADD W22, W7, #1 |
(174) 0x42e010 SBFM X20, X11, #0, #31 |
(174) 0x42e014 ORR X8, XZR, X19 |
(174) 0x42e018 CMP W22, W0 |
(174) 0x42e01c CSEL W17, W22, W0, #13 |
(174) 0x42e020 SBFM X24, X17, #0, #31 |
(174) 0x42e024 ORR X26, XZR, X24 |
(174) 0x42e028 B 42dea4 |
0x42e02c LDP D8, D9, [SP, #96] |
0x42e030 LDP X19, X20, [SP, #16] |
0x42e034 LDP X25, X26, [SP, #64] |
0x42e038 LDP X27, X28, [SP, #80] |
0x42e03c LDP X21, X22, [SP, #32] |
0x42e040 LDP X23, X24, [SP, #48] |
0x42e044 LDP D10, D11, [SP, #112] |
0x42e048 LDP D12, D13, [SP, #128] |
0x42e04c LDP D14, D15, [SP, #144] |
0x42e050 LDP X29, X30, [SP], #352 |
0x42e054 RET |
(174) 0x42e058 MOVZ X8, #0 |
(174) 0x42e05c ORR W7, WZR, W26 |
(174) 0x42e060 B 42dc28 |
0x42e064 ADD W7, W7, #1 |
0x42e068 MOVZ W5, #0 |
0x42e06c B 42d6cc |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►98.43+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| Path / |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run gcc_4
| Source file and lines | advec_cell.cpp:65-110 |
| Module | exec |
| nb instructions | 96 |
| nb uops | 96 |
| loop length | 384 |
| used w registers | 22 |
| used x registers | 20 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 2 |
| used d registers | 10 |
| used q registers | 0 |
| used v registers | 12 |
| used z registers | 0 |
| nb stack references | 35 |
| micro-operation queue | 12.00 cycles |
| front end | 12.00 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 4.50 | 4.50 | 8.25 | 8.25 | 8.25 | 8.25 | 4.75 | 4.75 | 4.75 | 4.75 | 14.33 | 14.33 | 14.33 | 6.50 | 6.50 |
| cycles | 4.50 | 4.50 | 8.25 | 8.25 | 8.25 | 8.25 | 4.75 | 4.75 | 4.75 | 4.75 | 14.33 | 14.33 | 14.33 | 6.50 | 6.50 |
| Cycles executing div or sqrt instructions | 10.00-25.00 |
| Front-end | 12.00 |
| Dispatch | 14.33 |
| DIV/SQRT | 10.00-25.00 |
| Overall L1 | 14.33-25.00 |
| all | 16% |
| load | 18% |
| store | 27% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| other | 16% |
| all | 42% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 42% |
| all | 18% |
| load | 18% |
| store | 27% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | 0% |
| other | 25% |
| all | 28% |
| load | 38% |
| store | 36% |
| mul | 12% |
| add-sub | 13% |
| fma | 12% |
| other | 23% |
| all | 33% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 33% |
| all | 29% |
| load | 38% |
| store | 36% |
| mul | 12% |
| add-sub | 13% |
| fma | 12% |
| div/sqrt | 12% |
| other | 27% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #672]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X21, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDP W23, W1, [X0, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDR W0, [X0, #56] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR W22, [X21, #60] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| ADD W23, W23, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W14, W1, #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W2, W0, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR W2, [SP, #308] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| CMP W23, W14 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 42dff8 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.2+0x9ac> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD W19, W22, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W24, W14, W23 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR W14, [SP, #168] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| CMP W2, W19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 42dff4 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.2+0x9a8> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| SUB W3, W19, W2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MUL W24, W24, W3 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| STR W3, [SP, #312] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| BL 410210 <@plt_start@+0x1f0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W20, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410240 <@plt_start@+0x220> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| UDIV W7, W24, W20 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| LDR W6, [SP, #168] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| ORR W4, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MSUB W5, W7, W20, W24 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| CMP W0, W5 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CC 42e064 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.2+0xa18> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W4, W7, W4, W5 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W8, W7, W4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR W8, [SP, #316] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| CMP W4, W8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 42dff4 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.2+0x9a8> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR W12, [SP, #312] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| ADD W11, W22, #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ORR X9, XZR, #0x55 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| MOVK X9, #16325 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| FMOV V29.2D, #1.0000000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | vect (50.0%) |
| FMOV V5.2D, #-1.0000000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | vect (50.0%) |
| FMOV S30, W11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (12.5%) |
| UDIV W16, W4, W12 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | scal (12.5%) |
| FMOV D31, X9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| LDR W26, [SP, #308] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| STP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP D8, D9, [SP, #96] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| FMOV D9, #1.0000000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| FMOV D8, #-1.0000000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| STP D10, D11, [SP, #112] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| MOVI V11.4S, #4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (12.5%) |
| STP D12, D13, [SP, #128] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| MVNI V12.4S, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (12.5%) |
| MVNI V13.4S, #1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (12.5%) |
| DUP V16.2S, V30.S[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | vect (25.0%) |
| STP D14, D15, [SP, #144] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| DUP V10.4S, V30.S[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | vect (50.0%) |
| LDR X13, [X21, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| FMOV V15.2D, #2.0000000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | vect (50.0%) |
| MOVI V14.4S, #1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (12.5%) |
| MSUB W18, W16, W12, W4 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| LDP X17, X27, [X21] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| DUP V6.2D, V31.D[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | vect (50.0%) |
| ADD W25, W16, W23 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SBFM X25, X25, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| LDP X15, X10, [X21, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| ADD W26, W18, W26 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W19, W19, W26 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR X17, [SP, #320] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| LDR X28, [X21, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| STP D16, D31, [SP, #328] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| LDR X14, [X21, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| STR W6, [SP, #344] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| STR W11, [SP, #348] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #352 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP D8, D9, [SP, #96] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | vect (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP D10, D11, [SP, #112] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | vect (50.0%) |
| LDP D12, D13, [SP, #128] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | vect (50.0%) |
| LDP D14, D15, [SP, #144] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | vect (50.0%) |
| LDP X29, X30, [SP], #352 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W7, W7, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W5, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| B 42d6cc <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.2+0x80> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run gcc_4
| Source file and lines | advec_cell.cpp:65-110 |
| Module | exec |
| nb instructions | 96 |
| nb uops | 96 |
| loop length | 384 |
| used w registers | 22 |
| used x registers | 20 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 2 |
| used d registers | 10 |
| used q registers | 0 |
| used v registers | 12 |
| used z registers | 0 |
| nb stack references | 35 |
| micro-operation queue | 12.00 cycles |
| front end | 12.00 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 4.50 | 4.50 | 8.25 | 8.25 | 8.25 | 8.25 | 4.75 | 4.75 | 4.75 | 4.75 | 14.33 | 14.33 | 14.33 | 6.50 | 6.50 |
| cycles | 4.50 | 4.50 | 8.25 | 8.25 | 8.25 | 8.25 | 4.75 | 4.75 | 4.75 | 4.75 | 14.33 | 14.33 | 14.33 | 6.50 | 6.50 |
| Cycles executing div or sqrt instructions | 10.00-25.00 |
| Front-end | 12.00 |
| Dispatch | 14.33 |
| DIV/SQRT | 10.00-25.00 |
| Overall L1 | 14.33-25.00 |
| all | 16% |
| load | 18% |
| store | 27% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| other | 16% |
| all | 42% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 42% |
| all | 18% |
| load | 18% |
| store | 27% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | 0% |
| other | 25% |
| all | 28% |
| load | 38% |
| store | 36% |
| mul | 12% |
| add-sub | 13% |
| fma | 12% |
| other | 23% |
| all | 33% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 33% |
| all | 29% |
| load | 38% |
| store | 36% |
| mul | 12% |
| add-sub | 13% |
| fma | 12% |
| div/sqrt | 12% |
| other | 27% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #672]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X21, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDP W23, W1, [X0, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDR W0, [X0, #56] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR W22, [X21, #60] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| ADD W23, W23, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W14, W1, #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W2, W0, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR W2, [SP, #308] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| CMP W23, W14 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 42dff8 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.2+0x9ac> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD W19, W22, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W24, W14, W23 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR W14, [SP, #168] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| CMP W2, W19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 42dff4 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.2+0x9a8> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| SUB W3, W19, W2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MUL W24, W24, W3 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| STR W3, [SP, #312] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| BL 410210 <@plt_start@+0x1f0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W20, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410240 <@plt_start@+0x220> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| UDIV W7, W24, W20 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| LDR W6, [SP, #168] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| ORR W4, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MSUB W5, W7, W20, W24 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| CMP W0, W5 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CC 42e064 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.2+0xa18> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W4, W7, W4, W5 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W8, W7, W4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR W8, [SP, #316] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| CMP W4, W8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 42dff4 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.2+0x9a8> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR W12, [SP, #312] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| ADD W11, W22, #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ORR X9, XZR, #0x55 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| MOVK X9, #16325 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| FMOV V29.2D, #1.0000000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | vect (50.0%) |
| FMOV V5.2D, #-1.0000000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | vect (50.0%) |
| FMOV S30, W11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (12.5%) |
| UDIV W16, W4, W12 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | scal (12.5%) |
| FMOV D31, X9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| LDR W26, [SP, #308] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| STP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP D8, D9, [SP, #96] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| FMOV D9, #1.0000000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| FMOV D8, #-1.0000000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| STP D10, D11, [SP, #112] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| MOVI V11.4S, #4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (12.5%) |
| STP D12, D13, [SP, #128] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| MVNI V12.4S, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (12.5%) |
| MVNI V13.4S, #1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (12.5%) |
| DUP V16.2S, V30.S[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | vect (25.0%) |
| STP D14, D15, [SP, #144] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| DUP V10.4S, V30.S[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | vect (50.0%) |
| LDR X13, [X21, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| FMOV V15.2D, #2.0000000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | vect (50.0%) |
| MOVI V14.4S, #1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (12.5%) |
| MSUB W18, W16, W12, W4 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| LDP X17, X27, [X21] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| DUP V6.2D, V31.D[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | vect (50.0%) |
| ADD W25, W16, W23 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SBFM X25, X25, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| LDP X15, X10, [X21, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| ADD W26, W18, W26 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W19, W19, W26 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR X17, [SP, #320] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| LDR X28, [X21, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| STP D16, D31, [SP, #328] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| LDR X14, [X21, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| STR W6, [SP, #344] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| STR W11, [SP, #348] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #352 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP D8, D9, [SP, #96] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | vect (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP D10, D11, [SP, #112] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | vect (50.0%) |
| LDP D12, D13, [SP, #128] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | vect (50.0%) |
| LDP D14, D15, [SP, #144] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | vect (50.0%) |
| LDP X29, X30, [SP], #352 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W7, W7, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W5, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| B 42d6cc <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.2+0x80> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| Name | Coverage (%) | Time (s) |
|---|---|---|
| ▼advec_cell_kernel(int, int, int, int, int, int, clover::Buffer1D | 3.00 | 4.04 |
| ▼Loop 174 - advec_cell.cpp:67-110 - exec– | 0.01 | 0.02 |
| ○Loop 175 - advec_cell.cpp:71-110 - exec | 2.99 | 3.91 |
