Function: .omp_outlined..12#0x41d4c0 | Module: exec | Source: advec_mom.cpp:108-139 [...] | Coverage: 3.68% |
---|
Function: .omp_outlined..12#0x41d4c0 | Module: exec | Source: advec_mom.cpp:108-139 [...] | Coverage: 3.68% |
---|
/home/hbollore/qaas-runs/170-290-5445/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_mom.cpp: 108 - 139 |
-------------------------------------------------------------------------------- |
108: #pragma omp parallel for simd collapse(2) |
109: for (int j = (y_min + 1); j < (y_max + 1 + 2); j++) { |
110: for (int i = (x_min - 1 + 1); i < (x_max + 1 + 2); i++) |
111: ({ |
112: int upwind, donor, downwind, dif; |
113: double sigma, width, limiter, vdiffuw, vdiffdw, auw, adw, wind, advec_vel_s; |
114: if (node_flux(i, j) < 0.0) { |
115: upwind = i + 2; |
116: donor = i + 1; |
117: downwind = i; |
118: dif = donor; |
119: } else { |
120: upwind = i - 1; |
121: donor = i; |
122: downwind = i + 1; |
123: dif = upwind; |
124: } |
125: sigma = std::fabs(node_flux(i, j)) / (node_mass_pre(donor, j)); |
126: width = celldx[i]; |
127: vdiffuw = vel1(donor, j) - vel1(upwind, j); |
128: vdiffdw = vel1(downwind, j) - vel1(donor, j); |
129: limiter = 0.0; |
130: if (vdiffuw * vdiffdw > 0.0) { |
131: auw = std::fabs(vdiffuw); |
132: adw = std::fabs(vdiffdw); |
133: wind = 1.0; |
134: if (vdiffdw <= 0.0) wind = -1.0; |
135: limiter = |
136: wind * std::fmin(std::fmin(width * ((2.0 - sigma) * adw / width + (1.0 + sigma) * auw / celldx[dif]) / 6.0, auw), adw); |
137: } |
138: advec_vel_s = vel1(donor, j) + (1.0 - sigma) * limiter; |
139: mom_flux(i, j) = advec_vel_s * node_flux(i, j); |
/home/hbollore/qaas-runs/170-290-5445/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x41d4c0 SUB SP, SP, #176 |
0x41d4c4 STP D9, D8, [SP, #64] |
0x41d4c8 STP X29, X30, [SP, #80] |
0x41d4cc STP X28, X27, [SP, #96] |
0x41d4d0 STP X26, X25, [SP, #112] |
0x41d4d4 STP X24, X23, [SP, #128] |
0x41d4d8 STP X22, X21, [SP, #144] |
0x41d4dc STP X20, X19, [SP, #160] |
0x41d4e0 ADD X29, SP, #80 |
0x41d4e4 LDR W8, [X2] |
0x41d4e8 LDR W9, [X3] |
0x41d4ec LDR W10, [X5] |
0x41d4f0 ADD W21, W8, #1 |
0x41d4f4 LDR W20, [X4] |
0x41d4f8 ADD W8, W9, #3 |
0x41d4fc ADD W9, W10, #3 |
0x41d500 SUBS W8, W8, W21 |
0x41d504 CCMP W9, W20, #4, #12 |
0x41d508 B.LE 41d5a8 |
0x41d50c LDP X11, X10, [X29, #104] |
0x41d510 LDR X12, [X29, #96] |
0x41d514 LDR X14, [X6] |
0x41d518 SUB W24, W9, W20 |
0x41d51c MOVN X9, #0 |
0x41d520 ADD X3, SP, #36 |
0x41d524 SUB X4, X29, #24 |
0x41d528 SUB X5, X29, #32 |
0x41d52c LDR X13, [X7] |
0x41d530 LDR W1, [X0] |
0x41d534 LDR X23, [X6, #16] |
0x41d538 ADRP X0, <45e538> |
0x41d53c ADD X0, X0, #3984 |
0x41d540 ADD X6, SP, #40 |
0x41d544 MOVZ W2, #34 |
0x41d548 LDR X25, [X7, #16] |
0x41d54c MOVZ W7, #1 |
0x41d550 LDR X26, [X12, #8] |
0x41d554 LDR X12, [X11] |
0x41d558 LDR X28, [X11, #16] |
0x41d55c LDR X19, [X10] |
0x41d560 LDR X22, [X10, #16] |
0x41d564 UMADDL X27, W24, W8, X9 |
0x41d568 MOVZ W8, #1 |
0x41d56c STP X13, X14, [SP, #16] |
0x41d570 STP X27, XZR, [X29, #992] |
0x41d574 STR X8, [SP, #40] |
0x41d578 STP W1, WZR, [SP, #32] |
0x41d57c STP X8, X12, [SP] |
0x41d580 BL 4033a0 |
0x41d584 LDP X8, X13, [X29, #992] |
0x41d588 CMP X8, X27 |
0x41d58c CSEL X8, X8, X27, #11 |
0x41d590 CMP X13, X8 |
0x41d594 B.LE 41d5cc |
(305) 0x41d598 LDR W1, [SP, #32] |
(305) 0x41d59c ADRP X0, |
(305) 0x41d5a0 ADD X0, X0, #4008 |
(305) 0x41d5a4 BL 403260 |
(305) 0x41d5a8 LDP D9, D8, [SP, #64] |
(305) 0x41d5ac LDP X20, X19, [SP, #160] |
(305) 0x41d5b0 LDP X22, X21, [SP, #144] |
(305) 0x41d5b4 LDP X24, X23, [SP, #128] |
(305) 0x41d5b8 LDP X26, X25, [SP, #112] |
(305) 0x41d5bc LDP X28, X27, [SP, #96] |
(305) 0x41d5c0 LDP X29, X30, [SP, #80] |
(305) 0x41d5c4 ADD SP, SP, #176 |
(305) 0x41d5c8 RET |
(305) 0x41d5cc ADD X8, X8, #1 |
(305) 0x41d5d0 CNTD X10, ALL |
(305) 0x41d5d4 ORR X9, XZR, X13 |
(305) 0x41d5d8 SUB X11, X8, X13 |
(305) 0x41d5dc CMP X11, X10 |
(305) 0x41d5e0 B.CC 41d778 |
(305) 0x41d5e4 UDIV X9, X11, X10 |
(305) 0x41d5e8 PTRUE P0.D, ALL |
(305) 0x41d5ec FDUP Z20.D, #0 |
(305) 0x41d5f0 DUP Z21.D, #0 |
(305) 0x41d5f4 MADD X12, X9, X10, XZR |
(305) 0x41d5f8 INDEX Z0.D, X13, #1 |
(305) 0x41d5fc ADD X9, X13, X12 |
(305) 0x41d600 LDR X13, [SP, #24] |
(305) 0x41d604 DUP Z1.D, X10 |
(305) 0x41d608 SUB X11, X11, X12 |
(305) 0x41d60c DUP Z2.D, X24 |
(305) 0x41d610 DUP Z3.D, X21 |
(305) 0x41d614 DUP Z4.D, X20 |
(305) 0x41d618 DUP Z16.D, X19 |
(305) 0x41d61c DUP Z5.D, X13 |
(305) 0x41d620 LDR X13, [SP, #16] |
(305) 0x41d624 DUP Z6.D, X13 |
(305) 0x41d628 LDR X13, [SP, #8] |
(305) 0x41d62c DUP Z7.D, X13 |
(305) 0x41d630 ADD W13, W20, #1 |
(305) 0x41d634 DUP Z17.D, X13 |
(305) 0x41d638 SUB W13, W20, #1 |
(305) 0x41d63c DUP Z18.D, X13 |
(305) 0x41d640 ADD W13, W20, #2 |
(305) 0x41d644 DUP Z19.D, X13 |
(305) 0x41d648 ORR X13, XZR, #3840 |
(305) 0x41d64c MOVK X13, #16325 |
(305) 0x41d650 DUP Z22.D, X13 |
(305) 0x41d654 HINT #0 |
(305) 0x41d658 HINT #0 |
(305) 0x41d65c HINT #0 |
(304) 0x41d660 MOVPRFX Z23, Z0 |
(304) 0x41d664 SDIV Z23.D, P0/M, Z23.D, Z2.D |
(304) 0x41d668 ADD Z24.D, Z3.D, Z23.D |
(304) 0x41d66c MSB Z23.D, P0/M, Z2.D, Z0.D |
(304) 0x41d670 ADD Z0.D, Z0.D, Z1.D |
(304) 0x41d674 SUBS X12, X12, X10 |
(304) 0x41d678 SXTW Z24.D, P0/M, Z24.D |
(304) 0x41d67c MOVPRFX Z9, Z7 |
(304) 0x41d680 MUL Z9.D, P0/M, Z9.D, Z24.D |
(304) 0x41d684 ADD Z25.D, Z4.D, Z23.D |
(304) 0x41d688 MOVPRFX Z26, Z25 |
(304) 0x41d68c SXTW Z26.D, P0/M, Z25.D |
(304) 0x41d690 MOVPRFX Z27, Z26 |
(304) 0x41d694 MLA Z27.D, P0/M, Z5.D, Z24.D |
(304) 0x41d698 LD1D {Z27.D}, P0/Z, [X23, Z27.D,LSL #3] |
(304) 0x41d69c ADD Z28.D, Z17.D, Z23.D |
(304) 0x41d6a0 ADD Z30.D, Z18.D, Z23.D |
(304) 0x41d6a4 ADD Z23.D, Z19.D, Z23.D |
(304) 0x41d6a8 MOVPRFX Z29, Z28 |
(304) 0x41d6ac SXTW Z29.D, P0/M, Z28.D |
(304) 0x41d6b0 MOVPRFX Z8, Z27 |
(304) 0x41d6b4 FABS Z8.D, P0/M, Z27.D |
(304) 0x41d6b8 FCMLT P1.D, P0/Z, Z27.D, #0 |
(304) 0x41d6bc SEL Z31.D, P1, Z26.D, Z29.D |
(304) 0x41d6c0 SEL Z29.D, P1, Z29.D, Z26.D |
(304) 0x41d6c4 SEL Z23.D, P1, Z23.D, Z30.D |
(304) 0x41d6c8 SEL Z28.D, P1, Z28.D, Z30.D |
(304) 0x41d6cc MOVPRFX Z30, Z29 |
(304) 0x41d6d0 MLA Z30.D, P0/M, Z6.D, Z24.D |
(304) 0x41d6d4 LD1D {Z30.D}, P0/Z, [X25, Z30.D,LSL #3] |
(304) 0x41d6d8 MAD Z24.D, P0/M, Z16.D, Z26.D |
(304) 0x41d6dc ADD Z29.D, Z29.D, Z9.D |
(304) 0x41d6e0 ADR Z23.D, [Z9, Z23.D,SXTW] |
(304) 0x41d6e4 ADD Z31.D, Z31.D, Z9.D |
(304) 0x41d6e8 FDIVR Z30.D, P0/M, Z30.D, Z8.D |
(304) 0x41d6ec LD1D {Z29.D}, P0/Z, [X28, Z29.D,LSL #3] |
(304) 0x41d6f0 LD1D {Z23.D}, P0/Z, [X28, Z23.D,LSL #3] |
(304) 0x41d6f4 LD1D {Z31.D}, P0/Z, [X28, Z31.D,LSL #3] |
(304) 0x41d6f8 FSUB Z9.D, Z29.D, Z23.D |
(304) 0x41d6fc FSUB Z8.D, Z31.D, Z29.D |
(304) 0x41d700 FABD Z31.D, P0/M, Z31.D, Z29.D |
(304) 0x41d704 FABD Z23.D, P0/M, Z23.D, Z29.D |
(304) 0x41d708 FMUL Z9.D, Z8.D, Z9.D |
(304) 0x41d70c FCMGT P2.D, P0/Z, Z8.D, #0 |
(304) 0x41d710 FCMGT P1.D, P0/Z, Z9.D, #0 |
(304) 0x41d714 LD1D {Z25.D}, P1/Z, [X26, Z25.D,SXTW #3] |
(304) 0x41d718 LD1D {Z28.D}, P1/Z, [X26, Z28.D,SXTW #3] |
(304) 0x41d71c FSUB Z9.D, Z20.D, Z30.D |
(304) 0x41d720 FMUL Z9.D, Z31.D, Z9.D |
(304) 0x41d724 MOVPRFX Z8, Z9 |
(304) 0x41d728 FDIV Z8.D, P0/M, Z8.D, Z25.D |
(304) 0x41d72c MOVPRFX Z9, Z23 |
(304) 0x41d730 FMLA Z9.D, P0/M, Z30.D, Z23.D |
(304) 0x41d734 FDIVR Z28.D, P0/M, Z28.D, Z9.D |
(304) 0x41d738 FMUL Z25.D, Z25.D, Z22.D |
(304) 0x41d73c FADD Z28.D, Z28.D, Z8.D |
(304) 0x41d740 FMUL Z25.D, Z25.D, Z28.D |
(304) 0x41d744 FMINNM Z23.D, P0/M, Z23.D, Z25.D |
(304) 0x41d748 FMINNM Z23.D, P0/M, Z23.D, Z31.D |
(304) 0x41d74c MOVPRFX Z25, Z23 |
(304) 0x41d750 FNEG Z25.D, P0/M, Z23.D |
(304) 0x41d754 SEL Z23.D, P2, Z23.D, Z25.D |
(304) 0x41d758 MOVPRFX Z25, Z30 |
(304) 0x41d75c FSUBR Z25.D, P0/M, Z25.D, #1 |
(304) 0x41d760 SEL Z23.D, P1, Z23.D, Z21.D |
(304) 0x41d764 FMAD Z23.D, P0/M, Z25.D, Z29.D |
(304) 0x41d768 FMUL Z23.D, Z23.D, Z27.D |
(304) 0x41d76c ST1D {Z23.D}, P0, [X22, Z24.D,LSL #3] |
(304) 0x41d770 B.NE 41d660 |
(305) 0x41d774 CBZ X11, 41d598 |
(303) 0x41d778 LDP X2, X1, [SP, #16] |
(303) 0x41d77c LDR X3, [SP, #8] |
(303) 0x41d780 SUB W10, WZR, W24 |
(303) 0x41d784 FMOV D0, #2.0000000 |
(303) 0x41d788 ORR X12, XZR, #3840 |
(303) 0x41d78c SUB W11, W20, #1 |
(303) 0x41d790 MOVK X12, #16325 |
(303) 0x41d794 B 41d7c0 |
0x41d798 HINT #0 |
0x41d79c HINT #0 |
(303) 0x41d7a0 FMSUB D2, D2, D6, D6 |
(303) 0x41d7a4 MADD X13, X19, X14, X13 |
(303) 0x41d7a8 ADD X9, X9, #1 |
(303) 0x41d7ac CMP X8, X9 |
(303) 0x41d7b0 FADD D2, D2, D3 |
(303) 0x41d7b4 FMUL D1, D2, D1 |
(303) 0x41d7b8 STR D1, [X22, X13,LSL #3] |
(303) 0x41d7bc B.EQ 41d598 |
(303) 0x41d7c0 SDIV X17, X9, X24 |
(303) 0x41d7c4 MSUB W13, W17, W24, W9 |
(303) 0x41d7c8 ADD W14, W21, W17 |
(303) 0x41d7cc ADD W15, W20, W13 |
(303) 0x41d7d0 ADD W13, W20, W9 |
(303) 0x41d7d4 SBFM X14, X14, #0, #31 |
(303) 0x41d7d8 MADD W16, W10, W17, W13 |
(303) 0x41d7dc SBFM X13, X16, #0, #31 |
(303) 0x41d7e0 ADD W16, W16, #1 |
(303) 0x41d7e4 MADD X18, X1, X14, X13 |
(303) 0x41d7e8 SBFM X16, X16, #0, #31 |
(303) 0x41d7ec LDR D1, [X23, X18,LSL #3] |
(303) 0x41d7f0 FCMP D1, #0 |
(303) 0x41d7f4 B.GE 41d820 |
(303) 0x41d7f8 ADD W18, W20, W9 |
(303) 0x41d7fc ADD W15, W15, #1 |
(303) 0x41d800 MADD W17, W10, W17, W18 |
(303) 0x41d804 ADD W18, W17, #2 |
(303) 0x41d808 ORR X17, XZR, X13 |
(303) 0x41d80c B 41d834 |
0x41d810 HINT #0 |
0x41d814 HINT #0 |
0x41d818 HINT #0 |
0x41d81c HINT #0 |
(302) 0x41d820 SUB W18, W15, #1 |
(302) 0x41d824 ADD W15, W11, W9 |
(302) 0x41d828 MADD W15, W10, W17, W15 |
(302) 0x41d82c ORR X17, XZR, X16 |
(302) 0x41d830 ORR X16, XZR, X13 |
(303) 0x41d834 MADD X0, X2, X14, X16 |
(303) 0x41d838 FABS D2, D1 |
(303) 0x41d83c MOVI D6, #0 |
(303) 0x41d840 LDR D3, [X25, X0,LSL #3] |
(303) 0x41d844 MADD X0, X3, X14, XZR |
(303) 0x41d848 ADD X16, X16, X0 |
(303) 0x41d84c FDIV D2, D2, D3 |
(303) 0x41d850 LDR D3, [X28, X16,LSL #3] |
(303) 0x41d854 ADD X16, X0, W18,SXTW |
(303) 0x41d858 LDR D4, [X28, X16,LSL #3] |
(303) 0x41d85c ADD X16, X17, X0 |
(303) 0x41d860 FSUB D5, D3, D4 |
(303) 0x41d864 LDR D4, [X28, X16,LSL #3] |
(303) 0x41d868 FSUB D4, D4, D3 |
(303) 0x41d86c FMUL D7, D4, D5 |
(303) 0x41d870 FCMP D7, #0 |
(303) 0x41d874 B.LE 41d7a0 |
(303) 0x41d878 LDR D6, [X26, X13,LSL #3] |
(303) 0x41d87c FABS D5, D5 |
(303) 0x41d880 LDR D18, [X26, X15,SXTW #3] |
(303) 0x41d884 FABS D7, D4 |
(303) 0x41d888 FSUB D16, D0, D2 |
(303) 0x41d88c FCMP D4, #0 |
(303) 0x41d890 FMADD D17, D2, D5, D5 |
(303) 0x41d894 FMUL D16, D7, D16 |
(303) 0x41d898 FDIV D16, D16, D6 |
(303) 0x41d89c FDIV D17, D17, D18 |
(303) 0x41d8a0 FADD D16, D17, D16 |
(303) 0x41d8a4 FMOV D17, X12 |
(303) 0x41d8a8 FMUL D6, D6, D17 |
(303) 0x41d8ac FMUL D6, D6, D16 |
(303) 0x41d8b0 FMINNM D5, D6, D5 |
(303) 0x41d8b4 FMINNM D5, D5, D7 |
(303) 0x41d8b8 FNEG D6, D5 |
(303) 0x41d8bc FCSEL D6, D5, D6, #12 |
(303) 0x41d8c0 B 41d7a0 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
○100.00 | __kmp_invoke_microtask | | libomp.so |
Path / |
Source file and lines | advec_mom.cpp:108-139 |
Module | exec |
nb instructions | 60 |
loop length | 240 |
nb stack references | 0 |
front end | 6.75 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.50 | 1.50 | 5.75 | 5.75 | 5.75 | 5.75 | 0.50 | 0.50 | 0.00 | 0.00 | 9.67 | 9.67 | 9.67 | 5.50 | 5.50 |
cycles | 1.50 | 1.50 | 5.75 | 5.75 | 5.75 | 5.75 | 0.50 | 0.50 | 0.00 | 0.00 | 9.67 | 9.67 | 9.67 | 5.50 | 5.50 |
Cycles executing div or sqrt instructions | NA |
Front-end | 6.75 |
Overall L1 | 9.67 |
all | 12% |
load | NA (no load vectorizable/vectorized instructions) |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
SUB SP, SP, #176 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP D9, D8, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
STP X29, X30, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X28, X27, [SP, #96] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X26, X25, [SP, #112] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X24, X23, [SP, #128] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X22, X21, [SP, #144] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X20, X19, [SP, #160] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ADD X29, SP, #80 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR W8, [X2] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR W9, [X3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR W10, [X5] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADD W21, W8, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR W20, [X4] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADD W8, W9, #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD W9, W10, #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUBS W8, W8, W21 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
CCMP W9, W20, #4, #12 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
B.LE 41d5a8 <.omp_outlined..12+0xe8> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LDP X11, X10, [X29, #104] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 |
LDR X12, [X29, #96] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X14, [X6] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
SUB W24, W9, W20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVN X9, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X3, SP, #36 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB X4, X29, #24 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB X5, X29, #32 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR X13, [X7] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR W1, [X0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X23, [X6, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADRP X0, <45e538> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X0, X0, #3984 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X6, SP, #40 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVZ W2, #34 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR X25, [X7, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
MOVZ W7, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR X26, [X12, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X12, [X11] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X28, [X11, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X19, [X10] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X22, [X10, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
UMADDL X27, W24, W8, X9 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
MOVZ W8, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP X13, X14, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X27, XZR, [X29, #992] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STR X8, [SP, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP W1, WZR, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X8, X12, [SP] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
BL 4033a0 <@plt_start@+0x670> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LDP X8, X13, [X29, #992] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 |
CMP X8, X27 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
CSEL X8, X8, X27, #11 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP X13, X8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LE 41d5cc <.omp_outlined..12+0x10c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 |
Source file and lines | advec_mom.cpp:108-139 |
Module | exec |
nb instructions | 60 |
loop length | 240 |
nb stack references | 0 |
front end | 6.75 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.50 | 1.50 | 5.75 | 5.75 | 5.75 | 5.75 | 0.50 | 0.50 | 0.00 | 0.00 | 9.67 | 9.67 | 9.67 | 5.50 | 5.50 |
cycles | 1.50 | 1.50 | 5.75 | 5.75 | 5.75 | 5.75 | 0.50 | 0.50 | 0.00 | 0.00 | 9.67 | 9.67 | 9.67 | 5.50 | 5.50 |
Cycles executing div or sqrt instructions | NA |
Front-end | 6.75 |
Overall L1 | 9.67 |
all | 12% |
load | NA (no load vectorizable/vectorized instructions) |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
SUB SP, SP, #176 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP D9, D8, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
STP X29, X30, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X28, X27, [SP, #96] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X26, X25, [SP, #112] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X24, X23, [SP, #128] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X22, X21, [SP, #144] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X20, X19, [SP, #160] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ADD X29, SP, #80 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR W8, [X2] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR W9, [X3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR W10, [X5] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADD W21, W8, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR W20, [X4] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADD W8, W9, #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD W9, W10, #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUBS W8, W8, W21 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
CCMP W9, W20, #4, #12 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
B.LE 41d5a8 <.omp_outlined..12+0xe8> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LDP X11, X10, [X29, #104] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 |
LDR X12, [X29, #96] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X14, [X6] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
SUB W24, W9, W20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVN X9, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X3, SP, #36 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB X4, X29, #24 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB X5, X29, #32 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR X13, [X7] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR W1, [X0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X23, [X6, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ADRP X0, <45e538> | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X0, X0, #3984 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X6, SP, #40 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVZ W2, #34 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR X25, [X7, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
MOVZ W7, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR X26, [X12, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X12, [X11] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X28, [X11, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X19, [X10] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X22, [X10, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
UMADDL X27, W24, W8, X9 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
MOVZ W8, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STP X13, X14, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X27, XZR, [X29, #992] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STR X8, [SP, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP W1, WZR, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X8, X12, [SP] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
BL 4033a0 <@plt_start@+0x670> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LDP X8, X13, [X29, #992] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 |
CMP X8, X27 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
CSEL X8, X8, X27, #11 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP X13, X8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LE 41d5cc <.omp_outlined..12+0x10c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼.omp_outlined..12#0x41d4c0– | 3.68 | 4.91 |
▼Loop 302 - context.h:69-69 - exec– | 0 | 0 |
▼Loop 303 - advec_mom.cpp:108-139 - exec– | 0 | 0 |
▼Loop 305 - advec_mom.cpp:108-139 - exec– | 0 | 0 |
○Loop 304 - advec_mom.cpp:109-139 - exec | 3.68 | 4.89 |