Function: hypre_CSRMatrixSetRownnz | Module: exec | Source: csr_matrix.c:136-168 [...] | Coverage: 0.03% |
---|
Function: hypre_CSRMatrixSetRownnz | Module: exec | Source: csr_matrix.c:136-168 [...] | Coverage: 0.03% |
---|
/home/hbollore/qaas/qaas-runs/169-817-3176/intel/AMG/build/AMG/AMG/seq_mv/csr_matrix.c: 136 - 168 |
-------------------------------------------------------------------------------- |
136: { |
137: HYPRE_Int ierr=0; |
138: HYPRE_Int num_rows = hypre_CSRMatrixNumRows(matrix); |
[...] |
145: for (i=0; i < num_rows; i++) |
146: { |
147: adiag = (A_i[i+1] - A_i[i]); |
148: if(adiag > 0) irownnz++; |
149: } |
150: |
151: hypre_CSRMatrixNumRownnz(matrix) = irownnz; |
152: |
153: if ((irownnz == 0) || (irownnz == num_rows)) |
[...] |
159: Arownnz = hypre_CTAlloc(HYPRE_Int, irownnz); |
160: irownnz = 0; |
161: for (i=0; i < num_rows; i++) |
162: { |
163: adiag = A_i[i+1]-A_i[i]; |
164: if(adiag > 0) Arownnz[irownnz++] = i; |
165: } |
166: hypre_CSRMatrixRownnz(matrix) = Arownnz; |
167: } |
168: return ierr; |
0x4a2b90 STP X29, X30, [SP, #-48]! |
0x4a2b94 STR X21, [SP, #16] |
0x4a2b98 STP X20, X19, [SP, #32] |
0x4a2b9c ADD X29, SP, #0 |
0x4a2ba0 LDR X20, [X0, #16] |
0x4a2ba4 ORR X19, XZR, X0 |
0x4a2ba8 CMP X20, #0 |
0x4a2bac B.LE 4a2bd0 |
0x4a2bb0 LDR X21, [X19] |
0x4a2bb4 CNTW X9, ALL |
0x4a2bb8 CMP X20, X9 |
0x4a2bbc LDR X10, [X21] |
0x4a2bc0 B.CS 4a2bf0 |
0x4a2bc4 ORR X0, XZR, XZR |
0x4a2bc8 ORR X8, XZR, XZR |
0x4a2bcc B 4a2c84 |
0x4a2bd0 ORR X0, XZR, XZR |
0x4a2bd4 STR XZR, [X19, #64] |
(3624) 0x4a2bd8 STR X0, [X19, #56] |
(3624) 0x4a2bdc ORR X0, XZR, XZR |
(3624) 0x4a2be0 LDP X20, X19, [SP, #32] |
(3624) 0x4a2be4 LDR X21, [SP, #16] |
(3624) 0x4a2be8 LDP X29, X30, [SP], #48 |
(3624) 0x4a2bec RET |
(3624) 0x4a2bf0 UDIV X8, X20, X9 |
(3624) 0x4a2bf4 CNTD X12, ALL |
(3624) 0x4a2bf8 INDEX Z0.D, #0, #1 |
(3624) 0x4a2bfc PTRUE P0.D, ALL |
(3624) 0x4a2c00 ADD X14, X21, #8 |
(3624) 0x4a2c04 ORR X11, XZR, XZR |
(3624) 0x4a2c08 DUP Z2.D, #1 |
(3624) 0x4a2c0c SUB W12, W12, #1 |
(3624) 0x4a2c10 MADD X8, X8, X9, XZR |
(3624) 0x4a2c14 DUP Z1.D, X12 |
(3624) 0x4a2c18 CMPEQ P1.D, P0/Z, Z0.D, Z1.D |
(3624) 0x4a2c1c DUP Z1.D, #0 |
(3624) 0x4a2c20 ORR Z3.D, Z1.D, Z1.D |
(3624) 0x4a2c24 SUB X13, X20, X8 |
(3624) 0x4a2c28 CPY Z0.D, P1/M, X10 |
(3624) 0x4a2c2c PTRUE P1.D, VL1 |
(3624) 0x4a2c30 ADDVL X10, X14, #1 |
(3624) 0x4a2c34 REV P1.D, P1.D |
(3624) 0x4a2c38 HINT #0 |
(3624) 0x4a2c3c HINT #0 |
(3623) 0x4a2c40 LD1D {Z4.D}, P0/Z, [X14, X11,LSL #3] |
(3623) 0x4a2c44 SPLICE Z0.D, P1/M, Z0.D, Z4.D |
(3623) 0x4a2c48 CMPGT P2.D, P0/Z, Z4.D, Z0.D |
(3623) 0x4a2c4c LD1D {Z0.D}, P0/Z, [X10, X11,LSL #3] |
(3623) 0x4a2c50 ADD X11, X11, X9 |
(3623) 0x4a2c54 ADD Z1.D, P2/M, Z1.D, Z2.D |
(3623) 0x4a2c58 SPLICE Z4.D, P1/M, Z4.D, Z0.D |
(3623) 0x4a2c5c CMPGT P3.D, P0/Z, Z0.D, Z4.D |
(3623) 0x4a2c60 CMP X8, X11 |
(3623) 0x4a2c64 ADD Z3.D, P3/M, Z3.D, Z2.D |
(3623) 0x4a2c68 B.NE 4a2c40 |
(3624) 0x4a2c6c ADD Z1.D, Z3.D, Z1.D |
(3624) 0x4a2c70 UADDV D1, P0, Z1.D |
(3624) 0x4a2c74 FMOV X0, D1 |
(3624) 0x4a2c78 CBZ X13, 4a2cb8 |
0x4a2c7c WHILELS P0.D, XZR, X12 |
0x4a2c80 LASTB X10, P0, Z0.D |
0x4a2c84 SUB X9, X20, X8 |
0x4a2c88 ADD X8, X21, X8,LSL #3 |
0x4a2c8c ADD X8, X8, #8 |
0x4a2c90 HINT #0 |
0x4a2c94 HINT #0 |
0x4a2c98 HINT #0 |
0x4a2c9c HINT #0 |
(3625) 0x4a2ca0 LDR X11, [X8], #8 |
(3625) 0x4a2ca4 CMP X11, X10 |
(3625) 0x4a2ca8 ORR X10, XZR, X11 |
(3625) 0x4a2cac CSINC X0, X0, X0, LE |
(3625) 0x4a2cb0 SUBS X9, X9, #1 |
(3625) 0x4a2cb4 B.NE 4a2ca0 |
(3624) 0x4a2cb8 CMP X0, #0 |
(3624) 0x4a2cbc STR X0, [X19, #64] |
(3624) 0x4a2cc0 CCMP X0, X20, #4, NE |
(3624) 0x4a2cc4 B.NE 4a2ce0 |
(3624) 0x4a2cc8 STR XZR, [X19, #56] |
(3624) 0x4a2ccc ORR X0, XZR, XZR |
(3624) 0x4a2cd0 LDP X20, X19, [SP, #32] |
(3624) 0x4a2cd4 LDR X21, [SP, #16] |
(3624) 0x4a2cd8 LDP X29, X30, [SP], #48 |
(3624) 0x4a2cdc RET |
(3624) 0x4a2ce0 MOVZ W1, #8 |
(3624) 0x4a2ce4 BL 4ae090 |
(3624) 0x4a2ce8 ORR X8, XZR, XZR |
(3624) 0x4a2cec ORR X9, XZR, XZR |
(3624) 0x4a2cf0 B 4a2d0c |
0x4a2cf4 HINT #0 |
0x4a2cf8 HINT #0 |
0x4a2cfc HINT #0 |
(3624) 0x4a2d00 ADD X9, X9, #1 |
(3624) 0x4a2d04 CMP X20, X9 |
(3624) 0x4a2d08 B.EQ 4a2bd8 |
(3624) 0x4a2d0c ADD X10, X21, X9,LSL #3 |
(3624) 0x4a2d10 LDP X10, X11, [X10] |
(3624) 0x4a2d14 CMP X11, X10 |
(3624) 0x4a2d18 B.LE 4a2d00 |
(3624) 0x4a2d1c STR X9, [X0, X8,LSL #3] |
(3624) 0x4a2d20 ADD X8, X8, #1 |
(3624) 0x4a2d24 B 4a2d00 |
0x4a2d28 HINT #0 |
0x4a2d2c HINT #0 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | hypre_IJMatrixAssembleParCSR | IJMatrix_parcsr.c:2823 | exec |
○ | BuildIJLaplacian27pt | amg.c:2272 | exec |
○ | main | amg.c:274 | exec |
○ | __libc_start_main | libc-2.31.so | |
○ | _start | exec |
Path / |
Source file and lines | csr_matrix.c:136-168 |
Module | exec |
nb instructions | 32 |
loop length | 128 |
nb stack references | 0 |
front end | 2.88 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.50 | 1.50 | 3.50 | 3.50 | 3.50 | 3.50 | 0.00 | 1.00 | 0.00 | 0.00 | 2.33 | 2.33 | 2.33 | 2.00 | 2.00 |
cycles | 1.50 | 1.50 | 3.50 | 3.50 | 3.50 | 3.50 | 0.00 | 1.00 | 0.00 | 0.00 | 2.33 | 2.33 | 2.33 | 2.00 | 2.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 2.88 |
Overall L1 | 3.50 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
STP X29, X30, [SP, #-48]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STR X21, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X20, X19, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR X20, [X0, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ORR X19, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP X20, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LE 4a2bd0 <hypre_CSRMatrixSetRownnz+0x40> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LDR X21, [X19] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
CNTW X9, ALL | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
CMP X20, X9 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
LDR X10, [X21] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
B.CS 4a2bf0 <hypre_CSRMatrixSetRownnz+0x60> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ORR X0, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ORR X8, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
B 4a2c84 <hypre_CSRMatrixSetRownnz+0xf4> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ORR X0, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STR XZR, [X19, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
WHILELS P0.D, XZR, X12 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 2 |
LASTB X10, P0, Z0.D | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | 1 |
SUB X9, X20, X8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X8, X21, X8,LSL #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X8, X8, #8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 |
Source file and lines | csr_matrix.c:136-168 |
Module | exec |
nb instructions | 32 |
loop length | 128 |
nb stack references | 0 |
front end | 2.88 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.50 | 1.50 | 3.50 | 3.50 | 3.50 | 3.50 | 0.00 | 1.00 | 0.00 | 0.00 | 2.33 | 2.33 | 2.33 | 2.00 | 2.00 |
cycles | 1.50 | 1.50 | 3.50 | 3.50 | 3.50 | 3.50 | 0.00 | 1.00 | 0.00 | 0.00 | 2.33 | 2.33 | 2.33 | 2.00 | 2.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 2.88 |
Overall L1 | 3.50 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
STP X29, X30, [SP, #-48]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STR X21, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
STP X20, X19, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR X20, [X0, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
ORR X19, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP X20, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LE 4a2bd0 <hypre_CSRMatrixSetRownnz+0x40> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LDR X21, [X19] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
CNTW X9, ALL | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
CMP X20, X9 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
LDR X10, [X21] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
B.CS 4a2bf0 <hypre_CSRMatrixSetRownnz+0x60> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ORR X0, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ORR X8, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
B 4a2c84 <hypre_CSRMatrixSetRownnz+0xf4> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ORR X0, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
STR XZR, [X19, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
WHILELS P0.D, XZR, X12 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 2 |
LASTB X10, P0, Z0.D | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | 1 |
SUB X9, X20, X8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X8, X21, X8,LSL #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X8, X8, #8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼hypre_CSRMatrixSetRownnz– | 0.03 | 0 |
○Loop 3625 - csr_matrix.c:145-148 - exec | 0 | 0 |
▼Loop 3624 - csr_matrix.c:145-168 - exec– | 0 | 0 |
○Loop 3623 - csr_matrix.c:147-148 - exec | 0.03 | 0 |