| Function: advec_cell_kernel(int, int, int, int, int, int, clover::Buffer1D<double>&, clover::Buffer1 ... | Module: exec | Source: advec_cell.cpp:136-140 [...] | Coverage (incl. loops): 0.69% | (excl. loops): 0.01% |
|---|
| Function: advec_cell_kernel(int, int, int, int, int, int, clover::Buffer1D<double>&, clover::Buffer1 ... | Module: exec | Source: advec_cell.cpp:136-140 [...] | Coverage (incl. loops): 0.69% | (excl. loops): 0.01% |
|---|
/home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/build/CloverLeaf2.0-CXX/src/omp/advec_cell.cpp: 136 - 140 |
-------------------------------------------------------------------------------- |
136: #pragma omp parallel for simd collapse(2) |
137: for (int j = (y_min - 2 + 1); j < (y_max + 2 + 2); j++) { |
138: for (int i = (x_min - 2 + 1); i < (x_max + 2 + 2); i++) { |
139: pre_vol(i, j) = volume(i, j) + (vol_flux_y(i + 0, j + 1) - vol_flux_y(i, j) + vol_flux_x(i + 1, j + 0) - vol_flux_x(i, j)); |
140: post_vol(i, j) = pre_vol(i, j) - (vol_flux_y(i + 0, j + 1) - vol_flux_y(i, j)); |
/home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/build/CloverLeaf2.0-CXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x42ca00 STP X29, X30, [SP, #-176]! |
0x42ca04 ADD X29, SP, #0 |
0x42ca08 STP X19, X20, [SP, #16] |
0x42ca0c ORR X19, XZR, X0 |
0x42ca10 STP X23, X24, [SP, #48] |
0x42ca14 LDP W23, W1, [X0, #48] |
0x42ca18 LDR W0, [X0, #40] |
0x42ca1c LDR W2, [X19, #44] |
0x42ca20 ADD W3, W1, #4 |
0x42ca24 SUB W24, W23, #1 |
0x42ca28 SUB W4, W0, #1 |
0x42ca2c STP W3, W4, [SP, #136] |
0x42ca30 CMP W24, W3 |
0x42ca34 B.GE 42ce7c |
0x42ca38 STP X21, X22, [SP, #32] |
0x42ca3c ADD W22, W2, #4 |
0x42ca40 SUB W21, W3, W24 |
0x42ca44 CMP W4, W22 |
0x42ca48 B.GE 42ce8c |
0x42ca4c SUB W5, W22, W4 |
0x42ca50 MUL W23, W21, W5 |
0x42ca54 STR W5, [SP, #144] |
0x42ca58 BL 410210 |
0x42ca5c ORR W20, WZR, W0 |
0x42ca60 BL 410240 |
0x42ca64 UDIV W6, W23, W20 |
0x42ca68 ORR W7, WZR, W0 |
0x42ca6c MSUB W8, W6, W20, W23 |
0x42ca70 CMP W0, W8 |
0x42ca74 B.CC 42cea8 |
0x42ca78 MADD W23, W6, W7, W8 |
0x42ca7c ADD W9, W6, W23 |
0x42ca80 STR W9, [SP, #148] |
0x42ca84 CMP W23, W9 |
0x42ca88 B.CS 42ce8c |
0x42ca8c LDP W10, W11, [SP, #140] |
0x42ca90 STP X27, X28, [SP, #80] |
0x42ca94 STP X25, X26, [SP, #64] |
0x42ca98 UDIV W12, W23, W11 |
0x42ca9c LDR X27, [X19, #32] |
0x42caa0 LDP X4, X26, [X19, #16] |
0x42caa4 MSUB W13, W12, W11, W23 |
0x42caa8 ADD W14, W12, W24 |
0x42caac LDP X25, X24, [X19] |
0x42cab0 SBFM X5, X14, #0, #31 |
0x42cab4 ADD W28, W13, W10 |
0x42cab8 SUB W22, W22, W28 |
0x42cabc CMP W6, W22 |
0x42cac0 CSEL W6, W6, W22, LS |
0x42cac4 ADD W20, W23, W6 |
0x42cac8 CMP W23, W20 |
0x42cacc B.CS 42ce58 |
(166) 0x42cad0 LDR X19, [X26] |
(166) 0x42cad4 ADD X16, X5, #1 |
(166) 0x42cad8 LDR X17, [X4] |
(166) 0x42cadc LDR X30, [X24] |
(166) 0x42cae0 MUL X3, X5, X19 |
(166) 0x42cae4 LDR X14, [X4, #16] |
(166) 0x42cae8 MADD X12, X5, X17, X17 |
(166) 0x42caec LDR X15, [X24, #16] |
(166) 0x42caf0 MUL X13, X5, X30 |
(166) 0x42caf4 LDR X21, [X25, #16] |
(166) 0x42caf8 SUB X18, X12, X17 |
(166) 0x42cafc LDR X23, [X26, #16] |
(166) 0x42cb00 STR X18, [SP, #120] |
(166) 0x42cb04 LDR X22, [X27, #16] |
(166) 0x42cb08 STP X16, X3, [SP, #104] |
(166) 0x42cb0c LDR X1, [X25] |
(166) 0x42cb10 LDR X2, [X27] |
(166) 0x42cb14 MUL X7, X5, X1 |
(166) 0x42cb18 MUL X30, X5, X2 |
(166) 0x42cb1c STR X7, [SP, #128] |
(166) 0x42cb20 CMP W6, #1 |
(166) 0x42cb24 B.EQ 42cdb0 |
(166) 0x42cb28 SBFM X8, X28, #0, #31 |
(166) 0x42cb2c UBFM W5, W6, #1, #31 |
(166) 0x42cb30 ADD X9, X13, X8 |
(166) 0x42cb34 UBFM X11, X5, #60, #59 |
(166) 0x42cb38 ADD X16, X9, #1 |
(166) 0x42cb3c SUB X10, X11, #16 |
(166) 0x42cb40 ADD X5, X15, X16,LSL #3 |
(166) 0x42cb44 UBFM X2, X16, #61, #60 |
(166) 0x42cb48 UBFM X17, X10, #4, #63 |
(166) 0x42cb4c ADD X1, X30, X8 |
(166) 0x42cb50 ADD X7, X7, X8 |
(166) 0x42cb54 ADD X19, X17, #1 |
(166) 0x42cb58 ADD X16, X12, X8 |
(166) 0x42cb5c STR X2, [SP, #152] |
(166) 0x42cb60 STR X5, [SP, #96] |
(166) 0x42cb64 SUB X5, X2, #8 |
(166) 0x42cb68 ADD X17, X18, X8 |
(166) 0x42cb6c ADD X18, X3, X8 |
(166) 0x42cb70 ADD X8, X15, X5 |
(166) 0x42cb74 ADD X9, X21, X7,LSL #3 |
(166) 0x42cb78 STR X5, [SP, #160] |
(166) 0x42cb7c UBFM X5, X1, #61, #60 |
(166) 0x42cb80 ANDS X10, X19, #0x3 |
(166) 0x42cb84 ADD X2, X14, X16,LSL #3 |
(166) 0x42cb88 UBFM X19, X7, #61, #60 |
(166) 0x42cb8c MOVZ X0, #0 |
(166) 0x42cb90 STR X5, [SP, #168] |
(166) 0x42cb94 ADD X7, X23, X18,LSL #3 |
(166) 0x42cb98 UBFM X16, X16, #61, #60 |
(166) 0x42cb9c ADD X3, X14, X17,LSL #3 |
(166) 0x42cba0 UBFM X18, X18, #61, #60 |
(166) 0x42cba4 UBFM X17, X17, #61, #60 |
(166) 0x42cba8 ADD X1, X22, X1,LSL #3 |
(166) 0x42cbac B.EQ 42cea0 |
(166) 0x42cbb0 CMP X10, #1 |
(166) 0x42cbb4 B.EQ 42cc50 |
(166) 0x42cbb8 CMP X10, #2 |
(166) 0x42cbbc B.EQ 42cc0c |
(166) 0x42cbc0 LDR Q2, [X21, X19] |
(166) 0x42cbc4 MOVZ X0, #16 |
(166) 0x42cbc8 LDR Q3, [X14, X16] |
(166) 0x42cbcc LDR X10, [SP, #152] |
(166) 0x42cbd0 LDR X19, [SP, #160] |
(166) 0x42cbd4 LDR Q30, [X14, X17] |
(166) 0x42cbd8 FADD V0.2D, V3.2D, V2.2D |
(166) 0x42cbdc LDR Q1, [X15, X10] |
(166) 0x42cbe0 LDR Q5, [X15, X19] |
(166) 0x42cbe4 FADD V4.2D, V0.2D, V1.2D |
(166) 0x42cbe8 FADD V6.2D, V5.2D, V30.2D |
(166) 0x42cbec FSUB V7.2D, V4.2D, V6.2D |
(166) 0x42cbf0 STR Q7, [X23, X18] |
(166) 0x42cbf4 LDR Q29, [X14, X17] |
(166) 0x42cbf8 LDR Q31, [X14, X16] |
(166) 0x42cbfc LDR X18, [SP, #168] |
(166) 0x42cc00 FSUB V16.2D, V29.2D, V31.2D |
(166) 0x42cc04 FADD V17.2D, V16.2D, V7.2D |
(166) 0x42cc08 STR Q17, [X22, X18] |
(166) 0x42cc0c LDR Q18, [X2, X0] |
(166) 0x42cc10 LDR Q19, [X9, X0] |
(166) 0x42cc14 LDR X16, [SP, #96] |
(166) 0x42cc18 LDR Q20, [X8, X0] |
(166) 0x42cc1c LDR Q27, [X3, X0] |
(166) 0x42cc20 FADD V21.2D, V18.2D, V19.2D |
(166) 0x42cc24 LDR Q23, [X16, X0] |
(166) 0x42cc28 FADD V22.2D, V20.2D, V27.2D |
(166) 0x42cc2c FADD V24.2D, V21.2D, V23.2D |
(166) 0x42cc30 FSUB V25.2D, V24.2D, V22.2D |
(166) 0x42cc34 STR Q25, [X7, X0] |
(166) 0x42cc38 LDR Q26, [X3, X0] |
(166) 0x42cc3c LDR Q28, [X2, X0] |
(166) 0x42cc40 FSUB V3.2D, V26.2D, V28.2D |
(166) 0x42cc44 FADD V2.2D, V3.2D, V25.2D |
(166) 0x42cc48 STR Q2, [X1, X0] |
(166) 0x42cc4c ADD X0, X0, #16 |
(166) 0x42cc50 LDR Q30, [X2, X0] |
(166) 0x42cc54 LDR Q0, [X9, X0] |
(166) 0x42cc58 LDR X5, [SP, #96] |
(166) 0x42cc5c LDR Q1, [X8, X0] |
(166) 0x42cc60 LDR Q4, [X3, X0] |
(166) 0x42cc64 FADD V5.2D, V30.2D, V0.2D |
(166) 0x42cc68 LDR Q7, [X5, X0] |
(166) 0x42cc6c FADD V6.2D, V1.2D, V4.2D |
(166) 0x42cc70 FADD V29.2D, V5.2D, V7.2D |
(166) 0x42cc74 FSUB V31.2D, V29.2D, V6.2D |
(166) 0x42cc78 STR Q31, [X7, X0] |
(166) 0x42cc7c LDR Q16, [X3, X0] |
(166) 0x42cc80 LDR Q17, [X2, X0] |
(166) 0x42cc84 FSUB V18.2D, V16.2D, V17.2D |
(166) 0x42cc88 FADD V19.2D, V18.2D, V31.2D |
(166) 0x42cc8c STR Q19, [X1, X0] |
(166) 0x42cc90 ADD X0, X0, #16 |
(166) 0x42cc94 CMP X0, X11 |
(166) 0x42cc98 B.EQ 42cda4 |
(167) 0x42cc9c LDR Q20, [X2, X0] |
(167) 0x42cca0 ADD X10, X0, #16 |
(167) 0x42cca4 ADD X17, X0, #32 |
(167) 0x42cca8 ADD X19, X0, #48 |
(167) 0x42ccac LDR Q27, [X9, X0] |
(167) 0x42ccb0 LDR Q21, [X5, X0] |
(167) 0x42ccb4 LDR Q22, [X8, X0] |
(167) 0x42ccb8 LDR Q23, [X3, X0] |
(167) 0x42ccbc FADD V24.2D, V20.2D, V27.2D |
(167) 0x42ccc0 FADD V26.2D, V22.2D, V23.2D |
(167) 0x42ccc4 FADD V25.2D, V24.2D, V21.2D |
(167) 0x42ccc8 FSUB V28.2D, V25.2D, V26.2D |
(167) 0x42cccc STR Q28, [X7, X0] |
(167) 0x42ccd0 LDR Q3, [X3, X0] |
(167) 0x42ccd4 LDR Q2, [X2, X0] |
(167) 0x42ccd8 FSUB V30.2D, V3.2D, V2.2D |
(167) 0x42ccdc FADD V0.2D, V30.2D, V28.2D |
(167) 0x42cce0 STR Q0, [X1, X0] |
(167) 0x42cce4 ADD X0, X0, #64 |
(167) 0x42cce8 LDR Q1, [X2, X10] |
(167) 0x42ccec LDR Q4, [X9, X10] |
(167) 0x42ccf0 LDR Q5, [X5, X10] |
(167) 0x42ccf4 LDR Q6, [X8, X10] |
(167) 0x42ccf8 LDR Q7, [X3, X10] |
(167) 0x42ccfc FADD V29.2D, V1.2D, V4.2D |
(167) 0x42cd00 FADD V31.2D, V6.2D, V7.2D |
(167) 0x42cd04 FADD V16.2D, V29.2D, V5.2D |
(167) 0x42cd08 FSUB V17.2D, V16.2D, V31.2D |
(167) 0x42cd0c STR Q17, [X7, X10] |
(167) 0x42cd10 LDR Q18, [X3, X10] |
(167) 0x42cd14 LDR Q19, [X2, X10] |
(167) 0x42cd18 FSUB V20.2D, V18.2D, V19.2D |
(167) 0x42cd1c FADD V27.2D, V20.2D, V17.2D |
(167) 0x42cd20 STR Q27, [X1, X10] |
(167) 0x42cd24 LDR Q21, [X2, X17] |
(167) 0x42cd28 LDR Q22, [X9, X17] |
(167) 0x42cd2c LDR Q23, [X5, X17] |
(167) 0x42cd30 LDR Q24, [X8, X17] |
(167) 0x42cd34 LDR Q25, [X3, X17] |
(167) 0x42cd38 FADD V26.2D, V21.2D, V22.2D |
(167) 0x42cd3c FADD V28.2D, V24.2D, V25.2D |
(167) 0x42cd40 FADD V3.2D, V26.2D, V23.2D |
(167) 0x42cd44 FSUB V2.2D, V3.2D, V28.2D |
(167) 0x42cd48 STR Q2, [X7, X17] |
(167) 0x42cd4c LDR Q30, [X3, X17] |
(167) 0x42cd50 LDR Q0, [X2, X17] |
(167) 0x42cd54 FSUB V1.2D, V30.2D, V0.2D |
(167) 0x42cd58 FADD V4.2D, V1.2D, V2.2D |
(167) 0x42cd5c STR Q4, [X1, X17] |
(167) 0x42cd60 LDR Q5, [X2, X19] |
(167) 0x42cd64 LDR Q6, [X9, X19] |
(167) 0x42cd68 LDR Q7, [X5, X19] |
(167) 0x42cd6c LDR Q29, [X8, X19] |
(167) 0x42cd70 LDR Q31, [X3, X19] |
(167) 0x42cd74 FADD V16.2D, V5.2D, V6.2D |
(167) 0x42cd78 FADD V17.2D, V29.2D, V31.2D |
(167) 0x42cd7c FADD V18.2D, V16.2D, V7.2D |
(167) 0x42cd80 FSUB V19.2D, V18.2D, V17.2D |
(167) 0x42cd84 STR Q19, [X7, X19] |
(167) 0x42cd88 LDR Q20, [X3, X19] |
(167) 0x42cd8c LDR Q27, [X2, X19] |
(167) 0x42cd90 FSUB V21.2D, V20.2D, V27.2D |
(167) 0x42cd94 FADD V22.2D, V21.2D, V19.2D |
(167) 0x42cd98 STR Q22, [X1, X19] |
(167) 0x42cd9c CMP X0, X11 |
(167) 0x42cda0 B.NE 42cc9c |
(166) 0x42cda4 TBZ W6, #0, 42ce20 |
(166) 0x42cda8 AND W6, W6, #0xfffffffe |
(166) 0x42cdac ADD W28, W28, W6 |
(166) 0x42cdb0 SBFM X11, X28, #0, #31 |
(166) 0x42cdb4 ADD W9, W28, #1 |
(166) 0x42cdb8 ADD X7, X13, W9,SXTW |
(166) 0x42cdbc ADD X13, X13, X11 |
(166) 0x42cdc0 LDR D23, [X15, X7,LSL #3] |
(166) 0x42cdc4 ADD X12, X12, X11 |
(166) 0x42cdc8 UBFM X8, X12, #61, #60 |
(166) 0x42cdcc ADD X30, X30, X11 |
(166) 0x42cdd0 LDR D25, [X15, X13,LSL #3] |
(166) 0x42cdd4 LDR X15, [SP, #128] |
(166) 0x42cdd8 LDR D24, [X14, X8] |
(166) 0x42cddc ADD X2, X15, X11 |
(166) 0x42cde0 LDR D26, [X21, X2,LSL #3] |
(166) 0x42cde4 LDP X21, X3, [SP, #112] |
(166) 0x42cde8 ADD X1, X3, X11 |
(166) 0x42cdec FADD D28, D26, D24 |
(166) 0x42cdf0 ADD X16, X21, X11 |
(166) 0x42cdf4 UBFM X18, X1, #61, #60 |
(166) 0x42cdf8 LDR D2, [X14, X18] |
(166) 0x42cdfc FSUB D3, D28, D25 |
(166) 0x42ce00 FSUB D30, D23, D2 |
(166) 0x42ce04 FADD D0, D3, D30 |
(166) 0x42ce08 STR D0, [X23, X16,LSL #3] |
(166) 0x42ce0c LDR D1, [X14, X18] |
(166) 0x42ce10 LDR D4, [X14, X8] |
(166) 0x42ce14 FSUB D5, D1, D4 |
(166) 0x42ce18 FADD D6, D5, D0 |
(166) 0x42ce1c STR D6, [X22, X30,LSL #3] |
(166) 0x42ce20 LDR X5, [SP, #104] |
(166) 0x42ce24 ORR W23, WZR, W20 |
(166) 0x42ce28 LDR W20, [SP, #136] |
(166) 0x42ce2c CMP W20, W5 |
(166) 0x42ce30 B.LE 42ce70 |
(166) 0x42ce34 LDR W14, [SP, #148] |
(166) 0x42ce38 LDR W22, [SP, #144] |
(166) 0x42ce3c LDR W28, [SP, #140] |
(166) 0x42ce40 SUB W6, W14, W23 |
(166) 0x42ce44 CMP W6, W22 |
(166) 0x42ce48 CSEL W6, W6, W22, LS |
(166) 0x42ce4c ADD W20, W23, W6 |
(166) 0x42ce50 CMP W23, W20 |
(166) 0x42ce54 B.CC 42cad0 |
(168) 0x42ce58 ADD X15, X5, #1 |
(168) 0x42ce5c LDR W20, [SP, #136] |
(168) 0x42ce60 STR X15, [SP, #104] |
(168) 0x42ce64 LDR X5, [SP, #104] |
(168) 0x42ce68 CMP W20, W5 |
(168) 0x42ce6c B.GT 42ce34 |
0x42ce70 LDP X21, X22, [SP, #32] |
0x42ce74 LDP X25, X26, [SP, #64] |
0x42ce78 LDP X27, X28, [SP, #80] |
0x42ce7c LDP X19, X20, [SP, #16] |
0x42ce80 LDP X23, X24, [SP, #48] |
0x42ce84 LDP X29, X30, [SP], #176 |
0x42ce88 RET |
0x42ce8c LDP X21, X22, [SP, #32] |
0x42ce90 LDP X19, X20, [SP, #16] |
0x42ce94 LDP X23, X24, [SP, #48] |
0x42ce98 LDP X29, X30, [SP], #176 |
0x42ce9c RET |
(166) 0x42cea0 LDR X5, [SP, #96] |
(166) 0x42cea4 B 42cc9c |
0x42cea8 ADD W6, W6, #1 |
0x42ceac MOVZ W8, #0 |
0x42ceb0 B 42ca78 |
0x42ceb4 HINT #0 |
0x42ceb8 HINT #0 |
0x42cebc HINT #0 |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►100.00+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_cell_kernel(int, int, in[...] | advec_cell.cpp:136 | exec |
| ○ | advec_cell_driver(global_varia[...] | advec_cell.cpp:232 | exec |
| ○ | advection(global_variables&) | advection.cpp:50 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►50.41+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_cell_kernel(int, int, in[...] | advec_cell.cpp:136 | exec |
| ○ | advec_cell_driver(global_varia[...] | advec_cell.cpp:232 | exec |
| ○ | advection(global_variables&) | advection.cpp:50 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| ►49.59+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►74.19+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►25.81+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_cell_kernel(int, int, in[...] | advec_cell.cpp:136 | exec |
| ○ | advec_cell_driver(global_varia[...] | advec_cell.cpp:232 | exec |
| ○ | advection(global_variables&) | advection.cpp:50 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►87.24+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►12.73+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_cell_kernel(int, int, in[...] | advec_cell.cpp:136 | exec |
| ○ | advec_cell_driver(global_varia[...] | advec_cell.cpp:232 | exec |
| ○ | advection(global_variables&) | advection.cpp:50 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►93.64+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►6.36+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_cell_kernel(int, int, in[...] | advec_cell.cpp:136 | exec |
| ○ | advec_cell_driver(global_varia[...] | advec_cell.cpp:232 | exec |
| ○ | advection(global_variables&) | advection.cpp:50 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►95.76+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►4.24+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_cell_kernel(int, int, in[...] | advec_cell.cpp:136 | exec |
| ○ | advec_cell_driver(global_varia[...] | advec_cell.cpp:232 | exec |
| ○ | advection(global_variables&) | advection.cpp:50 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►96.85+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►3.15+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_cell_kernel(int, int, in[...] | advec_cell.cpp:136 | exec |
| ○ | advec_cell_driver(global_varia[...] | advec_cell.cpp:232 | exec |
| ○ | advection(global_variables&) | advection.cpp:50 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►97.49+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►2.51+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_cell_kernel(int, int, in[...] | advec_cell.cpp:136 | exec |
| ○ | advec_cell_driver(global_varia[...] | advec_cell.cpp:232 | exec |
| ○ | advection(global_variables&) | advection.cpp:50 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►97.91+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►2.09+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_cell_kernel(int, int, in[...] | advec_cell.cpp:136 | exec |
| ○ | advec_cell_driver(global_varia[...] | advec_cell.cpp:232 | exec |
| ○ | advection(global_variables&) | advection.cpp:50 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►98.22+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►1.78+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_cell_kernel(int, int, in[...] | advec_cell.cpp:136 | exec |
| ○ | advec_cell_driver(global_varia[...] | advec_cell.cpp:232 | exec |
| ○ | advection(global_variables&) | advection.cpp:50 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►98.42+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►1.58+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_cell_kernel(int, int, in[...] | advec_cell.cpp:136 | exec |
| ○ | advec_cell_driver(global_varia[...] | advec_cell.cpp:232 | exec |
| ○ | advection(global_variables&) | advection.cpp:50 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| Path / |
The code analyzed by CQA in that panel excludes loops and represents 0.01% of application time for run 1x1
| Source file and lines | advec_cell.cpp:136-140 |
| Module | exec |
| nb instructions | 70 |
| nb uops | 67 |
| loop length | 280 |
| used w registers | 22 |
| used x registers | 17 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 0 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 20 |
| micro-operation queue | 8.38 cycles |
| front end | 8.38 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 5.00 | 5.00 | 8.50 | 8.50 | 8.50 | 8.50 | 0.00 | 0.00 | 0.00 | 0.00 | 8.83 | 8.50 | 8.67 | 4.50 | 4.50 |
| cycles | 5.00 | 5.00 | 8.50 | 8.50 | 8.50 | 8.50 | 0.00 | 0.00 | 0.00 | 0.00 | 8.83 | 8.50 | 8.67 | 4.50 | 4.50 |
| Cycles executing div or sqrt instructions | 10.00-25.00 |
| Front-end | 8.38 |
| Dispatch | 8.83 |
| DIV/SQRT | 10.00-25.00 |
| Overall L1 | 10.00-25.00 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | 0% |
| other | 0% |
| all | 27% |
| load | 41% |
| store | 38% |
| mul | 12% |
| add-sub | 13% |
| fma | 12% |
| div/sqrt | 12% |
| other | 21% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #-176]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X19, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDP W23, W1, [X0, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDR W0, [X0, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR W2, [X19, #44] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| ADD W3, W1, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W24, W23, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W4, W0, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STP W3, W4, [SP, #136] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| CMP W24, W3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 42ce7c <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.4+0x47c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD W22, W2, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W21, W3, W24 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W4, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 42ce8c <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.4+0x48c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| SUB W5, W22, W4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MUL W23, W21, W5 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| STR W5, [SP, #144] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| BL 410210 <@plt_start@+0x1f0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W20, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410240 <@plt_start@+0x220> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| UDIV W6, W23, W20 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| ORR W7, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MSUB W8, W6, W20, W23 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| CMP W0, W8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CC 42cea8 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.4+0x4a8> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W23, W6, W7, W8 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W9, W6, W23 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR W9, [SP, #148] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| CMP W23, W9 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 42ce8c <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.4+0x48c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP W10, W11, [SP, #140] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| STP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| UDIV W12, W23, W11 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | scal (12.5%) |
| LDR X27, [X19, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDP X4, X26, [X19, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| MSUB W13, W12, W11, W23 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W14, W12, W24 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| LDP X25, X24, [X19] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| SBFM X5, X14, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| ADD W28, W13, W10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W22, W22, W28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W6, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| CSEL W6, W6, W22, LS | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADD W20, W23, W6 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W23, W20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 42ce58 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.4+0x458> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #176 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #176 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W6, W6, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W8, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| B 42ca78 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.4+0x78> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A |
The code analyzed by CQA in that panel excludes loops and represents 0.01% of application time for run 1x1
| Source file and lines | advec_cell.cpp:136-140 |
| Module | exec |
| nb instructions | 70 |
| nb uops | 67 |
| loop length | 280 |
| used w registers | 22 |
| used x registers | 17 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 0 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 20 |
| micro-operation queue | 8.38 cycles |
| front end | 8.38 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 5.00 | 5.00 | 8.50 | 8.50 | 8.50 | 8.50 | 0.00 | 0.00 | 0.00 | 0.00 | 8.83 | 8.50 | 8.67 | 4.50 | 4.50 |
| cycles | 5.00 | 5.00 | 8.50 | 8.50 | 8.50 | 8.50 | 0.00 | 0.00 | 0.00 | 0.00 | 8.83 | 8.50 | 8.67 | 4.50 | 4.50 |
| Cycles executing div or sqrt instructions | 10.00-25.00 |
| Front-end | 8.38 |
| Dispatch | 8.83 |
| DIV/SQRT | 10.00-25.00 |
| Overall L1 | 10.00-25.00 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | 0% |
| other | 0% |
| all | 27% |
| load | 41% |
| store | 38% |
| mul | 12% |
| add-sub | 13% |
| fma | 12% |
| div/sqrt | 12% |
| other | 21% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #-176]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X19, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDP W23, W1, [X0, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDR W0, [X0, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR W2, [X19, #44] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| ADD W3, W1, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W24, W23, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W4, W0, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STP W3, W4, [SP, #136] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| CMP W24, W3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 42ce7c <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.4+0x47c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD W22, W2, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W21, W3, W24 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W4, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 42ce8c <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.4+0x48c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| SUB W5, W22, W4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MUL W23, W21, W5 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| STR W5, [SP, #144] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| BL 410210 <@plt_start@+0x1f0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W20, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410240 <@plt_start@+0x220> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| UDIV W6, W23, W20 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| ORR W7, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MSUB W8, W6, W20, W23 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| CMP W0, W8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CC 42cea8 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.4+0x4a8> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W23, W6, W7, W8 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W9, W6, W23 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR W9, [SP, #148] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| CMP W23, W9 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 42ce8c <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.4+0x48c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP W10, W11, [SP, #140] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| STP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| UDIV W12, W23, W11 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | scal (12.5%) |
| LDR X27, [X19, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDP X4, X26, [X19, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| MSUB W13, W12, W11, W23 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W14, W12, W24 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| LDP X25, X24, [X19] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| SBFM X5, X14, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| ADD W28, W13, W10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W22, W22, W28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W6, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| CSEL W6, W6, W22, #9 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADD W20, W23, W6 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W23, W20 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 42ce58 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.4+0x458> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #176 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #176 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W6, W6, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W8, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| B 42ca78 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.4+0x78> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A |
| Run 1x1 | Number processes: 1; Number nodes: NA; Run Command: <executable>; MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings; Dataset: ; Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431; OMP_PROC_BIND: spread; OMP_DISPLAY_AFFINITY: TRUE; OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'; OMP_DISPLAY_ENV: TRUE; OMP_NUM_THREADS: 1; OMP_PLACES: threads |
|---|---|
| Run 1x2 | Number processes: 1; Run Command: <executable>; MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings; Dataset: ; Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431; OMP_NUM_THREADS: 2; OMP_PROC_BIND: spread; OMP_DISPLAY_AFFINITY: TRUE; OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'; OMP_DISPLAY_ENV: TRUE; OMP_PLACES: threads |
| Run 1x4 | Number processes: 1; Run Command: <executable>; MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings; Dataset: ; Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431; OMP_NUM_THREADS: 4; OMP_PROC_BIND: spread; OMP_DISPLAY_AFFINITY: TRUE; OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'; OMP_DISPLAY_ENV: TRUE; OMP_PLACES: threads |
| Run 1x8 | Number processes: 1; Run Command: <executable>; MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings; Dataset: ; Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431; OMP_NUM_THREADS: 8; OMP_PROC_BIND: spread; OMP_DISPLAY_AFFINITY: TRUE; OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'; OMP_DISPLAY_ENV: TRUE; OMP_PLACES: threads |
| Run 1x16 | Number processes: 1; Run Command: <executable>; MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings; Dataset: ; Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431; OMP_NUM_THREADS: 16; OMP_PROC_BIND: spread; OMP_DISPLAY_AFFINITY: TRUE; OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'; OMP_DISPLAY_ENV: TRUE; OMP_PLACES: threads |
| Run 1x24 | Number processes: 1; Run Command: <executable>; MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings; Dataset: ; Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431; OMP_NUM_THREADS: 24; OMP_PROC_BIND: spread; OMP_DISPLAY_AFFINITY: TRUE; OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'; OMP_DISPLAY_ENV: TRUE; OMP_PLACES: threads |
| Run 1x32 | Number processes: 1; Run Command: <executable>; MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings; Dataset: ; Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431; OMP_NUM_THREADS: 32; OMP_PROC_BIND: spread; OMP_DISPLAY_AFFINITY: TRUE; OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'; OMP_DISPLAY_ENV: TRUE; OMP_PLACES: threads |
| Run 1x40 | Number processes: 1; Run Command: <executable>; MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings; Dataset: ; Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431; OMP_NUM_THREADS: 40; OMP_PROC_BIND: spread; OMP_DISPLAY_AFFINITY: TRUE; OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'; OMP_DISPLAY_ENV: TRUE; OMP_PLACES: threads |
| Run 1x48 | Number processes: 1; Run Command: <executable>; MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings; Dataset: ; Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431; OMP_NUM_THREADS: 48; OMP_PROC_BIND: spread; OMP_DISPLAY_AFFINITY: TRUE; OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'; OMP_DISPLAY_ENV: TRUE; OMP_PLACES: threads |
| Run 1x56 | Number processes: 1; Run Command: <executable>; MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings; Dataset: ; Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431; OMP_NUM_THREADS: 56; OMP_PROC_BIND: spread; OMP_DISPLAY_AFFINITY: TRUE; OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'; OMP_DISPLAY_ENV: TRUE; OMP_PLACES: threads |
| Run 1x64 | Number processes: 1; Run Command: <executable>; MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings; Dataset: ; Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431; OMP_NUM_THREADS: 64; OMP_PROC_BIND: spread; OMP_DISPLAY_AFFINITY: TRUE; OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'; OMP_DISPLAY_ENV: TRUE; OMP_PLACES: threads |
| (1x1) Efficiency | (1x1) Potential Speed-Up (%) | (1x2) Efficiency | (1x2) Potential Speed-Up (%) | (1x4) Efficiency | (1x4) Potential Speed-Up (%) | (1x8) Efficiency | (1x8) Potential Speed-Up (%) | (1x16) Efficiency | (1x16) Potential Speed-Up (%) | (1x24) Efficiency | (1x24) Potential Speed-Up (%) | (1x32) Efficiency | (1x32) Potential Speed-Up (%) | (1x40) Efficiency | (1x40) Potential Speed-Up (%) | (1x48) Efficiency | (1x48) Potential Speed-Up (%) | (1x56) Efficiency | (1x56) Potential Speed-Up (%) | (1x64) Efficiency | (1x64) Potential Speed-Up (%) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 0 | 0.98 | 0.01 | 0.92 | 0.06 | 0.66 | 0.3 | 0.33 | 0.79 | 0.22 | 0.97 | 0.17 | 1.06 | 0.14 | 1.12 | 0.12 | 1.17 | 0.1 | 1.21 | 0.09 | 1.23 |
| Run | Number of threads | Efficiency (ideal is 1) | Speedup | Ideal Speedup | Time (s) | Coverage (%) |
|---|---|---|---|---|---|---|
| 1x1 | 1 | 1 | 1 | 1 | 10.285000801086 | 0.69198226928711 |
| 1x2 | 2 | 0.98 | 1.96 | 2 | 5.2599992752075 | 0.6979593038559 |
| 1x4 | 4 | 0.92 | 3.69 | 4 | 2.8549993038177 | 0.72788953781128 |
| 1x8 | 8 | 0.66 | 5.29 | 8 | 1.9499996900558 | 0.89615386724472 |
| 1x16 | 16 | 0.33 | 5.21 | 16 | 1.9349998235703 | 1.1742082834244 |
| 1x24 | 24 | 0.22 | 5.28 | 24 | 1.9350000619888 | 1.2423857450485 |
| 1x32 | 32 | 0.17 | 5.57 | 32 | 1.8200001716614 | 1.2788784503937 |
| 1x40 | 40 | 0.14 | 5.77 | 40 | 1.7700001001358 | 1.3030351400375 |
| 1x48 | 48 | 0.12 | 5.76 | 48 | 1.7650001049042 | 1.3265722990036 |
| 1x56 | 56 | 0.1 | 5.7 | 56 | 1.8049997091293 | 1.3422124385834 |
| 1x64 | 64 | 0.09 | 5.67 | 64 | 1.7949998378754 | 1.3483386039734 |
| Name | Coverage (%) | Time (s) |
|---|---|---|
| ▼advec_cell_kernel(int, int, int, int, int, int, clover::Buffer1D | 0.69 | 10.29 |
| ▼Loop 168 - advec_cell.cpp:136-140 - exec– | 0.00 | 0.00 |
| ▼Loop 166 - advec_cell.cpp:136-140 - exec– | 0.00 | 0.00 |
| ○Loop 167 - advec_cell.cpp:139-140 - exec | 0.69 | 10.19 |
