| Function: advec_mom_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, ... | Module: exec | Source: advec_mom.cpp:218-221 [...] | Coverage (incl. loops): 2.78% | (excl. loops): 0.01% |
|---|
| Function: advec_mom_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, ... | Module: exec | Source: advec_mom.cpp:218-221 [...] | Coverage (incl. loops): 2.78% | (excl. loops): 0.01% |
|---|
/home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/build/CloverLeaf2.0-CXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
/home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/build/CloverLeaf2.0-CXX/src/omp/advec_mom.cpp: 218 - 221 |
-------------------------------------------------------------------------------- |
218: #pragma omp parallel for simd collapse(2) |
219: for (int j = (y_min + 1); j < (y_max + 1 + 2); j++) { |
220: for (int i = (x_min + 1); i < (x_max + 1 + 2); i++) { |
221: vel1(i, j) = (vel1(i, j) * node_mass_pre(i, j) + mom_flux(i + 0, j - 1) - mom_flux(i, j)) / node_mass_post(i, j); |
0x4309e0 STP X29, X30, [SP, #-112]! |
0x4309e4 ADD X29, SP, #0 |
0x4309e8 STP X19, X20, [SP, #16] |
0x4309ec ORR X20, XZR, X0 |
0x4309f0 STP X21, X22, [SP, #32] |
0x4309f4 LDP W22, W1, [X0, #40] |
0x4309f8 LDR W0, [X0, #32] |
0x4309fc LDR W19, [X20, #36] |
0x430a00 ADD W2, W1, #3 |
0x430a04 ADD W22, W22, #1 |
0x430a08 ADD W3, W0, #1 |
0x430a0c STP W2, W3, [SP, #96] |
0x430a10 CMP W22, W2 |
0x430a14 B.GE 430d24 |
0x430a18 STP X23, X24, [SP, #48] |
0x430a1c ADD W24, W19, #3 |
0x430a20 SUB W23, W2, W22 |
0x430a24 CMP W3, W24 |
0x430a28 B.GE 430d20 |
0x430a2c SUB W4, W24, W3 |
0x430a30 MUL W19, W23, W4 |
0x430a34 STR W4, [SP, #104] |
0x430a38 BL 410210 |
0x430a3c ORR W21, WZR, W0 |
0x430a40 BL 410240 |
0x430a44 UDIV W6, W19, W21 |
0x430a48 ORR W5, WZR, W0 |
0x430a4c MSUB W7, W6, W21, W19 |
0x430a50 CMP W0, W7 |
0x430a54 B.CC 430d50 |
0x430a58 MADD W16, W6, W5, W7 |
0x430a5c ADD W8, W6, W16 |
0x430a60 STR W8, [SP, #108] |
0x430a64 CMP W16, W8 |
0x430a68 B.CS 430d20 |
0x430a6c LDR W9, [SP, #104] |
0x430a70 STP X25, X26, [SP, #64] |
0x430a74 LDR W13, [SP, #100] |
0x430a78 UDIV W10, W16, W9 |
0x430a7c STP X27, X28, [SP, #80] |
0x430a80 LDP X28, X27, [X20] |
0x430a84 LDP X26, X25, [X20, #16] |
0x430a88 MSUB W11, W10, W9, W16 |
0x430a8c ADD W12, W10, W22 |
0x430a90 SBFM X5, X12, #0, #31 |
0x430a94 ADD W4, W11, W13 |
0x430a98 SUB W17, W24, W4 |
0x430a9c HINT #0 |
(199) 0x430aa0 CMP W6, W17 |
(199) 0x430aa4 CSEL W3, W6, W17, #9 |
(199) 0x430aa8 ADD W23, W16, W3 |
(199) 0x430aac CMP W16, W23 |
(199) 0x430ab0 B.CS 430cfc |
(199) 0x430ab4 LDR X17, [X25] |
(199) 0x430ab8 SUB W14, W5, #1 |
(199) 0x430abc SBFM X15, X14, #0, #31 |
(199) 0x430ac0 LDR X18, [X26] |
(199) 0x430ac4 LDR X20, [X27] |
(199) 0x430ac8 MUL X12, X15, X17 |
(199) 0x430acc LDR X1, [X28] |
(199) 0x430ad0 MUL X14, X17, X5 |
(199) 0x430ad4 MUL X15, X5, X18 |
(199) 0x430ad8 LDR X16, [X25, #16] |
(199) 0x430adc MUL X17, X5, X20 |
(199) 0x430ae0 LDR X30, [X26, #16] |
(199) 0x430ae4 MUL X11, X5, X1 |
(199) 0x430ae8 LDR X24, [X27, #16] |
(199) 0x430aec LDR X22, [X28, #16] |
(199) 0x430af0 CMP W3, #1 |
(199) 0x430af4 B.EQ 430cb8 |
(199) 0x430af8 UBFM W19, W3, #1, #31 |
(199) 0x430afc SBFM X2, X4, #0, #31 |
(199) 0x430b00 UBFM X13, X19, #60, #59 |
(199) 0x430b04 ADD X6, X11, X2 |
(199) 0x430b08 SUB X21, X13, #16 |
(199) 0x430b0c ADD X20, X15, X2 |
(199) 0x430b10 ADD X1, X22, X6,LSL #3 |
(199) 0x430b14 UBFM X7, X21, #4, #63 |
(199) 0x430b18 ADD X19, X12, X2 |
(199) 0x430b1c ADD X8, X30, X20,LSL #3 |
(199) 0x430b20 ADD X9, X7, #1 |
(199) 0x430b24 ADD X21, X14, X2 |
(199) 0x430b28 ADD X7, X16, X19,LSL #3 |
(199) 0x430b2c ADD X2, X17, X2 |
(199) 0x430b30 ANDS X10, X9, #0x3 |
(199) 0x430b34 ADD X9, X16, X21,LSL #3 |
(199) 0x430b38 UBFM X18, X6, #61, #60 |
(199) 0x430b3c MOVZ X0, #0 |
(199) 0x430b40 ADD X6, X24, X2,LSL #3 |
(199) 0x430b44 UBFM X20, X20, #61, #60 |
(199) 0x430b48 UBFM X19, X19, #61, #60 |
(199) 0x430b4c UBFM X21, X21, #61, #60 |
(199) 0x430b50 UBFM X2, X2, #61, #60 |
(199) 0x430b54 B.EQ 430bf4 |
(199) 0x430b58 CMP X10, #1 |
(199) 0x430b5c B.EQ 430bc0 |
(199) 0x430b60 CMP X10, #2 |
(199) 0x430b64 B.EQ 430b94 |
(199) 0x430b68 LDR Q28, [X16, X21] |
(199) 0x430b6c MOVZ X0, #16 |
(199) 0x430b70 LDR Q30, [X30, X20] |
(199) 0x430b74 LDR Q0, [X22, X18] |
(199) 0x430b78 FNEG V1.2D, V28.2D |
(199) 0x430b7c LDR Q31, [X16, X19] |
(199) 0x430b80 LDR Q29, [X24, X2] |
(199) 0x430b84 FMLA V1.2D, V30.2D, V0.2D |
(199) 0x430b88 FADD V2.2D, V1.2D, V31.2D |
(199) 0x430b8c FDIV V3.2D, V2.2D, V29.2D |
(199) 0x430b90 STR Q3, [X22, X18] |
(199) 0x430b94 LDR Q24, [X9, X0] |
(199) 0x430b98 LDR Q26, [X8, X0] |
(199) 0x430b9c LDR Q4, [X1, X0] |
(199) 0x430ba0 FNEG V5.2D, V24.2D |
(199) 0x430ba4 LDR Q27, [X7, X0] |
(199) 0x430ba8 LDR Q25, [X6, X0] |
(199) 0x430bac FMLA V5.2D, V26.2D, V4.2D |
(199) 0x430bb0 FADD V6.2D, V5.2D, V27.2D |
(199) 0x430bb4 FDIV V7.2D, V6.2D, V25.2D |
(199) 0x430bb8 STR Q7, [X1, X0] |
(199) 0x430bbc ADD X0, X0, #16 |
(199) 0x430bc0 LDR Q20, [X9, X0] |
(199) 0x430bc4 LDR Q22, [X8, X0] |
(199) 0x430bc8 LDR Q16, [X1, X0] |
(199) 0x430bcc FNEG V17.2D, V20.2D |
(199) 0x430bd0 LDR Q23, [X7, X0] |
(199) 0x430bd4 LDR Q21, [X6, X0] |
(199) 0x430bd8 FMLA V17.2D, V22.2D, V16.2D |
(199) 0x430bdc FADD V18.2D, V17.2D, V23.2D |
(199) 0x430be0 FDIV V19.2D, V18.2D, V21.2D |
(199) 0x430be4 STR Q19, [X1, X0] |
(199) 0x430be8 ADD X0, X0, #16 |
(199) 0x430bec CMP X13, X0 |
(199) 0x430bf0 B.EQ 430cac |
(200) 0x430bf4 LDR Q28, [X9, X0] |
(200) 0x430bf8 ADD X20, X0, #16 |
(200) 0x430bfc ADD X18, X0, #32 |
(200) 0x430c00 ADD X10, X0, #48 |
(200) 0x430c04 LDR Q30, [X8, X0] |
(200) 0x430c08 LDR Q0, [X1, X0] |
(200) 0x430c0c FNEG V1.2D, V28.2D |
(200) 0x430c10 LDR Q31, [X7, X0] |
(200) 0x430c14 LDR Q29, [X6, X0] |
(200) 0x430c18 FMLA V1.2D, V30.2D, V0.2D |
(200) 0x430c1c FADD V2.2D, V1.2D, V31.2D |
(200) 0x430c20 FDIV V3.2D, V2.2D, V29.2D |
(200) 0x430c24 STR Q3, [X1, X0] |
(200) 0x430c28 ADD X0, X0, #64 |
(200) 0x430c2c LDR Q24, [X9, X20] |
(200) 0x430c30 LDR Q26, [X8, X20] |
(200) 0x430c34 LDR Q4, [X1, X20] |
(200) 0x430c38 FNEG V5.2D, V24.2D |
(200) 0x430c3c LDR Q27, [X7, X20] |
(200) 0x430c40 LDR Q25, [X6, X20] |
(200) 0x430c44 FMLA V5.2D, V26.2D, V4.2D |
(200) 0x430c48 FADD V6.2D, V5.2D, V27.2D |
(200) 0x430c4c FDIV V7.2D, V6.2D, V25.2D |
(200) 0x430c50 STR Q7, [X1, X20] |
(200) 0x430c54 LDR Q20, [X9, X18] |
(200) 0x430c58 LDR Q22, [X8, X18] |
(200) 0x430c5c LDR Q16, [X1, X18] |
(200) 0x430c60 FNEG V17.2D, V20.2D |
(200) 0x430c64 LDR Q23, [X7, X18] |
(200) 0x430c68 LDR Q21, [X6, X18] |
(200) 0x430c6c FMLA V17.2D, V22.2D, V16.2D |
(200) 0x430c70 FADD V18.2D, V17.2D, V23.2D |
(200) 0x430c74 FDIV V19.2D, V18.2D, V21.2D |
(200) 0x430c78 STR Q19, [X1, X18] |
(200) 0x430c7c LDR Q28, [X9, X10] |
(200) 0x430c80 LDR Q30, [X8, X10] |
(200) 0x430c84 LDR Q0, [X1, X10] |
(200) 0x430c88 FNEG V1.2D, V28.2D |
(200) 0x430c8c LDR Q31, [X7, X10] |
(200) 0x430c90 LDR Q29, [X6, X10] |
(200) 0x430c94 FMLA V1.2D, V30.2D, V0.2D |
(200) 0x430c98 FADD V2.2D, V1.2D, V31.2D |
(200) 0x430c9c FDIV V3.2D, V2.2D, V29.2D |
(200) 0x430ca0 STR Q3, [X1, X10] |
(200) 0x430ca4 CMP X13, X0 |
(200) 0x430ca8 B.NE 430bf4 |
(199) 0x430cac TBZ W3, #0, 430cf8 |
(199) 0x430cb0 AND W3, W3, #0xfffffffe |
(199) 0x430cb4 ADD W4, W4, W3 |
(199) 0x430cb8 SBFM X13, X4, #0, #31 |
(199) 0x430cbc ADD X11, X11, X13 |
(199) 0x430cc0 ADD X15, X15, X13 |
(199) 0x430cc4 UBFM X1, X11, #61, #60 |
(199) 0x430cc8 ADD X14, X14, X13 |
(199) 0x430ccc LDR D24, [X30, X15,LSL #3] |
(199) 0x430cd0 ADD X12, X12, X13 |
(199) 0x430cd4 ADD X30, X17, X13 |
(199) 0x430cd8 LDR D26, [X22, X1] |
(199) 0x430cdc LDR D4, [X16, X14,LSL #3] |
(199) 0x430ce0 LDR D5, [X16, X12,LSL #3] |
(199) 0x430ce4 LDR D27, [X24, X30,LSL #3] |
(199) 0x430ce8 FNMSUB D25, D24, D26, D4 |
(199) 0x430cec FADD D6, D25, D5 |
(199) 0x430cf0 FDIV D7, D6, D27 |
(199) 0x430cf4 STR D7, [X22, X1] |
(199) 0x430cf8 ORR W16, WZR, W23 |
(199) 0x430cfc LDR W23, [SP, #96] |
(199) 0x430d00 ADD X5, X5, #1 |
(199) 0x430d04 CMP W23, W5 |
(199) 0x430d08 B.LE 430d34 |
(199) 0x430d0c LDR W24, [SP, #108] |
(199) 0x430d10 LDR W4, [SP, #100] |
(199) 0x430d14 LDR W17, [SP, #104] |
(199) 0x430d18 SUB W6, W24, W16 |
(199) 0x430d1c B 430aa0 |
0x430d20 LDP X23, X24, [SP, #48] |
0x430d24 LDP X19, X20, [SP, #16] |
0x430d28 LDP X21, X22, [SP, #32] |
0x430d2c LDP X29, X30, [SP], #112 |
0x430d30 RET |
0x430d34 LDP X23, X24, [SP, #48] |
0x430d38 LDP X25, X26, [SP, #64] |
0x430d3c LDP X27, X28, [SP, #80] |
0x430d40 LDP X19, X20, [SP, #16] |
0x430d44 LDP X21, X22, [SP, #32] |
0x430d48 LDP X29, X30, [SP], #112 |
0x430d4c RET |
0x430d50 ADD W6, W6, #1 |
0x430d54 MOVZ W7, #0 |
0x430d58 B 430a58 |
0x430d5c HINT #0 |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►25.46+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_mom_kernel(int, int, int[...] | advec_mom.cpp:218 | exec |
| ○ | advec_mom_driver(global_variab[...] | advec_mom.cpp:244 | exec |
| ○ | advection(global_variables&) | advection.cpp:101 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| ►25.14+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_mom_kernel(int, int, int[...] | advec_mom.cpp:218 | exec |
| ○ | advec_mom_driver(global_variab[...] | advec_mom.cpp:244 | exec |
| ○ | advection(global_variables&) | advection.cpp:99 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| ►24.87+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_mom_kernel(int, int, int[...] | advec_mom.cpp:218 | exec |
| ○ | advec_mom_driver(global_variab[...] | advec_mom.cpp:244 | exec |
| ○ | advection(global_variables&) | advection.cpp:70 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| ►24.53+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_mom_kernel(int, int, int[...] | advec_mom.cpp:218 | exec |
| ○ | advec_mom_driver(global_variab[...] | advec_mom.cpp:244 | exec |
| ○ | advection(global_variables&) | advection.cpp:68 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►49.70+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►12.75+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_mom_kernel(int, int, int[...] | advec_mom.cpp:218 | exec |
| ○ | advec_mom_driver(global_variab[...] | advec_mom.cpp:244 | exec |
| ○ | advection(global_variables&) | advection.cpp:101 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| ►12.72+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_mom_kernel(int, int, int[...] | advec_mom.cpp:218 | exec |
| ○ | advec_mom_driver(global_variab[...] | advec_mom.cpp:244 | exec |
| ○ | advection(global_variables&) | advection.cpp:99 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| ►12.44+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_mom_kernel(int, int, int[...] | advec_mom.cpp:218 | exec |
| ○ | advec_mom_driver(global_variab[...] | advec_mom.cpp:244 | exec |
| ○ | advection(global_variables&) | advection.cpp:70 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| ►12.38+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_mom_kernel(int, int, int[...] | advec_mom.cpp:218 | exec |
| ○ | advec_mom_driver(global_variab[...] | advec_mom.cpp:244 | exec |
| ○ | advection(global_variables&) | advection.cpp:68 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►74.45+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►6.47+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_mom_kernel(int, int, int[...] | advec_mom.cpp:218 | exec |
| ○ | advec_mom_driver(global_variab[...] | advec_mom.cpp:244 | exec |
| ○ | advection(global_variables&) | advection.cpp:99 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| ►6.46+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_mom_kernel(int, int, int[...] | advec_mom.cpp:218 | exec |
| ○ | advec_mom_driver(global_variab[...] | advec_mom.cpp:244 | exec |
| ○ | advection(global_variables&) | advection.cpp:101 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| ►6.31+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_mom_kernel(int, int, int[...] | advec_mom.cpp:218 | exec |
| ○ | advec_mom_driver(global_variab[...] | advec_mom.cpp:244 | exec |
| ○ | advection(global_variables&) | advection.cpp:68 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| ►6.31+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_mom_kernel(int, int, int[...] | advec_mom.cpp:218 | exec |
| ○ | advec_mom_driver(global_variab[...] | advec_mom.cpp:244 | exec |
| ○ | advection(global_variables&) | advection.cpp:70 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►86.95+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►3.31+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_mom_kernel(int, int, int[...] | advec_mom.cpp:218 | exec |
| ○ | advec_mom_driver(global_variab[...] | advec_mom.cpp:244 | exec |
| ○ | advection(global_variables&) | advection.cpp:99 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| ►3.27+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_mom_kernel(int, int, int[...] | advec_mom.cpp:218 | exec |
| ○ | advec_mom_driver(global_variab[...] | advec_mom.cpp:244 | exec |
| ○ | advection(global_variables&) | advection.cpp:101 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| ►3.25+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_mom_kernel(int, int, int[...] | advec_mom.cpp:218 | exec |
| ○ | advec_mom_driver(global_variab[...] | advec_mom.cpp:244 | exec |
| ○ | advection(global_variables&) | advection.cpp:70 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| ►3.22+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_mom_kernel(int, int, int[...] | advec_mom.cpp:218 | exec |
| ○ | advec_mom_driver(global_variab[...] | advec_mom.cpp:244 | exec |
| ○ | advection(global_variables&) | advection.cpp:68 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►93.65+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►1.62+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_mom_kernel(int, int, int[...] | advec_mom.cpp:218 | exec |
| ○ | advec_mom_driver(global_variab[...] | advec_mom.cpp:244 | exec |
| ○ | advection(global_variables&) | advection.cpp:101 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| ►1.61+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_mom_kernel(int, int, int[...] | advec_mom.cpp:218 | exec |
| ○ | advec_mom_driver(global_variab[...] | advec_mom.cpp:244 | exec |
| ○ | advection(global_variables&) | advection.cpp:99 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| ►1.57+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_mom_kernel(int, int, int[...] | advec_mom.cpp:218 | exec |
| ○ | advec_mom_driver(global_variab[...] | advec_mom.cpp:244 | exec |
| ○ | advection(global_variables&) | advection.cpp:70 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| ►1.54+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_mom_kernel(int, int, int[...] | advec_mom.cpp:218 | exec |
| ○ | advec_mom_driver(global_variab[...] | advec_mom.cpp:244 | exec |
| ○ | advection(global_variables&) | advection.cpp:68 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►95.79+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►1.07+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_mom_kernel(int, int, int[...] | advec_mom.cpp:218 | exec |
| ○ | advec_mom_driver(global_variab[...] | advec_mom.cpp:244 | exec |
| ○ | advection(global_variables&) | advection.cpp:99 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| ►1.06+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_mom_kernel(int, int, int[...] | advec_mom.cpp:218 | exec |
| ○ | advec_mom_driver(global_variab[...] | advec_mom.cpp:244 | exec |
| ○ | advection(global_variables&) | advection.cpp:101 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| ►1.04+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_mom_kernel(int, int, int[...] | advec_mom.cpp:218 | exec |
| ○ | advec_mom_driver(global_variab[...] | advec_mom.cpp:244 | exec |
| ○ | advection(global_variables&) | advection.cpp:68 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| ►1.04+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_mom_kernel(int, int, int[...] | advec_mom.cpp:218 | exec |
| ○ | advec_mom_driver(global_variab[...] | advec_mom.cpp:244 | exec |
| ○ | advection(global_variables&) | advection.cpp:70 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►96.85+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►97.48+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►97.91+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►98.23+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►98.43+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| Path / |
The code analyzed by CQA in that panel excludes loops and represents 0.01% of application time for run 1x1
| Source file and lines | advec_mom.cpp:218-221 |
| Module | exec |
| nb instructions | 64 |
| nb uops | 62 |
| loop length | 256 |
| used w registers | 22 |
| used x registers | 16 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 0 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 21 |
| micro-operation queue | 7.75 cycles |
| front end | 7.75 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 4.50 | 4.50 | 7.50 | 7.50 | 7.50 | 7.50 | 0.00 | 0.00 | 0.00 | 0.00 | 8.83 | 8.50 | 8.67 | 4.50 | 4.50 |
| cycles | 4.50 | 4.50 | 7.50 | 7.50 | 7.50 | 7.50 | 0.00 | 0.00 | 0.00 | 0.00 | 8.83 | 8.50 | 8.67 | 4.50 | 4.50 |
| Cycles executing div or sqrt instructions | 10.00-25.00 |
| Front-end | 7.75 |
| Dispatch | 8.83 |
| DIV/SQRT | 10.00-25.00 |
| Overall L1 | 10.00-25.00 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | 0% |
| other | 0% |
| all | 27% |
| load | 40% |
| store | 38% |
| mul | 12% |
| add-sub | 13% |
| fma | 12% |
| div/sqrt | 12% |
| other | 23% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #-112]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X20, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDP W22, W1, [X0, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDR W0, [X0, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR W19, [X20, #36] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| ADD W2, W1, #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W22, W22, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W3, W0, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STP W2, W3, [SP, #96] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| CMP W22, W2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 430d24 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.11+0x344> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD W24, W19, #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W23, W2, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W3, W24 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 430d20 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.11+0x340> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| SUB W4, W24, W3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MUL W19, W23, W4 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| STR W4, [SP, #104] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| BL 410210 <@plt_start@+0x1f0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W21, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410240 <@plt_start@+0x220> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| UDIV W6, W19, W21 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| ORR W5, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MSUB W7, W6, W21, W19 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| CMP W0, W7 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CC 430d50 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.11+0x370> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W16, W6, W5, W7 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W8, W6, W16 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR W8, [SP, #108] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| CMP W16, W8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 430d20 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.11+0x340> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR W9, [SP, #104] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDR W13, [SP, #100] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| UDIV W10, W16, W9 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | scal (12.5%) |
| STP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDP X28, X27, [X20] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X26, X25, [X20, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| MSUB W11, W10, W9, W16 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W12, W10, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SBFM X5, X12, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| ADD W4, W11, W13 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W17, W24, W4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| HINT #0 | N/A | ||||||||||||||||||
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #112 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #112 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W6, W6, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W7, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| B 430a58 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.11+0x78> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| HINT #0 | N/A |
The code analyzed by CQA in that panel excludes loops and represents 0.01% of the application time for run 1x1.
| Source file and lines | advec_mom.cpp:218-221 |
| Module | exec |
| nb instructions | 64 |
| nb uops | 62 |
| loop length | 256 |
| used w registers | 22 |
| used x registers | 16 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 0 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 21 |
| micro-operation queue | 7.75 cycles |
| front end | 7.75 cycles |
| | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 4.50 | 4.50 | 7.50 | 7.50 | 7.50 | 7.50 | 0.00 | 0.00 | 0.00 | 0.00 | 8.83 | 8.50 | 8.67 | 4.50 | 4.50 |
| cycles | 4.50 | 4.50 | 7.50 | 7.50 | 7.50 | 7.50 | 0.00 | 0.00 | 0.00 | 0.00 | 8.83 | 8.50 | 8.67 | 4.50 | 4.50 |
| Cycles executing div or sqrt instructions | 10.00-25.00 |
| Front-end | 7.75 |
| Dispatch | 8.83 |
| DIV/SQRT | 10.00-25.00 |
| Overall L1 | 10.00-25.00 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | 0% |
| other | 0% |
| all | 27% |
| load | 40% |
| store | 38% |
| mul | 12% |
| add-sub | 13% |
| fma | 12% |
| div/sqrt | 12% |
| other | 23% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #912]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X20, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDP W22, W1, [X0, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDR W0, [X0, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR W19, [X20, #36] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| ADD W2, W1, #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W22, W22, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W3, W0, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STP W2, W3, [SP, #96] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| CMP W22, W2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 430d24 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.11+0x344> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD W24, W19, #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W23, W2, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W3, W24 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 430d20 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.11+0x340> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| SUB W4, W24, W3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MUL W19, W23, W4 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| STR W4, [SP, #104] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| BL 410210 <@plt_start@+0x1f0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W21, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410240 <@plt_start@+0x220> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| UDIV W6, W19, W21 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| ORR W5, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MSUB W7, W6, W21, W19 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| CMP W0, W7 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CC 430d50 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.11+0x370> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W16, W6, W5, W7 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W8, W6, W16 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR W8, [SP, #108] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| CMP W16, W8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 430d20 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.11+0x340> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR W9, [SP, #104] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDR W13, [SP, #100] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| UDIV W10, W16, W9 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | scal (12.5%) |
| STP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDP X28, X27, [X20] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X26, X25, [X20, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| MSUB W11, W10, W9, W16 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W12, W10, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SBFM X5, X12, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| ADD W4, W11, W13 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W17, W24, W4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| HINT #0 | N/A | ||||||||||||||||||
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #112 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #112 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W6, W6, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W7, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| B 430a58 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.11+0x78> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| HINT #0 | N/A |
| Run 1x1 | Number processes: 1<br>Number nodes: NA<br>Run Command: <executable><br>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings<br>Dataset: <br>Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431<br>OMP_PROC_BIND: spread<br>OMP_DISPLAY_AFFINITY: TRUE<br>OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'<br>OMP_DISPLAY_ENV: TRUE<br>OMP_NUM_THREADS: 1<br>OMP_PLACES: threads |
|---|---|
| Run 1x2 | Number processes: 1<br>Run Command: <executable><br>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings<br>Dataset: <br>Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431<br>OMP_NUM_THREADS: 2<br>OMP_PROC_BIND: spread<br>OMP_DISPLAY_AFFINITY: TRUE<br>OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'<br>OMP_DISPLAY_ENV: TRUE<br>OMP_PLACES: threads |
| Run 1x4 | Number processes: 1<br>Run Command: <executable><br>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings<br>Dataset: <br>Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431<br>OMP_NUM_THREADS: 4<br>OMP_PROC_BIND: spread<br>OMP_DISPLAY_AFFINITY: TRUE<br>OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'<br>OMP_DISPLAY_ENV: TRUE<br>OMP_PLACES: threads |
| Run 1x8 | Number processes: 1<br>Run Command: <executable><br>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings<br>Dataset: <br>Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431<br>OMP_NUM_THREADS: 8<br>OMP_PROC_BIND: spread<br>OMP_DISPLAY_AFFINITY: TRUE<br>OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'<br>OMP_DISPLAY_ENV: TRUE<br>OMP_PLACES: threads |
| Run 1x16 | Number processes: 1<br>Run Command: <executable><br>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings<br>Dataset: <br>Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431<br>OMP_NUM_THREADS: 16<br>OMP_PROC_BIND: spread<br>OMP_DISPLAY_AFFINITY: TRUE<br>OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'<br>OMP_DISPLAY_ENV: TRUE<br>OMP_PLACES: threads |
| Run 1x24 | Number processes: 1<br>Run Command: <executable><br>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings<br>Dataset: <br>Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431<br>OMP_NUM_THREADS: 24<br>OMP_PROC_BIND: spread<br>OMP_DISPLAY_AFFINITY: TRUE<br>OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'<br>OMP_DISPLAY_ENV: TRUE<br>OMP_PLACES: threads |
| Run 1x32 | Number processes: 1<br>Run Command: <executable><br>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings<br>Dataset: <br>Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431<br>OMP_NUM_THREADS: 32<br>OMP_PROC_BIND: spread<br>OMP_DISPLAY_AFFINITY: TRUE<br>OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'<br>OMP_DISPLAY_ENV: TRUE<br>OMP_PLACES: threads |
| Run 1x40 | Number processes: 1<br>Run Command: <executable><br>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings<br>Dataset: <br>Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431<br>OMP_NUM_THREADS: 40<br>OMP_PROC_BIND: spread<br>OMP_DISPLAY_AFFINITY: TRUE<br>OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'<br>OMP_DISPLAY_ENV: TRUE<br>OMP_PLACES: threads |
| Run 1x48 | Number processes: 1<br>Run Command: <executable><br>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings<br>Dataset: <br>Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431<br>OMP_NUM_THREADS: 48<br>OMP_PROC_BIND: spread<br>OMP_DISPLAY_AFFINITY: TRUE<br>OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'<br>OMP_DISPLAY_ENV: TRUE<br>OMP_PLACES: threads |
| Run 1x56 | Number processes: 1<br>Run Command: <executable><br>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings<br>Dataset: <br>Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431<br>OMP_NUM_THREADS: 56<br>OMP_PROC_BIND: spread<br>OMP_DISPLAY_AFFINITY: TRUE<br>OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'<br>OMP_DISPLAY_ENV: TRUE<br>OMP_PLACES: threads |
| Run 1x64 | Number processes: 1<br>Run Command: <executable><br>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings<br>Dataset: <br>Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431<br>OMP_NUM_THREADS: 64<br>OMP_PROC_BIND: spread<br>OMP_DISPLAY_AFFINITY: TRUE<br>OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'<br>OMP_DISPLAY_ENV: TRUE<br>OMP_PLACES: threads |
| (1x1) Efficiency | (1x1) Potential Speed-Up (%) | (1x2) Efficiency | (1x2) Potential Speed-Up (%) | (1x4) Efficiency | (1x4) Potential Speed-Up (%) | (1x8) Efficiency | (1x8) Potential Speed-Up (%) | (1x16) Efficiency | (1x16) Potential Speed-Up (%) | (1x24) Efficiency | (1x24) Potential Speed-Up (%) | (1x32) Efficiency | (1x32) Potential Speed-Up (%) | (1x40) Efficiency | (1x40) Potential Speed-Up (%) | (1x48) Efficiency | (1x48) Potential Speed-Up (%) | (1x56) Efficiency | (1x56) Potential Speed-Up (%) | (1x64) Efficiency | (1x64) Potential Speed-Up (%) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 0 | 0.98 | 0.06 | 0.93 | 0.22 | 0.62 | 1.43 | 0.3 | 3.57 | 0.21 | 4.05 | 0.18 | 4.04 | 0.15 | 4.12 | 0.13 | 4.17 | 0.12 | 4.22 | 0.1 | 4.25 |
| Run | Number of threads | Efficiency (ideal is 1) | Speedup | Ideal Speedup | Time (s) | Coverage (%) |
|---|---|---|---|---|---|---|
| 1x1 | 1 | 1 | 1 | 1 | 41.295013427734 | 2.7783579826355 |
| 1x2 | 2 | 0.98 | 1.96 | 2 | 21.119997024536 | 2.8085587024689 |
| 1x4 | 4 | 0.93 | 3.7 | 4 | 11.295003890991 | 2.9099128246307 |
| 1x8 | 8 | 0.62 | 4.99 | 8 | 8.5050010681152 | 3.8124566078186 |
| 1x16 | 16 | 0.3 | 4.81 | 16 | 8.3400011062622 | 5.1026082038879 |
| 1x24 | 24 | 0.21 | 5.11 | 24 | 7.8950009346008 | 5.1463418006897 |
| 1x32 | 32 | 0.18 | 5.79 | 32 | 6.9850015640259 | 4.9356217384338 |
| 1x40 | 40 | 0.15 | 6.19 | 40 | 6.5700006484985 | 4.8732900619507 |
| 1x48 | 48 | 0.13 | 6.37 | 48 | 6.3800001144409 | 4.813419342041 |
| 1x56 | 56 | 0.12 | 6.45 | 56 | 6.3150005340576 | 4.7675275802612 |
| 1x64 | 64 | 0.1 | 6.49 | 64 | 6.2700004577637 | 4.7295136451721 |
| Name | Coverage (%) | Time (s) |
|---|---|---|
| ▼advec_mom_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, ...) | 2.78 | 41.30 |
| ▼Loop 199 - advec_mom.cpp:220-221 - exec– | 0.00 | 0.00 |
| ○Loop 200 - advec_mom.cpp:221-221 - exec | 2.77 | 41.15 |
