| Function: advec_mom_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, ... | Module: exec | Source: advec_mom.cpp:53-57 [...] | Coverage (incl. loops): 1.36% | (excl. loops): 0.01% |
|---|
| Function: advec_mom_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, ... | Module: exec | Source: advec_mom.cpp:53-57 [...] | Coverage (incl. loops): 1.36% | (excl. loops): 0.01% |
|---|
/home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/build/CloverLeaf2.0-CXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
/home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/build/CloverLeaf2.0-CXX/src/omp/advec_mom.cpp: 53 - 57 |
-------------------------------------------------------------------------------- |
53: #pragma omp parallel for simd collapse(2) |
54: for (int j = (y_min - 2 + 1); j < (y_max + 2 + 2); j++) { |
55: for (int i = (x_min - 2 + 1); i < (x_max + 2 + 2); i++) { |
56: post_vol(i, j) = volume(i, j) + vol_flux_x(i + 1, j + 0) - vol_flux_x(i, j); |
57: pre_vol(i, j) = post_vol(i, j) + vol_flux_y(i + 0, j + 1) - vol_flux_y(i, j); |
0x42eb10 STP X29, X30, [SP, #-176]! |
0x42eb14 ADD X29, SP, #0 |
0x42eb18 STP X19, X20, [SP, #16] |
0x42eb1c ORR X19, XZR, X0 |
0x42eb20 STP X23, X24, [SP, #48] |
0x42eb24 LDP W23, W1, [X0, #48] |
0x42eb28 LDR W0, [X0, #40] |
0x42eb2c LDR W2, [X19, #44] |
0x42eb30 ADD W3, W1, #4 |
0x42eb34 SUB W24, W23, #1 |
0x42eb38 SUB W4, W0, #1 |
0x42eb3c STP W3, W4, [SP, #136] |
0x42eb40 CMP W24, W3 |
0x42eb44 B.GE 42eef8 |
0x42eb48 STP X21, X22, [SP, #32] |
0x42eb4c ADD W22, W2, #4 |
0x42eb50 SUB W21, W3, W24 |
0x42eb54 CMP W4, W22 |
0x42eb58 B.GE 42ef08 |
0x42eb5c SUB W5, W22, W4 |
0x42eb60 MUL W23, W21, W5 |
0x42eb64 STR W5, [SP, #144] |
0x42eb68 BL 410210 |
0x42eb6c ORR W20, WZR, W0 |
0x42eb70 BL 410240 |
0x42eb74 UDIV W7, W23, W20 |
0x42eb78 ORR W6, WZR, W0 |
0x42eb7c MSUB W8, W7, W20, W23 |
0x42eb80 CMP W0, W8 |
0x42eb84 B.CC 42ef1c |
0x42eb88 MADD W1, W7, W6, W8 |
0x42eb8c ADD W9, W7, W1 |
0x42eb90 STR W9, [SP, #148] |
0x42eb94 CMP W1, W9 |
0x42eb98 B.CS 42ef08 |
0x42eb9c LDP W10, W11, [SP, #140] |
0x42eba0 STP X27, X28, [SP, #80] |
0x42eba4 STP X25, X26, [SP, #64] |
0x42eba8 UDIV W12, W1, W11 |
0x42ebac LDR X27, [X19, #32] |
0x42ebb0 LDP X20, X25, [X19] |
0x42ebb4 MSUB W13, W12, W11, W1 |
0x42ebb8 ADD W14, W12, W24 |
0x42ebbc LDP X24, X26, [X19, #16] |
0x42ebc0 SBFM X15, X14, #0, #31 |
0x42ebc4 ADD W28, W13, W10 |
0x42ebc8 SUB W22, W22, W28 |
0x42ebcc CMP W7, W22 |
0x42ebd0 CSEL W3, W7, W22, LS |
0x42ebd4 ADD W19, W1, W3 |
0x42ebd8 CMP W1, W19 |
0x42ebdc B.CS 42eed4 |
(181) 0x42ebe0 LDR X18, [X25] |
(181) 0x42ebe4 ADD X16, X15, #1 |
(181) 0x42ebe8 LDR X30, [X27] |
(181) 0x42ebec LDR X17, [X20] |
(181) 0x42ebf0 MADD X12, X15, X18, X18 |
(181) 0x42ebf4 LDR X14, [X20, #16] |
(181) 0x42ebf8 MUL X4, X15, X30 |
(181) 0x42ebfc SUB X6, X12, X18 |
(181) 0x42ec00 LDR X21, [X24, #16] |
(181) 0x42ec04 MUL X11, X15, X17 |
(181) 0x42ec08 LDR X13, [X25, #16] |
(181) 0x42ec0c STR X4, [SP, #120] |
(181) 0x42ec10 LDR X22, [X26, #16] |
(181) 0x42ec14 LDR X23, [X27, #16] |
(181) 0x42ec18 STP X16, X6, [SP, #104] |
(181) 0x42ec1c LDR X1, [X24] |
(181) 0x42ec20 LDR X2, [X26] |
(181) 0x42ec24 MUL X5, X15, X1 |
(181) 0x42ec28 MUL X30, X15, X2 |
(181) 0x42ec2c STR X5, [SP, #128] |
(181) 0x42ec30 CMP W3, #1 |
(181) 0x42ec34 B.EQ 42ee40 |
(181) 0x42ec38 SBFM X1, X28, #0, #31 |
(181) 0x42ec3c UBFM W7, W3, #1, #31 |
(181) 0x42ec40 ADD X8, X11, X1 |
(181) 0x42ec44 UBFM X10, X7, #60, #59 |
(181) 0x42ec48 ADD X15, X8, #1 |
(181) 0x42ec4c SUB X9, X10, #16 |
(181) 0x42ec50 UBFM X16, X9, #4, #63 |
(181) 0x42ec54 UBFM X7, X15, #61, #60 |
(181) 0x42ec58 ADD X2, X14, X15,LSL #3 |
(181) 0x42ec5c ADD X4, X4, X1 |
(181) 0x42ec60 ADD X15, X6, X1 |
(181) 0x42ec64 ADD X17, X16, #1 |
(181) 0x42ec68 ADD X6, X23, X4,LSL #3 |
(181) 0x42ec6c STR X7, [SP, #152] |
(181) 0x42ec70 ANDS X9, X17, #0x3 |
(181) 0x42ec74 SUB X7, X7, #8 |
(181) 0x42ec78 UBFM X17, X4, #61, #60 |
(181) 0x42ec7c ADD X4, X13, X15,LSL #3 |
(181) 0x42ec80 UBFM X15, X15, #61, #60 |
(181) 0x42ec84 ADD X18, X5, X1 |
(181) 0x42ec88 ADD X16, X12, X1 |
(181) 0x42ec8c STR X7, [SP, #160] |
(181) 0x42ec90 ADD X1, X30, X1 |
(181) 0x42ec94 STR X15, [SP, #168] |
(181) 0x42ec98 ADD X8, X21, X18,LSL #3 |
(181) 0x42ec9c ADD X5, X13, X16,LSL #3 |
(181) 0x42eca0 UBFM X15, X1, #61, #60 |
(181) 0x42eca4 MOVZ X0, #0 |
(181) 0x42eca8 ADD X1, X22, X1,LSL #3 |
(181) 0x42ecac UBFM X18, X18, #61, #60 |
(181) 0x42ecb0 UBFM X16, X16, #61, #60 |
(181) 0x42ecb4 ADD X7, X14, X7 |
(181) 0x42ecb8 B.EQ 42ed6c |
(181) 0x42ecbc CMP X9, #1 |
(181) 0x42ecc0 B.EQ 42ed34 |
(181) 0x42ecc4 CMP X9, #2 |
(181) 0x42ecc8 B.EQ 42ed04 |
(181) 0x42eccc LDP X0, X9, [SP, #152] |
(181) 0x42ecd0 LDR Q0, [X21, X18] |
(181) 0x42ecd4 LDR Q1, [X14, X0] |
(181) 0x42ecd8 MOVZ X0, #16 |
(181) 0x42ecdc LDR Q29, [X14, X9] |
(181) 0x42ece0 LDR X18, [SP, #168] |
(181) 0x42ece4 FADD V2.2D, V1.2D, V0.2D |
(181) 0x42ece8 FSUB V3.2D, V2.2D, V29.2D |
(181) 0x42ecec STR Q3, [X23, X17] |
(181) 0x42ecf0 LDR Q31, [X13, X16] |
(181) 0x42ecf4 LDR Q30, [X13, X18] |
(181) 0x42ecf8 FSUB V4.2D, V31.2D, V30.2D |
(181) 0x42ecfc FADD V5.2D, V4.2D, V3.2D |
(181) 0x42ed00 STR Q5, [X22, X15] |
(181) 0x42ed04 LDR Q6, [X2, X0] |
(181) 0x42ed08 LDR Q7, [X8, X0] |
(181) 0x42ed0c LDR Q26, [X7, X0] |
(181) 0x42ed10 FADD V16.2D, V6.2D, V7.2D |
(181) 0x42ed14 FSUB V17.2D, V16.2D, V26.2D |
(181) 0x42ed18 STR Q17, [X6, X0] |
(181) 0x42ed1c LDR Q28, [X5, X0] |
(181) 0x42ed20 LDR Q27, [X4, X0] |
(181) 0x42ed24 FSUB V18.2D, V28.2D, V27.2D |
(181) 0x42ed28 FADD V19.2D, V18.2D, V17.2D |
(181) 0x42ed2c STR Q19, [X1, X0] |
(181) 0x42ed30 ADD X0, X0, #16 |
(181) 0x42ed34 LDR Q20, [X2, X0] |
(181) 0x42ed38 LDR Q21, [X8, X0] |
(181) 0x42ed3c LDR Q23, [X7, X0] |
(181) 0x42ed40 FADD V22.2D, V20.2D, V21.2D |
(181) 0x42ed44 FSUB V0.2D, V22.2D, V23.2D |
(181) 0x42ed48 STR Q0, [X6, X0] |
(181) 0x42ed4c LDR Q25, [X5, X0] |
(181) 0x42ed50 LDR Q24, [X4, X0] |
(181) 0x42ed54 FSUB V1.2D, V25.2D, V24.2D |
(181) 0x42ed58 FADD V29.2D, V1.2D, V0.2D |
(181) 0x42ed5c STR Q29, [X1, X0] |
(181) 0x42ed60 ADD X0, X0, #16 |
(181) 0x42ed64 CMP X0, X10 |
(181) 0x42ed68 B.EQ 42ee34 |
(182) 0x42ed6c LDR Q2, [X2, X0] |
(182) 0x42ed70 ADD X17, X0, #16 |
(182) 0x42ed74 ADD X16, X0, #32 |
(182) 0x42ed78 ADD X9, X0, #48 |
(182) 0x42ed7c LDR Q3, [X8, X0] |
(182) 0x42ed80 LDR Q31, [X7, X0] |
(182) 0x42ed84 FADD V30.2D, V2.2D, V3.2D |
(182) 0x42ed88 FSUB V4.2D, V30.2D, V31.2D |
(182) 0x42ed8c STR Q4, [X6, X0] |
(182) 0x42ed90 LDR Q5, [X5, X0] |
(182) 0x42ed94 LDR Q6, [X4, X0] |
(182) 0x42ed98 FSUB V7.2D, V5.2D, V6.2D |
(182) 0x42ed9c FADD V26.2D, V7.2D, V4.2D |
(182) 0x42eda0 STR Q26, [X1, X0] |
(182) 0x42eda4 ADD X0, X0, #64 |
(182) 0x42eda8 LDR Q16, [X2, X17] |
(182) 0x42edac LDR Q17, [X8, X17] |
(182) 0x42edb0 LDR Q28, [X7, X17] |
(182) 0x42edb4 FADD V27.2D, V16.2D, V17.2D |
(182) 0x42edb8 FSUB V18.2D, V27.2D, V28.2D |
(182) 0x42edbc STR Q18, [X6, X17] |
(182) 0x42edc0 LDR Q19, [X5, X17] |
(182) 0x42edc4 LDR Q20, [X4, X17] |
(182) 0x42edc8 FSUB V21.2D, V19.2D, V20.2D |
(182) 0x42edcc FADD V23.2D, V21.2D, V18.2D |
(182) 0x42edd0 STR Q23, [X1, X17] |
(182) 0x42edd4 LDR Q22, [X2, X16] |
(182) 0x42edd8 LDR Q0, [X8, X16] |
(182) 0x42eddc LDR Q25, [X7, X16] |
(182) 0x42ede0 FADD V24.2D, V22.2D, V0.2D |
(182) 0x42ede4 FSUB V1.2D, V24.2D, V25.2D |
(182) 0x42ede8 STR Q1, [X6, X16] |
(182) 0x42edec LDR Q29, [X5, X16] |
(182) 0x42edf0 LDR Q2, [X4, X16] |
(182) 0x42edf4 FSUB V3.2D, V29.2D, V2.2D |
(182) 0x42edf8 FADD V31.2D, V3.2D, V1.2D |
(182) 0x42edfc STR Q31, [X1, X16] |
(182) 0x42ee00 LDR Q30, [X2, X9] |
(182) 0x42ee04 LDR Q4, [X8, X9] |
(182) 0x42ee08 LDR Q5, [X7, X9] |
(182) 0x42ee0c FADD V6.2D, V30.2D, V4.2D |
(182) 0x42ee10 FSUB V7.2D, V6.2D, V5.2D |
(182) 0x42ee14 STR Q7, [X6, X9] |
(182) 0x42ee18 LDR Q26, [X5, X9] |
(182) 0x42ee1c LDR Q16, [X4, X9] |
(182) 0x42ee20 FSUB V17.2D, V26.2D, V16.2D |
(182) 0x42ee24 FADD V28.2D, V17.2D, V7.2D |
(182) 0x42ee28 STR Q28, [X1, X9] |
(182) 0x42ee2c CMP X0, X10 |
(182) 0x42ee30 B.NE 42ed6c |
(181) 0x42ee34 TBZ W3, #0, 42ee9c |
(181) 0x42ee38 AND W3, W3, #0xfffffffe |
(181) 0x42ee3c ADD W28, W28, W3 |
(181) 0x42ee40 SBFM X10, X28, #0, #31 |
(181) 0x42ee44 ADD W2, W28, #1 |
(181) 0x42ee48 ADD X8, X11, W2,SXTW |
(181) 0x42ee4c ADD X11, X11, X10 |
(181) 0x42ee50 LDR D27, [X14, X8,LSL #3] |
(181) 0x42ee54 ADD X12, X12, X10 |
(181) 0x42ee58 ADD X30, X30, X10 |
(181) 0x42ee5c LDR D18, [X14, X11,LSL #3] |
(181) 0x42ee60 LDR X14, [SP, #128] |
(181) 0x42ee64 FSUB D19, D27, D18 |
(181) 0x42ee68 ADD X6, X14, X10 |
(181) 0x42ee6c LDR D20, [X21, X6,LSL #3] |
(181) 0x42ee70 LDR X21, [SP, #120] |
(181) 0x42ee74 FADD D21, D19, D20 |
(181) 0x42ee78 ADD X5, X21, X10 |
(181) 0x42ee7c STR D21, [X23, X5,LSL #3] |
(181) 0x42ee80 LDR X23, [SP, #112] |
(181) 0x42ee84 LDR D23, [X13, X12,LSL #3] |
(181) 0x42ee88 ADD X4, X23, X10 |
(181) 0x42ee8c LDR D22, [X13, X4,LSL #3] |
(181) 0x42ee90 FSUB D0, D23, D22 |
(181) 0x42ee94 FADD D25, D0, D21 |
(181) 0x42ee98 STR D25, [X22, X30,LSL #3] |
(181) 0x42ee9c LDR X15, [SP, #104] |
(181) 0x42eea0 ORR W1, WZR, W19 |
(181) 0x42eea4 LDR W19, [SP, #136] |
(181) 0x42eea8 CMP W19, W15 |
(181) 0x42eeac B.LE 42eeec |
(181) 0x42eeb0 LDR W13, [SP, #148] |
(181) 0x42eeb4 LDR W22, [SP, #144] |
(181) 0x42eeb8 LDR W28, [SP, #140] |
(181) 0x42eebc SUB W7, W13, W1 |
(181) 0x42eec0 CMP W7, W22 |
(181) 0x42eec4 CSEL W3, W7, W22, LS |
(181) 0x42eec8 ADD W19, W1, W3 |
(181) 0x42eecc CMP W1, W19 |
(181) 0x42eed0 B.CC 42ebe0 |
(183) 0x42eed4 ADD X15, X15, #1 |
(183) 0x42eed8 LDR W19, [SP, #136] |
(183) 0x42eedc STR X15, [SP, #104] |
(183) 0x42eee0 LDR X15, [SP, #104] |
(183) 0x42eee4 CMP W19, W15 |
(183) 0x42eee8 B.GT 42eeb0 |
0x42eeec LDP X21, X22, [SP, #32] |
0x42eef0 LDP X25, X26, [SP, #64] |
0x42eef4 LDP X27, X28, [SP, #80] |
0x42eef8 LDP X19, X20, [SP, #16] |
0x42eefc LDP X23, X24, [SP, #48] |
0x42ef00 LDP X29, X30, [SP], #176 |
0x42ef04 RET |
0x42ef08 LDP X21, X22, [SP, #32] |
0x42ef0c LDP X19, X20, [SP, #16] |
0x42ef10 LDP X23, X24, [SP, #48] |
0x42ef14 LDP X29, X30, [SP], #176 |
0x42ef18 RET |
0x42ef1c ADD W7, W7, #1 |
0x42ef20 MOVZ W8, #0 |
0x42ef24 B 42eb88 |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►50.00+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_mom_kernel(int, int, int[...] | advec_mom.cpp:152 | exec |
| ○ | advec_mom_driver(global_variab[...] | advec_mom.cpp:244 | exec |
| ○ | advection(global_variables&) | advection.cpp:68 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| ►50.00+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_mom_kernel(int, int, int[...] | advec_mom.cpp:152 | exec |
| ○ | advec_mom_driver(global_variab[...] | advec_mom.cpp:244 | exec |
| ○ | advection(global_variables&) | advection.cpp:70 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►49.51+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►25.32+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_mom_kernel(int, int, int[...] | advec_mom.cpp:152 | exec |
| ○ | advec_mom_driver(global_variab[...] | advec_mom.cpp:244 | exec |
| ○ | advection(global_variables&) | advection.cpp:68 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| ►25.17+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_mom_kernel(int, int, int[...] | advec_mom.cpp:152 | exec |
| ○ | advec_mom_driver(global_variab[...] | advec_mom.cpp:244 | exec |
| ○ | advection(global_variables&) | advection.cpp:70 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►74.31+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►12.86+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_mom_kernel(int, int, int[...] | advec_mom.cpp:152 | exec |
| ○ | advec_mom_driver(global_variab[...] | advec_mom.cpp:244 | exec |
| ○ | advection(global_variables&) | advection.cpp:70 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| ►12.83+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_mom_kernel(int, int, int[...] | advec_mom.cpp:152 | exec |
| ○ | advec_mom_driver(global_variab[...] | advec_mom.cpp:244 | exec |
| ○ | advection(global_variables&) | advection.cpp:68 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►87.26+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►6.38+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_mom_kernel(int, int, int[...] | advec_mom.cpp:152 | exec |
| ○ | advec_mom_driver(global_variab[...] | advec_mom.cpp:244 | exec |
| ○ | advection(global_variables&) | advection.cpp:68 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| ►6.35+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_mom_kernel(int, int, int[...] | advec_mom.cpp:152 | exec |
| ○ | advec_mom_driver(global_variab[...] | advec_mom.cpp:244 | exec |
| ○ | advection(global_variables&) | advection.cpp:70 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►93.56+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►3.22+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_mom_kernel(int, int, int[...] | advec_mom.cpp:152 | exec |
| ○ | advec_mom_driver(global_variab[...] | advec_mom.cpp:244 | exec |
| ○ | advection(global_variables&) | advection.cpp:68 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| ►3.22+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_mom_kernel(int, int, int[...] | advec_mom.cpp:152 | exec |
| ○ | advec_mom_driver(global_variab[...] | advec_mom.cpp:244 | exec |
| ○ | advection(global_variables&) | advection.cpp:70 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►95.77+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►2.12+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_mom_kernel(int, int, int[...] | advec_mom.cpp:152 | exec |
| ○ | advec_mom_driver(global_variab[...] | advec_mom.cpp:244 | exec |
| ○ | advection(global_variables&) | advection.cpp:68 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| ►2.11+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_mom_kernel(int, int, int[...] | advec_mom.cpp:152 | exec |
| ○ | advec_mom_driver(global_variab[...] | advec_mom.cpp:244 | exec |
| ○ | advection(global_variables&) | advection.cpp:70 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►96.84+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►1.61+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_mom_kernel(int, int, int[...] | advec_mom.cpp:152 | exec |
| ○ | advec_mom_driver(global_variab[...] | advec_mom.cpp:244 | exec |
| ○ | advection(global_variables&) | advection.cpp:70 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| ►1.55+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_mom_kernel(int, int, int[...] | advec_mom.cpp:152 | exec |
| ○ | advec_mom_driver(global_variab[...] | advec_mom.cpp:244 | exec |
| ○ | advection(global_variables&) | advection.cpp:68 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►97.47+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►1.27+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_mom_kernel(int, int, int[...] | advec_mom.cpp:152 | exec |
| ○ | advec_mom_driver(global_variab[...] | advec_mom.cpp:244 | exec |
| ○ | advection(global_variables&) | advection.cpp:70 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| ►1.26+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_mom_kernel(int, int, int[...] | advec_mom.cpp:152 | exec |
| ○ | advec_mom_driver(global_variab[...] | advec_mom.cpp:244 | exec |
| ○ | advection(global_variables&) | advection.cpp:68 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►97.91+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►1.05+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_mom_kernel(int, int, int[...] | advec_mom.cpp:152 | exec |
| ○ | advec_mom_driver(global_variab[...] | advec_mom.cpp:244 | exec |
| ○ | advection(global_variables&) | advection.cpp:68 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| ►1.04+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | advec_mom_kernel(int, int, int[...] | advec_mom.cpp:152 | exec |
| ○ | advec_mom_driver(global_variab[...] | advec_mom.cpp:244 | exec |
| ○ | advection(global_variables&) | advection.cpp:70 | exec |
| ○ | hydro(global_variables&, paral[...] | basic_string.h:1231 | exec |
| ○ | main | clover_leaf.cpp:209 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►98.19+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►98.44+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| Path / |
The code analyzed by CQA in that panel excludes loops and represents 0.01% of application time for run 1x1
| Source file and lines | advec_mom.cpp:53-57 |
| Module | exec |
| nb instructions | 67 |
| nb uops | 67 |
| loop length | 268 |
| used w registers | 23 |
| used x registers | 16 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 0 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 20 |
| micro-operation queue | 8.38 cycles |
| front end | 8.38 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 5.00 | 5.00 | 8.50 | 8.50 | 8.50 | 8.50 | 0.00 | 0.00 | 0.00 | 0.00 | 8.83 | 8.50 | 8.67 | 4.50 | 4.50 |
| cycles | 5.00 | 5.00 | 8.50 | 8.50 | 8.50 | 8.50 | 0.00 | 0.00 | 0.00 | 0.00 | 8.83 | 8.50 | 8.67 | 4.50 | 4.50 |
| Cycles executing div or sqrt instructions | 10.00-25.00 |
| Front-end | 8.38 |
| Dispatch | 8.83 |
| DIV/SQRT | 10.00-25.00 |
| Overall L1 | 10.00-25.00 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | 0% |
| other | 0% |
| all | 27% |
| load | 41% |
| store | 38% |
| mul | 12% |
| add-sub | 13% |
| fma | 12% |
| div/sqrt | 12% |
| other | 20% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #-176]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X19, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDP W23, W1, [X0, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDR W0, [X0, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR W2, [X19, #44] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| ADD W3, W1, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W24, W23, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W4, W0, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STP W3, W4, [SP, #136] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| CMP W24, W3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 42eef8 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.1+0x3e8> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD W22, W2, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W21, W3, W24 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W4, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 42ef08 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.1+0x3f8> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| SUB W5, W22, W4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MUL W23, W21, W5 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| STR W5, [SP, #144] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| BL 410210 <@plt_start@+0x1f0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W20, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410240 <@plt_start@+0x220> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| UDIV W7, W23, W20 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| ORR W6, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MSUB W8, W7, W20, W23 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| CMP W0, W8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CC 42ef1c <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.1+0x40c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W1, W7, W6, W8 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W9, W7, W1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR W9, [SP, #148] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| CMP W1, W9 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 42ef08 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.1+0x3f8> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP W10, W11, [SP, #140] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| STP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| UDIV W12, W1, W11 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | scal (12.5%) |
| LDR X27, [X19, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDP X20, X25, [X19] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| MSUB W13, W12, W11, W1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W14, W12, W24 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| LDP X24, X26, [X19, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| SBFM X15, X14, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| ADD W28, W13, W10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W22, W22, W28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W7, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| CSEL W3, W7, W22, LS | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W19, W1, W3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| CMP W1, W19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 42eed4 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.1+0x3c4> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #176 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #176 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W7, W7, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W8, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| B 42eb88 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.1+0x78> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
The code analyzed by CQA in that panel excludes loops and represents 0.01% of application time for run 1x1
| Source file and lines | advec_mom.cpp:53-57 |
| Module | exec |
| nb instructions | 67 |
| nb uops | 67 |
| loop length | 268 |
| used w registers | 23 |
| used x registers | 16 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 0 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 20 |
| micro-operation queue | 8.38 cycles |
| front end | 8.38 cycles |
| | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 5.00 | 5.00 | 8.50 | 8.50 | 8.50 | 8.50 | 0.00 | 0.00 | 0.00 | 0.00 | 8.83 | 8.50 | 8.67 | 4.50 | 4.50 |
| cycles | 5.00 | 5.00 | 8.50 | 8.50 | 8.50 | 8.50 | 0.00 | 0.00 | 0.00 | 0.00 | 8.83 | 8.50 | 8.67 | 4.50 | 4.50 |
| Cycles executing div or sqrt instructions | 10.00-25.00 |
| Front-end | 8.38 |
| Dispatch | 8.83 |
| DIV/SQRT | 10.00-25.00 |
| Overall L1 | 10.00-25.00 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | 0% |
| other | 0% |
| all | 27% |
| load | 41% |
| store | 38% |
| mul | 12% |
| add-sub | 13% |
| fma | 12% |
| div/sqrt | 12% |
| other | 20% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #848]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X19, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDP W23, W1, [X0, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDR W0, [X0, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR W2, [X19, #44] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| ADD W3, W1, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W24, W23, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W4, W0, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STP W3, W4, [SP, #136] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| CMP W24, W3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 42eef8 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.1+0x3e8> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD W22, W2, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W21, W3, W24 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W4, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GE 42ef08 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.1+0x3f8> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| SUB W5, W22, W4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MUL W23, W21, W5 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| STR W5, [SP, #144] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| BL 410210 <@plt_start@+0x1f0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W20, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410240 <@plt_start@+0x220> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| UDIV W7, W23, W20 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| ORR W6, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MSUB W8, W7, W20, W23 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| CMP W0, W8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CC 42ef1c <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.1+0x40c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W1, W7, W6, W8 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W9, W7, W1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR W9, [SP, #148] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| CMP W1, W9 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 42ef08 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.1+0x3f8> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP W10, W11, [SP, #140] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| STP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| UDIV W12, W1, W11 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | scal (12.5%) |
| LDR X27, [X19, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDP X20, X25, [X19] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| MSUB W13, W12, W11, W1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W14, W12, W24 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| LDP X24, X26, [X19, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| SBFM X15, X14, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| ADD W28, W13, W10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| SUB W22, W22, W28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W7, W22 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| CSEL W3, W7, W22, #9 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD W19, W1, W3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| CMP W1, W19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 42eed4 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.1+0x3c4> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #176 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #176 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W7, W7, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W8, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| B 42eb88 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.1+0x78> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| Run 1x1 | Number processes: 1<br>Number nodes: NA<br>Run Command: <executable><br>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings<br>Dataset:<br>Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431<br>OMP_PROC_BIND: spread<br>OMP_DISPLAY_AFFINITY: TRUE<br>OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'<br>OMP_DISPLAY_ENV: TRUE<br>OMP_NUM_THREADS: 1<br>OMP_PLACES: threads |
|---|---|
| Run 1x2 | Number processes: 1<br>Run Command: <executable><br>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings<br>Dataset:<br>Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431<br>OMP_NUM_THREADS: 2<br>OMP_PROC_BIND: spread<br>OMP_DISPLAY_AFFINITY: TRUE<br>OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'<br>OMP_DISPLAY_ENV: TRUE<br>OMP_PLACES: threads |
| Run 1x4 | Number processes: 1<br>Run Command: <executable><br>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings<br>Dataset:<br>Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431<br>OMP_NUM_THREADS: 4<br>OMP_PROC_BIND: spread<br>OMP_DISPLAY_AFFINITY: TRUE<br>OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'<br>OMP_DISPLAY_ENV: TRUE<br>OMP_PLACES: threads |
| Run 1x8 | Number processes: 1<br>Run Command: <executable><br>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings<br>Dataset:<br>Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431<br>OMP_NUM_THREADS: 8<br>OMP_PROC_BIND: spread<br>OMP_DISPLAY_AFFINITY: TRUE<br>OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'<br>OMP_DISPLAY_ENV: TRUE<br>OMP_PLACES: threads |
| Run 1x16 | Number processes: 1<br>Run Command: <executable><br>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings<br>Dataset:<br>Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431<br>OMP_NUM_THREADS: 16<br>OMP_PROC_BIND: spread<br>OMP_DISPLAY_AFFINITY: TRUE<br>OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'<br>OMP_DISPLAY_ENV: TRUE<br>OMP_PLACES: threads |
| Run 1x24 | Number processes: 1<br>Run Command: <executable><br>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings<br>Dataset:<br>Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431<br>OMP_NUM_THREADS: 24<br>OMP_PROC_BIND: spread<br>OMP_DISPLAY_AFFINITY: TRUE<br>OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'<br>OMP_DISPLAY_ENV: TRUE<br>OMP_PLACES: threads |
| Run 1x32 | Number processes: 1<br>Run Command: <executable><br>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings<br>Dataset:<br>Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431<br>OMP_NUM_THREADS: 32<br>OMP_PROC_BIND: spread<br>OMP_DISPLAY_AFFINITY: TRUE<br>OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'<br>OMP_DISPLAY_ENV: TRUE<br>OMP_PLACES: threads |
| Run 1x40 | Number processes: 1<br>Run Command: <executable><br>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings<br>Dataset:<br>Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431<br>OMP_NUM_THREADS: 40<br>OMP_PROC_BIND: spread<br>OMP_DISPLAY_AFFINITY: TRUE<br>OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'<br>OMP_DISPLAY_ENV: TRUE<br>OMP_PLACES: threads |
| Run 1x48 | Number processes: 1<br>Run Command: <executable><br>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings<br>Dataset:<br>Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431<br>OMP_NUM_THREADS: 48<br>OMP_PROC_BIND: spread<br>OMP_DISPLAY_AFFINITY: TRUE<br>OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'<br>OMP_DISPLAY_ENV: TRUE<br>OMP_PLACES: threads |
| Run 1x56 | Number processes: 1<br>Run Command: <executable><br>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings<br>Dataset:<br>Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431<br>OMP_NUM_THREADS: 56<br>OMP_PROC_BIND: spread<br>OMP_DISPLAY_AFFINITY: TRUE<br>OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'<br>OMP_DISPLAY_ENV: TRUE<br>OMP_PLACES: threads |
| Run 1x64 | Number processes: 1<br>Run Command: <executable><br>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings<br>Dataset:<br>Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431<br>OMP_NUM_THREADS: 64<br>OMP_PROC_BIND: spread<br>OMP_DISPLAY_AFFINITY: TRUE<br>OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'<br>OMP_DISPLAY_ENV: TRUE<br>OMP_PLACES: threads |
| (1x1) Efficiency | (1x1) Potential Speed-Up (%) | (1x2) Efficiency | (1x2) Potential Speed-Up (%) | (1x4) Efficiency | (1x4) Potential Speed-Up (%) | (1x8) Efficiency | (1x8) Potential Speed-Up (%) | (1x16) Efficiency | (1x16) Potential Speed-Up (%) | (1x24) Efficiency | (1x24) Potential Speed-Up (%) | (1x32) Efficiency | (1x32) Potential Speed-Up (%) | (1x40) Efficiency | (1x40) Potential Speed-Up (%) | (1x48) Efficiency | (1x48) Potential Speed-Up (%) | (1x56) Efficiency | (1x56) Potential Speed-Up (%) | (1x64) Efficiency | (1x64) Potential Speed-Up (%) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 0 | 0.98 | 0.03 | 0.91 | 0.12 | 0.65 | 0.62 | 0.32 | 1.59 | 0.22 | 1.95 | 0.17 | 2.12 | 0.14 | 2.23 | 0.12 | 2.34 | 0.1 | 2.41 | 0.09 | 2.46 |
| Run | Number of threads | Efficiency (ideal is 1) | Speedup | Ideal Speedup | Time (s) | Coverage (%) |
|---|---|---|---|---|---|---|
| 1x1 | 1 | 1 | 1 | 1 | 20.189998626709 | 1.358397603035 |
| 1x2 | 2 | 0.98 | 1.96 | 2 | 10.369999885559 | 1.3738461732864 |
| 1x4 | 4 | 0.91 | 3.65 | 4 | 5.6249990463257 | 1.4409710168839 |
| 1x8 | 8 | 0.65 | 5.21 | 8 | 3.8899993896484 | 1.7867506742477 |
| 1x16 | 16 | 0.32 | 5.14 | 16 | 3.860000371933 | 2.3371019363403 |
| 1x24 | 24 | 0.22 | 5.18 | 24 | 3.8399999141693 | 2.4857320785522 |
| 1x32 | 32 | 0.17 | 5.47 | 32 | 3.6349995136261 | 2.5550799369812 |
| 1x40 | 40 | 0.14 | 5.67 | 40 | 3.5049998760223 | 2.6009929180145 |
| 1x48 | 48 | 0.12 | 5.66 | 48 | 3.5 | 2.6509122848511 |
| 1x56 | 56 | 0.1 | 5.61 | 56 | 3.5449995994568 | 2.6810710430145 |
| 1x64 | 64 | 0.09 | 5.57 | 64 | 3.5850002765656 | 2.6965577602386 |
| Name | Coverage (%) | Time (s) |
|---|---|---|
| ▼advec_mom_kernel(int, int, int, int, clover::Buffer2D<double>&, ...) | 1.36 | 20.19 |
| ▼Loop 183 - advec_mom.cpp:55-57 - exec– | 0.00 | 0.00 |
| ▼Loop 181 - advec_mom.cpp:55-57 - exec– | 0.00 | 0.00 |
| ○Loop 182 - advec_mom.cpp:56-57 - exec | 1.35 | 20.10 |
