| Function: initialise_chunk(int, global_variables&) [clone ._omp_fn.4] | Module: exec | Source: initialise_chunk.cpp:77-82 [...] | Coverage (incl. loops): NA% | (excl. loops): NA% |
|---|
| Function: initialise_chunk(int, global_variables&) [clone ._omp_fn.4] | Module: exec | Source: initialise_chunk.cpp:77-82 [...] | Coverage (incl. loops): NA% | (excl. loops): NA% |
|---|
/home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/build/CloverLeaf2.0-CXX/src/omp/initialise_chunk.cpp: 77 - 82 |
-------------------------------------------------------------------------------- |
77: #pragma omp parallel for simd collapse(2) |
78: for (int j = (0); j < (yrange1); j++) { |
79: for (int i = (0); i < (xrange1); i++) { |
80: field.volume(i, j) = dx * dy; |
81: field.xarea(i, j) = field.celldy[j]; |
82: field.yarea(i, j) = field.celldx[i]; |
/home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/build/CloverLeaf2.0-CXX/src/omp/context.h: 46 - 69 |
-------------------------------------------------------------------------------- |
46: T &operator[](size_t i) const { return data[i]; } |
[...] |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x4381f0 STP X29, X30, [SP, #-112]! |
0x4381f4 ADD X29, SP, #0 |
0x4381f8 STP X19, X20, [SP, #16] |
0x4381fc LDP W19, W20, [X0, #24] |
0x438200 CMP W20, #0 |
0x438204 B.LE 438548 |
0x438208 CMP W19, #0 |
0x43820c B.LE 438548 |
0x438210 STP X21, X22, [SP, #32] |
0x438214 ORR X22, XZR, X0 |
0x438218 STP X23, X24, [SP, #48] |
0x43821c MUL W23, W20, W19 |
0x438220 BL 410210 |
0x438224 ORR W21, WZR, W0 |
0x438228 BL 410240 |
0x43822c UDIV W1, W23, W21 |
0x438230 ORR W2, WZR, W0 |
0x438234 MSUB W3, W1, W21, W23 |
0x438238 CMP W0, W3 |
0x43823c B.CC 438568 |
0x438240 MADD W14, W1, W2, W3 |
0x438244 ADD W23, W1, W14 |
0x438248 CMP W14, W23 |
0x43824c B.CS 438554 |
0x438250 STP X25, X26, [SP, #64] |
0x438254 UDIV W6, W14, W19 |
0x438258 ORR W24, WZR, W19 |
0x43825c STP X27, X28, [SP, #80] |
0x438260 LDP D30, D22, [X22] |
0x438264 LDR X7, [X22, #16] |
0x438268 MSUB W13, W6, W19, W14 |
0x43826c SBFM X6, X6, #0, #31 |
0x438270 FMUL D23, D22, D30 |
0x438274 SUB W15, W19, W13 |
0x438278 DUP V29.2D, V23.D[0] |
0x43827c HINT #0 |
(252) 0x438280 CMP W1, W15 |
(252) 0x438284 CSEL W5, W1, W15, LS |
(252) 0x438288 ADD W15, W14, W5 |
(252) 0x43828c CMP W14, W15 |
(252) 0x438290 B.CS 43851c |
(252) 0x438294 ADD X0, X7, #672 |
(252) 0x438298 LDR X12, [X7, #584] |
(252) 0x43829c UBFM X14, X6, #61, #60 |
(252) 0x4382a0 LDP X22, X10, [X0] |
(252) 0x4382a4 LDP X21, X16, [X0, #24] |
(252) 0x4382a8 ADD X8, X12, X6,LSL #3 |
(252) 0x4382ac MUL X10, X6, X10 |
(252) 0x4382b0 LDR X11, [X7, #656] |
(252) 0x4382b4 MUL X16, X6, X16 |
(252) 0x4382b8 LDR X19, [X7, #552] |
(252) 0x4382bc LDR X18, [X7, #720] |
(252) 0x4382c0 MUL X11, X6, X11 |
(252) 0x4382c4 CMP W5, #1 |
(252) 0x4382c8 B.EQ 4384f4 |
(252) 0x4382cc UBFM W9, W5, #1, #31 |
(252) 0x4382d0 SBFM X26, X13, #0, #31 |
(252) 0x4382d4 UBFM X9, X9, #60, #59 |
(252) 0x4382d8 ADD X25, X11, X26 |
(252) 0x4382dc SUB X17, X9, #16 |
(252) 0x4382e0 ADD X28, X10, X26 |
(252) 0x4382e4 ADD X4, X22, X25,LSL #3 |
(252) 0x4382e8 UBFM X27, X17, #4, #63 |
(252) 0x4382ec ADD X2, X16, X26 |
(252) 0x4382f0 ADD X3, X21, X28,LSL #3 |
(252) 0x4382f4 ADD X30, X27, #1 |
(252) 0x4382f8 UBFM X27, X13, #61, #31 |
(252) 0x4382fc ADD X1, X18, X2,LSL #3 |
(252) 0x438300 ANDS X17, X30, #0x7 |
(252) 0x438304 UBFM X26, X2, #61, #60 |
(252) 0x438308 MOVZ X0, #0 |
(252) 0x43830c UBFM X25, X25, #61, #60 |
(252) 0x438310 UBFM X28, X28, #61, #60 |
(252) 0x438314 ADD X2, X19, X27 |
(252) 0x438318 B.EQ 438418 |
(252) 0x43831c CMP X17, #1 |
(252) 0x438320 B.EQ 4383f4 |
(252) 0x438324 CMP X17, #2 |
(252) 0x438328 B.EQ 4383d8 |
(252) 0x43832c CMP X17, #3 |
(252) 0x438330 B.EQ 4383bc |
(252) 0x438334 CMP X17, #4 |
(252) 0x438338 B.EQ 4383a0 |
(252) 0x43833c CMP X17, #5 |
(252) 0x438340 B.EQ 438384 |
(252) 0x438344 CMP X17, #6 |
(252) 0x438348 B.EQ 438368 |
(252) 0x43834c STR Q29, [X22, X25] |
(252) 0x438350 MOVZ X0, #16 |
(252) 0x438354 LDR D21, [X12, X14] |
(252) 0x438358 DUP V0.2D, V21.D[0] |
(252) 0x43835c STR Q0, [X21, X28] |
(252) 0x438360 LDR Q31, [X19, X27] |
(252) 0x438364 STR Q31, [X18, X26] |
(252) 0x438368 STR Q29, [X4, X0] |
(252) 0x43836c LDR D20, [X12, X14] |
(252) 0x438370 DUP V1.2D, V20.D[0] |
(252) 0x438374 STR Q1, [X3, X0] |
(252) 0x438378 LDR Q2, [X2, X0] |
(252) 0x43837c STR Q2, [X1, X0] |
(252) 0x438380 ADD X0, X0, #16 |
(252) 0x438384 STR Q29, [X4, X0] |
(252) 0x438388 LDR D19, [X12, X14] |
(252) 0x43838c DUP V3.2D, V19.D[0] |
(252) 0x438390 STR Q3, [X3, X0] |
(252) 0x438394 LDR Q4, [X2, X0] |
(252) 0x438398 STR Q4, [X1, X0] |
(252) 0x43839c ADD X0, X0, #16 |
(252) 0x4383a0 STR Q29, [X4, X0] |
(252) 0x4383a4 LDR D18, [X12, X14] |
(252) 0x4383a8 DUP V5.2D, V18.D[0] |
(252) 0x4383ac STR Q5, [X3, X0] |
(252) 0x4383b0 LDR Q6, [X2, X0] |
(252) 0x4383b4 STR Q6, [X1, X0] |
(252) 0x4383b8 ADD X0, X0, #16 |
(252) 0x4383bc STR Q29, [X4, X0] |
(252) 0x4383c0 LDR D17, [X12, X14] |
(252) 0x4383c4 DUP V7.2D, V17.D[0] |
(252) 0x4383c8 STR Q7, [X3, X0] |
(252) 0x4383cc LDR Q16, [X2, X0] |
(252) 0x4383d0 STR Q16, [X1, X0] |
(252) 0x4383d4 ADD X0, X0, #16 |
(252) 0x4383d8 STR Q29, [X4, X0] |
(252) 0x4383dc LDR D24, [X12, X14] |
(252) 0x4383e0 DUP V25.2D, V24.D[0] |
(252) 0x4383e4 STR Q25, [X3, X0] |
(252) 0x4383e8 LDR Q26, [X2, X0] |
(252) 0x4383ec STR Q26, [X1, X0] |
(252) 0x4383f0 ADD X0, X0, #16 |
(252) 0x4383f4 STR Q29, [X4, X0] |
(252) 0x4383f8 LDR D27, [X12, X14] |
(252) 0x4383fc DUP V28.2D, V27.D[0] |
(252) 0x438400 STR Q28, [X3, X0] |
(252) 0x438404 LDR Q30, [X2, X0] |
(252) 0x438408 STR Q30, [X1, X0] |
(252) 0x43840c ADD X0, X0, #16 |
(252) 0x438410 CMP X9, X0 |
(252) 0x438414 B.EQ 4384e8 |
(252) 0x438418 STR W20, [SP, #108] |
(253) 0x43841c STR Q29, [X4, X0] |
(253) 0x438420 ADD X17, X0, #16 |
(253) 0x438424 ADD X28, X0, #32 |
(253) 0x438428 LD1R {V22.2D}, [X8] |
(253) 0x43842c ADD X30, X0, #48 |
(253) 0x438430 ADD X27, X0, #64 |
(253) 0x438434 ADD X26, X0, #80 |
(253) 0x438438 ADD X25, X0, #96 |
(253) 0x43843c ADD X20, X0, #112 |
(253) 0x438440 STR Q22, [X3, X0] |
(253) 0x438444 LDR Q21, [X2, X0] |
(253) 0x438448 STR Q21, [X1, X0] |
(253) 0x43844c ADD X0, X0, #128 |
(253) 0x438450 STR Q29, [X4, X17] |
(253) 0x438454 LD1R {V0.2D}, [X8] |
(253) 0x438458 STR Q0, [X3, X17] |
(253) 0x43845c LDR Q31, [X2, X17] |
(253) 0x438460 STR Q31, [X1, X17] |
(253) 0x438464 STR Q29, [X4, X28] |
(253) 0x438468 LD1R {V20.2D}, [X8] |
(253) 0x43846c STR Q20, [X3, X28] |
(253) 0x438470 LDR Q1, [X2, X28] |
(253) 0x438474 STR Q1, [X1, X28] |
(253) 0x438478 STR Q29, [X4, X30] |
(253) 0x43847c LD1R {V2.2D}, [X8] |
(253) 0x438480 STR Q2, [X3, X30] |
(253) 0x438484 LDR Q19, [X2, X30] |
(253) 0x438488 STR Q19, [X1, X30] |
(253) 0x43848c STR Q29, [X4, X27] |
(253) 0x438490 LD1R {V3.2D}, [X8] |
(253) 0x438494 STR Q3, [X3, X27] |
(253) 0x438498 LDR Q4, [X2, X27] |
(253) 0x43849c STR Q4, [X1, X27] |
(253) 0x4384a0 STR Q29, [X4, X26] |
(253) 0x4384a4 LD1R {V18.2D}, [X8] |
(253) 0x4384a8 STR Q18, [X3, X26] |
(253) 0x4384ac LDR Q5, [X2, X26] |
(253) 0x4384b0 STR Q5, [X1, X26] |
(253) 0x4384b4 STR Q29, [X4, X25] |
(253) 0x4384b8 LD1R {V6.2D}, [X8] |
(253) 0x4384bc STR Q6, [X3, X25] |
(253) 0x4384c0 LDR Q17, [X2, X25] |
(253) 0x4384c4 STR Q17, [X1, X25] |
(253) 0x4384c8 STR Q29, [X4, X20] |
(253) 0x4384cc LD1R {V7.2D}, [X8] |
(253) 0x4384d0 STR Q7, [X3, X20] |
(253) 0x4384d4 LDR Q16, [X2, X20] |
(253) 0x4384d8 STR Q16, [X1, X20] |
(253) 0x4384dc CMP X9, X0 |
(253) 0x4384e0 B.NE 43841c |
(252) 0x4384e4 LDR W20, [SP, #108] |
(252) 0x4384e8 TBZ W5, #0, 438518 |
(252) 0x4384ec AND W5, W5, #0xfffffffe |
(252) 0x4384f0 ADD W13, W13, W5 |
(252) 0x4384f4 SBFM X8, X13, #0, #31 |
(252) 0x4384f8 ADD X11, X11, X8 |
(252) 0x4384fc ADD X10, X10, X8 |
(252) 0x438500 ADD X16, X16, X8 |
(252) 0x438504 STR D23, [X22, X11,LSL #3] |
(252) 0x438508 LDR D24, [X12, X14] |
(252) 0x43850c STR D24, [X21, X10,LSL #3] |
(252) 0x438510 LDR D25, [X19, W13,UXTW #3] |
(252) 0x438514 STR D25, [X18, X16,LSL #3] |
(252) 0x438518 ORR W14, WZR, W15 |
(252) 0x43851c ADD X6, X6, #1 |
(252) 0x438520 CMP W20, W6 |
(252) 0x438524 B.LE 438538 |
(252) 0x438528 SUB W1, W23, W14 |
(252) 0x43852c ORR W15, WZR, W24 |
(252) 0x438530 MOVZ W13, #0 |
(252) 0x438534 B 438280 |
0x438538 LDP X21, X22, [SP, #32] |
0x43853c LDP X23, X24, [SP, #48] |
0x438540 LDP X25, X26, [SP, #64] |
0x438544 LDP X27, X28, [SP, #80] |
0x438548 LDP X19, X20, [SP, #16] |
0x43854c LDP X29, X30, [SP], #112 |
0x438550 RET |
0x438554 LDP X21, X22, [SP, #32] |
0x438558 LDP X23, X24, [SP, #48] |
0x43855c LDP X19, X20, [SP, #16] |
0x438560 LDP X29, X30, [SP], #112 |
0x438564 RET |
0x438568 ADD W1, W1, #1 |
0x43856c MOVZ W3, #0 |
0x438570 B 438240 |
0x438574 HINT #0 |
0x438578 HINT #0 |
0x43857c HINT #0 |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| Path / |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run 1x1
| Source file and lines | initialise_chunk.cpp:77-82 |
| Module | exec |
| nb instructions | 54 |
| nb uops | 50 |
| loop length | 216 |
| used w registers | 14 |
| used x registers | 16 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 3 |
| used q registers | 0 |
| used v registers | 2 |
| used z registers | 0 |
| nb stack references | 16 |
| micro-operation queue | 6.25 cycles |
| front end | 6.25 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 4.50 | 4.50 | 5.75 | 5.75 | 5.75 | 5.75 | 0.50 | 0.50 | 0.50 | 0.50 | 6.33 | 6.33 | 6.33 | 3.00 | 3.00 |
| cycles | 4.50 | 4.50 | 5.75 | 5.75 | 5.75 | 5.75 | 0.50 | 0.50 | 0.50 | 0.50 | 6.33 | 6.33 | 6.33 | 3.00 | 3.00 |
| Cycles executing div or sqrt instructions | 10.00-25.00 |
| Front-end | 6.25 |
| Dispatch | 6.33 |
| DIV/SQRT | 10.00-25.00 |
| Overall L1 | 10.00-25.00 |
| all | 5% |
| load | 9% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| other | 9% |
| all | 0% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | 0% |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | NA (no other vectorizable/vectorized instructions) |
| all | 5% |
| load | 9% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | 0% |
| other | 10% |
| all | 33% |
| load | 45% |
| store | 50% |
| mul | 12% |
| add-sub | 16% |
| fma | 12% |
| other | 23% |
| all | 25% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | 25% |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | NA (no other vectorizable/vectorized instructions) |
| all | 32% |
| load | 45% |
| store | 50% |
| mul | 18% |
| add-sub | 16% |
| fma | 12% |
| div/sqrt | 12% |
| other | 25% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #-112]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDP W19, W20, [X0, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| CMP W20, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.LE 438548 <_Z16initialise_chunkiR16global_variables._omp_fn.4+0x358> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| CMP W19, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.LE 438548 <_Z16initialise_chunkiR16global_variables._omp_fn.4+0x358> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X22, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| MUL W23, W20, W19 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| BL 410210 <@plt_start@+0x1f0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W21, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410240 <@plt_start@+0x220> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| UDIV W1, W23, W21 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| ORR W2, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MSUB W3, W1, W21, W23 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| CMP W0, W3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CC 438568 <_Z16initialise_chunkiR16global_variables._omp_fn.4+0x378> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W14, W1, W2, W3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W23, W1, W14 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W14, W23 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 438554 <_Z16initialise_chunkiR16global_variables._omp_fn.4+0x364> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| UDIV W6, W14, W19 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | scal (12.5%) |
| ORR W24, WZR, W19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDP D30, D22, [X22] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | vect (50.0%) |
| LDR X7, [X22, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| MSUB W13, W6, W19, W14 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| SBFM X6, X6, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| FMUL D23, D22, D30 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (25.0%) |
| SUB W15, W19, W13 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| DUP V29.2D, V23.D[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | vect (50.0%) |
| HINT #0 | N/A | ||||||||||||||||||
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #112 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #112 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W1, W1, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W3, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| B 438240 <_Z16initialise_chunkiR16global_variables._omp_fn.4+0x50> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run 1x1
| Source file and lines | initialise_chunk.cpp:77-82 |
| Module | exec |
| nb instructions | 54 |
| nb uops | 50 |
| loop length | 216 |
| used w registers | 14 |
| used x registers | 16 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 3 |
| used q registers | 0 |
| used v registers | 2 |
| used z registers | 0 |
| nb stack references | 16 |
| micro-operation queue | 6.25 cycles |
| front end | 6.25 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 4.50 | 4.50 | 5.75 | 5.75 | 5.75 | 5.75 | 0.50 | 0.50 | 0.50 | 0.50 | 6.33 | 6.33 | 6.33 | 3.00 | 3.00 |
| cycles | 4.50 | 4.50 | 5.75 | 5.75 | 5.75 | 5.75 | 0.50 | 0.50 | 0.50 | 0.50 | 6.33 | 6.33 | 6.33 | 3.00 | 3.00 |
| Cycles executing div or sqrt instructions | 10.00-25.00 |
| Front-end | 6.25 |
| Dispatch | 6.33 |
| DIV/SQRT | 10.00-25.00 |
| Overall L1 | 10.00-25.00 |
| all | 5% |
| load | 9% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| other | 9% |
| all | 0% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | 0% |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | NA (no other vectorizable/vectorized instructions) |
| all | 5% |
| load | 9% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | 0% |
| other | 10% |
| all | 33% |
| load | 45% |
| store | 50% |
| mul | 12% |
| add-sub | 16% |
| fma | 12% |
| other | 23% |
| all | 25% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | 25% |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | NA (no other vectorizable/vectorized instructions) |
| all | 32% |
| load | 45% |
| store | 50% |
| mul | 18% |
| add-sub | 16% |
| fma | 12% |
| div/sqrt | 12% |
| other | 25% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #-112]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDP W19, W20, [X0, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| CMP W20, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.LE 438548 <_Z16initialise_chunkiR16global_variables._omp_fn.4+0x358> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| CMP W19, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.LE 438548 <_Z16initialise_chunkiR16global_variables._omp_fn.4+0x358> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X22, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| MUL W23, W20, W19 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| BL 410210 <@plt_start@+0x1f0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W21, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410240 <@plt_start@+0x220> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| UDIV W1, W23, W21 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| ORR W2, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MSUB W3, W1, W21, W23 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| CMP W0, W3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CC 438568 <_Z16initialise_chunkiR16global_variables._omp_fn.4+0x378> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W14, W1, W2, W3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W23, W1, W14 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP W14, W23 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 438554 <_Z16initialise_chunkiR16global_variables._omp_fn.4+0x364> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| UDIV W6, W14, W19 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | scal (12.5%) |
| ORR W24, WZR, W19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDP D30, D22, [X22] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | vect (50.0%) |
| LDR X7, [X22, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| MSUB W13, W6, W19, W14 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| SBFM X6, X6, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| FMUL D23, D22, D30 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (25.0%) |
| SUB W15, W19, W13 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| DUP V29.2D, V23.D[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | vect (50.0%) |
| HINT #0 | N/A | ||||||||||||||||||
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #112 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #112 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W1, W1, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W3, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| B 438240 <_Z16initialise_chunkiR16global_variables._omp_fn.4+0x50> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A |
| Run 1x1 | Number processes: 1; Number nodes: NA; Run Command: <executable>; MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings; Dataset: ; Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431; OMP_PROC_BIND: spread; OMP_DISPLAY_AFFINITY: TRUE; OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'; OMP_DISPLAY_ENV: TRUE; OMP_NUM_THREADS: 1; OMP_PLACES: threads |
|---|---|
| Run 1x2 | Number processes: 1; Run Command: <executable>; MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings; Dataset: ; Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431; OMP_NUM_THREADS: 2; OMP_PROC_BIND: spread; OMP_DISPLAY_AFFINITY: TRUE; OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'; OMP_DISPLAY_ENV: TRUE; OMP_PLACES: threads |
| Run 1x4 | Number processes: 1; Run Command: <executable>; MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings; Dataset: ; Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431; OMP_NUM_THREADS: 4; OMP_PROC_BIND: spread; OMP_DISPLAY_AFFINITY: TRUE; OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'; OMP_DISPLAY_ENV: TRUE; OMP_PLACES: threads |
| Run 1x8 | Number processes: 1; Run Command: <executable>; MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings; Dataset: ; Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431; OMP_NUM_THREADS: 8; OMP_PROC_BIND: spread; OMP_DISPLAY_AFFINITY: TRUE; OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'; OMP_DISPLAY_ENV: TRUE; OMP_PLACES: threads |
| Run 1x16 | Number processes: 1; Run Command: <executable>; MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings; Dataset: ; Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431; OMP_NUM_THREADS: 16; OMP_PROC_BIND: spread; OMP_DISPLAY_AFFINITY: TRUE; OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'; OMP_DISPLAY_ENV: TRUE; OMP_PLACES: threads |
| Run 1x24 | Number processes: 1; Run Command: <executable>; MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings; Dataset: ; Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431; OMP_NUM_THREADS: 24; OMP_PROC_BIND: spread; OMP_DISPLAY_AFFINITY: TRUE; OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'; OMP_DISPLAY_ENV: TRUE; OMP_PLACES: threads |
| Run 1x32 | Number processes: 1; Run Command: <executable>; MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings; Dataset: ; Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431; OMP_NUM_THREADS: 32; OMP_PROC_BIND: spread; OMP_DISPLAY_AFFINITY: TRUE; OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'; OMP_DISPLAY_ENV: TRUE; OMP_PLACES: threads |
| Run 1x40 | Number processes: 1; Run Command: <executable>; MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings; Dataset: ; Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431; OMP_NUM_THREADS: 40; OMP_PROC_BIND: spread; OMP_DISPLAY_AFFINITY: TRUE; OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'; OMP_DISPLAY_ENV: TRUE; OMP_PLACES: threads |
| Run 1x48 | Number processes: 1; Run Command: <executable>; MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings; Dataset: ; Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431; OMP_NUM_THREADS: 48; OMP_PROC_BIND: spread; OMP_DISPLAY_AFFINITY: TRUE; OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'; OMP_DISPLAY_ENV: TRUE; OMP_PLACES: threads |
| Run 1x56 | Number processes: 1; Run Command: <executable>; MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings; Dataset: ; Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431; OMP_NUM_THREADS: 56; OMP_PROC_BIND: spread; OMP_DISPLAY_AFFINITY: TRUE; OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'; OMP_DISPLAY_ENV: TRUE; OMP_PLACES: threads |
| Run 1x64 | Number processes: 1; Run Command: <executable>; MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings; Dataset: ; Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431; OMP_NUM_THREADS: 64; OMP_PROC_BIND: spread; OMP_DISPLAY_AFFINITY: TRUE; OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'; OMP_DISPLAY_ENV: TRUE; OMP_PLACES: threads |
| (1x1) Efficiency | (1x1) Potential Speed-Up (%) | (1x2) Efficiency | (1x2) Potential Speed-Up (%) | (1x4) Efficiency | (1x4) Potential Speed-Up (%) | (1x8) Efficiency | (1x8) Potential Speed-Up (%) | (1x16) Efficiency | (1x16) Potential Speed-Up (%) | (1x24) Efficiency | (1x24) Potential Speed-Up (%) | (1x32) Efficiency | (1x32) Potential Speed-Up (%) | (1x40) Efficiency | (1x40) Potential Speed-Up (%) | (1x48) Efficiency | (1x48) Potential Speed-Up (%) | (1x56) Efficiency | (1x56) Potential Speed-Up (%) | (1x64) Efficiency | (1x64) Potential Speed-Up (%) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 0 | 1 | 0 | 1 | 0 | 1 | 0 | 1 | 0 | 1 | 0 | 1 | 0 | 1 | 0 | 1 | 0 | 1 | 0 |
| Run | Number of threads | Efficiency (ideal is 1) | Speedup | Ideal Speedup | Time (s) | Coverage (%) |
|---|---|---|---|---|---|---|
| 1x1 | ||||||
| 1x2 | 1 | 1 | 1 | 2 | 0.074999995529652 | 0.0050164782442153 |
| 1x4 | 2 | 1 | 1 | 4 | 0.03999999910593 | 0.0052650230936706 |
| 1x8 | 8 | 1 | 1 | 8 | 0.030000001192093 | 0.013454008847475 |
| 1x16 | 16 | 1 | 1 | 16 | 0.025000000372529 | 0.015606692060828 |
| 1x24 | 24 | 1 | 1 | 24 | 0.025000000372529 | 0.015910439193249 |
| 1x32 | 32 | 1 | 1 | 32 | 0.025000002235174 | 0.017732771113515 |
| 1x40 | 40 | 1 | 1 | 40 | 0.025000002235174 | 0.018804172053933 |
| 1x48 | 48 | 1 | 1 | 48 | 0.025000002235174 | 0.019134406000376 |
| 1x56 | 56 | 1 | 1 | 56 | 0.025000002235174 | 0.018960317596793 |
| 1x64 | 64 | 1 | 1 | 64 | 0.029999999329448 | 0.018985640257597 |
| Name | Coverage (%) | Time (s) |
|---|---|---|
| ▼initialise_chunk(int, global_variables&) [clone ._omp_fn.4]– | 0.00 | 0.00 |
| ▼Loop 252 - initialise_chunk.cpp:77-82 - exec– | 0.00 | 0.00 |
| ○Loop 253 - initialise_chunk.cpp:80-82 - exec | 0.00 | 0.00 |
