| Function: generate_chunk(int, global_variables&) [clone ._omp_fn.0] | Module: exec | Source: generate_chunk.cpp:74-80 [...] | Coverage (incl. loops): 0.03% | (excl. loops): 0.00% |
|---|
| Function: generate_chunk(int, global_variables&) [clone ._omp_fn.0] | Module: exec | Source: generate_chunk.cpp:74-80 [...] | Coverage (incl. loops): 0.03% | (excl. loops): 0.00% |
|---|
/home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/build/CloverLeaf2.0-CXX/src/omp/context.h: 46 - 69 |
-------------------------------------------------------------------------------- |
46: T &operator[](size_t i) const { return data[i]; } |
[...] |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
/home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/build/CloverLeaf2.0-CXX/src/omp/generate_chunk.cpp: 74 - 80 |
-------------------------------------------------------------------------------- |
74: #pragma omp parallel for simd collapse(2) |
75: for (int j = (0); j < (yrange); j++) { |
76: for (int i = (0); i < (xrange); i++) { |
77: field.energy0(i, j) = state_energy[0]; |
78: field.density0(i, j) = state_density[0]; |
79: field.xvel0(i, j) = state_xvel[0]; |
80: field.yvel0(i, j) = state_yvel[0]; |
0x435e60 STP X29, X30, [SP, #-144]! |
0x435e64 ADD X29, SP, #0 |
0x435e68 STP X23, X24, [SP, #48] |
0x435e6c LDP W23, W24, [X0, #40] |
0x435e70 CMP W24, #0 |
0x435e74 B.LE 436154 |
0x435e78 CMP W23, #0 |
0x435e7c B.LE 436154 |
0x435e80 STP X19, X20, [SP, #16] |
0x435e84 ORR X20, XZR, X0 |
0x435e88 STP X21, X22, [SP, #32] |
0x435e8c MUL W21, W24, W23 |
0x435e90 BL 410210 |
0x435e94 ORR W19, WZR, W0 |
0x435e98 BL 410240 |
0x435e9c UDIV W1, W21, W19 |
0x435ea0 ORR W2, WZR, W0 |
0x435ea4 MSUB W3, W1, W19, W21 |
0x435ea8 CMP W0, W3 |
0x435eac B.CC 436174 |
0x435eb0 MADD W30, W1, W2, W3 |
0x435eb4 ADD W0, W1, W30 |
0x435eb8 STR W0, [SP, #108] |
0x435ebc CMP W30, W0 |
0x435ec0 B.CS 436160 |
0x435ec4 UDIV W13, W30, W23 |
0x435ec8 STP X25, X26, [SP, #64] |
0x435ecc STP X27, X28, [SP, #80] |
0x435ed0 LDR X4, [X20] |
0x435ed4 LDR X5, [X20, #8] |
0x435ed8 LDR X6, [X20, #16] |
0x435edc MSUB W7, W13, W23, W30 |
0x435ee0 SBFM X13, X13, #0, #31 |
0x435ee4 STR X4, [SP, #120] |
0x435ee8 LDR X8, [X20, #24] |
0x435eec SUB W27, W23, W7 |
0x435ef0 STR X5, [SP, #112] |
0x435ef4 LDR X15, [X20, #32] |
0x435ef8 STR X6, [SP, #128] |
0x435efc STR X8, [SP, #136] |
(238) 0x435f00 CMP W1, W27 |
(238) 0x435f04 CSEL W1, W1, W27, #9 |
(238) 0x435f08 ADD W25, W30, W1 |
(238) 0x435f0c CMP W30, W25 |
(238) 0x435f10 B.CS 436124 |
(238) 0x435f14 ORR X9, XZR, X15 |
(238) 0x435f18 LDR X18, [X15, #48] |
(238) 0x435f1c LDR X11, [SP, #112] |
(238) 0x435f20 LDR X12, [SP, #120] |
(238) 0x435f24 MUL X18, X13, X18 |
(238) 0x435f28 LDR X14, [SP, #128] |
(238) 0x435f2c LDR X22, [SP, #136] |
(238) 0x435f30 LDR X17, [X9], #168 |
(238) 0x435f34 LDR X16, [X15, #168] |
(238) 0x435f38 LDR X10, [X15, #216] |
(238) 0x435f3c MUL X17, X13, X17 |
(238) 0x435f40 LDR X6, [X11, #8] |
(238) 0x435f44 MUL X16, X13, X16 |
(238) 0x435f48 LDR X5, [X12, #8] |
(238) 0x435f4c MUL X19, X13, X10 |
(238) 0x435f50 LDR X4, [X14, #8] |
(238) 0x435f54 LDR X3, [X22, #8] |
(238) 0x435f58 LDR X27, [X9, #16] |
(238) 0x435f5c LDR X28, [X15, #16] |
(238) 0x435f60 LDR X30, [X15, #64] |
(238) 0x435f64 LDR X26, [X15, #232] |
(238) 0x435f68 CMP W1, #1 |
(238) 0x435f6c B.EQ 4360ec |
(238) 0x435f70 UBFM W20, W1, #1, #31 |
(238) 0x435f74 SBFM X2, X7, #0, #31 |
(238) 0x435f78 UBFM X14, X20, #60, #59 |
(238) 0x435f7c ADD X8, X18, X2 |
(238) 0x435f80 SUB X9, X14, #16 |
(238) 0x435f84 ADD X21, X17, X2 |
(238) 0x435f88 ADD X11, X30, X8,LSL #3 |
(238) 0x435f8c UBFM X12, X9, #4, #63 |
(238) 0x435f90 ADD X20, X16, X2 |
(238) 0x435f94 ADD X10, X28, X21,LSL #3 |
(238) 0x435f98 ADD X22, X12, #1 |
(238) 0x435f9c ADD X2, X19, X2 |
(238) 0x435fa0 ADD X9, X27, X20,LSL #3 |
(238) 0x435fa4 ANDS X12, X22, #0x3 |
(238) 0x435fa8 MOVZ X0, #0 |
(238) 0x435fac UBFM X22, X8, #61, #60 |
(238) 0x435fb0 UBFM X21, X21, #61, #60 |
(238) 0x435fb4 ADD X8, X26, X2,LSL #3 |
(238) 0x435fb8 UBFM X20, X20, #61, #60 |
(238) 0x435fbc UBFM X2, X2, #61, #60 |
(238) 0x435fc0 B.EQ 436048 |
(238) 0x435fc4 CMP X12, #1 |
(238) 0x435fc8 B.EQ 43601c |
(238) 0x435fcc CMP X12, #2 |
(238) 0x435fd0 B.EQ 435ff8 |
(238) 0x435fd4 LD1R {V28.2D}, [X6] |
(238) 0x435fd8 MOVZ X0, #16 |
(238) 0x435fdc STR Q28, [X30, X22] |
(238) 0x435fe0 LD1R {V29.2D}, [X5] |
(238) 0x435fe4 STR Q29, [X28, X21] |
(238) 0x435fe8 LD1R {V30.2D}, [X4] |
(238) 0x435fec STR Q30, [X27, X20] |
(238) 0x435ff0 LD1R {V31.2D}, [X3] |
(238) 0x435ff4 STR Q31, [X26, X2] |
(238) 0x435ff8 LD1R {V24.2D}, [X6] |
(238) 0x435ffc STR Q24, [X11, X0] |
(238) 0x436000 LD1R {V25.2D}, [X5] |
(238) 0x436004 STR Q25, [X10, X0] |
(238) 0x436008 LD1R {V26.2D}, [X4] |
(238) 0x43600c STR Q26, [X9, X0] |
(238) 0x436010 LD1R {V27.2D}, [X3] |
(238) 0x436014 STR Q27, [X8, X0] |
(238) 0x436018 ADD X0, X0, #16 |
(238) 0x43601c LD1R {V20.2D}, [X6] |
(238) 0x436020 STR Q20, [X11, X0] |
(238) 0x436024 LD1R {V21.2D}, [X5] |
(238) 0x436028 STR Q21, [X10, X0] |
(238) 0x43602c LD1R {V22.2D}, [X4] |
(238) 0x436030 STR Q22, [X9, X0] |
(238) 0x436034 LD1R {V23.2D}, [X3] |
(238) 0x436038 STR Q23, [X8, X0] |
(238) 0x43603c ADD X0, X0, #16 |
(238) 0x436040 CMP X14, X0 |
(238) 0x436044 B.EQ 4360e0 |
(239) 0x436048 LD1R {V0.2D}, [X6] |
(239) 0x43604c ADD X22, X0, #16 |
(239) 0x436050 ADD X21, X0, #32 |
(239) 0x436054 ADD X12, X0, #48 |
(239) 0x436058 STR Q0, [X11, X0] |
(239) 0x43605c LD1R {V1.2D}, [X5] |
(239) 0x436060 STR Q1, [X10, X0] |
(239) 0x436064 LD1R {V2.2D}, [X4] |
(239) 0x436068 STR Q2, [X9, X0] |
(239) 0x43606c LD1R {V3.2D}, [X3] |
(239) 0x436070 STR Q3, [X8, X0] |
(239) 0x436074 ADD X0, X0, #64 |
(239) 0x436078 LD1R {V4.2D}, [X6] |
(239) 0x43607c STR Q4, [X11, X22] |
(239) 0x436080 LD1R {V5.2D}, [X5] |
(239) 0x436084 STR Q5, [X10, X22] |
(239) 0x436088 LD1R {V6.2D}, [X4] |
(239) 0x43608c STR Q6, [X9, X22] |
(239) 0x436090 LD1R {V7.2D}, [X3] |
(239) 0x436094 STR Q7, [X8, X22] |
(239) 0x436098 LD1R {V16.2D}, [X6] |
(239) 0x43609c STR Q16, [X11, X21] |
(239) 0x4360a0 LD1R {V17.2D}, [X5] |
(239) 0x4360a4 STR Q17, [X10, X21] |
(239) 0x4360a8 LD1R {V18.2D}, [X4] |
(239) 0x4360ac STR Q18, [X9, X21] |
(239) 0x4360b0 LD1R {V19.2D}, [X3] |
(239) 0x4360b4 STR Q19, [X8, X21] |
(239) 0x4360b8 LD1R {V28.2D}, [X6] |
(239) 0x4360bc STR Q28, [X11, X12] |
(239) 0x4360c0 LD1R {V29.2D}, [X5] |
(239) 0x4360c4 STR Q29, [X10, X12] |
(239) 0x4360c8 LD1R {V30.2D}, [X4] |
(239) 0x4360cc STR Q30, [X9, X12] |
(239) 0x4360d0 LD1R {V31.2D}, [X3] |
(239) 0x4360d4 STR Q31, [X8, X12] |
(239) 0x4360d8 CMP X14, X0 |
(239) 0x4360dc B.NE 436048 |
(238) 0x4360e0 TBZ W1, #0, 436120 |
(238) 0x4360e4 AND W1, W1, #0xfffffffe |
(238) 0x4360e8 ADD W7, W7, W1 |
(238) 0x4360ec LDR D24, [X6] |
(238) 0x4360f0 SBFM X14, X7, #0, #31 |
(238) 0x4360f4 ADD X18, X18, X14 |
(238) 0x4360f8 ADD X17, X17, X14 |
(238) 0x4360fc ADD X16, X16, X14 |
(238) 0x436100 ADD X19, X19, X14 |
(238) 0x436104 STR D24, [X30, X18,LSL #3] |
(238) 0x436108 LDR D25, [X5] |
(238) 0x43610c STR D25, [X28, X17,LSL #3] |
(238) 0x436110 LDR D26, [X4] |
(238) 0x436114 STR D26, [X27, X16,LSL #3] |
(238) 0x436118 LDR D27, [X3] |
(238) 0x43611c STR D27, [X26, X19,LSL #3] |
(238) 0x436120 ORR W30, WZR, W25 |
(238) 0x436124 ADD X13, X13, #1 |
(238) 0x436128 CMP W24, W13 |
(238) 0x43612c B.LE 436144 |
(238) 0x436130 LDR W25, [SP, #108] |
(238) 0x436134 MOVZ W7, #0 |
(238) 0x436138 ORR W27, WZR, W23 |
(238) 0x43613c SUB W1, W25, W30 |
(238) 0x436140 B 435f00 |
0x436144 LDP X19, X20, [SP, #16] |
0x436148 LDP X21, X22, [SP, #32] |
0x43614c LDP X25, X26, [SP, #64] |
0x436150 LDP X27, X28, [SP, #80] |
0x436154 LDP X23, X24, [SP, #48] |
0x436158 LDP X29, X30, [SP], #144 |
0x43615c RET |
0x436160 LDP X19, X20, [SP, #16] |
0x436164 LDP X21, X22, [SP, #32] |
0x436168 LDP X23, X24, [SP, #48] |
0x43616c LDP X29, X30, [SP], #144 |
0x436170 RET |
0x436174 ADD W1, W1, #1 |
0x436178 MOVZ W3, #0 |
0x43617c B 435eb0 |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►100.00+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | generate_chunk(int, global_var[...] | generate_chunk.cpp:84 | exec |
| ○ | start(parallel_&, global_confi[...] | start.cpp:81 | exec |
| ○ | initialise(parallel_&, std::ve[...] | clover_leaf.cpp:192 | exec |
| ○ | main | clover_leaf.cpp:204 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►50.00+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►50.00+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | generate_chunk(int, global_var[...] | generate_chunk.cpp:84 | exec |
| ○ | start(parallel_&, global_confi[...] | start.cpp:81 | exec |
| ○ | initialise(parallel_&, std::ve[...] | clover_leaf.cpp:192 | exec |
| ○ | main | clover_leaf.cpp:204 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►75.53+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►24.47+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | generate_chunk(int, global_var[...] | generate_chunk.cpp:84 | exec |
| ○ | start(parallel_&, global_confi[...] | start.cpp:81 | exec |
| ○ | initialise(parallel_&, std::ve[...] | clover_leaf.cpp:192 | exec |
| ○ | main | clover_leaf.cpp:204 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►87.50+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►12.50+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | generate_chunk(int, global_var[...] | generate_chunk.cpp:84 | exec |
| ○ | start(parallel_&, global_confi[...] | start.cpp:81 | exec |
| ○ | initialise(parallel_&, std::ve[...] | clover_leaf.cpp:192 | exec |
| ○ | main | clover_leaf.cpp:204 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►93.59+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►6.41+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | generate_chunk(int, global_var[...] | generate_chunk.cpp:84 | exec |
| ○ | start(parallel_&, global_confi[...] | start.cpp:81 | exec |
| ○ | initialise(parallel_&, std::ve[...] | clover_leaf.cpp:192 | exec |
| ○ | main | clover_leaf.cpp:204 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►95.63+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►4.37+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | generate_chunk(int, global_var[...] | generate_chunk.cpp:84 | exec |
| ○ | start(parallel_&, global_confi[...] | start.cpp:81 | exec |
| ○ | initialise(parallel_&, std::ve[...] | clover_leaf.cpp:192 | exec |
| ○ | main | clover_leaf.cpp:204 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►96.78+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►3.22+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | generate_chunk(int, global_var[...] | generate_chunk.cpp:84 | exec |
| ○ | start(parallel_&, global_confi[...] | start.cpp:81 | exec |
| ○ | initialise(parallel_&, std::ve[...] | clover_leaf.cpp:192 | exec |
| ○ | main | clover_leaf.cpp:204 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►97.64+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►2.36+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | generate_chunk(int, global_var[...] | generate_chunk.cpp:84 | exec |
| ○ | start(parallel_&, global_confi[...] | start.cpp:81 | exec |
| ○ | initialise(parallel_&, std::ve[...] | clover_leaf.cpp:192 | exec |
| ○ | main | clover_leaf.cpp:204 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►97.99+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►2.01+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | generate_chunk(int, global_var[...] | generate_chunk.cpp:84 | exec |
| ○ | start(parallel_&, global_confi[...] | start.cpp:81 | exec |
| ○ | initialise(parallel_&, std::ve[...] | clover_leaf.cpp:192 | exec |
| ○ | main | clover_leaf.cpp:204 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►98.27+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►1.73+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | generate_chunk(int, global_var[...] | generate_chunk.cpp:84 | exec |
| ○ | start(parallel_&, global_confi[...] | start.cpp:81 | exec |
| ○ | initialise(parallel_&, std::ve[...] | clover_leaf.cpp:192 | exec |
| ○ | main | clover_leaf.cpp:204 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►98.48+ | omp_fulfill_event | libgomp.so.1.0.0 | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►1.52+ | GOMP_parallel | libgomp.so.1.0.0 | |
| ○ | generate_chunk(int, global_var[...] | generate_chunk.cpp:84 | exec |
| ○ | start(parallel_&, global_confi[...] | start.cpp:81 | exec |
| ○ | initialise(parallel_&, std::ve[...] | clover_leaf.cpp:192 | exec |
| ○ | main | clover_leaf.cpp:204 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | clover_leaf.cpp:51 | exec |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| Path / |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run 1x1
| Source file and lines | generate_chunk.cpp:74-80 |
| Module | exec |
| nb instructions | 55 |
| nb uops | 55 |
| loop length | 220 |
| used w registers | 13 |
| used x registers | 20 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 0 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 21 |
| micro-operation queue | 6.88 cycles |
| front end | 6.88 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 4.50 | 4.50 | 5.50 | 5.50 | 5.50 | 5.50 | 0.00 | 0.00 | 0.00 | 0.00 | 9.17 | 8.83 | 9.00 | 5.50 | 5.50 |
| cycles | 4.50 | 4.50 | 5.50 | 5.50 | 5.50 | 5.50 | 0.00 | 0.00 | 0.00 | 0.00 | 9.17 | 8.83 | 9.00 | 5.50 | 5.50 |
| Cycles executing div or sqrt instructions | 10.00-25.00 |
| Front-end | 6.88 |
| Dispatch | 9.17 |
| DIV/SQRT | 10.00-25.00 |
| Overall L1 | 10.00-25.00 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | 0% |
| other | 0% |
| all | 31% |
| load | 39% |
| store | 37% |
| mul | 12% |
| add-sub | 18% |
| fma | 12% |
| div/sqrt | 12% |
| other | 23% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #-144]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDP W23, W24, [X0, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| CMP W24, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.LE 436154 <_Z14generate_chunkiR16global_variables._omp_fn.0+0x2f4> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| CMP W23, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.LE 436154 <_Z14generate_chunkiR16global_variables._omp_fn.0+0x2f4> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X20, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| MUL W21, W24, W23 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| BL 410210 <@plt_start@+0x1f0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W19, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410240 <@plt_start@+0x220> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| UDIV W1, W21, W19 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| ORR W2, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MSUB W3, W1, W19, W21 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| CMP W0, W3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CC 436174 <_Z14generate_chunkiR16global_variables._omp_fn.0+0x314> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W30, W1, W2, W3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W0, W1, W30 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STR W0, [SP, #108] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| CMP W30, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 436160 <_Z14generate_chunkiR16global_variables._omp_fn.0+0x300> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| UDIV W13, W30, W23 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | scal (12.5%) |
| STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDR X4, [X20] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDR X5, [X20, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDR X6, [X20, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| MSUB W7, W13, W23, W30 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| SBFM X13, X13, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| STR X4, [SP, #120] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| LDR X8, [X20, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| SUB W27, W23, W7 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR X5, [SP, #112] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| LDR X15, [X20, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| STR X6, [SP, #128] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| STR X8, [SP, #136] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #144 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #144 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W1, W1, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W3, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| B 435eb0 <_Z14generate_chunkiR16global_variables._omp_fn.0+0x50> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run 1x1
| Source file and lines | generate_chunk.cpp:74-80 |
| Module | exec |
| nb instructions | 55 |
| nb uops | 55 |
| loop length | 220 |
| used w registers | 13 |
| used x registers | 20 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 0 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 21 |
| micro-operation queue | 6.88 cycles |
| front end | 6.88 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 4.50 | 4.50 | 5.50 | 5.50 | 5.50 | 5.50 | 0.00 | 0.00 | 0.00 | 0.00 | 9.17 | 8.83 | 9.00 | 5.50 | 5.50 |
| cycles | 4.50 | 4.50 | 5.50 | 5.50 | 5.50 | 5.50 | 0.00 | 0.00 | 0.00 | 0.00 | 9.17 | 8.83 | 9.00 | 5.50 | 5.50 |
| Cycles executing div or sqrt instructions | 10.00-25.00 |
| Front-end | 6.88 |
| Dispatch | 9.17 |
| DIV/SQRT | 10.00-25.00 |
| Overall L1 | 10.00-25.00 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | 0% |
| add-sub | 0% |
| fma | 0% |
| div/sqrt | 0% |
| other | 0% |
| all | 31% |
| load | 39% |
| store | 37% |
| mul | 12% |
| add-sub | 18% |
| fma | 12% |
| div/sqrt | 12% |
| other | 23% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| STP X29, X30, [SP, #-144]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDP W23, W24, [X0, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| CMP W24, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.LE 436154 <_Z14generate_chunkiR16global_variables._omp_fn.0+0x2f4> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| CMP W23, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.LE 436154 <_Z14generate_chunkiR16global_variables._omp_fn.0+0x2f4> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| ORR X20, XZR, X0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| MUL W21, W24, W23 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (12.5%) |
| BL 410210 <@plt_start@+0x1f0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ORR W19, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 410240 <@plt_start@+0x220> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| UDIV W1, W21, W19 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | N/A |
| ORR W2, WZR, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MSUB W3, W1, W19, W21 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| CMP W0, W3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CC 436174 <_Z14generate_chunkiR16global_variables._omp_fn.0+0x314> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MADD W30, W1, W2, W3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| ADD W0, W1, W30 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STR W0, [SP, #108] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| CMP W30, W0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.CS 436160 <_Z14generate_chunkiR16global_variables._omp_fn.0+0x300> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| UDIV W13, W30, W23 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 | scal (12.5%) |
| STP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| LDR X4, [X20] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDR X5, [X20, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDR X6, [X20, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| MSUB W7, W13, W23, W30 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
| SBFM X13, X13, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (100.0%) |
| STR X4, [SP, #120] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| LDR X8, [X20, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| SUB W27, W23, W7 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| STR X5, [SP, #112] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| LDR X15, [X20, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| STR X6, [SP, #128] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| STR X8, [SP, #136] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X25, X26, [SP, #64] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X27, X28, [SP, #80] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #144 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| LDP X21, X22, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X23, X24, [SP, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| LDP X29, X30, [SP], #144 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | scal (50.0%) |
| RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD W1, W1, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ W3, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| B 435eb0 <_Z14generate_chunkiR16global_variables._omp_fn.0+0x50> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| Run 1x1 | Number processes: 1<br>Number nodes: NA<br>Run Command: <executable><br>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings<br>Dataset:<br>Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431<br>OMP_PROC_BIND: spread<br>OMP_DISPLAY_AFFINITY: TRUE<br>OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'<br>OMP_DISPLAY_ENV: TRUE<br>OMP_NUM_THREADS: 1<br>OMP_PLACES: threads |
|---|---|
| Run 1x2 | Number processes: 1<br>Run Command: <executable><br>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings<br>Dataset:<br>Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431<br>OMP_NUM_THREADS: 2<br>OMP_PROC_BIND: spread<br>OMP_DISPLAY_AFFINITY: TRUE<br>OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'<br>OMP_DISPLAY_ENV: TRUE<br>OMP_PLACES: threads |
| Run 1x4 | Number processes: 1<br>Run Command: <executable><br>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings<br>Dataset:<br>Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431<br>OMP_NUM_THREADS: 4<br>OMP_PROC_BIND: spread<br>OMP_DISPLAY_AFFINITY: TRUE<br>OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'<br>OMP_DISPLAY_ENV: TRUE<br>OMP_PLACES: threads |
| Run 1x8 | Number processes: 1<br>Run Command: <executable><br>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings<br>Dataset:<br>Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431<br>OMP_NUM_THREADS: 8<br>OMP_PROC_BIND: spread<br>OMP_DISPLAY_AFFINITY: TRUE<br>OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'<br>OMP_DISPLAY_ENV: TRUE<br>OMP_PLACES: threads |
| Run 1x16 | Number processes: 1<br>Run Command: <executable><br>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings<br>Dataset:<br>Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431<br>OMP_NUM_THREADS: 16<br>OMP_PROC_BIND: spread<br>OMP_DISPLAY_AFFINITY: TRUE<br>OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'<br>OMP_DISPLAY_ENV: TRUE<br>OMP_PLACES: threads |
| Run 1x24 | Number processes: 1<br>Run Command: <executable><br>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings<br>Dataset:<br>Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431<br>OMP_NUM_THREADS: 24<br>OMP_PROC_BIND: spread<br>OMP_DISPLAY_AFFINITY: TRUE<br>OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'<br>OMP_DISPLAY_ENV: TRUE<br>OMP_PLACES: threads |
| Run 1x32 | Number processes: 1<br>Run Command: <executable><br>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings<br>Dataset:<br>Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431<br>OMP_NUM_THREADS: 32<br>OMP_PROC_BIND: spread<br>OMP_DISPLAY_AFFINITY: TRUE<br>OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'<br>OMP_DISPLAY_ENV: TRUE<br>OMP_PLACES: threads |
| Run 1x40 | Number processes: 1<br>Run Command: <executable><br>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings<br>Dataset:<br>Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431<br>OMP_NUM_THREADS: 40<br>OMP_PROC_BIND: spread<br>OMP_DISPLAY_AFFINITY: TRUE<br>OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'<br>OMP_DISPLAY_ENV: TRUE<br>OMP_PLACES: threads |
| Run 1x48 | Number processes: 1<br>Run Command: <executable><br>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings<br>Dataset:<br>Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431<br>OMP_NUM_THREADS: 48<br>OMP_PROC_BIND: spread<br>OMP_DISPLAY_AFFINITY: TRUE<br>OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'<br>OMP_DISPLAY_ENV: TRUE<br>OMP_PLACES: threads |
| Run 1x56 | Number processes: 1<br>Run Command: <executable><br>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings<br>Dataset:<br>Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431<br>OMP_NUM_THREADS: 56<br>OMP_PROC_BIND: spread<br>OMP_DISPLAY_AFFINITY: TRUE<br>OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'<br>OMP_DISPLAY_ENV: TRUE<br>OMP_PLACES: threads |
| Run 1x64 | Number processes: 1<br>Run Command: <executable><br>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings<br>Dataset:<br>Run Directory: /home/eoseret/qaas/qaas_runs/178-219-7589/intel/CloverLeaf2.0-CXX/run/oneview_runs/multicore/gcc_4/oneview_run_1782227431<br>OMP_NUM_THREADS: 64<br>OMP_PROC_BIND: spread<br>OMP_DISPLAY_AFFINITY: TRUE<br>OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'<br>OMP_DISPLAY_ENV: TRUE<br>OMP_PLACES: threads |
| (1x1) Efficiency | (1x1) Potential Speed-Up (%) | (1x2) Efficiency | (1x2) Potential Speed-Up (%) | (1x4) Efficiency | (1x4) Potential Speed-Up (%) | (1x8) Efficiency | (1x8) Potential Speed-Up (%) | (1x16) Efficiency | (1x16) Potential Speed-Up (%) | (1x24) Efficiency | (1x24) Potential Speed-Up (%) | (1x32) Efficiency | (1x32) Potential Speed-Up (%) | (1x40) Efficiency | (1x40) Potential Speed-Up (%) | (1x48) Efficiency | (1x48) Potential Speed-Up (%) | (1x56) Efficiency | (1x56) Potential Speed-Up (%) | (1x64) Efficiency | (1x64) Potential Speed-Up (%) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 0 | 0.94 | 0 | 0.85 | 0 | 0.62 | 0.01 | 0.33 | 0.03 | 0.23 | 0.04 | 0.18 | 0.04 | 0.15 | 0.04 | 0.13 | 0.04 | 0.11 | 0.04 | 0.1 | 0.04 |
| Run | Number of threads | Efficiency (ideal is 1) | Speedup | Ideal Speedup | Time (s) | Coverage (%) |
|---|---|---|---|---|---|---|
| 1x1 | 1 | 1 | 1 | 1 | 0.40499997138977 | 0.027248689904809 |
| 1x2 | 2 | 0.94 | 1.88 | 2 | 0.21500000357628 | 0.028761142864823 |
| 1x4 | 4 | 0.85 | 3.41 | 4 | 0.12000000476837 | 0.030932011082768 |
| 1x8 | 8 | 0.62 | 4.98 | 8 | 0.080000005662441 | 0.037437241524458 |
| 1x16 | 16 | 0.33 | 5.27 | 16 | 0.075000002980232 | 0.045649576932192 |
| 1x24 | 24 | 0.23 | 5.49 | 24 | 0.075000002980232 | 0.047045528888702 |
| 1x32 | 32 | 0.18 | 5.78 | 32 | 0.070000007748604 | 0.048514187335968 |
| 1x40 | 40 | 0.15 | 6.18 | 40 | 0.065000005066395 | 0.047856617718935 |
| 1x48 | 48 | 0.13 | 6.33 | 48 | 0.065000005066395 | 0.047517105937004 |
| 1x56 | 56 | 0.11 | 6.37 | 56 | 0.065000005066395 | 0.047366570681334 |
| 1x64 | 64 | 0.1 | 6.37 | 64 | 0.065000005066395 | 0.047225285321474 |
| Name | Coverage (%) | Time (s) |
|---|---|---|
| ▼generate_chunk(int, global_variables&) [clone ._omp_fn.0]– | 0.03 | 0.41 |
| ▼Loop 238 - generate_chunk.cpp:74-80 - exec– | 0.00 | 0.00 |
| ○Loop 239 - generate_chunk.cpp:77-80 - exec | 0.03 | 0.40 |
