| Function: k_means(int, point_t*, point_t*, int*, point_t*, int, int) [clone .extracted] | Module: kmeans-icpx-O3-aggressive | Source: main.cpp:58-70 | Coverage (incl. loops): 92.44% | (excl. loops): 0.00% |
|---|
| Function: k_means(int, point_t*, point_t*, int*, point_t*, int, int) [clone .extracted] | Module: kmeans-icpx-O3-aggressive | Source: main.cpp:58-70 | Coverage (incl. loops): 92.44% | (excl. loops): 0.00% |
|---|
/home/fmusial/KMEANS_Benchmarks/kmeans/main.cpp: 58 - 70 |
-------------------------------------------------------------------------------- |
58: #pragma omp parallel for |
59: for (int i = 0; i < n; ++i) { |
60: double optimal_dist = DBL_MAX; |
61: for (int j = 0; j < k; ++j) { |
62: double dist = |
63: (points[i].x - centroids[j].x) * (points[i].x - centroids[j].x) + |
64: (points[i].y - centroids[j].y) * (points[i].y - centroids[j].y); |
65: if (dist < optimal_dist) { |
66: optimal_dist = dist; |
67: assignment[i] = j; |
68: } |
69: } |
70: } |
0x403780 PUSH %RBP |
0x403781 MOV %RSP,%RBP |
0x403784 PUSH %R15 |
0x403786 PUSH %R14 |
0x403788 PUSH %R13 |
0x40378a PUSH %R12 |
0x40378c PUSH %RBX |
0x40378d SUB $0x18,%RSP |
0x403791 MOV 0x18(%RBP),%EAX |
0x403794 MOVL $0,-0x3c(%RBP) |
0x40379b TEST %EAX,%EAX |
0x40379d JS 403840 |
0x4037a3 MOV %R9,%RBX |
0x4037a6 MOV %R8,%R14 |
0x4037a9 MOV %RCX,%R13 |
0x4037ac MOV %RDX,%R15 |
0x4037af MOV (%RDI),%ESI |
0x4037b1 MOVL $0,-0x30(%RBP) |
0x4037b8 MOV %EAX,-0x2c(%RBP) |
0x4037bb MOVL $0x1,-0x38(%RBP) |
0x4037c2 SUB $0x8,%RSP |
0x4037c6 LEA -0x38(%RBP),%RAX |
0x4037ca LEA -0x3c(%RBP),%RCX |
0x4037ce LEA -0x30(%RBP),%R8 |
0x4037d2 LEA -0x2c(%RBP),%R9 |
0x4037d6 MOV $0x40e160,%EDI |
0x4037db MOV %ESI,-0x34(%RBP) |
0x4037de MOV $0x22,%EDX |
0x4037e3 PUSH $0x1 |
0x4037e5 PUSH $0x1 |
0x4037e7 PUSH %RAX |
0x4037e8 CALL 402200 <__kmpc_for_static_init_4@plt> |
0x4037ed ADD $0x20,%RSP |
0x4037f1 MOV -0x30(%RBP),%EAX |
0x4037f4 MOV -0x2c(%RBP),%ECX |
0x4037f7 CMP %ECX,%EAX |
0x4037f9 JA 403824 |
0x4037fb TEST %EBX,%EBX |
0x4037fd JLE 403824 |
0x4037ff MOV %EBX,%EDX |
0x403801 AND $0x7fffffff,%EDX |
0x403807 SUB %RAX,%RCX |
0x40380a LEA 0x1(%RCX),%RSI |
0x40380e CMP $0x8,%RSI |
0x403812 JAE 403860 |
0x403814 MOV %RSI,%RDI |
0x403817 AND $-0x8,%RDI |
0x40381b CMP %RSI,%RDI |
0x40381e JNE 403a80 |
0x403824 MOV $0x40e180,%EDI |
0x403829 MOV -0x34(%RBP),%ESI |
0x40382c ADD $0x18,%RSP |
0x403830 POP %RBX |
0x403831 POP %R12 |
0x403833 POP %R13 |
0x403835 POP %R14 |
0x403837 POP %R15 |
0x403839 POP %RBP |
0x40383a JMP 402050 |
0x40383f NOP |
0x403840 ADD $0x18,%RSP |
0x403844 POP %RBX |
0x403845 POP %R12 |
0x403847 POP %R13 |
0x403849 POP %R14 |
0x40384b POP %R15 |
0x40384d POP %RBP |
0x40384e RET |
0x40384f NOPW %CS:(%RAX,%RAX,1) |
0x40385e XCHG %AX,%AX |
0x403860 MOV %RSI,%RDI |
0x403863 SHR $0x3,%RDI |
0x403867 DEC %RDI |
0x40386a XOR %R8D,%R8D |
0x40386d VMOVSD 0x67a3(%RIP),%XMM0 |
0x403875 JMP 403889 |
0x403877 NOPW (%RAX,%RAX,1) |
(17) 0x403880 CMP %RDI,%R8 |
(17) 0x403883 LEA 0x1(%R8),%R8 |
(17) 0x403887 JE 403814 |
(17) 0x403889 LEA (%RAX,%R8,8),%R9 |
(17) 0x40388d MOV %R9,%R10 |
(17) 0x403890 SAL $0x4,%R10 |
(17) 0x403894 VMOVUPD (%R15,%R10,1),%XMM1 |
(17) 0x40389a VMOVUPD 0x10(%R15,%R10,1),%XMM2 |
(17) 0x4038a1 VMOVUPD 0x20(%R15,%R10,1),%XMM3 |
(17) 0x4038a8 VMOVUPD 0x30(%R15,%R10,1),%XMM4 |
(17) 0x4038af VMOVUPD 0x40(%R15,%R10,1),%XMM5 |
(17) 0x4038b6 VMOVUPD 0x50(%R15,%R10,1),%XMM6 |
(17) 0x4038bd VMOVUPD 0x60(%R15,%R10,1),%XMM7 |
(17) 0x4038c4 VMOVUPD 0x70(%R15,%R10,1),%XMM8 |
(17) 0x4038cb MOV %R13,%R10 |
(17) 0x4038ce XOR %R11D,%R11D |
(17) 0x4038d1 VMOVAPD %XMM0,%XMM9 |
(17) 0x4038d5 VMOVAPD %XMM0,%XMM10 |
(17) 0x4038d9 VMOVAPD %XMM0,%XMM11 |
(17) 0x4038dd VMOVAPD %XMM0,%XMM12 |
(17) 0x4038e1 VMOVAPD %XMM0,%XMM13 |
(17) 0x4038e5 VMOVAPD %XMM0,%XMM14 |
(17) 0x4038e9 VMOVAPD %XMM0,%XMM15 |
(17) 0x4038ed VMOVAPD %XMM0,%XMM16 |
(17) 0x4038f3 JMP 403910 |
0x4038f5 NOPW %CS:(%RAX,%RAX,1) |
(18) 0x403900 INC %R11 |
(18) 0x403903 ADD $0x10,%R10 |
(18) 0x403907 CMP %R11,%RDX |
(18) 0x40390a JE 403880 |
(18) 0x403910 VMOVUPD (%R10),%XMM17 |
(18) 0x403916 VSUBPD %XMM17,%XMM1,%XMM18 |
(18) 0x40391c VMULPD %XMM18,%XMM18,%XMM18 |
(18) 0x403922 VSHUFPD $0x1,%XMM18,%XMM18,%XMM19 |
(18) 0x403929 VADDSD %XMM18,%XMM19,%XMM18 |
(18) 0x40392f VUCOMISD %XMM16,%XMM18 |
(18) 0x403935 JAE 403941 |
(18) 0x403937 MOV %R11D,(%R14,%R9,4) |
(18) 0x40393b VMOVAPD %XMM18,%XMM16 |
(18) 0x403941 VSUBPD %XMM17,%XMM2,%XMM18 |
(18) 0x403947 VMULPD %XMM18,%XMM18,%XMM18 |
(18) 0x40394d VSHUFPD $0x1,%XMM18,%XMM18,%XMM19 |
(18) 0x403954 VADDSD %XMM18,%XMM19,%XMM18 |
(18) 0x40395a VUCOMISD %XMM15,%XMM18 |
(18) 0x403960 JAE 40396d |
(18) 0x403962 MOV %R11D,0x4(%R14,%R9,4) |
(18) 0x403967 VMOVAPD %XMM18,%XMM15 |
(18) 0x40396d VSUBPD %XMM17,%XMM3,%XMM18 |
(18) 0x403973 VMULPD %XMM18,%XMM18,%XMM18 |
(18) 0x403979 VSHUFPD $0x1,%XMM18,%XMM18,%XMM19 |
(18) 0x403980 VADDSD %XMM18,%XMM19,%XMM18 |
(18) 0x403986 VUCOMISD %XMM14,%XMM18 |
(18) 0x40398c JAE 403999 |
(18) 0x40398e MOV %R11D,0x8(%R14,%R9,4) |
(18) 0x403993 VMOVAPD %XMM18,%XMM14 |
(18) 0x403999 VSUBPD %XMM17,%XMM4,%XMM18 |
(18) 0x40399f VMULPD %XMM18,%XMM18,%XMM18 |
(18) 0x4039a5 VSHUFPD $0x1,%XMM18,%XMM18,%XMM19 |
(18) 0x4039ac VADDSD %XMM18,%XMM19,%XMM18 |
(18) 0x4039b2 VUCOMISD %XMM13,%XMM18 |
(18) 0x4039b8 JAE 4039c5 |
(18) 0x4039ba MOV %R11D,0xc(%R14,%R9,4) |
(18) 0x4039bf VMOVAPD %XMM18,%XMM13 |
(18) 0x4039c5 VSUBPD %XMM17,%XMM5,%XMM18 |
(18) 0x4039cb VMULPD %XMM18,%XMM18,%XMM18 |
(18) 0x4039d1 VSHUFPD $0x1,%XMM18,%XMM18,%XMM19 |
(18) 0x4039d8 VADDSD %XMM18,%XMM19,%XMM18 |
(18) 0x4039de VUCOMISD %XMM12,%XMM18 |
(18) 0x4039e4 JAE 4039f1 |
(18) 0x4039e6 MOV %R11D,0x10(%R14,%R9,4) |
(18) 0x4039eb VMOVAPD %XMM18,%XMM12 |
(18) 0x4039f1 VSUBPD %XMM17,%XMM6,%XMM18 |
(18) 0x4039f7 VMULPD %XMM18,%XMM18,%XMM18 |
(18) 0x4039fd VSHUFPD $0x1,%XMM18,%XMM18,%XMM19 |
(18) 0x403a04 VADDSD %XMM18,%XMM19,%XMM18 |
(18) 0x403a0a VUCOMISD %XMM11,%XMM18 |
(18) 0x403a10 JAE 403a1d |
(18) 0x403a12 MOV %R11D,0x14(%R14,%R9,4) |
(18) 0x403a17 VMOVAPD %XMM18,%XMM11 |
(18) 0x403a1d VSUBPD %XMM17,%XMM7,%XMM18 |
(18) 0x403a23 VMULPD %XMM18,%XMM18,%XMM18 |
(18) 0x403a29 VSHUFPD $0x1,%XMM18,%XMM18,%XMM19 |
(18) 0x403a30 VADDSD %XMM18,%XMM19,%XMM18 |
(18) 0x403a36 VUCOMISD %XMM10,%XMM18 |
(18) 0x403a3c JAE 403a49 |
(18) 0x403a3e MOV %R11D,0x18(%R14,%R9,4) |
(18) 0x403a43 VMOVAPD %XMM18,%XMM10 |
(18) 0x403a49 VSUBPD %XMM17,%XMM8,%XMM17 |
(18) 0x403a4f VMULPD %XMM17,%XMM17,%XMM17 |
(18) 0x403a55 VSHUFPD $0x1,%XMM17,%XMM17,%XMM18 |
(18) 0x403a5c VADDSD %XMM17,%XMM18,%XMM17 |
(18) 0x403a62 VUCOMISD %XMM9,%XMM17 |
(18) 0x403a68 JAE 403900 |
(18) 0x403a6e MOV %R11D,0x1c(%R14,%R9,4) |
(18) 0x403a73 VMOVAPD %XMM17,%XMM9 |
(18) 0x403a79 JMP 403900 |
0x403a7e XCHG %AX,%AX |
0x403a80 MOV %EBX,%ESI |
0x403a82 AND $0x7ffffffc,%ESI |
0x403a88 MOV %EDX,%R8D |
0x403a8b AND $-0x4,%R8D |
0x403a8f SHR $0x2,%EBX |
0x403a92 AND $0x1fffffff,%EBX |
0x403a98 SAL $0x6,%RBX |
0x403a9c ADD %R13,%RBX |
0x403a9f LEA 0x30(%R13),%R9 |
0x403aa3 VMOVSD 0x656d(%RIP),%XMM0 |
0x403aab JMP 403acd |
0x403aad NOPW %CS:(%RAX,%RAX,1) |
0x403abc NOPL (%RAX) |
(14) 0x403ac0 CMP %RCX,%RDI |
(14) 0x403ac3 LEA 0x1(%RDI),%RDI |
(14) 0x403ac7 JE 403824 |
(14) 0x403acd LEA (%RDI,%RAX,1),%R10 |
(14) 0x403ad1 MOV %R10,%R11 |
(14) 0x403ad4 SAL $0x4,%R11 |
(14) 0x403ad8 VMOVUPD (%R15,%R11,1),%XMM1 |
(14) 0x403ade VMOVAPD %XMM0,%XMM2 |
(14) 0x403ae2 CMP $0x4,%EDX |
(14) 0x403ae5 JAE 403b40 |
(14) 0x403ae7 CMP %RDX,%RSI |
(14) 0x403aea JAE 403ac0 |
(14) 0x403aec MOV %RBX,%R11 |
(14) 0x403aef MOV %RSI,%R13 |
(14) 0x403af2 JMP 403b0c |
0x403af4 NOPW %CS:(%RAX,%RAX,1) |
(15) 0x403b00 INC %R13 |
(15) 0x403b03 ADD $0x10,%R11 |
(15) 0x403b07 CMP %R13,%RDX |
(15) 0x403b0a JE 403ac0 |
(15) 0x403b0c VSUBPD (%R11),%XMM1,%XMM3 |
(15) 0x403b11 VMULPD %XMM3,%XMM3,%XMM3 |
(15) 0x403b15 VSHUFPD $0x1,%XMM3,%XMM3,%XMM4 |
(15) 0x403b1a VADDSD %XMM3,%XMM4,%XMM3 |
(15) 0x403b1e VUCOMISD %XMM2,%XMM3 |
(15) 0x403b22 JAE 403b00 |
(15) 0x403b24 MOV %R13D,(%R14,%R10,4) |
(15) 0x403b28 VMOVAPD %XMM3,%XMM2 |
(15) 0x403b2c JMP 403b00 |
0x403b2e NOPW %CS:(%RAX,%RAX,1) |
0x403b3d NOPL (%RAX) |
(14) 0x403b40 MOV %R9,%R11 |
(14) 0x403b43 XOR %R13D,%R13D |
(14) 0x403b46 VMOVAPD %XMM0,%XMM2 |
(14) 0x403b4a JMP 403b71 |
0x403b4c NOPW %CS:(%RAX,%RAX,1) |
0x403b5b NOPL (%RAX,%RAX,1) |
(16) 0x403b60 ADD $0x4,%R13 |
(16) 0x403b64 ADD $0x40,%R11 |
(16) 0x403b68 CMP %R13,%R8 |
(16) 0x403b6b JE 403ae7 |
(16) 0x403b71 VSUBPD -0x30(%R11),%XMM1,%XMM3 |
(16) 0x403b77 VMULPD %XMM3,%XMM3,%XMM3 |
(16) 0x403b7b VSHUFPD $0x1,%XMM3,%XMM3,%XMM4 |
(16) 0x403b80 VADDSD %XMM3,%XMM4,%XMM3 |
(16) 0x403b84 VUCOMISD %XMM2,%XMM3 |
(16) 0x403b88 JAE 403b92 |
(16) 0x403b8a MOV %R13D,(%R14,%R10,4) |
(16) 0x403b8e VMOVAPD %XMM3,%XMM2 |
(16) 0x403b92 VSUBPD -0x20(%R11),%XMM1,%XMM3 |
(16) 0x403b98 VMULPD %XMM3,%XMM3,%XMM3 |
(16) 0x403b9c VSHUFPD $0x1,%XMM3,%XMM3,%XMM4 |
(16) 0x403ba1 VADDSD %XMM3,%XMM4,%XMM3 |
(16) 0x403ba5 VUCOMISD %XMM2,%XMM3 |
(16) 0x403ba9 JAE 403bb7 |
(16) 0x403bab LEA 0x1(%R13),%R12D |
(16) 0x403baf MOV %R12D,(%R14,%R10,4) |
(16) 0x403bb3 VMOVAPD %XMM3,%XMM2 |
(16) 0x403bb7 VSUBPD -0x10(%R11),%XMM1,%XMM3 |
(16) 0x403bbd VMULPD %XMM3,%XMM3,%XMM3 |
(16) 0x403bc1 VSHUFPD $0x1,%XMM3,%XMM3,%XMM4 |
(16) 0x403bc6 VADDSD %XMM3,%XMM4,%XMM3 |
(16) 0x403bca VUCOMISD %XMM2,%XMM3 |
(16) 0x403bce JAE 403bdc |
(16) 0x403bd0 LEA 0x2(%R13),%R12D |
(16) 0x403bd4 MOV %R12D,(%R14,%R10,4) |
(16) 0x403bd8 VMOVAPD %XMM3,%XMM2 |
(16) 0x403bdc VSUBPD (%R11),%XMM1,%XMM3 |
(16) 0x403be1 VMULPD %XMM3,%XMM3,%XMM3 |
(16) 0x403be5 VSHUFPD $0x1,%XMM3,%XMM3,%XMM4 |
(16) 0x403bea VADDSD %XMM3,%XMM4,%XMM3 |
(16) 0x403bee VUCOMISD %XMM2,%XMM3 |
(16) 0x403bf2 JAE 403b60 |
(16) 0x403bf8 LEA 0x3(%R13),%R12D |
(16) 0x403bfc MOV %R12D,(%R14,%R10,4) |
(16) 0x403c00 VMOVAPD %XMM3,%XMM2 |
(16) 0x403c04 JMP 403b60 |
0x403c09 NOPL (%RAX) |
0x403c0c NOPL (%RAX) |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►98.87+ | __kmp_invoke_microtask | libiomp5.so | |
| ○ | __kmp_serial_fork_call(ident*,[...] | libiomp5.so | |
| ○ | __kmp_fork_call | libiomp5.so | |
| ○ | __kmpc_fork_call | libiomp5.so | |
| ○ | k_means(int, point_t*, point_t[...] | main.cpp:58 | kmeans-icpx-O3-aggressive |
| ○ | main | main.cpp:125 | kmeans-icpx-O3-aggressive |
| ○ | __libc_init_first | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | kmeans-icpx-O3-aggressive |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►90.93+ | __kmp_invoke_microtask | libiomp5.so | |
| ○ | __kmp_invoke_task_func | libiomp5.so |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►97.90+ | __kmp_invoke_microtask | libiomp5.so | |
| ○ | __kmp_invoke_task_func | libiomp5.so |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►96.61+ | __kmp_invoke_microtask | libiomp5.so | |
| ○ | __kmp_invoke_task_func | libiomp5.so |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►99.92+ | __kmp_invoke_microtask | libiomp5.so | |
| ○ | __kmp_invoke_task_func | libiomp5.so |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►100.00+ | __kmp_invoke_microtask | libiomp5.so | |
| ○ | __kmp_invoke_task_func | libiomp5.so |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| Path / |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run run_1_thread
| Source file and lines | main.cpp:58-70 |
| Module | kmeans-icpx-O3-aggressive |
| nb instructions | 99 |
| nb uops | 100 |
| loop length | 390 |
| used x86 registers | 14 |
| used mmx registers | 0 |
| used xmm registers | 1 |
| used ymm registers | 0 |
| used zmm registers | 0 |
| nb stack references | 6 |
| micro-operation queue | 25.00 cycles |
| front end | 25.00 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | |
|---|---|---|---|---|---|---|---|---|
| uops | 10.00 | 10.00 | 11.83 | 11.50 | 15.00 | 10.00 | 10.00 | 11.67 |
| cycles | 10.00 | 10.00 | 11.83 | 11.50 | 15.00 | 10.00 | 10.00 | 11.67 |
| Cycles executing div or sqrt instructions | NA |
| FE+BE cycles | 24.88 |
| Stall cycles | 0.00 |
| Front-end | 25.00 |
| Dispatch | 15.00 |
| Overall L1 | 25.00 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 0% |
| all | 0% |
| load | 0% |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | NA (no other vectorizable/vectorized instructions) |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 8% |
| load | 6% |
| store | 6% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 12% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 7% |
| all | 12% |
| load | 12% |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | NA (no other vectorizable/vectorized instructions) |
| all | 8% |
| load | 9% |
| store | 6% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 12% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 7% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | N/A |
| MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| PUSH %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | N/A |
| PUSH %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | N/A |
| PUSH %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | N/A |
| PUSH %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | N/A |
| PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | N/A |
| SUB $0x18,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| MOV 0x18(%RBP),%EAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | N/A |
| MOVL $0,-0x3c(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 2 | 1 | scal (6.3%) |
| TEST %EAX,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (6.3%) |
| JS 403840 <_Z7k_meansiP7point_tS0_PiS0_ii.extracted+0xc0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 | N/A |
| MOV %R9,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| MOV %R8,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (12.5%) |
| MOV %RCX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| MOV %RDX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (12.5%) |
| MOV (%RDI),%ESI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (6.3%) |
| MOVL $0,-0x30(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 2 | 1 | scal (6.3%) |
| MOV %EAX,-0x2c(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | scal (6.3%) |
| MOVL $0x1,-0x38(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 2 | 1 | scal (6.3%) |
| SUB $0x8,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| LEA -0x38(%RBP),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
| LEA -0x3c(%RBP),%RCX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
| LEA -0x30(%RBP),%R8 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
| LEA -0x2c(%RBP),%R9 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
| MOV $0x40e160,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| MOV %ESI,-0x34(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | scal (6.3%) |
| MOV $0x22,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (6.3%) |
| PUSH $0x1 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | N/A |
| PUSH $0x1 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | N/A |
| PUSH %RAX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | N/A |
| CALL 402200 <__kmpc_for_static_init_4@plt> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 | N/A |
| ADD $0x20,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| MOV -0x30(%RBP),%EAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | N/A |
| MOV -0x2c(%RBP),%ECX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | N/A |
| CMP %ECX,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (6.3%) |
| JA 403824 <_Z7k_meansiP7point_tS0_PiS0_ii.extracted+0xa4> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 | N/A |
| TEST %EBX,%EBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (6.3%) |
| JLE 403824 <_Z7k_meansiP7point_tS0_PiS0_ii.extracted+0xa4> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 | N/A |
| MOV %EBX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (6.3%) |
| AND $0x7fffffff,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (6.3%) |
| SUB %RAX,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| LEA 0x1(%RCX),%RSI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
| CMP $0x8,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| JAE 403860 <_Z7k_meansiP7point_tS0_PiS0_ii.extracted+0xe0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 | N/A |
| MOV %RSI,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| AND $-0x8,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| CMP %RSI,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| JNE 403a80 <_Z7k_meansiP7point_tS0_PiS0_ii.extracted+0x300> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 | N/A |
| MOV $0x40e180,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| MOV -0x34(%RBP),%ESI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (6.3%) |
| ADD $0x18,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
| POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
| POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
| POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
| POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
| POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
| JMP 402050 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 | N/A |
| NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| ADD $0x18,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
| POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
| POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
| POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
| POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
| POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
| RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 | N/A |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| MOV %RSI,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| SHR $0x3,%RDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 | N/A |
| DEC %RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (6.3%) |
| VMOVSD 0x67a3(%RIP),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (12.5%) |
| JMP 403889 <_Z7k_meansiP7point_tS0_PiS0_ii.extracted+0x109> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 | N/A |
| NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| MOV %EBX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (6.3%) |
| AND $0x7ffffffc,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (6.3%) |
| MOV %EDX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (6.3%) |
| AND $-0x4,%R8D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (6.3%) |
| SHR $0x2,%EBX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 | N/A |
| AND $0x1fffffff,%EBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| SAL $0x6,%RBX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 | N/A |
| ADD %R13,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| LEA 0x30(%R13),%R9 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
| VMOVSD 0x656d(%RIP),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (12.5%) |
| JMP 403acd <_Z7k_meansiP7point_tS0_PiS0_ii.extracted+0x34d> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 | N/A |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run run_1_thread
| Source file and lines | main.cpp:58-70 |
| Module | kmeans-icpx-O3-aggressive |
| nb instructions | 99 |
| nb uops | 100 |
| loop length | 390 |
| used x86 registers | 14 |
| used mmx registers | 0 |
| used xmm registers | 1 |
| used ymm registers | 0 |
| used zmm registers | 0 |
| nb stack references | 6 |
| micro-operation queue | 25.00 cycles |
| front end | 25.00 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | |
|---|---|---|---|---|---|---|---|---|
| uops | 10.00 | 10.00 | 11.83 | 11.50 | 15.00 | 10.00 | 10.00 | 11.67 |
| cycles | 10.00 | 10.00 | 11.83 | 11.50 | 15.00 | 10.00 | 10.00 | 11.67 |
| Cycles executing div or sqrt instructions | NA |
| FE+BE cycles | 24.88 |
| Stall cycles | 0.00 |
| Front-end | 25.00 |
| Dispatch | 15.00 |
| Overall L1 | 25.00 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 0% |
| all | 0% |
| load | 0% |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | NA (no other vectorizable/vectorized instructions) |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 8% |
| load | 6% |
| store | 6% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 12% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 7% |
| all | 12% |
| load | 12% |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | NA (no other vectorizable/vectorized instructions) |
| all | 8% |
| load | 9% |
| store | 6% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 12% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 7% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | N/A |
| MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| PUSH %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | N/A |
| PUSH %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | N/A |
| PUSH %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | N/A |
| PUSH %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | N/A |
| PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | N/A |
| SUB $0x18,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| MOV 0x18(%RBP),%EAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | N/A |
| MOVL $0,-0x3c(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 2 | 1 | scal (6.3%) |
| TEST %EAX,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (6.3%) |
| JS 403840 <_Z7k_meansiP7point_tS0_PiS0_ii.extracted+0xc0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 | N/A |
| MOV %R9,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| MOV %R8,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (12.5%) |
| MOV %RCX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| MOV %RDX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (12.5%) |
| MOV (%RDI),%ESI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (6.3%) |
| MOVL $0,-0x30(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 2 | 1 | scal (6.3%) |
| MOV %EAX,-0x2c(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | scal (6.3%) |
| MOVL $0x1,-0x38(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 2 | 1 | scal (6.3%) |
| SUB $0x8,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| LEA -0x38(%RBP),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
| LEA -0x3c(%RBP),%RCX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
| LEA -0x30(%RBP),%R8 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
| LEA -0x2c(%RBP),%R9 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
| MOV $0x40e160,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| MOV %ESI,-0x34(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | scal (6.3%) |
| MOV $0x22,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (6.3%) |
| PUSH $0x1 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | N/A |
| PUSH $0x1 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | N/A |
| PUSH %RAX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | N/A |
| CALL 402200 <__kmpc_for_static_init_4@plt> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 | N/A |
| ADD $0x20,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| MOV -0x30(%RBP),%EAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | N/A |
| MOV -0x2c(%RBP),%ECX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | N/A |
| CMP %ECX,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (6.3%) |
| JA 403824 <_Z7k_meansiP7point_tS0_PiS0_ii.extracted+0xa4> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 | N/A |
| TEST %EBX,%EBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (6.3%) |
| JLE 403824 <_Z7k_meansiP7point_tS0_PiS0_ii.extracted+0xa4> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 | N/A |
| MOV %EBX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (6.3%) |
| AND $0x7fffffff,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (6.3%) |
| SUB %RAX,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| LEA 0x1(%RCX),%RSI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
| CMP $0x8,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| JAE 403860 <_Z7k_meansiP7point_tS0_PiS0_ii.extracted+0xe0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 | N/A |
| MOV %RSI,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| AND $-0x8,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| CMP %RSI,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| JNE 403a80 <_Z7k_meansiP7point_tS0_PiS0_ii.extracted+0x300> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 | N/A |
| MOV $0x40e180,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| MOV -0x34(%RBP),%ESI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (6.3%) |
| ADD $0x18,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
| POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
| POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
| POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
| POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
| POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
| JMP 402050 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 | N/A |
| NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| ADD $0x18,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
| POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
| POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
| POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
| POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
| POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
| RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 | N/A |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| MOV %RSI,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| SHR $0x3,%RDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 | N/A |
| DEC %RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (6.3%) |
| VMOVSD 0x67a3(%RIP),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (12.5%) |
| JMP 403889 <_Z7k_meansiP7point_tS0_PiS0_ii.extracted+0x109> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 | N/A |
| NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| MOV %EBX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (6.3%) |
| AND $0x7ffffffc,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (6.3%) |
| MOV %EDX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (6.3%) |
| AND $-0x4,%R8D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (6.3%) |
| SHR $0x2,%EBX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 | N/A |
| AND $0x1fffffff,%EBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| SAL $0x6,%RBX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 | N/A |
| ADD %R13,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| LEA 0x30(%R13),%R9 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
| VMOVSD 0x656d(%RIP),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (12.5%) |
| JMP 403acd <_Z7k_meansiP7point_tS0_PiS0_ii.extracted+0x34d> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 | N/A |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| Run run_1_thread | Number processes: 1; Number nodes: 1; Run Command: <executable> input/100000000.in 1000 100000000 50 25; MPI Command: ; Dataset: ; Run Directory: /home/fmusial/KMEANS_Benchmarks; OMP_PROC_BIND: close; OMP_NUM_THREADS: 1 |
|---|---|
| Run run_2_threads | Number processes: 1; Number nodes: 1; Run Command: <executable> input/100000000.in 1000 100000000 50 25; MPI Command: ; Dataset: ; Run Directory: /home/fmusial/KMEANS_Benchmarks; OMP_PROC_BIND: close; OMP_NUM_THREADS: 2 |
| Run run_4_threads | Number processes: 1; Number nodes: 1; Run Command: <executable> input/100000000.in 1000 100000000 50 25; MPI Command: ; Dataset: ; Run Directory: /home/fmusial/KMEANS_Benchmarks; OMP_PROC_BIND: close; OMP_NUM_THREADS: 4 |
| Run run_8_threads | Number processes: 1; Number nodes: 1; Run Command: <executable> input/100000000.in 1000 100000000 50 25; MPI Command: ; Dataset: ; Run Directory: /home/fmusial/KMEANS_Benchmarks; OMP_PROC_BIND: close; OMP_NUM_THREADS: 8 |
| Run run_16_threads | Number processes: 1; Number nodes: 1; Run Command: <executable> input/100000000.in 1000 100000000 50 25; MPI Command: ; Dataset: ; Run Directory: /home/fmusial/KMEANS_Benchmarks; OMP_PROC_BIND: close; OMP_NUM_THREADS: 16 |
| Run run_26_threads | Number processes: 1; Number nodes: 1; Run Command: <executable> input/100000000.in 1000 100000000 50 25; MPI Command: ; Dataset: ; Run Directory: /home/fmusial/KMEANS_Benchmarks; OMP_PROC_BIND: close; OMP_NUM_THREADS: 26 |
| (run_1_thread) Efficiency | (run_1_thread) Potential Speed-Up (%) | (run_2_threads) Efficiency | (run_2_threads) Potential Speed-Up (%) | (run_4_threads) Efficiency | (run_4_threads) Potential Speed-Up (%) | (run_8_threads) Efficiency | (run_8_threads) Potential Speed-Up (%) | (run_16_threads) Efficiency | (run_16_threads) Potential Speed-Up (%) | (run_26_threads) Efficiency | (run_26_threads) Potential Speed-Up (%) |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 0 | 0.96 | 3.54 | 0.9 | 8.83 | 0.8 | 14.86 | 0.7 | 18.85 | 0.63 | 19.08 |
| Run | Number of threads | Efficiency (ideal is 1) | Speedup | Ideal Speedup | Time (s) | Coverage (%) |
|---|---|---|---|---|---|---|
| run_1_thread | 1 | 1 | 1 | 1 | 137.08497619629 | 92.440742492676 |
| run_2_threads | 2 | 0.96 | 1.92 | 2 | 68.505020141602 | 89.565902709961 |
| run_4_threads | 4 | 0.9 | 3.58 | 4 | 34.244998931885 | 84.27367401123 |
| run_8_threads | 8 | 0.8 | 6.42 | 8 | 17.125001907349 | 75.408073425293 |
| run_16_threads | 16 | 0.7 | 11.15 | 16 | 8.5600004196167 | 62.242477416992 |
| run_26_threads | 26 | 0.63 | 16.3 | 26 | 5.3099999427795 | 51.154148101807 |
| Name | Coverage (%) | Time (s) |
|---|---|---|
| ▼k_means(int, point_t*, point_t*, int*, point_t*, int, int) [clone .extracted]– | 92.44 | 137.08 |
| ▼Loop 17 - main.cpp:59-70 - kmeans-icpx-O3-aggressive– | 0.99 | 1.46 |
| ○Loop 18 - main.cpp:61-67 - kmeans-icpx-O3-aggressive | 91.45 | 135.62 |
| ▼Loop 14 - main.cpp:59-70 - kmeans-icpx-O3-aggressive– | 0.00 | 0.00 |
| ○Loop 15 - main.cpp:61-67 - kmeans-icpx-O3-aggressive | 0.00 | 0.00 |
| ○Loop 16 - main.cpp:61-67 - kmeans-icpx-O3-aggressive | 0.00 | 0.00 |
