| Function: k_means(int, point_t*, point_t*, int*, point_t*, int, int) [clone .extracted] | Module: kmeans-icpx-O3-aggressive | Source: main.cpp:70-82 | Coverage (incl. loops): 92.32% | (excl. loops): 0.00% |
|---|
| Function: k_means(int, point_t*, point_t*, int*, point_t*, int, int) [clone .extracted] | Module: kmeans-icpx-O3-aggressive | Source: main.cpp:70-82 | Coverage (incl. loops): 92.32% | (excl. loops): 0.00% |
|---|
/home/fmusial/KMEANS_Benchmarks/kmeans/main.cpp: 70 - 82 |
-------------------------------------------------------------------------------- |
70: #pragma omp parallel for |
71: for (int i = 0; i < n; ++i) { |
72: double optimal_dist = DBL_MAX; |
73: for (int j = 0; j < k; ++j) { |
74: double dist = |
75: (points[i].x - centroids[j].x) * (points[i].x - centroids[j].x) + |
76: (points[i].y - centroids[j].y) * (points[i].y - centroids[j].y); |
77: if (dist < optimal_dist) { |
78: optimal_dist = dist; |
79: assignment[i] = j; |
80: } |
81: } |
82: } |
0x403850 PUSH %RBP |
0x403851 MOV %RSP,%RBP |
0x403854 PUSH %R15 |
0x403856 PUSH %R14 |
0x403858 PUSH %R13 |
0x40385a PUSH %R12 |
0x40385c PUSH %RBX |
0x40385d SUB $0x18,%RSP |
0x403861 MOV 0x18(%RBP),%EAX |
0x403864 MOVL $0,-0x3c(%RBP) |
0x40386b TEST %EAX,%EAX |
0x40386d JS 403920 |
0x403873 MOV %R9,%RBX |
0x403876 MOV %R8,%R14 |
0x403879 MOV %RCX,%R13 |
0x40387c MOV %RDX,%R15 |
0x40387f MOV (%RDI),%ESI |
0x403881 MOVL $0,-0x30(%RBP) |
0x403888 MOV %EAX,-0x2c(%RBP) |
0x40388b MOVL $0x1,-0x38(%RBP) |
0x403892 SUB $0x8,%RSP |
0x403896 LEA -0x38(%RBP),%RAX |
0x40389a LEA -0x3c(%RBP),%RCX |
0x40389e LEA -0x30(%RBP),%R8 |
0x4038a2 LEA -0x2c(%RBP),%R9 |
0x4038a6 MOV $0x40e170,%EDI |
0x4038ab MOV %ESI,-0x34(%RBP) |
0x4038ae MOV $0x22,%EDX |
0x4038b3 PUSH $0x1 |
0x4038b5 PUSH $0x1 |
0x4038b7 PUSH %RAX |
0x4038b8 CALL 402220 <__kmpc_for_static_init_4@plt> |
0x4038bd ADD $0x20,%RSP |
0x4038c1 MOV -0x30(%RBP),%EAX |
0x4038c4 MOV -0x2c(%RBP),%ECX |
0x4038c7 CMP %ECX,%EAX |
0x4038c9 JA 4038f4 |
0x4038cb TEST %EBX,%EBX |
0x4038cd JLE 4038f4 |
0x4038cf MOV %EBX,%EDX |
0x4038d1 AND $0x7fffffff,%EDX |
0x4038d7 SUB %RAX,%RCX |
0x4038da LEA 0x1(%RCX),%RSI |
0x4038de CMP $0x8,%RSI |
0x4038e2 JAE 403940 |
0x4038e4 MOV %RSI,%RDI |
0x4038e7 AND $-0x8,%RDI |
0x4038eb CMP %RSI,%RDI |
0x4038ee JNE 403b60 |
0x4038f4 MOV $0x40e190,%EDI |
0x4038f9 MOV -0x34(%RBP),%ESI |
0x4038fc ADD $0x18,%RSP |
0x403900 POP %RBX |
0x403901 POP %R12 |
0x403903 POP %R13 |
0x403905 POP %R14 |
0x403907 POP %R15 |
0x403909 POP %RBP |
0x40390a JMP 402050 |
0x40390f NOPW %CS:(%RAX,%RAX,1) |
0x40391e XCHG %AX,%AX |
0x403920 ADD $0x18,%RSP |
0x403924 POP %RBX |
0x403925 POP %R12 |
0x403927 POP %R13 |
0x403929 POP %R14 |
0x40392b POP %R15 |
0x40392d POP %RBP |
0x40392e RET |
0x40392f NOPW %CS:(%RAX,%RAX,1) |
0x40393e XCHG %AX,%AX |
0x403940 MOV %RSI,%RDI |
0x403943 SHR $0x3,%RDI |
0x403947 DEC %RDI |
0x40394a XOR %R8D,%R8D |
0x40394d VMOVSD 0x66c3(%RIP),%XMM0 |
0x403955 JMP 40396d |
0x403957 NOPW (%RAX,%RAX,1) |
(17) 0x403960 CMP %RDI,%R8 |
(17) 0x403963 LEA 0x1(%R8),%R8 |
(17) 0x403967 JE 4038e4 |
(17) 0x40396d LEA (%RAX,%R8,8),%R9 |
(17) 0x403971 MOV %R9,%R10 |
(17) 0x403974 SAL $0x4,%R10 |
(17) 0x403978 VMOVUPD (%R15,%R10,1),%XMM1 |
(17) 0x40397e VMOVUPD 0x10(%R15,%R10,1),%XMM2 |
(17) 0x403985 VMOVUPD 0x20(%R15,%R10,1),%XMM3 |
(17) 0x40398c VMOVUPD 0x30(%R15,%R10,1),%XMM4 |
(17) 0x403993 VMOVUPD 0x40(%R15,%R10,1),%XMM5 |
(17) 0x40399a VMOVUPD 0x50(%R15,%R10,1),%XMM6 |
(17) 0x4039a1 VMOVUPD 0x60(%R15,%R10,1),%XMM7 |
(17) 0x4039a8 VMOVUPD 0x70(%R15,%R10,1),%XMM8 |
(17) 0x4039af MOV %R13,%R10 |
(17) 0x4039b2 XOR %R11D,%R11D |
(17) 0x4039b5 VMOVAPD %XMM0,%XMM9 |
(17) 0x4039b9 VMOVAPD %XMM0,%XMM10 |
(17) 0x4039bd VMOVAPD %XMM0,%XMM11 |
(17) 0x4039c1 VMOVAPD %XMM0,%XMM12 |
(17) 0x4039c5 VMOVAPD %XMM0,%XMM13 |
(17) 0x4039c9 VMOVAPD %XMM0,%XMM14 |
(17) 0x4039cd VMOVAPD %XMM0,%XMM15 |
(17) 0x4039d1 VMOVAPD %XMM0,%XMM16 |
(17) 0x4039d7 JMP 4039f0 |
0x4039d9 NOPL (%RAX) |
(18) 0x4039e0 INC %R11 |
(18) 0x4039e3 ADD $0x10,%R10 |
(18) 0x4039e7 CMP %R11,%RDX |
(18) 0x4039ea JE 403960 |
(18) 0x4039f0 VMOVUPD (%R10),%XMM17 |
(18) 0x4039f6 VSUBPD %XMM17,%XMM1,%XMM18 |
(18) 0x4039fc VMULPD %XMM18,%XMM18,%XMM18 |
(18) 0x403a02 VSHUFPD $0x1,%XMM18,%XMM18,%XMM19 |
(18) 0x403a09 VADDSD %XMM18,%XMM19,%XMM18 |
(18) 0x403a0f VUCOMISD %XMM16,%XMM18 |
(18) 0x403a15 JAE 403a21 |
(18) 0x403a17 MOV %R11D,(%R14,%R9,4) |
(18) 0x403a1b VMOVAPD %XMM18,%XMM16 |
(18) 0x403a21 VSUBPD %XMM17,%XMM2,%XMM18 |
(18) 0x403a27 VMULPD %XMM18,%XMM18,%XMM18 |
(18) 0x403a2d VSHUFPD $0x1,%XMM18,%XMM18,%XMM19 |
(18) 0x403a34 VADDSD %XMM18,%XMM19,%XMM18 |
(18) 0x403a3a VUCOMISD %XMM15,%XMM18 |
(18) 0x403a40 JAE 403a4d |
(18) 0x403a42 MOV %R11D,0x4(%R14,%R9,4) |
(18) 0x403a47 VMOVAPD %XMM18,%XMM15 |
(18) 0x403a4d VSUBPD %XMM17,%XMM3,%XMM18 |
(18) 0x403a53 VMULPD %XMM18,%XMM18,%XMM18 |
(18) 0x403a59 VSHUFPD $0x1,%XMM18,%XMM18,%XMM19 |
(18) 0x403a60 VADDSD %XMM18,%XMM19,%XMM18 |
(18) 0x403a66 VUCOMISD %XMM14,%XMM18 |
(18) 0x403a6c JAE 403a79 |
(18) 0x403a6e MOV %R11D,0x8(%R14,%R9,4) |
(18) 0x403a73 VMOVAPD %XMM18,%XMM14 |
(18) 0x403a79 VSUBPD %XMM17,%XMM4,%XMM18 |
(18) 0x403a7f VMULPD %XMM18,%XMM18,%XMM18 |
(18) 0x403a85 VSHUFPD $0x1,%XMM18,%XMM18,%XMM19 |
(18) 0x403a8c VADDSD %XMM18,%XMM19,%XMM18 |
(18) 0x403a92 VUCOMISD %XMM13,%XMM18 |
(18) 0x403a98 JAE 403aa5 |
(18) 0x403a9a MOV %R11D,0xc(%R14,%R9,4) |
(18) 0x403a9f VMOVAPD %XMM18,%XMM13 |
(18) 0x403aa5 VSUBPD %XMM17,%XMM5,%XMM18 |
(18) 0x403aab VMULPD %XMM18,%XMM18,%XMM18 |
(18) 0x403ab1 VSHUFPD $0x1,%XMM18,%XMM18,%XMM19 |
(18) 0x403ab8 VADDSD %XMM18,%XMM19,%XMM18 |
(18) 0x403abe VUCOMISD %XMM12,%XMM18 |
(18) 0x403ac4 JAE 403ad1 |
(18) 0x403ac6 MOV %R11D,0x10(%R14,%R9,4) |
(18) 0x403acb VMOVAPD %XMM18,%XMM12 |
(18) 0x403ad1 VSUBPD %XMM17,%XMM6,%XMM18 |
(18) 0x403ad7 VMULPD %XMM18,%XMM18,%XMM18 |
(18) 0x403add VSHUFPD $0x1,%XMM18,%XMM18,%XMM19 |
(18) 0x403ae4 VADDSD %XMM18,%XMM19,%XMM18 |
(18) 0x403aea VUCOMISD %XMM11,%XMM18 |
(18) 0x403af0 JAE 403afd |
(18) 0x403af2 MOV %R11D,0x14(%R14,%R9,4) |
(18) 0x403af7 VMOVAPD %XMM18,%XMM11 |
(18) 0x403afd VSUBPD %XMM17,%XMM7,%XMM18 |
(18) 0x403b03 VMULPD %XMM18,%XMM18,%XMM18 |
(18) 0x403b09 VSHUFPD $0x1,%XMM18,%XMM18,%XMM19 |
(18) 0x403b10 VADDSD %XMM18,%XMM19,%XMM18 |
(18) 0x403b16 VUCOMISD %XMM10,%XMM18 |
(18) 0x403b1c JAE 403b29 |
(18) 0x403b1e MOV %R11D,0x18(%R14,%R9,4) |
(18) 0x403b23 VMOVAPD %XMM18,%XMM10 |
(18) 0x403b29 VSUBPD %XMM17,%XMM8,%XMM17 |
(18) 0x403b2f VMULPD %XMM17,%XMM17,%XMM17 |
(18) 0x403b35 VSHUFPD $0x1,%XMM17,%XMM17,%XMM18 |
(18) 0x403b3c VADDSD %XMM17,%XMM18,%XMM17 |
(18) 0x403b42 VUCOMISD %XMM9,%XMM17 |
(18) 0x403b48 JAE 4039e0 |
(18) 0x403b4e MOV %R11D,0x1c(%R14,%R9,4) |
(18) 0x403b53 VMOVAPD %XMM17,%XMM9 |
(18) 0x403b59 JMP 4039e0 |
0x403b5e XCHG %AX,%AX |
0x403b60 MOV %EBX,%ESI |
0x403b62 AND $0x7ffffffc,%ESI |
0x403b68 MOV %EDX,%R8D |
0x403b6b AND $-0x4,%R8D |
0x403b6f SHR $0x2,%EBX |
0x403b72 AND $0x1fffffff,%EBX |
0x403b78 SAL $0x6,%RBX |
0x403b7c ADD %R13,%RBX |
0x403b7f LEA 0x30(%R13),%R9 |
0x403b83 VMOVSD 0x648d(%RIP),%XMM0 |
0x403b8b JMP 403bad |
0x403b8d NOPW %CS:(%RAX,%RAX,1) |
0x403b9c NOPL (%RAX) |
(14) 0x403ba0 CMP %RCX,%RDI |
(14) 0x403ba3 LEA 0x1(%RDI),%RDI |
(14) 0x403ba7 JE 4038f4 |
(14) 0x403bad LEA (%RDI,%RAX,1),%R10 |
(14) 0x403bb1 MOV %R10,%R11 |
(14) 0x403bb4 SAL $0x4,%R11 |
(14) 0x403bb8 VMOVUPD (%R15,%R11,1),%XMM1 |
(14) 0x403bbe VMOVAPD %XMM0,%XMM2 |
(14) 0x403bc2 CMP $0x4,%EDX |
(14) 0x403bc5 JAE 403c20 |
(14) 0x403bc7 CMP %RDX,%RSI |
(14) 0x403bca JAE 403ba0 |
(14) 0x403bcc MOV %RBX,%R11 |
(14) 0x403bcf MOV %RSI,%R13 |
(14) 0x403bd2 JMP 403bec |
0x403bd4 NOPW %CS:(%RAX,%RAX,1) |
(15) 0x403be0 INC %R13 |
(15) 0x403be3 ADD $0x10,%R11 |
(15) 0x403be7 CMP %R13,%RDX |
(15) 0x403bea JE 403ba0 |
(15) 0x403bec VSUBPD (%R11),%XMM1,%XMM3 |
(15) 0x403bf1 VMULPD %XMM3,%XMM3,%XMM3 |
(15) 0x403bf5 VSHUFPD $0x1,%XMM3,%XMM3,%XMM4 |
(15) 0x403bfa VADDSD %XMM3,%XMM4,%XMM3 |
(15) 0x403bfe VUCOMISD %XMM2,%XMM3 |
(15) 0x403c02 JAE 403be0 |
(15) 0x403c04 MOV %R13D,(%R14,%R10,4) |
(15) 0x403c08 VMOVAPD %XMM3,%XMM2 |
(15) 0x403c0c JMP 403be0 |
0x403c0e NOPW %CS:(%RAX,%RAX,1) |
0x403c1d NOPL (%RAX) |
(14) 0x403c20 MOV %R9,%R11 |
(14) 0x403c23 XOR %R13D,%R13D |
(14) 0x403c26 VMOVAPD %XMM0,%XMM2 |
(14) 0x403c2a JMP 403c51 |
0x403c2c NOPW %CS:(%RAX,%RAX,1) |
0x403c3b NOPL (%RAX,%RAX,1) |
(16) 0x403c40 ADD $0x4,%R13 |
(16) 0x403c44 ADD $0x40,%R11 |
(16) 0x403c48 CMP %R13,%R8 |
(16) 0x403c4b JE 403bc7 |
(16) 0x403c51 VSUBPD -0x30(%R11),%XMM1,%XMM3 |
(16) 0x403c57 VMULPD %XMM3,%XMM3,%XMM3 |
(16) 0x403c5b VSHUFPD $0x1,%XMM3,%XMM3,%XMM4 |
(16) 0x403c60 VADDSD %XMM3,%XMM4,%XMM3 |
(16) 0x403c64 VUCOMISD %XMM2,%XMM3 |
(16) 0x403c68 JAE 403c72 |
(16) 0x403c6a MOV %R13D,(%R14,%R10,4) |
(16) 0x403c6e VMOVAPD %XMM3,%XMM2 |
(16) 0x403c72 VSUBPD -0x20(%R11),%XMM1,%XMM3 |
(16) 0x403c78 VMULPD %XMM3,%XMM3,%XMM3 |
(16) 0x403c7c VSHUFPD $0x1,%XMM3,%XMM3,%XMM4 |
(16) 0x403c81 VADDSD %XMM3,%XMM4,%XMM3 |
(16) 0x403c85 VUCOMISD %XMM2,%XMM3 |
(16) 0x403c89 JAE 403c97 |
(16) 0x403c8b LEA 0x1(%R13),%R12D |
(16) 0x403c8f MOV %R12D,(%R14,%R10,4) |
(16) 0x403c93 VMOVAPD %XMM3,%XMM2 |
(16) 0x403c97 VSUBPD -0x10(%R11),%XMM1,%XMM3 |
(16) 0x403c9d VMULPD %XMM3,%XMM3,%XMM3 |
(16) 0x403ca1 VSHUFPD $0x1,%XMM3,%XMM3,%XMM4 |
(16) 0x403ca6 VADDSD %XMM3,%XMM4,%XMM3 |
(16) 0x403caa VUCOMISD %XMM2,%XMM3 |
(16) 0x403cae JAE 403cbc |
(16) 0x403cb0 LEA 0x2(%R13),%R12D |
(16) 0x403cb4 MOV %R12D,(%R14,%R10,4) |
(16) 0x403cb8 VMOVAPD %XMM3,%XMM2 |
(16) 0x403cbc VSUBPD (%R11),%XMM1,%XMM3 |
(16) 0x403cc1 VMULPD %XMM3,%XMM3,%XMM3 |
(16) 0x403cc5 VSHUFPD $0x1,%XMM3,%XMM3,%XMM4 |
(16) 0x403cca VADDSD %XMM3,%XMM4,%XMM3 |
(16) 0x403cce VUCOMISD %XMM2,%XMM3 |
(16) 0x403cd2 JAE 403c40 |
(16) 0x403cd8 LEA 0x3(%R13),%R12D |
(16) 0x403cdc MOV %R12D,(%R14,%R10,4) |
(16) 0x403ce0 VMOVAPD %XMM3,%XMM2 |
(16) 0x403ce4 JMP 403c40 |
0x403ce9 NOPL (%RAX) |
0x403cec NOPL (%RAX) |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►100.00+ | __kmp_invoke_microtask | libiomp5.so | |
| ○ | __kmp_serial_fork_call(ident*,[...] | libiomp5.so | |
| ○ | __kmp_fork_call | libiomp5.so | |
| ○ | __kmpc_fork_call | libiomp5.so | |
| ○ | k_means(int, point_t*, point_t[...] | main.cpp:70 | kmeans-icpx-O3-aggressive |
| ○ | main | main.cpp:27 | kmeans-icpx-O3-aggressive |
| ○ | __libc_init_first | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | kmeans-icpx-O3-aggressive |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►100.00+ | __kmp_invoke_microtask | libiomp5.so | |
| ○ | __kmp_invoke_task_func | libiomp5.so |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►100.00+ | __kmp_invoke_microtask | libiomp5.so | |
| ○ | __kmp_invoke_task_func | libiomp5.so |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►100.00+ | __kmp_invoke_microtask | libiomp5.so | |
| ○ | __kmp_invoke_task_func | libiomp5.so |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►100.00+ | __kmp_invoke_microtask | libiomp5.so | |
| ○ | __kmp_invoke_task_func | libiomp5.so |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| Path / |
The code analyzed by CQA in this panel excludes loops and accounts for 0.00% of application time in run run_1_thread.
| Source file and lines | main.cpp:70-82 |
| Module | kmeans-icpx-O3-aggressive |
| nb instructions | 100 |
| nb uops | 101 |
| loop length | 402 |
| used x86 registers | 14 |
| used mmx registers | 0 |
| used xmm registers | 1 |
| used ymm registers | 0 |
| used zmm registers | 0 |
| nb stack references | 6 |
| micro-operation queue | 25.25 cycles |
| front end | 25.25 cycles |
| | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 |
|---|---|---|---|---|---|---|---|---|
| uops | 10.00 | 10.00 | 11.83 | 11.50 | 15.00 | 10.00 | 10.00 | 11.67 |
| cycles | 10.00 | 10.00 | 11.83 | 11.50 | 15.00 | 10.00 | 10.00 | 11.67 |
| Cycles executing div or sqrt instructions | NA |
| FE+BE cycles | 25.13 |
| Stall cycles | 0.00 |
| Front-end | 25.25 |
| Dispatch | 15.00 |
| Overall L1 | 25.25 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 0% |
| all | 0% |
| load | 0% |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | NA (no other vectorizable/vectorized instructions) |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 8% |
| load | 6% |
| store | 6% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 12% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 7% |
| all | 12% |
| load | 12% |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | NA (no other vectorizable/vectorized instructions) |
| all | 8% |
| load | 9% |
| store | 6% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 12% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 7% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | N/A |
| MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| PUSH %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | N/A |
| PUSH %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | N/A |
| PUSH %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | N/A |
| PUSH %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | N/A |
| PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | N/A |
| SUB $0x18,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| MOV 0x18(%RBP),%EAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | N/A |
| MOVL $0,-0x3c(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 2 | 1 | scal (6.3%) |
| TEST %EAX,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (6.3%) |
| JS 403920 <_Z7k_meansiP7point_tS0_PiS0_ii.extracted+0xd0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 | N/A |
| MOV %R9,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| MOV %R8,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (12.5%) |
| MOV %RCX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| MOV %RDX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (12.5%) |
| MOV (%RDI),%ESI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (6.3%) |
| MOVL $0,-0x30(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 2 | 1 | scal (6.3%) |
| MOV %EAX,-0x2c(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | scal (6.3%) |
| MOVL $0x1,-0x38(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 2 | 1 | scal (6.3%) |
| SUB $0x8,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| LEA -0x38(%RBP),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
| LEA -0x3c(%RBP),%RCX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
| LEA -0x30(%RBP),%R8 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
| LEA -0x2c(%RBP),%R9 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
| MOV $0x40e170,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| MOV %ESI,-0x34(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | scal (6.3%) |
| MOV $0x22,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (6.3%) |
| PUSH $0x1 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | N/A |
| PUSH $0x1 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | N/A |
| PUSH %RAX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | N/A |
| CALL 402220 <__kmpc_for_static_init_4@plt> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 | N/A |
| ADD $0x20,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| MOV -0x30(%RBP),%EAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | N/A |
| MOV -0x2c(%RBP),%ECX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | N/A |
| CMP %ECX,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (6.3%) |
| JA 4038f4 <_Z7k_meansiP7point_tS0_PiS0_ii.extracted+0xa4> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 | N/A |
| TEST %EBX,%EBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (6.3%) |
| JLE 4038f4 <_Z7k_meansiP7point_tS0_PiS0_ii.extracted+0xa4> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 | N/A |
| MOV %EBX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (6.3%) |
| AND $0x7fffffff,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (6.3%) |
| SUB %RAX,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| LEA 0x1(%RCX),%RSI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
| CMP $0x8,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| JAE 403940 <_Z7k_meansiP7point_tS0_PiS0_ii.extracted+0xf0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 | N/A |
| MOV %RSI,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| AND $-0x8,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| CMP %RSI,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| JNE 403b60 <_Z7k_meansiP7point_tS0_PiS0_ii.extracted+0x310> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 | N/A |
| MOV $0x40e190,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| MOV -0x34(%RBP),%ESI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (6.3%) |
| ADD $0x18,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
| POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
| POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
| POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
| POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
| POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
| JMP 402050 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 | N/A |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| ADD $0x18,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
| POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
| POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
| POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
| POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
| POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
| RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 | N/A |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| MOV %RSI,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| SHR $0x3,%RDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 | N/A |
| DEC %RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (6.3%) |
| VMOVSD 0x66c3(%RIP),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (12.5%) |
| JMP 40396d <_Z7k_meansiP7point_tS0_PiS0_ii.extracted+0x11d> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 | N/A |
| NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| MOV %EBX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (6.3%) |
| AND $0x7ffffffc,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (6.3%) |
| MOV %EDX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (6.3%) |
| AND $-0x4,%R8D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (6.3%) |
| SHR $0x2,%EBX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 | N/A |
| AND $0x1fffffff,%EBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| SAL $0x6,%RBX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 | N/A |
| ADD %R13,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| LEA 0x30(%R13),%R9 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
| VMOVSD 0x648d(%RIP),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (12.5%) |
| JMP 403bad <_Z7k_meansiP7point_tS0_PiS0_ii.extracted+0x35d> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 | N/A |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
The code analyzed by CQA in this panel excludes loops and accounts for 0.00% of application time in run run_1_thread.
| Source file and lines | main.cpp:70-82 |
| Module | kmeans-icpx-O3-aggressive |
| nb instructions | 100 |
| nb uops | 101 |
| loop length | 402 |
| used x86 registers | 14 |
| used mmx registers | 0 |
| used xmm registers | 1 |
| used ymm registers | 0 |
| used zmm registers | 0 |
| nb stack references | 6 |
| micro-operation queue | 25.25 cycles |
| front end | 25.25 cycles |
| | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 |
|---|---|---|---|---|---|---|---|---|
| uops | 10.00 | 10.00 | 11.83 | 11.50 | 15.00 | 10.00 | 10.00 | 11.67 |
| cycles | 10.00 | 10.00 | 11.83 | 11.50 | 15.00 | 10.00 | 10.00 | 11.67 |
| Cycles executing div or sqrt instructions | NA |
| FE+BE cycles | 25.13 |
| Stall cycles | 0.00 |
| Front-end | 25.25 |
| Dispatch | 15.00 |
| Overall L1 | 25.25 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 0% |
| all | 0% |
| load | 0% |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | NA (no other vectorizable/vectorized instructions) |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 8% |
| load | 6% |
| store | 6% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 12% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 7% |
| all | 12% |
| load | 12% |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | NA (no other vectorizable/vectorized instructions) |
| all | 8% |
| load | 9% |
| store | 6% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 12% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 7% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | N/A |
| MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| PUSH %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | N/A |
| PUSH %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | N/A |
| PUSH %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | N/A |
| PUSH %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | N/A |
| PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | N/A |
| SUB $0x18,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| MOV 0x18(%RBP),%EAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | N/A |
| MOVL $0,-0x3c(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 2 | 1 | scal (6.3%) |
| TEST %EAX,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (6.3%) |
| JS 403920 <_Z7k_meansiP7point_tS0_PiS0_ii.extracted+0xd0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 | N/A |
| MOV %R9,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| MOV %R8,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (12.5%) |
| MOV %RCX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| MOV %RDX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (12.5%) |
| MOV (%RDI),%ESI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (6.3%) |
| MOVL $0,-0x30(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 2 | 1 | scal (6.3%) |
| MOV %EAX,-0x2c(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | scal (6.3%) |
| MOVL $0x1,-0x38(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 2 | 1 | scal (6.3%) |
| SUB $0x8,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| LEA -0x38(%RBP),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
| LEA -0x3c(%RBP),%RCX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
| LEA -0x30(%RBP),%R8 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
| LEA -0x2c(%RBP),%R9 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
| MOV $0x40e170,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| MOV %ESI,-0x34(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | scal (6.3%) |
| MOV $0x22,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (6.3%) |
| PUSH $0x1 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | N/A |
| PUSH $0x1 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | N/A |
| PUSH %RAX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | N/A |
| CALL 402220 <__kmpc_for_static_init_4@plt> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 | N/A |
| ADD $0x20,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| MOV -0x30(%RBP),%EAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | N/A |
| MOV -0x2c(%RBP),%ECX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | N/A |
| CMP %ECX,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (6.3%) |
| JA 4038f4 <_Z7k_meansiP7point_tS0_PiS0_ii.extracted+0xa4> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 | N/A |
| TEST %EBX,%EBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (6.3%) |
| JLE 4038f4 <_Z7k_meansiP7point_tS0_PiS0_ii.extracted+0xa4> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 | N/A |
| MOV %EBX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (6.3%) |
| AND $0x7fffffff,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (6.3%) |
| SUB %RAX,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| LEA 0x1(%RCX),%RSI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
| CMP $0x8,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| JAE 403940 <_Z7k_meansiP7point_tS0_PiS0_ii.extracted+0xf0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 | N/A |
| MOV %RSI,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| AND $-0x8,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| CMP %RSI,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| JNE 403b60 <_Z7k_meansiP7point_tS0_PiS0_ii.extracted+0x310> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 | N/A |
| MOV $0x40e190,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| MOV -0x34(%RBP),%ESI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (6.3%) |
| ADD $0x18,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
| POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
| POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
| POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
| POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
| POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
| JMP 402050 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 | N/A |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| ADD $0x18,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
| POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
| POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
| POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
| POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
| POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
| RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 | N/A |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| MOV %RSI,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| SHR $0x3,%RDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 | N/A |
| DEC %RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (6.3%) |
| VMOVSD 0x66c3(%RIP),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (12.5%) |
| JMP 40396d <_Z7k_meansiP7point_tS0_PiS0_ii.extracted+0x11d> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 | N/A |
| NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| MOV %EBX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (6.3%) |
| AND $0x7ffffffc,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (6.3%) |
| MOV %EDX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (6.3%) |
| AND $-0x4,%R8D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (6.3%) |
| SHR $0x2,%EBX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 | N/A |
| AND $0x1fffffff,%EBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| SAL $0x6,%RBX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 | N/A |
| ADD %R13,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| LEA 0x30(%R13),%R9 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
| VMOVSD 0x648d(%RIP),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (12.5%) |
| JMP 403bad <_Z7k_meansiP7point_tS0_PiS0_ii.extracted+0x35d> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 | N/A |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| Run run_1_thread | Number processes: 1; Number nodes: 1; Run Command: <executable> input/100000000.in 1000 100000000 50 25; MPI Command: ; Dataset: ; Run Directory: /home/fmusial/KMEANS_Benchmarks; OMP_PROC_BIND: true; OMP_NUM_THREADS: 1 |
|---|---|
| Run run_2_threads | Number processes: 1; Number nodes: 1; Run Command: <executable> input/100000000.in 1000 100000000 50 25; MPI Command: ; Dataset: ; Run Directory: /home/fmusial/KMEANS_Benchmarks; OMP_PROC_BIND: true; OMP_NUM_THREADS: 2 |
| Run run_4_threads | Number processes: 1; Number nodes: 1; Run Command: <executable> input/100000000.in 1000 100000000 50 25; MPI Command: ; Dataset: ; Run Directory: /home/fmusial/KMEANS_Benchmarks; OMP_PROC_BIND: true; OMP_NUM_THREADS: 4 |
| Run run_8_threads | Number processes: 1; Number nodes: 1; Run Command: <executable> input/100000000.in 1000 100000000 50 25; MPI Command: ; Dataset: ; Run Directory: /home/fmusial/KMEANS_Benchmarks; OMP_PROC_BIND: true; OMP_NUM_THREADS: 8 |
| Run run_10_threads | Number processes: 1; Number nodes: 1; Run Command: <executable> input/100000000.in 1000 100000000 50 25; MPI Command: ; Dataset: ; Run Directory: /home/fmusial/KMEANS_Benchmarks; OMP_PROC_BIND: true; OMP_NUM_THREADS: 10 |
| (run_1_thread) Efficiency | (run_1_thread) Potential Speed-Up (%) | (run_2_threads) Efficiency | (run_2_threads) Potential Speed-Up (%) | (run_4_threads) Efficiency | (run_4_threads) Potential Speed-Up (%) | (run_8_threads) Efficiency | (run_8_threads) Potential Speed-Up (%) | (run_10_threads) Efficiency | (run_10_threads) Potential Speed-Up (%) |
|---|---|---|---|---|---|---|---|---|---|
| 1 | 0 | 0.95 | 4.19 | 0.86 | 11.12 | 0.78 | 15.48 | 0.75 | 16.73 |
| Run | Number of threads | Efficiency (ideal is 1) | Speedup | Ideal Speedup | Time (s) | Coverage (%) |
|---|---|---|---|---|---|---|
| run_1_thread | 1 | 1 | 1 | 1 | 90.010009765625 | 92.317962646484 |
| run_2_threads | 2 | 0.95 | 1.91 | 2 | 46.069999694824 | 88.239791870117 |
| run_4_threads | 4 | 0.86 | 3.45 | 4 | 24.279998779297 | 81.488929748535 |
| run_8_threads | 8 | 0.78 | 6.24 | 8 | 12.525001525879 | 70.354248046875 |
| run_10_threads | 10 | 0.75 | 7.45 | 10 | 10.255000114441 | 65.725799560547 |
| Name | Coverage (%) | Time (s) |
|---|---|---|
| ▼k_means(int, point_t*, point_t*, int*, point_t*, int, int) [clone .extracted]– | 92.32 | 90.01 |
| ▼Loop 17 - main.cpp:71-82 - kmeans-icpx-O3-aggressive– | 1.02 | 0.99 |
| ○Loop 18 - main.cpp:73-79 - kmeans-icpx-O3-aggressive | 91.30 | 89.02 |
| ▼Loop 14 - main.cpp:71-82 - kmeans-icpx-O3-aggressive– | 0.00 | 0.00 |
| ○Loop 15 - main.cpp:73-79 - kmeans-icpx-O3-aggressive | 0.00 | 0.00 |
| ○Loop 16 - main.cpp:73-79 - kmeans-icpx-O3-aggressive | 0.00 | 0.00 |
