Function: main | Module: exec | Source: main.c:50-191 [...] | Coverage: 0.01% |
---|
Function: main | Module: exec | Source: main.c:50-191 [...] | Coverage: 0.01% |
---|
/home/kcamus/qaas_runs/169-401-3406/intel/HACCmk/build/HACCmk/src/main.c: 50 - 191 |
-------------------------------------------------------------------------------- |
50: { |
[...] |
73: printf( "count is set %d\n", count ); |
74: printf( "Total MPI ranks %d\n", nprocs ); |
75: } |
76: |
77: if (argc == 2 && strncmp(argv[1], "-s", 2) == 0) |
78: NN = 15000; |
79: |
80: printf( "N is set %ld\n", NN ); |
81: |
82: #pragma omp parallel |
[...] |
97: for ( n = 400; n < NN; n = n + 20 ) |
[...] |
103: dx1 = 1.0f/(float)n; |
104: dy1 = 2.0f/(float)n; |
105: dz1 = 3.0f/(float)n; |
106: xx[0] = 0.f; |
107: yy[0] = 0.f; |
108: zz[0] = 0.f; |
109: mass[0] = 2.f; |
110: |
111: for ( i = 1; i < n; i++ ) |
112: { |
113: xx[i] = xx[i-1] + dx1; |
114: yy[i] = yy[i-1] + dy1; |
115: zz[i] = zz[i-1] + dz1; |
116: mass[i] = (float)i * 0.01f + xx[i]; |
117: } |
118: |
119: for ( i = 0; i < n; i++ ) |
120: { |
121: vx1[i] = 0.f; |
122: vy1[i] = 0.f; |
123: vz1[i] = 0.f; |
[...] |
136: t1 = mysecond(); |
137: #endif |
138: |
139: #pragma omp parallel for private( dx1, dy1, dz1 ) |
[...] |
152: t2 = mysecond(); |
[...] |
166: t3 = (t2 - t1) * 1e6; |
167: #endif |
168: |
169: elapsed = elapsed + t3; |
[...] |
185: printf( "\nKernel elapsed time, s: %18.8lf\n", elapsed*1e-6 ); |
[...] |
191: return 0; |
0x401580 PUSH %RBP |
0x401581 MOV %RSP,%RBP |
0x401584 PUSH %R15 |
0x401586 PUSH %R14 |
0x401588 PUSH %R13 |
0x40158a PUSH %R12 |
0x40158c PUSH %RBX |
0x40158d SUB $0x18,%RSP |
0x401591 MOV %RSI,%R14 |
0x401594 MOV %EDI,%R15D |
0x401597 MOV $0x9d9fea,%ESI |
0x40159c MOV $0x3,%EDI |
0x4015a1 CALL 402430 <__intel_new_feature_proc_init> |
0x4015a6 MOV $0x409090,%EDI |
0x4015ab MOV $0xbb8,%ESI |
0x4015b0 XOR %EAX,%EAX |
0x4015b2 CALL 401030 <printf@plt> |
0x4015b7 MOV $0x4090a1,%EDI |
0x4015bc MOV $0x1,%ESI |
0x4015c1 XOR %EAX,%EAX |
0x4015c3 CALL 401030 <printf@plt> |
0x4015c8 MOV $0x186a0,%EBX |
0x4015cd CMP $0x2,%R15D |
0x4015d1 JNE 4015f6 |
0x4015d3 MOV 0x8(%R14),%RDI |
0x4015d7 MOV $0x4090b5,%ESI |
0x4015dc MOV $0x2,%EDX |
0x4015e1 CALL 4010a0 <strncmp@plt> |
0x4015e6 TEST %EAX,%EAX |
0x4015e8 MOV $0x3a98,%EAX |
0x4015ed MOV $0x186a0,%EBX |
0x4015f2 CMOVE %RAX,%RBX |
0x4015f6 MOV $0x4090b8,%EDI |
0x4015fb MOV %RBX,%RSI |
0x4015fe XOR %EAX,%EAX |
0x401600 CALL 401030 <printf@plt> |
0x401605 MOV $0x40d0c0,%EDI |
0x40160a MOV $0x401990,%EDX |
0x40160f MOV $0x1,%ESI |
0x401614 XOR %ECX,%ECX |
0x401616 XOR %EAX,%EAX |
0x401618 CALL 4010f0 <__kmpc_fork_call@plt> |
0x40161d ADD $-0x191,%EBX |
0x401623 MOV $-0x33333333,%R14D |
0x401629 IMUL %RBX,%R14 |
0x40162d SHR $0x24,%R14 |
0x401631 INC %R14D |
0x401634 VXORPS %XMM2,%XMM2,%XMM2 |
0x401638 MOV $0x190,%R15D |
0x40163e MOV $0x18f,%R12D |
0x401644 VMOVSS 0x79bc(%RIP),%XMM5 |
0x40164c XOR %R13D,%R13D |
0x40164f JMP 40170a |
0x401654 NOPW %CS:(%RAX,%RAX,1) |
(2) 0x401660 MOV $0x593b90,%EDI |
(2) 0x401665 XOR %ESI,%ESI |
(2) 0x401667 MOV %RBX,%RDX |
(2) 0x40166a VZEROUPPER |
(2) 0x40166d CALL 402060 <_intel_fast_memset> |
(2) 0x401672 MOV $0x5f5610,%EDI |
(2) 0x401677 XOR %ESI,%ESI |
(2) 0x401679 MOV %RBX,%RDX |
(2) 0x40167c CALL 402060 <_intel_fast_memset> |
(2) 0x401681 MOV $0x657090,%EDI |
(2) 0x401686 XOR %ESI,%ESI |
(2) 0x401688 MOV %RBX,%RDX |
(2) 0x40168b CALL 402060 <_intel_fast_memset> |
(2) 0x401690 XOR %EAX,%EAX |
(2) 0x401692 CALL 401b30 <mysecond> |
(2) 0x401697 VMOVSD %XMM0,-0x30(%RBP) |
(2) 0x40169c SUB $0x8,%RSP |
(2) 0x4016a0 MOV $0x40d120,%EDI |
(2) 0x4016a5 MOV $0x4019c0,%EDX |
(2) 0x4016aa MOV $0x3e6b851f,%ECX |
(2) 0x4016af MOV $0x3f000000,%R8D |
(2) 0x4016b5 MOV $0x3cf5c28f,%R9D |
(2) 0x4016bb MOV $0x6,%ESI |
(2) 0x4016c0 XOR %EAX,%EAX |
(2) 0x4016c2 PUSH $0xbb7 |
(2) 0x4016c7 PUSH $0 |
(2) 0x4016c9 PUSH %R15 |
(2) 0x4016cb CALL 4010f0 <__kmpc_fork_call@plt> |
(2) 0x4016d0 ADD $0x20,%RSP |
(2) 0x4016d4 XOR %EAX,%EAX |
(2) 0x4016d6 CALL 401b30 <mysecond> |
(2) 0x4016db VMOVSS 0x7925(%RIP),%XMM5 |
(2) 0x4016e3 VSUBSD -0x30(%RBP),%XMM0,%XMM0 |
(2) 0x4016e8 VMOVSD -0x38(%RBP),%XMM2 |
(2) 0x4016ed VFMADD231SD 0x798a(%RIP),%XMM0,%XMM2 |
(2) 0x4016f6 ADD $0x14,%R15 |
(2) 0x4016fa INC %R13 |
(2) 0x4016fd ADD $0x14,%R12 |
(2) 0x401701 CMP %R14,%R13 |
(2) 0x401704 JE 40195d |
(2) 0x40170a LEA (,%R13,4),%RBX |
(2) 0x401712 ADD %R13,%RBX |
(2) 0x401715 SAL $0x4,%RBX |
(2) 0x401719 ADD $0x640,%RBX |
(2) 0x401720 VXORPS %XMM15,%XMM15,%XMM15 |
(2) 0x401725 VCVTSI2SS %R15D,%XMM15,%XMM0 |
(2) 0x40172a VMOVSS 0x78d2(%RIP),%XMM1 |
(2) 0x401732 VDIVSS %XMM0,%XMM1,%XMM0 |
(2) 0x401736 VBROADCASTSS %XMM0,%XMM1 |
(2) 0x40173b VMULPS 0x78ed(%RIP),%XMM1,%XMM1 |
(2) 0x401743 MOVL $0,0xba43(%RIP) |
(2) 0x40174d MOVL $0,0x6d4b9(%RIP) |
(2) 0x401757 MOVL $0,0xcef2f(%RIP) |
(2) 0x401761 MOVL $0x40000000,0x1309a5(%RIP) |
(2) 0x40176b LEA -0x1(%R15),%RAX |
(2) 0x40176f CMP $0xb,%RAX |
(2) 0x401773 VMOVSD %XMM2,-0x38(%RBP) |
(2) 0x401778 JAE 401790 |
(2) 0x40177a VXORPS %XMM4,%XMM4,%XMM4 |
(2) 0x40177e VXORPS %XMM2,%XMM2,%XMM2 |
(2) 0x401782 XOR %ECX,%ECX |
(2) 0x401784 JMP 4018f8 |
0x401789 NOPL (%RAX) |
(2) 0x401790 VXORPS %XMM3,%XMM3,%XMM3 |
(2) 0x401794 VXORPS %XMM2,%XMM2,%XMM2 |
(2) 0x401798 MOV $-0x3,%RCX |
(2) 0x40179f NOP |
(0) 0x4017a0 VADDSS %XMM0,%XMM2,%XMM2 |
(0) 0x4017a4 VADDPS %XMM1,%XMM3,%XMM3 |
(0) 0x4017a8 VEXTRACTPS $0x1,%XMM3,0x46ec20(,%RCX,4) |
(0) 0x4017b3 VMOVSS %XMM2,0x40d1a0(,%RCX,4) |
(0) 0x4017bc VMOVSS %XMM3,0x4d06a0(,%RCX,4) |
(0) 0x4017c5 LEA 0x4(%RCX),%EDX |
(0) 0x4017c8 VXORPS %XMM15,%XMM15,%XMM15 |
(0) 0x4017cd VCVTSI2SS %EDX,%XMM15,%XMM4 |
(0) 0x4017d1 VFMADD132SS %XMM5,%XMM2,%XMM4 |
(0) 0x4017d6 VMOVSS %XMM4,0x532120(,%RCX,4) |
(0) 0x4017df INC %RCX |
(0) 0x4017e2 JNE 4017a0 |
(2) 0x4017e4 LEA -0x4(%R15),%RDX |
(2) 0x4017e8 AND $-0x8,%RDX |
(2) 0x4017ec VBROADCASTSS %XMM0,%YMM15 |
(2) 0x4017f1 VBROADCASTSS %XMM2,%YMM9 |
(2) 0x4017f6 VMOVUPS 0x7842(%RIP),%YMM4 |
(2) 0x4017fe VFMADD231PS %YMM4,%YMM15,%YMM9 |
(2) 0x401803 VMOVSS 0x7801(%RIP),%XMM7 |
(2) 0x40180b VMULSS %XMM7,%XMM0,%XMM13 |
(2) 0x40180f VMOVSHDUP %XMM1,%XMM6 |
(2) 0x401813 VBROADCASTSD %XMM6,%YMM5 |
(2) 0x401818 VBROADCASTSS 0x77ef(%RIP),%YMM8 |
(2) 0x401821 VPERMPS %YMM3,%YMM8,%YMM11 |
(2) 0x401826 VFMADD231PS %YMM4,%YMM5,%YMM11 |
(2) 0x40182b VMULSS %XMM7,%XMM6,%XMM8 |
(2) 0x40182f VBROADCASTSS %XMM1,%YMM6 |
(2) 0x401834 VBROADCASTSS %XMM3,%YMM12 |
(2) 0x401839 VFMADD231PS %YMM4,%YMM6,%YMM12 |
(2) 0x40183e VMULSS %XMM7,%XMM1,%XMM10 |
(2) 0x401842 LEA 0x3(%RDX),%RCX |
(2) 0x401846 OR $0x2,%RDX |
(2) 0x40184a VBROADCASTSS %XMM13,%YMM7 |
(2) 0x40184f VBROADCASTSS %XMM8,%YMM8 |
(2) 0x401854 VBROADCASTSS %XMM10,%YMM10 |
(2) 0x401859 MOV $0x3,%ESI |
(2) 0x40185e XCHG %AX,%AX |
(1) 0x401860 VADDPS %YMM15,%YMM9,%YMM13 |
(1) 0x401865 VADDPS %YMM7,%YMM9,%YMM9 |
(1) 0x401869 VMOVUPS %YMM13,0x40d194(,%RSI,4) |
(1) 0x401872 VADDPS %YMM5,%YMM11,%YMM14 |
(1) 0x401876 VADDPS %YMM8,%YMM11,%YMM11 |
(1) 0x40187b VMOVUPS %YMM14,0x46ec14(,%RSI,4) |
(1) 0x401884 VADDPS %YMM6,%YMM12,%YMM14 |
(1) 0x401888 VADDPS %YMM10,%YMM12,%YMM12 |
(1) 0x40188d VMOVUPS %YMM14,0x4d0694(,%RSI,4) |
(1) 0x401896 VMOVD %ESI,%XMM4 |
(1) 0x40189a VPBROADCASTD %XMM4,%YMM4 |
(1) 0x40189f VPADDD 0x77b9(%RIP),%YMM4,%YMM4 |
(1) 0x4018a7 VCVTDQ2PS %YMM4,%YMM4 |
(1) 0x4018ab VBROADCASTSS 0x7754(%RIP),%YMM14 |
(1) 0x4018b4 VFMADD213PS %YMM13,%YMM14,%YMM4 |
(1) 0x4018b9 VMOVUPS %YMM4,0x532114(,%RSI,4) |
(1) 0x4018c2 ADD $0x8,%RSI |
(1) 0x4018c6 CMP %RDX,%RSI |
(1) 0x4018c9 JLE 401860 |
(2) 0x4018cb VCVTSI2SS %RCX,%XMM0,%XMM4 |
(2) 0x4018d0 VADDSS 0x773c(%RIP),%XMM4,%XMM4 |
(2) 0x4018d8 CMP %RCX,%RAX |
(2) 0x4018db VMOVSS 0x7725(%RIP),%XMM5 |
(2) 0x4018e3 JE 401660 |
(2) 0x4018e9 VFMADD231SS %XMM4,%XMM0,%XMM2 |
(2) 0x4018ee VBROADCASTSS %XMM4,%XMM4 |
(2) 0x4018f3 VFMADD213PS %XMM3,%XMM1,%XMM4 |
(2) 0x4018f8 MOV %R12,%RAX |
(2) 0x4018fb SUB %RCX,%RAX |
(2) 0x4018fe LEA (,%RCX,4),%RDX |
(2) 0x401906 XOR %ESI,%ESI |
(2) 0x401908 NOPL (%RAX,%RAX,1) |
(3) 0x401910 VADDSS %XMM0,%XMM2,%XMM2 |
(3) 0x401914 VADDPS %XMM1,%XMM4,%XMM4 |
(3) 0x401918 VEXTRACTPS $0x1,%XMM4,0x46ec14(%RDX,%RSI,4) |
(3) 0x401923 VMOVSS %XMM2,0x40d194(%RDX,%RSI,4) |
(3) 0x40192c VMOVSS %XMM4,0x4d0694(%RDX,%RSI,4) |
(3) 0x401935 LEA 0x1(%RCX,%RSI,1),%EDI |
(3) 0x401939 VXORPS %XMM15,%XMM15,%XMM15 |
(3) 0x40193e VCVTSI2SS %EDI,%XMM15,%XMM3 |
(3) 0x401942 VFMADD132SS %XMM5,%XMM2,%XMM3 |
(3) 0x401947 VMOVSS %XMM3,0x532114(%RDX,%RSI,4) |
(3) 0x401950 INC %RSI |
(3) 0x401953 CMP %RSI,%RAX |
(3) 0x401956 JNE 401910 |
(2) 0x401958 JMP 401660 |
0x40195d VMULSD 0x7723(%RIP),%XMM2,%XMM0 |
0x401965 MOV $0x4090e1,%EDI |
0x40196a MOV $0x1,%AL |
0x40196c CALL 401030 <printf@plt> |
0x401971 XOR %EAX,%EAX |
0x401973 ADD $0x18,%RSP |
0x401977 POP %RBX |
0x401978 POP %R12 |
0x40197a POP %R13 |
0x40197c POP %R14 |
0x40197e POP %R15 |
0x401980 POP %RBP |
0x401981 RET |
0x401982 NOPW %CS:(%RAX,%RAX,1) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
○100.00 | __libc_init_first | | libc.so.6 |
Path / |
Source file and lines | main.c:50-191 |
Module | exec |
nb instructions | 69 |
nb uops | 76 |
loop length | 282 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 3 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 0 |
micro-operation queue | 19.00 cycles |
front end | 19.00 cycles |
 | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 |
---|---|---|---|---|---|---|---|---|
uops | 10.00 | 10.00 | 7.83 | 7.50 | 13.00 | 10.00 | 10.00 | 7.67 |
cycles | 10.00 | 10.00 | 7.83 | 7.50 | 13.00 | 10.00 | 10.00 | 7.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 17.38 |
Stall cycles | 0.00 |
Front-end | 19.00 |
Dispatch | 13.00 |
Overall L1 | 19.00 |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 33% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 3% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 3% |
all | 7% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 7% |
all | 14% |
load | 9% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 7% |
load | 10% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 12% |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 7% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
SUB $0x18,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RSI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %EDI,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV $0x9d9fea,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x3,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CALL 402430 <__intel_new_feature_proc_init> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV $0x409090,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0xbb8,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 401030 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV $0x4090a1,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x1,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 401030 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV $0x186a0,%EBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP $0x2,%R15D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JNE 4015f6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV 0x8(%R14),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV $0x4090b5,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x2,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CALL 4010a0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
TEST %EAX,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x3a98,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x186a0,%EBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMOVE %RAX,%RBX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
MOV $0x4090b8,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RBX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 401030 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV $0x40d0c0,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x401990,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x1,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 4010f0 <__kmpc_fork_call@plt> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
ADD $-0x191,%EBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $-0x33333333,%R14D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
IMUL %RBX,%R14 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
SHR $0x24,%R14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
INC %R14D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VXORPS %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV $0x190,%R15D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x18f,%R12D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSS 0x79bc(%RIP),%XMM5 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
XOR %R13D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 40170a | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMULSD 0x7723(%RIP),%XMM2,%XMM0 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV $0x4090e1,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x1,%AL | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CALL 401030 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
ADD $0x18,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
Source file and lines | main.c:50-191 |
Module | exec |
nb instructions | 69 |
nb uops | 76 |
loop length | 282 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 3 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 0 |
micro-operation queue | 19.00 cycles |
front end | 19.00 cycles |
 | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 |
---|---|---|---|---|---|---|---|---|
uops | 10.00 | 10.00 | 7.83 | 7.50 | 13.00 | 10.00 | 10.00 | 7.67 |
cycles | 10.00 | 10.00 | 7.83 | 7.50 | 13.00 | 10.00 | 10.00 | 7.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 17.38 |
Stall cycles | 0.00 |
Front-end | 19.00 |
Dispatch | 13.00 |
Overall L1 | 19.00 |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 33% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 3% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 3% |
all | 7% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 7% |
all | 14% |
load | 9% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 7% |
load | 10% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 12% |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 7% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
SUB $0x18,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RSI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %EDI,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV $0x9d9fea,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x3,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CALL 402430 <__intel_new_feature_proc_init> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV $0x409090,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0xbb8,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 401030 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV $0x4090a1,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x1,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 401030 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV $0x186a0,%EBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP $0x2,%R15D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JNE 4015f6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV 0x8(%R14),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV $0x4090b5,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x2,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CALL 4010a0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
TEST %EAX,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x3a98,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x186a0,%EBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMOVE %RAX,%RBX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
MOV $0x4090b8,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RBX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 401030 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV $0x40d0c0,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x401990,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x1,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 4010f0 <__kmpc_fork_call@plt> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
ADD $-0x191,%EBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $-0x33333333,%R14D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
IMUL %RBX,%R14 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
SHR $0x24,%R14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
INC %R14D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VXORPS %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV $0x190,%R15D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x18f,%R12D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSS 0x79bc(%RIP),%XMM5 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
XOR %R13D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 40170a | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMULSD 0x7723(%RIP),%XMM2,%XMM0 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV $0x4090e1,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x1,%AL | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CALL 401030 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
ADD $0x18,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼main– | 0.01 | 0.01 |
▼Loop 2 - main.c:77-169 - exec– | 0 | 0 |
○Loop 1 - main.c:111-116 - exec | 0.01 | 0 |
○Loop 3 - main.c:111-116 - exec | 0 | 0 |
○Loop 0 - main.c:111-116 - exec | 0 | 0 |