Function: main | Module: exec | Source: main.c:50-191 [...] | Coverage: 0.04% |
---|
Function: main | Module: exec | Source: main.c:50-191 [...] | Coverage: 0.04% |
---|
/home/kcamus/qaas_runs/169-401-3406/intel/HACCmk/build/HACCmk/src/main.c: 50 - 191 |
-------------------------------------------------------------------------------- |
50: { |
[...] |
73: printf( "count is set %d\n", count ); |
74: printf( "Total MPI ranks %d\n", nprocs ); |
75: } |
76: |
77: if (argc == 2 && strncmp(argv[1], "-s", 2) == 0) |
78: NN = 15000; |
79: |
80: printf( "N is set %ld\n", NN ); |
81: |
82: #pragma omp parallel |
[...] |
97: for ( n = 400; n < NN; n = n + 20 ) |
[...] |
103: dx1 = 1.0f/(float)n; |
104: dy1 = 2.0f/(float)n; |
105: dz1 = 3.0f/(float)n; |
106: xx[0] = 0.f; |
107: yy[0] = 0.f; |
108: zz[0] = 0.f; |
109: mass[0] = 2.f; |
110: |
111: for ( i = 1; i < n; i++ ) |
112: { |
113: xx[i] = xx[i-1] + dx1; |
114: yy[i] = yy[i-1] + dy1; |
115: zz[i] = zz[i-1] + dz1; |
116: mass[i] = (float)i * 0.01f + xx[i]; |
117: } |
118: |
119: for ( i = 0; i < n; i++ ) |
120: { |
121: vx1[i] = 0.f; |
122: vy1[i] = 0.f; |
123: vz1[i] = 0.f; |
[...] |
136: t1 = mysecond(); |
137: #endif |
138: |
139: #pragma omp parallel for private( dx1, dy1, dz1 ) |
[...] |
152: t2 = mysecond(); |
[...] |
166: t3 = (t2 - t1) * 1e6; |
167: #endif |
168: |
169: elapsed = elapsed + t3; |
[...] |
185: printf( "\nKernel elapsed time, s: %18.8lf\n", elapsed*1e-6 ); |
[...] |
191: return 0; |
0x401540 PUSH %RBP |
0x401541 MOV %RSP,%RBP |
0x401544 PUSH %R15 |
0x401546 PUSH %R14 |
0x401548 PUSH %R13 |
0x40154a PUSH %R12 |
0x40154c PUSH %RBX |
0x40154d SUB $0x18,%RSP |
0x401551 MOV %RSI,%R14 |
0x401554 MOV %EDI,%R15D |
0x401557 VSTMXCSR -0x2c(%RBP) |
0x40155c ORL $0x8040,-0x2c(%RBP) |
0x401563 VLDMXCSR -0x2c(%RBP) |
0x401568 MOV $0x408090,%EDI |
0x40156d MOV $0xbb8,%ESI |
0x401572 XOR %EAX,%EAX |
0x401574 CALL 401030 <printf@plt> |
0x401579 MOV $0x4080a1,%EDI |
0x40157e MOV $0x1,%ESI |
0x401583 XOR %EAX,%EAX |
0x401585 CALL 401030 <printf@plt> |
0x40158a MOV $0x186a0,%EBX |
0x40158f CMP $0x2,%R15D |
0x401593 JNE 4015b8 |
0x401595 MOV 0x8(%R14),%RDI |
0x401599 MOV $0x4080b5,%ESI |
0x40159e MOV $0x2,%EDX |
0x4015a3 CALL 4010a0 <strncmp@plt> |
0x4015a8 TEST %EAX,%EAX |
0x4015aa MOV $0x3a98,%EAX |
0x4015af MOV $0x186a0,%EBX |
0x4015b4 CMOVE %RAX,%RBX |
0x4015b8 MOV $0x4080b8,%EDI |
0x4015bd MOV %RBX,%RSI |
0x4015c0 XOR %EAX,%EAX |
0x4015c2 CALL 401030 <printf@plt> |
0x4015c7 MOV $0x40b0b0,%EDI |
0x4015cc MOV $0x401950,%EDX |
0x4015d1 MOV $0x1,%ESI |
0x4015d6 XOR %ECX,%ECX |
0x4015d8 XOR %EAX,%EAX |
0x4015da CALL 4010e0 <__kmpc_fork_call@plt> |
0x4015df ADD $-0x191,%EBX |
0x4015e5 MOV $-0x33333333,%R14D |
0x4015eb IMUL %RBX,%R14 |
0x4015ef SHR $0x24,%R14 |
0x4015f3 INC %R14D |
0x4015f6 VXORPS %XMM2,%XMM2,%XMM2 |
0x4015fa MOV $0x190,%R15D |
0x401600 VMOVSS 0x6a00(%RIP),%XMM15 |
0x401608 VMOVDQU64 0x6a4e(%RIP),%YMM16 |
0x401612 MOV $0x18f,%R12D |
0x401618 VBROADCASTSS 0x69e6(%RIP),%YMM17 |
0x401622 XOR %R13D,%R13D |
0x401625 JMP 4016ee |
0x40162a NOPW (%RAX,%RAX,1) |
(2) 0x401630 MOV $0x591b80,%EDI |
(2) 0x401635 XOR %ESI,%ESI |
(2) 0x401637 MOV %RBX,%RDX |
(2) 0x40163a VZEROUPPER |
(2) 0x40163d CALL 401f50 <_intel_fast_memset> |
(2) 0x401642 MOV $0x5f3600,%EDI |
(2) 0x401647 XOR %ESI,%ESI |
(2) 0x401649 MOV %RBX,%RDX |
(2) 0x40164c CALL 401f50 <_intel_fast_memset> |
(2) 0x401651 MOV $0x655080,%EDI |
(2) 0x401656 XOR %ESI,%ESI |
(2) 0x401658 MOV %RBX,%RDX |
(2) 0x40165b CALL 401f50 <_intel_fast_memset> |
(2) 0x401660 XOR %EAX,%EAX |
(2) 0x401662 CALL 401af0 <mysecond> |
(2) 0x401667 VMOVSD %XMM0,-0x38(%RBP) |
(2) 0x40166c SUB $0x8,%RSP |
(2) 0x401670 MOV $0x40b110,%EDI |
(2) 0x401675 MOV $0x401980,%EDX |
(2) 0x40167a MOV $0x3e6b851f,%ECX |
(2) 0x40167f MOV $0x3f000000,%R8D |
(2) 0x401685 MOV $0x3cf5c28f,%R9D |
(2) 0x40168b MOV $0x6,%ESI |
(2) 0x401690 XOR %EAX,%EAX |
(2) 0x401692 PUSH $0xbb7 |
(2) 0x401697 PUSH $0 |
(2) 0x401699 PUSH %R15 |
(2) 0x40169b CALL 4010e0 <__kmpc_fork_call@plt> |
(2) 0x4016a0 ADD $0x20,%RSP |
(2) 0x4016a4 XOR %EAX,%EAX |
(2) 0x4016a6 CALL 401af0 <mysecond> |
(2) 0x4016ab VBROADCASTSS 0x6953(%RIP),%YMM17 |
(2) 0x4016b5 VMOVDQU64 0x69a1(%RIP),%YMM16 |
(2) 0x4016bf VMOVSS 0x6941(%RIP),%XMM15 |
(2) 0x4016c7 VSUBSD -0x38(%RBP),%XMM0,%XMM0 |
(2) 0x4016cc VMOVSD -0x40(%RBP),%XMM2 |
(2) 0x4016d1 VFMADD231SD 0x69a6(%RIP),%XMM0,%XMM2 |
(2) 0x4016da ADD $0x14,%R15 |
(2) 0x4016de INC %R13 |
(2) 0x4016e1 ADD $0x14,%R12 |
(2) 0x4016e5 CMP %R14,%R13 |
(2) 0x4016e8 JE 40192a |
(2) 0x4016ee LEA (,%R13,4),%RBX |
(2) 0x4016f6 ADD %R13,%RBX |
(2) 0x4016f9 SAL $0x4,%RBX |
(2) 0x4016fd ADD $0x640,%RBX |
(2) 0x401704 VCVTSI2SS %R15D,%XMM18,%XMM0 |
(2) 0x40170a VMOVSS 0x68f2(%RIP),%XMM1 |
(2) 0x401712 VDIVSS %XMM0,%XMM1,%XMM0 |
(2) 0x401716 VBROADCASTSS %XMM0,%XMM1 |
(2) 0x40171b VMULPS 0x690d(%RIP),%XMM1,%XMM1 |
(2) 0x401723 MOVL $0,0x9a53(%RIP) |
(2) 0x40172d MOVL $0,0x6b4c9(%RIP) |
(2) 0x401737 MOVL $0,0xccf3f(%RIP) |
(2) 0x401741 MOVL $0x40000000,0x12e9b5(%RIP) |
(2) 0x40174b LEA -0x1(%R15),%RCX |
(2) 0x40174f CMP $0xb,%RCX |
(2) 0x401753 VMOVSD %XMM2,-0x40(%RBP) |
(2) 0x401758 JAE 401770 |
(2) 0x40175a VXORPS %XMM4,%XMM4,%XMM4 |
(2) 0x40175e VXORPS %XMM2,%XMM2,%XMM2 |
(2) 0x401762 XOR %EAX,%EAX |
(2) 0x401764 JMP 4018c4 |
0x401769 NOPL (%RAX) |
(2) 0x401770 VXORPS %XMM3,%XMM3,%XMM3 |
(2) 0x401774 VXORPS %XMM2,%XMM2,%XMM2 |
(2) 0x401778 MOV $-0x3,%RAX |
(2) 0x40177f NOP |
(0) 0x401780 VADDSS %XMM0,%XMM2,%XMM2 |
(0) 0x401784 VMOVSS %XMM2,0x40b190(,%RAX,4) |
(0) 0x40178d VADDPS %XMM1,%XMM3,%XMM3 |
(0) 0x401791 VEXTRACTPS $0x1,%XMM3,0x46cc10(,%RAX,4) |
(0) 0x40179c VMOVSS %XMM3,0x4ce690(,%RAX,4) |
(0) 0x4017a5 LEA 0x4(%RAX),%EDX |
(0) 0x4017a8 VCVTSI2SS %EDX,%XMM18,%XMM4 |
(0) 0x4017ae VFMADD132SS %XMM15,%XMM2,%XMM4 |
(0) 0x4017b3 VMOVSS %XMM4,0x530110(,%RAX,4) |
(0) 0x4017bc INC %RAX |
(0) 0x4017bf JNE 401780 |
(2) 0x4017c1 LEA -0x4(%R15),%RDX |
(2) 0x4017c5 AND $-0x8,%RDX |
(2) 0x4017c9 LEA 0x3(%RDX),%RAX |
(2) 0x4017cd VBROADCASTSS %XMM0,%YMM4 |
(2) 0x4017d2 VBROADCASTSS %XMM2,%YMM10 |
(2) 0x4017d7 VMOVUPS 0x6861(%RIP),%YMM9 |
(2) 0x4017df VFMADD231PS %YMM9,%YMM4,%YMM10 |
(2) 0x4017e4 VMOVSS 0x6820(%RIP),%XMM7 |
(2) 0x4017ec VMULSS %XMM7,%XMM0,%XMM13 |
(2) 0x4017f0 VMOVSHDUP %XMM1,%XMM6 |
(2) 0x4017f4 VBROADCASTSD %XMM6,%YMM5 |
(2) 0x4017f9 VBROADCASTSS 0x680e(%RIP),%YMM8 |
(2) 0x401802 VPERMPS %YMM3,%YMM8,%YMM11 |
(2) 0x401807 VFMADD231PS %YMM9,%YMM5,%YMM11 |
(2) 0x40180c VMULSS %XMM7,%XMM6,%XMM8 |
(2) 0x401810 VBROADCASTSS %XMM1,%YMM6 |
(2) 0x401815 VBROADCASTSS %XMM3,%YMM12 |
(2) 0x40181a VFMADD231PS %YMM9,%YMM6,%YMM12 |
(2) 0x40181f VMULSS %XMM7,%XMM1,%XMM9 |
(2) 0x401823 OR $0x2,%RDX |
(2) 0x401827 VBROADCASTSS %XMM13,%YMM7 |
(2) 0x40182c VBROADCASTSS %XMM8,%YMM8 |
(2) 0x401831 VBROADCASTSS %XMM9,%YMM9 |
(2) 0x401836 MOV $0x3,%ESI |
(2) 0x40183b NOPL (%RAX,%RAX,1) |
(1) 0x401840 VADDPS %YMM4,%YMM10,%YMM13 |
(1) 0x401844 VADDPS %YMM7,%YMM10,%YMM10 |
(1) 0x401848 VMOVUPS %YMM13,0x40b184(,%RSI,4) |
(1) 0x401851 VADDPS %YMM5,%YMM11,%YMM14 |
(1) 0x401855 VADDPS %YMM8,%YMM11,%YMM11 |
(1) 0x40185a VMOVUPS %YMM14,0x46cc04(,%RSI,4) |
(1) 0x401863 VADDPS %YMM6,%YMM12,%YMM14 |
(1) 0x401867 VADDPS %YMM9,%YMM12,%YMM12 |
(1) 0x40186c VMOVUPS %YMM14,0x4ce684(,%RSI,4) |
(1) 0x401875 VPBROADCASTD %ESI,%YMM14 |
(1) 0x40187b VPADDD %YMM16,%YMM14,%YMM14 |
(1) 0x401881 VCVTDQ2PS %YMM14,%YMM14 |
(1) 0x401886 VFMADD132PS %YMM17,%YMM13,%YMM14 |
(1) 0x40188c VMOVUPS %YMM14,0x530104(,%RSI,4) |
(1) 0x401895 ADD $0x8,%RSI |
(1) 0x401899 CMP %RDX,%RSI |
(1) 0x40189c JLE 401840 |
(2) 0x40189e VCVTSI2SS %RAX,%XMM18,%XMM4 |
(2) 0x4018a4 VADDSS 0x6768(%RIP),%XMM4,%XMM4 |
(2) 0x4018ac CMP %RAX,%RCX |
(2) 0x4018af JE 401630 |
(2) 0x4018b5 VFMADD231SS %XMM4,%XMM0,%XMM2 |
(2) 0x4018ba VBROADCASTSS %XMM4,%XMM4 |
(2) 0x4018bf VFMADD213PS %XMM3,%XMM1,%XMM4 |
(2) 0x4018c4 MOV %R12,%RCX |
(2) 0x4018c7 SUB %RAX,%RCX |
(2) 0x4018ca LEA (,%RAX,4),%RDX |
(2) 0x4018d2 XOR %ESI,%ESI |
(2) 0x4018d4 NOPW %CS:(%RAX,%RAX,1) |
(3) 0x4018e0 VADDSS %XMM0,%XMM2,%XMM2 |
(3) 0x4018e4 VMOVSS %XMM2,0x40b184(%RDX,%RSI,4) |
(3) 0x4018ed VADDPS %XMM1,%XMM4,%XMM4 |
(3) 0x4018f1 VEXTRACTPS $0x1,%XMM4,0x46cc04(%RDX,%RSI,4) |
(3) 0x4018fc VMOVSS %XMM4,0x4ce684(%RDX,%RSI,4) |
(3) 0x401905 LEA 0x1(%RAX,%RSI,1),%EDI |
(3) 0x401909 VCVTSI2SS %EDI,%XMM18,%XMM3 |
(3) 0x40190f VFMADD132SS %XMM15,%XMM2,%XMM3 |
(3) 0x401914 VMOVSS %XMM3,0x530104(%RDX,%RSI,4) |
(3) 0x40191d INC %RSI |
(3) 0x401920 CMP %RSI,%RCX |
(3) 0x401923 JNE 4018e0 |
(2) 0x401925 JMP 401630 |
0x40192a VMULSD 0x6756(%RIP),%XMM2,%XMM0 |
0x401932 MOV $0x4080e1,%EDI |
0x401937 MOV $0x1,%AL |
0x401939 CALL 401030 <printf@plt> |
0x40193e XOR %EAX,%EAX |
0x401940 ADD $0x18,%RSP |
0x401944 POP %RBX |
0x401945 POP %R12 |
0x401947 POP %R13 |
0x401949 POP %R14 |
0x40194b POP %R15 |
0x40194d POP %RBP |
0x40194e RET |
0x40194f NOP |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
○100.00 | __libc_init_first | libc.so.6 |
Path / |
Source file and lines | main.c:50-191 |
Module | exec |
nb instructions | 71 |
nb uops | 83 |
loop length | 285 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 3 |
used ymm registers | 2 |
used zmm registers | 0 |
nb stack references | 1 |
micro-operation queue | 20.75 cycles |
front end | 20.75 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | |
---|---|---|---|---|---|---|---|---|
uops | 10.75 | 10.75 | 9.50 | 9.17 | 14.00 | 10.75 | 10.75 | 9.33 |
cycles | 10.75 | 10.75 | 9.50 | 9.17 | 14.00 | 10.75 | 10.75 | 9.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 19.26 |
Stall cycles | 0.00 |
Front-end | 20.75 |
Dispatch | 14.00 |
Overall L1 | 20.75 |
all | 3% |
load | 33% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 25% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 50% |
all | 6% |
load | 16% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 4% |
all | 8% |
load | 22% |
store | 6% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 6% |
all | 12% |
load | 8% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 15% |
all | 9% |
load | 15% |
store | 6% |
mul | 12% |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 7% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
SUB $0x18,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RSI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %EDI,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VSTMXCSR -0x2c(%RBP) | 3 | 1 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 10 | 1 |
ORL $0x8040,-0x2c(%RBP) | 2 | 0.25 | 0.25 | 0.83 | 0.83 | 1 | 0.25 | 0.25 | 0.33 | 5 | 1 |
VLDMXCSR -0x2c(%RBP) | 4 | 1 | 0 | 0.50 | 0.50 | 0 | 1 | 1 | 0 | 5 | 3 |
MOV $0x408090,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0xbb8,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 401030 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV $0x4080a1,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x1,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 401030 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV $0x186a0,%EBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP $0x2,%R15D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JNE 4015b8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV 0x8(%R14),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV $0x4080b5,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x2,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CALL 4010a0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
TEST %EAX,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x3a98,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x186a0,%EBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMOVE %RAX,%RBX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
MOV $0x4080b8,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RBX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 401030 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV $0x40b0b0,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x401950,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x1,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 4010e0 <__kmpc_fork_call@plt> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
ADD $-0x191,%EBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $-0x33333333,%R14D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
IMUL %RBX,%R14 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
SHR $0x24,%R14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
INC %R14D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VXORPS %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV $0x190,%R15D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSS 0x6a00(%RIP),%XMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVDQU64 0x6a4e(%RIP),%YMM16 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
MOV $0x18f,%R12D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VBROADCASTSS 0x69e6(%RIP),%YMM17 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
XOR %R13D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 4016ee | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMULSD 0x6756(%RIP),%XMM2,%XMM0 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV $0x4080e1,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x1,%AL | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CALL 401030 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
ADD $0x18,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
Source file and lines | main.c:50-191 |
Module | exec |
nb instructions | 71 |
nb uops | 83 |
loop length | 285 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 3 |
used ymm registers | 2 |
used zmm registers | 0 |
nb stack references | 1 |
micro-operation queue | 20.75 cycles |
front end | 20.75 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | |
---|---|---|---|---|---|---|---|---|
uops | 10.75 | 10.75 | 9.50 | 9.17 | 14.00 | 10.75 | 10.75 | 9.33 |
cycles | 10.75 | 10.75 | 9.50 | 9.17 | 14.00 | 10.75 | 10.75 | 9.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 19.26 |
Stall cycles | 0.00 |
Front-end | 20.75 |
Dispatch | 14.00 |
Overall L1 | 20.75 |
all | 3% |
load | 33% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 25% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 50% |
all | 6% |
load | 16% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 4% |
all | 8% |
load | 22% |
store | 6% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 6% |
all | 12% |
load | 8% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 15% |
all | 9% |
load | 15% |
store | 6% |
mul | 12% |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 7% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
SUB $0x18,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RSI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %EDI,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VSTMXCSR -0x2c(%RBP) | 3 | 1 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 10 | 1 |
ORL $0x8040,-0x2c(%RBP) | 2 | 0.25 | 0.25 | 0.83 | 0.83 | 1 | 0.25 | 0.25 | 0.33 | 5 | 1 |
VLDMXCSR -0x2c(%RBP) | 4 | 1 | 0 | 0.50 | 0.50 | 0 | 1 | 1 | 0 | 5 | 3 |
MOV $0x408090,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0xbb8,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 401030 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV $0x4080a1,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x1,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 401030 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV $0x186a0,%EBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP $0x2,%R15D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JNE 4015b8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV 0x8(%R14),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV $0x4080b5,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x2,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CALL 4010a0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
TEST %EAX,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x3a98,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x186a0,%EBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMOVE %RAX,%RBX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
MOV $0x4080b8,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RBX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 401030 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV $0x40b0b0,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x401950,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x1,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 4010e0 <__kmpc_fork_call@plt> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
ADD $-0x191,%EBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $-0x33333333,%R14D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
IMUL %RBX,%R14 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
SHR $0x24,%R14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
INC %R14D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VXORPS %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV $0x190,%R15D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSS 0x6a00(%RIP),%XMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVDQU64 0x6a4e(%RIP),%YMM16 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
MOV $0x18f,%R12D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VBROADCASTSS 0x69e6(%RIP),%YMM17 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
XOR %R13D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 4016ee | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMULSD 0x6756(%RIP),%XMM2,%XMM0 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV $0x4080e1,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x1,%AL | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CALL 401030 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
ADD $0x18,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼main– | 0.04 | 0.01 |
▼Loop 2 - main.c:77-169 - exec– | 0 | 0 |
○Loop 1 - main.c:111-116 - exec | 0.04 | 0.01 |
○Loop 3 - main.c:111-116 - exec | 0 | 0 |
○Loop 0 - main.c:111-116 - exec | 0 | 0 |