Function: main | Module: exec | Source: main.c:50-192 [...] | Coverage: 0.06% |
---|
Function: main | Module: exec | Source: main.c:50-192 [...] | Coverage: 0.06% |
---|
/home/kcamus/qaas_runs/169-401-3406/intel/HACCmk/build/HACCmk/src/main.c: 50 - 192 |
-------------------------------------------------------------------------------- |
50: { |
[...] |
57: double t3, elapsed = 0.0, validation, final, t1, t2; |
[...] |
63: long NN = N; |
[...] |
73: printf( "count is set %d\n", count ); |
74: printf( "Total MPI ranks %d\n", nprocs ); |
75: } |
76: |
77: if (argc == 2 && strncmp(argv[1], "-s", 2) == 0) |
78: NN = 15000; |
79: |
80: printf( "N is set %ld\n", NN ); |
81: |
82: #pragma omp parallel |
[...] |
97: for ( n = 400; n < NN; n = n + 20 ) |
[...] |
103: dx1 = 1.0f/(float)n; |
104: dy1 = 2.0f/(float)n; |
105: dz1 = 3.0f/(float)n; |
106: xx[0] = 0.f; |
107: yy[0] = 0.f; |
108: zz[0] = 0.f; |
109: mass[0] = 2.f; |
110: |
111: for ( i = 1; i < n; i++ ) |
112: { |
113: xx[i] = xx[i-1] + dx1; |
114: yy[i] = yy[i-1] + dy1; |
115: zz[i] = zz[i-1] + dz1; |
116: mass[i] = (float)i * 0.01f + xx[i]; |
117: } |
118: |
119: for ( i = 0; i < n; i++ ) |
120: { |
121: vx1[i] = 0.f; |
122: vy1[i] = 0.f; |
123: vz1[i] = 0.f; |
[...] |
136: t1 = mysecond(); |
137: #endif |
138: |
139: #pragma omp parallel for private( dx1, dy1, dz1 ) |
[...] |
152: t2 = mysecond(); |
[...] |
166: t3 = (t2 - t1) * 1e6; |
167: #endif |
168: |
169: elapsed = elapsed + t3; |
[...] |
185: printf( "\nKernel elapsed time, s: %18.8lf\n", elapsed*1e-6 ); |
[...] |
192: } |
0x4010b0 PUSH %RBP |
0x4010b1 MOV %RSP,%RBP |
0x4010b4 PUSH %R15 |
0x4010b6 PUSH %R14 |
0x4010b8 PUSH %R13 |
0x4010ba PUSH %R12 |
0x4010bc MOV %RSI,%R12 |
0x4010bf MOV $0xbb8,%ESI |
0x4010c4 PUSH %RBX |
0x4010c5 MOV %EDI,%EBX |
0x4010c7 LEA 0x1f51(%RIP),%RDI |
0x4010ce SUB $0x48,%RSP |
0x4010d2 MOV %FS:0x28,%RAX |
0x4010db MOV %RAX,-0x38(%RBP) |
0x4010df XOR %EAX,%EAX |
0x4010e1 CALL 401030 <printf@plt> |
0x4010e6 MOV $0x1,%ESI |
0x4010eb LEA 0x1f3e(%RIP),%RDI |
0x4010f2 XOR %EAX,%EAX |
0x4010f4 CALL 401030 <printf@plt> |
0x4010f9 MOVQ $0x186a0,-0x68(%RBP) |
0x401101 CMP $0x2,%EBX |
0x401104 JNE 401132 |
0x401106 MOV 0x8(%R12),%RDI |
0x40110b MOV $0x2,%EDX |
0x401110 LEA 0x1f2d(%RIP),%RSI |
0x401117 CALL 401070 <strncmp@plt> |
0x40111c CMP $0x1,%EAX |
0x40111f SBB %RAX,%RAX |
0x401122 AND $-0x14c08,%RAX |
0x401128 ADD $0x186a0,%RAX |
0x40112e MOV %RAX,-0x68(%RBP) |
0x401132 MOV -0x68(%RBP),%RSI |
0x401136 LEA 0x1f0a(%RIP),%RDI |
0x40113d XOR %EAX,%EAX |
0x40113f LEA 0x18a93a(%RIP),%R13 |
0x401146 LEA 0x1ec3b3(%RIP),%R14 |
0x40114d LEA 0x24de2c(%RIP),%R15 |
0x401154 CALL 401030 <printf@plt> |
0x401159 LEA -0x50(%RBP),%RSI |
0x40115d XOR %ECX,%ECX |
0x40115f XOR %EDX,%EDX |
0x401161 LEA 0x778(%RIP),%RDI |
0x401168 MOVL $0,-0x50(%RBP) |
0x40116f MOV %RSI,-0x70(%RBP) |
0x401173 CALL 401090 <GOMP_parallel@plt> |
0x401178 VMOVSS 0x1f04(%RIP),%XMM8 |
0x401180 MOV $0x190,%R11D |
0x401186 VXORPD %XMM9,%XMM9,%XMM9 |
0x40118b LEA 0x128e6e(%RIP),%RAX |
0x401192 VXORPS %XMM7,%XMM7,%XMM7 |
0x401196 NOPW %CS:(%RAX,%RAX,1) |
(1) 0x4011a0 VCVTSI2SS %R11D,%XMM7,%XMM3 |
(1) 0x4011a5 VMOVSS 0x1ecf(%RIP),%XMM5 |
(1) 0x4011ad VXORPS %XMM0,%XMM0,%XMM0 |
(1) 0x4011b1 LEA -0x1(%R11),%RDX |
(1) 0x4011b5 MOV %R11D,-0x54(%RBP) |
(1) 0x4011b9 MOV $0x1,%R12D |
(1) 0x4011bf VMOVAPS %XMM0,%XMM2 |
(1) 0x4011c3 VMOVAPS %XMM0,%XMM1 |
(1) 0x4011c7 MOVL $0,0x18a8af(%RIP) |
(1) 0x4011d1 MOVL $0,0x1ec325(%RIP) |
(1) 0x4011db VDIVSS %XMM3,%XMM5,%XMM10 |
(1) 0x4011df MOVL $0,0x24dd97(%RIP) |
(1) 0x4011e9 MOVL $0x40000000,(%RAX) |
(1) 0x4011ef VMULSS 0x1e89(%RIP),%XMM10,%XMM11 |
(1) 0x4011f7 VADDSS %XMM10,%XMM10,%XMM6 |
(1) 0x4011fc AND $0x3,%EDX |
(1) 0x4011ff JE 4012be |
(1) 0x401205 CMP $0x1,%RDX |
(1) 0x401209 JE 401281 |
(1) 0x40120b CMP $0x2,%RDX |
(1) 0x40120f JE 40124d |
(1) 0x401211 VCVTSI2SS %R12D,%XMM7,%XMM4 |
(1) 0x401216 VMOVAPS %XMM10,%XMM0 |
(1) 0x40121a VMOVSS %XMM10,0x18a862(%RIP) |
(1) 0x401222 VMOVAPS %XMM6,%XMM2 |
(1) 0x401226 VMOVSS %XMM6,0x1ec2d6(%RIP) |
(1) 0x40122e VMOVAPS %XMM11,%XMM1 |
(1) 0x401232 MOV $0x2,%R12D |
(1) 0x401238 VMOVSS %XMM11,0x24dd44(%RIP) |
(1) 0x401240 VFMADD132SS %XMM8,%XMM10,%XMM4 |
(1) 0x401245 VMOVSS %XMM4,0x128db7(%RIP) |
(1) 0x40124d VCVTSI2SS %R12D,%XMM7,%XMM12 |
(1) 0x401252 VADDSS %XMM10,%XMM0,%XMM0 |
(1) 0x401257 VADDSS %XMM6,%XMM2,%XMM2 |
(1) 0x40125b VADDSS %XMM11,%XMM1,%XMM1 |
(1) 0x401260 VMOVSS %XMM0,(%R13,%R12,4) |
(1) 0x401267 VFMADD132SS %XMM8,%XMM0,%XMM12 |
(1) 0x40126c VMOVSS %XMM2,(%R14,%R12,4) |
(1) 0x401272 VMOVSS %XMM1,(%R15,%R12,4) |
(1) 0x401278 VMOVSS %XMM12,(%RAX,%R12,4) |
(1) 0x40127e INC %R12 |
(1) 0x401281 VCVTSI2SS %R12D,%XMM7,%XMM13 |
(1) 0x401286 VADDSS %XMM10,%XMM0,%XMM0 |
(1) 0x40128b VADDSS %XMM6,%XMM2,%XMM2 |
(1) 0x40128f VADDSS %XMM11,%XMM1,%XMM1 |
(1) 0x401294 VMOVSS %XMM0,(%R13,%R12,4) |
(1) 0x40129b VFMADD132SS %XMM8,%XMM0,%XMM13 |
(1) 0x4012a0 VMOVSS %XMM2,(%R14,%R12,4) |
(1) 0x4012a6 VMOVSS %XMM1,(%R15,%R12,4) |
(1) 0x4012ac VMOVSS %XMM13,(%RAX,%R12,4) |
(1) 0x4012b2 INC %R12 |
(1) 0x4012b5 CMP %R11,%R12 |
(1) 0x4012b8 JE 40139a |
(0) 0x4012be VADDSS %XMM10,%XMM0,%XMM14 |
(0) 0x4012c3 VADDSS %XMM6,%XMM2,%XMM15 |
(0) 0x4012c7 LEA 0x2(%R12),%RDI |
(0) 0x4012cc VADDSS %XMM11,%XMM1,%XMM3 |
(0) 0x4012d1 LEA 0x1(%R12),%RCX |
(0) 0x4012d6 LEA 0x3(%R12),%R9 |
(0) 0x4012db VADDSS %XMM10,%XMM14,%XMM12 |
(0) 0x4012e0 VMOVSS %XMM15,(%R14,%R12,4) |
(0) 0x4012e6 VADDSS %XMM6,%XMM15,%XMM2 |
(0) 0x4012ea VCVTSI2SS %EDI,%XMM7,%XMM15 |
(0) 0x4012ee VMOVSS %XMM3,(%R15,%R12,4) |
(0) 0x4012f4 VADDSS %XMM11,%XMM3,%XMM1 |
(0) 0x4012f9 VCVTSI2SS %R12D,%XMM7,%XMM5 |
(0) 0x4012fe VMOVSS %XMM14,(%R13,%R12,4) |
(0) 0x401305 VADDSS %XMM10,%XMM12,%XMM0 |
(0) 0x40130a VMOVSS %XMM12,(%R13,%RCX,4) |
(0) 0x401311 VADDSS %XMM6,%XMM2,%XMM13 |
(0) 0x401315 VCVTSI2SS %ECX,%XMM7,%XMM4 |
(0) 0x401319 VMOVSS %XMM2,(%R14,%RCX,4) |
(0) 0x40131f VCVTSI2SS %R9D,%XMM7,%XMM3 |
(0) 0x401324 VMOVSS %XMM1,(%R15,%RCX,4) |
(0) 0x40132a VFMADD132SS %XMM8,%XMM0,%XMM15 |
(0) 0x40132f VMOVSS %XMM0,(%R13,%RDI,4) |
(0) 0x401336 VADDSS %XMM10,%XMM0,%XMM0 |
(0) 0x40133b VADDSS %XMM6,%XMM13,%XMM2 |
(0) 0x40133f VFMADD132SS %XMM8,%XMM14,%XMM5 |
(0) 0x401344 VADDSS %XMM11,%XMM1,%XMM14 |
(0) 0x401349 VMOVSS %XMM13,(%R14,%RDI,4) |
(0) 0x40134f VFMADD132SS %XMM8,%XMM12,%XMM4 |
(0) 0x401354 VFMADD132SS %XMM8,%XMM0,%XMM3 |
(0) 0x401359 VMOVSS %XMM0,(%R13,%R9,4) |
(0) 0x401360 VADDSS %XMM11,%XMM14,%XMM1 |
(0) 0x401365 VMOVSS %XMM14,(%R15,%RDI,4) |
(0) 0x40136b VMOVSS %XMM2,(%R14,%R9,4) |
(0) 0x401371 VMOVSS %XMM5,(%RAX,%R12,4) |
(0) 0x401377 ADD $0x4,%R12 |
(0) 0x40137b VMOVSS %XMM4,(%RAX,%RCX,4) |
(0) 0x401380 VMOVSS %XMM1,(%R15,%R9,4) |
(0) 0x401386 VMOVSS %XMM15,(%RAX,%RDI,4) |
(0) 0x40138b VMOVSS %XMM3,(%RAX,%R9,4) |
(0) 0x401391 CMP %R11,%R12 |
(0) 0x401394 JNE 4012be |
(1) 0x40139a LEA (,%R12,4),%RBX |
(1) 0x4013a2 XOR %ESI,%ESI |
(1) 0x4013a4 LEA 0xc71d5(%RIP),%RDI |
(1) 0x4013ab VMOVSD %XMM9,-0x60(%RBP) |
(1) 0x4013b0 MOV %RBX,%RDX |
(1) 0x4013b3 CALL 401040 <memset@plt> |
(1) 0x4013b8 MOV %RBX,%RDX |
(1) 0x4013bb XOR %ESI,%ESI |
(1) 0x4013bd LEA 0x6573c(%RIP),%RDI |
(1) 0x4013c4 CALL 401040 <memset@plt> |
(1) 0x4013c9 MOV %RBX,%RDX |
(1) 0x4013cc XOR %ESI,%ESI |
(1) 0x4013ce LEA 0x3cab(%RIP),%RDI |
(1) 0x4013d5 CALL 401040 <memset@plt> |
(1) 0x4013da XOR %EAX,%EAX |
(1) 0x4013dc CALL 401920 <mysecond> |
(1) 0x4013e1 MOV -0x54(%RBP),%R8D |
(1) 0x4013e5 MOV 0x1c9c(%RIP),%R10 |
(1) 0x4013ec XOR %ECX,%ECX |
(1) 0x4013ee MOV -0x70(%RBP),%RSI |
(1) 0x4013f2 XOR %EDX,%EDX |
(1) 0x4013f4 LEA 0x1b5(%RIP),%RDI |
(1) 0x4013fb VMOVQ %XMM0,%RBX |
(1) 0x401400 MOV %R8D,-0x44(%RBP) |
(1) 0x401404 MOV %R10,-0x50(%RBP) |
(1) 0x401408 MOVL $0xbb8,-0x40(%RBP) |
(1) 0x40140f MOVL $0x3e6b851f,-0x48(%RBP) |
(1) 0x401416 CALL 401090 <GOMP_parallel@plt> |
(1) 0x40141b XOR %EAX,%EAX |
(1) 0x40141d CALL 401920 <mysecond> |
(1) 0x401422 VMOVQ %RBX,%XMM7 |
(1) 0x401427 VMOVSD -0x60(%RBP),%XMM9 |
(1) 0x40142c LEA 0x14(%R12),%R11 |
(1) 0x401431 VSUBSD %XMM7,%XMM0,%XMM8 |
(1) 0x401435 LEA 0x128bc4(%RIP),%RAX |
(1) 0x40143c VXORPS %XMM7,%XMM7,%XMM7 |
(1) 0x401440 CMP %R11,-0x68(%RBP) |
(1) 0x401444 VFMADD231SD 0x1c43(%RIP),%XMM8,%XMM9 |
(1) 0x40144d VMOVSS 0x1c2f(%RIP),%XMM8 |
(1) 0x401455 JG 4011a0 |
0x40145b VMULSD 0x1c35(%RIP),%XMM9,%XMM0 |
0x401463 LEA 0x1bee(%RIP),%RDI |
0x40146a MOV $0x1,%EAX |
0x40146f CALL 401030 <printf@plt> |
0x401474 MOV -0x38(%RBP),%RAX |
0x401478 SUB %FS:0x28,%RAX |
0x401481 JNE 401494 |
0x401483 ADD $0x48,%RSP |
0x401487 XOR %EAX,%EAX |
0x401489 POP %RBX |
0x40148a POP %R12 |
0x40148c POP %R13 |
0x40148e POP %R14 |
0x401490 POP %R15 |
0x401492 POP %RBP |
0x401493 RET |
0x401494 CALL 4010a0 <__stack_chk_fail@plt> |
0x401499 NOPL (%RAX) |
0x4014a0 ENDBR64 |
0x4014a4 STMXCSR -0x4(%RSP) |
0x4014a9 ORL $0x8040,-0x4(%RSP) |
0x4014b1 LDMXCSR -0x4(%RSP) |
0x4014b6 RET |
0x4014b7 NOPW (%RAX,%RAX,1) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
○100.00 | __libc_init_first | | libc.so.6 |
Path / |
Source file and lines | main.c:50-192 |
Module | exec |
nb instructions | 76 |
nb uops | 88 |
loop length | 341 |
used x86 registers | 13 |
used mmx registers | 0 |
used xmm registers | 4 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 5 |
micro-operation queue | 22.00 cycles |
front end | 22.00 cycles |
Metric | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 |
---|---|---|---|---|---|---|---|---|
uops | 10.50 | 10.50 | 12.50 | 12.17 | 20.00 | 10.50 | 10.50 | 12.33 |
cycles | 10.50 | 10.50 | 12.50 | 12.17 | 20.00 | 10.50 | 10.50 | 12.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 20.25 |
Stall cycles | 0.00 |
Front-end | 22.00 |
Dispatch | 20.00 |
Overall L1 | 22.00 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 50% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 9% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 18% |
all | 8% |
load | 10% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 6% |
all | 17% |
load | 9% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 9% |
load | 10% |
store | 9% |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 9% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV $0xbb8,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %EDI,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
LEA 0x1f51(%RIP),%RDI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x48,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %FS:0x28,%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 401030 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV $0x1,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA 0x1f3e(%RIP),%RDI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 401030 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOVQ $0x186a0,-0x68(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 2 | 1 |
CMP $0x2,%EBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JNE 401132 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV 0x8(%R12),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV $0x2,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA 0x1f2d(%RIP),%RSI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 401070 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
CMP $0x1,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
SBB %RAX,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
AND $-0x14c08,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
ADD $0x186a0,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RAX,-0x68(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV -0x68(%RBP),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA 0x1f0a(%RIP),%RDI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
LEA 0x18a93a(%RIP),%R13 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA 0x1ec3b3(%RIP),%R14 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA 0x24de2c(%RIP),%R15 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 401030 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
LEA -0x50(%RBP),%RSI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
LEA 0x778(%RIP),%RDI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0,-0x50(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 2 | 1 |
MOV %RSI,-0x70(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CALL 401090 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
VMOVSS 0x1f04(%RIP),%XMM8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV $0x190,%R11D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VXORPD %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
LEA 0x128e6e(%RIP),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VXORPS %XMM7,%XMM7,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMULSD 0x1c35(%RIP),%XMM9,%XMM0 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
LEA 0x1bee(%RIP),%RDI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x1,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CALL 401030 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV -0x38(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
SUB %FS:0x28,%RAX | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
JNE 401494 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
ADD $0x48,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
CALL 4010a0 <__stack_chk_fail@plt> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
ENDBR64 | |||||||||||
STMXCSR -0x4(%RSP) | 3 | 1.50 | 0 | 0.33 | 0.33 | 1 | 0 | 0.50 | 0.33 | 10 | 1 |
ORL $0x8040,-0x4(%RSP) | 2 | 0.25 | 0.25 | 0.83 | 0.83 | 1 | 0.25 | 0.25 | 0.33 | 5 | 1 |
LDMXCSR -0x4(%RSP) | 4 | 1 | 0 | 0.50 | 0.50 | 0 | 1 | 1 | 0 | 5 | 3 |
RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
Source file and lines | main.c:50-192 |
Module | exec |
nb instructions | 76 |
nb uops | 88 |
loop length | 341 |
used x86 registers | 13 |
used mmx registers | 0 |
used xmm registers | 4 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 5 |
micro-operation queue | 22.00 cycles |
front end | 22.00 cycles |
Metric | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 |
---|---|---|---|---|---|---|---|---|
uops | 10.50 | 10.50 | 12.50 | 12.17 | 20.00 | 10.50 | 10.50 | 12.33 |
cycles | 10.50 | 10.50 | 12.50 | 12.17 | 20.00 | 10.50 | 10.50 | 12.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 20.25 |
Stall cycles | 0.00 |
Front-end | 22.00 |
Dispatch | 20.00 |
Overall L1 | 22.00 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 50% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 9% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 18% |
all | 8% |
load | 10% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 6% |
all | 17% |
load | 9% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 9% |
load | 10% |
store | 9% |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 9% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV $0xbb8,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %EDI,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
LEA 0x1f51(%RIP),%RDI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x48,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %FS:0x28,%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 401030 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV $0x1,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA 0x1f3e(%RIP),%RDI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 401030 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOVQ $0x186a0,-0x68(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 2 | 1 |
CMP $0x2,%EBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JNE 401132 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV 0x8(%R12),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV $0x2,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA 0x1f2d(%RIP),%RSI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 401070 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
CMP $0x1,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
SBB %RAX,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
AND $-0x14c08,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
ADD $0x186a0,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RAX,-0x68(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV -0x68(%RBP),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA 0x1f0a(%RIP),%RDI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
LEA 0x18a93a(%RIP),%R13 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA 0x1ec3b3(%RIP),%R14 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA 0x24de2c(%RIP),%R15 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 401030 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
LEA -0x50(%RBP),%RSI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
LEA 0x778(%RIP),%RDI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0,-0x50(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 2 | 1 |
MOV %RSI,-0x70(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CALL 401090 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
VMOVSS 0x1f04(%RIP),%XMM8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV $0x190,%R11D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VXORPD %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
LEA 0x128e6e(%RIP),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VXORPS %XMM7,%XMM7,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMULSD 0x1c35(%RIP),%XMM9,%XMM0 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
LEA 0x1bee(%RIP),%RDI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x1,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CALL 401030 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV -0x38(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
SUB %FS:0x28,%RAX | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
JNE 401494 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
ADD $0x48,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
CALL 4010a0 <__stack_chk_fail@plt> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
ENDBR64 | |||||||||||
STMXCSR -0x4(%RSP) | 3 | 1.50 | 0 | 0.33 | 0.33 | 1 | 0 | 0.50 | 0.33 | 10 | 1 |
ORL $0x8040,-0x4(%RSP) | 2 | 0.25 | 0.25 | 0.83 | 0.83 | 1 | 0.25 | 0.25 | 0.33 | 5 | 1 |
LDMXCSR -0x4(%RSP) | 4 | 1 | 0 | 0.50 | 0.50 | 0 | 1 | 1 | 0 | 5 | 3 |
RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼main– | 0.06 | 0.02 |
▼Loop 1 - main.c:97-169 - exec– | 0 | 0 |
○Loop 0 - main.c:111-116 - exec | 0.06 | 0.02 |