Function: Step10_orig.A | Module: exec | Source: Step10_orig.c:10-41 [...] | Coverage: 63.16% |
---|
Function: Step10_orig.A | Module: exec | Source: Step10_orig.c:10-41 [...] | Coverage: 63.16% |
---|
/beegfs/hackathon/users/eoseret/qaas_runs/171-094-7986/intel/HACCmk/build/HACCmk/src/Step10_orig.c: 10 - 41 |
-------------------------------------------------------------------------------- |
10: { |
[...] |
19: for ( j = 0; j < count1; j++ ) |
20: { |
21: dxc = xx1[j] - xxi; |
22: dyc = yy1[j] - yyi; |
23: dzc = zz1[j] - zzi; |
24: |
25: r2 = dxc * dxc + dyc * dyc + dzc * dzc; |
26: |
27: m = ( r2 < fsrrmax2 ) ? mass1[j] : 0.0f; |
28: |
29: f = pow( r2 + mp_rsm2, -1.5 ) - ( ma0 + r2*(ma1 + r2*(ma2 + r2*(ma3 + r2*(ma4 + r2*ma5))))); |
30: |
31: f = ( r2 > 0.0f ) ? m * f : 0.0f; |
32: |
33: xi = xi + f * dxc; |
34: yi = yi + f * dyc; |
35: zi = zi + f * dzc; |
36: } |
37: |
38: *dxi = xi; |
39: *dyi = yi; |
40: *dzi = zi; |
41: } |
0x403080 PUSH %RBP |
0x403081 MOV %RSP,%RBP |
0x403084 PUSH %R15 |
0x403086 PUSH %R14 |
0x403088 PUSH %RBX |
0x403089 MOVAPS %XMM4,-0x70(%RBP) |
0x40308d MOVAPS %XMM3,-0x60(%RBP) |
0x403091 MOVAPS %XMM2,-0x50(%RBP) |
0x403095 MOVAPD %XMM1,-0x40(%RBP) |
0x40309a MOVAPS %XMM0,-0x30(%RBP) |
0x40309e MOV 0x18(%RBP),%RAX |
0x4030a2 MOV 0x10(%RBP),%R10 |
0x4030a6 TEST %EDI,%EDI |
0x4030a8 JLE 4032e8 |
0x4030ae MOV %EDI,%EDI |
0x4030b0 MOV $-0x4,%R11D |
0x4030b6 XORPS %XMM5,%XMM5 |
0x4030b9 AND %RDI,%R11 |
0x4030bc JE 403306 |
0x4030c2 MOVAPS -0x30(%RBP),%XMM0 |
0x4030c6 SHUFPS $0,%XMM0,%XMM0 |
0x4030ca MOVAPS %XMM0,-0x90(%RBP) |
0x4030d1 MOVAPS -0x40(%RBP),%XMM0 |
0x4030d5 SHUFPS $0,%XMM0,%XMM0 |
0x4030d9 MOVAPS %XMM0,-0x80(%RBP) |
0x4030dd MOVAPS -0x50(%RBP),%XMM8 |
0x4030e2 SHUFPS $0,%XMM8,%XMM8 |
0x4030e7 MOVAPS -0x60(%RBP),%XMM9 |
0x4030ec SHUFPS $0,%XMM9,%XMM9 |
0x4030f1 MOVAPS -0x70(%RBP),%XMM10 |
0x4030f6 SHUFPS $0,%XMM10,%XMM10 |
0x4030fb XORPS %XMM5,%XMM5 |
0x4030fe XOR %EBX,%EBX |
0x403100 XORPS %XMM11,%XMM11 |
0x403104 XORPS %XMM14,%XMM14 |
0x403108 JMP 4031da |
0x40310d NOPL (%RAX) |
(9) 0x403110 MOVAPS %XMM13,%XMM1 |
(9) 0x403114 ADDPS %XMM10,%XMM1 |
(9) 0x403118 CVTPS2PD %XMM1,%XMM2 |
(9) 0x40311b MOVHLPS %XMM1,%XMM1 |
(9) 0x40311e CVTPS2PD %XMM1,%XMM0 |
(9) 0x403121 SQRTPD %XMM0,%XMM6 |
(9) 0x403125 MULPD %XMM0,%XMM0 |
(9) 0x403129 MOVAPD 0x726f(%RIP),%XMM7 |
(9) 0x403131 MOVAPD %XMM7,%XMM1 |
(9) 0x403135 DIVPD %XMM0,%XMM1 |
(9) 0x403139 SQRTPD %XMM2,%XMM0 |
(9) 0x40313d MULPD %XMM6,%XMM1 |
(9) 0x403141 MULPD %XMM2,%XMM2 |
(9) 0x403145 DIVPD %XMM2,%XMM7 |
(9) 0x403149 MULPD %XMM0,%XMM7 |
(9) 0x40314d MOVAPS %XMM13,%XMM0 |
(9) 0x403151 MULPS 0x7258(%RIP),%XMM0 |
(9) 0x403158 ADDPS 0x7261(%RIP),%XMM0 |
(9) 0x40315f MULPS %XMM13,%XMM0 |
(9) 0x403163 ADDPS 0x7266(%RIP),%XMM0 |
(9) 0x40316a MULPS %XMM13,%XMM0 |
(9) 0x40316e ADDPS 0x726b(%RIP),%XMM0 |
(9) 0x403175 MULPS %XMM13,%XMM0 |
(9) 0x403179 ADDPS 0x7270(%RIP),%XMM0 |
(9) 0x403180 MULPS %XMM13,%XMM0 |
(9) 0x403184 ADDPS 0x7275(%RIP),%XMM0 |
(9) 0x40318b CVTPS2PD %XMM0,%XMM2 |
(9) 0x40318e ADDPD %XMM7,%XMM2 |
(9) 0x403192 MOVHLPS %XMM0,%XMM0 |
(9) 0x403195 CVTPS2PD %XMM0,%XMM0 |
(9) 0x403198 CVTPD2PS %XMM2,%XMM2 |
(9) 0x40319c ADDPD %XMM1,%XMM0 |
(9) 0x4031a0 CVTPD2PS %XMM0,%XMM0 |
(9) 0x4031a4 UNPCKLPD %XMM0,%XMM2 |
(9) 0x4031a8 MULPS %XMM3,%XMM2 |
(9) 0x4031ab XORPD %XMM0,%XMM0 |
(9) 0x4031af CMPPS $0x1,%XMM13,%XMM0 |
(9) 0x4031b4 ANDPS %XMM2,%XMM0 |
(9) 0x4031b7 MULPS %XMM0,%XMM12 |
(9) 0x4031bb ADDPS %XMM12,%XMM14 |
(9) 0x4031bf MULPS %XMM0,%XMM15 |
(9) 0x4031c3 ADDPS %XMM15,%XMM11 |
(9) 0x4031c7 MULPS %XMM4,%XMM0 |
(9) 0x4031ca ADDPS %XMM0,%XMM5 |
(9) 0x4031cd ADD $0x4,%RBX |
(9) 0x4031d1 CMP %R11,%RBX |
(9) 0x4031d4 JAE 40329c |
(9) 0x4031da MOVUPS (%RSI,%RBX,4),%XMM12 |
(9) 0x4031df SUBPS -0x90(%RBP),%XMM12 |
(9) 0x4031e7 MOVUPS (%RDX,%RBX,4),%XMM15 |
(9) 0x4031ec SUBPS -0x80(%RBP),%XMM15 |
(9) 0x4031f1 MOVUPS (%RCX,%RBX,4),%XMM4 |
(9) 0x4031f5 SUBPS %XMM8,%XMM4 |
(9) 0x4031f9 MOVAPS %XMM12,%XMM1 |
(9) 0x4031fd MULPS %XMM12,%XMM1 |
(9) 0x403201 MOVAPS %XMM15,%XMM3 |
(9) 0x403205 MULPS %XMM15,%XMM3 |
(9) 0x403209 ADDPS %XMM1,%XMM3 |
(9) 0x40320c MOVAPS %XMM4,%XMM13 |
(9) 0x403210 MULPS %XMM4,%XMM13 |
(9) 0x403214 ADDPS %XMM3,%XMM13 |
(9) 0x403218 LEA (%R8,%RBX,4),%R14 |
(9) 0x40321c MOVAPS %XMM13,%XMM1 |
(9) 0x403220 CMPPS $0x1,%XMM9,%XMM1 |
(9) 0x403225 MOVMSKPS %XMM1,%R15D |
(9) 0x403229 XORPS %XMM3,%XMM3 |
(9) 0x40322c TEST $0x1,%R15B |
(9) 0x403230 JNE 403250 |
(9) 0x403232 TEST $0x2,%R15B |
(9) 0x403236 JNE 40325b |
(9) 0x403238 TEST $0x4,%R15B |
(9) 0x40323c JNE 403271 |
(9) 0x40323e TEST $0x8,%R15B |
(9) 0x403242 JE 403110 |
(9) 0x403248 JMP 403289 |
0x40324a NOPW (%RAX,%RAX,1) |
(9) 0x403250 MOVSS (%R14),%XMM3 |
(9) 0x403255 TEST $0x2,%R15B |
(9) 0x403259 JE 403238 |
(9) 0x40325b MOVSS 0x4(%R14),%XMM1 |
(9) 0x403261 MOVLHPS %XMM3,%XMM1 |
(9) 0x403264 SHUFPS $-0x1e,%XMM3,%XMM1 |
(9) 0x403268 MOVAPS %XMM1,%XMM3 |
(9) 0x40326b TEST $0x4,%R15B |
(9) 0x40326f JE 40323e |
(9) 0x403271 MOVSS 0x8(%R14),%XMM1 |
(9) 0x403277 SHUFPS $0x30,%XMM3,%XMM1 |
(9) 0x40327b SHUFPS $-0x7c,%XMM1,%XMM3 |
(9) 0x40327f TEST $0x8,%R15B |
(9) 0x403283 JE 403110 |
(9) 0x403289 MOVSS 0xc(%R14),%XMM1 |
(9) 0x40328f SHUFPS $-0x1c,%XMM3,%XMM1 |
(9) 0x403293 SHUFPS $0x24,%XMM1,%XMM3 |
(9) 0x403297 JMP 403110 |
0x40329c MOVAPS %XMM14,%XMM0 |
0x4032a0 UNPCKHPD %XMM14,%XMM0 |
0x4032a5 ADDPS %XMM14,%XMM0 |
0x4032a9 MOVAPS %XMM0,%XMM6 |
0x4032ac SHUFPS $0x55,%XMM0,%XMM6 |
0x4032b0 ADDSS %XMM0,%XMM6 |
0x4032b4 MOVAPS %XMM11,%XMM0 |
0x4032b8 UNPCKHPD %XMM11,%XMM0 |
0x4032bd ADDPS %XMM11,%XMM0 |
0x4032c1 MOVAPS %XMM0,%XMM7 |
0x4032c4 SHUFPS $0x55,%XMM0,%XMM7 |
0x4032c8 ADDSS %XMM0,%XMM7 |
0x4032cc MOVAPS %XMM5,%XMM0 |
0x4032cf UNPCKHPD %XMM5,%XMM0 |
0x4032d3 ADDPS %XMM5,%XMM0 |
0x4032d6 MOVAPS %XMM0,%XMM5 |
0x4032d9 SHUFPS $0x55,%XMM0,%XMM5 |
0x4032dd ADDSS %XMM0,%XMM5 |
0x4032e1 CMP %RDI,%R11 |
0x4032e4 JE 4032f1 |
0x4032e6 JMP 40330f |
0x4032e8 XORPS %XMM6,%XMM6 |
0x4032eb XORPS %XMM7,%XMM7 |
0x4032ee XORPS %XMM5,%XMM5 |
0x4032f1 MOVSS %XMM6,(%R9) |
0x4032f6 MOVSS %XMM7,(%R10) |
0x4032fb MOVSS %XMM5,(%RAX) |
0x4032ff POP %RBX |
0x403300 POP %R14 |
0x403302 POP %R15 |
0x403304 POP %RBP |
0x403305 RET |
0x403306 XOR %R11D,%R11D |
0x403309 XORPS %XMM7,%XMM7 |
0x40330c XORPS %XMM6,%XMM6 |
0x40330f MOVSS 0x7104(%RIP),%XMM8 |
0x403318 MOVSS 0x70ff(%RIP),%XMM9 |
0x403321 MOVSS 0x70fa(%RIP),%XMM10 |
0x40332a MOVSS 0x70f5(%RIP),%XMM11 |
0x403333 MOVSS 0x70f0(%RIP),%XMM12 |
0x40333c JMP 4033f5 |
0x403341 NOPW %CS:(%RAX,%RAX,1) |
(8) 0x403350 MOVAPS %XMM15,%XMM0 |
(8) 0x403354 ADDSS -0x70(%RBP),%XMM0 |
(8) 0x403359 CVTSS2SD %XMM0,%XMM0 |
(8) 0x40335d XORPS %XMM1,%XMM1 |
(8) 0x403360 SQRTSD %XMM0,%XMM1 |
(8) 0x403364 MULSD %XMM0,%XMM0 |
(8) 0x403368 MOVSD 0x70a0(%RIP),%XMM3 |
(8) 0x403370 DIVSD %XMM0,%XMM3 |
(8) 0x403374 MULSD %XMM1,%XMM3 |
(8) 0x403378 MOVAPS %XMM15,%XMM0 |
(8) 0x40337c MULSS 0x7094(%RIP),%XMM0 |
(8) 0x403384 ADDSS %XMM8,%XMM0 |
(8) 0x403389 MULSS %XMM15,%XMM0 |
(8) 0x40338e ADDSS %XMM9,%XMM0 |
(8) 0x403393 MULSS %XMM15,%XMM0 |
(8) 0x403398 ADDSS %XMM10,%XMM0 |
(8) 0x40339d MULSS %XMM15,%XMM0 |
(8) 0x4033a2 ADDSS %XMM11,%XMM0 |
(8) 0x4033a7 MULSS %XMM15,%XMM0 |
(8) 0x4033ac ADDSS %XMM12,%XMM0 |
(8) 0x4033b1 CVTSS2SD %XMM0,%XMM0 |
(8) 0x4033b5 ADDSD %XMM3,%XMM0 |
(8) 0x4033b9 CVTSD2SS %XMM0,%XMM0 |
(8) 0x4033bd MULSS %XMM4,%XMM0 |
(8) 0x4033c1 XORPD %XMM1,%XMM1 |
(8) 0x4033c5 CMPSS $0x1,%XMM15,%XMM1 |
(8) 0x4033cb ANDPS %XMM0,%XMM1 |
(8) 0x4033ce MULSS %XMM1,%XMM14 |
(8) 0x4033d3 ADDSS %XMM14,%XMM6 |
(8) 0x4033d8 MULSS %XMM1,%XMM2 |
(8) 0x4033dc ADDSS %XMM2,%XMM7 |
(8) 0x4033e0 MULSS %XMM13,%XMM1 |
(8) 0x4033e5 ADDSS %XMM1,%XMM5 |
(8) 0x4033e9 INC %R11 |
(8) 0x4033ec CMP %R11,%RDI |
(8) 0x4033ef JE 4032f1 |
(8) 0x4033f5 MOVSS (%RSI,%R11,4),%XMM14 |
(8) 0x4033fb SUBSS -0x30(%RBP),%XMM14 |
(8) 0x403401 MOVSS (%RDX,%R11,4),%XMM2 |
(8) 0x403407 SUBSS -0x40(%RBP),%XMM2 |
(8) 0x40340c MOVSS (%RCX,%R11,4),%XMM13 |
(8) 0x403412 SUBSS -0x50(%RBP),%XMM13 |
(8) 0x403418 MOVAPS %XMM14,%XMM0 |
(8) 0x40341c MULSS %XMM14,%XMM0 |
(8) 0x403421 MOVAPS %XMM2,%XMM4 |
(8) 0x403424 MULSS %XMM2,%XMM4 |
(8) 0x403428 ADDSS %XMM0,%XMM4 |
(8) 0x40342c MOVAPS %XMM13,%XMM15 |
(8) 0x403430 MULSS %XMM13,%XMM15 |
(8) 0x403435 ADDSS %XMM4,%XMM15 |
(8) 0x40343a XORPS %XMM4,%XMM4 |
(8) 0x40343d UCOMISS -0x60(%RBP),%XMM15 |
(8) 0x403442 JAE 403350 |
(8) 0x403448 MOVSS (%R8,%R11,4),%XMM4 |
(8) 0x40344e JMP 403350 |
0x403453 NOPW %CS:(%RAX,%RAX,1) |
Path / |
Source file and lines | Step10_orig.c:10-41 |
Module | exec |
nb instructions | 81 |
nb uops | 77 |
loop length | 343 |
used x86 registers | 10 |
used mmx registers | 0 |
used xmm registers | 14 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 9 |
micro-operation queue | 12.83 cycles |
front end | 12.83 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.50 | 1.50 | 1.25 | 1.25 | 3.50 | 7.33 | 7.33 | 7.33 | 0.00 | 5.67 | 5.67 | 5.67 | 5.00 | 5.00 |
cycles | 3.50 | 1.50 | 1.25 | 1.25 | 3.50 | 7.33 | 7.33 | 7.33 | 0.00 | 5.67 | 5.67 | 5.67 | 5.00 | 5.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 12.83 |
Dispatch | 7.33 |
Overall L1 | 12.83 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 73% |
load | 50% |
store | 70% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 88% |
all | 70% |
load | 50% |
store | 70% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 82% |
all | 6% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 6% |
all | 20% |
load | 15% |
store | 19% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 15% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 23% |
all | 19% |
load | 15% |
store | 19% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 15% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 22% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOVAPS %XMM4,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
MOVAPS %XMM3,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
MOVAPS %XMM2,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
MOVAPD %XMM1,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
MOVAPS %XMM0,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
MOV 0x18(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RBP),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
TEST %EDI,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 4032e8 <Step10_orig.A+0x268> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $-0x4,%R11D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XORPS %XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND %RDI,%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JE 403306 <Step10_orig.A+0x286> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOVAPS -0x30(%RBP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
SHUFPS $0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
MOVAPS %XMM0,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
MOVAPS -0x40(%RBP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
SHUFPS $0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
MOVAPS %XMM0,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
MOVAPS -0x50(%RBP),%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
SHUFPS $0,%XMM8,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
MOVAPS -0x60(%RBP),%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
SHUFPS $0,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
MOVAPS -0x70(%RBP),%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
SHUFPS $0,%XMM10,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
XORPS %XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XORPS %XMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XORPS %XMM14,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 4031da <Step10_orig.A+0x15a> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
MOVAPS %XMM14,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
UNPCKHPD %XMM14,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.40 |
ADDPS %XMM14,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
MOVAPS %XMM0,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHUFPS $0x55,%XMM0,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
ADDSS %XMM0,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
MOVAPS %XMM11,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
UNPCKHPD %XMM11,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.40 |
ADDPS %XMM11,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
MOVAPS %XMM0,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHUFPS $0x55,%XMM0,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
ADDSS %XMM0,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
MOVAPS %XMM5,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
UNPCKHPD %XMM5,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.40 |
ADDPS %XMM5,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
MOVAPS %XMM0,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHUFPS $0x55,%XMM0,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
ADDSS %XMM0,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
CMP %RDI,%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JE 4032f1 <Step10_orig.A+0x271> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
JMP 40330f <Step10_orig.A+0x28f> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
XORPS %XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XORPS %XMM7,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XORPS %XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOVSS %XMM6,(%R9) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
MOVSS %XMM7,(%R10) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
MOVSS %XMM5,(%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
XOR %R11D,%R11D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XORPS %XMM7,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XORPS %XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOVSS 0x7104(%RIP),%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOVSS 0x70ff(%RIP),%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOVSS 0x70fa(%RIP),%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOVSS 0x70f5(%RIP),%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOVSS 0x70f0(%RIP),%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
JMP 4033f5 <Step10_orig.A+0x375> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Source file and lines | Step10_orig.c:10-41 |
Module | exec |
nb instructions | 81 |
nb uops | 77 |
loop length | 343 |
used x86 registers | 10 |
used mmx registers | 0 |
used xmm registers | 14 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 9 |
micro-operation queue | 12.83 cycles |
front end | 12.83 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.50 | 1.50 | 1.25 | 1.25 | 3.50 | 7.33 | 7.33 | 7.33 | 0.00 | 5.67 | 5.67 | 5.67 | 5.00 | 5.00 |
cycles | 3.50 | 1.50 | 1.25 | 1.25 | 3.50 | 7.33 | 7.33 | 7.33 | 0.00 | 5.67 | 5.67 | 5.67 | 5.00 | 5.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 12.83 |
Dispatch | 7.33 |
Overall L1 | 12.83 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 73% |
load | 50% |
store | 70% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 88% |
all | 70% |
load | 50% |
store | 70% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 82% |
all | 6% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 6% |
all | 20% |
load | 15% |
store | 19% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 15% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 23% |
all | 19% |
load | 15% |
store | 19% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 15% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 22% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOVAPS %XMM4,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
MOVAPS %XMM3,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
MOVAPS %XMM2,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
MOVAPD %XMM1,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
MOVAPS %XMM0,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
MOV 0x18(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RBP),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
TEST %EDI,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 4032e8 <Step10_orig.A+0x268> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $-0x4,%R11D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XORPS %XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND %RDI,%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JE 403306 <Step10_orig.A+0x286> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOVAPS -0x30(%RBP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
SHUFPS $0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
MOVAPS %XMM0,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
MOVAPS -0x40(%RBP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
SHUFPS $0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
MOVAPS %XMM0,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
MOVAPS -0x50(%RBP),%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
SHUFPS $0,%XMM8,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
MOVAPS -0x60(%RBP),%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
SHUFPS $0,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
MOVAPS -0x70(%RBP),%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
SHUFPS $0,%XMM10,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
XORPS %XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XORPS %XMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XORPS %XMM14,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 4031da <Step10_orig.A+0x15a> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
MOVAPS %XMM14,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
UNPCKHPD %XMM14,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.40 |
ADDPS %XMM14,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
MOVAPS %XMM0,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHUFPS $0x55,%XMM0,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
ADDSS %XMM0,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
MOVAPS %XMM11,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
UNPCKHPD %XMM11,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.40 |
ADDPS %XMM11,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
MOVAPS %XMM0,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHUFPS $0x55,%XMM0,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
ADDSS %XMM0,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
MOVAPS %XMM5,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
UNPCKHPD %XMM5,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.40 |
ADDPS %XMM5,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
MOVAPS %XMM0,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHUFPS $0x55,%XMM0,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
ADDSS %XMM0,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
CMP %RDI,%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JE 4032f1 <Step10_orig.A+0x271> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
JMP 40330f <Step10_orig.A+0x28f> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
XORPS %XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XORPS %XMM7,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XORPS %XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOVSS %XMM6,(%R9) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
MOVSS %XMM7,(%R10) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
MOVSS %XMM5,(%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
XOR %R11D,%R11D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XORPS %XMM7,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XORPS %XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOVSS 0x7104(%RIP),%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOVSS 0x70ff(%RIP),%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOVSS 0x70fa(%RIP),%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOVSS 0x70f5(%RIP),%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOVSS 0x70f0(%RIP),%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
JMP 4033f5 <Step10_orig.A+0x375> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼Step10_orig.A– | 63.16 | 8.42 |
○Loop 9 - Step10_orig.c:19-35 - exec | 63.15 | 8.23 |
○Loop 8 - Step10_orig.c:19-35 - exec | 0 | 0 |