Function: accelerate_kernel_.DIR.OMP.PARALLEL.2 | Module: exec | Source: accelerate_kernel.f90:57-79 | Coverage: 4.83% |
---|
Function: accelerate_kernel_.DIR.OMP.PARALLEL.2 | Module: exec | Source: accelerate_kernel.f90:57-79 | Coverage: 4.83% |
---|
/scratch_na/users/xoserete/qaas_runs/171-322-0339/intel/CloverLeafFC/build/CloverLeafFC/CloverLeaf_ref/kernels/accelerate_kernel.f90: 57 - 79 |
-------------------------------------------------------------------------------- |
57: !$OMP PARALLEL |
58: |
59: !$OMP DO PRIVATE(j,k,stepbymass_s) |
60: DO k=y_min,y_max+1 |
61: !$OMP SIMD |
62: DO j=x_min,x_max+1 |
63: stepbymass_s=halfdt/((density0(j-1,k-1)*volume(j-1,k-1) & |
64: +density0(j ,k-1)*volume(j ,k-1) & |
65: +density0(j ,k )*volume(j ,k ) & |
66: +density0(j-1,k )*volume(j-1,k )) & |
67: *0.25_8) |
68: |
69: xvel1(j,k)=xvel0(j,k)-stepbymass_s*(xarea(j ,k )*(pressure(j ,k )-pressure(j-1,k )) & |
70: +xarea(j ,k-1)*(pressure(j ,k-1)-pressure(j-1,k-1))) |
71: yvel1(j,k)=yvel0(j,k)-stepbymass_s*(yarea(j ,k )*(pressure(j ,k )-pressure(j ,k-1)) & |
72: +yarea(j-1,k )*(pressure(j-1,k )-pressure(j-1,k-1))) |
73: xvel1(j,k)=xvel1(j,k)-stepbymass_s*(xarea(j ,k )*(viscosity(j ,k )-viscosity(j-1,k )) & |
74: +xarea(j ,k-1)*(viscosity(j ,k-1)-viscosity(j-1,k-1))) |
75: yvel1(j,k)=yvel1(j,k)-stepbymass_s*(yarea(j ,k )*(viscosity(j ,k )-viscosity(j ,k-1)) & |
76: +yarea(j-1,k )*(viscosity(j-1,k )-viscosity(j-1,k-1))) |
77: ENDDO |
78: ENDDO |
79: !$OMP END DO |
0x424550 PUSH %RBP |
0x424551 MOV %RSP,%RBP |
0x424554 PUSH %R15 |
0x424556 PUSH %R14 |
0x424558 PUSH %R13 |
0x42455a PUSH %R12 |
0x42455c PUSH %RBX |
0x42455d AND $-0x20,%RSP |
0x424561 SUB $0x480,%RSP |
0x424568 MOV 0xd0(%RBP),%RAX |
0x42456f MOV %RAX,0x1f0(%RSP) |
0x424577 MOV 0xc8(%RBP),%RAX |
0x42457e MOV %RAX,0x1e8(%RSP) |
0x424586 MOV 0xc0(%RBP),%RAX |
0x42458d MOV %RAX,0x1e0(%RSP) |
0x424595 MOV 0xb8(%RBP),%RAX |
0x42459c MOV %RAX,0x1d8(%RSP) |
0x4245a4 MOV 0xb0(%RBP),%RAX |
0x4245ab MOV %RAX,0x1d0(%RSP) |
0x4245b3 MOV 0xa8(%RBP),%RAX |
0x4245ba MOV %RAX,0x1c8(%RSP) |
0x4245c2 MOV 0xa0(%RBP),%RAX |
0x4245c9 MOV %RAX,0x1c0(%RSP) |
0x4245d1 MOV 0x98(%RBP),%RAX |
0x4245d8 MOV %RAX,0x1b8(%RSP) |
0x4245e0 MOV 0x70(%RBP),%R14D |
0x4245e4 MOV 0x68(%RBP),%EAX |
0x4245e7 SUB %R14D,%EAX |
0x4245ea INC %EAX |
0x4245ec MOV 0x90(%RBP),%RCX |
0x4245f3 MOV %RCX,0x1b0(%RSP) |
0x4245fb MOV 0x88(%RBP),%RCX |
0x424602 MOV %RCX,0x1a8(%RSP) |
0x42460a MOV 0x80(%RBP),%R13 |
0x424611 MOV 0x78(%RBP),%R15 |
0x424615 MOV 0x60(%RBP),%RCX |
0x424619 MOV %RCX,0xb0(%RSP) |
0x424621 MOV 0x58(%RBP),%RCX |
0x424625 MOV %RCX,0xa8(%RSP) |
0x42462d MOV 0x50(%RBP),%RCX |
0x424631 MOV %RCX,0xa0(%RSP) |
0x424639 MOV 0x48(%RBP),%RCX |
0x42463d MOV %RCX,0x98(%RSP) |
0x424645 MOV 0x40(%RBP),%RCX |
0x424649 MOV %RCX,0x90(%RSP) |
0x424651 MOV 0x38(%RBP),%RCX |
0x424655 MOV %RCX,0x88(%RSP) |
0x42465d MOV 0x30(%RBP),%RCX |
0x424661 MOV %RCX,0x80(%RSP) |
0x424669 MOV 0x28(%RBP),%RCX |
0x42466d MOV %RCX,0x78(%RSP) |
0x424672 MOV 0x20(%RBP),%RCX |
0x424676 MOV %RCX,0x70(%RSP) |
0x42467b MOV 0x18(%RBP),%RCX |
0x42467f MOV %RCX,0x68(%RSP) |
0x424684 MOV 0x10(%RBP),%RBX |
0x424688 MOVL $0,0x64(%RSP) |
0x424690 JS 424711 |
0x424692 MOV %RDX,%R12 |
0x424695 MOV %R8,0x10(%RSP) |
0x42469a MOV %RDI,0x120(%RSP) |
0x4246a2 MOV (%RDI),%ESI |
0x4246a4 MOVL $0,0x3c(%RSP) |
0x4246ac MOV %EAX,0x38(%RSP) |
0x4246b0 MOVL $0x1,0x60(%RSP) |
0x4246b8 SUB $0x8,%RSP |
0x4246bc LEA 0x68(%RSP),%RAX |
0x4246c1 LEA 0x6c(%RSP),%RCX |
0x4246c6 LEA 0x44(%RSP),%R8 |
0x4246cb LEA 0x40(%RSP),%R9 |
0x4246d0 MOV $0x733290,%EDI |
0x4246d5 MOV %ESI,0x5c(%RSP) |
0x4246d9 MOV $0x22,%EDX |
0x4246de PUSH $0x1 |
0x4246e0 PUSH $0x1 |
0x4246e2 PUSH %RAX |
0x4246e3 CALL 404520 <__kmpc_for_static_init_4@plt> |
0x4246e8 ADD $0x20,%RSP |
0x4246ec MOV 0x3c(%RSP),%EAX |
0x4246f0 MOV 0x38(%RSP),%EDI |
0x4246f4 SUB %EAX,%EDI |
0x4246f6 JAE 42472b |
0x4246f8 MOV $0x7332b0,%EDI |
0x4246fd MOV 0x54(%RSP),%ESI |
0x424701 VZEROUPPER |
0x424704 CALL 404110 <__kmpc_for_static_fini@plt> |
0x424709 MOV 0x120(%RSP),%RDI |
0x424711 MOV (%RDI),%ESI |
0x424713 MOV $0x7332d0,%EDI |
0x424718 LEA -0x28(%RBP),%RSP |
0x42471c POP %RBX |
0x42471d POP %R12 |
0x42471f POP %R13 |
0x424721 POP %R14 |
0x424723 POP %R15 |
0x424725 POP %RBP |
0x424726 JMP 4045e0 |
0x42472b MOV %RAX,%RDX |
0x42472e VMOVQ %R12,%XMM0 |
0x424733 ADD %R14D,%EDX |
0x424736 MOVSXD (%R13),%RCX |
0x42473a MOV (%R15),%EAX |
0x42473d SUB %ECX,%EAX |
0x42473f LEA 0x1(%RAX),%R11D |
0x424743 ADD $0x2,%EAX |
0x424746 CMP $0x2,%EAX |
0x424749 MOV $0x1,%ESI |
0x42474e CMOVGE %EAX,%ESI |
0x424751 MOV %ESI,%EAX |
0x424753 AND $0x7ffffffc,%EAX |
0x424758 MOV %RAX,0x48(%RSP) |
0x42475d MOVSXD 0x10(%RSP),%R8 |
0x424762 MOVSXD %EBX,%RAX |
0x424765 LEA -0x2(%RAX),%R9 |
0x424769 MOV %R9,0x190(%RSP) |
0x424771 VPBROADCASTQ %XMM0,%YMM0 |
0x424776 MOV %RSI,0x188(%RSP) |
0x42477e VPBROADCASTQ %RSI,%YMM1 |
0x424784 VMOVDQA %YMM1,0x220(%RSP) |
0x42478d MOV %RCX,0x198(%RSP) |
0x424795 LEA (,%RCX,8),%RCX |
0x42479d SAL $0x3,%RAX |
0x4247a1 SUB %RAX,%RCX |
0x4247a4 MOV 0x88(%RSP),%RAX |
0x4247ac ADD %RCX,%RAX |
0x4247af ADD $0x10,%RAX |
0x4247b3 MOV %RAX,0x180(%RSP) |
0x4247bb MOV $0x1,%EAX |
0x4247c0 SUB %R8,%RAX |
0x4247c3 MOV %RAX,0x178(%RSP) |
0x4247cb MOV $0x2,%EAX |
0x4247d0 SUB %R8,%RAX |
0x4247d3 MOV %RAX,0x170(%RSP) |
0x4247db MOV 0xa8(%RSP),%RAX |
0x4247e3 ADD %RCX,%RAX |
0x4247e6 ADD $0x10,%RAX |
0x4247ea MOV %RAX,0x168(%RSP) |
0x4247f2 MOV 0x90(%RSP),%RAX |
0x4247fa LEA 0x10(%RAX,%RCX,1),%RAX |
0x4247ff MOV %RAX,0x160(%RSP) |
0x424807 MOV 0x98(%RSP),%RAX |
0x42480f LEA 0x10(%RAX,%RCX,1),%RAX |
0x424814 MOV %RAX,0x158(%RSP) |
0x42481c MOV 0xa0(%RSP),%RAX |
0x424824 LEA 0x10(%RAX,%RCX,1),%RAX |
0x424829 MOV %RAX,0x150(%RSP) |
0x424831 MOV 0x68(%RSP),%RAX |
0x424836 LEA 0x10(%RAX,%RCX,1),%RAX |
0x42483b MOV %RAX,0x148(%RSP) |
0x424843 MOV 0x78(%RSP),%RAX |
0x424848 LEA 0x10(%RAX,%RCX,1),%RAX |
0x42484d MOV %RAX,0x140(%RSP) |
0x424855 MOV 0x70(%RSP),%RAX |
0x42485a LEA 0x10(%RAX,%RCX,1),%RAX |
0x42485f MOV %RAX,0x138(%RSP) |
0x424867 MOV 0x80(%RSP),%RAX |
0x42486f LEA 0x10(%RAX,%RCX,1),%RAX |
0x424874 MOV %RAX,0x130(%RSP) |
0x42487c MOV 0xb0(%RSP),%RAX |
0x424884 LEA 0x10(%RAX,%RCX,1),%RAX |
0x424889 MOV %RAX,0x128(%RSP) |
0x424891 LEA -0x2(%R8),%RAX |
0x424895 MOV %RAX,0xb8(%RSP) |
0x42489d NEG %R8 |
0x4248a0 MOV %R8,0xc0(%RSP) |
0x4248a8 VBROADCASTSD 0xce367(%RIP),%YMM3 |
0x4248b1 XOR %R8D,%R8D |
0x4248b4 MOV %RDX,0x1a0(%RSP) |
0x4248bc MOV %EDX,%EBX |
0x4248be MOV %EDI,0x34(%RSP) |
0x4248c2 MOV %R11D,0x58(%RSP) |
0x4248c7 JMP 424d8b |
0x4248cc NOPL (%RAX) |
(119) 0x4248d0 MOV %R11D,%EDI |
(119) 0x4248d3 MOV 0xc0(%RSP),%RAX |
(119) 0x4248db ADD %RDX,%RAX |
(119) 0x4248de INC %RAX |
(119) 0x4248e1 MOV %R14,%RCX |
(119) 0x4248e4 IMUL %RAX,%RCX |
(119) 0x4248e8 MOV %RCX,0xd8(%RSP) |
(119) 0x4248f0 MOV %R10,%R12 |
(119) 0x4248f3 MOV %R10,%RCX |
(119) 0x4248f6 IMUL %RAX,%RCX |
(119) 0x4248fa MOV %RCX,0xe0(%RSP) |
(119) 0x424902 MOV %RSI,%R9 |
(119) 0x424905 IMUL %RAX,%RSI |
(119) 0x424909 MOV %RSI,0x28(%RSP) |
(119) 0x42490e MOV %R8,%RSI |
(119) 0x424911 IMUL %RAX,%R8 |
(119) 0x424915 MOV %R13,%RCX |
(119) 0x424918 IMUL %RAX,%R13 |
(119) 0x42491c SUB 0xb8(%RSP),%RDX |
(119) 0x424924 IMUL %RDX,%R14 |
(119) 0x424928 IMUL %RDX,%R12 |
(119) 0x42492c IMUL %RDX,%R9 |
(119) 0x424930 MOV %R9,0xf0(%RSP) |
(119) 0x424938 IMUL %RDX,%RSI |
(119) 0x42493c MOV %RSI,0xf8(%RSP) |
(119) 0x424944 MOV %R15,%RSI |
(119) 0x424947 IMUL %RDX,%RSI |
(119) 0x42494b IMUL %RDX,%RCX |
(119) 0x42494f MOV %RCX,0x108(%RSP) |
(119) 0x424957 MOV 0x18(%RSP),%R11 |
(119) 0x42495c IMUL %RDX,%R11 |
(119) 0x424960 MOV 0x20(%RSP),%R9 |
(119) 0x424965 IMUL %RDX,%R9 |
(119) 0x424969 MOV 0x10(%RSP),%RAX |
(119) 0x42496e IMUL %RDX,%RAX |
(119) 0x424972 IMUL 0x40(%RSP),%RDX |
(119) 0x424978 MOV %RDX,0x110(%RSP) |
(119) 0x424980 MOV %RAX,%R15 |
(119) 0x424983 MOV %R11,0x100(%RSP) |
(119) 0x42498b MOV %RSI,0xe8(%RSP) |
(119) 0x424993 MOV %R8,0xd0(%RSP) |
(119) 0x42499b MOV 0x28(%RSP),%RAX |
(119) 0x4249a0 MOV %RAX,0xc8(%RSP) |
(119) 0x4249a8 XOR %EDX,%EDX |
(119) 0x4249aa MOV 0x118(%RSP),%R8 |
(119) 0x4249b2 MOV %EDI,%R11D |
(119) 0x4249b5 MOV 0x34(%RSP),%EDI |
(119) 0x4249b9 VPBROADCASTQ %RDX,%YMM1 |
(119) 0x4249bf VMOVDQA 0x220(%RSP),%YMM2 |
(119) 0x4249c8 VPSUBQ %YMM1,%YMM2,%YMM1 |
(119) 0x4249cc VPCMPNLEUQ 0xcd7e9(%RIP),%YMM1,%K1 |
(119) 0x4249d7 MOV 0xa0(%RSP),%RCX |
(119) 0x4249df MOV 0xd8(%RSP),%R10 |
(119) 0x4249e7 ADD %RCX,%R10 |
(119) 0x4249ea ADD 0x198(%RSP),%RDX |
(119) 0x4249f2 MOV 0x190(%RSP),%RSI |
(119) 0x4249fa MOV %RSI,%RAX |
(119) 0x4249fd NOT %RAX |
(119) 0x424a00 ADD %RDX,%RAX |
(119) 0x424a03 VMOVUPD (%R10,%RAX,8),%YMM26{%K1}{z} |
(119) 0x424a0a SUB %RSI,%RDX |
(119) 0x424a0d VMOVUPD (%R10,%RDX,8),%YMM27{%K1}{z} |
(119) 0x424a14 MOV 0x98(%RSP),%RSI |
(119) 0x424a1c MOV 0xe0(%RSP),%R10 |
(119) 0x424a24 ADD %RSI,%R10 |
(119) 0x424a27 VMOVUPD (%R10,%RAX,8),%YMM28{%K1}{z} |
(119) 0x424a2e VMOVUPD (%R10,%RDX,8),%YMM29{%K1}{z} |
(119) 0x424a35 ADD %RCX,%R14 |
(119) 0x424a38 VMOVUPD (%R14,%RDX,8),%YMM30{%K1}{z} |
(119) 0x424a3f VMOVUPD (%R14,%RAX,8),%YMM31{%K1}{z} |
(119) 0x424a46 ADD %RSI,%R12 |
(119) 0x424a49 VMOVUPD (%R12,%RDX,8),%YMM2{%K1}{z} |
(119) 0x424a50 VMOVUPD (%R12,%RAX,8),%YMM4{%K1}{z} |
(119) 0x424a57 MOV 0xb0(%RSP),%RCX |
(119) 0x424a5f MOV 0xf0(%RSP),%RSI |
(119) 0x424a67 ADD %RCX,%RSI |
(119) 0x424a6a VMOVUPD (%RSI,%RDX,8),%YMM18{%K1}{z} |
(119) 0x424a71 MOV 0x90(%RSP),%RSI |
(119) 0x424a79 MOV 0xf8(%RSP),%R10 |
(119) 0x424a81 ADD %RSI,%R10 |
(119) 0x424a84 VMOVUPD (%R10,%RDX,8),%YMM19{%K1}{z} |
(119) 0x424a8b VMOVUPD (%R10,%RAX,8),%YMM20{%K1}{z} |
(119) 0x424a92 MOV 0xc8(%RSP),%R10 |
(119) 0x424a9a ADD %RCX,%R10 |
(119) 0x424a9d VMOVUPD (%R10,%RDX,8),%YMM21{%K1}{z} |
(119) 0x424aa4 MOV 0xd0(%RSP),%RCX |
(119) 0x424aac ADD %RSI,%RCX |
(119) 0x424aaf VMOVUPD (%RCX,%RDX,8),%YMM22{%K1}{z} |
(119) 0x424ab6 VMOVUPD (%RCX,%RAX,8),%YMM23{%K1}{z} |
(119) 0x424abd MOV 0xe8(%RSP),%RCX |
(119) 0x424ac5 ADD 0xa8(%RSP),%RCX |
(119) 0x424acd VMOVUPD (%RCX,%RDX,8),%YMM1{%K1}{z} |
(119) 0x424ad4 VMOVUPD (%RCX,%RAX,8),%YMM5{%K1}{z} |
(119) 0x424adb MOV 0x88(%RSP),%RCX |
(119) 0x424ae3 MOV 0x108(%RSP),%RSI |
(119) 0x424aeb ADD %RCX,%RSI |
(119) 0x424aee VMOVUPD (%RSI,%RDX,8),%YMM24{%K1}{z} |
(119) 0x424af5 VMOVUPD (%RSI,%RAX,8),%YMM25{%K1}{z} |
(119) 0x424afc ADD %RCX,%R13 |
(119) 0x424aff VMOVUPD (%R13,%RAX,8),%YMM6{%K1}{z} |
(119) 0x424b07 VMOVUPD (%R13,%RDX,8),%YMM7{%K1}{z} |
(119) 0x424b0f MOV 0x100(%RSP),%RAX |
(119) 0x424b17 ADD 0x80(%RSP),%RAX |
(119) 0x424b1f VMOVUPD (%RAX,%RDX,8),%YMM8{%K1}{z} |
(119) 0x424b26 VMOVAPD 0x260(%RSP),%YMM12 |
(119) 0x424b2f VMOVAPD %YMM26,%YMM12{%K1} |
(119) 0x424b35 VMOVAPD 0x2a0(%RSP),%YMM11 |
(119) 0x424b3e VMOVAPD %YMM28,%YMM11{%K1} |
(119) 0x424b44 VMOVAPD 0x2e0(%RSP),%YMM10 |
(119) 0x424b4d VMOVAPD %YMM27,%YMM10{%K1} |
(119) 0x424b53 VMOVAPD 0x320(%RSP),%YMM9 |
(119) 0x424b5c VMOVAPD %YMM29,%YMM9{%K1} |
(119) 0x424b62 VMOVAPD 0x340(%RSP),%YMM29 |
(119) 0x424b6a VMOVAPD %YMM30,%YMM29{%K1} |
(119) 0x424b70 VMOVAPD 0x360(%RSP),%YMM28 |
(119) 0x424b78 VMOVAPD %YMM2,%YMM28{%K1} |
(119) 0x424b7e VMOVAPD 0x380(%RSP),%YMM27 |
(119) 0x424b86 VMOVAPD %YMM31,%YMM27{%K1} |
(119) 0x424b8c VMOVAPD 0x3a0(%RSP),%YMM26 |
(119) 0x424b94 VMOVAPD %YMM4,%YMM26{%K1} |
(119) 0x424b9a VMOVAPD %YMM18,%YMM17{%K1} |
(119) 0x424ba0 VMOVAPD %YMM19,%YMM16{%K1} |
(119) 0x424ba6 VMOVAPD %YMM20,%YMM15{%K1} |
(119) 0x424bac VMOVAPD %YMM21,%YMM14{%K1} |
(119) 0x424bb2 VMOVAPD %YMM22,%YMM13{%K1} |
(119) 0x424bb8 VSUBPD %YMM16,%YMM15,%YMM2 |
(119) 0x424bbe VMULPD %YMM17,%YMM2,%YMM2 |
(119) 0x424bc4 VMOVAPD 0x240(%RSP),%YMM21 |
(119) 0x424bcc VMOVAPD %YMM23,%YMM21{%K1} |
(119) 0x424bd2 VSUBPD %YMM13,%YMM21,%YMM4 |
(119) 0x424bd8 VFMADD213PD %YMM2,%YMM14,%YMM4 |
(119) 0x424bdd VMOVAPD 0x300(%RSP),%YMM20 |
(119) 0x424be5 VMOVAPD %YMM24,%YMM20{%K1} |
(119) 0x424beb VMOVAPD 0x3c0(%RSP),%YMM19 |
(119) 0x424bf3 VMOVAPD %YMM25,%YMM19{%K1} |
(119) 0x424bf9 VMOVAPD 0x3e0(%RSP),%YMM18 |
(119) 0x424c01 VMOVAPD %YMM7,%YMM18{%K1} |
(119) 0x424c07 VMOVAPD 0x400(%RSP),%YMM7 |
(119) 0x424c10 VMOVAPD %YMM6,%YMM7{%K1} |
(119) 0x424c16 VSUBPD %YMM20,%YMM19,%YMM2 |
(119) 0x424c1c VFMADD213PD %YMM4,%YMM17,%YMM2 |
(119) 0x424c22 VSUBPD %YMM18,%YMM7,%YMM4 |
(119) 0x424c28 VFMADD231PD %YMM4,%YMM14,%YMM2 |
(119) 0x424c2d VMOVAPD %YMM11,0x2a0(%RSP) |
(119) 0x424c36 VMOVAPD %YMM12,0x260(%RSP) |
(119) 0x424c3f VMULPD %YMM11,%YMM12,%YMM4 |
(119) 0x424c44 VMOVAPD %YMM9,0x320(%RSP) |
(119) 0x424c4d VMOVAPD %YMM10,0x2e0(%RSP) |
(119) 0x424c56 VFMADD231PD %YMM9,%YMM10,%YMM4 |
(119) 0x424c5b VMOVAPD %YMM28,0x360(%RSP) |
(119) 0x424c63 VMOVAPD %YMM29,0x340(%RSP) |
(119) 0x424c6b VFMADD231PD %YMM28,%YMM29,%YMM4 |
(119) 0x424c71 VMOVAPD %YMM26,0x3a0(%RSP) |
(119) 0x424c79 VMOVAPD %YMM27,0x380(%RSP) |
(119) 0x424c81 VFMADD231PD %YMM26,%YMM27,%YMM4 |
(119) 0x424c87 VMULPD %YMM3,%YMM4,%YMM4 |
(119) 0x424c8b VDIVPD %YMM4,%YMM0,%YMM4 |
(119) 0x424c8f VMOVAPD 0x420(%RSP),%YMM6 |
(119) 0x424c98 VMOVAPD %YMM8,%YMM6{%K1} |
(119) 0x424c9e VMOVAPD %YMM6,0x420(%RSP) |
(119) 0x424ca7 VFMADD213PD %YMM6,%YMM4,%YMM2 |
(119) 0x424cac ADD 0x70(%RSP),%R9 |
(119) 0x424cb1 VMOVUPD %YMM2,(%R9,%RDX,8){%K1} |
(119) 0x424cb8 ADD 0x78(%RSP),%R15 |
(119) 0x424cbd VMOVUPD (%R15,%RDX,8),%YMM2{%K1}{z} |
(119) 0x424cc4 VMOVAPD 0x280(%RSP),%YMM9 |
(119) 0x424ccd VMOVAPD %YMM1,%YMM9{%K1} |
(119) 0x424cd3 VSUBPD %YMM16,%YMM13,%YMM1 |
(119) 0x424cd9 VMULPD %YMM1,%YMM9,%YMM1 |
(119) 0x424cdd VMOVAPD 0x2c0(%RSP),%YMM8 |
(119) 0x424ce6 VMOVAPD %YMM5,%YMM8{%K1} |
(119) 0x424cec VMOVAPD %YMM21,0x240(%RSP) |
(119) 0x424cf4 VSUBPD %YMM15,%YMM21,%YMM5 |
(119) 0x424cfa VFMADD213PD %YMM1,%YMM8,%YMM5 |
(119) 0x424cff VMOVAPD %YMM18,0x3e0(%RSP) |
(119) 0x424d07 VMOVAPD %YMM20,0x300(%RSP) |
(119) 0x424d0f VSUBPD %YMM20,%YMM18,%YMM1 |
(119) 0x424d15 VMOVAPD %YMM7,0x400(%RSP) |
(119) 0x424d1e VMOVAPD %YMM19,0x3c0(%RSP) |
(119) 0x424d26 VSUBPD %YMM19,%YMM7,%YMM6 |
(119) 0x424d2c VMOVAPD %YMM9,0x280(%RSP) |
(119) 0x424d35 VFMADD213PD %YMM5,%YMM9,%YMM1 |
(119) 0x424d3a VMOVAPD %YMM8,0x2c0(%RSP) |
(119) 0x424d43 VFMADD231PD %YMM6,%YMM8,%YMM1 |
(119) 0x424d48 VMOVAPD 0x440(%RSP),%YMM5 |
(119) 0x424d51 VMOVAPD %YMM2,%YMM5{%K1} |
(119) 0x424d57 VMOVAPD %YMM5,0x440(%RSP) |
(119) 0x424d60 VFMADD213PD %YMM5,%YMM4,%YMM1 |
(119) 0x424d65 MOV 0x110(%RSP),%RAX |
(119) 0x424d6d ADD 0x68(%RSP),%RAX |
(119) 0x424d72 VMOVUPD %YMM1,(%RAX,%RDX,8){%K1} |
(119) 0x424d79 LEA 0x1(%R8),%EAX |
(119) 0x424d7d INC %EBX |
(119) 0x424d7f CMP %EDI,%R8D |
(119) 0x424d82 MOV %EAX,%R8D |
(119) 0x424d85 JE 4246f8 |
(119) 0x424d8b TEST %R11D,%R11D |
(119) 0x424d8e JS 424d79 |
(119) 0x424d90 MOV 0x1a0(%RSP),%RAX |
(119) 0x424d98 MOV %R8,0x118(%RSP) |
(119) 0x424da0 ADD %R8D,%EAX |
(119) 0x424da3 MOV 0x1a8(%RSP),%RCX |
(119) 0x424dab MOV (%RCX),%R10 |
(119) 0x424dae MOV 0x1b0(%RSP),%RCX |
(119) 0x424db6 MOV (%RCX),%R14 |
(119) 0x424db9 MOV 0x1b8(%RSP),%RCX |
(119) 0x424dc1 MOV (%RCX),%RCX |
(119) 0x424dc4 MOV %RCX,0x18(%RSP) |
(119) 0x424dc9 MOV 0x1c0(%RSP),%RCX |
(119) 0x424dd1 MOV (%RCX),%RSI |
(119) 0x424dd4 MOV 0x1c8(%RSP),%RCX |
(119) 0x424ddc MOV (%RCX),%R8 |
(119) 0x424ddf MOV 0x1d0(%RSP),%RCX |
(119) 0x424de7 MOV (%RCX),%RCX |
(119) 0x424dea MOV %RCX,0x20(%RSP) |
(119) 0x424def MOV 0x1d8(%RSP),%RCX |
(119) 0x424df7 MOV (%RCX),%RCX |
(119) 0x424dfa MOV %RCX,0x10(%RSP) |
(119) 0x424dff MOV 0x1e0(%RSP),%RCX |
(119) 0x424e07 MOV (%RCX),%R15 |
(119) 0x424e0a MOV 0x1e8(%RSP),%RCX |
(119) 0x424e12 MOV (%RCX),%RCX |
(119) 0x424e15 MOV %RCX,0x40(%RSP) |
(119) 0x424e1a MOV 0x1f0(%RSP),%RCX |
(119) 0x424e22 MOV (%RCX),%R13 |
(119) 0x424e25 MOVSXD %EAX,%RDX |
(119) 0x424e28 MOV 0x48(%RSP),%R9 |
(119) 0x424e2d TEST %R9,%R9 |
(119) 0x424e30 JE 4248d0 |
(119) 0x424e36 MOV %EBX,0x5c(%RSP) |
(119) 0x424e3a MOVSXD %EBX,%R11 |
(119) 0x424e3d MOV 0x178(%RSP),%RAX |
(119) 0x424e45 LEA (%RAX,%R11,1),%R12 |
(119) 0x424e49 ADD 0x170(%RSP),%R11 |
(119) 0x424e51 MOV 0xc0(%RSP),%RAX |
(119) 0x424e59 ADD %RDX,%RAX |
(119) 0x424e5c INC %RAX |
(119) 0x424e5f MOV %R14,%RDI |
(119) 0x424e62 IMUL %RAX,%RDI |
(119) 0x424e66 MOV %RDI,0xd8(%RSP) |
(119) 0x424e6e MOV %R10,%RDI |
(119) 0x424e71 IMUL %RAX,%RDI |
(119) 0x424e75 MOV %RDI,0xe0(%RSP) |
(119) 0x424e7d SUB 0xb8(%RSP),%RDX |
(119) 0x424e85 MOV %R14,%RDI |
(119) 0x424e88 IMUL %RDX,%RDI |
(119) 0x424e8c MOV %RDI,0x1f8(%RSP) |
(119) 0x424e94 MOV %R10,%RDI |
(119) 0x424e97 IMUL %RDX,%RDI |
(119) 0x424e9b MOV %RDI,0x200(%RSP) |
(119) 0x424ea3 MOV %RSI,%RDI |
(119) 0x424ea6 IMUL %RDX,%RDI |
(119) 0x424eaa MOV %RDI,0xf0(%RSP) |
(119) 0x424eb2 MOV %R8,%RDI |
(119) 0x424eb5 IMUL %RDX,%RDI |
(119) 0x424eb9 MOV %RDI,0xf8(%RSP) |
(119) 0x424ec1 MOV %RSI,%RDI |
(119) 0x424ec4 IMUL %RAX,%RDI |
(119) 0x424ec8 MOV %RDI,0xc8(%RSP) |
(119) 0x424ed0 MOV %R8,%RDI |
(119) 0x424ed3 IMUL %RAX,%RDI |
(119) 0x424ed7 MOV %RDI,0xd0(%RSP) |
(119) 0x424edf MOV %R15,%RDI |
(119) 0x424ee2 IMUL %RDX,%RDI |
(119) 0x424ee6 MOV %RDI,0xe8(%RSP) |
(119) 0x424eee MOV %R13,%RDI |
(119) 0x424ef1 IMUL %RDX,%RDI |
(119) 0x424ef5 MOV %RDI,0x108(%RSP) |
(119) 0x424efd IMUL %R13,%RAX |
(119) 0x424f01 MOV %RAX,0x208(%RSP) |
(119) 0x424f09 MOV 0x18(%RSP),%RAX |
(119) 0x424f0e IMUL %RDX,%RAX |
(119) 0x424f12 MOV %RAX,0x100(%RSP) |
(119) 0x424f1a MOV 0x20(%RSP),%RAX |
(119) 0x424f1f IMUL %RDX,%RAX |
(119) 0x424f23 MOV %RAX,0x210(%RSP) |
(119) 0x424f2b MOV 0x10(%RSP),%RCX |
(119) 0x424f30 MOV %RCX,%RAX |
(119) 0x424f33 IMUL %RDX,%RAX |
(119) 0x424f37 MOV %RAX,0x218(%RSP) |
(119) 0x424f3f MOV %RSI,0x28(%RSP) |
(119) 0x424f44 MOV 0x40(%RSP),%RSI |
(119) 0x424f49 IMUL %RSI,%RDX |
(119) 0x424f4d MOV %RDX,0x110(%RSP) |
(119) 0x424f55 MOV %R13,%RAX |
(119) 0x424f58 IMUL %R12,%RAX |
(119) 0x424f5c IMUL %R11,%R13 |
(119) 0x424f60 IMUL %R11,%R15 |
(119) 0x424f64 MOV %R8,%RDX |
(119) 0x424f67 IMUL %R12,%RDX |
(119) 0x424f6b IMUL %R11,%R8 |
(119) 0x424f6f MOV %R10,%RBX |
(119) 0x424f72 IMUL %R11,%RBX |
(119) 0x424f76 MOV %R14,%RDI |
(119) 0x424f79 IMUL %R11,%RDI |
(119) 0x424f7d IMUL %R12,%R10 |
(119) 0x424f81 IMUL %R12,%R14 |
(119) 0x424f85 IMUL %R11,%RSI |
(119) 0x424f89 IMUL %R11,%RCX |
(119) 0x424f8d MOV 0x20(%RSP),%R9 |
(119) 0x424f92 IMUL %R11,%R9 |
(119) 0x424f96 MOV %R9,0x20(%RSP) |
(119) 0x424f9b MOV 0x18(%RSP),%R9 |
(119) 0x424fa0 IMUL %R11,%R9 |
(119) 0x424fa4 MOV %R9,0x18(%RSP) |
(119) 0x424fa9 MOV 0x28(%RSP),%R9 |
(119) 0x424fae IMUL %R9,%R12 |
(119) 0x424fb2 IMUL %R9,%R11 |
(119) 0x424fb6 MOV 0x180(%RSP),%R9 |
(119) 0x424fbe ADD %R9,%RAX |
(119) 0x424fc1 ADD %R9,%R13 |
(119) 0x424fc4 MOV %R13,0x40(%RSP) |
(119) 0x424fc9 ADD 0x168(%RSP),%R15 |
(119) 0x424fd1 MOV %R15,0x10(%RSP) |
(119) 0x424fd6 MOV 0x160(%RSP),%R9 |
(119) 0x424fde ADD %R9,%RDX |
(119) 0x424fe1 ADD %R9,%R8 |
(119) 0x424fe4 MOV %R8,0x28(%RSP) |
(119) 0x424fe9 MOV 0x158(%RSP),%R9 |
(119) 0x424ff1 ADD %R9,%RBX |
(119) 0x424ff4 MOV 0x150(%RSP),%R15 |
(119) 0x424ffc ADD %R15,%RDI |
(119) 0x424fff ADD %R9,%R10 |
(119) 0x425002 MOV %R10,%R8 |
(119) 0x425005 ADD %R15,%R14 |
(119) 0x425008 MOV %R14,%R10 |
(119) 0x42500b ADD 0x148(%RSP),%RSI |
(119) 0x425013 ADD 0x140(%RSP),%RCX |
(119) 0x42501b MOV %RCX,%R9 |
(119) 0x42501e MOV 0x20(%RSP),%RCX |
(119) 0x425023 ADD 0x138(%RSP),%RCX |
(119) 0x42502b MOV 0x18(%RSP),%R15 |
(119) 0x425030 ADD 0x130(%RSP),%R15 |
(119) 0x425038 MOV 0x128(%RSP),%R14 |
(119) 0x425040 ADD %R14,%R12 |
(119) 0x425043 MOV %R12,%R13 |
(119) 0x425046 ADD %R14,%R11 |
(119) 0x425049 MOV %R11,%R14 |
(119) 0x42504c XOR %R11D,%R11D |
(119) 0x42504f NOP |
(120) 0x425050 MOV %R10,%R12 |
(120) 0x425053 VMOVUPD -0x8(%R10,%R11,8),%YMM1 |
(120) 0x42505a VMOVUPD (%R10,%R11,8),%YMM2 |
(120) 0x425060 MOV %R8,%R12 |
(120) 0x425063 VMULPD -0x8(%R8,%R11,8),%YMM1,%YMM1 |
(120) 0x42506a VFMADD231PD (%R8,%R11,8),%YMM2,%YMM1 |
(120) 0x425070 VMOVUPD -0x8(%RDI,%R11,8),%YMM2 |
(120) 0x425077 VMOVUPD (%RDI,%R11,8),%YMM4 |
(120) 0x42507d VFMADD132PD (%RBX,%R11,8),%YMM1,%YMM4 |
(120) 0x425083 VFMADD231PD -0x8(%RBX,%R11,8),%YMM2,%YMM4 |
(120) 0x42508a VMULPD %YMM3,%YMM4,%YMM1 |
(120) 0x42508e VDIVPD %YMM1,%YMM0,%YMM1 |
(120) 0x425092 VMOVUPD (%R14,%R11,8),%YMM2 |
(120) 0x425098 MOV 0x28(%RSP),%R12 |
(120) 0x42509d VMOVUPD -0x8(%R12,%R11,8),%YMM4 |
(120) 0x4250a4 VMOVUPD (%R12,%R11,8),%YMM5 |
(120) 0x4250aa VSUBPD %YMM5,%YMM4,%YMM6 |
(120) 0x4250ae VMULPD %YMM2,%YMM6,%YMM6 |
(120) 0x4250b2 VMOVUPD (%R13,%R11,8),%YMM7 |
(120) 0x4250b9 VMOVUPD -0x8(%RDX,%R11,8),%YMM8 |
(120) 0x4250c0 VMOVUPD (%RDX,%R11,8),%YMM18 |
(120) 0x4250c7 VSUBPD %YMM18,%YMM8,%YMM19 |
(120) 0x4250cd VFMADD213PD %YMM6,%YMM7,%YMM19 |
(120) 0x4250d3 MOV 0x10(%RSP),%R12 |
(120) 0x4250d8 VMOVUPD -0x8(%R12,%R11,8),%YMM6 |
(120) 0x4250df VMOVUPD (%R12,%R11,8),%YMM20 |
(120) 0x4250e6 VSUBPD %YMM5,%YMM18,%YMM5 |
(120) 0x4250ec VMULPD %YMM5,%YMM20,%YMM5 |
(120) 0x4250f2 VSUBPD %YMM4,%YMM8,%YMM4 |
(120) 0x4250f6 VFMADD213PD %YMM5,%YMM6,%YMM4 |
(120) 0x4250fb MOV 0x40(%RSP),%R12 |
(120) 0x425100 VMOVUPD -0x8(%R12,%R11,8),%YMM5 |
(120) 0x425107 VMOVUPD (%R12,%R11,8),%YMM8 |
(120) 0x42510d VMOVUPD -0x8(%RAX,%R11,8),%YMM18 |
(120) 0x425118 VMOVUPD (%RAX,%R11,8),%YMM21 |
(120) 0x42511f VSUBPD %YMM8,%YMM5,%YMM22 |
(120) 0x425125 VSUBPD %YMM21,%YMM18,%YMM23 |
(120) 0x42512b VFMADD213PD %YMM19,%YMM2,%YMM22 |
(120) 0x425131 VFMADD231PD %YMM23,%YMM7,%YMM22 |
(120) 0x425137 VFMADD213PD (%R15,%R11,8),%YMM1,%YMM22 |
(120) 0x42513e VMOVUPD %YMM22,(%RCX,%R11,8) |
(120) 0x425145 VSUBPD %YMM8,%YMM21,%YMM2 |
(120) 0x42514b VSUBPD %YMM5,%YMM18,%YMM5 |
(120) 0x425151 VFMADD213PD %YMM4,%YMM20,%YMM2 |
(120) 0x425157 VFMADD231PD %YMM5,%YMM6,%YMM2 |
(120) 0x42515c VFMADD213PD (%R9,%R11,8),%YMM1,%YMM2 |
(120) 0x425162 VMOVUPD %YMM2,(%RSI,%R11,8) |
(120) 0x425168 ADD $0x4,%R11 |
(120) 0x42516c CMP 0x48(%RSP),%R11 |
(120) 0x425171 JB 425050 |
(119) 0x425177 MOV 0x48(%RSP),%RAX |
(119) 0x42517c MOV %RAX,%RDX |
(119) 0x42517f CMP 0x188(%RSP),%RAX |
(119) 0x425187 MOV 0x34(%RSP),%EDI |
(119) 0x42518b MOV 0x58(%RSP),%R11D |
(119) 0x425190 MOV 0x5c(%RSP),%EBX |
(119) 0x425194 MOV 0x118(%RSP),%R8 |
(119) 0x42519c MOV 0x218(%RSP),%R15 |
(119) 0x4251a4 MOV 0x210(%RSP),%R9 |
(119) 0x4251ac MOV 0x208(%RSP),%R13 |
(119) 0x4251b4 MOV 0x200(%RSP),%R12 |
(119) 0x4251bc MOV 0x1f8(%RSP),%R14 |
(119) 0x4251c4 JNE 4249b9 |
(119) 0x4251ca JMP 424d79 |
0x4251cf NOP |
Path / |
Source file and lines | accelerate_kernel.f90:57-79 |
Module | exec |
nb instructions | 174 |
nb uops | 177 |
loop length | 897 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 3 |
used zmm registers | 0 |
nb stack references | 79 |
micro-operation queue | 29.50 cycles |
front end | 29.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 8.00 | 8.00 | 17.00 | 17.00 | 30.00 | 8.00 | 8.00 | 30.00 | 30.00 | 30.00 | 8.00 | 17.00 |
cycles | 8.00 | 13.13 | 17.00 | 17.00 | 30.00 | 8.00 | 8.00 | 30.00 | 30.00 | 30.00 | 8.00 | 17.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 30.07-30.08 |
Stall cycles | 0.59-0.61 |
RS full (events) | 2.97-3.04 |
Front-end | 29.50 |
Dispatch | 30.00 |
Overall L1 | 30.00 |
all | 3% |
load | 0% |
store | 2% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 9% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 2% |
load | 0% |
store | 2% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 8% |
all | 11% |
load | 7% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 11% |
load | 8% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 10% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x480,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0xd0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x1f0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xc8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x1e8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xc0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x1e0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xb8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x1d8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xb0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x1d0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xa8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x1c8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xa0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x1c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x98(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x1b8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x70(%RBP),%R14D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x68(%RBP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %R14D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x90(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x1b0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x88(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x1a8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x80(%RBP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x78(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x60(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0xb0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x58(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0xa8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x50(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x48(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x40(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x38(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x30(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x28(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x20(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x18(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVL $0,0x64(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JS 424711 <accelerate_kernel_module_mp_accelerate_kernel_.DIR.OMP.PARALLEL.2+0x1c1> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R8,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,0x120(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVL $0,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EAX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0x1,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x68(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x6c(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x44(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x40(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x733290,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x5c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 404520 <__kmpc_for_static_init_4@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x3c(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x38(%RSP),%EDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %EAX,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 42472b <accelerate_kernel_module_mp_accelerate_kernel_.DIR.OMP.PARALLEL.2+0x1db> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x7332b0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x54(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 404110 <__kmpc_for_static_fini@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x120(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV $0x7332d0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
JMP 4045e0 <__kmpc_barrier@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV %RAX,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVQ %R12,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
ADD %R14D,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD (%R13),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R15),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %ECX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RAX),%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD $0x2,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP $0x2,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $0x1,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVGE %EAX,%ESI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %ESI,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $0x7ffffffc,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %RAX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVSXD 0x10(%RSP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %EBX,%RAX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
LEA -0x2(%RAX),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R9,0x190(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %XMM0,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RSI,0x188(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %RSI,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQA %YMM1,0x220(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV %RCX,0x198(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (,%RCX,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x3,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
SUB %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x88(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD $0x10,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RAX,0x180(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x1,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SUB %R8,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RAX,0x178(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x2,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SUB %R8,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RAX,0x170(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xa8(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD $0x10,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RAX,0x168(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x90(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x10(%RAX,%RCX,1),%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RAX,0x160(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x98(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x10(%RAX,%RCX,1),%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RAX,0x158(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xa0(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x10(%RAX,%RCX,1),%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RAX,0x150(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x68(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x10(%RAX,%RCX,1),%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RAX,0x148(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x78(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x10(%RAX,%RCX,1),%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RAX,0x140(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x70(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x10(%RAX,%RCX,1),%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RAX,0x138(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x80(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x10(%RAX,%RCX,1),%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RAX,0x130(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xb0(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x10(%RAX,%RCX,1),%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RAX,0x128(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA -0x2(%R8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,0xb8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NEG %R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R8,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD 0xce367(%RIP),%YMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDX,0x1a0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EDX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %EDI,0x34(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11D,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 424d8b <accelerate_kernel_module_mp_accelerate_kernel_.DIR.OMP.PARALLEL.2+0x83b> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | accelerate_kernel.f90:57-79 |
Module | exec |
nb instructions | 174 |
nb uops | 177 |
loop length | 897 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 3 |
used zmm registers | 0 |
nb stack references | 79 |
micro-operation queue | 29.50 cycles |
front end | 29.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 8.00 | 8.00 | 17.00 | 17.00 | 30.00 | 8.00 | 8.00 | 30.00 | 30.00 | 30.00 | 8.00 | 17.00 |
cycles | 8.00 | 13.13 | 17.00 | 17.00 | 30.00 | 8.00 | 8.00 | 30.00 | 30.00 | 30.00 | 8.00 | 17.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 30.07-30.08 |
Stall cycles | 0.59-0.61 |
RS full (events) | 2.97-3.04 |
Front-end | 29.50 |
Dispatch | 30.00 |
Overall L1 | 30.00 |
all | 3% |
load | 0% |
store | 2% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 9% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 2% |
load | 0% |
store | 2% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 8% |
all | 11% |
load | 7% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 11% |
load | 8% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 10% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x480,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0xd0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x1f0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xc8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x1e8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xc0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x1e0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xb8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x1d8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xb0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x1d0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xa8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x1c8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xa0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x1c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x98(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x1b8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x70(%RBP),%R14D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x68(%RBP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %R14D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x90(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x1b0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x88(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x1a8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x80(%RBP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x78(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x60(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0xb0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x58(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0xa8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x50(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x48(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x40(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x38(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x30(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x28(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x20(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x18(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVL $0,0x64(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JS 424711 <accelerate_kernel_module_mp_accelerate_kernel_.DIR.OMP.PARALLEL.2+0x1c1> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R8,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,0x120(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVL $0,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EAX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0x1,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x68(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x6c(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x44(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x40(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x733290,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x5c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 404520 <__kmpc_for_static_init_4@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x3c(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x38(%RSP),%EDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %EAX,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 42472b <accelerate_kernel_module_mp_accelerate_kernel_.DIR.OMP.PARALLEL.2+0x1db> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x7332b0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x54(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 404110 <__kmpc_for_static_fini@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x120(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV $0x7332d0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
JMP 4045e0 <__kmpc_barrier@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV %RAX,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVQ %R12,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
ADD %R14D,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD (%R13),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R15),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %ECX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RAX),%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD $0x2,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP $0x2,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $0x1,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVGE %EAX,%ESI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %ESI,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $0x7ffffffc,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %RAX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVSXD 0x10(%RSP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %EBX,%RAX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
LEA -0x2(%RAX),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R9,0x190(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %XMM0,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RSI,0x188(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %RSI,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQA %YMM1,0x220(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV %RCX,0x198(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (,%RCX,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x3,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
SUB %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x88(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD $0x10,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RAX,0x180(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x1,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SUB %R8,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RAX,0x178(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x2,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SUB %R8,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RAX,0x170(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xa8(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD $0x10,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RAX,0x168(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x90(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x10(%RAX,%RCX,1),%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RAX,0x160(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x98(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x10(%RAX,%RCX,1),%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RAX,0x158(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xa0(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x10(%RAX,%RCX,1),%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RAX,0x150(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x68(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x10(%RAX,%RCX,1),%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RAX,0x148(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x78(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x10(%RAX,%RCX,1),%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RAX,0x140(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x70(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x10(%RAX,%RCX,1),%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RAX,0x138(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x80(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x10(%RAX,%RCX,1),%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RAX,0x130(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xb0(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x10(%RAX,%RCX,1),%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RAX,0x128(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA -0x2(%R8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,0xb8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NEG %R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R8,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD 0xce367(%RIP),%YMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDX,0x1a0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EDX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %EDI,0x34(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11D,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 424d8b <accelerate_kernel_module_mp_accelerate_kernel_.DIR.OMP.PARALLEL.2+0x83b> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼accelerate_kernel_.DIR.OMP.PARALLEL.2– | 4.83 | 1.57 |
▼Loop 119 - accelerate_kernel.f90:60-76 - exec– | 0.01 | 0 |
○Loop 120 - accelerate_kernel.f90:62-76 - exec | 4.83 | 1.56 |