Function: accelerate_kernel_.DIR.OMP.PARALLEL.2 | Module: exec | Source: accelerate_kernel.f90:57-79 | Coverage: 5.28% |
---|
Function: accelerate_kernel_.DIR.OMP.PARALLEL.2 | Module: exec | Source: accelerate_kernel.f90:57-79 | Coverage: 5.28% |
---|
/home/eoseret/qaas_runs_CPU_9468/171-152-3172/intel/CloverLeafFC/build/CloverLeafFC/CloverLeaf_ref/kernels/accelerate_kernel.f90: 57 - 79 |
-------------------------------------------------------------------------------- |
57: !$OMP PARALLEL |
58: |
59: !$OMP DO PRIVATE(j,k,stepbymass_s) |
60: DO k=y_min,y_max+1 |
61: !$OMP SIMD |
62: DO j=x_min,x_max+1 |
63: stepbymass_s=halfdt/((density0(j-1,k-1)*volume(j-1,k-1) & |
64: +density0(j ,k-1)*volume(j ,k-1) & |
65: +density0(j ,k )*volume(j ,k ) & |
66: +density0(j-1,k )*volume(j-1,k )) & |
67: *0.25_8) |
68: |
69: xvel1(j,k)=xvel0(j,k)-stepbymass_s*(xarea(j ,k )*(pressure(j ,k )-pressure(j-1,k )) & |
70: +xarea(j ,k-1)*(pressure(j ,k-1)-pressure(j-1,k-1))) |
71: yvel1(j,k)=yvel0(j,k)-stepbymass_s*(yarea(j ,k )*(pressure(j ,k )-pressure(j ,k-1)) & |
72: +yarea(j-1,k )*(pressure(j-1,k )-pressure(j-1,k-1))) |
73: xvel1(j,k)=xvel1(j,k)-stepbymass_s*(xarea(j ,k )*(viscosity(j ,k )-viscosity(j-1,k )) & |
74: +xarea(j ,k-1)*(viscosity(j ,k-1)-viscosity(j-1,k-1))) |
75: yvel1(j,k)=yvel1(j,k)-stepbymass_s*(yarea(j ,k )*(viscosity(j ,k )-viscosity(j ,k-1)) & |
76: +yarea(j-1,k )*(viscosity(j-1,k )-viscosity(j-1,k-1))) |
77: ENDDO |
78: ENDDO |
79: !$OMP END DO |
0x42c0c0 PUSH %RBP |
0x42c0c1 MOV %RSP,%RBP |
0x42c0c4 PUSH %R15 |
0x42c0c6 PUSH %R14 |
0x42c0c8 PUSH %R13 |
0x42c0ca PUSH %R12 |
0x42c0cc PUSH %RBX |
0x42c0cd AND $-0x40,%RSP |
0x42c0d1 SUB $0x5c0,%RSP |
0x42c0d8 MOV %R9,0x38(%RSP) |
0x42c0dd MOV 0x60(%RBP),%EBX |
0x42c0e0 MOV 0x58(%RBP),%EAX |
0x42c0e3 SUB %EBX,%EAX |
0x42c0e5 INC %EAX |
0x42c0e7 MOVL $0,0x34(%RSP) |
0x42c0ef JS 42c177 |
0x42c0f5 MOV %RDX,%R12 |
0x42c0f8 MOV %R8,%R15 |
0x42c0fb MOV %RCX,%R14 |
0x42c0fe MOV %RDI,0x90(%RSP) |
0x42c106 MOV (%RDI),%ESI |
0x42c108 MOVL $0,0x1c(%RSP) |
0x42c110 MOV %EAX,0x18(%RSP) |
0x42c114 MOVL $0x1,0x30(%RSP) |
0x42c11c SUB $0x8,%RSP |
0x42c120 LEA 0x38(%RSP),%RAX |
0x42c125 LEA 0x3c(%RSP),%RCX |
0x42c12a LEA 0x24(%RSP),%R8 |
0x42c12f LEA 0x20(%RSP),%R9 |
0x42c134 MOV $0x54bf70,%EDI |
0x42c139 MOV %ESI,0x30(%RSP) |
0x42c13d MOV $0x22,%EDX |
0x42c142 PUSH $0x1 |
0x42c144 PUSH $0x1 |
0x42c146 PUSH %RAX |
0x42c147 CALL 404670 <__kmpc_for_static_init_4@plt> |
0x42c14c ADD $0x20,%RSP |
0x42c150 MOV 0x1c(%RSP),%EAX |
0x42c154 MOV 0x18(%RSP),%R10D |
0x42c159 SUB %EAX,%R10D |
0x42c15c JAE 42c1c0 |
0x42c15e MOV $0x54bf90,%EDI |
0x42c163 MOV 0x28(%RSP),%ESI |
0x42c167 VZEROUPPER |
0x42c16a CALL 404230 <__kmpc_for_static_fini@plt> |
0x42c16f MOV 0x90(%RSP),%RDI |
0x42c177 MOV (%RDI),%ESI |
0x42c179 MOV $0x54bfb0,%EDI |
0x42c17e CALL 404740 <__kmpc_barrier@plt> |
0x42c183 LEA -0x28(%RBP),%RSP |
0x42c187 POP %RBX |
0x42c188 POP %R12 |
0x42c18a POP %R13 |
0x42c18c POP %R14 |
0x42c18e POP %R15 |
0x42c190 POP %RBP |
0x42c191 RET |
0x42c192 NOPW %CS:(%RAX,%RAX,1) |
0x42c1a1 NOPW %CS:(%RAX,%RAX,1) |
0x42c1b0 NOPW %CS:(%RAX,%RAX,1) |
0x42c1bf NOP |
0x42c1c0 MOV %RAX,%R8 |
0x42c1c3 MOV 0x48(%RBP),%R9 |
0x42c1c7 MOV 0x40(%RBP),%R11 |
0x42c1cb VMOVQ %R12,%XMM0 |
0x42c1d0 SAL $0x20,%R15 |
0x42c1d4 MOV $-0x200000000,%RCX |
0x42c1de LEA (%R15,%RCX,1),%RDX |
0x42c1e2 MOV %RDX,%RAX |
0x42c1e5 SAR $0x20,%RAX |
0x42c1e9 MOV %RAX,0xa0(%RSP) |
0x42c1f1 SAL $0x20,%R14 |
0x42c1f5 ADD %R14,%RCX |
0x42c1f8 MOV %RCX,%R12 |
0x42c1fb SAR $0x20,%R12 |
0x42c1ff TEST %RDX,%RDX |
0x42c202 MOV $-0x1,%RSI |
0x42c209 CMOVNS %RDX,%RSI |
0x42c20d TEST %RSI,%RSI |
0x42c210 MOV $0x1,%EDI |
0x42c215 CMOVG %RDI,%RSI |
0x42c219 MOV $0x200000000,%R13 |
0x42c223 MOV %R13,%RAX |
0x42c226 SUB %R15,%RAX |
0x42c229 MOV 0x38(%RBP),%R15 |
0x42c22d CMP %RAX,%RDX |
0x42c230 CMOVG %RDX,%RAX |
0x42c234 MOV $-0x1,%RDX |
0x42c23b SHR $0x20,%RAX |
0x42c23f IMUL %RSI,%RAX |
0x42c243 SAL $0x3,%RAX |
0x42c247 SUB %RAX,%R11 |
0x42c24a MOV %R11,0xf0(%RSP) |
0x42c252 MOV %R8,%RSI |
0x42c255 ADD %EBX,%ESI |
0x42c257 SUB %RAX,%R15 |
0x42c25a MOV %R15,0xe8(%RSP) |
0x42c262 TEST %RCX,%RCX |
0x42c265 CMOVNS %RCX,%RDX |
0x42c269 TEST %RDX,%RDX |
0x42c26c CMOVG %RDI,%RDX |
0x42c270 MOV 0x30(%RBP),%RDI |
0x42c274 SUB %R14,%R13 |
0x42c277 MOV 0x28(%RBP),%R8 |
0x42c27b CMP %R13,%RCX |
0x42c27e CMOVG %RCX,%R13 |
0x42c282 SHR $0x20,%R13 |
0x42c286 IMUL %RDX,%R13 |
0x42c28a MOV %R10D,%EDX |
0x42c28d SUB %RAX,%RDI |
0x42c290 MOV %RDI,0xe0(%RSP) |
0x42c298 SUB %RAX,%R9 |
0x42c29b MOV %R9,0xd8(%RSP) |
0x42c2a3 SUB %RAX,%R8 |
0x42c2a6 MOV %R8,0xd0(%RSP) |
0x42c2ae MOV 0x38(%RSP),%RCX |
0x42c2b3 SUB %RAX,%RCX |
0x42c2b6 MOV %RCX,0xc8(%RSP) |
0x42c2be MOV 0x18(%RBP),%RCX |
0x42c2c2 SUB %RAX,%RCX |
0x42c2c5 MOV %RCX,0xc0(%RSP) |
0x42c2cd MOV 0x10(%RBP),%RCX |
0x42c2d1 SUB %RAX,%RCX |
0x42c2d4 MOV %RCX,0xb8(%RSP) |
0x42c2dc MOV 0x20(%RBP),%RCX |
0x42c2e0 SUB %RAX,%RCX |
0x42c2e3 MOV %RCX,0xb0(%RSP) |
0x42c2eb MOV 0x50(%RBP),%RCX |
0x42c2ef SUB %RAX,%RCX |
0x42c2f2 MOV %RCX,0x48(%RSP) |
0x42c2f7 MOV %R12,0x98(%RSP) |
0x42c2ff NOT %R12 |
0x42c302 MOV %R12,0x40(%RSP) |
0x42c307 NEG %R13 |
0x42c30a MOV %R13,0xf8(%RSP) |
0x42c312 VBROADCASTSD 0xdf384(%RIP),%ZMM1 |
0x42c31c VPBROADCASTQ %XMM0,%ZMM2 |
0x42c322 XOR %EDI,%EDI |
0x42c324 MOV %RSI,0xa8(%RSP) |
0x42c32c MOV %ESI,%EBX |
0x42c32e MOV %R10D,0x14(%RSP) |
0x42c333 JMP 42c35f |
0x42c335 NOPW %CS:(%RAX,%RAX,1) |
(224) 0x42c340 MOV %R9,%RDI |
(224) 0x42c343 NOPW %CS:(%RAX,%RAX,1) |
(224) 0x42c350 LEA 0x1(%RDI),%EAX |
(224) 0x42c353 INC %EBX |
(224) 0x42c355 CMP %EDX,%EDI |
(224) 0x42c357 MOV %EAX,%EDI |
(224) 0x42c359 JE 42c15e |
(224) 0x42c35f MOV 0x70(%RBP),%RAX |
(224) 0x42c363 MOVSXD (%RAX),%R8 |
(224) 0x42c366 MOV 0x68(%RBP),%RAX |
(224) 0x42c36a MOV (%RAX),%ECX |
(224) 0x42c36c MOV %ECX,%EAX |
(224) 0x42c36e SUB %R8D,%EAX |
(224) 0x42c371 INC %EAX |
(224) 0x42c373 JS 42c350 |
(224) 0x42c375 MOV 0x78(%RBP),%RAX |
(224) 0x42c379 MOV (%RAX),%R13 |
(224) 0x42c37c MOV 0x80(%RBP),%RAX |
(224) 0x42c383 MOV (%RAX),%R14 |
(224) 0x42c386 MOV 0x88(%RBP),%RAX |
(224) 0x42c38d MOV (%RAX),%RAX |
(224) 0x42c390 MOV %RAX,0x60(%RSP) |
(224) 0x42c395 MOV 0x90(%RBP),%RAX |
(224) 0x42c39c MOV (%RAX),%R11 |
(224) 0x42c39f MOV 0x98(%RBP),%RAX |
(224) 0x42c3a6 MOV (%RAX),%R12 |
(224) 0x42c3a9 MOV 0xa0(%RBP),%RAX |
(224) 0x42c3b0 MOV (%RAX),%R9 |
(224) 0x42c3b3 MOV 0xa8(%RBP),%RAX |
(224) 0x42c3ba MOV (%RAX),%R15 |
(224) 0x42c3bd MOV 0xb0(%RBP),%RAX |
(224) 0x42c3c4 MOV (%RAX),%RSI |
(224) 0x42c3c7 MOV 0xb8(%RBP),%RAX |
(224) 0x42c3ce MOV (%RAX),%RAX |
(224) 0x42c3d1 MOV %RAX,0x78(%RSP) |
(224) 0x42c3d6 MOV 0xc0(%RBP),%RAX |
(224) 0x42c3dd MOV (%RAX),%RAX |
(224) 0x42c3e0 MOV %RAX,0x58(%RSP) |
(224) 0x42c3e5 SUB %R8D,%ECX |
(224) 0x42c3e8 ADD $0x2,%ECX |
(224) 0x42c3eb CMP $0x2,%ECX |
(224) 0x42c3ee MOV $0x1,%EAX |
(224) 0x42c3f3 CMOVL %EAX,%ECX |
(224) 0x42c3f6 MOV %RCX,%RAX |
(224) 0x42c3f9 AND $0x7ffffff8,%RCX |
(224) 0x42c400 MOV %RDI,0x80(%RSP) |
(224) 0x42c408 MOV %R15,0x70(%RSP) |
(224) 0x42c40d MOV %R9,0x68(%RSP) |
(224) 0x42c412 JE 42c780 |
(224) 0x42c418 MOV %RAX,0x120(%RSP) |
(224) 0x42c420 MOV %EBX,0x2c(%RSP) |
(224) 0x42c424 MOVSXD %EBX,%RDI |
(224) 0x42c427 MOV 0x40(%RSP),%RAX |
(224) 0x42c42c LEA (%RAX,%RDI,1),%R10 |
(224) 0x42c430 ADD 0xf8(%RSP),%RDI |
(224) 0x42c438 MOV %R14,%RAX |
(224) 0x42c43b IMUL %R10,%RAX |
(224) 0x42c43f LEA (%RAX,%R8,8),%RBX |
(224) 0x42c443 MOV %RCX,0x88(%RSP) |
(224) 0x42c44b MOV 0xf0(%RSP),%RCX |
(224) 0x42c453 ADD %RCX,%RBX |
(224) 0x42c456 MOV %R13,%RAX |
(224) 0x42c459 IMUL %R10,%RAX |
(224) 0x42c45d MOV %RSI,%R15 |
(224) 0x42c460 MOV %R11,0x50(%RSP) |
(224) 0x42c465 MOV %R14,%R9 |
(224) 0x42c468 LEA (%RAX,%R8,8),%R14 |
(224) 0x42c46c MOV 0xe8(%RSP),%RDX |
(224) 0x42c474 ADD %RDX,%R14 |
(224) 0x42c477 MOV %R9,0x100(%RSP) |
(224) 0x42c47f MOV %R9,%RAX |
(224) 0x42c482 IMUL %RDI,%RAX |
(224) 0x42c486 LEA (%RAX,%R8,8),%R9 |
(224) 0x42c48a ADD %RCX,%R9 |
(224) 0x42c48d MOV %R13,0x108(%RSP) |
(224) 0x42c495 IMUL %RDI,%R13 |
(224) 0x42c499 LEA (%R13,%R8,8),%R11 |
(224) 0x42c49e ADD %RDX,%R11 |
(224) 0x42c4a1 MOV %R12,%RAX |
(224) 0x42c4a4 IMUL %RDI,%RAX |
(224) 0x42c4a8 LEA (%RAX,%R8,8),%RAX |
(224) 0x42c4ac MOV 0xe0(%RSP),%RSI |
(224) 0x42c4b4 ADD %RSI,%RAX |
(224) 0x42c4b7 MOV %R12,0x110(%RSP) |
(224) 0x42c4bf IMUL %R10,%R12 |
(224) 0x42c4c3 MOV 0x58(%RSP),%RDX |
(224) 0x42c4c8 MOV %R10,0x20(%RSP) |
(224) 0x42c4cd LEA (%R12,%R8,8),%R13 |
(224) 0x42c4d1 ADD %RSI,%R13 |
(224) 0x42c4d4 MOV %R15,0x118(%RSP) |
(224) 0x42c4dc IMUL %RDI,%R15 |
(224) 0x42c4e0 LEA (%R15,%R8,8),%RSI |
(224) 0x42c4e4 ADD 0xd8(%RSP),%RSI |
(224) 0x42c4ec MOV %RDX,%RCX |
(224) 0x42c4ef IMUL %RDI,%RCX |
(224) 0x42c4f3 LEA (%RCX,%R8,8),%R12 |
(224) 0x42c4f7 MOV 0xd0(%RSP),%R15 |
(224) 0x42c4ff ADD %R15,%R12 |
(224) 0x42c502 IMUL %R10,%RDX |
(224) 0x42c506 LEA (%RDX,%R8,8),%R10 |
(224) 0x42c50a ADD %R15,%R10 |
(224) 0x42c50d MOV 0x78(%RSP),%RCX |
(224) 0x42c512 IMUL %RDI,%RCX |
(224) 0x42c516 LEA (%RCX,%R8,8),%RCX |
(224) 0x42c51a ADD 0xc8(%RSP),%RCX |
(224) 0x42c522 MOV %RCX,0x138(%RSP) |
(224) 0x42c52a MOV 0x70(%RSP),%RCX |
(224) 0x42c52f IMUL %RDI,%RCX |
(224) 0x42c533 LEA (%RCX,%R8,8),%RCX |
(224) 0x42c537 ADD 0xc0(%RSP),%RCX |
(224) 0x42c53f MOV %RCX,0x130(%RSP) |
(224) 0x42c547 MOV 0x68(%RSP),%RCX |
(224) 0x42c54c IMUL %RDI,%RCX |
(224) 0x42c550 LEA (%RCX,%R8,8),%RCX |
(224) 0x42c554 ADD 0xb8(%RSP),%RCX |
(224) 0x42c55c MOV %RCX,0x128(%RSP) |
(224) 0x42c564 MOV 0x60(%RSP),%RDX |
(224) 0x42c569 MOV %RDI,%RCX |
(224) 0x42c56c IMUL %RDI,%RDX |
(224) 0x42c570 LEA (%RDX,%R8,8),%R15 |
(224) 0x42c574 ADD 0xb0(%RSP),%R15 |
(224) 0x42c57c MOV 0x50(%RSP),%RDX |
(224) 0x42c581 MOV 0x20(%RSP),%RDI |
(224) 0x42c586 IMUL %RDX,%RDI |
(224) 0x42c58a LEA (%RDI,%R8,8),%RDI |
(224) 0x42c58e ADD 0x48(%RSP),%RDI |
(224) 0x42c593 IMUL %RDX,%RCX |
(224) 0x42c597 MOV %R8,0x20(%RSP) |
(224) 0x42c59c LEA (%RCX,%R8,8),%RDX |
(224) 0x42c5a0 ADD 0x48(%RSP),%RDX |
(224) 0x42c5a5 XOR %R8D,%R8D |
(224) 0x42c5a8 NOPL (%RAX,%RAX,1) |
(225) 0x42c5b0 VMOVUPD -0x8(%RBX,%R8,8),%ZMM0 |
(225) 0x42c5bb VMOVUPD (%RBX,%R8,8),%ZMM3 |
(225) 0x42c5c2 VMULPD -0x8(%R14,%R8,8),%ZMM0,%ZMM0 |
(225) 0x42c5cd VFMADD231PD (%R14,%R8,8),%ZMM3,%ZMM0 |
(225) 0x42c5d4 VMOVUPD -0x8(%R9,%R8,8),%ZMM3 |
(225) 0x42c5df VMOVUPD (%R9,%R8,8),%ZMM4 |
(225) 0x42c5e6 VFMADD132PD (%R11,%R8,8),%ZMM0,%ZMM4 |
(225) 0x42c5ed VFMADD231PD -0x8(%R11,%R8,8),%ZMM3,%ZMM4 |
(225) 0x42c5f8 VMULPD %ZMM1,%ZMM4,%ZMM0 |
(225) 0x42c5fe VDIVPD %ZMM0,%ZMM2,%ZMM0 |
(225) 0x42c604 VMOVUPD (%RDX,%R8,8),%ZMM3 |
(225) 0x42c60b VMOVUPD -0x8(%RAX,%R8,8),%ZMM4 |
(225) 0x42c616 VMOVUPD (%RAX,%R8,8),%ZMM17 |
(225) 0x42c61d VSUBPD %ZMM17,%ZMM4,%ZMM18 |
(225) 0x42c623 VMULPD %ZMM3,%ZMM18,%ZMM18 |
(225) 0x42c629 VMOVUPD (%RDI,%R8,8),%ZMM19 |
(225) 0x42c630 VMOVUPD -0x8(%R13,%R8,8),%ZMM20 |
(225) 0x42c63b VMOVUPD (%R13,%R8,8),%ZMM25 |
(225) 0x42c643 VSUBPD %ZMM25,%ZMM20,%ZMM26 |
(225) 0x42c649 VFMADD213PD %ZMM18,%ZMM19,%ZMM26 |
(225) 0x42c64f VMOVUPD -0x8(%RSI,%R8,8),%ZMM18 |
(225) 0x42c65a VMOVUPD (%RSI,%R8,8),%ZMM27 |
(225) 0x42c661 VSUBPD %ZMM17,%ZMM25,%ZMM17 |
(225) 0x42c667 VMULPD %ZMM17,%ZMM27,%ZMM17 |
(225) 0x42c66d VSUBPD %ZMM4,%ZMM20,%ZMM4 |
(225) 0x42c673 VFMADD213PD %ZMM17,%ZMM18,%ZMM4 |
(225) 0x42c679 VMOVUPD -0x8(%R12,%R8,8),%ZMM17 |
(225) 0x42c684 VMOVUPD (%R12,%R8,8),%ZMM20 |
(225) 0x42c68b VSUBPD %ZMM20,%ZMM17,%ZMM25 |
(225) 0x42c691 VMOVUPD -0x8(%R10,%R8,8),%ZMM28 |
(225) 0x42c69c VMOVUPD (%R10,%R8,8),%ZMM29 |
(225) 0x42c6a3 VSUBPD %ZMM29,%ZMM28,%ZMM30 |
(225) 0x42c6a9 VFMADD213PD %ZMM26,%ZMM3,%ZMM25 |
(225) 0x42c6af VFMADD231PD %ZMM30,%ZMM19,%ZMM25 |
(225) 0x42c6b5 VFMADD213PD (%R15,%R8,8),%ZMM0,%ZMM25 |
(225) 0x42c6bc MOV 0x128(%RSP),%RCX |
(225) 0x42c6c4 VMOVUPD %ZMM25,(%RCX,%R8,8) |
(225) 0x42c6cb VSUBPD %ZMM20,%ZMM29,%ZMM3 |
(225) 0x42c6d1 VSUBPD %ZMM17,%ZMM28,%ZMM17 |
(225) 0x42c6d7 VFMADD213PD %ZMM4,%ZMM27,%ZMM3 |
(225) 0x42c6dd VFMADD231PD %ZMM17,%ZMM18,%ZMM3 |
(225) 0x42c6e3 MOV 0x130(%RSP),%RCX |
(225) 0x42c6eb VFMADD213PD (%RCX,%R8,8),%ZMM0,%ZMM3 |
(225) 0x42c6f2 MOV 0x138(%RSP),%RCX |
(225) 0x42c6fa VMOVUPD %ZMM3,(%RCX,%R8,8) |
(225) 0x42c701 ADD $0x8,%R8 |
(225) 0x42c705 CMP 0x88(%RSP),%R8 |
(225) 0x42c70d JB 42c5b0 |
(224) 0x42c713 MOV 0x120(%RSP),%RAX |
(224) 0x42c71b MOV 0x88(%RSP),%R10 |
(224) 0x42c723 CMP %RAX,%R10 |
(224) 0x42c726 MOV 0x14(%RSP),%EDX |
(224) 0x42c72a MOV 0x2c(%RSP),%EBX |
(224) 0x42c72e MOV 0x80(%RSP),%RDI |
(224) 0x42c736 JE 42c350 |
(224) 0x42c73c MOV %RDI,%R9 |
(224) 0x42c73f VPBROADCASTQ %RAX,%ZMM0 |
(224) 0x42c745 MOV 0x118(%RSP),%RSI |
(224) 0x42c74d MOV 0x110(%RSP),%RDI |
(224) 0x42c755 MOV 0x50(%RSP),%R11 |
(224) 0x42c75a MOV 0x108(%RSP),%R13 |
(224) 0x42c762 MOV 0x100(%RSP),%R14 |
(224) 0x42c76a MOV 0x20(%RSP),%R8 |
(224) 0x42c76f JMP 42c78f |
0x42c771 NOPW %CS:(%RAX,%RAX,1) |
(224) 0x42c780 MOV %RDI,%R9 |
(224) 0x42c783 MOV %R12,%RDI |
(224) 0x42c786 VPBROADCASTQ %RAX,%ZMM0 |
(224) 0x42c78c XOR %R10D,%R10D |
(224) 0x42c78f VPBROADCASTQ %R10,%ZMM3 |
(224) 0x42c795 VPSUBQ %ZMM3,%ZMM0,%ZMM0 |
(224) 0x42c79b VPCMPNLEUQ 0xddada(%RIP),%ZMM0,%K1 |
(224) 0x42c7a6 KORTESTB %K1,%K1 |
(224) 0x42c7aa JE 42c340 |
(224) 0x42c7b0 MOV 0xa8(%RSP),%RAX |
(224) 0x42c7b8 ADD %R9D,%EAX |
(224) 0x42c7bb MOVSXD %EAX,%R15 |
(224) 0x42c7be ADD %R8,%R10 |
(224) 0x42c7c1 MOV 0x40(%RSP),%RAX |
(224) 0x42c7c6 MOV %RDI,%R8 |
(224) 0x42c7c9 LEA (%RAX,%R15,1),%R9 |
(224) 0x42c7cd MOV %R14,%RAX |
(224) 0x42c7d0 IMUL %R9,%RAX |
(224) 0x42c7d4 MOV 0x40(%RBP),%RDX |
(224) 0x42c7d8 ADD %RDX,%RAX |
(224) 0x42c7db MOV %RSI,%RDI |
(224) 0x42c7de MOV 0xa0(%RSP),%RSI |
(224) 0x42c7e6 MOV %RSI,%RCX |
(224) 0x42c7e9 NOT %RCX |
(224) 0x42c7ec ADD %R10,%RCX |
(224) 0x42c7ef VMOVUPD (%RAX,%RCX,8),%ZMM25{%K1}{z} |
(224) 0x42c7f6 SUB %RSI,%R10 |
(224) 0x42c7f9 VMOVUPD (%RAX,%R10,8),%ZMM26{%K1}{z} |
(224) 0x42c800 MOV %R13,%RAX |
(224) 0x42c803 IMUL %R9,%RAX |
(224) 0x42c807 MOV 0x38(%RBP),%RSI |
(224) 0x42c80b ADD %RSI,%RAX |
(224) 0x42c80e VMOVUPD (%RAX,%RCX,8),%ZMM27{%K1}{z} |
(224) 0x42c815 VMOVUPD (%RAX,%R10,8),%ZMM28{%K1}{z} |
(224) 0x42c81c SUB 0x98(%RSP),%R15 |
(224) 0x42c824 IMUL %R15,%R14 |
(224) 0x42c828 ADD %RDX,%R14 |
(224) 0x42c82b VMOVUPD (%R14,%R10,8),%ZMM29{%K1}{z} |
(224) 0x42c832 VMOVUPD (%R14,%RCX,8),%ZMM30{%K1}{z} |
(224) 0x42c839 IMUL %R15,%R13 |
(224) 0x42c83d ADD %RSI,%R13 |
(224) 0x42c840 VMOVUPD (%R13,%R10,8),%ZMM31{%K1}{z} |
(224) 0x42c848 VMOVUPD (%R13,%RCX,8),%ZMM0{%K1}{z} |
(224) 0x42c850 MOV %R11,%RAX |
(224) 0x42c853 IMUL %R15,%RAX |
(224) 0x42c857 MOV 0x50(%RBP),%RSI |
(224) 0x42c85b ADD %RSI,%RAX |
(224) 0x42c85e VMOVUPD (%RAX,%R10,8),%ZMM3{%K1}{z} |
(224) 0x42c865 MOV %R8,%RAX |
(224) 0x42c868 IMUL %R15,%RAX |
(224) 0x42c86c MOV 0x30(%RBP),%RDX |
(224) 0x42c870 ADD %RDX,%RAX |
(224) 0x42c873 VMOVUPD (%RAX,%R10,8),%ZMM4{%K1}{z} |
(224) 0x42c87a VMOVUPD (%RAX,%RCX,8),%ZMM17{%K1}{z} |
(224) 0x42c881 IMUL %R9,%R11 |
(224) 0x42c885 ADD %RSI,%R11 |
(224) 0x42c888 VMOVUPD (%R11,%R10,8),%ZMM19{%K1}{z} |
(224) 0x42c88f IMUL %R9,%R8 |
(224) 0x42c893 ADD %RDX,%R8 |
(224) 0x42c896 VMOVUPD (%R8,%R10,8),%ZMM21{%K1}{z} |
(224) 0x42c89d VMOVUPD (%R8,%RCX,8),%ZMM22{%K1}{z} |
(224) 0x42c8a4 IMUL %R15,%RDI |
(224) 0x42c8a8 ADD 0x48(%RBP),%RDI |
(224) 0x42c8ac VMOVUPD (%RDI,%R10,8),%ZMM18{%K1}{z} |
(224) 0x42c8b3 VMOVUPD (%RDI,%RCX,8),%ZMM20{%K1}{z} |
(224) 0x42c8ba MOV 0x80(%RSP),%RDI |
(224) 0x42c8c2 MOV 0x58(%RSP),%RSI |
(224) 0x42c8c7 MOV %RSI,%RAX |
(224) 0x42c8ca IMUL %R15,%RAX |
(224) 0x42c8ce MOV 0x28(%RBP),%RDX |
(224) 0x42c8d2 ADD %RDX,%RAX |
(224) 0x42c8d5 VMOVUPD (%RAX,%R10,8),%ZMM23{%K1}{z} |
(224) 0x42c8dc VMOVUPD (%RAX,%RCX,8),%ZMM24{%K1}{z} |
(224) 0x42c8e3 IMUL %R9,%RSI |
(224) 0x42c8e7 ADD %RDX,%RSI |
(224) 0x42c8ea MOV 0x14(%RSP),%EDX |
(224) 0x42c8ee VMOVUPD (%RSI,%RCX,8),%ZMM5{%K1}{z} |
(224) 0x42c8f5 VMOVUPD (%RSI,%R10,8),%ZMM6{%K1}{z} |
(224) 0x42c8fc MOV 0x60(%RSP),%RAX |
(224) 0x42c901 IMUL %R15,%RAX |
(224) 0x42c905 ADD 0x20(%RBP),%RAX |
(224) 0x42c909 VMOVUPD (%RAX,%R10,8),%ZMM7{%K1}{z} |
(224) 0x42c910 VMOVAPD 0x180(%RSP),%ZMM11 |
(224) 0x42c918 VMOVAPD %ZMM25,%ZMM11{%K1} |
(224) 0x42c91e VMOVAPD 0x200(%RSP),%ZMM10 |
(224) 0x42c926 VMOVAPD %ZMM27,%ZMM10{%K1} |
(224) 0x42c92c VMOVAPD 0x280(%RSP),%ZMM9 |
(224) 0x42c934 VMOVAPD %ZMM26,%ZMM9{%K1} |
(224) 0x42c93a VMOVAPD 0x300(%RSP),%ZMM8 |
(224) 0x42c942 VMOVAPD %ZMM28,%ZMM8{%K1} |
(224) 0x42c948 VMOVAPD 0x340(%RSP),%ZMM28 |
(224) 0x42c950 VMOVAPD %ZMM29,%ZMM28{%K1} |
(224) 0x42c956 VMOVAPD 0x380(%RSP),%ZMM27 |
(224) 0x42c95e VMOVAPD %ZMM31,%ZMM27{%K1} |
(224) 0x42c964 VMOVAPD 0x3c0(%RSP),%ZMM26 |
(224) 0x42c96c VMOVAPD %ZMM30,%ZMM26{%K1} |
(224) 0x42c972 VMOVAPD 0x400(%RSP),%ZMM25 |
(224) 0x42c97a VMOVAPD %ZMM0,%ZMM25{%K1} |
(224) 0x42c980 VMOVAPD %ZMM3,%ZMM16{%K1} |
(224) 0x42c986 VMOVAPD %ZMM4,%ZMM15{%K1} |
(224) 0x42c98c VMOVAPD %ZMM17,%ZMM14{%K1} |
(224) 0x42c992 VMOVAPD %ZMM19,%ZMM13{%K1} |
(224) 0x42c998 VMOVAPD %ZMM21,%ZMM12{%K1} |
(224) 0x42c99e VSUBPD %ZMM15,%ZMM14,%ZMM0 |
(224) 0x42c9a4 VMULPD %ZMM16,%ZMM0,%ZMM0 |
(224) 0x42c9aa VMOVAPD 0x140(%RSP),%ZMM29 |
(224) 0x42c9b2 VMOVAPD %ZMM22,%ZMM29{%K1} |
(224) 0x42c9b8 VSUBPD %ZMM12,%ZMM29,%ZMM3 |
(224) 0x42c9be VFMADD213PD %ZMM0,%ZMM13,%ZMM3 |
(224) 0x42c9c4 VMOVAPD 0x2c0(%RSP),%ZMM21 |
(224) 0x42c9cc VMOVAPD %ZMM23,%ZMM21{%K1} |
(224) 0x42c9d2 VMOVAPD 0x440(%RSP),%ZMM19 |
(224) 0x42c9da VMOVAPD %ZMM24,%ZMM19{%K1} |
(224) 0x42c9e0 VMOVAPD 0x480(%RSP),%ZMM17 |
(224) 0x42c9e8 VMOVAPD %ZMM6,%ZMM17{%K1} |
(224) 0x42c9ee VMOVAPD 0x4c0(%RSP),%ZMM6 |
(224) 0x42c9f6 VMOVAPD %ZMM5,%ZMM6{%K1} |
(224) 0x42c9fc VSUBPD %ZMM21,%ZMM19,%ZMM0 |
(224) 0x42ca02 VFMADD213PD %ZMM3,%ZMM16,%ZMM0 |
(224) 0x42ca08 VSUBPD %ZMM17,%ZMM6,%ZMM3 |
(224) 0x42ca0e VFMADD231PD %ZMM3,%ZMM13,%ZMM0 |
(224) 0x42ca14 VMOVAPD %ZMM10,0x200(%RSP) |
(224) 0x42ca1c VMOVAPD %ZMM11,0x180(%RSP) |
(224) 0x42ca24 VMULPD %ZMM10,%ZMM11,%ZMM3 |
(224) 0x42ca2a VMOVAPD %ZMM8,0x300(%RSP) |
(224) 0x42ca32 VMOVAPD %ZMM9,0x280(%RSP) |
(224) 0x42ca3a VFMADD231PD %ZMM8,%ZMM9,%ZMM3 |
(224) 0x42ca40 VMOVAPD %ZMM27,0x380(%RSP) |
(224) 0x42ca48 VMOVAPD %ZMM28,0x340(%RSP) |
(224) 0x42ca50 VFMADD231PD %ZMM27,%ZMM28,%ZMM3 |
(224) 0x42ca56 VMOVAPD %ZMM25,0x400(%RSP) |
(224) 0x42ca5e VMOVAPD %ZMM26,0x3c0(%RSP) |
(224) 0x42ca66 VFMADD231PD %ZMM25,%ZMM26,%ZMM3 |
(224) 0x42ca6c VMULPD %ZMM1,%ZMM3,%ZMM3 |
(224) 0x42ca72 VDIVPD %ZMM3,%ZMM2,%ZMM3 |
(224) 0x42ca78 VMOVAPD 0x500(%RSP),%ZMM4 |
(224) 0x42ca80 VMOVAPD %ZMM7,%ZMM4{%K1} |
(224) 0x42ca86 VMOVAPD %ZMM4,0x500(%RSP) |
(224) 0x42ca8e VFMADD213PD %ZMM4,%ZMM3,%ZMM0 |
(224) 0x42ca94 MOV 0x68(%RSP),%RAX |
(224) 0x42ca99 IMUL %R15,%RAX |
(224) 0x42ca9d ADD 0x10(%RBP),%RAX |
(224) 0x42caa1 VMOVUPD %ZMM0,(%RAX,%R10,8){%K1} |
(224) 0x42caa8 MOV 0x70(%RSP),%RAX |
(224) 0x42caad IMUL %R15,%RAX |
(224) 0x42cab1 ADD 0x18(%RBP),%RAX |
(224) 0x42cab5 VMOVUPD (%RAX,%R10,8),%ZMM0{%K1}{z} |
(224) 0x42cabc MOV 0x78(%RSP),%RAX |
(224) 0x42cac1 IMUL %R15,%RAX |
(224) 0x42cac5 VMOVAPD 0x1c0(%RSP),%ZMM8 |
(224) 0x42cacd VMOVAPD %ZMM18,%ZMM8{%K1} |
(224) 0x42cad3 VSUBPD %ZMM15,%ZMM12,%ZMM4 |
(224) 0x42cad9 VMULPD %ZMM4,%ZMM8,%ZMM4 |
(224) 0x42cadf VMOVAPD 0x240(%RSP),%ZMM7 |
(224) 0x42cae7 VMOVAPD %ZMM20,%ZMM7{%K1} |
(224) 0x42caed VMOVAPD %ZMM29,0x140(%RSP) |
(224) 0x42caf5 VSUBPD %ZMM14,%ZMM29,%ZMM5 |
(224) 0x42cafb VFMADD213PD %ZMM4,%ZMM7,%ZMM5 |
(224) 0x42cb01 VMOVAPD %ZMM17,0x480(%RSP) |
(224) 0x42cb09 VMOVAPD %ZMM21,0x2c0(%RSP) |
(224) 0x42cb11 VSUBPD %ZMM21,%ZMM17,%ZMM4 |
(224) 0x42cb17 VMOVAPD %ZMM6,0x4c0(%RSP) |
(224) 0x42cb1f VMOVAPD %ZMM19,0x440(%RSP) |
(224) 0x42cb27 VSUBPD %ZMM19,%ZMM6,%ZMM6 |
(224) 0x42cb2d VMOVAPD %ZMM8,0x1c0(%RSP) |
(224) 0x42cb35 VFMADD213PD %ZMM5,%ZMM8,%ZMM4 |
(224) 0x42cb3b VMOVAPD %ZMM7,0x240(%RSP) |
(224) 0x42cb43 VFMADD231PD %ZMM6,%ZMM7,%ZMM4 |
(224) 0x42cb49 VMOVAPD 0x540(%RSP),%ZMM5 |
(224) 0x42cb51 VMOVAPD %ZMM0,%ZMM5{%K1} |
(224) 0x42cb57 VMOVAPD %ZMM5,0x540(%RSP) |
(224) 0x42cb5f VFMADD213PD %ZMM5,%ZMM3,%ZMM4 |
(224) 0x42cb65 ADD 0x38(%RSP),%RAX |
(224) 0x42cb6a VMOVUPD %ZMM4,(%RAX,%R10,8){%K1} |
(224) 0x42cb71 JMP 42c350 |
0x42cb76 NOPW %CS:(%RAX,%RAX,1) |
Path / |
Source file and lines | accelerate_kernel.f90:57-79 |
Module | exec |
nb instructions | 145 |
nb uops | 149 |
loop length | 665 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 0 |
used zmm registers | 2 |
nb stack references | 38 |
micro-operation queue | 24.83 cycles |
front end | 24.83 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 11.20 | 11.20 | 8.67 | 8.67 | 17.50 | 11.20 | 11.20 | 17.50 | 17.50 | 17.50 | 11.20 | 8.67 |
cycles | 11.20 | 11.20 | 8.67 | 8.67 | 17.50 | 11.20 | 11.20 | 17.50 | 17.50 | 17.50 | 11.20 | 8.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 24.30-24.34 |
Stall cycles | 0.00 |
Front-end | 24.83 |
Dispatch | 17.50 |
Overall L1 | 24.83 |
all | 1% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 3% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 1% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 3% |
all | 11% |
load | 8% |
store | 10% |
mul | 12% |
add-sub | 10% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 11% |
load | 9% |
store | 10% |
mul | 12% |
add-sub | 10% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 11% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x5c0,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R9,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x60(%RBP),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x58(%RBP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %EBX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVL $0,0x34(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JS 42c177 <accelerate_kernel_module_mp_accelerate_kernel_.DIR.OMP.PARALLEL.2+0xb7> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R8,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RCX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVL $0,0x1c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EAX,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0x1,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x38(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x3c(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x24(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x20(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x54bf70,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 404670 <__kmpc_for_static_init_4@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x1c(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RSP),%R10D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %EAX,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 42c1c0 <accelerate_kernel_module_mp_accelerate_kernel_.DIR.OMP.PARALLEL.2+0x100> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x54bf90,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x28(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 404230 <__kmpc_for_static_fini@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x90(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV $0x54bfb0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL 404740 <__kmpc_barrier@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x48(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVQ %R12,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
SAL $0x20,%R15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV $-0x200000000,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.28 |
LEA (%R15,%RCX,1),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDX,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SAR $0x20,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %RAX,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SAL $0x20,%R14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ADD %R14,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SAR $0x20,%R12 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
TEST %RDX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
MOV $-0x1,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVNS %RDX,%RSI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
TEST %RSI,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
MOV $0x1,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RDI,%RSI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV $0x200000000,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.28 |
MOV %R13,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R15,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x38(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RAX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RDX,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV $-0x1,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SHR $0x20,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
IMUL %RSI,%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
SAL $0x3,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
SUB %RAX,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R11,0xf0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADD %EBX,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SUB %RAX,%R15 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R15,0xe8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %RCX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
CMOVNS %RCX,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
TEST %RDX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
CMOVG %RDI,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x30(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %R14,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x28(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %R13,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RCX,%R13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SHR $0x20,%R13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
IMUL %RDX,%R13 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %R10D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %RAX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDI,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %RAX,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R9,0xd8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %RAX,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R8,0xd0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x38(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,0xc8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x18(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,0xb8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x20(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,0xb0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x50(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R12,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOT %R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R12,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NEG %R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R13,0xf8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD 0xdf384(%RIP),%ZMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ %XMM0,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RSI,0xa8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ESI,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R10D,0x14(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 42c35f <accelerate_kernel_module_mp_accelerate_kernel_.DIR.OMP.PARALLEL.2+0x29f> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | accelerate_kernel.f90:57-79 |
Module | exec |
nb instructions | 145 |
nb uops | 149 |
loop length | 665 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 0 |
used zmm registers | 2 |
nb stack references | 38 |
micro-operation queue | 24.83 cycles |
front end | 24.83 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 11.20 | 11.20 | 8.67 | 8.67 | 17.50 | 11.20 | 11.20 | 17.50 | 17.50 | 17.50 | 11.20 | 8.67 |
cycles | 11.20 | 11.20 | 8.67 | 8.67 | 17.50 | 11.20 | 11.20 | 17.50 | 17.50 | 17.50 | 11.20 | 8.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 24.30-24.34 |
Stall cycles | 0.00 |
Front-end | 24.83 |
Dispatch | 17.50 |
Overall L1 | 24.83 |
all | 1% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 3% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 1% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 3% |
all | 11% |
load | 8% |
store | 10% |
mul | 12% |
add-sub | 10% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 11% |
load | 9% |
store | 10% |
mul | 12% |
add-sub | 10% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 11% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x5c0,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R9,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x60(%RBP),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x58(%RBP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %EBX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVL $0,0x34(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JS 42c177 <accelerate_kernel_module_mp_accelerate_kernel_.DIR.OMP.PARALLEL.2+0xb7> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R8,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RCX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVL $0,0x1c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EAX,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0x1,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x38(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x3c(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x24(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x20(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x54bf70,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 404670 <__kmpc_for_static_init_4@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x1c(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RSP),%R10D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %EAX,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 42c1c0 <accelerate_kernel_module_mp_accelerate_kernel_.DIR.OMP.PARALLEL.2+0x100> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x54bf90,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x28(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 404230 <__kmpc_for_static_fini@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x90(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV $0x54bfb0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL 404740 <__kmpc_barrier@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x48(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVQ %R12,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
SAL $0x20,%R15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV $-0x200000000,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.28 |
LEA (%R15,%RCX,1),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDX,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SAR $0x20,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %RAX,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SAL $0x20,%R14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ADD %R14,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SAR $0x20,%R12 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
TEST %RDX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
MOV $-0x1,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVNS %RDX,%RSI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
TEST %RSI,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
MOV $0x1,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RDI,%RSI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV $0x200000000,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.28 |
MOV %R13,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R15,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x38(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RAX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RDX,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV $-0x1,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SHR $0x20,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
IMUL %RSI,%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
SAL $0x3,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
SUB %RAX,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R11,0xf0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADD %EBX,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SUB %RAX,%R15 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R15,0xe8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %RCX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
CMOVNS %RCX,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
TEST %RDX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
CMOVG %RDI,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x30(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %R14,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x28(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %R13,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RCX,%R13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SHR $0x20,%R13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
IMUL %RDX,%R13 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %R10D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %RAX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDI,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %RAX,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R9,0xd8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %RAX,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R8,0xd0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x38(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,0xc8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x18(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,0xb8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x20(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,0xb0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x50(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R12,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOT %R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R12,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NEG %R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R13,0xf8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD 0xdf384(%RIP),%ZMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ %XMM0,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RSI,0xa8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ESI,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R10D,0x14(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 42c35f <accelerate_kernel_module_mp_accelerate_kernel_.DIR.OMP.PARALLEL.2+0x29f> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼accelerate_kernel_.DIR.OMP.PARALLEL.2– | 5.28 | 3.96 |
▼Loop 224 - accelerate_kernel.f90:60-76 - exec– | 0.01 | 0.01 |
○Loop 225 - accelerate_kernel.f90:62-76 - exec | 5.28 | 3.95 |