Function: void RAJA::internal::StatementExecutor<RAJA::statement::Collapse<RAJA::omp_parallel_collap ... | Module: libkripke.so | Source: Collapse.hpp:81-81 [...] | Coverage: 0.15% |
---|
Function: void RAJA::internal::StatementExecutor<RAJA::statement::Collapse<RAJA::omp_parallel_collap ... | Module: libkripke.so | Source: Collapse.hpp:81-81 [...] | Coverage: 0.15% |
---|
/home/eoseret/qaas_runs_CPU_9468/171-147-9160/intel/Kripke/build/Kripke/tpl/raja/include/RAJA/util/View.hpp: 79 - 110 |
-------------------------------------------------------------------------------- |
79: : layout(V.layout), data(V.data) |
[...] |
110: return data[idx]; |
/home/eoseret/qaas_runs_CPU_9468/171-147-9160/intel/Kripke/build/Kripke/tpl/raja/include/RAJA/policy/openmp/kernel/Collapse.hpp: 81 - 81 |
-------------------------------------------------------------------------------- |
81: #pragma omp parallel for private(i0, i1) firstprivate(privatizer) \ |
/home/eoseret/qaas_runs_CPU_9468/171-147-9160/intel/Kripke/build/Kripke/tpl/raja/include/RAJA/util/Layout.hpp: 55 - 55 |
-------------------------------------------------------------------------------- |
55: return a * b; |
/home/eoseret/qaas_runs_CPU_9468/171-147-9160/intel/Kripke/build/Kripke/tpl/raja/include/RAJA/util/Operators.hpp: 307 - 307 |
-------------------------------------------------------------------------------- |
307: return Ret{lhs} + rhs; |
/home/eoseret/qaas_runs_CPU_9468/171-147-9160/intel/Kripke/build/Kripke/tpl/raja/include/RAJA/pattern/detail/reduce.hpp: 74 - 262 |
-------------------------------------------------------------------------------- |
74: val = operator_type::operator()(val, v); |
[...] |
261: : parent{other.parent ? other.parent : &other}, |
262: identity{other.identity}, |
/home/eoseret/qaas_runs_CPU_9468/171-147-9160/intel/Kripke/build/Kripke/tpl/raja/tpl/camp/include/camp/tuple.hpp: 253 - 253 |
-------------------------------------------------------------------------------- |
253: CAMP_HOST_DEVICE constexpr tuple(tuple const& o) : base(o.base) {} |
/home/eoseret/qaas_runs_CPU_9468/171-147-9160/intel/Kripke/build/Kripke/tpl/raja/include/RAJA/index/RangeSegment.hpp: 120 - 120 |
-------------------------------------------------------------------------------- |
120: RAJA_HOST_DEVICE RAJA_INLINE ~TypedRangeSegment() {} |
/home/eoseret/qaas_runs_CPU_9468/171-147-9160/intel/Kripke/build/Kripke/tpl/raja/include/RAJA/policy/loop/forall.hpp: 59 - 59 |
-------------------------------------------------------------------------------- |
59: for (decltype(distance_it) i = 0; i < distance_it; ++i) { |
/home/eoseret/qaas_runs_CPU_9468/171-147-9160/intel/Kripke/build/Kripke/tpl/raja/include/RAJA/internal/Iterators.hpp: 55 - 177 |
-------------------------------------------------------------------------------- |
55: : val(rhs.val) |
[...] |
142: return val - rhs.val; |
[...] |
177: return value_type(val + rhs); |
/home/eoseret/qaas_runs_CPU_9468/171-147-9160/intel/Kripke/build/Kripke/src/Kripke/Kernel/Population.cpp: 58 - 58 |
-------------------------------------------------------------------------------- |
58: part_red += w(d) * psi(d,g,z) * volume(z); |
/home/eoseret/qaas_runs_CPU_9468/171-147-9160/intel/Kripke/build/Kripke/tpl/raja/include/RAJA/policy/openmp/reduce.hpp: 46 - 59 |
-------------------------------------------------------------------------------- |
46: class ReduceOMP |
[...] |
59: #pragma omp critical(ompReduceCritical) |
0x4ee70 PUSH %RBP |
0x4ee71 MOV %RSP,%RBP |
0x4ee74 PUSH %R15 |
0x4ee76 PUSH %R14 |
0x4ee78 PUSH %R13 |
0x4ee7a PUSH %R12 |
0x4ee7c PUSH %RBX |
0x4ee7d AND $-0x20,%RSP |
0x4ee81 SUB $0x60,%RSP |
0x4ee85 MOV 0x10(%RDI),%RAX |
0x4ee89 MOV 0x8(%RDI),%R14 |
0x4ee8d MOV 0x38(%RAX),%RCX |
0x4ee91 LEA 0x38(%RAX),%R8 |
0x4ee95 MOV 0x10(%RAX),%RBX |
0x4ee99 MOV %R14,0x58(%RSP) |
0x4ee9e MOV (%RAX),%RSI |
0x4eea1 MOV 0x28(%RAX),%RDX |
0x4eea5 TEST %RCX,%RCX |
0x4eea8 MOV 0x70(%RAX),%R9 |
0x4eeac MOV 0x90(%RAX),%R10 |
0x4eeb3 MOV %RBX,0x30(%RSP) |
0x4eeb8 CMOVE %R8,%RCX |
0x4eebc MOV 0x98(%RAX),%R11 |
0x4eec3 MOV (%RDI),%R14 |
0x4eec6 MOV %RSI,0x40(%RSP) |
0x4eecb MOV 0x20(%RAX),%RBX |
0x4eecf VMOVSD 0x40(%RAX),%XMM2 |
0x4eed4 MOV %RDX,0x18(%RSP) |
0x4eed9 MOV 0xd8(%RAX),%R12 |
0x4eee0 MOV 0x100(%RAX),%R13 |
0x4eee7 MOV %RCX,0x10(%RSP) |
0x4eeec MOV %R9,0x28(%RSP) |
0x4eef1 MOV %R10,0x20(%RSP) |
0x4eef6 MOV %R11,0x38(%RSP) |
0x4eefb TEST %R14,%R14 |
0x4eefe JLE 4f273 |
0x4ef04 CMPQ $0,0x58(%RSP) |
0x4ef0a JLE 4f273 |
0x4ef10 VMOVSD %XMM2,0x50(%RSP) |
0x4ef16 CALL 9760 <omp_get_num_threads@plt> |
0x4ef1b MOVSXD %EAX,%R15 |
0x4ef1e CALL 9650 <omp_get_thread_num@plt> |
0x4ef23 XOR %EDX,%EDX |
0x4ef25 VMOVSD 0x50(%RSP),%XMM2 |
0x4ef2b MOVSXD %EAX,%RDI |
0x4ef2e MOV 0x58(%RSP),%RAX |
0x4ef33 IMUL %R14,%RAX |
0x4ef37 DIV %R15 |
0x4ef3a CMP %RDX,%RDI |
0x4ef3d MOV %RAX,%RSI |
0x4ef40 JB 4f2ee |
0x4ef46 IMUL %RSI,%RDI |
0x4ef4a LEA (%RDI,%RDX,1),%RAX |
0x4ef4e LEA (%RSI,%RAX,1),%RCX |
0x4ef52 CMP %RCX,%RAX |
0x4ef55 JAE 4f273 |
0x4ef5b XOR %EDX,%EDX |
0x4ef5d MOV 0x18(%RSP),%R10 |
0x4ef62 LEA -0x1(%RSI),%R8 |
0x4ef66 XOR %R11D,%R11D |
0x4ef69 DIVQ 0x58(%RSP) |
0x4ef6e MOV %R8,0x50(%RSP) |
0x4ef73 SUB %RBX,%R10 |
0x4ef76 LEA (%R13,%RBX,8),%RSI |
0x4ef7b LEA -0x1(%R10),%R9 |
0x4ef7f MOV %R10,%R14 |
0x4ef82 MOV %R10,%R15 |
0x4ef85 MOV %R9,0x18(%RSP) |
0x4ef8a SHR $0x2,%R14 |
0x4ef8e AND $-0x4,%R15 |
0x4ef92 SAL $0x5,%R14 |
0x4ef96 MOV %RAX,0x48(%RSP) |
0x4ef9b NOPL (%RAX,%RAX,1) |
(427) 0x4efa0 TEST %R10,%R10 |
(427) 0x4efa3 JLE 4f24c |
(427) 0x4efa9 MOV 0x48(%RSP),%RAX |
(427) 0x4efae MOV 0x40(%RSP),%RDI |
(427) 0x4efb3 MOV 0x28(%RSP),%RCX |
(427) 0x4efb8 MOV 0x38(%RSP),%R8 |
(427) 0x4efbd ADD %RDI,%RAX |
(427) 0x4efc0 MOV 0x20(%RSP),%R9 |
(427) 0x4efc5 VMOVSD (%RCX,%RAX,8),%XMM8 |
(427) 0x4efca MOV 0x30(%RSP),%RCX |
(427) 0x4efcf IMUL %R9,%RAX |
(427) 0x4efd3 ADD %RDX,%RCX |
(427) 0x4efd6 IMUL %R8,%RCX |
(427) 0x4efda ADD %RAX,%RCX |
(427) 0x4efdd CMPQ $0x2,0x18(%RSP) |
(427) 0x4efe3 JBE 4f2c9 |
(427) 0x4efe9 LEA -0x20(%R14),%RDI |
(427) 0x4efed LEA (%RCX,%RBX,1),%R8 |
(427) 0x4eff1 VBROADCASTSD %XMM8,%YMM9 |
(427) 0x4eff6 XOR %EAX,%EAX |
(427) 0x4eff8 SHR $0x5,%RDI |
(427) 0x4effc LEA (%R12,%R8,8),%R9 |
(427) 0x4f000 VXORPD %XMM0,%XMM0,%XMM0 |
(427) 0x4f004 INC %RDI |
(427) 0x4f007 AND $0x7,%EDI |
(427) 0x4f00a JE 4f0eb |
(427) 0x4f010 CMP $0x1,%RDI |
(427) 0x4f014 JE 4f0c9 |
(427) 0x4f01a CMP $0x2,%RDI |
(427) 0x4f01e JE 4f0b1 |
(427) 0x4f024 CMP $0x3,%RDI |
(427) 0x4f028 JE 4f099 |
(427) 0x4f02a CMP $0x4,%RDI |
(427) 0x4f02e JE 4f080 |
(427) 0x4f030 CMP $0x5,%RDI |
(427) 0x4f034 JE 4f067 |
(427) 0x4f036 CMP $0x6,%RDI |
(427) 0x4f03a JE 4f04f |
(427) 0x4f03c VMOVUPD (%RSI),%YMM5 |
(427) 0x4f040 MOV $0x20,%EAX |
(427) 0x4f045 VMULPD (%R9),%YMM5,%YMM0 |
(427) 0x4f04a VMULPD %YMM9,%YMM0,%YMM0 |
(427) 0x4f04f VMOVUPD (%RSI,%RAX,1),%YMM6 |
(427) 0x4f054 VMULPD (%R9,%RAX,1),%YMM6,%YMM1 |
(427) 0x4f05a ADD $0x20,%RAX |
(427) 0x4f05e VMULPD %YMM9,%YMM1,%YMM7 |
(427) 0x4f063 VADDPD %YMM7,%YMM0,%YMM0 |
(427) 0x4f067 VMOVUPD (%RSI,%RAX,1),%YMM10 |
(427) 0x4f06c VMULPD (%R9,%RAX,1),%YMM10,%YMM11 |
(427) 0x4f072 ADD $0x20,%RAX |
(427) 0x4f076 VMULPD %YMM9,%YMM11,%YMM12 |
(427) 0x4f07b VADDPD %YMM12,%YMM0,%YMM0 |
(427) 0x4f080 VMOVUPD (%RSI,%RAX,1),%YMM13 |
(427) 0x4f085 VMULPD (%R9,%RAX,1),%YMM13,%YMM14 |
(427) 0x4f08b ADD $0x20,%RAX |
(427) 0x4f08f VMULPD %YMM9,%YMM14,%YMM15 |
(427) 0x4f094 VADDPD %YMM15,%YMM0,%YMM0 |
(427) 0x4f099 VMOVUPD (%RSI,%RAX,1),%YMM3 |
(427) 0x4f09e VMULPD (%R9,%RAX,1),%YMM3,%YMM4 |
(427) 0x4f0a4 ADD $0x20,%RAX |
(427) 0x4f0a8 VMULPD %YMM9,%YMM4,%YMM5 |
(427) 0x4f0ad VADDPD %YMM5,%YMM0,%YMM0 |
(427) 0x4f0b1 VMOVUPD (%RSI,%RAX,1),%YMM6 |
(427) 0x4f0b6 VMULPD (%R9,%RAX,1),%YMM6,%YMM1 |
(427) 0x4f0bc ADD $0x20,%RAX |
(427) 0x4f0c0 VMULPD %YMM9,%YMM1,%YMM7 |
(427) 0x4f0c5 VADDPD %YMM7,%YMM0,%YMM0 |
(427) 0x4f0c9 VMOVUPD (%RSI,%RAX,1),%YMM10 |
(427) 0x4f0ce VMULPD (%R9,%RAX,1),%YMM10,%YMM11 |
(427) 0x4f0d4 ADD $0x20,%RAX |
(427) 0x4f0d8 VMULPD %YMM9,%YMM11,%YMM12 |
(427) 0x4f0dd VADDPD %YMM12,%YMM0,%YMM0 |
(427) 0x4f0e2 CMP %RAX,%R14 |
(427) 0x4f0e5 JE 4f1c5 |
(428) 0x4f0eb VMOVUPD (%RSI,%RAX,1),%YMM13 |
(428) 0x4f0f0 VMOVUPD 0x20(%RSI,%RAX,1),%YMM4 |
(428) 0x4f0f6 VMOVUPD 0x40(%RSI,%RAX,1),%YMM7 |
(428) 0x4f0fc VMULPD (%R9,%RAX,1),%YMM13,%YMM14 |
(428) 0x4f102 VMULPD 0x20(%R9,%RAX,1),%YMM4,%YMM5 |
(428) 0x4f109 VMULPD 0x40(%R9,%RAX,1),%YMM7,%YMM10 |
(428) 0x4f110 VMULPD %YMM9,%YMM14,%YMM15 |
(428) 0x4f115 VMULPD %YMM9,%YMM5,%YMM6 |
(428) 0x4f11a VMULPD %YMM9,%YMM10,%YMM11 |
(428) 0x4f11f VADDPD %YMM15,%YMM0,%YMM3 |
(428) 0x4f124 VMOVUPD 0x60(%RSI,%RAX,1),%YMM0 |
(428) 0x4f12a VMULPD 0x60(%R9,%RAX,1),%YMM0,%YMM13 |
(428) 0x4f131 VADDPD %YMM6,%YMM3,%YMM1 |
(428) 0x4f135 VMOVUPD 0x80(%RSI,%RAX,1),%YMM3 |
(428) 0x4f13e VMULPD 0x80(%R9,%RAX,1),%YMM3,%YMM4 |
(428) 0x4f148 VADDPD %YMM11,%YMM1,%YMM12 |
(428) 0x4f14d VMOVUPD 0xa0(%RSI,%RAX,1),%YMM1 |
(428) 0x4f156 VMULPD %YMM9,%YMM13,%YMM14 |
(428) 0x4f15b VMULPD 0xa0(%R9,%RAX,1),%YMM1,%YMM7 |
(428) 0x4f165 VMULPD %YMM9,%YMM4,%YMM5 |
(428) 0x4f16a VADDPD %YMM14,%YMM12,%YMM15 |
(428) 0x4f16f VMOVUPD 0xc0(%RSI,%RAX,1),%YMM12 |
(428) 0x4f178 VMULPD %YMM9,%YMM7,%YMM10 |
(428) 0x4f17d VMULPD 0xc0(%R9,%RAX,1),%YMM12,%YMM0 |
(428) 0x4f187 VADDPD %YMM5,%YMM15,%YMM6 |
(428) 0x4f18b VMOVUPD 0xe0(%RSI,%RAX,1),%YMM15 |
(428) 0x4f194 VMULPD 0xe0(%R9,%RAX,1),%YMM15,%YMM3 |
(428) 0x4f19e ADD $0x100,%RAX |
(428) 0x4f1a4 VADDPD %YMM10,%YMM6,%YMM11 |
(428) 0x4f1a9 VMULPD %YMM9,%YMM0,%YMM13 |
(428) 0x4f1ae VMULPD %YMM9,%YMM3,%YMM4 |
(428) 0x4f1b3 VADDPD %YMM13,%YMM11,%YMM14 |
(428) 0x4f1b8 VADDPD %YMM4,%YMM14,%YMM0 |
(428) 0x4f1bc CMP %RAX,%R14 |
(428) 0x4f1bf JNE 4f0eb |
(427) 0x4f1c5 VEXTRACTF64X2 $0x1,%YMM0,%XMM9 |
(427) 0x4f1cc VADDPD %XMM0,%XMM9,%XMM5 |
(427) 0x4f1d0 VUNPCKHPD %XMM5,%XMM5,%XMM6 |
(427) 0x4f1d4 VADDPD %XMM5,%XMM6,%XMM1 |
(427) 0x4f1d8 VADDSD %XMM1,%XMM2,%XMM3 |
(427) 0x4f1dc CMP %R15,%R10 |
(427) 0x4f1df JE 4f2b8 |
(427) 0x4f1e5 MOV %R15,%RDI |
(427) 0x4f1e8 MOV %R10,%R8 |
(427) 0x4f1eb VADDPD %XMM0,%XMM9,%XMM4 |
(427) 0x4f1ef MOV %R15,%RAX |
(427) 0x4f1f2 SUB %RDI,%R8 |
(427) 0x4f1f5 CMP $0x1,%R8 |
(427) 0x4f1f9 JE 4f2e5 |
(427) 0x4f1ff LEA (%RCX,%RBX,1),%R9 |
(427) 0x4f203 VMOVDDUP %XMM8,%XMM7 |
(427) 0x4f208 ADD %RDI,%R9 |
(427) 0x4f20b ADD %RBX,%RDI |
(427) 0x4f20e VMOVUPD (%R13,%RDI,8),%XMM10 |
(427) 0x4f215 VMULPD (%R12,%R9,8),%XMM10,%XMM11 |
(427) 0x4f21b VFMADD132PD %XMM11,%XMM4,%XMM7 |
(427) 0x4f220 VUNPCKHPD %XMM7,%XMM7,%XMM12 |
(427) 0x4f224 VADDPD %XMM7,%XMM12,%XMM0 |
(427) 0x4f228 VADDSD %XMM0,%XMM2,%XMM2 |
(427) 0x4f22c TEST $0x1,%R8B |
(427) 0x4f230 JE 4f24c |
(427) 0x4f232 AND $-0x2,%R8 |
(427) 0x4f236 ADD %R8,%RAX |
(427) 0x4f239 ADD %RBX,%RAX |
(427) 0x4f23c VMULSD (%R13,%RAX,8),%XMM8,%XMM8 |
(427) 0x4f243 ADD %RCX,%RAX |
(427) 0x4f246 VFMADD231SD (%R12,%RAX,8),%XMM8,%XMM2 |
(427) 0x4f24c MOV 0x50(%RSP),%RCX |
(427) 0x4f251 CMP %RCX,%R11 |
(427) 0x4f254 JE 4f270 |
(427) 0x4f256 INC %RDX |
(427) 0x4f259 CMP %RDX,0x58(%RSP) |
(427) 0x4f25e JLE 4f2c0 |
(427) 0x4f260 INC %R11 |
(427) 0x4f263 JMP 4efa0 |
0x4f268 NOPL (%RAX,%RAX,1) |
0x4f270 VZEROUPPER |
0x4f273 MOV 0xb3c16(%RIP),%RBX |
0x4f27a VMOVSD %XMM2,0x58(%RSP) |
0x4f280 MOV %RBX,%RDI |
0x4f283 CALL 9370 <GOMP_critical_name_start@plt> |
0x4f288 MOV 0x10(%RSP),%RDI |
0x4f28d VMOVSD 0x58(%RSP),%XMM13 |
0x4f293 VADDSD 0x10(%RDI),%XMM13,%XMM14 |
0x4f298 VMOVSD %XMM14,0x10(%RDI) |
0x4f29d LEA -0x28(%RBP),%RSP |
0x4f2a1 MOV %RBX,%RDI |
0x4f2a4 POP %RBX |
0x4f2a5 POP %R12 |
0x4f2a7 POP %R13 |
0x4f2a9 POP %R14 |
0x4f2ab POP %R15 |
0x4f2ad POP %RBP |
0x4f2ae JMP 9130 |
0x4f2b3 NOPL (%RAX,%RAX,1) |
(427) 0x4f2b8 VMOVSD %XMM3,%XMM3,%XMM2 |
(427) 0x4f2bc JMP 4f24c |
0x4f2be XCHG %AX,%AX |
(427) 0x4f2c0 INCQ 0x48(%RSP) |
(427) 0x4f2c5 XOR %EDX,%EDX |
(427) 0x4f2c7 JMP 4f260 |
(427) 0x4f2c9 XOR %EDI,%EDI |
(427) 0x4f2cb MOV %R10,%R8 |
(427) 0x4f2ce VMOVSD %XMM2,%XMM2,%XMM3 |
(427) 0x4f2d2 XOR %EAX,%EAX |
(427) 0x4f2d4 SUB %RDI,%R8 |
(427) 0x4f2d7 VXORPD %XMM4,%XMM4,%XMM4 |
(427) 0x4f2db CMP $0x1,%R8 |
(427) 0x4f2df JNE 4f1ff |
(427) 0x4f2e5 VMOVSD %XMM3,%XMM3,%XMM2 |
(427) 0x4f2e9 JMP 4f239 |
0x4f2ee INC %RSI |
0x4f2f1 XOR %EDX,%EDX |
0x4f2f3 JMP 4ef46 |
0x4f2f8 NOPL (%RAX,%RAX,1) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
○98.10 | gomp_thread_start | team.c:130 | libgomp.so.1.0.0 |
○1.82 | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
Path / |
Source file and lines | Collapse.hpp:81-81 |
Module | libkripke.so |
nb instructions | 98 |
nb uops | 110 |
loop length | 404 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 3 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 11 |
micro-operation queue | 18.33 cycles |
front end | 18.33 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 4.50 | 8.00 | 9.67 | 9.67 | 11.50 | 4.47 | 4.50 | 11.50 | 11.50 | 11.50 | 4.53 | 9.67 |
cycles | 4.50 | 10.27 | 9.67 | 9.67 | 11.50 | 4.47 | 4.50 | 11.50 | 11.50 | 11.50 | 4.53 | 9.67 |
Cycles executing div or sqrt instructions | 20.00 |
Front-end | 18.33 |
Dispatch | 11.50 |
DIV/SQRT | 20.00 |
Overall L1 | 20.00 |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 6% |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 2% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 7% |
all | 12% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 12% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 12% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput | Vectorization |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 | N/A |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 | N/A |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 | N/A |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 | N/A |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 | N/A |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 | N/A |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | N/A |
SUB $0x60,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
MOV 0x10(%RDI),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | N/A |
MOV 0x8(%RDI),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
MOV 0x38(%RAX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
LEA 0x38(%RAX),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
MOV 0x10(%RAX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | N/A |
MOV %R14,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MOV (%RAX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | N/A |
MOV 0x28(%RAX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | N/A |
TEST %RCX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 | scal (12.5%) |
MOV 0x70(%RAX),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
MOV 0x90(%RAX),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | N/A |
MOV %RBX,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
CMOVE %R8,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MOV 0x98(%RAX),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
MOV (%RDI),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
MOV %RSI,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MOV 0x20(%RAX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | N/A |
VMOVSD 0x40(%RAX),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
MOV %RDX,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MOV 0xd8(%RAX),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
MOV 0x100(%RAX),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | N/A |
MOV %RCX,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MOV %R9,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MOV %R10,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MOV %R11,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
TEST %R14,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 | scal (12.5%) |
JLE 4f273 <_ZN4RAJA8internal17StatementExecutorINS_9statement8CollapseINS_26omp_parallel_collapse_execEN4camp7int_seqIlJLl0ELl1EEEEJNS2_3ForILl2ENS_6policy4loop9loop_execEJNS2_6LambdaILl0EJEEEEEEEEEE4execIRNS0_8LoopDataINS5_4listIJSF_EEENS5_5tupleIJNS_4impl4SpanINS_9Iterators16numeric_iteratorIN6Kripke9DirectionElPSR_EESR_EENSN_INSP_INSQ_5GroupElPSV_EESV_EENSN_INSP_INSQ_4ZoneElPSZ_EESZ_EEEEENSL_IJEEEJZNK14PopulationSdomclINSQ_11ArchLayoutTINSQ_12ArchT_OpenMPENSQ_11LayoutT_DGZEEEEEvT_NSQ_6SdomIdERKNSQ_4Core3SetES1G_S1G_RNS1D_5FieldIdJSR_SV_SZ_EEERNS1H_IdJSR_EEERNS1H_IdJSZ_EEEPdEUlSR_SV_SZ_E_EEEEEvOS1B_._omp_fn.0+0x403> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
CMPQ $0,0x58(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 | scal (12.5%) |
JLE 4f273 <_ZN4RAJA8internal17StatementExecutorINS_9statement8CollapseINS_26omp_parallel_collapse_execEN4camp7int_seqIlJLl0ELl1EEEEJNS2_3ForILl2ENS_6policy4loop9loop_execEJNS2_6LambdaILl0EJEEEEEEEEEE4execIRNS0_8LoopDataINS5_4listIJSF_EEENS5_5tupleIJNS_4impl4SpanINS_9Iterators16numeric_iteratorIN6Kripke9DirectionElPSR_EESR_EENSN_INSP_INSQ_5GroupElPSV_EESV_EENSN_INSP_INSQ_4ZoneElPSZ_EESZ_EEEEENSL_IJEEEJZNK14PopulationSdomclINSQ_11ArchLayoutTINSQ_12ArchT_OpenMPENSQ_11LayoutT_DGZEEEEEvT_NSQ_6SdomIdERKNSQ_4Core3SetES1G_S1G_RNS1D_5FieldIdJSR_SV_SZ_EEERNS1H_IdJSR_EEERNS1H_IdJSZ_EEEPdEUlSR_SV_SZ_E_EEEEEvOS1B_._omp_fn.0+0x403> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
VMOVSD %XMM2,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
CALL 9760 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | N/A |
MOVSXD %EAX,%R15 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 | scal (12.5%) |
CALL 9650 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | N/A |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
VMOVSD 0x50(%RSP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
MOVSXD %EAX,%RDI | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 | N/A |
MOV 0x58(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | N/A |
IMUL %R14,%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | N/A |
DIV %R15 | 5 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 10 | scal (12.5%) |
CMP %RDX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
MOV %RAX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
JB 4f2ee <_ZN4RAJA8internal17StatementExecutorINS_9statement8CollapseINS_26omp_parallel_collapse_execEN4camp7int_seqIlJLl0ELl1EEEEJNS2_3ForILl2ENS_6policy4loop9loop_execEJNS2_6LambdaILl0EJEEEEEEEEEE4execIRNS0_8LoopDataINS5_4listIJSF_EEENS5_5tupleIJNS_4impl4SpanINS_9Iterators16numeric_iteratorIN6Kripke9DirectionElPSR_EESR_EENSN_INSP_INSQ_5GroupElPSV_EESV_EENSN_INSP_INSQ_4ZoneElPSZ_EESZ_EEEEENSL_IJEEEJZNK14PopulationSdomclINSQ_11ArchLayoutTINSQ_12ArchT_OpenMPENSQ_11LayoutT_DGZEEEEEvT_NSQ_6SdomIdERKNSQ_4Core3SetES1G_S1G_RNS1D_5FieldIdJSR_SV_SZ_EEERNS1H_IdJSR_EEERNS1H_IdJSZ_EEEPdEUlSR_SV_SZ_E_EEEEEvOS1B_._omp_fn.0+0x47e> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
IMUL %RSI,%RDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | N/A |
LEA (%RDI,%RDX,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
LEA (%RSI,%RAX,1),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
CMP %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
JAE 4f273 <_ZN4RAJA8internal17StatementExecutorINS_9statement8CollapseINS_26omp_parallel_collapse_execEN4camp7int_seqIlJLl0ELl1EEEEJNS2_3ForILl2ENS_6policy4loop9loop_execEJNS2_6LambdaILl0EJEEEEEEEEEE4execIRNS0_8LoopDataINS5_4listIJSF_EEENS5_5tupleIJNS_4impl4SpanINS_9Iterators16numeric_iteratorIN6Kripke9DirectionElPSR_EESR_EENSN_INSP_INSQ_5GroupElPSV_EESV_EENSN_INSP_INSQ_4ZoneElPSZ_EESZ_EEEEENSL_IJEEEJZNK14PopulationSdomclINSQ_11ArchLayoutTINSQ_12ArchT_OpenMPENSQ_11LayoutT_DGZEEEEEvT_NSQ_6SdomIdERKNSQ_4Core3SetES1G_S1G_RNS1D_5FieldIdJSR_SV_SZ_EEERNS1H_IdJSR_EEERNS1H_IdJSZ_EEEPdEUlSR_SV_SZ_E_EEEEEvOS1B_._omp_fn.0+0x403> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
MOV 0x18(%RSP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | N/A |
LEA -0x1(%RSI),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
XOR %R11D,%R11D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
DIVQ 0x58(%RSP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 10 | scal (12.5%) |
MOV %R8,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
SUB %RBX,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | N/A |
LEA (%R13,%RBX,8),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
LEA -0x1(%R10),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
MOV %R10,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
MOV %R10,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
MOV %R9,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
SHR $0x2,%R14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 | scal (12.5%) |
AND $-0x4,%R15 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | scal (12.5%) |
SAL $0x5,%R14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 | scal (12.5%) |
MOV %RAX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | vect (25.0%) |
MOV 0xb3c16(%RIP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | N/A |
VMOVSD %XMM2,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MOV %RBX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
CALL 9370 <GOMP_critical_name_start@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | N/A |
MOV 0x10(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | N/A |
VMOVSD 0x58(%RSP),%XMM13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
VADDSD 0x10(%RDI),%XMM13,%XMM14 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 | scal (12.5%) |
VMOVSD %XMM14,0x10(%RDI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
MOV %RBX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 | N/A |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 | N/A |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 | N/A |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 | N/A |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 | N/A |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 | N/A |
JMP 9130 <GOMP_critical_name_end@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 | N/A |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
INC %RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
JMP 4ef46 <_ZN4RAJA8internal17StatementExecutorINS_9statement8CollapseINS_26omp_parallel_collapse_execEN4camp7int_seqIlJLl0ELl1EEEEJNS2_3ForILl2ENS_6policy4loop9loop_execEJNS2_6LambdaILl0EJEEEEEEEEEE4execIRNS0_8LoopDataINS5_4listIJSF_EEENS5_5tupleIJNS_4impl4SpanINS_9Iterators16numeric_iteratorIN6Kripke9DirectionElPSR_EESR_EENSN_INSP_INSQ_5GroupElPSV_EESV_EENSN_INSP_INSQ_4ZoneElPSZ_EESZ_EEEEENSL_IJEEEJZNK14PopulationSdomclINSQ_11ArchLayoutTINSQ_12ArchT_OpenMPENSQ_11LayoutT_DGZEEEEEvT_NSQ_6SdomIdERKNSQ_4Core3SetES1G_S1G_RNS1D_5FieldIdJSR_SV_SZ_EEERNS1H_IdJSR_EEERNS1H_IdJSZ_EEEPdEUlSR_SV_SZ_E_EEEEEvOS1B_._omp_fn.0+0xd6> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 | N/A |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
Source file and lines | Collapse.hpp:81-81 |
Module | libkripke.so |
nb instructions | 98 |
nb uops | 110 |
loop length | 404 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 3 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 11 |
micro-operation queue | 18.33 cycles |
front end | 18.33 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 4.50 | 8.00 | 9.67 | 9.67 | 11.50 | 4.47 | 4.50 | 11.50 | 11.50 | 11.50 | 4.53 | 9.67 |
cycles | 4.50 | 10.27 | 9.67 | 9.67 | 11.50 | 4.47 | 4.50 | 11.50 | 11.50 | 11.50 | 4.53 | 9.67 |
Cycles executing div or sqrt instructions | 20.00 |
Front-end | 18.33 |
Dispatch | 11.50 |
DIV/SQRT | 20.00 |
Overall L1 | 20.00 |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 6% |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 2% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 7% |
all | 12% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 12% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 12% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput | Vectorization |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 | N/A |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 | N/A |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 | N/A |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 | N/A |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 | N/A |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 | N/A |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | N/A |
SUB $0x60,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
MOV 0x10(%RDI),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | N/A |
MOV 0x8(%RDI),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
MOV 0x38(%RAX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
LEA 0x38(%RAX),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
MOV 0x10(%RAX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | N/A |
MOV %R14,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MOV (%RAX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | N/A |
MOV 0x28(%RAX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | N/A |
TEST %RCX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 | scal (12.5%) |
MOV 0x70(%RAX),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
MOV 0x90(%RAX),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | N/A |
MOV %RBX,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
CMOVE %R8,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MOV 0x98(%RAX),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
MOV (%RDI),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
MOV %RSI,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MOV 0x20(%RAX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | N/A |
VMOVSD 0x40(%RAX),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
MOV %RDX,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MOV 0xd8(%RAX),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
MOV 0x100(%RAX),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | N/A |
MOV %RCX,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MOV %R9,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MOV %R10,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MOV %R11,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
TEST %R14,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 | scal (12.5%) |
JLE 4f273 <_ZN4RAJA8internal17StatementExecutorINS_9statement8CollapseINS_26omp_parallel_collapse_execEN4camp7int_seqIlJLl0ELl1EEEEJNS2_3ForILl2ENS_6policy4loop9loop_execEJNS2_6LambdaILl0EJEEEEEEEEEE4execIRNS0_8LoopDataINS5_4listIJSF_EEENS5_5tupleIJNS_4impl4SpanINS_9Iterators16numeric_iteratorIN6Kripke9DirectionElPSR_EESR_EENSN_INSP_INSQ_5GroupElPSV_EESV_EENSN_INSP_INSQ_4ZoneElPSZ_EESZ_EEEEENSL_IJEEEJZNK14PopulationSdomclINSQ_11ArchLayoutTINSQ_12ArchT_OpenMPENSQ_11LayoutT_DGZEEEEEvT_NSQ_6SdomIdERKNSQ_4Core3SetES1G_S1G_RNS1D_5FieldIdJSR_SV_SZ_EEERNS1H_IdJSR_EEERNS1H_IdJSZ_EEEPdEUlSR_SV_SZ_E_EEEEEvOS1B_._omp_fn.0+0x403> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
CMPQ $0,0x58(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 | scal (12.5%) |
JLE 4f273 <_ZN4RAJA8internal17StatementExecutorINS_9statement8CollapseINS_26omp_parallel_collapse_execEN4camp7int_seqIlJLl0ELl1EEEEJNS2_3ForILl2ENS_6policy4loop9loop_execEJNS2_6LambdaILl0EJEEEEEEEEEE4execIRNS0_8LoopDataINS5_4listIJSF_EEENS5_5tupleIJNS_4impl4SpanINS_9Iterators16numeric_iteratorIN6Kripke9DirectionElPSR_EESR_EENSN_INSP_INSQ_5GroupElPSV_EESV_EENSN_INSP_INSQ_4ZoneElPSZ_EESZ_EEEEENSL_IJEEEJZNK14PopulationSdomclINSQ_11ArchLayoutTINSQ_12ArchT_OpenMPENSQ_11LayoutT_DGZEEEEEvT_NSQ_6SdomIdERKNSQ_4Core3SetES1G_S1G_RNS1D_5FieldIdJSR_SV_SZ_EEERNS1H_IdJSR_EEERNS1H_IdJSZ_EEEPdEUlSR_SV_SZ_E_EEEEEvOS1B_._omp_fn.0+0x403> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
VMOVSD %XMM2,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
CALL 9760 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | N/A |
MOVSXD %EAX,%R15 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 | scal (12.5%) |
CALL 9650 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | N/A |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
VMOVSD 0x50(%RSP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
MOVSXD %EAX,%RDI | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 | N/A |
MOV 0x58(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | N/A |
IMUL %R14,%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | N/A |
DIV %R15 | 5 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 10 | scal (12.5%) |
CMP %RDX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
MOV %RAX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
JB 4f2ee <_ZN4RAJA8internal17StatementExecutorINS_9statement8CollapseINS_26omp_parallel_collapse_execEN4camp7int_seqIlJLl0ELl1EEEEJNS2_3ForILl2ENS_6policy4loop9loop_execEJNS2_6LambdaILl0EJEEEEEEEEEE4execIRNS0_8LoopDataINS5_4listIJSF_EEENS5_5tupleIJNS_4impl4SpanINS_9Iterators16numeric_iteratorIN6Kripke9DirectionElPSR_EESR_EENSN_INSP_INSQ_5GroupElPSV_EESV_EENSN_INSP_INSQ_4ZoneElPSZ_EESZ_EEEEENSL_IJEEEJZNK14PopulationSdomclINSQ_11ArchLayoutTINSQ_12ArchT_OpenMPENSQ_11LayoutT_DGZEEEEEvT_NSQ_6SdomIdERKNSQ_4Core3SetES1G_S1G_RNS1D_5FieldIdJSR_SV_SZ_EEERNS1H_IdJSR_EEERNS1H_IdJSZ_EEEPdEUlSR_SV_SZ_E_EEEEEvOS1B_._omp_fn.0+0x47e> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
IMUL %RSI,%RDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | N/A |
LEA (%RDI,%RDX,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
LEA (%RSI,%RAX,1),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
CMP %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
JAE 4f273 <_ZN4RAJA8internal17StatementExecutorINS_9statement8CollapseINS_26omp_parallel_collapse_execEN4camp7int_seqIlJLl0ELl1EEEEJNS2_3ForILl2ENS_6policy4loop9loop_execEJNS2_6LambdaILl0EJEEEEEEEEEE4execIRNS0_8LoopDataINS5_4listIJSF_EEENS5_5tupleIJNS_4impl4SpanINS_9Iterators16numeric_iteratorIN6Kripke9DirectionElPSR_EESR_EENSN_INSP_INSQ_5GroupElPSV_EESV_EENSN_INSP_INSQ_4ZoneElPSZ_EESZ_EEEEENSL_IJEEEJZNK14PopulationSdomclINSQ_11ArchLayoutTINSQ_12ArchT_OpenMPENSQ_11LayoutT_DGZEEEEEvT_NSQ_6SdomIdERKNSQ_4Core3SetES1G_S1G_RNS1D_5FieldIdJSR_SV_SZ_EEERNS1H_IdJSR_EEERNS1H_IdJSZ_EEEPdEUlSR_SV_SZ_E_EEEEEvOS1B_._omp_fn.0+0x403> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
MOV 0x18(%RSP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | N/A |
LEA -0x1(%RSI),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
XOR %R11D,%R11D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
DIVQ 0x58(%RSP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 10 | scal (12.5%) |
MOV %R8,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
SUB %RBX,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | N/A |
LEA (%R13,%RBX,8),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
LEA -0x1(%R10),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
MOV %R10,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
MOV %R10,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
MOV %R9,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
SHR $0x2,%R14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 | scal (12.5%) |
AND $-0x4,%R15 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | scal (12.5%) |
SAL $0x5,%R14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 | scal (12.5%) |
MOV %RAX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | vect (25.0%) |
MOV 0xb3c16(%RIP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | N/A |
VMOVSD %XMM2,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MOV %RBX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
CALL 9370 <GOMP_critical_name_start@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | N/A |
MOV 0x10(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | N/A |
VMOVSD 0x58(%RSP),%XMM13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
VADDSD 0x10(%RDI),%XMM13,%XMM14 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 | scal (12.5%) |
VMOVSD %XMM14,0x10(%RDI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
MOV %RBX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 | N/A |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 | N/A |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 | N/A |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 | N/A |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 | N/A |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 | N/A |
JMP 9130 <GOMP_critical_name_end@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 | N/A |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
INC %RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
JMP 4ef46 <_ZN4RAJA8internal17StatementExecutorINS_9statement8CollapseINS_26omp_parallel_collapse_execEN4camp7int_seqIlJLl0ELl1EEEEJNS2_3ForILl2ENS_6policy4loop9loop_execEJNS2_6LambdaILl0EJEEEEEEEEEE4execIRNS0_8LoopDataINS5_4listIJSF_EEENS5_5tupleIJNS_4impl4SpanINS_9Iterators16numeric_iteratorIN6Kripke9DirectionElPSR_EESR_EENSN_INSP_INSQ_5GroupElPSV_EESV_EENSN_INSP_INSQ_4ZoneElPSZ_EESZ_EEEEENSL_IJEEEJZNK14PopulationSdomclINSQ_11ArchLayoutTINSQ_12ArchT_OpenMPENSQ_11LayoutT_DGZEEEEEvT_NSQ_6SdomIdERKNSQ_4Core3SetES1G_S1G_RNS1D_5FieldIdJSR_SV_SZ_EEERNS1H_IdJSR_EEERNS1H_IdJSZ_EEEPdEUlSR_SV_SZ_E_EEEEEvOS1B_._omp_fn.0+0xd6> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 | N/A |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
Name | Coverage (%) | Time (s) |
---|---|---|
▼void RAJA::internal::StatementExecutor | 0.15 | 0.09 |
▼Loop 427 - RangeSegment.hpp:120-120 - libkripke.so– | 0.00 | 0.00 |
○Loop 428 - forall.hpp:59-59 - libkripke.so | 0.15 | 0.07 |