Function: void RAJA::internal::StatementExecutor<RAJA::statement::Collapse<RAJA::omp_parallel_collap ... | Module: libkripke.so | Source: Collapse.hpp:81-81 [...] | Coverage: 6.32% |
---|
Function: void RAJA::internal::StatementExecutor<RAJA::statement::Collapse<RAJA::omp_parallel_collap ... | Module: libkripke.so | Source: Collapse.hpp:81-81 [...] | Coverage: 6.32% |
---|
/scratch_na/users/xoserete/qaas_runs/171-291-2973/intel/Kripke/build/Kripke/tpl/raja/include/RAJA/index/RangeSegment.hpp: 120 - 120 |
-------------------------------------------------------------------------------- |
120: RAJA_HOST_DEVICE RAJA_INLINE ~TypedRangeSegment() {} |
/scratch_na/users/xoserete/qaas_runs/171-291-2973/intel/Kripke/build/Kripke/tpl/raja/include/RAJA/util/Operators.hpp: 307 - 307 |
-------------------------------------------------------------------------------- |
307: return Ret{lhs} + rhs; |
/scratch_na/users/xoserete/qaas_runs/171-291-2973/intel/Kripke/build/Kripke/tpl/raja/include/RAJA/policy/loop/forall.hpp: 59 - 59 |
-------------------------------------------------------------------------------- |
59: for (decltype(distance_it) i = 0; i < distance_it; ++i) { |
/scratch_na/users/xoserete/qaas_runs/171-291-2973/intel/Kripke/build/Kripke/tpl/raja/include/RAJA/internal/Iterators.hpp: 55 - 177 |
-------------------------------------------------------------------------------- |
55: : val(rhs.val) |
[...] |
142: return val - rhs.val; |
[...] |
177: return value_type(val + rhs); |
/scratch_na/users/xoserete/qaas_runs/171-291-2973/intel/Kripke/build/Kripke/src/Kripke/Kernel/LTimes.cpp: 62 - 62 |
-------------------------------------------------------------------------------- |
62: phi(nm,g,z) += ell(nm, d) * psi(d, g, z); |
/scratch_na/users/xoserete/qaas_runs/171-291-2973/intel/Kripke/build/Kripke/tpl/raja/include/RAJA/util/View.hpp: 79 - 79 |
-------------------------------------------------------------------------------- |
79: : layout(V.layout), data(V.data) |
/scratch_na/users/xoserete/qaas_runs/171-291-2973/intel/Kripke/build/Kripke/tpl/raja/include/RAJA/policy/openmp/kernel/Collapse.hpp: 81 - 81 |
-------------------------------------------------------------------------------- |
81: #pragma omp parallel for private(i0, i1) firstprivate(privatizer) \ |
/scratch_na/users/xoserete/qaas_runs/171-291-2973/intel/Kripke/build/Kripke/tpl/raja/include/RAJA/util/Layout.hpp: 55 - 55 |
-------------------------------------------------------------------------------- |
55: return a * b; |
0x643a0 PUSH %RBP |
0x643a1 MOV %RSP,%RBP |
0x643a4 PUSH %R15 |
0x643a6 PUSH %R14 |
0x643a8 PUSH %R13 |
0x643aa PUSH %R12 |
0x643ac PUSH %RBX |
0x643ad SUB $0x68,%RSP |
0x643b1 MOV 0x10(%RDI),%RAX |
0x643b5 MOV (%RAX),%RBX |
0x643b8 MOV 0x110(%RAX),%R12 |
0x643bf MOV 0x18(%RAX),%RDX |
0x643c3 MOV 0x20(%RAX),%RCX |
0x643c7 MOV 0x38(%RAX),%RSI |
0x643cb MOV 0x68(%RAX),%R8 |
0x643cf MOV %RBX,-0x48(%RBP) |
0x643d3 MOV 0xa8(%RAX),%R9 |
0x643da MOV 0xc8(%RAX),%R10 |
0x643e1 MOV %R12,-0x88(%RBP) |
0x643e8 MOV 0xf0(%RAX),%R11 |
0x643ef MOV 0x118(%RAX),%RBX |
0x643f6 MOV %RDX,-0x78(%RBP) |
0x643fa MOV 0x10(%RAX),%R14 |
0x643fe MOV 0x30(%RAX),%R13 |
0x64402 MOV %RCX,-0x50(%RBP) |
0x64406 MOV 0x60(%RAX),%R15 |
0x6440a MOV 0x158(%RAX),%RAX |
0x64411 MOV %RSI,-0x90(%RBP) |
0x64418 MOV %R8,-0x40(%RBP) |
0x6441c MOV %R9,-0x58(%RBP) |
0x64420 MOV %R10,-0x80(%RBP) |
0x64424 MOV %R11,-0x60(%RBP) |
0x64428 MOV %RBX,-0x38(%RBP) |
0x6442c MOV %RAX,-0x68(%RBP) |
0x64430 MOV 0x8(%RDI),%R12 |
0x64434 MOV (%RDI),%RDI |
0x64437 TEST %RDI,%RDI |
0x6443a MOV %RDI,-0x70(%RBP) |
0x6443e JLE 64736 |
0x64444 TEST %R12,%R12 |
0x64447 JLE 64736 |
0x6444d CALL 9750 <omp_get_num_threads@plt> |
0x64452 MOVSXD %EAX,%RBX |
0x64455 CALL 9640 <omp_get_thread_num@plt> |
0x6445a XOR %EDX,%EDX |
0x6445c MOVSXD %EAX,%RSI |
0x6445f MOV -0x70(%RBP),%RAX |
0x64463 IMUL %R12,%RAX |
0x64467 DIV %RBX |
0x6446a MOV %RAX,%RCX |
0x6446d CMP %RDX,%RSI |
0x64470 JB 6474e |
0x64476 IMUL %RCX,%RSI |
0x6447a LEA (%RSI,%RDX,1),%RAX |
0x6447e LEA (%RCX,%RAX,1),%R8 |
0x64482 CMP %R8,%RAX |
0x64485 JAE 64736 |
0x6448b XOR %EDX,%EDX |
0x6448d MOV -0x80(%RBP),%RDI |
0x64491 LEA -0x1(%RCX),%R10 |
0x64495 MOV -0x78(%RBP),%R11 |
0x64499 DIV %R12 |
0x6449c MOV -0x90(%RBP),%R8 |
0x644a3 MOV %R10,-0x78(%RBP) |
0x644a7 MOV %R12,-0x90(%RBP) |
0x644ae LEA (,%RDI,8),%RBX |
0x644b6 SUB %R14,%R11 |
0x644b9 SUB %R13,%R8 |
0x644bc IMUL %R14,%RDI |
0x644c0 MOV %RDI,-0x80(%RBP) |
0x644c4 MOV %RDX,%R9 |
0x644c7 MOV -0x88(%RBP),%RDX |
0x644ce MOV %RAX,-0x70(%RBP) |
0x644d2 MOV %RDX,%RSI |
0x644d5 SAL $0x3,%RDX |
0x644d9 IMUL %R14,%RSI |
0x644dd XOR %R14D,%R14D |
0x644e0 ADD %R13,%RSI |
0x644e3 MOV %RSI,-0x88(%RBP) |
0x644ea NOPW (%RAX,%RAX,1) |
(1018) 0x644f0 TEST %R11,%R11 |
(1018) 0x644f3 JLE 64719 |
(1018) 0x644f9 MOV -0x50(%RBP),%R12 |
(1018) 0x644fd MOV -0x48(%RBP),%R10 |
(1018) 0x64501 MOV -0x70(%RBP),%RAX |
(1018) 0x64505 MOV -0x80(%RBP),%RDI |
(1018) 0x64509 LEA (%R9,%R12,1),%RCX |
(1018) 0x6450d MOV -0x60(%RBP),%R12 |
(1018) 0x64511 ADD %R10,%RAX |
(1018) 0x64514 MOV -0x88(%RBP),%R10 |
(1018) 0x6451b LEA (%RDI,%RAX,1),%RSI |
(1018) 0x6451f IMUL %R15,%RAX |
(1018) 0x64523 LEA (%R12,%RSI,8),%RDI |
(1018) 0x64527 MOV -0x38(%RBP),%RSI |
(1018) 0x6452b MOV -0x68(%RBP),%R12 |
(1018) 0x6452f IMUL %RCX,%RSI |
(1018) 0x64533 ADD %R10,%RSI |
(1018) 0x64536 MOV -0x40(%RBP),%R10 |
(1018) 0x6453a LEA (%R12,%RSI,8),%RSI |
(1018) 0x6453e XOR %R12D,%R12D |
(1018) 0x64541 IMUL %R10,%RCX |
(1018) 0x64545 ADD %RCX,%RAX |
(1018) 0x64548 MOV -0x58(%RBP),%RCX |
(1018) 0x6454c ADD %R13,%RAX |
(1018) 0x6454f LEA (%RCX,%RAX,8),%RCX |
(1018) 0x64553 NOPL (%RAX,%RAX,1) |
(1019) 0x64558 XOR %EAX,%EAX |
(1019) 0x6455a TEST %R8,%R8 |
(1019) 0x6455d JLE 64707 |
(1019) 0x64563 MOV %R8,%R10 |
(1019) 0x64566 AND $0x7,%R10D |
(1019) 0x6456a JE 64645 |
(1019) 0x64570 CMP $0x1,%R10 |
(1019) 0x64574 JE 64625 |
(1019) 0x6457a CMP $0x2,%R10 |
(1019) 0x6457e JE 6460e |
(1019) 0x64584 CMP $0x3,%R10 |
(1019) 0x64588 JE 645f7 |
(1019) 0x6458a CMP $0x4,%R10 |
(1019) 0x6458e JE 645e0 |
(1019) 0x64590 CMP $0x5,%R10 |
(1019) 0x64594 JE 645c9 |
(1019) 0x64596 CMP $0x6,%R10 |
(1019) 0x6459a JE 645b2 |
(1019) 0x6459c VMOVSD (%RDI),%XMM0 |
(1019) 0x645a0 VMOVSD (%RCX),%XMM7 |
(1019) 0x645a4 MOV $0x1,%EAX |
(1019) 0x645a9 VFMADD132SD (%RSI),%XMM7,%XMM0 |
(1019) 0x645ae VMOVSD %XMM0,(%RCX) |
(1019) 0x645b2 VMOVSD (%RCX,%RAX,8),%XMM2 |
(1019) 0x645b7 VMOVSD (%RDI),%XMM1 |
(1019) 0x645bb VFMADD132SD (%RSI,%RAX,8),%XMM2,%XMM1 |
(1019) 0x645c1 VMOVSD %XMM1,(%RCX,%RAX,8) |
(1019) 0x645c6 INC %RAX |
(1019) 0x645c9 VMOVSD (%RCX,%RAX,8),%XMM6 |
(1019) 0x645ce VMOVSD (%RDI),%XMM3 |
(1019) 0x645d2 VFMADD132SD (%RSI,%RAX,8),%XMM6,%XMM3 |
(1019) 0x645d8 VMOVSD %XMM3,(%RCX,%RAX,8) |
(1019) 0x645dd INC %RAX |
(1019) 0x645e0 VMOVSD (%RCX,%RAX,8),%XMM5 |
(1019) 0x645e5 VMOVSD (%RDI),%XMM4 |
(1019) 0x645e9 VFMADD132SD (%RSI,%RAX,8),%XMM5,%XMM4 |
(1019) 0x645ef VMOVSD %XMM4,(%RCX,%RAX,8) |
(1019) 0x645f4 INC %RAX |
(1019) 0x645f7 VMOVSD (%RCX,%RAX,8),%XMM9 |
(1019) 0x645fc VMOVSD (%RDI),%XMM8 |
(1019) 0x64600 VFMADD132SD (%RSI,%RAX,8),%XMM9,%XMM8 |
(1019) 0x64606 VMOVSD %XMM8,(%RCX,%RAX,8) |
(1019) 0x6460b INC %RAX |
(1019) 0x6460e VMOVSD (%RCX,%RAX,8),%XMM11 |
(1019) 0x64613 VMOVSD (%RDI),%XMM10 |
(1019) 0x64617 VFMADD132SD (%RSI,%RAX,8),%XMM11,%XMM10 |
(1019) 0x6461d VMOVSD %XMM10,(%RCX,%RAX,8) |
(1019) 0x64622 INC %RAX |
(1019) 0x64625 VMOVSD (%RCX,%RAX,8),%XMM13 |
(1019) 0x6462a VMOVSD (%RDI),%XMM12 |
(1019) 0x6462e VFMADD132SD (%RSI,%RAX,8),%XMM13,%XMM12 |
(1019) 0x64634 VMOVSD %XMM12,(%RCX,%RAX,8) |
(1019) 0x64639 INC %RAX |
(1019) 0x6463c CMP %R8,%RAX |
(1019) 0x6463f JE 64707 |
(1020) 0x64645 VMOVSD (%RDI),%XMM14 |
(1020) 0x64649 VMOVSD (%RCX,%RAX,8),%XMM15 |
(1020) 0x6464e VFMADD132SD (%RSI,%RAX,8),%XMM15,%XMM14 |
(1020) 0x64654 VMOVSD 0x8(%RCX,%RAX,8),%XMM7 |
(1020) 0x6465a VMOVSD 0x10(%RCX,%RAX,8),%XMM2 |
(1020) 0x64660 VMOVSD 0x18(%RCX,%RAX,8),%XMM6 |
(1020) 0x64666 VMOVSD 0x20(%RCX,%RAX,8),%XMM5 |
(1020) 0x6466c VMOVSD 0x28(%RCX,%RAX,8),%XMM9 |
(1020) 0x64672 VMOVSD 0x30(%RCX,%RAX,8),%XMM11 |
(1020) 0x64678 VMOVSD 0x38(%RCX,%RAX,8),%XMM13 |
(1020) 0x6467e VMOVSD %XMM14,(%RCX,%RAX,8) |
(1020) 0x64683 VMOVSD (%RDI),%XMM0 |
(1020) 0x64687 VFMADD132SD 0x8(%RSI,%RAX,8),%XMM7,%XMM0 |
(1020) 0x6468e VMOVSD %XMM0,0x8(%RCX,%RAX,8) |
(1020) 0x64694 VMOVSD (%RDI),%XMM1 |
(1020) 0x64698 VFMADD132SD 0x10(%RSI,%RAX,8),%XMM2,%XMM1 |
(1020) 0x6469f VMOVSD %XMM1,0x10(%RCX,%RAX,8) |
(1020) 0x646a5 VMOVSD (%RDI),%XMM3 |
(1020) 0x646a9 VFMADD132SD 0x18(%RSI,%RAX,8),%XMM6,%XMM3 |
(1020) 0x646b0 VMOVSD %XMM3,0x18(%RCX,%RAX,8) |
(1020) 0x646b6 VMOVSD (%RDI),%XMM4 |
(1020) 0x646ba VFMADD132SD 0x20(%RSI,%RAX,8),%XMM5,%XMM4 |
(1020) 0x646c1 VMOVSD %XMM4,0x20(%RCX,%RAX,8) |
(1020) 0x646c7 VMOVSD (%RDI),%XMM8 |
(1020) 0x646cb VFMADD132SD 0x28(%RSI,%RAX,8),%XMM9,%XMM8 |
(1020) 0x646d2 VMOVSD %XMM8,0x28(%RCX,%RAX,8) |
(1020) 0x646d8 VMOVSD (%RDI),%XMM10 |
(1020) 0x646dc VFMADD132SD 0x30(%RSI,%RAX,8),%XMM11,%XMM10 |
(1020) 0x646e3 VMOVSD %XMM10,0x30(%RCX,%RAX,8) |
(1020) 0x646e9 VMOVSD (%RDI),%XMM12 |
(1020) 0x646ed VFMADD132SD 0x38(%RSI,%RAX,8),%XMM13,%XMM12 |
(1020) 0x646f4 VMOVSD %XMM12,0x38(%RCX,%RAX,8) |
(1020) 0x646fa ADD $0x8,%RAX |
(1020) 0x646fe CMP %R8,%RAX |
(1020) 0x64701 JNE 64645 |
(1019) 0x64707 INC %R12 |
(1019) 0x6470a ADD %RBX,%RDI |
(1019) 0x6470d ADD %RDX,%RSI |
(1019) 0x64710 CMP %R11,%R12 |
(1019) 0x64713 JNE 64558 |
(1018) 0x64719 MOV -0x78(%RBP),%RDI |
(1018) 0x6471d CMP %RDI,%R14 |
(1018) 0x64720 JE 64736 |
(1018) 0x64722 INC %R9 |
(1018) 0x64725 CMP %R9,-0x90(%RBP) |
(1018) 0x6472c JLE 64745 |
(1018) 0x6472e INC %R14 |
(1018) 0x64731 JMP 644f0 |
0x64736 ADD $0x68,%RSP |
0x6473a POP %RBX |
0x6473b POP %R12 |
0x6473d POP %R13 |
0x6473f POP %R14 |
0x64741 POP %R15 |
0x64743 POP %RBP |
0x64744 RET |
(1018) 0x64745 INCQ -0x70(%RBP) |
(1018) 0x64749 XOR %R9D,%R9D |
(1018) 0x6474c JMP 6472e |
0x6474e INC %RCX |
0x64751 XOR %EDX,%EDX |
0x64753 JMP 64476 |
0x64758 NOPL (%RAX,%RAX,1) |
Path / |
Source file and lines | Collapse.hpp:81-81 |
Module | libkripke.so |
nb instructions | 92 |
nb uops | 102 |
loop length | 369 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 12 |
micro-operation queue | 17.00 cycles |
front end | 17.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.80 | 10.00 | 9.67 | 9.67 | 12.50 | 3.80 | 3.80 | 12.50 | 12.50 | 12.50 | 3.60 | 9.67 |
cycles | 3.80 | 12.07 | 9.67 | 9.67 | 12.50 | 3.80 | 3.80 | 12.50 | 12.50 | 12.50 | 3.60 | 9.67 |
Cycles executing div or sqrt instructions | 20.00 |
FE+BE cycles | 20.19-20.24 |
Stall cycles | 3.46-3.51 |
SB full (events) | 3.46-3.51 |
Front-end | 17.00 |
Dispatch | 12.50 |
DIV/SQRT | 20.00 |
Overall L1 | 20.00 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 0% |
all | 12% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 11% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x68,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x10(%RDI),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x110(%RAX),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RAX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RAX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x38(%RAX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x68(%RAX),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RBX,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xa8(%RAX),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xc8(%RAX),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R12,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xf0(%RAX),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x118(%RAX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDX,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RAX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RAX),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x60(%RAX),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x158(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x8(%RDI),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDI),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %RDI,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
MOV %RDI,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JLE 64736 <_ZN4RAJA8internal17StatementExecutorINS_9statement8CollapseINS_26omp_parallel_collapse_execEN4camp7int_seqIlJLl0ELl2EEEEJNS2_3ForILl1ENS_6policy4loop9loop_execEJNS8_ILl3ESB_JNS2_6LambdaILl0EJEEEEEEEEEEEEE4execIRNS0_8LoopDataINS5_4listIJSG_EEENS5_5tupleIJNS_4impl4SpanINS_9Iterators16numeric_iteratorIN6Kripke6MomentElPSS_EESS_EENSO_INSQ_INSR_9DirectionElPSW_EESW_EENSO_INSQ_INSR_5GroupElPS10_EES10_EENSO_INSQ_INSR_4ZoneElPS14_EES14_EEEEENSM_IJEEEJZNK10LTimesSdomclINSR_11ArchLayoutTINSR_12ArchT_OpenMPENSR_11LayoutT_DGZEEEEEvT_NSR_6SdomIdERKNSR_4Core3SetES1L_S1L_S1L_RNS1I_5FieldIdJSW_S10_S14_EEERNS1M_IdJSS_S10_S14_EEERNS1M_IdJSS_SW_EEEEUlSS_SW_S10_S14_E_EEEEEvOS1G_._omp_fn.0+0x396> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
TEST %R12,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 64736 <_ZN4RAJA8internal17StatementExecutorINS_9statement8CollapseINS_26omp_parallel_collapse_execEN4camp7int_seqIlJLl0ELl2EEEEJNS2_3ForILl1ENS_6policy4loop9loop_execEJNS8_ILl3ESB_JNS2_6LambdaILl0EJEEEEEEEEEEEEE4execIRNS0_8LoopDataINS5_4listIJSG_EEENS5_5tupleIJNS_4impl4SpanINS_9Iterators16numeric_iteratorIN6Kripke6MomentElPSS_EESS_EENSO_INSQ_INSR_9DirectionElPSW_EESW_EENSO_INSQ_INSR_5GroupElPS10_EES10_EENSO_INSQ_INSR_4ZoneElPS14_EES14_EEEEENSM_IJEEEJZNK10LTimesSdomclINSR_11ArchLayoutTINSR_12ArchT_OpenMPENSR_11LayoutT_DGZEEEEEvT_NSR_6SdomIdERKNSR_4Core3SetES1L_S1L_S1L_RNS1I_5FieldIdJSW_S10_S14_EEERNS1M_IdJSS_S10_S14_EEERNS1M_IdJSS_SW_EEEEUlSS_SW_S10_S14_E_EEEEEvOS1G_._omp_fn.0+0x396> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CALL 9750 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOVSXD %EAX,%RBX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
CALL 9640 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOVSXD %EAX,%RSI | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV -0x70(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %R12,%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %RBX | 5 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 10 |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 6474e <_ZN4RAJA8internal17StatementExecutorINS_9statement8CollapseINS_26omp_parallel_collapse_execEN4camp7int_seqIlJLl0ELl2EEEEJNS2_3ForILl1ENS_6policy4loop9loop_execEJNS8_ILl3ESB_JNS2_6LambdaILl0EJEEEEEEEEEEEEE4execIRNS0_8LoopDataINS5_4listIJSG_EEENS5_5tupleIJNS_4impl4SpanINS_9Iterators16numeric_iteratorIN6Kripke6MomentElPSS_EESS_EENSO_INSQ_INSR_9DirectionElPSW_EESW_EENSO_INSQ_INSR_5GroupElPS10_EES10_EENSO_INSQ_INSR_4ZoneElPS14_EES14_EEEEENSM_IJEEEJZNK10LTimesSdomclINSR_11ArchLayoutTINSR_12ArchT_OpenMPENSR_11LayoutT_DGZEEEEEvT_NSR_6SdomIdERKNSR_4Core3SetES1L_S1L_S1L_RNS1I_5FieldIdJSW_S10_S14_EEERNS1M_IdJSS_S10_S14_EEERNS1M_IdJSS_SW_EEEEUlSS_SW_S10_S14_E_EEEEEvOS1G_._omp_fn.0+0x3ae> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %RCX,%RSI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RSI,%RDX,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%RCX,%RAX,1),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R8,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 64736 <_ZN4RAJA8internal17StatementExecutorINS_9statement8CollapseINS_26omp_parallel_collapse_execEN4camp7int_seqIlJLl0ELl2EEEEJNS2_3ForILl1ENS_6policy4loop9loop_execEJNS8_ILl3ESB_JNS2_6LambdaILl0EJEEEEEEEEEEEEE4execIRNS0_8LoopDataINS5_4listIJSG_EEENS5_5tupleIJNS_4impl4SpanINS_9Iterators16numeric_iteratorIN6Kripke6MomentElPSS_EESS_EENSO_INSQ_INSR_9DirectionElPSW_EESW_EENSO_INSQ_INSR_5GroupElPS10_EES10_EENSO_INSQ_INSR_4ZoneElPS14_EES14_EEEEENSM_IJEEEJZNK10LTimesSdomclINSR_11ArchLayoutTINSR_12ArchT_OpenMPENSR_11LayoutT_DGZEEEEEvT_NSR_6SdomIdERKNSR_4Core3SetES1L_S1L_S1L_RNS1I_5FieldIdJSW_S10_S14_EEERNS1M_IdJSS_S10_S14_EEERNS1M_IdJSS_SW_EEEEUlSS_SW_S10_S14_E_EEEEEvOS1G_._omp_fn.0+0x396> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x80(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x1(%RCX),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x78(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DIV %R12 | 5 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 10 |
MOV -0x90(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R12,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (,%RDI,8),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %R14,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SUB %R13,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
IMUL %R14,%RDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RDI,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x88(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SAL $0x3,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
IMUL %R14,%RSI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %R13,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RSI,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x68,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
INC %RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 64476 <_ZN4RAJA8internal17StatementExecutorINS_9statement8CollapseINS_26omp_parallel_collapse_execEN4camp7int_seqIlJLl0ELl2EEEEJNS2_3ForILl1ENS_6policy4loop9loop_execEJNS8_ILl3ESB_JNS2_6LambdaILl0EJEEEEEEEEEEEEE4execIRNS0_8LoopDataINS5_4listIJSG_EEENS5_5tupleIJNS_4impl4SpanINS_9Iterators16numeric_iteratorIN6Kripke6MomentElPSS_EESS_EENSO_INSQ_INSR_9DirectionElPSW_EESW_EENSO_INSQ_INSR_5GroupElPS10_EES10_EENSO_INSQ_INSR_4ZoneElPS14_EES14_EEEEENSM_IJEEEJZNK10LTimesSdomclINSR_11ArchLayoutTINSR_12ArchT_OpenMPENSR_11LayoutT_DGZEEEEEvT_NSR_6SdomIdERKNSR_4Core3SetES1L_S1L_S1L_RNS1I_5FieldIdJSW_S10_S14_EEERNS1M_IdJSS_S10_S14_EEERNS1M_IdJSS_SW_EEEEUlSS_SW_S10_S14_E_EEEEEvOS1G_._omp_fn.0+0xd6> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | Collapse.hpp:81-81 |
Module | libkripke.so |
nb instructions | 92 |
nb uops | 102 |
loop length | 369 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 12 |
micro-operation queue | 17.00 cycles |
front end | 17.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.80 | 10.00 | 9.67 | 9.67 | 12.50 | 3.80 | 3.80 | 12.50 | 12.50 | 12.50 | 3.60 | 9.67 |
cycles | 3.80 | 12.07 | 9.67 | 9.67 | 12.50 | 3.80 | 3.80 | 12.50 | 12.50 | 12.50 | 3.60 | 9.67 |
Cycles executing div or sqrt instructions | 20.00 |
FE+BE cycles | 20.19-20.24 |
Stall cycles | 3.46-3.51 |
SB full (events) | 3.46-3.51 |
Front-end | 17.00 |
Dispatch | 12.50 |
DIV/SQRT | 20.00 |
Overall L1 | 20.00 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 0% |
all | 12% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 11% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x68,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x10(%RDI),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x110(%RAX),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RAX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RAX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x38(%RAX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x68(%RAX),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RBX,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xa8(%RAX),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xc8(%RAX),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R12,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xf0(%RAX),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x118(%RAX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDX,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RAX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RAX),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x60(%RAX),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x158(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x8(%RDI),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDI),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %RDI,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
MOV %RDI,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JLE 64736 <_ZN4RAJA8internal17StatementExecutorINS_9statement8CollapseINS_26omp_parallel_collapse_execEN4camp7int_seqIlJLl0ELl2EEEEJNS2_3ForILl1ENS_6policy4loop9loop_execEJNS8_ILl3ESB_JNS2_6LambdaILl0EJEEEEEEEEEEEEE4execIRNS0_8LoopDataINS5_4listIJSG_EEENS5_5tupleIJNS_4impl4SpanINS_9Iterators16numeric_iteratorIN6Kripke6MomentElPSS_EESS_EENSO_INSQ_INSR_9DirectionElPSW_EESW_EENSO_INSQ_INSR_5GroupElPS10_EES10_EENSO_INSQ_INSR_4ZoneElPS14_EES14_EEEEENSM_IJEEEJZNK10LTimesSdomclINSR_11ArchLayoutTINSR_12ArchT_OpenMPENSR_11LayoutT_DGZEEEEEvT_NSR_6SdomIdERKNSR_4Core3SetES1L_S1L_S1L_RNS1I_5FieldIdJSW_S10_S14_EEERNS1M_IdJSS_S10_S14_EEERNS1M_IdJSS_SW_EEEEUlSS_SW_S10_S14_E_EEEEEvOS1G_._omp_fn.0+0x396> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
TEST %R12,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 64736 <_ZN4RAJA8internal17StatementExecutorINS_9statement8CollapseINS_26omp_parallel_collapse_execEN4camp7int_seqIlJLl0ELl2EEEEJNS2_3ForILl1ENS_6policy4loop9loop_execEJNS8_ILl3ESB_JNS2_6LambdaILl0EJEEEEEEEEEEEEE4execIRNS0_8LoopDataINS5_4listIJSG_EEENS5_5tupleIJNS_4impl4SpanINS_9Iterators16numeric_iteratorIN6Kripke6MomentElPSS_EESS_EENSO_INSQ_INSR_9DirectionElPSW_EESW_EENSO_INSQ_INSR_5GroupElPS10_EES10_EENSO_INSQ_INSR_4ZoneElPS14_EES14_EEEEENSM_IJEEEJZNK10LTimesSdomclINSR_11ArchLayoutTINSR_12ArchT_OpenMPENSR_11LayoutT_DGZEEEEEvT_NSR_6SdomIdERKNSR_4Core3SetES1L_S1L_S1L_RNS1I_5FieldIdJSW_S10_S14_EEERNS1M_IdJSS_S10_S14_EEERNS1M_IdJSS_SW_EEEEUlSS_SW_S10_S14_E_EEEEEvOS1G_._omp_fn.0+0x396> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CALL 9750 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOVSXD %EAX,%RBX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
CALL 9640 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOVSXD %EAX,%RSI | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV -0x70(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %R12,%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %RBX | 5 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 10 |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 6474e <_ZN4RAJA8internal17StatementExecutorINS_9statement8CollapseINS_26omp_parallel_collapse_execEN4camp7int_seqIlJLl0ELl2EEEEJNS2_3ForILl1ENS_6policy4loop9loop_execEJNS8_ILl3ESB_JNS2_6LambdaILl0EJEEEEEEEEEEEEE4execIRNS0_8LoopDataINS5_4listIJSG_EEENS5_5tupleIJNS_4impl4SpanINS_9Iterators16numeric_iteratorIN6Kripke6MomentElPSS_EESS_EENSO_INSQ_INSR_9DirectionElPSW_EESW_EENSO_INSQ_INSR_5GroupElPS10_EES10_EENSO_INSQ_INSR_4ZoneElPS14_EES14_EEEEENSM_IJEEEJZNK10LTimesSdomclINSR_11ArchLayoutTINSR_12ArchT_OpenMPENSR_11LayoutT_DGZEEEEEvT_NSR_6SdomIdERKNSR_4Core3SetES1L_S1L_S1L_RNS1I_5FieldIdJSW_S10_S14_EEERNS1M_IdJSS_S10_S14_EEERNS1M_IdJSS_SW_EEEEUlSS_SW_S10_S14_E_EEEEEvOS1G_._omp_fn.0+0x3ae> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %RCX,%RSI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RSI,%RDX,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%RCX,%RAX,1),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R8,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 64736 <_ZN4RAJA8internal17StatementExecutorINS_9statement8CollapseINS_26omp_parallel_collapse_execEN4camp7int_seqIlJLl0ELl2EEEEJNS2_3ForILl1ENS_6policy4loop9loop_execEJNS8_ILl3ESB_JNS2_6LambdaILl0EJEEEEEEEEEEEEE4execIRNS0_8LoopDataINS5_4listIJSG_EEENS5_5tupleIJNS_4impl4SpanINS_9Iterators16numeric_iteratorIN6Kripke6MomentElPSS_EESS_EENSO_INSQ_INSR_9DirectionElPSW_EESW_EENSO_INSQ_INSR_5GroupElPS10_EES10_EENSO_INSQ_INSR_4ZoneElPS14_EES14_EEEEENSM_IJEEEJZNK10LTimesSdomclINSR_11ArchLayoutTINSR_12ArchT_OpenMPENSR_11LayoutT_DGZEEEEEvT_NSR_6SdomIdERKNSR_4Core3SetES1L_S1L_S1L_RNS1I_5FieldIdJSW_S10_S14_EEERNS1M_IdJSS_S10_S14_EEERNS1M_IdJSS_SW_EEEEUlSS_SW_S10_S14_E_EEEEEvOS1G_._omp_fn.0+0x396> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x80(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x1(%RCX),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x78(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DIV %R12 | 5 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 10 |
MOV -0x90(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R12,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (,%RDI,8),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %R14,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SUB %R13,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
IMUL %R14,%RDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RDI,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x88(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SAL $0x3,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
IMUL %R14,%RSI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %R13,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RSI,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x68,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
INC %RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 64476 <_ZN4RAJA8internal17StatementExecutorINS_9statement8CollapseINS_26omp_parallel_collapse_execEN4camp7int_seqIlJLl0ELl2EEEEJNS2_3ForILl1ENS_6policy4loop9loop_execEJNS8_ILl3ESB_JNS2_6LambdaILl0EJEEEEEEEEEEEEE4execIRNS0_8LoopDataINS5_4listIJSG_EEENS5_5tupleIJNS_4impl4SpanINS_9Iterators16numeric_iteratorIN6Kripke6MomentElPSS_EESS_EENSO_INSQ_INSR_9DirectionElPSW_EESW_EENSO_INSQ_INSR_5GroupElPS10_EES10_EENSO_INSQ_INSR_4ZoneElPS14_EES14_EEEEENSM_IJEEEJZNK10LTimesSdomclINSR_11ArchLayoutTINSR_12ArchT_OpenMPENSR_11LayoutT_DGZEEEEEvT_NSR_6SdomIdERKNSR_4Core3SetES1L_S1L_S1L_RNS1I_5FieldIdJSW_S10_S14_EEERNS1M_IdJSS_S10_S14_EEERNS1M_IdJSS_SW_EEEEUlSS_SW_S10_S14_E_EEEEEvOS1G_._omp_fn.0+0xd6> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼void RAJA::internal::StatementExecutor | 6.32 | 4.18 |
▼Loop 1018 - RangeSegment.hpp:120-120 - libkripke.so– | 0 | 0 |
▼Loop 1019 - forall.hpp:59-59 - libkripke.so– | 0 | 0 |
○Loop 1020 - forall.hpp:59-59 - libkripke.so | 6.32 | 3.66 |