Function: sortAtomsById | Module: exec | Source: haloExchange.c:655-661 | Coverage: 0.07% |
---|
Function: sortAtomsById | Module: exec | Source: haloExchange.c:655-661 | Coverage: 0.07% |
---|
/scratch_na/users/xoserete/qaas_runs/171-172-4338/intel/CoMD/build/CoMD/CoMD/src-openmp/haloExchange.c: 655 - 661 |
-------------------------------------------------------------------------------- |
655: int bId = ((AtomMsg*) b)->gid; |
656: assert(aId != bId); |
657: |
658: if (aId < bId) |
659: return -1; |
660: return 1; |
661: } |
0x40a8c0 MOV (%RSI),%EAX |
0x40a8c2 CMP %EAX,(%RDI) |
0x40a8c4 JE 40a900 |
0x40a8c6 SETGE %AL |
0x40a8c9 MOVZX %AL,%EAX |
0x40a8cc LEA -0x1(%RAX,%RAX,1),%EAX |
0x40a8d0 RET |
0x40a8d1 NOPW %CS:(%RAX,%RAX,1) |
0x40a8e0 NOPW %CS:(%RAX,%RAX,1) |
0x40a8ef NOPW %CS:(%RAX,%RAX,1) |
0x40a8fe XCHG %AX,%AX |
0x40a900 PUSH %RBP |
0x40a901 MOV %RSP,%RBP |
0x40a904 MOV $0x426d09,%EDI |
0x40a909 MOV $0x426b3c,%ESI |
0x40a90e MOV $0x426d14,%ECX |
0x40a913 MOV $0x290,%EDX |
0x40a918 CALL 402fa0 <__assert_fail@plt> |
0x40a91d NOPL (%RAX) |
Path / |
Source file and lines | haloExchange.c:655-661 |
Module | exec |
nb instructions | 8.50 |
nb uops | 9 |
loop length | 26 |
used x86 registers | 5 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 0 |
micro-operation queue | 1.50 cycles |
front end | 1.50 cycles |
Metric | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.40 | 1.10 | 0.83 | 0.83 | 0.50 | 1.10 | 1.30 | 0.50 | 0.50 | 0.50 | 1.10 | 0.83 |
cycles | 1.40 | 1.30 | 0.83 | 0.83 | 0.50 | 1.10 | 1.30 | 0.50 | 0.50 | 0.50 | 1.10 | 0.83 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 1.70 |
Stall cycles | 0.00 |
Front-end | 1.50 |
Dispatch | 1.40 |
Overall L1 | 1.67 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 8% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 8% |
Source file and lines | haloExchange.c:655-661 |
Module | exec |
nb instructions | 7 |
nb uops | 7 |
loop length | 17 |
used x86 registers | 3 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 0 |
micro-operation queue | 1.17 cycles |
front end | 1.17 cycles |
Metric | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.50 | 1.00 | 1.00 | 1.00 | 0.00 | 1.00 | 1.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 |
cycles | 1.50 | 1.40 | 1.00 | 1.00 | 0.00 | 1.00 | 1.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 1.62 |
Stall cycles | 0.00 |
Front-end | 1.17 |
Dispatch | 1.50 |
Overall L1 | 1.50 |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOV (%RSI),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %EAX,(%RDI) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JE 40a900 <sortAtomsById+0x40> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
SETGE %AL | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOVZX %AL,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA -0x1(%RAX,%RAX,1),%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
Source file and lines | haloExchange.c:655-661 |
Module | exec |
nb instructions | 10 |
nb uops | 11 |
loop length | 35 |
used x86 registers | 7 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 0 |
micro-operation queue | 1.83 cycles |
front end | 1.83 cycles |
Metric | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.30 | 1.20 | 0.67 | 0.67 | 1.00 | 1.20 | 1.10 | 1.00 | 1.00 | 1.00 | 1.20 | 0.67 |
cycles | 1.30 | 1.20 | 0.67 | 0.67 | 1.00 | 1.20 | 1.10 | 1.00 | 1.00 | 1.00 | 1.20 | 0.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 1.77 |
Stall cycles | 0.00 |
Front-end | 1.83 |
Dispatch | 1.30 |
Overall L1 | 1.83 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 8% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 8% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOV (%RSI),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %EAX,(%RDI) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JE 40a900 <sortAtomsById+0x40> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV $0x426d09,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $0x426b3c,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $0x426d14,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $0x290,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL 402fa0 <__assert_fail@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
Name | Coverage (%) | Time (s) |
---|---|---|
○sortAtomsById | 0.07 | 0.02 |