mirror of
https://github.com/facebook/zstd.git
synced 2025-10-15 00:02:02 -04:00
Merge pull request #1756 from mgrice/dev
Improvements in zstd decode performance
This commit is contained in:
commit
bfff5b30a4
@ -209,7 +209,7 @@ typedef enum {
|
|||||||
/*! ZSTD_wildcopy() :
|
/*! ZSTD_wildcopy() :
|
||||||
* custom version of memcpy(), can overwrite up to WILDCOPY_OVERLENGTH bytes (if length==0) */
|
* custom version of memcpy(), can overwrite up to WILDCOPY_OVERLENGTH bytes (if length==0) */
|
||||||
MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
|
MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
|
||||||
void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype)
|
void ZSTD_wildcopy(void* dst, const void* src, BYTE* oend_g, ptrdiff_t length, ZSTD_overlap_e ovtype)
|
||||||
{
|
{
|
||||||
ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
|
ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
|
||||||
const BYTE* ip = (const BYTE*)src;
|
const BYTE* ip = (const BYTE*)src;
|
||||||
@ -217,25 +217,33 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e
|
|||||||
BYTE* const oend = op + length;
|
BYTE* const oend = op + length;
|
||||||
|
|
||||||
assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8));
|
assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8));
|
||||||
|
|
||||||
if (length < VECLEN || (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN)) {
|
if (length < VECLEN || (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN)) {
|
||||||
do
|
do
|
||||||
COPY8(op, ip)
|
COPY8(op, ip)
|
||||||
while (op < oend);
|
while (op < oend);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if ((length & 8) == 0)
|
if (oend < oend_g-16) {
|
||||||
COPY8(op, ip);
|
/* common case */
|
||||||
do {
|
do {
|
||||||
COPY16(op, ip);
|
COPY16(op, ip);
|
||||||
|
}
|
||||||
|
while (op < oend);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
do {
|
||||||
|
COPY8(op, ip);
|
||||||
|
}
|
||||||
|
while (op < oend);
|
||||||
}
|
}
|
||||||
while (op < oend);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*! ZSTD_wildcopy_16min() :
|
/*! ZSTD_wildcopy_16min() :
|
||||||
* same semantics as ZSTD_wilcopy() except guaranteed to be able to copy 16 bytes at the start */
|
* same semantics as ZSTD_wildcopy() except guaranteed to be able to copy 16 bytes at the start */
|
||||||
MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
|
MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
|
||||||
void ZSTD_wildcopy_16min(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype)
|
void ZSTD_wildcopy_16min(void* dst, const void* src, BYTE* oend_g, ptrdiff_t length, ZSTD_overlap_e ovtype)
|
||||||
{
|
{
|
||||||
ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
|
ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
|
||||||
const BYTE* ip = (const BYTE*)src;
|
const BYTE* ip = (const BYTE*)src;
|
||||||
@ -246,17 +254,25 @@ void ZSTD_wildcopy_16min(void* dst, const void* src, ptrdiff_t length, ZSTD_over
|
|||||||
assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8));
|
assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8));
|
||||||
|
|
||||||
if (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN) {
|
if (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN) {
|
||||||
do
|
do {
|
||||||
COPY8(op, ip)
|
COPY8(op, ip);
|
||||||
|
}
|
||||||
while (op < oend);
|
while (op < oend);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if ((length & 8) == 0)
|
if (oend < oend_g-16) {
|
||||||
COPY8(op, ip);
|
/* common case */
|
||||||
do {
|
do {
|
||||||
COPY16(op, ip);
|
COPY16(op, ip);
|
||||||
|
}
|
||||||
|
while (op < oend);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
do {
|
||||||
|
COPY8(op, ip);
|
||||||
|
}
|
||||||
|
while (op < oend);
|
||||||
}
|
}
|
||||||
while (op < oend);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -359,7 +359,7 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const v
|
|||||||
/* copy Literals */
|
/* copy Literals */
|
||||||
assert(seqStorePtr->maxNbLit <= 128 KB);
|
assert(seqStorePtr->maxNbLit <= 128 KB);
|
||||||
assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
|
assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
|
||||||
ZSTD_wildcopy(seqStorePtr->lit, literals, (ptrdiff_t)litLength, ZSTD_no_overlap);
|
ZSTD_wildcopy(seqStorePtr->lit, literals, seqStorePtr->lit + litLength + 8, (ptrdiff_t)litLength, ZSTD_no_overlap);
|
||||||
seqStorePtr->lit += litLength;
|
seqStorePtr->lit += litLength;
|
||||||
|
|
||||||
/* literal Length */
|
/* literal Length */
|
||||||
|
@ -641,7 +641,7 @@ size_t ZSTD_execSequence(BYTE* op,
|
|||||||
|
|
||||||
/* copy Literals */
|
/* copy Literals */
|
||||||
if (sequence.litLength > 8)
|
if (sequence.litLength > 8)
|
||||||
ZSTD_wildcopy_16min(op, (*litPtr), sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
|
ZSTD_wildcopy_16min(op, (*litPtr), oend, sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
|
||||||
else
|
else
|
||||||
ZSTD_copy8(op, *litPtr);
|
ZSTD_copy8(op, *litPtr);
|
||||||
op = oLitEnd;
|
op = oLitEnd;
|
||||||
@ -690,13 +690,13 @@ size_t ZSTD_execSequence(BYTE* op,
|
|||||||
|
|
||||||
if (oMatchEnd > oend-(16-MINMATCH)) {
|
if (oMatchEnd > oend-(16-MINMATCH)) {
|
||||||
if (op < oend_w) {
|
if (op < oend_w) {
|
||||||
ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst);
|
ZSTD_wildcopy(op, match, oend, oend_w - op, ZSTD_overlap_src_before_dst);
|
||||||
match += oend_w - op;
|
match += oend_w - op;
|
||||||
op = oend_w;
|
op = oend_w;
|
||||||
}
|
}
|
||||||
while (op < oMatchEnd) *op++ = *match++;
|
while (op < oMatchEnd) *op++ = *match++;
|
||||||
} else {
|
} else {
|
||||||
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); /* works even if matchLength < 8 */
|
ZSTD_wildcopy(op, match, oend, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); /* works even if matchLength < 8 */
|
||||||
}
|
}
|
||||||
return sequenceLength;
|
return sequenceLength;
|
||||||
}
|
}
|
||||||
@ -722,7 +722,7 @@ size_t ZSTD_execSequenceLong(BYTE* op,
|
|||||||
|
|
||||||
/* copy Literals */
|
/* copy Literals */
|
||||||
if (sequence.litLength > 8)
|
if (sequence.litLength > 8)
|
||||||
ZSTD_wildcopy_16min(op, *litPtr, sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
|
ZSTD_wildcopy_16min(op, *litPtr, oend, sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
|
||||||
else
|
else
|
||||||
ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */
|
ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */
|
||||||
|
|
||||||
@ -772,13 +772,13 @@ size_t ZSTD_execSequenceLong(BYTE* op,
|
|||||||
|
|
||||||
if (oMatchEnd > oend-(16-MINMATCH)) {
|
if (oMatchEnd > oend-(16-MINMATCH)) {
|
||||||
if (op < oend_w) {
|
if (op < oend_w) {
|
||||||
ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst);
|
ZSTD_wildcopy(op, match, oend, oend_w - op, ZSTD_overlap_src_before_dst);
|
||||||
match += oend_w - op;
|
match += oend_w - op;
|
||||||
op = oend_w;
|
op = oend_w;
|
||||||
}
|
}
|
||||||
while (op < oMatchEnd) *op++ = *match++;
|
while (op < oMatchEnd) *op++ = *match++;
|
||||||
} else {
|
} else {
|
||||||
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); /* works even if matchLength < 8 */
|
ZSTD_wildcopy(op, match, oend, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); /* works even if matchLength < 8 */
|
||||||
}
|
}
|
||||||
return sequenceLength;
|
return sequenceLength;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user