mirror of
https://github.com/facebook/zstd.git
synced 2025-10-15 00:02:02 -04:00
working
This commit is contained in:
parent
ee4be5e981
commit
9bfb1a3357
580
lib/zstd_opt.c
580
lib/zstd_opt.c
@ -412,7 +412,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
|
||||
const BYTE* const ilimit = iend - 8;
|
||||
const BYTE* const base = ctx->base + ctx->dictLimit;
|
||||
|
||||
size_t offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE;
|
||||
size_t rep_2=REPCODE_STARTVALUE, rep_1=REPCODE_STARTVALUE;
|
||||
const U32 maxSearches = 1 << ctx->params.searchLog;
|
||||
const U32 mls = ctx->params.searchLength;
|
||||
|
||||
@ -420,33 +420,62 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
|
||||
U32 maxNbAttempts, U32 matchLengthSearch, LZ5HC_match_t* matches);
|
||||
getAllMatches_f getAllMatches = searchMethod ? ZSTD_BtGetAllMatches_selectMLS : ZSTD_HcGetAllMatches_selectMLS;
|
||||
|
||||
LZ5HC_optimal_t opt[LZ5_OPT_NUM+4];
|
||||
LZ5HC_match_t matches[LZ5_OPT_NUM+1];
|
||||
|
||||
const uint8_t *inr;
|
||||
int res, cur, cur2, cur_min, skip_num = 0;
|
||||
int rep = 1, llen, litlen, price, offset, best_off, match_num, last_pos;
|
||||
|
||||
const int sufficient_len = 128; //ctx->params.sufficientLength;
|
||||
const int faster_get_matches = (ctx->params.strategy == ZSTD_opt);
|
||||
|
||||
/* init */
|
||||
ZSTD_resetSeqStore(seqStorePtr);
|
||||
if ((ip-base) < REPCODE_STARTVALUE) ip = base + REPCODE_STARTVALUE;
|
||||
|
||||
/* Match Loop */
|
||||
while (ip < ilimit) {
|
||||
size_t matchLength=0;
|
||||
size_t mlen=0;
|
||||
size_t best_mlen=0;
|
||||
size_t offset=0;
|
||||
const BYTE* start=ip+1;
|
||||
memset(opt, 0, sizeof(LZ5HC_optimal_t));
|
||||
last_pos = 0;
|
||||
llen = ip - anchor;
|
||||
inr = ip;
|
||||
|
||||
/* check repCode */
|
||||
if (MEM_read32(start) == MEM_read32(start - offset_1)) {
|
||||
if (MEM_read32(start) == MEM_read32(start - rep_1)) {
|
||||
/* repcode : we take it */
|
||||
matchLength = ZSTD_count(start+MINMATCH, start+MINMATCH-offset_1, iend) + MINMATCH;
|
||||
if (depth==0) goto _storeSequence;
|
||||
best_mlen = ZSTD_count(start+MINMATCH, start+MINMATCH-rep_1, iend) + MINMATCH;
|
||||
|
||||
LZ5_LOG_PARSER("%d: start try REP rep=%d mlen=%d\n", (int)((char*)ip-source), rep, best_mlen);
|
||||
if (depth==0 || best_mlen > sufficient_len || best_mlen >= LZ5_OPT_NUM) {
|
||||
best_off = 0; cur = 0; last_pos = 1;
|
||||
goto _storeSequence;
|
||||
}
|
||||
|
||||
mlen = best_mlen;
|
||||
do
|
||||
{
|
||||
litlen = 0;
|
||||
price = LZ5HC_get_price(llen, 0, mlen - MINMATCH) - llen;
|
||||
if (mlen > last_pos || price < opt[mlen].price)
|
||||
SET_PRICE(mlen, mlen, 0, litlen, price);
|
||||
mlen--;
|
||||
}
|
||||
while (mlen >= MINMATCH);
|
||||
}
|
||||
|
||||
|
||||
{
|
||||
/* first search (depth 0) */
|
||||
size_t mnum = getAllMatches(ctx, ip, iend, maxSearches, mls, matches);
|
||||
if (mnum > 0 && matches[mnum-1].len > matchLength)
|
||||
matchLength = matches[mnum-1].len, start = ip, offset=matches[mnum-1].off;
|
||||
if (mnum > 0 && matches[mnum-1].len > best_mlen)
|
||||
best_mlen = matches[mnum-1].len, start = ip, offset=matches[mnum-1].off;
|
||||
}
|
||||
|
||||
if (matchLength < MINMATCH) {
|
||||
if (best_mlen < MINMATCH) {
|
||||
// ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */
|
||||
ip++;
|
||||
continue;
|
||||
@ -456,20 +485,20 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
|
||||
if (depth>=1)
|
||||
while (ip<ilimit) {
|
||||
ip ++;
|
||||
if ((offset) && (MEM_read32(ip) == MEM_read32(ip - offset_1))) {
|
||||
size_t mlRep = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_1, iend) + MINMATCH;
|
||||
if ((offset) && (MEM_read32(ip) == MEM_read32(ip - rep_1))) {
|
||||
size_t mlRep = ZSTD_count(ip+MINMATCH, ip+MINMATCH-rep_1, iend) + MINMATCH;
|
||||
int gain2 = (int)(mlRep * 3);
|
||||
int gain1 = (int)(matchLength*3 - ZSTD_highbit((U32)offset+1) + 1);
|
||||
int gain1 = (int)(best_mlen*3 - ZSTD_highbit((U32)offset+1) + 1);
|
||||
if ((mlRep >= MINMATCH) && (gain2 > gain1))
|
||||
matchLength = mlRep, offset = 0, start = ip;
|
||||
best_mlen = mlRep, offset = 0, start = ip;
|
||||
}
|
||||
{
|
||||
size_t mnum = getAllMatches(ctx, ip, iend, maxSearches, mls, matches);
|
||||
if (mnum > 0) {
|
||||
int gain2 = (int)(matches[mnum-1].len*4 - ZSTD_highbit((U32)matches[mnum-1].off+1)); /* raw approx */
|
||||
int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 4);
|
||||
int gain1 = (int)(best_mlen*4 - ZSTD_highbit((U32)offset+1) + 4);
|
||||
if ((matches[mnum-1].len >= MINMATCH) && (gain2 > gain1)) {
|
||||
matchLength = matches[mnum-1].len, offset = matches[mnum-1].off, start = ip;
|
||||
best_mlen = matches[mnum-1].len, offset = matches[mnum-1].off, start = ip;
|
||||
continue; /* search a better one */
|
||||
}
|
||||
}
|
||||
@ -481,28 +510,28 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
|
||||
/* catch up */
|
||||
if (offset) {
|
||||
while ((start>anchor) && (start>base+offset) && (start[-1] == start[-1-offset])) /* only search for offset within prefix */
|
||||
{ start--; matchLength++; }
|
||||
offset_2 = offset_1; offset_1 = offset;
|
||||
{ start--; best_mlen++; }
|
||||
rep_2 = rep_1; rep_1 = offset;
|
||||
}
|
||||
|
||||
/* store sequence */
|
||||
_storeSequence:
|
||||
{
|
||||
size_t litLength = start - anchor;
|
||||
ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH);
|
||||
anchor = ip = start + matchLength;
|
||||
ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, best_mlen-MINMATCH);
|
||||
anchor = ip = start + best_mlen;
|
||||
}
|
||||
|
||||
/* check immediate repcode */
|
||||
while ( (ip <= ilimit)
|
||||
&& (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) {
|
||||
&& (MEM_read32(ip) == MEM_read32(ip - rep_2)) ) {
|
||||
/* store sequence */
|
||||
matchLength = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_2, iend);
|
||||
offset = offset_2;
|
||||
offset_2 = offset_1;
|
||||
offset_1 = offset;
|
||||
ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength);
|
||||
ip += matchLength+MINMATCH;
|
||||
best_mlen = ZSTD_count(ip+MINMATCH, ip+MINMATCH-rep_2, iend);
|
||||
offset = rep_2;
|
||||
rep_2 = rep_1;
|
||||
rep_1 = offset;
|
||||
ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, best_mlen);
|
||||
ip += best_mlen+MINMATCH;
|
||||
anchor = ip;
|
||||
continue; /* faster when present ... (?) */
|
||||
} }
|
||||
@ -518,505 +547,6 @@ _storeSequence:
|
||||
|
||||
|
||||
|
||||
#if 0
|
||||
/* *******************************
|
||||
* Optimal parser
|
||||
*********************************/
|
||||
FORCE_INLINE
|
||||
void ZSTD_compressBlock_opt2_generic(ZSTD_CCtx* ctx,
|
||||
const void* src, size_t srcSize,
|
||||
const U32 searchMethod, const U32 depth)
|
||||
{
|
||||
seqStore_t* seqStorePtr = &(ctx->seqStore);
|
||||
const BYTE* const istart = (const BYTE*)src;
|
||||
const BYTE* ip = istart;
|
||||
const BYTE* anchor = istart;
|
||||
const BYTE* const iend = istart + srcSize;
|
||||
const BYTE* const ilimit = iend - 8;
|
||||
const BYTE* const base = ctx->base + ctx->dictLimit;
|
||||
|
||||
size_t offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE;
|
||||
const U32 maxSearches = 1 << ctx->params.searchLog;
|
||||
const U32 mls = ctx->params.searchLength;
|
||||
|
||||
typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit,
|
||||
size_t* offsetPtr,
|
||||
U32 maxNbAttempts, U32 matchLengthSearch);
|
||||
searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS;
|
||||
|
||||
/* init */
|
||||
ZSTD_resetSeqStore(seqStorePtr);
|
||||
if ((ip-base) < REPCODE_STARTVALUE) ip = base + REPCODE_STARTVALUE;
|
||||
|
||||
LZ5HC_optimal_t opt[LZ5_OPT_NUM+4];
|
||||
LZ5HC_match_t matches[LZ5_OPT_NUM+1];
|
||||
const uint8_t *inr;
|
||||
int res, cur, cur2, cur_min, skip_num = 0;
|
||||
int rep = 1, llen, litlen, mlen, best_mlen, price, offset, best_off, match_num, last_pos;
|
||||
|
||||
const int sufficient_len = 64; //ctx->params.sufficientLength;
|
||||
const bool faster_get_matches = (ctx->params.strategy == ZSTD_opt);
|
||||
|
||||
|
||||
/* Match Loop */
|
||||
while (ip < ilimit) {
|
||||
size_t mlen=0;
|
||||
size_t offset=0;
|
||||
const BYTE* start=ip+1;
|
||||
|
||||
memset(opt, 0, sizeof(LZ5HC_optimal_t));
|
||||
last_pos = 0;
|
||||
llen = ip - anchor;
|
||||
inr = ip;
|
||||
|
||||
|
||||
/* check repCode */
|
||||
if (MEM_read32(ip+1) == MEM_read32(ip+1 - offset_1)) {
|
||||
/* repcode : we take it */
|
||||
mlen = ZSTD_count(ip+1+MINMATCH, ip+1+MINMATCH-offset_1, iend) + MINMATCH;
|
||||
|
||||
LZ5_LOG_PARSER("%d: start try REP rep=%d mlen=%d\n", (int)((char*)ip-source), rep, mlen);
|
||||
if (mlen > sufficient_len || mlen >= LZ5_OPT_NUM)
|
||||
{
|
||||
best_mlen = mlen; best_off = 0; cur = 0; last_pos = 1;
|
||||
goto _storeSequence;
|
||||
}
|
||||
|
||||
do
|
||||
{
|
||||
litlen = 0;
|
||||
price = LZ5HC_get_price(llen, 0, mlen - MINMATCH) - llen;
|
||||
if (mlen > last_pos || price < opt[mlen].price)
|
||||
SET_PRICE(mlen, mlen, 0, litlen, price);
|
||||
mlen--;
|
||||
}
|
||||
while (mlen >= MINMATCH);
|
||||
|
||||
if (depth==0) goto _storeSequence;
|
||||
}
|
||||
|
||||
/*
|
||||
{
|
||||
// first search (depth 0)
|
||||
size_t offsetFound = 99999999;
|
||||
size_t ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls);
|
||||
if (ml2 > mlen)
|
||||
mlen = ml2, start = ip, offset=offsetFound;
|
||||
}
|
||||
*/
|
||||
/*
|
||||
if (mlen < MINMATCH) {
|
||||
ip += ((ip-anchor) >> g_searchStrength) + 1; // jump faster over incompressible sections
|
||||
continue;
|
||||
}
|
||||
*/
|
||||
/* // let's try to find a better solution
|
||||
if (depth>=1)
|
||||
while (ip<ilimit) {
|
||||
ip ++;
|
||||
if ((offset) && (MEM_read32(ip) == MEM_read32(ip - offset_1))) {
|
||||
size_t mlRep = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_1, iend) + MINMATCH;
|
||||
int gain2 = (int)(mlRep * 3);
|
||||
int gain1 = (int)(mlen*3 - ZSTD_highbit((U32)offset+1) + 1);
|
||||
if ((mlRep >= MINMATCH) && (gain2 > gain1))
|
||||
mlen = mlRep, offset = 0, start = ip;
|
||||
}
|
||||
{
|
||||
size_t offset2=999999;
|
||||
size_t ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
|
||||
int gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); // raw approx
|
||||
int gain1 = (int)(mlen*4 - ZSTD_highbit((U32)offset+1) + 4);
|
||||
if ((ml2 >= MINMATCH) && (gain2 > gain1)) {
|
||||
mlen = ml2, offset = offset2, start = ip;
|
||||
continue; // search a better one
|
||||
} }
|
||||
|
||||
break; // nothing found : store previous solution
|
||||
}
|
||||
*/
|
||||
|
||||
best_mlen = (last_pos) ? last_pos : MINMATCH;
|
||||
|
||||
if (faster_get_matches && last_pos)
|
||||
match_num = 0;
|
||||
else
|
||||
{
|
||||
if (ctx->params.strategy == LZ5HC_optimal_price)
|
||||
{
|
||||
LZ5HC_Insert(ctx, ip);
|
||||
match_num = LZ5HC_GetAllMatches(ctx, ip, ip, matchlimit, best_mlen, matches);
|
||||
}
|
||||
else
|
||||
{
|
||||
LZ5HC_BinTree_Insert(ctx, ip);
|
||||
match_num = LZ5HC_BinTree_GetAllMatches(ctx, ip, matchlimit, best_mlen, matches);
|
||||
}
|
||||
}
|
||||
|
||||
LZ5_LOG_PARSER("%d: match_num=%d last_pos=%d\n", (int)((char*)ip-source), match_num, last_pos);
|
||||
if (!last_pos && !match_num) { ip++; continue; }
|
||||
|
||||
if (match_num && matches[match_num-1].len > sufficient_len)
|
||||
{
|
||||
best_mlen = matches[match_num-1].len;
|
||||
best_off = matches[match_num-1].off;
|
||||
cur = 0;
|
||||
last_pos = 1;
|
||||
goto encode;
|
||||
}
|
||||
|
||||
// set prices using matches at position = 0
|
||||
for (int i = 0; i < match_num; i++)
|
||||
{
|
||||
mlen = (i>0) ? matches[i-1].len+1 : best_mlen;
|
||||
best_mlen = (matches[i].len < LZ5_OPT_NUM) ? matches[i].len : LZ5_OPT_NUM;
|
||||
LZ5_LOG_PARSER("%d: start Found mlen=%d off=%d best_mlen=%d last_pos=%d\n", (int)((char*)ip-source), matches[i].len, matches[i].off, best_mlen, last_pos);
|
||||
while (mlen <= best_mlen)
|
||||
{
|
||||
litlen = 0;
|
||||
price = LZ5HC_get_price(llen + litlen, matches[i].off, mlen - MINMATCH) - llen;
|
||||
if (mlen > last_pos || price < opt[mlen].price)
|
||||
SET_PRICE(mlen, mlen, matches[i].off, litlen, price);
|
||||
mlen++;
|
||||
}
|
||||
}
|
||||
|
||||
if (!last_pos) { ip++; continue; }
|
||||
|
||||
opt[0].rep = rep;
|
||||
opt[0].mlen = 1;
|
||||
|
||||
// check further positions
|
||||
for (skip_num = 0, cur = 1; cur <= last_pos; cur++)
|
||||
{
|
||||
inr = ip + cur;
|
||||
|
||||
if (opt[cur-1].mlen == 1)
|
||||
{
|
||||
litlen = opt[cur-1].litlen + 1;
|
||||
|
||||
if (cur != litlen)
|
||||
{
|
||||
price = opt[cur - litlen].price + LZ5_LIT_ONLY_COST(litlen);
|
||||
LZ5_LOG_PRICE("%d: TRY1 opt[%d].price=%d price=%d cur=%d litlen=%d\n", (int)((char*)inr-source), cur - litlen, opt[cur - litlen].price, price, cur, litlen);
|
||||
}
|
||||
else
|
||||
{
|
||||
price = LZ5_LIT_ONLY_COST(llen + litlen) - llen;
|
||||
LZ5_LOG_PRICE("%d: TRY2 price=%d cur=%d litlen=%d llen=%d\n", (int)((char*)inr-source), price, cur, litlen, llen);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
litlen = 1;
|
||||
price = opt[cur - 1].price + LZ5_LIT_ONLY_COST(litlen);
|
||||
LZ5_LOG_PRICE("%d: TRY3 price=%d cur=%d litlen=%d litonly=%d\n", (int)((char*)inr-source), price, cur, litlen, LZ5_LIT_ONLY_COST(litlen));
|
||||
}
|
||||
|
||||
mlen = 1;
|
||||
best_mlen = 0;
|
||||
LZ5_LOG_PARSER("%d: TRY price=%d opt[%d].price=%d\n", (int)((char*)inr-source), price, cur, opt[cur].price);
|
||||
|
||||
if (cur > last_pos || price <= opt[cur].price) // || ((price == opt[cur].price) && (opt[cur-1].mlen == 1) && (cur != litlen)))
|
||||
SET_PRICE(cur, mlen, best_mlen, litlen, price);
|
||||
|
||||
if (cur == last_pos) break;
|
||||
|
||||
if (opt[cur].mlen > 1)
|
||||
{
|
||||
mlen = opt[cur].mlen;
|
||||
offset = opt[cur].off;
|
||||
if (offset < 1)
|
||||
{
|
||||
opt[cur].rep = opt[cur-mlen].rep;
|
||||
LZ5_LOG_PARSER("%d: COPYREP1 cur=%d mlen=%d rep=%d\n", (int)((char*)inr-source), cur, mlen, opt[cur-mlen].rep);
|
||||
}
|
||||
else
|
||||
{
|
||||
opt[cur].rep = offset;
|
||||
LZ5_LOG_PARSER("%d: COPYREP2 cur=%d offset=%d rep=%d\n", (int)((char*)inr-source), cur, offset, opt[cur].rep);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
opt[cur].rep = opt[cur-1].rep; // copy rep
|
||||
}
|
||||
|
||||
|
||||
LZ5_LOG_PARSER("%d: CURRENT price[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d\n", (int)((char*)inr-source), cur, last_pos, opt[cur].price, opt[cur].off, opt[cur].mlen, opt[cur].litlen, opt[cur].rep);
|
||||
|
||||
// check rep
|
||||
// best_mlen = 0;
|
||||
mlen = MEM_count(inr, inr - opt[cur].rep, matchlimit);
|
||||
if (mlen >= MINMATCH && mlen > best_mlen)
|
||||
{
|
||||
LZ5_LOG_PARSER("%d: try REP rep=%d mlen=%d\n", (int)((char*)inr-source), opt[cur].rep, mlen);
|
||||
LZ5_LOG_PARSER("%d: Found REP mlen=%d off=%d rep=%d opt[%d].off=%d\n", (int)((char*)inr-source), mlen, 0, opt[cur].rep, cur, opt[cur].off);
|
||||
|
||||
if (mlen > sufficient_len || cur + mlen >= LZ5_OPT_NUM)
|
||||
{
|
||||
best_mlen = mlen;
|
||||
best_off = 0;
|
||||
LZ5_LOG_PARSER("%d: REP sufficient_len=%d best_mlen=%d best_off=%d last_pos=%d\n", (int)((char*)inr-source), sufficient_len, best_mlen, best_off, last_pos);
|
||||
last_pos = cur + 1;
|
||||
goto encode;
|
||||
}
|
||||
|
||||
if (opt[cur].mlen == 1)
|
||||
{
|
||||
litlen = opt[cur].litlen;
|
||||
|
||||
if (cur != litlen)
|
||||
{
|
||||
price = opt[cur - litlen].price + LZ5HC_get_price(litlen, 0, mlen - MINMATCH);
|
||||
LZ5_LOG_PRICE("%d: TRY1 opt[%d].price=%d price=%d cur=%d litlen=%d\n", (int)((char*)inr-source), cur - litlen, opt[cur - litlen].price, price, cur, litlen);
|
||||
}
|
||||
else
|
||||
{
|
||||
price = LZ5HC_get_price(llen + litlen, 0, mlen - MINMATCH) - llen;
|
||||
LZ5_LOG_PRICE("%d: TRY2 price=%d cur=%d litlen=%d llen=%d\n", (int)((char*)inr-source), price, cur, litlen, llen);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
litlen = 0;
|
||||
price = opt[cur].price + LZ5HC_get_price(litlen, 0, mlen - MINMATCH);
|
||||
LZ5_LOG_PRICE("%d: TRY3 price=%d cur=%d litlen=%d getprice=%d\n", (int)((char*)inr-source), price, cur, litlen, LZ5HC_get_price(litlen, 0, mlen - MINMATCH));
|
||||
}
|
||||
|
||||
best_mlen = mlen;
|
||||
if (faster_get_matches)
|
||||
skip_num = best_mlen;
|
||||
|
||||
LZ5_LOG_PARSER("%d: Found REP mlen=%d off=%d price=%d litlen=%d price[%d]=%d\n", (int)((char*)inr-source), mlen, 0, price, litlen, cur - litlen, opt[cur - litlen].price);
|
||||
|
||||
do
|
||||
{
|
||||
if (cur + mlen > last_pos || price <= opt[cur + mlen].price) // || ((price == opt[cur + mlen].price) && (opt[cur].mlen == 1) && (cur != litlen))) // at equal price prefer REP instead of MATCH
|
||||
SET_PRICE(cur + mlen, mlen, 0, litlen, price);
|
||||
mlen--;
|
||||
}
|
||||
while (mlen >= MINMATCH);
|
||||
}
|
||||
|
||||
if (faster_get_matches && skip_num > 0)
|
||||
{
|
||||
skip_num--;
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
best_mlen = (best_mlen > MINMATCH) ? best_mlen : MINMATCH;
|
||||
|
||||
if (ctx->params.strategy == LZ5HC_optimal_price)
|
||||
{
|
||||
LZ5HC_Insert(ctx, inr);
|
||||
match_num = LZ5HC_GetAllMatches(ctx, inr, ip, matchlimit, best_mlen, matches);
|
||||
LZ5_LOG_PARSER("%d: LZ5HC_GetAllMatches match_num=%d\n", (int)((char*)inr-source), match_num);
|
||||
}
|
||||
else
|
||||
{
|
||||
LZ5HC_BinTree_Insert(ctx, inr);
|
||||
match_num = LZ5HC_BinTree_GetAllMatches(ctx, inr, matchlimit, best_mlen, matches);
|
||||
LZ5_LOG_PARSER("%d: LZ5HC_BinTree_GetAllMatches match_num=%d\n", (int)((char*)inr-source), match_num);
|
||||
}
|
||||
|
||||
|
||||
if (match_num > 0 && matches[match_num-1].len > sufficient_len)
|
||||
{
|
||||
cur -= matches[match_num-1].back;
|
||||
best_mlen = matches[match_num-1].len;
|
||||
best_off = matches[match_num-1].off;
|
||||
last_pos = cur + 1;
|
||||
goto encode;
|
||||
}
|
||||
|
||||
cur_min = cur;
|
||||
|
||||
// set prices using matches at position = cur
|
||||
for (int i = 0; i < match_num; i++)
|
||||
{
|
||||
mlen = (i>0) ? matches[i-1].len+1 : best_mlen;
|
||||
cur2 = cur - matches[i].back;
|
||||
best_mlen = (cur2 + matches[i].len < LZ5_OPT_NUM) ? matches[i].len : LZ5_OPT_NUM - cur2;
|
||||
LZ5_LOG_PARSER("%d: Found1 cur=%d cur2=%d mlen=%d off=%d best_mlen=%d last_pos=%d\n", (int)((char*)inr-source), cur, cur2, matches[i].len, matches[i].off, best_mlen, last_pos);
|
||||
|
||||
while (mlen <= best_mlen)
|
||||
{
|
||||
if (opt[cur2].mlen == 1)
|
||||
{
|
||||
litlen = opt[cur2].litlen;
|
||||
|
||||
if (cur2 != litlen)
|
||||
price = opt[cur2 - litlen].price + LZ5HC_get_price(litlen, matches[i].off, mlen - MINMATCH);
|
||||
else
|
||||
price = LZ5HC_get_price(llen + litlen, matches[i].off, mlen - MINMATCH) - llen;
|
||||
}
|
||||
else
|
||||
{
|
||||
litlen = 0;
|
||||
price = opt[cur2].price + LZ5HC_get_price(litlen, matches[i].off, mlen - MINMATCH);
|
||||
}
|
||||
|
||||
LZ5_LOG_PARSER("%d: Found2 pred=%d mlen=%d best_mlen=%d off=%d price=%d litlen=%d price[%d]=%d\n", (int)((char*)inr-source), matches[i].back, mlen, best_mlen, matches[i].off, price, litlen, cur - litlen, opt[cur - litlen].price);
|
||||
if (cur2 + mlen > last_pos || price < opt[cur2 + mlen].price)
|
||||
{
|
||||
SET_PRICE(cur2 + mlen, mlen, matches[i].off, litlen, price);
|
||||
|
||||
opt[cur2 + mlen].rep = matches[i].off; // update reps
|
||||
if (cur2 < cur_min) cur_min = cur2;
|
||||
}
|
||||
|
||||
mlen++;
|
||||
}
|
||||
}
|
||||
|
||||
if (cur_min < cur)
|
||||
{
|
||||
for (int i=cur_min-1; i<=last_pos; i++)
|
||||
{
|
||||
LZ5_LOG_PARSER("%d: BEFORE price[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d\n", (int)((char*)ip-source+i), i, last_pos, opt[i].price, opt[i].off, opt[i].mlen, opt[i].litlen, opt[i].rep);
|
||||
}
|
||||
|
||||
for (int i=cur_min+1; i<=last_pos; i++)
|
||||
if (opt[i].price < (1<<30) && (opt[i].off) < 1 && i - opt[i].mlen > cur_min) // invalidate reps
|
||||
{
|
||||
if (opt[i-1].mlen == 1)
|
||||
{
|
||||
litlen = opt[i-1].litlen + 1;
|
||||
|
||||
if (i != litlen)
|
||||
{
|
||||
price = opt[i - litlen].price + LZ5_LIT_ONLY_COST(litlen);
|
||||
// LZ5_LOG_PRICE("%d: TRY1 opt[%d].price=%d price=%d cur=%d litlen=%d\n", (int)(inr-base), i - litlen, opt[i - litlen].price, price, i, litlen);
|
||||
}
|
||||
else
|
||||
{
|
||||
price = LZ5_LIT_ONLY_COST(llen + litlen) - llen;
|
||||
// LZ5_LOG_PRICE("%d: TRY2 price=%d cur=%d litlen=%d llen=%d\n", (int)(inr-base), price, i, litlen, llen);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
litlen = 1;
|
||||
price = opt[i - 1].price + LZ5_LIT_ONLY_COST(litlen);
|
||||
// LZ5_LOG_PRICE("%d: TRY3 price=%d cur=%d litlen=%d\n", (int)(inr-base), price, i, litlen);
|
||||
}
|
||||
|
||||
mlen = 1;
|
||||
best_mlen = 0;
|
||||
SET_PRICE(i, mlen, best_mlen, litlen, price);
|
||||
|
||||
opt[i].rep = opt[i-1].rep; // copy reps
|
||||
|
||||
LZ5_LOG_PARSER("%d: INVALIDATE pred=%d price[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d\n", (int)((char*)inr-source), cur-cur_min, i, last_pos, opt[i].price, opt[i].off, opt[i].mlen, opt[i].litlen, opt[i].rep);
|
||||
}
|
||||
|
||||
for (int i=cur_min-1; i<=last_pos; i++)
|
||||
{
|
||||
LZ5_LOG_PARSER("%d: AFTER price[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d\n", (int)((char*)ip-source+i), i, last_pos, opt[i].price, opt[i].off, opt[i].mlen, opt[i].litlen, opt[i].rep);
|
||||
}
|
||||
|
||||
}
|
||||
} // for (skip_num = 0, cur = 1; cur <= last_pos; cur++)
|
||||
|
||||
|
||||
best_mlen = opt[last_pos].mlen;
|
||||
best_off = opt[last_pos].off;
|
||||
cur = last_pos - best_mlen;
|
||||
|
||||
/* catch up */
|
||||
if (offset) {
|
||||
// while ((start>anchor) && (start>base+offset) && (start[-1] == start[-1-offset])) // only search for offset within prefix
|
||||
// { start--; mlen++; }
|
||||
offset_2 = offset_1; offset_1 = offset;
|
||||
}
|
||||
|
||||
/* store sequence */
|
||||
_storeSequence: // cur, last_pos, best_mlen, best_off have to be set
|
||||
for (int i = 1; i <= last_pos; i++)
|
||||
{
|
||||
LZ5_LOG_PARSER("%d: price[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d\n", (int)((char*)ip-source+i), i, last_pos, opt[i].price, opt[i].off, opt[i].mlen, opt[i].litlen, opt[i].rep);
|
||||
}
|
||||
|
||||
LZ5_LOG_PARSER("%d: cur=%d/%d best_mlen=%d best_off=%d rep=%d\n", (int)((char*)ip-source+cur), cur, last_pos, best_mlen, best_off, opt[cur].rep);
|
||||
|
||||
opt[0].mlen = 1;
|
||||
|
||||
while (cur >= 0)
|
||||
{
|
||||
mlen = opt[cur].mlen;
|
||||
offset = opt[cur].off;
|
||||
opt[cur].mlen = best_mlen;
|
||||
opt[cur].off = best_off;
|
||||
best_mlen = mlen;
|
||||
best_off = offset;
|
||||
cur -= mlen;
|
||||
}
|
||||
|
||||
for (int i = 0; i <= last_pos;)
|
||||
{
|
||||
LZ5_LOG_PARSER("%d: price2[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d\n", (int)((char*)ip-source+i), i, last_pos, opt[i].price, opt[i].off, opt[i].mlen, opt[i].litlen, opt[i].rep);
|
||||
i += opt[i].mlen;
|
||||
}
|
||||
|
||||
cur = 0;
|
||||
|
||||
while (cur < last_pos)
|
||||
{
|
||||
LZ5_LOG_PARSER("%d: price3[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d\n", (int)((char*)ip-source+cur), cur, last_pos, opt[cur].price, opt[cur].off, opt[cur].mlen, opt[cur].litlen, opt[cur].rep);
|
||||
mlen = opt[cur].mlen;
|
||||
if (mlen == 1) { ip++; cur++; continue; }
|
||||
offset = opt[cur].off;
|
||||
cur += mlen;
|
||||
|
||||
LZ5_LOG_ENCODE("%d: ENCODE literals=%d off=%d mlen=%d ", (int)((char*)ip-source), (int)(ip-anchor), (int)(offset), mlen);
|
||||
if (offset == 0)
|
||||
{
|
||||
res = LZ5HC_encodeSequence(ctx, &ip, &op, &anchor, mlen, ip - rep, limit, oend);
|
||||
}
|
||||
else
|
||||
{
|
||||
res = LZ5HC_encodeSequence(ctx, &ip, &op, &anchor, mlen, ip - offset, limit, oend);
|
||||
rep = offset;
|
||||
}
|
||||
LZ5_LOG_ENCODE("out=%d\n", (int)((char*)op - dest));
|
||||
|
||||
if (res) return 0;
|
||||
|
||||
LZ5_LOG_PARSER("%d: offset=%d rep=%d\n", (int)((char*)ip-source), offset, rep);
|
||||
}
|
||||
|
||||
{
|
||||
size_t litLength = start - anchor;
|
||||
ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH);
|
||||
anchor = ip = start + mlen;
|
||||
}
|
||||
|
||||
/* check immediate repcode */
|
||||
while ( (ip <= ilimit)
|
||||
&& (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) {
|
||||
/* store sequence */
|
||||
mlen = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_2, iend);
|
||||
offset = offset_2;
|
||||
offset_2 = offset_1;
|
||||
offset_1 = offset;
|
||||
ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, mlen);
|
||||
ip += mlen+MINMATCH;
|
||||
anchor = ip;
|
||||
continue; /* faster when present ... (?) */
|
||||
} }
|
||||
|
||||
/* Last Literals */
|
||||
{
|
||||
size_t lastLLSize = iend - anchor;
|
||||
memcpy(seqStorePtr->lit, anchor, lastLLSize);
|
||||
seqStorePtr->lit += lastLLSize;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#if 0
|
||||
static int LZ5HC_compress_optimal_price (
|
||||
LZ5HC_Data_Structure* ctx,
|
||||
|
Loading…
x
Reference in New Issue
Block a user