Fixed : dictBuilder fails if first sample is too small

This commit is contained in:
Yann Collet 2016-06-22 11:05:34 +02:00
parent 8d30cd562a
commit 23042929da

View File

@ -584,48 +584,51 @@ static void ZDICT_countEStats(EStats_ress_t esr,
U32* countLit, U32* offsetcodeCount, U32* matchlengthCount, U32* litlengthCount, U32* repOffsets,
const void* src, size_t srcSize)
{
const seqStore_t* seqStorePtr;
size_t cSize;
if (srcSize > ZSTD_BLOCKSIZE_MAX) srcSize = ZSTD_BLOCKSIZE_MAX; /* protection vs large samples */
ZSTD_copyCCtx(esr.zc, esr.ref);
ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
seqStorePtr = ZSTD_getSeqStore(esr.zc);
cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
if (ZSTD_isError(cSize)) { DISPLAYLEVEL(1, "warning : could not compress sample size %u \n", (U32)srcSize); return; }
/* literals stats */
{ const BYTE* bytePtr;
for(bytePtr = seqStorePtr->litStart; bytePtr < seqStorePtr->lit; bytePtr++)
countLit[*bytePtr]++;
}
if (cSize) { /* if == 0; block is not compressible */
const seqStore_t* seqStorePtr = ZSTD_getSeqStore(esr.zc);
/* seqStats */
{ size_t const nbSeq = (size_t)(seqStorePtr->offset - seqStorePtr->offsetStart);
ZSTD_seqToCodes(seqStorePtr, nbSeq);
{ const BYTE* codePtr = seqStorePtr->offCodeStart;
size_t u;
for (u=0; u<nbSeq; u++) offsetcodeCount[codePtr[u]]++;
/* literals stats */
{ const BYTE* bytePtr;
for(bytePtr = seqStorePtr->litStart; bytePtr < seqStorePtr->lit; bytePtr++)
countLit[*bytePtr]++;
}
{ const BYTE* codePtr = seqStorePtr->mlCodeStart;
size_t u;
for (u=0; u<nbSeq; u++) matchlengthCount[codePtr[u]]++;
/* seqStats */
{ size_t const nbSeq = (size_t)(seqStorePtr->offset - seqStorePtr->offsetStart);
ZSTD_seqToCodes(seqStorePtr, nbSeq);
{ const BYTE* codePtr = seqStorePtr->offCodeStart;
size_t u;
for (u=0; u<nbSeq; u++) offsetcodeCount[codePtr[u]]++;
}
{ const BYTE* codePtr = seqStorePtr->mlCodeStart;
size_t u;
for (u=0; u<nbSeq; u++) matchlengthCount[codePtr[u]]++;
}
{ const BYTE* codePtr = seqStorePtr->llCodeStart;
size_t u;
for (u=0; u<nbSeq; u++) litlengthCount[codePtr[u]]++;
} }
/* rep offsets */
{ const U32* const offsetPtr = seqStorePtr->offsetStart;
U32 offset1 = offsetPtr[0] - 3;
U32 offset2 = offsetPtr[1] - 3;
if (offset1 >= MAXREPOFFSET) offset1 = 0;
if (offset2 >= MAXREPOFFSET) offset2 = 0;
repOffsets[offset1] += 3;
repOffsets[offset2] += 1;
}
{ const BYTE* codePtr = seqStorePtr->llCodeStart;
size_t u;
for (u=0; u<nbSeq; u++) litlengthCount[codePtr[u]]++;
} }
/* rep offsets */
{ const U32* const offsetPtr = seqStorePtr->offsetStart;
U32 offset1 = offsetPtr[0] - 3;
U32 offset2 = offsetPtr[1] - 3;
if (offset1 >= MAXREPOFFSET) offset1 = 0;
if (offset2 >= MAXREPOFFSET) offset2 = 0;
repOffsets[offset1] += 3;
repOffsets[offset2] += 1;
}
}
/*