mirror of
https://github.com/facebook/zstd.git
synced 2025-10-16 00:04:24 -04:00
Literals header fields use little endian convention
This commit is contained in:
parent
6fa05a2371
commit
198e6aac44
@ -572,17 +572,14 @@ static size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void
|
|||||||
switch(flSize)
|
switch(flSize)
|
||||||
{
|
{
|
||||||
case 1: /* 2 - 1 - 5 */
|
case 1: /* 2 - 1 - 5 */
|
||||||
ostart[0] = (BYTE)((lbt_raw<<6) + (0<<5) + srcSize);
|
ostart[0] = (BYTE)((U32)lbt_raw + (srcSize<<3));
|
||||||
break;
|
break;
|
||||||
case 2: /* 2 - 2 - 12 */
|
case 2: /* 2 - 2 - 12 */
|
||||||
ostart[0] = (BYTE)((lbt_raw<<6) + (2<<4) + (srcSize >> 8));
|
MEM_writeLE16(ostart, (U32)lbt_raw + (1<<2) + (srcSize<<4));
|
||||||
ostart[1] = (BYTE)srcSize;
|
|
||||||
break;
|
break;
|
||||||
default: /*note : should not be necessary : flSize is within {1,2,3} */
|
default: /*note : should not be necessary : flSize is within {1,2,3} */
|
||||||
case 3: /* 2 - 2 - 20 */
|
case 3: /* 2 - 2 - 20 */
|
||||||
ostart[0] = (BYTE)((lbt_raw<<6) + (3<<4) + (srcSize >> 16));
|
MEM_writeLE32(ostart, (U32)lbt_raw + (3<<2) + (srcSize<<4));
|
||||||
ostart[1] = (BYTE)(srcSize>>8);
|
|
||||||
ostart[2] = (BYTE)srcSize;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -595,22 +592,19 @@ static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, cons
|
|||||||
BYTE* const ostart = (BYTE* const)dst;
|
BYTE* const ostart = (BYTE* const)dst;
|
||||||
U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
|
U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
|
||||||
|
|
||||||
(void)dstCapacity; /* dstCapacity guaranteed to be >=4, hence large enough */
|
(void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */
|
||||||
|
|
||||||
switch(flSize)
|
switch(flSize)
|
||||||
{
|
{
|
||||||
case 1: /* 2 - 1 - 5 */
|
case 1: /* 2 - 1 - 5 */
|
||||||
ostart[0] = (BYTE)((lbt_rle<<6) + (0<<5) + srcSize);
|
ostart[0] = (BYTE)((U32)lbt_rle + (srcSize<<3));
|
||||||
break;
|
break;
|
||||||
case 2: /* 2 - 2 - 12 */
|
case 2: /* 2 - 2 - 12 */
|
||||||
ostart[0] = (BYTE)((lbt_rle<<6) + (2<<4) + (srcSize >> 8));
|
MEM_writeLE16(ostart, (U32)lbt_rle + (1<<2) + (srcSize<<4));
|
||||||
ostart[1] = (BYTE)srcSize;
|
|
||||||
break;
|
break;
|
||||||
default: /*note : should not be necessary : flSize is necessarily within {1,2,3} */
|
default: /*note : should not be necessary : flSize is necessarily within {1,2,3} */
|
||||||
case 3: /* 2 - 2 - 20 */
|
case 3: /* 2 - 2 - 20 */
|
||||||
ostart[0] = (BYTE)((lbt_rle<<6) + (3<<4) + (srcSize >> 16));
|
MEM_writeLE32(ostart, (U32)lbt_rle + (3<<2) + (srcSize<<4));
|
||||||
ostart[1] = (BYTE)(srcSize>>8);
|
|
||||||
ostart[2] = (BYTE)srcSize;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -658,24 +652,22 @@ static size_t ZSTD_compressLiterals (ZSTD_CCtx* zc,
|
|||||||
switch(lhSize)
|
switch(lhSize)
|
||||||
{
|
{
|
||||||
case 3: /* 2 - 2 - 10 - 10 */
|
case 3: /* 2 - 2 - 10 - 10 */
|
||||||
ostart[0] = (BYTE)((srcSize>>6) + (singleStream << 4) + (hType<<6));
|
{ U32 const lhc = hType + (singleStream << 2) + (srcSize<<4) + (cLitSize<<14);
|
||||||
ostart[1] = (BYTE)((srcSize<<2) + (cLitSize>>8));
|
MEM_writeLE24(ostart, lhc);
|
||||||
ostart[2] = (BYTE)(cLitSize);
|
break;
|
||||||
break;
|
}
|
||||||
case 4: /* 2 - 2 - 14 - 14 */
|
case 4: /* 2 - 2 - 14 - 14 */
|
||||||
ostart[0] = (BYTE)((srcSize>>10) + (2<<4) + (hType<<6));
|
{ U32 const lhc = hType + (2 << 2) + (srcSize<<4) + (cLitSize<<18);
|
||||||
ostart[1] = (BYTE)(srcSize>> 2);
|
MEM_writeLE32(ostart, lhc);
|
||||||
ostart[2] = (BYTE)((srcSize<<6) + (cLitSize>>8));
|
break;
|
||||||
ostart[3] = (BYTE)(cLitSize);
|
}
|
||||||
break;
|
|
||||||
default: /* should not be necessary, lhSize is only {3,4,5} */
|
default: /* should not be necessary, lhSize is only {3,4,5} */
|
||||||
case 5: /* 2 - 2 - 18 - 18 */
|
case 5: /* 2 - 2 - 18 - 18 */
|
||||||
ostart[0] = (BYTE)((srcSize>>14) + (3<<4) + (hType<<6));
|
{ U32 const lhc = hType + (3 << 2) + (srcSize<<4) + (cLitSize<<22);
|
||||||
ostart[1] = (BYTE)(srcSize>>6);
|
MEM_writeLE32(ostart, lhc);
|
||||||
ostart[2] = (BYTE)((srcSize<<2) + (cLitSize>>16));
|
ostart[4] = (BYTE)(cLitSize >> 10);
|
||||||
ostart[3] = (BYTE)(cLitSize>>8);
|
break;
|
||||||
ostart[4] = (BYTE)(cLitSize);
|
}
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
return lhSize+cLitSize;
|
return lhSize+cLitSize;
|
||||||
}
|
}
|
||||||
@ -2735,8 +2727,7 @@ size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
|
|||||||
BYTE* op = (BYTE*)dst;
|
BYTE* op = (BYTE*)dst;
|
||||||
size_t fhSize = 0;
|
size_t fhSize = 0;
|
||||||
|
|
||||||
/* not even init ! */
|
if (cctx->stage==0) return ERROR(stage_wrong); /*< not even init ! */
|
||||||
if (cctx->stage==0) return ERROR(stage_wrong);
|
|
||||||
|
|
||||||
/* special case : empty frame */
|
/* special case : empty frame */
|
||||||
if (cctx->stage==1) {
|
if (cctx->stage==1) {
|
||||||
@ -2748,7 +2739,7 @@ size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* frame epilogue */
|
/* frame epilogue */
|
||||||
if (dstCapacity < 3) return ERROR(dstSize_tooSmall);
|
if (dstCapacity < ZSTD_blockHeaderSize) return ERROR(dstSize_tooSmall);
|
||||||
{ U32 const checksum = cctx->params.fParams.checksumFlag ?
|
{ U32 const checksum = cctx->params.fParams.checksumFlag ?
|
||||||
(U32)(XXH64_digest(&cctx->xxhState) >> 11) :
|
(U32)(XXH64_digest(&cctx->xxhState) >> 11) :
|
||||||
0;
|
0;
|
||||||
@ -2756,7 +2747,7 @@ size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
|
|||||||
}
|
}
|
||||||
|
|
||||||
cctx->stage = 0; /* return to "created but not init" status */
|
cctx->stage = 0; /* return to "created but not init" status */
|
||||||
return 3+fhSize;
|
return ZSTD_blockHeaderSize+fhSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -397,9 +397,9 @@ size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t
|
|||||||
* compatible with legacy mode
|
* compatible with legacy mode
|
||||||
* @return : decompressed size if known, 0 otherwise
|
* @return : decompressed size if known, 0 otherwise
|
||||||
note : 0 can mean any of the following :
|
note : 0 can mean any of the following :
|
||||||
- decompressed size is not provided within frame header
|
- decompressed size is not present within frame header
|
||||||
- frame header unknown / not supported
|
- frame header unknown / not supported
|
||||||
- frame header not completely provided (`srcSize` too small) */
|
- frame header not complete (`srcSize` too small) */
|
||||||
unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize)
|
unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize)
|
||||||
{
|
{
|
||||||
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
|
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
|
||||||
@ -464,33 +464,42 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|||||||
|
|
||||||
if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected);
|
if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected);
|
||||||
|
|
||||||
switch((litBlockType_t)(istart[0]>> 6))
|
switch((litBlockType_t)(istart[0] & 3))
|
||||||
{
|
{
|
||||||
case lbt_huffman:
|
case lbt_huffman:
|
||||||
{ size_t litSize, litCSize, singleStream=0;
|
{ size_t lhSize, litSize, litCSize, singleStream=0;
|
||||||
U32 lhSize = (istart[0] >> 4) & 3;
|
U32 const lhlCode = (istart[0] >> 2) & 3;
|
||||||
if (srcSize < 5) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for lhSize, + cSize (+nbSeq) */
|
if (srcSize < 5) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for lhSize, + cSize (+nbSeq) */
|
||||||
switch(lhSize)
|
switch(lhlCode)
|
||||||
{
|
{
|
||||||
case 0: case 1: default: /* note : default is impossible, since lhSize into [0..3] */
|
case 1:
|
||||||
|
singleStream = 1;
|
||||||
|
/* fall through */
|
||||||
|
case 0: default: /* note : default is impossible, since lhlCode into [0..3] */
|
||||||
/* 2 - 2 - 10 - 10 */
|
/* 2 - 2 - 10 - 10 */
|
||||||
lhSize=3;
|
{ U32 const lhc = MEM_readLE24(istart) >> 4;
|
||||||
singleStream = istart[0] & 16;
|
lhSize = 3;
|
||||||
litSize = ((istart[0] & 15) << 6) + (istart[1] >> 2);
|
litSize = lhc & 0x3FF;
|
||||||
litCSize = ((istart[1] & 3) << 8) + istart[2];
|
litCSize = lhc >> 10;
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
case 2:
|
case 2:
|
||||||
/* 2 - 2 - 14 - 14 */
|
/* 2 - 2 - 14 - 14 */
|
||||||
lhSize=4;
|
{ U32 const lhc = MEM_readLE32(istart) >> 4;
|
||||||
litSize = ((istart[0] & 15) << 10) + (istart[1] << 2) + (istart[2] >> 6);
|
lhSize = 4;
|
||||||
litCSize = ((istart[2] & 63) << 8) + istart[3];
|
litSize = lhc & 0x3FFF;
|
||||||
break;
|
litCSize = lhc >> 14;
|
||||||
|
break;
|
||||||
|
}
|
||||||
case 3:
|
case 3:
|
||||||
/* 2 - 2 - 18 - 18 */
|
/* 2 - 2 - 18 - 18 */
|
||||||
lhSize=5;
|
{ U64 const lhc = (MEM_readLE32(istart) + (((U64)istart[4]) << 32)) >> 4;
|
||||||
litSize = ((istart[0] & 15) << 14) + (istart[1] << 6) + (istart[2] >> 2);
|
lhSize = 5;
|
||||||
litCSize = ((istart[2] & 3) << 16) + (istart[3] << 8) + istart[4];
|
litSize = lhc & 0x3FFFF;
|
||||||
break;
|
litCSize = lhc >> 18;
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (litSize > ZSTD_BLOCKSIZE_ABSOLUTEMAX) return ERROR(corruption_detected);
|
if (litSize > ZSTD_BLOCKSIZE_ABSOLUTEMAX) return ERROR(corruption_detected);
|
||||||
if (litCSize + lhSize > srcSize) return ERROR(corruption_detected);
|
if (litCSize + lhSize > srcSize) return ERROR(corruption_detected);
|
||||||
@ -501,23 +510,23 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|||||||
return ERROR(corruption_detected);
|
return ERROR(corruption_detected);
|
||||||
|
|
||||||
dctx->litPtr = dctx->litBuffer;
|
dctx->litPtr = dctx->litBuffer;
|
||||||
dctx->litBufSize = ZSTD_BLOCKSIZE_ABSOLUTEMAX+8;
|
dctx->litBufSize = ZSTD_BLOCKSIZE_ABSOLUTEMAX+WILDCOPY_OVERLENGTH;
|
||||||
dctx->litSize = litSize;
|
dctx->litSize = litSize;
|
||||||
dctx->litEntropy = 1;
|
dctx->litEntropy = 1;
|
||||||
return litCSize + lhSize;
|
return litCSize + lhSize;
|
||||||
}
|
}
|
||||||
case lbt_repeat:
|
case lbt_repeat:
|
||||||
{ size_t litSize, litCSize;
|
{ size_t litSize, litCSize, lhSize;
|
||||||
U32 lhSize = ((istart[0]) >> 4) & 3;
|
U32 const lhc = MEM_readLE24(istart) >> 4;
|
||||||
if (lhSize != 1) /* only case supported for now : small litSize, single stream */
|
if ((((istart[0]) >> 2) & 3) != 1) /* only case supported for now : small litSize, single stream */
|
||||||
return ERROR(corruption_detected);
|
return ERROR(corruption_detected);
|
||||||
if (dctx->litEntropy==0)
|
if (dctx->litEntropy==0)
|
||||||
return ERROR(dictionary_corrupted);
|
return ERROR(dictionary_corrupted);
|
||||||
|
|
||||||
/* 2 - 2 - 10 - 10 */
|
/* 2 - 2 - 10 - 10 */
|
||||||
lhSize=3;
|
lhSize = 3;
|
||||||
litSize = ((istart[0] & 15) << 6) + (istart[1] >> 2);
|
litSize = lhc & 0x3FF;
|
||||||
litCSize = ((istart[1] & 3) << 8) + istart[2];
|
litCSize = lhc >> 10;
|
||||||
if (litCSize + lhSize > srcSize) return ERROR(corruption_detected);
|
if (litCSize + lhSize > srcSize) return ERROR(corruption_detected);
|
||||||
|
|
||||||
{ size_t const errorCode = HUF_decompress1X4_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->hufTable);
|
{ size_t const errorCode = HUF_decompress1X4_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->hufTable);
|
||||||
@ -529,19 +538,21 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|||||||
return litCSize + lhSize;
|
return litCSize + lhSize;
|
||||||
}
|
}
|
||||||
case lbt_raw:
|
case lbt_raw:
|
||||||
{ size_t litSize;
|
{ size_t litSize, lhSize;
|
||||||
U32 lhSize = ((istart[0]) >> 4) & 3;
|
U32 const lhlCode = ((istart[0]) >> 2) & 3;
|
||||||
switch(lhSize)
|
switch(lhlCode)
|
||||||
{
|
{
|
||||||
case 0: case 1: default: /* note : default is impossible, since lhSize into [0..3] */
|
case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */
|
||||||
lhSize=1;
|
lhSize = 1;
|
||||||
litSize = istart[0] & 31;
|
litSize = istart[0] >> 3;
|
||||||
break;
|
break;
|
||||||
case 2:
|
case 1:
|
||||||
litSize = ((istart[0] & 15) << 8) + istart[1];
|
lhSize = 2;
|
||||||
|
litSize = MEM_readLE16(istart) >> 4;
|
||||||
break;
|
break;
|
||||||
case 3:
|
case 3:
|
||||||
litSize = ((istart[0] & 15) << 16) + (istart[1] << 8) + istart[2];
|
lhSize = 3;
|
||||||
|
litSize = MEM_readLE24(istart) >> 4;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -560,19 +571,21 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|||||||
return lhSize+litSize;
|
return lhSize+litSize;
|
||||||
}
|
}
|
||||||
case lbt_rle:
|
case lbt_rle:
|
||||||
{ size_t litSize;
|
{ U32 const lhlCode = ((istart[0]) >> 2) & 3;
|
||||||
U32 lhSize = ((istart[0]) >> 4) & 3;
|
size_t litSize, lhSize;
|
||||||
switch(lhSize)
|
switch(lhlCode)
|
||||||
{
|
{
|
||||||
case 0: case 1: default: /* note : default is impossible, since lhSize into [0..3] */
|
case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */
|
||||||
lhSize = 1;
|
lhSize = 1;
|
||||||
litSize = istart[0] & 31;
|
litSize = istart[0] >> 3;
|
||||||
break;
|
break;
|
||||||
case 2:
|
case 1:
|
||||||
litSize = ((istart[0] & 15) << 8) + istart[1];
|
lhSize = 2;
|
||||||
|
litSize = MEM_readLE16(istart) >> 4;
|
||||||
break;
|
break;
|
||||||
case 3:
|
case 3:
|
||||||
litSize = ((istart[0] & 15) << 16) + (istart[1] << 8) + istart[2];
|
lhSize = 3;
|
||||||
|
litSize = MEM_readLE24(istart) >> 4;
|
||||||
if (srcSize<4) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4 */
|
if (srcSize<4) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4 */
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -435,15 +435,17 @@ followed by 1 or 4 streams.
|
|||||||
|
|
||||||
Header is in charge of describing how literals are packed.
|
Header is in charge of describing how literals are packed.
|
||||||
It's a byte-aligned variable-size bitfield, ranging from 1 to 5 bytes,
|
It's a byte-aligned variable-size bitfield, ranging from 1 to 5 bytes,
|
||||||
using big-endian convention.
|
using little-endian convention.
|
||||||
|
|
||||||
| BlockType | sizes format | [compressed size] | regenerated size |
|
| BlockType | sizes format | regenerated size | [compressed size] |
|
||||||
| --------- | ------------ | ----------------- | ---------------- |
|
| --------- | ------------ | ---------------- | ----------------- |
|
||||||
| 2 bits | 1 - 2 bits | 0 - 18 bits | 5 - 20 bits |
|
| 2 bits | 1 - 2 bits | 5 - 20 bits | 0 - 18 bits |
|
||||||
|
|
||||||
|
In this representation, bits on the left are the least significant bits.
|
||||||
|
|
||||||
__Block Type__ :
|
__Block Type__ :
|
||||||
|
|
||||||
This is a 2-bits field, describing 4 different block types :
|
This field uses the 2 lowest bits of the first byte, describing 4 different block types :
|
||||||
|
|
||||||
| Value | 0 | 1 | 2 | 3 |
|
| Value | 0 | 1 | 2 | 3 |
|
||||||
| ---------- | ---------- | ------ | --- | ------- |
|
| ---------- | ---------- | ------ | --- | ------- |
|
||||||
@ -466,19 +468,19 @@ Sizes format are divided into 2 families :
|
|||||||
and the decompressed size. It will also decode the number of streams.
|
and the decompressed size. It will also decode the number of streams.
|
||||||
- For Raw or RLE blocks, it's enough to decode the size to regenerate.
|
- For Raw or RLE blocks, it's enough to decode the size to regenerate.
|
||||||
|
|
||||||
For values spanning several bytes, convention is Big-endian.
|
For values spanning several bytes, convention is Little-endian.
|
||||||
|
|
||||||
__Sizes format for Raw or RLE literals block__ :
|
__Sizes format for Raw and RLE literals block__ :
|
||||||
|
|
||||||
- Value : 0x : Regenerated size uses 5 bits (0-31).
|
- Value : x0 : Regenerated size uses 5 bits (0-31).
|
||||||
Total literal header size is 1 byte.
|
Total literal header size is 1 byte.
|
||||||
`size = h[0] & 31;`
|
`size = h[0]>>3;`
|
||||||
- Value : 10 : Regenerated size uses 12 bits (0-4095).
|
- Value : 01 : Regenerated size uses 12 bits (0-4095).
|
||||||
Total literal header size is 2 bytes.
|
Total literal header size is 2 bytes.
|
||||||
`size = ((h[0] & 15) << 8) + h[1];`
|
`size = (h[0]>>4) + (h[1]<<4);`
|
||||||
- Value : 11 : Regenerated size uses 20 bits (0-1048575).
|
- Value : 11 : Regenerated size uses 20 bits (0-1048575).
|
||||||
Total literal header size is 3 bytes.
|
Total literal header size is 3 bytes.
|
||||||
`size = ((h[0] & 15) << 16) + (h[1]<<8) + h[2];`
|
`size = (h[0]>>4) + (h[1]<<4) + (h[2]<<12);`
|
||||||
|
|
||||||
Note : it's allowed to represent a short value (ex : `13`)
|
Note : it's allowed to represent a short value (ex : `13`)
|
||||||
using a long format, accepting the reduced compactness.
|
using a long format, accepting the reduced compactness.
|
||||||
@ -499,7 +501,7 @@ Note : also applicable to "repeat-stats" blocks.
|
|||||||
Compressed and regenerated sizes use 18 bits (0-262143).
|
Compressed and regenerated sizes use 18 bits (0-262143).
|
||||||
Total literal header size is 5 bytes.
|
Total literal header size is 5 bytes.
|
||||||
|
|
||||||
Compressed and regenerated size fields follow big endian convention.
|
Compressed and regenerated size fields follow little endian convention.
|
||||||
|
|
||||||
#### Huffman Tree description
|
#### Huffman Tree description
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user