mirror of
				https://github.com/facebook/zstd.git
				synced 2025-10-25 00:03:26 -04:00 
			
		
		
		
	
		
			
				
	
	
		
			303 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			303 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
 | |
|  * All rights reserved.
 | |
|  *
 | |
|  * This source code is licensed under both the BSD-style license (found in the
 | |
|  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 | |
|  * in the COPYING file in the root directory of this source tree).
 | |
|  * You may select, at your option, one of the above-listed licenses.
 | |
|  */
 | |
| 
 | |
| 
 | |
| #ifndef ZSTD_COMPRESS_H
 | |
| #define ZSTD_COMPRESS_H
 | |
| 
 | |
| /*-*************************************
 | |
| *  Dependencies
 | |
| ***************************************/
 | |
| #include "zstd_internal.h"
 | |
| #include "zstdmt_compress.h"
 | |
| 
 | |
| #if defined (__cplusplus)
 | |
| extern "C" {
 | |
| #endif
 | |
| 
 | |
| /*-*************************************
 | |
| *  Constants
 | |
| ***************************************/
 | |
| static const U32 g_searchStrength = 8;
 | |
| #define HASH_READ_SIZE 8
 | |
| 
 | |
| 
 | |
| /*-*************************************
 | |
| *  Context memory management
 | |
| ***************************************/
 | |
/* Stage of a compression context; this is the type of ZSTD_CCtx_s::stage.
 * Enumerators are consecutive, starting at 0. */
typedef enum {
    ZSTDcs_created = 0,
    ZSTDcs_init    = 1,
    ZSTDcs_ongoing = 2,
    ZSTDcs_ending  = 3
} ZSTD_compressionStage_e;
 | |
/* Stage of the streaming state machine; this is the type of
 * ZSTD_CCtx_s::streamStage. Enumerators are consecutive, starting at 0. */
typedef enum {
    zcss_init  = 0,
    zcss_load  = 1,
    zcss_flush = 2
} ZSTD_cStreamStage;
 | |
| 
 | |
| typedef struct ZSTD_prefixDict_s {
 | |
|     const void* dict;
 | |
|     size_t dictSize;
 | |
|     ZSTD_dictMode_e dictMode;
 | |
| } ZSTD_prefixDict;
 | |
| 
 | |
| struct ZSTD_CCtx_s {
 | |
|     const BYTE* nextSrc;    /* next block here to continue on current prefix */
 | |
|     const BYTE* base;       /* All regular indexes relative to this position */
 | |
|     const BYTE* dictBase;   /* extDict indexes relative to this position */
 | |
|     U32   dictLimit;        /* below that point, need extDict */
 | |
|     U32   lowLimit;         /* below that point, no more data */
 | |
|     U32   nextToUpdate;     /* index from which to continue dictionary update */
 | |
|     U32   nextToUpdate3;    /* index from which to continue dictionary update */
 | |
|     U32   hashLog3;         /* dispatch table : larger == faster, more memory */
 | |
|     U32   loadedDictEnd;    /* index of end of dictionary */
 | |
|     ZSTD_compressionStage_e stage;
 | |
|     U32   dictID;
 | |
|     ZSTD_CCtx_params requestedParams;
 | |
|     ZSTD_CCtx_params appliedParams;
 | |
|     void* workSpace;
 | |
|     size_t workSpaceSize;
 | |
|     size_t blockSize;
 | |
|     U64 pledgedSrcSizePlusOne;  /* this way, 0 (default) == unknown */
 | |
|     U64 consumedSrcSize;
 | |
|     XXH64_state_t xxhState;
 | |
|     ZSTD_customMem customMem;
 | |
|     size_t staticSize;
 | |
| 
 | |
|     seqStore_t seqStore;    /* sequences storage ptrs */
 | |
|     optState_t optState;
 | |
|     U32* hashTable;
 | |
|     U32* hashTable3;
 | |
|     U32* chainTable;
 | |
|     ZSTD_entropyCTables_t* entropy;
 | |
| 
 | |
|     /* streaming */
 | |
|     char*  inBuff;
 | |
|     size_t inBuffSize;
 | |
|     size_t inToCompress;
 | |
|     size_t inBuffPos;
 | |
|     size_t inBuffTarget;
 | |
|     char*  outBuff;
 | |
|     size_t outBuffSize;
 | |
|     size_t outBuffContentSize;
 | |
|     size_t outBuffFlushedSize;
 | |
|     ZSTD_cStreamStage streamStage;
 | |
|     U32    frameEnded;
 | |
| 
 | |
|     /* Dictionary */
 | |
|     ZSTD_CDict* cdictLocal;
 | |
|     const ZSTD_CDict* cdict;
 | |
|     ZSTD_prefixDict prefixDict;   /* single-usage dictionary */
 | |
| 
 | |
|     /* Multi-threading */
 | |
|     ZSTDMT_CCtx* mtctx;
 | |
| };
 | |
| 
 | |
| 
 | |
| static const BYTE LL_Code[64] = {  0,  1,  2,  3,  4,  5,  6,  7,
 | |
|                                    8,  9, 10, 11, 12, 13, 14, 15,
 | |
|                                   16, 16, 17, 17, 18, 18, 19, 19,
 | |
|                                   20, 20, 20, 20, 21, 21, 21, 21,
 | |
|                                   22, 22, 22, 22, 22, 22, 22, 22,
 | |
|                                   23, 23, 23, 23, 23, 23, 23, 23,
 | |
|                                   24, 24, 24, 24, 24, 24, 24, 24,
 | |
|                                   24, 24, 24, 24, 24, 24, 24, 24 };
 | |
| 
 | |
| static const BYTE ML_Code[128] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
 | |
|                                   16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
 | |
|                                   32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37,
 | |
|                                   38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39,
 | |
|                                   40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
 | |
|                                   41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
 | |
|                                   42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
 | |
|                                   42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 };
 | |
| 
 | |
| /*! ZSTD_storeSeq() :
 | |
|     Store a sequence (literal length, literals, offset code and match length code) into seqStore_t.
 | |
|     `offsetCode` : distance to match, or 0 == repCode.
 | |
|     `matchCode` : matchLength - MINMATCH
 | |
| */
 | |
| MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t matchCode)
 | |
| {
 | |
| #if defined(ZSTD_DEBUG) && (ZSTD_DEBUG >= 6)
 | |
|     static const BYTE* g_start = NULL;
 | |
|     U32 const pos = (U32)((const BYTE*)literals - g_start);
 | |
|     if (g_start==NULL) g_start = (const BYTE*)literals;
 | |
|     if ((pos > 0) && (pos < 1000000000))
 | |
|         DEBUGLOG(6, "Cpos %6u :%5u literals & match %3u bytes at distance %6u",
 | |
|                pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode);
 | |
| #endif
 | |
|     /* copy Literals */
 | |
|     assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + 128 KB);
 | |
|     ZSTD_wildcopy(seqStorePtr->lit, literals, litLength);
 | |
|     seqStorePtr->lit += litLength;
 | |
| 
 | |
|     /* literal Length */
 | |
|     if (litLength>0xFFFF) {
 | |
|         seqStorePtr->longLengthID = 1;
 | |
|         seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
 | |
|     }
 | |
|     seqStorePtr->sequences[0].litLength = (U16)litLength;
 | |
| 
 | |
|     /* match offset */
 | |
|     seqStorePtr->sequences[0].offset = offsetCode + 1;
 | |
| 
 | |
|     /* match Length */
 | |
|     if (matchCode>0xFFFF) {
 | |
|         seqStorePtr->longLengthID = 2;
 | |
|         seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
 | |
|     }
 | |
|     seqStorePtr->sequences[0].matchLength = (U16)matchCode;
 | |
| 
 | |
|     seqStorePtr->sequences++;
 | |
| }
 | |
| 
 | |
| 
 | |
| /*-*************************************
 | |
| *  Match length counter
 | |
| ***************************************/
 | |
| static unsigned ZSTD_NbCommonBytes (register size_t val)
 | |
| {
 | |
|     if (MEM_isLittleEndian()) {
 | |
|         if (MEM_64bits()) {
 | |
| #       if defined(_MSC_VER) && defined(_WIN64)
 | |
|             unsigned long r = 0;
 | |
|             _BitScanForward64( &r, (U64)val );
 | |
|             return (unsigned)(r>>3);
 | |
| #       elif defined(__GNUC__) && (__GNUC__ >= 3)
 | |
|             return (__builtin_ctzll((U64)val) >> 3);
 | |
| #       else
 | |
|             static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
 | |
|                                                      0, 3, 1, 3, 1, 4, 2, 7,
 | |
|                                                      0, 2, 3, 6, 1, 5, 3, 5,
 | |
|                                                      1, 3, 4, 4, 2, 5, 6, 7,
 | |
|                                                      7, 0, 1, 2, 3, 3, 4, 6,
 | |
|                                                      2, 6, 5, 5, 3, 4, 5, 6,
 | |
|                                                      7, 1, 2, 4, 6, 4, 4, 5,
 | |
|                                                      7, 2, 6, 5, 7, 6, 7, 7 };
 | |
|             return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
 | |
| #       endif
 | |
|         } else { /* 32 bits */
 | |
| #       if defined(_MSC_VER)
 | |
|             unsigned long r=0;
 | |
|             _BitScanForward( &r, (U32)val );
 | |
|             return (unsigned)(r>>3);
 | |
| #       elif defined(__GNUC__) && (__GNUC__ >= 3)
 | |
|             return (__builtin_ctz((U32)val) >> 3);
 | |
| #       else
 | |
|             static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
 | |
|                                                      3, 2, 2, 1, 3, 2, 0, 1,
 | |
|                                                      3, 3, 1, 2, 2, 2, 2, 0,
 | |
|                                                      3, 1, 2, 0, 1, 0, 1, 1 };
 | |
|             return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
 | |
| #       endif
 | |
|         }
 | |
|     } else {  /* Big Endian CPU */
 | |
|         if (MEM_64bits()) {
 | |
| #       if defined(_MSC_VER) && defined(_WIN64)
 | |
|             unsigned long r = 0;
 | |
|             _BitScanReverse64( &r, val );
 | |
|             return (unsigned)(r>>3);
 | |
| #       elif defined(__GNUC__) && (__GNUC__ >= 3)
 | |
|             return (__builtin_clzll(val) >> 3);
 | |
| #       else
 | |
|             unsigned r;
 | |
|             const unsigned n32 = sizeof(size_t)*4;   /* calculate this way due to compiler complaining in 32-bits mode */
 | |
|             if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
 | |
|             if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
 | |
|             r += (!val);
 | |
|             return r;
 | |
| #       endif
 | |
|         } else { /* 32 bits */
 | |
| #       if defined(_MSC_VER)
 | |
|             unsigned long r = 0;
 | |
|             _BitScanReverse( &r, (unsigned long)val );
 | |
|             return (unsigned)(r>>3);
 | |
| #       elif defined(__GNUC__) && (__GNUC__ >= 3)
 | |
|             return (__builtin_clz((U32)val) >> 3);
 | |
| #       else
 | |
|             unsigned r;
 | |
|             if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
 | |
|             r += (!val);
 | |
|             return r;
 | |
| #       endif
 | |
|     }   }
 | |
| }
 | |
| 
 | |
| 
 | |
| MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit)
 | |
| {
 | |
|     const BYTE* const pStart = pIn;
 | |
|     const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1);
 | |
| 
 | |
|     while (pIn < pInLoopLimit) {
 | |
|         size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
 | |
|         if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
 | |
|         pIn += ZSTD_NbCommonBytes(diff);
 | |
|         return (size_t)(pIn - pStart);
 | |
|     }
 | |
|     if (MEM_64bits()) if ((pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; }
 | |
|     if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; }
 | |
|     if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
 | |
|     return (size_t)(pIn - pStart);
 | |
| }
 | |
| 
 | |
| /** ZSTD_count_2segments() :
 | |
| *   can count match length with `ip` & `match` in 2 different segments.
 | |
| *   convention : on reaching mEnd, match count continue starting from iStart
 | |
| */
 | |
| MEM_STATIC size_t ZSTD_count_2segments(const BYTE* ip, const BYTE* match, const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
 | |
| {
 | |
|     const BYTE* const vEnd = MIN( ip + (mEnd - match), iEnd);
 | |
|     size_t const matchLength = ZSTD_count(ip, match, vEnd);
 | |
|     if (match + matchLength != mEnd) return matchLength;
 | |
|     return matchLength + ZSTD_count(ip+matchLength, iStart, iEnd);
 | |
| }
 | |
| 
 | |
| 
 | |
| /*-*************************************
 | |
| *  Hashes
 | |
| ***************************************/
 | |
| static const U32 prime3bytes = 506832829U;
 | |
| static U32    ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes)  >> (32-h) ; }
 | |
| MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */
 | |
| 
 | |
| static const U32 prime4bytes = 2654435761U;
 | |
| static U32    ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; }
 | |
| static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); }
 | |
| 
 | |
| static const U64 prime5bytes = 889523592379ULL;
 | |
| static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u  << (64-40)) * prime5bytes) >> (64-h)) ; }
 | |
| static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); }
 | |
| 
 | |
| static const U64 prime6bytes = 227718039650203ULL;
 | |
| static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u  << (64-48)) * prime6bytes) >> (64-h)) ; }
 | |
| static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
 | |
| 
 | |
| static const U64 prime7bytes = 58295818150454627ULL;
 | |
| static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u  << (64-56)) * prime7bytes) >> (64-h)) ; }
 | |
| static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); }
 | |
| 
 | |
| static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
 | |
| static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
 | |
| static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
 | |
| 
 | |
| MEM_STATIC size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
 | |
| {
 | |
|     switch(mls)
 | |
|     {
 | |
|     default:
 | |
|     case 4: return ZSTD_hash4Ptr(p, hBits);
 | |
|     case 5: return ZSTD_hash5Ptr(p, hBits);
 | |
|     case 6: return ZSTD_hash6Ptr(p, hBits);
 | |
|     case 7: return ZSTD_hash7Ptr(p, hBits);
 | |
|     case 8: return ZSTD_hash8Ptr(p, hBits);
 | |
|     }
 | |
| }
 | |
| 
 | |
| #if defined (__cplusplus)
 | |
| }
 | |
| #endif
 | |
| 
 | |
| #endif /* ZSTD_COMPRESS_H */
 |