mirror of
				https://github.com/facebook/zstd.git
				synced 2025-10-25 00:03:26 -04:00 
			
		
		
		
	
		
			
				
	
	
		
			296 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			296 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /* ******************************************************************
 | |
|    FSE : Finite State Entropy coder
 | |
|    header file
 | |
|    Copyright (C) 2013-2015, Yann Collet.
 | |
| 
 | |
|    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 | |
| 
 | |
|    Redistribution and use in source and binary forms, with or without
 | |
|    modification, are permitted provided that the following conditions are
 | |
|    met:
 | |
| 
 | |
|        * Redistributions of source code must retain the above copyright
 | |
|    notice, this list of conditions and the following disclaimer.
 | |
|        * Redistributions in binary form must reproduce the above
 | |
|    copyright notice, this list of conditions and the following disclaimer
 | |
|    in the documentation and/or other materials provided with the
 | |
|    distribution.
 | |
| 
 | |
|    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 | |
|    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 | |
|    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 | |
|    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 | |
|    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 | |
|    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 | |
|    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 | |
|    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 | |
|    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | |
|    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 | |
|    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | |
| 
 | |
|    You can contact the author at :
 | |
|    - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
 | |
|    - Public forum : https://groups.google.com/forum/#!forum/lz4c
 | |
| ****************************************************************** */
 | |
| #ifndef FSE_H
 | |
| #define FSE_H
 | |
| 
 | |
| #if defined (__cplusplus)
 | |
| extern "C" {
 | |
| #endif
 | |
| 
 | |
| 
 | |
| /* *****************************************
 | |
| *  Includes
 | |
| ******************************************/
 | |
| #include <stddef.h>    /* size_t, ptrdiff_t */
 | |
| 
 | |
| 
 | |
| /*-****************************************
 | |
| *  FSE simple functions
 | |
| ******************************************/
 | |
| size_t FSE_compress(void* dst, size_t maxDstSize,
 | |
|               const void* src, size_t srcSize);
 | |
| size_t FSE_decompress(void* dst,  size_t maxDstSize,
 | |
|                 const void* cSrc, size_t cSrcSize);
 | |
| /*!
 | |
| FSE_compress():
 | |
|     Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'.
 | |
|     'dst' buffer must be already allocated. Compression runs faster if maxDstSize >= FSE_compressBound(srcSize)
 | |
|     return : size of compressed data (<= maxDstSize)
 | |
|     Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
 | |
|                      if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead.
 | |
|                      if FSE_isError(return), compression failed (more details using FSE_getErrorName())
 | |
| 
 | |
| FSE_decompress():
 | |
|     Decompress FSE data from buffer 'cSrc', of size 'cSrcSize',
 | |
|     into already allocated destination buffer 'dst', of size 'maxDstSize'.
 | |
|     return : size of regenerated data (<= maxDstSize)
 | |
|              or an error code, which can be tested using FSE_isError()
 | |
| 
 | |
|     ** Important ** : FSE_decompress() doesn't decompress non-compressible nor RLE data !!!
 | |
|     Why ? : making this distinction requires a header.
 | |
|     Header management is intentionally delegated to the user layer, which can better manage special cases.
 | |
| */
 | |
| 
 | |
| 
 | |
| /* *****************************************
 | |
| *  Tool functions
 | |
| ******************************************/
 | |
| size_t FSE_compressBound(size_t size);       /* maximum compressed size */
 | |
| 
 | |
| /* Error Management */
 | |
| unsigned    FSE_isError(size_t code);        /* tells if a return value is an error code */
 | |
| const char* FSE_getErrorName(size_t code);   /* provides error code string (useful for debugging) */
 | |
| 
 | |
| 
 | |
| /* *****************************************
 | |
| *  FSE advanced functions
 | |
| ******************************************/
 | |
| /*!
 | |
| FSE_compress2():
 | |
|     Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog'
 | |
|     Both parameters can be defined as '0' to mean : use default value
 | |
|     return : size of compressed data
 | |
|     Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
 | |
|                      if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression.
 | |
|                      if FSE_isError(return), it's an error code.
 | |
| */
 | |
| size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
 | |
| 
 | |
| 
 | |
| /* *****************************************
 | |
| *  FSE detailed API
 | |
| ******************************************/
 | |
| /*!
 | |
| FSE_compress() does the following:
 | |
| 1. count symbol occurrence from source[] into table count[]
 | |
| 2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog)
 | |
| 3. save normalized counters to memory buffer using FSE_writeNCount()
 | |
| 4. build encoding table 'CTable' from normalized counters
 | |
| 5. encode the data stream using encoding table 'CTable'
 | |
| 
 | |
| FSE_decompress() does the following:
 | |
| 1. read normalized counters with FSE_readNCount()
 | |
| 2. build decoding table 'DTable' from normalized counters
 | |
| 3. decode the data stream using decoding table 'DTable'
 | |
| 
 | |
| The following API allows targeting specific sub-functions for advanced tasks.
 | |
| For example, it's possible to compress several blocks using the same 'CTable',
 | |
| or to save and provide normalized distribution using external method.
 | |
| */
 | |
| 
 | |
| /* *** COMPRESSION *** */
 | |
| 
 | |
| /*!
 | |
| FSE_count():
 | |
|    Provides the precise count of each byte within a table 'count'
 | |
|    'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1).
 | |
|    *maxSymbolValuePtr will be updated if detected smaller than initial value.
 | |
|    @return : the count of the most frequent symbol (which is not identified)
 | |
|              if return == srcSize, there is only one symbol.
 | |
|              Can also return an error code, which can be tested with FSE_isError() */
 | |
| size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
 | |
| 
 | |
| /*!
 | |
| FSE_optimalTableLog():
 | |
|    dynamically downsize 'tableLog' when conditions are met.
 | |
|    It saves CPU time, by using smaller tables, while preserving or even improving compression ratio.
 | |
|    return : recommended tableLog (necessarily <= initial 'tableLog') */
 | |
| unsigned FSE_optimalTableLog(unsigned tableLog, size_t srcSize, unsigned maxSymbolValue);
 | |
| 
 | |
| /*!
 | |
| FSE_normalizeCount():
 | |
|    normalize counters so that sum(count[]) == Power_of_2 (2^tableLog)
 | |
|    'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
 | |
|    return : tableLog,
 | |
|             or an errorCode, which can be tested using FSE_isError() */
 | |
| size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
 | |
| 
 | |
| /*!
 | |
| FSE_NCountWriteBound():
 | |
|    Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'
 | |
|    Typically useful for allocation purpose. */
 | |
| size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog);
 | |
| 
 | |
| /*!
 | |
| FSE_writeNCount():
 | |
|    Compactly save 'normalizedCounter' into 'buffer'.
 | |
|    return : size of the compressed table
 | |
|             or an errorCode, which can be tested using FSE_isError() */
 | |
| size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
 | |
| 
 | |
| 
 | |
| /*!
 | |
| Constructor and Destructor of type FSE_CTable
 | |
|     Note that its size depends on 'tableLog' and 'maxSymbolValue' */
 | |
| typedef unsigned FSE_CTable;   /* don't allocate that. It's only meant to be more restrictive than void* */
 | |
| FSE_CTable* FSE_createCTable (unsigned tableLog, unsigned maxSymbolValue);
 | |
| void        FSE_freeCTable (FSE_CTable* ct);
 | |
| 
 | |
| /*!
 | |
| FSE_buildCTable():
 | |
|    Builds @ct, which must be already allocated, using FSE_createCTable()
 | |
|    return : 0
 | |
|             or an errorCode, which can be tested using FSE_isError() */
 | |
| size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
 | |
| 
 | |
| /*!
 | |
| FSE_compress_usingCTable():
 | |
|    Compress @src using @ct into @dst which must be already allocated
 | |
|    return : size of compressed data (<= @dstCapacity)
 | |
|             or 0 if compressed data could not fit into @dst
 | |
|             or an errorCode, which can be tested using FSE_isError() */
 | |
| size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct);
 | |
| 
 | |
| /*!
 | |
| Tutorial :
 | |
| ----------
 | |
| The first step is to count all symbols. FSE_count() does this job very fast.
 | |
| Result will be saved into 'count', a table of unsigned int, which must be already allocated, and have 'maxSymbolValuePtr[0]+1' cells.
 | |
| 'src' is a table of bytes of size 'srcSize'. All values within 'src' MUST be <= maxSymbolValuePtr[0]
 | |
| maxSymbolValuePtr[0] will be updated, with its real value (necessarily <= original value)
 | |
| FSE_count() will return the number of occurrences of the most frequent symbol.
 | |
| This can be used to know if there is a single symbol within 'src', and to quickly evaluate its compressibility.
 | |
| If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
 | |
| 
 | |
| The next step is to normalize the frequencies.
 | |
| FSE_normalizeCount() will ensure that sum of frequencies is == 2 ^'tableLog'.
 | |
| It also guarantees a minimum of 1 to any Symbol with frequency >= 1.
 | |
| You can use 'tableLog'==0 to mean "use default tableLog value".
 | |
| If you are unsure of which tableLog value to use, you can ask FSE_optimalTableLog(),
 | |
| which will provide the optimal valid tableLog given sourceSize, maxSymbolValue, and a user-defined maximum (0 means "default").
 | |
| 
 | |
| The result of FSE_normalizeCount() will be saved into a table,
 | |
| called 'normalizedCounter', which is a table of signed short.
 | |
| 'normalizedCounter' must be already allocated, and have at least 'maxSymbolValue+1' cells.
 | |
| The return value is tableLog if everything proceeded as expected.
 | |
| It is 0 if there is a single symbol within distribution.
 | |
| If there is an error (ex: invalid tableLog value), the function will return an ErrorCode (which can be tested using FSE_isError()).
 | |
| 
 | |
| 'normalizedCounter' can be saved in a compact manner to a memory area using FSE_writeNCount().
 | |
| 'buffer' must be already allocated.
 | |
| For guaranteed success, buffer size must be at least FSE_NCountWriteBound().
 | |
| The result of the function is the number of bytes written into 'buffer'.
 | |
| If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError(); ex : buffer size too small).
 | |
| 
 | |
| 'normalizedCounter' can then be used to create the compression table 'CTable'.
 | |
| The space required by 'CTable' must be already allocated, using FSE_createCTable().
 | |
| You can then use FSE_buildCTable() to fill 'CTable'.
 | |
| If there is an error, FSE_buildCTable() will return an ErrorCode (which can be tested using FSE_isError()).
 | |
| 
 | |
| 'CTable' can then be used to compress 'src', with FSE_compress_usingCTable().
 | |
| Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize'
 | |
| The function returns the size of compressed data (without header), necessarily <= @dstCapacity.
 | |
| If it returns '0', compressed data could not fit into 'dst'.
 | |
| If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
 | |
| */
 | |
| 
 | |
| 
 | |
| /* *** DECOMPRESSION *** */
 | |
| 
 | |
| /*!
 | |
| FSE_readNCount():
 | |
|    Read compactly saved 'normalizedCounter' from 'rBuffer'.
 | |
|    return : size read from 'rBuffer'
 | |
|             or an errorCode, which can be tested using FSE_isError()
 | |
|             maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
 | |
| size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize);
 | |
| 
 | |
| /*!
 | |
| Constructor and Destructor of type FSE_DTable
 | |
|     Note that its size depends on 'tableLog' */
 | |
| typedef unsigned FSE_DTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
 | |
| FSE_DTable* FSE_createDTable(unsigned tableLog);
 | |
| void        FSE_freeDTable(FSE_DTable* dt);
 | |
| 
 | |
| /*!
 | |
| FSE_buildDTable():
 | |
|    Builds 'dt', which must be already allocated, using FSE_createDTable()
 | |
|    return : 0,
 | |
|             or an errorCode, which can be tested using FSE_isError() */
 | |
| size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
 | |
| 
 | |
| /*!
 | |
| FSE_decompress_usingDTable():
 | |
|    Decompress compressed source @cSrc of size @cSrcSize using @dt
 | |
|    into @dst which must be already allocated.
 | |
|    return : size of regenerated data (necessarily <= @dstCapacity)
 | |
|             or an errorCode, which can be tested using FSE_isError() */
 | |
| size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt);
 | |
| 
 | |
| /*!
 | |
| Tutorial :
 | |
| ----------
 | |
| (Note : these functions only decompress FSE-compressed blocks.
 | |
|  If block is uncompressed, use memcpy() instead
 | |
|  If block is a single repeated byte, use memset() instead )
 | |
| 
 | |
| The first step is to obtain the normalized frequencies of symbols.
 | |
| This can be performed by FSE_readNCount() if it was saved using FSE_writeNCount().
 | |
| 'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short.
 | |
| In practice, that means it's necessary to know 'maxSymbolValue' beforehand,
 | |
| or size the table to handle worst case situations (typically 256).
 | |
| FSE_readNCount() will provide 'tableLog' and 'maxSymbolValue'.
 | |
| The result of FSE_readNCount() is the number of bytes read from 'rBuffer'.
 | |
| Note that 'rBuffSize' must be at least 4 bytes, even if useful information is less than that.
 | |
| If there is an error, the function will return an error code, which can be tested using FSE_isError().
 | |
| 
 | |
| The next step is to build the decompression tables 'FSE_DTable' from 'normalizedCounter'.
 | |
| This is performed by the function FSE_buildDTable().
 | |
| The space required by 'FSE_DTable' must be already allocated using FSE_createDTable().
 | |
| If there is an error, the function will return an error code, which can be tested using FSE_isError().
 | |
| 
 | |
| 'FSE_DTable' can then be used to decompress 'cSrc', with FSE_decompress_usingDTable().
 | |
| 'cSrcSize' must be strictly correct, otherwise decompression will fail.
 | |
| FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<= dstCapacity).
 | |
| If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small)
 | |
| */
 | |
| 
 | |
| 
 | |
| #if defined (__cplusplus)
 | |
| }
 | |
| #endif
 | |
| 
 | |
| #endif  /* FSE_H */
 |