[fse] Reduce stack usage of FSE_decompress_wksp() by 512 bytes

* Move `counting` into the workspace
* Inrease `HUF_DECOMPRESS_WORKSPACE_SIZE` by 512 bytes
This commit is contained in:
Nick Terrell 2021-03-05 13:14:06 -08:00
parent 0f18059a4e
commit 3b1aba42cc
3 changed files with 17 additions and 10 deletions

View File

@ -352,7 +352,7 @@ size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
/**< build a fake FSE_DTable, designed to always generate the same symbolValue */
#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue))
#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2)
#define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned))
size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize);
/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)` */

View File

@ -318,22 +318,29 @@ FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(
{
const BYTE* const istart = (const BYTE*)cSrc;
const BYTE* ip = istart;
short counting[FSE_MAX_SYMBOL_VALUE+1];
unsigned tableLog;
unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
FSE_DTable* const dtable = (FSE_DTable*)workSpace;
short* counting = (short*) workSpace;
size_t const countingSize = sizeof(short) * (FSE_MAX_SYMBOL_VALUE + 1);
FSE_DTable* const dtable = (FSE_DTable*)(void*)((BYTE*)workSpace + countingSize);
DEBUG_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0);
if (wkspSize < countingSize) return ERROR(GENERIC);
/* normal FSE decoding mode */
size_t const NCountLength = FSE_readNCount_bmi2(counting, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2);
if (FSE_isError(NCountLength)) return NCountLength;
if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
assert(NCountLength <= cSrcSize);
ip += NCountLength;
cSrcSize -= NCountLength;
{
size_t const NCountLength = FSE_readNCount_bmi2(counting, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2);
if (FSE_isError(NCountLength)) return NCountLength;
if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
assert(NCountLength <= cSrcSize);
ip += NCountLength;
cSrcSize -= NCountLength;
}
if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge);
workSpace = dtable + FSE_DTABLE_SIZE_U32(tableLog);
wkspSize -= FSE_DTABLE_SIZE(tableLog);
wkspSize -= countingSize;
CHECK_F( FSE_buildDTable_internal(dtable, counting, maxSymbolValue, tableLog, workSpace, wkspSize) );

View File

@ -279,7 +279,7 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize);
* a required workspace size greater than that specified in the following
* macro.
*/
#define HUF_DECOMPRESS_WORKSPACE_SIZE (2 << 10)
#define HUF_DECOMPRESS_WORKSPACE_SIZE ((2 << 10) + (1 << 9))
#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32))
#ifndef HUF_FORCE_DECOMPRESS_X2