From b2092c6dc41b27dcc0b66bb470ff4371c6010c13 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Mon, 18 May 2020 12:35:44 -0700 Subject: [PATCH] [ldm] Reset loadedDictEnd when the context is reset --- lib/compress/zstd_compress.c | 1 + lib/compress/zstd_ldm.c | 8 ++++++++ lib/compress/zstdmt_compress.c | 15 +++++++++++---- 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 2cfa93051..3f963b1cf 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -1576,6 +1576,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, ZSTD_window_init(&zc->ldmState.window); ZSTD_window_clear(&zc->ldmState.window); + zc->ldmState.loadedDictEnd = 0; } DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws)); diff --git a/lib/compress/zstd_ldm.c b/lib/compress/zstd_ldm.c index 22f362807..869986a0a 100644 --- a/lib/compress/zstd_ldm.c +++ b/lib/compress/zstd_ldm.c @@ -463,6 +463,8 @@ size_t ZSTD_ldm_generateSequences( U32 const correction = ZSTD_window_correctOverflow( &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart); ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction); + /* invalidate dictionaries on overflow correction */ + ldmState->loadedDictEnd = 0; } /* 2. We enforce the maximum offset allowed. * @@ -471,6 +473,12 @@ size_t ZSTD_ldm_generateSequences( * TODO: * Test the chunk size. * * Try invalidation after the sequence generation and test the * the offset against maxDist directly. + * + * NOTE: Because of dictionaries + sequence splitting we MUST make sure + * that any offset used is valid at the END of the sequence, since it may + * be split into two sequences. This condition holds when using + * ZSTD_window_enforceMaxDist(), but if we move to checking offsets + * against maxDist directly, we'll have to carefully handle that case. */ ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, &ldmState->loadedDictEnd, NULL); /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */ diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index f6b26a757..77a137bdf 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -493,7 +493,6 @@ static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, jobSize)); /* Reset the window */ ZSTD_window_init(&serialState->ldmState.window); - serialState->ldmWindow = serialState->ldmState.window; /* Resize tables and output space if necessary. */ if (serialState->ldmState.hashTable == NULL || serialState->params.ldmParams.hashLog < hashLog) { ZSTD_free(serialState->ldmState.hashTable, cMem); @@ -508,12 +507,20 @@ static int ZSTDMT_serialState_reset(serialState_t* serialState, /* Zero the tables */ memset(serialState->ldmState.hashTable, 0, hashSize); memset(serialState->ldmState.bucketOffsets, 0, bucketSize); + + /* Update window state and fill hash table with dict */ + if (dictSize > 0) { + BYTE const* const dictEnd = (const BYTE*)dict + dictSize; + ZSTD_window_update(&serialState->ldmState.window, dict, dictSize); + ZSTD_ldm_fillHashTable(&serialState->ldmState, (const BYTE*)dict, dictEnd, ¶ms.ldmParams); + serialState->ldmState.loadedDictEnd = params.forceWindow ? 0 : (U32)(dictEnd - serialState->ldmState.window.base); + } + + /* Initialize serialState's copy of ldmWindow. */ + serialState->ldmWindow = serialState->ldmState.window; } - /* Update window state and fill hash table with dict */ if (params.ldmParams.enableLdm && dict) { - ZSTD_window_update(&serialState->ldmState.window, dict, dictSize); - ZSTD_ldm_fillHashTable(&serialState->ldmState, (const BYTE*)dict, (const BYTE*)dict + dictSize, ¶ms.ldmParams); } serialState->params = params;