Merge pull request #2602 from terrelln/ldm-opt

[LDM] Speed optimization on repetitive data
2025-10-07 00:12:40 -04:00 · 2021-05-04 23:13:09 -07:00 · 2021-05-04 23:13:09 -07:00 · 8389a5122b
commit 8389a5122b
parent d40f55cd95 32823bc150
1 changed files with 47 additions and 12 deletions
--- a/lib/compress/zstd_ldm.c
+++ b/lib/compress/zstd_ldm.c
@ -57,6 +57,33 @@ static void ZSTD_ldm_gear_init(ldmRollingHashState_t* state, ldmParams_t const*
    }
 }
 /** ZSTD_ldm_gear_reset()
 * Feeds [data, data + minMatchLength) into the hash without registering any
 * splits, then stores the resulting value back into state->rolling. This
 * effectively resets the hash state. This is used when skipping over data,
 * either at the beginning of a block, or skipping sections.
 *
 * @param state          Rolling hash state; state->rolling is read as the
 *                       starting value and overwritten with the result.
 * @param data           First byte to feed; exactly minMatchLength bytes
 *                       starting here are read.
 * @param minMatchLength Number of bytes to feed into the hash.
 */
 static void ZSTD_ldm_gear_reset(ldmRollingHashState_t* state,
                                 BYTE const* data, size_t minMatchLength)
 {
    U64 hash = state->rolling;
    size_t n = 0;
    /* One gear step: shift the hash left by one bit and add the table entry
     * selected by the next input byte. */
 #define GEAR_ITER_ONCE() do {                                  \
        hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \
        n += 1;                                                \
    } while (0)
    /* Consume four bytes per iteration while at least four remain. */
    while (n + 3 < minMatchLength) {
        GEAR_ITER_ONCE();
        GEAR_ITER_ONCE();
        GEAR_ITER_ONCE();
        GEAR_ITER_ONCE();
    }
    /* Consume the remaining 0-3 bytes. */
    while (n < minMatchLength) {
        GEAR_ITER_ONCE();
    }
 #undef GEAR_ITER_ONCE
    /* Write the updated hash back: without this store the work above is
     * discarded and the caller's state is left unchanged. */
    state->rolling = hash;
 }
 /** ZSTD_ldm_gear_feed():
 *
 * Registers in the splits array all the split points found in the first
@ -255,7 +282,7 @@ void ZSTD_ldm_fillHashTable(
    while (ip < iend) {
        size_t hashed;
        unsigned n;
-        
+
        numSplits = 0;
        hashed = ZSTD_ldm_gear_feed(&hashState, ip, iend - ip, splits, &numSplits);
@ -327,16 +354,8 @@ static size_t ZSTD_ldm_generateSequences_internal(
    /* Initialize the rolling hash state with the first minMatchLength bytes */
    ZSTD_ldm_gear_init(&hashState, params);
-    {
+    ZSTD_ldm_gear_reset(&hashState, ip, minMatchLength);
-        size_t n = 0;
+    ip += minMatchLength;
        while (n < minMatchLength) {
            numSplits = 0;
            n += ZSTD_ldm_gear_feed(&hashState, ip + n, minMatchLength - n,
                                    splits, &numSplits);
        }
        ip += minMatchLength;
    }
    while (ip < ilimit) {
        size_t hashed;
@ -361,6 +380,7 @@ static size_t ZSTD_ldm_generateSequences_internal(
        for (n = 0; n < numSplits; n++) {
            size_t forwardMatchLength = 0, backwardMatchLength = 0,
                   bestMatchLength = 0, mLength;
            U32 offset;
            BYTE const* const split = candidates[n].split;
            U32 const checksum = candidates[n].checksum;
            U32 const hash = candidates[n].hash;
@ -428,9 +448,9 @@ static size_t ZSTD_ldm_generateSequences_internal(
            }
            /* Match found */
            offset = (U32)(split - base) - bestEntry->offset;
            mLength = forwardMatchLength + backwardMatchLength;
            {
                U32 const offset = (U32)(split - base) - bestEntry->offset;
                rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;
                /* Out of sequence storage */
@ -447,6 +467,21 @@ static size_t ZSTD_ldm_generateSequences_internal(
            ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);
            anchor = split + forwardMatchLength;
            /* If we find a match that ends after the data that we've hashed
             * then we have a repeating, overlapping, pattern. E.g. all zeros.
             * If one repetition of the pattern matches our `stopMask` then all
             * repetitions will. We don't need to insert them all into our table,
             * only the first one. So skip over overlapping matches.
             * This is a major speed boost (20x) for compressing a single byte
             * repeated, when that byte ends up in the table.
             */
            if (anchor > ip + hashed) {
                ZSTD_ldm_gear_reset(&hashState, anchor - minMatchLength, minMatchLength);
                /* Continue the outer loop at anchor (ip + hashed == anchor). */
                ip = anchor - hashed;
                break;
            }
        }
        ip += hashed;