mirror of
				https://github.com/facebook/zstd.git
				synced 2025-11-04 00:02:59 -05:00 
			
		
		
		
	``` for f in $(find . \( -path ./.git -o -path ./tests/fuzz/corpora \) -prune -o -type f); do sed -i 's/Facebook, Inc\./Meta Platforms, Inc. and affiliates./' $f; done ```
		
			
				
	
	
		
			261 lines
		
	
	
		
			7.9 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			261 lines
		
	
	
		
			7.9 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/*
 | 
						|
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 | 
						|
 * All rights reserved.
 | 
						|
 *
 | 
						|
 * This source code is licensed under both the BSD-style license (found in the
 | 
						|
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 | 
						|
 * in the COPYING file in the root directory of this source tree).
 | 
						|
 * You may select, at your option, one of the above-listed licenses.
 | 
						|
 */
 | 
						|
 | 
						|
#include "seqgen.h"
 | 
						|
#include "mem.h"
 | 
						|
#include <string.h>
 | 
						|
 | 
						|
#define MIN(a, b)  ((a) < (b) ? (a) : (b))
 | 
						|
 | 
						|
static const size_t kMatchBytes = 128;
 | 
						|
 | 
						|
#define SEQ_rotl32(x,r) ((x << r) | (x >> (32 - r)))
 | 
						|
static BYTE SEQ_randByte(unsigned* src)
 | 
						|
{
 | 
						|
    static const U32 prime1 = 2654435761U;
 | 
						|
    static const U32 prime2 = 2246822519U;
 | 
						|
    U32 rand32 = *src;
 | 
						|
    rand32 *= prime1;
 | 
						|
    rand32 ^= prime2;
 | 
						|
    rand32  = SEQ_rotl32(rand32, 13);
 | 
						|
    *src = rand32;
 | 
						|
    return (BYTE)(rand32 >> 5);
 | 
						|
}
 | 
						|
 | 
						|
SEQ_stream SEQ_initStream(unsigned seed)
 | 
						|
{
 | 
						|
    SEQ_stream stream;
 | 
						|
    stream.state = 0;
 | 
						|
    XXH64_reset(&stream.xxh, 0);
 | 
						|
    stream.seed = seed;
 | 
						|
    return stream;
 | 
						|
}
 | 
						|
 | 
						|
/* Generates a single guard byte, then match length + 1 of a different byte,
 | 
						|
 * then another guard byte.
 | 
						|
 */
 | 
						|
static size_t SEQ_gen_matchLength(SEQ_stream* stream, unsigned value,
 | 
						|
                                  SEQ_outBuffer* out)
 | 
						|
{
 | 
						|
    typedef enum {
 | 
						|
        ml_first_byte = 0,
 | 
						|
        ml_match_bytes,
 | 
						|
        ml_last_byte,
 | 
						|
    } ml_state;
 | 
						|
    BYTE* const ostart = (BYTE*)out->dst;
 | 
						|
    BYTE* const oend = ostart + out->size;
 | 
						|
    BYTE* op = ostart + out->pos;
 | 
						|
 | 
						|
    switch ((ml_state)stream->state) {
 | 
						|
    case ml_first_byte:
 | 
						|
        /* Generate a single byte and pick a different byte for the match */
 | 
						|
        if (op >= oend) {
 | 
						|
            stream->bytesLeft = 1;
 | 
						|
            break;
 | 
						|
        }
 | 
						|
        *op = SEQ_randByte(&stream->seed) & 0xFF;
 | 
						|
        do {
 | 
						|
            stream->saved = SEQ_randByte(&stream->seed) & 0xFF;
 | 
						|
        } while (*op == stream->saved);
 | 
						|
        ++op;
 | 
						|
        /* State transition */
 | 
						|
        stream->state = ml_match_bytes;
 | 
						|
        stream->bytesLeft = value + 1;
 | 
						|
    /* fall-through */
 | 
						|
    case ml_match_bytes: {
 | 
						|
        /* Copy matchLength + 1 bytes to the output buffer */
 | 
						|
        size_t const setLength = MIN(stream->bytesLeft, (size_t)(oend - op));
 | 
						|
        if (setLength > 0) {
 | 
						|
            memset(op, stream->saved, setLength);
 | 
						|
            op += setLength;
 | 
						|
            stream->bytesLeft -= setLength;
 | 
						|
        }
 | 
						|
        if (stream->bytesLeft > 0)
 | 
						|
            break;
 | 
						|
        /* State transition */
 | 
						|
        stream->state = ml_last_byte;
 | 
						|
    }
 | 
						|
    /* fall-through */
 | 
						|
    case ml_last_byte:
 | 
						|
        /* Generate a single byte and pick a different byte for the match */
 | 
						|
        if (op >= oend) {
 | 
						|
            stream->bytesLeft = 1;
 | 
						|
            break;
 | 
						|
        }
 | 
						|
        do {
 | 
						|
            *op = SEQ_randByte(&stream->seed) & 0xFF;
 | 
						|
        } while (*op == stream->saved);
 | 
						|
        ++op;
 | 
						|
        /* State transition */
 | 
						|
    /* fall-through */
 | 
						|
    default:
 | 
						|
        stream->state = 0;
 | 
						|
        stream->bytesLeft = 0;
 | 
						|
        break;
 | 
						|
    }
 | 
						|
    XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos);
 | 
						|
    out->pos = op - ostart;
 | 
						|
    return stream->bytesLeft;
 | 
						|
}
 | 
						|
 | 
						|
/* Saves the current seed then generates kMatchBytes random bytes >= 128.
 | 
						|
 * Generates literal length - kMatchBytes random bytes < 128.
 | 
						|
 * Generates another kMatchBytes using the saved seed to generate a match.
 | 
						|
 * This way the match is easy to find for the compressors.
 | 
						|
 */
 | 
						|
static size_t SEQ_gen_litLength(SEQ_stream* stream, unsigned value, SEQ_outBuffer* out)
 | 
						|
{
 | 
						|
    typedef enum {
 | 
						|
        ll_start = 0,
 | 
						|
        ll_run_bytes,
 | 
						|
        ll_literals,
 | 
						|
        ll_run_match,
 | 
						|
    } ll_state;
 | 
						|
    BYTE* const ostart = (BYTE*)out->dst;
 | 
						|
    BYTE* const oend = ostart + out->size;
 | 
						|
    BYTE* op = ostart + out->pos;
 | 
						|
 | 
						|
    switch ((ll_state)stream->state) {
 | 
						|
    case ll_start:
 | 
						|
        stream->state = ll_run_bytes;
 | 
						|
        stream->saved = stream->seed;
 | 
						|
        stream->bytesLeft = MIN(kMatchBytes, value);
 | 
						|
    /* fall-through */
 | 
						|
    case ll_run_bytes:
 | 
						|
        while (stream->bytesLeft > 0 && op < oend) {
 | 
						|
            *op++ = SEQ_randByte(&stream->seed) | 0x80;
 | 
						|
            --stream->bytesLeft;
 | 
						|
        }
 | 
						|
        if (stream->bytesLeft > 0)
 | 
						|
            break;
 | 
						|
        /* State transition */
 | 
						|
        stream->state = ll_literals;
 | 
						|
        stream->bytesLeft = value - MIN(kMatchBytes, value);
 | 
						|
    /* fall-through */
 | 
						|
    case ll_literals:
 | 
						|
        while (stream->bytesLeft > 0 && op < oend) {
 | 
						|
            *op++ = SEQ_randByte(&stream->seed) & 0x7F;
 | 
						|
            --stream->bytesLeft;
 | 
						|
        }
 | 
						|
        if (stream->bytesLeft > 0)
 | 
						|
            break;
 | 
						|
        /* State transition */
 | 
						|
        stream->state = ll_run_match;
 | 
						|
        stream->bytesLeft = MIN(kMatchBytes, value);
 | 
						|
    /* fall-through */
 | 
						|
    case ll_run_match: {
 | 
						|
        while (stream->bytesLeft > 0 && op < oend) {
 | 
						|
            *op++ = SEQ_randByte(&stream->saved) | 0x80;
 | 
						|
            --stream->bytesLeft;
 | 
						|
        }
 | 
						|
        if (stream->bytesLeft > 0)
 | 
						|
            break;
 | 
						|
    }
 | 
						|
    /* fall-through */
 | 
						|
    default:
 | 
						|
        stream->state = 0;
 | 
						|
        stream->bytesLeft = 0;
 | 
						|
        break;
 | 
						|
    }
 | 
						|
    XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos);
 | 
						|
    out->pos = op - ostart;
 | 
						|
    return stream->bytesLeft;
 | 
						|
}
 | 
						|
 | 
						|
/* Saves the current seed then generates kMatchBytes random bytes >= 128.
 | 
						|
 * Generates offset - kMatchBytes of zeros to get a large offset without
 | 
						|
 * polluting the hash tables.
 | 
						|
 * Generates another kMatchBytes using the saved seed to generate a with the
 | 
						|
 * required offset.
 | 
						|
 */
 | 
						|
static size_t SEQ_gen_offset(SEQ_stream* stream, unsigned value, SEQ_outBuffer* out)
 | 
						|
{
 | 
						|
    typedef enum {
 | 
						|
        of_start = 0,
 | 
						|
        of_run_bytes,
 | 
						|
        of_offset,
 | 
						|
        of_run_match,
 | 
						|
    } of_state;
 | 
						|
    BYTE* const ostart = (BYTE*)out->dst;
 | 
						|
    BYTE* const oend = ostart + out->size;
 | 
						|
    BYTE* op = ostart + out->pos;
 | 
						|
 | 
						|
    switch ((of_state)stream->state) {
 | 
						|
    case of_start:
 | 
						|
        stream->state = of_run_bytes;
 | 
						|
        stream->saved = stream->seed;
 | 
						|
        stream->bytesLeft = MIN(value, kMatchBytes);
 | 
						|
    /* fall-through */
 | 
						|
    case of_run_bytes: {
 | 
						|
        while (stream->bytesLeft > 0 && op < oend) {
 | 
						|
            *op++ = SEQ_randByte(&stream->seed) | 0x80;
 | 
						|
            --stream->bytesLeft;
 | 
						|
        }
 | 
						|
        if (stream->bytesLeft > 0)
 | 
						|
            break;
 | 
						|
        /* State transition */
 | 
						|
        stream->state = of_offset;
 | 
						|
        stream->bytesLeft = value - MIN(value, kMatchBytes);
 | 
						|
    }
 | 
						|
    /* fall-through */
 | 
						|
    case of_offset: {
 | 
						|
        /* Copy matchLength + 1 bytes to the output buffer */
 | 
						|
        size_t const setLength = MIN(stream->bytesLeft, (size_t)(oend - op));
 | 
						|
        if (setLength > 0) {
 | 
						|
            memset(op, 0, setLength);
 | 
						|
            op += setLength;
 | 
						|
            stream->bytesLeft -= setLength;
 | 
						|
        }
 | 
						|
        if (stream->bytesLeft > 0)
 | 
						|
            break;
 | 
						|
        /* State transition */
 | 
						|
        stream->state = of_run_match;
 | 
						|
        stream->bytesLeft = MIN(value, kMatchBytes);
 | 
						|
    }
 | 
						|
    /* fall-through */
 | 
						|
    case of_run_match: {
 | 
						|
        while (stream->bytesLeft > 0 && op < oend) {
 | 
						|
            *op++ = SEQ_randByte(&stream->saved) | 0x80;
 | 
						|
            --stream->bytesLeft;
 | 
						|
        }
 | 
						|
        if (stream->bytesLeft > 0)
 | 
						|
            break;
 | 
						|
    }
 | 
						|
    /* fall-through */
 | 
						|
    default:
 | 
						|
        stream->state = 0;
 | 
						|
        stream->bytesLeft = 0;
 | 
						|
        break;
 | 
						|
    }
 | 
						|
    XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos);
 | 
						|
    out->pos = op - ostart;
 | 
						|
    return stream->bytesLeft;
 | 
						|
}
 | 
						|
 | 
						|
/* Returns the number of bytes left to generate.
 | 
						|
 * Must pass the same type/value until it returns 0.
 | 
						|
 */
 | 
						|
size_t SEQ_gen(SEQ_stream* stream, SEQ_gen_type type, unsigned value, SEQ_outBuffer* out)
 | 
						|
{
 | 
						|
    switch (type) {
 | 
						|
        case SEQ_gen_ml: return SEQ_gen_matchLength(stream, value, out);
 | 
						|
        case SEQ_gen_ll: return SEQ_gen_litLength(stream, value, out);
 | 
						|
        case SEQ_gen_of: return SEQ_gen_offset(stream, value, out);
 | 
						|
        case SEQ_gen_max: /* fall-through */
 | 
						|
        default: return 0;
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
/* Returns the xxhash of the data produced so far */
 | 
						|
XXH64_hash_t SEQ_digest(SEQ_stream const* stream)
 | 
						|
{
 | 
						|
    return XXH64_digest(&stream->xxh);
 | 
						|
}
 |