diff --git a/contrib/match_finders/zstd_edist.c b/contrib/match_finders/zstd_edist.c
index b5523f4e4..d63a7cf8d 100644
--- a/contrib/match_finders/zstd_edist.c
+++ b/contrib/match_finders/zstd_edist.c
@@ -26,7 +26,7 @@
 * Constants
 ***************************************/
 
-/* Just a sential for the entires of the diagonal matrix */
+/* Just a sentinel for the entries of the diagonal matrix */
 #define ZSTD_EDIST_DIAG_MAX (S32)(1 << 30)
 
 /* How large should a snake be to be considered a 'big' snake.
@@ -57,10 +57,10 @@ typedef struct {
     const BYTE* src;
     size_t dictSize;
     size_t srcSize;
-    S32* forwardDiag; /* Entires of the forward diagonal stored here */
-    S32* backwardDiag; /* Entires of the backward diagonal stored here.
+    S32* forwardDiag; /* Entries of the forward diagonal stored here */
+    S32* backwardDiag; /* Entries of the backward diagonal stored here.
                         * Note: this buffer and the 'forwardDiag' buffer
-                        * are contiguous. See the ZSTD_eDist_genSequences */
+                        * are contiguous. See the ZSTD_eDist_genSequences */
     ZSTD_eDist_match* matches; /* Accumulate matches of length 1 in this buffer.
                                 * In a subsequence post-processing step, we combine
                                 * contiguous matches. */
@@ -460,7 +460,7 @@ static size_t ZSTD_eDist_convertMatchesToSequences(ZSTD_Sequence* sequences,
 }
 
 /*-*************************************
-* Interal utils
+* Internal utils
 ***************************************/
 
 static size_t ZSTD_eDist_hamingDist(const BYTE* const a,
diff --git a/contrib/seqBench/Makefile b/contrib/seqBench/Makefile
new file mode 100644
index 000000000..0782961eb
--- /dev/null
+++ b/contrib/seqBench/Makefile
@@ -0,0 +1,58 @@
+# ################################################################
+# Copyright (c) 2018-present, Yann Collet, Facebook, Inc.
+# All rights reserved.
+#
+# This source code is licensed under both the BSD-style license (found in the
+# LICENSE file in the root directory of this source tree) and the GPLv2 (found
+# in the COPYING file in the root directory of this source tree).
+# ################################################################
+
+PROGDIR = ../../programs
+LIBDIR = ../../lib
+
+LIBZSTD = $(LIBDIR)/libzstd.a
+
+CPPFLAGS+= -I$(LIBDIR) -I$(LIBDIR)/common -I$(LIBDIR)/dictBuilder -I$(PROGDIR)
+
+CFLAGS ?= -O3 -g
+CFLAGS += -std=gnu99
+DEBUGFLAGS= -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
+            -Wstrict-aliasing=1 -Wswitch-enum \
+            -Wstrict-prototypes -Wundef -Wpointer-arith \
+            -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
+            -Wredundant-decls
+CFLAGS += $(DEBUGFLAGS) $(MOREFLAGS)
+
+
+default: seqBench
+
+all : seqBench
+
+seqBench: util.o timefn.o benchfn.o datagen.o xxhash.o seqBench.c $(LIBZSTD)
+	$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
+
+.PHONY: $(LIBZSTD)
+$(LIBZSTD):
+	$(MAKE) -C $(LIBDIR) libzstd.a CFLAGS="$(CFLAGS)"
+
+benchfn.o: $(PROGDIR)/benchfn.c
+	$(CC) $(CPPFLAGS) $(CFLAGS) $^ -c
+
+timefn.o: $(PROGDIR)/timefn.c
+	$(CC) $(CPPFLAGS) $(CFLAGS) $^ -c
+
+datagen.o: $(PROGDIR)/datagen.c
+	$(CC) $(CPPFLAGS) $(CFLAGS) $^ -c
+
+util.o: $(PROGDIR)/util.c
+	$(CC) $(CPPFLAGS) $(CFLAGS) $^ -c
+
+
+xxhash.o : $(LIBDIR)/common/xxhash.c
+	$(CC) $(CPPFLAGS) $(CFLAGS) $^ -c
+
+
+clean:
+	$(RM) *.o
+	$(MAKE) -C $(LIBDIR) clean > /dev/null
+	$(RM) seqBench
diff --git a/contrib/seqBench/seqBench.c b/contrib/seqBench/seqBench.c
new file mode 100644
index 000000000..7efebec7b
--- /dev/null
+++ b/contrib/seqBench/seqBench.c
@@ -0,0 +1,104 @@
+#define ZSTD_STATIC_LINKING_ONLY
+#include <zstd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>  /* NOTE(review): header name assumed - confirm */
+#include <string.h>
+
+/* seqBench :
+ * Loads the file named on the command line, turns it into sequences with
+ * ZSTD_generateSequences(), compresses those sequences with
+ * ZSTD_compressSequences(), then decompresses the frame and compares the
+ * result with the original input.
+ * @return : 0 when the round trip reproduces the input, 1 on any error. */
+int main(int argc, char *argv[]) {
+    ZSTD_CCtx* zc = NULL;
+    ZSTD_Sequence* seqs = NULL;
+    FILE* f = NULL;
+    char* inBuf = NULL;
+    char* outBuf = NULL;
+    char* validationBuf = NULL;
+    size_t inBufSize, seqsCapacity, nbSeqs, outBufCapacity, outBufSize, regenSize;
+    long fileSize;
+    int ret = 1;
+
+    if (argc != 2) {
+        printf("Usage: seqBench <file>\n"); // TODO provide the block delim option here
+        return 1;
+    }
+
+    f = fopen(argv[1], "rb");
+    if (f == NULL) {
+        printf("ERROR: cannot open %s\n", argv[1]);
+        goto cleanup;
+    }
+    fileSize = (fseek(f, 0, SEEK_END) == 0) ? ftell(f) : -1L;
+    if (fileSize < 0 || fseek(f, 0, SEEK_SET) != 0) {
+        printf("ERROR: cannot determine the size of %s\n", argv[1]);
+        goto cleanup;
+    }
+    inBufSize = (size_t)fileSize;
+
+    /* +1 keeps the allocation sizes non-zero when the input file is empty */
+    inBuf = malloc(inBufSize + 1);
+    validationBuf = malloc(inBufSize + 1);
+    seqsCapacity = ZSTD_sequenceBound(inBufSize);
+    seqs = malloc(seqsCapacity * sizeof(*seqs));
+    outBufCapacity = ZSTD_compressBound(inBufSize);
+    outBuf = malloc(outBufCapacity);
+    zc = ZSTD_createCCtx();
+    if (!inBuf || !validationBuf || !seqs || !outBuf || !zc) {
+        printf("ERROR: allocation failed\n");
+        goto cleanup;
+    }
+
+    if (fread(inBuf, 1, inBufSize, f) != inBufSize) {
+        printf("ERROR: cannot read %s\n", argv[1]);
+        goto cleanup;
+    }
+
+    nbSeqs = ZSTD_generateSequences(zc, seqs, seqsCapacity, inBuf, inBufSize);
+    if (ZSTD_isError(nbSeqs)) {
+        printf("ERROR: %s\n", ZSTD_getErrorName(nbSeqs));
+        goto cleanup;
+    }
+
+    ZSTD_CCtx_setParameter(zc, ZSTD_c_blockDelimiters, ZSTD_sf_explicitBlockDelimiters);
+    /* dstCapacity is the real size of outBuf, and only the nbSeqs entries
+     * returned by ZSTD_generateSequences() are passed on */
+    outBufSize = ZSTD_compressSequences(zc, outBuf, outBufCapacity, seqs, nbSeqs, inBuf, inBufSize);
+    if (ZSTD_isError(outBufSize)) {
+        printf("ERROR: %s\n", ZSTD_getErrorName(outBufSize));
+        goto cleanup;
+    }
+
+    regenSize = ZSTD_decompress(validationBuf, inBufSize, outBuf, outBufSize);
+    if (ZSTD_isError(regenSize)) {
+        printf("ERROR: %s\n", ZSTD_getErrorName(regenSize));
+        goto cleanup;
+    }
+
+    if (regenSize == inBufSize && memcmp(inBuf, validationBuf, inBufSize) == 0) {
+        printf("Compression and decompression were successful!\n");
+        ret = 0;
+    } else {
+        size_t i = 0;
+        printf("ERROR: input and validation buffers don't match!\n");
+        /* regenSize <= inBufSize, so the scan stays inside the decoded bytes */
+        while (i < regenSize && inBuf[i] == validationBuf[i]) {
+            i++;
+        }
+        printf("First bad index: %zu\n", i);
+    }
+
+cleanup:
+    if (f != NULL) {
+        fclose(f);
+    }
+    ZSTD_freeCCtx(zc);
+    free(seqs);
+    free(validationBuf);
+    free(outBuf);
+    free(inBuf);
+    return ret;
+}
diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index 59d441b2a..8a0c2f191 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -3003,6 +3003,10 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
     zc->seqCollector.seqIndex += seqStoreSeqSize;
 }
 
+size_t ZSTD_sequenceBound(size_t srcSize) {
+    return (srcSize / ZSTD_MINMATCH_MIN) + 1;
+}
+
 size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
                               size_t outSeqsSize, const void* src, size_t srcSize)
 {
diff --git a/lib/zstd.h b/lib/zstd.h
index 65d5adddc..cf3dd7e27 100644
--- a/lib/zstd.h
+++ b/lib/zstd.h
@@ -836,7 +836,7 @@ ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds);
 /*! ZSTD_decompressStream() :
  * Streaming decompression function.
  * Call repetitively to consume full input updating it as necessary.
- * Funtion will update both input and output `pos` fields exposing current state via these fields:
+ * Function will update both input and output `pos` fields exposing current state via these fields:
  * - `input.pos < input.size`, some input remaining and caller should provide remaining input
  *   on the next call.
  * - `output.pos < output.size`, decoder finished and flushed all remaining buffers.
@@ -844,7 +844,7 @@ ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds);
  *   call ZSTD_decompressStream() again to flush remaining data to output.
  * Note : with no additional input, amount of data flushed <= ZSTD_BLOCKSIZE_MAX.
  *
- * @return : 0 when a frame is completly decoded and fully flushed,
+ * @return : 0 when a frame is completely decoded and fully flushed,
  *           or an error code, which can be tested using ZSTD_isError(),
  *           or any other value > 0, which means there is some decoding or flushing to do to complete current frame.
  */
@@ -1396,6 +1396,15 @@ typedef enum {
   ZSTD_sf_explicitBlockDelimiters = 1 /* Representation of ZSTD_Sequence contains explicit block delimiters */
 } ZSTD_sequenceFormat_e;
 
+/*! ZSTD_sequenceBound() :
+ * `srcSize` : size of the input buffer
+ *  @return : upper-bound for the number of sequences that can be generated
+ *            from a buffer of srcSize bytes
+ *
+ *  note : returns number of sequences - to get bytes, multiply by sizeof(ZSTD_Sequence).
+ */
+ZSTDLIB_STATIC_API size_t ZSTD_sequenceBound(size_t srcSize);
+
 /*! ZSTD_generateSequences() :
  * Generate sequences using ZSTD_compress2(), given a source buffer.
  *