Merge pull request #1432 from terrelln/regression

[regression] Add initial regression test framework
2025-11-29 00:04:37 -05:00 · 2018-11-29 16:06:40 -08:00 · 2018-11-29 16:06:40 -08:00 · 090bc808a8
commit 090bc808a8
parent f6591e4a04 4aaa36f74b
13 changed files with 1608 additions and 1 deletions
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@ -15,7 +15,8 @@ references:
        sudo apt-get -y install \
            gcc-multilib-powerpc-linux-gnu gcc-arm-linux-gnueabi \
            libc6-dev-armel-cross gcc-aarch64-linux-gnu libc6-dev-arm64-cross \
-            libc6-dev-ppc64-powerpc-cross zstd gzip coreutils
+            libc6-dev-ppc64-powerpc-cross zstd gzip coreutils \
+            libcurl4-openssl-dev

 jobs:
  # the first half of the jobs are in this test
@ -82,6 +83,49 @@ jobs:
            cp $ZSTD_VERSION.tar* $CIRCLE_ARTIFACTS
      - store_artifacts:
          path: /tmp/circleci-artifacts
+  # This job is meant to run from the nightly cron workflow, or on demand via the "regression" branch
+  regression-test:
+    docker:
+      - image: circleci/buildpack-deps:bionic
+    environment:
+      CIRCLE_ARTIFACTS: /tmp/circleci-artifacts
+    steps:
+      - checkout
+      - *install-dependencies
+      # Restore the cached resources.
+      - restore_cache:
+          # We try our best to bust the cache when the data changes by hashing
+          # data.c. If that doesn't work, simply update the version number here
+          # and below. If we fail to bust the cache, the regression testing will
+          # still work, since it has its own stamp, but will need to redownload
+          # everything.
+          keys:
+            - regression-cache-{{ checksum "tests/regression/data.c" }}-v0
+      - run:
+          name: Regression Test
+          command: |
+            make -C programs zstd
+            make -C tests/regression test
+            mkdir -p $CIRCLE_ARTIFACTS
+            ./tests/regression/test                     \
+                --cache  tests/regression/cache         \
+                --output $CIRCLE_ARTIFACTS/results.csv  \
+                --zstd   programs/zstd
+            echo "NOTE: The new results.csv is uploaded as an artifact to this job"
+            echo "      If this fails, go to the Artifacts pane in CircleCI, "
+            echo "      download /tmp/circleci-artifacts/results.csv, and if they "
+            echo "      are still good, copy it into the repo and commit it."
+            echo "> diff tests/regression/results.csv $CIRCLE_ARTIFACTS/results.csv"
+            diff tests/regression/results.csv $CIRCLE_ARTIFACTS/results.csv
+      # Only save the cache on success (default), since if the failure happened
+      # before we stamp the data cache, we will have a bad cache for this key.
+      - save_cache:
+          key: regression-cache-{{ checksum "tests/regression/data.c" }}-v0
+          paths:
+            - tests/regression/cache
+      - store_artifacts:
+          path: /tmp/circleci-artifacts
+

 workflows:
  version: 2
@ -96,6 +140,13 @@ workflows:
          filters:
            tags:
              only: /.*/
+      # Create a branch called regression and set it to dev to force a
+      # regression test run
+      - regression-test:
+          filters:
+            branches:
+              only:
+                - regression
      # Only run on release tags.
      - publish-github-release:
          requires:
@ -106,6 +157,20 @@ workflows:
              ignore: /.*/
            tags:
              only: /^v\d+\.\d+\.\d+$/
+  nightly:
+    triggers:
+      - schedule:
+          cron: "0 0 * * *"
+          filters:
+            branches:
+              only:
+                - master
+                - dev
+    jobs:
+      # Run daily long regression tests
+      - regression-test
+
+

  # Longer tests
    #- make -C tests test-zstd-nolegacy && make clean
--- a/tests/regression/Makefile
+++ b/tests/regression/Makefile
@ -0,0 +1,58 @@
+# ################################################################
+# Copyright (c) 2015-present, Facebook, Inc.
+# All rights reserved.
+#
+# This source code is licensed under both the BSD-style license (found in the
+# LICENSE file in the root directory of this source tree) and the GPLv2 (found
+# in the COPYING file in the root directory of this source tree).
+# ################################################################
+
+CFLAGS ?= -O3
+
+# Compile and link flags for libcurl, queried once at parse time.
+CURL_CFLAGS := $(shell curl-config --cflags)
+CURL_LDFLAGS := $(shell curl-config --libs)
+
+PROGDIR := ../../programs
+LIBDIR := ../../lib
+ZSTD_CPPFLAGS := -I$(PROGDIR) -I$(LIBDIR) -I$(LIBDIR)/common
+
+# Mandatory flags are kept separate from the user-overridable
+# CFLAGS / CPPFLAGS / LDFLAGS so that overriding those does not drop them.
+REGRESSION_CFLAGS = $(CFLAGS) $(CURL_CFLAGS)
+REGRESSION_CPPFLAGS = $(CPPFLAGS) $(ZSTD_CPPFLAGS)
+REGRESSION_LDFLAGS = $(LDFLAGS) $(CURL_LDFLAGS)
+
+# `all` names a command, not a file. `test` is NOT phony: it is the binary.
+.PHONY: all
+all: test
+
+xxhash.o: $(LIBDIR)/common/xxhash.c $(LIBDIR)/common/xxhash.h
+	$(CC) $(REGRESSION_CFLAGS) $(REGRESSION_CPPFLAGS) $< -c -o $@
+
+util.o: $(PROGDIR)/util.c $(PROGDIR)/util.h
+	$(CC) $(REGRESSION_CFLAGS) $(REGRESSION_CPPFLAGS) $< -c -o $@
+
+# data.c also includes mem.h, so a change there must rebuild data.o.
+data.o: data.c data.h $(PROGDIR)/util.h $(LIBDIR)/common/xxhash.h $(LIBDIR)/common/mem.h
+	$(CC) $(REGRESSION_CFLAGS) $(REGRESSION_CPPFLAGS) $< -c -o $@
+
+config.o: config.c config.h levels.h
+	$(CC) $(REGRESSION_CFLAGS) $(REGRESSION_CPPFLAGS) $< -c -o $@
+
+# Recipe-less rule: records that method.h includes these headers, so anything
+# depending on method.h is rebuilt when one of them changes.
+method.h: data.h config.h result.h
+
+method.o: method.c method.h
+	$(CC) $(REGRESSION_CFLAGS) $(REGRESSION_CPPFLAGS) $< -c -o $@
+
+result.o: result.c result.h
+	$(CC) $(REGRESSION_CFLAGS) $(REGRESSION_CPPFLAGS) $< -c -o $@
+
+test.o: test.c data.h config.h method.h
+	$(CC) $(REGRESSION_CFLAGS) $(REGRESSION_CPPFLAGS) $< -c -o $@
+
+# Always ask the library Makefile whether libzstd.a is current. Without this
+# the local copy was never refreshed once it existed, so library changes were
+# silently ignored by the regression binary.
+.PHONY: FORCE
+FORCE:
+
+$(LIBDIR)/libzstd.a: FORCE
+	$(MAKE) -C $(LIBDIR) libzstd.a
+
+libzstd.a: $(LIBDIR)/libzstd.a
+	cp $< $@
+
+test: test.o data.o config.o util.o method.o result.o xxhash.o libzstd.a
+	$(CC) $^ $(REGRESSION_LDFLAGS) -o $@
+
+.PHONY: clean
+clean:
+	$(MAKE) -C $(LIBDIR) clean
+	$(RM) *.o *.a test
--- a/tests/regression/config.c
+++ b/tests/regression/config.c
@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include "config.h"
+
+/*
+ * Define a config for each fast level we want to test with.
+ * FAST_LEVEL(x) emits two objects: a one-element parameter array holding the
+ * negated level (stored in an unsigned field), and a config named "level -x"
+ * whose CLI argument is "--fast=x".
+ */
+#define FAST_LEVEL(x)                                               \
+    param_value_t const level_fast##x##_param_values[] = {          \
+        {.param = ZSTD_p_compressionLevel, .value = (unsigned)-x},  \
+    };                                                              \
+    config_t const level_fast##x = {                                \
+        .name = "level -" #x,                                       \
+        .cli_args = "--fast=" #x,                                   \
+        .param_values = PARAM_VALUES(level_fast##x##_param_values), \
+    };
+
+/*
+ * Define a config for each level we want to test with.
+ * LEVEL(x) emits a one-element parameter array holding the level, and a
+ * config named "level x" whose CLI argument is "-x".
+ */
+#define LEVEL(x)                                                  \
+    param_value_t const level_##x##_param_values[] = {            \
+        {.param = ZSTD_p_compressionLevel, .value = (unsigned)x}, \
+    };                                                            \
+    config_t const level_##x = {                                  \
+        .name = "level " #x,                                      \
+        .cli_args = "-" #x,                                       \
+        .param_values = PARAM_VALUES(level_##x##_param_values),   \
+    };
+
+
+/*
+ * Initializer for a param_values_t from an array (not a pointer): the element
+ * count is computed with sizeof. It is defined after the macros that mention
+ * it, which is fine because they are only expanded by the include below.
+ */
+#define PARAM_VALUES(pv) \
+    { .data = pv, .size = sizeof(pv) / sizeof((pv)[0]) }
+
+#include "levels.h"
+
+#undef LEVEL
+#undef FAST_LEVEL
+
+/*
+ * Build the NULL-terminated list of configs by re-including levels.h with
+ * LEVEL/FAST_LEVEL redefined to emit the address of each config defined above.
+ */
+static config_t const* g_configs[] = {
+#define FAST_LEVEL(x) &level_fast##x,
+#define LEVEL(x) &level_##x,
+#include "levels.h"
+#undef LEVEL
+#undef FAST_LEVEL
+    NULL,
+};
+
+/* Public view of the list declared in config.h. */
+config_t const* const* configs = g_configs;
+
+/**
+ * Looks up the compression level stored in a config.
+ *
+ * @param config The config whose parameter list is searched.
+ *
+ * @returns The value of the first ZSTD_p_compressionLevel parameter converted
+ *          to int (fast levels are stored negated in an unsigned field, so the
+ *          conversion restores the negative value), or CONFIG_NO_LEVEL when
+ *          the config does not set a level.
+ */
+int config_get_level(config_t const* config) {
+    param_values_t const params = config->param_values;
+    for (size_t i = 0; i < params.size; ++i) {
+        if (params.data[i].param == ZSTD_p_compressionLevel)
+            return (int)params.data[i].value;
+    }
+    return CONFIG_NO_LEVEL;
+}
--- a/tests/regression/config.h
+++ b/tests/regression/config.h
@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef CONFIG_H
+#define CONFIG_H
+
+#include <stddef.h>
+
+#define ZSTD_STATIC_LINKING_ONLY
+#include <zstd.h>
+
+/** A single (parameter, value) pair. */
+typedef struct {
+    ZSTD_cParameter param;
+    unsigned value;  /**< Negative levels are stored converted to unsigned. */
+} param_value_t;
+
+/** A counted array of parameter/value pairs. */
+typedef struct {
+    size_t size;                /**< Number of elements in data. */
+    param_value_t const* data;  /**< The elements. */
+} param_values_t;
+
+/**
+ * The config tells the compression method what options to use.
+ */
+typedef struct {
+    const char* name;  /**< Identifies the config in the results table */
+    /**
+     * Optional arguments to pass to the CLI. If not set, CLI-based methods
+     * will skip this config.
+     */
+    char const* cli_args;
+    /**
+     * Parameters to pass to the advanced API. If the advanced API isn't used,
+     * the parameters will be derived from these.
+     */
+    param_values_t param_values;
+} config_t;
+
+#define CONFIG_NO_LEVEL (-ZSTD_TARGETLENGTH_MAX - 1)
+/**
+ * Returns the compression level specified by the config, or CONFIG_NO_LEVEL if
+ * no level is specified. Note that 0 is a valid compression level, meaning
+ * default.
+ */
+int config_get_level(config_t const* config);
+
+/**
+ * The NULL-terminated list of configs.
+ */
+extern config_t const* const* configs;
+
+#endif
--- a/tests/regression/data.c
+++ b/tests/regression/data.c
@ -0,0 +1,493 @@
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include "data.h"
+
+#include <assert.h>
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <sys/stat.h>
+
+#include <curl/curl.h>
+
+#include "mem.h"
+#include "util.h"
+#define XXH_STATIC_LINKING_ONLY
+#include "xxhash.h"
+
+/**
+ * Data objects
+ */
+
+#define REGRESSION_RELEASE(x) \
+    "https://github.com/facebook/zstd/releases/download/regression-data/" x
+
+/* The silesia archive, extracted as a directory. */
+data_t silesia = {
+    .url = REGRESSION_RELEASE("silesia.tar.zst"),
+    .name = "silesia",
+    .type = data_type_dir,
+    .xxhash64 = 0x67558ee5506918b4LL,
+};
+
+/* The same download (same URL and hash), kept as a single decompressed file. */
+data_t silesia_tar = {
+    .url = REGRESSION_RELEASE("silesia.tar.zst"),
+    .name = "silesia.tar",
+    .type = data_type_file,
+    .xxhash64 = 0x67558ee5506918b4LL,
+};
+
+/* NULL-terminated; mutable here because data_init() fills in each path. */
+static data_t* g_data[] = {
+    &silesia,
+    &silesia_tar,
+    NULL,
+};
+
+/* Read-only view of g_data exported through data.h. */
+data_t const* const* data = (data_t const* const*)g_data;
+
+/**
+ * data buffer helper functions (documented in header).
+ */
+
+/* Allocates an empty buffer; data stays NULL and capacity 0 on failure. */
+data_buffer_t data_buffer_create(size_t const capacity) {
+    data_buffer_t result = {};
+    uint8_t* const storage = (uint8_t*)malloc(capacity);
+
+    result.data = storage;
+    /* Only advertise the capacity when the allocation succeeded. */
+    if (storage != NULL)
+        result.capacity = capacity;
+    return result;
+}
+
+data_buffer_t data_buffer_read(char const* filename) {
+    data_buffer_t buffer = {};
+
+    uint64_t const size = UTIL_getFileSize(filename);
+    if (size == UTIL_FILESIZE_UNKNOWN) {
+        fprintf(stderr, "unknown size for %s\n", filename);
+        return buffer;
+    }
+
+    buffer.data = (uint8_t*)malloc(size);
+    if (buffer.data == NULL) {
+        fprintf(stderr, "malloc failed\n");
+        return buffer;
+    }
+    buffer.capacity = size;
+
+    FILE* file = fopen(filename, "rb");
+    if (file == NULL) {
+        fprintf(stderr, "file null\n");
+        goto err;
+    }
+    buffer.size = fread(buffer.data, 1, buffer.capacity, file);
+    fclose(file);
+    if (buffer.size != buffer.capacity) {
+        fprintf(stderr, "read %zu != %zu\n", buffer.size, buffer.capacity);
+        goto err;
+    }
+
+    return buffer;
+err:
+    free(buffer.data);
+    memset(&buffer, 0, sizeof(buffer));
+    return buffer;
+
+}
+
+/* Loads the file behind a file-type datum; other types give an empty buffer. */
+data_buffer_t data_buffer_get(data_t const* data) {
+    if (data->type == data_type_file)
+        return data_buffer_read(data->path);
+
+    {
+        data_buffer_t const empty = {};
+        return empty;
+    }
+}
+
+/*
+ * Orders two buffers: first by memcmp() over their common prefix, then by
+ * length (the shorter buffer sorts first). Returns 0 only for equal contents.
+ */
+int data_buffer_compare(data_buffer_t buffer1, data_buffer_t buffer2) {
+    size_t const common =
+        buffer2.size < buffer1.size ? buffer2.size : buffer1.size;
+    int const prefix = memcmp(buffer1.data, buffer2.data, common);
+
+    if (prefix != 0)
+        return prefix;
+    /* Equal prefixes: yields -1, 0 or 1 according to the sizes. */
+    return (buffer1.size > buffer2.size) - (buffer1.size < buffer2.size);
+}
+
+/* Releases the buffer's storage. The caller's copy of the struct is unchanged. */
+void data_buffer_free(data_buffer_t buffer) {
+    free(buffer.data);
+}
+
+/**
+ * Initialization and download functions.
+ */
+
+static char* g_data_dir = NULL;
+
+/**
+ * Creates indir and any missing parent directories, like `mkdir -p`.
+ * Directories are created with mode S_IRWXU.
+ *
+ * @param indir The directory path; must be a non-empty string.
+ *
+ * @returns 0 on success, EINVAL for a NULL or empty path, ENOMEM if the path
+ *          could not be copied, otherwise the errno reported by mkdir().
+ */
+static int ensure_directory_exists(char const* indir) {
+    char* dir;
+    char* end;
+    int ret = 0;
+
+    /* The scan below starts at dir + 1, so an empty path would read past the
+     * terminating NUL. Reject it up front. */
+    if (indir == NULL || indir[0] == '\0')
+        return EINVAL;
+
+    dir = strdup(indir);
+    if (dir == NULL)
+        return ENOMEM;
+    end = dir;
+
+    do {
+        /* Find the next directory level. Pre-incrementing skips a leading '/'
+         * on the first pass and the previous separator on later passes. */
+        for (++end; *end != '\0' && *end != '/'; ++end)
+            ;
+        /* End the string there, make the directory, and restore the string. */
+        char const save = *end;
+        *end = '\0';
+        int const isdir = UTIL_isDirectory(dir);
+        ret = mkdir(dir, S_IRWXU);
+        /* Capture errno immediately so nothing later can overwrite it. */
+        int const mkdir_errno = errno;
+        *end = save;
+        /* It's okay if the directory already exists. */
+        if (ret == 0 || (mkdir_errno == EEXIST && isdir))
+            continue;
+        ret = mkdir_errno;
+        fprintf(stderr, "mkdir() failed\n");
+        goto out;
+    } while (*end != '\0');
+
+    ret = 0;
+out:
+    free(dir);
+    return ret;
+}
+
+/**
+ * Joins three strings into one freshly allocated, NUL-terminated string.
+ * The caller owns the result and must free() it. Returns NULL if the
+ * allocation fails.
+ */
+static char* cat3(char const* str1, char const* str2, char const* str3) {
+    size_t const len1 = strlen(str1);
+    size_t const len2 = strlen(str2);
+    size_t const len3 = strlen(str3);
+    char* const joined = (char*)malloc(len1 + len2 + len3 + 1);
+
+    if (joined == NULL)
+        return NULL;
+    memcpy(joined, str1, len1);
+    memcpy(joined + len1, str2, len2);
+    /* The last copy also carries str3's terminating NUL. */
+    memcpy(joined + len1 + len2, str3, len3 + 1);
+    return joined;
+}
+
+/**
+ * State needed by the curl callback.
+ * It takes data from curl, hashes it, and writes it to the file.
+ */
+typedef struct {
+    FILE* file;
+    XXH64_state_t xxhash64;
+    int error;
+} curl_data_t;
+
+/**
+ * Builds the curl callback state for one data object: resets the hash and
+ * opens a pipe to a shell command that consumes the downloaded bytes.
+ * On failure, error is set (ENOMEM or errno from popen) and file is NULL.
+ */
+static curl_data_t curl_data_create(data_t const* data) {
+    curl_data_t state = {};
+    char const* prefix;
+    char const* target;
+    char* command;
+
+    XXH64_reset(&state.xxhash64, 0);
+
+    assert(UTIL_isDirectory(g_data_dir));
+
+    if (data->type == data_type_file) {
+        /* Decompress the resource and store to the path. */
+        prefix = "zstd -dqfo '";
+        target = data->path;
+    } else {
+        /* Decompress and extract the resource to the cache directory. */
+        prefix = "zstd -dc | tar -x -C '";
+        target = g_data_dir;
+    }
+
+    command = cat3(prefix, target, "'");
+    if (command == NULL) {
+        state.error = ENOMEM;
+        return state;
+    }
+    state.file = popen(command, "w");
+    free(command);
+    if (state.file == NULL)
+        state.error = errno;
+
+    return state;
+}
+
+/**
+ * Free the curl state by closing its pipe.
+ *
+ * @returns The result of pclose(), or 0 if the state never opened a pipe
+ *          (creation failed), since pclose(NULL) is not valid.
+ */
+static int curl_data_free(curl_data_t cdata) {
+    if (cdata.file == NULL)
+        return 0;
+    return pclose(cdata.file);
+}
+
+/**
+ * curl write callback. Writes the received bytes to the pipe and updates the
+ * hash with exactly the bytes that were written.
+ *
+ * @returns The number of bytes consumed. curl compares this against
+ *          size * count and aborts the transfer when it is smaller, so the
+ *          item count from fwrite() is scaled back to bytes.
+ */
+static size_t curl_write(void* data, size_t size, size_t count, void* ptr) {
+    curl_data_t* cdata = (curl_data_t*)ptr;
+    size_t const items = fwrite(data, size, count, cdata->file);
+    size_t const bytes = items * size;
+    XXH64_update(&cdata->xxhash64, data, bytes);
+    return bytes;
+}
+
+/**
+ * Download a single data object, stream it through the unpack pipe, and
+ * verify both the unpacked output and the hash of the downloaded bytes.
+ *
+ * @param curl An initialized easy handle with the write callback installed.
+ * @param data The data object to fetch.
+ *
+ * @returns 0 on success, otherwise a non-zero error code.
+ */
+static int curl_download_datum(CURL* curl, data_t const* data) {
+    curl_data_t cdata = curl_data_create(data);
+    int err = EFAULT;
+    int pipe_open = 1;
+
+    if (cdata.error != 0) {
+        /* Creation failed, so there is no pipe to close. */
+        fprintf(stderr, "downloading '%s' failed\n", data->name);
+        return cdata.error;
+    }
+
+    /* Download the data. */
+    if (curl_easy_setopt(curl, CURLOPT_URL, data->url) != 0)
+        goto out;
+    if (curl_easy_setopt(curl, CURLOPT_WRITEDATA, &cdata) != 0)
+        goto out;
+    if (curl_easy_perform(curl) != 0) {
+        fprintf(stderr, "downloading '%s' failed\n", data->url);
+        goto out;
+    }
+
+    /* Close the pipe before looking at the output: pclose() waits for the
+     * child command to finish, so the file or directory is complete (or the
+     * command has reported failure) by the time we check for it. */
+    pipe_open = 0;
+    {
+        int const close_err = curl_data_free(cdata);
+        if (close_err != 0) {
+            fprintf(stderr, "failed to write data for '%s'\n", data->name);
+            err = close_err;
+            goto out;
+        }
+    }
+
+    /* check that the file exists. */
+    if (data->type == data_type_file && !UTIL_isRegularFile(data->path)) {
+        fprintf(stderr, "output file '%s' does not exist\n", data->path);
+        goto out;
+    }
+    if (data->type == data_type_dir && !UTIL_isDirectory(data->path)) {
+        fprintf(stderr, "output directory '%s' does not exist\n", data->path);
+        goto out;
+    }
+    /* Check that the hash matches. */
+    if (XXH64_digest(&cdata.xxhash64) != data->xxhash64) {
+        fprintf(
+            stderr,
+            "checksum does not match: %llx != %llx\n",
+            (unsigned long long)XXH64_digest(&cdata.xxhash64),
+            (unsigned long long)data->xxhash64);
+        goto out;
+    }
+
+    err = 0;
+out:
+    /* Error paths that left before the close above still own the pipe. */
+    if (pipe_open)
+        (void)curl_data_free(cdata);
+    if (err != 0)
+        fprintf(stderr, "downloading '%s' failed\n", data->name);
+    return err;
+}
+
+/**
+ * Download all the data.
+ *
+ * @param data NULL-terminated list of data objects to fetch, in order.
+ *
+ * @returns 0 if every object downloaded and verified, EFAULT otherwise.
+ */
+static int curl_download_data(data_t const* const* data) {
+    CURL* curl;
+    int err = EFAULT;
+
+    if (curl_global_init(CURL_GLOBAL_ALL) != 0)
+        return EFAULT;
+
+    curl = curl_easy_init();
+    /* Global init succeeded, so it must be undone even without a handle. */
+    if (curl == NULL)
+        goto global_cleanup;
+
+    if (curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1L) != 0)
+        goto out;
+    if (curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L) != 0)
+        goto out;
+    if (curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, curl_write) != 0)
+        goto out;
+
+    assert(data != NULL);
+    /* Stop at the first object that fails. */
+    for (; *data != NULL; ++data) {
+        if (curl_download_datum(curl, *data) != 0)
+            goto out;
+    }
+
+    err = 0;
+out:
+    curl_easy_cleanup(curl);
+global_cleanup:
+    curl_global_cleanup();
+    return err;
+}
+
+/**
+ * Fill the path member variable of the data objects with "<dir>/<name>".
+ *
+ * @param data NULL-terminated list of data objects to update.
+ * @param dir  The cache directory.
+ *
+ * @returns 0 on success, or ENOMEM if a path could not be allocated. Paths
+ *          set before the failure are left in place.
+ */
+static int data_create_paths(data_t* const* data, char const* dir) {
+    assert(data != NULL);
+    for (; *data != NULL; ++data) {
+        data_t* const datum = *data;
+        datum->path = cat3(dir, "/", datum->name);
+        if (datum->path == NULL)
+            return ENOMEM;
+    }
+    return 0;
+}
+
+/** Release every object's path string and reset the member to NULL. */
+static void data_free_paths(data_t* const* data) {
+    size_t i;
+
+    assert(data != NULL);
+    for (i = 0; data[i] != NULL; ++i) {
+        /* path is declared const, hence the cast for free(). */
+        free((void*)data[i]->path);
+        data[i]->path = NULL;
+    }
+}
+
+static char const kStampName[] = "STAMP";
+
+/* Feed a 64-bit value to the hash in little-endian byte order. */
+static void xxh_update_le(XXH64_state_t* state, uint64_t data) {
+    uint64_t const le = MEM_isLittleEndian() ? data : MEM_swap64(data);
+    XXH64_update(state, &le, sizeof(le));
+}
+
+/**
+ * Compute the stamp: a hash over each object's name, expected hash and type.
+ * The URL is left out on purpose, and the path is derived from the name.
+ */
+static uint64_t stamp_hash(data_t const* const* data) {
+    XXH64_state_t hasher;
+    size_t i;
+
+    XXH64_reset(&hasher, 0);
+    assert(data != NULL);
+    for (i = 0; data[i] != NULL; ++i) {
+        data_t const* const entry = data[i];
+        XXH64_update(&hasher, entry->name, strlen(entry->name));
+        xxh_update_le(&hasher, entry->xxhash64);
+        xxh_update_le(&hasher, entry->type);
+    }
+    return XXH64_digest(&hasher);
+}
+
+/**
+ * Compare the stamp stored in dir against the stamp of the current data list.
+ * Returns non-zero only when a readable stamp file exists and matches.
+ */
+static int stamp_check(char const* dir, data_t const* const* data) {
+    char* const path = cat3(dir, "/", kStampName);
+    uint64_t const want = stamp_hash(data);
+    XXH64_canonical_t stored;
+    FILE* in = NULL;
+    int same = 0;
+
+    if (path == NULL)
+        return 0;
+
+    if (!UTIL_isRegularFile(path)) {
+        fprintf(stderr, "stamp does not exist: recreating the data cache\n");
+    } else if ((in = fopen(path, "rb")) == NULL) {
+        fprintf(stderr, "could not open stamp: recreating the data cache\n");
+    } else if (fread(&stored, sizeof(stored), 1, in) != 1) {
+        fprintf(stderr, "invalid stamp: recreating the data cache\n");
+    } else {
+        same = (want == XXH64_hashFromCanonical(&stored));
+        if (same)
+            fprintf(stderr, "stamp matches: reusing the cached data\n");
+        else
+            fprintf(stderr, "stamp does not match: recreating the data cache\n");
+    }
+
+    free(path);
+    if (in != NULL)
+        fclose(in);
+    return same;
+}
+
+/**
+ * On success write a new stamp, on failure delete the old stamp.
+ *
+ * @param dir      The cache directory holding the stamp file.
+ * @param data     The data list the stamp is computed from.
+ * @param data_err The result of populating the cache; non-zero means failure.
+ *
+ * @returns 0 if a new stamp was written and flushed, ENOMEM if the stamp path
+ *          could not be built, data_err if it was non-zero, otherwise EIO.
+ */
+static int
+stamp_write(char const* dir, data_t const* const* data, int const data_err) {
+    char* stamp = cat3(dir, "/", kStampName);
+    FILE* stampfile = NULL;
+    XXH64_canonical_t hash;
+    int err = EIO;
+
+    if (stamp == NULL)
+        return ENOMEM;
+
+    if (data_err != 0) {
+        err = data_err;
+        goto out;
+    }
+
+    XXH64_canonicalFromHash(&hash, stamp_hash(data));
+
+    stampfile = fopen(stamp, "wb");
+    if (stampfile == NULL)
+        goto out;
+    if (fwrite(&hash, sizeof(hash), 1, stampfile) != 1)
+        goto out;
+    /* fwrite() only fills the stdio buffer. The stamp is not on disk until
+     * fclose() flushes it, so a close failure is a write failure. */
+    {
+        int const close_err = fclose(stampfile);
+        stampfile = NULL;
+        if (close_err != 0)
+            goto out;
+    }
+    err = 0;
+    fprintf(stderr, "stamped new data cache\n");
+out:
+    if (stampfile != NULL)
+        fclose(stampfile);
+    if (err != 0)
+        /* Ignore errors. */
+        unlink(stamp);
+    free(stamp);
+    return err;
+}
+
+/*
+ * Prepares the data cache in dir: creates the directory, records it, derives
+ * each object's path, and downloads everything unless the stamp matches.
+ */
+int data_init(char const* dir) {
+    int err;
+
+    if (dir == NULL)
+        return EINVAL;
+
+    /* The directory must exist before anything else touches the cache. */
+    if ((err = ensure_directory_exists(dir)) != 0)
+        return err;
+
+    /* Remember the cache directory for the download helpers. */
+    g_data_dir = strdup(dir);
+    if (g_data_dir == NULL)
+        return ENOMEM;
+
+    if ((err = data_create_paths(g_data, dir)) != 0)
+        return err;
+
+    /* A matching stamp means the cache is usable as is. This check has to
+     * come before any change to the cache. Past this point the cache is
+     * modified, so stamp_write() MUST run to bring the STAMP up to date. */
+    if (stamp_check(dir, data))
+        return 0;
+
+    err = curl_download_data(data);
+
+    /* Last step: it needs to know whether the download succeeded. */
+    stamp_write(dir, data, err);
+    return err;
+}
+
+/* Frees the derived paths and the saved cache directory set by data_init(). */
+void data_finish(void) {
+    data_free_paths(g_data);
+    free(g_data_dir);
+    /* Reset so a repeated call does not free the same pointer twice. */
+    g_data_dir = NULL;
+}
--- a/tests/regression/data.h
+++ b/tests/regression/data.h
@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef DATA_H
+#define DATA_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+typedef enum {
+    data_type_file = 1,  /**< This data is a file. *.zst */
+    data_type_dir = 2,   /**< This data is a directory. *.tar.zst */
+} data_type_t;
+
+/** Describes one downloadable resource used by the regression tests. */
+typedef struct {
+    char const* url;   /**< Where to get this resource. */
+    uint64_t xxhash64; /**< Hash of the url contents. */
+    char const* name;  /**< The logical name of the resource (no extension). */
+    data_type_t type;  /**< The type of this resource. */
+    char const* path;  /**< The path of the unpacked resource (derived). */
+    size_t size;       /**< NOTE(review): purpose not documented; confirm. */
+} data_t;
+
+/**
+ * The NULL-terminated list of data objects.
+ */
+extern data_t const* const* data;
+
+/**
+ * Initializes the data module and downloads the data necessary.
+ * Caches the downloads in dir. We add a stamp file in the directory after
+ * a successful download. If a stamp file already exists, and matches our
+ * current data stamp, we will use the cached data without downloading.
+ *
+ * @param dir The directory to cache the downloaded data into.
+ *
+ * @returns 0 on success.
+ */
+int data_init(char const* dir);
+
+/**
+ * Must be called at exit to free resources allocated by data_init().
+ */
+void data_finish(void);
+
+/** A heap buffer passed by value; release it with data_buffer_free(). */
+typedef struct {
+    uint8_t* data;    /**< The storage, or NULL for a failed/empty buffer. */
+    size_t size;      /**< Number of bytes of data currently in use. */
+    size_t capacity;  /**< Number of bytes allocated for data. */
+} data_buffer_t;
+
+/**
+ * Read the file that data points to into a buffer.
+ * NOTE: data must be a file, not a directory.
+ *
+ * @returns The buffer, which is NULL on failure.
+ */
+data_buffer_t data_buffer_get(data_t const* data);
+
+/**
+ * Read the contents of filename into a buffer.
+ *
+ * @returns The buffer, which is NULL on failure.
+ */
+data_buffer_t data_buffer_read(char const* filename);
+
+/**
+ * Create a buffer with the specified capacity.
+ *
+ * @returns The buffer, which is NULL on failure.
+ */
+data_buffer_t data_buffer_create(size_t capacity);
+
+/**
+ * Compares two buffers: calls memcmp() on their common prefix and, when that
+ * is equal, orders the shorter buffer first.
+ *
+ * @returns 0 if the contents are identical, non-zero otherwise.
+ */
+int data_buffer_compare(data_buffer_t buffer1, data_buffer_t buffer2);
+
+/**
+ * Frees an allocated buffer.
+ */
+void data_buffer_free(data_buffer_t buffer);
+
+
+#endif
--- a/tests/regression/levels.h
+++ b/tests/regression/levels.h
@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef LEVEL
+# error LEVEL(x) must be defined
+#endif
+#ifndef FAST_LEVEL
+# error FAST_LEVEL(x) must be defined
+#endif
+
+/**
+ * The levels are chosen to trigger every strategy in every source size,
+ * as well as some fast levels and the default level.
+ * If you change the compression levels, you should probably update these.
+ */
+
+FAST_LEVEL(5)
+
+FAST_LEVEL(3)
+
+FAST_LEVEL(1)
+LEVEL(0)
+LEVEL(1)
+
+LEVEL(3)
+LEVEL(4)
+LEVEL(5)
+LEVEL(6)
+LEVEL(7)
+
+LEVEL(9)
+
+LEVEL(13)
+
+LEVEL(16)
+
+LEVEL(19)
--- a/tests/regression/method.c
+++ b/tests/regression/method.c
@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include "method.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <zstd.h>
+
+static char const* g_zstdcli = NULL;
+
+/* Stores the pointer itself (no copy), so the string must outlive its use. */
+void method_set_zstdcli(char const* zstdcli) {
+    g_zstdcli = zstdcli;
+}
+
+/**
+ * Macro to get a pointer of type, given ptr, which is a member variable with
+ * the given name, member.
+ *
+ *     method_state_t* base = ...;
+ *     simple_state_t* state = container_of(base, simple_state_t, base);
+ *
+ * The subtraction is done on a char* and only then cast to type*, so the
+ * offset is applied in bytes rather than in units of sizeof(type).
+ */
+#define container_of(ptr, type, member) \
+    ((type*)((char*)(ptr) - offsetof(type, member)))
+
+/** State to reuse the same buffers between compression calls. */
+typedef struct {
+    method_state_t base;
+    data_buffer_t buffer;        /**< The constant input data buffer. */
+    data_buffer_t compressed;    /**< The compressed data buffer. */
+    data_buffer_t decompressed;  /**< The decompressed data buffer. */
+} simple_state_t;
+
+/*
+ * Allocates the state for the simple method: loads the input and sizes the
+ * compressed and decompressed buffers from it. Buffer failures are not
+ * checked here; simple_compress() checks them.
+ */
+static method_state_t* simple_create(data_t const* data) {
+    simple_state_t* const self = (simple_state_t*)calloc(1, sizeof(*self));
+
+    if (self == NULL)
+        return NULL;
+
+    self->base.data = data;
+    self->buffer = data_buffer_get(data);
+    {
+        size_t const input_size = self->buffer.size;
+        self->compressed = data_buffer_create(ZSTD_compressBound(input_size));
+        self->decompressed = data_buffer_create(input_size);
+    }
+    return &self->base;
+}
+
+/*
+ * Frees a state made by simple_create(), including its three data buffers.
+ * Accepts NULL. Buffers whose allocation failed hold a NULL data pointer,
+ * which data_buffer_free() passes to free() harmlessly.
+ */
+static void simple_destroy(method_state_t* base) {
+    if (base == NULL)
+        return;
+    simple_state_t* state = container_of(base, simple_state_t, base);
+    data_buffer_free(state->buffer);
+    data_buffer_free(state->compressed);
+    data_buffer_free(state->decompressed);
+    free(state);
+}
+
+/*
+ * Compresses the loaded file with ZSTD_compress() at the config's level,
+ * decompresses it again and checks the round trip. Directory data and
+ * configs without a level are skipped. Returns the compressed size.
+ */
+static result_t simple_compress(method_state_t* base, config_t const* config) {
+    simple_state_t* self;
+    int level;
+    result_data_t data;
+
+    if (base == NULL)
+        return result_error(result_error_system_error);
+    self = container_of(base, simple_state_t, base);
+
+    if (base->data->type != data_type_file)
+        return result_error(result_error_skip);
+
+    /* Any buffer that simple_create() failed to set up is a system error. */
+    if (self->buffer.data == NULL)
+        return result_error(result_error_system_error);
+    if (self->compressed.data == NULL)
+        return result_error(result_error_system_error);
+    if (self->decompressed.data == NULL)
+        return result_error(result_error_system_error);
+
+    /* Nothing to do for configs that do not name a level. */
+    level = config_get_level(config);
+    if (level == CONFIG_NO_LEVEL)
+        return result_error(result_error_skip);
+
+    /* Compress. */
+    self->compressed.size = ZSTD_compress(
+        self->compressed.data,
+        self->compressed.capacity,
+        self->buffer.data,
+        self->buffer.size,
+        level);
+    if (ZSTD_isError(self->compressed.size))
+        return result_error(result_error_compression_error);
+
+    /* Decompress. */
+    self->decompressed.size = ZSTD_decompress(
+        self->decompressed.data,
+        self->decompressed.capacity,
+        self->compressed.data,
+        self->compressed.size);
+    if (ZSTD_isError(self->decompressed.size))
+        return result_error(result_error_decompression_error);
+
+    /* The round trip must reproduce the input exactly. */
+    if (data_buffer_compare(self->buffer, self->decompressed) != 0)
+        return result_error(result_error_round_trip_error);
+
+    data.total_size = self->compressed.size;
+    return result_data(data);
+}
+
+/** Generic state creation: a bare method_state_t that only records data. */
+static method_state_t* method_state_create(data_t const* data) {
+    method_state_t* const created =
+        (method_state_t*)malloc(sizeof(*created));
+
+    if (created != NULL)
+        created->data = data;
+    return created;
+}
+
+/* Counterpart of method_state_create(); the data pointer is not owned. */
+static void method_state_destroy(method_state_t* state) {
+    free(state);
+}
+
+#define MAX_OUT 32
+
+/*
+ * Compresses the data path with the zstd CLI and returns the total number of
+ * compressed bytes, counted by piping the CLI's stdout through `wc -c`.
+ * Configs without CLI arguments are skipped.
+ */
+static result_t cli_file_compress(
+    method_state_t* state,
+    config_t const* config) {
+    char command[1024];
+    char count_text[MAX_OUT + 1];
+    size_t command_len;
+    size_t count_len;
+    FILE* pipe;
+    int status;
+    result_data_t data;
+
+    if (config->cli_args == NULL)
+        return result_error(result_error_skip);
+
+    if (g_zstdcli == NULL)
+        return result_error(result_error_system_error);
+
+    /* '<zstd>' -cqr <args> '<file/dir>' | wc -c */
+    command_len = snprintf(
+        command,
+        sizeof(command),
+        "'%s' -cqr %s '%s' | wc -c",
+        g_zstdcli,
+        config->cli_args,
+        state->data->path);
+    if (command_len >= sizeof(command)) {
+        fprintf(stderr, "command too large: %s\n", command);
+        return result_error(result_error_system_error);
+    }
+
+    pipe = popen(command, "r");
+    if (pipe == NULL) {
+        fprintf(stderr, "failed to popen command: %s\n", command);
+        return result_error(result_error_system_error);
+    }
+
+    /* Read the byte count printed by wc, then reap the command. */
+    count_len = fread(count_text, 1, MAX_OUT, pipe);
+    count_text[count_len] = '\0';
+    status = pclose(pipe);
+    if (status != 0) {
+        fprintf(stderr, "zstd failed with command: %s\n", command);
+        return result_error(result_error_compression_error);
+    }
+    /* A full buffer means the output may have been cut short. */
+    if (count_len == MAX_OUT) {
+        fprintf(
+            stderr, "wc -c produced more bytes than expected: %s\n", count_text);
+        return result_error(result_error_system_error);
+    }
+
+    data.total_size = atoll(count_text);
+    return result_data(data);
+}
+
+/* In-memory round trip through ZSTD_compress() / ZSTD_decompress(). */
+method_t const simple = {
+    .name = "simple",
+    .create = simple_create,
+    .compress = simple_compress,
+    .destroy = simple_destroy,
+};
+
+/* Compression through the zstd command line tool. */
+method_t const cli_file = {
+    .name = "cli file",
+    .create = method_state_create,
+    .compress = cli_file_compress,
+    .destroy = method_state_destroy,
+};
+
+/* NULL-terminated list of every method. */
+static method_t const* g_methods[] = {
+    &simple,
+    &cli_file,
+    NULL,
+};
+
+/* Public view of the list declared in method.h. */
+method_t const* const* methods = g_methods;
--- a/tests/regression/method.h
+++ b/tests/regression/method.h
@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef METHOD_H
+#define METHOD_H
+
+#include <stddef.h>
+
+#include "data.h"
+#include "config.h"
+#include "result.h"
+
+/**
+ * The base class for state that methods keep.
+ * All derived method state classes must have a member of this type.
+ */
+typedef struct {
+    data_t const* data;
+} method_state_t;
+
+/**
+ * A method that compresses the data using config.
+ *
+ * The callbacks are used together: create() builds a state, compress() is
+ * called with that state, and destroy() releases it.
+ */
+typedef struct {
+    char const* name;  /**< The identifier for this method in the results. */
+    /**
+     * Creates a state that must contain a member variable of method_state_t,
+     * and returns a pointer to that member variable.
+     *
+     * This method can be used to do expensive work that only depends on the
+     * data, like loading the data file into a buffer.
+     *
+     * @param data The data that the state is created for.
+     */
+    method_state_t* (*create)(data_t const* data);
+    /**
+     * Compresses the data in the state using the given config.
+     *
+     * @param state A pointer to the state returned by create().
+     * @param config The config to compress with.
+     *
+     * @returns The total compressed size on success, or an error code.
+     */
+    result_t (*compress)(method_state_t* state, config_t const* config);
+    /**
+     * Frees the state.
+     *
+     * @param state A pointer to the state returned by create().
+     */
+    void (*destroy)(method_state_t* state);
+} method_t;
+
+/**
+ * Set the zstd cli path. Must be called before any methods are used.
+ */
+void method_set_zstdcli(char const* zstdcli);
+
+/**
+ * A NULL-terminated list of methods.
+ */
+extern method_t const* const* methods;
+
+#endif
--- a/tests/regression/result.c
+++ b/tests/regression/result.c
@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include "result.h"
+
+/**
+ * Get a human readable name for the error stored in the result.
+ *
+ * @returns A string literal naming the error, or "unknown error" if the
+ *          stored value is not one of the result_error_t enumerators.
+ */
+char const* result_get_error_string(result_t result) {
+    switch (result_get_error(result)) {
+        case result_error_ok:
+            return "okay";
+        case result_error_skip:
+            return "skip";
+        case result_error_system_error:
+            return "system error";
+        case result_error_compression_error:
+            return "compression error";
+        case result_error_decompression_error:
+            return "decompression error";
+        case result_error_round_trip_error:
+            return "round trip error";
+    }
+    /*
+     * No default case above, so the compiler can still warn when a new
+     * enumerator is not handled. This return keeps the function from falling
+     * off its end if the stored value is outside the enum.
+     */
+    return "unknown error";
+}
--- a/tests/regression/result.h
+++ b/tests/regression/result.h
@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef RESULT_H
+#define RESULT_H
+
+#include <stddef.h>
+
+/**
+ * The error type enum.
+ */
+typedef enum {
+    result_error_ok,                   /**< No error. */
+    result_error_skip,                 /**< This method was skipped. */
+    result_error_system_error,         /**< Some internal error happened. */
+    result_error_compression_error,    /**< Compression failed. */
+    result_error_decompression_error,  /**< Decompression failed. */
+    result_error_round_trip_error,     /**< Data failed to round trip. */
+} result_error_t;
+
+/**
+ * The success type.
+ */
+typedef struct {
+    size_t total_size;  /**< The total compressed size. */
+} result_data_t;
+
+/**
+ * The result type.
+ * Do not access the member variables directly, use the helper functions.
+ */
+typedef struct {
+    result_error_t internal_error;
+    result_data_t internal_data;
+} result_t;
+
+/**
+ * Create a result of the error type.
+ */
+static result_t result_error(result_error_t error);
+/**
+ * Create a result of the success type.
+ */
+static result_t result_data(result_data_t data);
+
+/**
+ * Check if the result is an error or skip.
+ */
+static int result_is_error(result_t result);
+/**
+ * Check if the result error is skip.
+ */
+static int result_is_skip(result_t result);
+/**
+ * Get the result error or okay.
+ */
+static result_error_t result_get_error(result_t result);
+/**
+ * Get the result data. The result MUST be checked with result_is_error() first.
+ */
+static result_data_t result_get_data(result_t result);
+
+/* Builds an error result; the members that are not named are zeroed. */
+static result_t result_error(result_error_t error) {
+    return (result_t){
+        .internal_error = error,
+    };
+}
+
+/* Builds a success result that carries the given data. */
+static result_t result_data(result_data_t data) {
+    return (result_t){
+        .internal_error = result_error_ok,
+        .internal_data = data,
+    };
+}
+
+/* Anything other than result_error_ok, including skip, counts as an error. */
+static int result_is_error(result_t result) {
+    result_error_t const error = result_get_error(result);
+    return error != result_error_ok;
+}
+
+/* True only when the stored error is result_error_skip. */
+static int result_is_skip(result_t result) {
+    result_error_t const error = result_get_error(result);
+    return error == result_error_skip;
+}
+
+/* Reads the error code stored in the result (result_error_ok on success). */
+static result_error_t result_get_error(result_t result) {
+    return result.internal_error;
+}
+
+char const* result_get_error_string(result_t result);
+
+/* Reads the stored data without checking whether the result is an error. */
+static result_data_t result_get_data(result_t result) {
+    return result.internal_data;
+}
+
+#endif
--- a/tests/regression/results.csv
+++ b/tests/regression/results.csv
@ -0,0 +1,43 @@
+Data,	Config,	Method,	Total compressed size
+silesia.tar,	level -5,	simple,	106176430
+silesia.tar,	level -3,	simple,	98476550
+silesia.tar,	level -1,	simple,	87206767
+silesia.tar,	level 0,	simple,	66996953
+silesia.tar,	level 1,	simple,	73658303
+silesia.tar,	level 3,	simple,	66996953
+silesia.tar,	level 4,	simple,	65996020
+silesia.tar,	level 5,	simple,	64421326
+silesia.tar,	level 6,	simple,	62388673
+silesia.tar,	level 7,	simple,	61159525
+silesia.tar,	level 9,	simple,	60214921
+silesia.tar,	level 13,	simple,	58428642
+silesia.tar,	level 16,	simple,	56363759
+silesia.tar,	level 19,	simple,	53274173
+silesia,	level -5,	cli file,	106202112
+silesia,	level -3,	cli file,	98518660
+silesia,	level -1,	cli file,	87226203
+silesia,	level 0,	cli file,	67049190
+silesia,	level 1,	cli file,	73676282
+silesia,	level 3,	cli file,	67049190
+silesia,	level 4,	cli file,	66090040
+silesia,	level 5,	cli file,	64503721
+silesia,	level 6,	cli file,	62446177
+silesia,	level 7,	cli file,	61217029
+silesia,	level 9,	cli file,	60282841
+silesia,	level 13,	cli file,	58480658
+silesia,	level 16,	cli file,	56414170
+silesia,	level 19,	cli file,	53365292
+silesia.tar,	level -5,	cli file,	106250113
+silesia.tar,	level -3,	cli file,	98550747
+silesia.tar,	level -1,	cli file,	87227322
+silesia.tar,	level 0,	cli file,	67111168
+silesia.tar,	level 1,	cli file,	73694374
+silesia.tar,	level 3,	cli file,	67111168
+silesia.tar,	level 4,	cli file,	66154079
+silesia.tar,	level 5,	cli file,	64546998
+silesia.tar,	level 6,	cli file,	62458454
+silesia.tar,	level 7,	cli file,	61231085
+silesia.tar,	level 9,	cli file,	60310313
+silesia.tar,	level 13,	cli file,	58517476
+silesia.tar,	level 16,	cli file,	56448694
+silesia.tar,	level 19,	cli file,	53444920
--- a/tests/regression/test.c
+++ b/tests/regression/test.c
@ -0,0 +1,308 @@
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include <assert.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "config.h"
+#include "data.h"
+#include "method.h"
+
+/** Returns 1 if the name is NULL or contains a comma, and 0 otherwise. */
+static int is_name_bad(char const* name) {
+    return name == NULL || strchr(name, ',') != NULL;
+}
+
+/**
+ * Reports a bad name to stderr.
+ * A NULL name is printed as "(null)" because passing NULL to %s is undefined.
+ * Returns 1 if the name is bad, and 0 otherwise.
+ */
+static int report_bad_name(char const* kind, char const* name) {
+    if (!is_name_bad(name))
+        return 0;
+    fprintf(
+        stderr,
+        "%s name %s is bad\n",
+        kind,
+        name != NULL ? name : "(null)");
+    return 1;
+}
+
+/**
+ * Check if any of the method, data, or config names is NULL or contains a
+ * comma. The first bad name is reported to stderr.
+ * Returns 1 if a bad name was found, and 0 otherwise.
+ */
+static int are_names_bad(void) {
+    for (size_t method = 0; methods[method] != NULL; ++method)
+        if (report_bad_name("method", methods[method]->name))
+            return 1;
+    for (size_t datum = 0; data[datum] != NULL; ++datum)
+        if (report_bad_name("data", data[datum]->name))
+            return 1;
+    for (size_t config = 0; configs[config] != NULL; ++config)
+        if (report_bad_name("config", configs[config]->name))
+            return 1;
+    return 0;
+}
+
+/**
+ * Helper macro to print to stderr and a file.
+ * The arguments are expanded once per stream, so they are evaluated twice;
+ * do not pass arguments with side effects.
+ */
+#define tprintf(file, ...)            \
+    do {                              \
+        fprintf(file, __VA_ARGS__);   \
+        fprintf(stderr, __VA_ARGS__); \
+    } while (0)
+/** Helper macro to flush stderr and a file. The file is flushed first. */
+#define tflush(file)    \
+    do {                \
+        fflush(file);   \
+        fflush(stderr); \
+    } while (0)
+
+/**
+ * Runs every (method, data, config) combination. Each combination that is not
+ * skipped produces one row, written to both results and stderr and flushed
+ * immediately. Always returns 0.
+ */
+static int run_all(FILE* results) {
+    tprintf(results, "Data,\tConfig,\tMethod,\tTotal compressed size\n");
+    for (size_t m = 0; methods[m] != NULL; ++m) {
+        method_t const* const method = methods[m];
+        for (size_t d = 0; data[d] != NULL; ++d) {
+            /* One state per data set, shared by all of the configs. */
+            method_state_t* const state = method->create(data[d]);
+            for (size_t c = 0; configs[c] != NULL; ++c) {
+                result_t const result = method->compress(state, configs[c]);
+                if (!result_is_skip(result)) {
+                    /* The row key first, then either the size or the error. */
+                    tprintf(
+                        results,
+                        "%s,\t%s,\t%s,\t",
+                        data[d]->name,
+                        configs[c]->name,
+                        method->name);
+                    if (!result_is_error(result)) {
+                        tprintf(
+                            results,
+                            "%llu\n",
+                            (unsigned long long)result_get_data(result)
+                                .total_size);
+                    } else {
+                        tprintf(
+                            results, "%s\n", result_get_error_string(result));
+                    }
+                    tflush(results);
+                }
+            }
+            method->destroy(state);
+        }
+    }
+    return 0;
+}
+
+/**
+ * Option parsing using getopt.
+ * When you add a new option update: long_options, long_extras, and
+ * short_options.
+ */
+
+/** Option variables filled by parse_args. */
+static char const* g_output = NULL;
+static char const* g_diff = NULL;
+static char const* g_cache = NULL;
+static char const* g_zstdcli = NULL;
+
+typedef enum {
+    required_option,
+    optional_option,
+    help_option,
+} option_type;
+
+/**
+ * Extra state that we need to keep per-option that we can't store in getopt.
+ */
+struct option_extra {
+    int id;               /**< The short option name, used as an id. */
+    char const* help;     /**< The help message. */
+    option_type opt_type; /**< The option type: required, optional, or help. */
+    char const** value;   /**< The value to set or NULL if no_argument. */
+};
+
+/**
+ * The options.
+ * getopt_long() requires the array to end with an all-zero element; that
+ * terminator must stay last and is not counted in nargs.
+ */
+static struct option long_options[] = {
+    {"cache", required_argument, NULL, 'c'},
+    {"diff", required_argument, NULL, 'd'},
+    {"help", no_argument, NULL, 'h'},
+    {"output", required_argument, NULL, 'o'},
+    {"zstd", required_argument, NULL, 'z'},
+    {NULL, 0, NULL, 0},
+};
+
+/** The number of real options, excluding the all-zero terminator. */
+static size_t const nargs =
+    sizeof(long_options) / sizeof(long_options[0]) - 1;
+
+/**
+ * The extra info for the options. Must be in the same order as the options.
+ * There is no entry for the terminator of long_options.
+ */
+static struct option_extra long_extras[] = {
+    {'c', "the cache directory", required_option, &g_cache},
+    {'d', "compare the results to this file", optional_option, &g_diff},
+    {'h', "display this message", help_option, NULL},
+    {'o', "write the results here", required_option, &g_output},
+    {'z', "zstd cli tool", required_option, &g_zstdcli},
+};
+
+/** The short options. Must correspond to the options. */
+static char const short_options[] = "c:d:ho:z:";
+
+/** Return the help string for the option type. */
+static char const* required_message(option_type opt_type) {
+    switch (opt_type) {
+        case required_option:
+            return "[required]";
+        case optional_option:
+            return "[optional]";
+        case help_option:
+            return "";
+        default:
+            assert(0);
+            return NULL;
+    }
+}
+
+/** Print the help for the program: one line per option, to stderr. */
+static void print_help(void) {
+    fprintf(stderr, "regression test runner\n");
+    /* Uses the file-level nargs so that the terminator is never printed. */
+    for (size_t i = 0; i < nargs; ++i) {
+        /* Short / long  - help [option type] */
+        fprintf(
+            stderr,
+            "-%c / --%s \t- %s %s\n",
+            long_options[i].val,
+            long_options[i].name,
+            long_extras[i].help,
+            required_message(long_extras[i].opt_type));
+    }
+}
+
+/**
+ * Parse the arguments into the option variables.
+ * Return 0 on success. Print the help and return 1 when help is requested,
+ * an option is not recognized, or a required option is not set.
+ */
+static int parse_args(int argc, char** argv) {
+    int option_index = 0;
+    int opt;
+
+    while ((opt = getopt_long(
+                argc, argv, short_options, long_options, &option_index)) !=
+           -1) {
+        /* Look for an option with this id that stores a value. */
+        size_t match = 0;
+        while (match < nargs &&
+               !(opt == long_extras[match].id &&
+                 long_extras[match].value != NULL))
+            ++match;
+        if (match == nargs) {
+            /* -h and everything that is not a known value option. */
+            print_help();
+            return 1;
+        }
+        *long_extras[match].value = optarg;
+    }
+
+    /* Report every required option that was left unset. */
+    int missing = 0;
+    for (size_t i = 0; i < nargs; ++i) {
+        struct option_extra const* const extra = &long_extras[i];
+        int const unset_required = extra->opt_type == required_option &&
+            extra->value != NULL && *extra->value == NULL;
+        if (!unset_required)
+            continue;
+        fprintf(
+            stderr,
+            "-%c / --%s is a required argument but is not set\n",
+            long_options[i].val,
+            long_options[i].name);
+        missing = 1;
+    }
+    if (!missing)
+        return 0;
+
+    fprintf(stderr, "\n");
+    print_help();
+    return 1;
+}
+
+/**
+ * Compare the new results file to the previous results file.
+ * Returns 0 if both files could be read and data_buffer_compare() reports no
+ * difference, and non-zero otherwise. The outcome is printed to stderr.
+ */
+static int diff_results(char const* actual_file, char const* expected_file) {
+    data_buffer_t const actual = data_buffer_read(actual_file);
+    data_buffer_t const expected = data_buffer_read(expected_file);
+    int ret = 1;
+
+    if (actual.data == NULL) {
+        fprintf(stderr, "failed to open results '%s' for diff\n", actual_file);
+    } else if (expected.data == NULL) {
+        fprintf(
+            stderr,
+            "failed to open previous results '%s' for diff\n",
+            expected_file);
+    } else {
+        ret = data_buffer_compare(actual, expected);
+        if (ret == 0) {
+            fprintf(stderr, "actual results match expected results\n");
+        } else {
+            fprintf(
+                stderr,
+                "actual results '%s' does not match expected results '%s'\n",
+                actual_file,
+                expected_file);
+        }
+    }
+
+    /* Both buffers are released on every path, including the failures. */
+    data_buffer_free(actual);
+    data_buffer_free(expected);
+    return ret;
+}
+
+/**
+ * Runs the regression tests, writes the results to the output file, and
+ * optionally compares them against a previous results file.
+ *
+ * @returns 0 on success, and non-zero if the arguments are bad, a module
+ *          fails to initialize, the output file cannot be written, or the
+ *          results differ from the file given with --diff.
+ */
+int main(int argc, char** argv) {
+    /* Parse args and validate modules. */
+    int ret = parse_args(argc, argv);
+    if (ret != 0)
+        return ret;
+
+    if (are_names_bad())
+        return 1;
+
+    /* Initialize modules. */
+    method_set_zstdcli(g_zstdcli);
+    ret = data_init(g_cache);
+    if (ret != 0) {
+        fprintf(stderr, "data_init() failed with error=%s\n", strerror(ret));
+        return 1;
+    }
+
+    /* Run the regression tests. */
+    ret = 1;
+    FILE* results = fopen(g_output, "w");
+    if (results == NULL) {
+        fprintf(stderr, "Failed to open the output file\n");
+        goto out;
+    }
+    ret = run_all(results);
+    /*
+     * The stream error indicator is sticky, so this catches any write or
+     * flush that failed while the results were being recorded. It must be
+     * checked before fclose(), which invalidates the stream.
+     */
+    if (ferror(results)) {
+        fprintf(stderr, "Failed to write the output file\n");
+        ret = 1;
+    }
+    if (fclose(results) != 0) {
+        fprintf(stderr, "Failed to close the output file\n");
+        ret = 1;
+    }
+
+    if (ret != 0)
+        goto out;
+
+    if (g_diff)
+        /* Diff the new results with the previous results. */
+        ret = diff_results(g_output, g_diff);
+
+out:
+    data_finish();
+    return ret;
+}