Merge pull request #4086 from Adenilson/fix01

[zstd][dict] Ensure that dictionary training functions are fully reen…
This commit is contained in:
Yann Collet 2024-07-04 00:23:55 -07:00 committed by GitHub
commit ab02fd342f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 72 additions and 19 deletions

View File

@ -24,6 +24,18 @@
#ifndef ZSTD_DEPS_COMMON #ifndef ZSTD_DEPS_COMMON
#define ZSTD_DEPS_COMMON #define ZSTD_DEPS_COMMON
/* Even though we use qsort_r only for the dictionary builder, the macro
* _GNU_SOURCE has to be declared *before* the inclusion of any standard
* header and the script 'combine.sh' combines the whole zstd source code
* in a single file.
*/
#if defined(__linux) || defined(__linux__) || defined(linux) || defined(__gnu_linux__) || \
defined(__CYGWIN__) || defined(__MSYS__)
#if !defined(_GNU_SOURCE)
#define _GNU_SOURCE
#endif
#endif
#include <limits.h> #include <limits.h>
#include <stddef.h> #include <stddef.h>
#include <string.h> #include <string.h>

View File

@ -21,8 +21,17 @@
/*-************************************* /*-*************************************
* Dependencies * Dependencies
***************************************/ ***************************************/
/* qsort_r is an extension. */
#if defined(__linux) || defined(__linux__) || defined(linux) || defined(__gnu_linux__) || \
defined(__CYGWIN__) || defined(__MSYS__)
#if !defined(_GNU_SOURCE)
#define _GNU_SOURCE
#endif
#endif
#include <stdio.h> /* fprintf */ #include <stdio.h> /* fprintf */
#include <stdlib.h> /* malloc, free, qsort */ #include <stdlib.h> /* malloc, free, qsort_r */
#include <string.h> /* memset */ #include <string.h> /* memset */
#include <time.h> /* clock */ #include <time.h> /* clock */
@ -232,8 +241,10 @@ typedef struct {
unsigned d; unsigned d;
} COVER_ctx_t; } COVER_ctx_t;
/* We need a global context for qsort... */ #if !defined(_GNU_SOURCE) && !defined(__APPLE__) && !defined(_MSC_VER)
/* C90 only offers qsort() that needs a global context. */
static COVER_ctx_t *g_coverCtx = NULL; static COVER_ctx_t *g_coverCtx = NULL;
#endif
/*-************************************* /*-*************************************
* Helper functions * Helper functions
@ -276,11 +287,15 @@ static int COVER_cmp8(COVER_ctx_t *ctx, const void *lp, const void *rp) {
/** /**
* Same as COVER_cmp() except ties are broken by pointer value * Same as COVER_cmp() except ties are broken by pointer value
* NOTE: g_coverCtx must be set to call this function. A global is required because
* qsort doesn't take an opaque pointer.
*/ */
static int WIN_CDECL COVER_strict_cmp(const void *lp, const void *rp) { #if (defined(_WIN32) && defined(_MSC_VER)) || defined(__APPLE__)
int result = COVER_cmp(g_coverCtx, lp, rp); static int WIN_CDECL COVER_strict_cmp(void* g_coverCtx, const void* lp, const void* rp) {
#elif defined(_GNU_SOURCE)
static int COVER_strict_cmp(const void *lp, const void *rp, void *g_coverCtx) {
#else /* C90 fallback.*/
static int COVER_strict_cmp(const void *lp, const void *rp) {
#endif
int result = COVER_cmp((COVER_ctx_t*)g_coverCtx, lp, rp);
if (result == 0) { if (result == 0) {
result = lp < rp ? -1 : 1; result = lp < rp ? -1 : 1;
} }
@ -289,14 +304,50 @@ static int WIN_CDECL COVER_strict_cmp(const void *lp, const void *rp) {
/** /**
* Faster version for d <= 8. * Faster version for d <= 8.
*/ */
static int WIN_CDECL COVER_strict_cmp8(const void *lp, const void *rp) { #if (defined(_WIN32) && defined(_MSC_VER)) || defined(__APPLE__)
int result = COVER_cmp8(g_coverCtx, lp, rp); static int WIN_CDECL COVER_strict_cmp8(void* g_coverCtx, const void* lp, const void* rp) {
#elif defined(_GNU_SOURCE)
static int COVER_strict_cmp8(const void *lp, const void *rp, void *g_coverCtx) {
#else /* C90 fallback.*/
static int COVER_strict_cmp8(const void *lp, const void *rp) {
#endif
int result = COVER_cmp8((COVER_ctx_t*)g_coverCtx, lp, rp);
if (result == 0) { if (result == 0) {
result = lp < rp ? -1 : 1; result = lp < rp ? -1 : 1;
} }
return result; return result;
} }
/**
* Abstract away divergence of qsort_r() parameters.
* Hopefully when C11 become the norm, we will be able
* to clean it up.
*/
static void stableSort(COVER_ctx_t *ctx) {
#if defined(__APPLE__)
qsort_r(ctx->suffix, ctx->suffixSize, sizeof(U32),
ctx,
(ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp));
#elif defined(_GNU_SOURCE)
qsort_r(ctx->suffix, ctx->suffixSize, sizeof(U32),
(ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp),
ctx);
#elif defined(_WIN32) && defined(_MSC_VER)
qsort_s(ctx->suffix, ctx->suffixSize, sizeof(U32),
(ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp),
ctx);
#elif defined(__OpenBSD__)
g_coverCtx = ctx;
mergesort(ctx->suffix, ctx->suffixSize, sizeof(U32),
(ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp));
#else /* C90 fallback.*/
g_coverCtx = ctx;
/* TODO(cavalcanti): implement a reentrant qsort() when is not available. */
qsort(ctx->suffix, ctx->suffixSize, sizeof(U32),
(ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp));
#endif
}
/** /**
* Returns the first pointer in [first, last) whose element does not compare * Returns the first pointer in [first, last) whose element does not compare
* less than value. If no such element exists it returns last. * less than value. If no such element exists it returns last.
@ -620,17 +671,7 @@ static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
for (i = 0; i < ctx->suffixSize; ++i) { for (i = 0; i < ctx->suffixSize; ++i) {
ctx->suffix[i] = i; ctx->suffix[i] = i;
} }
/* qsort doesn't take an opaque pointer, so pass as a global. stableSort(ctx);
* On OpenBSD qsort() is not guaranteed to be stable, their mergesort() is.
*/
g_coverCtx = ctx;
#if defined(__OpenBSD__)
mergesort(ctx->suffix, ctx->suffixSize, sizeof(U32),
(ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp));
#else
qsort(ctx->suffix, ctx->suffixSize, sizeof(U32),
(ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp));
#endif
} }
DISPLAYLEVEL(2, "Computing frequencies\n"); DISPLAYLEVEL(2, "Computing frequencies\n");
/* For each dmer group (group of positions with the same first d bytes): /* For each dmer group (group of positions with the same first d bytes):