Merge pull request #4349 from Cyan4973/devfd

Support process substitution for `--filelist=`
This commit is contained in:
Yann Collet 2025-03-28 10:35:45 -07:00 committed by GitHub
commit c5926fbab8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 277 additions and 93 deletions

View File

@ -28,7 +28,10 @@ jobs:
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # tag=v4.2.2
- name: make test
run: make test
run: |
make test
make -j zstd
./tests/test_process_substitution.bash ./zstd
# lasts ~26mn
make-test-macos:

View File

@ -538,14 +538,16 @@ static int FIO_removeFile(const char* path)
}
/** FIO_openSrcFile() :
* condition : `srcFileName` must be non-NULL. `prefs` may be NULL.
* condition : `srcFileName` must be non-NULL.
* optional: `prefs` may be NULL.
* @result : FILE* to `srcFileName`, or NULL if it fails */
static FILE* FIO_openSrcFile(const FIO_prefs_t* const prefs, const char* srcFileName, stat_t* statbuf)
{
int allowBlockDevices = prefs != NULL ? prefs->allowBlockDevices : 0;
assert(srcFileName != NULL);
assert(statbuf != NULL);
if (!strcmp (srcFileName, stdinmark)) {
if (!strcmp(srcFileName, stdinmark)) {
DISPLAYLEVEL(4,"Using stdin for input \n");
SET_BINARY_MODE(stdin);
return stdin;
@ -557,8 +559,10 @@ static FILE* FIO_openSrcFile(const FIO_prefs_t* const prefs, const char* srcFile
return NULL;
}
/* Accept regular files, FIFOs, and process substitution file descriptors */
if (!UTIL_isRegularFileStat(statbuf)
&& !UTIL_isFIFOStat(statbuf)
&& !UTIL_isFileDescriptorPipe(srcFileName) /* Process substitution support */
&& !(allowBlockDevices && UTIL_isBlockDevStat(statbuf))
) {
DISPLAYLEVEL(1, "zstd: %s is not a regular file -- ignored \n",
@ -655,7 +659,11 @@ FIO_openDstFile(FIO_ctx_t* fCtx, FIO_prefs_t* const prefs,
}
#endif
if (f == NULL) {
DISPLAYLEVEL(1, "zstd: %s: %s\n", dstFileName, strerror(errno));
if (UTIL_isFileDescriptorPipe(dstFileName)) {
DISPLAYLEVEL(1, "zstd: error: no output specified (use -o or -c). \n");
} else {
DISPLAYLEVEL(1, "zstd: %s: %s\n", dstFileName, strerror(errno));
}
} else {
/* An increased buffer size can provide a significant performance
* boost on some platforms. Note that providing a NULL buf with a

View File

@ -151,7 +151,8 @@ int UTIL_requireUserConfirmation(const char* prompt, const char* abortMsg,
/*-*************************************
* Constants
***************************************/
#define LIST_SIZE_INCREASE (8*1024)
#define KB * (1 << 10)
#define LIST_SIZE_INCREASE (8 KB)
/* Upper bound (50 MiB) on the amount of data accepted from a file list.
 * The whole expansion is parenthesized so the macro behaves as a single
 * operand whatever operator surrounds it (ex: `x / MAX_FILE_OF_FILE_NAMES_SIZE`). */
#define MAX_FILE_OF_FILE_NAMES_SIZE ((1<<20)*50)
@ -448,6 +449,26 @@ int UTIL_isFIFOStat(const stat_t* statbuf)
return 0;
}
/* UTIL_isFileDescriptorPipe() :
 * Tells whether @filename looks like a file-descriptor path, as handed over
 * by shells for process substitution (ex: `--filelist=<(cmd)`).
 * This is a purely textual test on the beginning of the path :
 * nothing is opened nor stat'ed here.
 * condition : @filename must be non-NULL.
 * @return : 1 if @filename starts with "/dev/fd/" or "/proc/self/fd/",
 *           0 otherwise. */
int UTIL_isFileDescriptorPipe(const char* filename)
{
    /* Path prefixes under which file descriptors are exposed */
    static const char* const fdPrefixes[] = { "/dev/fd/", "/proc/self/fd/" };
    size_t const nbPrefixes = sizeof(fdPrefixes) / sizeof(fdPrefixes[0]);
    size_t n;
    int isFdPath = 0;

    UTIL_TRACE_CALL("UTIL_isFileDescriptorPipe(%s)", filename);
    for (n = 0; n < nbPrefixes; n++) {
        if (strncmp(filename, fdPrefixes[n], strlen(fdPrefixes[n])) == 0) {
            isFdPath = 1;
            break;
        }
    }
    UTIL_TRACE_RET(isFdPath);
    return isFdPath;
}
/* UTIL_isBlockDevStat : distinguish block devices */
int UTIL_isBlockDevStat(const stat_t* statbuf)
{
@ -614,101 +635,157 @@ U64 UTIL_getTotalFileSize(const char* const * fileNamesTable, unsigned nbFiles)
}
/* condition : @file must be valid, and not have reached its end.
* @return : length of line written into @buf, ended with `\0` instead of '\n',
* or 0, if there is no new line */
static size_t readLineFromFile(char* buf, size_t len, FILE* file)
/* Read the entire content of a file into a buffer with progressive resizing */
static char* UTIL_readFileContent(FILE* inFile, size_t* totalReadPtr)
{
assert(!feof(file));
if ( fgets(buf, (int) len, file) == NULL ) return 0;
{ size_t linelen = strlen(buf);
if (strlen(buf)==0) return 0;
if (buf[linelen-1] == '\n') linelen--;
buf[linelen] = '\0';
return linelen+1;
}
}
size_t bufSize = 64 KB; /* Start with a reasonable buffer size */
size_t totalRead = 0;
size_t bytesRead = 0;
char* buf = (char*)malloc(bufSize);
if (buf == NULL) return NULL;
/* Conditions :
* size of @inputFileName file must be < @dstCapacity
* @dst must be initialized
* @return : nb of lines
* or -1 if there's an error
*/
static int
readLinesFromFile(void* dst, size_t dstCapacity,
const char* inputFileName)
{
int nbFiles = 0;
size_t pos = 0;
char* const buf = (char*)dst;
FILE* const inputFile = fopen(inputFileName, "r");
assert(dst != NULL);
/* Read the file incrementally */
while ((bytesRead = fread(buf + totalRead, 1, bufSize - totalRead - 1, inFile)) > 0) {
totalRead += bytesRead;
if(!inputFile) {
if (g_utilDisplayLevel >= 1) perror("zstd:util:readLinesFromFile");
return -1;
}
while ( !feof(inputFile) ) {
size_t const lineLength = readLineFromFile(buf+pos, dstCapacity-pos, inputFile);
if (lineLength == 0) break;
assert(pos + lineLength <= dstCapacity); /* '=' for inputFile not terminated with '\n' */
pos += lineLength;
++nbFiles;
}
CONTROL( fclose(inputFile) == 0 );
return nbFiles;
}
/*Note: buf is not freed in case function successfully created table because filesTable->fileNames[0] = buf*/
FileNamesTable*
UTIL_createFileNamesTable_fromFileName(const char* inputFileName)
{
size_t nbFiles = 0;
char* buf;
size_t bufSize;
stat_t statbuf;
if (!UTIL_stat(inputFileName, &statbuf) || !UTIL_isRegularFileStat(&statbuf))
return NULL;
{ U64 const inputFileSize = UTIL_getFileSizeStat(&statbuf);
if(inputFileSize > MAX_FILE_OF_FILE_NAMES_SIZE)
return NULL;
bufSize = (size_t)(inputFileSize + 1); /* (+1) to add '\0' at the end of last filename */
}
buf = (char*) malloc(bufSize);
CONTROL( buf != NULL );
{ int const ret_nbFiles = readLinesFromFile(buf, bufSize, inputFileName);
if (ret_nbFiles <= 0) {
free(buf);
return NULL;
}
nbFiles = (size_t)ret_nbFiles;
}
{ const char** filenamesTable = (const char**) malloc(nbFiles * sizeof(*filenamesTable));
CONTROL(filenamesTable != NULL);
{ size_t fnb, pos = 0;
for (fnb = 0; fnb < nbFiles; fnb++) {
filenamesTable[fnb] = buf+pos;
pos += strlen(buf+pos)+1; /* +1 for the finishing `\0` */
/* If buffer is nearly full, expand it */
if (bufSize - totalRead < 1 KB) {
if (bufSize >= MAX_FILE_OF_FILE_NAMES_SIZE) {
/* Too large, abort */
free(buf);
return NULL;
}
assert(pos <= bufSize);
{ size_t newBufSize = bufSize * 2;
if (newBufSize > MAX_FILE_OF_FILE_NAMES_SIZE)
newBufSize = MAX_FILE_OF_FILE_NAMES_SIZE;
{ char* newBuf = (char*)realloc(buf, newBufSize);
if (newBuf == NULL) {
free(buf);
return NULL;
}
buf = newBuf;
bufSize = newBufSize;
} } }
}
/* Add null terminator to the end */
buf[totalRead] = '\0';
*totalReadPtr = totalRead;
return buf;
}
/* UTIL_processLines() :
 * Splits @buffer in place into consecutive C strings :
 * every '\n' within the first @bufferSize bytes is overwritten with '\0'.
 * @return : nb of lines found, i.e. nb of '\n' replaced,
 *           +1 when the last byte of the buffer is not a '\0'
 *           (final line not terminated by a newline).
 *           0 when @bufferSize == 0 (@buffer is not accessed in this case). */
static size_t UTIL_processLines(char* buffer, size_t bufferSize)
{
    size_t nbLines = 0;
    size_t pos;

    if (bufferSize == 0) return 0;

    for (pos = 0; pos < bufferSize; pos++) {
        if (buffer[pos] != '\n') continue;
        buffer[pos] = '\0';   /* line separator becomes string terminator */
        nbLines++;
    }

    /* A trailing segment without terminator still counts as a line */
    if (buffer[bufferSize - 1] != '\0') nbLines++;

    return nbLines;
}
/* UTIL_createLinePointers() :
 * Builds a table of pointers to the strings stored back to back in @buffer,
 * each one ended by '\0' (the last one may instead end at @bufferSize).
 * The pointers reference @buffer directly : no string is copied.
 * @buffer     : start of the strings
 * @numLines   : nb of strings expected in @buffer
 * @bufferSize : nb of valid bytes in @buffer; nothing is read beyond it
 * @return : a malloc'ed table of @numLines pointers, to be released with free(),
 *           or NULL if allocation fails or if fewer than @numLines strings
 *           could be found within @bufferSize bytes. */
static const char** UTIL_createLinePointers(char* buffer, size_t numLines, size_t bufferSize)
{
    size_t lineIndex = 0;
    size_t pos = 0;
    void* bufferPtrs;
    const char** linePointers;

    /* Reject table sizes which would wrap around size_t */
    if (numLines > ((size_t)-1) / sizeof(*linePointers)) return NULL;

    /* One `const char*` slot per line */
    bufferPtrs = malloc(numLines * sizeof(*linePointers));
    if (bufferPtrs == NULL) return NULL;
    linePointers = (const char**)bufferPtrs;

    while (lineIndex < numLines && pos < bufferSize) {
        size_t len = 0;
        linePointers[lineIndex++] = buffer+pos;

        /* Find the next null terminator, being careful not to go past the buffer */
        while ((pos + len < bufferSize) && buffer[pos + len] != '\0') {
            len++;
        }

        /* Move past this string and its null terminator */
        pos += len;
        if (pos < bufferSize) pos++;  /* Skip the null terminator if we're not at buffer end */
    }

    /* Verify we processed the expected number of lines */
    if (lineIndex != numLines) {
        /* The buffer holds fewer strings than announced : report failure */
        free(bufferPtrs);
        return NULL;
    }

    return linePointers;
}
/* UTIL_createFileNamesTable_fromFileList() :
 * Loads the list of file names stored in @fileList, one name per line.
 * @fileList is accepted when it is a regular file, a FIFO,
 * or a file-descriptor path (process substitution).
 * @return : a FileNamesTable built by UTIL_assembleFileNamesTable(),
 *           which receives both the content buffer and the pointer table,
 *           or NULL if @fileList cannot be stat'ed, is of an unsupported type,
 *           cannot be opened or read, or contains no line. */
FileNamesTable*
UTIL_createFileNamesTable_fromFileList(const char* fileList)
{
    stat_t fileStat;
    FILE* listFile;
    char* content;
    size_t contentSize = 0;
    size_t nbNames;
    const char** names;

    /* The list must exist ... */
    if (!UTIL_stat(fileList, &fileStat)) return NULL;

    /* ... and be of a supported kind */
    {   int const supported = UTIL_isRegularFileStat(&fileStat)
                           || UTIL_isFIFOStat(&fileStat)
                           || UTIL_isFileDescriptorPipe(fileList);
        if (!supported) return NULL;
    }

    /* Load the whole list in memory */
    listFile = fopen(fileList, "rb");
    if (listFile == NULL) return NULL;
    content = UTIL_readFileContent(listFile, &contentSize);
    fclose(listFile);
    if (content == NULL) return NULL;

    /* Cut the content into lines, then index them */
    nbNames = UTIL_processLines(content, contentSize);
    names = (nbNames == 0) ? NULL
                           : UTIL_createLinePointers(content, nbNames, contentSize);
    if (names == NULL) {
        free(content);
        return NULL;
    }

    /* @names and @content are handed over to the table */
    return UTIL_assembleFileNamesTable(names, nbNames, content);
}
static FileNamesTable*
UTIL_assembleFileNamesTable2(const char** filenames, size_t tableSize, size_t tableCapacity, char* buf)
{

View File

@ -191,6 +191,7 @@ int UTIL_isSameFileStat(const char* file1, const char* file2, const stat_t* file
int UTIL_isCompressedFile(const char* infilename, const char *extensionList[]);
int UTIL_isLink(const char* infilename);
int UTIL_isFIFO(const char* infilename);
int UTIL_isFileDescriptorPipe(const char* filename);
/**
* Returns whether the given file descriptor is a console.
@ -250,13 +251,13 @@ typedef struct
size_t tableCapacity;
} FileNamesTable;
/*! UTIL_createFileNamesTable_fromFileName() :
/*! UTIL_createFileNamesTable_fromFileList() :
* read filenames from @inputFileName, and store them into returned object.
* @return : a FileNamesTable*, or NULL in case of error (ex: @inputFileName doesn't exist).
* Note: inputFileSize must be less than 50MB
*/
FileNamesTable*
UTIL_createFileNamesTable_fromFileName(const char* inputFileName);
UTIL_createFileNamesTable_fromFileList(const char* inputFileName);
/*! UTIL_assembleFileNamesTable() :
* This function takes ownership of its arguments, @filenames and @buf,

View File

@ -1379,7 +1379,7 @@ int main(int argCount, const char* argv[])
size_t const nbFileLists = file_of_names->tableSize;
size_t flNb;
for (flNb=0; flNb < nbFileLists; flNb++) {
FileNamesTable* const fnt = UTIL_createFileNamesTable_fromFileName(file_of_names->fileNames[flNb]);
FileNamesTable* const fnt = UTIL_createFileNamesTable_fromFileList(file_of_names->fileNames[flNb]);
if (fnt==NULL) {
DISPLAYLEVEL(1, "zstd: error reading %s \n", file_of_names->fileNames[flNb]);
CLEAN_RETURN(1);

View File

@ -22,5 +22,7 @@ Trace:FileStat: > UTIL_isRegularFile(out/file.zst)
Trace:FileStat: > UTIL_stat(-1, out/file.zst)
Trace:FileStat: < 0
Trace:FileStat: < 0
Trace:FileStat: > UTIL_isFileDescriptorPipe(out/file.zst)
Trace:FileStat: < 0
zstd: out/file.zst: Permission denied
zstd: can't stat out/file.zst : Permission denied -- ignored

View File

@ -848,6 +848,7 @@ ls tmp* > tmpList
zstd -f tmp1 --filelist=tmpList --filelist=tmpList tmp2 tmp3 # can trigger an overflow of internal file list
rm -rf tmp*
println "\n===> --[no-]content-size tests"
datagen > tmp_contentsize

View File

@ -0,0 +1,92 @@
#!/usr/bin/env bash
# test_process_substitution.bash
# Test zstd's support for process substitution with --filelist
#
# Usage: test_process_substitution.bash [path/to/zstd]
# Exits with 0 when every test passes, non-zero at the first failure.

set -e  # Exit on error

# Process arguments
ZSTD_PATH="${1-zstd}"  # Default to using zstd from PATH
echo "Using zstd executable: $ZSTD_PATH"

TEST_DIR="tmp_process_substit"

# Remove test artifacts whatever the way the script ends
cleanup() {
    rm -rf "$TEST_DIR"
}
trap cleanup EXIT

# fail MESSAGE : report a failed test and stop
fail() {
    echo "✗ $1"
    exit 1
}

# archive_contains ARCHIVE TEXT... :
# succeeds if ARCHIVE decompresses properly and its content includes every TEXT
archive_contains() {
    local archive="$1"
    local decoded
    local expected
    shift
    decoded="$TEST_DIR/extracted/$(basename "$archive" .zst).out"
    "$ZSTD_PATH" -d -f -q "$archive" -o "$decoded" || return 1
    for expected in "$@"; do
        grep -q "$expected" "$decoded" || return 1
    done
}

# Set up test directory and files
echo "Setting up test environment..."
rm -rf "$TEST_DIR"
mkdir -p "$TEST_DIR/extracted"
echo "Content of file 1" > "$TEST_DIR/file1.txt"
echo "Content of file 2" > "$TEST_DIR/file2.txt"
echo "Content of file 3" > "$TEST_DIR/file3.txt"

echo "=== Testing process substitution with --filelist ==="

# Note: the shell creates the redirection target even when zstd produces nothing,
# so tests check for a non-empty file (-s) rather than mere existence (-f).

# Test 1: Basic process substitution with find
echo "Test 1: Basic process substitution (find command)"
"$ZSTD_PATH" --filelist=<(find "$TEST_DIR" -name "*.txt" | sort) -c > "$TEST_DIR/output.zst"
if [ -s "$TEST_DIR/output.zst" ]; then
    echo "✓ Test 1 PASSED: Output file was created"
else
    fail "Test 1 FAILED: Output file was not created"
fi

# Test 2: Process substitution with echo
echo "Test 2: Process substitution (echo command)"
"$ZSTD_PATH" --filelist=<(printf '%s\n' "$TEST_DIR/file1.txt" "$TEST_DIR/file2.txt") -c > "$TEST_DIR/output_echo.zst"
if [ -s "$TEST_DIR/output_echo.zst" ]; then
    echo "✓ Test 2 PASSED: Output file was created"
else
    fail "Test 2 FAILED: Output file was not created"
fi

# Test 3: Process substitution with cat
echo "Test 3: Process substitution (cat command)"
printf '%s\n' "$TEST_DIR/file1.txt" "$TEST_DIR/file3.txt" > "$TEST_DIR/filelist.txt"
"$ZSTD_PATH" --filelist=<(cat "$TEST_DIR/filelist.txt") -c > "$TEST_DIR/output_cat.zst"
if [ -s "$TEST_DIR/output_cat.zst" ]; then
    echo "✓ Test 3 PASSED: Output file was created"
else
    fail "Test 3 FAILED: Output file was not created"
fi

# Test 4: Verify contents of archives
echo "Test 4: Verifying archive contents"
if archive_contains "$TEST_DIR/output.zst" "Content of file 1" "Content of file 2" "Content of file 3" &&
   archive_contains "$TEST_DIR/output_echo.zst" "Content of file 1" "Content of file 2" &&
   archive_contains "$TEST_DIR/output_cat.zst" "Content of file 1" "Content of file 3"; then
    echo "✓ Test 4 PASSED: All files were correctly archived and extracted"
else
    fail "Test 4 FAILED: Not all expected content was found in the extracted file"
fi

# Test 5: Edge case with empty list
# Both accepting and rejecting the list are fine; being killed by a signal is not.
echo "Test 5: Process substitution with empty input"
status=0
"$ZSTD_PATH" --filelist=<(echo "") -c > "$TEST_DIR/output_empty.zst" 2>/dev/null || status=$?
if [ "$status" -ge 128 ]; then
    fail "Test 5 FAILED: zstd terminated abnormally on empty input (status $status)"
elif [ "$status" -eq 0 ]; then
    echo "✓ Test 5 PASSED: Handled empty input gracefully"
else
    echo "✓ Test 5 PASSED: Properly rejected empty input"
fi

echo "All tests completed successfully!"