mirror of
https://github.com/facebook/zstd.git
synced 2025-10-04 00:02:33 -04:00
Merge pull request #4349 from Cyan4973/devfd
Support process substitution for `--filelist=`
This commit is contained in:
commit
c5926fbab8
5
.github/workflows/dev-long-tests.yml
vendored
5
.github/workflows/dev-long-tests.yml
vendored
@ -28,7 +28,10 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # tag=v4.2.2
|
||||
- name: make test
|
||||
run: make test
|
||||
run: |
|
||||
make test
|
||||
make -j zstd
|
||||
./tests/test_process_substitution.bash ./zstd
|
||||
|
||||
# lasts ~26mn
|
||||
make-test-macos:
|
||||
|
@ -538,14 +538,16 @@ static int FIO_removeFile(const char* path)
|
||||
}
|
||||
|
||||
/** FIO_openSrcFile() :
|
||||
* condition : `srcFileName` must be non-NULL. `prefs` may be NULL.
|
||||
* condition : `srcFileName` must be non-NULL.
|
||||
* optional: `prefs` may be NULL.
|
||||
* @result : FILE* to `srcFileName`, or NULL if it fails */
|
||||
static FILE* FIO_openSrcFile(const FIO_prefs_t* const prefs, const char* srcFileName, stat_t* statbuf)
|
||||
{
|
||||
int allowBlockDevices = prefs != NULL ? prefs->allowBlockDevices : 0;
|
||||
assert(srcFileName != NULL);
|
||||
assert(statbuf != NULL);
|
||||
if (!strcmp (srcFileName, stdinmark)) {
|
||||
|
||||
if (!strcmp(srcFileName, stdinmark)) {
|
||||
DISPLAYLEVEL(4,"Using stdin for input \n");
|
||||
SET_BINARY_MODE(stdin);
|
||||
return stdin;
|
||||
@ -557,8 +559,10 @@ static FILE* FIO_openSrcFile(const FIO_prefs_t* const prefs, const char* srcFile
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Accept regular files, FIFOs, and process substitution file descriptors */
|
||||
if (!UTIL_isRegularFileStat(statbuf)
|
||||
&& !UTIL_isFIFOStat(statbuf)
|
||||
&& !UTIL_isFileDescriptorPipe(srcFileName) /* Process substitution support */
|
||||
&& !(allowBlockDevices && UTIL_isBlockDevStat(statbuf))
|
||||
) {
|
||||
DISPLAYLEVEL(1, "zstd: %s is not a regular file -- ignored \n",
|
||||
@ -655,7 +659,11 @@ FIO_openDstFile(FIO_ctx_t* fCtx, FIO_prefs_t* const prefs,
|
||||
}
|
||||
#endif
|
||||
if (f == NULL) {
|
||||
DISPLAYLEVEL(1, "zstd: %s: %s\n", dstFileName, strerror(errno));
|
||||
if (UTIL_isFileDescriptorPipe(dstFileName)) {
|
||||
DISPLAYLEVEL(1, "zstd: error: no output specified (use -o or -c). \n");
|
||||
} else {
|
||||
DISPLAYLEVEL(1, "zstd: %s: %s\n", dstFileName, strerror(errno));
|
||||
}
|
||||
} else {
|
||||
/* An increased buffer size can provide a significant performance
|
||||
* boost on some platforms. Note that providing a NULL buf with a
|
||||
|
249
programs/util.c
249
programs/util.c
@ -151,7 +151,8 @@ int UTIL_requireUserConfirmation(const char* prompt, const char* abortMsg,
|
||||
/*-*************************************
|
||||
* Constants
|
||||
***************************************/
|
||||
#define LIST_SIZE_INCREASE (8*1024)
|
||||
#define KB * (1 << 10)
|
||||
#define LIST_SIZE_INCREASE (8 KB)
|
||||
#define MAX_FILE_OF_FILE_NAMES_SIZE (1<<20)*50
|
||||
|
||||
|
||||
@ -448,6 +449,26 @@ int UTIL_isFIFOStat(const stat_t* statbuf)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* UTIL_isFileDescriptorPipe() :
 * Name-based detection of file-descriptor paths, as handed out by the shell
 * for process substitution, e.g. `<(cmd)`.
 * Only the spelling of @filename is examined; the file itself is never inspected.
 * condition : @filename must be non-NULL.
 * @return : 1 if @filename starts with "/dev/fd/" or "/proc/self/fd/", 0 otherwise. */
int UTIL_isFileDescriptorPipe(const char* filename)
{
    static const char devFdPrefix[] = "/dev/fd/";
    static const char procFdPrefix[] = "/proc/self/fd/";
    int isFdPath;

    UTIL_TRACE_CALL("UTIL_isFileDescriptorPipe(%s)", filename);

    /* sizeof()-1 : length of the prefix, without its terminating '\0' */
    isFdPath = (strncmp(filename, devFdPrefix, sizeof(devFdPrefix) - 1) == 0)
            || (strncmp(filename, procFdPrefix, sizeof(procFdPrefix) - 1) == 0);

    UTIL_TRACE_RET(isFdPath);
    return isFdPath;
}
|
||||
|
||||
/* UTIL_isBlockDevStat : distinguish block devices */
|
||||
int UTIL_isBlockDevStat(const stat_t* statbuf)
|
||||
{
|
||||
@ -614,101 +635,157 @@ U64 UTIL_getTotalFileSize(const char* const * fileNamesTable, unsigned nbFiles)
|
||||
}
|
||||
|
||||
|
||||
/* condition : @file must be valid, and not have reached its end.
|
||||
* @return : length of line written into @buf, ended with `\0` instead of '\n',
|
||||
* or 0, if there is no new line */
|
||||
static size_t readLineFromFile(char* buf, size_t len, FILE* file)
|
||||
/* Read the entire content of a file into a buffer with progressive resizing */
|
||||
static char* UTIL_readFileContent(FILE* inFile, size_t* totalReadPtr)
|
||||
{
|
||||
assert(!feof(file));
|
||||
if ( fgets(buf, (int) len, file) == NULL ) return 0;
|
||||
{ size_t linelen = strlen(buf);
|
||||
if (strlen(buf)==0) return 0;
|
||||
if (buf[linelen-1] == '\n') linelen--;
|
||||
buf[linelen] = '\0';
|
||||
return linelen+1;
|
||||
}
|
||||
}
|
||||
size_t bufSize = 64 KB; /* Start with a reasonable buffer size */
|
||||
size_t totalRead = 0;
|
||||
size_t bytesRead = 0;
|
||||
char* buf = (char*)malloc(bufSize);
|
||||
if (buf == NULL) return NULL;
|
||||
|
||||
/* Conditions :
|
||||
* size of @inputFileName file must be < @dstCapacity
|
||||
* @dst must be initialized
|
||||
* @return : nb of lines
|
||||
* or -1 if there's an error
|
||||
*/
|
||||
static int
|
||||
readLinesFromFile(void* dst, size_t dstCapacity,
|
||||
const char* inputFileName)
|
||||
{
|
||||
int nbFiles = 0;
|
||||
size_t pos = 0;
|
||||
char* const buf = (char*)dst;
|
||||
FILE* const inputFile = fopen(inputFileName, "r");
|
||||
|
||||
assert(dst != NULL);
|
||||
/* Read the file incrementally */
|
||||
while ((bytesRead = fread(buf + totalRead, 1, bufSize - totalRead - 1, inFile)) > 0) {
|
||||
totalRead += bytesRead;
|
||||
|
||||
if(!inputFile) {
|
||||
if (g_utilDisplayLevel >= 1) perror("zstd:util:readLinesFromFile");
|
||||
return -1;
|
||||
}
|
||||
|
||||
while ( !feof(inputFile) ) {
|
||||
size_t const lineLength = readLineFromFile(buf+pos, dstCapacity-pos, inputFile);
|
||||
if (lineLength == 0) break;
|
||||
assert(pos + lineLength <= dstCapacity); /* '=' for inputFile not terminated with '\n' */
|
||||
pos += lineLength;
|
||||
++nbFiles;
|
||||
}
|
||||
|
||||
CONTROL( fclose(inputFile) == 0 );
|
||||
|
||||
return nbFiles;
|
||||
}
|
||||
|
||||
/*Note: buf is not freed in case function successfully created table because filesTable->fileNames[0] = buf*/
|
||||
FileNamesTable*
|
||||
UTIL_createFileNamesTable_fromFileName(const char* inputFileName)
|
||||
{
|
||||
size_t nbFiles = 0;
|
||||
char* buf;
|
||||
size_t bufSize;
|
||||
stat_t statbuf;
|
||||
|
||||
if (!UTIL_stat(inputFileName, &statbuf) || !UTIL_isRegularFileStat(&statbuf))
|
||||
return NULL;
|
||||
|
||||
{ U64 const inputFileSize = UTIL_getFileSizeStat(&statbuf);
|
||||
if(inputFileSize > MAX_FILE_OF_FILE_NAMES_SIZE)
|
||||
return NULL;
|
||||
bufSize = (size_t)(inputFileSize + 1); /* (+1) to add '\0' at the end of last filename */
|
||||
}
|
||||
|
||||
buf = (char*) malloc(bufSize);
|
||||
CONTROL( buf != NULL );
|
||||
|
||||
{ int const ret_nbFiles = readLinesFromFile(buf, bufSize, inputFileName);
|
||||
|
||||
if (ret_nbFiles <= 0) {
|
||||
free(buf);
|
||||
return NULL;
|
||||
}
|
||||
nbFiles = (size_t)ret_nbFiles;
|
||||
}
|
||||
|
||||
{ const char** filenamesTable = (const char**) malloc(nbFiles * sizeof(*filenamesTable));
|
||||
CONTROL(filenamesTable != NULL);
|
||||
|
||||
{ size_t fnb, pos = 0;
|
||||
for (fnb = 0; fnb < nbFiles; fnb++) {
|
||||
filenamesTable[fnb] = buf+pos;
|
||||
pos += strlen(buf+pos)+1; /* +1 for the finishing `\0` */
|
||||
/* If buffer is nearly full, expand it */
|
||||
if (bufSize - totalRead < 1 KB) {
|
||||
if (bufSize >= MAX_FILE_OF_FILE_NAMES_SIZE) {
|
||||
/* Too large, abort */
|
||||
free(buf);
|
||||
return NULL;
|
||||
}
|
||||
assert(pos <= bufSize);
|
||||
|
||||
{ size_t newBufSize = bufSize * 2;
|
||||
if (newBufSize > MAX_FILE_OF_FILE_NAMES_SIZE)
|
||||
newBufSize = MAX_FILE_OF_FILE_NAMES_SIZE;
|
||||
|
||||
{ char* newBuf = (char*)realloc(buf, newBufSize);
|
||||
if (newBuf == NULL) {
|
||||
free(buf);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
buf = newBuf;
|
||||
bufSize = newBufSize;
|
||||
} } }
|
||||
}
|
||||
|
||||
/* Add null terminator to the end */
|
||||
buf[totalRead] = '\0';
|
||||
*totalReadPtr = totalRead;
|
||||
|
||||
return buf;
|
||||
}
|
||||
|
||||
/* UTIL_processLines() :
 * Split @buffer in place into '\0'-terminated lines :
 * every '\n' among its first @bufferSize bytes is overwritten with '\0'.
 * No other byte is modified (a '\r' preceding '\n' is left in place).
 * @return : nb of lines found, i.e. one per '\n', plus one more when the last
 *           byte is not a terminator (final line without trailing newline).
 *           Empty lines are counted. Returns 0 when @bufferSize == 0. */
static size_t UTIL_processLines(char* buffer, size_t bufferSize)
{
    size_t lineCount = 0;
    size_t pos;

    for (pos = 0; pos < bufferSize; pos++) {
        if (buffer[pos] == '\n') {
            buffer[pos] = '\0';
            lineCount++;
        }
    }

    /* A last line lacking its '\n' has not been counted by the loop above */
    if (bufferSize > 0 && buffer[bufferSize-1] != '\0') {
        lineCount++;
    }

    return lineCount;
}
|
||||
|
||||
/* UTIL_createLinePointers() :
 * Build the table of line starts for a @buffer whose lines are '\0'-terminated.
 * @buffer : @bufferSize bytes of content; each line ends with '\0',
 *           except possibly the last one, which may run up to @bufferSize.
 * @numLines : nb of lines expected within @buffer.
 * @return : a malloc'ed array of @numLines pointers, each one pointing at the
 *           start of a line inside @buffer (content is referenced, not copied),
 *           or NULL if allocation fails, or if @buffer holds fewer than
 *           @numLines lines. The caller is in charge of free()ing the array. */
static const char** UTIL_createLinePointers(char* buffer, size_t numLines, size_t bufferSize)
{
    const char** linePointers;
    size_t lineIndex = 0;
    size_t pos = 0;

    /* Refuse a table whose byte size would wrap around size_t */
    if (numLines > ((size_t)-1) / sizeof(*linePointers)) return NULL;

    linePointers = (const char**)malloc(numLines * sizeof(*linePointers));
    if (linePointers == NULL) return NULL;

    while (lineIndex < numLines && pos < bufferSize) {
        const char* const lineStart = buffer + pos;
        /* Bounded search : never looks beyond the @bufferSize bytes of content */
        const char* const lineEnd = (const char*)memchr(lineStart, '\0', bufferSize - pos);

        linePointers[lineIndex++] = lineStart;

        if (lineEnd == NULL) {
            /* Last line runs up to the end of the buffer : no terminator to skip */
            pos = bufferSize;
        } else {
            /* Resume right after the terminator */
            pos = (size_t)(lineEnd - buffer) + 1;
        }
    }

    /* Fewer lines than announced : the table would contain unset entries */
    if (lineIndex != numLines) {
        free((void*)linePointers);
        return NULL;
    }

    return linePointers;
}
|
||||
|
||||
FileNamesTable*
|
||||
UTIL_createFileNamesTable_fromFileList(const char* fileList)
|
||||
{
|
||||
stat_t statbuf;
|
||||
char* buffer = NULL;
|
||||
size_t numLines = 0;
|
||||
size_t bufferSize = 0;
|
||||
|
||||
/* Check if the input is a valid file */
|
||||
if (!UTIL_stat(fileList, &statbuf)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Check if the input is a supported type */
|
||||
if (!UTIL_isRegularFileStat(&statbuf) &&
|
||||
!UTIL_isFIFOStat(&statbuf) &&
|
||||
!UTIL_isFileDescriptorPipe(fileList)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Open the input file */
|
||||
{ FILE* const inFile = fopen(fileList, "rb");
|
||||
if (inFile == NULL) return NULL;
|
||||
|
||||
/* Read the file content */
|
||||
buffer = UTIL_readFileContent(inFile, &bufferSize);
|
||||
fclose(inFile);
|
||||
}
|
||||
|
||||
if (buffer == NULL) return NULL;
|
||||
|
||||
/* Process lines */
|
||||
numLines = UTIL_processLines(buffer, bufferSize);
|
||||
if (numLines == 0) {
|
||||
free(buffer);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Create line pointers */
|
||||
{ const char** linePointers = UTIL_createLinePointers(buffer, numLines, bufferSize);
|
||||
if (linePointers == NULL) {
|
||||
free(buffer);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Create the final table */
|
||||
return UTIL_assembleFileNamesTable(linePointers, numLines, buffer);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static FileNamesTable*
|
||||
UTIL_assembleFileNamesTable2(const char** filenames, size_t tableSize, size_t tableCapacity, char* buf)
|
||||
{
|
||||
|
@ -191,6 +191,7 @@ int UTIL_isSameFileStat(const char* file1, const char* file2, const stat_t* file
|
||||
int UTIL_isCompressedFile(const char* infilename, const char *extensionList[]);
|
||||
int UTIL_isLink(const char* infilename);
|
||||
int UTIL_isFIFO(const char* infilename);
|
||||
int UTIL_isFileDescriptorPipe(const char* filename);
|
||||
|
||||
/**
|
||||
* Returns whether the given file descriptor is a console.
|
||||
@ -250,13 +251,13 @@ typedef struct
|
||||
size_t tableCapacity;
|
||||
} FileNamesTable;
|
||||
|
||||
/*! UTIL_createFileNamesTable_fromFileName() :
|
||||
/*! UTIL_createFileNamesTable_fromFileList() :
|
||||
* read filenames from @inputFileName, and store them into returned object.
|
||||
* @return : a FileNamesTable*, or NULL in case of error (ex: @inputFileName doesn't exist).
|
||||
* Note: inputFileSize must be less than 50MB
|
||||
*/
|
||||
FileNamesTable*
|
||||
UTIL_createFileNamesTable_fromFileName(const char* inputFileName);
|
||||
UTIL_createFileNamesTable_fromFileList(const char* inputFileName);
|
||||
|
||||
/*! UTIL_assembleFileNamesTable() :
|
||||
* This function takes ownership of its arguments, @filenames and @buf,
|
||||
|
@ -1379,7 +1379,7 @@ int main(int argCount, const char* argv[])
|
||||
size_t const nbFileLists = file_of_names->tableSize;
|
||||
size_t flNb;
|
||||
for (flNb=0; flNb < nbFileLists; flNb++) {
|
||||
FileNamesTable* const fnt = UTIL_createFileNamesTable_fromFileName(file_of_names->fileNames[flNb]);
|
||||
FileNamesTable* const fnt = UTIL_createFileNamesTable_fromFileList(file_of_names->fileNames[flNb]);
|
||||
if (fnt==NULL) {
|
||||
DISPLAYLEVEL(1, "zstd: error reading %s \n", file_of_names->fileNames[flNb]);
|
||||
CLEAN_RETURN(1);
|
||||
|
@ -22,5 +22,7 @@ Trace:FileStat: > UTIL_isRegularFile(out/file.zst)
|
||||
Trace:FileStat: > UTIL_stat(-1, out/file.zst)
|
||||
Trace:FileStat: < 0
|
||||
Trace:FileStat: < 0
|
||||
Trace:FileStat: > UTIL_isFileDescriptorPipe(out/file.zst)
|
||||
Trace:FileStat: < 0
|
||||
zstd: out/file.zst: Permission denied
|
||||
zstd: can't stat out/file.zst : Permission denied -- ignored
|
||||
|
@ -848,6 +848,7 @@ ls tmp* > tmpList
|
||||
zstd -f tmp1 --filelist=tmpList --filelist=tmpList tmp2 tmp3 # can trigger an overflow of internal file list
|
||||
rm -rf tmp*
|
||||
|
||||
|
||||
println "\n===> --[no-]content-size tests"
|
||||
|
||||
datagen > tmp_contentsize
|
||||
|
92
tests/test_process_substitution.bash
Executable file
92
tests/test_process_substitution.bash
Executable file
@ -0,0 +1,92 @@
|
||||
#!/usr/bin/env bash
# test_process_substitution.bash
# Test zstd's support for process substitution with --filelist
#
# Usage: test_process_substitution.bash [path/to/zstd]
#   When no path is given, `zstd` is looked up in PATH.
#
# Each test compresses files named by a list provided through `<(cmd)`,
# then checks that the archive decodes to exactly the concatenation of the
# listed files. Checking only that the output file exists would prove nothing:
# the shell creates the redirection target before zstd even starts.

set -eu  # Exit on error, and on use of an unset variable

ZSTD_PATH="${1:-zstd}"
echo "Using zstd executable: $ZSTD_PATH"

# Set up test directory and files
echo "Setting up test environment..."
TEST_DIR="tmp_process_substit"
rm -rf "$TEST_DIR"
mkdir -p "$TEST_DIR/extracted"
# Remove test artifacts on every exit path, including failures
trap 'rm -rf "$TEST_DIR"' EXIT

echo "Content of file 1" > "$TEST_DIR/file1.txt"
echo "Content of file 2" > "$TEST_DIR/file2.txt"
echo "Content of file 3" > "$TEST_DIR/file3.txt"

# verify_archive ID LABEL ARCHIVE FILE...
# Decompress ARCHIVE, and require the result to be byte-identical
# to the concatenation of FILE... (in that order).
verify_archive() {
    local id="$1" label="$2" archive="$3"
    shift 3
    local decoded="$TEST_DIR/extracted/$id.out"
    local expected="$TEST_DIR/extracted/$id.expected"

    "$ZSTD_PATH" -d -f -q "$archive" -o "$decoded"
    cat "$@" > "$expected"

    if cmp -s "$decoded" "$expected"; then
        echo "✓ $label PASSED: archive decodes to the listed files"
    else
        echo "✗ $label FAILED: decoded content differs from the listed files"
        exit 1
    fi
}

echo "=== Testing process substitution with --filelist ==="

# Test 1: Basic process substitution with find
# (LC_ALL=C makes the order of the list independent from the user's locale)
echo "Test 1: Basic process substitution (find command)"
"$ZSTD_PATH" --filelist=<(find "$TEST_DIR" -name "*.txt" | LC_ALL=C sort) -c > "$TEST_DIR/output.zst"
verify_archive find "Test 1" "$TEST_DIR/output.zst" \
    "$TEST_DIR/file1.txt" "$TEST_DIR/file2.txt" "$TEST_DIR/file3.txt"

# Test 2: Process substitution with printf (portable replacement for `echo -e`)
echo "Test 2: Process substitution (printf command)"
"$ZSTD_PATH" --filelist=<(printf '%s\n' "$TEST_DIR/file1.txt" "$TEST_DIR/file2.txt") -c > "$TEST_DIR/output_echo.zst"
verify_archive echo "Test 2" "$TEST_DIR/output_echo.zst" \
    "$TEST_DIR/file1.txt" "$TEST_DIR/file2.txt"

# Test 3: Process substitution with cat
echo "Test 3: Process substitution (cat command)"
printf '%s\n' "$TEST_DIR/file1.txt" "$TEST_DIR/file3.txt" > "$TEST_DIR/filelist.txt"
"$ZSTD_PATH" --filelist=<(cat "$TEST_DIR/filelist.txt") -c > "$TEST_DIR/output_cat.zst"
verify_archive cat "Test 3" "$TEST_DIR/output_cat.zst" \
    "$TEST_DIR/file1.txt" "$TEST_DIR/file3.txt"

# Test 4: Edge case with empty list
# zstd may either accept or reject such a list, but it must not crash:
# an exit status >= 128 means the process was terminated by a signal.
echo "Test 4: Process substitution with empty input"
status=0
"$ZSTD_PATH" --filelist=<(echo "") -c > "$TEST_DIR/output_empty.zst" 2>/dev/null || status=$?

if [ "$status" -ge 128 ]; then
    echo "✗ Test 4 FAILED: zstd crashed on empty input (exit status $status)"
    exit 1
fi
echo "✓ Test 4 PASSED: empty input handled without crashing (exit status $status)"

echo "All tests completed successfully!"
|
||||
|
Loading…
x
Reference in New Issue
Block a user