mirror of
https://github.com/facebook/zstd.git
synced 2025-10-04 00:02:33 -04:00
Add debug logging and simple repro
This commit is contained in:
parent
22b4483163
commit
96fdb9bd16
@ -279,10 +279,16 @@ static fileStats DiB_fileStats(const char** fileNamesTable, int nbFiles, size_t
|
||||
|
||||
for (n=0; n<nbFiles; n++) {
|
||||
S64 const fileSize = DiB_getFileSize(fileNamesTable[n]);
|
||||
DISPLAYLEVEL(1, "[DEBUG] File '%s': size=%lld\n", fileNamesTable[n], (long long)fileSize);
|
||||
|
||||
/* TODO: is there a minimum sample size? What if the file is 1-byte? */
|
||||
if (fileSize == 0) {
|
||||
DISPLAYLEVEL(3, "Sample file '%s' has zero size, skipping...\n", fileNamesTable[n]);
|
||||
continue;
|
||||
} else if (fileSize < 0) {
|
||||
/* BUG: This path is NOT skipped but should be! */
|
||||
DISPLAYLEVEL(1, "[BUG] File '%s' has NEGATIVE size %lld but is NOT skipped!\n",
|
||||
fileNamesTable[n], (long long)fileSize);
|
||||
}
|
||||
|
||||
/* the case where we are breaking up files in sample chunks */
|
||||
@ -290,6 +296,8 @@ static fileStats DiB_fileStats(const char** fileNamesTable, int nbFiles, size_t
|
||||
/* TODO: is there a minimum sample size? Can we have a 1-byte sample? */
|
||||
fs.nbSamples += (int)((fileSize + chunkSize-1) / chunkSize);
|
||||
fs.totalSizeToLoad += fileSize;
|
||||
DISPLAYLEVEL(1, "[DEBUG] After chunked file: nbSamples=%d, totalSizeToLoad=%lld\n",
|
||||
fs.nbSamples, (long long)fs.totalSizeToLoad);
|
||||
}
|
||||
else {
|
||||
/* the case where one file is one sample */
|
||||
@ -303,9 +311,14 @@ static fileStats DiB_fileStats(const char** fileNamesTable, int nbFiles, size_t
|
||||
}
|
||||
fs.nbSamples += 1;
|
||||
fs.totalSizeToLoad += MIN(fileSize, SAMPLESIZE_MAX);
|
||||
DISPLAYLEVEL(1, "[DEBUG] After single file: nbSamples=%d, totalSizeToLoad=%lld\n",
|
||||
fs.nbSamples, (long long)fs.totalSizeToLoad);
|
||||
}
|
||||
}
|
||||
DISPLAYLEVEL(4, "Found training data %d files, %d KB, %d samples\n", nbFiles, (int)(fs.totalSizeToLoad / (1 KB)), fs.nbSamples);
|
||||
DISPLAYLEVEL(1, "[DEBUG FINAL] fileStats: nbSamples=%d, totalSizeToLoad=%lld (%s)\n",
|
||||
fs.nbSamples, (long long)fs.totalSizeToLoad,
|
||||
fs.totalSizeToLoad < 0 ? "NEGATIVE!" : "ok");
|
||||
return fs;
|
||||
}
|
||||
|
||||
@ -344,11 +357,18 @@ int DiB_trainFromFiles(const char* dictFileName, size_t maxDictSize,
|
||||
/* Limit the size of the training data to 2GB */
|
||||
/* TODO: there is opportunity to stop DiB_fileStats() early when the data limit is reached */
|
||||
loadedSize = (size_t)MIN( MIN((S64)maxMem, fs.totalSizeToLoad), MAX_SAMPLES_SIZE );
|
||||
DISPLAYLEVEL(1, "[DEBUG] Memory calc: totalSizeToLoad=%lld, maxMem=%zu, loadedSize=%zu (0x%zx)\n",
|
||||
(long long)fs.totalSizeToLoad, maxMem, loadedSize, loadedSize);
|
||||
if (fs.totalSizeToLoad < 0) {
|
||||
DISPLAYLEVEL(1, "[BUG] totalSizeToLoad is NEGATIVE! This will cause allocation issues!\n");
|
||||
}
|
||||
if (memLimit != 0) {
|
||||
DISPLAYLEVEL(2, "! Warning : setting manual memory limit for dictionary training data at %u MB \n",
|
||||
(unsigned)(memLimit / (1 MB)));
|
||||
loadedSize = (size_t)MIN(loadedSize, memLimit);
|
||||
}
|
||||
DISPLAYLEVEL(1, "[DEBUG] About to malloc: srcBuffer size=%zu, sampleSizes array size=%zu\n",
|
||||
loadedSize+NOISELENGTH, (size_t)(fs.nbSamples * sizeof(size_t)));
|
||||
srcBuffer = malloc(loadedSize+NOISELENGTH);
|
||||
sampleSizes = (size_t*)malloc(fs.nbSamples * sizeof(size_t));
|
||||
}
|
||||
|
44
test_allocation_bug.sh
Executable file
44
test_allocation_bug.sh
Executable file
@ -0,0 +1,44 @@
|
||||
#!/bin/bash
#
# Repro script for the dictionary-training allocation issue.
#
# It trains a dictionary from 5 small valid sample files plus 1000 paths that
# do not exist, then filters the debug output of the instrumented ./zstd
# binary down to the lines that show the size accounting and the allocation.
# The script's own expectation (echoed below) is that each missing file
# contributes -1 to totalSizeToLoad, driving the total negative.
#
# Run from the directory that contains the ./zstd binary. The scratch
# directory ./alloc_test is deleted and recreated on every run.

echo "=== Test to show allocation bug with negative totalSizeToLoad ==="
echo ""
echo "We need at least 5 samples to pass the minimum check"
echo ""

# Stop early if the binary is missing: stderr is merged into the grep filter
# further down, so a launch failure could otherwise go unnoticed.
if [ ! -x ./zstd ]; then
    echo "Error: ./zstd not found or not executable in $(pwd)" >&2
    exit 1
fi

# Clean up
rm -rf alloc_test
mkdir alloc_test

# Create exactly 5 valid files (minimum to not exit early)
echo "Creating 5 valid files (minimum required)..."
for i in {1..5}; do
    echo "data$i" > "alloc_test/good_$i.txt"
done

echo "Valid files created (about 6 bytes each = 30 bytes total)"
echo ""

# We need enough bad files to make totalSizeToLoad negative
# 30 bytes positive, so we need at least 31 bad files
echo "Adding 1000 non-existent files to make totalSizeToLoad very negative..."
echo "Expected: totalSizeToLoad = 30 + (1000 * -1) = -970 bytes"
echo ""

# Build the command as an array so every argument stays a single word and no
# eval / re-parsing of a command string is needed. The glob is expanded here,
# after the good files have been created.
cmd=(./zstd --train alloc_test/good_*.txt)
for i in {1..1000}; do
    cmd+=("alloc_test/BAD_$i")
done
cmd+=(-o alloc_test/dict.zst --maxdict=65536)

echo "Running command..."
echo "================="

# Run and capture ALL debug output related to our issue
# (stderr is merged into stdout so the filter sees both streams)
"${cmd[@]}" 2>&1 | grep -E "\[DEBUG FINAL\]|\[DEBUG\] Memory calc|\[BUG\]|About to malloc|Error|not enough memory"

echo ""
echo "Output should show something like the following:"
echo "1. [DEBUG FINAL] fileStats: totalSizeToLoad=-970 (NEGATIVE!)"
echo "2. [BUG] totalSizeToLoad is NEGATIVE!"
echo "3. [DEBUG] Memory calc: showing huge loadedSize value"
echo "4. Error about memory allocation"
|
Loading…
x
Reference in New Issue
Block a user