This commit is contained in:
jinyaoguo 2025-06-21 12:57:12 -04:00
commit a74f7fcabd
30 changed files with 1223 additions and 310 deletions

153
.github/workflows/cmake-tests.yml vendored Normal file
View File

@ -0,0 +1,153 @@
# Builds and tests zstd through its CMake project on pull requests, covering
# several platforms, generators and a path-with-spaces edge case.
name: cmake-tests

on:
  pull_request:
    branches:
      - dev
      - release
      - actionsTest

permissions: read-all

# One active run per ref: a newer run cancels the one still in progress.
concurrency:
  group: 'cmake-${{ github.ref }}'
  cancel-in-progress: true

env:
  # Durations substituted into the -T test flags used by the jobs
  QUICK_TEST_TIME: '30s'
  STANDARD_TEST_TIME: '1mn'
  # Baseline CMake switches: warnings as errors, test suite enabled
  COMMON_CMAKE_FLAGS: '-DCMAKE_COMPILE_WARNING_AS_ERROR=ON -DZSTD_BUILD_TESTS=ON'

jobs:
# Ubuntu build driven through the top-level Makefile's `cmakebuild` target,
# so the make-to-cmake integration is exercised together with the CMake
# project itself.
cmake-ubuntu-basic:
  name: "CMake Ubuntu Basic Build"
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # tag=v4.2.2
    - name: Install dependencies
      run: |
        # apt-get is the script-stable front end; refresh the package index
        # first and pass -y so the install can never wait on a prompt.
        sudo apt-get update
        sudo apt-get install -y liblzma-dev
    - name: CMake build and test via make
      run: |
        # Run the make wrapper with the standard (1mn) test duration
        FUZZERTEST=-T${{ env.STANDARD_TEST_TIME }} ZSTREAM_TESTTIME=-T${{ env.STANDARD_TEST_TIME }} make cmakebuild V=1
# Edge case: the repository is checked out into a directory whose name
# contains spaces. The same configure / build / install sequence runs on
# Linux, Windows (NMake) and macOS.
cmake-cross-platform-spaces:
  name: 'CMake Cross-Platform (Spaces in Path)'
  runs-on: ${{ matrix.os }}
  env:
    SRC_DIR: 'source directory with spaces'
  strategy:
    matrix:
      include:
        - os: ubuntu-latest
          generator: 'Unix Makefiles'
          name: 'Linux'
        - os: windows-latest
          generator: 'NMake Makefiles'
          name: 'Windows NMake'
        - os: macos-latest
          generator: 'Unix Makefiles'
          name: 'macOS'
  steps:
    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # tag=v4.2.2
      with:
        path: '${{ env.SRC_DIR }}'
    # Only the NMake leg runs the MSVC developer-command setup action
    - uses: ilammy/msvc-dev-cmd@0b201ec74fa43914dc39ae48a89fd1d8cb592756  # v1.13.0
      if: matrix.generator == 'NMake Makefiles'
    - name: 'CMake build and install (${{ matrix.name }})'
      run: |
        # Test Release build with installation to verify packaging
        cmake -S "${{ env.SRC_DIR }}/build/cmake" -B build -DBUILD_TESTING=ON -G "${{ matrix.generator }}" -DCMAKE_BUILD_TYPE=Release --install-prefix "${{ runner.temp }}/install"
        cmake --build build --config Release
        cmake --install build --config Release
# Windows matrix on the windows-2022 image: MSVC (x64 and Win32), MinGW and
# Clang-CL (plain and AVX2), each configured, built and tested in Debug.
# The ARM64 entry is kept below but commented out.
cmake-windows-comprehensive:
  name: 'CMake Windows VS2022 (${{ matrix.name }})'
  runs-on: ${{ matrix.runner }}
  strategy:
    # Let every leg finish even when another one fails
    fail-fast: false
    matrix:
      include:
        - generator: 'Visual Studio 17 2022'
          flags: '-A x64'
          name: 'MSVC x64'
          runner: 'windows-2022'
          cmake_extra_flags: '-DCMAKE_COMPILE_WARNING_AS_ERROR=ON -DZSTD_BUILD_TESTS=ON'
        - generator: 'Visual Studio 17 2022'
          flags: '-A Win32'
          name: 'MSVC Win32'
          runner: 'windows-2022'
          cmake_extra_flags: '-DCMAKE_COMPILE_WARNING_AS_ERROR=ON -DZSTD_BUILD_TESTS=ON'
        - generator: 'Visual Studio 17 2022'
          flags: '-A x64'
          name: 'MSVC x64 (No ZSTD_BUILD_TESTS)'
          runner: 'windows-2022'
          # ZSTD_BUILD_TESTS is deliberately left out: this leg covers the
          # configuration that reproduces the CXX language configuration bug
          cmake_extra_flags: '-DCMAKE_COMPILE_WARNING_AS_ERROR=ON'
        # - generator: "Visual Studio 17 2022"
        #   flags: "-A ARM64"
        #   name: "MSVC ARM64"
        #   runner: "windows-2022-arm64"  # Disabled due to very long queue times
        - generator: 'MinGW Makefiles'
          flags: ''
          name: 'MinGW'
          runner: 'windows-2022'
          cmake_extra_flags: '-DCMAKE_COMPILE_WARNING_AS_ERROR=ON -DZSTD_BUILD_TESTS=ON'
        - generator: 'Visual Studio 17 2022'
          flags: '-T ClangCL'
          name: 'Clang-CL'
          runner: 'windows-2022'
          cmake_extra_flags: '-DCMAKE_COMPILE_WARNING_AS_ERROR=ON -DZSTD_BUILD_TESTS=ON'
        - generator: 'Visual Studio 17 2022'
          flags: '-T ClangCL -A x64 -DCMAKE_C_FLAGS=/arch:AVX2'
          name: 'Clang-CL AVX2'
          runner: 'windows-2022'
          cmake_extra_flags: '-DCMAKE_COMPILE_WARNING_AS_ERROR=ON -DZSTD_BUILD_TESTS=ON'
  steps:
    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # tag=v4.2.2
    - name: Add MSBuild to PATH
      uses: microsoft/setup-msbuild@6fb02220983dee41ce7ae257b6f4d8f9bf5ed4ce  # tag=v2.0.0
    # Configure, build and test are separate steps so a failure is attributed
    # to the right phase; all three work inside build\cmake\build.
    - name: 'Configure CMake (${{ matrix.name }})'
      run: |
        cd build\cmake
        mkdir build
        cd build
        cmake.exe -G "${{matrix.generator}}" ${{matrix.flags}} -DCMAKE_BUILD_TYPE=Debug ${{ matrix.cmake_extra_flags }} -DZSTD_ZSTREAM_FLAGS=-T${{ env.QUICK_TEST_TIME }} -DZSTD_FUZZER_FLAGS=-T${{ env.QUICK_TEST_TIME }} -DZSTD_FULLBENCH_FLAGS=-i0 ..
    - name: 'Build (${{ matrix.name }})'
      run: |
        cd build\cmake\build
        cmake.exe --build .
    - name: 'Test (${{ matrix.name }})'
      run: |
        cd build\cmake\build
        ctest.exe -V -C Debug
# Native Apple Silicon leg: configure, build and run the CMake test suite on
# an ARM64 macOS runner using the shared CMake flags.
cmake-macos-arm64:
  name: "CMake macOS ARM64 (Apple Silicon)"
  runs-on: macos-14  # ARM64 runner
  steps:
    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # tag=v4.2.2
    - name: "CMake build and test (ARM64)"
      run: |
        # Release configure with the common flags and quick test durations
        # (no architecture-specific flags are passed), then build and test
        cd build/cmake
        mkdir build
        cd build
        cmake -DCMAKE_BUILD_TYPE=Release ${{ env.COMMON_CMAKE_FLAGS }} -DZSTD_ZSTREAM_FLAGS=-T${{ env.QUICK_TEST_TIME }} -DZSTD_FUZZER_FLAGS=-T${{ env.QUICK_TEST_TIME }} -DZSTD_FULLBENCH_FLAGS=-i1 ..
        make -j$(sysctl -n hw.ncpu)
        ctest -V

View File

@ -72,40 +72,6 @@ jobs:
# candidate test (for discussion) : underlink test
# LDFLAGS=-Wl,--no-undefined : will make the linker fail if dll is underlinked
# Drives the CMake build and its tests through the top-level Makefile's
# `cmakebuild` target, with one-minute fuzzer and zstream test durations.
cmake-build-and-test-check:
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # tag=v4.2.2
    - name: 'cmake build and test'
      run: |
        sudo apt install liblzma-dev
        FUZZERTEST=-T1mn ZSTREAM_TESTTIME=-T1mn make cmakebuild V=1
# Checks out the sources into a directory whose name contains spaces, then
# configures, builds and installs a Release build on each matrix OS.
cmake-source-directory-with-spaces:
  runs-on: ${{ matrix.os }}
  env:
    SRC_DIR: 'source directory with spaces'
  strategy:
    matrix:
      include:
        - os: ubuntu-latest
          generator: 'Unix Makefiles'
        - os: windows-latest
          generator: 'NMake Makefiles'
        - os: macos-latest
          generator: 'Unix Makefiles'
  steps:
    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # tag=v4.2.2
      with:
        path: '${{ env.SRC_DIR }}'
    # Only the NMake leg runs the MSVC developer-command setup action
    - uses: ilammy/msvc-dev-cmd@0b201ec74fa43914dc39ae48a89fd1d8cb592756  # v1.13.0
      if: matrix.generator == 'NMake Makefiles'
    - name: 'cmake build on a source directory with spaces'
      run: |
        cmake -S "${{ env.SRC_DIR }}/build/cmake" -B build -DBUILD_TESTING=ON -G "${{ matrix.generator }}" -DCMAKE_BUILD_TYPE=Release --install-prefix "${{ runner.temp }}/install"
        cmake --build build --config Release
        cmake --install build --config Release
cpp-gnu90-c99-compatibility:
runs-on: ubuntu-latest
steps:
@ -339,34 +305,6 @@ jobs:
run: |
meson install -C builddir --destdir staging/
# Windows CMake build and test on the windows-2022 image, once per
# generator / toolset combination listed in the matrix.
cmake-visual-2022:
  strategy:
    matrix:
      include:
        - generator: "Visual Studio 17 2022"
          flags: "-A x64"
        - generator: "Visual Studio 17 2022"
          flags: "-A Win32"
        # No `flags` here: ${{matrix.flags}} expands to nothing for MinGW
        - generator: "MinGW Makefiles"
        - generator: "Visual Studio 17 2022"
          flags: "-T ClangCL"
        - generator: "Visual Studio 17 2022"
          flags: "-T ClangCL -A x64 -DCMAKE_C_FLAGS=/arch:AVX2"
  runs-on: windows-2022
  steps:
    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # tag=v4.2.2
    - name: Add MSBuild to PATH
      uses: microsoft/setup-msbuild@6fb02220983dee41ce7ae257b6f4d8f9bf5ed4ce  # tag=v2.0.0
    - name: Build & Test
      # Use the `github` context for the workspace path. The `env` context
      # only holds variables declared in the workflow, so
      # env.GITHUB_WORKSPACE expanded to an empty string.
      working-directory: ${{ github.workspace }}
      run: |
        cd build\cmake
        mkdir build
        cd build
        cmake.exe -G "${{matrix.generator}}" ${{matrix.flags}} -DCMAKE_BUILD_TYPE=Debug -DCMAKE_COMPILE_WARNING_AS_ERROR=ON -DZSTD_BUILD_TESTS:BOOL=ON -DZSTD_ZSTREAM_FLAGS=-T30s -DZSTD_FUZZER_FLAGS=-T30s -DZSTD_FULLBENCH_FLAGS=-i0 ..
        cmake.exe --build .
        ctest.exe -V -C Debug
msbuild-visual-studio:
strategy:
fail-fast: false # 'false' means Don't stop matrix workflows even if some matrix failed.
@ -497,6 +435,8 @@ jobs:
make clean
LDFLAGS="-static" CC=$XCC QEMU_SYS=$XEMU make -j check
LDFLAGS="-static" CC=$XCC QEMU_SYS=$XEMU make -j -C tests test-cli-tests
CFLAGS="-march=armv8.2-a+sve2" LDFLAGS="-static" CC=$XCC QEMU_SYS=$XEMU make -j check
CFLAGS="-march=armv8.2-a+sve2" LDFLAGS="-static" CC=$XCC QEMU_SYS=$XEMU make -j -C tests test-cli-tests
# This test is only compatible with standard libraries that support BTI (Branch Target Identification).
# Unfortunately, the standard library provided on Ubuntu 24.04 does not have this feature enabled.
# make clean

View File

@ -9,226 +9,73 @@
cmake_minimum_required(VERSION 3.10 FATAL_ERROR)
# As of 2018-12-26 ZSTD has been validated to build with cmake version 3.13.2 new policies.
# Set and use the newest cmake policies that are validated to work
set(ZSTD_MAX_VALIDATED_CMAKE_MAJOR_VERSION "3")
set(ZSTD_MAX_VALIDATED_CMAKE_MINOR_VERSION "13") #Policies never changed at PATCH level
if("${ZSTD_MAX_VALIDATED_CMAKE_MAJOR_VERSION}" EQUAL "${CMAKE_MAJOR_VERSION}" AND
"${ZSTD_MAX_VALIDATED_CMAKE_MINOR_VERSION}" GREATER "${CMAKE_MINOR_VERSION}")
set(ZSTD_CMAKE_POLICY_VERSION "${CMAKE_VERSION}")
else()
set(ZSTD_CMAKE_POLICY_VERSION "${ZSTD_MAX_VALIDATED_CMAKE_MAJOR_VERSION}.${ZSTD_MAX_VALIDATED_CMAKE_MINOR_VERSION}.0")
endif()
cmake_policy(VERSION ${ZSTD_CMAKE_POLICY_VERSION})
set(CMAKE_BUILD_WITH_INSTALL_RPATH on)
#-----------------------------------------------------------------------------
# Setup CMake environment
#-----------------------------------------------------------------------------
set(CMAKE_BUILD_WITH_INSTALL_RPATH ON)
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules")
# Define project paths
set(ZSTD_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../..")
set(LIBRARY_DIR ${ZSTD_SOURCE_DIR}/lib)
# Parse version
include(GetZstdLibraryVersion)
GetZstdLibraryVersion(${LIBRARY_DIR}/zstd.h zstd_VERSION_MAJOR zstd_VERSION_MINOR zstd_VERSION_PATCH)
set(ZSTD_SHORT_VERSION "${zstd_VERSION_MAJOR}.${zstd_VERSION_MINOR}")
set(ZSTD_FULL_VERSION "${zstd_VERSION_MAJOR}.${zstd_VERSION_MINOR}.${zstd_VERSION_PATCH}")
project(zstd
VERSION "${ZSTD_FULL_VERSION}"
LANGUAGES C # Main library is in C
ASM # And ASM
)
message(STATUS "ZSTD VERSION: ${zstd_VERSION}")
set(zstd_HOMEPAGE_URL "https://facebook.github.io/zstd")
set(zstd_DESCRIPTION "Zstandard is a real-time compression algorithm, providing high compression ratios.")
# Set a default build type if none was specified
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
message(STATUS "Setting build type to 'Release' as none was specified.")
set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." FORCE)
# Set the possible values of build type for cmake-gui
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
endif()
include(GNUInstallDirs)
# Always hide XXHash symbols
add_definitions(-DXXH_NAMESPACE=ZSTD_)
#-----------------------------------------------------------------------------
# Installation variables
# Configure CMake policies and version
#-----------------------------------------------------------------------------
include(ZstdVersion)
#-----------------------------------------------------------------------------
# Project declaration
#-----------------------------------------------------------------------------
project(zstd
VERSION "${ZSTD_FULL_VERSION}"
LANGUAGES C ASM # Main library is in C and ASM
HOMEPAGE_URL "${zstd_HOMEPAGE_URL}"
DESCRIPTION "${zstd_DESCRIPTION}"
)
#-----------------------------------------------------------------------------
# Build type configuration
#-----------------------------------------------------------------------------
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
message(STATUS "Setting build type to 'Release' as none was specified.")
set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." FORCE)
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
endif()
#-----------------------------------------------------------------------------
# Include standard modules
#-----------------------------------------------------------------------------
include(GNUInstallDirs)
#-----------------------------------------------------------------------------
# Display installation information
#-----------------------------------------------------------------------------
message(STATUS "CMAKE_INSTALL_PREFIX: ${CMAKE_INSTALL_PREFIX}")
message(STATUS "CMAKE_INSTALL_LIBDIR: ${CMAKE_INSTALL_LIBDIR}")
#-----------------------------------------------------------------------------
# Options
# Configure build options
#-----------------------------------------------------------------------------
# Legacy support
option(ZSTD_LEGACY_SUPPORT "LEGACY SUPPORT" ON)
if (ZSTD_LEGACY_SUPPORT)
message(STATUS "ZSTD_LEGACY_SUPPORT defined!")
set(ZSTD_LEGACY_LEVEL 5 CACHE STRING "")
add_definitions(-DZSTD_LEGACY_SUPPORT=${ZSTD_LEGACY_LEVEL})
else ()
message(STATUS "ZSTD_LEGACY_SUPPORT not defined!")
add_definitions(-DZSTD_LEGACY_SUPPORT=0)
endif ()
if (APPLE)
option(ZSTD_FRAMEWORK "Build as Apple Frameworks" OFF)
endif ()
if (ANDROID)
set(ZSTD_MULTITHREAD_SUPPORT_DEFAULT OFF)
# Old versions of bionic libc don't have fseeko/ftello
if ((NOT ${ANDROID_PLATFORM_LEVEL}) OR ${ANDROID_PLATFORM_LEVEL} VERSION_LESS 24)
message(STATUS "Setting compile definitions for old Android API")
add_compile_definitions(LIBC_NO_FSEEKO)
endif ()
else()
set(ZSTD_MULTITHREAD_SUPPORT_DEFAULT ON)
endif()
# Multi-threading support
option(ZSTD_MULTITHREAD_SUPPORT "MULTITHREADING SUPPORT" ${ZSTD_MULTITHREAD_SUPPORT_DEFAULT})
if (ZSTD_MULTITHREAD_SUPPORT)
message(STATUS "ZSTD_MULTITHREAD_SUPPORT is enabled")
else ()
message(STATUS "ZSTD_MULTITHREAD_SUPPORT is disabled")
endif ()
option(ZSTD_BUILD_PROGRAMS "BUILD PROGRAMS" ON)
option(ZSTD_BUILD_CONTRIB "BUILD CONTRIB" OFF)
# Respect the conventional CMake option for enabling tests if it was specified on the first configure
if (BUILD_TESTING)
set(ZSTD_BUILD_TESTS_default ON)
else()
set(ZSTD_BUILD_TESTS_default OFF)
endif()
option(ZSTD_BUILD_TESTS "BUILD TESTS" ${ZSTD_BUILD_TESTS_default})
if (MSVC)
option(ZSTD_USE_STATIC_RUNTIME "LINK TO STATIC RUN-TIME LIBRARIES" OFF)
endif ()
# Enable C++ support for testing.
set(ZSTD_ENABLE_CXX ${ZSTD_BUILD_TESTS})
if(ZSTD_ENABLE_CXX)
enable_language(CXX)
endif()
include(ZstdOptions)
#-----------------------------------------------------------------------------
# Add extra compilation flags
# Configure compilation flags
#-----------------------------------------------------------------------------
include(AddZstdCompilationFlags)
ADD_ZSTD_COMPILATION_FLAGS(ON ZSTD_ENABLE_CXX ON) # C CXX LD
ADD_ZSTD_COMPILATION_FLAGS(ON ZSTD_ENABLE_CXX ON)
#-----------------------------------------------------------------------------
# External dependencies
# Configure dependencies
#-----------------------------------------------------------------------------
# Define a function to handle special thread settings for HP-UX
# See https://github.com/facebook/zstd/pull/3862 for details.
function(setup_hpux_threads)
find_package(Threads)
if (NOT Threads_FOUND)
set(CMAKE_USE_PTHREADS_INIT 1 PARENT_SCOPE)
set(CMAKE_THREAD_LIBS_INIT -lpthread PARENT_SCOPE)
set(CMAKE_HAVE_THREADS_LIBRARY 1 PARENT_SCOPE)
set(Threads_FOUND TRUE PARENT_SCOPE)
endif()
endfunction()
if (ZSTD_MULTITHREAD_SUPPORT AND UNIX)
if (CMAKE_SYSTEM_NAME MATCHES "HP-UX")
setup_hpux_threads()
else()
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)
endif()
if (CMAKE_USE_PTHREADS_INIT)
set(THREADS_LIBS "${CMAKE_THREAD_LIBS_INIT}")
else()
message(SEND_ERROR "ZSTD currently does not support thread libraries other than pthreads")
endif()
endif ()
include(ZstdDependencies)
#-----------------------------------------------------------------------------
# Add source directories
# Configure build targets
#-----------------------------------------------------------------------------
add_subdirectory(lib)
option(ZSTD_PROGRAMS_LINK_SHARED "PROGRAMS LINK SHARED" OFF)
if (ZSTD_BUILD_PROGRAMS)
if (NOT ZSTD_BUILD_STATIC AND NOT ZSTD_PROGRAMS_LINK_SHARED)
message(SEND_ERROR "You need to build static library to build zstd CLI")
elseif(NOT ZSTD_BUILD_SHARED AND ZSTD_PROGRAMS_LINK_SHARED)
message(SEND_ERROR "You need to build shared library to build zstd CLI")
endif ()
add_subdirectory(programs)
endif ()
if (ZSTD_BUILD_TESTS)
enable_testing()
if (NOT ZSTD_BUILD_STATIC)
message(SEND_ERROR "You need to build static library to build tests")
endif ()
add_subdirectory(tests)
endif ()
if (ZSTD_BUILD_CONTRIB)
add_subdirectory(contrib)
endif ()
include(ZstdBuild)
#-----------------------------------------------------------------------------
# Add clean-all target
# Configure package generation
#-----------------------------------------------------------------------------
add_custom_target(clean-all
COMMAND ${CMAKE_BUILD_TOOL} clean
COMMAND rm -rf ${CMAKE_BINARY_DIR}/
)
#-----------------------------------------------------------------------------
# Generate Package Config files
#
# This section is based on the boiler plate code from:
# https://cmake.org/cmake/help/latest/manual/cmake-packages.7.html#creating-packages
#-----------------------------------------------------------------------------
include(CMakePackageConfigHelpers)
write_basic_package_version_file(
"${CMAKE_CURRENT_BINARY_DIR}/zstdConfigVersion.cmake"
VERSION ${zstd_VERSION}
COMPATIBILITY SameMajorVersion
)
# A Package Config file that works from the build directory
export(EXPORT zstdExports
FILE "${CMAKE_CURRENT_BINARY_DIR}/zstdTargets.cmake"
NAMESPACE zstd::
)
# A Package Config file that works from the installation directory
set(ConfigPackageLocation ${CMAKE_INSTALL_LIBDIR}/cmake/zstd)
install(EXPORT zstdExports
FILE zstdTargets.cmake
NAMESPACE zstd::
DESTINATION ${ConfigPackageLocation}
)
configure_package_config_file(
zstdConfig.cmake.in
"${CMAKE_CURRENT_BINARY_DIR}/zstdConfig.cmake"
INSTALL_DESTINATION ${ConfigPackageLocation}
)
install(FILES
"${CMAKE_CURRENT_BINARY_DIR}/zstdConfig.cmake"
"${CMAKE_CURRENT_BINARY_DIR}/zstdConfigVersion.cmake"
DESTINATION ${ConfigPackageLocation}
)
include(ZstdPackage)

View File

@ -1,5 +1,7 @@
include(CheckCXXCompilerFlag)
include(CheckCCompilerFlag)
if(CMAKE_CXX_COMPILER)
include(CheckCXXCompilerFlag)
endif()
if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.18)
set(ZSTD_HAVE_CHECK_LINKER_FLAG true)
@ -21,7 +23,7 @@ function(EnableCompilerFlag _flag _C _CXX _LD)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${_flag}" PARENT_SCOPE)
endif ()
endif ()
if (_CXX)
if (_CXX AND CMAKE_CXX_COMPILER)
CHECK_CXX_COMPILER_FLAG(${_flag} CXX_FLAG_${varname})
if (CXX_FLAG_${varname})
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${_flag}" PARENT_SCOPE)

View File

@ -0,0 +1,42 @@
# ################################################################
# ZSTD Build Targets Configuration
#
# Adds the library, then the optional programs / tests / contrib
# directories, reporting an error when a requested component needs
# a library flavour that is not being built.
# ################################################################

# The library directory comes first: it declares the
# ZSTD_BUILD_STATIC / ZSTD_BUILD_SHARED options checked below.
add_subdirectory(lib)

# Programs link against exactly one library flavour; make sure that
# flavour is enabled.
if(ZSTD_BUILD_PROGRAMS)
    if(ZSTD_PROGRAMS_LINK_SHARED)
        if(NOT ZSTD_BUILD_SHARED)
            message(SEND_ERROR "Shared library required to build zstd CLI programs")
        endif()
    elseif(NOT ZSTD_BUILD_STATIC)
        message(SEND_ERROR "Static library required to build zstd CLI programs")
    endif()
endif()

# The test suite needs the static library.
if(ZSTD_BUILD_TESTS)
    if(NOT ZSTD_BUILD_STATIC)
        message(SEND_ERROR "Static library required to build test suite")
    endif()
endif()

# Optional components, in the order: programs, tests, contrib.
if(ZSTD_BUILD_PROGRAMS)
    add_subdirectory(programs)
endif()

if(ZSTD_BUILD_TESTS)
    enable_testing()
    add_subdirectory(tests)
endif()

if(ZSTD_BUILD_CONTRIB)
    add_subdirectory(contrib)
endif()
# Clean-all target: run the generator's own `clean`, then delete the whole
# build tree.
# The clean step goes through `cmake --build` so it works with every
# generator; ${CMAKE_BUILD_TOOL} is a deprecated alias of the native build
# program, and passing it a bare `clean` argument only suits Make-style tools.
# VERBATIM keeps paths containing spaces intact on all platforms.
add_custom_target(clean-all
    COMMAND "${CMAKE_COMMAND}" --build "${CMAKE_BINARY_DIR}" --target clean
    COMMAND "${CMAKE_COMMAND}" -E remove_directory "${CMAKE_BINARY_DIR}"
    COMMENT "Performing complete clean including build directory"
    VERBATIM
)

View File

@ -0,0 +1,30 @@
# ################################################################
# ZSTD Dependencies Configuration
# ################################################################
# Function to handle HP-UX thread configuration.
#
# Probes for a thread library with find_package(Threads). Because the probe
# runs inside this function, the variables it sets are local to the function
# scope; they are forwarded to the caller explicitly so the caller's later
# CMAKE_USE_PTHREADS_INIT / CMAKE_THREAD_LIBS_INIT checks see them. When the
# probe finds nothing, pthread defaults (-lpthread) are published instead.
function(setup_hpux_threads)
    find_package(Threads)
    if(Threads_FOUND)
        # Forward the detected values; quoting keeps an empty value from
        # turning the set() into an unset of the parent variable.
        set(Threads_FOUND "${Threads_FOUND}" PARENT_SCOPE)
        set(CMAKE_USE_PTHREADS_INIT "${CMAKE_USE_PTHREADS_INIT}" PARENT_SCOPE)
        set(CMAKE_THREAD_LIBS_INIT "${CMAKE_THREAD_LIBS_INIT}" PARENT_SCOPE)
        set(CMAKE_HAVE_THREADS_LIBRARY "${CMAKE_HAVE_THREADS_LIBRARY}" PARENT_SCOPE)
    else()
        # Nothing detected: fall back to linking pthreads by hand
        set(CMAKE_USE_PTHREADS_INIT 1 PARENT_SCOPE)
        set(CMAKE_THREAD_LIBS_INIT -lpthread PARENT_SCOPE)
        set(CMAKE_HAVE_THREADS_LIBRARY 1 PARENT_SCOPE)
        set(Threads_FOUND TRUE PARENT_SCOPE)
    endif()
endfunction()
# Threading support: only relevant on UNIX-like systems when multithreading
# was requested. THREADS_LIBS is set from the detected pthread link flags;
# any non-pthread thread library is reported as an error.
if(ZSTD_MULTITHREAD_SUPPORT AND UNIX)
    if(NOT (CMAKE_SYSTEM_NAME MATCHES "HP-UX"))
        set(THREADS_PREFER_PTHREAD_FLAG ON)
        find_package(Threads REQUIRED)
    else()
        # HP-UX goes through the dedicated helper
        setup_hpux_threads()
    endif()

    if(NOT CMAKE_USE_PTHREADS_INIT)
        message(SEND_ERROR "ZSTD currently does not support thread libraries other than pthreads")
    else()
        set(THREADS_LIBS "${CMAKE_THREAD_LIBS_INIT}")
    endif()
endif()

View File

@ -0,0 +1,68 @@
# ################################################################
# ZSTD Build Options Configuration
#
# Declares the user-facing build options, derives platform defaults
# and adds the global compile definitions that depend on them.
# ################################################################

# Legacy support configuration
option(ZSTD_LEGACY_SUPPORT "Enable legacy format support" ON)

if(ZSTD_LEGACY_SUPPORT)
    message(STATUS "ZSTD_LEGACY_SUPPORT enabled")
    # Cache entry, so the level can be overridden on the command line
    set(ZSTD_LEGACY_LEVEL 5 CACHE STRING "Legacy support level")
    add_definitions(-DZSTD_LEGACY_SUPPORT=${ZSTD_LEGACY_LEVEL})
else()
    message(STATUS "ZSTD_LEGACY_SUPPORT disabled")
    add_definitions(-DZSTD_LEGACY_SUPPORT=0)
endif()

# Platform-specific options
if(APPLE)
    option(ZSTD_FRAMEWORK "Build as Apple Framework" OFF)
endif()

# Android-specific configuration: multithreading defaults to OFF there
if(ANDROID)
    set(ZSTD_MULTITHREAD_SUPPORT_DEFAULT OFF)
    # Handle old Android API levels (unknown level, or below 24)
    if((NOT ANDROID_PLATFORM_LEVEL) OR (ANDROID_PLATFORM_LEVEL VERSION_LESS 24))
        message(STATUS "Configuring for old Android API - disabling fseeko/ftello")
        # add_definitions() rather than add_compile_definitions(): the latter
        # only exists from CMake 3.12, above this project's declared minimum.
        add_definitions(-DLIBC_NO_FSEEKO)
    endif()
else()
    set(ZSTD_MULTITHREAD_SUPPORT_DEFAULT ON)
endif()

# Multi-threading support
option(ZSTD_MULTITHREAD_SUPPORT "Enable multi-threading support" ${ZSTD_MULTITHREAD_SUPPORT_DEFAULT})

if(ZSTD_MULTITHREAD_SUPPORT)
    message(STATUS "Multi-threading support enabled")
else()
    message(STATUS "Multi-threading support disabled")
endif()

# Build component options
option(ZSTD_BUILD_PROGRAMS "Build command-line programs" ON)
option(ZSTD_BUILD_CONTRIB "Build contrib utilities" OFF)
option(ZSTD_PROGRAMS_LINK_SHARED "Link programs against shared library" OFF)

# Test configuration: the conventional BUILD_TESTING switch, when set,
# provides the default for ZSTD_BUILD_TESTS
if(BUILD_TESTING)
    set(ZSTD_BUILD_TESTS_default ON)
else()
    set(ZSTD_BUILD_TESTS_default OFF)
endif()
option(ZSTD_BUILD_TESTS "Build test suite" ${ZSTD_BUILD_TESTS_default})

# MSVC-specific options
if(MSVC)
    option(ZSTD_USE_STATIC_RUNTIME "Link to static runtime libraries" OFF)
endif()

# C++ support (needed for tests): the CXX language is enabled only when the
# test suite is built
set(ZSTD_ENABLE_CXX ${ZSTD_BUILD_TESTS})
if(ZSTD_ENABLE_CXX)
    enable_language(CXX)
endif()

# Set global definitions: prefix XXHash symbols with ZSTD_
add_definitions(-DXXH_NAMESPACE=ZSTD_)

View File

@ -0,0 +1,42 @@
# ################################################################
# ZSTD Package Configuration
#
# Generates the zstdConfig / zstdConfigVersion / zstdTargets files
# so the project can be consumed through find_package(zstd), both
# from the build tree and from an installation.
# ################################################################
include(CMakePackageConfigHelpers)

# Where the package files are installed, relative to the install prefix
set(ConfigPackageLocation ${CMAKE_INSTALL_LIBDIR}/cmake/zstd)

# Version file: compatible with any request for the same major version
write_basic_package_version_file(
    "${CMAKE_CURRENT_BINARY_DIR}/zstdConfigVersion.cmake"
    VERSION ${zstd_VERSION}
    COMPATIBILITY SameMajorVersion
)

# Config file, generated from the template next to the project's CMakeLists
configure_package_config_file(
    zstdConfig.cmake.in
    "${CMAKE_CURRENT_BINARY_DIR}/zstdConfig.cmake"
    INSTALL_DESTINATION ${ConfigPackageLocation}
)

# Targets file usable directly from the build directory
export(EXPORT zstdExports
    FILE "${CMAKE_CURRENT_BINARY_DIR}/zstdTargets.cmake"
    NAMESPACE zstd::
)

# Targets file for the installed package
install(EXPORT zstdExports
    FILE zstdTargets.cmake
    NAMESPACE zstd::
    DESTINATION ${ConfigPackageLocation}
)

# Generated config and version files for the installed package
install(FILES
    "${CMAKE_CURRENT_BINARY_DIR}/zstdConfig.cmake"
    "${CMAKE_CURRENT_BINARY_DIR}/zstdConfigVersion.cmake"
    DESTINATION ${ConfigPackageLocation}
)

View File

@ -0,0 +1,31 @@
# ################################################################
# ZSTD Version Configuration
#
# Selects the CMake policy version, reads the library version from
# zstd.h and defines the project metadata variables.
# ################################################################

# Newest CMake release whose policies have been validated
set(ZSTD_MAX_VALIDATED_CMAKE_MAJOR_VERSION "3")
set(ZSTD_MAX_VALIDATED_CMAKE_MINOR_VERSION "13")

# Default to the validated ceiling; use the running CMake's own version only
# when it shares the validated major version and has a lower minor version.
set(ZSTD_CMAKE_POLICY_VERSION "${ZSTD_MAX_VALIDATED_CMAKE_MAJOR_VERSION}.${ZSTD_MAX_VALIDATED_CMAKE_MINOR_VERSION}.0")
if(CMAKE_MAJOR_VERSION EQUAL ZSTD_MAX_VALIDATED_CMAKE_MAJOR_VERSION AND
   CMAKE_MINOR_VERSION LESS ZSTD_MAX_VALIDATED_CMAKE_MINOR_VERSION)
    set(ZSTD_CMAKE_POLICY_VERSION "${CMAKE_VERSION}")
endif()
cmake_policy(VERSION ${ZSTD_CMAKE_POLICY_VERSION})

# Project metadata
set(zstd_HOMEPAGE_URL "https://facebook.github.io/zstd")
set(zstd_DESCRIPTION "Zstandard is a real-time compression algorithm, providing high compression ratios.")

# Version components are parsed out of the public header
include(GetZstdLibraryVersion)
GetZstdLibraryVersion(${LIBRARY_DIR}/zstd.h zstd_VERSION_MAJOR zstd_VERSION_MINOR zstd_VERSION_PATCH)

# Derived version strings
set(ZSTD_SHORT_VERSION "${zstd_VERSION_MAJOR}.${zstd_VERSION_MINOR}")
set(ZSTD_FULL_VERSION "${zstd_VERSION_MAJOR}.${zstd_VERSION_MINOR}.${zstd_VERSION_PATCH}")

message(STATUS "ZSTD VERSION: ${ZSTD_FULL_VERSION}")

View File

@ -97,9 +97,11 @@ if (ZSTD_LEGACY_SUPPORT)
${LIBRARY_LEGACY_DIR}/zstd_v07.h)
endif ()
if (MSVC)
# Attach the Windows resource script only for MSVC-style builds that are not
# driven by Clang. The C compiler id is tested because C is always enabled
# for this project, whereas CXX is enabled only when the tests are built,
# which leaves CMAKE_CXX_COMPILER_ID empty in a tests-off configuration.
if (MSVC AND NOT (CMAKE_C_COMPILER_ID STREQUAL "Clang"))
    set(MSVC_RESOURCE_DIR ${ZSTD_SOURCE_DIR}/build/VS2010/libzstd-dll)
    set(PlatformDependResources ${MSVC_RESOURCE_DIR}/libzstd-dll.rc)
else()
    # No platform resources: leave the variable empty
    set(PlatformDependResources)
endif ()
# Explicitly set the language to C for all files, including ASM files.

View File

@ -10,7 +10,7 @@
project('zstd',
['c', 'cpp'],
license: ['BSD', 'GPLv2'],
license: 'BSD-3-Clause OR GPL-2.0-only',
default_options : [
# There shouldn't be any need to force a C standard convention for zstd
# but in case one would want that anyway, this can be done here.

View File

@ -739,6 +739,8 @@ static int benchMem(slice_collection_t dstBlocks, slice_collection_t srcBlocks,
/* BMK_benchTimedFn may not run exactly nbRounds iterations */
double speedAggregated =
aggregateData(speedPerRound, roundNb + 1, metricAggregatePref);
free(speedPerRound);
if (metricAggregatePref == fastest)
DISPLAY("Fastest Speed : %.1f MB/s \n", speedAggregated);
else

View File

@ -316,6 +316,10 @@ int main(int argc, const char** argv)
for (pos = 0; pos < inSize; pos += 2) {
size_t const decStatus = ZSTD_seekable_decompress(stream, outBuffer, 1, pos);
if (decStatus != 1 || outBuffer[0] != inBuffer[pos]) {
free(seekBuffer);
free(outBuffer);
ZSTD_seekable_freeCStream(zscs);
ZSTD_seekable_free(stream);
goto _test_error;
}
}
@ -323,6 +327,10 @@ int main(int argc, const char** argv)
/* We read more than the compressed size, meaning there were some rereads.
This is unneeded because we only seeked forward. */
printf("Too much data read: %zu read, with compressed size %zu\n", buffWrapper.totalRead, seekSize);
free(seekBuffer);
free(outBuffer);
ZSTD_seekable_freeCStream(zscs);
ZSTD_seekable_free(stream);
goto _test_error;
}
@ -342,6 +350,10 @@ int main(int argc, const char** argv)
for (idx = 0; idx < sizeof(tests) / sizeof(tests[0]); idx++) {
size_t const decStatus = ZSTD_seekable_decompress(stream, outBuffer, tests[idx].size, tests[idx].offset);
if (decStatus != tests[idx].size || memcmp(outBuffer, inBuffer + tests[idx].offset, tests[idx].size) != 0) {
free(seekBuffer);
free(outBuffer);
ZSTD_seekable_freeCStream(zscs);
ZSTD_seekable_free(stream);
goto _test_error;
}
}

View File

@ -218,6 +218,15 @@
# if defined(__ARM_NEON) || defined(_M_ARM64)
# define ZSTD_ARCH_ARM_NEON
# endif
# if defined(__ARM_FEATURE_SVE)
# define ZSTD_ARCH_ARM_SVE
# endif
# if defined(__ARM_FEATURE_SVE2)
# define ZSTD_ARCH_ARM_SVE2
# endif
# if defined(__riscv) && defined(__riscv_vector)
# define ZSTD_ARCH_RISCV_RVV
# endif
#
# if defined(ZSTD_ARCH_X86_AVX2)
# include <immintrin.h>
@ -227,6 +236,12 @@
# elif defined(ZSTD_ARCH_ARM_NEON)
# include <arm_neon.h>
# endif
# if defined(ZSTD_ARCH_ARM_SVE) || defined(ZSTD_ARCH_ARM_SVE2)
# include <arm_sve.h>
# endif
# if defined(ZSTD_ARCH_RISCV_RVV)
# include <riscv_vector.h>
# endif
#endif
/* C-language Attributes are added in C23. */

View File

@ -19,6 +19,12 @@
#include "../common/error_private.h" /* ERROR */
#include "hist.h"
/* HIST_FAST_THRESHOLD :
 * 1500 by default, lowered to 500 when the SVE2 code path is compiled in.
 * NOTE(review): presumably the input size below which the histogram code
 * prefers its simple routine - confirm at the use site. */
#if !defined(ZSTD_ARCH_ARM_SVE2)
#  define HIST_FAST_THRESHOLD 1500
#else
#  define HIST_FAST_THRESHOLD 500
#endif
/* --- Error management --- */
unsigned HIST_isError(size_t code) { return ERR_isError(code); }
@ -65,6 +71,244 @@ unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
typedef enum { trustInput, checkMaxSymbolValue } HIST_checkInput_e;
#if defined(ZSTD_ARCH_ARM_SVE2)
/* min_size() :
 * @return : the smaller of `a` and `b` (`b` when both are equal). */
FORCE_INLINE_TEMPLATE size_t min_size(size_t a, size_t b)
{
    if (a < b) return a;
    return b;
}
/* HIST_count_6_sve2() :
 * Histogram helper for the SVE2 code path.
 * Walks `size` bytes of `src` in 16-byte steps and uses svhistseg to count
 * occurrences of the byte values held in the six lookup vectors c0..c5.
 * Counts are gathered in 8-bit lanes for up to 240 input bytes at a time,
 * widened into twelve 16-bit accumulators (hh0..hhb), and finally widened
 * again to 32 bits and stored to dst[0..95]. Every store is predicated on
 * `maxCount`, so no element at index >= maxCount is written.
 * @return : lane-wise maximum over the twelve 16-bit accumulators and
 *           `histmax`.
 * NOTE(review): the store order assumes c0..c5 carry a caller-chosen lane
 * permutation that makes the bottom/top widening steps land in symbol
 * order - confirm against the caller.
 * NOTE(review): the 16-bit accumulators are never flushed in this function,
 * so it presumably relies on the caller bounding `size` - confirm.
 * NOTE(review): lanes beyond `size` are masked off by the load predicate;
 * if they read as zero they would be counted as byte value 0 - confirm
 * that the caller accounts for this. */
static
svuint16_t HIST_count_6_sve2(const BYTE* const src, size_t size, U32* const dst,
const svuint8_t c0, const svuint8_t c1,
const svuint8_t c2, const svuint8_t c3,
const svuint8_t c4, const svuint8_t c5,
const svuint16_t histmax, size_t maxCount)
{
/* Governing predicate for the arithmetic below (SV_VL16 pattern) */
const svbool_t vl128 = svptrue_pat_b8(SV_VL16);
/* Twelve 16-bit accumulators: two (bottom/top) per lookup vector */
svuint16_t hh0 = svdup_n_u16(0);
svuint16_t hh1 = svdup_n_u16(0);
svuint16_t hh2 = svdup_n_u16(0);
svuint16_t hh3 = svdup_n_u16(0);
svuint16_t hh4 = svdup_n_u16(0);
svuint16_t hh5 = svdup_n_u16(0);
svuint16_t hh6 = svdup_n_u16(0);
svuint16_t hh7 = svdup_n_u16(0);
svuint16_t hh8 = svdup_n_u16(0);
svuint16_t hh9 = svdup_n_u16(0);
svuint16_t hha = svdup_n_u16(0);
svuint16_t hhb = svdup_n_u16(0);
size_t i = 0;
while (i < size) {
/* We can only accumulate 15 (15 * 16 <= 255) iterations of histogram
* in 8-bit accumulators! */
const size_t size240 = min_size(i + 240, size);
/* First 16-byte step of this batch initialises the 8-bit partial counts */
svbool_t pred = svwhilelt_b8_u64(i, size);
svuint8_t c = svld1rq_u8(pred, src + i);
svuint8_t h0 = svhistseg_u8(c0, c);
svuint8_t h1 = svhistseg_u8(c1, c);
svuint8_t h2 = svhistseg_u8(c2, c);
svuint8_t h3 = svhistseg_u8(c3, c);
svuint8_t h4 = svhistseg_u8(c4, c);
svuint8_t h5 = svhistseg_u8(c5, c);
/* Remaining steps of the batch add into the same 8-bit lanes */
for (i += 16; i < size240; i += 16) {
pred = svwhilelt_b8_u64(i, size);
c = svld1rq_u8(pred, src + i);
h0 = svadd_u8_x(vl128, h0, svhistseg_u8(c0, c));
h1 = svadd_u8_x(vl128, h1, svhistseg_u8(c1, c));
h2 = svadd_u8_x(vl128, h2, svhistseg_u8(c2, c));
h3 = svadd_u8_x(vl128, h3, svhistseg_u8(c3, c));
h4 = svadd_u8_x(vl128, h4, svhistseg_u8(c4, c));
h5 = svadd_u8_x(vl128, h5, svhistseg_u8(c5, c));
}
/* Flush the batch: widening adds of the bottom (b) and top (t) 8-bit
* lanes of each partial histogram into its pair of 16-bit accumulators */
hh0 = svaddwb_u16(hh0, h0);
hh1 = svaddwt_u16(hh1, h0);
hh2 = svaddwb_u16(hh2, h1);
hh3 = svaddwt_u16(hh3, h1);
hh4 = svaddwb_u16(hh4, h2);
hh5 = svaddwt_u16(hh5, h2);
hh6 = svaddwb_u16(hh6, h3);
hh7 = svaddwt_u16(hh7, h3);
hh8 = svaddwb_u16(hh8, h4);
hh9 = svaddwt_u16(hh9, h4);
hha = svaddwb_u16(hha, h5);
hhb = svaddwt_u16(hhb, h5);
}
/* Widen each 16-bit accumulator to 32 bits (shift-left-long by 0, bottom
* then top lanes) and store four counts per call; the whilelt predicate
* stops the stores at maxCount */
svst1_u32(svwhilelt_b32_u64( 0, maxCount), dst + 0, svshllb_n_u32(hh0, 0));
svst1_u32(svwhilelt_b32_u64( 4, maxCount), dst + 4, svshllt_n_u32(hh0, 0));
svst1_u32(svwhilelt_b32_u64( 8, maxCount), dst + 8, svshllb_n_u32(hh1, 0));
svst1_u32(svwhilelt_b32_u64(12, maxCount), dst + 12, svshllt_n_u32(hh1, 0));
svst1_u32(svwhilelt_b32_u64(16, maxCount), dst + 16, svshllb_n_u32(hh2, 0));
svst1_u32(svwhilelt_b32_u64(20, maxCount), dst + 20, svshllt_n_u32(hh2, 0));
svst1_u32(svwhilelt_b32_u64(24, maxCount), dst + 24, svshllb_n_u32(hh3, 0));
svst1_u32(svwhilelt_b32_u64(28, maxCount), dst + 28, svshllt_n_u32(hh3, 0));
svst1_u32(svwhilelt_b32_u64(32, maxCount), dst + 32, svshllb_n_u32(hh4, 0));
svst1_u32(svwhilelt_b32_u64(36, maxCount), dst + 36, svshllt_n_u32(hh4, 0));
svst1_u32(svwhilelt_b32_u64(40, maxCount), dst + 40, svshllb_n_u32(hh5, 0));
svst1_u32(svwhilelt_b32_u64(44, maxCount), dst + 44, svshllt_n_u32(hh5, 0));
svst1_u32(svwhilelt_b32_u64(48, maxCount), dst + 48, svshllb_n_u32(hh6, 0));
svst1_u32(svwhilelt_b32_u64(52, maxCount), dst + 52, svshllt_n_u32(hh6, 0));
svst1_u32(svwhilelt_b32_u64(56, maxCount), dst + 56, svshllb_n_u32(hh7, 0));
svst1_u32(svwhilelt_b32_u64(60, maxCount), dst + 60, svshllt_n_u32(hh7, 0));
svst1_u32(svwhilelt_b32_u64(64, maxCount), dst + 64, svshllb_n_u32(hh8, 0));
svst1_u32(svwhilelt_b32_u64(68, maxCount), dst + 68, svshllt_n_u32(hh8, 0));
svst1_u32(svwhilelt_b32_u64(72, maxCount), dst + 72, svshllb_n_u32(hh9, 0));
svst1_u32(svwhilelt_b32_u64(76, maxCount), dst + 76, svshllt_n_u32(hh9, 0));
svst1_u32(svwhilelt_b32_u64(80, maxCount), dst + 80, svshllb_n_u32(hha, 0));
svst1_u32(svwhilelt_b32_u64(84, maxCount), dst + 84, svshllt_n_u32(hha, 0));
svst1_u32(svwhilelt_b32_u64(88, maxCount), dst + 88, svshllb_n_u32(hhb, 0));
svst1_u32(svwhilelt_b32_u64(92, maxCount), dst + 92, svshllt_n_u32(hhb, 0));
/* Pairwise max tree over the twelve accumulators, folding in histmax */
hh0 = svmax_u16_x(vl128, hh0, hh1);
hh2 = svmax_u16_x(vl128, hh2, hh3);
hh4 = svmax_u16_x(vl128, hh4, hh5);
hh6 = svmax_u16_x(vl128, hh6, hh7);
hh8 = svmax_u16_x(vl128, hh8, hh9);
hha = svmax_u16_x(vl128, hha, hhb);
hh0 = svmax_u16_x(vl128, hh0, hh2);
hh4 = svmax_u16_x(vl128, hh4, hh6);
hh8 = svmax_u16_x(vl128, hh8, hha);
hh0 = svmax_u16_x(vl128, hh0, hh4);
hh8 = svmax_u16_x(vl128, hh8, histmax);
return svmax_u16_x(vl128, hh0, hh8);
}
/* HIST_count_sve2() :
 * Byte histogram of `source`, computed with SVE2 intrinsics, 16 input bytes per step.
 * @count : receives (*maxSymbolValuePtr + 1) counters.
 * @maxSymbolValuePtr : in : largest symbol value the caller has room for (asserted <= 255);
 *                      out : largest symbol value found in `source` (0 when sourceSize == 0).
 * @check : when non-zero, finding a symbol larger than *maxSymbolValuePtr returns
 *          ERROR(maxSymbolValue_tooSmall) before anything is stored into `count`.
 * @return : maximum over the accumulated counters, 0 for an empty source, or an error code.
 * NOTE(review): counters are accumulated in 16-bit lanes (hh0..hh7) and only widened to
 *               32-bit when stored; a symbol occurring more than 65535 times would
 *               presumably wrap - confirm that callers bound sourceSize accordingly. */
static size_t HIST_count_sve2(unsigned* count, unsigned* maxSymbolValuePtr,
const void* source, size_t sourceSize,
HIST_checkInput_e check)
{
const BYTE* ip = (const BYTE*)source;
const size_t maxCount = *maxSymbolValuePtr + 1;
assert(*maxSymbolValuePtr <= 255);
/* Empty input : clear the requested counters and report an empty alphabet. */
if (!sourceSize) {
ZSTD_memset(count, 0, maxCount * sizeof(*count));
*maxSymbolValuePtr = 0;
return 0;
}
/* vl128 restricts predicated operations to the first 16 bytes (one 128-bit segment). */
{ const svbool_t vl128 = svptrue_pat_b8(SV_VL16);
/* Within the first 128-bit segment, the bytes of c0 are the values 0..15 in the order
 * 0,8,4,12, 1,9,5,13, 2,10,6,14, 3,11,7,15. This ordering is what lets the later
 * bottom/top widening steps land counters for consecutive symbols in consecutive lanes.
 * c1, c2, c3 are c0 offset by 16, 32 and 48 : together they cover symbols 0..63. */
const svuint8_t c0 = svreinterpret_u8(svindex_u32(0x0C040800, 0x01010101));
const svuint8_t c1 = svadd_n_u8_x(vl128, c0, 16);
const svuint8_t c2 = svadd_n_u8_x(vl128, c0, 32);
const svuint8_t c3 = svadd_n_u8_x(vl128, c1, 32);
/* Running per-lane maximum of every byte seen, reduced to a scalar after the loop. */
svuint8_t symbolMax = svdup_n_u8(0);
/* 16-bit accumulators : hh(2k) / hh(2k+1) receive the even / odd bytes of h(k). */
svuint16_t hh0 = svdup_n_u16(0);
svuint16_t hh1 = svdup_n_u16(0);
svuint16_t hh2 = svdup_n_u16(0);
svuint16_t hh3 = svdup_n_u16(0);
svuint16_t hh4 = svdup_n_u16(0);
svuint16_t hh5 = svdup_n_u16(0);
svuint16_t hh6 = svdup_n_u16(0);
svuint16_t hh7 = svdup_n_u16(0);
svuint16_t max;
size_t maxSymbolValue;
size_t i = 0;
/* Outer loop : one pass per group of at most 240 input bytes. */
while (i < sourceSize) {
/* We can only accumulate 15 (15 * 16 <= 255) iterations of
* histogram in 8-bit accumulators! */
const size_t size240 = min_size(i + 240, sourceSize);
/* The predicate masks off bytes at or beyond sourceSize in the last, partial chunk. */
svbool_t pred = svwhilelt_b8_u64(i, sourceSize);
svuint8_t c = svld1rq_u8(pred, ip + i);
/* First chunk of the group initializes the 8-bit counters h0..h3. */
svuint8_t h0 = svhistseg_u8(c0, c);
svuint8_t h1 = svhistseg_u8(c1, c);
svuint8_t h2 = svhistseg_u8(c2, c);
svuint8_t h3 = svhistseg_u8(c3, c);
symbolMax = svmax_u8_x(vl128, symbolMax, c);
/* Remaining chunks of the group are added into the same 8-bit counters. */
for (i += 16; i < size240; i += 16) {
pred = svwhilelt_b8_u64(i, sourceSize);
c = svld1rq_u8(pred, ip + i);
h0 = svadd_u8_x(vl128, h0, svhistseg_u8(c0, c));
h1 = svadd_u8_x(vl128, h1, svhistseg_u8(c1, c));
h2 = svadd_u8_x(vl128, h2, svhistseg_u8(c2, c));
h3 = svadd_u8_x(vl128, h3, svhistseg_u8(c3, c));
symbolMax = svmax_u8_x(vl128, symbolMax, c);
}
/* Flush the 8-bit counters into the 16-bit accumulators before they can overflow. */
hh0 = svaddwb_u16(hh0, h0);
hh1 = svaddwt_u16(hh1, h0);
hh2 = svaddwb_u16(hh2, h1);
hh3 = svaddwt_u16(hh3, h1);
hh4 = svaddwb_u16(hh4, h2);
hh5 = svaddwt_u16(hh5, h2);
hh6 = svaddwb_u16(hh6, h3);
hh7 = svaddwt_u16(hh7, h3);
}
maxSymbolValue = svmaxv_u8(vl128, symbolMax);
if (check && maxSymbolValue > *maxSymbolValuePtr) return ERROR(maxSymbolValue_tooSmall);
*maxSymbolValuePtr = maxSymbolValue;
/* If the buffer size is not divisible by 16, the last elements of the final
* vector register read will be zeros, and these elements must be subtracted
* from the histogram.
*/
/* (-sourceSize & 15) is the number of padding bytes; only the first 16-bit lane of hh0,
 * which is the counter for symbol 0, is adjusted. */
hh0 = svsub_n_u16_m(svptrue_pat_b32(SV_VL1), hh0, -sourceSize & 15);
/* Widen to 32-bit and store counters for symbols 0..63, 4 symbols per store;
 * each store is predicated so that lanes at or beyond maxCount are not written. */
svst1_u32(svwhilelt_b32_u64( 0, maxCount), count + 0, svshllb_n_u32(hh0, 0));
svst1_u32(svwhilelt_b32_u64( 4, maxCount), count + 4, svshllt_n_u32(hh0, 0));
svst1_u32(svwhilelt_b32_u64( 8, maxCount), count + 8, svshllb_n_u32(hh1, 0));
svst1_u32(svwhilelt_b32_u64(12, maxCount), count + 12, svshllt_n_u32(hh1, 0));
svst1_u32(svwhilelt_b32_u64(16, maxCount), count + 16, svshllb_n_u32(hh2, 0));
svst1_u32(svwhilelt_b32_u64(20, maxCount), count + 20, svshllt_n_u32(hh2, 0));
svst1_u32(svwhilelt_b32_u64(24, maxCount), count + 24, svshllb_n_u32(hh3, 0));
svst1_u32(svwhilelt_b32_u64(28, maxCount), count + 28, svshllt_n_u32(hh3, 0));
svst1_u32(svwhilelt_b32_u64(32, maxCount), count + 32, svshllb_n_u32(hh4, 0));
svst1_u32(svwhilelt_b32_u64(36, maxCount), count + 36, svshllt_n_u32(hh4, 0));
svst1_u32(svwhilelt_b32_u64(40, maxCount), count + 40, svshllb_n_u32(hh5, 0));
svst1_u32(svwhilelt_b32_u64(44, maxCount), count + 44, svshllt_n_u32(hh5, 0));
svst1_u32(svwhilelt_b32_u64(48, maxCount), count + 48, svshllb_n_u32(hh6, 0));
svst1_u32(svwhilelt_b32_u64(52, maxCount), count + 52, svshllt_n_u32(hh6, 0));
svst1_u32(svwhilelt_b32_u64(56, maxCount), count + 56, svshllb_n_u32(hh7, 0));
svst1_u32(svwhilelt_b32_u64(60, maxCount), count + 60, svshllt_n_u32(hh7, 0));
/* Pairwise reduction of the eight accumulators into a single vector of per-lane maxima. */
hh0 = svmax_u16_x(vl128, hh0, hh1);
hh2 = svmax_u16_x(vl128, hh2, hh3);
hh4 = svmax_u16_x(vl128, hh4, hh5);
hh6 = svmax_u16_x(vl128, hh6, hh7);
hh0 = svmax_u16_x(vl128, hh0, hh2);
hh4 = svmax_u16_x(vl128, hh4, hh6);
max = svmax_u16_x(vl128, hh0, hh4);
maxSymbolValue = min_size(maxSymbolValue, maxCount);
/* Symbols >= 64 need additional passes over the input, made only when such symbols
 * were actually observed; otherwise the remaining requested counters are zeroed. */
if (maxSymbolValue >= 64) {
const svuint8_t c4 = svadd_n_u8_x(vl128, c0, 64);
const svuint8_t c5 = svadd_n_u8_x(vl128, c1, 64);
const svuint8_t c6 = svadd_n_u8_x(vl128, c2, 64);
const svuint8_t c7 = svadd_n_u8_x(vl128, c3, 64);
const svuint8_t c8 = svadd_n_u8_x(vl128, c0, 128);
const svuint8_t c9 = svadd_n_u8_x(vl128, c1, 128);
/* Second pass : c4..c9 cover symbols 64..159. */
max = HIST_count_6_sve2(ip, sourceSize, count + 64, c4, c5, c6, c7,
c8, c9, max, maxCount - 64);
if (maxSymbolValue >= 160) {
const svuint8_t ca = svadd_n_u8_x(vl128, c2, 128);
const svuint8_t cb = svadd_n_u8_x(vl128, c3, 128);
const svuint8_t cc = svadd_n_u8_x(vl128, c4, 128);
const svuint8_t cd = svadd_n_u8_x(vl128, c5, 128);
const svuint8_t ce = svadd_n_u8_x(vl128, c6, 128);
const svuint8_t cf = svadd_n_u8_x(vl128, c7, 128);
/* Third pass : ca..cf cover symbols 160..255. */
max = HIST_count_6_sve2(ip, sourceSize, count + 160, ca, cb, cc,
cd, ce, cf, max, maxCount - 160);
} else if (maxCount > 160) {
ZSTD_memset(count + 160, 0, (maxCount - 160) * sizeof(*count));
}
} else if (maxCount > 64) {
ZSTD_memset(count + 64, 0, (maxCount - 64) * sizeof(*count));
}
/* Horizontal maximum over the first 128 bits of `max`. */
return svmaxv_u16(vl128, max);
}
}
#endif
/* HIST_count_parallel_wksp() :
* store histogram into 4 intermediate tables, recombined at the end.
* this design makes better use of OoO cpus,
@ -73,8 +317,8 @@ typedef enum { trustInput, checkMaxSymbolValue } HIST_checkInput_e;
* `workSpace` must be a U32 table of size >= HIST_WKSP_SIZE_U32.
* @return : largest histogram frequency,
* or an error code (notably when histogram's alphabet is larger than *maxSymbolValuePtr) */
static size_t HIST_count_parallel_wksp(
unsigned* count, unsigned* maxSymbolValuePtr,
static UNUSED_ATTR
size_t HIST_count_parallel_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
const void* source, size_t sourceSize,
HIST_checkInput_e check,
U32* const workSpace)
@ -151,11 +395,17 @@ size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
const void* source, size_t sourceSize,
void* workSpace, size_t workSpaceSize)
{
if (sourceSize < 1500) /* heuristic threshold */
if (sourceSize < HIST_FAST_THRESHOLD) /* heuristic threshold */
return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize);
#if defined(ZSTD_ARCH_ARM_SVE2)
(void)workSpace;
(void)workSpaceSize;
return HIST_count_sve2(count, maxSymbolValuePtr, source, sourceSize, trustInput);
#else
if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall);
return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, trustInput, (U32*)workSpace);
#endif
}
/* HIST_count_wksp() :
@ -165,10 +415,15 @@ size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
const void* source, size_t sourceSize,
void* workSpace, size_t workSpaceSize)
{
#if defined(ZSTD_ARCH_ARM_SVE2)
if (*maxSymbolValuePtr < 255)
return HIST_count_sve2(count, maxSymbolValuePtr, source, sourceSize, checkMaxSymbolValue);
#else
if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall);
if (*maxSymbolValuePtr < 255)
return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, checkMaxSymbolValue, (U32*)workSpace);
#endif
*maxSymbolValuePtr = 255;
return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace, workSpaceSize);
}

View File

@ -35,7 +35,11 @@ unsigned HIST_isError(size_t code); /**< tells if a return value is an error co
/* --- advanced histogram functions --- */
#if defined(__ARM_FEATURE_SVE2)
#define HIST_WKSP_SIZE_U32 0
#else
#define HIST_WKSP_SIZE_U32 1024
#endif
#define HIST_WKSP_SIZE (HIST_WKSP_SIZE_U32 * sizeof(unsigned))
/** HIST_count_wksp() :
* Same as HIST_count(), but using an externally provided scratch buffer.

View File

@ -7284,6 +7284,93 @@ static size_t convertSequences_noRepcodes(
return longLen;
}
#elif defined ZSTD_ARCH_RISCV_RVV
#include <riscv_vector.h>
/*
* Convert `vl` sequences per iteration, using RISC-V Vector (RVV) intrinsics:
* - offset -> offBase = offset + 2
* - litLength -> (U16) litLength
* - matchLength -> (U16)(matchLength - 3)
* - rep is ignored
* Store only 8 bytes per SeqDef (offBase[4], litLength[2], mlBase[2]).
*
* @returns 0 on success, with no long length detected
* @returns > 0 if there is one long length (> 65535),
* indicating the position, and type.
*/
static size_t convertSequences_noRepcodes(SeqDef* dstSeqs, const ZSTD_Sequence* inSeqs, size_t nbSequences) {
    size_t longLen = 0;
    size_t vl = 0;   /* number of sequences handled by the current iteration */
    size_t i;

    /* RVV depends on the specific definition of target structures */
    ZSTD_STATIC_ASSERT(sizeof(ZSTD_Sequence) == 16);
    ZSTD_STATIC_ASSERT(offsetof(ZSTD_Sequence, offset) == 0);
    ZSTD_STATIC_ASSERT(offsetof(ZSTD_Sequence, litLength) == 4);
    ZSTD_STATIC_ASSERT(offsetof(ZSTD_Sequence, matchLength) == 8);
    ZSTD_STATIC_ASSERT(sizeof(SeqDef) == 8);
    ZSTD_STATIC_ASSERT(offsetof(SeqDef, offBase) == 0);
    ZSTD_STATIC_ASSERT(offsetof(SeqDef, litLength) == 4);
    ZSTD_STATIC_ASSERT(offsetof(SeqDef, mlBase) == 6);

    for (i = 0; i < nbSequences; i += vl) {
        vl = __riscv_vsetvl_e32m2(nbSequences - i);
        {
            /* De-interleaving segment load of the four 32-bit fields of each ZSTD_Sequence.
             * The unsigned intrinsic takes a `const uint32_t*` base pointer;
             * going through `const void*` keeps the struct-to-word cast warning-free. */
            vuint32m2x4_t v_tuple = __riscv_vlseg4e32_v_u32m2x4(
                    (const uint32_t*)(const void*)&inSeqs[i], vl);
            vuint32m2_t v_offset = __riscv_vget_v_u32m2x4_u32m2(v_tuple, 0);
            vuint32m2_t v_lit    = __riscv_vget_v_u32m2x4_u32m2(v_tuple, 1);
            vuint32m2_t v_match  = __riscv_vget_v_u32m2x4_u32m2(v_tuple, 2);

            /* offBase = offset + ZSTD_REP_NUM */
            vuint32m2_t v_offBase = __riscv_vadd_vx_u32m2(v_offset, ZSTD_REP_NUM, vl);

            /* Flag the lengths that do not fit in 16 bits, then narrow (truncate) to U16 */
            vbool16_t lit_overflow = __riscv_vmsgtu_vx_u32m2_b16(v_lit, 65535, vl);
            vuint16m1_t v_lit_clamped = __riscv_vncvt_x_x_w_u16m1(v_lit, vl);
            vbool16_t ml_overflow = __riscv_vmsgtu_vx_u32m2_b16(v_match, 65535+MINMATCH, vl);
            vuint16m1_t v_ml_clamped = __riscv_vncvt_x_x_w_u16m1(
                    __riscv_vsub_vx_u32m2(v_match, MINMATCH, vl), vl);

            /* Pack the two 16-bit fields into one 32-bit word (little-endian) :
             * low 16 bits hold litLength, high 16 bits hold mlBase */
            vuint32m2_t v_lit_ml_combined = __riscv_vor_vv_u32m2(
                    __riscv_vsll_vx_u32m2(__riscv_vwcvtu_x_x_v_u32m2(v_ml_clamped, vl), 16, vl),
                    __riscv_vwcvtu_x_x_v_u32m2(v_lit_clamped, vl),
                    vl);

            /* Each SeqDef is 2 words : { offBase, litLength | mlBase << 16 } */
            vuint32m2x2_t store_data = __riscv_vcreate_v_u32m2x2(v_offBase, v_lit_ml_combined);

            /* Index of the first overflowing element in this batch, or -1 if none */
            int first_ml  = __riscv_vfirst_m_b16(ml_overflow, vl);
            int first_lit = __riscv_vfirst_m_b16(lit_overflow, vl);

            /* Interleaving segment store of the packed SeqDef entries */
            __riscv_vsseg2e32_v_u32m2x2((uint32_t*)(void*)&dstSeqs[i], store_data, vl);

            /* A long matchLength is reported as (position + 1),
             * a long litLength as (position + 1 + nbSequences) */
            if (UNLIKELY(first_ml != -1)) {
                assert(longLen == 0);
                longLen = i + first_ml + 1;
            }
            if (UNLIKELY(first_lit != -1)) {
                assert(longLen == 0);
                longLen = i + first_lit + 1 + nbSequences;
            }
        }
    }
    return longLen;
}
/* the vector implementation could also be ported to SSSE3,
* but since this implementation is targeting modern systems (>= Sapphire Rapids),
* it's not useful to develop and maintain code for older pre-AVX2 platforms */
@ -7451,6 +7538,70 @@ BlockSummary ZSTD_get1BlockSummary(const ZSTD_Sequence* seqs, size_t nbSeqs)
}
}
#elif defined ZSTD_ARCH_RISCV_RVV
/* RVV variant.
 * Scans `seqs` up to and including the first sequence whose matchLength is 0
 * (the block terminator) and sums litLength and matchLength over that range.
 * @return : a BlockSummary with nbSequences = number of sequences consumed
 *           (terminator included), litSize and blockSize = litSize + total match size;
 *           when no terminator is found within nbSeqs sequences, nbSequences holds
 *           ERROR(externalSequences_invalid) and the other fields are not set.
 * NOTE(review): sums are accumulated in 32-bit vector lanes before being widened
 *               to size_t - confirm callers bound the total block size accordingly. */
BlockSummary ZSTD_get1BlockSummary(const ZSTD_Sequence* seqs, size_t nbSeqs)
{
    size_t totalMatchSize = 0;
    size_t litSize = 0;
    size_t i = 0;
    int found_terminator = 0;
    size_t vl_max = __riscv_vsetvlmax_e32m1();
    /* Element 0 of each vector carries the running scalar sum */
    vuint32m1_t v_lit_sum = __riscv_vmv_v_x_u32m1(0, vl_max);
    vuint32m1_t v_match_sum = __riscv_vmv_v_x_u32m1(0, vl_max);

    while (i < nbSeqs) {
        size_t vl = __riscv_vsetvl_e32m2(nbSeqs - i);
        /* De-interleaving segment load; the unsigned intrinsic takes a `const uint32_t*` */
        vuint32m2x4_t v_tuple = __riscv_vlseg4e32_v_u32m2x4(
                (const uint32_t*)(const void*)&seqs[i], vl);
        vuint32m2_t v_lit   = __riscv_vget_v_u32m2x4_u32m2(v_tuple, 1);
        vuint32m2_t v_match = __riscv_vget_v_u32m2x4_u32m2(v_tuple, 2);

        /* Look for a terminator (matchLength == 0) within this batch */
        vbool16_t mask = __riscv_vmseq_vx_u32m2_b16(v_match, 0, vl);
        int first_zero = __riscv_vfirst_m_b16(mask, vl);
        if (first_zero >= 0) {
            /* Only accumulate up to and including the terminator */
            vl = (size_t)first_zero + 1;
            found_terminator = 1;
        }

        /* The reductions only consume the first `vl` elements */
        v_lit_sum = __riscv_vredsum_vs_u32m2_u32m1(v_lit, v_lit_sum, vl);
        v_match_sum = __riscv_vredsum_vs_u32m2_u32m1(v_match, v_match_sum, vl);
        i += vl;

        if (found_terminator) {
            assert(seqs[i - 1].offset == 0);
            break;
        }
    }
    litSize = __riscv_vmv_x_s_u32m1_u32(v_lit_sum);
    totalMatchSize = __riscv_vmv_x_s_u32m1_u32(v_match_sum);

    if (!found_terminator) {
        /* Ran through all nbSeqs sequences without meeting a block terminator */
        BlockSummary bs;
        bs.nbSequences = ERROR(externalSequences_invalid);
        return bs;
    }
    {   BlockSummary bs;
        bs.nbSequences = i;
        bs.blockSize = litSize + totalMatchSize;
        bs.litSize = litSize;
        return bs;
    }
}
#else
BlockSummary ZSTD_get1BlockSummary(const ZSTD_Sequence* seqs, size_t nbSeqs)

View File

@ -38,6 +38,16 @@
#endif
// There appears to be an unreconcilable syntax difference between Linux and Darwin assemblers.
// Name of a private label (i.e. not exported to symbol table) on Darwin has to start with "L",
// on Linux has to start with ".". There's no way to have a name start with both "." and "L", so
// we have to use a macro.
#if defined(__APPLE__)
#define LOCAL_LABEL(label) L_ ## label
#else
#define LOCAL_LABEL(label) .L_ ## label
#endif
#if ZSTD_ENABLE_ASM_X86_64_BMI2
/* Calling convention:
@ -117,22 +127,55 @@ ZSTD_HIDE_ASM_FUNCTION(_HUF_decompress4X1_usingDTable_internal_fast_asm_loop)
_HUF_decompress4X1_usingDTable_internal_fast_asm_loop:
HUF_decompress4X1_usingDTable_internal_fast_asm_loop:
ZSTD_CET_ENDBRANCH
.cfi_startproc
.cfi_def_cfa_offset 8
.cfi_offset %rip, -8
/* Save all registers - even if they are callee saved for simplicity. */
push %rax
.cfi_def_cfa_offset 16
.cfi_offset rax, -16
push %rbx
.cfi_def_cfa_offset 24
.cfi_offset rbx, -24
push %rcx
.cfi_def_cfa_offset 32
.cfi_offset rcx, -32
push %rdx
.cfi_def_cfa_offset 40
.cfi_offset rdx, -40
push %rbp
.cfi_def_cfa_offset 48
.cfi_offset rbp, -48
push %rsi
.cfi_def_cfa_offset 56
.cfi_offset rsi, -56
push %rdi
.cfi_def_cfa_offset 64
.cfi_offset rdi, -64
push %r8
.cfi_def_cfa_offset 72
.cfi_offset r8, -72
push %r9
.cfi_def_cfa_offset 80
.cfi_offset r9, -80
push %r10
.cfi_def_cfa_offset 88
.cfi_offset r10, -88
push %r11
.cfi_def_cfa_offset 96
.cfi_offset r11, -96
push %r12
.cfi_def_cfa_offset 104
.cfi_offset r12, -104
push %r13
.cfi_def_cfa_offset 112
.cfi_offset r13, -112
push %r14
.cfi_def_cfa_offset 120
.cfi_offset r14, -120
push %r15
.cfi_def_cfa_offset 128
.cfi_offset r15, -128
/* Read HUF_DecompressAsmArgs* args from %rax */
#if defined(_WIN32)
@ -154,13 +197,18 @@ HUF_decompress4X1_usingDTable_internal_fast_asm_loop:
movq 88(%rax), %bits3
movq 96(%rax), %dtable
push %rax /* argument */
.cfi_def_cfa_offset 136
push 104(%rax) /* ilowest */
.cfi_def_cfa_offset 144
push 112(%rax) /* oend */
.cfi_def_cfa_offset 152
push %olimit /* olimit space */
.cfi_def_cfa_offset 160
subq $24, %rsp
.cfi_def_cfa_offset 184
.L_4X1_compute_olimit:
LOCAL_LABEL(4X1_compute_olimit):
/* Computes how many iterations we can do safely
* %r15, %rax may be clobbered
* rbx, rdx must be saved
@ -207,19 +255,19 @@ HUF_decompress4X1_usingDTable_internal_fast_asm_loop:
/* If (op3 + 20 > olimit) */
movq %op3, %rax /* rax = op3 */
cmpq %rax, %olimit /* op3 == olimit */
je .L_4X1_exit
je LOCAL_LABEL(4X1_exit)
/* If (ip1 < ip0) go to exit */
cmpq %ip0, %ip1
jb .L_4X1_exit
jb LOCAL_LABEL(4X1_exit)
/* If (ip2 < ip1) go to exit */
cmpq %ip1, %ip2
jb .L_4X1_exit
jb LOCAL_LABEL(4X1_exit)
/* If (ip3 < ip2) go to exit */
cmpq %ip2, %ip3
jb .L_4X1_exit
jb LOCAL_LABEL(4X1_exit)
/* Reads top 11 bits from bits[n]
* Loads dt[bits[n]] into var[n]
@ -280,7 +328,7 @@ HUF_decompress4X1_usingDTable_internal_fast_asm_loop:
.p2align 6
.L_4X1_loop_body:
LOCAL_LABEL(4X1_loop_body):
/* Decode 5 symbols in each of the 4 streams (20 total)
* Must have called GET_NEXT_DELT for each stream
*/
@ -318,7 +366,7 @@ HUF_decompress4X1_usingDTable_internal_fast_asm_loop:
/* If op3 < olimit: continue the loop */
cmp %op3, 24(%rsp)
ja .L_4X1_loop_body
ja LOCAL_LABEL(4X1_loop_body)
/* Reload ip[1,2,3] from stack */
movq 0(%rsp), %ip1
@ -326,20 +374,25 @@ HUF_decompress4X1_usingDTable_internal_fast_asm_loop:
movq 16(%rsp), %ip3
/* Re-compute olimit */
jmp .L_4X1_compute_olimit
jmp LOCAL_LABEL(4X1_compute_olimit)
#undef GET_NEXT_DELT
#undef DECODE_FROM_DELT
#undef DECODE
#undef RELOAD_BITS
.L_4X1_exit:
LOCAL_LABEL(4X1_exit):
addq $24, %rsp
.cfi_def_cfa_offset 160
/* Restore stack (oend & olimit) */
pop %rax /* olimit */
.cfi_def_cfa_offset 152
pop %rax /* oend */
.cfi_def_cfa_offset 144
pop %rax /* ilowest */
.cfi_def_cfa_offset 136
pop %rax /* arg */
.cfi_def_cfa_offset 128
/* Save ip / op / bits */
movq %ip0, 0(%rax)
@ -357,41 +410,105 @@ HUF_decompress4X1_usingDTable_internal_fast_asm_loop:
/* Restore registers */
pop %r15
.cfi_restore r15
.cfi_def_cfa_offset 120
pop %r14
.cfi_restore r14
.cfi_def_cfa_offset 112
pop %r13
.cfi_restore r13
.cfi_def_cfa_offset 104
pop %r12
.cfi_restore r12
.cfi_def_cfa_offset 96
pop %r11
.cfi_restore r11
.cfi_def_cfa_offset 88
pop %r10
.cfi_restore r10
.cfi_def_cfa_offset 80
pop %r9
.cfi_restore r9
.cfi_def_cfa_offset 72
pop %r8
.cfi_restore r8
.cfi_def_cfa_offset 64
pop %rdi
.cfi_restore rdi
.cfi_def_cfa_offset 56
pop %rsi
.cfi_restore rsi
.cfi_def_cfa_offset 48
pop %rbp
.cfi_restore rbp
.cfi_def_cfa_offset 40
pop %rdx
.cfi_restore rdx
.cfi_def_cfa_offset 32
pop %rcx
.cfi_restore rcx
.cfi_def_cfa_offset 24
pop %rbx
.cfi_restore rbx
.cfi_def_cfa_offset 16
pop %rax
.cfi_restore rax
.cfi_def_cfa_offset 8
ret
.cfi_endproc
_HUF_decompress4X2_usingDTable_internal_fast_asm_loop:
HUF_decompress4X2_usingDTable_internal_fast_asm_loop:
ZSTD_CET_ENDBRANCH
.cfi_startproc
.cfi_def_cfa_offset 8
.cfi_offset %rip, -8
/* Save all registers - even if they are callee saved for simplicity. */
push %rax
.cfi_def_cfa_offset 16
.cfi_offset rax, -16
push %rbx
.cfi_def_cfa_offset 24
.cfi_offset rbx, -24
push %rcx
.cfi_def_cfa_offset 32
.cfi_offset rcx, -32
push %rdx
.cfi_def_cfa_offset 40
.cfi_offset rdx, -40
push %rbp
.cfi_def_cfa_offset 48
.cfi_offset rbp, -48
push %rsi
.cfi_def_cfa_offset 56
.cfi_offset rsi, -56
push %rdi
.cfi_def_cfa_offset 64
.cfi_offset rdi, -64
push %r8
.cfi_def_cfa_offset 72
.cfi_offset r8, -72
push %r9
.cfi_def_cfa_offset 80
.cfi_offset r9, -80
push %r10
.cfi_def_cfa_offset 88
.cfi_offset r10, -88
push %r11
.cfi_def_cfa_offset 96
.cfi_offset r11, -96
push %r12
.cfi_def_cfa_offset 104
.cfi_offset r12, -104
push %r13
.cfi_def_cfa_offset 112
.cfi_offset r13, -112
push %r14
.cfi_def_cfa_offset 120
.cfi_offset r14, -120
push %r15
.cfi_def_cfa_offset 128
.cfi_offset r15, -128
/* Read HUF_DecompressAsmArgs* args from %rax */
#if defined(_WIN32)
@ -413,25 +530,33 @@ HUF_decompress4X2_usingDTable_internal_fast_asm_loop:
movq 88(%rax), %bits3
movq 96(%rax), %dtable
push %rax /* argument */
.cfi_def_cfa_offset 136
push %rax /* olimit */
.cfi_def_cfa_offset 144
push 104(%rax) /* ilowest */
.cfi_def_cfa_offset 152
movq 112(%rax), %rax
push %rax /* oend3 */
.cfi_def_cfa_offset 160
movq %op3, %rax
push %rax /* oend2 */
.cfi_def_cfa_offset 168
movq %op2, %rax
push %rax /* oend1 */
.cfi_def_cfa_offset 176
movq %op1, %rax
push %rax /* oend0 */
.cfi_def_cfa_offset 184
/* Scratch space */
subq $8, %rsp
.cfi_def_cfa_offset 192
.L_4X2_compute_olimit:
LOCAL_LABEL(4X2_compute_olimit):
/* Computes how many iterations we can do safely
* %r15, %rax may be clobbered
* rdx must be saved
@ -495,19 +620,19 @@ HUF_decompress4X2_usingDTable_internal_fast_asm_loop:
/* If (op3 + 10 > olimit) */
movq %op3, %rax /* rax = op3 */
cmpq %rax, %olimit /* op3 == olimit */
je .L_4X2_exit
je LOCAL_LABEL(4X2_exit)
/* If (ip1 < ip0) go to exit */
cmpq %ip0, %ip1
jb .L_4X2_exit
jb LOCAL_LABEL(4X2_exit)
/* If (ip2 < ip1) go to exit */
cmpq %ip1, %ip2
jb .L_4X2_exit
jb LOCAL_LABEL(4X2_exit)
/* If (ip3 < ip2) go to exit */
cmpq %ip2, %ip3
jb .L_4X2_exit
jb LOCAL_LABEL(4X2_exit)
#define DECODE(n, idx) \
movq %bits##n, %rax; \
@ -534,7 +659,7 @@ HUF_decompress4X2_usingDTable_internal_fast_asm_loop:
.p2align 6
.L_4X2_loop_body:
LOCAL_LABEL(4X2_loop_body):
/* We clobber r8, so store it on the stack */
movq %r8, 0(%rsp)
@ -551,21 +676,29 @@ HUF_decompress4X2_usingDTable_internal_fast_asm_loop:
FOR_EACH_STREAM(RELOAD_BITS)
cmp %op3, 48(%rsp)
ja .L_4X2_loop_body
jmp .L_4X2_compute_olimit
ja LOCAL_LABEL(4X2_loop_body)
jmp LOCAL_LABEL(4X2_compute_olimit)
#undef DECODE
#undef RELOAD_BITS
.L_4X2_exit:
LOCAL_LABEL(4X2_exit):
addq $8, %rsp
.cfi_def_cfa_offset 184
/* Restore stack (oend & olimit) */
pop %rax /* oend0 */
.cfi_def_cfa_offset 176
pop %rax /* oend1 */
.cfi_def_cfa_offset 168
pop %rax /* oend2 */
.cfi_def_cfa_offset 160
pop %rax /* oend3 */
.cfi_def_cfa_offset 152
pop %rax /* ilowest */
.cfi_def_cfa_offset 144
pop %rax /* olimit */
.cfi_def_cfa_offset 136
pop %rax /* arg */
.cfi_def_cfa_offset 128
/* Save ip / op / bits */
movq %ip0, 0(%rax)
@ -583,20 +716,51 @@ HUF_decompress4X2_usingDTable_internal_fast_asm_loop:
/* Restore registers */
pop %r15
.cfi_restore r15
.cfi_def_cfa_offset 120
pop %r14
.cfi_restore r14
.cfi_def_cfa_offset 112
pop %r13
.cfi_restore r13
.cfi_def_cfa_offset 104
pop %r12
.cfi_restore r12
.cfi_def_cfa_offset 96
pop %r11
.cfi_restore r11
.cfi_def_cfa_offset 88
pop %r10
.cfi_restore r10
.cfi_def_cfa_offset 80
pop %r9
.cfi_restore r9
.cfi_def_cfa_offset 72
pop %r8
.cfi_restore r8
.cfi_def_cfa_offset 64
pop %rdi
.cfi_restore rdi
.cfi_def_cfa_offset 56
pop %rsi
.cfi_restore rsi
.cfi_def_cfa_offset 48
pop %rbp
.cfi_restore rbp
.cfi_def_cfa_offset 40
pop %rdx
.cfi_restore rdx
.cfi_def_cfa_offset 32
pop %rcx
.cfi_restore rcx
.cfi_def_cfa_offset 24
pop %rbx
.cfi_restore rbx
.cfi_def_cfa_offset 16
pop %rax
.cfi_restore rax
.cfi_def_cfa_offset 8
ret
.cfi_endproc
#endif

View File

@ -153,7 +153,7 @@ Usage: zstd [OPTIONS...] [INPUT... | -] [-o OUTPUT]
Options:
-o OUTPUT Write output to a single file, OUTPUT.
-k, --keep Preserve INPUT file(s). [Default]
--rm Remove INPUT file(s) after successful (de)compression.
--rm Remove INPUT file(s) after successful (de)compression to file.
-# Desired compression level, where `#` is a number between 1 and 19;
lower numbers provide faster compression, higher numbers yield

View File

@ -928,6 +928,7 @@ static int UTIL_prepareFileList(const char* dirName,
hFile=FindFirstFileA(path, &cFile);
if (hFile == INVALID_HANDLE_VALUE) {
UTIL_DISPLAYLEVEL(1, "Cannot open directory '%s'\n", dirName);
free(path);
return 0;
}
free(path);

View File

@ -147,7 +147,7 @@ static void usage(FILE* f, const char* programName)
DISPLAY_F(f, "Options:\n");
DISPLAY_F(f, " -o OUTPUT Write output to a single file, OUTPUT.\n");
DISPLAY_F(f, " -k, --keep Preserve INPUT file(s). [Default] \n");
DISPLAY_F(f, " --rm Remove INPUT file(s) after successful (de)compression.\n");
DISPLAY_F(f, " --rm Remove INPUT file(s) after successful (de)compression to file.\n");
#ifdef ZSTD_GZCOMPRESS
if (exeNameMatch(programName, ZSTD_GZ)) { /* behave like gzip */
DISPLAY_F(f, " -n, --no-name Do not store original filename when compressing.\n\n");

View File

@ -6,7 +6,7 @@ Usage: zstd *OPTIONS...* *INPUT... | -* *-o OUTPUT*
Options:
-o OUTPUT Write output to a single file, OUTPUT.
-k, --keep Preserve INPUT file(s). *Default*
--rm Remove INPUT file(s) after successful (de)compression.
--rm Remove INPUT file(s) after successful (de)compression to file.
-# Desired compression level, where `#` is a number between 1 and 19;
lower numbers provide faster compression, higher numbers yield

View File

@ -640,7 +640,7 @@ if __name__ == "__main__":
help="Preserve the scratch directory TEST_DIR/scratch/ for debugging purposes."
)
parser.add_argument("--verbose", action="store_true", help="Verbose test output.")
parser.add_argument("--timeout", default=200, type=int, help="Test case timeout in seconds. Set to 0 to disable timeouts.")
parser.add_argument("--timeout", default=800, type=int, help="Test case timeout in seconds. Set to 0 to disable timeouts.")
parser.add_argument(
"--exec-prefix",
default=None,

View File

@ -890,6 +890,7 @@ static int benchMem(unsigned scenarioID,
if (!BMK_isSuccessful_runOutcome(bOutcome)) {
DISPLAY("ERROR: Scenario %u: %s \n", scenarioID, ZSTD_getErrorName(BMK_extract_errorResult(bOutcome)));
errorcode = 1;
BMK_freeTimedFnState(tfs);
goto _cleanOut;
}

View File

@ -31,12 +31,11 @@ void* FUZZ_malloc_rand(size_t size, FUZZ_dataProducer_t *producer)
return mem;
} else {
uintptr_t ptr = 0;
/* Add +- 1M 50% of the time */
/* Return junk pointer 50% of the time */
if (FUZZ_dataProducer_uint32Range(producer, 0, 1))
FUZZ_dataProducer_int32Range(producer, -1000000, 1000000);
ptr += FUZZ_dataProducer_int32Range(producer, -1000000, 1000000);
return (void*)ptr;
}
}
int FUZZ_memcmp(void const* lhs, void const* rhs, size_t size)

View File

@ -66,6 +66,7 @@ void* FUZZ_malloc(size_t size);
/**
* Like malloc, except that it returns a random pointer for zero-sized data
* and FUZZ_ASSERTs that malloc doesn't fail.
* WARNING: Only free the returned pointer if size > 0!
*/
void* FUZZ_malloc_rand(size_t size, FUZZ_dataProducer_t *producer);

View File

@ -44,6 +44,13 @@
/* must be included after util.h, due to ERROR macro redefinition issue on Visual Studio */
#include "zstd_internal.h" /* ZSTD_WORKSPACETOOLARGE_MAXDURATION, ZSTD_WORKSPACETOOLARGE_FACTOR, KB, MB */
#include "threading.h" /* ZSTD_pthread_create, ZSTD_pthread_join */
#include "compress/hist.h" /* HIST_count_wksp */
/*-************************************
* Macros
**************************************/
#define COUNTOF(array) (sizeof(array) / sizeof(*(array)))
/*-************************************
@ -567,6 +574,123 @@ static void test_decompressBound(unsigned tnb)
DISPLAYLEVEL(3, "OK \n");
}
static unsigned test_histCountWksp(unsigned seed, unsigned testNb)
{
static const unsigned symLowLimits[] = { 0, 27, 0, 0, 27, 42, 0, 0, 27, 42, 27, 42 };
static const unsigned symHighLimits[] = { 255, 255, 210, 110, 42, 42, 210, 110, 42, 42, 42, 42 };
static const unsigned symMaxLimits[] = { 255, 255, 255, 255, 255, 255, 230, 130, 99, 99, 42, 42 };
static const size_t inputSizes[] = { 3367, 1761, 893, 117 };
unsigned workspace[HIST_WKSP_SIZE_U32];
size_t res, i, is, il;
DISPLAYLEVEL(3, "test%3u : HIST_count_wksp with empty source : ", testNb++);
{
/* With NULL source UBSan of older Clang could fail: applying zero offset to null pointer. */
static const unsigned char source[4] = { 0 };
unsigned count[1] = { 0 };
unsigned maxSym = 0;
res = HIST_count_wksp(count, &maxSym, source, 0, workspace, sizeof(workspace));
CHECK_EQ(res, 0);
CHECK_EQ(maxSym, 0);
CHECK_EQ(count[0], 0);
}
DISPLAYLEVEL(3, "OK \n");
#if HIST_WKSP_SIZE_U32
DISPLAYLEVEL(3, "test%3u : HIST_count_wksp with small workspace : ", testNb++);
{
unsigned count[1] = { 0 };
unsigned maxSym = 0;
res = HIST_count_wksp(count, &maxSym, NULL, 0, workspace, sizeof(workspace) - 1);
CHECK_EQ(res, ERROR(workSpace_tooSmall));
CHECK_EQ(maxSym, 0);
CHECK_EQ(count[0], 0);
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3u : HIST_count_wksp with wrong workspace alignment : ", testNb++);
{
unsigned count[1] = { 0 };
unsigned maxSym = 0;
res = HIST_count_wksp(count, &maxSym, NULL, 0, (unsigned*)(void*)((char*)workspace + 1), sizeof(workspace));
CHECK_EQ(res, ERROR(GENERIC));
CHECK_EQ(maxSym, 0);
CHECK_EQ(count[0], 0);
}
DISPLAYLEVEL(3, "OK \n");
#endif
DISPLAYLEVEL(3, "test%3u : HIST_count_wksp with symbol out of range, small size : ", testNb++);
{
/* For less elements HIST_count_parallel_wksp would fail. */
static const unsigned char source[4] = { 1, 4, 0, 2 };
static const unsigned expected[6] = { 0 };
unsigned count[6] = { 0 };
unsigned maxSym = 2;
res = HIST_count_wksp(count, &maxSym, source, sizeof(source), workspace, sizeof(workspace));
CHECK_EQ(res, ERROR(maxSymbolValue_tooSmall));
CHECK_EQ(maxSym, 2);
for (i = 0; i < COUNTOF(expected); ++i) CHECK_EQ(count[i], expected[i]);
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3u : HIST_count_wksp with symbol out of range, medium size : ", testNb++);
{
unsigned char source[3407];
unsigned count[6] = { 0 };
unsigned maxSym = 2;
for (i = 0; i < COUNTOF(source); ++i) {
source[i] = (48271 * (i + 1)) & 3;
}
res = HIST_count_wksp(count, &maxSym, source, sizeof(source), workspace, sizeof(workspace));
CHECK_EQ(res, ERROR(maxSymbolValue_tooSmall));
CHECK_EQ(maxSym, 2);
for (i = 0; i < COUNTOF(count); ++i) CHECK_EQ(count[i], 0);
}
DISPLAYLEVEL(3, "OK \n");
for (il = 0; il < COUNTOF(symMaxLimits); ++il) {
unsigned symMax = symMaxLimits[il];
unsigned symLow = symLowLimits[il];
unsigned symHigh = symHighLimits[il];
unsigned symRange = symHigh - symLow + 1;
for (is = 0; is < COUNTOF(inputSizes); ++is) {
unsigned char source[4000];
size_t inputSize = inputSizes[is];
assert(inputSize <= sizeof(source));
DISPLAYLEVEL(3, "test%3u : HIST_count_wksp test in [%u..%u], symMax: %u, inputSize: %u : ",
testNb++, symLow, symHigh, symMax, (unsigned)inputSize);
{
unsigned count[260] = { 0 };
unsigned expected[COUNTOF(count)] = { 0 };
unsigned maxSym = symMax;
unsigned realMaxSym = symMax;
unsigned maxCount = 0;
for (i = 0; i < inputSize; ++i) {
unsigned prng = (48271 * (i + seed)) % symRange + symLow;
source[i] = (unsigned char)prng;
++expected[prng];
}
/* for basic buffer overwrite checks */
for (i = maxSym + 1; i < COUNTOF(count); ++i) expected[i] = count[i] = ~0u;
for (i = 0; i <= maxSym; ++i) maxCount = MAX(maxCount, expected[i]);
for (i = realMaxSym; i > 0; --i) {
if (expected[i]) break;
--realMaxSym;
}
res = HIST_count_wksp(count, &maxSym, source, inputSize, workspace, sizeof(workspace));
CHECK_EQ(res, maxCount);
CHECK_EQ(maxSym, realMaxSym);
for (i = 0; i < COUNTOF(expected); ++i) CHECK_EQ(count[i], expected[i]);
}
DISPLAYLEVEL(3, "OK \n");
}
}
return testNb;
}
static void test_setCParams(unsigned tnb)
{
ZSTD_CCtx* const cctx = ZSTD_createCCtx();
@ -712,6 +836,8 @@ static int basicUnitTests(U32 const seed, double compressibility)
}
DISPLAYLEVEL(3, "OK \n");
testNb = test_histCountWksp(seed, testNb);
DISPLAYLEVEL(3, "test%3u : compress %u bytes : ", testNb++, (unsigned)CNBuffSize);
{ ZSTD_CCtx* const cctx = ZSTD_createCCtx();
if (cctx==NULL) goto _output_error;

View File

@ -73,6 +73,7 @@ int main(int argc, const char** argv)
int _exit_code = 1;
(void)argc;
(void)argv;
int _exit_code = 0;
if (!buffer || !out || !roundtrip || !cctx || !dctx) {
fprintf(stderr, "Allocation failure\n");
@ -127,4 +128,5 @@ cleanup:
ZSTD_freeDCtx(dctx);
ZSTD_freeCCtx(cctx);
return _exit_code;
return _exit_code;
}

View File

@ -56,6 +56,7 @@ static int testSimpleAPI(void)
} else {
DISPLAY("ERROR: %s\n", ZSTD_getErrorName(ret));
}
free(output);
return 1;
}
if (ret != size) {
@ -64,6 +65,7 @@ static int testSimpleAPI(void)
}
if (memcmp(EXPECTED, output, size) != 0) {
DISPLAY("ERROR: Wrong decoded output produced\n");
free(output);
return 1;
}

View File

@ -39,24 +39,39 @@ int main(int argc, const char** argv)
unsigned windowLog = 18;
(void)argc;
(void)argv;
int _exit_code = 0;
/* Create stream */
ctx = ZSTD_createCCtx();
if (!ctx) { return 1; }
/* Set parameters */
if (ZSTD_isError(ZSTD_CCtx_setParameter(ctx, ZSTD_c_windowLog, windowLog)))
return 2;
if (ZSTD_isError(ZSTD_CCtx_setParameter(ctx, ZSTD_c_chainLog, 13)))
return 2;
if (ZSTD_isError(ZSTD_CCtx_setParameter(ctx, ZSTD_c_hashLog, 14)))
return 2;
if (ZSTD_isError(ZSTD_CCtx_setParameter(ctx, ZSTD_c_searchLog, 1)))
return 2;
if (ZSTD_isError(ZSTD_CCtx_setParameter(ctx, ZSTD_c_minMatch, 7)))
return 2;
if (ZSTD_isError(ZSTD_CCtx_setParameter(ctx, ZSTD_c_targetLength, 16)))
return 2;
if (ZSTD_isError(ZSTD_CCtx_setParameter(ctx, ZSTD_c_strategy, ZSTD_fast)))
return 2;
if (ZSTD_isError(ZSTD_CCtx_setParameter(ctx, ZSTD_c_windowLog, windowLog))) {
_exit_code = 2;
goto _clean_ctx;
}
if (ZSTD_isError(ZSTD_CCtx_setParameter(ctx, ZSTD_c_chainLog, 13))) {
_exit_code = 2;
goto _clean_ctx;
}
if (ZSTD_isError(ZSTD_CCtx_setParameter(ctx, ZSTD_c_hashLog, 14))) {
_exit_code = 2;
goto _clean_ctx;
}
if (ZSTD_isError(ZSTD_CCtx_setParameter(ctx, ZSTD_c_searchLog, 1))) {
_exit_code = 2;
goto _clean_ctx;
}
if (ZSTD_isError(ZSTD_CCtx_setParameter(ctx, ZSTD_c_minMatch, 7))) {
_exit_code = 2;
goto _clean_ctx;
}
if (ZSTD_isError(ZSTD_CCtx_setParameter(ctx, ZSTD_c_targetLength, 16))) {
_exit_code = 2;
goto _clean_ctx;
}
if (ZSTD_isError(ZSTD_CCtx_setParameter(ctx, ZSTD_c_strategy, ZSTD_fast))) {
_exit_code = 2;
goto _clean_ctx;
}
{
U64 compressed = 0;
const U64 toCompress = ((U64)1) << 33;
@ -81,22 +96,28 @@ int main(int argc, const char** argv)
}
printf("Compressing, trying to generate a segfault \n");
if (compress(ctx, out, srcBuffer, size)) {
return 1;
_exit_code = 1;
goto _clean_buffer;
}
compressed += size;
while (compressed < toCompress) {
const size_t block = rand() % (size - pos + 1);
if (pos == size) { pos = 0; }
if (compress(ctx, out, srcBuffer + pos, block)) {
return 1;
_exit_code = 1;
goto _clean_buffer;
}
pos += block;
compressed += block;
}
printf("Compression completed successfully (no error triggered)\n");
_clean_buffer:
free(srcBuffer);
free(dstBuffer);
}
_clean_ctx:
ZSTD_freeCCtx(ctx);
return 0;
return _exit_code;
}