mirror of
https://github.com/facebook/zstd.git
synced 2025-10-04 00:02:33 -04:00
merge
This commit is contained in:
commit
a74f7fcabd
153
.github/workflows/cmake-tests.yml
vendored
Normal file
153
.github/workflows/cmake-tests.yml
vendored
Normal file
@ -0,0 +1,153 @@
|
||||
name: cmake-tests
|
||||
# CMake-specific build and test workflows
|
||||
# This workflow validates zstd builds across different CMake configurations,
|
||||
# platforms, and edge cases to ensure broad compatibility.
|
||||
|
||||
concurrency:
|
||||
group: cmake-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches: [ dev, release, actionsTest ]
|
||||
|
||||
permissions: read-all
|
||||
|
||||
env:
|
||||
# Centralized test timeouts for consistency
|
||||
QUICK_TEST_TIME: "30s"
|
||||
STANDARD_TEST_TIME: "1mn"
|
||||
# Common CMake flags
|
||||
COMMON_CMAKE_FLAGS: "-DCMAKE_COMPILE_WARNING_AS_ERROR=ON -DZSTD_BUILD_TESTS=ON"
|
||||
|
||||
jobs:
|
||||
# Ubuntu-based cmake build using make wrapper
|
||||
# This test uses the make-driven cmake build to ensure compatibility
|
||||
# with the existing build system integration
|
||||
cmake-ubuntu-basic:
|
||||
name: "CMake Ubuntu Basic Build"
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # tag=v4.2.2
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt install liblzma-dev # Required for compression algorithms
|
||||
- name: CMake build and test via make
|
||||
run: |
|
||||
# Use the make wrapper for the cmake build, with the standard test duration
|
||||
FUZZERTEST=-T${{ env.STANDARD_TEST_TIME }} ZSTREAM_TESTTIME=-T${{ env.STANDARD_TEST_TIME }} make cmakebuild V=1
|
||||
|
||||
# Cross-platform cmake build with edge case: source paths containing spaces
|
||||
# This test ensures cmake handles filesystem paths with spaces correctly
|
||||
# across different operating systems and build generators
|
||||
cmake-cross-platform-spaces:
|
||||
name: "CMake Cross-Platform (Spaces in Path)"
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
matrix:
|
||||
include:
|
||||
- os: ubuntu-latest
|
||||
generator: "Unix Makefiles"
|
||||
name: "Linux"
|
||||
- os: windows-latest
|
||||
generator: "NMake Makefiles"
|
||||
name: "Windows NMake"
|
||||
- os: macos-latest
|
||||
generator: "Unix Makefiles"
|
||||
name: "macOS"
|
||||
env:
|
||||
SRC_DIR: "source directory with spaces"
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # tag=v4.2.2
|
||||
with:
|
||||
path: "${{ env.SRC_DIR }}"
|
||||
- uses: ilammy/msvc-dev-cmd@0b201ec74fa43914dc39ae48a89fd1d8cb592756 # v1.13.0
|
||||
if: ${{ matrix.generator == 'NMake Makefiles' }}
|
||||
- name: "CMake build and install (${{ matrix.name }})"
|
||||
run: |
|
||||
# Test Release build with installation to verify packaging
|
||||
cmake -S "${{ env.SRC_DIR }}/build/cmake" -B build -DBUILD_TESTING=ON -G "${{ matrix.generator }}" -DCMAKE_BUILD_TYPE=Release --install-prefix "${{ runner.temp }}/install"
|
||||
cmake --build build --config Release
|
||||
cmake --install build --config Release
|
||||
|
||||
# Windows-specific cmake testing with Visual Studio 2022
|
||||
# Tests multiple generators and toolchains to ensure broad Windows compatibility
|
||||
# including MSVC (x64, Win32; ARM64 is currently disabled), MinGW, and Clang-CL with various architectures and optimizations
|
||||
cmake-windows-comprehensive:
|
||||
name: "CMake Windows VS2022 (${{ matrix.name }})"
|
||||
runs-on: ${{ matrix.runner }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- generator: "Visual Studio 17 2022"
|
||||
flags: "-A x64"
|
||||
name: "MSVC x64"
|
||||
runner: "windows-2022"
|
||||
cmake_extra_flags: "-DCMAKE_COMPILE_WARNING_AS_ERROR=ON -DZSTD_BUILD_TESTS=ON"
|
||||
- generator: "Visual Studio 17 2022"
|
||||
flags: "-A Win32"
|
||||
name: "MSVC Win32"
|
||||
runner: "windows-2022"
|
||||
cmake_extra_flags: "-DCMAKE_COMPILE_WARNING_AS_ERROR=ON -DZSTD_BUILD_TESTS=ON"
|
||||
- generator: "Visual Studio 17 2022"
|
||||
flags: "-A x64"
|
||||
name: "MSVC x64 (No ZSTD_BUILD_TESTS)"
|
||||
runner: "windows-2022"
|
||||
# Intentionally omit ZSTD_BUILD_TESTS to reproduce the CXX language configuration bug
|
||||
cmake_extra_flags: "-DCMAKE_COMPILE_WARNING_AS_ERROR=ON"
|
||||
# - generator: "Visual Studio 17 2022"
|
||||
# flags: "-A ARM64"
|
||||
# name: "MSVC ARM64"
|
||||
# runner: "windows-2022-arm64" # Disabled due to very long queue times
|
||||
- generator: "MinGW Makefiles"
|
||||
flags: ""
|
||||
name: "MinGW"
|
||||
runner: "windows-2022"
|
||||
cmake_extra_flags: "-DCMAKE_COMPILE_WARNING_AS_ERROR=ON -DZSTD_BUILD_TESTS=ON"
|
||||
- generator: "Visual Studio 17 2022"
|
||||
flags: "-T ClangCL"
|
||||
name: "Clang-CL"
|
||||
runner: "windows-2022"
|
||||
cmake_extra_flags: "-DCMAKE_COMPILE_WARNING_AS_ERROR=ON -DZSTD_BUILD_TESTS=ON"
|
||||
- generator: "Visual Studio 17 2022"
|
||||
flags: "-T ClangCL -A x64 -DCMAKE_C_FLAGS=/arch:AVX2"
|
||||
name: "Clang-CL AVX2"
|
||||
runner: "windows-2022"
|
||||
cmake_extra_flags: "-DCMAKE_COMPILE_WARNING_AS_ERROR=ON -DZSTD_BUILD_TESTS=ON"
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # tag=v4.2.2
|
||||
- name: Add MSBuild to PATH
|
||||
uses: microsoft/setup-msbuild@6fb02220983dee41ce7ae257b6f4d8f9bf5ed4ce # tag=v2.0.0
|
||||
- name: "Configure CMake (${{ matrix.name }})"
|
||||
run: |
|
||||
cd build\cmake
|
||||
mkdir build
|
||||
cd build
|
||||
cmake.exe -G "${{matrix.generator}}" ${{matrix.flags}} -DCMAKE_BUILD_TYPE=Debug ${{ matrix.cmake_extra_flags }} -DZSTD_ZSTREAM_FLAGS=-T${{ env.QUICK_TEST_TIME }} -DZSTD_FUZZER_FLAGS=-T${{ env.QUICK_TEST_TIME }} -DZSTD_FULLBENCH_FLAGS=-i0 ..
|
||||
- name: "Build (${{ matrix.name }})"
|
||||
run: |
|
||||
cd build\cmake\build
|
||||
cmake.exe --build .
|
||||
- name: "Test (${{ matrix.name }})"
|
||||
run: |
|
||||
cd build\cmake\build
|
||||
ctest.exe -V -C Debug
|
||||
|
||||
# macOS ARM64 (Apple Silicon) specific cmake testing
|
||||
# Validates zstd builds and runs correctly on Apple Silicon architecture
|
||||
# Uses native ARM64 hardware for optimal performance and compatibility testing
|
||||
cmake-macos-arm64:
|
||||
name: "CMake macOS ARM64 (Apple Silicon)"
|
||||
runs-on: macos-14 # ARM64 runner
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # tag=v4.2.2
|
||||
- name: "CMake build and test (ARM64)"
|
||||
run: |
|
||||
# Configure, build and run the tests natively on the ARM64 runner
|
||||
cd build/cmake
|
||||
mkdir build
|
||||
cd build
|
||||
cmake -DCMAKE_BUILD_TYPE=Release ${{ env.COMMON_CMAKE_FLAGS }} -DZSTD_ZSTREAM_FLAGS=-T${{ env.QUICK_TEST_TIME }} -DZSTD_FUZZER_FLAGS=-T${{ env.QUICK_TEST_TIME }} -DZSTD_FULLBENCH_FLAGS=-i1 ..
|
||||
make -j$(sysctl -n hw.ncpu)
|
||||
ctest -V
|
64
.github/workflows/dev-short-tests.yml
vendored
64
.github/workflows/dev-short-tests.yml
vendored
@ -72,40 +72,6 @@ jobs:
|
||||
# candidate test (for discussion) : underlink test
|
||||
# LDFLAGS=-Wl,--no-undefined : will make the linker fail if dll is underlinked
|
||||
|
||||
cmake-build-and-test-check:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # tag=v4.2.2
|
||||
- name: cmake build and test
|
||||
run: |
|
||||
sudo apt install liblzma-dev
|
||||
FUZZERTEST=-T1mn ZSTREAM_TESTTIME=-T1mn make cmakebuild V=1
|
||||
|
||||
cmake-source-directory-with-spaces:
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
matrix:
|
||||
include:
|
||||
- os: ubuntu-latest
|
||||
generator: "Unix Makefiles"
|
||||
- os: windows-latest
|
||||
generator: "NMake Makefiles"
|
||||
- os: macos-latest
|
||||
generator: "Unix Makefiles"
|
||||
env:
|
||||
SRC_DIR: "source directory with spaces"
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # tag=v4.2.2
|
||||
with:
|
||||
path: "${{ env.SRC_DIR }}"
|
||||
- uses: ilammy/msvc-dev-cmd@0b201ec74fa43914dc39ae48a89fd1d8cb592756 # v1.13.0
|
||||
if: ${{ matrix.generator == 'NMake Makefiles' }}
|
||||
- name: cmake build on a source directory with spaces
|
||||
run: |
|
||||
cmake -S "${{ env.SRC_DIR }}/build/cmake" -B build -DBUILD_TESTING=ON -G "${{ matrix.generator }}" -DCMAKE_BUILD_TYPE=Release --install-prefix "${{ runner.temp }}/install"
|
||||
cmake --build build --config Release
|
||||
cmake --install build --config Release
|
||||
|
||||
cpp-gnu90-c99-compatibility:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
@ -339,34 +305,6 @@ jobs:
|
||||
run: |
|
||||
meson install -C builddir --destdir staging/
|
||||
|
||||
cmake-visual-2022:
|
||||
strategy:
|
||||
matrix:
|
||||
include:
|
||||
- generator: "Visual Studio 17 2022"
|
||||
flags: "-A x64"
|
||||
- generator: "Visual Studio 17 2022"
|
||||
flags: "-A Win32"
|
||||
- generator: "MinGW Makefiles"
|
||||
- generator: "Visual Studio 17 2022"
|
||||
flags: "-T ClangCL"
|
||||
- generator: "Visual Studio 17 2022"
|
||||
flags: "-T ClangCL -A x64 -DCMAKE_C_FLAGS=/arch:AVX2"
|
||||
runs-on: windows-2022
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # tag=v4.2.2
|
||||
- name: Add MSBuild to PATH
|
||||
uses: microsoft/setup-msbuild@6fb02220983dee41ce7ae257b6f4d8f9bf5ed4ce # tag=v2.0.0
|
||||
- name: Build & Test
|
||||
working-directory: ${{env.GITHUB_WORKSPACE}}
|
||||
run: |
|
||||
cd build\cmake
|
||||
mkdir build
|
||||
cd build
|
||||
cmake.exe -G "${{matrix.generator}}" ${{matrix.flags}} -DCMAKE_BUILD_TYPE=Debug -DCMAKE_COMPILE_WARNING_AS_ERROR=ON -DZSTD_BUILD_TESTS:BOOL=ON -DZSTD_ZSTREAM_FLAGS=-T30s -DZSTD_FUZZER_FLAGS=-T30s -DZSTD_FULLBENCH_FLAGS=-i0 ..
|
||||
cmake.exe --build .
|
||||
ctest.exe -V -C Debug
|
||||
|
||||
msbuild-visual-studio:
|
||||
strategy:
|
||||
fail-fast: false # 'false' means Don't stop matrix workflows even if some matrix failed.
|
||||
@ -497,6 +435,8 @@ jobs:
|
||||
make clean
|
||||
LDFLAGS="-static" CC=$XCC QEMU_SYS=$XEMU make -j check
|
||||
LDFLAGS="-static" CC=$XCC QEMU_SYS=$XEMU make -j -C tests test-cli-tests
|
||||
CFLAGS="-march=armv8.2-a+sve2" LDFLAGS="-static" CC=$XCC QEMU_SYS=$XEMU make -j check
|
||||
CFLAGS="-march=armv8.2-a+sve2" LDFLAGS="-static" CC=$XCC QEMU_SYS=$XEMU make -j -C tests test-cli-tests
|
||||
# This test is only compatible with standard libraries that support BTI (Branch Target Identification).
|
||||
# Unfortunately, the standard library provided on Ubuntu 24.04 does not have this feature enabled.
|
||||
# make clean
|
||||
|
@ -9,226 +9,73 @@
|
||||
|
||||
cmake_minimum_required(VERSION 3.10 FATAL_ERROR)
|
||||
|
||||
# As of 2018-12-26 ZSTD has been validated to build with cmake version 3.13.2 new policies.
|
||||
# Set and use the newest cmake policies that are validated to work
|
||||
set(ZSTD_MAX_VALIDATED_CMAKE_MAJOR_VERSION "3")
|
||||
set(ZSTD_MAX_VALIDATED_CMAKE_MINOR_VERSION "13") #Policies never changed at PATCH level
|
||||
if("${ZSTD_MAX_VALIDATED_CMAKE_MAJOR_VERSION}" EQUAL "${CMAKE_MAJOR_VERSION}" AND
|
||||
"${ZSTD_MAX_VALIDATED_CMAKE_MINOR_VERSION}" GREATER "${CMAKE_MINOR_VERSION}")
|
||||
set(ZSTD_CMAKE_POLICY_VERSION "${CMAKE_VERSION}")
|
||||
else()
|
||||
set(ZSTD_CMAKE_POLICY_VERSION "${ZSTD_MAX_VALIDATED_CMAKE_MAJOR_VERSION}.${ZSTD_MAX_VALIDATED_CMAKE_MINOR_VERSION}.0")
|
||||
endif()
|
||||
cmake_policy(VERSION ${ZSTD_CMAKE_POLICY_VERSION})
|
||||
|
||||
set(CMAKE_BUILD_WITH_INSTALL_RPATH on)
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Setup CMake environment
|
||||
#-----------------------------------------------------------------------------
|
||||
set(CMAKE_BUILD_WITH_INSTALL_RPATH ON)
|
||||
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules")
|
||||
|
||||
# Define project paths
|
||||
set(ZSTD_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../..")
|
||||
set(LIBRARY_DIR ${ZSTD_SOURCE_DIR}/lib)
|
||||
# Parse version
|
||||
include(GetZstdLibraryVersion)
|
||||
GetZstdLibraryVersion(${LIBRARY_DIR}/zstd.h zstd_VERSION_MAJOR zstd_VERSION_MINOR zstd_VERSION_PATCH)
|
||||
|
||||
set(ZSTD_SHORT_VERSION "${zstd_VERSION_MAJOR}.${zstd_VERSION_MINOR}")
|
||||
set(ZSTD_FULL_VERSION "${zstd_VERSION_MAJOR}.${zstd_VERSION_MINOR}.${zstd_VERSION_PATCH}")
|
||||
|
||||
project(zstd
|
||||
VERSION "${ZSTD_FULL_VERSION}"
|
||||
LANGUAGES C # Main library is in C
|
||||
ASM # And ASM
|
||||
)
|
||||
|
||||
message(STATUS "ZSTD VERSION: ${zstd_VERSION}")
|
||||
set(zstd_HOMEPAGE_URL "https://facebook.github.io/zstd")
|
||||
set(zstd_DESCRIPTION "Zstandard is a real-time compression algorithm, providing high compression ratios.")
|
||||
|
||||
# Set a default build type if none was specified
|
||||
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
|
||||
message(STATUS "Setting build type to 'Release' as none was specified.")
|
||||
set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." FORCE)
|
||||
# Set the possible values of build type for cmake-gui
|
||||
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
|
||||
endif()
|
||||
|
||||
include(GNUInstallDirs)
|
||||
|
||||
# Always hide XXHash symbols
|
||||
add_definitions(-DXXH_NAMESPACE=ZSTD_)
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Installation variables
|
||||
# Configure CMake policies and version
|
||||
#-----------------------------------------------------------------------------
|
||||
include(ZstdVersion)
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Project declaration
|
||||
#-----------------------------------------------------------------------------
|
||||
project(zstd
|
||||
VERSION "${ZSTD_FULL_VERSION}"
|
||||
LANGUAGES C ASM # Main library is in C and ASM
|
||||
HOMEPAGE_URL "${zstd_HOMEPAGE_URL}"
|
||||
DESCRIPTION "${zstd_DESCRIPTION}"
|
||||
)
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Build type configuration
|
||||
#-----------------------------------------------------------------------------
|
||||
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
|
||||
message(STATUS "Setting build type to 'Release' as none was specified.")
|
||||
set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." FORCE)
|
||||
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
|
||||
endif()
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Include standard modules
|
||||
#-----------------------------------------------------------------------------
|
||||
include(GNUInstallDirs)
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Display installation information
|
||||
#-----------------------------------------------------------------------------
|
||||
message(STATUS "CMAKE_INSTALL_PREFIX: ${CMAKE_INSTALL_PREFIX}")
|
||||
message(STATUS "CMAKE_INSTALL_LIBDIR: ${CMAKE_INSTALL_LIBDIR}")
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Options
|
||||
# Configure build options
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
# Legacy support
|
||||
option(ZSTD_LEGACY_SUPPORT "LEGACY SUPPORT" ON)
|
||||
|
||||
if (ZSTD_LEGACY_SUPPORT)
|
||||
message(STATUS "ZSTD_LEGACY_SUPPORT defined!")
|
||||
set(ZSTD_LEGACY_LEVEL 5 CACHE STRING "")
|
||||
add_definitions(-DZSTD_LEGACY_SUPPORT=${ZSTD_LEGACY_LEVEL})
|
||||
else ()
|
||||
message(STATUS "ZSTD_LEGACY_SUPPORT not defined!")
|
||||
add_definitions(-DZSTD_LEGACY_SUPPORT=0)
|
||||
endif ()
|
||||
|
||||
if (APPLE)
|
||||
option(ZSTD_FRAMEWORK "Build as Apple Frameworks" OFF)
|
||||
endif ()
|
||||
|
||||
if (ANDROID)
|
||||
set(ZSTD_MULTITHREAD_SUPPORT_DEFAULT OFF)
|
||||
# Old versions of bionic libc don't have fseeko/ftello
|
||||
if ((NOT ${ANDROID_PLATFORM_LEVEL}) OR ${ANDROID_PLATFORM_LEVEL} VERSION_LESS 24)
|
||||
message(STATUS "Setting compile definitions for old Android API")
|
||||
add_compile_definitions(LIBC_NO_FSEEKO)
|
||||
endif ()
|
||||
else()
|
||||
set(ZSTD_MULTITHREAD_SUPPORT_DEFAULT ON)
|
||||
endif()
|
||||
|
||||
# Multi-threading support
|
||||
option(ZSTD_MULTITHREAD_SUPPORT "MULTITHREADING SUPPORT" ${ZSTD_MULTITHREAD_SUPPORT_DEFAULT})
|
||||
|
||||
if (ZSTD_MULTITHREAD_SUPPORT)
|
||||
message(STATUS "ZSTD_MULTITHREAD_SUPPORT is enabled")
|
||||
else ()
|
||||
message(STATUS "ZSTD_MULTITHREAD_SUPPORT is disabled")
|
||||
endif ()
|
||||
|
||||
option(ZSTD_BUILD_PROGRAMS "BUILD PROGRAMS" ON)
|
||||
option(ZSTD_BUILD_CONTRIB "BUILD CONTRIB" OFF)
|
||||
|
||||
# Respect the conventional CMake option for enabling tests if it was specified on the first configure
|
||||
if (BUILD_TESTING)
|
||||
set(ZSTD_BUILD_TESTS_default ON)
|
||||
else()
|
||||
set(ZSTD_BUILD_TESTS_default OFF)
|
||||
endif()
|
||||
option(ZSTD_BUILD_TESTS "BUILD TESTS" ${ZSTD_BUILD_TESTS_default})
|
||||
if (MSVC)
|
||||
option(ZSTD_USE_STATIC_RUNTIME "LINK TO STATIC RUN-TIME LIBRARIES" OFF)
|
||||
endif ()
|
||||
|
||||
# Enable C++ support for testing.
|
||||
set(ZSTD_ENABLE_CXX ${ZSTD_BUILD_TESTS})
|
||||
|
||||
if(ZSTD_ENABLE_CXX)
|
||||
enable_language(CXX)
|
||||
endif()
|
||||
include(ZstdOptions)
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Add extra compilation flags
|
||||
# Configure compilation flags
|
||||
#-----------------------------------------------------------------------------
|
||||
include(AddZstdCompilationFlags)
|
||||
ADD_ZSTD_COMPILATION_FLAGS(ON ZSTD_ENABLE_CXX ON) # C CXX LD
|
||||
ADD_ZSTD_COMPILATION_FLAGS(ON ZSTD_ENABLE_CXX ON)
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# External dependencies
|
||||
# Configure dependencies
|
||||
#-----------------------------------------------------------------------------
|
||||
# Define a function to handle special thread settings for HP-UX
|
||||
# See https://github.com/facebook/zstd/pull/3862 for details.
|
||||
function(setup_hpux_threads)
|
||||
find_package(Threads)
|
||||
if (NOT Threads_FOUND)
|
||||
set(CMAKE_USE_PTHREADS_INIT 1 PARENT_SCOPE)
|
||||
set(CMAKE_THREAD_LIBS_INIT -lpthread PARENT_SCOPE)
|
||||
set(CMAKE_HAVE_THREADS_LIBRARY 1 PARENT_SCOPE)
|
||||
set(Threads_FOUND TRUE PARENT_SCOPE)
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
if (ZSTD_MULTITHREAD_SUPPORT AND UNIX)
|
||||
if (CMAKE_SYSTEM_NAME MATCHES "HP-UX")
|
||||
setup_hpux_threads()
|
||||
else()
|
||||
set(THREADS_PREFER_PTHREAD_FLAG ON)
|
||||
find_package(Threads REQUIRED)
|
||||
endif()
|
||||
if (CMAKE_USE_PTHREADS_INIT)
|
||||
set(THREADS_LIBS "${CMAKE_THREAD_LIBS_INIT}")
|
||||
else()
|
||||
message(SEND_ERROR "ZSTD currently does not support thread libraries other than pthreads")
|
||||
endif()
|
||||
endif ()
|
||||
include(ZstdDependencies)
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Add source directories
|
||||
# Configure build targets
|
||||
#-----------------------------------------------------------------------------
|
||||
add_subdirectory(lib)
|
||||
|
||||
option(ZSTD_PROGRAMS_LINK_SHARED "PROGRAMS LINK SHARED" OFF)
|
||||
|
||||
if (ZSTD_BUILD_PROGRAMS)
|
||||
if (NOT ZSTD_BUILD_STATIC AND NOT ZSTD_PROGRAMS_LINK_SHARED)
|
||||
message(SEND_ERROR "You need to build static library to build zstd CLI")
|
||||
elseif(NOT ZSTD_BUILD_SHARED AND ZSTD_PROGRAMS_LINK_SHARED)
|
||||
message(SEND_ERROR "You need to build shared library to build zstd CLI")
|
||||
endif ()
|
||||
|
||||
add_subdirectory(programs)
|
||||
endif ()
|
||||
|
||||
if (ZSTD_BUILD_TESTS)
|
||||
enable_testing()
|
||||
if (NOT ZSTD_BUILD_STATIC)
|
||||
message(SEND_ERROR "You need to build static library to build tests")
|
||||
endif ()
|
||||
|
||||
add_subdirectory(tests)
|
||||
endif ()
|
||||
|
||||
if (ZSTD_BUILD_CONTRIB)
|
||||
add_subdirectory(contrib)
|
||||
endif ()
|
||||
include(ZstdBuild)
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Add clean-all target
|
||||
# Configure package generation
|
||||
#-----------------------------------------------------------------------------
|
||||
add_custom_target(clean-all
|
||||
COMMAND ${CMAKE_BUILD_TOOL} clean
|
||||
COMMAND rm -rf ${CMAKE_BINARY_DIR}/
|
||||
)
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Generate Package Config files
|
||||
#
|
||||
# This section is based on the boiler plate code from:
|
||||
# https://cmake.org/cmake/help/latest/manual/cmake-packages.7.html#creating-packages
|
||||
#-----------------------------------------------------------------------------
|
||||
include(CMakePackageConfigHelpers)
|
||||
write_basic_package_version_file(
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/zstdConfigVersion.cmake"
|
||||
VERSION ${zstd_VERSION}
|
||||
COMPATIBILITY SameMajorVersion
|
||||
)
|
||||
|
||||
# A Package Config file that works from the build directory
|
||||
export(EXPORT zstdExports
|
||||
FILE "${CMAKE_CURRENT_BINARY_DIR}/zstdTargets.cmake"
|
||||
NAMESPACE zstd::
|
||||
)
|
||||
|
||||
# A Package Config file that works from the installation directory
|
||||
set(ConfigPackageLocation ${CMAKE_INSTALL_LIBDIR}/cmake/zstd)
|
||||
install(EXPORT zstdExports
|
||||
FILE zstdTargets.cmake
|
||||
NAMESPACE zstd::
|
||||
DESTINATION ${ConfigPackageLocation}
|
||||
)
|
||||
configure_package_config_file(
|
||||
zstdConfig.cmake.in
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/zstdConfig.cmake"
|
||||
INSTALL_DESTINATION ${ConfigPackageLocation}
|
||||
)
|
||||
install(FILES
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/zstdConfig.cmake"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/zstdConfigVersion.cmake"
|
||||
DESTINATION ${ConfigPackageLocation}
|
||||
)
|
||||
include(ZstdPackage)
|
||||
|
@ -1,5 +1,7 @@
|
||||
include(CheckCXXCompilerFlag)
|
||||
include(CheckCCompilerFlag)
|
||||
if(CMAKE_CXX_COMPILER)
|
||||
include(CheckCXXCompilerFlag)
|
||||
endif()
|
||||
|
||||
if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.18)
|
||||
set(ZSTD_HAVE_CHECK_LINKER_FLAG true)
|
||||
@ -21,7 +23,7 @@ function(EnableCompilerFlag _flag _C _CXX _LD)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${_flag}" PARENT_SCOPE)
|
||||
endif ()
|
||||
endif ()
|
||||
if (_CXX)
|
||||
if (_CXX AND CMAKE_CXX_COMPILER)
|
||||
CHECK_CXX_COMPILER_FLAG(${_flag} CXX_FLAG_${varname})
|
||||
if (CXX_FLAG_${varname})
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${_flag}" PARENT_SCOPE)
|
||||
|
42
build/cmake/CMakeModules/ZstdBuild.cmake
Normal file
42
build/cmake/CMakeModules/ZstdBuild.cmake
Normal file
@ -0,0 +1,42 @@
|
||||
# ################################################################
|
||||
# ZSTD Build Targets Configuration
|
||||
# ################################################################
|
||||
|
||||
# Always build the library first (this defines ZSTD_BUILD_STATIC/SHARED options)
|
||||
add_subdirectory(lib)
|
||||
|
||||
# Validate build configuration after lib options are defined
|
||||
if(ZSTD_BUILD_PROGRAMS)
|
||||
if(NOT ZSTD_BUILD_STATIC AND NOT ZSTD_PROGRAMS_LINK_SHARED)
|
||||
message(SEND_ERROR "Static library required to build zstd CLI programs")
|
||||
elseif(NOT ZSTD_BUILD_SHARED AND ZSTD_PROGRAMS_LINK_SHARED)
|
||||
message(SEND_ERROR "Shared library required to build zstd CLI programs")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(ZSTD_BUILD_TESTS AND NOT ZSTD_BUILD_STATIC)
|
||||
message(SEND_ERROR "Static library required to build test suite")
|
||||
endif()
|
||||
|
||||
# Add programs if requested
|
||||
if(ZSTD_BUILD_PROGRAMS)
|
||||
add_subdirectory(programs)
|
||||
endif()
|
||||
|
||||
# Add tests if requested
|
||||
if(ZSTD_BUILD_TESTS)
|
||||
enable_testing()
|
||||
add_subdirectory(tests)
|
||||
endif()
|
||||
|
||||
# Add contrib utilities if requested
|
||||
if(ZSTD_BUILD_CONTRIB)
|
||||
add_subdirectory(contrib)
|
||||
endif()
|
||||
|
||||
# Clean-all target for thorough cleanup
|
||||
add_custom_target(clean-all
|
||||
COMMAND ${CMAKE_BUILD_TOOL} clean
|
||||
COMMAND ${CMAKE_COMMAND} -E remove_directory ${CMAKE_BINARY_DIR}/
|
||||
COMMENT "Performing complete clean including build directory"
|
||||
)
|
30
build/cmake/CMakeModules/ZstdDependencies.cmake
Normal file
30
build/cmake/CMakeModules/ZstdDependencies.cmake
Normal file
@ -0,0 +1,30 @@
|
||||
# ################################################################
|
||||
# ZSTD Dependencies Configuration
|
||||
# ################################################################
|
||||
|
||||
# Function to handle HP-UX thread configuration
|
||||
function(setup_hpux_threads)
|
||||
find_package(Threads)
|
||||
if(NOT Threads_FOUND)
|
||||
set(CMAKE_USE_PTHREADS_INIT 1 PARENT_SCOPE)
|
||||
set(CMAKE_THREAD_LIBS_INIT -lpthread PARENT_SCOPE)
|
||||
set(CMAKE_HAVE_THREADS_LIBRARY 1 PARENT_SCOPE)
|
||||
set(Threads_FOUND TRUE PARENT_SCOPE)
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
# Configure threading support
|
||||
if(ZSTD_MULTITHREAD_SUPPORT AND UNIX)
|
||||
if(CMAKE_SYSTEM_NAME MATCHES "HP-UX")
|
||||
setup_hpux_threads()
|
||||
else()
|
||||
set(THREADS_PREFER_PTHREAD_FLAG ON)
|
||||
find_package(Threads REQUIRED)
|
||||
endif()
|
||||
|
||||
if(CMAKE_USE_PTHREADS_INIT)
|
||||
set(THREADS_LIBS "${CMAKE_THREAD_LIBS_INIT}")
|
||||
else()
|
||||
message(SEND_ERROR "ZSTD currently does not support thread libraries other than pthreads")
|
||||
endif()
|
||||
endif()
|
68
build/cmake/CMakeModules/ZstdOptions.cmake
Normal file
68
build/cmake/CMakeModules/ZstdOptions.cmake
Normal file
@ -0,0 +1,68 @@
|
||||
# ################################################################
|
||||
# ZSTD Build Options Configuration
|
||||
# ################################################################
|
||||
|
||||
# Legacy support configuration
|
||||
option(ZSTD_LEGACY_SUPPORT "Enable legacy format support" ON)
|
||||
|
||||
if(ZSTD_LEGACY_SUPPORT)
|
||||
message(STATUS "ZSTD_LEGACY_SUPPORT enabled")
|
||||
set(ZSTD_LEGACY_LEVEL 5 CACHE STRING "Legacy support level")
|
||||
add_definitions(-DZSTD_LEGACY_SUPPORT=${ZSTD_LEGACY_LEVEL})
|
||||
else()
|
||||
message(STATUS "ZSTD_LEGACY_SUPPORT disabled")
|
||||
add_definitions(-DZSTD_LEGACY_SUPPORT=0)
|
||||
endif()
|
||||
|
||||
# Platform-specific options
|
||||
if(APPLE)
|
||||
option(ZSTD_FRAMEWORK "Build as Apple Framework" OFF)
|
||||
endif()
|
||||
|
||||
# Android-specific configuration
|
||||
if(ANDROID)
|
||||
set(ZSTD_MULTITHREAD_SUPPORT_DEFAULT OFF)
|
||||
# Handle old Android API levels
|
||||
if((NOT ANDROID_PLATFORM_LEVEL) OR (ANDROID_PLATFORM_LEVEL VERSION_LESS 24))
|
||||
message(STATUS "Configuring for old Android API - disabling fseeko/ftello")
|
||||
add_compile_definitions(LIBC_NO_FSEEKO)
|
||||
endif()
|
||||
else()
|
||||
set(ZSTD_MULTITHREAD_SUPPORT_DEFAULT ON)
|
||||
endif()
|
||||
|
||||
# Multi-threading support
|
||||
option(ZSTD_MULTITHREAD_SUPPORT "Enable multi-threading support" ${ZSTD_MULTITHREAD_SUPPORT_DEFAULT})
|
||||
|
||||
if(ZSTD_MULTITHREAD_SUPPORT)
|
||||
message(STATUS "Multi-threading support enabled")
|
||||
else()
|
||||
message(STATUS "Multi-threading support disabled")
|
||||
endif()
|
||||
|
||||
# Build component options
|
||||
option(ZSTD_BUILD_PROGRAMS "Build command-line programs" ON)
|
||||
option(ZSTD_BUILD_CONTRIB "Build contrib utilities" OFF)
|
||||
option(ZSTD_PROGRAMS_LINK_SHARED "Link programs against shared library" OFF)
|
||||
|
||||
# Test configuration
|
||||
if(BUILD_TESTING)
|
||||
set(ZSTD_BUILD_TESTS_default ON)
|
||||
else()
|
||||
set(ZSTD_BUILD_TESTS_default OFF)
|
||||
endif()
|
||||
option(ZSTD_BUILD_TESTS "Build test suite" ${ZSTD_BUILD_TESTS_default})
|
||||
|
||||
# MSVC-specific options
|
||||
if(MSVC)
|
||||
option(ZSTD_USE_STATIC_RUNTIME "Link to static runtime libraries" OFF)
|
||||
endif()
|
||||
|
||||
# C++ support (needed for tests)
|
||||
set(ZSTD_ENABLE_CXX ${ZSTD_BUILD_TESTS})
|
||||
if(ZSTD_ENABLE_CXX)
|
||||
enable_language(CXX)
|
||||
endif()
|
||||
|
||||
# Set global definitions
|
||||
add_definitions(-DXXH_NAMESPACE=ZSTD_)
|
42
build/cmake/CMakeModules/ZstdPackage.cmake
Normal file
42
build/cmake/CMakeModules/ZstdPackage.cmake
Normal file
@ -0,0 +1,42 @@
|
||||
# ################################################################
|
||||
# ZSTD Package Configuration
|
||||
# ################################################################
|
||||
|
||||
include(CMakePackageConfigHelpers)
|
||||
|
||||
# Generate version file
|
||||
write_basic_package_version_file(
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/zstdConfigVersion.cmake"
|
||||
VERSION ${zstd_VERSION}
|
||||
COMPATIBILITY SameMajorVersion
|
||||
)
|
||||
|
||||
# Export targets for build directory
|
||||
export(EXPORT zstdExports
|
||||
FILE "${CMAKE_CURRENT_BINARY_DIR}/zstdTargets.cmake"
|
||||
NAMESPACE zstd::
|
||||
)
|
||||
|
||||
# Configure package for installation
|
||||
set(ConfigPackageLocation ${CMAKE_INSTALL_LIBDIR}/cmake/zstd)
|
||||
|
||||
# Install exported targets
|
||||
install(EXPORT zstdExports
|
||||
FILE zstdTargets.cmake
|
||||
NAMESPACE zstd::
|
||||
DESTINATION ${ConfigPackageLocation}
|
||||
)
|
||||
|
||||
# Configure and install package config file
|
||||
configure_package_config_file(
|
||||
zstdConfig.cmake.in
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/zstdConfig.cmake"
|
||||
INSTALL_DESTINATION ${ConfigPackageLocation}
|
||||
)
|
||||
|
||||
# Install config files
|
||||
install(FILES
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/zstdConfig.cmake"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/zstdConfigVersion.cmake"
|
||||
DESTINATION ${ConfigPackageLocation}
|
||||
)
|
31
build/cmake/CMakeModules/ZstdVersion.cmake
Normal file
31
build/cmake/CMakeModules/ZstdVersion.cmake
Normal file
@ -0,0 +1,31 @@
|
||||
# ################################################################
|
||||
# ZSTD Version Configuration
|
||||
# ################################################################
|
||||
|
||||
# Setup CMake policy version
|
||||
set(ZSTD_MAX_VALIDATED_CMAKE_MAJOR_VERSION "3")
|
||||
set(ZSTD_MAX_VALIDATED_CMAKE_MINOR_VERSION "13")
|
||||
|
||||
# Determine appropriate policy version
|
||||
if("${ZSTD_MAX_VALIDATED_CMAKE_MAJOR_VERSION}" EQUAL "${CMAKE_MAJOR_VERSION}" AND
|
||||
"${ZSTD_MAX_VALIDATED_CMAKE_MINOR_VERSION}" GREATER "${CMAKE_MINOR_VERSION}")
|
||||
set(ZSTD_CMAKE_POLICY_VERSION "${CMAKE_VERSION}")
|
||||
else()
|
||||
set(ZSTD_CMAKE_POLICY_VERSION "${ZSTD_MAX_VALIDATED_CMAKE_MAJOR_VERSION}.${ZSTD_MAX_VALIDATED_CMAKE_MINOR_VERSION}.0")
|
||||
endif()
|
||||
|
||||
cmake_policy(VERSION ${ZSTD_CMAKE_POLICY_VERSION})
|
||||
|
||||
# Parse version from header file
|
||||
include(GetZstdLibraryVersion)
|
||||
GetZstdLibraryVersion(${LIBRARY_DIR}/zstd.h zstd_VERSION_MAJOR zstd_VERSION_MINOR zstd_VERSION_PATCH)
|
||||
|
||||
# Set version variables
|
||||
set(ZSTD_SHORT_VERSION "${zstd_VERSION_MAJOR}.${zstd_VERSION_MINOR}")
|
||||
set(ZSTD_FULL_VERSION "${zstd_VERSION_MAJOR}.${zstd_VERSION_MINOR}.${zstd_VERSION_PATCH}")
|
||||
|
||||
# Project metadata
|
||||
set(zstd_HOMEPAGE_URL "https://facebook.github.io/zstd")
|
||||
set(zstd_DESCRIPTION "Zstandard is a real-time compression algorithm, providing high compression ratios.")
|
||||
|
||||
message(STATUS "ZSTD VERSION: ${zstd_VERSION_MAJOR}.${zstd_VERSION_MINOR}.${zstd_VERSION_PATCH}")
|
@ -97,9 +97,11 @@ if (ZSTD_LEGACY_SUPPORT)
|
||||
${LIBRARY_LEGACY_DIR}/zstd_v07.h)
|
||||
endif ()
|
||||
|
||||
if (MSVC)
|
||||
if (MSVC AND NOT (CMAKE_CXX_COMPILER_ID STREQUAL "Clang"))
|
||||
set(MSVC_RESOURCE_DIR ${ZSTD_SOURCE_DIR}/build/VS2010/libzstd-dll)
|
||||
set(PlatformDependResources ${MSVC_RESOURCE_DIR}/libzstd-dll.rc)
|
||||
else()
|
||||
set(PlatformDependResources)
|
||||
endif ()
|
||||
|
||||
# Explicitly set the language to C for all files, including ASM files.
|
||||
|
@ -10,7 +10,7 @@
|
||||
|
||||
project('zstd',
|
||||
['c', 'cpp'],
|
||||
license: ['BSD', 'GPLv2'],
|
||||
license: 'BSD-3-Clause OR GPL-2.0-only',
|
||||
default_options : [
|
||||
# There shouldn't be any need to force a C standard convention for zstd
|
||||
# but in case one would want that anyway, this can be done here.
|
||||
|
@ -739,6 +739,8 @@ static int benchMem(slice_collection_t dstBlocks, slice_collection_t srcBlocks,
|
||||
/* BMK_benchTimedFn may not run exactly nbRounds iterations */
|
||||
double speedAggregated =
|
||||
aggregateData(speedPerRound, roundNb + 1, metricAggregatePref);
|
||||
free(speedPerRound);
|
||||
|
||||
if (metricAggregatePref == fastest)
|
||||
DISPLAY("Fastest Speed : %.1f MB/s \n", speedAggregated);
|
||||
else
|
||||
|
@ -316,6 +316,10 @@ int main(int argc, const char** argv)
|
||||
for (pos = 0; pos < inSize; pos += 2) {
|
||||
size_t const decStatus = ZSTD_seekable_decompress(stream, outBuffer, 1, pos);
|
||||
if (decStatus != 1 || outBuffer[0] != inBuffer[pos]) {
|
||||
free(seekBuffer);
|
||||
free(outBuffer);
|
||||
ZSTD_seekable_freeCStream(zscs);
|
||||
ZSTD_seekable_free(stream);
|
||||
goto _test_error;
|
||||
}
|
||||
}
|
||||
@ -323,6 +327,10 @@ int main(int argc, const char** argv)
|
||||
/* We read more than the compressed size, meaning there were some rereads.
|
||||
This is unneeded because we only seeked forward. */
|
||||
printf("Too much data read: %zu read, with compressed size %zu\n", buffWrapper.totalRead, seekSize);
|
||||
free(seekBuffer);
|
||||
free(outBuffer);
|
||||
ZSTD_seekable_freeCStream(zscs);
|
||||
ZSTD_seekable_free(stream);
|
||||
goto _test_error;
|
||||
}
|
||||
|
||||
@ -342,6 +350,10 @@ int main(int argc, const char** argv)
|
||||
for (idx = 0; idx < sizeof(tests) / sizeof(tests[0]); idx++) {
|
||||
size_t const decStatus = ZSTD_seekable_decompress(stream, outBuffer, tests[idx].size, tests[idx].offset);
|
||||
if (decStatus != tests[idx].size || memcmp(outBuffer, inBuffer + tests[idx].offset, tests[idx].size) != 0) {
|
||||
free(seekBuffer);
|
||||
free(outBuffer);
|
||||
ZSTD_seekable_freeCStream(zscs);
|
||||
ZSTD_seekable_free(stream);
|
||||
goto _test_error;
|
||||
}
|
||||
}
|
||||
|
@ -218,6 +218,15 @@
|
||||
# if defined(__ARM_NEON) || defined(_M_ARM64)
|
||||
# define ZSTD_ARCH_ARM_NEON
|
||||
# endif
|
||||
# if defined(__ARM_FEATURE_SVE)
|
||||
# define ZSTD_ARCH_ARM_SVE
|
||||
# endif
|
||||
# if defined(__ARM_FEATURE_SVE2)
|
||||
# define ZSTD_ARCH_ARM_SVE2
|
||||
# endif
|
||||
# if defined(__riscv) && defined(__riscv_vector)
|
||||
# define ZSTD_ARCH_RISCV_RVV
|
||||
# endif
|
||||
#
|
||||
# if defined(ZSTD_ARCH_X86_AVX2)
|
||||
# include <immintrin.h>
|
||||
@ -227,6 +236,12 @@
|
||||
# elif defined(ZSTD_ARCH_ARM_NEON)
|
||||
# include <arm_neon.h>
|
||||
# endif
|
||||
# if defined(ZSTD_ARCH_ARM_SVE) || defined(ZSTD_ARCH_ARM_SVE2)
|
||||
# include <arm_sve.h>
|
||||
# endif
|
||||
# if defined(ZSTD_ARCH_RISCV_RVV)
|
||||
# include <riscv_vector.h>
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* C-language Attributes are added in C23. */
|
||||
|
@ -19,6 +19,12 @@
|
||||
#include "../common/error_private.h" /* ERROR */
|
||||
#include "hist.h"
|
||||
|
||||
#if defined(ZSTD_ARCH_ARM_SVE2)
|
||||
#define HIST_FAST_THRESHOLD 500
|
||||
#else
|
||||
#define HIST_FAST_THRESHOLD 1500
|
||||
#endif
|
||||
|
||||
|
||||
/* --- Error management --- */
|
||||
unsigned HIST_isError(size_t code) { return ERR_isError(code); }
|
||||
@ -65,6 +71,244 @@ unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
|
||||
|
||||
typedef enum { trustInput, checkMaxSymbolValue } HIST_checkInput_e;
|
||||
|
||||
#if defined(ZSTD_ARCH_ARM_SVE2)
|
||||
FORCE_INLINE_TEMPLATE size_t min_size(size_t a, size_t b) { return a < b ? a : b; }
|
||||
|
||||
static
|
||||
svuint16_t HIST_count_6_sve2(const BYTE* const src, size_t size, U32* const dst,
|
||||
const svuint8_t c0, const svuint8_t c1,
|
||||
const svuint8_t c2, const svuint8_t c3,
|
||||
const svuint8_t c4, const svuint8_t c5,
|
||||
const svuint16_t histmax, size_t maxCount)
|
||||
{
|
||||
const svbool_t vl128 = svptrue_pat_b8(SV_VL16);
|
||||
svuint16_t hh0 = svdup_n_u16(0);
|
||||
svuint16_t hh1 = svdup_n_u16(0);
|
||||
svuint16_t hh2 = svdup_n_u16(0);
|
||||
svuint16_t hh3 = svdup_n_u16(0);
|
||||
svuint16_t hh4 = svdup_n_u16(0);
|
||||
svuint16_t hh5 = svdup_n_u16(0);
|
||||
svuint16_t hh6 = svdup_n_u16(0);
|
||||
svuint16_t hh7 = svdup_n_u16(0);
|
||||
svuint16_t hh8 = svdup_n_u16(0);
|
||||
svuint16_t hh9 = svdup_n_u16(0);
|
||||
svuint16_t hha = svdup_n_u16(0);
|
||||
svuint16_t hhb = svdup_n_u16(0);
|
||||
|
||||
size_t i = 0;
|
||||
while (i < size) {
|
||||
/* We can only accumulate 15 (15 * 16 <= 255) iterations of histogram
|
||||
* in 8-bit accumulators! */
|
||||
const size_t size240 = min_size(i + 240, size);
|
||||
|
||||
svbool_t pred = svwhilelt_b8_u64(i, size);
|
||||
svuint8_t c = svld1rq_u8(pred, src + i);
|
||||
svuint8_t h0 = svhistseg_u8(c0, c);
|
||||
svuint8_t h1 = svhistseg_u8(c1, c);
|
||||
svuint8_t h2 = svhistseg_u8(c2, c);
|
||||
svuint8_t h3 = svhistseg_u8(c3, c);
|
||||
svuint8_t h4 = svhistseg_u8(c4, c);
|
||||
svuint8_t h5 = svhistseg_u8(c5, c);
|
||||
|
||||
for (i += 16; i < size240; i += 16) {
|
||||
pred = svwhilelt_b8_u64(i, size);
|
||||
c = svld1rq_u8(pred, src + i);
|
||||
h0 = svadd_u8_x(vl128, h0, svhistseg_u8(c0, c));
|
||||
h1 = svadd_u8_x(vl128, h1, svhistseg_u8(c1, c));
|
||||
h2 = svadd_u8_x(vl128, h2, svhistseg_u8(c2, c));
|
||||
h3 = svadd_u8_x(vl128, h3, svhistseg_u8(c3, c));
|
||||
h4 = svadd_u8_x(vl128, h4, svhistseg_u8(c4, c));
|
||||
h5 = svadd_u8_x(vl128, h5, svhistseg_u8(c5, c));
|
||||
}
|
||||
|
||||
hh0 = svaddwb_u16(hh0, h0);
|
||||
hh1 = svaddwt_u16(hh1, h0);
|
||||
hh2 = svaddwb_u16(hh2, h1);
|
||||
hh3 = svaddwt_u16(hh3, h1);
|
||||
hh4 = svaddwb_u16(hh4, h2);
|
||||
hh5 = svaddwt_u16(hh5, h2);
|
||||
hh6 = svaddwb_u16(hh6, h3);
|
||||
hh7 = svaddwt_u16(hh7, h3);
|
||||
hh8 = svaddwb_u16(hh8, h4);
|
||||
hh9 = svaddwt_u16(hh9, h4);
|
||||
hha = svaddwb_u16(hha, h5);
|
||||
hhb = svaddwt_u16(hhb, h5);
|
||||
}
|
||||
|
||||
svst1_u32(svwhilelt_b32_u64( 0, maxCount), dst + 0, svshllb_n_u32(hh0, 0));
|
||||
svst1_u32(svwhilelt_b32_u64( 4, maxCount), dst + 4, svshllt_n_u32(hh0, 0));
|
||||
svst1_u32(svwhilelt_b32_u64( 8, maxCount), dst + 8, svshllb_n_u32(hh1, 0));
|
||||
svst1_u32(svwhilelt_b32_u64(12, maxCount), dst + 12, svshllt_n_u32(hh1, 0));
|
||||
svst1_u32(svwhilelt_b32_u64(16, maxCount), dst + 16, svshllb_n_u32(hh2, 0));
|
||||
svst1_u32(svwhilelt_b32_u64(20, maxCount), dst + 20, svshllt_n_u32(hh2, 0));
|
||||
svst1_u32(svwhilelt_b32_u64(24, maxCount), dst + 24, svshllb_n_u32(hh3, 0));
|
||||
svst1_u32(svwhilelt_b32_u64(28, maxCount), dst + 28, svshllt_n_u32(hh3, 0));
|
||||
svst1_u32(svwhilelt_b32_u64(32, maxCount), dst + 32, svshllb_n_u32(hh4, 0));
|
||||
svst1_u32(svwhilelt_b32_u64(36, maxCount), dst + 36, svshllt_n_u32(hh4, 0));
|
||||
svst1_u32(svwhilelt_b32_u64(40, maxCount), dst + 40, svshllb_n_u32(hh5, 0));
|
||||
svst1_u32(svwhilelt_b32_u64(44, maxCount), dst + 44, svshllt_n_u32(hh5, 0));
|
||||
svst1_u32(svwhilelt_b32_u64(48, maxCount), dst + 48, svshllb_n_u32(hh6, 0));
|
||||
svst1_u32(svwhilelt_b32_u64(52, maxCount), dst + 52, svshllt_n_u32(hh6, 0));
|
||||
svst1_u32(svwhilelt_b32_u64(56, maxCount), dst + 56, svshllb_n_u32(hh7, 0));
|
||||
svst1_u32(svwhilelt_b32_u64(60, maxCount), dst + 60, svshllt_n_u32(hh7, 0));
|
||||
svst1_u32(svwhilelt_b32_u64(64, maxCount), dst + 64, svshllb_n_u32(hh8, 0));
|
||||
svst1_u32(svwhilelt_b32_u64(68, maxCount), dst + 68, svshllt_n_u32(hh8, 0));
|
||||
svst1_u32(svwhilelt_b32_u64(72, maxCount), dst + 72, svshllb_n_u32(hh9, 0));
|
||||
svst1_u32(svwhilelt_b32_u64(76, maxCount), dst + 76, svshllt_n_u32(hh9, 0));
|
||||
svst1_u32(svwhilelt_b32_u64(80, maxCount), dst + 80, svshllb_n_u32(hha, 0));
|
||||
svst1_u32(svwhilelt_b32_u64(84, maxCount), dst + 84, svshllt_n_u32(hha, 0));
|
||||
svst1_u32(svwhilelt_b32_u64(88, maxCount), dst + 88, svshllb_n_u32(hhb, 0));
|
||||
svst1_u32(svwhilelt_b32_u64(92, maxCount), dst + 92, svshllt_n_u32(hhb, 0));
|
||||
|
||||
hh0 = svmax_u16_x(vl128, hh0, hh1);
|
||||
hh2 = svmax_u16_x(vl128, hh2, hh3);
|
||||
hh4 = svmax_u16_x(vl128, hh4, hh5);
|
||||
hh6 = svmax_u16_x(vl128, hh6, hh7);
|
||||
hh8 = svmax_u16_x(vl128, hh8, hh9);
|
||||
hha = svmax_u16_x(vl128, hha, hhb);
|
||||
hh0 = svmax_u16_x(vl128, hh0, hh2);
|
||||
hh4 = svmax_u16_x(vl128, hh4, hh6);
|
||||
hh8 = svmax_u16_x(vl128, hh8, hha);
|
||||
hh0 = svmax_u16_x(vl128, hh0, hh4);
|
||||
hh8 = svmax_u16_x(vl128, hh8, histmax);
|
||||
return svmax_u16_x(vl128, hh0, hh8);
|
||||
}
|
||||
|
||||
static size_t HIST_count_sve2(unsigned* count, unsigned* maxSymbolValuePtr,
|
||||
const void* source, size_t sourceSize,
|
||||
HIST_checkInput_e check)
|
||||
{
|
||||
const BYTE* ip = (const BYTE*)source;
|
||||
const size_t maxCount = *maxSymbolValuePtr + 1;
|
||||
|
||||
assert(*maxSymbolValuePtr <= 255);
|
||||
if (!sourceSize) {
|
||||
ZSTD_memset(count, 0, maxCount * sizeof(*count));
|
||||
*maxSymbolValuePtr = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
{ const svbool_t vl128 = svptrue_pat_b8(SV_VL16);
|
||||
const svuint8_t c0 = svreinterpret_u8(svindex_u32(0x0C040800, 0x01010101));
|
||||
const svuint8_t c1 = svadd_n_u8_x(vl128, c0, 16);
|
||||
const svuint8_t c2 = svadd_n_u8_x(vl128, c0, 32);
|
||||
const svuint8_t c3 = svadd_n_u8_x(vl128, c1, 32);
|
||||
|
||||
svuint8_t symbolMax = svdup_n_u8(0);
|
||||
svuint16_t hh0 = svdup_n_u16(0);
|
||||
svuint16_t hh1 = svdup_n_u16(0);
|
||||
svuint16_t hh2 = svdup_n_u16(0);
|
||||
svuint16_t hh3 = svdup_n_u16(0);
|
||||
svuint16_t hh4 = svdup_n_u16(0);
|
||||
svuint16_t hh5 = svdup_n_u16(0);
|
||||
svuint16_t hh6 = svdup_n_u16(0);
|
||||
svuint16_t hh7 = svdup_n_u16(0);
|
||||
svuint16_t max;
|
||||
size_t maxSymbolValue;
|
||||
|
||||
size_t i = 0;
|
||||
while (i < sourceSize) {
|
||||
/* We can only accumulate 15 (15 * 16 <= 255) iterations of
|
||||
* histogram in 8-bit accumulators! */
|
||||
const size_t size240 = min_size(i + 240, sourceSize);
|
||||
|
||||
svbool_t pred = svwhilelt_b8_u64(i, sourceSize);
|
||||
svuint8_t c = svld1rq_u8(pred, ip + i);
|
||||
svuint8_t h0 = svhistseg_u8(c0, c);
|
||||
svuint8_t h1 = svhistseg_u8(c1, c);
|
||||
svuint8_t h2 = svhistseg_u8(c2, c);
|
||||
svuint8_t h3 = svhistseg_u8(c3, c);
|
||||
symbolMax = svmax_u8_x(vl128, symbolMax, c);
|
||||
|
||||
for (i += 16; i < size240; i += 16) {
|
||||
pred = svwhilelt_b8_u64(i, sourceSize);
|
||||
c = svld1rq_u8(pred, ip + i);
|
||||
h0 = svadd_u8_x(vl128, h0, svhistseg_u8(c0, c));
|
||||
h1 = svadd_u8_x(vl128, h1, svhistseg_u8(c1, c));
|
||||
h2 = svadd_u8_x(vl128, h2, svhistseg_u8(c2, c));
|
||||
h3 = svadd_u8_x(vl128, h3, svhistseg_u8(c3, c));
|
||||
symbolMax = svmax_u8_x(vl128, symbolMax, c);
|
||||
}
|
||||
|
||||
hh0 = svaddwb_u16(hh0, h0);
|
||||
hh1 = svaddwt_u16(hh1, h0);
|
||||
hh2 = svaddwb_u16(hh2, h1);
|
||||
hh3 = svaddwt_u16(hh3, h1);
|
||||
hh4 = svaddwb_u16(hh4, h2);
|
||||
hh5 = svaddwt_u16(hh5, h2);
|
||||
hh6 = svaddwb_u16(hh6, h3);
|
||||
hh7 = svaddwt_u16(hh7, h3);
|
||||
}
|
||||
maxSymbolValue = svmaxv_u8(vl128, symbolMax);
|
||||
|
||||
if (check && maxSymbolValue > *maxSymbolValuePtr) return ERROR(maxSymbolValue_tooSmall);
|
||||
*maxSymbolValuePtr = maxSymbolValue;
|
||||
|
||||
/* If the buffer size is not divisible by 16, the last elements of the final
|
||||
* vector register read will be zeros, and these elements must be subtracted
|
||||
* from the histogram.
|
||||
*/
|
||||
hh0 = svsub_n_u16_m(svptrue_pat_b32(SV_VL1), hh0, -sourceSize & 15);
|
||||
|
||||
svst1_u32(svwhilelt_b32_u64( 0, maxCount), count + 0, svshllb_n_u32(hh0, 0));
|
||||
svst1_u32(svwhilelt_b32_u64( 4, maxCount), count + 4, svshllt_n_u32(hh0, 0));
|
||||
svst1_u32(svwhilelt_b32_u64( 8, maxCount), count + 8, svshllb_n_u32(hh1, 0));
|
||||
svst1_u32(svwhilelt_b32_u64(12, maxCount), count + 12, svshllt_n_u32(hh1, 0));
|
||||
svst1_u32(svwhilelt_b32_u64(16, maxCount), count + 16, svshllb_n_u32(hh2, 0));
|
||||
svst1_u32(svwhilelt_b32_u64(20, maxCount), count + 20, svshllt_n_u32(hh2, 0));
|
||||
svst1_u32(svwhilelt_b32_u64(24, maxCount), count + 24, svshllb_n_u32(hh3, 0));
|
||||
svst1_u32(svwhilelt_b32_u64(28, maxCount), count + 28, svshllt_n_u32(hh3, 0));
|
||||
svst1_u32(svwhilelt_b32_u64(32, maxCount), count + 32, svshllb_n_u32(hh4, 0));
|
||||
svst1_u32(svwhilelt_b32_u64(36, maxCount), count + 36, svshllt_n_u32(hh4, 0));
|
||||
svst1_u32(svwhilelt_b32_u64(40, maxCount), count + 40, svshllb_n_u32(hh5, 0));
|
||||
svst1_u32(svwhilelt_b32_u64(44, maxCount), count + 44, svshllt_n_u32(hh5, 0));
|
||||
svst1_u32(svwhilelt_b32_u64(48, maxCount), count + 48, svshllb_n_u32(hh6, 0));
|
||||
svst1_u32(svwhilelt_b32_u64(52, maxCount), count + 52, svshllt_n_u32(hh6, 0));
|
||||
svst1_u32(svwhilelt_b32_u64(56, maxCount), count + 56, svshllb_n_u32(hh7, 0));
|
||||
svst1_u32(svwhilelt_b32_u64(60, maxCount), count + 60, svshllt_n_u32(hh7, 0));
|
||||
|
||||
hh0 = svmax_u16_x(vl128, hh0, hh1);
|
||||
hh2 = svmax_u16_x(vl128, hh2, hh3);
|
||||
hh4 = svmax_u16_x(vl128, hh4, hh5);
|
||||
hh6 = svmax_u16_x(vl128, hh6, hh7);
|
||||
hh0 = svmax_u16_x(vl128, hh0, hh2);
|
||||
hh4 = svmax_u16_x(vl128, hh4, hh6);
|
||||
max = svmax_u16_x(vl128, hh0, hh4);
|
||||
|
||||
maxSymbolValue = min_size(maxSymbolValue, maxCount);
|
||||
if (maxSymbolValue >= 64) {
|
||||
const svuint8_t c4 = svadd_n_u8_x(vl128, c0, 64);
|
||||
const svuint8_t c5 = svadd_n_u8_x(vl128, c1, 64);
|
||||
const svuint8_t c6 = svadd_n_u8_x(vl128, c2, 64);
|
||||
const svuint8_t c7 = svadd_n_u8_x(vl128, c3, 64);
|
||||
const svuint8_t c8 = svadd_n_u8_x(vl128, c0, 128);
|
||||
const svuint8_t c9 = svadd_n_u8_x(vl128, c1, 128);
|
||||
|
||||
max = HIST_count_6_sve2(ip, sourceSize, count + 64, c4, c5, c6, c7,
|
||||
c8, c9, max, maxCount - 64);
|
||||
|
||||
if (maxSymbolValue >= 160) {
|
||||
const svuint8_t ca = svadd_n_u8_x(vl128, c2, 128);
|
||||
const svuint8_t cb = svadd_n_u8_x(vl128, c3, 128);
|
||||
const svuint8_t cc = svadd_n_u8_x(vl128, c4, 128);
|
||||
const svuint8_t cd = svadd_n_u8_x(vl128, c5, 128);
|
||||
const svuint8_t ce = svadd_n_u8_x(vl128, c6, 128);
|
||||
const svuint8_t cf = svadd_n_u8_x(vl128, c7, 128);
|
||||
|
||||
max = HIST_count_6_sve2(ip, sourceSize, count + 160, ca, cb, cc,
|
||||
cd, ce, cf, max, maxCount - 160);
|
||||
} else if (maxCount > 160) {
|
||||
ZSTD_memset(count + 160, 0, (maxCount - 160) * sizeof(*count));
|
||||
}
|
||||
} else if (maxCount > 64) {
|
||||
ZSTD_memset(count + 64, 0, (maxCount - 64) * sizeof(*count));
|
||||
}
|
||||
|
||||
return svmaxv_u16(vl128, max);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/* HIST_count_parallel_wksp() :
|
||||
* store histogram into 4 intermediate tables, recombined at the end.
|
||||
* this design makes better use of OoO cpus,
|
||||
@ -73,8 +317,8 @@ typedef enum { trustInput, checkMaxSymbolValue } HIST_checkInput_e;
|
||||
* `workSpace` must be a U32 table of size >= HIST_WKSP_SIZE_U32.
|
||||
* @return : largest histogram frequency,
|
||||
* or an error code (notably when histogram's alphabet is larger than *maxSymbolValuePtr) */
|
||||
static size_t HIST_count_parallel_wksp(
|
||||
unsigned* count, unsigned* maxSymbolValuePtr,
|
||||
static UNUSED_ATTR
|
||||
size_t HIST_count_parallel_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
|
||||
const void* source, size_t sourceSize,
|
||||
HIST_checkInput_e check,
|
||||
U32* const workSpace)
|
||||
@ -151,11 +395,17 @@ size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
|
||||
const void* source, size_t sourceSize,
|
||||
void* workSpace, size_t workSpaceSize)
|
||||
{
|
||||
if (sourceSize < 1500) /* heuristic threshold */
|
||||
if (sourceSize < HIST_FAST_THRESHOLD) /* heuristic threshold */
|
||||
return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize);
|
||||
#if defined(ZSTD_ARCH_ARM_SVE2)
|
||||
(void)workSpace;
|
||||
(void)workSpaceSize;
|
||||
return HIST_count_sve2(count, maxSymbolValuePtr, source, sourceSize, trustInput);
|
||||
#else
|
||||
if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
|
||||
if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall);
|
||||
return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, trustInput, (U32*)workSpace);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* HIST_count_wksp() :
|
||||
@ -165,10 +415,15 @@ size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
|
||||
const void* source, size_t sourceSize,
|
||||
void* workSpace, size_t workSpaceSize)
|
||||
{
|
||||
#if defined(ZSTD_ARCH_ARM_SVE2)
|
||||
if (*maxSymbolValuePtr < 255)
|
||||
return HIST_count_sve2(count, maxSymbolValuePtr, source, sourceSize, checkMaxSymbolValue);
|
||||
#else
|
||||
if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
|
||||
if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall);
|
||||
if (*maxSymbolValuePtr < 255)
|
||||
return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, checkMaxSymbolValue, (U32*)workSpace);
|
||||
#endif
|
||||
*maxSymbolValuePtr = 255;
|
||||
return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace, workSpaceSize);
|
||||
}
|
||||
|
@ -35,7 +35,11 @@ unsigned HIST_isError(size_t code); /**< tells if a return value is an error co
|
||||
|
||||
/* --- advanced histogram functions --- */
|
||||
|
||||
#if defined(__ARM_FEATURE_SVE2)
|
||||
#define HIST_WKSP_SIZE_U32 0
|
||||
#else
|
||||
#define HIST_WKSP_SIZE_U32 1024
|
||||
#endif
|
||||
#define HIST_WKSP_SIZE (HIST_WKSP_SIZE_U32 * sizeof(unsigned))
|
||||
/** HIST_count_wksp() :
|
||||
* Same as HIST_count(), but using an externally provided scratch buffer.
|
||||
|
@ -7284,6 +7284,93 @@ static size_t convertSequences_noRepcodes(
|
||||
return longLen;
|
||||
}
|
||||
|
||||
#elif defined ZSTD_ARCH_RISCV_RVV
|
||||
#include <riscv_vector.h>
|
||||
/*
|
||||
* Convert `vl` sequences per iteration, using RVV intrinsics:
|
||||
* - offset -> offBase = offset + ZSTD_REP_NUM
|
||||
* - litLength -> (U16) litLength
|
||||
* - matchLength -> (U16)(matchLength - 3)
|
||||
* - rep is ignored
|
||||
* Store only 8 bytes per SeqDef (offBase[4], litLength[2], mlBase[2]).
|
||||
*
|
||||
* @returns 0 on success, with no long length detected
|
||||
* @returns > 0 if there is one long length (> 65535),
|
||||
* indicating the position, and type.
|
||||
*/
|
||||
static size_t convertSequences_noRepcodes(SeqDef* dstSeqs, const ZSTD_Sequence* inSeqs, size_t nbSequences) {
    /* RISC-V Vector (RVV) conversion of ZSTD_Sequence records into SeqDef records.
     *
     * Each iteration processes `vl` sequences, where `vl` is whatever
     * __riscv_vsetvl_e32m2() grants for the remaining count:
     *   - offBase   = offset + ZSTD_REP_NUM
     *   - litLength = low 16 bits of litLength
     *   - mlBase    = low 16 bits of (matchLength - MINMATCH)
     *   - the 4th 32-bit field of ZSTD_Sequence is loaded but not used
     *
     * @return 0 when no length exceeds 16 bits;
     *         otherwise (index + 1) for a long matchLength,
     *         or (index + 1 + nbSequences) for a long litLength.
     *
     * All declarations are kept ahead of statements (C90 layout). */
    size_t longLen = 0;
    size_t vl = 0;
    size_t i;

    /* RVV depends on the specific definition of target structures */
    ZSTD_STATIC_ASSERT(sizeof(ZSTD_Sequence) == 16);
    ZSTD_STATIC_ASSERT(offsetof(ZSTD_Sequence, offset) == 0);
    ZSTD_STATIC_ASSERT(offsetof(ZSTD_Sequence, litLength) == 4);
    ZSTD_STATIC_ASSERT(offsetof(ZSTD_Sequence, matchLength) == 8);
    ZSTD_STATIC_ASSERT(sizeof(SeqDef) == 8);
    ZSTD_STATIC_ASSERT(offsetof(SeqDef, offBase) == 0);
    ZSTD_STATIC_ASSERT(offsetof(SeqDef, litLength) == 4);
    ZSTD_STATIC_ASSERT(offsetof(SeqDef, mlBase) == 6);

    for (i = 0; i < nbSequences; i += vl) {
        vl = __riscv_vsetvl_e32m2(nbSequences - i);
        {
            /* Segment load: de-interleave the four 32-bit fields of each sequence.
             * The intrinsic is the unsigned (u32) variant, so the pointer must be
             * `const uint32_t*` (the previous `const int32_t*` cast mismatched it). */
            vuint32m2x4_t v_tuple = __riscv_vlseg4e32_v_u32m2x4(
                    (const uint32_t*)(const void*)&inSeqs[i], vl);
            vuint32m2_t v_offset = __riscv_vget_v_u32m2x4_u32m2(v_tuple, 0);
            vuint32m2_t v_lit    = __riscv_vget_v_u32m2x4_u32m2(v_tuple, 1);
            vuint32m2_t v_match  = __riscv_vget_v_u32m2x4_u32m2(v_tuple, 2);

            /* offBase = offset + ZSTD_REP_NUM */
            vuint32m2_t v_offBase = __riscv_vadd_vx_u32m2(v_offset, ZSTD_REP_NUM, vl);

            /* Flag lengths that do not fit in 16 bits, then narrow to 16 bits */
            vbool16_t lit_overflow = __riscv_vmsgtu_vx_u32m2_b16(v_lit, 65535, vl);
            vuint16m1_t v_lit_clamped = __riscv_vncvt_x_x_w_u16m1(v_lit, vl);

            vbool16_t ml_overflow = __riscv_vmsgtu_vx_u32m2_b16(v_match, 65535+MINMATCH, vl);
            vuint16m1_t v_ml_clamped = __riscv_vncvt_x_x_w_u16m1(
                    __riscv_vsub_vx_u32m2(v_match, MINMATCH, vl), vl);

            /* Pack two 16-bit fields into a 32-bit value (little-endian):
             * the lower 16 bits contain litLength, the upper 16 bits contain mlBase */
            vuint32m2_t v_ml_high = __riscv_vsll_vx_u32m2(
                    __riscv_vwcvtu_x_x_v_u32m2(v_ml_clamped, vl), 16, vl);
            vuint32m2_t v_lit_ml_combined = __riscv_vor_vv_u32m2(
                    v_ml_high, __riscv_vwcvtu_x_x_v_u32m2(v_lit_clamped, vl), vl);

            /* Two-field tuple matching SeqDef: { offBase, litLength | mlBase << 16 } */
            vuint32m2x2_t store_data = __riscv_vcreate_v_u32m2x2(v_offBase, v_lit_ml_combined);

            /* Index of the first overflowing element in this batch, or -1 if none */
            int first_ml  = (int)__riscv_vfirst_m_b16(ml_overflow, vl);
            int first_lit = (int)__riscv_vfirst_m_b16(lit_overflow, vl);

            /* Segment store: interleave the two 32-bit words back into SeqDef records */
            __riscv_vsseg2e32_v_u32m2x2((uint32_t*)(void*)&dstSeqs[i], store_data, vl);

            if (UNLIKELY(first_ml != -1)) {
                assert(longLen == 0);
                longLen = i + (size_t)first_ml + 1;
            }
            if (UNLIKELY(first_lit != -1)) {
                assert(longLen == 0);
                longLen = i + (size_t)first_lit + 1 + nbSequences;
            }
        }
    }
    return longLen;
}
|
||||
|
||||
/* the vector implementation could also be ported to SSSE3,
|
||||
* but since this implementation is targeting modern systems (>= Sapphire Rapids),
|
||||
* it's not useful to develop and maintain code for older pre-AVX2 platforms */
|
||||
@ -7451,6 +7538,70 @@ BlockSummary ZSTD_get1BlockSummary(const ZSTD_Sequence* seqs, size_t nbSeqs)
|
||||
}
|
||||
}
|
||||
|
||||
#elif defined ZSTD_ARCH_RISCV_RVV
|
||||
|
||||
BlockSummary ZSTD_get1BlockSummary(const ZSTD_Sequence* seqs, size_t nbSeqs)
|
||||
{
|
||||
size_t totalMatchSize = 0;
|
||||
size_t litSize = 0;
|
||||
size_t i = 0;
|
||||
int found_terminator = 0;
|
||||
size_t vl_max = __riscv_vsetvlmax_e32m1();
|
||||
vuint32m1_t v_lit_sum = __riscv_vmv_v_x_u32m1(0, vl_max);
|
||||
vuint32m1_t v_match_sum = __riscv_vmv_v_x_u32m1(0, vl_max);
|
||||
|
||||
for (; i < nbSeqs; ) {
|
||||
size_t vl = __riscv_vsetvl_e32m2(nbSeqs - i);
|
||||
|
||||
ptrdiff_t stride = sizeof(ZSTD_Sequence); // 16
|
||||
vuint32m2x4_t v_tuple = __riscv_vlseg4e32_v_u32m2x4(
|
||||
(const int32_t*)&seqs[i],
|
||||
vl
|
||||
);
|
||||
vuint32m2_t v_offset = __riscv_vget_v_u32m2x4_u32m2(v_tuple, 0);
|
||||
vuint32m2_t v_lit = __riscv_vget_v_u32m2x4_u32m2(v_tuple, 1);
|
||||
vuint32m2_t v_match = __riscv_vget_v_u32m2x4_u32m2(v_tuple, 2);
|
||||
|
||||
// Check if any element has a matchLength of 0
|
||||
vbool16_t mask = __riscv_vmseq_vx_u32m2_b16(v_match, 0, vl);
|
||||
int first_zero = __riscv_vfirst_m_b16(mask, vl);
|
||||
|
||||
if (first_zero >= 0) {
|
||||
// Find the first zero byte and set the effective length to that index + 1 to
|
||||
// recompute the cumulative vector length of literals and matches
|
||||
vl = first_zero + 1;
|
||||
|
||||
// recompute the cumulative vector length of literals and matches
|
||||
v_lit_sum = __riscv_vredsum_vs_u32m2_u32m1(__riscv_vslidedown_vx_u32m2(v_lit, 0, vl), v_lit_sum, vl);
|
||||
v_match_sum = __riscv_vredsum_vs_u32m2_u32m1(__riscv_vslidedown_vx_u32m2(v_match, 0, vl), v_match_sum, vl);
|
||||
|
||||
i += vl;
|
||||
found_terminator = 1;
|
||||
assert(seqs[i - 1].offset == 0);
|
||||
break;
|
||||
} else {
|
||||
|
||||
v_lit_sum = __riscv_vredsum_vs_u32m2_u32m1(v_lit, v_lit_sum, vl);
|
||||
v_match_sum = __riscv_vredsum_vs_u32m2_u32m1(v_match, v_match_sum, vl);
|
||||
i += vl;
|
||||
}
|
||||
}
|
||||
litSize = __riscv_vmv_x_s_u32m1_u32(v_lit_sum);
|
||||
totalMatchSize = __riscv_vmv_x_s_u32m1_u32(v_match_sum);
|
||||
|
||||
if (!found_terminator && i==nbSeqs) {
|
||||
BlockSummary bs;
|
||||
bs.nbSequences = ERROR(externalSequences_invalid);
|
||||
return bs;
|
||||
}
|
||||
{ BlockSummary bs;
|
||||
bs.nbSequences = i;
|
||||
bs.blockSize = litSize + totalMatchSize;
|
||||
bs.litSize = litSize;
|
||||
return bs;
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
BlockSummary ZSTD_get1BlockSummary(const ZSTD_Sequence* seqs, size_t nbSeqs)
|
||||
|
@ -38,6 +38,16 @@
|
||||
|
||||
#endif
|
||||
|
||||
// There appears to be an irreconcilable syntax difference between Linux and Darwin assemblers.
|
||||
// Name of a private label (i.e. not exported to symbol table) on Darwin has to start with "L",
|
||||
// on Linux has to start with ".". There's no way to have a name start with both "." and "L", so
|
||||
// we have to use a macro.
|
||||
#if defined(__APPLE__)
|
||||
#define LOCAL_LABEL(label) L_ ## label
|
||||
#else
|
||||
#define LOCAL_LABEL(label) .L_ ## label
|
||||
#endif
|
||||
|
||||
#if ZSTD_ENABLE_ASM_X86_64_BMI2
|
||||
|
||||
/* Calling convention:
|
||||
@ -117,22 +127,55 @@ ZSTD_HIDE_ASM_FUNCTION(_HUF_decompress4X1_usingDTable_internal_fast_asm_loop)
|
||||
_HUF_decompress4X1_usingDTable_internal_fast_asm_loop:
|
||||
HUF_decompress4X1_usingDTable_internal_fast_asm_loop:
|
||||
ZSTD_CET_ENDBRANCH
|
||||
.cfi_startproc
|
||||
.cfi_def_cfa_offset 8
|
||||
.cfi_offset %rip, -8
|
||||
/* Save all registers - even if they are callee saved for simplicity. */
|
||||
push %rax
|
||||
.cfi_def_cfa_offset 16
|
||||
.cfi_offset rax, -16
|
||||
push %rbx
|
||||
.cfi_def_cfa_offset 24
|
||||
.cfi_offset rbx, -24
|
||||
push %rcx
|
||||
.cfi_def_cfa_offset 32
|
||||
.cfi_offset rcx, -32
|
||||
push %rdx
|
||||
.cfi_def_cfa_offset 40
|
||||
.cfi_offset rdx, -40
|
||||
push %rbp
|
||||
.cfi_def_cfa_offset 48
|
||||
.cfi_offset rbp, -48
|
||||
push %rsi
|
||||
.cfi_def_cfa_offset 56
|
||||
.cfi_offset rsi, -56
|
||||
push %rdi
|
||||
.cfi_def_cfa_offset 64
|
||||
.cfi_offset rdi, -64
|
||||
push %r8
|
||||
.cfi_def_cfa_offset 72
|
||||
.cfi_offset r8, -72
|
||||
push %r9
|
||||
.cfi_def_cfa_offset 80
|
||||
.cfi_offset r9, -80
|
||||
push %r10
|
||||
.cfi_def_cfa_offset 88
|
||||
.cfi_offset r10, -88
|
||||
push %r11
|
||||
.cfi_def_cfa_offset 96
|
||||
.cfi_offset r11, -96
|
||||
push %r12
|
||||
.cfi_def_cfa_offset 104
|
||||
.cfi_offset r12, -104
|
||||
push %r13
|
||||
.cfi_def_cfa_offset 112
|
||||
.cfi_offset r13, -112
|
||||
push %r14
|
||||
.cfi_def_cfa_offset 120
|
||||
.cfi_offset r14, -120
|
||||
push %r15
|
||||
.cfi_def_cfa_offset 128
|
||||
.cfi_offset r15, -128
|
||||
|
||||
/* Read HUF_DecompressAsmArgs* args from %rax */
|
||||
#if defined(_WIN32)
|
||||
@ -154,13 +197,18 @@ HUF_decompress4X1_usingDTable_internal_fast_asm_loop:
|
||||
movq 88(%rax), %bits3
|
||||
movq 96(%rax), %dtable
|
||||
push %rax /* argument */
|
||||
.cfi_def_cfa_offset 136
|
||||
push 104(%rax) /* ilowest */
|
||||
.cfi_def_cfa_offset 144
|
||||
push 112(%rax) /* oend */
|
||||
.cfi_def_cfa_offset 152
|
||||
push %olimit /* olimit space */
|
||||
.cfi_def_cfa_offset 160
|
||||
|
||||
subq $24, %rsp
|
||||
.cfi_def_cfa_offset 184
|
||||
|
||||
.L_4X1_compute_olimit:
|
||||
LOCAL_LABEL(4X1_compute_olimit):
|
||||
/* Computes how many iterations we can do safely
|
||||
* %r15, %rax may be clobbered
|
||||
* rbx, rdx must be saved
|
||||
@ -207,19 +255,19 @@ HUF_decompress4X1_usingDTable_internal_fast_asm_loop:
|
||||
/* If (op3 + 20 > olimit) */
|
||||
movq %op3, %rax /* rax = op3 */
|
||||
cmpq %rax, %olimit /* op3 == olimit */
|
||||
je .L_4X1_exit
|
||||
je LOCAL_LABEL(4X1_exit)
|
||||
|
||||
/* If (ip1 < ip0) go to exit */
|
||||
cmpq %ip0, %ip1
|
||||
jb .L_4X1_exit
|
||||
jb LOCAL_LABEL(4X1_exit)
|
||||
|
||||
/* If (ip2 < ip1) go to exit */
|
||||
cmpq %ip1, %ip2
|
||||
jb .L_4X1_exit
|
||||
jb LOCAL_LABEL(4X1_exit)
|
||||
|
||||
/* If (ip3 < ip2) go to exit */
|
||||
cmpq %ip2, %ip3
|
||||
jb .L_4X1_exit
|
||||
jb LOCAL_LABEL(4X1_exit)
|
||||
|
||||
/* Reads top 11 bits from bits[n]
|
||||
* Loads dt[bits[n]] into var[n]
|
||||
@ -280,7 +328,7 @@ HUF_decompress4X1_usingDTable_internal_fast_asm_loop:
|
||||
|
||||
.p2align 6
|
||||
|
||||
.L_4X1_loop_body:
|
||||
LOCAL_LABEL(4X1_loop_body):
|
||||
/* Decode 5 symbols in each of the 4 streams (20 total)
|
||||
* Must have called GET_NEXT_DELT for each stream
|
||||
*/
|
||||
@ -318,7 +366,7 @@ HUF_decompress4X1_usingDTable_internal_fast_asm_loop:
|
||||
|
||||
/* If op3 < olimit: continue the loop */
|
||||
cmp %op3, 24(%rsp)
|
||||
ja .L_4X1_loop_body
|
||||
ja LOCAL_LABEL(4X1_loop_body)
|
||||
|
||||
/* Reload ip[1,2,3] from stack */
|
||||
movq 0(%rsp), %ip1
|
||||
@ -326,20 +374,25 @@ HUF_decompress4X1_usingDTable_internal_fast_asm_loop:
|
||||
movq 16(%rsp), %ip3
|
||||
|
||||
/* Re-compute olimit */
|
||||
jmp .L_4X1_compute_olimit
|
||||
jmp LOCAL_LABEL(4X1_compute_olimit)
|
||||
|
||||
#undef GET_NEXT_DELT
|
||||
#undef DECODE_FROM_DELT
|
||||
#undef DECODE
|
||||
#undef RELOAD_BITS
|
||||
.L_4X1_exit:
|
||||
LOCAL_LABEL(4X1_exit):
|
||||
addq $24, %rsp
|
||||
.cfi_def_cfa_offset 160
|
||||
|
||||
/* Restore stack (oend & olimit) */
|
||||
pop %rax /* olimit */
|
||||
.cfi_def_cfa_offset 152
|
||||
pop %rax /* oend */
|
||||
.cfi_def_cfa_offset 144
|
||||
pop %rax /* ilowest */
|
||||
.cfi_def_cfa_offset 136
|
||||
pop %rax /* arg */
|
||||
.cfi_def_cfa_offset 128
|
||||
|
||||
/* Save ip / op / bits */
|
||||
movq %ip0, 0(%rax)
|
||||
@ -357,41 +410,105 @@ HUF_decompress4X1_usingDTable_internal_fast_asm_loop:
|
||||
|
||||
/* Restore registers */
|
||||
pop %r15
|
||||
.cfi_restore r15
|
||||
.cfi_def_cfa_offset 120
|
||||
pop %r14
|
||||
.cfi_restore r14
|
||||
.cfi_def_cfa_offset 112
|
||||
pop %r13
|
||||
.cfi_restore r13
|
||||
.cfi_def_cfa_offset 104
|
||||
pop %r12
|
||||
.cfi_restore r12
|
||||
.cfi_def_cfa_offset 96
|
||||
pop %r11
|
||||
.cfi_restore r11
|
||||
.cfi_def_cfa_offset 88
|
||||
pop %r10
|
||||
.cfi_restore r10
|
||||
.cfi_def_cfa_offset 80
|
||||
pop %r9
|
||||
.cfi_restore r9
|
||||
.cfi_def_cfa_offset 72
|
||||
pop %r8
|
||||
.cfi_restore r8
|
||||
.cfi_def_cfa_offset 64
|
||||
pop %rdi
|
||||
.cfi_restore rdi
|
||||
.cfi_def_cfa_offset 56
|
||||
pop %rsi
|
||||
.cfi_restore rsi
|
||||
.cfi_def_cfa_offset 48
|
||||
pop %rbp
|
||||
.cfi_restore rbp
|
||||
.cfi_def_cfa_offset 40
|
||||
pop %rdx
|
||||
.cfi_restore rdx
|
||||
.cfi_def_cfa_offset 32
|
||||
pop %rcx
|
||||
.cfi_restore rcx
|
||||
.cfi_def_cfa_offset 24
|
||||
pop %rbx
|
||||
.cfi_restore rbx
|
||||
.cfi_def_cfa_offset 16
|
||||
pop %rax
|
||||
.cfi_restore rax
|
||||
.cfi_def_cfa_offset 8
|
||||
ret
|
||||
.cfi_endproc
|
||||
|
||||
_HUF_decompress4X2_usingDTable_internal_fast_asm_loop:
|
||||
HUF_decompress4X2_usingDTable_internal_fast_asm_loop:
|
||||
ZSTD_CET_ENDBRANCH
|
||||
.cfi_startproc
|
||||
.cfi_def_cfa_offset 8
|
||||
.cfi_offset %rip, -8
|
||||
/* Save all registers - even if they are callee saved for simplicity. */
|
||||
push %rax
|
||||
.cfi_def_cfa_offset 16
|
||||
.cfi_offset rax, -16
|
||||
push %rbx
|
||||
.cfi_def_cfa_offset 24
|
||||
.cfi_offset rbx, -24
|
||||
push %rcx
|
||||
.cfi_def_cfa_offset 32
|
||||
.cfi_offset rcx, -32
|
||||
push %rdx
|
||||
.cfi_def_cfa_offset 40
|
||||
.cfi_offset rdx, -40
|
||||
push %rbp
|
||||
.cfi_def_cfa_offset 48
|
||||
.cfi_offset rbp, -48
|
||||
push %rsi
|
||||
.cfi_def_cfa_offset 56
|
||||
.cfi_offset rsi, -56
|
||||
push %rdi
|
||||
.cfi_def_cfa_offset 64
|
||||
.cfi_offset rdi, -64
|
||||
push %r8
|
||||
.cfi_def_cfa_offset 72
|
||||
.cfi_offset r8, -72
|
||||
push %r9
|
||||
.cfi_def_cfa_offset 80
|
||||
.cfi_offset r9, -80
|
||||
push %r10
|
||||
.cfi_def_cfa_offset 88
|
||||
.cfi_offset r10, -88
|
||||
push %r11
|
||||
.cfi_def_cfa_offset 96
|
||||
.cfi_offset r11, -96
|
||||
push %r12
|
||||
.cfi_def_cfa_offset 104
|
||||
.cfi_offset r12, -104
|
||||
push %r13
|
||||
.cfi_def_cfa_offset 112
|
||||
.cfi_offset r13, -112
|
||||
push %r14
|
||||
.cfi_def_cfa_offset 120
|
||||
.cfi_offset r14, -120
|
||||
push %r15
|
||||
.cfi_def_cfa_offset 128
|
||||
.cfi_offset r15, -128
|
||||
|
||||
/* Read HUF_DecompressAsmArgs* args from %rax */
|
||||
#if defined(_WIN32)
|
||||
@ -413,25 +530,33 @@ HUF_decompress4X2_usingDTable_internal_fast_asm_loop:
|
||||
movq 88(%rax), %bits3
|
||||
movq 96(%rax), %dtable
|
||||
push %rax /* argument */
|
||||
.cfi_def_cfa_offset 136
|
||||
push %rax /* olimit */
|
||||
.cfi_def_cfa_offset 144
|
||||
push 104(%rax) /* ilowest */
|
||||
.cfi_def_cfa_offset 152
|
||||
|
||||
movq 112(%rax), %rax
|
||||
push %rax /* oend3 */
|
||||
.cfi_def_cfa_offset 160
|
||||
|
||||
movq %op3, %rax
|
||||
push %rax /* oend2 */
|
||||
.cfi_def_cfa_offset 168
|
||||
|
||||
movq %op2, %rax
|
||||
push %rax /* oend1 */
|
||||
.cfi_def_cfa_offset 176
|
||||
|
||||
movq %op1, %rax
|
||||
push %rax /* oend0 */
|
||||
.cfi_def_cfa_offset 184
|
||||
|
||||
/* Scratch space */
|
||||
subq $8, %rsp
|
||||
.cfi_def_cfa_offset 192
|
||||
|
||||
.L_4X2_compute_olimit:
|
||||
LOCAL_LABEL(4X2_compute_olimit):
|
||||
/* Computes how many iterations we can do safely
|
||||
* %r15, %rax may be clobbered
|
||||
* rdx must be saved
|
||||
@ -495,19 +620,19 @@ HUF_decompress4X2_usingDTable_internal_fast_asm_loop:
|
||||
/* If (op3 == olimit) go to exit */
|
||||
movq %op3, %rax /* rax = op3 */
|
||||
cmpq %rax, %olimit /* op3 == olimit */
|
||||
je .L_4X2_exit
|
||||
je LOCAL_LABEL(4X2_exit)
|
||||
|
||||
/* If (ip1 < ip0) go to exit */
|
||||
cmpq %ip0, %ip1
|
||||
jb .L_4X2_exit
|
||||
jb LOCAL_LABEL(4X2_exit)
|
||||
|
||||
/* If (ip2 < ip1) go to exit */
|
||||
cmpq %ip1, %ip2
|
||||
jb .L_4X2_exit
|
||||
jb LOCAL_LABEL(4X2_exit)
|
||||
|
||||
/* If (ip3 < ip2) go to exit */
|
||||
cmpq %ip2, %ip3
|
||||
jb .L_4X2_exit
|
||||
jb LOCAL_LABEL(4X2_exit)
|
||||
|
||||
#define DECODE(n, idx) \
|
||||
movq %bits##n, %rax; \
|
||||
@ -534,7 +659,7 @@ HUF_decompress4X2_usingDTable_internal_fast_asm_loop:
|
||||
|
||||
.p2align 6
|
||||
|
||||
.L_4X2_loop_body:
|
||||
LOCAL_LABEL(4X2_loop_body):
|
||||
/* We clobber r8, so store it on the stack */
|
||||
movq %r8, 0(%rsp)
|
||||
|
||||
@ -551,21 +676,29 @@ HUF_decompress4X2_usingDTable_internal_fast_asm_loop:
|
||||
FOR_EACH_STREAM(RELOAD_BITS)
|
||||
|
||||
cmp %op3, 48(%rsp)
|
||||
ja .L_4X2_loop_body
|
||||
jmp .L_4X2_compute_olimit
|
||||
ja LOCAL_LABEL(4X2_loop_body)
|
||||
jmp LOCAL_LABEL(4X2_compute_olimit)
|
||||
|
||||
#undef DECODE
|
||||
#undef RELOAD_BITS
|
||||
.L_4X2_exit:
|
||||
LOCAL_LABEL(4X2_exit):
|
||||
addq $8, %rsp
|
||||
.cfi_def_cfa_offset 184
|
||||
/* Restore stack (oend & olimit) */
|
||||
pop %rax /* oend0 */
|
||||
.cfi_def_cfa_offset 176
|
||||
pop %rax /* oend1 */
|
||||
.cfi_def_cfa_offset 168
|
||||
pop %rax /* oend2 */
|
||||
.cfi_def_cfa_offset 160
|
||||
pop %rax /* oend3 */
|
||||
.cfi_def_cfa_offset 152
|
||||
pop %rax /* ilowest */
|
||||
.cfi_def_cfa_offset 144
|
||||
pop %rax /* olimit */
|
||||
.cfi_def_cfa_offset 136
|
||||
pop %rax /* arg */
|
||||
.cfi_def_cfa_offset 128
|
||||
|
||||
/* Save ip / op / bits */
|
||||
movq %ip0, 0(%rax)
|
||||
@ -583,20 +716,51 @@ HUF_decompress4X2_usingDTable_internal_fast_asm_loop:
|
||||
|
||||
/* Restore registers */
|
||||
pop %r15
|
||||
.cfi_restore r15
|
||||
.cfi_def_cfa_offset 120
|
||||
pop %r14
|
||||
.cfi_restore r14
|
||||
.cfi_def_cfa_offset 112
|
||||
pop %r13
|
||||
.cfi_restore r13
|
||||
.cfi_def_cfa_offset 104
|
||||
pop %r12
|
||||
.cfi_restore r12
|
||||
.cfi_def_cfa_offset 96
|
||||
pop %r11
|
||||
.cfi_restore r11
|
||||
.cfi_def_cfa_offset 88
|
||||
pop %r10
|
||||
.cfi_restore r10
|
||||
.cfi_def_cfa_offset 80
|
||||
pop %r9
|
||||
.cfi_restore r9
|
||||
.cfi_def_cfa_offset 72
|
||||
pop %r8
|
||||
.cfi_restore r8
|
||||
.cfi_def_cfa_offset 64
|
||||
pop %rdi
|
||||
.cfi_restore rdi
|
||||
.cfi_def_cfa_offset 56
|
||||
pop %rsi
|
||||
.cfi_restore rsi
|
||||
.cfi_def_cfa_offset 48
|
||||
pop %rbp
|
||||
.cfi_restore rbp
|
||||
.cfi_def_cfa_offset 40
|
||||
pop %rdx
|
||||
.cfi_restore rdx
|
||||
.cfi_def_cfa_offset 32
|
||||
pop %rcx
|
||||
.cfi_restore rcx
|
||||
.cfi_def_cfa_offset 24
|
||||
pop %rbx
|
||||
.cfi_restore rbx
|
||||
.cfi_def_cfa_offset 16
|
||||
pop %rax
|
||||
.cfi_restore rax
|
||||
.cfi_def_cfa_offset 8
|
||||
ret
|
||||
.cfi_endproc
|
||||
|
||||
#endif
|
||||
|
@ -153,7 +153,7 @@ Usage: zstd [OPTIONS...] [INPUT... | -] [-o OUTPUT]
|
||||
Options:
|
||||
-o OUTPUT Write output to a single file, OUTPUT.
|
||||
-k, --keep Preserve INPUT file(s). [Default]
|
||||
--rm Remove INPUT file(s) after successful (de)compression.
|
||||
--rm Remove INPUT file(s) after successful (de)compression to file.
|
||||
|
||||
-# Desired compression level, where `#` is a number between 1 and 19;
|
||||
lower numbers provide faster compression, higher numbers yield
|
||||
|
@ -928,6 +928,7 @@ static int UTIL_prepareFileList(const char* dirName,
|
||||
hFile=FindFirstFileA(path, &cFile);
|
||||
if (hFile == INVALID_HANDLE_VALUE) {
|
||||
UTIL_DISPLAYLEVEL(1, "Cannot open directory '%s'\n", dirName);
|
||||
free(path);
|
||||
return 0;
|
||||
}
|
||||
free(path);
|
||||
|
@ -147,7 +147,7 @@ static void usage(FILE* f, const char* programName)
|
||||
DISPLAY_F(f, "Options:\n");
|
||||
DISPLAY_F(f, " -o OUTPUT Write output to a single file, OUTPUT.\n");
|
||||
DISPLAY_F(f, " -k, --keep Preserve INPUT file(s). [Default] \n");
|
||||
DISPLAY_F(f, " --rm Remove INPUT file(s) after successful (de)compression.\n");
|
||||
DISPLAY_F(f, " --rm Remove INPUT file(s) after successful (de)compression to file.\n");
|
||||
#ifdef ZSTD_GZCOMPRESS
|
||||
if (exeNameMatch(programName, ZSTD_GZ)) { /* behave like gzip */
|
||||
DISPLAY_F(f, " -n, --no-name Do not store original filename when compressing.\n\n");
|
||||
|
@ -6,7 +6,7 @@ Usage: zstd *OPTIONS...* *INPUT... | -* *-o OUTPUT*
|
||||
Options:
|
||||
-o OUTPUT Write output to a single file, OUTPUT.
|
||||
-k, --keep Preserve INPUT file(s). *Default*
|
||||
--rm Remove INPUT file(s) after successful (de)compression.
|
||||
--rm Remove INPUT file(s) after successful (de)compression to file.
|
||||
|
||||
-# Desired compression level, where `#` is a number between 1 and 19;
|
||||
lower numbers provide faster compression, higher numbers yield
|
||||
|
@ -640,7 +640,7 @@ if __name__ == "__main__":
|
||||
help="Preserve the scratch directory TEST_DIR/scratch/ for debugging purposes."
|
||||
)
|
||||
parser.add_argument("--verbose", action="store_true", help="Verbose test output.")
|
||||
parser.add_argument("--timeout", default=200, type=int, help="Test case timeout in seconds. Set to 0 to disable timeouts.")
|
||||
parser.add_argument("--timeout", default=800, type=int, help="Test case timeout in seconds. Set to 0 to disable timeouts.")
|
||||
parser.add_argument(
|
||||
"--exec-prefix",
|
||||
default=None,
|
||||
|
@ -890,6 +890,7 @@ static int benchMem(unsigned scenarioID,
|
||||
if (!BMK_isSuccessful_runOutcome(bOutcome)) {
|
||||
DISPLAY("ERROR: Scenario %u: %s \n", scenarioID, ZSTD_getErrorName(BMK_extract_errorResult(bOutcome)));
|
||||
errorcode = 1;
|
||||
BMK_freeTimedFnState(tfs);
|
||||
goto _cleanOut;
|
||||
}
|
||||
|
||||
|
@ -31,12 +31,11 @@ void* FUZZ_malloc_rand(size_t size, FUZZ_dataProducer_t *producer)
|
||||
return mem;
|
||||
} else {
|
||||
uintptr_t ptr = 0;
|
||||
/* Add +- 1M 50% of the time */
|
||||
/* Return junk pointer 50% of the time */
|
||||
if (FUZZ_dataProducer_uint32Range(producer, 0, 1))
|
||||
FUZZ_dataProducer_int32Range(producer, -1000000, 1000000);
|
||||
ptr += FUZZ_dataProducer_int32Range(producer, -1000000, 1000000);
|
||||
return (void*)ptr;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
int FUZZ_memcmp(void const* lhs, void const* rhs, size_t size)
|
||||
|
@ -66,6 +66,7 @@ void* FUZZ_malloc(size_t size);
|
||||
/**
|
||||
* malloc except returns random pointer for zero sized data and FUZZ_ASSERT
|
||||
* that malloc doesn't fail.
|
||||
* WARNING: Only free the returned pointer if size > 0!
|
||||
*/
|
||||
void* FUZZ_malloc_rand(size_t size, FUZZ_dataProducer_t *producer);
|
||||
|
||||
|
126
tests/fuzzer.c
126
tests/fuzzer.c
@ -44,6 +44,13 @@
|
||||
/* must be included after util.h, due to ERROR macro redefinition issue on Visual Studio */
|
||||
#include "zstd_internal.h" /* ZSTD_WORKSPACETOOLARGE_MAXDURATION, ZSTD_WORKSPACETOOLARGE_FACTOR, KB, MB */
|
||||
#include "threading.h" /* ZSTD_pthread_create, ZSTD_pthread_join */
|
||||
#include "compress/hist.h" /* HIST_count_wksp */
|
||||
|
||||
|
||||
/*-************************************
|
||||
* Macros
|
||||
**************************************/
|
||||
#define COUNTOF(array) (sizeof(array) / sizeof(*(array)))
|
||||
|
||||
|
||||
/*-************************************
|
||||
@ -567,6 +574,123 @@ static void test_decompressBound(unsigned tnb)
|
||||
DISPLAYLEVEL(3, "OK \n");
|
||||
}
|
||||
|
||||
/* Unit tests for HIST_count_wksp().
 *
 * Scenarios exercised, in order:
 *  - an empty source (srcSize == 0);
 *  - only when HIST_WKSP_SIZE_U32 is non-zero: a workspace that is one byte
 *    too small, and a workspace pointer offset by one byte (misaligned);
 *  - sources that contain a symbol larger than the maxSym passed in
 *    (a 4-byte source and a 3407-byte source): an error is expected and
 *    both maxSym and count[] must be left unchanged;
 *  - a sweep over every (symLow, symHigh, symMax) triple in the tables below
 *    combined with every entry of inputSizes, where the result is compared
 *    against a histogram tallied locally while the source is generated.
 *
 * seed   : added to the index when generating the symbol sequence of the sweep.
 * testNb : number printed for the first test; incremented once per test.
 * Returns the updated testNb (the next unused test number).
 */
static unsigned test_histCountWksp(unsigned seed, unsigned testNb)
|
||||
{
|
||||
/* The three tables are indexed together by `il`; in every column
 * symLow <= symHigh <= symMax, so the sweep never feeds an out-of-range symbol. */
static const unsigned symLowLimits[] = { 0, 27, 0, 0, 27, 42, 0, 0, 27, 42, 27, 42 };
|
||||
static const unsigned symHighLimits[] = { 255, 255, 210, 110, 42, 42, 210, 110, 42, 42, 42, 42 };
|
||||
static const unsigned symMaxLimits[] = { 255, 255, 255, 255, 255, 255, 230, 130, 99, 99, 42, 42 };
|
||||
static const size_t inputSizes[] = { 3367, 1761, 893, 117 };
|
||||
unsigned workspace[HIST_WKSP_SIZE_U32];
|
||||
size_t res, i, is, il;
|
||||
|
||||
DISPLAYLEVEL(3, "test%3u : HIST_count_wksp with empty source : ", testNb++);
|
||||
{
|
||||
/* With NULL source UBSan of older Clang could fail: applying zero offset to null pointer. */
|
||||
static const unsigned char source[4] = { 0 };
|
||||
unsigned count[1] = { 0 };
|
||||
unsigned maxSym = 0;
|
||||
res = HIST_count_wksp(count, &maxSym, source, 0, workspace, sizeof(workspace));
|
||||
CHECK_EQ(res, 0);
|
||||
CHECK_EQ(maxSym, 0);
|
||||
CHECK_EQ(count[0], 0);
|
||||
}
|
||||
DISPLAYLEVEL(3, "OK \n");
|
||||
|
||||
#if HIST_WKSP_SIZE_U32
|
||||
DISPLAYLEVEL(3, "test%3u : HIST_count_wksp with small workspace : ", testNb++);
|
||||
{
|
||||
unsigned count[1] = { 0 };
|
||||
unsigned maxSym = 0;
|
||||
/* Workspace size is reported as one byte short of the real array. */
res = HIST_count_wksp(count, &maxSym, NULL, 0, workspace, sizeof(workspace) - 1);
|
||||
CHECK_EQ(res, ERROR(workSpace_tooSmall));
|
||||
CHECK_EQ(maxSym, 0);
|
||||
CHECK_EQ(count[0], 0);
|
||||
}
|
||||
DISPLAYLEVEL(3, "OK \n");
|
||||
|
||||
DISPLAYLEVEL(3, "test%3u : HIST_count_wksp with wrong workspace alignment : ", testNb++);
|
||||
{
|
||||
unsigned count[1] = { 0 };
|
||||
unsigned maxSym = 0;
|
||||
/* Workspace pointer is shifted by one byte so it is no longer aligned for `unsigned`. */
res = HIST_count_wksp(count, &maxSym, NULL, 0, (unsigned*)(void*)((char*)workspace + 1), sizeof(workspace));
|
||||
CHECK_EQ(res, ERROR(GENERIC));
|
||||
CHECK_EQ(maxSym, 0);
|
||||
CHECK_EQ(count[0], 0);
|
||||
}
|
||||
DISPLAYLEVEL(3, "OK \n");
|
||||
#endif
|
||||
|
||||
DISPLAYLEVEL(3, "test%3u : HIST_count_wksp with symbol out of range, small size : ", testNb++);
|
||||
{
|
||||
/* With fewer elements HIST_count_parallel_wksp would fail. */
|
||||
static const unsigned char source[4] = { 1, 4, 0, 2 };
|
||||
static const unsigned expected[6] = { 0 };
|
||||
unsigned count[6] = { 0 };
|
||||
unsigned maxSym = 2;
|
||||
res = HIST_count_wksp(count, &maxSym, source, sizeof(source), workspace, sizeof(workspace));
|
||||
CHECK_EQ(res, ERROR(maxSymbolValue_tooSmall));
|
||||
CHECK_EQ(maxSym, 2);
|
||||
for (i = 0; i < COUNTOF(expected); ++i) CHECK_EQ(count[i], expected[i]);
|
||||
}
|
||||
DISPLAYLEVEL(3, "OK \n");
|
||||
|
||||
DISPLAYLEVEL(3, "test%3u : HIST_count_wksp with symbol out of range, medium size : ", testNb++);
|
||||
{
|
||||
unsigned char source[3407];
|
||||
unsigned count[6] = { 0 };
|
||||
unsigned maxSym = 2;
|
||||
/* Symbols are confined to 0..3 by the mask, while maxSym is only 2. */
for (i = 0; i < COUNTOF(source); ++i) {
|
||||
source[i] = (48271 * (i + 1)) & 3;
|
||||
}
|
||||
res = HIST_count_wksp(count, &maxSym, source, sizeof(source), workspace, sizeof(workspace));
|
||||
CHECK_EQ(res, ERROR(maxSymbolValue_tooSmall));
|
||||
CHECK_EQ(maxSym, 2);
|
||||
for (i = 0; i < COUNTOF(count); ++i) CHECK_EQ(count[i], 0);
|
||||
}
|
||||
DISPLAYLEVEL(3, "OK \n");
|
||||
|
||||
for (il = 0; il < COUNTOF(symMaxLimits); ++il) {
|
||||
unsigned symMax = symMaxLimits[il];
|
||||
unsigned symLow = symLowLimits[il];
|
||||
unsigned symHigh = symHighLimits[il];
|
||||
unsigned symRange = symHigh - symLow + 1;
|
||||
|
||||
for (is = 0; is < COUNTOF(inputSizes); ++is) {
|
||||
unsigned char source[4000];
|
||||
size_t inputSize = inputSizes[is];
|
||||
assert(inputSize <= sizeof(source));
|
||||
DISPLAYLEVEL(3, "test%3u : HIST_count_wksp test in [%u..%u], symMax: %u, inputSize: %u : ",
|
||||
testNb++, symLow, symHigh, symMax, (unsigned)inputSize);
|
||||
{
|
||||
unsigned count[260] = { 0 };
|
||||
unsigned expected[COUNTOF(count)] = { 0 };
|
||||
unsigned maxSym = symMax;
|
||||
unsigned realMaxSym = symMax;
|
||||
unsigned maxCount = 0;
|
||||
/* Generate symbols within [symLow..symHigh] and tally the reference histogram alongside. */
for (i = 0; i < inputSize; ++i) {
|
||||
unsigned prng = (48271 * (i + seed)) % symRange + symLow;
|
||||
source[i] = (unsigned char)prng;
|
||||
++expected[prng];
|
||||
}
|
||||
/* for basic buffer overwrite checks: entries above maxSym get the same
 * sentinel in both arrays and are compared again after the call */
|
||||
for (i = maxSym + 1; i < COUNTOF(count); ++i) expected[i] = count[i] = ~0u;
|
||||
for (i = 0; i <= maxSym; ++i) maxCount = MAX(maxCount, expected[i]);
|
||||
/* Lower realMaxSym to the highest symbol that actually occurs (never below 0). */
for (i = realMaxSym; i > 0; --i) {
|
||||
if (expected[i]) break;
|
||||
--realMaxSym;
|
||||
}
|
||||
res = HIST_count_wksp(count, &maxSym, source, inputSize, workspace, sizeof(workspace));
|
||||
CHECK_EQ(res, maxCount);
|
||||
CHECK_EQ(maxSym, realMaxSym);
|
||||
for (i = 0; i < COUNTOF(expected); ++i) CHECK_EQ(count[i], expected[i]);
|
||||
}
|
||||
DISPLAYLEVEL(3, "OK \n");
|
||||
}
|
||||
}
|
||||
|
||||
return testNb;
|
||||
}
|
||||
|
||||
static void test_setCParams(unsigned tnb)
|
||||
{
|
||||
ZSTD_CCtx* const cctx = ZSTD_createCCtx();
|
||||
@ -712,6 +836,8 @@ static int basicUnitTests(U32 const seed, double compressibility)
|
||||
}
|
||||
DISPLAYLEVEL(3, "OK \n");
|
||||
|
||||
testNb = test_histCountWksp(seed, testNb);
|
||||
|
||||
DISPLAYLEVEL(3, "test%3u : compress %u bytes : ", testNb++, (unsigned)CNBuffSize);
|
||||
{ ZSTD_CCtx* const cctx = ZSTD_createCCtx();
|
||||
if (cctx==NULL) goto _output_error;
|
||||
|
@ -73,6 +73,7 @@ int main(int argc, const char** argv)
|
||||
int _exit_code = 1;
|
||||
(void)argc;
|
||||
(void)argv;
|
||||
int _exit_code = 0;
|
||||
|
||||
if (!buffer || !out || !roundtrip || !cctx || !dctx) {
|
||||
fprintf(stderr, "Allocation failure\n");
|
||||
@ -127,4 +128,5 @@ cleanup:
|
||||
ZSTD_freeDCtx(dctx);
|
||||
ZSTD_freeCCtx(cctx);
|
||||
return _exit_code;
|
||||
return _exit_code;
|
||||
}
|
||||
|
@ -56,6 +56,7 @@ static int testSimpleAPI(void)
|
||||
} else {
|
||||
DISPLAY("ERROR: %s\n", ZSTD_getErrorName(ret));
|
||||
}
|
||||
free(output);
|
||||
return 1;
|
||||
}
|
||||
if (ret != size) {
|
||||
@ -64,6 +65,7 @@ static int testSimpleAPI(void)
|
||||
}
|
||||
if (memcmp(EXPECTED, output, size) != 0) {
|
||||
DISPLAY("ERROR: Wrong decoded output produced\n");
|
||||
free(output);
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -39,24 +39,39 @@ int main(int argc, const char** argv)
|
||||
unsigned windowLog = 18;
|
||||
(void)argc;
|
||||
(void)argv;
|
||||
int _exit_code = 0;
|
||||
/* Create stream */
|
||||
ctx = ZSTD_createCCtx();
|
||||
if (!ctx) { return 1; }
|
||||
/* Set parameters */
|
||||
if (ZSTD_isError(ZSTD_CCtx_setParameter(ctx, ZSTD_c_windowLog, windowLog)))
|
||||
return 2;
|
||||
if (ZSTD_isError(ZSTD_CCtx_setParameter(ctx, ZSTD_c_chainLog, 13)))
|
||||
return 2;
|
||||
if (ZSTD_isError(ZSTD_CCtx_setParameter(ctx, ZSTD_c_hashLog, 14)))
|
||||
return 2;
|
||||
if (ZSTD_isError(ZSTD_CCtx_setParameter(ctx, ZSTD_c_searchLog, 1)))
|
||||
return 2;
|
||||
if (ZSTD_isError(ZSTD_CCtx_setParameter(ctx, ZSTD_c_minMatch, 7)))
|
||||
return 2;
|
||||
if (ZSTD_isError(ZSTD_CCtx_setParameter(ctx, ZSTD_c_targetLength, 16)))
|
||||
return 2;
|
||||
if (ZSTD_isError(ZSTD_CCtx_setParameter(ctx, ZSTD_c_strategy, ZSTD_fast)))
|
||||
return 2;
|
||||
if (ZSTD_isError(ZSTD_CCtx_setParameter(ctx, ZSTD_c_windowLog, windowLog))) {
|
||||
_exit_code = 2;
|
||||
goto _clean_ctx;
|
||||
}
|
||||
if (ZSTD_isError(ZSTD_CCtx_setParameter(ctx, ZSTD_c_chainLog, 13))) {
|
||||
_exit_code = 2;
|
||||
goto _clean_ctx;
|
||||
}
|
||||
if (ZSTD_isError(ZSTD_CCtx_setParameter(ctx, ZSTD_c_hashLog, 14))) {
|
||||
_exit_code = 2;
|
||||
goto _clean_ctx;
|
||||
}
|
||||
if (ZSTD_isError(ZSTD_CCtx_setParameter(ctx, ZSTD_c_searchLog, 1))) {
|
||||
_exit_code = 2;
|
||||
goto _clean_ctx;
|
||||
}
|
||||
if (ZSTD_isError(ZSTD_CCtx_setParameter(ctx, ZSTD_c_minMatch, 7))) {
|
||||
_exit_code = 2;
|
||||
goto _clean_ctx;
|
||||
}
|
||||
if (ZSTD_isError(ZSTD_CCtx_setParameter(ctx, ZSTD_c_targetLength, 16))) {
|
||||
_exit_code = 2;
|
||||
goto _clean_ctx;
|
||||
}
|
||||
if (ZSTD_isError(ZSTD_CCtx_setParameter(ctx, ZSTD_c_strategy, ZSTD_fast))) {
|
||||
_exit_code = 2;
|
||||
goto _clean_ctx;
|
||||
}
|
||||
{
|
||||
U64 compressed = 0;
|
||||
const U64 toCompress = ((U64)1) << 33;
|
||||
@ -81,22 +96,28 @@ int main(int argc, const char** argv)
|
||||
}
|
||||
printf("Compressing, trying to generate a segfault \n");
|
||||
if (compress(ctx, out, srcBuffer, size)) {
|
||||
return 1;
|
||||
_exit_code = 1;
|
||||
goto _clean_buffer;
|
||||
}
|
||||
compressed += size;
|
||||
while (compressed < toCompress) {
|
||||
const size_t block = rand() % (size - pos + 1);
|
||||
if (pos == size) { pos = 0; }
|
||||
if (compress(ctx, out, srcBuffer + pos, block)) {
|
||||
return 1;
|
||||
_exit_code = 1;
|
||||
goto _clean_buffer;
|
||||
}
|
||||
pos += block;
|
||||
compressed += block;
|
||||
}
|
||||
printf("Compression completed successfully (no error triggered)\n");
|
||||
|
||||
_clean_buffer:
|
||||
free(srcBuffer);
|
||||
free(dstBuffer);
|
||||
}
|
||||
|
||||
_clean_ctx:
|
||||
ZSTD_freeCCtx(ctx);
|
||||
return 0;
|
||||
return _exit_code;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user