Compare commits

..

1 Commits

Author SHA1 Message Date
Alecto Irene Perez
d9364ce284
Implement #624: Use shorter hashes with CPM_SOURCE_CACHE (#631)
* Add ASSERT_CONTENTS_EQUAL test macro in testing.cmake

Checks whether the contents of a file match the given input.

* Use shorter hashes with CPM_SOURCE_CACHE (#624)

Uses shorter hashes with CPM_SOURCE_CACHE. Falls back to a longer hash
if necessary (i.e., if the shorter prefix is already taken by a different hash).

See: https://github.com/cpm-cmake/CPM.cmake/issues/624

* Update integration tests to support shorter hashes

* trigger ci

* run cmake-format

* if already available, use the legacy cache hash

* create temporary file in current binary dir

* add test case for legacy hash

---------

Co-authored-by: Lars Melchior <lars.melchior@gmail.com>
Co-authored-by: Lars Melchior <TheLartians@users.noreply.github.com>
2025-05-18 19:02:47 +02:00
4 changed files with 182 additions and 1 deletions

View File

@ -202,6 +202,60 @@ function(cpm_package_name_from_git_uri URI RESULT)
endif() endif()
endfunction() endfunction()
# Find the shortest prefix of `origin_hash` that can be used as a cache directory name.
#
# For example, if origin_hash is cccb77ae9609d2768ed80dd42cec54f77b1f1455, the following files are
# checked in order until one is found that is either empty (allowing us to claim it by writing
# origin_hash into it) or whose contents already equal ${origin_hash}:
#
# * .../cccb.hash
# * .../cccb77ae.hash
# * .../cccb77ae9609.hash
# * .../cccb77ae9609d276.hash
# * etc. (prefix lengths 4, 8, ..., 40)
#
# We will be able to use a short prefix with very high probability, but in the (rare) event that the
# first few characters collide with a different hash, longer and longer prefixes are tried.
#
# Arguments:
#
# * source_cache_dir: directory in which the `<prefix>.hash` and `<prefix>.lock` files live
# * origin_hash: the full hash to shorten
# * short_hash_output_var: name of the variable, set in the caller's scope, that receives the result
#
# Result:
#
# * `origin_hash` itself if `${source_cache_dir}/${origin_hash}` already exists (a cache populated
#   by a previous version of CPM), or if every probed prefix is already owned by a different hash
# * otherwise the shortest probed prefix whose `.hash` file is owned by `origin_hash`
function(cpm_get_shortest_hash source_cache_dir origin_hash short_hash_output_var)
  # For compatibility with caches populated by a previous version of CPM, check if a directory using
  # the full hash already exists.
  if(EXISTS "${source_cache_dir}/${origin_hash}")
    set(${short_hash_output_var}
        "${origin_hash}"
        PARENT_SCOPE
    )
    return()
  endif()

  # Fallback used when no probed prefix can be claimed: the full hash cannot be mistaken for a
  # prefix that belongs to a different hash.
  set(selected_hash "${origin_hash}")

  foreach(len RANGE 4 40 4)
    string(SUBSTRING "${origin_hash}" 0 ${len} short_hash)
    set(hash_lock "${source_cache_dir}/${short_hash}.lock")
    set(hash_fp "${source_cache_dir}/${short_hash}.hash")

    # Take a lock, so we don't have a race condition with another instance of cmake. The lock is
    # released explicitly below; GUARD FUNCTION additionally ensures it is released on its own when
    # the function exits, e.g. because of an error.
    file(LOCK "${hash_lock}" GUARD FUNCTION)

    # Load the contents of .../${short_hash}.hash, creating the file first if it does not exist.
    file(TOUCH "${hash_fp}")
    file(READ "${hash_fp}" hash_fp_contents)

    set(prefix_is_ours FALSE)
    if(hash_fp_contents STREQUAL "")
      # Unclaimed prefix: claim it by recording the full hash.
      file(WRITE "${hash_fp}" "${origin_hash}")
      set(prefix_is_ours TRUE)
    elseif(hash_fp_contents STREQUAL origin_hash)
      # The prefix was claimed for this very hash by an earlier run.
      set(prefix_is_ours TRUE)
    endif()

    file(LOCK "${hash_lock}" RELEASE)

    if(prefix_is_ours)
      set(selected_hash "${short_hash}")
      break()
    endif()
  endforeach()

  set(${short_hash_output_var}
      "${selected_hash}"
      PARENT_SCOPE
  )
endfunction()
# Try to infer package name and version from a url # Try to infer package name and version from a url
function(cpm_package_name_and_ver_from_url url outName outVer) function(cpm_package_name_and_ver_from_url url outName outVer)
if(url MATCHES "[/\\?]([a-zA-Z0-9_\\.-]+)\\.(tar|tar\\.gz|tar\\.bz2|zip|ZIP)(\\?|/|$)") if(url MATCHES "[/\\?]([a-zA-Z0-9_\\.-]+)\\.(tar|tar\\.gz|tar\\.bz2|zip|ZIP)(\\?|/|$)")
@ -806,9 +860,19 @@ function(CPMAddPackage)
set(download_directory ${CPM_SOURCE_CACHE}/${lower_case_name}/${CPM_ARGS_CUSTOM_CACHE_KEY}) set(download_directory ${CPM_SOURCE_CACHE}/${lower_case_name}/${CPM_ARGS_CUSTOM_CACHE_KEY})
elseif(CPM_USE_NAMED_CACHE_DIRECTORIES) elseif(CPM_USE_NAMED_CACHE_DIRECTORIES)
string(SHA1 origin_hash "${origin_parameters};NEW_CACHE_STRUCTURE_TAG") string(SHA1 origin_hash "${origin_parameters};NEW_CACHE_STRUCTURE_TAG")
cpm_get_shortest_hash(
"${CPM_SOURCE_CACHE}/${lower_case_name}" # source cache directory
"${origin_hash}" # Input hash
origin_hash # Computed hash
)
set(download_directory ${CPM_SOURCE_CACHE}/${lower_case_name}/${origin_hash}/${CPM_ARGS_NAME}) set(download_directory ${CPM_SOURCE_CACHE}/${lower_case_name}/${origin_hash}/${CPM_ARGS_NAME})
else() else()
string(SHA1 origin_hash "${origin_parameters}") string(SHA1 origin_hash "${origin_parameters}")
cpm_get_shortest_hash(
"${CPM_SOURCE_CACHE}/${lower_case_name}" # source cache directory
"${origin_hash}" # Input hash
origin_hash # Computed hash
)
set(download_directory ${CPM_SOURCE_CACHE}/${lower_case_name}/${origin_hash}) set(download_directory ${CPM_SOURCE_CACHE}/${lower_case_name}/${origin_hash})
endif() endif()
# Expand `download_directory` relative path. This is important because EXISTS doesn't work for # Expand `download_directory` relative path. This is important because EXISTS doesn't work for

View File

@ -79,3 +79,16 @@ function(ASSERT_NOT_EXISTS file)
message(FATAL_ERROR "assertion failed: file ${file} exists") message(FATAL_ERROR "assertion failed: file ${file} exists")
endif() endif()
endfunction() endfunction()
# Asserts that `file` exists and that its complete contents are equal to `content`.
#
# * file: path of the file to check
# * content: the exact string the file is expected to contain
#
# Prints a STATUS message on success and raises a FATAL_ERROR if the file is missing or its contents
# differ.
function(ASSERT_CONTENTS_EQUAL file content)
  if(NOT EXISTS "${file}")
    message(FATAL_ERROR "assertion failed: file '${file}' does not exist")
  endif()

  file(READ "${file}" file_content)
  if(NOT content STREQUAL file_content)
    # Report both sides so a failing test can be diagnosed from the log alone.
    message(
      FATAL_ERROR
        "assertion failed: file '${file}' does not contain expected content. "
        "Expected '${content}', got '${file_content}'."
    )
  endif()

  message(STATUS "test passed: '${file}' exists and contains '${content}'")
endfunction()

View File

@ -75,6 +75,17 @@ class SourceCache < IntegrationTest
assert_equal ver, package.ver assert_equal ver, package.ver
expected_parent_dir = File.join(@cache_dir, name.downcase) expected_parent_dir = File.join(@cache_dir, name.downcase)
assert package.src_dir.start_with?(expected_parent_dir), "#{package.src_dir} must be in #{expected_parent_dir}" assert package.src_dir.start_with?(expected_parent_dir), "#{package.src_dir} must be in #{expected_parent_dir}"
assert_equal dir_sha1, File.basename(package.src_dir)
# The hash has been shortened by cpm_get_shortest_hash. The following
# should hold:
# - The short hash should be a prefix of the input hash
# - There should be a file ".../${short_hash}.hash" which matches the full hash
short_hash = File.basename(package.src_dir)
assert dir_sha1.start_with?(short_hash), "short_hash should be a prefix of dir_sha1"
# Check that the full hash is stored in the .hash file
hash_file = "#{package.src_dir}.hash"
assert File.exist?(hash_file), "Hash file #{hash_file} should exist"
assert_equal dir_sha1, File.read(hash_file), "Hash file should contain the full original hash"
end end
end end

View File

@ -0,0 +1,93 @@
cmake_minimum_required(VERSION 3.14 FATAL_ERROR)

include(${CPM_PATH}/CPM.cmake)
include(${CPM_PATH}/testing.cmake)

# Unit test for cpm_get_shortest_hash. It works in a fresh, uniquely named directory below
# CMAKE_CURRENT_BINARY_DIR and checks prefix selection, collision handling, recognition of legacy
# full-hash directories, and idempotence.

# Calls cpm_get_shortest_hash for `full_hash` in `cache_dir`, then asserts that the result equals
# `expected_short_hash` and that `<cache_dir>/<expected_short_hash>.hash` contains `full_hash`.
function(cpm_test_expect_shortest_hash cache_dir full_hash expected_short_hash)
  cpm_get_shortest_hash("${cache_dir}" "${full_hash}" hash)
  # Quoted so that an empty result is still passed as an argument to the assertion.
  assert_equal("${hash}" "${expected_short_hash}")
  assert_contents_equal("${cache_dir}/${expected_short_hash}.hash" "${full_hash}")
endfunction()

# Random suffix
string(
  RANDOM
  LENGTH 6
  ALPHABET "0123456789abcdef" tmpdir_suffix
)

# Seconds since epoch
string(TIMESTAMP tmpdir_base "%s" UTC)

set(tmp "${CMAKE_CURRENT_BINARY_DIR}/get_shortest_hash-${tmpdir_base}-${tmpdir_suffix}")

if(IS_DIRECTORY "${tmp}")
  message(FATAL_ERROR "Test directory ${tmp} already exists")
endif()

file(MAKE_DIRECTORY "${tmp}")

# 1. Sanity check: none of these hash files should exist yet
assert_not_exists("${tmp}/cccb.hash")
assert_not_exists("${tmp}/cccb77ae.hash")
assert_not_exists("${tmp}/cccb77ae9609.hash")
assert_not_exists("${tmp}/cccb77ae9608.hash")
assert_not_exists("${tmp}/cccb77be.hash")

# 2. The directory is empty, so the first hash should get a 4-character prefix
cpm_test_expect_shortest_hash("${tmp}" "cccb77ae9609d2768ed80dd42cec54f77b1f1455" "cccb")

# 3. Calling the function with a new hash that differs subtly should result in more characters being
#    used, enough to uniquely identify the hash
cpm_test_expect_shortest_hash("${tmp}" "cccb77ae9609d2768ed80dd42cec54f77b1f1456" "cccb77ae")
cpm_test_expect_shortest_hash("${tmp}" "cccb77ae9609d2768ed80dd42cec54f77b1f1457" "cccb77ae9609")
cpm_test_expect_shortest_hash("${tmp}" "cccb77ae9608d2768ed80dd42cec54f77b1f1455" "cccb77ae9608")
cpm_test_expect_shortest_hash("${tmp}" "cccb77be9609d2768ed80dd42cec54f77b1f1456" "cccb77be")

# 4. Check that legacy hashes are recognized: if a directory named after the full hash already
#    exists, the full hash is returned unchanged
file(MAKE_DIRECTORY "${tmp}/cccb77be9609d2768ed80dd42cec54f77b1f1457")
cpm_get_shortest_hash("${tmp}" "cccb77be9609d2768ed80dd42cec54f77b1f1457" hash)
assert_equal("${hash}" "cccb77be9609d2768ed80dd42cec54f77b1f1457")

# 5. The previously written hash files should still exist, and have the same content
assert_contents_equal("${tmp}/cccb.hash" cccb77ae9609d2768ed80dd42cec54f77b1f1455)
assert_contents_equal("${tmp}/cccb77ae.hash" cccb77ae9609d2768ed80dd42cec54f77b1f1456)
assert_contents_equal("${tmp}/cccb77ae9609.hash" cccb77ae9609d2768ed80dd42cec54f77b1f1457)
assert_contents_equal("${tmp}/cccb77ae9608.hash" cccb77ae9608d2768ed80dd42cec54f77b1f1455)
assert_contents_equal("${tmp}/cccb77be.hash" cccb77be9609d2768ed80dd42cec54f77b1f1456)

# 6. Confirm idempotence: calling the function again with any of these hashes should produce the
#    same short hash as before (hash lookups work correctly once the .hash files are created)
cpm_test_expect_shortest_hash("${tmp}" "cccb77ae9609d2768ed80dd42cec54f77b1f1455" "cccb")
cpm_test_expect_shortest_hash("${tmp}" "cccb77ae9609d2768ed80dd42cec54f77b1f1456" "cccb77ae")
cpm_test_expect_shortest_hash("${tmp}" "cccb77ae9609d2768ed80dd42cec54f77b1f1457" "cccb77ae9609")
cpm_test_expect_shortest_hash("${tmp}" "cccb77ae9608d2768ed80dd42cec54f77b1f1455" "cccb77ae9608")
cpm_test_expect_shortest_hash("${tmp}" "cccb77be9609d2768ed80dd42cec54f77b1f1456" "cccb77be")

# 7. Cleanup - remove the temporary directory that we created
file(REMOVE_RECURSE "${tmp}")