Merge pull request #4246 from pps83/dev-asmx64-win

[asm] Enable x86_64 asm for windows builds
This commit is contained in:
Yann Collet 2025-01-18 20:03:16 -08:00 committed by GitHub
commit 167b00495d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 12 additions and 5 deletions

View File

@ -93,7 +93,7 @@
* Only enable assembly for GNU C compatible compilers,
* because other platforms may not support GAS assembly syntax.
*
* Only enable assembly for Linux / MacOS, other platforms may
* Only enable assembly for Linux / MacOS / Win32, other platforms may
* work, but they haven't been tested. This could likely be
* extended to BSD systems.
*
@ -101,7 +101,7 @@
* 100% of code to be instrumented to work.
*/
#if defined(__GNUC__)
# if defined(__linux__) || defined(__linux) || defined(__APPLE__)
# if defined(__linux__) || defined(__linux) || defined(__APPLE__) || defined(_WIN32)
# if ZSTD_MEMORY_SANITIZER
# define ZSTD_ASM_SUPPORTED 0
# elif ZSTD_DATAFLOW_SANITIZER

View File

@ -42,13 +42,11 @@
/* Calling convention:
*
* %rdi contains the first argument: HUF_DecompressAsmArgs*.
* %rdi (or %rcx on Windows) contains the first argument: HUF_DecompressAsmArgs*.
* %rbp isn't maintained (no frame pointer).
* %rsp contains the stack pointer that grows down.
* No red-zone is assumed, only addresses >= %rsp are used.
* All register contents are preserved.
*
* TODO: Support Windows calling convention.
*/
ZSTD_HIDE_ASM_FUNCTION(HUF_decompress4X1_usingDTable_internal_fast_asm_loop)
@ -137,7 +135,11 @@ HUF_decompress4X1_usingDTable_internal_fast_asm_loop:
push %r15
/* Copy the HUF_DecompressAsmArgs* argument into %rax (from %rcx on Windows, %rdi otherwise), then read its fields through %rax */
#if defined(_WIN32)
movq %rcx, %rax
#else
movq %rdi, %rax
#endif
movq 0(%rax), %ip0
movq 8(%rax), %ip1
movq 16(%rax), %ip2
@ -391,7 +393,12 @@ HUF_decompress4X2_usingDTable_internal_fast_asm_loop:
push %r14
push %r15
/* Copy the HUF_DecompressAsmArgs* argument into %rax (from %rcx on Windows, %rdi otherwise), then read its fields through %rax */
#if defined(_WIN32)
movq %rcx, %rax
#else
movq %rdi, %rax
#endif
movq 0(%rax), %ip0
movq 8(%rax), %ip1
movq 16(%rax), %ip2