mirror of
https://github.com/facebook/zstd.git
synced 2025-11-22 00:10:22 -05:00
common: apply two stage copy to aarch64
On aarch64 ZSTD_wildcopy uses a simple loop to do 16B based memory copy. There is existing optimized two stage copy that can achieve better performance. By applying this to aarch64 it is also observed ~1% uplift in silesia corpus. Signed-off-by: Jun He <jun.he@arm.com> Change-Id: Ic1253308e7a8a7df2d08963ba544e086c81ce8be
This commit is contained in:
parent
9a5e73c74e
commit
d7249dafb4
@ -235,12 +235,6 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e
|
||||
* one COPY16() in the first call. Then, do two calls per loop since
|
||||
* at that point it is more likely to have a high trip count.
|
||||
*/
|
||||
#ifdef __aarch64__
|
||||
do {
|
||||
COPY16(op, ip);
|
||||
}
|
||||
while (op < oend);
|
||||
#else
|
||||
ZSTD_copy16(op, ip);
|
||||
if (16 >= length) return;
|
||||
op += 16;
|
||||
@ -250,7 +244,6 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e
|
||||
COPY16(op, ip);
|
||||
}
|
||||
while (op < oend);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user