From 6d25a633ea68a103c7293d16eb69a7d4689075ad Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Thu, 30 Jul 2020 12:08:34 -0700 Subject: lib: Prepare zstd for preboot environment, improve performance These changes are necessary to get the build to work in the preboot environment, and to get reasonable performance: - Remove a double definition of the CHECK_F macro when the zstd library is amalgamated. - Switch ZSTD_copy8() to __builtin_memcpy(), because in the preboot environment on x86 gcc can't inline `memcpy()` otherwise. - Limit the gcc hack in ZSTD_wildcopy() to the broken gcc version. See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81388. ZSTD_copy8() and ZSTD_wildcopy() are in the core of the zstd hot loop. So outlining these calls to memcpy(), and having an extra branch are very detrimental to performance. Signed-off-by: Nick Terrell Signed-off-by: Ingo Molnar Tested-by: Sedat Dilek Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/20200730190841.2071656-2-nickrterrell@gmail.com --- lib/zstd/fse_decompress.c | 9 +-------- lib/zstd/zstd_internal.h | 14 ++++++++++++-- 2 files changed, 13 insertions(+), 10 deletions(-) (limited to 'lib/zstd') diff --git a/lib/zstd/fse_decompress.c b/lib/zstd/fse_decompress.c index a84300e5a013..0b353530fb3f 100644 --- a/lib/zstd/fse_decompress.c +++ b/lib/zstd/fse_decompress.c @@ -47,6 +47,7 @@ ****************************************************************/ #include "bitstream.h" #include "fse.h" +#include "zstd_internal.h" #include #include #include /* memcpy, memset */ @@ -60,14 +61,6 @@ enum { FSE_static_assert = 1 / (int)(!!(c)) }; \ } /* use only *after* variable declarations */ -/* check and forward error code */ -#define CHECK_F(f) \ - { \ - size_t const e = f; \ - if (FSE_isError(e)) \ - return e; \ - } - /* ************************************************************** * Templates ****************************************************************/ diff --git a/lib/zstd/zstd_internal.h b/lib/zstd/zstd_internal.h index 1a79fab9e13a..dac753397f86 100644 --- a/lib/zstd/zstd_internal.h +++ b/lib/zstd/zstd_internal.h @@ -127,7 +127,14 @@ static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG; * Shared functions to include for inlining *********************************************/ ZSTD_STATIC void ZSTD_copy8(void *dst, const void *src) { - memcpy(dst, src, 8); + /* + * zstd relies heavily on gcc being able to analyze and inline this + * memcpy() call, since it is called in a tight loop. Preboot mode + * is compiled in freestanding mode, which stops gcc from analyzing + * memcpy(). Use __builtin_memcpy() to tell gcc to analyze this as a + * regular memcpy(). + */ + __builtin_memcpy(dst, src, 8); } /*! ZSTD_wildcopy() : * custom version of memcpy(), can copy up to 7 bytes too many (8 bytes if length==0) */ @@ -137,13 +144,16 @@ ZSTD_STATIC void ZSTD_wildcopy(void *dst, const void *src, ptrdiff_t length) const BYTE* ip = (const BYTE*)src; BYTE* op = (BYTE*)dst; BYTE* const oend = op + length; - /* Work around https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81388. +#if defined(GCC_VERSION) && GCC_VERSION >= 70000 && GCC_VERSION < 70200 + /* + * Work around https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81388. * Avoid the bad case where the loop only runs once by handling the * special case separately. This doesn't trigger the bug because it * doesn't involve pointer/integer overflow. */ if (length <= 8) return ZSTD_copy8(dst, src); +#endif do { ZSTD_copy8(op, ip); op += 8; -- cgit