From 8d16ce148858669f05b8e117a0634010397e17d6 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 5 Feb 2024 16:48:44 +0100 Subject: s390/fpu: make use of __uninitialized macro Code sections in s390 specific kernel code which use floating point or vector registers all come with a 520 byte stack variable to save already in use registers, if required. With INIT_STACK_ALL_PATTERN or INIT_STACK_ALL_ZERO enabled this variable will always be initialized on function entry in addition to saving register contents, which contradicts the intention (performance improvement) of such code sections. Therefore provide a DECLARE_KERNEL_FPU_ONSTACK() macro which provides struct kernel_fpu variables with an __uninitialized attribute, and convert all existing code to use this. This way only this specific type of stack variable will not be initialized, regardless of config options. Reviewed-by: Nathan Chancellor Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/20240205154844.3757121-3-hca@linux.ibm.com Signed-off-by: Heiko Carstens --- lib/raid6/s390vx.uc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/raid6/s390vx.uc b/lib/raid6/s390vx.uc index cd0b9e95f499..7b0b355e1a26 100644 --- a/lib/raid6/s390vx.uc +++ b/lib/raid6/s390vx.uc @@ -81,7 +81,7 @@ static inline void COPY_VEC(int x, int y) static void raid6_s390vx$#_gen_syndrome(int disks, size_t bytes, void **ptrs) { - struct kernel_fpu vxstate; + DECLARE_KERNEL_FPU_ONSTACK(vxstate); u8 **dptr, *p, *q; int d, z, z0; @@ -114,7 +114,7 @@ static void raid6_s390vx$#_gen_syndrome(int disks, size_t bytes, void **ptrs) static void raid6_s390vx$#_xor_syndrome(int disks, int start, int stop, size_t bytes, void **ptrs) { - struct kernel_fpu vxstate; + DECLARE_KERNEL_FPU_ONSTACK(vxstate); u8 **dptr, *p, *q; int d, z, z0; -- cgit From fd2527f20915d041e838b6e4a08122dbc73c7abc Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 3 Feb 2024 11:45:02 +0100 Subject: s390/fpu: move, rename, and merge header files Move, rename, and merge the fpu and vx header files. This way fpu header files have a consistent naming scheme (fpu*.h). Also get rid of the fpu subdirectory and move header files to asm directory, so that all fpu and vx header files can be found at the same location. Merge internal.h header file into other header files, since the internal helpers are used at many locations. so those helper functions are really not internal. Signed-off-by: Heiko Carstens --- lib/raid6/s390vx.uc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/raid6/s390vx.uc b/lib/raid6/s390vx.uc index 7b0b355e1a26..bc2f4fbe5a82 100644 --- a/lib/raid6/s390vx.uc +++ b/lib/raid6/s390vx.uc @@ -12,8 +12,7 @@ */ #include -#include -#include +#include #define NSIZE 16 -- cgit From 066c40918bb495de8f2e45bd7eec06737a142712 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 3 Feb 2024 11:45:17 +0100 Subject: s390/fpu: decrease stack usage for some cases The kernel_fpu structure has a quite large size of 520 bytes. In order to reduce stack footprint introduce several kernel fpu structures with different and also smaller sizes. This way every kernel fpu user must use the correct variant. A compile time check verifies that the correct variant is used. There are several users which use only 16 instead of all 32 vector registers. For those users the new kernel_fpu_16 structure with a size of only 266 bytes can be used. Signed-off-by: Heiko Carstens --- lib/raid6/s390vx.uc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/raid6/s390vx.uc b/lib/raid6/s390vx.uc index bc2f4fbe5a82..92c05b7596bc 100644 --- a/lib/raid6/s390vx.uc +++ b/lib/raid6/s390vx.uc @@ -80,7 +80,7 @@ static inline void COPY_VEC(int x, int y) static void raid6_s390vx$#_gen_syndrome(int disks, size_t bytes, void **ptrs) { - DECLARE_KERNEL_FPU_ONSTACK(vxstate); + DECLARE_KERNEL_FPU_ONSTACK32(vxstate); u8 **dptr, *p, *q; int d, z, z0; @@ -113,7 +113,7 @@ static void raid6_s390vx$#_gen_syndrome(int disks, size_t bytes, void **ptrs) static void raid6_s390vx$#_xor_syndrome(int disks, int start, int stop, size_t bytes, void **ptrs) { - DECLARE_KERNEL_FPU_ONSTACK(vxstate); + DECLARE_KERNEL_FPU_ONSTACK32(vxstate); u8 **dptr, *p, *q; int d, z, z0; -- cgit From c8dde11df19192c421f5b70c2b8ba55d32e07c66 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 3 Feb 2024 11:45:24 +0100 Subject: s390/raid6: convert to use standard fpu_*() inline assemblies Move the s390 specific raid6 inline assemblies, make them generic, and reuse them to implement the raid6 gen/xor implementation. Signed-off-by: Heiko Carstens --- lib/raid6/s390vx.uc | 55 ++++++++++------------------------------------------- 1 file changed, 10 insertions(+), 45 deletions(-) (limited to 'lib') diff --git a/lib/raid6/s390vx.uc b/lib/raid6/s390vx.uc index 92c05b7596bc..863e2d320938 100644 --- a/lib/raid6/s390vx.uc +++ b/lib/raid6/s390vx.uc @@ -16,10 +16,10 @@ #define NSIZE 16 -static inline void LOAD_CONST(void) +static __always_inline void LOAD_CONST(void) { - asm volatile("VREPIB %v24,7"); - asm volatile("VREPIB %v25,0x1d"); + fpu_vrepib(24, 0x07); + fpu_vrepib(25, 0x1d); } /* @@ -27,10 +27,7 @@ static inline void LOAD_CONST(void) * vector register y left by 1 bit and stores the result in * vector register x. */ -static inline void SHLBYTE(int x, int y) -{ - asm volatile ("VAB %0,%1,%1" : : "i" (x), "i" (y)); -} +#define SHLBYTE(x, y) fpu_vab(x, y, y) /* * For each of the 16 bytes in the vector register y the MASK() @@ -38,45 +35,13 @@ static inline void SHLBYTE(int x, int y) * or 0x00 if the high bit is 0. The result is stored in vector * register x. */ -static inline void MASK(int x, int y) -{ - asm volatile ("VESRAVB %0,%1,24" : : "i" (x), "i" (y)); -} - -static inline void AND(int x, int y, int z) -{ - asm volatile ("VN %0,%1,%2" : : "i" (x), "i" (y), "i" (z)); -} - -static inline void XOR(int x, int y, int z) -{ - asm volatile ("VX %0,%1,%2" : : "i" (x), "i" (y), "i" (z)); -} +#define MASK(x, y) fpu_vesravb(x, y, 24) -static inline void LOAD_DATA(int x, u8 *ptr) -{ - typedef struct { u8 _[16 * $#]; } addrtype; - register addrtype *__ptr asm("1") = (addrtype *) ptr; - - asm volatile ("VLM %2,%3,0,%1" - : : "m" (*__ptr), "a" (__ptr), "i" (x), - "i" (x + $# - 1)); -} - -static inline void STORE_DATA(int x, u8 *ptr) -{ - typedef struct { u8 _[16 * $#]; } addrtype; - register addrtype *__ptr asm("1") = (addrtype *) ptr; - - asm volatile ("VSTM %2,%3,0,1" - : "=m" (*__ptr) : "a" (__ptr), "i" (x), - "i" (x + $# - 1)); -} - -static inline void COPY_VEC(int x, int y) -{ - asm volatile ("VLR %0,%1" : : "i" (x), "i" (y)); -} +#define AND(x, y, z) fpu_vn(x, y, z) +#define XOR(x, y, z) fpu_vx(x, y, z) +#define LOAD_DATA(x, ptr) fpu_vlm(x, x + $# - 1, ptr) +#define STORE_DATA(x, ptr) fpu_vstm(x, x + $# - 1, ptr) +#define COPY_VEC(x, y) fpu_vlr(x, y) static void raid6_s390vx$#_gen_syndrome(int disks, size_t bytes, void **ptrs) { -- cgit