diff options
Diffstat (limited to 'arch/s390/lib')
-rw-r--r-- | arch/s390/lib/Makefile | 15 | ||||
-rw-r--r-- | arch/s390/lib/crc32-glue.c | 92 | ||||
-rw-r--r-- | arch/s390/lib/crc32-vx.h | 12 | ||||
-rw-r--r-- | arch/s390/lib/crc32be-vx.c | 174 | ||||
-rw-r--r-- | arch/s390/lib/crc32le-vx.c | 240 | ||||
-rw-r--r-- | arch/s390/lib/csum-partial.c | 91 | ||||
-rw-r--r-- | arch/s390/lib/delay.c | 114 | ||||
-rw-r--r-- | arch/s390/lib/error-inject.c | 14 | ||||
-rw-r--r-- | arch/s390/lib/expoline.S | 12 | ||||
-rw-r--r-- | arch/s390/lib/mem.S | 45 | ||||
-rw-r--r-- | arch/s390/lib/spinlock.c | 90 | ||||
-rw-r--r-- | arch/s390/lib/string.c | 184 | ||||
-rw-r--r-- | arch/s390/lib/test_kprobes.c | 76 | ||||
-rw-r--r-- | arch/s390/lib/test_kprobes.h | 10 | ||||
-rw-r--r-- | arch/s390/lib/test_kprobes_asm.S | 45 | ||||
-rw-r--r-- | arch/s390/lib/test_modules.c | 33 | ||||
-rw-r--r-- | arch/s390/lib/test_modules.h | 53 | ||||
-rw-r--r-- | arch/s390/lib/test_modules_helpers.c | 13 | ||||
-rw-r--r-- | arch/s390/lib/test_unwind.c | 420 | ||||
-rw-r--r-- | arch/s390/lib/tishift.S | 63 | ||||
-rw-r--r-- | arch/s390/lib/uaccess.c | 542 | ||||
-rw-r--r-- | arch/s390/lib/xor.c | 87 |
22 files changed, 1588 insertions, 837 deletions
diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile index 28fd66d558ff..14bbfe50033c 100644 --- a/arch/s390/lib/Makefile +++ b/arch/s390/lib/Makefile @@ -3,10 +3,13 @@ # Makefile for s390-specific library files.. # -lib-y += delay.o string.o uaccess.o find.o spinlock.o +lib-y += delay.o string.o uaccess.o find.o spinlock.o tishift.o +lib-y += csum-partial.o obj-y += mem.o xor.o lib-$(CONFIG_KPROBES) += probes.o lib-$(CONFIG_UPROBES) += probes.o +obj-$(CONFIG_S390_KPROBES_SANITY_TEST) += test_kprobes_s390.o +test_kprobes_s390-objs += test_kprobes_asm.o test_kprobes.o # Instrumenting memory accesses to __user data (in different address space) # produce false positives @@ -14,3 +17,13 @@ KASAN_SANITIZE_uaccess.o := n obj-$(CONFIG_S390_UNWIND_SELFTEST) += test_unwind.o CFLAGS_test_unwind.o += -fno-optimize-sibling-calls + +obj-$(CONFIG_S390_MODULES_SANITY_TEST) += test_modules.o +obj-$(CONFIG_S390_MODULES_SANITY_TEST_HELPERS) += test_modules_helpers.o + +lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o + +obj-$(CONFIG_EXPOLINE_EXTERN) += expoline.o + +obj-$(CONFIG_CRC32_ARCH) += crc32-s390.o +crc32-s390-y := crc32-glue.o crc32le-vx.o crc32be-vx.o diff --git a/arch/s390/lib/crc32-glue.c b/arch/s390/lib/crc32-glue.c new file mode 100644 index 000000000000..124214a27340 --- /dev/null +++ b/arch/s390/lib/crc32-glue.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * CRC-32 implemented with the z/Architecture Vector Extension Facility. + * + * Copyright IBM Corp. 2015 + * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> + */ +#define KMSG_COMPONENT "crc32-vx" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/module.h> +#include <linux/cpufeature.h> +#include <linux/crc32.h> +#include <asm/fpu.h> +#include "crc32-vx.h" + +#define VX_MIN_LEN 64 +#define VX_ALIGNMENT 16L +#define VX_ALIGN_MASK (VX_ALIGNMENT - 1) + +static DEFINE_STATIC_KEY_FALSE(have_vxrs); + +/* + * DEFINE_CRC32_VX() - Define a CRC-32 function using the vector extension + * + * Creates a function to perform a particular CRC-32 computation. Depending + * on the message buffer, the hardware-accelerated or software implementation + * is used. Note that the message buffer is aligned to improve fetch + * operations of VECTOR LOAD MULTIPLE instructions. + */ +#define DEFINE_CRC32_VX(___fname, ___crc32_vx, ___crc32_sw) \ + u32 ___fname(u32 crc, const u8 *data, size_t datalen) \ + { \ + unsigned long prealign, aligned, remaining; \ + DECLARE_KERNEL_FPU_ONSTACK16(vxstate); \ + \ + if (datalen < VX_MIN_LEN + VX_ALIGN_MASK || \ + !static_branch_likely(&have_vxrs)) \ + return ___crc32_sw(crc, data, datalen); \ + \ + if ((unsigned long)data & VX_ALIGN_MASK) { \ + prealign = VX_ALIGNMENT - \ + ((unsigned long)data & VX_ALIGN_MASK); \ + datalen -= prealign; \ + crc = ___crc32_sw(crc, data, prealign); \ + data = (void *)((unsigned long)data + prealign); \ + } \ + \ + aligned = datalen & ~VX_ALIGN_MASK; \ + remaining = datalen & VX_ALIGN_MASK; \ + \ + kernel_fpu_begin(&vxstate, KERNEL_VXR_LOW); \ + crc = ___crc32_vx(crc, data, aligned); \ + kernel_fpu_end(&vxstate, KERNEL_VXR_LOW); \ + \ + if (remaining) \ + crc = ___crc32_sw(crc, data + aligned, remaining); \ + \ + return crc; \ + } \ + EXPORT_SYMBOL(___fname); + +DEFINE_CRC32_VX(crc32_le_arch, crc32_le_vgfm_16, crc32_le_base) +DEFINE_CRC32_VX(crc32_be_arch, crc32_be_vgfm_16, crc32_be_base) +DEFINE_CRC32_VX(crc32c_arch, crc32c_le_vgfm_16, crc32c_base) + +static int __init crc32_s390_init(void) +{ + if (cpu_have_feature(S390_CPU_FEATURE_VXRS)) + static_branch_enable(&have_vxrs); + return 0; +} +arch_initcall(crc32_s390_init); + +static void __exit crc32_s390_exit(void) +{ +} +module_exit(crc32_s390_exit); + +u32 crc32_optimizations(void) +{ + if (static_key_enabled(&have_vxrs)) + return CRC32_LE_OPTIMIZATION | + CRC32_BE_OPTIMIZATION | + CRC32C_OPTIMIZATION; + return 0; +} +EXPORT_SYMBOL(crc32_optimizations); + +MODULE_AUTHOR("Hendrik Brueckner <brueckner@linux.vnet.ibm.com>"); +MODULE_DESCRIPTION("CRC-32 algorithms using z/Architecture Vector Extension Facility"); +MODULE_LICENSE("GPL"); diff --git a/arch/s390/lib/crc32-vx.h b/arch/s390/lib/crc32-vx.h new file mode 100644 index 000000000000..652c96e1a822 --- /dev/null +++ b/arch/s390/lib/crc32-vx.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _CRC32_VX_S390_H +#define _CRC32_VX_S390_H + +#include <linux/types.h> + +u32 crc32_be_vgfm_16(u32 crc, unsigned char const *buf, size_t size); +u32 crc32_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size); +u32 crc32c_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size); + +#endif /* _CRC32_VX_S390_H */ diff --git a/arch/s390/lib/crc32be-vx.c b/arch/s390/lib/crc32be-vx.c new file mode 100644 index 000000000000..fed7c9c70d05 --- /dev/null +++ b/arch/s390/lib/crc32be-vx.c @@ -0,0 +1,174 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Hardware-accelerated CRC-32 variants for Linux on z Systems + * + * Use the z/Architecture Vector Extension Facility to accelerate the + * computing of CRC-32 checksums. + * + * This CRC-32 implementation algorithm processes the most-significant + * bit first (BE). + * + * Copyright IBM Corp. 2015 + * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> + */ + +#include <linux/types.h> +#include <asm/fpu.h> +#include "crc32-vx.h" + +/* Vector register range containing CRC-32 constants */ +#define CONST_R1R2 9 +#define CONST_R3R4 10 +#define CONST_R5 11 +#define CONST_R6 12 +#define CONST_RU_POLY 13 +#define CONST_CRC_POLY 14 + +/* + * The CRC-32 constant block contains reduction constants to fold and + * process particular chunks of the input data stream in parallel. + * + * For the CRC-32 variants, the constants are precomputed according to + * these definitions: + * + * R1 = x4*128+64 mod P(x) + * R2 = x4*128 mod P(x) + * R3 = x128+64 mod P(x) + * R4 = x128 mod P(x) + * R5 = x96 mod P(x) + * R6 = x64 mod P(x) + * + * Barret reduction constant, u, is defined as floor(x**64 / P(x)). + * + * where P(x) is the polynomial in the normal domain and the P'(x) is the + * polynomial in the reversed (bitreflected) domain. + * + * Note that the constant definitions below are extended in order to compute + * intermediate results with a single VECTOR GALOIS FIELD MULTIPLY instruction. + * The rightmost doubleword can be 0 to prevent contribution to the result or + * can be multiplied by 1 to perform an XOR without the need for a separate + * VECTOR EXCLUSIVE OR instruction. + * + * CRC-32 (IEEE 802.3 Ethernet, ...) polynomials: + * + * P(x) = 0x04C11DB7 + * P'(x) = 0xEDB88320 + */ + +static unsigned long constants_CRC_32_BE[] = { + 0x08833794c, 0x0e6228b11, /* R1, R2 */ + 0x0c5b9cd4c, 0x0e8a45605, /* R3, R4 */ + 0x0f200aa66, 1UL << 32, /* R5, x32 */ + 0x0490d678d, 1, /* R6, 1 */ + 0x104d101df, 0, /* u */ + 0x104C11DB7, 0, /* P(x) */ +}; + +/** + * crc32_be_vgfm_16 - Compute CRC-32 (BE variant) with vector registers + * @crc: Initial CRC value, typically ~0. + * @buf: Input buffer pointer, performance might be improved if the + * buffer is on a doubleword boundary. + * @size: Size of the buffer, must be 64 bytes or greater. + * + * Register usage: + * V0: Initial CRC value and intermediate constants and results. + * V1..V4: Data for CRC computation. + * V5..V8: Next data chunks that are fetched from the input buffer. + * V9..V14: CRC-32 constants. + */ +u32 crc32_be_vgfm_16(u32 crc, unsigned char const *buf, size_t size) +{ + /* Load CRC-32 constants */ + fpu_vlm(CONST_R1R2, CONST_CRC_POLY, &constants_CRC_32_BE); + fpu_vzero(0); + + /* Load the initial CRC value into the leftmost word of V0. */ + fpu_vlvgf(0, crc, 0); + + /* Load a 64-byte data chunk and XOR with CRC */ + fpu_vlm(1, 4, buf); + fpu_vx(1, 0, 1); + buf += 64; + size -= 64; + + while (size >= 64) { + /* Load the next 64-byte data chunk into V5 to V8 */ + fpu_vlm(5, 8, buf); + + /* + * Perform a GF(2) multiplication of the doublewords in V1 with + * the reduction constants in V0. The intermediate result is + * then folded (accumulated) with the next data chunk in V5 and + * stored in V1. Repeat this step for the register contents + * in V2, V3, and V4 respectively. + */ + fpu_vgfmag(1, CONST_R1R2, 1, 5); + fpu_vgfmag(2, CONST_R1R2, 2, 6); + fpu_vgfmag(3, CONST_R1R2, 3, 7); + fpu_vgfmag(4, CONST_R1R2, 4, 8); + buf += 64; + size -= 64; + } + + /* Fold V1 to V4 into a single 128-bit value in V1 */ + fpu_vgfmag(1, CONST_R3R4, 1, 2); + fpu_vgfmag(1, CONST_R3R4, 1, 3); + fpu_vgfmag(1, CONST_R3R4, 1, 4); + + while (size >= 16) { + fpu_vl(2, buf); + fpu_vgfmag(1, CONST_R3R4, 1, 2); + buf += 16; + size -= 16; + } + + /* + * The R5 constant is used to fold a 128-bit value into an 96-bit value + * that is XORed with the next 96-bit input data chunk. To use a single + * VGFMG instruction, multiply the rightmost 64-bit with x^32 (1<<32) to + * form an intermediate 96-bit value (with appended zeros) which is then + * XORed with the intermediate reduction result. + */ + fpu_vgfmg(1, CONST_R5, 1); + + /* + * Further reduce the remaining 96-bit value to a 64-bit value using a + * single VGFMG, the rightmost doubleword is multiplied with 0x1. The + * intermediate result is then XORed with the product of the leftmost + * doubleword with R6. The result is a 64-bit value and is subject to + * the Barret reduction. + */ + fpu_vgfmg(1, CONST_R6, 1); + + /* + * The input values to the Barret reduction are the degree-63 polynomial + * in V1 (R(x)), degree-32 generator polynomial, and the reduction + * constant u. The Barret reduction result is the CRC value of R(x) mod + * P(x). + * + * The Barret reduction algorithm is defined as: + * + * 1. T1(x) = floor( R(x) / x^32 ) GF2MUL u + * 2. T2(x) = floor( T1(x) / x^32 ) GF2MUL P(x) + * 3. C(x) = R(x) XOR T2(x) mod x^32 + * + * Note: To compensate the division by x^32, use the vector unpack + * instruction to move the leftmost word into the leftmost doubleword + * of the vector register. The rightmost doubleword is multiplied + * with zero to not contribute to the intermediate results. + */ + + /* T1(x) = floor( R(x) / x^32 ) GF2MUL u */ + fpu_vupllf(2, 1); + fpu_vgfmg(2, CONST_RU_POLY, 2); + + /* + * Compute the GF(2) product of the CRC polynomial in VO with T1(x) in + * V2 and XOR the intermediate result, T2(x), with the value in V1. + * The final result is in the rightmost word of V2. + */ + fpu_vupllf(2, 2); + fpu_vgfmag(2, CONST_CRC_POLY, 2, 1); + return fpu_vlgvf(2, 3); +} diff --git a/arch/s390/lib/crc32le-vx.c b/arch/s390/lib/crc32le-vx.c new file mode 100644 index 000000000000..2f629f394df7 --- /dev/null +++ b/arch/s390/lib/crc32le-vx.c @@ -0,0 +1,240 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Hardware-accelerated CRC-32 variants for Linux on z Systems + * + * Use the z/Architecture Vector Extension Facility to accelerate the + * computing of bitreflected CRC-32 checksums for IEEE 802.3 Ethernet + * and Castagnoli. + * + * This CRC-32 implementation algorithm is bitreflected and processes + * the least-significant bit first (Little-Endian). + * + * Copyright IBM Corp. 2015 + * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> + */ + +#include <linux/types.h> +#include <asm/fpu.h> +#include "crc32-vx.h" + +/* Vector register range containing CRC-32 constants */ +#define CONST_PERM_LE2BE 9 +#define CONST_R2R1 10 +#define CONST_R4R3 11 +#define CONST_R5 12 +#define CONST_RU_POLY 13 +#define CONST_CRC_POLY 14 + +/* + * The CRC-32 constant block contains reduction constants to fold and + * process particular chunks of the input data stream in parallel. + * + * For the CRC-32 variants, the constants are precomputed according to + * these definitions: + * + * R1 = [(x4*128+32 mod P'(x) << 32)]' << 1 + * R2 = [(x4*128-32 mod P'(x) << 32)]' << 1 + * R3 = [(x128+32 mod P'(x) << 32)]' << 1 + * R4 = [(x128-32 mod P'(x) << 32)]' << 1 + * R5 = [(x64 mod P'(x) << 32)]' << 1 + * R6 = [(x32 mod P'(x) << 32)]' << 1 + * + * The bitreflected Barret reduction constant, u', is defined as + * the bit reversal of floor(x**64 / P(x)). + * + * where P(x) is the polynomial in the normal domain and the P'(x) is the + * polynomial in the reversed (bitreflected) domain. + * + * CRC-32 (IEEE 802.3 Ethernet, ...) polynomials: + * + * P(x) = 0x04C11DB7 + * P'(x) = 0xEDB88320 + * + * CRC-32C (Castagnoli) polynomials: + * + * P(x) = 0x1EDC6F41 + * P'(x) = 0x82F63B78 + */ + +static unsigned long constants_CRC_32_LE[] = { + 0x0f0e0d0c0b0a0908, 0x0706050403020100, /* BE->LE mask */ + 0x1c6e41596, 0x154442bd4, /* R2, R1 */ + 0x0ccaa009e, 0x1751997d0, /* R4, R3 */ + 0x0, 0x163cd6124, /* R5 */ + 0x0, 0x1f7011641, /* u' */ + 0x0, 0x1db710641 /* P'(x) << 1 */ +}; + +static unsigned long constants_CRC_32C_LE[] = { + 0x0f0e0d0c0b0a0908, 0x0706050403020100, /* BE->LE mask */ + 0x09e4addf8, 0x740eef02, /* R2, R1 */ + 0x14cd00bd6, 0xf20c0dfe, /* R4, R3 */ + 0x0, 0x0dd45aab8, /* R5 */ + 0x0, 0x0dea713f1, /* u' */ + 0x0, 0x105ec76f0 /* P'(x) << 1 */ +}; + +/** + * crc32_le_vgfm_generic - Compute CRC-32 (LE variant) with vector registers + * @crc: Initial CRC value, typically ~0. + * @buf: Input buffer pointer, performance might be improved if the + * buffer is on a doubleword boundary. + * @size: Size of the buffer, must be 64 bytes or greater. + * @constants: CRC-32 constant pool base pointer. + * + * Register usage: + * V0: Initial CRC value and intermediate constants and results. + * V1..V4: Data for CRC computation. + * V5..V8: Next data chunks that are fetched from the input buffer. + * V9: Constant for BE->LE conversion and shift operations + * V10..V14: CRC-32 constants. + */ +static u32 crc32_le_vgfm_generic(u32 crc, unsigned char const *buf, size_t size, unsigned long *constants) +{ + /* Load CRC-32 constants */ + fpu_vlm(CONST_PERM_LE2BE, CONST_CRC_POLY, constants); + + /* + * Load the initial CRC value. + * + * The CRC value is loaded into the rightmost word of the + * vector register and is later XORed with the LSB portion + * of the loaded input data. + */ + fpu_vzero(0); /* Clear V0 */ + fpu_vlvgf(0, crc, 3); /* Load CRC into rightmost word */ + + /* Load a 64-byte data chunk and XOR with CRC */ + fpu_vlm(1, 4, buf); + fpu_vperm(1, 1, 1, CONST_PERM_LE2BE); + fpu_vperm(2, 2, 2, CONST_PERM_LE2BE); + fpu_vperm(3, 3, 3, CONST_PERM_LE2BE); + fpu_vperm(4, 4, 4, CONST_PERM_LE2BE); + + fpu_vx(1, 0, 1); /* V1 ^= CRC */ + buf += 64; + size -= 64; + + while (size >= 64) { + fpu_vlm(5, 8, buf); + fpu_vperm(5, 5, 5, CONST_PERM_LE2BE); + fpu_vperm(6, 6, 6, CONST_PERM_LE2BE); + fpu_vperm(7, 7, 7, CONST_PERM_LE2BE); + fpu_vperm(8, 8, 8, CONST_PERM_LE2BE); + /* + * Perform a GF(2) multiplication of the doublewords in V1 with + * the R1 and R2 reduction constants in V0. The intermediate + * result is then folded (accumulated) with the next data chunk + * in V5 and stored in V1. Repeat this step for the register + * contents in V2, V3, and V4 respectively. + */ + fpu_vgfmag(1, CONST_R2R1, 1, 5); + fpu_vgfmag(2, CONST_R2R1, 2, 6); + fpu_vgfmag(3, CONST_R2R1, 3, 7); + fpu_vgfmag(4, CONST_R2R1, 4, 8); + buf += 64; + size -= 64; + } + + /* + * Fold V1 to V4 into a single 128-bit value in V1. Multiply V1 with R3 + * and R4 and accumulating the next 128-bit chunk until a single 128-bit + * value remains. + */ + fpu_vgfmag(1, CONST_R4R3, 1, 2); + fpu_vgfmag(1, CONST_R4R3, 1, 3); + fpu_vgfmag(1, CONST_R4R3, 1, 4); + + while (size >= 16) { + fpu_vl(2, buf); + fpu_vperm(2, 2, 2, CONST_PERM_LE2BE); + fpu_vgfmag(1, CONST_R4R3, 1, 2); + buf += 16; + size -= 16; + } + + /* + * Set up a vector register for byte shifts. The shift value must + * be loaded in bits 1-4 in byte element 7 of a vector register. + * Shift by 8 bytes: 0x40 + * Shift by 4 bytes: 0x20 + */ + fpu_vleib(9, 0x40, 7); + + /* + * Prepare V0 for the next GF(2) multiplication: shift V0 by 8 bytes + * to move R4 into the rightmost doubleword and set the leftmost + * doubleword to 0x1. + */ + fpu_vsrlb(0, CONST_R4R3, 9); + fpu_vleig(0, 1, 0); + + /* + * Compute GF(2) product of V1 and V0. The rightmost doubleword + * of V1 is multiplied with R4. The leftmost doubleword of V1 is + * multiplied by 0x1 and is then XORed with rightmost product. + * Implicitly, the intermediate leftmost product becomes padded + */ + fpu_vgfmg(1, 0, 1); + + /* + * Now do the final 32-bit fold by multiplying the rightmost word + * in V1 with R5 and XOR the result with the remaining bits in V1. + * + * To achieve this by a single VGFMAG, right shift V1 by a word + * and store the result in V2 which is then accumulated. Use the + * vector unpack instruction to load the rightmost half of the + * doubleword into the rightmost doubleword element of V1; the other + * half is loaded in the leftmost doubleword. + * The vector register with CONST_R5 contains the R5 constant in the + * rightmost doubleword and the leftmost doubleword is zero to ignore + * the leftmost product of V1. + */ + fpu_vleib(9, 0x20, 7); /* Shift by words */ + fpu_vsrlb(2, 1, 9); /* Store remaining bits in V2 */ + fpu_vupllf(1, 1); /* Split rightmost doubleword */ + fpu_vgfmag(1, CONST_R5, 1, 2); /* V1 = (V1 * R5) XOR V2 */ + + /* + * Apply a Barret reduction to compute the final 32-bit CRC value. + * + * The input values to the Barret reduction are the degree-63 polynomial + * in V1 (R(x)), degree-32 generator polynomial, and the reduction + * constant u. The Barret reduction result is the CRC value of R(x) mod + * P(x). + * + * The Barret reduction algorithm is defined as: + * + * 1. T1(x) = floor( R(x) / x^32 ) GF2MUL u + * 2. T2(x) = floor( T1(x) / x^32 ) GF2MUL P(x) + * 3. C(x) = R(x) XOR T2(x) mod x^32 + * + * Note: The leftmost doubleword of vector register containing + * CONST_RU_POLY is zero and, thus, the intermediate GF(2) product + * is zero and does not contribute to the final result. + */ + + /* T1(x) = floor( R(x) / x^32 ) GF2MUL u */ + fpu_vupllf(2, 1); + fpu_vgfmg(2, CONST_RU_POLY, 2); + + /* + * Compute the GF(2) product of the CRC polynomial with T1(x) in + * V2 and XOR the intermediate result, T2(x), with the value in V1. + * The final result is stored in word element 2 of V2. + */ + fpu_vupllf(2, 2); + fpu_vgfmag(2, CONST_CRC_POLY, 2, 1); + + return fpu_vlgvf(2, 2); +} + +u32 crc32_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size) +{ + return crc32_le_vgfm_generic(crc, buf, size, &constants_CRC_32_LE[0]); +} + +u32 crc32c_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size) +{ + return crc32_le_vgfm_generic(crc, buf, size, &constants_CRC_32C_LE[0]); +} diff --git a/arch/s390/lib/csum-partial.c b/arch/s390/lib/csum-partial.c new file mode 100644 index 000000000000..458abd9bac70 --- /dev/null +++ b/arch/s390/lib/csum-partial.c @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/export.h> +#include <asm/checksum.h> +#include <asm/fpu.h> + +/* + * Computes the checksum of a memory block at src, length len, + * and adds in "sum" (32-bit). If copy is true copies to dst. + * + * Returns a 32-bit number suitable for feeding into itself + * or csum_tcpudp_magic. + * + * This function must be called with even lengths, except + * for the last fragment, which may be odd. + * + * It's best to have src and dst aligned on a 64-bit boundary. + */ +static __always_inline __wsum csum_copy(void *dst, const void *src, int len, __wsum sum, bool copy) +{ + DECLARE_KERNEL_FPU_ONSTACK8(vxstate); + + if (!cpu_has_vx()) { + if (copy) + memcpy(dst, src, len); + return cksm(dst, len, sum); + } + kernel_fpu_begin(&vxstate, KERNEL_VXR_V16V23); + fpu_vlvgf(16, (__force u32)sum, 1); + fpu_vzero(17); + fpu_vzero(18); + fpu_vzero(19); + while (len >= 64) { + fpu_vlm(20, 23, src); + if (copy) { + fpu_vstm(20, 23, dst); + dst += 64; + } + fpu_vcksm(16, 20, 16); + fpu_vcksm(17, 21, 17); + fpu_vcksm(18, 22, 18); + fpu_vcksm(19, 23, 19); + src += 64; + len -= 64; + } + while (len >= 32) { + fpu_vlm(20, 21, src); + if (copy) { + fpu_vstm(20, 21, dst); + dst += 32; + } + fpu_vcksm(16, 20, 16); + fpu_vcksm(17, 21, 17); + src += 32; + len -= 32; + } + while (len >= 16) { + fpu_vl(20, src); + if (copy) { + fpu_vst(20, dst); + dst += 16; + } + fpu_vcksm(16, 20, 16); + src += 16; + len -= 16; + } + if (len) { + fpu_vll(20, len - 1, src); + if (copy) + fpu_vstl(20, len - 1, dst); + fpu_vcksm(16, 20, 16); + } + fpu_vcksm(18, 19, 18); + fpu_vcksm(16, 17, 16); + fpu_vcksm(16, 18, 16); + sum = (__force __wsum)fpu_vlgvf(16, 1); + kernel_fpu_end(&vxstate, KERNEL_VXR_V16V23); + return sum; +} + +__wsum csum_partial(const void *buff, int len, __wsum sum) +{ + return csum_copy(NULL, buff, len, sum, false); +} +EXPORT_SYMBOL(csum_partial); + +__wsum csum_partial_copy_nocheck(const void *src, void *dst, int len) +{ + return csum_copy(dst, src, len, 0, true); +} +EXPORT_SYMBOL(csum_partial_copy_nocheck); diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c index d4aa10795605..be14c58cb989 100644 --- a/arch/s390/lib/delay.c +++ b/arch/s390/lib/delay.c @@ -4,126 +4,42 @@ * * Copyright IBM Corp. 1999, 2008 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>, - * Heiko Carstens <heiko.carstens@de.ibm.com>, */ -#include <linux/sched.h> +#include <linux/processor.h> #include <linux/delay.h> -#include <linux/timex.h> -#include <linux/export.h> -#include <linux/irqflags.h> -#include <linux/interrupt.h> -#include <linux/irq.h> -#include <asm/vtimer.h> #include <asm/div64.h> -#include <asm/idle.h> +#include <asm/timex.h> void __delay(unsigned long loops) { - /* - * To end the bloody studid and useless discussion about the - * BogoMips number I took the liberty to define the __delay - * function in a way that that resulting BogoMips number will - * yield the megahertz number of the cpu. The important function - * is udelay and that is done using the tod clock. -- martin. - */ + /* + * Loop 'loops' times. Callers must not assume a specific + * amount of time passes before this function returns. + */ asm volatile("0: brct %0,0b" : : "d" ((loops/2) + 1)); } EXPORT_SYMBOL(__delay); -static void __udelay_disabled(unsigned long long usecs) +static void delay_loop(unsigned long delta) { - unsigned long cr0, cr0_new, psw_mask; - struct s390_idle_data idle; - u64 end; + unsigned long end; - end = get_tod_clock() + (usecs << 12); - __ctl_store(cr0, 0, 0); - cr0_new = cr0 & ~CR0_IRQ_SUBCLASS_MASK; - cr0_new |= (1UL << (63 - 52)); /* enable clock comparator irq */ - __ctl_load(cr0_new, 0, 0); - psw_mask = __extract_psw() | PSW_MASK_EXT | PSW_MASK_WAIT; - set_clock_comparator(end); - set_cpu_flag(CIF_IGNORE_IRQ); - psw_idle(&idle, psw_mask); - clear_cpu_flag(CIF_IGNORE_IRQ); - set_clock_comparator(S390_lowcore.clock_comparator); - __ctl_load(cr0, 0, 0); -} - -static void __udelay_enabled(unsigned long long usecs) -{ - u64 clock_saved, end; - - end = get_tod_clock_fast() + (usecs << 12); - do { - clock_saved = 0; - if (tod_after(S390_lowcore.clock_comparator, end)) { - clock_saved = local_tick_disable(); - set_clock_comparator(end); - } - enabled_wait(); - if (clock_saved) - local_tick_enable(clock_saved); - } while (get_tod_clock_fast() < end); + end = get_tod_clock_monotonic() + delta; + while (!tod_after(get_tod_clock_monotonic(), end)) + cpu_relax(); } -/* - * Waits for 'usecs' microseconds using the TOD clock comparator. - */ -void __udelay(unsigned long long usecs) +void __udelay(unsigned long usecs) { - unsigned long flags; - - preempt_disable(); - local_irq_save(flags); - if (in_irq()) { - __udelay_disabled(usecs); - goto out; - } - if (in_softirq()) { - if (raw_irqs_disabled_flags(flags)) - __udelay_disabled(usecs); - else - __udelay_enabled(usecs); - goto out; - } - if (raw_irqs_disabled_flags(flags)) { - local_bh_disable(); - __udelay_disabled(usecs); - _local_bh_enable(); - goto out; - } - __udelay_enabled(usecs); -out: - local_irq_restore(flags); - preempt_enable(); + delay_loop(usecs << 12); } EXPORT_SYMBOL(__udelay); -/* - * Simple udelay variant. To be used on startup and reboot - * when the interrupt handler isn't working. - */ -void udelay_simple(unsigned long long usecs) -{ - u64 end; - - end = get_tod_clock_fast() + (usecs << 12); - while (get_tod_clock_fast() < end) - cpu_relax(); -} - -void __ndelay(unsigned long long nsecs) +void __ndelay(unsigned long nsecs) { - u64 end; - nsecs <<= 9; do_div(nsecs, 125); - end = get_tod_clock_fast() + nsecs; - if (nsecs & ~0xfffUL) - __udelay(nsecs >> 12); - while (get_tod_clock_fast() < end) - barrier(); + delay_loop(nsecs); } EXPORT_SYMBOL(__ndelay); diff --git a/arch/s390/lib/error-inject.c b/arch/s390/lib/error-inject.c new file mode 100644 index 000000000000..8c9d4da87eef --- /dev/null +++ b/arch/s390/lib/error-inject.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0+ +#include <asm/ptrace.h> +#include <linux/error-injection.h> +#include <linux/kprobes.h> + +void override_function_with_return(struct pt_regs *regs) +{ + /* + * Emulate 'br 14'. 'regs' is captured by kprobes on entry to some + * kernel function. + */ + regs->psw.addr = regs->gprs[14]; +} +NOKPROBE_SYMBOL(override_function_with_return); diff --git a/arch/s390/lib/expoline.S b/arch/s390/lib/expoline.S new file mode 100644 index 000000000000..92ed8409a7a4 --- /dev/null +++ b/arch/s390/lib/expoline.S @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include <asm/nospec-insn.h> +#include <linux/linkage.h> + +.macro GEN_ALL_BR_THUNK_EXTERN + .irp r1,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + GEN_BR_THUNK_EXTERN %r\r1 + .endr +.endm + +GEN_ALL_BR_THUNK_EXTERN diff --git a/arch/s390/lib/mem.S b/arch/s390/lib/mem.S index dc0874f2e203..d026debf250c 100644 --- a/arch/s390/lib/mem.S +++ b/arch/s390/lib/mem.S @@ -5,8 +5,8 @@ * Copyright IBM Corp. 2012 */ +#include <linux/export.h> #include <linux/linkage.h> -#include <asm/export.h> #include <asm/nospec-insn.h> GEN_BR_THUNK %r14 @@ -14,8 +14,7 @@ /* * void *memmove(void *dest, const void *src, size_t n) */ -WEAK(memmove) -ENTRY(__memmove) +SYM_FUNC_START(__memmove) ltgr %r4,%r4 lgr %r1,%r2 jz .Lmemmove_exit @@ -35,8 +34,7 @@ ENTRY(__memmove) la %r3,256(%r3) brctg %r0,.Lmemmove_forward_loop .Lmemmove_forward_remainder: - larl %r5,.Lmemmove_mvc - ex %r4,0(%r5) + exrl %r4,.Lmemmove_mvc .Lmemmove_exit: BR_EX %r14 .Lmemmove_reverse: @@ -48,7 +46,10 @@ ENTRY(__memmove) BR_EX %r14 .Lmemmove_mvc: mvc 0(1,%r1),0(%r3) -ENDPROC(__memmove) +SYM_FUNC_END(__memmove) +EXPORT_SYMBOL(__memmove) + +SYM_FUNC_ALIAS(memmove, __memmove) EXPORT_SYMBOL(memmove) /* @@ -66,8 +67,7 @@ EXPORT_SYMBOL(memmove) * return __builtin_memset(s, c, n); * } */ -WEAK(memset) -ENTRY(__memset) +SYM_FUNC_START(__memset) ltgr %r4,%r4 jz .Lmemset_exit ltgr %r3,%r3 @@ -82,8 +82,7 @@ ENTRY(__memset) la %r1,256(%r1) brctg %r3,.Lmemset_clear_loop .Lmemset_clear_remainder: - larl %r3,.Lmemset_xc - ex %r4,0(%r3) + exrl %r4,.Lmemset_xc .Lmemset_exit: BR_EX %r14 .Lmemset_fill: @@ -101,8 +100,7 @@ ENTRY(__memset) brctg %r5,.Lmemset_fill_loop .Lmemset_fill_remainder: stc %r3,0(%r1) - larl %r5,.Lmemset_mvc - ex %r4,0(%r5) + exrl %r4,.Lmemset_mvc BR_EX %r14 .Lmemset_fill_exit: stc %r3,0(%r1) @@ -111,7 +109,10 @@ ENTRY(__memset) xc 0(1,%r1),0(%r1) .Lmemset_mvc: mvc 1(1,%r1),0(%r1) -ENDPROC(__memset) +SYM_FUNC_END(__memset) +EXPORT_SYMBOL(__memset) + +SYM_FUNC_ALIAS(memset, __memset) EXPORT_SYMBOL(memset) /* @@ -119,8 +120,7 @@ EXPORT_SYMBOL(memset) * * void *memcpy(void *dest, const void *src, size_t n) */ -WEAK(memcpy) -ENTRY(__memcpy) +SYM_FUNC_START(__memcpy) ltgr %r4,%r4 jz .Lmemcpy_exit aghi %r4,-1 @@ -129,8 +129,7 @@ ENTRY(__memcpy) lgr %r1,%r2 jnz .Lmemcpy_loop .Lmemcpy_remainder: - larl %r5,.Lmemcpy_mvc - ex %r4,0(%r5) + exrl %r4,.Lmemcpy_mvc .Lmemcpy_exit: BR_EX %r14 .Lmemcpy_loop: @@ -141,7 +140,10 @@ ENTRY(__memcpy) j .Lmemcpy_remainder .Lmemcpy_mvc: mvc 0(1,%r1),0(%r3) -ENDPROC(__memcpy) +SYM_FUNC_END(__memcpy) +EXPORT_SYMBOL(__memcpy) + +SYM_FUNC_ALIAS(memcpy, __memcpy) EXPORT_SYMBOL(memcpy) /* @@ -152,7 +154,7 @@ EXPORT_SYMBOL(memcpy) * void *__memset64(uint64_t *s, uint64_t v, size_t count) */ .macro __MEMSET bits,bytes,insn -ENTRY(__memset\bits) +SYM_FUNC_START(__memset\bits) ltgr %r4,%r4 jz .L__memset_exit\bits cghi %r4,\bytes @@ -169,8 +171,7 @@ ENTRY(__memset\bits) brctg %r5,.L__memset_loop\bits .L__memset_remainder\bits: \insn %r3,0(%r1) - larl %r5,.L__memset_mvc\bits - ex %r4,0(%r5) + exrl %r4,.L__memset_mvc\bits BR_EX %r14 .L__memset_store\bits: \insn %r3,0(%r2) @@ -178,7 +179,7 @@ ENTRY(__memset\bits) BR_EX %r14 .L__memset_mvc\bits: mvc \bytes(1,%r1),0(%r1) -ENDPROC(__memset\bits) +SYM_FUNC_END(__memset\bits) .endm __MEMSET 16,2,sth diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index ce1e4bbe53aa..ad9da4038511 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c @@ -10,11 +10,14 @@ #include <linux/export.h> #include <linux/spinlock.h> #include <linux/jiffies.h> +#include <linux/sysctl.h> #include <linux/init.h> #include <linux/smp.h> #include <linux/percpu.h> +#include <linux/io.h> #include <asm/alternative.h> -#include <asm/io.h> +#include <asm/machine.h> +#include <asm/asm.h> int spin_retry = -1; @@ -26,7 +29,7 @@ static int __init spin_retry_init(void) } early_initcall(spin_retry_init); -/** +/* * spin_retry= parameter */ static int __init spin_retry_setup(char *str) @@ -36,6 +39,23 @@ static int __init spin_retry_setup(char *str) } __setup("spin_retry=", spin_retry_setup); +static const struct ctl_table s390_spin_sysctl_table[] = { + { + .procname = "spin_retry", + .data = &spin_retry, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +}; + +static int __init init_s390_spin_sysctls(void) +{ + register_sysctl_init("kernel", s390_spin_sysctl_table); + return 0; +} +arch_initcall(init_s390_spin_sysctls); + struct spin_wait { struct spin_wait *next, *prev; int node_id; @@ -75,25 +95,44 @@ static inline int arch_load_niai4(int *lock) int owner; asm_inline volatile( - ALTERNATIVE("", ".long 0xb2fa0040", 49) /* NIAI 4 */ - " l %0,%1\n" - : "=d" (owner) : "Q" (*lock) : "memory"); + ALTERNATIVE("nop", ".insn rre,0xb2fa0000,4,0", ALT_FACILITY(49)) /* NIAI 4 */ + " l %[owner],%[lock]\n" + : [owner] "=d" (owner) : [lock] "R" (*lock) : "memory"); return owner; } -static inline int arch_cmpxchg_niai8(int *lock, int old, int new) +#ifdef __HAVE_ASM_FLAG_OUTPUTS__ + +static inline int arch_try_cmpxchg_niai8(int *lock, int old, int new) +{ + int cc; + + asm_inline volatile( + ALTERNATIVE("nop", ".insn rre,0xb2fa0000,8,0", ALT_FACILITY(49)) /* NIAI 8 */ + " cs %[old],%[new],%[lock]\n" + : [old] "+d" (old), [lock] "+Q" (*lock), "=@cc" (cc) + : [new] "d" (new) + : "memory"); + return cc == 0; +} + +#else /* __HAVE_ASM_FLAG_OUTPUTS__ */ + +static inline int arch_try_cmpxchg_niai8(int *lock, int old, int new) { int expected = old; asm_inline volatile( - ALTERNATIVE("", ".long 0xb2fa0080", 49) /* NIAI 8 */ - " cs %0,%3,%1\n" - : "=d" (old), "=Q" (*lock) - : "0" (old), "d" (new), "Q" (*lock) + ALTERNATIVE("nop", ".insn rre,0xb2fa0000,8,0", ALT_FACILITY(49)) /* NIAI 8 */ + " cs %[old],%[new],%[lock]\n" + : [old] "+d" (old), [lock] "+Q" (*lock) + : [new] "d" (new) : "cc", "memory"); return expected == old; } +#endif /* __HAVE_ASM_FLAG_OUTPUTS__ */ + static inline struct spin_wait *arch_spin_decode_tail(int lock) { int ix, cpu; @@ -119,16 +158,16 @@ static inline void arch_spin_lock_queued(arch_spinlock_t *lp) struct spin_wait *node, *next; int lockval, ix, node_id, tail_id, old, new, owner, count; - ix = S390_lowcore.spinlock_index++; + ix = get_lowcore()->spinlock_index++; barrier(); - lockval = SPINLOCK_LOCKVAL; /* cpu + 1 */ + lockval = spinlock_lockval(); /* cpu + 1 */ node = this_cpu_ptr(&spin_wait[ix]); node->prev = node->next = NULL; node_id = node->node_id; /* Enqueue the node for this CPU in the spinlock wait queue */ + old = READ_ONCE(lp->lock); while (1) { - old = READ_ONCE(lp->lock); if ((old & _Q_LOCK_CPU_MASK) == 0 && (old & _Q_LOCK_STEAL_MASK) != _Q_LOCK_STEAL_MASK) { /* @@ -139,7 +178,7 @@ static inline void arch_spin_lock_queued(arch_spinlock_t *lp) * waiter will get the lock. */ new = (old ? (old + _Q_LOCK_STEAL_ADD) : 0) | lockval; - if (__atomic_cmpxchg_bool(&lp->lock, old, new)) + if (arch_try_cmpxchg(&lp->lock, &old, new)) /* Got the lock */ goto out; /* lock passing in progress */ @@ -147,7 +186,7 @@ static inline void arch_spin_lock_queued(arch_spinlock_t *lp) } /* Make the node of this CPU the new tail. */ new = node_id | (old & _Q_LOCK_MASK); - if (__atomic_cmpxchg_bool(&lp->lock, old, new)) + if (arch_try_cmpxchg(&lp->lock, &old, new)) break; } /* Set the 'next' pointer of the tail node in the queue */ @@ -184,7 +223,7 @@ static inline void arch_spin_lock_queued(arch_spinlock_t *lp) if (!owner) { tail_id = old & _Q_TAIL_MASK; new = ((tail_id != node_id) ? tail_id : 0) | lockval; - if (__atomic_cmpxchg_bool(&lp->lock, old, new)) + if (arch_try_cmpxchg(&lp->lock, &old, new)) /* Got the lock */ break; continue; @@ -192,7 +231,7 @@ static inline void arch_spin_lock_queued(arch_spinlock_t *lp) if (count-- >= 0) continue; count = spin_retry; - if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(owner - 1)) + if (!machine_is_lpar() || arch_vcpu_is_preempted(owner - 1)) smp_yield_cpu(owner - 1); } @@ -205,14 +244,14 @@ static inline void arch_spin_lock_queued(arch_spinlock_t *lp) } out: - S390_lowcore.spinlock_index--; + get_lowcore()->spinlock_index--; } static inline void arch_spin_lock_classic(arch_spinlock_t *lp) { int lockval, old, new, owner, count; - lockval = SPINLOCK_LOCKVAL; /* cpu + 1 */ + lockval = spinlock_lockval(); /* cpu + 1 */ /* Pass the virtual CPU to the lock holder if it is not running */ owner = arch_spin_yield_target(READ_ONCE(lp->lock), NULL); @@ -226,7 +265,7 @@ static inline void arch_spin_lock_classic(arch_spinlock_t *lp) /* Try to get the lock if it is free. */ if (!owner) { new = (old & _Q_TAIL_MASK) | lockval; - if (arch_cmpxchg_niai8(&lp->lock, old, new)) { + if (arch_try_cmpxchg_niai8(&lp->lock, old, new)) { /* Got the lock */ return; } @@ -235,14 +274,13 @@ static inline void arch_spin_lock_classic(arch_spinlock_t *lp) if (count-- >= 0) continue; count = spin_retry; - if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(owner - 1)) + if (!machine_is_lpar() || arch_vcpu_is_preempted(owner - 1)) smp_yield_cpu(owner - 1); } } void arch_spin_lock_wait(arch_spinlock_t *lp) { - /* Use classic spinlocks + niai if the steal time is >= 10% */ if (test_cpu_flag(CIF_DEDICATED_CPU)) arch_spin_lock_queued(lp); else @@ -252,14 +290,14 @@ EXPORT_SYMBOL(arch_spin_lock_wait); int arch_spin_trylock_retry(arch_spinlock_t *lp) { - int cpu = SPINLOCK_LOCKVAL; + int cpu = spinlock_lockval(); int owner, count; for (count = spin_retry; count > 0; count--) { owner = READ_ONCE(lp->lock); /* Try to get the lock if it is free. */ if (!owner) { - if (__atomic_cmpxchg_bool(&lp->lock, 0, cpu)) + if (arch_try_cmpxchg(&lp->lock, &owner, cpu)) return 1; } } @@ -301,7 +339,7 @@ void arch_write_lock_wait(arch_rwlock_t *rw) while (1) { old = READ_ONCE(rw->cnts); if ((old & 0x1ffff) == 0 && - __atomic_cmpxchg_bool(&rw->cnts, old, old | 0x10000)) + arch_try_cmpxchg(&rw->cnts, &old, old | 0x10000)) /* Got the lock */ break; barrier(); @@ -318,7 +356,7 @@ void arch_spin_relax(arch_spinlock_t *lp) cpu = READ_ONCE(lp->lock) & _Q_LOCK_CPU_MASK; if (!cpu) return; - if (MACHINE_IS_LPAR && !arch_vcpu_is_preempted(cpu - 1)) + if (machine_is_lpar() && !arch_vcpu_is_preempted(cpu - 1)) return; smp_yield_cpu(cpu - 1); } diff --git a/arch/s390/lib/string.c b/arch/s390/lib/string.c index 0e30e6e43b0c..373fa1f01937 100644 --- a/arch/s390/lib/string.c +++ b/arch/s390/lib/string.c @@ -8,33 +8,44 @@ */ #define IN_ARCH_STRING_C 1 +#ifndef __NO_FORTIFY +# define __NO_FORTIFY +#endif #include <linux/types.h> #include <linux/string.h> #include <linux/export.h> +#include <asm/asm.h> /* * Helper functions to find the end of a string */ static inline char *__strend(const char *s) { - register unsigned long r0 asm("0") = 0; - - asm volatile ("0: srst %0,%1\n" - " jo 0b" - : "+d" (r0), "+a" (s) : : "cc", "memory"); - return (char *) r0; + unsigned long e = 0; + + asm volatile( + " lghi 0,0\n" + "0: srst %[e],%[s]\n" + " jo 0b\n" + : [e] "+&a" (e), [s] "+&a" (s) + : + : "cc", "memory", "0"); + return (char *)e; } static inline char *__strnend(const char *s, size_t n) { - register unsigned long r0 asm("0") = 0; const char *p = s + n; - asm volatile ("0: srst %0,%1\n" - " jo 0b" - : "+d" (p), "+a" (s) : "d" (r0) : "cc", "memory"); - return (char *) p; + asm volatile( + " lghi 0,0\n" + "0: srst %[p],%[s]\n" + " jo 0b\n" + : [p] "+&d" (p), [s] "+&a" (s) + : + : "cc", "memory", "0"); + return (char *)p; } /** @@ -76,45 +87,21 @@ EXPORT_SYMBOL(strnlen); #ifdef __HAVE_ARCH_STRCPY char *strcpy(char *dest, const char *src) { - register int r0 asm("0") = 0; char *ret = dest; - asm volatile ("0: mvst %0,%1\n" - " jo 0b" - : "+&a" (dest), "+&a" (src) : "d" (r0) - : "cc", "memory" ); + asm volatile( + " lghi 0,0\n" + "0: mvst %[dest],%[src]\n" + " jo 0b\n" + : [dest] "+&a" (dest), [src] "+&a" (src) + : + : "cc", "memory", "0"); return ret; } EXPORT_SYMBOL(strcpy); #endif /** - * strlcpy - Copy a %NUL terminated string into a sized buffer - * @dest: Where to copy the string to - * @src: Where to copy the string from - * @size: size of destination buffer - * - * Compatible with *BSD: the result is always a valid - * NUL-terminated string that fits in the buffer (unless, - * of course, the buffer size is zero). It does not pad - * out the result like strncpy() does. - */ -#ifdef __HAVE_ARCH_STRLCPY -size_t strlcpy(char *dest, const char *src, size_t size) -{ - size_t ret = __strend(src) - src; - - if (size) { - size_t len = (ret >= size) ? size-1 : ret; - dest[len] = '\0'; - memcpy(dest, src, len); - } - return ret; -} -EXPORT_SYMBOL(strlcpy); -#endif - -/** * strncpy - Copy a length-limited, %NUL-terminated string * @dest: Where to copy the string to * @src: Where to copy the string from @@ -144,16 +131,18 @@ EXPORT_SYMBOL(strncpy); #ifdef __HAVE_ARCH_STRCAT char *strcat(char *dest, const char *src) { - register int r0 asm("0") = 0; - unsigned long dummy; + unsigned long dummy = 0; char *ret = dest; - asm volatile ("0: srst %0,%1\n" - " jo 0b\n" - "1: mvst %0,%2\n" - " jo 1b" - : "=&a" (dummy), "+a" (dest), "+a" (src) - : "d" (r0), "0" (0UL) : "cc", "memory" ); + asm volatile( + " lghi 0,0\n" + "0: srst %[dummy],%[dest]\n" + " jo 0b\n" + "1: mvst %[dummy],%[src]\n" + " jo 1b\n" + : [dummy] "+&a" (dummy), [dest] "+&a" (dest), [src] "+&a" (src) + : + : "cc", "memory", "0"); return ret; } EXPORT_SYMBOL(strcat); @@ -221,59 +210,40 @@ EXPORT_SYMBOL(strncat); #ifdef __HAVE_ARCH_STRCMP int strcmp(const char *s1, const char *s2) { - register int r0 asm("0") = 0; int ret = 0; - asm volatile ("0: clst %2,%3\n" - " jo 0b\n" - " je 1f\n" - " ic %0,0(%2)\n" - " ic %1,0(%3)\n" - " sr %0,%1\n" - "1:" - : "+d" (ret), "+d" (r0), "+a" (s1), "+a" (s2) - : : "cc", "memory"); + asm volatile( + " lghi 0,0\n" + "0: clst %[s1],%[s2]\n" + " jo 0b\n" + " je 1f\n" + " ic %[ret],0(%[s1])\n" + " ic 0,0(%[s2])\n" + " sr %[ret],0\n" + "1:" + : [ret] "+&d" (ret), [s1] "+&a" (s1), [s2] "+&a" (s2) + : + : "cc", "memory", "0"); return ret; } EXPORT_SYMBOL(strcmp); #endif -/** - * strrchr - Find the last occurrence of a character in a string - * @s: The string to be searched - * @c: The character to search for - */ -#ifdef __HAVE_ARCH_STRRCHR -char *strrchr(const char *s, int c) -{ - size_t len = __strend(s) - s; - - if (len) - do { - if (s[len] == (char) c) - return (char *) s + len; - } while (--len > 0); - return NULL; -} -EXPORT_SYMBOL(strrchr); -#endif - static inline int clcle(const char *s1, unsigned long l1, const char *s2, unsigned long l2) { - register unsigned long r2 asm("2") = (unsigned long) s1; - register unsigned long r3 asm("3") = (unsigned long) l1; - register unsigned long r4 asm("4") = (unsigned long) s2; - register unsigned long r5 asm("5") = (unsigned long) l2; + union register_pair r1 = { .even = (unsigned long)s1, .odd = l1, }; + union register_pair r3 = { .even = (unsigned long)s2, .odd = l2, }; int cc; - asm volatile ("0: clcle %1,%3,0\n" - " jo 0b\n" - " ipm %0\n" - " srl %0,28" - : "=&d" (cc), "+a" (r2), "+a" (r3), - "+a" (r4), "+a" (r5) : : "cc", "memory"); - return cc; + asm volatile( + "0: clcle %[r1],%[r3],0\n" + " jo 0b\n" + CC_IPM(cc) + : CC_OUT(cc, cc), [r1] "+d" (r1.pair), [r3] "+d" (r3.pair) + : + : CC_CLOBBER_LIST("memory")); + return CC_TRANSFORM(cc); } /** @@ -315,15 +285,18 @@ EXPORT_SYMBOL(strstr); #ifdef __HAVE_ARCH_MEMCHR void *memchr(const void *s, int c, size_t n) { - register int r0 asm("0") = (char) c; const void *ret = s + n; - asm volatile ("0: srst %0,%1\n" - " jo 0b\n" - " jl 1f\n" - " la %0,0\n" - "1:" - : "+a" (ret), "+&a" (s) : "d" (r0) : "cc", "memory"); + asm volatile( + " lgr 0,%[c]\n" + "0: srst %[ret],%[s]\n" + " jo 0b\n" + " jl 1f\n" + " la %[ret],0\n" + "1:" + : [ret] "+&a" (ret), [s] "+&a" (s) + : [c] "d" (c) + : "cc", "memory", "0"); return (void *) ret; } EXPORT_SYMBOL(memchr); @@ -333,7 +306,7 @@ EXPORT_SYMBOL(memchr); * memcmp - Compare two areas of memory * @s1: One area of memory * @s2: Another area of memory - * @count: The size of the area. + * @n: The size of the area. */ #ifdef __HAVE_ARCH_MEMCMP int memcmp(const void *s1, const void *s2, size_t n) @@ -360,13 +333,16 @@ EXPORT_SYMBOL(memcmp); #ifdef __HAVE_ARCH_MEMSCAN void *memscan(void *s, int c, size_t n) { - register int r0 asm("0") = (char) c; const void *ret = s + n; - asm volatile ("0: srst %0,%1\n" - " jo 0b\n" - : "+a" (ret), "+&a" (s) : "d" (r0) : "cc", "memory"); - return (void *) ret; + asm volatile( + " lgr 0,%[c]\n" + "0: srst %[ret],%[s]\n" + " jo 0b\n" + : [ret] "+&a" (ret), [s] "+&a" (s) + : [c] "d" (c) + : "cc", "memory", "0"); + return (void *)ret; } EXPORT_SYMBOL(memscan); #endif diff --git a/arch/s390/lib/test_kprobes.c b/arch/s390/lib/test_kprobes.c new file mode 100644 index 000000000000..9021298c3e8a --- /dev/null +++ b/arch/s390/lib/test_kprobes.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include <linux/kernel.h> +#include <linux/kprobes.h> +#include <linux/random.h> +#include <kunit/test.h> +#include "test_kprobes.h" + +static struct kprobe kp; + +static void setup_kprobe(struct kunit *test, struct kprobe *kp, + const char *symbol, int offset) +{ + kp->offset = offset; + kp->addr = NULL; + kp->symbol_name = symbol; +} + +static void test_kprobe_offset(struct kunit *test, struct kprobe *kp, + const char *target, int offset) +{ + int ret; + + setup_kprobe(test, kp, target, 0); + ret = register_kprobe(kp); + if (!ret) + unregister_kprobe(kp); + KUNIT_EXPECT_EQ(test, 0, ret); + setup_kprobe(test, kp, target, offset); + ret = register_kprobe(kp); + KUNIT_EXPECT_EQ(test, -EINVAL, ret); + if (!ret) + unregister_kprobe(kp); +} + +static void test_kprobe_odd(struct kunit *test) +{ + test_kprobe_offset(test, &kp, "kprobes_target_odd", + kprobes_target_odd_offs); +} + +static void test_kprobe_in_insn4(struct kunit *test) +{ + test_kprobe_offset(test, &kp, "kprobes_target_in_insn4", + kprobes_target_in_insn4_offs); +} + +static void test_kprobe_in_insn6_lo(struct kunit *test) +{ + test_kprobe_offset(test, &kp, "kprobes_target_in_insn6_lo", + kprobes_target_in_insn6_lo_offs); +} + +static void test_kprobe_in_insn6_hi(struct kunit *test) +{ + test_kprobe_offset(test, &kp, "kprobes_target_in_insn6_hi", + kprobes_target_in_insn6_hi_offs); +} + +static struct kunit_case kprobes_testcases[] = { + KUNIT_CASE(test_kprobe_odd), + KUNIT_CASE(test_kprobe_in_insn4), + KUNIT_CASE(test_kprobe_in_insn6_lo), + KUNIT_CASE(test_kprobe_in_insn6_hi), + {} +}; + +static struct kunit_suite kprobes_test_suite = { + .name = "kprobes_test_s390", + .test_cases = kprobes_testcases, +}; + +kunit_test_suites(&kprobes_test_suite); + +MODULE_DESCRIPTION("KUnit tests for kprobes"); +MODULE_LICENSE("GPL"); diff --git a/arch/s390/lib/test_kprobes.h b/arch/s390/lib/test_kprobes.h new file mode 100644 index 000000000000..2b4c9bc337f1 --- /dev/null +++ b/arch/s390/lib/test_kprobes.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +#ifndef TEST_KPROBES_H +#define TEST_KPROBES_H + +extern unsigned long kprobes_target_odd_offs; +extern unsigned long kprobes_target_in_insn4_offs; +extern unsigned long kprobes_target_in_insn6_lo_offs; +extern unsigned long kprobes_target_in_insn6_hi_offs; + +#endif diff --git a/arch/s390/lib/test_kprobes_asm.S b/arch/s390/lib/test_kprobes_asm.S new file mode 100644 index 000000000000..ade7a3042334 --- /dev/null +++ b/arch/s390/lib/test_kprobes_asm.S @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +#include <linux/linkage.h> +#include <asm/ftrace.h> + +#define KPROBES_TARGET_START(name) \ + SYM_FUNC_START(name); \ + FTRACE_GEN_NOP_ASM(name) + +#define KPROBES_TARGET_END(name) \ + SYM_FUNC_END(name); \ + SYM_DATA(name##_offs, .quad 1b - name) + +KPROBES_TARGET_START(kprobes_target_in_insn4) + .word 0x4700 // bc 0,0 +1: .word 0x0000 + br %r14 +KPROBES_TARGET_END(kprobes_target_in_insn4) + +KPROBES_TARGET_START(kprobes_target_in_insn6_lo) + .word 0xe310 // ly 1,0 +1: .word 0x0000 + .word 0x0058 + br %r14 +KPROBES_TARGET_END(kprobes_target_in_insn6_lo) + +KPROBES_TARGET_START(kprobes_target_in_insn6_hi) + .word 0xe310 // ly 1,0 + .word 0x0000 +1: .word 0x0058 + br %r14 +KPROBES_TARGET_END(kprobes_target_in_insn6_hi) + +KPROBES_TARGET_START(kprobes_target_bp) + nop + .word 0x0000 + nop +1: br %r14 +KPROBES_TARGET_END(kprobes_target_bp) + +KPROBES_TARGET_START(kprobes_target_odd) + .byte 0x07 +1: .byte 0x07 + br %r14 +KPROBES_TARGET_END(kprobes_target_odd) diff --git a/arch/s390/lib/test_modules.c b/arch/s390/lib/test_modules.c new file mode 100644 index 000000000000..f96b6a3737e7 --- /dev/null +++ b/arch/s390/lib/test_modules.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include <kunit/test.h> +#include <linux/module.h> + +#include "test_modules.h" + +/* + * Test that modules with many relocations are loaded properly. + */ +static void test_modules_many_vmlinux_relocs(struct kunit *test) +{ + int result = 0; + +#define CALL_RETURN(i) result += test_modules_return_ ## i() + REPEAT_10000(CALL_RETURN); + KUNIT_ASSERT_EQ(test, result, 49995000); +} + +static struct kunit_case modules_testcases[] = { + KUNIT_CASE(test_modules_many_vmlinux_relocs), + {} +}; + +static struct kunit_suite modules_test_suite = { + .name = "modules_test_s390", + .test_cases = modules_testcases, +}; + +kunit_test_suites(&modules_test_suite); + +MODULE_DESCRIPTION("KUnit test that modules with many relocations are loaded properly"); +MODULE_LICENSE("GPL"); diff --git a/arch/s390/lib/test_modules.h b/arch/s390/lib/test_modules.h new file mode 100644 index 000000000000..6371fcf17684 --- /dev/null +++ b/arch/s390/lib/test_modules.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +#ifndef TEST_MODULES_H +#define TEST_MODULES_H + +#define __REPEAT_10000_3(f, x) \ + f(x ## 0); \ + f(x ## 1); \ + f(x ## 2); \ + f(x ## 3); \ + f(x ## 4); \ + f(x ## 5); \ + f(x ## 6); \ + f(x ## 7); \ + f(x ## 8); \ + f(x ## 9) +#define __REPEAT_10000_2(f, x) \ + __REPEAT_10000_3(f, x ## 0); \ + __REPEAT_10000_3(f, x ## 1); \ + __REPEAT_10000_3(f, x ## 2); \ + __REPEAT_10000_3(f, x ## 3); \ + __REPEAT_10000_3(f, x ## 4); \ + __REPEAT_10000_3(f, x ## 5); \ + __REPEAT_10000_3(f, x ## 6); \ + __REPEAT_10000_3(f, x ## 7); \ + __REPEAT_10000_3(f, x ## 8); \ + __REPEAT_10000_3(f, x ## 9) +#define __REPEAT_10000_1(f, x) \ + __REPEAT_10000_2(f, x ## 0); \ + __REPEAT_10000_2(f, x ## 1); \ + __REPEAT_10000_2(f, x ## 2); \ + __REPEAT_10000_2(f, x ## 3); \ + __REPEAT_10000_2(f, x ## 4); \ + __REPEAT_10000_2(f, x ## 5); \ + __REPEAT_10000_2(f, x ## 6); \ + __REPEAT_10000_2(f, x ## 7); \ + __REPEAT_10000_2(f, x ## 8); \ + __REPEAT_10000_2(f, x ## 9) +#define REPEAT_10000(f) \ + __REPEAT_10000_1(f, 0); \ + __REPEAT_10000_1(f, 1); \ + __REPEAT_10000_1(f, 2); \ + __REPEAT_10000_1(f, 3); \ + __REPEAT_10000_1(f, 4); \ + __REPEAT_10000_1(f, 5); \ + __REPEAT_10000_1(f, 6); \ + __REPEAT_10000_1(f, 7); \ + __REPEAT_10000_1(f, 8); \ + __REPEAT_10000_1(f, 9) + +#define DECLARE_RETURN(i) int test_modules_return_ ## i(void) +REPEAT_10000(DECLARE_RETURN); + +#endif diff --git a/arch/s390/lib/test_modules_helpers.c b/arch/s390/lib/test_modules_helpers.c new file mode 100644 index 000000000000..1670349a03eb --- /dev/null +++ b/arch/s390/lib/test_modules_helpers.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include <linux/export.h> + +#include "test_modules.h" + +#define DEFINE_RETURN(i) \ + int test_modules_return_ ## i(void) \ + { \ + return 1 ## i - 10000; \ + } \ + EXPORT_SYMBOL_GPL(test_modules_return_ ## i) +REPEAT_10000(DEFINE_RETURN); diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c index bda7ac0ddd29..6e42100875e7 100644 --- a/arch/s390/lib/test_unwind.c +++ b/arch/s390/lib/test_unwind.c @@ -3,20 +3,28 @@ * Test module for unwind_for_each_frame */ -#define pr_fmt(fmt) "test_unwind: " fmt +#include <kunit/test.h> #include <asm/unwind.h> #include <linux/completion.h> #include <linux/kallsyms.h> #include <linux/kthread.h> +#include <linux/ftrace.h> #include <linux/module.h> +#include <linux/timer.h> +#include <linux/slab.h> #include <linux/string.h> #include <linux/kprobes.h> #include <linux/wait.h> #include <asm/irq.h> -#include <asm/delay.h> + +static struct kunit *current_test; #define BT_BUF_SIZE (PAGE_SIZE * 4) +static bool force_bt; +module_param_named(backtrace, force_bt, bool, 0444); +MODULE_PARM_DESC(backtrace, "print backtraces for all tests"); + /* * To avoid printk line limit split backtrace by lines */ @@ -28,7 +36,7 @@ static void print_backtrace(char *bt) p = strsep(&bt, "\n"); if (!p) break; - pr_err("%s\n", p); + kunit_err(current_test, "%s\n", p); } } @@ -39,7 +47,7 @@ static void print_backtrace(char *bt) static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs, unsigned long sp) { - int frame_count, prev_is_func2, seen_func2_func1; + int frame_count, prev_is_func2, seen_func2_func1, seen_arch_rethook_trampoline; const int max_frames = 128; struct unwind_state state; size_t bt_pos = 0; @@ -48,13 +56,14 @@ static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs, bt = kmalloc(BT_BUF_SIZE, GFP_ATOMIC); if (!bt) { - pr_err("failed to allocate backtrace buffer\n"); + kunit_err(current_test, "failed to allocate backtrace buffer\n"); return -ENOMEM; } /* Unwind. */ frame_count = 0; prev_is_func2 = 0; seen_func2_func1 = 0; + seen_arch_rethook_trampoline = 0; unwind_for_each_frame(&state, task, regs, sp) { unsigned long addr = unwind_get_return_address(&state); char sym[KSYM_SYMBOL_LEN]; @@ -62,8 +71,9 @@ static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs, if (frame_count++ == max_frames) break; if (state.reliable && !addr) { - pr_err("unwind state reliable but addr is 0\n"); - return -EINVAL; + kunit_err(current_test, "unwind state reliable but addr is 0\n"); + ret = -EINVAL; + break; } sprint_symbol(sym, addr); if (bt_pos < BT_BUF_SIZE) { @@ -73,28 +83,34 @@ static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs, stack_type_name(state.stack_info.type), (void *)state.sp, (void *)state.ip); if (bt_pos >= BT_BUF_SIZE) - pr_err("backtrace buffer is too small\n"); + kunit_err(current_test, "backtrace buffer is too small\n"); } frame_count += 1; if (prev_is_func2 && str_has_prefix(sym, "unwindme_func1")) seen_func2_func1 = 1; prev_is_func2 = str_has_prefix(sym, "unwindme_func2"); + if (str_has_prefix(sym, "arch_rethook_trampoline+0x0/")) + seen_arch_rethook_trampoline = 1; } /* Check the results. */ if (unwind_error(&state)) { - pr_err("unwind error\n"); + kunit_err(current_test, "unwind error\n"); ret = -EINVAL; } if (!seen_func2_func1) { - pr_err("unwindme_func2 and unwindme_func1 not found\n"); + kunit_err(current_test, "unwindme_func2 and unwindme_func1 not found\n"); ret = -EINVAL; } if (frame_count == max_frames) { - pr_err("Maximum number of frames exceeded\n"); + kunit_err(current_test, "Maximum number of frames exceeded\n"); ret = -EINVAL; } - if (ret) + if (seen_arch_rethook_trampoline) { + kunit_err(current_test, "arch_rethook_trampoline+0x0 in unwinding results\n"); + ret = -EINVAL; + } + if (ret || force_bt) print_backtrace(bt); kfree(bt); return ret; @@ -118,31 +134,187 @@ static struct unwindme *unwindme; #define UWM_REGS 0x2 /* Pass regs to test_unwind(). */ #define UWM_SP 0x4 /* Pass sp to test_unwind(). */ #define UWM_CALLER 0x8 /* Unwind starting from caller. */ -#define UWM_SWITCH_STACK 0x10 /* Use CALL_ON_STACK. */ +#define UWM_SWITCH_STACK 0x10 /* Use call_on_stack. */ #define UWM_IRQ 0x20 /* Unwind from irq context. */ -#define UWM_PGM 0x40 /* Unwind from program check handler. */ +#define UWM_PGM 0x40 /* Unwind from program check handler */ +#define UWM_KPROBE_ON_FTRACE 0x80 /* Unwind from kprobe handler called via ftrace. */ +#define UWM_FTRACE 0x100 /* Unwind from ftrace handler. */ +#define UWM_KRETPROBE 0x200 /* Unwind through kretprobed function. */ +#define UWM_KRETPROBE_HANDLER 0x400 /* Unwind from kretprobe handler. */ -static __always_inline unsigned long get_psw_addr(void) +static __always_inline struct pt_regs fake_pt_regs(void) { - unsigned long psw_addr; + struct pt_regs regs; + + memset(®s, 0, sizeof(regs)); + regs.gprs[15] = current_stack_pointer; asm volatile( "basr %[psw_addr],0\n" - : [psw_addr] "=d" (psw_addr)); - return psw_addr; + : [psw_addr] "=d" (regs.psw.addr)); + return regs; } -#ifdef CONFIG_KPROBES -static int pgm_pre_handler(struct kprobe *p, struct pt_regs *regs) +static int kretprobe_ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs) { struct unwindme *u = unwindme; + if (!(u->flags & UWM_KRETPROBE_HANDLER)) + return 0; + u->ret = test_unwind(NULL, (u->flags & UWM_REGS) ? regs : NULL, (u->flags & UWM_SP) ? u->sp : 0); + return 0; } + +static noinline notrace int test_unwind_kretprobed_func(struct unwindme *u) +{ + struct pt_regs regs; + + if (!(u->flags & UWM_KRETPROBE)) + return 0; + + regs = fake_pt_regs(); + return test_unwind(NULL, (u->flags & UWM_REGS) ? ®s : NULL, + (u->flags & UWM_SP) ? u->sp : 0); +} + +static noinline int test_unwind_kretprobed_func_caller(struct unwindme *u) +{ + return test_unwind_kretprobed_func(u); +} + +static int test_unwind_kretprobe(struct unwindme *u) +{ + int ret; + struct kretprobe my_kretprobe; + + if (!IS_ENABLED(CONFIG_KPROBES)) + kunit_skip(current_test, "requires CONFIG_KPROBES"); + + u->ret = -1; /* make sure kprobe is called */ + unwindme = u; + + memset(&my_kretprobe, 0, sizeof(my_kretprobe)); + my_kretprobe.handler = kretprobe_ret_handler; + my_kretprobe.maxactive = 1; + my_kretprobe.kp.addr = (kprobe_opcode_t *)test_unwind_kretprobed_func; + + ret = register_kretprobe(&my_kretprobe); + + if (ret < 0) { + kunit_err(current_test, "register_kretprobe failed %d\n", ret); + return -EINVAL; + } + + ret = test_unwind_kretprobed_func_caller(u); + unregister_kretprobe(&my_kretprobe); + unwindme = NULL; + if (u->flags & UWM_KRETPROBE_HANDLER) + ret = u->ret; + return ret; +} + +static int kprobe_pre_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct unwindme *u = unwindme; + + u->ret = test_unwind(NULL, (u->flags & UWM_REGS) ? regs : NULL, + (u->flags & UWM_SP) ? u->sp : 0); + return 0; +} + +extern const char test_unwind_kprobed_insn[]; + +static noinline void test_unwind_kprobed_func(void) +{ + asm volatile( + " nopr %%r7\n" + "test_unwind_kprobed_insn:\n" + " nopr %%r7\n" + :); +} + +static int test_unwind_kprobe(struct unwindme *u) +{ + struct kprobe kp; + int ret; + + if (!IS_ENABLED(CONFIG_KPROBES)) + kunit_skip(current_test, "requires CONFIG_KPROBES"); + if (!IS_ENABLED(CONFIG_KPROBES_ON_FTRACE) && u->flags & UWM_KPROBE_ON_FTRACE) + kunit_skip(current_test, "requires CONFIG_KPROBES_ON_FTRACE"); + + u->ret = -1; /* make sure kprobe is called */ + unwindme = u; + memset(&kp, 0, sizeof(kp)); + kp.pre_handler = kprobe_pre_handler; + kp.addr = u->flags & UWM_KPROBE_ON_FTRACE ? + (kprobe_opcode_t *)test_unwind_kprobed_func : + (kprobe_opcode_t *)test_unwind_kprobed_insn; + ret = register_kprobe(&kp); + if (ret < 0) { + kunit_err(current_test, "register_kprobe failed %d\n", ret); + return -EINVAL; + } + + test_unwind_kprobed_func(); + unregister_kprobe(&kp); + unwindme = NULL; + return u->ret; +} + +static void notrace __used test_unwind_ftrace_handler(unsigned long ip, + unsigned long parent_ip, + struct ftrace_ops *fops, + struct ftrace_regs *fregs) +{ + struct unwindme *u = (struct unwindme *)arch_ftrace_regs(fregs)->regs.gprs[2]; + + u->ret = test_unwind(NULL, (u->flags & UWM_REGS) ? &arch_ftrace_regs(fregs)->regs : NULL, + (u->flags & UWM_SP) ? u->sp : 0); +} + +static noinline int test_unwind_ftraced_func(struct unwindme *u) +{ + return READ_ONCE(u)->ret; +} + +static int test_unwind_ftrace(struct unwindme *u) +{ + int ret; +#ifdef CONFIG_DYNAMIC_FTRACE + struct ftrace_ops *fops; + + fops = kunit_kzalloc(current_test, sizeof(*fops), GFP_KERNEL); + fops->func = test_unwind_ftrace_handler; + fops->flags = FTRACE_OPS_FL_DYNAMIC | + FTRACE_OPS_FL_RECURSION | + FTRACE_OPS_FL_SAVE_REGS | + FTRACE_OPS_FL_PERMANENT; +#else + kunit_skip(current_test, "requires CONFIG_DYNAMIC_FTRACE"); #endif + ret = ftrace_set_filter_ip(fops, (unsigned long)test_unwind_ftraced_func, 0, 0); + if (ret) { + kunit_err(current_test, "failed to set ftrace filter (%d)\n", ret); + return -1; + } + + ret = register_ftrace_function(fops); + if (!ret) { + ret = test_unwind_ftraced_func(u); + unregister_ftrace_function(fops); + } else { + kunit_err(current_test, "failed to register ftrace handler (%d)\n", ret); + } + + ftrace_set_filter_ip(fops, (unsigned long)test_unwind_ftraced_func, 1, 0); + return ret; +} + /* This function may or may not appear in the backtrace. */ static noinline int unwindme_func4(struct unwindme *u) { @@ -153,40 +325,15 @@ static noinline int unwindme_func4(struct unwindme *u) wait_event(u->task_wq, kthread_should_park()); kthread_parkme(); return 0; -#ifdef CONFIG_KPROBES - } else if (u->flags & UWM_PGM) { - struct kprobe kp; - int ret; - - unwindme = u; - memset(&kp, 0, sizeof(kp)); - kp.symbol_name = "do_report_trap"; - kp.pre_handler = pgm_pre_handler; - ret = register_kprobe(&kp); - if (ret < 0) { - pr_err("register_kprobe failed %d\n", ret); - return -EINVAL; - } - - /* - * trigger specification exception - */ - asm volatile( - " mvcl %%r1,%%r1\n" - "0: nopr %%r7\n" - EX_TABLE(0b, 0b) - :); - - unregister_kprobe(&kp); - unwindme = NULL; - return u->ret; -#endif + } else if (u->flags & (UWM_PGM | UWM_KPROBE_ON_FTRACE)) { + return test_unwind_kprobe(u); + } else if (u->flags & (UWM_KRETPROBE | UWM_KRETPROBE_HANDLER)) { + return test_unwind_kretprobe(u); + } else if (u->flags & UWM_FTRACE) { + return test_unwind_ftrace(u); } else { - struct pt_regs regs; + struct pt_regs regs = fake_pt_regs(); - memset(®s, 0, sizeof(regs)); - regs.psw.addr = get_psw_addr(); - regs.gprs[15] = current_stack_pointer(); return test_unwind(NULL, (u->flags & UWM_REGS) ? ®s : NULL, (u->flags & UWM_SP) ? u->sp : 0); @@ -203,12 +350,16 @@ static noinline int unwindme_func3(struct unwindme *u) /* This function must appear in the backtrace. */ static noinline int unwindme_func2(struct unwindme *u) { + unsigned long flags, mflags; int rc; if (u->flags & UWM_SWITCH_STACK) { - preempt_disable(); - rc = CALL_ON_STACK(unwindme_func3, S390_lowcore.nodat_stack, 1, u); - preempt_enable(); + local_irq_save(flags); + local_mcck_save(mflags); + rc = call_on_stack(1, get_lowcore()->nodat_stack, + int, unwindme_func3, struct unwindme *, u); + local_mcck_restore(mflags); + local_irq_restore(flags); return rc; } else { return unwindme_func3(u); @@ -221,31 +372,27 @@ static noinline int unwindme_func1(void *u) return unwindme_func2((struct unwindme *)u); } -static void unwindme_irq_handler(struct ext_code ext_code, - unsigned int param32, - unsigned long param64) +static void unwindme_timer_fn(struct timer_list *unused) { struct unwindme *u = READ_ONCE(unwindme); - if (u && u->task == current) { + if (u) { unwindme = NULL; u->task = NULL; u->ret = unwindme_func1(u); + complete(&u->task_ready); } } +static struct timer_list unwind_timer; + static int test_unwind_irq(struct unwindme *u) { - preempt_disable(); - if (register_external_irq(EXT_IRQ_CLK_COMP, unwindme_irq_handler)) { - pr_info("Couldn't reqister external interrupt handler"); - return -1; - } - u->task = current; unwindme = u; - udelay(1); - unregister_external_irq(EXT_IRQ_CLK_COMP, unwindme_irq_handler); - preempt_enable(); + init_completion(&u->task_ready); + timer_setup(&unwind_timer, unwindme_timer_fn, 0); + mod_timer(&unwind_timer, jiffies + 1); + wait_for_completion(&u->task_ready); return u->ret; } @@ -265,7 +412,7 @@ static int test_unwind_task(struct unwindme *u) */ task = kthread_run(unwindme_func1, u, "%s", __func__); if (IS_ERR(task)) { - pr_err("kthread_run() failed\n"); + kunit_err(current_test, "kthread_run() failed\n"); return PTR_ERR(task); } /* @@ -280,68 +427,97 @@ static int test_unwind_task(struct unwindme *u) return ret; } -static int test_unwind_flags(int flags) +struct test_params { + int flags; + char *name; +}; + +/* + * Create required parameter list for tests + */ +#define TEST_WITH_FLAGS(f) { .flags = f, .name = #f } +static const struct test_params param_list[] = { + TEST_WITH_FLAGS(UWM_DEFAULT), + TEST_WITH_FLAGS(UWM_SP), + TEST_WITH_FLAGS(UWM_REGS), + TEST_WITH_FLAGS(UWM_SWITCH_STACK), + TEST_WITH_FLAGS(UWM_SP | UWM_REGS), + TEST_WITH_FLAGS(UWM_CALLER | UWM_SP), + TEST_WITH_FLAGS(UWM_CALLER | UWM_SP | UWM_REGS), + TEST_WITH_FLAGS(UWM_CALLER | UWM_SP | UWM_REGS | UWM_SWITCH_STACK), + TEST_WITH_FLAGS(UWM_THREAD), + TEST_WITH_FLAGS(UWM_THREAD | UWM_SP), + TEST_WITH_FLAGS(UWM_THREAD | UWM_CALLER | UWM_SP), + TEST_WITH_FLAGS(UWM_IRQ), + TEST_WITH_FLAGS(UWM_IRQ | UWM_SWITCH_STACK), + TEST_WITH_FLAGS(UWM_IRQ | UWM_SP), + TEST_WITH_FLAGS(UWM_IRQ | UWM_REGS), + TEST_WITH_FLAGS(UWM_IRQ | UWM_SP | UWM_REGS), + TEST_WITH_FLAGS(UWM_IRQ | UWM_CALLER | UWM_SP), + TEST_WITH_FLAGS(UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS), + TEST_WITH_FLAGS(UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS | UWM_SWITCH_STACK), + TEST_WITH_FLAGS(UWM_PGM), + TEST_WITH_FLAGS(UWM_PGM | UWM_SP), + TEST_WITH_FLAGS(UWM_PGM | UWM_REGS), + TEST_WITH_FLAGS(UWM_PGM | UWM_SP | UWM_REGS), + TEST_WITH_FLAGS(UWM_KPROBE_ON_FTRACE), + TEST_WITH_FLAGS(UWM_KPROBE_ON_FTRACE | UWM_SP), + TEST_WITH_FLAGS(UWM_KPROBE_ON_FTRACE | UWM_REGS), + TEST_WITH_FLAGS(UWM_KPROBE_ON_FTRACE | UWM_SP | UWM_REGS), + TEST_WITH_FLAGS(UWM_FTRACE), + TEST_WITH_FLAGS(UWM_FTRACE | UWM_SP), + TEST_WITH_FLAGS(UWM_FTRACE | UWM_REGS), + TEST_WITH_FLAGS(UWM_FTRACE | UWM_SP | UWM_REGS), + TEST_WITH_FLAGS(UWM_KRETPROBE), + TEST_WITH_FLAGS(UWM_KRETPROBE | UWM_SP), + TEST_WITH_FLAGS(UWM_KRETPROBE | UWM_REGS), + TEST_WITH_FLAGS(UWM_KRETPROBE | UWM_SP | UWM_REGS), + TEST_WITH_FLAGS(UWM_KRETPROBE_HANDLER), + TEST_WITH_FLAGS(UWM_KRETPROBE_HANDLER | UWM_SP), + TEST_WITH_FLAGS(UWM_KRETPROBE_HANDLER | UWM_REGS), + TEST_WITH_FLAGS(UWM_KRETPROBE_HANDLER | UWM_SP | UWM_REGS), +}; + +/* + * Parameter description generator: required for KUNIT_ARRAY_PARAM() + */ +static void get_desc(const struct test_params *params, char *desc) +{ + strscpy(desc, params->name, KUNIT_PARAM_DESC_SIZE); +} + +/* + * Create test_unwind_gen_params + */ +KUNIT_ARRAY_PARAM(test_unwind, param_list, get_desc); + +static void test_unwind_flags(struct kunit *test) { struct unwindme u; + const struct test_params *params; - u.flags = flags; + current_test = test; + params = (const struct test_params *)test->param_value; + u.flags = params->flags; if (u.flags & UWM_THREAD) - return test_unwind_task(&u); + KUNIT_EXPECT_EQ(test, 0, test_unwind_task(&u)); else if (u.flags & UWM_IRQ) - return test_unwind_irq(&u); + KUNIT_EXPECT_EQ(test, 0, test_unwind_irq(&u)); else - return unwindme_func1(&u); + KUNIT_EXPECT_EQ(test, 0, unwindme_func1(&u)); } -static int test_unwind_init(void) -{ - int ret = 0; - -#define TEST(flags) \ -do { \ - pr_info("[ RUN ] " #flags "\n"); \ - if (!test_unwind_flags((flags))) { \ - pr_info("[ OK ] " #flags "\n"); \ - } else { \ - pr_err("[ FAILED ] " #flags "\n"); \ - ret = -EINVAL; \ - } \ -} while (0) - - TEST(UWM_DEFAULT); - TEST(UWM_SP); - TEST(UWM_REGS); - TEST(UWM_SWITCH_STACK); - TEST(UWM_SP | UWM_REGS); - TEST(UWM_CALLER | UWM_SP); - TEST(UWM_CALLER | UWM_SP | UWM_REGS); - TEST(UWM_CALLER | UWM_SP | UWM_REGS | UWM_SWITCH_STACK); - TEST(UWM_THREAD); - TEST(UWM_THREAD | UWM_SP); - TEST(UWM_THREAD | UWM_CALLER | UWM_SP); - TEST(UWM_IRQ); - TEST(UWM_IRQ | UWM_SWITCH_STACK); - TEST(UWM_IRQ | UWM_SP); - TEST(UWM_IRQ | UWM_REGS); - TEST(UWM_IRQ | UWM_SP | UWM_REGS); - TEST(UWM_IRQ | UWM_CALLER | UWM_SP); - TEST(UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS); - TEST(UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS | UWM_SWITCH_STACK); -#ifdef CONFIG_KPROBES - TEST(UWM_PGM); - TEST(UWM_PGM | UWM_SP); - TEST(UWM_PGM | UWM_REGS); - TEST(UWM_PGM | UWM_SP | UWM_REGS); -#endif -#undef TEST +static struct kunit_case unwind_test_cases[] = { + KUNIT_CASE_PARAM(test_unwind_flags, test_unwind_gen_params), + {} +}; - return ret; -} +static struct kunit_suite test_unwind_suite = { + .name = "test_unwind", + .test_cases = unwind_test_cases, +}; -static void test_unwind_exit(void) -{ -} +kunit_test_suites(&test_unwind_suite); -module_init(test_unwind_init); -module_exit(test_unwind_exit); +MODULE_DESCRIPTION("KUnit test for unwind_for_each_frame"); MODULE_LICENSE("GPL"); diff --git a/arch/s390/lib/tishift.S b/arch/s390/lib/tishift.S new file mode 100644 index 000000000000..96214f51f49b --- /dev/null +++ b/arch/s390/lib/tishift.S @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include <linux/export.h> +#include <linux/linkage.h> +#include <asm/nospec-insn.h> + + .section .noinstr.text, "ax" + + GEN_BR_THUNK %r14 + +SYM_FUNC_START(__ashlti3) + lmg %r0,%r1,0(%r3) + cije %r4,0,1f + lhi %r3,64 + sr %r3,%r4 + jnh 0f + srlg %r3,%r1,0(%r3) + sllg %r0,%r0,0(%r4) + sllg %r1,%r1,0(%r4) + ogr %r0,%r3 + j 1f +0: sllg %r0,%r1,-64(%r4) + lghi %r1,0 +1: stmg %r0,%r1,0(%r2) + BR_EX %r14 +SYM_FUNC_END(__ashlti3) +EXPORT_SYMBOL(__ashlti3) + +SYM_FUNC_START(__ashrti3) + lmg %r0,%r1,0(%r3) + cije %r4,0,1f + lhi %r3,64 + sr %r3,%r4 + jnh 0f + sllg %r3,%r0,0(%r3) + srlg %r1,%r1,0(%r4) + srag %r0,%r0,0(%r4) + ogr %r1,%r3 + j 1f +0: srag %r1,%r0,-64(%r4) + srag %r0,%r0,63 +1: stmg %r0,%r1,0(%r2) + BR_EX %r14 +SYM_FUNC_END(__ashrti3) +EXPORT_SYMBOL(__ashrti3) + +SYM_FUNC_START(__lshrti3) + lmg %r0,%r1,0(%r3) + cije %r4,0,1f + lhi %r3,64 + sr %r3,%r4 + jnh 0f + sllg %r3,%r0,0(%r3) + srlg %r1,%r1,0(%r4) + srlg %r0,%r0,0(%r4) + ogr %r1,%r3 + j 1f +0: srlg %r1,%r0,-64(%r4) + lghi %r0,0 +1: stmg %r0,%r1,0(%r2) + BR_EX %r14 +SYM_FUNC_END(__lshrti3) +EXPORT_SYMBOL(__lshrti3) diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index c4f8039a35e8..cec20db88479 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -8,437 +8,139 @@ * Gerald Schaefer (gerald.schaefer@de.ibm.com) */ -#include <linux/jump_label.h> #include <linux/uaccess.h> #include <linux/export.h> -#include <linux/errno.h> #include <linux/mm.h> -#include <asm/mmu_context.h> -#include <asm/facility.h> - -#ifndef CONFIG_HAVE_MARCH_Z10_FEATURES -static DEFINE_STATIC_KEY_FALSE(have_mvcos); - -static int __init uaccess_init(void) -{ - if (test_facility(27)) - static_branch_enable(&have_mvcos); - return 0; -} -early_initcall(uaccess_init); - -static inline int copy_with_mvcos(void) -{ - if (static_branch_likely(&have_mvcos)) - return 1; - return 0; -} -#else -static inline int copy_with_mvcos(void) -{ - return 1; -} -#endif - -void set_fs(mm_segment_t fs) -{ - current->thread.mm_segment = fs; - if (fs == USER_DS) { - __ctl_load(S390_lowcore.user_asce, 1, 1); - clear_cpu_flag(CIF_ASCE_PRIMARY); - } else { - __ctl_load(S390_lowcore.kernel_asce, 1, 1); - set_cpu_flag(CIF_ASCE_PRIMARY); - } - if (fs & 1) { - if (fs == USER_DS_SACF) - __ctl_load(S390_lowcore.user_asce, 7, 7); - else - __ctl_load(S390_lowcore.kernel_asce, 7, 7); - set_cpu_flag(CIF_ASCE_SECONDARY); +#include <asm/asm-extable.h> +#include <asm/ctlreg.h> + +#ifdef CONFIG_DEBUG_ENTRY +void debug_user_asce(int exit) +{ + struct ctlreg cr1, cr7; + + local_ctl_store(1, &cr1); + local_ctl_store(7, &cr7); + if (cr1.val == get_lowcore()->kernel_asce.val && cr7.val == get_lowcore()->user_asce.val) + return; + panic("incorrect ASCE on kernel %s\n" + "cr1: %016lx cr7: %016lx\n" + "kernel: %016lx user: %016lx\n", + exit ? "exit" : "entry", cr1.val, cr7.val, + get_lowcore()->kernel_asce.val, get_lowcore()->user_asce.val); +} +#endif /*CONFIG_DEBUG_ENTRY */ + +union oac { + unsigned int val; + struct { + struct { + unsigned short key : 4; + unsigned short : 4; + unsigned short as : 2; + unsigned short : 4; + unsigned short k : 1; + unsigned short a : 1; + } oac1; + struct { + unsigned short key : 4; + unsigned short : 4; + unsigned short as : 2; + unsigned short : 4; + unsigned short k : 1; + unsigned short a : 1; + } oac2; + }; +}; + +static uaccess_kmsan_or_inline __must_check unsigned long +raw_copy_from_user_key(void *to, const void __user *from, unsigned long size, unsigned long key) +{ + unsigned long osize; + union oac spec = { + .oac2.key = key, + .oac2.as = PSW_BITS_AS_SECONDARY, + .oac2.k = 1, + .oac2.a = 1, + }; + int cc; + + while (1) { + osize = size; + asm_inline volatile( + " lr %%r0,%[spec]\n" + "0: mvcos %[to],%[from],%[size]\n" + "1: nopr %%r7\n" + CC_IPM(cc) + EX_TABLE_UA_MVCOS_FROM(0b, 0b) + EX_TABLE_UA_MVCOS_FROM(1b, 0b) + : CC_OUT(cc, cc), [size] "+d" (size), [to] "=Q" (*(char *)to) + : [spec] "d" (spec.val), [from] "Q" (*(const char __user *)from) + : CC_CLOBBER_LIST("memory", "0")); + if (CC_TRANSFORM(cc) == 0) + return osize - size; + size -= 4096; + to += 4096; + from += 4096; } } -EXPORT_SYMBOL(set_fs); -mm_segment_t enable_sacf_uaccess(void) +unsigned long _copy_from_user_key(void *to, const void __user *from, + unsigned long n, unsigned long key) { - mm_segment_t old_fs; - unsigned long asce, cr; + unsigned long res = n; - old_fs = current->thread.mm_segment; - if (old_fs & 1) - return old_fs; - current->thread.mm_segment |= 1; - asce = S390_lowcore.kernel_asce; - if (likely(old_fs == USER_DS)) { - __ctl_store(cr, 1, 1); - if (cr != S390_lowcore.kernel_asce) { - __ctl_load(S390_lowcore.kernel_asce, 1, 1); - set_cpu_flag(CIF_ASCE_PRIMARY); - } - asce = S390_lowcore.user_asce; + might_fault(); + if (!should_fail_usercopy()) { + instrument_copy_from_user_before(to, from, n); + res = raw_copy_from_user_key(to, from, n, key); + instrument_copy_from_user_after(to, from, n, res); } - __ctl_store(cr, 7, 7); - if (cr != asce) { - __ctl_load(asce, 7, 7); - set_cpu_flag(CIF_ASCE_SECONDARY); + if (unlikely(res)) + memset(to + (n - res), 0, res); + return res; +} +EXPORT_SYMBOL(_copy_from_user_key); + +static uaccess_kmsan_or_inline __must_check unsigned long +raw_copy_to_user_key(void __user *to, const void *from, unsigned long size, unsigned long key) +{ + unsigned long osize; + union oac spec = { + .oac1.key = key, + .oac1.as = PSW_BITS_AS_SECONDARY, + .oac1.k = 1, + .oac1.a = 1, + }; + int cc; + + while (1) { + osize = size; + asm_inline volatile( + " lr %%r0,%[spec]\n" + "0: mvcos %[to],%[from],%[size]\n" + "1: nopr %%r7\n" + CC_IPM(cc) + EX_TABLE_UA_MVCOS_TO(0b, 0b) + EX_TABLE_UA_MVCOS_TO(1b, 0b) + : CC_OUT(cc, cc), [size] "+d" (size), [to] "=Q" (*(char __user *)to) + : [spec] "d" (spec.val), [from] "Q" (*(const char *)from) + : CC_CLOBBER_LIST("memory", "0")); + if (CC_TRANSFORM(cc) == 0) + return osize - size; + size -= 4096; + to += 4096; + from += 4096; } - return old_fs; -} -EXPORT_SYMBOL(enable_sacf_uaccess); - -void disable_sacf_uaccess(mm_segment_t old_fs) -{ - current->thread.mm_segment = old_fs; - if (old_fs == USER_DS && test_facility(27)) { - __ctl_load(S390_lowcore.user_asce, 1, 1); - clear_cpu_flag(CIF_ASCE_PRIMARY); - } -} -EXPORT_SYMBOL(disable_sacf_uaccess); - -static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr, - unsigned long size) -{ - register unsigned long reg0 asm("0") = 0x01UL; - unsigned long tmp1, tmp2; - - tmp1 = -4096UL; - asm volatile( - "0: .insn ss,0xc80000000000,0(%0,%2),0(%1),0\n" - "6: jz 4f\n" - "1: algr %0,%3\n" - " slgr %1,%3\n" - " slgr %2,%3\n" - " j 0b\n" - "2: la %4,4095(%1)\n"/* %4 = ptr + 4095 */ - " nr %4,%3\n" /* %4 = (ptr + 4095) & -4096 */ - " slgr %4,%1\n" - " clgr %0,%4\n" /* copy crosses next page boundary? */ - " jnh 5f\n" - "3: .insn ss,0xc80000000000,0(%4,%2),0(%1),0\n" - "7: slgr %0,%4\n" - " j 5f\n" - "4: slgr %0,%0\n" - "5:\n" - EX_TABLE(0b,2b) EX_TABLE(3b,5b) EX_TABLE(6b,2b) EX_TABLE(7b,5b) - : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) - : "d" (reg0) : "cc", "memory"); - return size; -} - -static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr, - unsigned long size) -{ - unsigned long tmp1, tmp2; - mm_segment_t old_fs; - - old_fs = enable_sacf_uaccess(); - tmp1 = -256UL; - asm volatile( - " sacf 0\n" - "0: mvcp 0(%0,%2),0(%1),%3\n" - "7: jz 5f\n" - "1: algr %0,%3\n" - " la %1,256(%1)\n" - " la %2,256(%2)\n" - "2: mvcp 0(%0,%2),0(%1),%3\n" - "8: jnz 1b\n" - " j 5f\n" - "3: la %4,255(%1)\n" /* %4 = ptr + 255 */ - " lghi %3,-4096\n" - " nr %4,%3\n" /* %4 = (ptr + 255) & -4096 */ - " slgr %4,%1\n" - " clgr %0,%4\n" /* copy crosses next page boundary? */ - " jnh 6f\n" - "4: mvcp 0(%4,%2),0(%1),%3\n" - "9: slgr %0,%4\n" - " j 6f\n" - "5: slgr %0,%0\n" - "6: sacf 768\n" - EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,6b) - EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b) - : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) - : : "cc", "memory"); - disable_sacf_uaccess(old_fs); - return size; -} - -unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n) -{ - if (copy_with_mvcos()) - return copy_from_user_mvcos(to, from, n); - return copy_from_user_mvcp(to, from, n); -} -EXPORT_SYMBOL(raw_copy_from_user); - -static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x, - unsigned long size) -{ - register unsigned long reg0 asm("0") = 0x010000UL; - unsigned long tmp1, tmp2; - - tmp1 = -4096UL; - asm volatile( - "0: .insn ss,0xc80000000000,0(%0,%1),0(%2),0\n" - "6: jz 4f\n" - "1: algr %0,%3\n" - " slgr %1,%3\n" - " slgr %2,%3\n" - " j 0b\n" - "2: la %4,4095(%1)\n"/* %4 = ptr + 4095 */ - " nr %4,%3\n" /* %4 = (ptr + 4095) & -4096 */ - " slgr %4,%1\n" - " clgr %0,%4\n" /* copy crosses next page boundary? */ - " jnh 5f\n" - "3: .insn ss,0xc80000000000,0(%4,%1),0(%2),0\n" - "7: slgr %0,%4\n" - " j 5f\n" - "4: slgr %0,%0\n" - "5:\n" - EX_TABLE(0b,2b) EX_TABLE(3b,5b) EX_TABLE(6b,2b) EX_TABLE(7b,5b) - : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) - : "d" (reg0) : "cc", "memory"); - return size; -} - -static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x, - unsigned long size) -{ - unsigned long tmp1, tmp2; - mm_segment_t old_fs; - - old_fs = enable_sacf_uaccess(); - tmp1 = -256UL; - asm volatile( - " sacf 0\n" - "0: mvcs 0(%0,%1),0(%2),%3\n" - "7: jz 5f\n" - "1: algr %0,%3\n" - " la %1,256(%1)\n" - " la %2,256(%2)\n" - "2: mvcs 0(%0,%1),0(%2),%3\n" - "8: jnz 1b\n" - " j 5f\n" - "3: la %4,255(%1)\n" /* %4 = ptr + 255 */ - " lghi %3,-4096\n" - " nr %4,%3\n" /* %4 = (ptr + 255) & -4096 */ - " slgr %4,%1\n" - " clgr %0,%4\n" /* copy crosses next page boundary? */ - " jnh 6f\n" - "4: mvcs 0(%4,%1),0(%2),%3\n" - "9: slgr %0,%4\n" - " j 6f\n" - "5: slgr %0,%0\n" - "6: sacf 768\n" - EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,6b) - EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b) - : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) - : : "cc", "memory"); - disable_sacf_uaccess(old_fs); - return size; } -unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n) +unsigned long _copy_to_user_key(void __user *to, const void *from, + unsigned long n, unsigned long key) { - if (copy_with_mvcos()) - return copy_to_user_mvcos(to, from, n); - return copy_to_user_mvcs(to, from, n); -} -EXPORT_SYMBOL(raw_copy_to_user); - -static inline unsigned long copy_in_user_mvcos(void __user *to, const void __user *from, - unsigned long size) -{ - register unsigned long reg0 asm("0") = 0x010001UL; - unsigned long tmp1, tmp2; - - tmp1 = -4096UL; - /* FIXME: copy with reduced length. */ - asm volatile( - "0: .insn ss,0xc80000000000,0(%0,%1),0(%2),0\n" - " jz 2f\n" - "1: algr %0,%3\n" - " slgr %1,%3\n" - " slgr %2,%3\n" - " j 0b\n" - "2:slgr %0,%0\n" - "3: \n" - EX_TABLE(0b,3b) - : "+a" (size), "+a" (to), "+a" (from), "+a" (tmp1), "=a" (tmp2) - : "d" (reg0) : "cc", "memory"); - return size; -} - -static inline unsigned long copy_in_user_mvc(void __user *to, const void __user *from, - unsigned long size) -{ - mm_segment_t old_fs; - unsigned long tmp1; - - old_fs = enable_sacf_uaccess(); - asm volatile( - " sacf 256\n" - " aghi %0,-1\n" - " jo 5f\n" - " bras %3,3f\n" - "0: aghi %0,257\n" - "1: mvc 0(1,%1),0(%2)\n" - " la %1,1(%1)\n" - " la %2,1(%2)\n" - " aghi %0,-1\n" - " jnz 1b\n" - " j 5f\n" - "2: mvc 0(256,%1),0(%2)\n" - " la %1,256(%1)\n" - " la %2,256(%2)\n" - "3: aghi %0,-256\n" - " jnm 2b\n" - "4: ex %0,1b-0b(%3)\n" - "5: slgr %0,%0\n" - "6: sacf 768\n" - EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b) - : "+a" (size), "+a" (to), "+a" (from), "=a" (tmp1) - : : "cc", "memory"); - disable_sacf_uaccess(old_fs); - return size; -} - -unsigned long raw_copy_in_user(void __user *to, const void __user *from, unsigned long n) -{ - if (copy_with_mvcos()) - return copy_in_user_mvcos(to, from, n); - return copy_in_user_mvc(to, from, n); -} -EXPORT_SYMBOL(raw_copy_in_user); - -static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size) -{ - register unsigned long reg0 asm("0") = 0x010000UL; - unsigned long tmp1, tmp2; - - tmp1 = -4096UL; - asm volatile( - "0: .insn ss,0xc80000000000,0(%0,%1),0(%4),0\n" - " jz 4f\n" - "1: algr %0,%2\n" - " slgr %1,%2\n" - " j 0b\n" - "2: la %3,4095(%1)\n"/* %4 = to + 4095 */ - " nr %3,%2\n" /* %4 = (to + 4095) & -4096 */ - " slgr %3,%1\n" - " clgr %0,%3\n" /* copy crosses next page boundary? */ - " jnh 5f\n" - "3: .insn ss,0xc80000000000,0(%3,%1),0(%4),0\n" - " slgr %0,%3\n" - " j 5f\n" - "4: slgr %0,%0\n" - "5:\n" - EX_TABLE(0b,2b) EX_TABLE(3b,5b) - : "+a" (size), "+a" (to), "+a" (tmp1), "=a" (tmp2) - : "a" (empty_zero_page), "d" (reg0) : "cc", "memory"); - return size; -} - -static inline unsigned long clear_user_xc(void __user *to, unsigned long size) -{ - mm_segment_t old_fs; - unsigned long tmp1, tmp2; - - old_fs = enable_sacf_uaccess(); - asm volatile( - " sacf 256\n" - " aghi %0,-1\n" - " jo 5f\n" - " bras %3,3f\n" - " xc 0(1,%1),0(%1)\n" - "0: aghi %0,257\n" - " la %2,255(%1)\n" /* %2 = ptr + 255 */ - " srl %2,12\n" - " sll %2,12\n" /* %2 = (ptr + 255) & -4096 */ - " slgr %2,%1\n" - " clgr %0,%2\n" /* clear crosses next page boundary? */ - " jnh 5f\n" - " aghi %2,-1\n" - "1: ex %2,0(%3)\n" - " aghi %2,1\n" - " slgr %0,%2\n" - " j 5f\n" - "2: xc 0(256,%1),0(%1)\n" - " la %1,256(%1)\n" - "3: aghi %0,-256\n" - " jnm 2b\n" - "4: ex %0,0(%3)\n" - "5: slgr %0,%0\n" - "6: sacf 768\n" - EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b) - : "+a" (size), "+a" (to), "=a" (tmp1), "=a" (tmp2) - : : "cc", "memory"); - disable_sacf_uaccess(old_fs); - return size; -} - -unsigned long __clear_user(void __user *to, unsigned long size) -{ - if (copy_with_mvcos()) - return clear_user_mvcos(to, size); - return clear_user_xc(to, size); -} -EXPORT_SYMBOL(__clear_user); - -static inline unsigned long strnlen_user_srst(const char __user *src, - unsigned long size) -{ - register unsigned long reg0 asm("0") = 0; - unsigned long tmp1, tmp2; - - asm volatile( - " la %2,0(%1)\n" - " la %3,0(%0,%1)\n" - " slgr %0,%0\n" - " sacf 256\n" - "0: srst %3,%2\n" - " jo 0b\n" - " la %0,1(%3)\n" /* strnlen_user results includes \0 */ - " slgr %0,%1\n" - "1: sacf 768\n" - EX_TABLE(0b,1b) - : "+a" (size), "+a" (src), "=a" (tmp1), "=a" (tmp2) - : "d" (reg0) : "cc", "memory"); - return size; -} - -unsigned long __strnlen_user(const char __user *src, unsigned long size) -{ - mm_segment_t old_fs; - unsigned long len; - - if (unlikely(!size)) - return 0; - old_fs = enable_sacf_uaccess(); - len = strnlen_user_srst(src, size); - disable_sacf_uaccess(old_fs); - return len; -} -EXPORT_SYMBOL(__strnlen_user); - -long __strncpy_from_user(char *dst, const char __user *src, long size) -{ - size_t done, len, offset, len_str; - - if (unlikely(size <= 0)) - return 0; - done = 0; - do { - offset = (size_t)src & (L1_CACHE_BYTES - 1); - len = min(size - done, L1_CACHE_BYTES - offset); - if (copy_from_user(dst, src, len)) - return -EFAULT; - len_str = strnlen(dst, len); - done += len_str; - src += len_str; - dst += len_str; - } while ((len_str == len) && (done < size)); - return done; + might_fault(); + if (should_fail_usercopy()) + return n; + instrument_copy_to_user(to, from, n); + return raw_copy_to_user_key(to, from, n, key); } -EXPORT_SYMBOL(__strncpy_from_user); +EXPORT_SYMBOL(_copy_to_user_key); diff --git a/arch/s390/lib/xor.c b/arch/s390/lib/xor.c index 29d9470dbceb..ce7bcf7c0032 100644 --- a/arch/s390/lib/xor.c +++ b/arch/s390/lib/xor.c @@ -11,10 +11,10 @@ #include <linux/raid/xor.h> #include <asm/xor.h> -static void xor_xc_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +static void xor_xc_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) { asm volatile( - " larl 1,2f\n" " aghi %0,-1\n" " jm 3f\n" " srlg 0,%0,8\n" @@ -24,21 +24,21 @@ static void xor_xc_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) " la %1,256(%1)\n" " la %2,256(%2)\n" " brctg 0,0b\n" - "1: ex %0,0(1)\n" + "1: exrl %0,2f\n" " j 3f\n" "2: xc 0(1,%1),0(%2)\n" "3:\n" : : "d" (bytes), "a" (p1), "a" (p2) - : "0", "1", "cc", "memory"); + : "0", "cc", "memory"); } -static void xor_xc_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3) +static void xor_xc_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) { asm volatile( - " larl 1,2f\n" " aghi %0,-1\n" - " jm 3f\n" + " jm 4f\n" " srlg 0,%0,8\n" " ltgr 0,0\n" " jz 1f\n" @@ -48,23 +48,24 @@ static void xor_xc_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, " la %2,256(%2)\n" " la %3,256(%3)\n" " brctg 0,0b\n" - "1: ex %0,0(1)\n" - " ex %0,6(1)\n" - " j 3f\n" + "1: exrl %0,2f\n" + " exrl %0,3f\n" + " j 4f\n" "2: xc 0(1,%1),0(%2)\n" - " xc 0(1,%1),0(%3)\n" - "3:\n" + "3: xc 0(1,%1),0(%3)\n" + "4:\n" : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3) - : : "0", "1", "cc", "memory"); + : : "0", "cc", "memory"); } -static void xor_xc_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4) +static void xor_xc_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) { asm volatile( - " larl 1,2f\n" " aghi %0,-1\n" - " jm 3f\n" + " jm 5f\n" " srlg 0,%0,8\n" " ltgr 0,0\n" " jz 1f\n" @@ -76,28 +77,28 @@ static void xor_xc_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, " la %3,256(%3)\n" " la %4,256(%4)\n" " brctg 0,0b\n" - "1: ex %0,0(1)\n" - " ex %0,6(1)\n" - " ex %0,12(1)\n" - " j 3f\n" + "1: exrl %0,2f\n" + " exrl %0,3f\n" + " exrl %0,4f\n" + " j 5f\n" "2: xc 0(1,%1),0(%2)\n" - " xc 0(1,%1),0(%3)\n" - " xc 0(1,%1),0(%4)\n" - "3:\n" + "3: xc 0(1,%1),0(%3)\n" + "4: xc 0(1,%1),0(%4)\n" + "5:\n" : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4) - : : "0", "1", "cc", "memory"); + : : "0", "cc", "memory"); } -static void xor_xc_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4, unsigned long *p5) +static void xor_xc_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) { - /* Get around a gcc oddity */ - register unsigned long *reg7 asm ("7") = p5; - asm volatile( " larl 1,2f\n" " aghi %0,-1\n" - " jm 3f\n" + " jm 6f\n" " srlg 0,%0,8\n" " ltgr 0,0\n" " jz 1f\n" @@ -111,19 +112,19 @@ static void xor_xc_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, " la %4,256(%4)\n" " la %5,256(%5)\n" " brctg 0,0b\n" - "1: ex %0,0(1)\n" - " ex %0,6(1)\n" - " ex %0,12(1)\n" - " ex %0,18(1)\n" - " j 3f\n" + "1: exrl %0,2f\n" + " exrl %0,3f\n" + " exrl %0,4f\n" + " exrl %0,5f\n" + " j 6f\n" "2: xc 0(1,%1),0(%2)\n" - " xc 0(1,%1),0(%3)\n" - " xc 0(1,%1),0(%4)\n" - " xc 0(1,%1),0(%5)\n" - "3:\n" + "3: xc 0(1,%1),0(%3)\n" + "4: xc 0(1,%1),0(%4)\n" + "5: xc 0(1,%1),0(%5)\n" + "6:\n" : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4), - "+a" (reg7) - : : "0", "1", "cc", "memory"); + "+a" (p5) + : : "0", "cc", "memory"); } struct xor_block_template xor_block_xc = { |