diff options
90 files changed, 2265 insertions, 625 deletions
diff --git a/Documentation/admin-guide/device-mapper/verity.rst b/Documentation/admin-guide/device-mapper/verity.rst index 8c50e5c96ee1..1a6b91368e59 100644 --- a/Documentation/admin-guide/device-mapper/verity.rst +++ b/Documentation/admin-guide/device-mapper/verity.rst @@ -134,7 +134,12 @@ root_hash_sig_key_desc <key_description> the pkcs7 signature of the roothash. The pkcs7 signature is used to validate the root hash during the creation of the device mapper block device. Verification of roothash depends on the config DM_VERITY_VERIFY_ROOTHASH_SIG - being set in the kernel. + being set in the kernel. The signatures are checked against the builtin + trusted keyring by default, or the secondary trusted keyring if + DM_VERITY_VERIFY_ROOTHASH_SIG_SECONDARY_KEYRING is set. The secondary + trusted keyring includes by default the builtin trusted keyring, and it can + also gain new certificates at run time if they are signed by a certificate + already in the secondary trusted keyring. Theory of operation =================== diff --git a/Documentation/arm/memory.rst b/Documentation/arm/memory.rst index 0521b4ce5c96..0cb1e2938823 100644 --- a/Documentation/arm/memory.rst +++ b/Documentation/arm/memory.rst @@ -45,9 +45,14 @@ fffe8000 fffeffff DTCM mapping area for platforms with fffe0000 fffe7fff ITCM mapping area for platforms with ITCM mounted inside the CPU. -ffc00000 ffefffff Fixmap mapping region. Addresses provided +ffc80000 ffefffff Fixmap mapping region. Addresses provided by fix_to_virt() will be located here. +ffc00000 ffc7ffff Guard region + +ff800000 ffbfffff Permanent, fixed read-only mapping of the + firmware provided DT blob + fee00000 feffffff Mapping of PCI I/O space. This is a static mapping within the vmalloc space. @@ -72,6 +77,11 @@ MODULES_VADDR MODULES_END-1 Kernel module space Kernel modules inserted via insmod are placed here using dynamic mappings. +TASK_SIZE MODULES_VADDR-1 KASAn shadow memory when KASan is in use. + The range from MODULES_VADDR to the top + of the memory is shadowed here with 1 bit + per byte of memory. + 00001000 TASK_SIZE-1 User space mappings Per-thread mappings are placed here via the mmap() system call. diff --git a/Documentation/dev-tools/kasan.rst b/Documentation/dev-tools/kasan.rst index e03513f6b079..0fc3fb1860c4 100644 --- a/Documentation/dev-tools/kasan.rst +++ b/Documentation/dev-tools/kasan.rst @@ -21,7 +21,7 @@ out-of-bounds accesses for global variables is only supported since Clang 11. Tag-based KASAN is only supported in Clang. -Currently generic KASAN is supported for the x86_64, arm64, xtensa, s390 and +Currently generic KASAN is supported for the x86_64, arm, arm64, xtensa, s390 and riscv architectures, and tag-based KASAN modes are supported only for arm64. Usage diff --git a/Documentation/features/debug/KASAN/arch-support.txt b/Documentation/features/debug/KASAN/arch-support.txt index c3fe9b266e7b..b2288dc14b72 100644 --- a/Documentation/features/debug/KASAN/arch-support.txt +++ b/Documentation/features/debug/KASAN/arch-support.txt @@ -8,7 +8,7 @@ ----------------------- | alpha: | TODO | | arc: | TODO | - | arm: | TODO | + | arm: | ok | | arm64: | ok | | c6x: | TODO | | csky: | TODO | diff --git a/MAINTAINERS b/MAINTAINERS index e58d4fa68b14..ad0e34bf8453 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5297,6 +5297,12 @@ F: include/linux/dma-mapping.h F: include/linux/dma-map-ops.h F: kernel/dma/ +DMA MAPPING BENCHMARK +M: Barry Song <song.bao.hua@hisilicon.com> +L: iommu@lists.linux-foundation.org +F: kernel/dma/map_benchmark.c +F: tools/testing/selftests/dma/ + DMA-BUF HEAPS FRAMEWORK M: Sumit Semwal <sumit.semwal@linaro.org> R: Benjamin Gaignard <benjamin.gaignard@linaro.org> diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index b8ade91281bc..138248999df7 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -68,6 +68,7 @@ config ARM select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6 select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU + select HAVE_ARCH_KASAN if MMU && !XIP_KERNEL select HAVE_ARCH_MMAP_RND_BITS if MMU select HAVE_ARCH_PFN_VALID select HAVE_ARCH_SECCOMP @@ -245,7 +246,7 @@ config ARM_PATCH_PHYS_VIRT kernel in system memory. This can only be used with non-XIP MMU kernels where the base - of physical memory is at a 16MB boundary. + of physical memory is at a 2 MiB boundary. Only disable this option if you know that you do not require this feature (eg, building a kernel for a single machine) and @@ -1298,6 +1299,15 @@ config PAGE_OFFSET default 0xB0000000 if VMSPLIT_3G_OPT default 0xC0000000 +config KASAN_SHADOW_OFFSET + hex + depends on KASAN + default 0x1f000000 if PAGE_OFFSET=0x40000000 + default 0x5f000000 if PAGE_OFFSET=0x80000000 + default 0x9f000000 if PAGE_OFFSET=0xC0000000 + default 0x8f000000 if PAGE_OFFSET=0xB0000000 + default 0xffffffff + config NR_CPUS int "Maximum number of CPUs (2-32)" range 2 32 diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 4a066c687cec..4aaec9599e8a 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -10,7 +10,7 @@ # # Copyright (C) 1995-2001 by Russell King -LDFLAGS_vmlinux := --no-undefined -X --pic-veneer +LDFLAGS_vmlinux := --no-undefined -X --pic-veneer -z norelro ifeq ($(CONFIG_CPU_ENDIAN_BE8),y) LDFLAGS_vmlinux += --be8 KBUILD_LDFLAGS_MODULE += --be8 diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index e1567418a2b1..fb521efcc6c2 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -24,6 +24,7 @@ OBJS += hyp-stub.o endif GCOV_PROFILE := n +KASAN_SANITIZE := n # Prevents link failures: __sanitizer_cov_trace_pc() is not linked in. KCOV_INSTRUMENT := n diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index caa27322a0ab..d9cce7238a36 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -11,6 +11,12 @@ #include "efi-header.S" +#ifdef __ARMEB__ +#define OF_DT_MAGIC 0xd00dfeed +#else +#define OF_DT_MAGIC 0xedfe0dd0 +#endif + AR_CLASS( .arch armv7-a ) M_CLASS( .arch armv7-m ) @@ -116,7 +122,7 @@ /* * Debug print of the final appended DTB location */ - .macro dbgadtb, begin, end + .macro dbgadtb, begin, size #ifdef DEBUG kputc #'D' kputc #'T' @@ -129,7 +135,7 @@ kputc #'(' kputc #'0' kputc #'x' - kphex \end, 8 /* End of appended DTB */ + kphex \size, 8 /* Size of appended DTB */ kputc #')' kputc #'\n' #endif @@ -165,6 +171,16 @@ orr \res, \res, \tmp1, lsl #24 .endm + .macro be32tocpu, val, tmp +#ifndef __ARMEB__ + /* convert to little endian */ + eor \tmp, \val, \val, ror #16 + bic \tmp, \tmp, #0x00ff0000 + mov \val, \val, ror #8 + eor \val, \val, \tmp, lsr #8 +#endif + .endm + .section ".start", "ax" /* * sort out different calling conventions @@ -325,11 +341,7 @@ restart: adr r0, LC1 */ ldr lr, [r6, #0] -#ifndef __ARMEB__ - ldr r1, =0xedfe0dd0 @ sig is 0xd00dfeed big endian -#else - ldr r1, =0xd00dfeed -#endif + ldr r1, =OF_DT_MAGIC cmp lr, r1 bne dtb_check_done @ not found @@ -345,13 +357,7 @@ restart: adr r0, LC1 /* Get the initial DTB size */ ldr r5, [r6, #4] -#ifndef __ARMEB__ - /* convert to little endian */ - eor r1, r5, r5, ror #16 - bic r1, r1, #0x00ff0000 - mov r5, r5, ror #8 - eor r5, r5, r1, lsr #8 -#endif + be32tocpu r5, r1 dbgadtb r6, r5 /* 50% DTB growth should be good enough */ add r5, r5, r5, lsr #1 @@ -403,13 +409,7 @@ restart: adr r0, LC1 /* Get the current DTB size */ ldr r5, [r6, #4] -#ifndef __ARMEB__ - /* convert r5 (dtb size) to little endian */ - eor r1, r5, r5, ror #16 - bic r1, r1, #0x00ff0000 - mov r5, r5, ror #8 - eor r5, r5, r1, lsr #8 -#endif + be32tocpu r5, r1 /* preserve 64-bit alignment */ add r5, r5, #7 @@ -468,15 +468,10 @@ dtb_check_done: /* * Compute the address of the hyp vectors after relocation. - * This requires some arithmetic since we cannot directly - * reference __hyp_stub_vectors in a PC-relative way. * Call __hyp_set_vectors with the new address so that we * can HVC again after the copy. */ -0: adr r0, 0b - movw r1, #:lower16:__hyp_stub_vectors - 0b - movt r1, #:upper16:__hyp_stub_vectors - 0b - add r0, r0, r1 + adr_l r0, __hyp_stub_vectors sub r0, r0, r5 add r0, r0, r10 bl __hyp_set_vectors @@ -627,17 +622,11 @@ not_relocated: mov r0, #0 cmp r0, #HYP_MODE @ if not booted in HYP mode... bne __enter_kernel @ boot kernel directly - adr r12, .L__hyp_reentry_vectors_offset - ldr r0, [r12] - add r0, r0, r12 - + adr_l r0, __hyp_reentry_vectors bl __hyp_set_vectors __HVC(0) @ otherwise bounce to hyp mode b . @ should never be reached - - .align 2 -.L__hyp_reentry_vectors_offset: .long __hyp_reentry_vectors - . #else b __enter_kernel #endif @@ -1440,8 +1429,7 @@ ENTRY(efi_enter_kernel) mov r4, r0 @ preserve image base mov r8, r1 @ preserve DT pointer - ARM( adrl r0, call_cache_fn ) - THUMB( adr r0, call_cache_fn ) + adr_l r0, call_cache_fn adr r1, 0f @ clean the region of code we bl cache_clean_flush @ may run with the MMU off diff --git a/arch/arm/boot/compressed/string.c b/arch/arm/boot/compressed/string.c index ade5079bebbf..8c0fa276d994 100644 --- a/arch/arm/boot/compressed/string.c +++ b/arch/arm/boot/compressed/string.c @@ -7,6 +7,25 @@ #include <linux/string.h> +/* + * The decompressor is built without KASan but uses the same redirects as the + * rest of the kernel when CONFIG_KASAN is enabled, defining e.g. memcpy() + * to __memcpy() but since we are not linking with the main kernel string + * library in the decompressor, that will lead to link failures. + * + * Undefine KASan's versions, define the wrapped functions and alias them to + * the right names so that when e.g. __memcpy() appear in the code, it will + * still be linked to this local version of memcpy(). + */ +#ifdef CONFIG_KASAN +#undef memcpy +#undef memmove +#undef memset +void *__memcpy(void *__dest, __const void *__src, size_t __n) __alias(memcpy); +void *__memmove(void *__dest, __const void *__src, size_t count) __alias(memmove); +void *__memset(void *s, int c, size_t count) __alias(memset); +#endif + void *memcpy(void *__dest, __const void *__src, size_t __n) { int i = 0; diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index feac2c8b86f2..6ed30421f697 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -259,7 +259,7 @@ */ #define ALT_UP(instr...) \ .pushsection ".alt.smp.init", "a" ;\ - .long 9998b ;\ + .long 9998b - . ;\ 9997: instr ;\ .if . - 9997b == 2 ;\ nop ;\ @@ -270,7 +270,7 @@ .popsection #define ALT_UP_B(label) \ .pushsection ".alt.smp.init", "a" ;\ - .long 9998b ;\ + .long 9998b - . ;\ W(b) . + (label - 9998b) ;\ .popsection #else @@ -494,4 +494,88 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) #define _ASM_NOKPROBE(entry) #endif + .macro __adldst_l, op, reg, sym, tmp, c + .if __LINUX_ARM_ARCH__ < 7 + ldr\c \tmp, .La\@ + .subsection 1 + .align 2 +.La\@: .long \sym - .Lpc\@ + .previous + .else + .ifnb \c + THUMB( ittt \c ) + .endif + movw\c \tmp, #:lower16:\sym - .Lpc\@ + movt\c \tmp, #:upper16:\sym - .Lpc\@ + .endif + +#ifndef CONFIG_THUMB2_KERNEL + .set .Lpc\@, . + 8 // PC bias + .ifc \op, add + add\c \reg, \tmp, pc + .else + \op\c \reg, [pc, \tmp] + .endif +#else +.Lb\@: add\c \tmp, \tmp, pc + /* + * In Thumb-2 builds, the PC bias depends on whether we are currently + * emitting into a .arm or a .thumb section. The size of the add opcode + * above will be 2 bytes when emitting in Thumb mode and 4 bytes when + * emitting in ARM mode, so let's use this to account for the bias. + */ + .set .Lpc\@, . + (. - .Lb\@) + + .ifnc \op, add + \op\c \reg, [\tmp] + .endif +#endif + .endm + + /* + * mov_l - move a constant value or [relocated] address into a register + */ + .macro mov_l, dst:req, imm:req + .if __LINUX_ARM_ARCH__ < 7 + ldr \dst, =\imm + .else + movw \dst, #:lower16:\imm + movt \dst, #:upper16:\imm + .endif + .endm + + /* + * adr_l - adr pseudo-op with unlimited range + * + * @dst: destination register + * @sym: name of the symbol + * @cond: conditional opcode suffix + */ + .macro adr_l, dst:req, sym:req, cond + __adldst_l add, \dst, \sym, \dst, \cond + .endm + + /* + * ldr_l - ldr <literal> pseudo-op with unlimited range + * + * @dst: destination register + * @sym: name of the symbol + * @cond: conditional opcode suffix + */ + .macro ldr_l, dst:req, sym:req, cond + __adldst_l ldr, \dst, \sym, \dst, \cond + .endm + + /* + * str_l - str <literal> pseudo-op with unlimited range + * + * @src: source register + * @sym: name of the symbol + * @tmp: mandatory scratch register + * @cond: conditional opcode suffix + */ + .macro str_l, src:req, sym:req, tmp:req, cond + __adldst_l str, \src, \sym, \tmp, \cond + .endm + #endif /* __ASM_ASSEMBLER_H__ */ diff --git a/arch/arm/include/asm/div64.h b/arch/arm/include/asm/div64.h index 898e9c78a7e7..595e538f5bfb 100644 --- a/arch/arm/include/asm/div64.h +++ b/arch/arm/include/asm/div64.h @@ -21,29 +21,20 @@ * assembly implementation with completely non standard calling convention * for arguments and results (beware). */ - -#ifdef __ARMEB__ -#define __xh "r0" -#define __xl "r1" -#else -#define __xl "r0" -#define __xh "r1" -#endif - static inline uint32_t __div64_32(uint64_t *n, uint32_t base) { register unsigned int __base asm("r4") = base; register unsigned long long __n asm("r0") = *n; register unsigned long long __res asm("r2"); - register unsigned int __rem asm(__xh); - asm( __asmeq("%0", __xh) + unsigned int __rem; + asm( __asmeq("%0", "r0") __asmeq("%1", "r2") - __asmeq("%2", "r0") - __asmeq("%3", "r4") + __asmeq("%2", "r4") "bl __do_div64" - : "=r" (__rem), "=r" (__res) - : "r" (__n), "r" (__base) + : "+r" (__n), "=r" (__res) + : "r" (__base) : "ip", "lr", "cc"); + __rem = __n >> 32; *n = __res; return __rem; } diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h index 61941f369861..b8102a6ddf16 100644 --- a/arch/arm/include/asm/elf.h +++ b/arch/arm/include/asm/elf.h @@ -51,6 +51,7 @@ typedef struct user_fp elf_fpregset_t; #define R_ARM_NONE 0 #define R_ARM_PC24 1 #define R_ARM_ABS32 2 +#define R_ARM_REL32 3 #define R_ARM_CALL 28 #define R_ARM_JUMP24 29 #define R_ARM_TARGET1 38 @@ -58,11 +59,15 @@ typedef struct user_fp elf_fpregset_t; #define R_ARM_PREL31 42 #define R_ARM_MOVW_ABS_NC 43 #define R_ARM_MOVT_ABS 44 +#define R_ARM_MOVW_PREL_NC 45 +#define R_ARM_MOVT_PREL 46 #define R_ARM_THM_CALL 10 #define R_ARM_THM_JUMP24 30 #define R_ARM_THM_MOVW_ABS_NC 47 #define R_ARM_THM_MOVT_ABS 48 +#define R_ARM_THM_MOVW_PREL_NC 49 +#define R_ARM_THM_MOVT_PREL 50 /* * These are used to set parameters in the core dumps. diff --git a/arch/arm/include/asm/fixmap.h b/arch/arm/include/asm/fixmap.h index c279a8a463a2..707068f852c2 100644 --- a/arch/arm/include/asm/fixmap.h +++ b/arch/arm/include/asm/fixmap.h @@ -2,7 +2,7 @@ #ifndef _ASM_FIXMAP_H #define _ASM_FIXMAP_H -#define FIXADDR_START 0xffc00000UL +#define FIXADDR_START 0xffc80000UL #define FIXADDR_END 0xfff00000UL #define FIXADDR_TOP (FIXADDR_END - PAGE_SIZE) diff --git a/arch/arm/include/asm/kasan.h b/arch/arm/include/asm/kasan.h new file mode 100644 index 000000000000..303c35df3135 --- /dev/null +++ b/arch/arm/include/asm/kasan.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/arm/include/asm/kasan.h + * + * Copyright (c) 2015 Samsung Electronics Co., Ltd. + * Author: Andrey Ryabinin <ryabinin.a.a@gmail.com> + * + */ + +#ifndef __ASM_KASAN_H +#define __ASM_KASAN_H + +#ifdef CONFIG_KASAN + +#include <asm/kasan_def.h> + +#define KASAN_SHADOW_SCALE_SHIFT 3 + +/* + * The compiler uses a shadow offset assuming that addresses start + * from 0. Kernel addresses don't start from 0, so shadow + * for kernel really starts from 'compiler's shadow offset' + + * ('kernel address space start' >> KASAN_SHADOW_SCALE_SHIFT) + */ + +asmlinkage void kasan_early_init(void); +extern void kasan_init(void); + +#else +static inline void kasan_init(void) { } +#endif + +#endif diff --git a/arch/arm/include/asm/kasan_def.h b/arch/arm/include/asm/kasan_def.h new file mode 100644 index 000000000000..5739605aa7cf --- /dev/null +++ b/arch/arm/include/asm/kasan_def.h @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/arm/include/asm/kasan_def.h + * + * Copyright (c) 2018 Huawei Technologies Co., Ltd. + * + * Author: Abbott Liu <liuwenliang@huawei.com> + */ + +#ifndef __ASM_KASAN_DEF_H +#define __ASM_KASAN_DEF_H + +#ifdef CONFIG_KASAN + +/* + * Define KASAN_SHADOW_OFFSET,KASAN_SHADOW_START and KASAN_SHADOW_END for + * the Arm kernel address sanitizer. We are "stealing" lowmem (the 4GB + * addressable by a 32bit architecture) out of the virtual address + * space to use as shadow memory for KASan as follows: + * + * +----+ 0xffffffff + * | | \ + * | | |-> Static kernel image (vmlinux) BSS and page table + * | |/ + * +----+ PAGE_OFFSET + * | | \ + * | | |-> Loadable kernel modules virtual address space area + * | |/ + * +----+ MODULES_VADDR = KASAN_SHADOW_END + * | | \ + * | | |-> The shadow area of kernel virtual address. + * | |/ + * +----+-> TASK_SIZE (start of kernel space) = KASAN_SHADOW_START the + * | |\ shadow address of MODULES_VADDR + * | | | + * | | | + * | | |-> The user space area in lowmem. The kernel address + * | | | sanitizer do not use this space, nor does it map it. + * | | | + * | | | + * | | | + * | | | + * | |/ + * ------ 0 + * + * 1) KASAN_SHADOW_START + * This value begins with the MODULE_VADDR's shadow address. It is the + * start of kernel virtual space. Since we have modules to load, we need + * to cover also that area with shadow memory so we can find memory + * bugs in modules. + * + * 2) KASAN_SHADOW_END + * This value is the 0x100000000's shadow address: the mapping that would + * be after the end of the kernel memory at 0xffffffff. It is the end of + * kernel address sanitizer shadow area. It is also the start of the + * module area. + * + * 3) KASAN_SHADOW_OFFSET: + * This value is used to map an address to the corresponding shadow + * address by the following formula: + * + * shadow_addr = (address >> 3) + KASAN_SHADOW_OFFSET; + * + * As you would expect, >> 3 is equal to dividing by 8, meaning each + * byte in the shadow memory covers 8 bytes of kernel memory, so one + * bit shadow memory per byte of kernel memory is used. + * + * The KASAN_SHADOW_OFFSET is provided in a Kconfig option depending + * on the VMSPLIT layout of the system: the kernel and userspace can + * split up lowmem in different ways according to needs, so we calculate + * the shadow offset depending on this. + */ + +#define KASAN_SHADOW_SCALE_SHIFT 3 +#define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL) +#define KASAN_SHADOW_END ((UL(1) << (32 - KASAN_SHADOW_SCALE_SHIFT)) \ + + KASAN_SHADOW_OFFSET) +#define KASAN_SHADOW_START ((KASAN_SHADOW_END >> 3) + KASAN_SHADOW_OFFSET) + +#endif +#endif diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index 99035b5891ef..2f841cb65c30 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -18,6 +18,7 @@ #ifdef CONFIG_NEED_MACH_MEMORY_H #include <mach/memory.h> #endif +#include <asm/kasan_def.h> /* PAGE_OFFSET - the virtual address of the start of the kernel image */ #define PAGE_OFFSET UL(CONFIG_PAGE_OFFSET) @@ -28,7 +29,11 @@ * TASK_SIZE - the maximum size of a user space task. * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area */ +#ifndef CONFIG_KASAN #define TASK_SIZE (UL(CONFIG_PAGE_OFFSET) - UL(SZ_16M)) +#else +#define TASK_SIZE (KASAN_SHADOW_START) +#endif #define TASK_UNMAPPED_BASE ALIGN(TASK_SIZE / 3, SZ_16M) /* @@ -67,6 +72,10 @@ */ #define XIP_VIRT_ADDR(physaddr) (MODULES_VADDR + ((physaddr) & 0x000fffff)) +#define FDT_FIXED_BASE UL(0xff800000) +#define FDT_FIXED_SIZE (2 * SECTION_SIZE) +#define FDT_VIRT_BASE(physbase) ((void *)(FDT_FIXED_BASE | (physbase) % SECTION_SIZE)) + #if !defined(CONFIG_SMP) && !defined(CONFIG_ARM_LPAE) /* * Allow 16MB-aligned ioremap pages @@ -107,6 +116,7 @@ extern unsigned long vectors_base; #define MODULES_VADDR PAGE_OFFSET #define XIP_VIRT_ADDR(physaddr) (physaddr) +#define FDT_VIRT_BASE(physbase) ((void *)(physbase)) #endif /* !CONFIG_MMU */ @@ -173,6 +183,7 @@ extern unsigned long vectors_base; * so that all we need to do is modify the 8-bit constant field. */ #define __PV_BITS_31_24 0x81000000 +#define __PV_BITS_23_16 0x810000 #define __PV_BITS_7_0 0x81 extern unsigned long __pv_phys_pfn_offset; @@ -183,43 +194,65 @@ extern const void *__pv_table_begin, *__pv_table_end; #define PHYS_OFFSET ((phys_addr_t)__pv_phys_pfn_offset << PAGE_SHIFT) #define PHYS_PFN_OFFSET (__pv_phys_pfn_offset) -#define __pv_stub(from,to,instr,type) \ +#ifndef CONFIG_THUMB2_KERNEL +#define __pv_stub(from,to,instr) \ __asm__("@ __pv_stub\n" \ "1: " instr " %0, %1, %2\n" \ + "2: " instr " %0, %0, %3\n" \ " .pushsection .pv_table,\"a\"\n" \ - " .long 1b\n" \ + " .long 1b - ., 2b - .\n" \ " .popsection\n" \ : "=r" (to) \ - : "r" (from), "I" (type)) + : "r" (from), "I" (__PV_BITS_31_24), \ + "I"(__PV_BITS_23_16)) -#define __pv_stub_mov_hi(t) \ - __asm__ volatile("@ __pv_stub_mov\n" \ - "1: mov %R0, %1\n" \ +#define __pv_add_carry_stub(x, y) \ + __asm__("@ __pv_add_carry_stub\n" \ + "0: movw %R0, #0\n" \ + " adds %Q0, %1, %R0, lsl #20\n" \ + "1: mov %R0, %2\n" \ + " adc %R0, %R0, #0\n" \ " .pushsection .pv_table,\"a\"\n" \ - " .long 1b\n" \ + " .long 0b - ., 1b - .\n" \ " .popsection\n" \ - : "=r" (t) \ - : "I" (__PV_BITS_7_0)) + : "=&r" (y) \ + : "r" (x), "I" (__PV_BITS_7_0) \ + : "cc") + +#else +#define __pv_stub(from,to,instr) \ + __asm__("@ __pv_stub\n" \ + "0: movw %0, #0\n" \ + " lsl %0, #21\n" \ + " " instr " %0, %1, %0\n" \ + " .pushsection .pv_table,\"a\"\n" \ + " .long 0b - .\n" \ + " .popsection\n" \ + : "=&r" (to) \ + : "r" (from)) #define __pv_add_carry_stub(x, y) \ - __asm__ volatile("@ __pv_add_carry_stub\n" \ - "1: adds %Q0, %1, %2\n" \ + __asm__("@ __pv_add_carry_stub\n" \ + "0: movw %R0, #0\n" \ + " lsls %R0, #21\n" \ + " adds %Q0, %1, %R0\n" \ + "1: mvn %R0, #0\n" \ " adc %R0, %R0, #0\n" \ " .pushsection .pv_table,\"a\"\n" \ - " .long 1b\n" \ + " .long 0b - ., 1b - .\n" \ " .popsection\n" \ - : "+r" (y) \ - : "r" (x), "I" (__PV_BITS_31_24) \ + : "=&r" (y) \ + : "r" (x) \ : "cc") +#endif static inline phys_addr_t __virt_to_phys_nodebug(unsigned long x) { phys_addr_t t; if (sizeof(phys_addr_t) == 4) { - __pv_stub(x, t, "add", __PV_BITS_31_24); + __pv_stub(x, t, "add"); } else { - __pv_stub_mov_hi(t); __pv_add_carry_stub(x, t); } return t; @@ -235,7 +268,7 @@ static inline unsigned long __phys_to_virt(phys_addr_t x) * assembler expression receives 32 bit argument * in place where 'r' 32 bit operand is expected. */ - __pv_stub((unsigned long) x, t, "sub", __PV_BITS_31_24); + __pv_stub((unsigned long) x, t, "sub"); return t; } diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h index 15f4674715f8..fdee1f04f4f3 100644 --- a/arch/arm/include/asm/pgalloc.h +++ b/arch/arm/include/asm/pgalloc.h @@ -21,6 +21,7 @@ #define _PAGE_KERNEL_TABLE (PMD_TYPE_TABLE | PMD_BIT4 | PMD_DOMAIN(DOMAIN_KERNEL)) #ifdef CONFIG_ARM_LPAE +#define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t)) static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) { @@ -28,14 +29,19 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) } #else /* !CONFIG_ARM_LPAE */ +#define PGD_SIZE (PAGE_SIZE << 2) /* * Since we have only two-level page tables, these are trivial */ #define pmd_alloc_one(mm,addr) ({ BUG(); ((pmd_t *)2); }) #define pmd_free(mm, pmd) do { } while (0) +#ifdef CONFIG_KASAN +/* The KASan core unconditionally calls pud_populate() on all architectures */ +#define pud_populate(mm,pmd,pte) do { } while (0) +#else #define pud_populate(mm,pmd,pte) BUG() - +#endif #endif /* CONFIG_ARM_LPAE */ extern pgd_t *pgd_alloc(struct mm_struct *mm); diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h index baf7d0204eb5..70fe69bdcce2 100644 --- a/arch/arm/include/asm/pgtable-2level.h +++ b/arch/arm/include/asm/pgtable-2level.h @@ -179,11 +179,28 @@ * the pud: the pud entry is never bad, always exists, and can't be set or * cleared. */ -#define pud_none(pud) (0) -#define pud_bad(pud) (0) -#define pud_present(pud) (1) -#define pud_clear(pudp) do { } while (0) -#define set_pud(pud,pudp) do { } while (0) +static inline int pud_none(pud_t pud) +{ + return 0; +} + +static inline int pud_bad(pud_t pud) +{ + return 0; +} + +static inline int pud_present(pud_t pud) +{ + return 1; +} + +static inline void pud_clear(pud_t *pudp) +{ +} + +static inline void set_pud(pud_t *pudp, pud_t pud) +{ +} static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) { diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h index b9241051e5cb..9e6b97286307 100644 --- a/arch/arm/include/asm/processor.h +++ b/arch/arm/include/asm/processor.h @@ -96,7 +96,7 @@ unsigned long get_wchan(struct task_struct *p); #define __ALT_SMP_ASM(smp, up) \ "9998: " smp "\n" \ " .pushsection \".alt.smp.init\", \"a\"\n" \ - " .long 9998b\n" \ + " .long 9998b - .\n" \ " " up "\n" \ " .popsection\n" #else diff --git a/arch/arm/include/asm/prom.h b/arch/arm/include/asm/prom.h index 1e36c40533c1..402e3f34c7ed 100644 --- a/arch/arm/include/asm/prom.h +++ b/arch/arm/include/asm/prom.h @@ -9,12 +9,12 @@ #ifdef CONFIG_OF -extern const struct machine_desc *setup_machine_fdt(unsigned int dt_phys); +extern const struct machine_desc *setup_machine_fdt(void *dt_virt); extern void __init arm_dt_init_cpu_maps(void); #else /* CONFIG_OF */ -static inline const struct machine_desc *setup_machine_fdt(unsigned int dt_phys) +static inline const struct machine_desc *setup_machine_fdt(void *dt_virt) { return NULL; } diff --git a/arch/arm/include/asm/string.h b/arch/arm/include/asm/string.h index 111a1d8a41dd..6c607c68f3ad 100644 --- a/arch/arm/include/asm/string.h +++ b/arch/arm/include/asm/string.h @@ -5,6 +5,9 @@ /* * We don't do inline string functions, since the * optimised inline asm versions are not small. + * + * The __underscore versions of some functions are for KASan to be able + * to replace them with instrumented versions. */ #define __HAVE_ARCH_STRRCHR @@ -15,15 +18,18 @@ extern char * strchr(const char * s, int c); #define __HAVE_ARCH_MEMCPY extern void * memcpy(void *, const void *, __kernel_size_t); +extern void *__memcpy(void *dest, const void *src, __kernel_size_t n); #define __HAVE_ARCH_MEMMOVE extern void * memmove(void *, const void *, __kernel_size_t); +extern void *__memmove(void *dest, const void *src, __kernel_size_t n); #define __HAVE_ARCH_MEMCHR extern void * memchr(const void *, int, __kernel_size_t); #define __HAVE_ARCH_MEMSET extern void * memset(void *, int, __kernel_size_t); +extern void *__memset(void *s, int c, __kernel_size_t n); #define __HAVE_ARCH_MEMSET32 extern void *__memset32(uint32_t *, uint32_t v, __kernel_size_t); @@ -39,4 +45,24 @@ static inline void *memset64(uint64_t *p, uint64_t v, __kernel_size_t n) return __memset64(p, v, n * 8, v >> 32); } +/* + * For files that are not instrumented (e.g. mm/slub.c) we + * must use non-instrumented versions of the mem* + * functions named __memcpy() etc. All such kernel code has + * been tagged with KASAN_SANITIZE_file.o = n, which means + * that the address sanitization argument isn't passed to the + * compiler, and __SANITIZE_ADDRESS__ is not set. As a result + * these defines kick in. + */ +#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__) +#define memcpy(dst, src, len) __memcpy(dst, src, len) +#define memmove(dst, src, len) __memmove(dst, src, len) +#define memset(s, c, n) __memset(s, c, n) + +#ifndef __NO_FORTIFY +#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */ +#endif + +#endif + #endif diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index eb7ce2747eb0..70d4cbc49ae1 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -13,7 +13,15 @@ #include <asm/fpstate.h> #include <asm/page.h> +#ifdef CONFIG_KASAN +/* + * KASan uses a lot of extra stack space so the thread size order needs to + * be increased. + */ +#define THREAD_SIZE_ORDER 2 +#else #define THREAD_SIZE_ORDER 1 +#endif #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) #define THREAD_START_SP (THREAD_SIZE - 8) diff --git a/arch/arm/include/asm/uaccess-asm.h b/arch/arm/include/asm/uaccess-asm.h index 907571fd05c6..e6eb7a2aaf1e 100644 --- a/arch/arm/include/asm/uaccess-asm.h +++ b/arch/arm/include/asm/uaccess-asm.h @@ -85,7 +85,7 @@ */ .macro uaccess_entry, tsk, tmp0, tmp1, tmp2, disable ldr \tmp1, [\tsk, #TI_ADDR_LIMIT] - mov \tmp2, #TASK_SIZE + ldr \tmp2, =TASK_SIZE str \tmp2, [\tsk, #TI_ADDR_LIMIT] DACR( mrc p15, 0, \tmp0, c3, c0, 0) DACR( str \tmp0, [sp, #SVC_DACR]) diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 09e67cb02b20..ae295a3bcfef 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -21,6 +21,9 @@ obj-y := elf.o entry-common.o irq.o opcodes.o \ setup.o signal.o sigreturn_codes.o \ stacktrace.o sys_arm.o time.o traps.o +KASAN_SANITIZE_stacktrace.o := n +KASAN_SANITIZE_traps.o := n + ifneq ($(CONFIG_ARM_UNWIND),y) obj-$(CONFIG_FRAME_POINTER) += return_address.o endif @@ -88,6 +91,7 @@ obj-$(CONFIG_PARAVIRT) += paravirt.o head-y := head$(MMUEXT).o obj-$(CONFIG_DEBUG_LL) += debug.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o +obj-$(CONFIG_ARM_PATCH_PHYS_VIRT) += phys2virt.o # This is executed very early using a temporary stack when no memory allocator # nor global data is available. Everything has to be allocated on the stack. diff --git a/arch/arm/kernel/atags.h b/arch/arm/kernel/atags.h index 067e12edc341..f2819c25b602 100644 --- a/arch/arm/kernel/atags.h +++ b/arch/arm/kernel/atags.h @@ -2,11 +2,11 @@ void convert_to_tag_list(struct tag *tags); #ifdef CONFIG_ATAGS -const struct machine_desc *setup_machine_tags(phys_addr_t __atags_pointer, +const struct machine_desc *setup_machine_tags(void *__atags_vaddr, unsigned int machine_nr); #else static inline const struct machine_desc * __init __noreturn -setup_machine_tags(phys_addr_t __atags_pointer, unsigned int machine_nr) +setup_machine_tags(void *__atags_vaddr, unsigned int machine_nr) { early_print("no ATAGS support: can't continue\n"); while (true); diff --git a/arch/arm/kernel/atags_parse.c b/arch/arm/kernel/atags_parse.c index 6c12d9fe694e..373b61f9a4f0 100644 --- a/arch/arm/kernel/atags_parse.c +++ b/arch/arm/kernel/atags_parse.c @@ -174,7 +174,7 @@ static void __init squash_mem_tags(struct tag *tag) } const struct machine_desc * __init -setup_machine_tags(phys_addr_t __atags_pointer, unsigned int machine_nr) +setup_machine_tags(void *atags_vaddr, unsigned int machine_nr) { struct tag *tags = (struct tag *)&default_tags; const struct machine_desc *mdesc = NULL, *p; @@ -195,8 +195,8 @@ setup_machine_tags(phys_addr_t __atags_pointer, unsigned int machine_nr) if (!mdesc) return NULL; - if (__atags_pointer) - tags = phys_to_virt(__atags_pointer); + if (atags_vaddr) + tags = atags_vaddr; else if (mdesc->atag_offset) tags = (void *)(PAGE_OFFSET + mdesc->atag_offset); diff --git a/arch/arm/kernel/devtree.c b/arch/arm/kernel/devtree.c index 7f0745a97e20..28311dd0fee6 100644 --- a/arch/arm/kernel/devtree.c +++ b/arch/arm/kernel/devtree.c @@ -203,12 +203,12 @@ static const void * __init arch_get_next_mach(const char *const **match) /** * setup_machine_fdt - Machine setup when an dtb was passed to the kernel - * @dt_phys: physical address of dt blob + * @dt_virt: virtual address of dt blob * * If a dtb was passed to the kernel in r2, then use it to choose the * correct machine_desc and to setup the system. */ -const struct machine_desc * __init setup_machine_fdt(unsigned int dt_phys) +const struct machine_desc * __init setup_machine_fdt(void *dt_virt) { const struct machine_desc *mdesc, *mdesc_best = NULL; @@ -221,7 +221,7 @@ const struct machine_desc * __init setup_machine_fdt(unsigned int dt_phys) mdesc_best = &__mach_desc_GENERIC_DT; #endif - if (!dt_phys || !early_init_dt_verify(phys_to_virt(dt_phys))) + if (!dt_virt || !early_init_dt_verify(dt_virt)) return NULL; mdesc = of_flat_dt_match_machine(mdesc_best, arch_get_next_mach); diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 55a47df04773..0ea8529a4872 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -252,31 +252,10 @@ __und_svc: #else svc_entry #endif - @ - @ call emulation code, which returns using r9 if it has emulated - @ the instruction, or the more conventional lr if we are to treat - @ this as a real undefined instruction - @ - @ r0 - instruction - @ -#ifndef CONFIG_THUMB2_KERNEL - ldr r0, [r4, #-4] -#else - mov r1, #2 - ldrh r0, [r4, #-2] @ Thumb instruction at LR - 2 - cmp r0, #0xe800 @ 32-bit instruction if xx >= 0 - blo __und_svc_fault - ldrh r9, [r4] @ bottom 16 bits - add r4, r4, #2 - str r4, [sp, #S_PC] - orr r0, r9, r0, lsl #16 -#endif - badr r9, __und_svc_finish - mov r2, r4 - bl call_fpe mov r1, #4 @ PC correction to apply -__und_svc_fault: + THUMB( tst r5, #PSR_T_BIT ) @ exception taken in Thumb mode? + THUMB( movne r1, #2 ) @ if so, fix up PC correction mov r0, sp @ struct pt_regs *regs bl __und_fault @@ -427,7 +406,8 @@ ENDPROC(__fiq_abt) @ if it was interrupted in a critical region. Here we @ perform a quick test inline since it should be false @ 99.9999% of the time. The rest is done out of line. - cmp r4, #TASK_SIZE + ldr r0, =TASK_SIZE + cmp r4, r0 blhs kuser_cmpxchg64_fixup #endif #endif diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 77d16390a524..e0d7833a1827 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -50,7 +50,8 @@ __ret_fast_syscall: UNWIND(.cantunwind ) disable_irq_notrace @ disable interrupts ldr r2, [tsk, #TI_ADDR_LIMIT] - cmp r2, #TASK_SIZE + ldr r1, =TASK_SIZE + cmp r2, r1 blne addr_limit_check_failed ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing movs r1, r1, lsl #16 @@ -87,7 +88,8 @@ __ret_fast_syscall: #endif disable_irq_notrace @ disable interrupts ldr r2, [tsk, #TI_ADDR_LIMIT] - cmp r2, #TASK_SIZE + ldr r1, =TASK_SIZE + cmp r2, r1 blne addr_limit_check_failed ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing movs r1, r1, lsl #16 @@ -128,7 +130,8 @@ ret_slow_syscall: disable_irq_notrace @ disable interrupts ENTRY(ret_to_user_from_irq) ldr r2, [tsk, #TI_ADDR_LIMIT] - cmp r2, #TASK_SIZE + ldr r1, =TASK_SIZE + cmp r2, r1 blne addr_limit_check_failed ldr r1, [tsk, #TI_FLAGS] movs r1, r1, lsl #16 diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S index 4a3982812a40..29b2eda136bb 100644 --- a/arch/arm/kernel/head-common.S +++ b/arch/arm/kernel/head-common.S @@ -95,7 +95,7 @@ __mmap_switched: THUMB( ldmia r4!, {r0, r1, r2, r3} ) THUMB( mov sp, r3 ) sub r2, r2, r1 - bl memcpy @ copy .data to RAM + bl __memcpy @ copy .data to RAM #endif ARM( ldmia r4!, {r0, r1, sp} ) @@ -103,7 +103,7 @@ __mmap_switched: THUMB( mov sp, r3 ) sub r2, r1, r0 mov r1, #0 - bl memset @ clear .bss + bl __memset @ clear .bss ldmia r4, {r0, r1, r2, r3} str r9, [r0] @ Save processor ID @@ -111,6 +111,9 @@ __mmap_switched: str r8, [r2] @ Save atags pointer cmp r3, #0 strne r10, [r3] @ Save control register values +#ifdef CONFIG_KASAN + bl kasan_early_init +#endif mov lr, #0 b start_kernel ENDPROC(__mmap_switched) @@ -170,11 +173,12 @@ ENDPROC(lookup_processor_type) * r9 = cpuid (preserved) */ __lookup_processor_type: - adr r3, __lookup_processor_type_data - ldmia r3, {r4 - r6} - sub r3, r3, r4 @ get offset between virt&phys - add r5, r5, r3 @ convert virt addresses to - add r6, r6, r3 @ physical address space + /* + * Look in <asm/procinfo.h> for information about the __proc_info + * structure. + */ + adr_l r5, __proc_info_begin + adr_l r6, __proc_info_end 1: ldmia r5, {r3, r4} @ value, mask and r4, r4, r9 @ mask wanted bits teq r3, r4 @@ -186,17 +190,6 @@ __lookup_processor_type: 2: ret lr ENDPROC(__lookup_processor_type) -/* - * Look in <asm/procinfo.h> for information about the __proc_info structure. - */ - .align 2 - .type __lookup_processor_type_data, %object -__lookup_processor_type_data: - .long . - .long __proc_info_begin - .long __proc_info_end - .size __lookup_processor_type_data, . - __lookup_processor_type_data - __error_lpae: #ifdef CONFIG_DEBUG_LL adr r0, str_lpae diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index f8904227e7fd..7f62c5eccdf3 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -103,10 +103,8 @@ ENTRY(stext) #endif #ifndef CONFIG_XIP_KERNEL - adr r3, 2f - ldmia r3, {r4, r8} - sub r4, r3, r4 @ (PHYS_OFFSET - PAGE_OFFSET) - add r8, r8, r4 @ PHYS_OFFSET + adr_l r8, _text @ __pa(_text) + sub r8, r8, #TEXT_OFFSET @ PHYS_OFFSET #else ldr r8, =PLAT_PHYS_OFFSET @ always constant in this case #endif @@ -158,10 +156,6 @@ ENTRY(stext) 1: b __enable_mmu ENDPROC(stext) .ltorg -#ifndef CONFIG_XIP_KERNEL -2: .long . - .long PAGE_OFFSET -#endif /* * Setup the initial page tables. We only setup the barest @@ -224,11 +218,8 @@ __create_page_tables: * Create identity mapping to cater for __enable_mmu. * This identity mapping will be removed by paging_init(). */ - adr r0, __turn_mmu_on_loc - ldmia r0, {r3, r5, r6} - sub r0, r0, r3 @ virt->phys offset - add r5, r5, r0 @ phys __turn_mmu_on - add r6, r6, r0 @ phys __turn_mmu_on_end + adr_l r5, __turn_mmu_on @ _pa(__turn_mmu_on) + adr_l r6, __turn_mmu_on_end @ _pa(__turn_mmu_on_end) mov r5, r5, lsr #SECTION_SHIFT mov r6, r6, lsr #SECTION_SHIFT @@ -274,11 +265,10 @@ __create_page_tables: * We map 2 sections in case the ATAGs/DTB crosses a section boundary. */ mov r0, r2, lsr #SECTION_SHIFT - movs r0, r0, lsl #SECTION_SHIFT - subne r3, r0, r8 - addne r3, r3, #PAGE_OFFSET - addne r3, r4, r3, lsr #(SECTION_SHIFT - PMD_ORDER) - orrne r6, r7, r0 + cmp r2, #0 + ldrne r3, =FDT_FIXED_BASE >> (SECTION_SHIFT - PMD_ORDER) + addne r3, r3, r4 + orrne r6, r7, r0, lsl #SECTION_SHIFT strne r6, [r3], #1 << PMD_ORDER addne r6, r6, #1 << SECTION_SHIFT strne r6, [r3] @@ -351,11 +341,6 @@ __create_page_tables: ret lr ENDPROC(__create_page_tables) .ltorg - .align -__turn_mmu_on_loc: - .long . - .long __turn_mmu_on - .long __turn_mmu_on_end #if defined(CONFIG_SMP) .text @@ -391,10 +376,8 @@ ENTRY(secondary_startup) /* * Use the page tables supplied from __cpu_up. */ - adr r4, __secondary_data - ldmia r4, {r5, r7, r12} @ address to jump to after - sub lr, r4, r5 @ mmu has been enabled - add r3, r7, lr + adr_l r3, secondary_data + mov_l r12, __secondary_switched ldrd r4, r5, [r3, #0] @ get secondary_data.pgdir ARM_BE8(eor r4, r4, r5) @ Swap r5 and r4 in BE: ARM_BE8(eor r5, r4, r5) @ it can be done in 3 steps @@ -409,22 +392,13 @@ ARM_BE8(eor r4, r4, r5) @ without using a temp reg. ENDPROC(secondary_startup) ENDPROC(secondary_startup_arm) - /* - * r6 = &secondary_data - */ ENTRY(__secondary_switched) - ldr sp, [r7, #12] @ get secondary_data.stack + ldr_l r7, secondary_data + 12 @ get secondary_data.stack + mov sp, r7 mov fp, #0 b secondary_start_kernel ENDPROC(__secondary_switched) - .align - - .type __secondary_data, %object -__secondary_data: - .long . - .long secondary_data - .long __secondary_switched #endif /* defined(CONFIG_SMP) */ @@ -539,19 +513,11 @@ ARM_BE8(rev r0, r0) @ byteswap if big endian retne lr __fixup_smp_on_up: - adr r0, 1f - ldmia r0, {r3 - r5} - sub r3, r0, r3 - add r4, r4, r3 - add r5, r5, r3 + adr_l r4, __smpalt_begin + adr_l r5, __smpalt_end b __do_fixup_smp_on_up ENDPROC(__fixup_smp) - .align -1: .word . - .word __smpalt_begin - .word __smpalt_end - .pushsection .data .align 2 .globl smp_on_up @@ -565,14 +531,15 @@ smp_on_up: __do_fixup_smp_on_up: cmp r4, r5 reths lr - ldmia r4!, {r0, r6} - ARM( str r6, [r0, r3] ) - THUMB( add r0, r0, r3 ) + ldmia r4, {r0, r6} + ARM( str r6, [r0, r4] ) + THUMB( add r0, r0, r4 ) + add r4, r4, #8 #ifdef __ARMEB__ THUMB( mov r6, r6, ror #16 ) @ Convert word order for big-endian. #endif THUMB( strh r6, [r0], #2 ) @ For Thumb-2, store as two halfwords - THUMB( mov r6, r6, lsr #16 ) @ to be robust against misaligned r3. + THUMB( mov r6, r6, lsr #16 ) @ to be robust against misaligned r0. THUMB( strh r6, [r0] ) b __do_fixup_smp_on_up ENDPROC(__do_fixup_smp_on_up) @@ -581,151 +548,8 @@ ENTRY(fixup_smp) stmfd sp!, {r4 - r6, lr} mov r4, r0 add r5, r0, r1 - mov r3, #0 bl __do_fixup_smp_on_up ldmfd sp!, {r4 - r6, pc} ENDPROC(fixup_smp) -#ifdef __ARMEB__ -#define LOW_OFFSET 0x4 -#define HIGH_OFFSET 0x0 -#else -#define LOW_OFFSET 0x0 -#define HIGH_OFFSET 0x4 -#endif - -#ifdef CONFIG_ARM_PATCH_PHYS_VIRT - -/* __fixup_pv_table - patch the stub instructions with the delta between - * PHYS_OFFSET and PAGE_OFFSET, which is assumed to be 16MiB aligned and - * can be expressed by an immediate shifter operand. The stub instruction - * has a form of '(add|sub) rd, rn, #imm'. - */ - __HEAD -__fixup_pv_table: - adr r0, 1f - ldmia r0, {r3-r7} - mvn ip, #0 - subs r3, r0, r3 @ PHYS_OFFSET - PAGE_OFFSET - add r4, r4, r3 @ adjust table start address - add r5, r5, r3 @ adjust table end address - add r6, r6, r3 @ adjust __pv_phys_pfn_offset address - add r7, r7, r3 @ adjust __pv_offset address - mov r0, r8, lsr #PAGE_SHIFT @ convert to PFN - str r0, [r6] @ save computed PHYS_OFFSET to __pv_phys_pfn_offset - strcc ip, [r7, #HIGH_OFFSET] @ save to __pv_offset high bits - mov r6, r3, lsr #24 @ constant for add/sub instructions - teq r3, r6, lsl #24 @ must be 16MiB aligned -THUMB( it ne @ cross section branch ) - bne __error - str r3, [r7, #LOW_OFFSET] @ save to __pv_offset low bits - b __fixup_a_pv_table -ENDPROC(__fixup_pv_table) - - .align -1: .long . - .long __pv_table_begin - .long __pv_table_end -2: .long __pv_phys_pfn_offset - .long __pv_offset - - .text -__fixup_a_pv_table: - adr r0, 3f - ldr r6, [r0] - add r6, r6, r3 - ldr r0, [r6, #HIGH_OFFSET] @ pv_offset high word - ldr r6, [r6, #LOW_OFFSET] @ pv_offset low word - mov r6, r6, lsr #24 - cmn r0, #1 -#ifdef CONFIG_THUMB2_KERNEL - moveq r0, #0x200000 @ set bit 21, mov to mvn instruction - lsls r6, #24 - beq 2f - clz r7, r6 - lsr r6, #24 - lsl r6, r7 - bic r6, #0x0080 - lsrs r7, #1 - orrcs r6, #0x0080 - orr r6, r6, r7, lsl #12 - orr r6, #0x4000 - b 2f -1: add r7, r3 - ldrh ip, [r7, #2] -ARM_BE8(rev16 ip, ip) - tst ip, #0x4000 - and ip, #0x8f00 - orrne ip, r6 @ mask in offset bits 31-24 - orreq ip, r0 @ mask in offset bits 7-0 -ARM_BE8(rev16 ip, ip) - strh ip, [r7, #2] - bne 2f - ldrh ip, [r7] -ARM_BE8(rev16 ip, ip) - bic ip, #0x20 - orr ip, ip, r0, lsr #16 -ARM_BE8(rev16 ip, ip) - strh ip, [r7] -2: cmp r4, r5 - ldrcc r7, [r4], #4 @ use branch for delay slot - bcc 1b - bx lr -#else -#ifdef CONFIG_CPU_ENDIAN_BE8 - moveq r0, #0x00004000 @ set bit 22, mov to mvn instruction -#else - moveq r0, #0x400000 @ set bit 22, mov to mvn instruction -#endif - b 2f -1: ldr ip, [r7, r3] -#ifdef CONFIG_CPU_ENDIAN_BE8 - @ in BE8, we load data in BE, but instructions still in LE - bic ip, ip, #0xff000000 - tst ip, #0x000f0000 @ check the rotation field - orrne ip, ip, r6, lsl #24 @ mask in offset bits 31-24 - biceq ip, ip, #0x00004000 @ clear bit 22 - orreq ip, ip, r0 @ mask in offset bits 7-0 -#else - bic ip, ip, #0x000000ff - tst ip, #0xf00 @ check the rotation field - orrne ip, ip, r6 @ mask in offset bits 31-24 - biceq ip, ip, #0x400000 @ clear bit 22 - orreq ip, ip, r0 @ mask in offset bits 7-0 -#endif - str ip, [r7, r3] -2: cmp r4, r5 - ldrcc r7, [r4], #4 @ use branch for delay slot - bcc 1b - ret lr -#endif -ENDPROC(__fixup_a_pv_table) - - .align -3: .long __pv_offset - -ENTRY(fixup_pv_table) - stmfd sp!, {r4 - r7, lr} - mov r3, #0 @ no offset - mov r4, r0 @ r0 = table start - add r5, r0, r1 @ r1 = table size - bl __fixup_a_pv_table - ldmfd sp!, {r4 - r7, pc} -ENDPROC(fixup_pv_table) - - .data - .align 2 - .globl __pv_phys_pfn_offset - .type __pv_phys_pfn_offset, %object -__pv_phys_pfn_offset: - .word 0 - .size __pv_phys_pfn_offset, . -__pv_phys_pfn_offset - - .globl __pv_offset - .type __pv_offset, %object -__pv_offset: - .quad 0 - .size __pv_offset, . -__pv_offset -#endif - #include "head-common.S" diff --git a/arch/arm/kernel/hyp-stub.S b/arch/arm/kernel/hyp-stub.S index 26d8e03b1dd3..b699b22a4db1 100644 --- a/arch/arm/kernel/hyp-stub.S +++ b/arch/arm/kernel/hyp-stub.S @@ -24,41 +24,38 @@ ENTRY(__boot_cpu_mode) .text /* - * Save the primary CPU boot mode. Requires 3 scratch registers. + * Save the primary CPU boot mode. Requires 2 scratch registers. */ - .macro store_primary_cpu_mode reg1, reg2, reg3 + .macro store_primary_cpu_mode reg1, reg2 mrs \reg1, cpsr and \reg1, \reg1, #MODE_MASK - adr \reg2, .L__boot_cpu_mode_offset - ldr \reg3, [\reg2] - str \reg1, [\reg2, \reg3] + str_l \reg1, __boot_cpu_mode, \reg2 .endm /* * Compare the current mode with the one saved on the primary CPU. * If they don't match, record that fact. The Z bit indicates * if there's a match or not. - * Requires 3 additionnal scratch registers. + * Requires 2 additional scratch registers. */ - .macro compare_cpu_mode_with_primary mode, reg1, reg2, reg3 - adr \reg2, .L__boot_cpu_mode_offset - ldr \reg3, [\reg2] - ldr \reg1, [\reg2, \reg3] + .macro compare_cpu_mode_with_primary mode, reg1, reg2 + adr_l \reg2, __boot_cpu_mode + ldr \reg1, [\reg2] cmp \mode, \reg1 @ matches primary CPU boot mode? orrne \reg1, \reg1, #BOOT_CPU_MODE_MISMATCH - strne \reg1, [\reg2, \reg3] @ record what happened and give up + strne \reg1, [\reg2] @ record what happened and give up .endm #else /* ZIMAGE */ - .macro store_primary_cpu_mode reg1:req, reg2:req, reg3:req + .macro store_primary_cpu_mode reg1:req, reg2:req .endm /* * The zImage loader only runs on one CPU, so we don't bother with mult-CPU * consistency checking: */ - .macro compare_cpu_mode_with_primary mode, reg1, reg2, reg3 + .macro compare_cpu_mode_with_primary mode, reg1, reg2 cmp \mode, \mode .endm @@ -73,7 +70,7 @@ ENTRY(__boot_cpu_mode) */ @ Call this from the primary CPU ENTRY(__hyp_stub_install) - store_primary_cpu_mode r4, r5, r6 + store_primary_cpu_mode r4, r5 ENDPROC(__hyp_stub_install) @ fall through... @@ -87,7 +84,7 @@ ENTRY(__hyp_stub_install_secondary) * If the secondary has booted with a different mode, give up * immediately. */ - compare_cpu_mode_with_primary r4, r5, r6, r7 + compare_cpu_mode_with_primary r4, r5, r6 retne lr /* @@ -228,12 +225,6 @@ ENTRY(__hyp_soft_restart) ret lr ENDPROC(__hyp_soft_restart) -#ifndef ZIMAGE -.align 2 -.L__boot_cpu_mode_offset: - .long __boot_cpu_mode - . -#endif - .align 5 ENTRY(__hyp_stub_vectors) __hyp_stub_reset: W(b) . diff --git a/arch/arm/kernel/iwmmxt.S b/arch/arm/kernel/iwmmxt.S index 0dcae787b004..d2b4ac06e4ed 100644 --- a/arch/arm/kernel/iwmmxt.S +++ b/arch/arm/kernel/iwmmxt.S @@ -16,6 +16,7 @@ #include <asm/thread_info.h> #include <asm/asm-offsets.h> #include <asm/assembler.h> +#include "iwmmxt.h" #if defined(CONFIG_CPU_PJ4) || defined(CONFIG_CPU_PJ4B) #define PJ4(code...) code @@ -113,33 +114,33 @@ concan_save: concan_dump: - wstrw wCSSF, [r1, #MMX_WCSSF] - wstrw wCASF, [r1, #MMX_WCASF] - wstrw wCGR0, [r1, #MMX_WCGR0] - wstrw wCGR1, [r1, #MMX_WCGR1] - wstrw wCGR2, [r1, #MMX_WCGR2] - wstrw wCGR3, [r1, #MMX_WCGR3] + wstrw wCSSF, r1, MMX_WCSSF + wstrw wCASF, r1, MMX_WCASF + wstrw wCGR0, r1, MMX_WCGR0 + wstrw wCGR1, r1, MMX_WCGR1 + wstrw wCGR2, r1, MMX_WCGR2 + wstrw wCGR3, r1, MMX_WCGR3 1: @ MUP? wRn tst r2, #0x2 beq 2f - wstrd wR0, [r1, #MMX_WR0] - wstrd wR1, [r1, #MMX_WR1] - wstrd wR2, [r1, #MMX_WR2] - wstrd wR3, [r1, #MMX_WR3] - wstrd wR4, [r1, #MMX_WR4] - wstrd wR5, [r1, #MMX_WR5] - wstrd wR6, [r1, #MMX_WR6] - wstrd wR7, [r1, #MMX_WR7] - wstrd wR8, [r1, #MMX_WR8] - wstrd wR9, [r1, #MMX_WR9] - wstrd wR10, [r1, #MMX_WR10] - wstrd wR11, [r1, #MMX_WR11] - wstrd wR12, [r1, #MMX_WR12] - wstrd wR13, [r1, #MMX_WR13] - wstrd wR14, [r1, #MMX_WR14] - wstrd wR15, [r1, #MMX_WR15] + wstrd wR0, r1, MMX_WR0 + wstrd wR1, r1, MMX_WR1 + wstrd wR2, r1, MMX_WR2 + wstrd wR3, r1, MMX_WR3 + wstrd wR4, r1, MMX_WR4 + wstrd wR5, r1, MMX_WR5 + wstrd wR6, r1, MMX_WR6 + wstrd wR7, r1, MMX_WR7 + wstrd wR8, r1, MMX_WR8 + wstrd wR9, r1, MMX_WR9 + wstrd wR10, r1, MMX_WR10 + wstrd wR11, r1, MMX_WR11 + wstrd wR12, r1, MMX_WR12 + wstrd wR13, r1, MMX_WR13 + wstrd wR14, r1, MMX_WR14 + wstrd wR15, r1, MMX_WR15 2: teq r0, #0 @ anything to load? reteq lr @ if not, return @@ -147,30 +148,30 @@ concan_dump: concan_load: @ Load wRn - wldrd wR0, [r0, #MMX_WR0] - wldrd wR1, [r0, #MMX_WR1] - wldrd wR2, [r0, #MMX_WR2] - wldrd wR3, [r0, #MMX_WR3] - wldrd wR4, [r0, #MMX_WR4] - wldrd wR5, [r0, #MMX_WR5] - wldrd wR6, [r0, #MMX_WR6] - wldrd wR7, [r0, #MMX_WR7] - wldrd wR8, [r0, #MMX_WR8] - wldrd wR9, [r0, #MMX_WR9] - wldrd wR10, [r0, #MMX_WR10] - wldrd wR11, [r0, #MMX_WR11] - wldrd wR12, [r0, #MMX_WR12] - wldrd wR13, [r0, #MMX_WR13] - wldrd wR14, [r0, #MMX_WR14] - wldrd wR15, [r0, #MMX_WR15] + wldrd wR0, r0, MMX_WR0 + wldrd wR1, r0, MMX_WR1 + wldrd wR2, r0, MMX_WR2 + wldrd wR3, r0, MMX_WR3 + wldrd wR4, r0, MMX_WR4 + wldrd wR5, r0, MMX_WR5 + wldrd wR6, r0, MMX_WR6 + wldrd wR7, r0, MMX_WR7 + wldrd wR8, r0, MMX_WR8 + wldrd wR9, r0, MMX_WR9 + wldrd wR10, r0, MMX_WR10 + wldrd wR11, r0, MMX_WR11 + wldrd wR12, r0, MMX_WR12 + wldrd wR13, r0, MMX_WR13 + wldrd wR14, r0, MMX_WR14 + wldrd wR15, r0, MMX_WR15 @ Load wCx - wldrw wCSSF, [r0, #MMX_WCSSF] - wldrw wCASF, [r0, #MMX_WCASF] - wldrw wCGR0, [r0, #MMX_WCGR0] - wldrw wCGR1, [r0, #MMX_WCGR1] - wldrw wCGR2, [r0, #MMX_WCGR2] - wldrw wCGR3, [r0, #MMX_WCGR3] + wldrw wCSSF, r0, MMX_WCSSF + wldrw wCASF, r0, MMX_WCASF + wldrw wCGR0, r0, MMX_WCGR0 + wldrw wCGR1, r0, MMX_WCGR1 + wldrw wCGR2, r0, MMX_WCGR2 + wldrw wCGR3, r0, MMX_WCGR3 @ clear CUP/MUP (only if r1 != 0) teq r1, #0 diff --git a/arch/arm/kernel/iwmmxt.h b/arch/arm/kernel/iwmmxt.h new file mode 100644 index 000000000000..fb627286f5bb --- /dev/null +++ b/arch/arm/kernel/iwmmxt.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __IWMMXT_H__ +#define __IWMMXT_H__ + +.irp b, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +.set .LwR\b, \b +.set .Lr\b, \b +.endr + +.set .LwCSSF, 0x2 +.set .LwCASF, 0x3 +.set .LwCGR0, 0x8 +.set .LwCGR1, 0x9 +.set .LwCGR2, 0xa +.set .LwCGR3, 0xb + +.macro wldrd, reg:req, base:req, offset:req +.inst 0xedd00100 | (.L\reg << 12) | (.L\base << 16) | (\offset >> 2) +.endm + +.macro wldrw, reg:req, base:req, offset:req +.inst 0xfd900100 | (.L\reg << 12) | (.L\base << 16) | (\offset >> 2) +.endm + +.macro wstrd, reg:req, base:req, offset:req +.inst 0xedc00100 | (.L\reg << 12) | (.L\base << 16) | (\offset >> 2) +.endm + +.macro wstrw, reg:req, base:req, offset:req +.inst 0xfd800100 | (.L\reg << 12) | (.L\base << 16) | (\offset >> 2) +.endm + +#ifdef __clang__ + +#define wCon c1 + +.macro tmrc, dest:req, control:req +mrc p1, 0, \dest, \control, c0, 0 +.endm + +.macro tmcr, control:req, src:req +mcr p1, 0, \src, \control, c0, 0 +.endm +#endif + +#endif diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c index e15444b25ca0..beac45e89ba6 100644 --- a/arch/arm/kernel/module.c +++ b/arch/arm/kernel/module.c @@ -185,14 +185,24 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, *(u32 *)loc |= offset & 0x7fffffff; break; + case R_ARM_REL32: + *(u32 *)loc += sym->st_value - loc; + break; + case R_ARM_MOVW_ABS_NC: case R_ARM_MOVT_ABS: + case R_ARM_MOVW_PREL_NC: + case R_ARM_MOVT_PREL: offset = tmp = __mem_to_opcode_arm(*(u32 *)loc); offset = ((offset & 0xf0000) >> 4) | (offset & 0xfff); offset = (offset ^ 0x8000) - 0x8000; offset += sym->st_value; - if (ELF32_R_TYPE(rel->r_info) == R_ARM_MOVT_ABS) + if (ELF32_R_TYPE(rel->r_info) == R_ARM_MOVT_PREL || + ELF32_R_TYPE(rel->r_info) == R_ARM_MOVW_PREL_NC) + offset -= loc; + if (ELF32_R_TYPE(rel->r_info) == R_ARM_MOVT_ABS || + ELF32_R_TYPE(rel->r_info) == R_ARM_MOVT_PREL) offset >>= 16; tmp &= 0xfff0f000; @@ -283,6 +293,8 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, case R_ARM_THM_MOVW_ABS_NC: case R_ARM_THM_MOVT_ABS: + case R_ARM_THM_MOVW_PREL_NC: + case R_ARM_THM_MOVT_PREL: upper = __mem_to_opcode_thumb16(*(u16 *)loc); lower = __mem_to_opcode_thumb16(*(u16 *)(loc + 2)); @@ -302,7 +314,11 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, offset = (offset ^ 0x8000) - 0x8000; offset += sym->st_value; - if (ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVT_ABS) + if (ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVT_PREL || + ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVW_PREL_NC) + offset -= loc; + if (ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVT_ABS || + ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVT_PREL) offset >>= 16; upper = (u16)((upper & 0xfbf0) | diff --git a/arch/arm/kernel/phys2virt.S b/arch/arm/kernel/phys2virt.S new file mode 100644 index 000000000000..fb53db78fe78 --- /dev/null +++ b/arch/arm/kernel/phys2virt.S @@ -0,0 +1,238 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 1994-2002 Russell King + * Copyright (c) 2003, 2020 ARM Limited + * All Rights Reserved + */ + +#include <linux/init.h> +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/page.h> + +#ifdef __ARMEB__ +#define LOW_OFFSET 0x4 +#define HIGH_OFFSET 0x0 +#else +#define LOW_OFFSET 0x0 +#define HIGH_OFFSET 0x4 +#endif + +/* + * __fixup_pv_table - patch the stub instructions with the delta between + * PHYS_OFFSET and PAGE_OFFSET, which is assumed to be + * 2 MiB aligned. + * + * Called from head.S, which expects the following registers to be preserved: + * r1 = machine no, r2 = atags or dtb, + * r8 = phys_offset, r9 = cpuid, r10 = procinfo + */ + __HEAD +ENTRY(__fixup_pv_table) + mov r0, r8, lsr #PAGE_SHIFT @ convert to PFN + str_l r0, __pv_phys_pfn_offset, r3 + + adr_l r0, __pv_offset + subs r3, r8, #PAGE_OFFSET @ PHYS_OFFSET - PAGE_OFFSET + mvn ip, #0 + strcc ip, [r0, #HIGH_OFFSET] @ save to __pv_offset high bits + str r3, [r0, #LOW_OFFSET] @ save to __pv_offset low bits + + mov r0, r3, lsr #21 @ constant for add/sub instructions + teq r3, r0, lsl #21 @ must be 2 MiB aligned + bne 0f + + adr_l r4, __pv_table_begin + adr_l r5, __pv_table_end + b __fixup_a_pv_table + +0: mov r0, r0 @ deadloop on error + b 0b +ENDPROC(__fixup_pv_table) + + .text +__fixup_a_pv_table: + adr_l r6, __pv_offset + ldr r0, [r6, #HIGH_OFFSET] @ pv_offset high word + ldr r6, [r6, #LOW_OFFSET] @ pv_offset low word + cmn r0, #1 +#ifdef CONFIG_THUMB2_KERNEL + @ + @ The Thumb-2 versions of the patchable sequences are + @ + @ phys-to-virt: movw <reg>, #offset<31:21> + @ lsl <reg>, #21 + @ sub <VA>, <PA>, <reg> + @ + @ virt-to-phys (non-LPAE): movw <reg>, #offset<31:21> + @ lsl <reg>, #21 + @ add <PA>, <VA>, <reg> + @ + @ virt-to-phys (LPAE): movw <reg>, #offset<31:21> + @ lsl <reg>, #21 + @ adds <PAlo>, <VA>, <reg> + @ mov <PAhi>, #offset<39:32> + @ adc <PAhi>, <PAhi>, #0 + @ + @ In the non-LPAE case, all patchable instructions are MOVW + @ instructions, where we need to patch in the offset into the + @ second halfword of the opcode (the 16-bit immediate is encoded + @ as imm4:i:imm3:imm8) + @ + @ 15 11 10 9 4 3 0 15 14 12 11 8 7 0 + @ +-----------+---+-------------+------++---+------+----+------+ + @ MOVW | 1 1 1 1 0 | i | 1 0 0 1 0 0 | imm4 || 0 | imm3 | Rd | imm8 | + @ +-----------+---+-------------+------++---+------+----+------+ + @ + @ In the LPAE case, we also need to patch in the high word of the + @ offset into the immediate field of the MOV instruction, or patch it + @ to a MVN instruction if the offset is negative. In this case, we + @ need to inspect the first halfword of the opcode, to check whether + @ it is MOVW or MOV/MVN, and to perform the MOV to MVN patching if + @ needed. The encoding of the immediate is rather complex for values + @ of i:imm3 != 0b0000, but fortunately, we never need more than 8 lower + @ order bits, which can be patched into imm8 directly (and i:imm3 + @ cleared) + @ + @ 15 11 10 9 5 0 15 14 12 11 8 7 0 + @ +-----------+---+---------------------++---+------+----+------+ + @ MOV | 1 1 1 1 0 | i | 0 0 0 1 0 0 1 1 1 1 || 0 | imm3 | Rd | imm8 | + @ MVN | 1 1 1 1 0 | i | 0 0 0 1 1 0 1 1 1 1 || 0 | imm3 | Rd | imm8 | + @ +-----------+---+---------------------++---+------+----+------+ + @ + moveq r0, #0x200000 @ set bit 21, mov to mvn instruction + lsrs r3, r6, #29 @ isolate top 3 bits of displacement + ubfx r6, r6, #21, #8 @ put bits 28:21 into the MOVW imm8 field + bfi r6, r3, #12, #3 @ put bits 31:29 into the MOVW imm3 field + b .Lnext +.Lloop: add r7, r4 + adds r4, #4 @ clears Z flag +#ifdef CONFIG_ARM_LPAE + ldrh ip, [r7] +ARM_BE8(rev16 ip, ip) + tst ip, #0x200 @ MOVW has bit 9 set, MVN has it clear + bne 0f @ skip to MOVW handling (Z flag is clear) + bic ip, #0x20 @ clear bit 5 (MVN -> MOV) + orr ip, ip, r0, lsr #16 @ MOV -> MVN if offset < 0 +ARM_BE8(rev16 ip, ip) + strh ip, [r7] + @ Z flag is set +0: +#endif + ldrh ip, [r7, #2] +ARM_BE8(rev16 ip, ip) + and ip, #0xf00 @ clear everything except Rd field + orreq ip, r0 @ Z flag set -> MOV/MVN -> patch in high bits + orrne ip, r6 @ Z flag clear -> MOVW -> patch in low bits +ARM_BE8(rev16 ip, ip) + strh ip, [r7, #2] +#else +#ifdef CONFIG_CPU_ENDIAN_BE8 +@ in BE8, we load data in BE, but instructions still in LE +#define PV_BIT24 0x00000001 +#define PV_IMM8_MASK 0xff000000 +#define PV_IMMR_MSB 0x00080000 +#else +#define PV_BIT24 0x01000000 +#define PV_IMM8_MASK 0x000000ff +#define PV_IMMR_MSB 0x00000800 +#endif + + @ + @ The ARM versions of the patchable sequences are + @ + @ phys-to-virt: sub <VA>, <PA>, #offset<31:24>, lsl #24 + @ sub <VA>, <PA>, #offset<23:16>, lsl #16 + @ + @ virt-to-phys (non-LPAE): add <PA>, <VA>, #offset<31:24>, lsl #24 + @ add <PA>, <VA>, #offset<23:16>, lsl #16 + @ + @ virt-to-phys (LPAE): movw <reg>, #offset<31:20> + @ adds <PAlo>, <VA>, <reg>, lsl #20 + @ mov <PAhi>, #offset<39:32> + @ adc <PAhi>, <PAhi>, #0 + @ + @ In the non-LPAE case, all patchable instructions are ADD or SUB + @ instructions, where we need to patch in the offset into the + @ immediate field of the opcode, which is emitted with the correct + @ rotation value. (The effective value of the immediate is imm12<7:0> + @ rotated right by [2 * imm12<11:8>] bits) + @ + @ 31 28 27 23 22 20 19 16 15 12 11 0 + @ +------+-----------------+------+------+-------+ + @ ADD | cond | 0 0 1 0 1 0 0 0 | Rn | Rd | imm12 | + @ SUB | cond | 0 0 1 0 0 1 0 0 | Rn | Rd | imm12 | + @ MOV | cond | 0 0 1 1 1 0 1 0 | Rn | Rd | imm12 | + @ MVN | cond | 0 0 1 1 1 1 1 0 | Rn | Rd | imm12 | + @ +------+-----------------+------+------+-------+ + @ + @ In the LPAE case, we use a MOVW instruction to carry the low offset + @ word, and patch in the high word of the offset into the immediate + @ field of the subsequent MOV instruction, or patch it to a MVN + @ instruction if the offset is negative. We can distinguish MOVW + @ instructions based on bits 23:22 of the opcode, and ADD/SUB can be + @ distinguished from MOV/MVN (all using the encodings above) using + @ bit 24. + @ + @ 31 28 27 23 22 20 19 16 15 12 11 0 + @ +------+-----------------+------+------+-------+ + @ MOVW | cond | 0 0 1 1 0 0 0 0 | imm4 | Rd | imm12 | + @ +------+-----------------+------+------+-------+ + @ + moveq r0, #0x400000 @ set bit 22, mov to mvn instruction + mov r3, r6, lsr #16 @ put offset bits 31-16 into r3 + mov r6, r6, lsr #24 @ put offset bits 31-24 into r6 + and r3, r3, #0xf0 @ only keep offset bits 23-20 in r3 + b .Lnext +.Lloop: ldr ip, [r7, r4] +#ifdef CONFIG_ARM_LPAE + tst ip, #PV_BIT24 @ ADD/SUB have bit 24 clear + beq 1f +ARM_BE8(rev ip, ip) + tst ip, #0xc00000 @ MOVW has bits 23:22 clear + bic ip, ip, #0x400000 @ clear bit 22 + bfc ip, #0, #12 @ clear imm12 field of MOV[W] instruction + orreq ip, ip, r6, lsl #4 @ MOVW -> mask in offset bits 31-24 + orreq ip, ip, r3, lsr #4 @ MOVW -> mask in offset bits 23-20 + orrne ip, ip, r0 @ MOV -> mask in offset bits 7-0 (or bit 22) +ARM_BE8(rev ip, ip) + b 2f +1: +#endif + tst ip, #PV_IMMR_MSB @ rotation value >= 16 ? + bic ip, ip, #PV_IMM8_MASK + orreq ip, ip, r6 ARM_BE8(, lsl #24) @ mask in offset bits 31-24 + orrne ip, ip, r3 ARM_BE8(, lsl #24) @ mask in offset bits 23-20 +2: + str ip, [r7, r4] + add r4, r4, #4 +#endif + +.Lnext: + cmp r4, r5 + ldrcc r7, [r4] @ use branch for delay slot + bcc .Lloop + ret lr +ENDPROC(__fixup_a_pv_table) + +ENTRY(fixup_pv_table) + stmfd sp!, {r4 - r7, lr} + mov r4, r0 @ r0 = table start + add r5, r0, r1 @ r1 = table size + bl __fixup_a_pv_table + ldmfd sp!, {r4 - r7, pc} +ENDPROC(fixup_pv_table) + + .data + .align 2 + .globl __pv_phys_pfn_offset + .type __pv_phys_pfn_offset, %object +__pv_phys_pfn_offset: + .word 0 + .size __pv_phys_pfn_offset, . -__pv_phys_pfn_offset + + .globl __pv_offset + .type __pv_offset, %object +__pv_offset: + .quad 0 + .size __pv_offset, . -__pv_offset diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 3f65d0ac9f63..1a5edf562e85 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -18,6 +18,7 @@ #include <linux/of_platform.h> #include <linux/init.h> #include <linux/kexec.h> +#include <linux/libfdt.h> #include <linux/of_fdt.h> #include <linux/cpu.h> #include <linux/interrupt.h> @@ -58,6 +59,7 @@ #include <asm/unwind.h> #include <asm/memblock.h> #include <asm/virt.h> +#include <asm/kasan.h> #include "atags.h" @@ -763,7 +765,7 @@ int __init arm_add_memory(u64 start, u64 size) #ifndef CONFIG_PHYS_ADDR_T_64BIT if (aligned_start > ULONG_MAX) { pr_crit("Ignoring memory at 0x%08llx outside 32-bit physical address space\n", - (long long)start); + start); return -EINVAL; } @@ -1081,19 +1083,27 @@ void __init hyp_mode_check(void) void __init setup_arch(char **cmdline_p) { - const struct machine_desc *mdesc; + const struct machine_desc *mdesc = NULL; + void *atags_vaddr = NULL; + + if (__atags_pointer) + atags_vaddr = FDT_VIRT_BASE(__atags_pointer); setup_processor(); - mdesc = setup_machine_fdt(__atags_pointer); + if (atags_vaddr) { + mdesc = setup_machine_fdt(atags_vaddr); + if (mdesc) + memblock_reserve(__atags_pointer, + fdt_totalsize(atags_vaddr)); + } if (!mdesc) - mdesc = setup_machine_tags(__atags_pointer, __machine_arch_type); + mdesc = setup_machine_tags(atags_vaddr, __machine_arch_type); if (!mdesc) { early_print("\nError: invalid dtb and unrecognized/unsupported machine ID\n"); early_print(" r1=0x%08x, r2=0x%08x\n", __machine_arch_type, __atags_pointer); if (__atags_pointer) - early_print(" r2[]=%*ph\n", 16, - phys_to_virt(__atags_pointer)); + early_print(" r2[]=%*ph\n", 16, atags_vaddr); dump_machine_table(); } @@ -1126,7 +1136,7 @@ void __init setup_arch(char **cmdline_p) efi_init(); /* * Make sure the calculation for lowmem/highmem is set appropriately - * before reserving/allocating any mmeory + * before reserving/allocating any memory */ adjust_lowmem_bounds(); arm_memblock_init(mdesc); @@ -1136,6 +1146,7 @@ void __init setup_arch(char **cmdline_p) early_ioremap_reset(); paging_init(mdesc); + kasan_init(); request_standard_resources(mdesc); if (mdesc->restart) diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S index 5dc8b80bb693..43077e11dafd 100644 --- a/arch/arm/kernel/sleep.S +++ b/arch/arm/kernel/sleep.S @@ -72,8 +72,9 @@ ENTRY(__cpu_suspend) ldr r3, =sleep_save_sp stmfd sp!, {r0, r1} @ save suspend func arg and pointer ldr r3, [r3, #SLEEP_SAVE_SP_VIRT] - ALT_SMP(ldr r0, =mpidr_hash) + ALT_SMP(W(nop)) @ don't use adr_l inside ALT_SMP() ALT_UP_B(1f) + adr_l r0, mpidr_hash /* This ldmia relies on the memory layout of the mpidr_hash struct */ ldmia r0, {r1, r6-r8} @ r1 = mpidr mask (r6,r7,r8) = l[0,1,2] shifts compute_mpidr_hash r0, r6, r7, r8, r2, r1 @@ -147,9 +148,8 @@ no_hyp: mov r1, #0 ALT_SMP(mrc p15, 0, r0, c0, c0, 5) ALT_UP_B(1f) - adr r2, mpidr_hash_ptr - ldr r3, [r2] - add r2, r2, r3 @ r2 = struct mpidr_hash phys address + adr_l r2, mpidr_hash @ r2 = struct mpidr_hash phys address + /* * This ldmia relies on the memory layout of the mpidr_hash * struct mpidr_hash. @@ -157,10 +157,7 @@ no_hyp: ldmia r2, { r3-r6 } @ r3 = mpidr mask (r4,r5,r6) = l[0,1,2] shifts compute_mpidr_hash r1, r4, r5, r6, r0, r3 1: - adr r0, _sleep_save_sp - ldr r2, [r0] - add r0, r0, r2 - ldr r0, [r0, #SLEEP_SAVE_SP_PHYS] + ldr_l r0, sleep_save_sp + SLEEP_SAVE_SP_PHYS ldr r0, [r0, r1, lsl #2] @ load phys pgd, stack, resume fn @@ -177,12 +174,6 @@ ENDPROC(cpu_resume_arm) ENDPROC(cpu_resume_no_hyp) #endif - .align 2 -_sleep_save_sp: - .long sleep_save_sp - . -mpidr_hash_ptr: - .long mpidr_hash - . @ mpidr_hash struct offset - .data .align 2 .type sleep_save_sp, #object diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 48099c6e1e4a..6ab2b0ad5f40 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -524,14 +524,13 @@ void __init smp_prepare_cpus(unsigned int max_cpus) } static const char *ipi_types[NR_IPI] __tracepoint_string = { -#define S(x,s) [x] = s - S(IPI_WAKEUP, "CPU wakeup interrupts"), - S(IPI_TIMER, "Timer broadcast interrupts"), - S(IPI_RESCHEDULE, "Rescheduling interrupts"), - S(IPI_CALL_FUNC, "Function call interrupts"), - S(IPI_CPU_STOP, "CPU stop interrupts"), - S(IPI_IRQ_WORK, "IRQ work interrupts"), - S(IPI_COMPLETION, "completion interrupts"), + [IPI_WAKEUP] = "CPU wakeup interrupts", + [IPI_TIMER] = "Timer broadcast interrupts", + [IPI_RESCHEDULE] = "Rescheduling interrupts", + [IPI_CALL_FUNC] = "Function call interrupts", + [IPI_CPU_STOP] = "CPU stop interrupts", + [IPI_IRQ_WORK] = "IRQ work interrupts", + [IPI_COMPLETION] = "completion interrupts", }; static void smp_cross_call(const struct cpumask *target, unsigned int ipinr); diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c index d2bd0df2318d..59fdf257bf8b 100644 --- a/arch/arm/kernel/unwind.c +++ b/arch/arm/kernel/unwind.c @@ -18,9 +18,6 @@ #warning Your compiler does not have EABI support. #warning ARM unwind is known to compile only with EABI compilers. #warning Change compiler or disable ARM_UNWIND option. -#elif (__GNUC__ == 4 && __GNUC_MINOR__ <= 2) && !defined(__clang__) -#warning Your compiler is too buggy; it is known to not compile ARM unwind support. -#warning Change compiler or disable ARM_UNWIND option. #endif #endif /* __CHECKER__ */ @@ -236,7 +233,11 @@ static int unwind_pop_register(struct unwind_ctrl_block *ctrl, if (*vsp >= (unsigned long *)ctrl->sp_high) return -URC_FAILURE; - ctrl->vrs[reg] = *(*vsp)++; + /* Use READ_ONCE_NOCHECK here to avoid this memory access + * from being tracked by KASAN. + */ + ctrl->vrs[reg] = READ_ONCE_NOCHECK(*(*vsp)); + (*vsp)++; return URC_OK; } diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S index 09a333153dc6..e4caf48c089f 100644 --- a/arch/arm/lib/memcpy.S +++ b/arch/arm/lib/memcpy.S @@ -58,10 +58,12 @@ /* Prototype: void *memcpy(void *dest, const void *src, size_t n); */ +ENTRY(__memcpy) ENTRY(mmiocpy) -ENTRY(memcpy) +WEAK(memcpy) #include "copy_template.S" ENDPROC(memcpy) ENDPROC(mmiocpy) +ENDPROC(__memcpy) diff --git a/arch/arm/lib/memmove.S b/arch/arm/lib/memmove.S index b50e5770fb44..6fecc12a1f51 100644 --- a/arch/arm/lib/memmove.S +++ b/arch/arm/lib/memmove.S @@ -24,12 +24,13 @@ * occurring in the opposite direction. */ -ENTRY(memmove) +ENTRY(__memmove) +WEAK(memmove) UNWIND( .fnstart ) subs ip, r0, r1 cmphi r2, ip - bls memcpy + bls __memcpy stmfd sp!, {r0, r4, lr} UNWIND( .fnend ) @@ -222,3 +223,4 @@ ENTRY(memmove) 18: backward_copy_shift push=24 pull=8 ENDPROC(memmove) +ENDPROC(__memmove) diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S index 6ca4535c47fb..9817cb258c1a 100644 --- a/arch/arm/lib/memset.S +++ b/arch/arm/lib/memset.S @@ -13,8 +13,9 @@ .text .align 5 +ENTRY(__memset) ENTRY(mmioset) -ENTRY(memset) +WEAK(memset) UNWIND( .fnstart ) ands r3, r0, #3 @ 1 unaligned? mov ip, r0 @ preserve r0 as return value @@ -132,6 +133,7 @@ UNWIND( .fnstart ) UNWIND( .fnend ) ENDPROC(memset) ENDPROC(mmioset) +ENDPROC(__memset) ENTRY(__memset32) UNWIND( .fnstart ) diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 65e4482e3849..02692fbe2db5 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -743,6 +743,7 @@ config SWP_EMULATE config CPU_BIG_ENDIAN bool "Build big-endian kernel" depends on ARCH_SUPPORTS_BIG_ENDIAN + depends on !LD_IS_LLD help Say Y if you plan on running a kernel in big-endian mode. Note that your board must be properly built and your board diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile index c4ce477c5261..3510503bc5e6 100644 --- a/arch/arm/mm/Makefile +++ b/arch/arm/mm/Makefile @@ -7,6 +7,7 @@ obj-y := extable.o fault.o init.o iomap.o obj-y += dma-mapping$(MMUEXT).o obj-$(CONFIG_MMU) += fault-armv.o flush.o idmap.o ioremap.o \ mmap.o pgd.o mmu.o pageattr.o +KASAN_SANITIZE_mmu.o := n ifneq ($(CONFIG_MMU),y) obj-y += nommu.o @@ -16,6 +17,7 @@ endif obj-$(CONFIG_ARM_PTDUMP_CORE) += dump.o obj-$(CONFIG_ARM_PTDUMP_DEBUGFS) += ptdump_debugfs.o obj-$(CONFIG_MODULES) += proc-syms.o +KASAN_SANITIZE_physaddr.o := n obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o obj-$(CONFIG_ALIGNMENT_TRAP) += alignment.o @@ -110,3 +112,6 @@ obj-$(CONFIG_CACHE_L2X0_PMU) += cache-l2x0-pmu.o obj-$(CONFIG_CACHE_XSC3L2) += cache-xsc3l2.o obj-$(CONFIG_CACHE_TAUROS2) += cache-tauros2.o obj-$(CONFIG_CACHE_UNIPHIER) += cache-uniphier.o + +KASAN_SANITIZE_kasan_init.o := n +obj-$(CONFIG_KASAN) += kasan_init.o diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index db623d7c30de..828a2561b229 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -223,7 +223,6 @@ void __init arm_memblock_init(const struct machine_desc *mdesc) if (mdesc->reserve) mdesc->reserve(); - early_init_fdt_reserve_self(); early_init_fdt_scan_reserved_mem(); /* reserve memory for DMA contiguous allocations */ diff --git a/arch/arm/mm/kasan_init.c b/arch/arm/mm/kasan_init.c new file mode 100644 index 000000000000..9c348042a724 --- /dev/null +++ b/arch/arm/mm/kasan_init.c @@ -0,0 +1,291 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * This file contains kasan initialization code for ARM. + * + * Copyright (c) 2018 Samsung Electronics Co., Ltd. + * Author: Andrey Ryabinin <ryabinin.a.a@gmail.com> + * Author: Linus Walleij <linus.walleij@linaro.org> + */ + +#define pr_fmt(fmt) "kasan: " fmt +#include <linux/kasan.h> +#include <linux/kernel.h> +#include <linux/memblock.h> +#include <linux/sched/task.h> +#include <linux/start_kernel.h> +#include <linux/pgtable.h> +#include <asm/cputype.h> +#include <asm/highmem.h> +#include <asm/mach/map.h> +#include <asm/memory.h> +#include <asm/page.h> +#include <asm/pgalloc.h> +#include <asm/procinfo.h> +#include <asm/proc-fns.h> + +#include "mm.h" + +static pgd_t tmp_pgd_table[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE); + +pmd_t tmp_pmd_table[PTRS_PER_PMD] __page_aligned_bss; + +static __init void *kasan_alloc_block(size_t size) +{ + return memblock_alloc_try_nid(size, size, __pa(MAX_DMA_ADDRESS), + MEMBLOCK_ALLOC_KASAN, NUMA_NO_NODE); +} + +static void __init kasan_pte_populate(pmd_t *pmdp, unsigned long addr, + unsigned long end, bool early) +{ + unsigned long next; + pte_t *ptep = pte_offset_kernel(pmdp, addr); + + do { + pte_t entry; + void *p; + + next = addr + PAGE_SIZE; + + if (!early) { + if (!pte_none(READ_ONCE(*ptep))) + continue; + + p = kasan_alloc_block(PAGE_SIZE); + if (!p) { + panic("%s failed to allocate shadow page for address 0x%lx\n", + __func__, addr); + return; + } + memset(p, KASAN_SHADOW_INIT, PAGE_SIZE); + entry = pfn_pte(virt_to_pfn(p), + __pgprot(pgprot_val(PAGE_KERNEL))); + } else if (pte_none(READ_ONCE(*ptep))) { + /* + * The early shadow memory is mapping all KASan + * operations to one and the same page in memory, + * "kasan_early_shadow_page" so that the instrumentation + * will work on a scratch area until we can set up the + * proper KASan shadow memory. + */ + entry = pfn_pte(virt_to_pfn(kasan_early_shadow_page), + __pgprot(_L_PTE_DEFAULT | L_PTE_DIRTY | L_PTE_XN)); + } else { + /* + * Early shadow mappings are PMD_SIZE aligned, so if the + * first entry is already set, they must all be set. + */ + return; + } + + set_pte_at(&init_mm, addr, ptep, entry); + } while (ptep++, addr = next, addr != end); +} + +/* + * The pmd (page middle directory) is only used on LPAE + */ +static void __init kasan_pmd_populate(pud_t *pudp, unsigned long addr, + unsigned long end, bool early) +{ + unsigned long next; + pmd_t *pmdp = pmd_offset(pudp, addr); + + do { + if (pmd_none(*pmdp)) { + /* + * We attempt to allocate a shadow block for the PMDs + * used by the PTEs for this address if it isn't already + * allocated. + */ + void *p = early ? kasan_early_shadow_pte : + kasan_alloc_block(PAGE_SIZE); + + if (!p) { + panic("%s failed to allocate shadow block for address 0x%lx\n", + __func__, addr); + return; + } + pmd_populate_kernel(&init_mm, pmdp, p); + flush_pmd_entry(pmdp); + } + + next = pmd_addr_end(addr, end); + kasan_pte_populate(pmdp, addr, next, early); + } while (pmdp++, addr = next, addr != end); +} + +static void __init kasan_pgd_populate(unsigned long addr, unsigned long end, + bool early) +{ + unsigned long next; + pgd_t *pgdp; + p4d_t *p4dp; + pud_t *pudp; + + pgdp = pgd_offset_k(addr); + + do { + /* + * Allocate and populate the shadow block of p4d folded into + * pud folded into pmd if it doesn't already exist + */ + if (!early && pgd_none(*pgdp)) { + void *p = kasan_alloc_block(PAGE_SIZE); + + if (!p) { + panic("%s failed to allocate shadow block for address 0x%lx\n", + __func__, addr); + return; + } + pgd_populate(&init_mm, pgdp, p); + } + + next = pgd_addr_end(addr, end); + /* + * We just immediately jump over the p4d and pud page + * directories since we believe ARM32 will never gain four + * nor five level page tables. + */ + p4dp = p4d_offset(pgdp, addr); + pudp = pud_offset(p4dp, addr); + + kasan_pmd_populate(pudp, addr, next, early); + } while (pgdp++, addr = next, addr != end); +} + +extern struct proc_info_list *lookup_processor_type(unsigned int); + +void __init kasan_early_init(void) +{ + struct proc_info_list *list; + + /* + * locate processor in the list of supported processor + * types. The linker builds this table for us from the + * entries in arch/arm/mm/proc-*.S + */ + list = lookup_processor_type(read_cpuid_id()); + if (list) { +#ifdef MULTI_CPU + processor = *list->proc; +#endif + } + + BUILD_BUG_ON((KASAN_SHADOW_END - (1UL << 29)) != KASAN_SHADOW_OFFSET); + /* + * We walk the page table and set all of the shadow memory to point + * to the scratch page. + */ + kasan_pgd_populate(KASAN_SHADOW_START, KASAN_SHADOW_END, true); +} + +static void __init clear_pgds(unsigned long start, + unsigned long end) +{ + for (; start && start < end; start += PMD_SIZE) + pmd_clear(pmd_off_k(start)); +} + +static int __init create_mapping(void *start, void *end) +{ + void *shadow_start, *shadow_end; + + shadow_start = kasan_mem_to_shadow(start); + shadow_end = kasan_mem_to_shadow(end); + + pr_info("Mapping kernel virtual memory block: %px-%px at shadow: %px-%px\n", + start, end, shadow_start, shadow_end); + + kasan_pgd_populate((unsigned long)shadow_start & PAGE_MASK, + PAGE_ALIGN((unsigned long)shadow_end), false); + return 0; +} + +void __init kasan_init(void) +{ + phys_addr_t pa_start, pa_end; + u64 i; + + /* + * We are going to perform proper setup of shadow memory. + * + * At first we should unmap early shadow (clear_pgds() call bellow). + * However, instrumented code can't execute without shadow memory. + * + * To keep the early shadow memory MMU tables around while setting up + * the proper shadow memory, we copy swapper_pg_dir (the initial page + * table) to tmp_pgd_table and use that to keep the early shadow memory + * mapped until the full shadow setup is finished. Then we swap back + * to the proper swapper_pg_dir. + */ + + memcpy(tmp_pgd_table, swapper_pg_dir, sizeof(tmp_pgd_table)); +#ifdef CONFIG_ARM_LPAE + /* We need to be in the same PGD or this won't work */ + BUILD_BUG_ON(pgd_index(KASAN_SHADOW_START) != + pgd_index(KASAN_SHADOW_END)); + memcpy(tmp_pmd_table, + pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_START)), + sizeof(tmp_pmd_table)); + set_pgd(&tmp_pgd_table[pgd_index(KASAN_SHADOW_START)], + __pgd(__pa(tmp_pmd_table) | PMD_TYPE_TABLE | L_PGD_SWAPPER)); +#endif + cpu_switch_mm(tmp_pgd_table, &init_mm); + local_flush_tlb_all(); + + clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); + + kasan_populate_early_shadow(kasan_mem_to_shadow((void *)VMALLOC_START), + kasan_mem_to_shadow((void *)-1UL) + 1); + + for_each_mem_range(i, &pa_start, &pa_end) { + void *start = __va(pa_start); + void *end = __va(pa_end); + + /* Do not attempt to shadow highmem */ + if (pa_start >= arm_lowmem_limit) { + pr_info("Skip highmem block at %pa-%pa\n", &pa_start, &pa_end); + continue; + } + if (pa_end > arm_lowmem_limit) { + pr_info("Truncating shadow for memory block at %pa-%pa to lowmem region at %pa\n", + &pa_start, &pa_end, &arm_lowmem_limit); + end = __va(arm_lowmem_limit); + } + if (start >= end) { + pr_info("Skipping invalid memory block %pa-%pa (virtual %p-%p)\n", + &pa_start, &pa_end, start, end); + continue; + } + + create_mapping(start, end); + } + + /* + * 1. The module global variables are in MODULES_VADDR ~ MODULES_END, + * so we need to map this area. + * 2. PKMAP_BASE ~ PKMAP_BASE+PMD_SIZE's shadow and MODULES_VADDR + * ~ MODULES_END's shadow is in the same PMD_SIZE, so we can't + * use kasan_populate_zero_shadow. + */ + create_mapping((void *)MODULES_VADDR, (void *)(PKMAP_BASE + PMD_SIZE)); + + /* + * KAsan may reuse the contents of kasan_early_shadow_pte directly, so + * we should make sure that it maps the zero page read-only. + */ + for (i = 0; i < PTRS_PER_PTE; i++) + set_pte_at(&init_mm, KASAN_SHADOW_START + i*PAGE_SIZE, + &kasan_early_shadow_pte[i], + pfn_pte(virt_to_pfn(kasan_early_shadow_page), + __pgprot(pgprot_val(PAGE_KERNEL) + | L_PTE_RDONLY))); + + cpu_switch_mm(swapper_pg_dir, &init_mm); + local_flush_tlb_all(); + + memset(kasan_early_shadow_page, 0, PAGE_SIZE); + pr_info("Kernel address sanitizer initialized\n"); + init_task.kasan_depth = 0; +} diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index ab69250a86bc..c06ebfbc48c4 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -29,6 +29,7 @@ #include <asm/procinfo.h> #include <asm/memory.h> #include <asm/pgalloc.h> +#include <asm/kasan_def.h> #include <asm/mach/arch.h> #include <asm/mach/map.h> @@ -39,6 +40,8 @@ #include "mm.h" #include "tcm.h" +extern unsigned long __atags_pointer; + /* * empty_zero_page is a special page that is used for * zero-initialized data and COW. @@ -946,7 +949,7 @@ static void __init create_mapping(struct map_desc *md) return; } - if ((md->type == MT_DEVICE || md->type == MT_ROM) && + if (md->type == MT_DEVICE && md->virtual >= PAGE_OFFSET && md->virtual < FIXADDR_START && (md->virtual < VMALLOC_START || md->virtual >= VMALLOC_END)) { pr_warn("BUG: mapping for 0x%08llx at 0x%08lx out of vmalloc space\n", @@ -1253,8 +1256,25 @@ static inline void prepare_page_table(void) /* * Clear out all the mappings below the kernel image. */ +#ifdef CONFIG_KASAN + /* + * KASan's shadow memory inserts itself between the TASK_SIZE + * and MODULES_VADDR. Do not clear the KASan shadow memory mappings. + */ + for (addr = 0; addr < KASAN_SHADOW_START; addr += PMD_SIZE) + pmd_clear(pmd_off_k(addr)); + /* + * Skip over the KASan shadow area. KASAN_SHADOW_END is sometimes + * equal to MODULES_VADDR and then we exit the pmd clearing. If we + * are using a thumb-compiled kernel, there there will be 8MB more + * to clear as KASan always offset to 16 MB below MODULES_VADDR. + */ + for (addr = KASAN_SHADOW_END; addr < MODULES_VADDR; addr += PMD_SIZE) + pmd_clear(pmd_off_k(addr)); +#else for (addr = 0; addr < MODULES_VADDR; addr += PMD_SIZE) pmd_clear(pmd_off_k(addr)); +#endif #ifdef CONFIG_XIP_KERNEL /* The XIP kernel is mapped in the module area -- skip over it */ @@ -1333,6 +1353,15 @@ static void __init devicemaps_init(const struct machine_desc *mdesc) for (addr = VMALLOC_START; addr < (FIXADDR_TOP & PMD_MASK); addr += PMD_SIZE) pmd_clear(pmd_off_k(addr)); + if (__atags_pointer) { + /* create a read-only mapping of the device tree */ + map.pfn = __phys_to_pfn(__atags_pointer & SECTION_MASK); + map.virtual = FDT_FIXED_BASE; + map.length = FDT_FIXED_SIZE; + map.type = MT_ROM; + create_mapping(&map); + } + /* * Map the kernel if it is XIP. * It is always first in the modulearea. @@ -1489,8 +1518,7 @@ static void __init map_lowmem(void) } #ifdef CONFIG_ARM_PV_FIXUP -extern unsigned long __atags_pointer; -typedef void pgtables_remap(long long offset, unsigned long pgd, void *bdata); +typedef void pgtables_remap(long long offset, unsigned long pgd); pgtables_remap lpae_pgtables_remap_asm; /* @@ -1503,7 +1531,6 @@ static void __init early_paging_init(const struct machine_desc *mdesc) unsigned long pa_pgd; unsigned int cr, ttbcr; long long offset; - void *boot_data; if (!mdesc->pv_fixup) return; @@ -1520,7 +1547,6 @@ static void __init early_paging_init(const struct machine_desc *mdesc) */ lpae_pgtables_remap = (pgtables_remap *)(unsigned long)__pa(lpae_pgtables_remap_asm); pa_pgd = __pa(swapper_pg_dir); - boot_data = __va(__atags_pointer); barrier(); pr_info("Switching physical address space to 0x%08llx\n", @@ -1556,7 +1582,7 @@ static void __init early_paging_init(const struct machine_desc *mdesc) * needs to be assembly. It's fairly simple, as we're using the * temporary tables setup by the initial assembly code. */ - lpae_pgtables_remap(offset, pa_pgd, boot_data); + lpae_pgtables_remap(offset, pa_pgd); /* Re-enable the caches and cacheable TLB walks */ asm volatile("mcr p15, 0, %0, c2, c0, 2" : : "r" (ttbcr)); diff --git a/arch/arm/mm/pgd.c b/arch/arm/mm/pgd.c index c5e1b27046a8..f8e9bc58a84f 100644 --- a/arch/arm/mm/pgd.c +++ b/arch/arm/mm/pgd.c @@ -66,7 +66,21 @@ pgd_t *pgd_alloc(struct mm_struct *mm) new_pmd = pmd_alloc(mm, new_pud, 0); if (!new_pmd) goto no_pmd; -#endif +#ifdef CONFIG_KASAN + /* + * Copy PMD table for KASAN shadow mappings. + */ + init_pgd = pgd_offset_k(TASK_SIZE); + init_p4d = p4d_offset(init_pgd, TASK_SIZE); + init_pud = pud_offset(init_p4d, TASK_SIZE); + init_pmd = pmd_offset(init_pud, TASK_SIZE); + new_pmd = pmd_offset(new_pud, TASK_SIZE); + memcpy(new_pmd, init_pmd, + (pmd_index(MODULES_VADDR) - pmd_index(TASK_SIZE)) + * sizeof(pmd_t)); + clean_dcache_area(new_pmd, PTRS_PER_PMD * sizeof(pmd_t)); +#endif /* CONFIG_KASAN */ +#endif /* CONFIG_LPAE */ if (!vectors_high()) { /* diff --git a/arch/arm/mm/pv-fixup-asm.S b/arch/arm/mm/pv-fixup-asm.S index 8eade0416739..5c5e1952000a 100644 --- a/arch/arm/mm/pv-fixup-asm.S +++ b/arch/arm/mm/pv-fixup-asm.S @@ -39,8 +39,8 @@ ENTRY(lpae_pgtables_remap_asm) /* Update level 2 entries for the boot data */ add r7, r2, #0x1000 - add r7, r7, r3, lsr #SECTION_SHIFT - L2_ORDER - bic r7, r7, #(1 << L2_ORDER) - 1 + movw r3, #FDT_FIXED_BASE >> (SECTION_SHIFT - L2_ORDER) + add r7, r7, r3 ldrd r4, r5, [r7] adds r4, r4, r0 adc r5, r5, r1 diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile index 150ce6e6a5d3..b558bee0e1f6 100644 --- a/arch/arm/vdso/Makefile +++ b/arch/arm/vdso/Makefile @@ -42,6 +42,8 @@ GCOV_PROFILE := n # Prevents link failures: __sanitizer_cov_trace_pc() is not linked in. KCOV_INSTRUMENT := n +KASAN_SANITIZE := n + # Force dependency $(obj)/vdso.o : $(obj)/vdso.so diff --git a/arch/arm/vfp/entry.S b/arch/arm/vfp/entry.S index 0186cf9da890..27b0a1f27fbd 100644 --- a/arch/arm/vfp/entry.S +++ b/arch/arm/vfp/entry.S @@ -37,20 +37,3 @@ ENDPROC(vfp_null_entry) .align 2 .LCvfp: .word vfp_vector - -@ This code is called if the VFP does not exist. It needs to flag the -@ failure to the VFP initialisation code. - - __INIT -ENTRY(vfp_testing_entry) - dec_preempt_count_ti r10, r4 - ldr r0, VFP_arch_address - str r0, [r0] @ set to non-zero value - ret r9 @ we have handled the fault -ENDPROC(vfp_testing_entry) - - .align 2 -VFP_arch_address: - .word VFP_arch - - __FINIT diff --git a/arch/arm/vfp/vfphw.S b/arch/arm/vfp/vfphw.S index 4fcff9f59947..d5837bf05a9a 100644 --- a/arch/arm/vfp/vfphw.S +++ b/arch/arm/vfp/vfphw.S @@ -79,11 +79,6 @@ ENTRY(vfp_support_entry) DBGSTR3 "instr %08x pc %08x state %p", r0, r2, r10 .fpu vfpv2 - ldr r3, [sp, #S_PSR] @ Neither lazy restore nor FP exceptions - and r3, r3, #MODE_MASK @ are supported in kernel mode - teq r3, #USR_MODE - bne vfp_kmode_exception @ Returns through lr - VFPFMRX r1, FPEXC @ Is the VFP enabled? DBGSTR1 "fpexc %08x", r1 tst r1, #FPEXC_EN diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index 8c9e7f9f0277..2cb355c1b5b7 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -23,6 +23,7 @@ #include <asm/cputype.h> #include <asm/system_info.h> #include <asm/thread_notify.h> +#include <asm/traps.h> #include <asm/vfp.h> #include "vfpinstr.h" @@ -31,7 +32,6 @@ /* * Our undef handlers (in entry.S) */ -asmlinkage void vfp_testing_entry(void); asmlinkage void vfp_support_entry(void); asmlinkage void vfp_null_entry(void); @@ -42,7 +42,7 @@ asmlinkage void (*vfp_vector)(void) = vfp_null_entry; * Used in startup: set to non-zero if VFP checks fail * After startup, holds VFP architecture */ -unsigned int VFP_arch; +static unsigned int __initdata VFP_arch; /* * The pointer to the vfpstate structure of the thread which currently @@ -436,7 +436,7 @@ static void vfp_enable(void *unused) * present on all CPUs within a SMP complex. Needs to be called prior to * vfp_init(). */ -void vfp_disable(void) +void __init vfp_disable(void) { if (VFP_arch) { pr_debug("%s: should be called prior to vfp_init\n", __func__); @@ -642,7 +642,9 @@ static int vfp_starting_cpu(unsigned int unused) return 0; } -void vfp_kmode_exception(void) +#ifdef CONFIG_KERNEL_MODE_NEON + +static int vfp_kmode_exception(struct pt_regs *regs, unsigned int instr) { /* * If we reach this point, a floating point exception has been raised @@ -660,9 +662,51 @@ void vfp_kmode_exception(void) pr_crit("BUG: unsupported FP instruction in kernel mode\n"); else pr_crit("BUG: FP instruction issued in kernel mode with FP unit disabled\n"); + pr_crit("FPEXC == 0x%08x\n", fmrx(FPEXC)); + return 1; } -#ifdef CONFIG_KERNEL_MODE_NEON +static struct undef_hook vfp_kmode_exception_hook[] = {{ + .instr_mask = 0xfe000000, + .instr_val = 0xf2000000, + .cpsr_mask = MODE_MASK | PSR_T_BIT, + .cpsr_val = SVC_MODE, + .fn = vfp_kmode_exception, +}, { + .instr_mask = 0xff100000, + .instr_val = 0xf4000000, + .cpsr_mask = MODE_MASK | PSR_T_BIT, + .cpsr_val = SVC_MODE, + .fn = vfp_kmode_exception, +}, { + .instr_mask = 0xef000000, + .instr_val = 0xef000000, + .cpsr_mask = MODE_MASK | PSR_T_BIT, + .cpsr_val = SVC_MODE | PSR_T_BIT, + .fn = vfp_kmode_exception, +}, { + .instr_mask = 0xff100000, + .instr_val = 0xf9000000, + .cpsr_mask = MODE_MASK | PSR_T_BIT, + .cpsr_val = SVC_MODE | PSR_T_BIT, + .fn = vfp_kmode_exception, +}, { + .instr_mask = 0x0c000e00, + .instr_val = 0x0c000a00, + .cpsr_mask = MODE_MASK, + .cpsr_val = SVC_MODE, + .fn = vfp_kmode_exception, +}}; + +static int __init vfp_kmode_exception_hook_init(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(vfp_kmode_exception_hook); i++) + register_undef_hook(&vfp_kmode_exception_hook[i]); + return 0; +} +subsys_initcall(vfp_kmode_exception_hook_init); /* * Kernel-side NEON support functions @@ -708,6 +752,21 @@ EXPORT_SYMBOL(kernel_neon_end); #endif /* CONFIG_KERNEL_MODE_NEON */ +static int __init vfp_detect(struct pt_regs *regs, unsigned int instr) +{ + VFP_arch = UINT_MAX; /* mark as not present */ + regs->ARM_pc += 4; + return 0; +} + +static struct undef_hook vfp_detect_hook __initdata = { + .instr_mask = 0x0c000e00, + .instr_val = 0x0c000a00, + .cpsr_mask = MODE_MASK, + .cpsr_val = SVC_MODE, + .fn = vfp_detect, +}; + /* * VFP support code initialisation. */ @@ -728,10 +787,11 @@ static int __init vfp_init(void) * The handler is already setup to just log calls, so * we just need to read the VFPSID register. */ - vfp_vector = vfp_testing_entry; + register_undef_hook(&vfp_detect_hook); barrier(); vfpsid = fmrx(FPSID); barrier(); + unregister_undef_hook(&vfp_detect_hook); vfp_vector = vfp_null_entry; pr_info("VFP support v0.3: "); diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index ae7391627054..107bb4319e0e 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -161,6 +161,7 @@ config PPC select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN select DMA_OPS if PPC64 select DMA_OPS_BYPASS if PPC64 + select ARCH_HAS_DMA_MAP_DIRECT if PPC64 && PPC_PSERIES select DYNAMIC_FTRACE if FUNCTION_TRACER select EDAC_ATOMIC_SCRUB select EDAC_SUPPORT diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index a1c744194018..111249fd619d 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -10,6 +10,63 @@ #include <linux/pci.h> #include <asm/iommu.h> +#ifdef CONFIG_ARCH_HAS_DMA_MAP_DIRECT +#define can_map_direct(dev, addr) \ + ((dev)->bus_dma_limit >= phys_to_dma((dev), (addr))) + +bool arch_dma_map_page_direct(struct device *dev, phys_addr_t addr) +{ + if (likely(!dev->bus_dma_limit)) + return false; + + return can_map_direct(dev, addr); +} + +#define is_direct_handle(dev, h) ((h) >= (dev)->archdata.dma_offset) + +bool arch_dma_unmap_page_direct(struct device *dev, dma_addr_t dma_handle) +{ + if (likely(!dev->bus_dma_limit)) + return false; + + return is_direct_handle(dev, dma_handle); +} + +bool arch_dma_map_sg_direct(struct device *dev, struct scatterlist *sg, + int nents) +{ + struct scatterlist *s; + int i; + + if (likely(!dev->bus_dma_limit)) + return false; + + for_each_sg(sg, s, nents, i) { + if (!can_map_direct(dev, sg_phys(s) + s->offset + s->length)) + return false; + } + + return true; +} + +bool arch_dma_unmap_sg_direct(struct device *dev, struct scatterlist *sg, + int nents) +{ + struct scatterlist *s; + int i; + + if (likely(!dev->bus_dma_limit)) + return false; + + for_each_sg(sg, s, nents, i) { + if (!is_direct_handle(dev, s->dma_address + s->length)) + return false; + } + + return true; +} +#endif /* CONFIG_ARCH_HAS_DMA_MAP_DIRECT */ + /* * Generic iommu implementation */ @@ -90,8 +147,18 @@ int dma_iommu_dma_supported(struct device *dev, u64 mask) struct iommu_table *tbl = get_iommu_table_base(dev); if (dev_is_pci(dev) && dma_iommu_bypass_supported(dev, mask)) { - dev->dma_ops_bypass = true; - dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n"); + /* + * dma_iommu_bypass_supported() sets dma_max when there is + * 1:1 mapping but it is somehow limited. + * ibm,pmemory is one example. + */ + dev->dma_ops_bypass = dev->bus_dma_limit == 0; + if (!dev->dma_ops_bypass) + dev_warn(dev, + "iommu: 64-bit OK but direct DMA is limited by %llx\n", + dev->bus_dma_limit); + else + dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n"); return 1; } diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index e4198700ed1a..9fc5217f0c8e 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -839,7 +839,7 @@ static void remove_ddw(struct device_node *np, bool remove_prop) np, ret); } -static u64 find_existing_ddw(struct device_node *pdn) +static u64 find_existing_ddw(struct device_node *pdn, int *window_shift) { struct direct_window *window; const struct dynamic_dma_window_prop *direct64; @@ -851,6 +851,7 @@ static u64 find_existing_ddw(struct device_node *pdn) if (window->device == pdn) { direct64 = window->prop; dma_addr = be64_to_cpu(direct64->dma_base); + *window_shift = be32_to_cpu(direct64->window_shift); break; } } @@ -1111,11 +1112,12 @@ static void reset_dma_window(struct pci_dev *dev, struct device_node *par_dn) */ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) { - int len, ret; + int len = 0, ret; + int max_ram_len = order_base_2(ddw_memory_hotplug_max()); struct ddw_query_response query; struct ddw_create_response create; int page_shift; - u64 dma_addr, max_addr; + u64 dma_addr; struct device_node *dn; u32 ddw_avail[DDW_APPLICABLE_SIZE]; struct direct_window *window; @@ -1123,10 +1125,15 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) struct dynamic_dma_window_prop *ddwprop; struct failed_ddw_pdn *fpdn; bool default_win_removed = false; + bool pmem_present; + + dn = of_find_node_by_type(NULL, "ibm,pmemory"); + pmem_present = dn != NULL; + of_node_put(dn); mutex_lock(&direct_window_init_mutex); - dma_addr = find_existing_ddw(pdn); + dma_addr = find_existing_ddw(pdn, &len); if (dma_addr != 0) goto out_unlock; @@ -1212,14 +1219,29 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) } /* verify the window * number of ptes will map the partition */ /* check largest block * page size > max memory hotplug addr */ - max_addr = ddw_memory_hotplug_max(); - if (query.largest_available_block < (max_addr >> page_shift)) { - dev_dbg(&dev->dev, "can't map partition max 0x%llx with %llu " - "%llu-sized pages\n", max_addr, query.largest_available_block, - 1ULL << page_shift); + /* + * The "ibm,pmemory" can appear anywhere in the address space. + * Assuming it is still backed by page structs, try MAX_PHYSMEM_BITS + * for the upper limit and fallback to max RAM otherwise but this + * disables device::dma_ops_bypass. + */ + len = max_ram_len; + if (pmem_present) { + if (query.largest_available_block >= + (1ULL << (MAX_PHYSMEM_BITS - page_shift))) + len = MAX_PHYSMEM_BITS - page_shift; + else + dev_info(&dev->dev, "Skipping ibm,pmemory"); + } + + if (query.largest_available_block < (1ULL << (len - page_shift))) { + dev_dbg(&dev->dev, + "can't map partition max 0x%llx with %llu %llu-sized pages\n", + 1ULL << len, + query.largest_available_block, + 1ULL << page_shift); goto out_failed; } - len = order_base_2(max_addr); win64 = kzalloc(sizeof(struct property), GFP_KERNEL); if (!win64) { dev_info(&dev->dev, @@ -1299,6 +1321,15 @@ out_failed: out_unlock: mutex_unlock(&direct_window_init_mutex); + + /* + * If we have persistent memory and the window size is only as big + * as RAM, then we failed to create a window to cover persistent + * memory and need to set the DMA limit. + */ + if (pmem_present && dma_addr && (len == max_ram_len)) + dev->dev.bus_dma_limit = dma_addr + (1ULL << len); + return dma_addr; } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index fb55981f2a0d..7f5aec758f0e 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -303,11 +303,12 @@ DEFINE_IDTENTRY_ERRORCODE(exc_alignment_check) local_irq_enable(); if (handle_user_split_lock(regs, error_code)) - return; + goto out; do_trap(X86_TRAP_AC, SIGBUS, "alignment check", regs, error_code, BUS_ADRALN, NULL); +out: local_irq_disable(); } diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c index a250481b5a97..3bc2551577a3 100644 --- a/drivers/hwmon/k10temp.c +++ b/drivers/hwmon/k10temp.c @@ -11,13 +11,6 @@ * convert raw register values is from https://github.com/ocerman/zenpower. * The information is not confirmed from chip datasheets, but experiments * suggest that it provides reasonable temperature values. - * - Register addresses to read chip voltage and current are also from - * https://github.com/ocerman/zenpower, and not confirmed from chip - * datasheets. Current calibration is board specific and not typically - * shared by board vendors. For this reason, current values are - * normalized to report 1A/LSB for core current and and 0.25A/LSB for SoC - * current. Reported values can be adjusted using the sensors configuration - * file. */ #include <linux/bitops.h> @@ -109,10 +102,7 @@ struct k10temp_data { int temp_offset; u32 temp_adjust_mask; u32 show_temp; - u32 svi_addr[2]; bool is_zen; - bool show_current; - int cfactor[2]; }; #define TCTL_BIT 0 @@ -137,16 +127,6 @@ static const struct tctl_offset tctl_offset_table[] = { { 0x17, "AMD Ryzen Threadripper 29", 27000 }, /* 29{20,50,70,90}[W]X */ }; -static bool is_threadripper(void) -{ - return strstr(boot_cpu_data.x86_model_id, "Threadripper"); -} - -static bool is_epyc(void) -{ - return strstr(boot_cpu_data.x86_model_id, "EPYC"); -} - static void read_htcreg_pci(struct pci_dev *pdev, u32 *regval) { pci_read_config_dword(pdev, REG_HARDWARE_THERMAL_CONTROL, regval); @@ -211,16 +191,6 @@ static const char *k10temp_temp_label[] = { "Tccd8", }; -static const char *k10temp_in_label[] = { - "Vcore", - "Vsoc", -}; - -static const char *k10temp_curr_label[] = { - "Icore", - "Isoc", -}; - static int k10temp_read_labels(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel, const char **str) @@ -229,50 +199,6 @@ static int k10temp_read_labels(struct device *dev, case hwmon_temp: *str = k10temp_temp_label[channel]; break; - case hwmon_in: - *str = k10temp_in_label[channel]; - break; - case hwmon_curr: - *str = k10temp_curr_label[channel]; - break; - default: - return -EOPNOTSUPP; - } - return 0; -} - -static int k10temp_read_curr(struct device *dev, u32 attr, int channel, - long *val) -{ - struct k10temp_data *data = dev_get_drvdata(dev); - u32 regval; - - switch (attr) { - case hwmon_curr_input: - amd_smn_read(amd_pci_dev_to_node_id(data->pdev), - data->svi_addr[channel], ®val); - *val = DIV_ROUND_CLOSEST(data->cfactor[channel] * - (regval & 0xff), - 1000); - break; - default: - return -EOPNOTSUPP; - } - return 0; -} - -static int k10temp_read_in(struct device *dev, u32 attr, int channel, long *val) -{ - struct k10temp_data *data = dev_get_drvdata(dev); - u32 regval; - - switch (attr) { - case hwmon_in_input: - amd_smn_read(amd_pci_dev_to_node_id(data->pdev), - data->svi_addr[channel], ®val); - regval = (regval >> 16) & 0xff; - *val = DIV_ROUND_CLOSEST(155000 - regval * 625, 100); - break; default: return -EOPNOTSUPP; } @@ -331,10 +257,6 @@ static int k10temp_read(struct device *dev, enum hwmon_sensor_types type, switch (type) { case hwmon_temp: return k10temp_read_temp(dev, attr, channel, val); - case hwmon_in: - return k10temp_read_in(dev, attr, channel, val); - case hwmon_curr: - return k10temp_read_curr(dev, attr, channel, val); default: return -EOPNOTSUPP; } @@ -383,11 +305,6 @@ static umode_t k10temp_is_visible(const void *_data, return 0; } break; - case hwmon_in: - case hwmon_curr: - if (!data->show_current) - return 0; - break; default: return 0; } @@ -517,20 +434,10 @@ static int k10temp_probe(struct pci_dev *pdev, const struct pci_device_id *id) case 0x8: /* Zen+ */ case 0x11: /* Zen APU */ case 0x18: /* Zen+ APU */ - data->show_current = !is_threadripper() && !is_epyc(); - data->svi_addr[0] = F17H_M01H_SVI_TEL_PLANE0; - data->svi_addr[1] = F17H_M01H_SVI_TEL_PLANE1; - data->cfactor[0] = F17H_M01H_CFACTOR_ICORE; - data->cfactor[1] = F17H_M01H_CFACTOR_ISOC; k10temp_get_ccd_support(pdev, data, 4); break; case 0x31: /* Zen2 Threadripper */ case 0x71: /* Zen2 */ - data->show_current = !is_threadripper() && !is_epyc(); - data->cfactor[0] = F17H_M31H_CFACTOR_ICORE; - data->cfactor[1] = F17H_M31H_CFACTOR_ISOC; - data->svi_addr[0] = F17H_M31H_SVI_TEL_PLANE0; - data->svi_addr[1] = F17H_M31H_SVI_TEL_PLANE1; k10temp_get_ccd_support(pdev, data, 8); break; } @@ -542,11 +449,6 @@ static int k10temp_probe(struct pci_dev *pdev, const struct pci_device_id *id) switch (boot_cpu_data.x86_model) { case 0x0 ... 0x1: /* Zen3 */ - data->show_current = true; - data->svi_addr[0] = F19H_M01_SVI_TEL_PLANE0; - data->svi_addr[1] = F19H_M01_SVI_TEL_PLANE1; - data->cfactor[0] = F19H_M01H_CFACTOR_ICORE; - data->cfactor[1] = F19H_M01H_CFACTOR_ISOC; k10temp_get_ccd_support(pdev, data, 8); break; } diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 30ba3573626c..b7e2d9666614 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -463,6 +463,15 @@ config DM_MULTIPATH_HST If unsure, say N. +config DM_MULTIPATH_IOA + tristate "I/O Path Selector based on CPU submission" + depends on DM_MULTIPATH + help + This path selector selects the path based on the CPU the IO is + executed on and the CPU to path mapping setup at path addition time. + + If unsure, say N. + config DM_DELAY tristate "I/O delaying target" depends on BLK_DEV_DM @@ -530,11 +539,22 @@ config DM_VERITY_VERIFY_ROOTHASH_SIG bool "Verity data device root hash signature verification support" depends on DM_VERITY select SYSTEM_DATA_VERIFICATION - help + help Add ability for dm-verity device to be validated if the pre-generated tree of cryptographic checksums passed has a pkcs#7 signature file that can validate the roothash of the tree. + By default, rely on the builtin trusted keyring. + + If unsure, say N. + +config DM_VERITY_VERIFY_ROOTHASH_SIG_SECONDARY_KEYRING + bool "Verity data device root hash signature verification with secondary keyring" + depends on DM_VERITY_VERIFY_ROOTHASH_SIG + depends on SECONDARY_TRUSTED_KEYRING + help + Rely on the secondary trusted keyring to verify dm-verity signatures. + If unsure, say N. config DM_VERITY_FEC diff --git a/drivers/md/Makefile b/drivers/md/Makefile index 6d3e234dc46a..ef7ddc27685c 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -7,23 +7,28 @@ dm-mod-y += dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \ dm-ioctl.o dm-io.o dm-kcopyd.o dm-sysfs.o dm-stats.o \ dm-rq.o dm-multipath-y += dm-path-selector.o dm-mpath.o +dm-historical-service-time-y += dm-ps-historical-service-time.o +dm-io-affinity-y += dm-ps-io-affinity.o +dm-queue-length-y += dm-ps-queue-length.o +dm-round-robin-y += dm-ps-round-robin.o +dm-service-time-y += dm-ps-service-time.o dm-snapshot-y += dm-snap.o dm-exception-store.o dm-snap-transient.o \ dm-snap-persistent.o dm-mirror-y += dm-raid1.o -dm-log-userspace-y \ - += dm-log-userspace-base.o dm-log-userspace-transfer.o +dm-log-userspace-y += dm-log-userspace-base.o dm-log-userspace-transfer.o dm-bio-prison-y += dm-bio-prison-v1.o dm-bio-prison-v2.o dm-thin-pool-y += dm-thin.o dm-thin-metadata.o dm-cache-y += dm-cache-target.o dm-cache-metadata.o dm-cache-policy.o \ dm-cache-background-tracker.o -dm-cache-smq-y += dm-cache-policy-smq.o +dm-cache-smq-y += dm-cache-policy-smq.o dm-ebs-y += dm-ebs-target.o dm-era-y += dm-era-target.o dm-clone-y += dm-clone-target.o dm-clone-metadata.o dm-verity-y += dm-verity-target.o +dm-zoned-y += dm-zoned-target.o dm-zoned-metadata.o dm-zoned-reclaim.o + md-mod-y += md.o md-bitmap.o raid456-y += raid5.o raid5-cache.o raid5-ppl.o -dm-zoned-y += dm-zoned-target.o dm-zoned-metadata.o dm-zoned-reclaim.o linear-y += md-linear.o multipath-y += md-multipath.o faulty-y += md-faulty.o @@ -59,14 +64,15 @@ obj-$(CONFIG_DM_MULTIPATH) += dm-multipath.o dm-round-robin.o obj-$(CONFIG_DM_MULTIPATH_QL) += dm-queue-length.o obj-$(CONFIG_DM_MULTIPATH_ST) += dm-service-time.o obj-$(CONFIG_DM_MULTIPATH_HST) += dm-historical-service-time.o +obj-$(CONFIG_DM_MULTIPATH_IOA) += dm-io-affinity.o obj-$(CONFIG_DM_SWITCH) += dm-switch.o obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o -obj-$(CONFIG_DM_PERSISTENT_DATA) += persistent-data/ +obj-$(CONFIG_DM_PERSISTENT_DATA) += persistent-data/ obj-$(CONFIG_DM_MIRROR) += dm-mirror.o dm-log.o dm-region-hash.o obj-$(CONFIG_DM_LOG_USERSPACE) += dm-log-userspace.o obj-$(CONFIG_DM_ZERO) += dm-zero.o -obj-$(CONFIG_DM_RAID) += dm-raid.o -obj-$(CONFIG_DM_THIN_PROVISIONING) += dm-thin-pool.o +obj-$(CONFIG_DM_RAID) += dm-raid.o +obj-$(CONFIG_DM_THIN_PROVISIONING) += dm-thin-pool.o obj-$(CONFIG_DM_VERITY) += dm-verity.o obj-$(CONFIG_DM_CACHE) += dm-cache.o obj-$(CONFIG_DM_CACHE_SMQ) += dm-cache-smq.o diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 4bc453f5bbaa..541c45027cc8 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -2840,7 +2840,6 @@ static void cache_postsuspend(struct dm_target *ti) static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock, bool dirty, uint32_t hint, bool hint_valid) { - int r; struct cache *cache = context; if (dirty) { @@ -2849,11 +2848,7 @@ static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock, } else clear_bit(from_cblock(cblock), cache->dirty_bitset); - r = policy_load_mapping(cache->policy, oblock, cblock, dirty, hint, hint_valid); - if (r) - return r; - - return 0; + return policy_load_mapping(cache->policy, oblock, cblock, dirty, hint, hint_valid); } /* diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 392337f16ecf..5f9f9b3a226d 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1090,16 +1090,16 @@ static const struct crypt_iv_operations crypt_iv_tcw_ops = { .post = crypt_iv_tcw_post }; -static struct crypt_iv_operations crypt_iv_random_ops = { +static const struct crypt_iv_operations crypt_iv_random_ops = { .generator = crypt_iv_random_gen }; -static struct crypt_iv_operations crypt_iv_eboiv_ops = { +static const struct crypt_iv_operations crypt_iv_eboiv_ops = { .ctr = crypt_iv_eboiv_ctr, .generator = crypt_iv_eboiv_gen }; -static struct crypt_iv_operations crypt_iv_elephant_ops = { +static const struct crypt_iv_operations crypt_iv_elephant_ops = { .ctr = crypt_iv_elephant_ctr, .dtr = crypt_iv_elephant_dtr, .init = crypt_iv_elephant_init, @@ -3166,11 +3166,12 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) } if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags)) - cc->crypt_queue = alloc_workqueue("kcryptd/%s", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, + cc->crypt_queue = alloc_workqueue("kcryptd-%s", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 1, devname); else - cc->crypt_queue = alloc_workqueue("kcryptd/%s", - WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND, + cc->crypt_queue = alloc_workqueue("kcryptd-%s", + WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | + WQ_UNBOUND | WQ_SYSFS, num_online_cpus(), devname); if (!cc->crypt_queue) { ti->error = "Couldn't create kcryptd queue"; diff --git a/drivers/md/dm-ebs-target.c b/drivers/md/dm-ebs-target.c index cb85610527c2..55bcfb74f51f 100644 --- a/drivers/md/dm-ebs-target.c +++ b/drivers/md/dm-ebs-target.c @@ -86,7 +86,7 @@ static int __ebs_rw_bvec(struct ebs_c *ec, int rw, struct bio_vec *bv, struct bv else ba = dm_bufio_new(ec->bufio, block, &b); - if (unlikely(IS_ERR(ba))) { + if (IS_ERR(ba)) { /* * Carry on with next buffer, if any, to issue all possible * data but return error. diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index cd0478d44058..5e306bba4375 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1600,6 +1600,7 @@ static int target_message(struct file *filp, struct dm_ioctl *param, size_t para if (!argc) { DMWARN("Empty message received."); + r = -EINVAL; goto out_argv; } diff --git a/drivers/md/dm-historical-service-time.c b/drivers/md/dm-ps-historical-service-time.c index 186f91e2752c..186f91e2752c 100644 --- a/drivers/md/dm-historical-service-time.c +++ b/drivers/md/dm-ps-historical-service-time.c diff --git a/drivers/md/dm-ps-io-affinity.c b/drivers/md/dm-ps-io-affinity.c new file mode 100644 index 000000000000..077655cd4fae --- /dev/null +++ b/drivers/md/dm-ps-io-affinity.c @@ -0,0 +1,272 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020 Oracle Corporation + * + * Module Author: Mike Christie + */ +#include "dm-path-selector.h" + +#include <linux/device-mapper.h> +#include <linux/module.h> + +#define DM_MSG_PREFIX "multipath io-affinity" + +struct path_info { + struct dm_path *path; + cpumask_var_t cpumask; + refcount_t refcount; + bool failed; +}; + +struct selector { + struct path_info **path_map; + cpumask_var_t path_mask; + atomic_t map_misses; +}; + +static void ioa_free_path(struct selector *s, unsigned int cpu) +{ + struct path_info *pi = s->path_map[cpu]; + + if (!pi) + return; + + if (refcount_dec_and_test(&pi->refcount)) { + cpumask_clear_cpu(cpu, s->path_mask); + free_cpumask_var(pi->cpumask); + kfree(pi); + + s->path_map[cpu] = NULL; + } +} + +static int ioa_add_path(struct path_selector *ps, struct dm_path *path, + int argc, char **argv, char **error) +{ + struct selector *s = ps->context; + struct path_info *pi = NULL; + unsigned int cpu; + int ret; + + if (argc != 1) { + *error = "io-affinity ps: invalid number of arguments"; + return -EINVAL; + } + + pi = kzalloc(sizeof(*pi), GFP_KERNEL); + if (!pi) { + *error = "io-affinity ps: Error allocating path context"; + return -ENOMEM; + } + + pi->path = path; + path->pscontext = pi; + refcount_set(&pi->refcount, 1); + + if (!zalloc_cpumask_var(&pi->cpumask, GFP_KERNEL)) { + *error = "io-affinity ps: Error allocating cpumask context"; + ret = -ENOMEM; + goto free_pi; + } + + ret = cpumask_parse(argv[0], pi->cpumask); + if (ret) { + *error = "io-affinity ps: invalid cpumask"; + ret = -EINVAL; + goto free_mask; + } + + for_each_cpu(cpu, pi->cpumask) { + if (cpu >= nr_cpu_ids) { + DMWARN_LIMIT("Ignoring mapping for CPU %u. Max CPU is %u", + cpu, nr_cpu_ids); + break; + } + + if (s->path_map[cpu]) { + DMWARN("CPU mapping for %u exists. Ignoring.", cpu); + continue; + } + + cpumask_set_cpu(cpu, s->path_mask); + s->path_map[cpu] = pi; + refcount_inc(&pi->refcount); + continue; + } + + if (refcount_dec_and_test(&pi->refcount)) { + *error = "io-affinity ps: No new/valid CPU mapping found"; + ret = -EINVAL; + goto free_mask; + } + + return 0; + +free_mask: + free_cpumask_var(pi->cpumask); +free_pi: + kfree(pi); + return ret; +} + +static int ioa_create(struct path_selector *ps, unsigned argc, char **argv) +{ + struct selector *s; + + s = kmalloc(sizeof(*s), GFP_KERNEL); + if (!s) + return -ENOMEM; + + s->path_map = kzalloc(nr_cpu_ids * sizeof(struct path_info *), + GFP_KERNEL); + if (!s->path_map) + goto free_selector; + + if (!zalloc_cpumask_var(&s->path_mask, GFP_KERNEL)) + goto free_map; + + atomic_set(&s->map_misses, 0); + ps->context = s; + return 0; + +free_map: + kfree(s->path_map); +free_selector: + kfree(s); + return -ENOMEM; +} + +static void ioa_destroy(struct path_selector *ps) +{ + struct selector *s = ps->context; + unsigned cpu; + + for_each_cpu(cpu, s->path_mask) + ioa_free_path(s, cpu); + + free_cpumask_var(s->path_mask); + kfree(s->path_map); + kfree(s); + + ps->context = NULL; +} + +static int ioa_status(struct path_selector *ps, struct dm_path *path, + status_type_t type, char *result, unsigned int maxlen) +{ + struct selector *s = ps->context; + struct path_info *pi; + int sz = 0; + + if (!path) { + DMEMIT("0 "); + return sz; + } + + switch(type) { + case STATUSTYPE_INFO: + DMEMIT("%d ", atomic_read(&s->map_misses)); + break; + case STATUSTYPE_TABLE: + pi = path->pscontext; + DMEMIT("%*pb ", cpumask_pr_args(pi->cpumask)); + break; + } + + return sz; +} + +static void ioa_fail_path(struct path_selector *ps, struct dm_path *p) +{ + struct path_info *pi = p->pscontext; + + pi->failed = true; +} + +static int ioa_reinstate_path(struct path_selector *ps, struct dm_path *p) +{ + struct path_info *pi = p->pscontext; + + pi->failed = false; + return 0; +} + +static struct dm_path *ioa_select_path(struct path_selector *ps, + size_t nr_bytes) +{ + unsigned int cpu, node; + struct selector *s = ps->context; + const struct cpumask *cpumask; + struct path_info *pi; + int i; + + cpu = get_cpu(); + + pi = s->path_map[cpu]; + if (pi && !pi->failed) + goto done; + + /* + * Perf is not optimal, but we at least try the local node then just + * try not to fail. + */ + if (!pi) + atomic_inc(&s->map_misses); + + node = cpu_to_node(cpu); + cpumask = cpumask_of_node(node); + for_each_cpu(i, cpumask) { + pi = s->path_map[i]; + if (pi && !pi->failed) + goto done; + } + + for_each_cpu(i, s->path_mask) { + pi = s->path_map[i]; + if (pi && !pi->failed) + goto done; + } + pi = NULL; + +done: + put_cpu(); + return pi ? pi->path : NULL; +} + +static struct path_selector_type ioa_ps = { + .name = "io-affinity", + .module = THIS_MODULE, + .table_args = 1, + .info_args = 1, + .create = ioa_create, + .destroy = ioa_destroy, + .status = ioa_status, + .add_path = ioa_add_path, + .fail_path = ioa_fail_path, + .reinstate_path = ioa_reinstate_path, + .select_path = ioa_select_path, +}; + +static int __init dm_ioa_init(void) +{ + int ret = dm_register_path_selector(&ioa_ps); + + if (ret < 0) + DMERR("register failed %d", ret); + return ret; +} + +static void __exit dm_ioa_exit(void) +{ + int ret = dm_unregister_path_selector(&ioa_ps); + + if (ret < 0) + DMERR("unregister failed %d", ret); +} + +module_init(dm_ioa_init); +module_exit(dm_ioa_exit); + +MODULE_DESCRIPTION(DM_NAME " multipath path selector that selects paths based on the CPU IO is being executed on"); +MODULE_AUTHOR("Mike Christie <michael.christie@oracle.com>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/md/dm-queue-length.c b/drivers/md/dm-ps-queue-length.c index 5fd018d18418..5fd018d18418 100644 --- a/drivers/md/dm-queue-length.c +++ b/drivers/md/dm-ps-queue-length.c diff --git a/drivers/md/dm-round-robin.c b/drivers/md/dm-ps-round-robin.c index bdbb7e6e8212..bdbb7e6e8212 100644 --- a/drivers/md/dm-round-robin.c +++ b/drivers/md/dm-ps-round-robin.c diff --git a/drivers/md/dm-service-time.c b/drivers/md/dm-ps-service-time.c index 9cfda665e9eb..9cfda665e9eb 100644 --- a/drivers/md/dm-service-time.c +++ b/drivers/md/dm-ps-service-time.c diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index 151d022b032d..df359d33cda8 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -496,7 +496,7 @@ static void stripe_io_hints(struct dm_target *ti, static struct target_type stripe_target = { .name = "striped", .version = {1, 6, 0}, - .features = DM_TARGET_PASSES_INTEGRITY, + .features = DM_TARGET_PASSES_INTEGRITY | DM_TARGET_NOWAIT, .module = THIS_MODULE, .ctr = stripe_ctr, .dtr = stripe_dtr, diff --git a/drivers/md/dm-switch.c b/drivers/md/dm-switch.c index bff4c7fa1cd2..262e2b0fd975 100644 --- a/drivers/md/dm-switch.c +++ b/drivers/md/dm-switch.c @@ -550,6 +550,7 @@ static int switch_iterate_devices(struct dm_target *ti, static struct target_type switch_target = { .name = "switch", .version = {1, 1, 0}, + .features = DM_TARGET_NOWAIT, .module = THIS_MODULE, .ctr = switch_ctr, .dtr = switch_dtr, diff --git a/drivers/md/dm-unstripe.c b/drivers/md/dm-unstripe.c index e673dacf6418..7357c1bd5863 100644 --- a/drivers/md/dm-unstripe.c +++ b/drivers/md/dm-unstripe.c @@ -178,6 +178,7 @@ static void unstripe_io_hints(struct dm_target *ti, static struct target_type unstripe_target = { .name = "unstriped", .version = {1, 1, 0}, + .features = DM_TARGET_NOWAIT, .module = THIS_MODULE, .ctr = unstripe_ctr, .dtr = unstripe_dtr, diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index f74982dcbea0..6b8e5bdd8526 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -538,6 +538,15 @@ static int verity_verify_io(struct dm_verity_io *io) } /* + * Skip verity work in response to I/O error when system is shutting down. + */ +static inline bool verity_is_system_shutting_down(void) +{ + return system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF + || system_state == SYSTEM_RESTART; +} + +/* * End one "io" structure with a given error. */ static void verity_finish_io(struct dm_verity_io *io, blk_status_t status) @@ -564,7 +573,8 @@ static void verity_end_io(struct bio *bio) { struct dm_verity_io *io = bio->bi_private; - if (bio->bi_status && !verity_fec_is_enabled(io->v)) { + if (bio->bi_status && + (!verity_fec_is_enabled(io->v) || verity_is_system_shutting_down())) { verity_finish_io(io, bio->bi_status); return; } diff --git a/drivers/md/dm-verity-verify-sig.c b/drivers/md/dm-verity-verify-sig.c index 614e43db93aa..29385dc470d5 100644 --- a/drivers/md/dm-verity-verify-sig.c +++ b/drivers/md/dm-verity-verify-sig.c @@ -119,8 +119,13 @@ int verity_verify_root_hash(const void *root_hash, size_t root_hash_len, } ret = verify_pkcs7_signature(root_hash, root_hash_len, sig_data, - sig_len, NULL, VERIFYING_UNSPECIFIED_SIGNATURE, - NULL, NULL); + sig_len, +#ifdef CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG_SECONDARY_KEYRING + VERIFY_USE_SECONDARY_KEYRING, +#else + NULL, +#endif + VERIFYING_UNSPECIFIED_SIGNATURE, NULL, NULL); return ret; } diff --git a/drivers/md/dm-zero.c b/drivers/md/dm-zero.c index b65ca8dcfbdc..faa1dbffc8b4 100644 --- a/drivers/md/dm-zero.c +++ b/drivers/md/dm-zero.c @@ -59,6 +59,7 @@ static int zero_map(struct dm_target *ti, struct bio *bio) static struct target_type zero_target = { .name = "zero", .version = {1, 1, 0}, + .features = DM_TARGET_NOWAIT, .module = THIS_MODULE, .ctr = zero_ctr, .map = zero_map, diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 5b2f371ec4bb..b3c3c8b4cb42 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1586,7 +1586,7 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md, ci.sector_count = bio_sectors(bio); while (ci.sector_count && !error) { error = __split_and_process_non_flush(&ci); - if (current->bio_list && ci.sector_count && !error) { + if (ci.sector_count && !error) { /* * Remainder must be passed to submit_bio_noacct() * so that it gets handled *after* bios already submitted diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index b0983e2a4e2c..b839dd1b459f 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -267,6 +267,7 @@ static void configfs_remove_dirent(struct dentry *dentry) * configfs_create_dir - create a directory for an config_item. * @item: config_itemwe're creating directory for. * @dentry: config_item's dentry. + * @frag: config_item's fragment. * * Note: user-created entries won't be allowed under this new directory * until it is validated by configfs_dir_set_ready() diff --git a/fs/exfat/nls.c b/fs/exfat/nls.c index 675d0e7058c5..314d5407a1be 100644 --- a/fs/exfat/nls.c +++ b/fs/exfat/nls.c @@ -659,7 +659,7 @@ static int exfat_load_upcase_table(struct super_block *sb, unsigned char skip = false; unsigned short *upcase_table; - upcase_table = kcalloc(UTBL_COUNT, sizeof(unsigned short), GFP_KERNEL); + upcase_table = kvcalloc(UTBL_COUNT, sizeof(unsigned short), GFP_KERNEL); if (!upcase_table) return -ENOMEM; @@ -715,7 +715,7 @@ static int exfat_load_default_upcase_table(struct super_block *sb) unsigned short uni = 0, *upcase_table; unsigned int index = 0; - upcase_table = kcalloc(UTBL_COUNT, sizeof(unsigned short), GFP_KERNEL); + upcase_table = kvcalloc(UTBL_COUNT, sizeof(unsigned short), GFP_KERNEL); if (!upcase_table) return -ENOMEM; @@ -803,5 +803,5 @@ load_default: void exfat_free_upcase_table(struct exfat_sb_info *sbi) { - kfree(sbi->vol_utbl); + kvfree(sbi->vol_utbl); } diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index 03925e438ec3..70fcd0f610ea 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -317,6 +317,20 @@ static inline void arch_dma_mark_clean(phys_addr_t paddr, size_t size) void *arch_dma_set_uncached(void *addr, size_t size); void arch_dma_clear_uncached(void *addr, size_t size); +#ifdef CONFIG_ARCH_HAS_DMA_MAP_DIRECT +bool arch_dma_map_page_direct(struct device *dev, phys_addr_t addr); +bool arch_dma_unmap_page_direct(struct device *dev, dma_addr_t dma_handle); +bool arch_dma_map_sg_direct(struct device *dev, struct scatterlist *sg, + int nents); +bool arch_dma_unmap_sg_direct(struct device *dev, struct scatterlist *sg, + int nents); +#else +#define arch_dma_map_page_direct(d, a) (false) +#define arch_dma_unmap_page_direct(d, a) (false) +#define arch_dma_map_sg_direct(d, s, n) (false) +#define arch_dma_unmap_sg_direct(d, s, n) (false) +#endif + #ifdef CONFIG_ARCH_HAS_SETUP_DMA_OPS void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, const struct iommu_ops *iommu, bool coherent); diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index fd2db2665fc6..479fc145acfc 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -20,6 +20,10 @@ config DMA_OPS config DMA_OPS_BYPASS bool +# Lets platform IOMMU driver choose between bypass and IOMMU +config ARCH_HAS_DMA_MAP_DIRECT + bool + config NEED_SG_DMA_LENGTH bool @@ -220,3 +224,12 @@ config DMA_API_DEBUG_SG is technically out-of-spec. If unsure, say N. + +config DMA_MAP_BENCHMARK + bool "Enable benchmarking of streaming DMA mapping" + depends on DEBUG_FS + help + Provides /sys/kernel/debug/dma_map_benchmark that helps with testing + performance of dma_(un)map_page. + + See tools/testing/selftests/dma/dma_map_benchmark.c diff --git a/kernel/dma/Makefile b/kernel/dma/Makefile index cd1d86358a7a..0dd65ec1d234 100644 --- a/kernel/dma/Makefile +++ b/kernel/dma/Makefile @@ -9,3 +9,4 @@ obj-$(CONFIG_DMA_API_DEBUG) += debug.o obj-$(CONFIG_SWIOTLB) += swiotlb.o obj-$(CONFIG_DMA_COHERENT_POOL) += pool.o obj-$(CONFIG_DMA_REMAP) += remap.o +obj-$(CONFIG_DMA_MAP_BENCHMARK) += map_benchmark.o diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c index 16b95ff12e4d..3d63d91cba5c 100644 --- a/kernel/dma/contiguous.c +++ b/kernel/dma/contiguous.c @@ -20,7 +20,7 @@ * coders, etc. * * Such devices often require big memory buffers (a full HD frame - * is, for instance, more then 2 mega pixels large, i.e. more than 6 + * is, for instance, more than 2 mega pixels large, i.e. more than 6 * MB of memory), which makes mechanisms such as kmalloc() or * alloc_page() ineffective. * diff --git a/kernel/dma/map_benchmark.c b/kernel/dma/map_benchmark.c new file mode 100644 index 000000000000..b1496e744c68 --- /dev/null +++ b/kernel/dma/map_benchmark.c @@ -0,0 +1,361 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020 Hisilicon Limited. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/debugfs.h> +#include <linux/delay.h> +#include <linux/device.h> +#include <linux/dma-mapping.h> +#include <linux/kernel.h> +#include <linux/kthread.h> +#include <linux/math64.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/platform_device.h> +#include <linux/slab.h> +#include <linux/timekeeping.h> + +#define DMA_MAP_BENCHMARK _IOWR('d', 1, struct map_benchmark) +#define DMA_MAP_MAX_THREADS 1024 +#define DMA_MAP_MAX_SECONDS 300 + +#define DMA_MAP_BIDIRECTIONAL 0 +#define DMA_MAP_TO_DEVICE 1 +#define DMA_MAP_FROM_DEVICE 2 + +struct map_benchmark { + __u64 avg_map_100ns; /* average map latency in 100ns */ + __u64 map_stddev; /* standard deviation of map latency */ + __u64 avg_unmap_100ns; /* as above */ + __u64 unmap_stddev; + __u32 threads; /* how many threads will do map/unmap in parallel */ + __u32 seconds; /* how long the test will last */ + __s32 node; /* which numa node this benchmark will run on */ + __u32 dma_bits; /* DMA addressing capability */ + __u32 dma_dir; /* DMA data direction */ + __u64 expansion[10]; /* For future use */ +}; + +struct map_benchmark_data { + struct map_benchmark bparam; + struct device *dev; + struct dentry *debugfs; + enum dma_data_direction dir; + atomic64_t sum_map_100ns; + atomic64_t sum_unmap_100ns; + atomic64_t sum_sq_map; + atomic64_t sum_sq_unmap; + atomic64_t loops; +}; + +static int map_benchmark_thread(void *data) +{ + void *buf; + dma_addr_t dma_addr; + struct map_benchmark_data *map = data; + int ret = 0; + + buf = (void *)__get_free_page(GFP_KERNEL); + if (!buf) + return -ENOMEM; + + while (!kthread_should_stop()) { + u64 map_100ns, unmap_100ns, map_sq, unmap_sq; + ktime_t map_stime, map_etime, unmap_stime, unmap_etime; + ktime_t map_delta, unmap_delta; + + /* + * for a non-coherent device, if we don't stain them in the + * cache, this will give an underestimate of the real-world + * overhead of BIDIRECTIONAL or TO_DEVICE mappings; + * 66 means evertything goes well! 66 is lucky. + */ + if (map->dir != DMA_FROM_DEVICE) + memset(buf, 0x66, PAGE_SIZE); + + map_stime = ktime_get(); + dma_addr = dma_map_single(map->dev, buf, PAGE_SIZE, map->dir); + if (unlikely(dma_mapping_error(map->dev, dma_addr))) { + pr_err("dma_map_single failed on %s\n", + dev_name(map->dev)); + ret = -ENOMEM; + goto out; + } + map_etime = ktime_get(); + map_delta = ktime_sub(map_etime, map_stime); + + unmap_stime = ktime_get(); + dma_unmap_single(map->dev, dma_addr, PAGE_SIZE, map->dir); + unmap_etime = ktime_get(); + unmap_delta = ktime_sub(unmap_etime, unmap_stime); + + /* calculate sum and sum of squares */ + + map_100ns = div64_ul(map_delta, 100); + unmap_100ns = div64_ul(unmap_delta, 100); + map_sq = map_100ns * map_100ns; + unmap_sq = unmap_100ns * unmap_100ns; + + atomic64_add(map_100ns, &map->sum_map_100ns); + atomic64_add(unmap_100ns, &map->sum_unmap_100ns); + atomic64_add(map_sq, &map->sum_sq_map); + atomic64_add(unmap_sq, &map->sum_sq_unmap); + atomic64_inc(&map->loops); + } + +out: + free_page((unsigned long)buf); + return ret; +} + +static int do_map_benchmark(struct map_benchmark_data *map) +{ + struct task_struct **tsk; + int threads = map->bparam.threads; + int node = map->bparam.node; + const cpumask_t *cpu_mask = cpumask_of_node(node); + u64 loops; + int ret = 0; + int i; + + tsk = kmalloc_array(threads, sizeof(*tsk), GFP_KERNEL); + if (!tsk) + return -ENOMEM; + + get_device(map->dev); + + for (i = 0; i < threads; i++) { + tsk[i] = kthread_create_on_node(map_benchmark_thread, map, + map->bparam.node, "dma-map-benchmark/%d", i); + if (IS_ERR(tsk[i])) { + pr_err("create dma_map thread failed\n"); + ret = PTR_ERR(tsk[i]); + goto out; + } + + if (node != NUMA_NO_NODE) + kthread_bind_mask(tsk[i], cpu_mask); + } + + /* clear the old value in the previous benchmark */ + atomic64_set(&map->sum_map_100ns, 0); + atomic64_set(&map->sum_unmap_100ns, 0); + atomic64_set(&map->sum_sq_map, 0); + atomic64_set(&map->sum_sq_unmap, 0); + atomic64_set(&map->loops, 0); + + for (i = 0; i < threads; i++) + wake_up_process(tsk[i]); + + msleep_interruptible(map->bparam.seconds * 1000); + + /* wait for the completion of benchmark threads */ + for (i = 0; i < threads; i++) { + ret = kthread_stop(tsk[i]); + if (ret) + goto out; + } + + loops = atomic64_read(&map->loops); + if (likely(loops > 0)) { + u64 map_variance, unmap_variance; + u64 sum_map = atomic64_read(&map->sum_map_100ns); + u64 sum_unmap = atomic64_read(&map->sum_unmap_100ns); + u64 sum_sq_map = atomic64_read(&map->sum_sq_map); + u64 sum_sq_unmap = atomic64_read(&map->sum_sq_unmap); + + /* average latency */ + map->bparam.avg_map_100ns = div64_u64(sum_map, loops); + map->bparam.avg_unmap_100ns = div64_u64(sum_unmap, loops); + + /* standard deviation of latency */ + map_variance = div64_u64(sum_sq_map, loops) - + map->bparam.avg_map_100ns * + map->bparam.avg_map_100ns; + unmap_variance = div64_u64(sum_sq_unmap, loops) - + map->bparam.avg_unmap_100ns * + map->bparam.avg_unmap_100ns; + map->bparam.map_stddev = int_sqrt64(map_variance); + map->bparam.unmap_stddev = int_sqrt64(unmap_variance); + } + +out: + put_device(map->dev); + kfree(tsk); + return ret; +} + +static long map_benchmark_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct map_benchmark_data *map = file->private_data; + void __user *argp = (void __user *)arg; + u64 old_dma_mask; + + int ret; + + if (copy_from_user(&map->bparam, argp, sizeof(map->bparam))) + return -EFAULT; + + switch (cmd) { + case DMA_MAP_BENCHMARK: + if (map->bparam.threads == 0 || + map->bparam.threads > DMA_MAP_MAX_THREADS) { + pr_err("invalid thread number\n"); + return -EINVAL; + } + + if (map->bparam.seconds == 0 || + map->bparam.seconds > DMA_MAP_MAX_SECONDS) { + pr_err("invalid duration seconds\n"); + return -EINVAL; + } + + if (map->bparam.node != NUMA_NO_NODE && + !node_possible(map->bparam.node)) { + pr_err("invalid numa node\n"); + return -EINVAL; + } + + switch (map->bparam.dma_dir) { + case DMA_MAP_BIDIRECTIONAL: + map->dir = DMA_BIDIRECTIONAL; + break; + case DMA_MAP_FROM_DEVICE: + map->dir = DMA_FROM_DEVICE; + break; + case DMA_MAP_TO_DEVICE: + map->dir = DMA_TO_DEVICE; + break; + default: + pr_err("invalid DMA direction\n"); + return -EINVAL; + } + + old_dma_mask = dma_get_mask(map->dev); + + ret = dma_set_mask(map->dev, + DMA_BIT_MASK(map->bparam.dma_bits)); + if (ret) { + pr_err("failed to set dma_mask on device %s\n", + dev_name(map->dev)); + return -EINVAL; + } + + ret = do_map_benchmark(map); + + /* + * restore the original dma_mask as many devices' dma_mask are + * set by architectures, acpi, busses. When we bind them back + * to their original drivers, those drivers shouldn't see + * dma_mask changed by benchmark + */ + dma_set_mask(map->dev, old_dma_mask); + break; + default: + return -EINVAL; + } + + if (copy_to_user(argp, &map->bparam, sizeof(map->bparam))) + return -EFAULT; + + return ret; +} + +static const struct file_operations map_benchmark_fops = { + .open = simple_open, + .unlocked_ioctl = map_benchmark_ioctl, +}; + +static void map_benchmark_remove_debugfs(void *data) +{ + struct map_benchmark_data *map = (struct map_benchmark_data *)data; + + debugfs_remove(map->debugfs); +} + +static int __map_benchmark_probe(struct device *dev) +{ + struct dentry *entry; + struct map_benchmark_data *map; + int ret; + + map = devm_kzalloc(dev, sizeof(*map), GFP_KERNEL); + if (!map) + return -ENOMEM; + map->dev = dev; + + ret = devm_add_action(dev, map_benchmark_remove_debugfs, map); + if (ret) { + pr_err("Can't add debugfs remove action\n"); + return ret; + } + + /* + * we only permit a device bound with this driver, 2nd probe + * will fail + */ + entry = debugfs_create_file("dma_map_benchmark", 0600, NULL, map, + &map_benchmark_fops); + if (IS_ERR(entry)) + return PTR_ERR(entry); + map->debugfs = entry; + + return 0; +} + +static int map_benchmark_platform_probe(struct platform_device *pdev) +{ + return __map_benchmark_probe(&pdev->dev); +} + +static struct platform_driver map_benchmark_platform_driver = { + .driver = { + .name = "dma_map_benchmark", + }, + .probe = map_benchmark_platform_probe, +}; + +static int +map_benchmark_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + return __map_benchmark_probe(&pdev->dev); +} + +static struct pci_driver map_benchmark_pci_driver = { + .name = "dma_map_benchmark", + .probe = map_benchmark_pci_probe, +}; + +static int __init map_benchmark_init(void) +{ + int ret; + + ret = pci_register_driver(&map_benchmark_pci_driver); + if (ret) + return ret; + + ret = platform_driver_register(&map_benchmark_platform_driver); + if (ret) { + pci_unregister_driver(&map_benchmark_pci_driver); + return ret; + } + + return 0; +} + +static void __exit map_benchmark_cleanup(void) +{ + platform_driver_unregister(&map_benchmark_platform_driver); + pci_unregister_driver(&map_benchmark_pci_driver); +} + +module_init(map_benchmark_init); +module_exit(map_benchmark_cleanup); + +MODULE_AUTHOR("Barry Song <song.bao.hua@hisilicon.com>"); +MODULE_DESCRIPTION("dma_map benchmark driver"); +MODULE_LICENSE("GPL"); diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 51bb8fa8eb89..f87a89d08654 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -149,7 +149,8 @@ dma_addr_t dma_map_page_attrs(struct device *dev, struct page *page, if (WARN_ON_ONCE(!dev->dma_mask)) return DMA_MAPPING_ERROR; - if (dma_map_direct(dev, ops)) + if (dma_map_direct(dev, ops) || + arch_dma_map_page_direct(dev, page_to_phys(page) + offset + size)) addr = dma_direct_map_page(dev, page, offset, size, dir, attrs); else addr = ops->map_page(dev, page, offset, size, dir, attrs); @@ -165,7 +166,8 @@ void dma_unmap_page_attrs(struct device *dev, dma_addr_t addr, size_t size, const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); - if (dma_map_direct(dev, ops)) + if (dma_map_direct(dev, ops) || + arch_dma_unmap_page_direct(dev, addr + size)) dma_direct_unmap_page(dev, addr, size, dir, attrs); else if (ops->unmap_page) ops->unmap_page(dev, addr, size, dir, attrs); @@ -188,7 +190,8 @@ int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, int nents, if (WARN_ON_ONCE(!dev->dma_mask)) return 0; - if (dma_map_direct(dev, ops)) + if (dma_map_direct(dev, ops) || + arch_dma_map_sg_direct(dev, sg, nents)) ents = dma_direct_map_sg(dev, sg, nents, dir, attrs); else ents = ops->map_sg(dev, sg, nents, dir, attrs); @@ -207,7 +210,8 @@ void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg, BUG_ON(!valid_dma_direction(dir)); debug_dma_unmap_sg(dev, sg, nents, dir); - if (dma_map_direct(dev, ops)) + if (dma_map_direct(dev, ops) || + arch_dma_unmap_sg_direct(dev, sg, nents)) dma_direct_unmap_sg(dev, sg, nents, dir, attrs); else if (ops->unmap_sg) ops->unmap_sg(dev, sg, nents, dir, attrs); diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c index d4637f72239b..5f84e6cdb78e 100644 --- a/kernel/dma/pool.c +++ b/kernel/dma/pool.c @@ -38,9 +38,6 @@ static void __init dma_atomic_pool_debugfs_init(void) struct dentry *root; root = debugfs_create_dir("dma_pools", NULL); - if (IS_ERR_OR_NULL(root)) - return; - debugfs_create_ulong("pool_size_dma", 0400, root, &pool_size_dma); debugfs_create_ulong("pool_size_dma32", 0400, root, &pool_size_dma32); debugfs_create_ulong("pool_size_kernel", 0400, root, &pool_size_kernel); diff --git a/tools/testing/selftests/dma/Makefile b/tools/testing/selftests/dma/Makefile new file mode 100644 index 000000000000..aa8e8b5b3864 --- /dev/null +++ b/tools/testing/selftests/dma/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 +CFLAGS += -I../../../../usr/include/ + +TEST_GEN_PROGS := dma_map_benchmark + +include ../lib.mk diff --git a/tools/testing/selftests/dma/config b/tools/testing/selftests/dma/config new file mode 100644 index 000000000000..6102ee3c43cd --- /dev/null +++ b/tools/testing/selftests/dma/config @@ -0,0 +1 @@ +CONFIG_DMA_MAP_BENCHMARK=y diff --git a/tools/testing/selftests/dma/dma_map_benchmark.c b/tools/testing/selftests/dma/dma_map_benchmark.c new file mode 100644 index 000000000000..7065163a8388 --- /dev/null +++ b/tools/testing/selftests/dma/dma_map_benchmark.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020 Hisilicon Limited. + */ + +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <linux/types.h> + +#define DMA_MAP_BENCHMARK _IOWR('d', 1, struct map_benchmark) +#define DMA_MAP_MAX_THREADS 1024 +#define DMA_MAP_MAX_SECONDS 300 + +#define DMA_MAP_BIDIRECTIONAL 0 +#define DMA_MAP_TO_DEVICE 1 +#define DMA_MAP_FROM_DEVICE 2 + +static char *directions[] = { + "BIDIRECTIONAL", + "TO_DEVICE", + "FROM_DEVICE", +}; + +struct map_benchmark { + __u64 avg_map_100ns; /* average map latency in 100ns */ + __u64 map_stddev; /* standard deviation of map latency */ + __u64 avg_unmap_100ns; /* as above */ + __u64 unmap_stddev; + __u32 threads; /* how many threads will do map/unmap in parallel */ + __u32 seconds; /* how long the test will last */ + __s32 node; /* which numa node this benchmark will run on */ + __u32 dma_bits; /* DMA addressing capability */ + __u32 dma_dir; /* DMA data direction */ + __u64 expansion[10]; /* For future use */ +}; + +int main(int argc, char **argv) +{ + struct map_benchmark map; + int fd, opt; + /* default single thread, run 20 seconds on NUMA_NO_NODE */ + int threads = 1, seconds = 20, node = -1; + /* default dma mask 32bit, bidirectional DMA */ + int bits = 32, dir = DMA_MAP_BIDIRECTIONAL; + + int cmd = DMA_MAP_BENCHMARK; + char *p; + + while ((opt = getopt(argc, argv, "t:s:n:b:d:")) != -1) { + switch (opt) { + case 't': + threads = atoi(optarg); + break; + case 's': + seconds = atoi(optarg); + break; + case 'n': + node = atoi(optarg); + break; + case 'b': + bits = atoi(optarg); + break; + case 'd': + dir = atoi(optarg); + break; + default: + return -1; + } + } + + if (threads <= 0 || threads > DMA_MAP_MAX_THREADS) { + fprintf(stderr, "invalid number of threads, must be in 1-%d\n", + DMA_MAP_MAX_THREADS); + exit(1); + } + + if (seconds <= 0 || seconds > DMA_MAP_MAX_SECONDS) { + fprintf(stderr, "invalid number of seconds, must be in 1-%d\n", + DMA_MAP_MAX_SECONDS); + exit(1); + } + + /* suppose the mininum DMA zone is 1MB in the world */ + if (bits < 20 || bits > 64) { + fprintf(stderr, "invalid dma mask bit, must be in 20-64\n"); + exit(1); + } + + if (dir != DMA_MAP_BIDIRECTIONAL && dir != DMA_MAP_TO_DEVICE && + dir != DMA_MAP_FROM_DEVICE) { + fprintf(stderr, "invalid dma direction\n"); + exit(1); + } + + fd = open("/sys/kernel/debug/dma_map_benchmark", O_RDWR); + if (fd == -1) { + perror("open"); + exit(1); + } + + map.seconds = seconds; + map.threads = threads; + map.node = node; + map.dma_bits = bits; + map.dma_dir = dir; + if (ioctl(fd, cmd, &map)) { + perror("ioctl"); + exit(1); + } + + printf("dma mapping benchmark: threads:%d seconds:%d node:%d dir:%s\n", + threads, seconds, node, dir[directions]); + printf("average map latency(us):%.1f standard deviation:%.1f\n", + map.avg_map_100ns/10.0, map.map_stddev/10.0); + printf("average unmap latency(us):%.1f standard deviation:%.1f\n", + map.avg_unmap_100ns/10.0, map.unmap_stddev/10.0); + + return 0; +} |