diff options
| author | Jiri Kosina <jkosina@suse.cz> | 2022-01-10 09:49:13 +0100 |
|---|---|---|
| committer | Jiri Kosina <jkosina@suse.cz> | 2022-01-10 09:49:13 +0100 |
| commit | 8a2094d679d921d104d3296528d4fa419702ce1c (patch) | |
| tree | 5feeb2f8b94eb3632109b778276ccd1007486770 /arch | |
| parent | 3809fe479861194e310c23ed48b010c7c0f72d22 (diff) | |
| parent | b60d3c803d7603432a08aeaf988aff53b3a5ec64 (diff) | |
Merge branch 'for-5.17/core' into for-linus
- support for USI style pens (Tero Kristo, Mika Westerberg)
- quirk for devices that need inverted X/Y axes (Alistair Francis)
- small core code cleanups and deduplication (Benjamin Tissoires)
Diffstat (limited to 'arch')
450 files changed, 4336 insertions, 3019 deletions
diff --git a/arch/alpha/Kbuild b/arch/alpha/Kbuild index c2302017403a..345d79df24bb 100644 --- a/arch/alpha/Kbuild +++ b/arch/alpha/Kbuild @@ -1,3 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only obj-y += kernel/ mm/ obj-$(CONFIG_MATHEMU) += math-emu/ + +# for cleaning +subdir- += boot diff --git a/arch/alpha/Makefile b/arch/alpha/Makefile index 52529ee42dac..881cb913e23a 100644 --- a/arch/alpha/Makefile +++ b/arch/alpha/Makefile @@ -55,9 +55,6 @@ $(boot)/vmlinux.gz: vmlinux bootimage bootpfile bootpzfile: vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ -archclean: - $(Q)$(MAKE) $(clean)=$(boot) - archheaders: $(Q)$(MAKE) $(build)=arch/alpha/kernel/syscalls all diff --git a/arch/alpha/kernel/core_irongate.c b/arch/alpha/kernel/core_irongate.c index 72af1e72d833..6b8ed12936b6 100644 --- a/arch/alpha/kernel/core_irongate.c +++ b/arch/alpha/kernel/core_irongate.c @@ -233,7 +233,7 @@ albacore_init_arch(void) unsigned long size; size = initrd_end - initrd_start; - memblock_free(__pa(initrd_start), PAGE_ALIGN(size)); + memblock_free((void *)initrd_start, PAGE_ALIGN(size)); if (!move_initrd(pci_mem)) printk("irongate_init_arch: initrd too big " "(%ldK)\ndisabling initrd\n", diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index e805106409f7..2ae34702456c 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -129,9 +129,7 @@ dik_show_trace(unsigned long *sp, const char *loglvl) extern char _stext[], _etext[]; unsigned long tmp = *sp; sp++; - if (tmp < (unsigned long) &_stext) - continue; - if (tmp >= (unsigned long) &_etext) + if (!is_kernel_text(tmp)) continue; printk("%s[<%lx>] %pSR\n", loglvl, tmp, (void *)tmp); if (i > 40) { diff --git a/arch/arc/Kbuild b/arch/arc/Kbuild index 699d8cae9b1f..b94102fff68b 100644 --- a/arch/arc/Kbuild +++ b/arch/arc/Kbuild @@ -1,3 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 obj-y += kernel/ obj-y += mm/ + +# for cleaning +subdir- += boot diff --git a/arch/arc/Makefile b/arch/arc/Makefile index 8782a03f24a8..f252e7b924e9 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -112,6 +112,3 @@ uImage: $(uimage-default-y) @$(kecho) ' Image $(boot)/uImage is ready' CLEAN_FILES += $(boot)/uImage - -archclean: - $(Q)$(MAKE) $(clean)=$(boot) diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index 3793876f42d9..8e90052f6f05 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -294,7 +294,7 @@ int elf_check_arch(const struct elf32_hdr *x) eflags = x->e_flags; if ((eflags & EF_ARC_OSABI_MSK) != EF_ARC_OSABI_CURRENT) { pr_err("ABI mismatch - you need newer toolchain\n"); - force_sigsegv(SIGSEGV); + force_fatal_sig(SIGSEGV); return 0; } diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index 699ecf119641..ce4e939a7f07 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -59,13 +59,13 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size) low_mem_sz = size; in_use = 1; - memblock_add_node(base, size, 0); + memblock_add_node(base, size, 0, MEMBLOCK_NONE); } else { #ifdef CONFIG_HIGHMEM high_mem_start = base; high_mem_sz = size; in_use = 1; - memblock_add_node(base, size, 1); + memblock_add_node(base, size, 1, MEMBLOCK_NONE); memblock_reserve(base, size); #endif } @@ -173,7 +173,7 @@ static void __init highmem_init(void) #ifdef CONFIG_HIGHMEM unsigned long tmp; - memblock_free(high_mem_start, high_mem_sz); + memblock_phys_free(high_mem_start, high_mem_sz); for (tmp = min_high_pfn; tmp < max_high_pfn; tmp++) free_highmem_page(pfn_to_page(tmp)); #endif diff --git a/arch/arm/Kbuild b/arch/arm/Kbuild index 5208f7061524..b506622e7e23 100644 --- a/arch/arm/Kbuild +++ b/arch/arm/Kbuild @@ -9,3 +9,6 @@ obj-y += kernel/ mm/ common/ obj-y += probes/ obj-y += net/ obj-y += crypto/ + +# for cleaning +subdir- += boot diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index f0f9e8bec83a..c2724d986fa0 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1463,6 +1463,7 @@ config HIGHMEM bool "High Memory Support" depends on MMU select KMAP_LOCAL + select KMAP_LOCAL_NON_LINEAR_PTE_ARRAY help The address space of ARM processors is only 4 Gigabytes large and it has to accommodate user address space, kernel address diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 1c540157e283..77172d555c7e 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -60,15 +60,15 @@ KBUILD_CFLAGS += $(call cc-option,-fno-ipa-sra) # Note that GCC does not numerically define an architecture version # macro, but instead defines a whole series of macros which makes # testing for a specific architecture or later rather impossible. -arch-$(CONFIG_CPU_32v7M) =-D__LINUX_ARM_ARCH__=7 -march=armv7-m -Wa,-march=armv7-m -arch-$(CONFIG_CPU_32v7) =-D__LINUX_ARM_ARCH__=7 $(call cc-option,-march=armv7-a,-march=armv5t -Wa$(comma)-march=armv7-a) -arch-$(CONFIG_CPU_32v6) =-D__LINUX_ARM_ARCH__=6 $(call cc-option,-march=armv6,-march=armv5t -Wa$(comma)-march=armv6) +arch-$(CONFIG_CPU_32v7M) =-D__LINUX_ARM_ARCH__=7 -march=armv7-m +arch-$(CONFIG_CPU_32v7) =-D__LINUX_ARM_ARCH__=7 -march=armv7-a +arch-$(CONFIG_CPU_32v6) =-D__LINUX_ARM_ARCH__=6 -march=armv6 # Only override the compiler option if ARMv6. The ARMv6K extensions are # always available in ARMv7 ifeq ($(CONFIG_CPU_32v6),y) -arch-$(CONFIG_CPU_32v6K) =-D__LINUX_ARM_ARCH__=6 $(call cc-option,-march=armv6k,-march=armv5t -Wa$(comma)-march=armv6k) +arch-$(CONFIG_CPU_32v6K) =-D__LINUX_ARM_ARCH__=6 -march=armv6k endif -arch-$(CONFIG_CPU_32v5) =-D__LINUX_ARM_ARCH__=5 $(call cc-option,-march=armv5te,-march=armv4t) +arch-$(CONFIG_CPU_32v5) =-D__LINUX_ARM_ARCH__=5 -march=armv5te arch-$(CONFIG_CPU_32v4T) =-D__LINUX_ARM_ARCH__=4 -march=armv4t arch-$(CONFIG_CPU_32v4) =-D__LINUX_ARM_ARCH__=4 -march=armv4 arch-$(CONFIG_CPU_32v3) =-D__LINUX_ARM_ARCH__=3 -march=armv3m @@ -82,7 +82,7 @@ tune-$(CONFIG_CPU_ARM720T) =-mtune=arm7tdmi tune-$(CONFIG_CPU_ARM740T) =-mtune=arm7tdmi tune-$(CONFIG_CPU_ARM9TDMI) =-mtune=arm9tdmi tune-$(CONFIG_CPU_ARM940T) =-mtune=arm9tdmi -tune-$(CONFIG_CPU_ARM946E) =$(call cc-option,-mtune=arm9e,-mtune=arm9tdmi) +tune-$(CONFIG_CPU_ARM946E) =-mtune=arm9e tune-$(CONFIG_CPU_ARM920T) =-mtune=arm9tdmi tune-$(CONFIG_CPU_ARM922T) =-mtune=arm9tdmi tune-$(CONFIG_CPU_ARM925T) =-mtune=arm9tdmi @@ -90,11 +90,11 @@ tune-$(CONFIG_CPU_ARM926T) =-mtune=arm9tdmi tune-$(CONFIG_CPU_FA526) =-mtune=arm9tdmi tune-$(CONFIG_CPU_SA110) =-mtune=strongarm110 tune-$(CONFIG_CPU_SA1100) =-mtune=strongarm1100 -tune-$(CONFIG_CPU_XSCALE) =$(call cc-option,-mtune=xscale,-mtune=strongarm110) -Wa,-mcpu=xscale -tune-$(CONFIG_CPU_XSC3) =$(call cc-option,-mtune=xscale,-mtune=strongarm110) -Wa,-mcpu=xscale -tune-$(CONFIG_CPU_FEROCEON) =$(call cc-option,-mtune=marvell-f,-mtune=xscale) -tune-$(CONFIG_CPU_V6) =$(call cc-option,-mtune=arm1136j-s,-mtune=strongarm) -tune-$(CONFIG_CPU_V6K) =$(call cc-option,-mtune=arm1136j-s,-mtune=strongarm) +tune-$(CONFIG_CPU_XSCALE) =-mtune=xscale +tune-$(CONFIG_CPU_XSC3) =-mtune=xscale +tune-$(CONFIG_CPU_FEROCEON) =-mtune=xscale +tune-$(CONFIG_CPU_V6) =-mtune=arm1136j-s +tune-$(CONFIG_CPU_V6K) =-mtune=arm1136j-s # Evaluate tune cc-option calls now tune-y := $(tune-y) @@ -318,10 +318,6 @@ ifeq ($(CONFIG_VDSO),y) $(Q)$(MAKE) $(build)=arch/arm/vdso $@ endif -# We use MRPROPER_FILES and CLEAN_FILES now -archclean: - $(Q)$(MAKE) $(clean)=$(boot) - # My testing targets (bypasses dependencies) bp:; $(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $(boot)/bootpImage diff --git a/arch/arm/include/asm/syscall.h b/arch/arm/include/asm/syscall.h index 24c19d63ff0a..dfeed440254a 100644 --- a/arch/arm/include/asm/syscall.h +++ b/arch/arm/include/asm/syscall.h @@ -77,16 +77,6 @@ static inline void syscall_get_arguments(struct task_struct *task, memcpy(args, ®s->ARM_r0 + 1, 5 * sizeof(args[0])); } -static inline void syscall_set_arguments(struct task_struct *task, - struct pt_regs *regs, - const unsigned long *args) -{ - regs->ARM_ORIG_r0 = args[0]; - args++; - - memcpy(®s->ARM_r0 + 1, args, 5 * sizeof(args[0])); -} - static inline int syscall_get_arch(struct task_struct *task) { /* ARM tasks don't change audit architectures on the fly. */ diff --git a/arch/arm/mach-hisi/platmcpm.c b/arch/arm/mach-hisi/platmcpm.c index 96a484095194..258586e31333 100644 --- a/arch/arm/mach-hisi/platmcpm.c +++ b/arch/arm/mach-hisi/platmcpm.c @@ -339,7 +339,7 @@ err_fabric: err_sysctrl: iounmap(relocation); err_reloc: - memblock_free(hip04_boot_method[0], hip04_boot_method[1]); + memblock_phys_free(hip04_boot_method[0], hip04_boot_method[1]); err: return ret; } diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 6162a070a410..6d0cb0f7bc54 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -158,7 +158,7 @@ phys_addr_t __init arm_memblock_steal(phys_addr_t size, phys_addr_t align) panic("Failed to steal %pa bytes at %pS\n", &size, (void *)_RET_IP_); - memblock_free(phys, size); + memblock_phys_free(phys, size); memblock_remove(phys, size); return phys; diff --git a/arch/arm/mm/kasan_init.c b/arch/arm/mm/kasan_init.c index 4b1619584b23..5ad0d6c56d56 100644 --- a/arch/arm/mm/kasan_init.c +++ b/arch/arm/mm/kasan_init.c @@ -32,7 +32,7 @@ pmd_t tmp_pmd_table[PTRS_PER_PMD] __page_aligned_bss; static __init void *kasan_alloc_block(size_t size) { return memblock_alloc_try_nid(size, size, __pa(MAX_DMA_ADDRESS), - MEMBLOCK_ALLOC_KASAN, NUMA_NO_NODE); + MEMBLOCK_ALLOC_NOLEAKTRACE, NUMA_NO_NODE); } static void __init kasan_pte_populate(pmd_t *pmdp, unsigned long addr, diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index a4e006005107..274e4f73fd33 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -390,9 +390,9 @@ void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot) BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) < FIXADDR_START); BUG_ON(idx >= __end_of_fixed_addresses); - /* we only support device mappings until pgprot_kernel has been set */ + /* We support only device mappings before pgprot_kernel is set. */ if (WARN_ON(pgprot_val(prot) != pgprot_val(FIXMAP_PAGE_IO) && - pgprot_val(pgprot_kernel) == 0)) + pgprot_val(prot) && pgprot_val(pgprot_kernel) == 0)) return; if (pgprot_val(prot)) diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 7f1c106b746f..7619fbffcea2 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -442,7 +442,6 @@ EXPORT_SYMBOL_GPL(HYPERVISOR_hvm_op); EXPORT_SYMBOL_GPL(HYPERVISOR_memory_op); EXPORT_SYMBOL_GPL(HYPERVISOR_physdev_op); EXPORT_SYMBOL_GPL(HYPERVISOR_vcpu_op); -EXPORT_SYMBOL_GPL(HYPERVISOR_tmem_op); EXPORT_SYMBOL_GPL(HYPERVISOR_platform_op_raw); EXPORT_SYMBOL_GPL(HYPERVISOR_multicall); EXPORT_SYMBOL_GPL(HYPERVISOR_vm_assist); diff --git a/arch/arm/xen/hypercall.S b/arch/arm/xen/hypercall.S index b11bba542fac..f794dac9859a 100644 --- a/arch/arm/xen/hypercall.S +++ b/arch/arm/xen/hypercall.S @@ -88,7 +88,6 @@ HYPERCALL2(hvm_op); HYPERCALL2(memory_op); HYPERCALL2(physdev_op); HYPERCALL3(vcpu_op); -HYPERCALL1(tmem_op); HYPERCALL1(platform_op_raw); HYPERCALL2(multicall); HYPERCALL2(vm_assist); diff --git a/arch/arm64/Kbuild b/arch/arm64/Kbuild index ea7ab4ca81f9..5bfbf7d79c99 100644 --- a/arch/arm64/Kbuild +++ b/arch/arm64/Kbuild @@ -4,3 +4,6 @@ obj-$(CONFIG_KVM) += kvm/ obj-$(CONFIG_XEN) += xen/ obj-$(subst m,y,$(CONFIG_HYPERV)) += hyperv/ obj-$(CONFIG_CRYPTO) += crypto/ + +# for cleaning +subdir- += boot diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 176d6fddc4f2..c4207cf9bb17 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1163,6 +1163,10 @@ config NEED_PER_CPU_EMBED_FIRST_CHUNK def_bool y depends on NUMA +config NEED_PER_CPU_PAGE_FIRST_CHUNK + def_bool y + depends on NUMA + source "kernel/Kconfig.hz" config ARCH_SPARSEMEM_ENABLE diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index c744b1e7b356..e8cfc5868aa8 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -182,13 +182,6 @@ ifeq ($(CONFIG_ARM64_USE_LSE_ATOMICS),y) endif endif - -# We use MRPROPER_FILES and CLEAN_FILES now -archclean: - $(Q)$(MAKE) $(clean)=$(boot) - $(Q)$(MAKE) $(clean)=arch/arm64/kernel/vdso - $(Q)$(MAKE) $(clean)=arch/arm64/kernel/vdso32 - ifeq ($(KBUILD_EXTMOD),) # We need to generate vdso-offsets.h before compiling certain files in kernel/. # In order to do that, we should use the archprepare target, but we can't since diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index a305ce256090..d52a0b269ee8 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -68,6 +68,7 @@ #define ESR_ELx_EC_MAX (0x3F) #define ESR_ELx_EC_SHIFT (26) +#define ESR_ELx_EC_WIDTH (6) #define ESR_ELx_EC_MASK (UL(0x3F) << ESR_ELx_EC_SHIFT) #define ESR_ELx_EC(esr) (((esr) & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 4be8486042a7..2a5f7f38006f 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -584,7 +584,7 @@ struct kvm_vcpu_stat { u64 exits; }; -int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); +void kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 84fbb52b4224..c4ba047a82d2 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -67,9 +67,15 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; * page table entry, taking care of 52-bit addresses. */ #ifdef CONFIG_ARM64_PA_BITS_52 -#define __pte_to_phys(pte) \ - ((pte_val(pte) & PTE_ADDR_LOW) | ((pte_val(pte) & PTE_ADDR_HIGH) << 36)) -#define __phys_to_pte_val(phys) (((phys) | ((phys) >> 36)) & PTE_ADDR_MASK) +static inline phys_addr_t __pte_to_phys(pte_t pte) +{ + return (pte_val(pte) & PTE_ADDR_LOW) | + ((pte_val(pte) & PTE_ADDR_HIGH) << 36); +} +static inline pteval_t __phys_to_pte_val(phys_addr_t phys) +{ + return (phys | (phys >> 36)) & PTE_ADDR_MASK; +} #else #define __pte_to_phys(pte) (pte_val(pte) & PTE_ADDR_MASK) #define __phys_to_pte_val(phys) (phys) diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h index 03e20895453a..4cfe9b49709b 100644 --- a/arch/arm64/include/asm/syscall.h +++ b/arch/arm64/include/asm/syscall.h @@ -73,16 +73,6 @@ static inline void syscall_get_arguments(struct task_struct *task, memcpy(args, ®s->regs[1], 5 * sizeof(args[0])); } -static inline void syscall_set_arguments(struct task_struct *task, - struct pt_regs *regs, - const unsigned long *args) -{ - regs->orig_x0 = args[0]; - args++; - - memcpy(®s->regs[1], args, 5 * sizeof(args[0])); -} - /* * We don't care about endianness (__AUDIT_ARCH_LE bit) here because * AArch64 has the same system calls both on little- and big- endian. diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 3f1490bfb938..88b3e2a21408 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -81,3 +81,6 @@ extra-y += $(head-y) vmlinux.lds ifeq ($(CONFIG_DEBUG_EFI),y) AFLAGS_head.o += -DVMLINUX_PATH="\"$(realpath $(objtree)/vmlinux)\"" endif + +# for cleaning +subdir- += vdso vdso32 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index ecbdff795f5e..6f3e677d88f1 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -573,15 +573,19 @@ static const struct arm64_ftr_bits ftr_raz[] = { ARM64_FTR_END, }; -#define ARM64_FTR_REG_OVERRIDE(id, table, ovr) { \ +#define __ARM64_FTR_REG_OVERRIDE(id_str, id, table, ovr) { \ .sys_id = id, \ .reg = &(struct arm64_ftr_reg){ \ - .name = #id, \ + .name = id_str, \ .override = (ovr), \ .ftr_bits = &((table)[0]), \ }} -#define ARM64_FTR_REG(id, table) ARM64_FTR_REG_OVERRIDE(id, table, &no_override) +#define ARM64_FTR_REG_OVERRIDE(id, table, ovr) \ + __ARM64_FTR_REG_OVERRIDE(#id, id, table, ovr) + +#define ARM64_FTR_REG(id, table) \ + __ARM64_FTR_REG_OVERRIDE(#id, id, table, &no_override) struct arm64_ftr_override __ro_after_init id_aa64mmfr1_override; struct arm64_ftr_override __ro_after_init id_aa64pfr1_override; @@ -2864,6 +2868,7 @@ bool this_cpu_has_cap(unsigned int n) return false; } +EXPORT_SYMBOL_GPL(this_cpu_has_cap); /* * This helper function is used in a narrow window when, diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index 945e6bb326e3..700767dfd221 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -23,7 +23,7 @@ btildflags-$(CONFIG_ARM64_BTI_KERNEL) += -z force-bti # potential future proofing if we end up with internal calls to the exported # routines, as x86 does (see 6f121e548f83 ("x86, vdso: Reimplement vdso.so # preparation in build-time C")). -ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 --hash-style=sysv \ +ldflags-y := -shared -soname=linux-vdso.so.1 --hash-style=sysv \ -Bsymbolic --build-id=sha1 -n $(btildflags-y) -T ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18 diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index c8fec493a450..6c01b63ff56d 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -102,7 +102,7 @@ VDSO_AFLAGS += -D__ASSEMBLY__ # From arm vDSO Makefile VDSO_LDFLAGS += -Bsymbolic --no-undefined -soname=linux-vdso.so.1 VDSO_LDFLAGS += -z max-page-size=4096 -z common-page-size=4096 -VDSO_LDFLAGS += -nostdlib -shared --hash-style=sysv --build-id=sha1 +VDSO_LDFLAGS += -shared --hash-style=sysv --build-id=sha1 # Borrow vdsomunge.c from the arm vDSO diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index f5490afe1ebf..e4727dc771bf 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -223,7 +223,14 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = 1; break; case KVM_CAP_NR_VCPUS: - r = num_online_cpus(); + /* + * ARM64 treats KVM_CAP_NR_CPUS differently from all other + * architectures, as it does not always bound it to + * KVM_CAP_MAX_VCPUS. It should not matter much because + * this is just an advisory value. + */ + r = min_t(unsigned int, num_online_cpus(), + kvm_arm_default_max_vcpus()); break; case KVM_CAP_MAX_VCPUS: case KVM_CAP_MAX_VCPU_ID: @@ -1389,12 +1396,9 @@ long kvm_arch_vm_ioctl(struct file *filp, return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr); } case KVM_ARM_PREFERRED_TARGET: { - int err; struct kvm_vcpu_init init; - err = kvm_vcpu_preferred_target(&init); - if (err) - return err; + kvm_vcpu_preferred_target(&init); if (copy_to_user(argp, &init, sizeof(init))) return -EFAULT; diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 5ce26bedf23c..e116c7767730 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -869,13 +869,10 @@ u32 __attribute_const__ kvm_target_cpu(void) return KVM_ARM_TARGET_GENERIC_V8; } -int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init) +void kvm_vcpu_preferred_target(struct kvm_vcpu_init *init) { u32 target = kvm_target_cpu(); - if (target < 0) - return -ENODEV; - memset(init, 0, sizeof(*init)); /* @@ -885,8 +882,6 @@ int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init) * target type. */ init->target = (__u32)target; - - return 0; } int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index 9aa9b73475c9..b6b6801d96d5 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -44,7 +44,7 @@ el1_sync: // Guest trapped into EL2 mrs x0, esr_el2 - lsr x0, x0, #ESR_ELx_EC_SHIFT + ubfx x0, x0, #ESR_ELx_EC_SHIFT, #ESR_ELx_EC_WIDTH cmp x0, #ESR_ELx_EC_HVC64 ccmp x0, #ESR_ELx_EC_HVC32, #4, ne b.ne el1_trap diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S index 0c6116d34e18..3d613e721a75 100644 --- a/arch/arm64/kvm/hyp/nvhe/host.S +++ b/arch/arm64/kvm/hyp/nvhe/host.S @@ -141,7 +141,7 @@ SYM_FUNC_END(__host_hvc) .L__vect_start\@: stp x0, x1, [sp, #-16]! mrs x0, esr_el2 - lsr x0, x0, #ESR_ELx_EC_SHIFT + ubfx x0, x0, #ESR_ELx_EC_SHIFT, #ESR_ELx_EC_WIDTH cmp x0, #ESR_ELx_EC_HVC64 b.eq __host_hvc b __host_exit diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index 862c7b514e20..578f71798c2e 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -178,7 +178,7 @@ static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level, phys = kvm_pte_to_phys(pte); if (!addr_is_memory(phys)) - return 0; + return -EINVAL; /* * Adjust the host stage-2 mappings to match the ownership attributes @@ -207,8 +207,18 @@ static int finalize_host_mappings(void) .cb = finalize_host_mappings_walker, .flags = KVM_PGTABLE_WALK_LEAF, }; + int i, ret; + + for (i = 0; i < hyp_memblock_nr; i++) { + struct memblock_region *reg = &hyp_memory[i]; + u64 start = (u64)hyp_phys_to_virt(reg->base); + + ret = kvm_pgtable_walk(&pkvm_pgtable, start, reg->size, &walker); + if (ret) + return ret; + } - return kvm_pgtable_walk(&pkvm_pgtable, 0, BIT(pkvm_pgtable.ia_bits), &walker); + return 0; } void __noreturn __pkvm_init_finalise(void) diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c index 3787ee6fb1a2..792cf6e6ac92 100644 --- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c +++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c @@ -474,7 +474,7 @@ bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code) return true; } -/** +/* * Handler for protected VM restricted exceptions. * * Inject an undefined exception into the guest and return true to indicate that diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile index 0941180a86d3..29490be2546b 100644 --- a/arch/arm64/lib/Makefile +++ b/arch/arm64/lib/Makefile @@ -9,6 +9,8 @@ ifeq ($(CONFIG_KERNEL_MODE_NEON), y) obj-$(CONFIG_XOR_BLOCKS) += xor-neon.o CFLAGS_REMOVE_xor-neon.o += -mgeneral-regs-only CFLAGS_xor-neon.o += -ffreestanding +# Enable <arm_neon.h> +CFLAGS_xor-neon.o += -isystem $(shell $(CC) -print-file-name=include) endif lib-$(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) += uaccess_flushcache.o diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index 61b52a92b8b6..c12cd700598f 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -36,7 +36,7 @@ static phys_addr_t __init kasan_alloc_zeroed_page(int node) { void *p = memblock_alloc_try_nid(PAGE_SIZE, PAGE_SIZE, __pa(MAX_DMA_ADDRESS), - MEMBLOCK_ALLOC_KASAN, node); + MEMBLOCK_ALLOC_NOLEAKTRACE, node); if (!p) panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d from=%llx\n", __func__, PAGE_SIZE, PAGE_SIZE, node, @@ -49,7 +49,8 @@ static phys_addr_t __init kasan_alloc_raw_page(int node) { void *p = memblock_alloc_try_nid_raw(PAGE_SIZE, PAGE_SIZE, __pa(MAX_DMA_ADDRESS), - MEMBLOCK_ALLOC_KASAN, node); + MEMBLOCK_ALLOC_NOLEAKTRACE, + node); if (!p) panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d from=%llx\n", __func__, PAGE_SIZE, PAGE_SIZE, node, @@ -287,13 +288,29 @@ static void __init kasan_init_depth(void) init_task.kasan_depth = 0; } +#ifdef CONFIG_KASAN_VMALLOC +void __init kasan_populate_early_vm_area_shadow(void *start, unsigned long size) +{ + unsigned long shadow_start, shadow_end; + + if (!is_vmalloc_or_module_addr(start)) + return; + + shadow_start = (unsigned long)kasan_mem_to_shadow(start); + shadow_start = ALIGN_DOWN(shadow_start, PAGE_SIZE); + shadow_end = (unsigned long)kasan_mem_to_shadow(start + size); + shadow_end = ALIGN(shadow_end, PAGE_SIZE); + kasan_map_populate(shadow_start, shadow_end, NUMA_NO_NODE); +} +#endif + void __init kasan_init(void) { kasan_init_shadow(); kasan_init_depth(); #if defined(CONFIG_KASAN_GENERIC) /* CONFIG_KASAN_SW_TAGS also requires kasan_init_sw_tags(). */ - pr_info("KernelAddressSanitizer initialized\n"); + pr_info("KernelAddressSanitizer initialized (generic)\n"); #endif } diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index fd85b51b9d50..acfae9b41cc8 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -96,7 +96,8 @@ static phys_addr_t __init early_pgtable_alloc(int shift) phys_addr_t phys; void *ptr; - phys = memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE); + phys = memblock_phys_alloc_range(PAGE_SIZE, PAGE_SIZE, 0, + MEMBLOCK_ALLOC_NOLEAKTRACE); if (!phys) panic("Failed to allocate page table page\n"); @@ -738,8 +739,8 @@ void __init paging_init(void) cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); init_mm.pgd = swapper_pg_dir; - memblock_free(__pa_symbol(init_pg_dir), - __pa_symbol(init_pg_end) - __pa_symbol(init_pg_dir)); + memblock_phys_free(__pa_symbol(init_pg_dir), + __pa_symbol(init_pg_end) - __pa_symbol(init_pg_dir)); memblock_allow_resize(); } diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S index 5b09aca55108..9d01361696a1 100644 --- a/arch/arm64/xen/hypercall.S +++ b/arch/arm64/xen/hypercall.S @@ -80,7 +80,6 @@ HYPERCALL2(hvm_op); HYPERCALL2(memory_op); HYPERCALL2(physdev_op); HYPERCALL3(vcpu_op); -HYPERCALL1(tmem_op); HYPERCALL1(platform_op_raw); HYPERCALL2(multicall); HYPERCALL2(vm_assist); diff --git a/arch/csky/Kbuild b/arch/csky/Kbuild index a4e40e534e6a..4e39f7abdeb6 100644 --- a/arch/csky/Kbuild +++ b/arch/csky/Kbuild @@ -1 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only + +# for cleaning +subdir- += boot diff --git a/arch/csky/Makefile b/arch/csky/Makefile index 37f593a4bf53..866805077636 100644 --- a/arch/csky/Makefile +++ b/arch/csky/Makefile @@ -76,9 +76,6 @@ all: zImage zImage Image uImage: vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ -archclean: - $(Q)$(MAKE) $(clean)=$(boot) - define archhelp echo '* zImage - Compressed kernel image (arch/$(ARCH)/boot/zImage)' echo ' Image - Uncompressed kernel image (arch/$(ARCH)/boot/Image)' diff --git a/arch/csky/include/asm/syscall.h b/arch/csky/include/asm/syscall.h index f624fa3bbc22..0de5734950bf 100644 --- a/arch/csky/include/asm/syscall.h +++ b/arch/csky/include/asm/syscall.h @@ -59,15 +59,6 @@ syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, memcpy(args, ®s->a1, 5 * sizeof(args[0])); } -static inline void -syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - const unsigned long *args) -{ - regs->orig_a0 = args[0]; - args++; - memcpy(®s->a1, args, 5 * sizeof(regs->a1)); -} - static inline int syscall_get_arch(struct task_struct *task) { diff --git a/arch/h8300/Kbuild b/arch/h8300/Kbuild index b2583e7efbd1..e4703f3534cc 100644 --- a/arch/h8300/Kbuild +++ b/arch/h8300/Kbuild @@ -1,2 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only obj-y += kernel/ mm/ boot/dts/ + +# for cleaning +subdir- += boot diff --git a/arch/h8300/Makefile b/arch/h8300/Makefile index eb4cb8f6830c..807f41e60ee4 100644 --- a/arch/h8300/Makefile +++ b/arch/h8300/Makefile @@ -34,9 +34,6 @@ libs-y += arch/$(ARCH)/lib/ boot := arch/h8300/boot -archclean: - $(Q)$(MAKE) $(clean)=$(boot) - vmlinux.srec vmlinux.bin zImage uImage.bin: vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ diff --git a/arch/hexagon/include/asm/timer-regs.h b/arch/hexagon/include/asm/timer-regs.h deleted file mode 100644 index ee6c61423a05..000000000000 --- a/arch/hexagon/include/asm/timer-regs.h +++ /dev/null @@ -1,26 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Timer support for Hexagon - * - * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved. - */ - -#ifndef _ASM_TIMER_REGS_H -#define _ASM_TIMER_REGS_H - -/* This stuff should go into a platform specific file */ -#define TCX0_CLK_RATE 19200 -#define TIMER_ENABLE 0 -#define TIMER_CLR_ON_MATCH 1 - -/* - * 8x50 HDD Specs 5-8. Simulator co-sim not fixed until - * release 1.1, and then it's "adjustable" and probably not defaulted. - */ -#define RTOS_TIMER_INT 3 -#ifdef CONFIG_HEXAGON_COMET -#define RTOS_TIMER_REGS_ADDR 0xAB000000UL -#endif -#define SLEEP_CLK_RATE 32000 - -#endif diff --git a/arch/hexagon/include/asm/timex.h b/arch/hexagon/include/asm/timex.h index 8d4ec76fceb4..dfe69e118b2b 100644 --- a/arch/hexagon/include/asm/timex.h +++ b/arch/hexagon/include/asm/timex.h @@ -7,11 +7,10 @@ #define _ASM_TIMEX_H #include <asm-generic/timex.h> -#include <asm/timer-regs.h> #include <asm/hexagon_vm.h> /* Using TCX0 as our clock. CLOCK_TICK_RATE scheduled to be removed. */ -#define CLOCK_TICK_RATE TCX0_CLK_RATE +#define CLOCK_TICK_RATE 19200 #define ARCH_HAS_READ_CURRENT_TIMER diff --git a/arch/hexagon/kernel/.gitignore b/arch/hexagon/kernel/.gitignore new file mode 100644 index 000000000000..c5f676c3c224 --- /dev/null +++ b/arch/hexagon/kernel/.gitignore @@ -0,0 +1 @@ +vmlinux.lds diff --git a/arch/hexagon/kernel/time.c b/arch/hexagon/kernel/time.c index feffe527ac92..febc95714d75 100644 --- a/arch/hexagon/kernel/time.c +++ b/arch/hexagon/kernel/time.c @@ -17,9 +17,10 @@ #include <linux/of_irq.h> #include <linux/module.h> -#include <asm/timer-regs.h> #include <asm/hexagon_vm.h> +#define TIMER_ENABLE BIT(0) + /* * For the clocksource we need: * pcycle frequency (600MHz) @@ -33,6 +34,13 @@ cycles_t pcycle_freq_mhz; cycles_t thread_freq_mhz; cycles_t sleep_clk_freq; +/* + * 8x50 HDD Specs 5-8. Simulator co-sim not fixed until + * release 1.1, and then it's "adjustable" and probably not defaulted. + */ +#define RTOS_TIMER_INT 3 +#define RTOS_TIMER_REGS_ADDR 0xAB000000UL + static struct resource rtos_timer_resources[] = { { .start = RTOS_TIMER_REGS_ADDR, @@ -80,7 +88,7 @@ static int set_next_event(unsigned long delta, struct clock_event_device *evt) iowrite32(0, &rtos_timer->clear); iowrite32(delta, &rtos_timer->match); - iowrite32(1 << TIMER_ENABLE, &rtos_timer->enable); + iowrite32(TIMER_ENABLE, &rtos_timer->enable); return 0; } diff --git a/arch/hexagon/lib/io.c b/arch/hexagon/lib/io.c index d35d69d6588c..55f75392857b 100644 --- a/arch/hexagon/lib/io.c +++ b/arch/hexagon/lib/io.c @@ -27,6 +27,7 @@ void __raw_readsw(const void __iomem *addr, void *data, int len) *dst++ = *src; } +EXPORT_SYMBOL(__raw_readsw); /* * __raw_writesw - read words a short at a time @@ -47,6 +48,7 @@ void __raw_writesw(void __iomem *addr, const void *data, int len) } +EXPORT_SYMBOL(__raw_writesw); /* Pretty sure len is pre-adjusted for the length of the access already */ void __raw_readsl(const void __iomem *addr, void *data, int len) @@ -62,6 +64,7 @@ void __raw_readsl(const void __iomem *addr, void *data, int len) } +EXPORT_SYMBOL(__raw_readsl); void __raw_writesl(void __iomem *addr, const void *data, int len) { @@ -76,3 +79,4 @@ void __raw_writesl(void __iomem *addr, const void *data, int len) } +EXPORT_SYMBOL(__raw_writesl); diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile index 7e548c654a29..3b3ac3e1f272 100644 --- a/arch/ia64/Makefile +++ b/arch/ia64/Makefile @@ -67,8 +67,6 @@ vmlinux.bin: vmlinux FORCE unwcheck: vmlinux -$(Q)READELF=$(READELF) $(PYTHON3) $(srctree)/arch/ia64/scripts/unwcheck.py $< -archclean: - archheaders: $(Q)$(MAKE) $(build)=arch/ia64/kernel/syscalls all diff --git a/arch/ia64/include/asm/syscall.h b/arch/ia64/include/asm/syscall.h index 0d23c0049301..2b02a3fb862a 100644 --- a/arch/ia64/include/asm/syscall.h +++ b/arch/ia64/include/asm/syscall.h @@ -55,21 +55,8 @@ static inline void syscall_set_return_value(struct task_struct *task, } } -extern void ia64_syscall_get_set_arguments(struct task_struct *task, - struct pt_regs *regs, unsigned long *args, int rw); -static inline void syscall_get_arguments(struct task_struct *task, - struct pt_regs *regs, - unsigned long *args) -{ - ia64_syscall_get_set_arguments(task, regs, args, 0); -} - -static inline void syscall_set_arguments(struct task_struct *task, - struct pt_regs *regs, - unsigned long *args) -{ - ia64_syscall_get_set_arguments(task, regs, args, 1); -} +extern void syscall_get_arguments(struct task_struct *task, + struct pt_regs *regs, unsigned long *args); static inline int syscall_get_arch(struct task_struct *task) { diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index df28c7dd164f..6a1439eaa050 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -2001,17 +2001,16 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *tsk) return &user_ia64_view; } -struct syscall_get_set_args { +struct syscall_get_args { unsigned int i; unsigned int n; unsigned long *args; struct pt_regs *regs; - int rw; }; -static void syscall_get_set_args_cb(struct unw_frame_info *info, void *data) +static void syscall_get_args_cb(struct unw_frame_info *info, void *data) { - struct syscall_get_set_args *args = data; + struct syscall_get_args *args = data; struct pt_regs *pt = args->regs; unsigned long *krbs, cfm, ndirty, nlocals, nouts; int i, count; @@ -2042,37 +2041,31 @@ static void syscall_get_set_args_cb(struct unw_frame_info *info, void *data) /* Iterate over outs. */ for (i = 0; i < count; i++) { int j = ndirty + nlocals + i + args->i; - if (args->rw) - *ia64_rse_skip_regs(krbs, j) = args->args[i]; - else - args->args[i] = *ia64_rse_skip_regs(krbs, j); + args->args[i] = *ia64_rse_skip_regs(krbs, j); } - if (!args->rw) { - while (i < args->n) { - args->args[i] = 0; - i++; - } + while (i < args->n) { + args->args[i] = 0; + i++; } } -void ia64_syscall_get_set_arguments(struct task_struct *task, - struct pt_regs *regs, unsigned long *args, int rw) +void syscall_get_arguments(struct task_struct *task, + struct pt_regs *regs, unsigned long *args) { - struct syscall_get_set_args data = { + struct syscall_get_args data = { .i = 0, .n = 6, .args = args, .regs = regs, - .rw = rw, }; if (task == current) - unw_init_running(syscall_get_set_args_cb, &data); + unw_init_running(syscall_get_args_cb, &data); else { struct unw_frame_info ufi; memset(&ufi, 0, sizeof(ufi)); unw_init_from_blocked_task(&ufi, task); - syscall_get_set_args_cb(&ufi, &data); + syscall_get_args_cb(&ufi, &data); } } diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c index 42e025cfbd08..24901d809301 100644 --- a/arch/ia64/mm/contig.c +++ b/arch/ia64/mm/contig.c @@ -153,7 +153,7 @@ find_memory (void) efi_memmap_walk(find_max_min_low_pfn, NULL); max_pfn = max_low_pfn; - memblock_add_node(0, PFN_PHYS(max_low_pfn), 0); + memblock_add_node(0, PFN_PHYS(max_low_pfn), 0, MEMBLOCK_NONE); find_initrd(); diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 5c6da8d83c1a..5d165607bf35 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -378,7 +378,7 @@ int __init register_active_ranges(u64 start, u64 len, int nid) #endif if (start < end) - memblock_add_node(__pa(start), end - start, nid); + memblock_add_node(__pa(start), end - start, nid, MEMBLOCK_NONE); return 0; } diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu index 277d61a09463..0d00ef5117dc 100644 --- a/arch/m68k/Kconfig.cpu +++ b/arch/m68k/Kconfig.cpu @@ -53,17 +53,6 @@ config M68000 System-On-Chip devices (eg 68328, 68302, etc). It does not contain a paging MMU. -config MCPU32 - bool - select CPU_HAS_NO_BITFIELDS - select CPU_HAS_NO_CAS - select CPU_HAS_NO_UNALIGNED - select CPU_NO_EFFICIENT_FFS - help - The Freescale (was then Motorola) CPU32 is a CPU core that is - based on the 68020 processor. For the most part it is used in - System-On-Chip parts, and does not contain a paging MMU. - config M68020 bool "68020 support" depends on MMU diff --git a/arch/m68k/Kconfig.machine b/arch/m68k/Kconfig.machine index 36fa0c3ef129..eeab4f3e6c19 100644 --- a/arch/m68k/Kconfig.machine +++ b/arch/m68k/Kconfig.machine @@ -203,6 +203,7 @@ config INIT_LCD config MEMORY_RESERVE int "Memory reservation (MiB)" depends on (UCSIMM || UCDIMM) + default 0 help Reserve certain memory regions on 68x328 based boards. diff --git a/arch/m68k/Makefile b/arch/m68k/Makefile index dd0c0ec67f67..740fc97b9c0f 100644 --- a/arch/m68k/Makefile +++ b/arch/m68k/Makefile @@ -2,9 +2,7 @@ # m68k/Makefile # # This file is included by the global makefile so that you can add your own -# architecture-specific flags and dependencies. Remember to do have actions -# for "archclean" and "archdep" for cleaning up and making dependencies for -# this architecture +# architecture-specific flags and dependencies. # # This file is subject to the terms and conditions of the GNU General Public # License. See the file "COPYING" in the main directory of this archive diff --git a/arch/m68k/include/asm/bitops.h b/arch/m68k/include/asm/bitops.h index 7b414099e5fc..7b93e1fd8ffa 100644 --- a/arch/m68k/include/asm/bitops.h +++ b/arch/m68k/include/asm/bitops.h @@ -451,7 +451,7 @@ static inline unsigned long ffz(unsigned long word) * generic functions for those. */ #if (defined(__mcfisaaplus__) || defined(__mcfisac__)) && \ - !defined(CONFIG_M68000) && !defined(CONFIG_MCPU32) + !defined(CONFIG_M68000) static inline unsigned long __ffs(unsigned long x) { __asm__ __volatile__ ("bitrev %0; ff1 %0" diff --git a/arch/m68k/kernel/traps.c b/arch/m68k/kernel/traps.c index 9718ce94cc84..34d6458340b0 100644 --- a/arch/m68k/kernel/traps.c +++ b/arch/m68k/kernel/traps.c @@ -1145,7 +1145,7 @@ asmlinkage void set_esp0(unsigned long ssp) */ asmlinkage void fpsp040_die(void) { - force_sigsegv(SIGSEGV); + force_exit_sig(SIGSEGV); } #ifdef CONFIG_M68KFPU_EMU diff --git a/arch/m68k/mm/mcfmmu.c b/arch/m68k/mm/mcfmmu.c index eac9dde65193..6f1f25125294 100644 --- a/arch/m68k/mm/mcfmmu.c +++ b/arch/m68k/mm/mcfmmu.c @@ -174,7 +174,8 @@ void __init cf_bootmem_alloc(void) m68k_memory[0].addr = _rambase; m68k_memory[0].size = _ramend - _rambase; - memblock_add_node(m68k_memory[0].addr, m68k_memory[0].size, 0); + memblock_add_node(m68k_memory[0].addr, m68k_memory[0].size, 0, + MEMBLOCK_NONE); /* compute total pages in system */ num_pages = PFN_DOWN(_ramend - _rambase); diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c index 9f3f77785aa7..2b05bb2bac00 100644 --- a/arch/m68k/mm/motorola.c +++ b/arch/m68k/mm/motorola.c @@ -410,7 +410,8 @@ void __init paging_init(void) min_addr = m68k_memory[0].addr; max_addr = min_addr + m68k_memory[0].size; - memblock_add_node(m68k_memory[0].addr, m68k_memory[0].size, 0); + memblock_add_node(m68k_memory[0].addr, m68k_memory[0].size, 0, + MEMBLOCK_NONE); for (i = 1; i < m68k_num_memory;) { if (m68k_memory[i].addr < min_addr) { printk("Ignoring memory chunk at 0x%lx:0x%lx before the first chunk\n", @@ -421,7 +422,8 @@ void __init paging_init(void) (m68k_num_memory - i) * sizeof(struct m68k_mem_info)); continue; } - memblock_add_node(m68k_memory[i].addr, m68k_memory[i].size, i); + memblock_add_node(m68k_memory[i].addr, m68k_memory[i].size, i, + MEMBLOCK_NONE); addr = m68k_memory[i].addr + m68k_memory[i].size; if (addr > max_addr) max_addr = addr; diff --git a/arch/microblaze/Kbuild b/arch/microblaze/Kbuild index a1c597889319..077a0b8e9615 100644 --- a/arch/microblaze/Kbuild +++ b/arch/microblaze/Kbuild @@ -3,3 +3,6 @@ obj-y += kernel/ obj-y += mm/ obj-$(CONFIG_PCI) += pci/ obj-y += boot/dts/ + +# for cleaning +subdir- += boot diff --git a/arch/microblaze/Makefile b/arch/microblaze/Makefile index 9adc6b6434df..e775a696aa6f 100644 --- a/arch/microblaze/Makefile +++ b/arch/microblaze/Makefile @@ -60,9 +60,6 @@ export DTB all: linux.bin -archclean: - $(Q)$(MAKE) $(clean)=$(boot) - archheaders: $(Q)$(MAKE) $(build)=arch/microblaze/kernel/syscalls all diff --git a/arch/microblaze/include/asm/syscall.h b/arch/microblaze/include/asm/syscall.h index 3a6924f3cbde..5eb3f624cc59 100644 --- a/arch/microblaze/include/asm/syscall.h +++ b/arch/microblaze/include/asm/syscall.h @@ -58,28 +58,6 @@ static inline microblaze_reg_t microblaze_get_syscall_arg(struct pt_regs *regs, return ~0; } -static inline void microblaze_set_syscall_arg(struct pt_regs *regs, - unsigned int n, - unsigned long val) -{ - switch (n) { - case 5: - regs->r10 = val; - case 4: - regs->r9 = val; - case 3: - regs->r8 = val; - case 2: - regs->r7 = val; - case 1: - regs->r6 = val; - case 0: - regs->r5 = val; - default: - BUG(); - } -} - static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, unsigned long *args) @@ -91,17 +69,6 @@ static inline void syscall_get_arguments(struct task_struct *task, *args++ = microblaze_get_syscall_arg(regs, i++); } -static inline void syscall_set_arguments(struct task_struct *task, - struct pt_regs *regs, - const unsigned long *args) -{ - unsigned int i = 0; - unsigned int n = 6; - - while (n--) - microblaze_set_syscall_arg(regs, i++, *args++); -} - asmlinkage unsigned long do_syscall_trace_enter(struct pt_regs *regs); asmlinkage void do_syscall_trace_leave(struct pt_regs *regs); diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c index c1833b159d3b..9f73265aad4e 100644 --- a/arch/microblaze/mm/pgtable.c +++ b/arch/microblaze/mm/pgtable.c @@ -34,6 +34,7 @@ #include <linux/mm_types.h> #include <linux/pgtable.h> #include <linux/memblock.h> +#include <linux/kallsyms.h> #include <asm/pgalloc.h> #include <linux/io.h> @@ -171,7 +172,7 @@ void __init mapin_ram(void) for (s = 0; s < lowmem_size; s += PAGE_SIZE) { f = _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_SHARED | _PAGE_HWEXEC; - if ((char *) v < _stext || (char *) v >= _etext) + if (!is_kernel_text(v)) f |= _PAGE_WRENABLE; else /* On the MicroBlaze, no user access diff --git a/arch/microblaze/pci/pci-common.c b/arch/microblaze/pci/pci-common.c index 557585f1be41..622a4867f9e9 100644 --- a/arch/microblaze/pci/pci-common.c +++ b/arch/microblaze/pci/pci-common.c @@ -587,13 +587,12 @@ static void pcibios_fixup_resources(struct pci_dev *dev) } DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pcibios_fixup_resources); -int pcibios_add_device(struct pci_dev *dev) +int pcibios_device_add(struct pci_dev *dev) { dev->irq = of_irq_parse_and_map_pci(dev, 0, 0); return 0; } -EXPORT_SYMBOL(pcibios_add_device); /* * Reparent resource children of pr that conflict with res diff --git a/arch/mips/Kbuild b/arch/mips/Kbuild index d5d6ef9bb986..9e8071f0e58f 100644 --- a/arch/mips/Kbuild +++ b/arch/mips/Kbuild @@ -25,3 +25,6 @@ obj-y += vdso/ ifdef CONFIG_KVM obj-y += kvm/ endif + +# for cleaning +subdir- += boot diff --git a/arch/mips/Kbuild.platforms b/arch/mips/Kbuild.platforms index 2c57994b5217..30193bcf9caa 100644 --- a/arch/mips/Kbuild.platforms +++ b/arch/mips/Kbuild.platforms @@ -37,4 +37,4 @@ platform-$(CONFIG_MACH_TX49XX) += txx9/ platform-$(CONFIG_MACH_VR41XX) += vr41xx/ # include the platform specific files -include $(patsubst %, $(srctree)/arch/mips/%/Platform, $(platform-y)) +include $(patsubst %/, $(srctree)/arch/mips/%/Platform, $(platform-y)) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 86510741d49d..de60ad190057 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -292,6 +292,8 @@ config BMIPS_GENERIC select USB_OHCI_BIG_ENDIAN_DESC if CPU_BIG_ENDIAN select USB_OHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN select HARDIRQS_SW_RESEND + select HAVE_PCI + select PCI_DRIVERS_GENERIC help Build a generic DT-based kernel image that boots on select BCM33xx cable modem chips, BCM63xx DSL chips, and BCM7xxx set-top @@ -333,6 +335,9 @@ config BCM63XX select SYS_SUPPORTS_32BIT_KERNEL select SYS_SUPPORTS_BIG_ENDIAN select SYS_HAS_EARLY_PRINTK + select SYS_HAS_CPU_BMIPS32_3300 + select SYS_HAS_CPU_BMIPS4350 + select SYS_HAS_CPU_BMIPS4380 select SWAP_IO_SPACE select GPIOLIB select MIPS_L1_CACHE_SHIFT_4 diff --git a/arch/mips/Makefile b/arch/mips/Makefile index ea3cd080a1c7..ace7f033de07 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -8,8 +8,7 @@ # Copyright (C) 2002, 2003, 2004 Maciej W. Rozycki # # This file is included by the global makefile so that you can add your own -# architecture-specific flags and dependencies. Remember to do have actions -# for "archclean" cleaning up for this architecture. +# architecture-specific flags and dependencies. # archscripts: scripts_basic @@ -254,7 +253,9 @@ endif # # Board-dependent options and extra files # +ifdef need-compiler include $(srctree)/arch/mips/Kbuild.platforms +endif ifdef CONFIG_PHYSICAL_START load-y = $(CONFIG_PHYSICAL_START) @@ -426,11 +427,6 @@ endif $(Q)install -D -m 644 .config $(INSTALL_PATH)/config-$(KERNELRELEASE) $(Q)install -D -m 644 System.map $(INSTALL_PATH)/System.map-$(KERNELRELEASE) -archclean: - $(Q)$(MAKE) $(clean)=arch/mips/boot - $(Q)$(MAKE) $(clean)=arch/mips/boot/compressed - $(Q)$(MAKE) $(clean)=arch/mips/boot/tools - archheaders: $(Q)$(MAKE) $(build)=arch/mips/kernel/syscalls all diff --git a/arch/mips/bcm63xx/clk.c b/arch/mips/bcm63xx/clk.c index 5a3e325275d0..1c91064cb448 100644 --- a/arch/mips/bcm63xx/clk.c +++ b/arch/mips/bcm63xx/clk.c @@ -381,6 +381,12 @@ void clk_disable(struct clk *clk) EXPORT_SYMBOL(clk_disable); +struct clk *clk_get_parent(struct clk *clk) +{ + return NULL; +} +EXPORT_SYMBOL(clk_get_parent); + unsigned long clk_get_rate(struct clk *clk) { if (!clk) diff --git a/arch/mips/boot/Makefile b/arch/mips/boot/Makefile index a3da2c5d63c2..196c44fa72d9 100644 --- a/arch/mips/boot/Makefile +++ b/arch/mips/boot/Makefile @@ -171,3 +171,6 @@ $(obj)/vmlinux.itb: $(obj)/vmlinux.its $(obj)/vmlinux.bin FORCE $(obj)/vmlinux.%.itb: $(obj)/vmlinux.%.its $(obj)/vmlinux.bin.% FORCE $(call if_changed,itb-image,$<) + +# for cleaning +subdir- += compressed tools diff --git a/arch/mips/boot/compressed/.gitignore b/arch/mips/boot/compressed/.gitignore deleted file mode 100644 index d358395614c9..000000000000 --- a/arch/mips/boot/compressed/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -ashldi3.c -bswapsi.c diff --git a/arch/mips/boot/compressed/Makefile b/arch/mips/boot/compressed/Makefile index 3548b3b45269..2861a05c2e0c 100644 --- a/arch/mips/boot/compressed/Makefile +++ b/arch/mips/boot/compressed/Makefile @@ -50,19 +50,9 @@ vmlinuzobjs-$(CONFIG_MIPS_ALCHEMY) += $(obj)/uart-alchemy.o vmlinuzobjs-$(CONFIG_ATH79) += $(obj)/uart-ath79.o endif -extra-y += uart-ath79.c -$(obj)/uart-ath79.c: $(srctree)/arch/mips/ath79/early_printk.c - $(call cmd,shipped) - vmlinuzobjs-$(CONFIG_KERNEL_XZ) += $(obj)/ashldi3.o -extra-y += ashldi3.c -$(obj)/ashldi3.c: $(obj)/%.c: $(srctree)/lib/%.c FORCE - $(call if_changed,shipped) - -extra-y += bswapsi.c -$(obj)/bswapsi.c: $(obj)/%.c: $(srctree)/arch/mips/lib/%.c FORCE - $(call if_changed,shipped) +vmlinuzobjs-$(CONFIG_KERNEL_ZSTD) += $(obj)/bswapdi.o targets := $(notdir $(vmlinuzobjs-y)) diff --git a/arch/mips/boot/compressed/ashldi3.c b/arch/mips/boot/compressed/ashldi3.c new file mode 100644 index 000000000000..f7bf6a7aae31 --- /dev/null +++ b/arch/mips/boot/compressed/ashldi3.c @@ -0,0 +1,2 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include "../../../../lib/ashldi3.c" diff --git a/arch/mips/boot/compressed/bswapdi.c b/arch/mips/boot/compressed/bswapdi.c new file mode 100644 index 000000000000..acb28aebb025 --- /dev/null +++ b/arch/mips/boot/compressed/bswapdi.c @@ -0,0 +1,2 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include "../../lib/bswapdi.c" diff --git a/arch/mips/boot/compressed/bswapsi.c b/arch/mips/boot/compressed/bswapsi.c new file mode 100644 index 000000000000..fdb9c6476904 --- /dev/null +++ b/arch/mips/boot/compressed/bswapsi.c @@ -0,0 +1,2 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include "../../lib/bswapsi.c" diff --git a/arch/mips/boot/compressed/uart-ath79.c b/arch/mips/boot/compressed/uart-ath79.c new file mode 100644 index 000000000000..d686820921be --- /dev/null +++ b/arch/mips/boot/compressed/uart-ath79.c @@ -0,0 +1,2 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include "../../ath79/early_printk.c" diff --git a/arch/mips/boot/dts/ingenic/jz4725b.dtsi b/arch/mips/boot/dts/ingenic/jz4725b.dtsi index a1f0b71c9223..0c6a5a4266f4 100644 --- a/arch/mips/boot/dts/ingenic/jz4725b.dtsi +++ b/arch/mips/boot/dts/ingenic/jz4725b.dtsi @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -#include <dt-bindings/clock/jz4725b-cgu.h> +#include <dt-bindings/clock/ingenic,jz4725b-cgu.h> #include <dt-bindings/clock/ingenic,tcu.h> / { diff --git a/arch/mips/boot/dts/ingenic/jz4740.dtsi b/arch/mips/boot/dts/ingenic/jz4740.dtsi index c1afdfdaa8a3..772542e1f266 100644 --- a/arch/mips/boot/dts/ingenic/jz4740.dtsi +++ b/arch/mips/boot/dts/ingenic/jz4740.dtsi @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -#include <dt-bindings/clock/jz4740-cgu.h> +#include <dt-bindings/clock/ingenic,jz4740-cgu.h> #include <dt-bindings/clock/ingenic,tcu.h> / { diff --git a/arch/mips/boot/dts/ingenic/jz4770.dtsi b/arch/mips/boot/dts/ingenic/jz4770.dtsi index 05c00b93088e..dfe74328ae5d 100644 --- a/arch/mips/boot/dts/ingenic/jz4770.dtsi +++ b/arch/mips/boot/dts/ingenic/jz4770.dtsi @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -#include <dt-bindings/clock/jz4770-cgu.h> +#include <dt-bindings/clock/ingenic,jz4770-cgu.h> #include <dt-bindings/clock/ingenic,tcu.h> / { diff --git a/arch/mips/boot/dts/ingenic/jz4780.dtsi b/arch/mips/boot/dts/ingenic/jz4780.dtsi index 28adc3d93975..b0a4e2e019c3 100644 --- a/arch/mips/boot/dts/ingenic/jz4780.dtsi +++ b/arch/mips/boot/dts/ingenic/jz4780.dtsi @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -#include <dt-bindings/clock/jz4780-cgu.h> +#include <dt-bindings/clock/ingenic,jz4780-cgu.h> #include <dt-bindings/clock/ingenic,tcu.h> #include <dt-bindings/dma/jz4780-dma.h> diff --git a/arch/mips/boot/dts/ingenic/x1000.dtsi b/arch/mips/boot/dts/ingenic/x1000.dtsi index dec7909d4baa..8bd27edef216 100644 --- a/arch/mips/boot/dts/ingenic/x1000.dtsi +++ b/arch/mips/boot/dts/ingenic/x1000.dtsi @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <dt-bindings/clock/ingenic,tcu.h> -#include <dt-bindings/clock/x1000-cgu.h> +#include <dt-bindings/clock/ingenic,x1000-cgu.h> #include <dt-bindings/dma/x1000-dma.h> / { diff --git a/arch/mips/boot/dts/ingenic/x1830.dtsi b/arch/mips/boot/dts/ingenic/x1830.dtsi index 215257f8bb1a..2595df8671c7 100644 --- a/arch/mips/boot/dts/ingenic/x1830.dtsi +++ b/arch/mips/boot/dts/ingenic/x1830.dtsi @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <dt-bindings/clock/ingenic,tcu.h> -#include <dt-bindings/clock/x1830-cgu.h> +#include <dt-bindings/clock/ingenic,x1830-cgu.h> #include <dt-bindings/dma/x1830-dma.h> / { diff --git a/arch/mips/configs/bmips_stb_defconfig b/arch/mips/configs/bmips_stb_defconfig index 625bd2d7e685..5956fb95c19f 100644 --- a/arch/mips/configs/bmips_stb_defconfig +++ b/arch/mips/configs/bmips_stb_defconfig @@ -1,6 +1,7 @@ # CONFIG_LOCALVERSION_AUTO is not set # CONFIG_SWAP is not set CONFIG_NO_HZ=y +CONFIG_HZ=1000 CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y # CONFIG_VM_EVENT_COUNTERS is not set @@ -8,17 +9,34 @@ CONFIG_EXPERT=y CONFIG_BMIPS_GENERIC=y CONFIG_CPU_LITTLE_ENDIAN=y CONFIG_HIGHMEM=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_SMP=y CONFIG_NR_CPUS=4 +CONFIG_CC_STACKPROTECTOR_STRONG=y # CONFIG_SECCOMP is not set CONFIG_MIPS_O32_FP64_SUPPORT=y +# CONFIG_RD_GZIP is not set +# CONFIG_RD_BZIP2 is not set +# CONFIG_RD_LZMA is not set +CONFIG_RD_XZ=y +# CONFIG_RD_LZO is not set +# CONFIG_RD_LZ4 is not set +# CONFIG_IOSCHED_DEADLINE is not set +# CONFIG_IOSCHED_CFQ is not set +CONFIG_PCI=y +CONFIG_PCI_MSI=y +CONFIG_PCIEASPM_POWERSAVE=y +CONFIG_PCIEPORTBUS=y +CONFIG_PCIE_BRCMSTB=y CONFIG_CPU_FREQ=y CONFIG_CPU_FREQ_STAT=y +CONFIG_CPU_FREQ_STAT_DETAILS=y +CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y +CONFIG_CPU_FREQ_GOV_PERFORMANCE=y CONFIG_CPU_FREQ_GOV_POWERSAVE=y CONFIG_CPU_FREQ_GOV_USERSPACE=y CONFIG_CPU_FREQ_GOV_ONDEMAND=y CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y -CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y CONFIG_BMIPS_CPUFREQ=y # CONFIG_BLK_DEV_BSG is not set CONFIG_NET=y @@ -32,32 +50,99 @@ CONFIG_INET=y # CONFIG_INET_DIAG is not set CONFIG_CFG80211=y CONFIG_NL80211_TESTMODE=y +CONFIG_WIRELESS=y CONFIG_MAC80211=y +CONFIG_NL80211=y CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y # CONFIG_STANDALONE is not set # CONFIG_PREVENT_FIRMWARE_BUILD is not set +CONFIG_BRCMSTB_GISB_ARB=y +CONFIG_MODULES=y +CONFIG_MODULE_FORCE_LOAD=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODVERSIONS=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_IP_PNP_RARP=y +CONFIG_IP_MROUTE=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +# CONFIG_INET_XFRM_MODE_TRANSPORT is not set +# CONFIG_INET_XFRM_MODE_TUNNEL is not set +# CONFIG_INET_XFRM_MODE_BEET is not set +# CONFIG_INET_LRO is not set +CONFIG_INET_UDP_DIAG=y +CONFIG_TCP_CONG_ADVANCED=y +CONFIG_TCP_CONG_BIC=y +# CONFIG_TCP_CONG_WESTWOOD is not set +# CONFIG_TCP_CONG_HTCP is not set +# CONFIG_IPV6 is not set +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_NF_FILTER=y +CONFIG_NETFILTER=y +CONFIG_NETFILTER_XTABLES=y +CONFIG_BRIDGE=y +CONFIG_BRIDGE_NETFILTER=m +CONFIG_BRIDGE_NF_EBTABLES=m +CONFIG_BRIDGE_EBT_BROUTE=m +CONFIG_NET_DSA=y +CONFIG_NET_SWITCHDEV=y +CONFIG_DMA_CMA=y +CONFIG_CMA_ALIGNMENT=12 +CONFIG_SPI=y +CONFIG_SPI_BRCMSTB=y CONFIG_MTD=y +CONFIG_MTD_CMDLINE_PARTS=y +CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y +CONFIG_MTD_JEDECPROBE=y CONFIG_MTD_CFI_INTELEXT=y CONFIG_MTD_CFI_AMDSTD=y -CONFIG_MTD_PHYSMAP=y +CONFIG_MTD_CFI_STAA=y +CONFIG_MTD_ROM=y +CONFIG_MTD_ABSENT=y +CONFIG_MTD_PHYSMAP_OF=y +CONFIG_MTD_M25P80=y +CONFIG_MTD_NAND=y +CONFIG_MTD_NAND_BRCMNAND=y +CONFIG_MTD_SPI_NOR=y +# CONFIG_MTD_SPI_NOR_USE_4K_SECTORS is not set +CONFIG_MTD_UBI=y +CONFIG_MTD_UBI_GLUEBI=y +CONFIG_PROC_DEVICETREE=y +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=8192 # CONFIG_BLK_DEV is not set CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y +CONFIG_CHR_DEV_SG=y +CONFIG_SCSI_MULTI_LUN=y # CONFIG_SCSI_LOWLEVEL is not set CONFIG_NETDEVICES=y +CONFIG_VLAN_8021Q=y +CONFIG_MACVLAN=y CONFIG_BCMGENET=y CONFIG_USB_USBNET=y -# CONFIG_INPUT is not set +CONFIG_INPUT_EVDEV=y +# CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set +CONFIG_INPUT_MISC=y +CONFIG_INPUT_UINPUT=y # CONFIG_SERIO is not set -# CONFIG_VT is not set +CONFIG_VT=y +CONFIG_VT_HW_CONSOLE_BINDING=y +# CONFIG_DEVKMEM is not set CONFIG_SERIAL_8250=y # CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_OF_PLATFORM=y # CONFIG_HW_RANDOM is not set CONFIG_POWER_RESET=y +CONFIG_POWER_RESET_BRCMSTB=y CONFIG_POWER_RESET_SYSCON=y CONFIG_POWER_SUPPLY=y # CONFIG_HWMON is not set @@ -69,22 +154,76 @@ CONFIG_USB_OHCI_HCD=y CONFIG_USB_OHCI_HCD_PLATFORM=y CONFIG_USB_STORAGE=y CONFIG_SOC_BRCMSTB=y +CONFIG_MMC=y +CONFIG_MMC_BLOCK_MINORS=16 +CONFIG_MMC_SDHCI=y +CONFIG_MMC_SDHCI_PLTFM=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y # CONFIG_DNOTIFY is not set +CONFIG_PROC_KCORE=y +CONFIG_CIFS=y +CONFIG_JBD2_DEBUG=y CONFIG_FUSE_FS=y +CONFIG_FHANDLE=y +CONFIG_CGROUPS=y +CONFIG_CUSE=y +CONFIG_ISO9660_FS=y +CONFIG_JOLIET=y +CONFIG_ZISOFS=y +CONFIG_UDF_FS=y +CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y -CONFIG_PROC_KCORE=y CONFIG_TMPFS=y +CONFIG_JFFS2_FS=y +CONFIG_UBIFS_FS=y +CONFIG_SQUASHFS=y +CONFIG_SQUASHFS_LZO=y +CONFIG_SQUASHFS_XZ=y CONFIG_NFS_FS=y -CONFIG_CIFS=y +CONFIG_NFS_V3_ACL=y +CONFIG_NFS_V4=y +CONFIG_NFS_V4_1=y +CONFIG_NFS_V4_2=y +CONFIG_ROOT_NFS=y CONFIG_NLS_CODEPAGE_437=y -CONFIG_NLS_ASCII=y CONFIG_NLS_ISO8859_1=y -# CONFIG_CRYPTO_HW is not set CONFIG_PRINTK_TIME=y +CONFIG_DYNAMIC_DEBUG=y +# CONFIG_DEBUG_INFO is not set +# CONFIG_DEBUG_INFO_REDUCED is not set CONFIG_DEBUG_FS=y CONFIG_MAGIC_SYSRQ=y +CONFIG_LOCKUP_DETECTOR=y +CONFIG_DEBUG_USER=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="earlycon" +# CONFIG_MIPS_CMDLINE_FROM_DTB is not set +CONFIG_MIPS_CMDLINE_DTB_EXTEND=y +# CONFIG_MIPS_CMDLINE_FROM_BOOTLOADER is not set +# CONFIG_CRYPTO_HW is not set +CONFIG_DT_BCM974XX=y +CONFIG_FW_CFE=y +CONFIG_ATA=y +CONFIG_SATA_AHCI_PLATFORM=y +CONFIG_AHCI_BRCMSTB=y +CONFIG_GENERIC_PHY=y +CONFIG_GPIOLIB=y +CONFIG_GPIO_SYSFS=y +CONFIG_PHY_BRCM_USB=y +CONFIG_PHY_BRCM_SATA=y +CONFIG_PM_RUNTIME=y +CONFIG_PM_DEBUG=y +CONFIG_SYSVIPC=y +CONFIG_FUNCTION_GRAPH_TRACER=y +CONFIG_DYNAMIC_FTRACE=y +CONFIG_FUNCTION_TRACER=y +CONFIG_FUNCTION_PROFILER=y +CONFIG_IRQSOFF_TRACER=y +CONFIG_SCHED_TRACER=y +CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_FTRACE_SYSCALLS=y +CONFIG_TRACER_SNAPSHOT=y +CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y +CONFIG_STACK_TRACER=y diff --git a/arch/mips/dec/setup.c b/arch/mips/dec/setup.c index eaad0ed4b523..a8a30bb1dee8 100644 --- a/arch/mips/dec/setup.c +++ b/arch/mips/dec/setup.c @@ -117,21 +117,21 @@ static void __init dec_be_init(void) { switch (mips_machtype) { case MACH_DS23100: /* DS2100/DS3100 Pmin/Pmax */ - board_be_handler = dec_kn01_be_handler; + mips_set_be_handler(dec_kn01_be_handler); busirq_handler = dec_kn01_be_interrupt; busirq_flags |= IRQF_SHARED; dec_kn01_be_init(); break; case MACH_DS5000_1XX: /* DS5000/1xx 3min */ case MACH_DS5000_XX: /* DS5000/xx Maxine */ - board_be_handler = dec_kn02xa_be_handler; + mips_set_be_handler(dec_kn02xa_be_handler); busirq_handler = dec_kn02xa_be_interrupt; dec_kn02xa_be_init(); break; case MACH_DS5000_200: /* DS5000/200 3max */ case MACH_DS5000_2X0: /* DS5000/240 3max+ */ case MACH_DS5900: /* DS5900 bigmax */ - board_be_handler = dec_ecc_be_handler; + mips_set_be_handler(dec_ecc_be_handler); busirq_handler = dec_ecc_be_interrupt; dec_ecc_be_init(); break; diff --git a/arch/mips/generic/yamon-dt.c b/arch/mips/generic/yamon-dt.c index a3aa22c77cad..a07a5edbcda7 100644 --- a/arch/mips/generic/yamon-dt.c +++ b/arch/mips/generic/yamon-dt.c @@ -75,7 +75,7 @@ static unsigned int __init gen_fdt_mem_array( __init int yamon_dt_append_memory(void *fdt, const struct yamon_mem_region *regions) { - unsigned long phys_memsize, memsize; + unsigned long phys_memsize = 0, memsize; __be32 mem_array[2 * MAX_MEM_ARRAY_ENTRIES]; unsigned int mem_entries; int i, err, mem_off; diff --git a/arch/mips/include/asm/traps.h b/arch/mips/include/asm/traps.h index b710e76c9c65..15cde638b407 100644 --- a/arch/mips/include/asm/traps.h +++ b/arch/mips/include/asm/traps.h @@ -15,7 +15,7 @@ #define MIPS_BE_FATAL 2 /* treat as an unrecoverable error */ extern void (*board_be_init)(void); -extern int (*board_be_handler)(struct pt_regs *regs, int is_fixup); +void mips_set_be_handler(int (*handler)(struct pt_regs *reg, int is_fixup)); extern void (*board_nmi_handler_setup)(void); extern void (*board_ejtag_handler_setup)(void); diff --git a/arch/mips/kernel/r2300_fpu.S b/arch/mips/kernel/r2300_fpu.S index 12e58053544f..cbf6db98cfb3 100644 --- a/arch/mips/kernel/r2300_fpu.S +++ b/arch/mips/kernel/r2300_fpu.S @@ -29,8 +29,8 @@ #define EX2(a,b) \ 9: a,##b; \ .section __ex_table,"a"; \ - PTR 9b,bad_stack; \ - PTR 9b+4,bad_stack; \ + PTR 9b,fault; \ + PTR 9b+4,fault; \ .previous .set mips1 diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c index 2afa3eef486a..5512cd586e6e 100644 --- a/arch/mips/kernel/syscall.c +++ b/arch/mips/kernel/syscall.c @@ -240,12 +240,3 @@ SYSCALL_DEFINE3(cachectl, char *, addr, int, nbytes, int, op) { return -ENOSYS; } - -/* - * If we ever come here the user sp is bad. Zap the process right away. - * Due to the bad stack signaling wouldn't work. - */ -asmlinkage void bad_stack(void) -{ - do_exit(SIGSEGV); -} diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index 70e32de2bcaa..72d02d363f36 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -387,3 +387,4 @@ 446 n32 landlock_restrict_self sys_landlock_restrict_self # 447 reserved for memfd_secret 448 n32 process_mrelease sys_process_mrelease +449 n32 futex_waitv sys_futex_waitv diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index 1ca7bc337932..e2c481fcede6 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -363,3 +363,4 @@ 446 n64 landlock_restrict_self sys_landlock_restrict_self # 447 reserved for memfd_secret 448 n64 process_mrelease sys_process_mrelease +449 n64 futex_waitv sys_futex_waitv diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index a61c35edaa74..3714c97b2643 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -436,3 +436,4 @@ 446 o32 landlock_restrict_self sys_landlock_restrict_self # 447 reserved for memfd_secret 448 o32 process_mrelease sys_process_mrelease +449 o32 futex_waitv sys_futex_waitv diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 6f07362de5ce..d26b0fb8ea06 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -103,13 +103,19 @@ extern asmlinkage void handle_reserved(void); extern void tlb_do_page_fault_0(void); void (*board_be_init)(void); -int (*board_be_handler)(struct pt_regs *regs, int is_fixup); +static int (*board_be_handler)(struct pt_regs *regs, int is_fixup); void (*board_nmi_handler_setup)(void); void (*board_ejtag_handler_setup)(void); void (*board_bind_eic_interrupt)(int irq, int regset); void (*board_ebase_setup)(void); void(*board_cache_error_setup)(void); +void mips_set_be_handler(int (*handler)(struct pt_regs *regs, int is_fixup)) +{ + board_be_handler = handler; +} +EXPORT_SYMBOL_GPL(mips_set_be_handler); + static void show_raw_backtrace(unsigned long reg29, const char *loglvl, bool user) { diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 562aa878b266..aa20d074d388 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -1067,7 +1067,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = 1; break; case KVM_CAP_NR_VCPUS: - r = num_online_cpus(); + r = min_t(unsigned int, num_online_cpus(), KVM_MAX_VCPUS); break; case KVM_CAP_MAX_VCPUS: r = KVM_MAX_VCPUS; diff --git a/arch/mips/lantiq/clk.c b/arch/mips/lantiq/clk.c index dd819e31fcbb..4916cccf378f 100644 --- a/arch/mips/lantiq/clk.c +++ b/arch/mips/lantiq/clk.c @@ -158,6 +158,12 @@ void clk_deactivate(struct clk *clk) } EXPORT_SYMBOL(clk_deactivate); +struct clk *clk_get_parent(struct clk *clk) +{ + return NULL; +} +EXPORT_SYMBOL(clk_get_parent); + static inline u32 get_counter_resolution(void) { u32 res; diff --git a/arch/mips/loongson64/init.c b/arch/mips/loongson64/init.c index f03fc52ed1d6..ee8de1735b7c 100644 --- a/arch/mips/loongson64/init.c +++ b/arch/mips/loongson64/init.c @@ -77,7 +77,9 @@ void __init szmem(unsigned int node) (u32)node_id, mem_type, mem_start, mem_size); pr_info(" start_pfn:0x%llx, end_pfn:0x%llx, num_physpages:0x%lx\n", start_pfn, end_pfn, num_physpages); - memblock_add_node(PFN_PHYS(start_pfn), PFN_PHYS(node_psize), node); + memblock_add_node(PFN_PHYS(start_pfn), + PFN_PHYS(node_psize), node, + MEMBLOCK_NONE); break; case SYSTEM_RAM_RESERVED: pr_info("Node%d: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx MB\n", diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 19347dc6bbf8..325e1552cbea 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -529,7 +529,7 @@ static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, static void __init pcpu_fc_free(void *ptr, size_t size) { - memblock_free_early(__pa(ptr), size); + memblock_free(ptr, size); } void __init setup_per_cpu_areas(void) diff --git a/arch/mips/ralink/Kconfig b/arch/mips/ralink/Kconfig index c800bf5559b5..120adad51d6a 100644 --- a/arch/mips/ralink/Kconfig +++ b/arch/mips/ralink/Kconfig @@ -51,7 +51,8 @@ choice select SYS_SUPPORTS_HIGHMEM select MIPS_GIC select CLKSRC_MIPS_GIC - select HAVE_PCI if PCI_MT7621 + select HAVE_PCI + select PCI_DRIVERS_GENERIC select SOC_BUS endchoice diff --git a/arch/mips/sgi-ip22/ip22-berr.c b/arch/mips/sgi-ip22/ip22-berr.c index dc0110a607a5..afe8a61078e4 100644 --- a/arch/mips/sgi-ip22/ip22-berr.c +++ b/arch/mips/sgi-ip22/ip22-berr.c @@ -112,5 +112,5 @@ static int ip22_be_handler(struct pt_regs *regs, int is_fixup) void __init ip22_be_init(void) { - board_be_handler = ip22_be_handler; + mips_set_be_handler(ip22_be_handler); } diff --git a/arch/mips/sgi-ip22/ip28-berr.c b/arch/mips/sgi-ip22/ip28-berr.c index c61362d9ea95..16ca470deb80 100644 --- a/arch/mips/sgi-ip22/ip28-berr.c +++ b/arch/mips/sgi-ip22/ip28-berr.c @@ -468,7 +468,7 @@ static int ip28_be_handler(struct pt_regs *regs, int is_fixup) void __init ip22_be_init(void) { - board_be_handler = ip28_be_handler; + mips_set_be_handler(ip28_be_handler); } int ip28_show_be_info(struct seq_file *m) diff --git a/arch/mips/sgi-ip27/ip27-berr.c b/arch/mips/sgi-ip27/ip27-berr.c index 5a38ae6bdfa9..923a63a51cda 100644 --- a/arch/mips/sgi-ip27/ip27-berr.c +++ b/arch/mips/sgi-ip27/ip27-berr.c @@ -85,7 +85,7 @@ void __init ip27_be_init(void) int cpu = LOCAL_HUB_L(PI_CPU_NUM); int cpuoff = cpu << 8; - board_be_handler = ip27_be_handler; + mips_set_be_handler(ip27_be_handler); LOCAL_HUB_S(PI_ERR_INT_PEND, cpu ? PI_ERR_CLEAR_ALL_B : PI_ERR_CLEAR_ALL_A); diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c index 6173684b5aaa..adc2faeecf7c 100644 --- a/arch/mips/sgi-ip27/ip27-memory.c +++ b/arch/mips/sgi-ip27/ip27-memory.c @@ -341,7 +341,8 @@ static void __init szmem(void) continue; } memblock_add_node(PFN_PHYS(slot_getbasepfn(node, slot)), - PFN_PHYS(slot_psize), node); + PFN_PHYS(slot_psize), node, + MEMBLOCK_NONE); } } } diff --git a/arch/mips/sgi-ip30/ip30-setup.c b/arch/mips/sgi-ip30/ip30-setup.c index 44b1607e964d..75a34684e704 100644 --- a/arch/mips/sgi-ip30/ip30-setup.c +++ b/arch/mips/sgi-ip30/ip30-setup.c @@ -69,10 +69,10 @@ static void __init ip30_mem_init(void) total_mem += size; if (addr >= IP30_REAL_MEMORY_START) - memblock_free(addr, size); + memblock_phys_free(addr, size); else if ((addr + size) > IP30_REAL_MEMORY_START) - memblock_free(IP30_REAL_MEMORY_START, - size - IP30_MAX_PROM_MEMORY); + memblock_phys_free(IP30_REAL_MEMORY_START, + size - IP30_MAX_PROM_MEMORY); } pr_info("Detected %luMB of physical memory.\n", MEM_SHIFT(total_mem)); } diff --git a/arch/mips/sgi-ip32/ip32-berr.c b/arch/mips/sgi-ip32/ip32-berr.c index c860f95ab7ed..478b63b4c808 100644 --- a/arch/mips/sgi-ip32/ip32-berr.c +++ b/arch/mips/sgi-ip32/ip32-berr.c @@ -34,5 +34,5 @@ static int ip32_be_handler(struct pt_regs *regs, int is_fixup) void __init ip32_be_init(void) { - board_be_handler = ip32_be_handler; + mips_set_be_handler(ip32_be_handler); } diff --git a/arch/mips/sibyte/swarm/setup.c b/arch/mips/sibyte/swarm/setup.c index f07b15dd1c1a..72a31eeeebba 100644 --- a/arch/mips/sibyte/swarm/setup.c +++ b/arch/mips/sibyte/swarm/setup.c @@ -122,7 +122,7 @@ void __init plat_mem_setup(void) #error invalid SiByte board configuration #endif - board_be_handler = swarm_be_handler; + mips_set_be_handler(swarm_be_handler); if (xicor_probe()) swarm_rtc_type = RTC_XICOR; diff --git a/arch/mips/txx9/generic/setup_tx4927.c b/arch/mips/txx9/generic/setup_tx4927.c index 46e9c4101386..63f9725b2eb0 100644 --- a/arch/mips/txx9/generic/setup_tx4927.c +++ b/arch/mips/txx9/generic/setup_tx4927.c @@ -80,7 +80,7 @@ static int tx4927_be_handler(struct pt_regs *regs, int is_fixup) } static void __init tx4927_be_init(void) { - board_be_handler = tx4927_be_handler; + mips_set_be_handler(tx4927_be_handler); } static struct resource tx4927_sdram_resource[4]; diff --git a/arch/mips/txx9/generic/setup_tx4938.c b/arch/mips/txx9/generic/setup_tx4938.c index 17395d5d15ca..ba646548c5f6 100644 --- a/arch/mips/txx9/generic/setup_tx4938.c +++ b/arch/mips/txx9/generic/setup_tx4938.c @@ -82,7 +82,7 @@ static int tx4938_be_handler(struct pt_regs *regs, int is_fixup) } static void __init tx4938_be_init(void) { - board_be_handler = tx4938_be_handler; + mips_set_be_handler(tx4938_be_handler); } static struct resource tx4938_sdram_resource[4]; diff --git a/arch/mips/txx9/generic/setup_tx4939.c b/arch/mips/txx9/generic/setup_tx4939.c index bf8a3cdababf..f5f59b7401a3 100644 --- a/arch/mips/txx9/generic/setup_tx4939.c +++ b/arch/mips/txx9/generic/setup_tx4939.c @@ -86,7 +86,7 @@ static int tx4939_be_handler(struct pt_regs *regs, int is_fixup) } static void __init tx4939_be_init(void) { - board_be_handler = tx4939_be_handler; + mips_set_be_handler(tx4939_be_handler); } static struct resource tx4939_sdram_resource[4]; diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile index 1b2ea34c3d3b..d65f55f67e19 100644 --- a/arch/mips/vdso/Makefile +++ b/arch/mips/vdso/Makefile @@ -57,7 +57,7 @@ endif # VDSO linker flags. ldflags-y := -Bsymbolic --no-undefined -soname=linux-vdso.so.1 \ - $(filter -E%,$(KBUILD_CFLAGS)) -nostdlib -shared \ + $(filter -E%,$(KBUILD_CFLAGS)) -shared \ -G 0 --eh-frame-hdr --hash-style=sysv --build-id=sha1 -T CFLAGS_REMOVE_vdso.o = $(CC_FLAGS_FTRACE) diff --git a/arch/nds32/Kbuild b/arch/nds32/Kbuild index a4e40e534e6a..4e39f7abdeb6 100644 --- a/arch/nds32/Kbuild +++ b/arch/nds32/Kbuild @@ -1 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only + +# for cleaning +subdir- += boot diff --git a/arch/nds32/Makefile b/arch/nds32/Makefile index ccdca7142020..797ad9b450af 100644 --- a/arch/nds32/Makefile +++ b/arch/nds32/Makefile @@ -9,6 +9,8 @@ endif # Avoid generating FPU instructions arch-y += -mno-ext-fpu-sp -mno-ext-fpu-dp -mfloat-abi=soft +# Enable <nds32_intrinsic.h> +KBUILD_CFLAGS += -isystem $(shell $(CC) -print-file-name=include) KBUILD_CFLAGS += $(call cc-option, -mno-sched-prolog-epilog) KBUILD_CFLAGS += -mcmodel=large @@ -62,9 +64,6 @@ prepare: vdso_prepare vdso_prepare: prepare0 $(Q)$(MAKE) $(build)=arch/nds32/kernel/vdso include/generated/vdso-offsets.h -archclean: - $(Q)$(MAKE) $(clean)=$(boot) - define archhelp echo ' Image - kernel image (arch/$(ARCH)/boot/Image)' endef diff --git a/arch/nds32/include/asm/syscall.h b/arch/nds32/include/asm/syscall.h index 7b5180d78e20..90aa56c94af1 100644 --- a/arch/nds32/include/asm/syscall.h +++ b/arch/nds32/include/asm/syscall.h @@ -132,28 +132,6 @@ syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, memcpy(args, ®s->uregs[0] + 1, 5 * sizeof(args[0])); } -/** - * syscall_set_arguments - change system call parameter value - * @task: task of interest, must be in system call entry tracing - * @regs: task_pt_regs() of @task - * @args: array of argument values to store - * - * Changes 6 arguments to the system call. The first argument gets value - * @args[0], and so on. - * - * It's only valid to call this when @task is stopped for tracing on - * entry to a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT. - */ -static inline void -syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - const unsigned long *args) -{ - regs->orig_r0 = args[0]; - args++; - - memcpy(®s->uregs[0] + 1, args, 5 * sizeof(args[0])); -} - static inline int syscall_get_arch(struct task_struct *task) { diff --git a/arch/nds32/kernel/traps.c b/arch/nds32/kernel/traps.c index f06421c645af..ca75d475eda4 100644 --- a/arch/nds32/kernel/traps.c +++ b/arch/nds32/kernel/traps.c @@ -118,7 +118,7 @@ DEFINE_SPINLOCK(die_lock); /* * This function is protected against re-entrancy. */ -void die(const char *str, struct pt_regs *regs, int err) +void __noreturn die(const char *str, struct pt_regs *regs, int err) { struct task_struct *tsk = current; static int die_counter; diff --git a/arch/nds32/mm/fault.c b/arch/nds32/mm/fault.c index f02524eb6d56..1d139b117168 100644 --- a/arch/nds32/mm/fault.c +++ b/arch/nds32/mm/fault.c @@ -13,7 +13,7 @@ #include <asm/tlbflush.h> -extern void die(const char *str, struct pt_regs *regs, long err); +extern void __noreturn die(const char *str, struct pt_regs *regs, long err); /* * This is useful to dump out the page tables associated with @@ -299,10 +299,6 @@ no_context: show_pte(mm, addr); die("Oops", regs, error_code); - bust_spinlocks(0); - do_exit(SIGKILL); - - return; /* * We ran out of memory, or some other thing happened to us that made diff --git a/arch/nios2/Kbuild b/arch/nios2/Kbuild index a4e40e534e6a..4e39f7abdeb6 100644 --- a/arch/nios2/Kbuild +++ b/arch/nios2/Kbuild @@ -1 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only + +# for cleaning +subdir- += boot diff --git a/arch/nios2/Makefile b/arch/nios2/Makefile index 52c03e60b114..02d678559066 100644 --- a/arch/nios2/Makefile +++ b/arch/nios2/Makefile @@ -8,8 +8,7 @@ # Written by Fredrik Markstrom # # This file is included by the global makefile so that you can add your own -# architecture-specific flags and dependencies. Remember to do have actions -# for "archclean" cleaning up for this architecture. +# architecture-specific flags and dependencies. # # Nios2 port by Wind River Systems Inc trough: # fredrik.markstrom@gmail.com and ivarholmqvist@gmail.com @@ -53,14 +52,12 @@ core-y += $(nios2-boot)/dts/ all: vmImage -archclean: - $(Q)$(MAKE) $(clean)=$(nios2-boot) - $(BOOT_TARGETS): vmlinux $(Q)$(MAKE) $(build)=$(nios2-boot) $(nios2-boot)/$@ install: - $(Q)$(MAKE) $(build)=$(nios2-boot) BOOTIMAGE=$(KBUILD_IMAGE) install + sh $(srctree)/$(nios2-boot)/install.sh $(KERNELRELEASE) \ + $(KBUILD_IMAGE) System.map "$(INSTALL_PATH)" define archhelp echo '* vmImage - Kernel-only image for U-Boot ($(KBUILD_IMAGE))' diff --git a/arch/nios2/boot/Makefile b/arch/nios2/boot/Makefile index 37dfc7e584bc..8c3ad76602f3 100644 --- a/arch/nios2/boot/Makefile +++ b/arch/nios2/boot/Makefile @@ -30,6 +30,3 @@ $(obj)/zImage: $(obj)/compressed/vmlinux FORCE $(obj)/compressed/vmlinux: $(obj)/vmlinux.gz FORCE $(Q)$(MAKE) $(build)=$(obj)/compressed $@ - -install: - sh $(srctree)/$(src)/install.sh $(KERNELRELEASE) $(BOOTIMAGE) System.map "$(INSTALL_PATH)" diff --git a/arch/nios2/include/asm/syscall.h b/arch/nios2/include/asm/syscall.h index 526449edd768..fff52205fb65 100644 --- a/arch/nios2/include/asm/syscall.h +++ b/arch/nios2/include/asm/syscall.h @@ -58,17 +58,6 @@ static inline void syscall_get_arguments(struct task_struct *task, *args = regs->r9; } -static inline void syscall_set_arguments(struct task_struct *task, - struct pt_regs *regs, const unsigned long *args) -{ - regs->r4 = *args++; - regs->r5 = *args++; - regs->r6 = *args++; - regs->r7 = *args++; - regs->r8 = *args++; - regs->r9 = *args; -} - static inline int syscall_get_arch(struct task_struct *task) { return AUDIT_ARCH_NIOS2; diff --git a/arch/openrisc/Kbuild b/arch/openrisc/Kbuild index 4234b4c03e72..b0b0f2b03f87 100644 --- a/arch/openrisc/Kbuild +++ b/arch/openrisc/Kbuild @@ -1,3 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 obj-y += lib/ kernel/ mm/ obj-y += boot/dts/ + +# for cleaning +subdir- += boot diff --git a/arch/openrisc/Makefile b/arch/openrisc/Makefile index c52de526e518..760b734fb822 100644 --- a/arch/openrisc/Makefile +++ b/arch/openrisc/Makefile @@ -1,9 +1,7 @@ # BK Id: %F% %I% %G% %U% %#% # # This file is included by the global makefile so that you can add your own -# architecture-specific flags and dependencies. Remember to do have actions -# for "archclean" and "archdep" for cleaning up and making dependencies for -# this architecture +# architecture-specific flags and dependencies. # # This file is subject to the terms and conditions of the GNU General Public # License. See the file "COPYING" in the main directory of this archive @@ -48,6 +46,3 @@ PHONY += vmlinux.bin vmlinux.bin: vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ - -archclean: - $(Q)$(MAKE) $(clean)=$(boot) diff --git a/arch/openrisc/include/asm/syscall.h b/arch/openrisc/include/asm/syscall.h index e6383be2a195..903ed882bdec 100644 --- a/arch/openrisc/include/asm/syscall.h +++ b/arch/openrisc/include/asm/syscall.h @@ -57,13 +57,6 @@ syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, memcpy(args, ®s->gpr[3], 6 * sizeof(args[0])); } -static inline void -syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - const unsigned long *args) -{ - memcpy(®s->gpr[3], args, 6 * sizeof(args[0])); -} - static inline int syscall_get_arch(struct task_struct *task) { return AUDIT_ARCH_OPENRISC; diff --git a/arch/openrisc/kernel/dma.c b/arch/openrisc/kernel/dma.c index 1b16d97e7da7..a82b2caaa560 100644 --- a/arch/openrisc/kernel/dma.c +++ b/arch/openrisc/kernel/dma.c @@ -33,7 +33,7 @@ page_set_nocache(pte_t *pte, unsigned long addr, * Flush the page out of the TLB so that the new page flags get * picked up next time there's an access */ - flush_tlb_page(NULL, addr); + flush_tlb_kernel_range(addr, addr + PAGE_SIZE); /* Flush page out of dcache */ for (cl = __pa(addr); cl < __pa(next); cl += cpuinfo->dcache_block_size) @@ -56,7 +56,7 @@ page_clear_nocache(pte_t *pte, unsigned long addr, * Flush the page out of the TLB so that the new page flags get * picked up next time there's an access */ - flush_tlb_page(NULL, addr); + flush_tlb_kernel_range(addr, addr + PAGE_SIZE); return 0; } diff --git a/arch/openrisc/kernel/signal.c b/arch/openrisc/kernel/signal.c index 1ebcff271096..99516c9191c7 100644 --- a/arch/openrisc/kernel/signal.c +++ b/arch/openrisc/kernel/signal.c @@ -28,8 +28,6 @@ #include <asm/ucontext.h> #include <linux/uaccess.h> -#define DEBUG_SIG 0 - struct rt_sigframe { struct siginfo info; struct ucontext uc; diff --git a/arch/openrisc/kernel/smp.c b/arch/openrisc/kernel/smp.c index da21e22bf4da..27041db2c8b0 100644 --- a/arch/openrisc/kernel/smp.c +++ b/arch/openrisc/kernel/smp.c @@ -268,7 +268,7 @@ static inline void ipi_flush_tlb_range(void *info) local_flush_tlb_range(NULL, fd->addr1, fd->addr2); } -static void smp_flush_tlb_range(struct cpumask *cmask, unsigned long start, +static void smp_flush_tlb_range(const struct cpumask *cmask, unsigned long start, unsigned long end) { unsigned int cpuid; @@ -316,7 +316,9 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - smp_flush_tlb_range(mm_cpumask(vma->vm_mm), start, end); + const struct cpumask *cmask = vma ? mm_cpumask(vma->vm_mm) + : cpu_online_mask; + smp_flush_tlb_range(cmask, start, end); } /* Instruction cache invalidate - performed on each cpu */ diff --git a/arch/openrisc/kernel/time.c b/arch/openrisc/kernel/time.c index b82866061958..a6e69386f82a 100644 --- a/arch/openrisc/kernel/time.c +++ b/arch/openrisc/kernel/time.c @@ -127,7 +127,7 @@ irqreturn_t __irq_entry timer_interrupt(struct pt_regs *regs) return IRQ_HANDLED; } -/** +/* * Clocksource: Based on OpenRISC timer/counter * * This sets up the OpenRISC Tick Timer as a clock source. The tick timer diff --git a/arch/openrisc/kernel/traps.c b/arch/openrisc/kernel/traps.c index aa1e709405ac..0898cb159fac 100644 --- a/arch/openrisc/kernel/traps.c +++ b/arch/openrisc/kernel/traps.c @@ -197,7 +197,7 @@ void nommu_dump_state(struct pt_regs *regs, } /* This is normally the 'Oops' routine */ -void die(const char *str, struct pt_regs *regs, long err) +void __noreturn die(const char *str, struct pt_regs *regs, long err) { console_verbose(); diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c index c730d1a51686..f0fa6394a58e 100644 --- a/arch/openrisc/mm/fault.c +++ b/arch/openrisc/mm/fault.c @@ -32,7 +32,7 @@ unsigned long pte_errors; /* updated by do_page_fault() */ */ volatile pgd_t *current_pgd[NR_CPUS]; -extern void die(char *, struct pt_regs *, long); +extern void __noreturn die(char *, struct pt_regs *, long); /* * This routine handles page faults. It determines the address, @@ -248,8 +248,6 @@ no_context: die("Oops", regs, write_acc); - do_exit(SIGKILL); - /* * We ran out of memory, or some other thing happened to us that made * us unable to handle the page fault gracefully. diff --git a/arch/parisc/Kbuild b/arch/parisc/Kbuild index 3c068b700a81..a6d3b280ba0c 100644 --- a/arch/parisc/Kbuild +++ b/arch/parisc/Kbuild @@ -1,2 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only obj-y += mm/ kernel/ math-emu/ + +# for cleaning +subdir- += boot diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile index fcde3ffa0221..8db4af4879d0 100644 --- a/arch/parisc/Makefile +++ b/arch/parisc/Makefile @@ -2,9 +2,7 @@ # parisc/Makefile # # This file is included by the global makefile so that you can add your own -# architecture-specific flags and dependencies. Remember to do have actions -# for "archclean" and "archdep" for cleaning up and making dependencies for -# this architecture +# architecture-specific flags and dependencies. # # This file is subject to the terms and conditions of the GNU General Public # License. See the file "COPYING" in the main directory of this archive @@ -181,8 +179,5 @@ define archhelp @echo ' zinstall - Install compressed vmlinuz kernel' endef -archclean: - $(Q)$(MAKE) $(clean)=$(boot) - archheaders: $(Q)$(MAKE) $(build)=arch/parisc/kernel/syscalls all diff --git a/arch/parisc/configs/generic-32bit_defconfig b/arch/parisc/configs/generic-32bit_defconfig index d6fd8fa7ed8c..53061cb2cf7f 100644 --- a/arch/parisc/configs/generic-32bit_defconfig +++ b/arch/parisc/configs/generic-32bit_defconfig @@ -231,6 +231,7 @@ CONFIG_CRYPTO_DEFLATE=y CONFIG_CRC_CCITT=m CONFIG_CRC_T10DIF=y CONFIG_FONTS=y +CONFIG_PRINTK_TIME=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y CONFIG_DEBUG_MEMORY_INIT=y diff --git a/arch/parisc/include/asm/assembly.h b/arch/parisc/include/asm/assembly.h index 7085df079702..39e7985086f9 100644 --- a/arch/parisc/include/asm/assembly.h +++ b/arch/parisc/include/asm/assembly.h @@ -3,38 +3,19 @@ * Copyright (C) 1999 Hewlett-Packard (Frank Rowand) * Copyright (C) 1999 Philipp Rumpf <prumpf@tux.org> * Copyright (C) 1999 SuSE GmbH + * Copyright (C) 2021 Helge Deller <deller@gmx.de> */ #ifndef _PARISC_ASSEMBLY_H #define _PARISC_ASSEMBLY_H -#define CALLEE_FLOAT_FRAME_SIZE 80 - #ifdef CONFIG_64BIT -#define LDREG ldd -#define STREG std -#define LDREGX ldd,s -#define LDREGM ldd,mb -#define STREGM std,ma -#define SHRREG shrd -#define SHLREG shld -#define ANDCM andcm,* -#define COND(x) * ## x #define RP_OFFSET 16 #define FRAME_SIZE 128 #define CALLEE_REG_FRAME_SIZE 144 #define REG_SZ 8 #define ASM_ULONG_INSN .dword #else /* CONFIG_64BIT */ -#define LDREG ldw -#define STREG stw -#define LDREGX ldwx,s -#define LDREGM ldwm -#define STREGM stwm -#define SHRREG shr -#define SHLREG shlw -#define ANDCM andcm -#define COND(x) x #define RP_OFFSET 20 #define FRAME_SIZE 64 #define CALLEE_REG_FRAME_SIZE 128 @@ -45,6 +26,7 @@ /* Frame alignment for 32- and 64-bit */ #define FRAME_ALIGN 64 +#define CALLEE_FLOAT_FRAME_SIZE 80 #define CALLEE_SAVE_FRAME_SIZE (CALLEE_REG_FRAME_SIZE + CALLEE_FLOAT_FRAME_SIZE) #ifdef CONFIG_PA20 @@ -68,6 +50,28 @@ #ifdef __ASSEMBLY__ #ifdef CONFIG_64BIT +#define LDREG ldd +#define STREG std +#define LDREGX ldd,s +#define LDREGM ldd,mb +#define STREGM std,ma +#define SHRREG shrd +#define SHLREG shld +#define ANDCM andcm,* +#define COND(x) * ## x +#else /* CONFIG_64BIT */ +#define LDREG ldw +#define STREG stw +#define LDREGX ldwx,s +#define LDREGM ldwm +#define STREGM stwm +#define SHRREG shr +#define SHLREG shlw +#define ANDCM andcm +#define COND(x) x +#endif + +#ifdef CONFIG_64BIT /* the 64-bit pa gnu assembler unfortunately defaults to .level 1.1 or 2.0 so * work around that for now... */ .level 2.0w diff --git a/arch/parisc/include/asm/jump_label.h b/arch/parisc/include/asm/jump_label.h index 7efb1aa2f7f8..af2a598bc0f8 100644 --- a/arch/parisc/include/asm/jump_label.h +++ b/arch/parisc/include/asm/jump_label.h @@ -5,6 +5,7 @@ #ifndef __ASSEMBLY__ #include <linux/types.h> +#include <linux/stringify.h> #include <asm/assembly.h> #define JUMP_LABEL_NOP_SIZE 4 diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index 7badd872f05a..3e7cf882639f 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -76,6 +76,8 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr) purge_tlb_end(flags); } +extern void __update_cache(pte_t pte); + /* Certain architectures need to do special things when PTEs * within a page table are directly modified. Thus, the following * hook is made available. @@ -83,11 +85,14 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr) #define set_pte(pteptr, pteval) \ do { \ *(pteptr) = (pteval); \ - barrier(); \ + mb(); \ } while(0) #define set_pte_at(mm, addr, pteptr, pteval) \ do { \ + if (pte_present(pteval) && \ + pte_user(pteval)) \ + __update_cache(pteval); \ *(pteptr) = (pteval); \ purge_tlb_entries(mm, addr); \ } while (0) @@ -303,6 +308,7 @@ extern unsigned long *empty_zero_page; #define pte_none(x) (pte_val(x) == 0) #define pte_present(x) (pte_val(x) & _PAGE_PRESENT) +#define pte_user(x) (pte_val(x) & _PAGE_USER) #define pte_clear(mm, addr, xp) set_pte_at(mm, addr, xp, __pte(0)) #define pmd_flag(x) (pmd_val(x) & PxD_FLAG_MASK) @@ -410,7 +416,7 @@ extern void paging_init (void); #define PG_dcache_dirty PG_arch_1 -extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *); +#define update_mmu_cache(vms,addr,ptep) __update_cache(*ptep) /* Encode and de-code a swap entry */ diff --git a/arch/parisc/include/asm/rt_sigframe.h b/arch/parisc/include/asm/rt_sigframe.h index 4b9e3d707571..2b3010ade00e 100644 --- a/arch/parisc/include/asm/rt_sigframe.h +++ b/arch/parisc/include/asm/rt_sigframe.h @@ -2,7 +2,7 @@ #ifndef _ASM_PARISC_RT_SIGFRAME_H #define _ASM_PARISC_RT_SIGFRAME_H -#define SIGRETURN_TRAMP 3 +#define SIGRETURN_TRAMP 4 #define SIGRESTARTBLOCK_TRAMP 5 #define TRAMP_SIZE (SIGRETURN_TRAMP + SIGRESTARTBLOCK_TRAMP) diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index c61827e4928a..94150b91c96f 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -83,9 +83,9 @@ EXPORT_SYMBOL(flush_cache_all_local); #define pfn_va(pfn) __va(PFN_PHYS(pfn)) void -update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) +__update_cache(pte_t pte) { - unsigned long pfn = pte_pfn(*ptep); + unsigned long pfn = pte_pfn(pte); struct page *page; /* We don't have pte special. As a result, we can be called with diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index 57944d6f9ebb..88c188a965d8 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -1805,7 +1805,7 @@ syscall_restore: /* Are we being ptraced? */ LDREG TASK_TI_FLAGS(%r1),%r19 - ldi _TIF_SYSCALL_TRACE_MASK,%r2 + ldi _TIF_SINGLESTEP|_TIF_BLOCKSTEP,%r2 and,COND(=) %r19,%r2,%r0 b,n syscall_restore_rfi diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c index bbfe23c40c01..46b1050640b8 100644 --- a/arch/parisc/kernel/signal.c +++ b/arch/parisc/kernel/signal.c @@ -288,21 +288,22 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs, already in userspace. The first words of tramp are used to save the previous sigrestartblock trampoline that might be on the stack. We start the sigreturn trampoline at - SIGRESTARTBLOCK_TRAMP. */ + SIGRESTARTBLOCK_TRAMP+X. */ err |= __put_user(in_syscall ? INSN_LDI_R25_1 : INSN_LDI_R25_0, &frame->tramp[SIGRESTARTBLOCK_TRAMP+0]); - err |= __put_user(INSN_BLE_SR2_R0, + err |= __put_user(INSN_LDI_R20, &frame->tramp[SIGRESTARTBLOCK_TRAMP+1]); - err |= __put_user(INSN_LDI_R20, + err |= __put_user(INSN_BLE_SR2_R0, &frame->tramp[SIGRESTARTBLOCK_TRAMP+2]); + err |= __put_user(INSN_NOP, &frame->tramp[SIGRESTARTBLOCK_TRAMP+3]); - start = (unsigned long) &frame->tramp[SIGRESTARTBLOCK_TRAMP+0]; - end = (unsigned long) &frame->tramp[SIGRESTARTBLOCK_TRAMP+3]; + start = (unsigned long) &frame->tramp[0]; + end = (unsigned long) &frame->tramp[TRAMP_SIZE]; flush_user_dcache_range_asm(start, end); flush_user_icache_range_asm(start, end); /* TRAMP Words 0-4, Length 5 = SIGRESTARTBLOCK_TRAMP - * TRAMP Words 5-7, Length 3 = SIGRETURN_TRAMP + * TRAMP Words 5-9, Length 4 = SIGRETURN_TRAMP * So the SIGRETURN_TRAMP is at the end of SIGRESTARTBLOCK_TRAMP */ rp = (unsigned long) &frame->tramp[SIGRESTARTBLOCK_TRAMP]; diff --git a/arch/parisc/kernel/signal32.h b/arch/parisc/kernel/signal32.h index a5bdbb5678b7..f166250f2d06 100644 --- a/arch/parisc/kernel/signal32.h +++ b/arch/parisc/kernel/signal32.h @@ -36,7 +36,7 @@ struct compat_regfile { compat_int_t rf_sar; }; -#define COMPAT_SIGRETURN_TRAMP 3 +#define COMPAT_SIGRETURN_TRAMP 4 #define COMPAT_SIGRESTARTBLOCK_TRAMP 5 #define COMPAT_TRAMP_SIZE (COMPAT_SIGRETURN_TRAMP + \ COMPAT_SIGRESTARTBLOCK_TRAMP) diff --git a/arch/parisc/kernel/stacktrace.c b/arch/parisc/kernel/stacktrace.c index 6b4ca91932cf..023834ef582e 100644 --- a/arch/parisc/kernel/stacktrace.c +++ b/arch/parisc/kernel/stacktrace.c @@ -8,6 +8,7 @@ * * TODO: Userspace stacktrace (CONFIG_USER_STACKTRACE_SUPPORT) */ +#include <linux/kernel.h> #include <linux/stacktrace.h> #include <asm/unwind.h> diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index bf751e0732b7..358c00000755 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -446,3 +446,4 @@ 446 common landlock_restrict_self sys_landlock_restrict_self # 447 reserved for memfd_secret 448 common process_mrelease sys_process_mrelease +449 common futex_waitv sys_futex_waitv diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index 2769eb991f58..3d208afd15bc 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S @@ -57,6 +57,8 @@ SECTIONS { . = KERNEL_BINARY_TEXT_START; + _stext = .; /* start of kernel text, includes init code & data */ + __init_begin = .; HEAD_TEXT_SECTION MLONGCALL_DISCARD(INIT_TEXT_SECTION(8)) @@ -80,7 +82,6 @@ SECTIONS /* freed after init ends here */ _text = .; /* Text and read-only data */ - _stext = .; MLONGCALL_KEEP(INIT_TEXT_SECTION(8)) .text ALIGN(PAGE_SIZE) : { TEXT_TEXT diff --git a/arch/powerpc/Kbuild b/arch/powerpc/Kbuild index 5e2f9eaa3ee7..22cd0d55a892 100644 --- a/arch/powerpc/Kbuild +++ b/arch/powerpc/Kbuild @@ -16,3 +16,6 @@ obj-$(CONFIG_KVM) += kvm/ obj-$(CONFIG_PERF_EVENTS) += perf/ obj-$(CONFIG_KEXEC_CORE) += kexec/ obj-$(CONFIG_KEXEC_FILE) += purgatory/ + +# for cleaning +subdir- += boot diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 54cad1faa5d0..e02568f17334 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -1,7 +1,5 @@ # This file is included by the global makefile so that you can add your own -# architecture-specific flags and dependencies. Remember to do have actions -# for "archclean" and "archdep" for cleaning up and making dependencies for -# this architecture. +# architecture-specific flags and dependencies. # # This file is subject to the terms and conditions of the GNU General Public # License. See the file "COPYING" in the main directory of this archive @@ -411,9 +409,6 @@ install: sh -x $(srctree)/$(boot)/install.sh "$(KERNELRELEASE)" vmlinux \ System.map "$(INSTALL_PATH)" -archclean: - $(Q)$(MAKE) $(clean)=$(boot) - ifeq ($(KBUILD_EXTMOD),) # We need to generate vdso-offsets.h before compiling certain files in kernel/. # In order to do that, we should use the archprepare target, but we can't since diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig index 396d508730a1..f491875700e8 100644 --- a/arch/powerpc/configs/skiroot_defconfig +++ b/arch/powerpc/configs/skiroot_defconfig @@ -274,7 +274,6 @@ CONFIG_NLS_UTF8=y CONFIG_ENCRYPTED_KEYS=y CONFIG_SECURITY=y CONFIG_HARDENED_USERCOPY=y -# CONFIG_HARDENED_USERCOPY_FALLBACK is not set CONFIG_HARDENED_USERCOPY_PAGESPAN=y CONFIG_FORTIFY_SOURCE=y CONFIG_SECURITY_LOCKDOWN_LSM=y diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index a68311077d32..9c3c9f04129f 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -31,7 +31,7 @@ struct machdep_calls { #ifdef CONFIG_PM void (*iommu_restore)(void); #endif -#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE +#ifdef CONFIG_MEMORY_HOTPLUG unsigned long (*memory_block_size)(void); #endif #endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h index 2b9edbf6e929..f6cf0159024e 100644 --- a/arch/powerpc/include/asm/ppc-pci.h +++ b/arch/powerpc/include/asm/ppc-pci.h @@ -55,11 +55,6 @@ void eeh_pe_dev_mode_mark(struct eeh_pe *pe, int mode); void eeh_sysfs_add_device(struct pci_dev *pdev); void eeh_sysfs_remove_device(struct pci_dev *pdev); -static inline const char *eeh_driver_name(struct pci_dev *pdev) -{ - return (pdev && pdev->driver) ? pdev->driver->name : "<null>"; -} - #endif /* CONFIG_EEH */ #define PCI_BUSNO(bdfn) ((bdfn >> 8) & 0xff) diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h index 6e4af4492a14..79cb7a25a5fb 100644 --- a/arch/powerpc/include/asm/sections.h +++ b/arch/powerpc/include/asm/sections.h @@ -6,21 +6,8 @@ #include <linux/elf.h> #include <linux/uaccess.h> -#define arch_is_kernel_initmem_freed arch_is_kernel_initmem_freed - #include <asm-generic/sections.h> -extern bool init_mem_is_free; - -static inline int arch_is_kernel_initmem_freed(unsigned long addr) -{ - if (!init_mem_is_free) - return 0; - - return addr >= (unsigned long)__init_begin && - addr < (unsigned long)__init_end; -} - extern char __head_end[]; #ifdef __powerpc64__ diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h index c60ebd04b2ed..52d05b465e3e 100644 --- a/arch/powerpc/include/asm/syscall.h +++ b/arch/powerpc/include/asm/syscall.h @@ -103,16 +103,6 @@ static inline void syscall_get_arguments(struct task_struct *task, } } -static inline void syscall_set_arguments(struct task_struct *task, - struct pt_regs *regs, - const unsigned long *args) -{ - memcpy(®s->gpr[3], args, 6 * sizeof(args[0])); - - /* Also copy the first argument into orig_gpr3 */ - regs->orig_gpr3 = args[0]; -} - static inline int syscall_get_arch(struct task_struct *task) { if (is_32bit_task()) diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 0e3640e14eb1..5fa68c2ef1f8 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -196,3 +196,6 @@ clean-files := vmlinux.lds # Force dependency (incbin is bad) $(obj)/vdso32_wrapper.o : $(obj)/vdso32/vdso32.so.dbg $(obj)/vdso64_wrapper.o : $(obj)/vdso64/vdso64.so.dbg + +# for cleaning +subdir- += vdso32 vdso64 diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 358aee7c2d79..ba527fb52993 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -1095,8 +1095,8 @@ static int __init dt_cpu_ftrs_scan_callback(unsigned long node, const char cpufeatures_setup_finished(); - memblock_free(__pa(dt_cpu_features), - sizeof(struct dt_cpu_feature)*nr_dt_cpu_features); + memblock_free(dt_cpu_features, + sizeof(struct dt_cpu_feature) * nr_dt_cpu_features); return 0; } diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 91e0f4cf1db3..28bb1e7263a6 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -399,6 +399,14 @@ out: return ret; } +static inline const char *eeh_driver_name(struct pci_dev *pdev) +{ + if (pdev) + return dev_driver_string(&pdev->dev); + + return "<null>"; +} + /** * eeh_dev_check_failure - Check if all 1's data is due to EEH slot freeze * @edev: eeh device diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 3eff6a4888e7..350dab18e137 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -104,13 +104,13 @@ static bool eeh_edev_actionable(struct eeh_dev *edev) */ static inline struct pci_driver *eeh_pcid_get(struct pci_dev *pdev) { - if (!pdev || !pdev->driver) + if (!pdev || !pdev->dev.driver) return NULL; - if (!try_module_get(pdev->driver->driver.owner)) + if (!try_module_get(pdev->dev.driver->owner)) return NULL; - return pdev->driver; + return to_pci_driver(pdev->dev.driver); } /** @@ -122,10 +122,10 @@ static inline struct pci_driver *eeh_pcid_get(struct pci_dev *pdev) */ static inline void eeh_pcid_put(struct pci_dev *pdev) { - if (!pdev || !pdev->driver) + if (!pdev || !pdev->dev.driver) return; - module_put(pdev->driver->driver.owner); + module_put(pdev->dev.driver->owner); } /** diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 2d596881b70e..0d073b9fd52c 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -733,6 +733,7 @@ _GLOBAL(mmu_pin_tlb) #ifdef CONFIG_PIN_TLB_DATA LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET) LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG | _PMD_ACCESSED) + li r8, 0 #ifdef CONFIG_PIN_TLB_IMMR li r0, 3 #else @@ -741,26 +742,26 @@ _GLOBAL(mmu_pin_tlb) mtctr r0 cmpwi r4, 0 beq 4f - LOAD_REG_IMMEDIATE(r8, 0xf0 | _PAGE_RO | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT) LOAD_REG_ADDR(r9, _sinittext) 2: ori r0, r6, MD_EVALID + ori r12, r8, 0xf0 | _PAGE_RO | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT mtspr SPRN_MD_CTR, r5 mtspr SPRN_MD_EPN, r0 mtspr SPRN_MD_TWC, r7 - mtspr SPRN_MD_RPN, r8 + mtspr SPRN_MD_RPN, r12 addi r5, r5, 0x100 addis r6, r6, SZ_8M@h addis r8, r8, SZ_8M@h cmplw r6, r9 bdnzt lt, 2b - -4: LOAD_REG_IMMEDIATE(r8, 0xf0 | _PAGE_DIRTY | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT) +4: 2: ori r0, r6, MD_EVALID + ori r12, r8, 0xf0 | _PAGE_DIRTY | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT mtspr SPRN_MD_CTR, r5 mtspr SPRN_MD_EPN, r0 mtspr SPRN_MD_TWC, r7 - mtspr SPRN_MD_RPN, r8 + mtspr SPRN_MD_RPN, r12 addi r5, r5, 0x100 addis r6, r6, SZ_8M@h addis r8, r8, SZ_8M@h @@ -781,7 +782,7 @@ _GLOBAL(mmu_pin_tlb) #endif #if defined(CONFIG_PIN_TLB_IMMR) || defined(CONFIG_PIN_TLB_DATA) lis r0, (MD_RSV4I | MD_TWAM)@h - mtspr SPRN_MI_CTR, r0 + mtspr SPRN_MD_CTR, r0 #endif mtspr SPRN_SRR1, r10 mtspr SPRN_SRR0, r11 diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 9bd30cac852b..4208b4044d12 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -322,8 +322,8 @@ void __init free_unused_pacas(void) new_ptrs_size = sizeof(struct paca_struct *) * nr_cpu_ids; if (new_ptrs_size < paca_ptrs_size) - memblock_free(__pa(paca_ptrs) + new_ptrs_size, - paca_ptrs_size - new_ptrs_size); + memblock_phys_free(__pa(paca_ptrs) + new_ptrs_size, + paca_ptrs_size - new_ptrs_size); paca_nr_cpu_ids = nr_cpu_ids; paca_ptrs_size = new_ptrs_size; @@ -331,8 +331,8 @@ void __init free_unused_pacas(void) #ifdef CONFIG_PPC_BOOK3S_64 if (early_radix_enabled()) { /* Ugly fixup, see new_slb_shadow() */ - memblock_free(__pa(paca_ptrs[boot_cpuid]->slb_shadow_ptr), - sizeof(struct slb_shadow)); + memblock_phys_free(__pa(paca_ptrs[boot_cpuid]->slb_shadow_ptr), + sizeof(struct slb_shadow)); paca_ptrs[boot_cpuid]->slb_shadow_ptr = NULL; } #endif diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index c3573430919d..6749905932f4 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1059,7 +1059,7 @@ void pcibios_bus_add_device(struct pci_dev *dev) ppc_md.pcibios_bus_add_device(dev); } -int pcibios_add_device(struct pci_dev *dev) +int pcibios_device_add(struct pci_dev *dev) { struct irq_domain *d; diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 0b7894eed58d..4f1322b65760 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -822,7 +822,7 @@ static void __init smp_setup_pacas(void) set_hard_smp_processor_id(cpu, cpu_to_phys_id[cpu]); } - memblock_free(__pa(cpu_to_phys_id), nr_cpu_ids * sizeof(u32)); + memblock_free(cpu_to_phys_id, nr_cpu_ids * sizeof(u32)); cpu_to_phys_id = NULL; } #endif diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index eaa79a0996d1..6052f5d5ded3 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -812,7 +812,7 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size, static void __init pcpu_free_bootmem(void *ptr, size_t size) { - memblock_free(__pa(ptr), size); + memblock_free(ptr, size); } static int pcpu_cpu_distance(unsigned int from, unsigned int to) @@ -912,7 +912,7 @@ void __init setup_per_cpu_areas(void) } #endif -#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE +#ifdef CONFIG_MEMORY_HOTPLUG unsigned long memory_block_size_bytes(void) { if (ppc_md.memory_block_size) diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h index 1f07317964e4..618aeccdf691 100644 --- a/arch/powerpc/kernel/signal.h +++ b/arch/powerpc/kernel/signal.h @@ -25,8 +25,14 @@ static inline int __get_user_sigset(sigset_t *dst, const sigset_t __user *src) return __get_user(dst->sig[0], (u64 __user *)&src->sig[0]); } -#define unsafe_get_user_sigset(dst, src, label) \ - unsafe_get_user((dst)->sig[0], (u64 __user *)&(src)->sig[0], label) +#define unsafe_get_user_sigset(dst, src, label) do { \ + sigset_t *__dst = dst; \ + const sigset_t __user *__src = src; \ + int i; \ + \ + for (i = 0; i < _NSIG_WORDS; i++) \ + unsafe_get_user(__dst->sig[i], &__src->sig[i], label); \ +} while (0) #ifdef CONFIG_VSX extern unsigned long copy_vsx_to_user(void __user *to, diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 38c3eae40c14..3e053e2fd6b6 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -1062,8 +1062,10 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, * or if another thread unmaps the region containing the context. * We kill the task with a SIGSEGV in this situation. */ - if (do_setcontext(new_ctx, regs, 0)) - do_exit(SIGSEGV); + if (do_setcontext(new_ctx, regs, 0)) { + force_exit_sig(SIGSEGV); + return -EFAULT; + } set_thread_flag(TIF_RESTOREALL); return 0; diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 9f471b4a11e3..d1e1fc0acbea 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -703,15 +703,18 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, * We kill the task with a SIGSEGV in this situation. */ - if (__get_user_sigset(&set, &new_ctx->uc_sigmask)) - do_exit(SIGSEGV); + if (__get_user_sigset(&set, &new_ctx->uc_sigmask)) { + force_exit_sig(SIGSEGV); + return -EFAULT; + } set_current_blocked(&set); if (!user_read_access_begin(new_ctx, ctx_size)) return -EFAULT; if (__unsafe_restore_sigcontext(current, NULL, 0, &new_ctx->uc_mcontext)) { user_read_access_end(); - do_exit(SIGSEGV); + force_exit_sig(SIGSEGV); + return -EFAULT; } user_read_access_end(); diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index f9ea0e5357f9..3fa6d240bade 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -187,6 +187,12 @@ static void watchdog_smp_panic(int cpu, u64 tb) if (sysctl_hardlockup_all_cpu_backtrace) trigger_allbutself_cpu_backtrace(); + /* + * Force flush any remote buffers that might be stuck in IRQ context + * and therefore could not run their irq_work. + */ + printk_trigger_flush(); + if (hardlockup_panic) nmi_panic(NULL, "Hard LOCKUP"); diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index eb776d0c5d8e..32a4b4d412b9 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -2005,7 +2005,7 @@ hcall_real_table: .globl hcall_real_table_end hcall_real_table_end: -_GLOBAL(kvmppc_h_set_xdabr) +_GLOBAL_TOC(kvmppc_h_set_xdabr) EXPORT_SYMBOL_GPL(kvmppc_h_set_xdabr) andi. r0, r5, DABRX_USER | DABRX_KERNEL beq 6f @@ -2015,7 +2015,7 @@ EXPORT_SYMBOL_GPL(kvmppc_h_set_xdabr) 6: li r3, H_PARAMETER blr -_GLOBAL(kvmppc_h_set_dabr) +_GLOBAL_TOC(kvmppc_h_set_dabr) EXPORT_SYMBOL_GPL(kvmppc_h_set_dabr) li r5, DABRX_USER | DABRX_KERNEL 3: diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c index a7061ee3b157..28c436df9935 100644 --- a/arch/powerpc/kvm/book3s_hv_uvmem.c +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c @@ -560,7 +560,7 @@ static int __kvmppc_svm_page_out(struct vm_area_struct *vma, gpa, 0, page_shift); if (ret == U_SUCCESS) - *mig.dst = migrate_pfn(pfn) | MIGRATE_PFN_LOCKED; + *mig.dst = migrate_pfn(pfn); else { unlock_page(dpage); __free_page(dpage); @@ -774,7 +774,7 @@ static int kvmppc_svm_page_in(struct vm_area_struct *vma, } } - *mig.dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED; + *mig.dst = migrate_pfn(page_to_pfn(dpage)); migrate_vma_pages(&mig); out_finalize: migrate_vma_finalize(&mig); diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 35e9cccdeef9..a72920f4f221 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -641,9 +641,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) * implementations just count online CPUs. */ if (hv_enabled) - r = num_present_cpus(); + r = min_t(unsigned int, num_present_cpus(), KVM_MAX_VCPUS); else - r = num_online_cpus(); + r = min_t(unsigned int, num_online_cpus(), KVM_MAX_VCPUS); break; case KVM_CAP_MAX_VCPUS: r = KVM_MAX_VCPUS; diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 99a7c9132422..9e5d0f413b71 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -65,5 +65,7 @@ obj-$(CONFIG_FTR_FIXUP_SELFTEST) += feature-fixups-test.o obj-$(CONFIG_ALTIVEC) += xor_vmx.o xor_vmx_glue.o CFLAGS_xor_vmx.o += -maltivec $(call cc-option,-mabi=altivec) +# Enable <altivec.h> +CFLAGS_xor_vmx.o += -isystem $(shell $(CC) -print-file-name=include) obj-$(CONFIG_PPC64) += $(obj64-y) diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 9a75ba078e1b..82d8b368ca6d 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -229,17 +229,22 @@ static int __init pseries_alloc_bootmem_huge_page(struct hstate *hstate) m->hstate = hstate; return 1; } + +bool __init hugetlb_node_alloc_supported(void) +{ + return false; +} #endif -int __init alloc_bootmem_huge_page(struct hstate *h) +int __init alloc_bootmem_huge_page(struct hstate *h, int nid) { #ifdef CONFIG_PPC_BOOK3S_64 if (firmware_has_feature(FW_FEATURE_LPAR) && !radix_enabled()) return pseries_alloc_bootmem_huge_page(h); #endif - return __alloc_bootmem_huge_page(h); + return __alloc_bootmem_huge_page(h, nid); } #ifndef CONFIG_PPC_BOOK3S_64 diff --git a/arch/powerpc/mm/nohash/kaslr_booke.c b/arch/powerpc/mm/nohash/kaslr_booke.c index 8fc49b1b4a91..6ec978967da0 100644 --- a/arch/powerpc/mm/nohash/kaslr_booke.c +++ b/arch/powerpc/mm/nohash/kaslr_booke.c @@ -314,7 +314,7 @@ static unsigned long __init kaslr_choose_location(void *dt_ptr, phys_addr_t size pr_warn("KASLR: No safe seed for randomizing the kernel base.\n"); ram = min_t(phys_addr_t, __max_low_memory, size); - ram = map_mem_in_cams(ram, CONFIG_LOWMEM_CAM_NUM, true, false); + ram = map_mem_in_cams(ram, CONFIG_LOWMEM_CAM_NUM, true, true); linear_sz = min_t(unsigned long, ram, SZ_512M); /* If the linear size is smaller than 64M, do not randmize */ diff --git a/arch/powerpc/mm/nohash/tlb.c b/arch/powerpc/mm/nohash/tlb.c index 89353d4f5604..647bf454a0fa 100644 --- a/arch/powerpc/mm/nohash/tlb.c +++ b/arch/powerpc/mm/nohash/tlb.c @@ -645,7 +645,7 @@ static void early_init_this_mmu(void) if (map) linear_map_top = map_mem_in_cams(linear_map_top, - num_cams, true, true); + num_cams, false, true); } #endif @@ -766,7 +766,7 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base, num_cams = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) / 4; linear_sz = map_mem_in_cams(first_memblock_size, num_cams, - false, true); + true, true); ppc64_rma_size = min_t(u64, linear_sz, 0x40000000); } else diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 6f14c8fb6359..59d3cfcd7887 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -376,9 +376,9 @@ static void initialize_form2_numa_distance_lookup_table(void) { int i, j; struct device_node *root; - const __u8 *numa_dist_table; + const __u8 *form2_distances; const __be32 *numa_lookup_index; - int numa_dist_table_length; + int form2_distances_length; int max_numa_index, distance_index; if (firmware_has_feature(FW_FEATURE_OPAL)) @@ -392,45 +392,41 @@ static void initialize_form2_numa_distance_lookup_table(void) max_numa_index = of_read_number(&numa_lookup_index[0], 1); /* first element of the array is the size and is encode-int */ - numa_dist_table = of_get_property(root, "ibm,numa-distance-table", NULL); - numa_dist_table_length = of_read_number((const __be32 *)&numa_dist_table[0], 1); + form2_distances = of_get_property(root, "ibm,numa-distance-table", NULL); + form2_distances_length = of_read_number((const __be32 *)&form2_distances[0], 1); /* Skip the size which is encoded int */ - numa_dist_table += sizeof(__be32); + form2_distances += sizeof(__be32); - pr_debug("numa_dist_table_len = %d, numa_dist_indexes_len = %d\n", - numa_dist_table_length, max_numa_index); + pr_debug("form2_distances_len = %d, numa_dist_indexes_len = %d\n", + form2_distances_length, max_numa_index); for (i = 0; i < max_numa_index; i++) /* +1 skip the max_numa_index in the property */ numa_id_index_table[i] = of_read_number(&numa_lookup_index[i + 1], 1); - if (numa_dist_table_length != max_numa_index * max_numa_index) { + if (form2_distances_length != max_numa_index * max_numa_index) { WARN(1, "Wrong NUMA distance information\n"); - /* consider everybody else just remote. */ - for (i = 0; i < max_numa_index; i++) { - for (j = 0; j < max_numa_index; j++) { - int nodeA = numa_id_index_table[i]; - int nodeB = numa_id_index_table[j]; - - if (nodeA == nodeB) - numa_distance_table[nodeA][nodeB] = LOCAL_DISTANCE; - else - numa_distance_table[nodeA][nodeB] = REMOTE_DISTANCE; - } - } + form2_distances = NULL; // don't use it } - distance_index = 0; for (i = 0; i < max_numa_index; i++) { for (j = 0; j < max_numa_index; j++) { int nodeA = numa_id_index_table[i]; int nodeB = numa_id_index_table[j]; - - numa_distance_table[nodeA][nodeB] = numa_dist_table[distance_index++]; - pr_debug("dist[%d][%d]=%d ", nodeA, nodeB, numa_distance_table[nodeA][nodeB]); + int dist; + + if (form2_distances) + dist = form2_distances[distance_index++]; + else if (nodeA == nodeB) + dist = LOCAL_DISTANCE; + else + dist = REMOTE_DISTANCE; + numa_distance_table[nodeA][nodeB] = dist; + pr_debug("dist[%d][%d]=%d ", nodeA, nodeB, dist); } } + of_node_put(root); } diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index fde1ed445ca4..906e4e4328b2 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -33,8 +33,6 @@ #include <mm/mmu_decl.h> -extern char etext[], _stext[], _sinittext[], _einittext[]; - static u8 early_fixmap_pagetable[FIXMAP_PTE_SIZE] __page_aligned_data; notrace void __init early_ioremap_init(void) @@ -104,14 +102,13 @@ static void __init __mapin_ram_chunk(unsigned long offset, unsigned long top) { unsigned long v, s; phys_addr_t p; - int ktext; + bool ktext; s = offset; v = PAGE_OFFSET + s; p = memstart_addr + s; for (; s < top; s += PAGE_SIZE) { - ktext = ((char *)v >= _stext && (char *)v < etext) || - ((char *)v >= _sinittext && (char *)v < _einittext); + ktext = core_kernel_text(v); map_kernel_page(v, p, ktext ? PAGE_KERNEL_TEXT : PAGE_KERNEL); v += PAGE_SIZE; p += PAGE_SIZE; diff --git a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c index bb789f33c70e..a38372f9ac12 100644 --- a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c +++ b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c @@ -186,7 +186,6 @@ err: static int mcu_remove(struct i2c_client *client) { struct mcu *mcu = i2c_get_clientdata(client); - int ret; kthread_stop(shutdown_thread); diff --git a/arch/powerpc/platforms/powernv/ocxl.c b/arch/powerpc/platforms/powernv/ocxl.c index 9105efcf242a..28b009b46464 100644 --- a/arch/powerpc/platforms/powernv/ocxl.c +++ b/arch/powerpc/platforms/powernv/ocxl.c @@ -107,7 +107,8 @@ static int get_max_afu_index(struct pci_dev *dev, int *afu_idx) int pos; u32 val; - pos = find_dvsec_from_pos(dev, OCXL_DVSEC_FUNC_ID, 0); + pos = pci_find_dvsec_capability(dev, PCI_VENDOR_ID_IBM, + OCXL_DVSEC_FUNC_ID); if (!pos) return -ESRCH; diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 3dd35c327d1c..004cd6a96c8a 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2981,7 +2981,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, if (!phb->hose) { pr_err(" Can't allocate PCI controller for %pOF\n", np); - memblock_free(__pa(phb), sizeof(struct pnv_phb)); + memblock_free(phb, sizeof(struct pnv_phb)); return; } diff --git a/arch/powerpc/platforms/powernv/pci-sriov.c b/arch/powerpc/platforms/powernv/pci-sriov.c index deddbb233fde..04155aaaadb1 100644 --- a/arch/powerpc/platforms/powernv/pci-sriov.c +++ b/arch/powerpc/platforms/powernv/pci-sriov.c @@ -51,7 +51,7 @@ * to "new_size", calculated above. Implementing this is a convoluted process * which requires several hooks in the PCI core: * - * 1. In pcibios_add_device() we call pnv_pci_ioda_fixup_iov(). + * 1. In pcibios_device_add() we call pnv_pci_ioda_fixup_iov(). * * At this point the device has been probed and the device's BARs are sized, * but no resource allocations have been done. The SR-IOV BARs are sized diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index a8db3f153063..ad56a54ac9c5 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -440,7 +440,7 @@ static void pnv_kexec_cpu_down(int crash_shutdown, int secondary) } #endif /* CONFIG_KEXEC_CORE */ -#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE +#ifdef CONFIG_MEMORY_HOTPLUG static unsigned long pnv_memory_block_size(void) { /* @@ -553,7 +553,7 @@ define_machine(powernv) { #ifdef CONFIG_KEXEC_CORE .kexec_cpu_down = pnv_kexec_cpu_down, #endif -#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE +#ifdef CONFIG_MEMORY_HOTPLUG .memory_block_size = pnv_memory_block_size, #endif }; diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 49b401536d29..8f998e55735b 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -1094,15 +1094,6 @@ static phys_addr_t ddw_memory_hotplug_max(void) phys_addr_t max_addr = memory_hotplug_max(); struct device_node *memory; - /* - * The "ibm,pmemory" can appear anywhere in the address space. - * Assuming it is still backed by page structs, set the upper limit - * for the huge DMA window as MAX_PHYSMEM_BITS. - */ - if (of_find_node_by_type(NULL, "ibm,pmemory")) - return (sizeof(phys_addr_t) * 8 <= MAX_PHYSMEM_BITS) ? - (phys_addr_t) -1 : (1ULL << MAX_PHYSMEM_BITS); - for_each_node_by_type(memory, "memory") { unsigned long start, size; int n_mem_addr_cells, n_mem_size_cells, len; @@ -1238,7 +1229,6 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) u32 ddw_avail[DDW_APPLICABLE_SIZE]; struct dma_win *window; struct property *win64; - bool ddw_enabled = false; struct failed_ddw_pdn *fpdn; bool default_win_removed = false, direct_mapping = false; bool pmem_present; @@ -1253,7 +1243,6 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) if (find_existing_ddw(pdn, &dev->dev.archdata.dma_offset, &len)) { direct_mapping = (len >= max_ram_len); - ddw_enabled = true; goto out_unlock; } @@ -1367,8 +1356,10 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) len = order_base_2(query.largest_available_block << page_shift); win_name = DMA64_PROPNAME; } else { - direct_mapping = true; - win_name = DIRECT64_PROPNAME; + direct_mapping = !default_win_removed || + (len == MAX_PHYSMEM_BITS) || + (!pmem_present && (len == max_ram_len)); + win_name = direct_mapping ? DIRECT64_PROPNAME : DMA64_PROPNAME; } ret = create_ddw(dev, ddw_avail, &create, page_shift, len); @@ -1406,8 +1397,8 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) dev_info(&dev->dev, "failed to map DMA window for %pOF: %d\n", dn, ret); - /* Make sure to clean DDW if any TCE was set*/ - clean_dma_window(pdn, win64->value); + /* Make sure to clean DDW if any TCE was set*/ + clean_dma_window(pdn, win64->value); goto out_del_list; } } else { @@ -1454,7 +1445,6 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) spin_unlock(&dma_win_list_lock); dev->dev.archdata.dma_offset = win_addr; - ddw_enabled = true; goto out_unlock; out_del_list: @@ -1490,10 +1480,10 @@ out_unlock: * as RAM, then we failed to create a window to cover persistent * memory and need to set the DMA limit. */ - if (pmem_present && ddw_enabled && direct_mapping && len == max_ram_len) + if (pmem_present && direct_mapping && len == max_ram_len) dev->dev.bus_dma_limit = dev->dev.archdata.dma_offset + (1ULL << len); - return ddw_enabled && direct_mapping; + return direct_mapping; } static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 2188054470c1..8a62af5b9c24 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -1088,7 +1088,7 @@ define_machine(pseries) { .machine_kexec = pSeries_machine_kexec, .kexec_cpu_down = pseries_kexec_cpu_down, #endif -#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE +#ifdef CONFIG_MEMORY_HOTPLUG .memory_block_size = pseries_memory_block_size, #endif }; diff --git a/arch/powerpc/platforms/pseries/svm.c b/arch/powerpc/platforms/pseries/svm.c index c083ecbbae4d..c5228f4969eb 100644 --- a/arch/powerpc/platforms/pseries/svm.c +++ b/arch/powerpc/platforms/pseries/svm.c @@ -57,8 +57,7 @@ void __init svm_swiotlb_init(void) return; - memblock_free_early(__pa(vstart), - PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); + memblock_free(vstart, PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); panic("SVM: Cannot allocate SWIOTLB buffer"); } diff --git a/arch/powerpc/sysdev/xive/Kconfig b/arch/powerpc/sysdev/xive/Kconfig index 97796c6b63f0..785c292d104b 100644 --- a/arch/powerpc/sysdev/xive/Kconfig +++ b/arch/powerpc/sysdev/xive/Kconfig @@ -3,7 +3,6 @@ config PPC_XIVE bool select PPC_SMP_MUXED_IPI select HARDIRQS_SW_RESEND - select IRQ_DOMAIN_NOMAP config PPC_XIVE_NATIVE bool diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index c5d75c02ad8b..7b69299c2912 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -1443,8 +1443,7 @@ static const struct irq_domain_ops xive_irq_domain_ops = { static void __init xive_init_host(struct device_node *np) { - xive_irq_domain = irq_domain_add_nomap(np, XIVE_MAX_IRQ, - &xive_irq_domain_ops, NULL); + xive_irq_domain = irq_domain_add_tree(np, &xive_irq_domain_ops, NULL); if (WARN_ON(xive_irq_domain == NULL)) return; irq_set_default_host(xive_irq_domain); diff --git a/arch/riscv/Kbuild b/arch/riscv/Kbuild index 4614c01ba5b3..fb3397223d52 100644 --- a/arch/riscv/Kbuild +++ b/arch/riscv/Kbuild @@ -2,3 +2,6 @@ obj-y += kernel/ mm/ net/ obj-$(CONFIG_BUILTIN_DTB) += boot/dts/ + +# for cleaning +subdir- += boot diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index a34c531be4e7..821252b65f89 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -62,6 +62,7 @@ config RISCV select GENERIC_SCHED_CLOCK select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL if MMU && 64BIT + select GENERIC_VDSO_TIME_NS if HAVE_GENERIC_VDSO select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL select HAVE_ARCH_JUMP_LABEL_RELATIVE if !XIP_KERNEL diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 58c1a28e20bb..8a107ed18b0d 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -1,7 +1,5 @@ # This file is included by the global makefile so that you can add your own -# architecture-specific flags and dependencies. Remember to do have actions -# for "archclean" and "archdep" for cleaning up and making dependencies for -# this architecture +# architecture-specific flags and dependencies. # # This file is subject to the terms and conditions of the GNU General Public # License. See the file "COPYING" in the main directory of this archive @@ -109,11 +107,13 @@ PHONY += vdso_install vdso_install: $(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso $@ +ifeq ($(KBUILD_EXTMOD),) ifeq ($(CONFIG_MMU),y) prepare: vdso_prepare vdso_prepare: prepare0 $(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso include/generated/vdso-offsets.h endif +endif ifneq ($(CONFIG_XIP_KERNEL),y) ifeq ($(CONFIG_RISCV_M_MODE)$(CONFIG_SOC_CANAAN),yy) @@ -139,5 +139,12 @@ install zinstall: $(CONFIG_SHELL) $(srctree)/$(boot)/install.sh $(KERNELRELEASE) \ $(boot)/$(install-image) System.map "$(INSTALL_PATH)" -archclean: - $(Q)$(MAKE) $(clean)=$(boot) +PHONY += rv32_randconfig +rv32_randconfig: + $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/riscv/configs/32-bit.config \ + -f $(srctree)/Makefile randconfig + +PHONY += rv64_randconfig +rv64_randconfig: + $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/riscv/configs/64-bit.config \ + -f $(srctree)/Makefile randconfig diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts index b254c60589a1..fc1e5869df1b 100644 --- a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts +++ b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts @@ -9,10 +9,8 @@ #define RTCCLK_FREQ 1000000 / { - #address-cells = <2>; - #size-cells = <2>; model = "Microchip PolarFire-SoC Icicle Kit"; - compatible = "microchip,mpfs-icicle-kit"; + compatible = "microchip,mpfs-icicle-kit", "microchip,mpfs"; aliases { ethernet0 = &emac1; @@ -35,9 +33,6 @@ reg = <0x0 0x80000000 0x0 0x40000000>; clocks = <&clkcfg 26>; }; - - soc { - }; }; &serial0 { @@ -56,8 +51,17 @@ status = "okay"; }; -&sdcard { +&mmc { status = "okay"; + + bus-width = <4>; + disable-wp; + cap-sd-highspeed; + card-detect-delay = <200>; + sd-uhs-sdr12; + sd-uhs-sdr25; + sd-uhs-sdr50; + sd-uhs-sdr104; }; &emac0 { diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi index 9d2fbbc1f777..c9f6d205d2ba 100644 --- a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi +++ b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi @@ -6,8 +6,8 @@ / { #address-cells = <2>; #size-cells = <2>; - model = "Microchip MPFS Icicle Kit"; - compatible = "microchip,mpfs-icicle-kit"; + model = "Microchip PolarFire SoC"; + compatible = "microchip,mpfs"; chosen { }; @@ -161,7 +161,7 @@ }; clint@2000000 { - compatible = "sifive,clint0"; + compatible = "sifive,fu540-c000-clint", "sifive,clint0"; reg = <0x0 0x2000000 0x0 0xC000>; interrupts-extended = <&cpu0_intc 3 &cpu0_intc 7 &cpu1_intc 3 &cpu1_intc 7 @@ -172,7 +172,7 @@ plic: interrupt-controller@c000000 { #interrupt-cells = <1>; - compatible = "sifive,plic-1.0.0"; + compatible = "sifive,fu540-c000-plic", "sifive,plic-1.0.0"; reg = <0x0 0xc000000 0x0 0x4000000>; riscv,ndev = <186>; interrupt-controller; @@ -262,39 +262,13 @@ status = "disabled"; }; - emmc: mmc@20008000 { - compatible = "cdns,sd4hc"; + /* Common node entry for emmc/sd */ + mmc: mmc@20008000 { + compatible = "microchip,mpfs-sd4hc", "cdns,sd4hc"; reg = <0x0 0x20008000 0x0 0x1000>; interrupt-parent = <&plic>; interrupts = <88 89>; - pinctrl-names = "default"; clocks = <&clkcfg 6>; - bus-width = <4>; - cap-mmc-highspeed; - mmc-ddr-3_3v; - max-frequency = <200000000>; - non-removable; - no-sd; - no-sdio; - voltage-ranges = <3300 3300>; - status = "disabled"; - }; - - sdcard: sdhc@20008000 { - compatible = "cdns,sd4hc"; - reg = <0x0 0x20008000 0x0 0x1000>; - interrupt-parent = <&plic>; - interrupts = <88>; - pinctrl-names = "default"; - clocks = <&clkcfg 6>; - bus-width = <4>; - disable-wp; - cap-sd-highspeed; - card-detect-delay = <200>; - sd-uhs-sdr12; - sd-uhs-sdr25; - sd-uhs-sdr50; - sd-uhs-sdr104; max-frequency = <200000000>; status = "disabled"; }; diff --git a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi index 7db861053483..0655b5c4201d 100644 --- a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi +++ b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi @@ -141,7 +141,7 @@ ranges; plic0: interrupt-controller@c000000 { #interrupt-cells = <1>; - compatible = "sifive,plic-1.0.0"; + compatible = "sifive,fu540-c000-plic", "sifive,plic-1.0.0"; reg = <0x0 0xc000000 0x0 0x4000000>; riscv,ndev = <53>; interrupt-controller; diff --git a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts index 60846e88ae4b..ba304d4c455c 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts @@ -8,10 +8,9 @@ #define RTCCLK_FREQ 1000000 / { - #address-cells = <2>; - #size-cells = <2>; model = "SiFive HiFive Unleashed A00"; - compatible = "sifive,hifive-unleashed-a00", "sifive,fu540-c000"; + compatible = "sifive,hifive-unleashed-a00", "sifive,fu540-c000", + "sifive,fu540"; chosen { stdout-path = "serial0"; @@ -26,9 +25,6 @@ reg = <0x0 0x80000000 0x2 0x00000000>; }; - soc { - }; - hfclk: hfclk { #clock-cells = <0>; compatible = "fixed-clock"; @@ -63,7 +59,7 @@ &qspi0 { status = "okay"; flash@0 { - compatible = "issi,is25wp256", "jedec,spi-nor"; + compatible = "jedec,spi-nor"; reg = <0>; spi-max-frequency = <50000000>; m25p,fast-read; diff --git a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts index 2e4ea84f27e7..4f66919215f6 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts @@ -8,8 +8,6 @@ #define RTCCLK_FREQ 1000000 / { - #address-cells = <2>; - #size-cells = <2>; model = "SiFive HiFive Unmatched A00"; compatible = "sifive,hifive-unmatched-a00", "sifive,fu740-c000", "sifive,fu740"; @@ -27,9 +25,6 @@ reg = <0x0 0x80000000 0x4 0x00000000>; }; - soc { - }; - hfclk: hfclk { #clock-cells = <0>; compatible = "fixed-clock"; @@ -211,7 +206,7 @@ &qspi0 { status = "okay"; flash@0 { - compatible = "issi,is25wp256", "jedec,spi-nor"; + compatible = "jedec,spi-nor"; reg = <0>; spi-max-frequency = <50000000>; m25p,fast-read; diff --git a/arch/riscv/configs/32-bit.config b/arch/riscv/configs/32-bit.config new file mode 100644 index 000000000000..43f41323b67e --- /dev/null +++ b/arch/riscv/configs/32-bit.config @@ -0,0 +1,2 @@ +CONFIG_ARCH_RV32I=y +CONFIG_32BIT=y diff --git a/arch/riscv/configs/64-bit.config b/arch/riscv/configs/64-bit.config new file mode 100644 index 000000000000..313edc554d84 --- /dev/null +++ b/arch/riscv/configs/64-bit.config @@ -0,0 +1,2 @@ +CONFIG_ARCH_RV64I=y +CONFIG_64BIT=y diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index 4ebc80315f01..ef473e2f503b 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -19,6 +19,8 @@ CONFIG_SOC_VIRT=y CONFIG_SOC_MICROCHIP_POLARFIRE=y CONFIG_SMP=y CONFIG_HOTPLUG_CPU=y +CONFIG_VIRTUALIZATION=y +CONFIG_KVM=m CONFIG_JUMP_LABEL=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y @@ -72,9 +74,10 @@ CONFIG_GPIOLIB=y CONFIG_GPIO_SIFIVE=y # CONFIG_PTP_1588_CLOCK is not set CONFIG_POWER_RESET=y -CONFIG_DRM=y -CONFIG_DRM_RADEON=y -CONFIG_DRM_VIRTIO_GPU=y +CONFIG_DRM=m +CONFIG_DRM_RADEON=m +CONFIG_DRM_NOUVEAU=m +CONFIG_DRM_VIRTIO_GPU=m CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_USB=y CONFIG_USB_XHCI_HCD=y diff --git a/arch/riscv/configs/rv32_defconfig b/arch/riscv/configs/rv32_defconfig index 434ef5b64599..6e9f12ff968a 100644 --- a/arch/riscv/configs/rv32_defconfig +++ b/arch/riscv/configs/rv32_defconfig @@ -19,6 +19,8 @@ CONFIG_SOC_VIRT=y CONFIG_ARCH_RV32I=y CONFIG_SMP=y CONFIG_HOTPLUG_CPU=y +CONFIG_VIRTUALIZATION=y +CONFIG_KVM=m CONFIG_JUMP_LABEL=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index 109c97e991a6..b3e5ff0125fe 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -157,6 +157,8 @@ extern phys_addr_t __phys_addr_symbol(unsigned long x); #define page_to_bus(page) (page_to_phys(page)) #define phys_to_page(paddr) (pfn_to_page(phys_to_pfn(paddr))) +#define sym_to_pfn(x) __phys_to_pfn(__pa_symbol(x)) + #ifdef CONFIG_FLATMEM #define pfn_valid(pfn) \ (((pfn) >= ARCH_PFN_OFFSET) && (((pfn) - ARCH_PFN_OFFSET) < max_mapnr)) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 39b550310ec6..bf204e7c1f74 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -75,7 +75,8 @@ #endif #ifdef CONFIG_XIP_KERNEL -#define XIP_OFFSET SZ_8M +#define XIP_OFFSET SZ_32M +#define XIP_OFFSET_MASK (SZ_32M - 1) #else #define XIP_OFFSET 0 #endif @@ -97,7 +98,8 @@ #ifdef CONFIG_XIP_KERNEL #define XIP_FIXUP(addr) ({ \ uintptr_t __a = (uintptr_t)(addr); \ - (__a >= CONFIG_XIP_PHYS_ADDR && __a < CONFIG_XIP_PHYS_ADDR + SZ_16M) ? \ + (__a >= CONFIG_XIP_PHYS_ADDR && \ + __a < CONFIG_XIP_PHYS_ADDR + XIP_OFFSET * 2) ? \ __a - CONFIG_XIP_PHYS_ADDR + CONFIG_PHYS_RAM_BASE - XIP_OFFSET :\ __a; \ }) diff --git a/arch/riscv/include/asm/syscall.h b/arch/riscv/include/asm/syscall.h index 34fbb3ea21d5..7ac6a0e275f2 100644 --- a/arch/riscv/include/asm/syscall.h +++ b/arch/riscv/include/asm/syscall.h @@ -64,15 +64,6 @@ static inline void syscall_get_arguments(struct task_struct *task, memcpy(args, ®s->a1, 5 * sizeof(args[0])); } -static inline void syscall_set_arguments(struct task_struct *task, - struct pt_regs *regs, - const unsigned long *args) -{ - regs->orig_a0 = args[0]; - args++; - memcpy(®s->a1, args, 5 * sizeof(regs->a1)); -} - static inline int syscall_get_arch(struct task_struct *task) { #ifdef CONFIG_64BIT diff --git a/arch/riscv/include/asm/vdso.h b/arch/riscv/include/asm/vdso.h index 208e31bc5d1c..bc6f75f3a199 100644 --- a/arch/riscv/include/asm/vdso.h +++ b/arch/riscv/include/asm/vdso.h @@ -8,30 +8,19 @@ #ifndef _ASM_RISCV_VDSO_H #define _ASM_RISCV_VDSO_H - -/* - * All systems with an MMU have a VDSO, but systems without an MMU don't - * support shared libraries and therefor don't have one. - */ -#ifdef CONFIG_MMU - -#include <linux/types.h> /* * All systems with an MMU have a VDSO, but systems without an MMU don't * support shared libraries and therefor don't have one. */ #ifdef CONFIG_MMU -#define __VVAR_PAGES 1 +#define __VVAR_PAGES 2 #ifndef __ASSEMBLY__ #include <generated/vdso-offsets.h> #define VDSO_SYMBOL(base, name) \ (void __user *)((unsigned long)(base) + __vdso_##name##_offset) - -#endif /* CONFIG_MMU */ - #endif /* !__ASSEMBLY__ */ #endif /* CONFIG_MMU */ diff --git a/arch/riscv/include/asm/vdso/gettimeofday.h b/arch/riscv/include/asm/vdso/gettimeofday.h index f839f16e0d2a..77d9c2f721c4 100644 --- a/arch/riscv/include/asm/vdso/gettimeofday.h +++ b/arch/riscv/include/asm/vdso/gettimeofday.h @@ -76,6 +76,13 @@ static __always_inline const struct vdso_data *__arch_get_vdso_data(void) return _vdso_data; } +#ifdef CONFIG_TIME_NS +static __always_inline +const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) +{ + return _timens_data; +} +#endif #endif /* !__ASSEMBLY__ */ #endif /* __ASM_VDSO_GETTIMEOFDAY_H */ diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 25ec50573957..f52f01ecbeea 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -20,10 +20,20 @@ REG_L t0, _xip_fixup add \reg, \reg, t0 .endm +.macro XIP_FIXUP_FLASH_OFFSET reg + la t1, __data_loc + li t0, XIP_OFFSET_MASK + and t1, t1, t0 + li t1, XIP_OFFSET + sub t0, t0, t1 + sub \reg, \reg, t0 +.endm _xip_fixup: .dword CONFIG_PHYS_RAM_BASE - CONFIG_XIP_PHYS_ADDR - XIP_OFFSET #else .macro XIP_FIXUP_OFFSET reg .endm +.macro XIP_FIXUP_FLASH_OFFSET reg +.endm #endif /* CONFIG_XIP_KERNEL */ __HEAD @@ -267,6 +277,7 @@ pmp_done: la a3, hart_lottery mv a2, a3 XIP_FIXUP_OFFSET a2 + XIP_FIXUP_FLASH_OFFSET a3 lw t1, (a3) amoswap.w t0, t1, (a2) /* first time here if hart_lottery in RAM is not set */ @@ -305,6 +316,7 @@ clear_bss_done: XIP_FIXUP_OFFSET sp #ifdef CONFIG_BUILTIN_DTB la a0, __dtb_start + XIP_FIXUP_OFFSET a0 #else mv a0, s1 #endif /* CONFIG_BUILTIN_DTB */ diff --git a/arch/riscv/kernel/reset.c b/arch/riscv/kernel/reset.c index ee5878d968cc..9c842c41684a 100644 --- a/arch/riscv/kernel/reset.c +++ b/arch/riscv/kernel/reset.c @@ -12,7 +12,7 @@ static void default_power_off(void) wait_for_interrupt(); } -void (*pm_power_off)(void) = default_power_off; +void (*pm_power_off)(void) = NULL; EXPORT_SYMBOL(pm_power_off); void machine_restart(char *cmd) @@ -23,10 +23,16 @@ void machine_restart(char *cmd) void machine_halt(void) { - pm_power_off(); + if (pm_power_off != NULL) + pm_power_off(); + else + default_power_off(); } void machine_power_off(void) { - pm_power_off(); + if (pm_power_off != NULL) + pm_power_off(); + else + default_power_off(); } diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index b9620e5f00ba..b42bfdc67482 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -230,13 +230,13 @@ static void __init init_resources(void) /* Clean-up any unused pre-allocated resources */ if (res_idx >= 0) - memblock_free(__pa(mem_res), (res_idx + 1) * sizeof(*mem_res)); + memblock_free(mem_res, (res_idx + 1) * sizeof(*mem_res)); return; error: /* Better an empty resource tree than an inconsistent one */ release_child_resources(&iomem_resource); - memblock_free(__pa(mem_res), mem_res_sz); + memblock_free(mem_res, mem_res_sz); } diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c index b70956d80408..a9436a65161a 100644 --- a/arch/riscv/kernel/vdso.c +++ b/arch/riscv/kernel/vdso.c @@ -13,6 +13,7 @@ #include <linux/err.h> #include <asm/page.h> #include <asm/vdso.h> +#include <linux/time_namespace.h> #ifdef CONFIG_GENERIC_TIME_VSYSCALL #include <vdso/datapage.h> @@ -25,14 +26,12 @@ extern char vdso_start[], vdso_end[]; enum vvar_pages { VVAR_DATA_PAGE_OFFSET, + VVAR_TIMENS_PAGE_OFFSET, VVAR_NR_PAGES, }; #define VVAR_SIZE (VVAR_NR_PAGES << PAGE_SHIFT) -static unsigned int vdso_pages __ro_after_init; -static struct page **vdso_pagelist __ro_after_init; - /* * The vDSO data page. */ @@ -42,83 +41,228 @@ static union { } vdso_data_store __page_aligned_data; struct vdso_data *vdso_data = &vdso_data_store.data; -static int __init vdso_init(void) +struct __vdso_info { + const char *name; + const char *vdso_code_start; + const char *vdso_code_end; + unsigned long vdso_pages; + /* Data Mapping */ + struct vm_special_mapping *dm; + /* Code Mapping */ + struct vm_special_mapping *cm; +}; + +static struct __vdso_info vdso_info __ro_after_init = { + .name = "vdso", + .vdso_code_start = vdso_start, + .vdso_code_end = vdso_end, +}; + +static int vdso_mremap(const struct vm_special_mapping *sm, + struct vm_area_struct *new_vma) +{ + current->mm->context.vdso = (void *)new_vma->vm_start; + + return 0; +} + +static int __init __vdso_init(void) { unsigned int i; + struct page **vdso_pagelist; + unsigned long pfn; - vdso_pages = (vdso_end - vdso_start) >> PAGE_SHIFT; - vdso_pagelist = - kcalloc(vdso_pages + VVAR_NR_PAGES, sizeof(struct page *), GFP_KERNEL); - if (unlikely(vdso_pagelist == NULL)) { - pr_err("vdso: pagelist allocation failed\n"); - return -ENOMEM; + if (memcmp(vdso_info.vdso_code_start, "\177ELF", 4)) { + pr_err("vDSO is not a valid ELF object!\n"); + return -EINVAL; } - for (i = 0; i < vdso_pages; i++) { - struct page *pg; + vdso_info.vdso_pages = ( + vdso_info.vdso_code_end - + vdso_info.vdso_code_start) >> + PAGE_SHIFT; + + vdso_pagelist = kcalloc(vdso_info.vdso_pages, + sizeof(struct page *), + GFP_KERNEL); + if (vdso_pagelist == NULL) + return -ENOMEM; + + /* Grab the vDSO code pages. */ + pfn = sym_to_pfn(vdso_info.vdso_code_start); + + for (i = 0; i < vdso_info.vdso_pages; i++) + vdso_pagelist[i] = pfn_to_page(pfn + i); + + vdso_info.cm->pages = vdso_pagelist; + + return 0; +} + +#ifdef CONFIG_TIME_NS +struct vdso_data *arch_get_vdso_data(void *vvar_page) +{ + return (struct vdso_data *)(vvar_page); +} + +/* + * The vvar mapping contains data for a specific time namespace, so when a task + * changes namespace we must unmap its vvar data for the old namespace. + * Subsequent faults will map in data for the new namespace. + * + * For more details see timens_setup_vdso_data(). + */ +int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) +{ + struct mm_struct *mm = task->mm; + struct vm_area_struct *vma; + + mmap_read_lock(mm); - pg = virt_to_page(vdso_start + (i << PAGE_SHIFT)); - vdso_pagelist[i] = pg; + for (vma = mm->mmap; vma; vma = vma->vm_next) { + unsigned long size = vma->vm_end - vma->vm_start; + + if (vma_is_special_mapping(vma, vdso_info.dm)) + zap_page_range(vma, vma->vm_start, size); } - vdso_pagelist[i] = virt_to_page(vdso_data); + mmap_read_unlock(mm); return 0; } + +static struct page *find_timens_vvar_page(struct vm_area_struct *vma) +{ + if (likely(vma->vm_mm == current->mm)) + return current->nsproxy->time_ns->vvar_page; + + /* + * VM_PFNMAP | VM_IO protect .fault() handler from being called + * through interfaces like /proc/$pid/mem or + * process_vm_{readv,writev}() as long as there's no .access() + * in special_mapping_vmops. + * For more details check_vma_flags() and __access_remote_vm() + */ + WARN(1, "vvar_page accessed remotely"); + + return NULL; +} +#else +static struct page *find_timens_vvar_page(struct vm_area_struct *vma) +{ + return NULL; +} +#endif + +static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, + struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct page *timens_page = find_timens_vvar_page(vma); + unsigned long pfn; + + switch (vmf->pgoff) { + case VVAR_DATA_PAGE_OFFSET: + if (timens_page) + pfn = page_to_pfn(timens_page); + else + pfn = sym_to_pfn(vdso_data); + break; +#ifdef CONFIG_TIME_NS + case VVAR_TIMENS_PAGE_OFFSET: + /* + * If a task belongs to a time namespace then a namespace + * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and + * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET + * offset. + * See also the comment near timens_setup_vdso_data(). + */ + if (!timens_page) + return VM_FAULT_SIGBUS; + pfn = sym_to_pfn(vdso_data); + break; +#endif /* CONFIG_TIME_NS */ + default: + return VM_FAULT_SIGBUS; + } + + return vmf_insert_pfn(vma, vmf->address, pfn); +} + +enum rv_vdso_map { + RV_VDSO_MAP_VVAR, + RV_VDSO_MAP_VDSO, +}; + +static struct vm_special_mapping rv_vdso_maps[] __ro_after_init = { + [RV_VDSO_MAP_VVAR] = { + .name = "[vvar]", + .fault = vvar_fault, + }, + [RV_VDSO_MAP_VDSO] = { + .name = "[vdso]", + .mremap = vdso_mremap, + }, +}; + +static int __init vdso_init(void) +{ + vdso_info.dm = &rv_vdso_maps[RV_VDSO_MAP_VVAR]; + vdso_info.cm = &rv_vdso_maps[RV_VDSO_MAP_VDSO]; + + return __vdso_init(); +} arch_initcall(vdso_init); -int arch_setup_additional_pages(struct linux_binprm *bprm, - int uses_interp) +static int __setup_additional_pages(struct mm_struct *mm, + struct linux_binprm *bprm, + int uses_interp) { - struct mm_struct *mm = current->mm; - unsigned long vdso_base, vdso_len; - int ret; + unsigned long vdso_base, vdso_text_len, vdso_mapping_len; + void *ret; BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES); - vdso_len = (vdso_pages + VVAR_NR_PAGES) << PAGE_SHIFT; + vdso_text_len = vdso_info.vdso_pages << PAGE_SHIFT; + /* Be sure to map the data page */ + vdso_mapping_len = vdso_text_len + VVAR_SIZE; - if (mmap_write_lock_killable(mm)) - return -EINTR; - - vdso_base = get_unmapped_area(NULL, 0, vdso_len, 0, 0); + vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0); if (IS_ERR_VALUE(vdso_base)) { - ret = vdso_base; - goto end; + ret = ERR_PTR(vdso_base); + goto up_fail; } - mm->context.vdso = NULL; - ret = install_special_mapping(mm, vdso_base, VVAR_SIZE, - (VM_READ | VM_MAYREAD), &vdso_pagelist[vdso_pages]); - if (unlikely(ret)) - goto end; + ret = _install_special_mapping(mm, vdso_base, VVAR_SIZE, + (VM_READ | VM_MAYREAD | VM_PFNMAP), vdso_info.dm); + if (IS_ERR(ret)) + goto up_fail; + vdso_base += VVAR_SIZE; + mm->context.vdso = (void *)vdso_base; ret = - install_special_mapping(mm, vdso_base + VVAR_SIZE, - vdso_pages << PAGE_SHIFT, + _install_special_mapping(mm, vdso_base, vdso_text_len, (VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC), - vdso_pagelist); + vdso_info.cm); - if (unlikely(ret)) - goto end; + if (IS_ERR(ret)) + goto up_fail; - /* - * Put vDSO base into mm struct. We need to do this before calling - * install_special_mapping or the perf counter mmap tracking code - * will fail to recognise it as a vDSO (since arch_vma_name fails). - */ - mm->context.vdso = (void *)vdso_base + VVAR_SIZE; + return 0; -end: - mmap_write_unlock(mm); - return ret; +up_fail: + mm->context.vdso = NULL; + return PTR_ERR(ret); } -const char *arch_vma_name(struct vm_area_struct *vma) +int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { - if (vma->vm_mm && (vma->vm_start == (long)vma->vm_mm->context.vdso)) - return "[vdso]"; - if (vma->vm_mm && (vma->vm_start == - (long)vma->vm_mm->context.vdso - VVAR_SIZE)) - return "[vdso_data]"; - return NULL; + struct mm_struct *mm = current->mm; + int ret; + + if (mmap_write_lock_killable(mm)) + return -EINTR; + + ret = __setup_additional_pages(mm, bprm, uses_interp); + mmap_write_unlock(mm); + + return ret; } diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S index e9111f700af0..01d94aae5bf5 100644 --- a/arch/riscv/kernel/vdso/vdso.lds.S +++ b/arch/riscv/kernel/vdso/vdso.lds.S @@ -10,6 +10,9 @@ OUTPUT_ARCH(riscv) SECTIONS { PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE); +#ifdef CONFIG_TIME_NS + PROVIDE(_timens_data = _vdso_data + PAGE_SIZE); +#endif . = SIZEOF_HEADERS; .hash : { *(.hash) } :text diff --git a/arch/riscv/kernel/vmlinux-xip.lds.S b/arch/riscv/kernel/vmlinux-xip.lds.S index 9c9f35091ef0..f5ed08262139 100644 --- a/arch/riscv/kernel/vmlinux-xip.lds.S +++ b/arch/riscv/kernel/vmlinux-xip.lds.S @@ -64,8 +64,11 @@ SECTIONS /* * From this point, stuff is considered writable and will be copied to RAM */ - __data_loc = ALIGN(16); /* location in file */ - . = LOAD_OFFSET + XIP_OFFSET; /* location in memory */ + __data_loc = ALIGN(PAGE_SIZE); /* location in file */ + . = KERNEL_LINK_ADDR + XIP_OFFSET; /* location in memory */ + +#undef LOAD_OFFSET +#define LOAD_OFFSET (KERNEL_LINK_ADDR + XIP_OFFSET - (__data_loc & XIP_OFFSET_MASK)) _sdata = .; /* Start of data section */ _data = .; @@ -96,7 +99,6 @@ SECTIONS KEEP(*(__soc_builtin_dtb_table)) __soc_builtin_dtb_table_end = .; } - PERCPU_SECTION(L1_CACHE_BYTES) . = ALIGN(8); .alternative : { @@ -122,6 +124,8 @@ SECTIONS BSS_SECTION(PAGE_SIZE, PAGE_SIZE, 0) + PERCPU_SECTION(L1_CACHE_BYTES) + .rel.dyn : AT(ADDR(.rel.dyn) - LOAD_OFFSET) { *(.rel.dyn*) } diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index e3d3aed46184..fb84619df012 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -740,7 +740,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) * Ensure we set mode to IN_GUEST_MODE after we disable * interrupts and before the final VCPU requests check. * See the comment in kvm_vcpu_exiting_guest_mode() and - * Documentation/virtual/kvm/vcpu-requests.rst + * Documentation/virt/kvm/vcpu-requests.rst */ vcpu->mode = IN_GUEST_MODE; diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c index eb3c045edf11..3b0e703d22cf 100644 --- a/arch/riscv/kvm/vcpu_sbi.c +++ b/arch/riscv/kvm/vcpu_sbi.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/** +/* * Copyright (c) 2019 Western Digital Corporation or its affiliates. * * Authors: diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c index 26399df15b63..fb18af34a4b5 100644 --- a/arch/riscv/kvm/vm.c +++ b/arch/riscv/kvm/vm.c @@ -74,7 +74,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = 1; break; case KVM_CAP_NR_VCPUS: - r = num_online_cpus(); + r = min_t(unsigned int, num_online_cpus(), KVM_MAX_VCPUS); break; case KVM_CAP_MAX_VCPUS: r = KVM_MAX_VCPUS; diff --git a/arch/riscv/lib/delay.c b/arch/riscv/lib/delay.c index f51c9a03bca1..49d510ba75fd 100644 --- a/arch/riscv/lib/delay.c +++ b/arch/riscv/lib/delay.c @@ -4,10 +4,14 @@ */ #include <linux/delay.h> +#include <linux/math.h> #include <linux/param.h> #include <linux/timex.h> +#include <linux/types.h> #include <linux/export.h> +#include <asm/processor.h> + /* * This is copies from arch/arm/include/asm/delay.h * diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c index ee3459cb6750..ea54cc0c9106 100644 --- a/arch/riscv/mm/context.c +++ b/arch/riscv/mm/context.c @@ -233,8 +233,10 @@ static int __init asids_init(void) local_flush_tlb_all(); /* Pre-compute ASID details */ - num_asids = 1 << asid_bits; - asid_mask = num_asids - 1; + if (asid_bits) { + num_asids = 1 << asid_bits; + asid_mask = num_asids - 1; + } /* * Use ASID allocator only if number of HW ASIDs are @@ -255,7 +257,7 @@ static int __init asids_init(void) pr_info("ASID allocator using %lu bits (%lu entries)\n", asid_bits, num_asids); } else { - pr_info("ASID allocator disabled\n"); + pr_info("ASID allocator disabled (%lu bits)\n", asid_bits); } return 0; diff --git a/arch/riscv/mm/extable.c b/arch/riscv/mm/extable.c index 18bf338303b6..ddb7d3b99e89 100644 --- a/arch/riscv/mm/extable.c +++ b/arch/riscv/mm/extable.c @@ -11,7 +11,7 @@ #include <linux/module.h> #include <linux/uaccess.h> -#ifdef CONFIG_BPF_JIT +#if defined(CONFIG_BPF_JIT) && defined(CONFIG_ARCH_RV64I) int rv_bpf_fixup_exception(const struct exception_table_entry *ex, struct pt_regs *regs); #endif @@ -23,7 +23,7 @@ int fixup_exception(struct pt_regs *regs) if (!fixup) return 0; -#ifdef CONFIG_BPF_JIT +#if defined(CONFIG_BPF_JIT) && defined(CONFIG_ARCH_RV64I) if (regs->epc >= BPF_JIT_REGION_START && regs->epc < BPF_JIT_REGION_END) return rv_bpf_fixup_exception(fixup, regs); #endif diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index c0cddf0fc22d..24b2b8044602 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -41,7 +41,7 @@ phys_addr_t phys_ram_base __ro_after_init; EXPORT_SYMBOL(phys_ram_base); #ifdef CONFIG_XIP_KERNEL -extern char _xiprom[], _exiprom[]; +extern char _xiprom[], _exiprom[], __data_loc; #endif unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] @@ -454,10 +454,9 @@ static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size) /* called from head.S with MMU off */ asmlinkage void __init __copy_data(void) { - void *from = (void *)(&_sdata); - void *end = (void *)(&_end); + void *from = (void *)(&__data_loc); void *to = (void *)CONFIG_PHYS_RAM_BASE; - size_t sz = (size_t)(end - from + 1); + size_t sz = (size_t)((uintptr_t)(&_end) - (uintptr_t)(&_sdata)); memcpy(to, from, sz); } diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index 2ca345c7b0bf..f2a779c7e225 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -460,6 +460,8 @@ static int emit_call(bool fixed, u64 addr, struct rv_jit_context *ctx) #define BPF_FIXUP_REG_MASK GENMASK(31, 27) int rv_bpf_fixup_exception(const struct exception_table_entry *ex, + struct pt_regs *regs); +int rv_bpf_fixup_exception(const struct exception_table_entry *ex, struct pt_regs *regs) { off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup); diff --git a/arch/s390/Kbuild b/arch/s390/Kbuild index 8b98c501142d..76e362277179 100644 --- a/arch/s390/Kbuild +++ b/arch/s390/Kbuild @@ -8,3 +8,6 @@ obj-$(CONFIG_APPLDATA_BASE) += appldata/ obj-y += net/ obj-$(CONFIG_PCI) += pci/ obj-$(CONFIG_ARCH_HAS_KEXEC_PURGATORY) += purgatory/ + +# for cleaning +subdir- += boot tools diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index b86de61b8caa..2a5bb4f29cfe 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -47,7 +47,7 @@ config ARCH_SUPPORTS_UPROBES config KASAN_SHADOW_OFFSET hex depends on KASAN - default 0x18000000000000 + default 0x1C000000000000 config S390 def_bool y @@ -153,12 +153,15 @@ config S390 select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_CONTIGUOUS select HAVE_DYNAMIC_FTRACE + select HAVE_DYNAMIC_FTRACE_WITH_ARGS + select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS select HAVE_DYNAMIC_FTRACE_WITH_REGS select HAVE_EBPF_JIT if PACK_STACK && HAVE_MARCH_Z196_FEATURES select HAVE_EFFICIENT_UNALIGNED_ACCESS select HAVE_FAST_GUP select HAVE_FENTRY select HAVE_FTRACE_MCOUNT_RECORD + select HAVE_FUNCTION_ARG_ACCESS_API select HAVE_FUNCTION_ERROR_INJECTION select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER @@ -190,6 +193,8 @@ config S390 select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RELIABLE_STACKTRACE select HAVE_RSEQ + select HAVE_SAMPLE_FTRACE_DIRECT + select HAVE_SAMPLE_FTRACE_DIRECT_MULTI select HAVE_SOFTIRQ_ON_OWN_STACK select HAVE_SYSCALL_TRACEPOINTS select HAVE_VIRT_CPU_ACCOUNTING @@ -434,6 +439,14 @@ endchoice config 64BIT def_bool y +config COMMAND_LINE_SIZE + int "Maximum size of kernel command line" + default 4096 + range 896 1048576 + help + This allows you to specify the maximum length of the kernel command + line. + config COMPAT def_bool y prompt "Kernel support for 31 bit emulation" @@ -938,6 +951,8 @@ menu "Selftests" config S390_UNWIND_SELFTEST def_tristate n + depends on KUNIT + default KUNIT_ALL_TESTS prompt "Test unwind functions" help This option enables s390 specific stack unwinder testing kernel @@ -946,4 +961,16 @@ config S390_UNWIND_SELFTEST Say N if you are unsure. +config S390_KPROBES_SANITY_TEST + def_tristate n + prompt "Enable s390 specific kprobes tests" + depends on KPROBES + depends on KUNIT + help + This option enables an s390 specific kprobes test module. This option + is not useful for distributions or general kernels, but only for kernel + developers working on architecture code. + + Say N if you are unsure. + endmenu diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 450b351dfa8e..609e3697324b 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -3,9 +3,7 @@ # s390/Makefile # # This file is included by the global makefile so that you can add your own -# architecture-specific flags and dependencies. Remember to do have actions -# for "archclean" and "archdep" for cleaning up and making dependencies for -# this architecture +# architecture-specific flags and dependencies. # # Copyright (C) 1994 by Linus Torvalds # @@ -79,10 +77,12 @@ KBUILD_AFLAGS_DECOMPRESSOR += $(aflags-y) KBUILD_CFLAGS_DECOMPRESSOR += $(cflags-y) ifneq ($(call cc-option,-mstack-size=8192 -mstack-guard=128),) -cflags-$(CONFIG_CHECK_STACK) += -mstack-size=$(STACK_SIZE) -ifeq ($(call cc-option,-mstack-size=8192),) -cflags-$(CONFIG_CHECK_STACK) += -mstack-guard=$(CONFIG_STACK_GUARD) -endif + CC_FLAGS_CHECK_STACK := -mstack-size=$(STACK_SIZE) + ifeq ($(call cc-option,-mstack-size=8192),) + CC_FLAGS_CHECK_STACK += -mstack-guard=$(CONFIG_STACK_GUARD) + endif + export CC_FLAGS_CHECK_STACK + cflags-$(CONFIG_CHECK_STACK) += $(CC_FLAGS_CHECK_STACK) endif ifdef CONFIG_EXPOLINE @@ -147,10 +147,6 @@ zfcpdump: vdso_install: $(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso64 $@ -archclean: - $(Q)$(MAKE) $(clean)=$(boot) - $(Q)$(MAKE) $(clean)=$(tools) - archheaders: $(Q)$(MAKE) $(build)=$(syscalls) uapi diff --git a/arch/s390/boot/compressed/decompressor.h b/arch/s390/boot/compressed/decompressor.h index a59f75c5b049..f75cc31a77dd 100644 --- a/arch/s390/boot/compressed/decompressor.h +++ b/arch/s390/boot/compressed/decompressor.h @@ -24,6 +24,7 @@ struct vmlinux_info { unsigned long dynsym_start; unsigned long rela_dyn_start; unsigned long rela_dyn_end; + unsigned long amode31_size; }; /* Symbols defined by linker scripts */ diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S index 40f4cff538b8..3a252d140c55 100644 --- a/arch/s390/boot/head.S +++ b/arch/s390/boot/head.S @@ -184,35 +184,23 @@ iplstart: bas %r14,.Lloader # load parameter file ltr %r2,%r2 # got anything ? bz .Lnopf - chi %r2,895 - bnh .Lnotrunc - la %r2,895 + l %r3,MAX_COMMAND_LINE_SIZE+ARCH_OFFSET-PARMAREA(%r12) + ahi %r3,-1 + clr %r2,%r3 + bl .Lnotrunc + lr %r2,%r3 .Lnotrunc: l %r4,.Linitrd clc 0(3,%r4),.L_hdr # if it is HDRx bz .Lagain1 # skip dataset header clc 0(3,%r4),.L_eof # if it is EOFx bz .Lagain1 # skip dateset trailer - la %r5,0(%r4,%r2) - lr %r3,%r2 - la %r3,COMMAND_LINE-PARMAREA(%r12) # load adr. of command line - mvc 0(256,%r3),0(%r4) - mvc 256(256,%r3),256(%r4) - mvc 512(256,%r3),512(%r4) - mvc 768(122,%r3),768(%r4) - slr %r0,%r0 - b .Lcntlp -.Ldelspc: - ic %r0,0(%r2,%r3) - chi %r0,0x20 # is it a space ? - be .Lcntlp - ahi %r2,1 - b .Leolp -.Lcntlp: - brct %r2,.Ldelspc -.Leolp: - slr %r0,%r0 - stc %r0,0(%r2,%r3) # terminate buffer + + lr %r5,%r2 + la %r6,COMMAND_LINE-PARMAREA(%r12) + lr %r7,%r2 + ahi %r7,1 + mvcl %r6,%r4 .Lnopf: # @@ -317,6 +305,7 @@ SYM_CODE_START_LOCAL(startup_normal) xc 0x300(256),0x300 xc 0xe00(256),0xe00 xc 0xf00(256),0xf00 + lctlg %c0,%c15,.Lctl-.LPG0(%r13) # load control registers stcke __LC_BOOT_CLOCK mvc __LC_LAST_UPDATE_CLOCK(8),__LC_BOOT_CLOCK+1 spt 6f-.LPG0(%r13) @@ -335,6 +324,22 @@ SYM_CODE_END(startup_normal) .quad 0x0000000180000000,startup_pgm_check_handler .Lio_new_psw: .quad 0x0002000180000000,0x1f0 # disabled wait +.Lctl: .quad 0x04040000 # cr0: AFP registers & secondary space + .quad 0 # cr1: primary space segment table + .quad 0 # cr2: dispatchable unit control table + .quad 0 # cr3: instruction authorization + .quad 0xffff # cr4: instruction authorization + .quad 0 # cr5: primary-aste origin + .quad 0 # cr6: I/O interrupts + .quad 0 # cr7: secondary space segment table + .quad 0x0000000000008000 # cr8: access registers translation + .quad 0 # cr9: tracing off + .quad 0 # cr10: tracing off + .quad 0 # cr11: tracing off + .quad 0 # cr12: tracing off + .quad 0 # cr13: home space segment table + .quad 0xc0000000 # cr14: machine check handling off + .quad 0 # cr15: linkage stack operations #include "head_kdump.S" @@ -377,11 +382,10 @@ SYM_DATA_START(parmarea) .quad 0 # OLDMEM_BASE .quad 0 # OLDMEM_SIZE .quad kernel_version # points to kernel version string + .quad COMMAND_LINE_SIZE .org COMMAND_LINE .byte "root=/dev/ram0 ro" .byte 0 .org PARMAREA+__PARMAREA_SIZE SYM_DATA_END(parmarea) - - .org HEAD_END diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index 0f84c072625e..9ed7e29c81d9 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -170,10 +170,10 @@ static inline int has_ebcdic_char(const char *str) void setup_boot_command_line(void) { - parmarea.command_line[ARCH_COMMAND_LINE_SIZE - 1] = 0; + parmarea.command_line[COMMAND_LINE_SIZE - 1] = 0; /* convert arch command line to ascii if necessary */ if (has_ebcdic_char(parmarea.command_line)) - EBCASC(parmarea.command_line, ARCH_COMMAND_LINE_SIZE); + EBCASC(parmarea.command_line, COMMAND_LINE_SIZE); /* copy arch command line */ strcpy(early_command_line, strim(parmarea.command_line)); diff --git a/arch/s390/boot/pgm_check_info.c b/arch/s390/boot/pgm_check_info.c index 75bcbfa27941..c2a1defc79da 100644 --- a/arch/s390/boot/pgm_check_info.c +++ b/arch/s390/boot/pgm_check_info.c @@ -175,6 +175,6 @@ void print_pgm_check_info(void) gpregs[12], gpregs[13], gpregs[14], gpregs[15]); print_stacktrace(); decompressor_printk("Last Breaking-Event-Address:\n"); - decompressor_printk(" [<%016lx>] %pS\n", (unsigned long)S390_lowcore.breaking_event_addr, - (void *)S390_lowcore.breaking_event_addr); + decompressor_printk(" [<%016lx>] %pS\n", (unsigned long)S390_lowcore.pgm_last_break, + (void *)S390_lowcore.pgm_last_break); } diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 6dc8d0a53864..1aa11a8f57dd 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -15,6 +15,7 @@ #include "uv.h" unsigned long __bootdata_preserved(__kaslr_offset); +unsigned long __bootdata(__amode31_base); unsigned long __bootdata_preserved(VMALLOC_START); unsigned long __bootdata_preserved(VMALLOC_END); struct page *__bootdata_preserved(vmemmap); @@ -148,82 +149,56 @@ static void setup_ident_map_size(unsigned long max_physmem_end) static void setup_kernel_memory_layout(void) { - bool vmalloc_size_verified = false; - unsigned long vmemmap_off; - unsigned long vspace_left; + unsigned long vmemmap_start; unsigned long rte_size; unsigned long pages; - unsigned long vmax; pages = ident_map_size / PAGE_SIZE; /* vmemmap contains a multiple of PAGES_PER_SECTION struct pages */ vmemmap_size = SECTION_ALIGN_UP(pages) * sizeof(struct page); /* choose kernel address space layout: 4 or 3 levels. */ - vmemmap_off = round_up(ident_map_size, _REGION3_SIZE); + vmemmap_start = round_up(ident_map_size, _REGION3_SIZE); if (IS_ENABLED(CONFIG_KASAN) || vmalloc_size > _REGION2_SIZE || - vmemmap_off + vmemmap_size + vmalloc_size + MODULES_LEN > _REGION2_SIZE) - vmax = _REGION1_SIZE; - else - vmax = _REGION2_SIZE; - - /* keep vmemmap_off aligned to a top level region table entry */ - rte_size = vmax == _REGION1_SIZE ? _REGION2_SIZE : _REGION3_SIZE; - MODULES_END = vmax; - if (is_prot_virt_host()) { - /* - * forcing modules and vmalloc area under the ultravisor - * secure storage limit, so that any vmalloc allocation - * we do could be used to back secure guest storage. - */ - adjust_to_uv_max(&MODULES_END); - } - -#ifdef CONFIG_KASAN - if (MODULES_END < vmax) { - /* force vmalloc and modules below kasan shadow */ - MODULES_END = min(MODULES_END, KASAN_SHADOW_START); + vmemmap_start + vmemmap_size + vmalloc_size + MODULES_LEN > + _REGION2_SIZE) { + MODULES_END = _REGION1_SIZE; + rte_size = _REGION2_SIZE; } else { - /* - * leave vmalloc and modules above kasan shadow but make - * sure they don't overlap with it - */ - vmalloc_size = min(vmalloc_size, vmax - KASAN_SHADOW_END - MODULES_LEN); - vmalloc_size_verified = true; - vspace_left = KASAN_SHADOW_START; + MODULES_END = _REGION2_SIZE; + rte_size = _REGION3_SIZE; } + /* + * forcing modules and vmalloc area under the ultravisor + * secure storage limit, so that any vmalloc allocation + * we do could be used to back secure guest storage. + */ + adjust_to_uv_max(&MODULES_END); +#ifdef CONFIG_KASAN + /* force vmalloc and modules below kasan shadow */ + MODULES_END = min(MODULES_END, KASAN_SHADOW_START); #endif MODULES_VADDR = MODULES_END - MODULES_LEN; VMALLOC_END = MODULES_VADDR; - if (vmalloc_size_verified) { - VMALLOC_START = VMALLOC_END - vmalloc_size; - } else { - vmemmap_off = round_up(ident_map_size, rte_size); + /* allow vmalloc area to occupy up to about 1/2 of the rest virtual space left */ + vmalloc_size = min(vmalloc_size, round_down(VMALLOC_END / 2, _REGION3_SIZE)); + VMALLOC_START = VMALLOC_END - vmalloc_size; - if (vmemmap_off + vmemmap_size > VMALLOC_END || - vmalloc_size > VMALLOC_END - vmemmap_off - vmemmap_size) { - /* - * allow vmalloc area to occupy up to 1/2 of - * the rest virtual space left. - */ - vmalloc_size = min(vmalloc_size, VMALLOC_END / 2); - } - VMALLOC_START = VMALLOC_END - vmalloc_size; - vspace_left = VMALLOC_START; - } - - pages = vspace_left / (PAGE_SIZE + sizeof(struct page)); + /* split remaining virtual space between 1:1 mapping & vmemmap array */ + pages = VMALLOC_START / (PAGE_SIZE + sizeof(struct page)); pages = SECTION_ALIGN_UP(pages); - vmemmap_off = round_up(vspace_left - pages * sizeof(struct page), rte_size); - /* keep vmemmap left most starting from a fresh region table entry */ - vmemmap_off = min(vmemmap_off, round_up(ident_map_size, rte_size)); - /* take care that identity map is lower then vmemmap */ - ident_map_size = min(ident_map_size, vmemmap_off); + /* keep vmemmap_start aligned to a top level region table entry */ + vmemmap_start = round_down(VMALLOC_START - pages * sizeof(struct page), rte_size); + /* vmemmap_start is the future VMEM_MAX_PHYS, make sure it is within MAX_PHYSMEM */ + vmemmap_start = min(vmemmap_start, 1UL << MAX_PHYSMEM_BITS); + /* make sure identity map doesn't overlay with vmemmap */ + ident_map_size = min(ident_map_size, vmemmap_start); vmemmap_size = SECTION_ALIGN_UP(ident_map_size / PAGE_SIZE) * sizeof(struct page); - VMALLOC_START = max(vmemmap_off + vmemmap_size, VMALLOC_START); - vmemmap = (struct page *)vmemmap_off; + /* make sure vmemmap doesn't overlay with vmalloc area */ + VMALLOC_START = max(vmemmap_start + vmemmap_size, VMALLOC_START); + vmemmap = (struct page *)vmemmap_start; } /* @@ -259,6 +234,12 @@ static void offset_vmlinux_info(unsigned long offset) vmlinux.dynsym_start += offset; } +static unsigned long reserve_amode31(unsigned long safe_addr) +{ + __amode31_base = PAGE_ALIGN(safe_addr); + return safe_addr + vmlinux.amode31_size; +} + void startup_kernel(void) { unsigned long random_lma; @@ -273,6 +254,7 @@ void startup_kernel(void) setup_lpp(); store_ipl_parmblock(); safe_addr = mem_safe_offset(); + safe_addr = reserve_amode31(safe_addr); safe_addr = read_ipl_report(safe_addr); uv_query_info(); rescue_initrd(safe_addr); diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 6aad18ee131d..fd825097cf04 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -61,7 +61,8 @@ CONFIG_PROTECTED_VIRTUALIZATION_GUEST=y CONFIG_CMM=m CONFIG_APPLDATA_BASE=y CONFIG_KVM=m -CONFIG_S390_UNWIND_SELFTEST=y +CONFIG_S390_UNWIND_SELFTEST=m +CONFIG_S390_KPROBES_SANITY_TEST=m CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y CONFIG_STATIC_KEYS_SELFTEST=y @@ -776,7 +777,6 @@ CONFIG_CRC8=m CONFIG_RANDOM32_SELFTEST=y CONFIG_DMA_CMA=y CONFIG_CMA_SIZE_MBYTES=0 -CONFIG_DMA_API_DEBUG=y CONFIG_PRINTK_TIME=y CONFIG_DYNAMIC_DEBUG=y CONFIG_DEBUG_INFO=y @@ -839,8 +839,13 @@ CONFIG_BPF_KPROBE_OVERRIDE=y CONFIG_HIST_TRIGGERS=y CONFIG_FTRACE_STARTUP_TEST=y # CONFIG_EVENT_TRACE_STARTUP_TEST is not set +CONFIG_SAMPLES=y +CONFIG_SAMPLE_TRACE_PRINTK=m +CONFIG_SAMPLE_FTRACE_DIRECT=m CONFIG_DEBUG_ENTRY=y CONFIG_CIO_INJECT=y +CONFIG_KUNIT=m +CONFIG_KUNIT_DEBUGFS=y CONFIG_NOTIFIER_ERROR_INJECTION=m CONFIG_NETDEV_NOTIFIER_ERROR_INJECT=m CONFIG_FAULT_INJECTION=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index f08b161c9446..c9c3cedff2d8 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -60,6 +60,7 @@ CONFIG_CMM=m CONFIG_APPLDATA_BASE=y CONFIG_KVM=m CONFIG_S390_UNWIND_SELFTEST=m +CONFIG_S390_KPROBES_SANITY_TEST=m CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y # CONFIG_GCC_PLUGINS is not set @@ -788,6 +789,11 @@ CONFIG_FTRACE_SYSCALLS=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_BPF_KPROBE_OVERRIDE=y CONFIG_HIST_TRIGGERS=y +CONFIG_SAMPLES=y +CONFIG_SAMPLE_TRACE_PRINTK=m +CONFIG_SAMPLE_FTRACE_DIRECT=m +CONFIG_KUNIT=m +CONFIG_KUNIT_DEBUGFS=y CONFIG_LKDTM=m CONFIG_PERCPU_TEST=m CONFIG_ATOMIC64_SELFTEST=y diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h index f9eddbca79d2..2c057e1f3200 100644 --- a/arch/s390/include/asm/barrier.h +++ b/arch/s390/include/asm/barrier.h @@ -16,20 +16,24 @@ #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES /* Fast-BCR without checkpoint synchronization */ -#define __ASM_BARRIER "bcr 14,0\n" +#define __ASM_BCR_SERIALIZE "bcr 14,0\n" #else -#define __ASM_BARRIER "bcr 15,0\n" +#define __ASM_BCR_SERIALIZE "bcr 15,0\n" #endif -#define mb() do { asm volatile(__ASM_BARRIER : : : "memory"); } while (0) +static __always_inline void bcr_serialize(void) +{ + asm volatile(__ASM_BCR_SERIALIZE : : : "memory"); +} -#define rmb() barrier() -#define wmb() barrier() -#define dma_rmb() mb() -#define dma_wmb() mb() -#define __smp_mb() mb() -#define __smp_rmb() rmb() -#define __smp_wmb() wmb() +#define mb() bcr_serialize() +#define rmb() barrier() +#define wmb() barrier() +#define dma_rmb() mb() +#define dma_wmb() mb() +#define __smp_mb() mb() +#define __smp_rmb() rmb() +#define __smp_wmb() wmb() #define __smp_store_release(p, v) \ do { \ diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h index fd149480b6e2..5a530c552c23 100644 --- a/arch/s390/include/asm/bitops.h +++ b/arch/s390/include/asm/bitops.h @@ -188,7 +188,7 @@ static inline bool arch_test_and_set_bit_lock(unsigned long nr, volatile unsigned long *ptr) { if (arch_test_bit(nr, ptr)) - return 1; + return true; return arch_test_and_set_bit(nr, ptr); } diff --git a/arch/s390/include/asm/cpu.h b/arch/s390/include/asm/cpu.h index 62228a884e06..26c710cd3485 100644 --- a/arch/s390/include/asm/cpu.h +++ b/arch/s390/include/asm/cpu.h @@ -12,6 +12,7 @@ #ifndef __ASSEMBLY__ #include <linux/types.h> +#include <linux/jump_label.h> struct cpuid { @@ -21,5 +22,7 @@ struct cpuid unsigned int unused : 16; } __attribute__ ((packed, aligned(8))); +DECLARE_STATIC_KEY_FALSE(cpu_has_bear); + #endif /* __ASSEMBLY__ */ #endif /* _ASM_S390_CPU_H */ diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h index 19a55e1e3a0c..77f24262c25c 100644 --- a/arch/s390/include/asm/debug.h +++ b/arch/s390/include/asm/debug.h @@ -462,7 +462,7 @@ arch_initcall(VNAME(var, reg)) * * @var: Name of debug_info_t variable * @name: Name of debug log (e.g. used for debugfs entry) - * @pages_per_area: Number of pages per area + * @pages: Number of pages per area * @nr_areas: Number of debug areas * @buf_size: Size of data area in each debug entry * @view: Pointer to debug view struct diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h index e3aa354ab9f4..94b6919026df 100644 --- a/arch/s390/include/asm/facility.h +++ b/arch/s390/include/asm/facility.h @@ -9,8 +9,12 @@ #define __ASM_FACILITY_H #include <asm/facility-defs.h> + +#include <linux/minmax.h> #include <linux/string.h> +#include <linux/types.h> #include <linux/preempt.h> + #include <asm/lowcore.h> #define MAX_FACILITY_BIT (sizeof(stfle_fac_list) * 8) diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h index e8b460f39c58..267f70f4393f 100644 --- a/arch/s390/include/asm/ftrace.h +++ b/arch/s390/include/asm/ftrace.h @@ -17,7 +17,6 @@ void ftrace_caller(void); -extern char ftrace_graph_caller_end; extern void *ftrace_func; struct dyn_arch_ftrace { }; @@ -42,6 +41,35 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) return addr; } +struct ftrace_regs { + struct pt_regs regs; +}; + +static __always_inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *fregs) +{ + return &fregs->regs; +} + +static __always_inline void ftrace_instruction_pointer_set(struct ftrace_regs *fregs, + unsigned long ip) +{ + struct pt_regs *regs = arch_ftrace_get_regs(fregs); + + regs->psw.addr = ip; +} + +/* + * When an ftrace registered caller is tracing a function that is + * also set by a register_ftrace_direct() call, it needs to be + * differentiated in the ftrace_caller trampoline. To do this, + * place the direct caller in the ORIG_GPR2 part of pt_regs. This + * tells the ftrace_caller that there's a direct caller. + */ +static inline void arch_ftrace_set_direct_caller(struct pt_regs *regs, unsigned long addr) +{ + regs->orig_gpr2 = addr; +} + /* * Even though the system call numbers are identical for s390/s390x a * different system call table is used for compat tasks. This may lead @@ -68,4 +96,32 @@ static inline bool arch_syscall_match_sym_name(const char *sym, } #endif /* __ASSEMBLY__ */ + +#ifdef CONFIG_FUNCTION_TRACER + +#define FTRACE_NOP_INSN .word 0xc004, 0x0000, 0x0000 /* brcl 0,0 */ + +#ifndef CC_USING_HOTPATCH + +#define FTRACE_GEN_MCOUNT_RECORD(name) \ + .section __mcount_loc, "a", @progbits; \ + .quad name; \ + .previous; + +#else /* !CC_USING_HOTPATCH */ + +#define FTRACE_GEN_MCOUNT_RECORD(name) + +#endif /* !CC_USING_HOTPATCH */ + +#define FTRACE_GEN_NOP_ASM(name) \ + FTRACE_GEN_MCOUNT_RECORD(name) \ + FTRACE_NOP_INSN + +#else /* CONFIG_FUNCTION_TRACER */ + +#define FTRACE_GEN_NOP_ASM(name) + +#endif /* CONFIG_FUNCTION_TRACER */ + #endif /* _ASM_S390_FTRACE_H */ diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h index dcb1bba4f406..916cfcb36d8a 100644 --- a/arch/s390/include/asm/jump_label.h +++ b/arch/s390/include/asm/jump_label.h @@ -2,6 +2,8 @@ #ifndef _ASM_S390_JUMP_LABEL_H #define _ASM_S390_JUMP_LABEL_H +#define HAVE_JUMP_LABEL_BATCH + #ifndef __ASSEMBLY__ #include <linux/types.h> diff --git a/arch/s390/include/asm/kdebug.h b/arch/s390/include/asm/kdebug.h index d5327f064799..4377238e4752 100644 --- a/arch/s390/include/asm/kdebug.h +++ b/arch/s390/include/asm/kdebug.h @@ -23,6 +23,6 @@ enum die_val { DIE_NMI_IPI, }; -extern void die(struct pt_regs *, const char *); +extern void __noreturn die(struct pt_regs *, const char *); #endif diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h index ea398a05f643..7f3c9ac34bd8 100644 --- a/arch/s390/include/asm/kexec.h +++ b/arch/s390/include/asm/kexec.h @@ -74,6 +74,12 @@ void *kexec_file_add_components(struct kimage *image, int arch_kexec_do_relocs(int r_type, void *loc, unsigned long val, unsigned long addr); +#define ARCH_HAS_KIMAGE_ARCH + +struct kimage_arch { + void *ipl_buf; +}; + extern const struct kexec_file_ops s390_kexec_image_ops; extern const struct kexec_file_ops s390_kexec_elf_ops; diff --git a/arch/s390/include/asm/livepatch.h b/arch/s390/include/asm/livepatch.h index d578a8c76676..5209f223331a 100644 --- a/arch/s390/include/asm/livepatch.h +++ b/arch/s390/include/asm/livepatch.h @@ -16,9 +16,7 @@ static inline void klp_arch_set_pc(struct ftrace_regs *fregs, unsigned long ip) { - struct pt_regs *regs = ftrace_get_regs(fregs); - - regs->psw.addr = ip; + ftrace_instruction_pointer_set(fregs, ip); } #endif diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 11213c8bfca5..1262f5003acf 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -65,7 +65,7 @@ struct lowcore { __u32 external_damage_code; /* 0x00f4 */ __u64 failing_storage_address; /* 0x00f8 */ __u8 pad_0x0100[0x0110-0x0100]; /* 0x0100 */ - __u64 breaking_event_addr; /* 0x0110 */ + __u64 pgm_last_break; /* 0x0110 */ __u8 pad_0x0118[0x0120-0x0118]; /* 0x0118 */ psw_t restart_old_psw; /* 0x0120 */ psw_t external_old_psw; /* 0x0130 */ @@ -93,9 +93,10 @@ struct lowcore { psw_t return_psw; /* 0x0290 */ psw_t return_mcck_psw; /* 0x02a0 */ + __u64 last_break; /* 0x02b0 */ + /* CPU accounting and timing values. */ - __u64 sys_enter_timer; /* 0x02b0 */ - __u8 pad_0x02b8[0x02c0-0x02b8]; /* 0x02b8 */ + __u64 sys_enter_timer; /* 0x02b8 */ __u64 mcck_enter_timer; /* 0x02c0 */ __u64 exit_timer; /* 0x02c8 */ __u64 user_timer; /* 0x02d0 */ @@ -188,7 +189,7 @@ struct lowcore { __u32 tod_progreg_save_area; /* 0x1324 */ __u32 cpu_timer_save_area[2]; /* 0x1328 */ __u32 clock_comp_save_area[2]; /* 0x1330 */ - __u8 pad_0x1338[0x1340-0x1338]; /* 0x1338 */ + __u64 last_break_save_area; /* 0x1338 */ __u32 access_regs_save_area[16]; /* 0x1340 */ __u64 cregs_save_area[16]; /* 0x1380 */ __u8 pad_0x1400[0x1800-0x1400]; /* 0x1400 */ diff --git a/arch/s390/include/asm/nospec-branch.h b/arch/s390/include/asm/nospec-branch.h index b4bd8c41e9d3..82725cf783c7 100644 --- a/arch/s390/include/asm/nospec-branch.h +++ b/arch/s390/include/asm/nospec-branch.h @@ -12,6 +12,11 @@ void nospec_init_branches(void); void nospec_auto_detect(void); void nospec_revert(s32 *start, s32 *end); +static inline bool nospec_uses_trampoline(void) +{ + return __is_defined(CC_USING_EXPOLINE) && !nospec_disable; +} + #endif /* __ASSEMBLY__ */ #endif /* _ASM_S390_EXPOLINE_H */ diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 6b3c366af78e..90824be5ce9a 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -210,9 +210,11 @@ int zpci_deconfigure_device(struct zpci_dev *zdev); void zpci_device_reserved(struct zpci_dev *zdev); bool zpci_is_device_configured(struct zpci_dev *zdev); +int zpci_hot_reset_device(struct zpci_dev *zdev); int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64); int zpci_unregister_ioat(struct zpci_dev *, u8); void zpci_remove_reserved_devices(void); +void zpci_update_fh(struct zpci_dev *zdev, u32 fh); /* CLP */ int clp_setup_writeback_mio(void); @@ -294,8 +296,10 @@ void zpci_debug_exit(void); void zpci_debug_init_device(struct zpci_dev *, const char *); void zpci_debug_exit_device(struct zpci_dev *); -/* Error reporting */ +/* Error handling */ int zpci_report_error(struct pci_dev *, struct zpci_report_error_header *); +int zpci_clear_error_state(struct zpci_dev *zdev); +int zpci_reset_load_store_blocked(struct zpci_dev *zdev); #ifdef CONFIG_NUMA diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index e43416950245..008a6c856fa4 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -583,11 +583,11 @@ static inline void cspg(unsigned long *ptr, unsigned long old, unsigned long new #define CRDTE_DTT_REGION1 0x1cUL static inline void crdte(unsigned long old, unsigned long new, - unsigned long table, unsigned long dtt, + unsigned long *table, unsigned long dtt, unsigned long address, unsigned long asce) { union register_pair r1 = { .even = old, .odd = new, }; - union register_pair r2 = { .even = table | dtt, .odd = address, }; + union register_pair r2 = { .even = __pa(table) | dtt, .odd = address, }; asm volatile(".insn rrf,0xb98f0000,%[r1],%[r2],%[asce],0" : [r1] "+&d" (r1.pair) @@ -1001,7 +1001,7 @@ static __always_inline void __ptep_ipte(unsigned long address, pte_t *ptep, unsigned long opt, unsigned long asce, int local) { - unsigned long pto = (unsigned long) ptep; + unsigned long pto = __pa(ptep); if (__builtin_constant_p(opt) && opt == 0) { /* Invalidation + TLB flush for the pte */ @@ -1023,7 +1023,7 @@ static __always_inline void __ptep_ipte(unsigned long address, pte_t *ptep, static __always_inline void __ptep_ipte_range(unsigned long address, int nr, pte_t *ptep, int local) { - unsigned long pto = (unsigned long) ptep; + unsigned long pto = __pa(ptep); /* Invalidate a range of ptes + TLB flush of the ptes */ do { @@ -1487,7 +1487,7 @@ static __always_inline void __pmdp_idte(unsigned long addr, pmd_t *pmdp, { unsigned long sto; - sto = (unsigned long) pmdp - pmd_index(addr) * sizeof(pmd_t); + sto = __pa(pmdp) - pmd_index(addr) * sizeof(pmd_t); if (__builtin_constant_p(opt) && opt == 0) { /* flush without guest asce */ asm volatile( @@ -1513,7 +1513,7 @@ static __always_inline void __pudp_idte(unsigned long addr, pud_t *pudp, { unsigned long r3o; - r3o = (unsigned long) pudp - pud_index(addr) * sizeof(pud_t); + r3o = __pa(pudp) - pud_index(addr) * sizeof(pud_t); r3o |= _ASCE_TYPE_REGION3; if (__builtin_constant_p(opt) && opt == 0) { /* flush without guest asce */ diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index 61b22aa990e7..4ffa8e7f0ed3 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -76,8 +76,7 @@ enum { * The pt_regs struct defines the way the registers are stored on * the stack during a system call. */ -struct pt_regs -{ +struct pt_regs { union { user_pt_regs user_regs; struct { @@ -97,6 +96,7 @@ struct pt_regs }; unsigned long flags; unsigned long cr1; + unsigned long last_break; }; /* @@ -197,6 +197,25 @@ const char *regs_query_register_name(unsigned int offset); unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset); unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n); +/** + * regs_get_kernel_argument() - get Nth function argument in kernel + * @regs: pt_regs of that context + * @n: function argument number (start from 0) + * + * regs_get_kernel_argument() returns @n th argument of the function call. + */ +static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs, + unsigned int n) +{ + unsigned int argoffset = STACK_FRAME_OVERHEAD / sizeof(long); + +#define NR_REG_ARGUMENTS 5 + if (n < NR_REG_ARGUMENTS) + return regs_get_register(regs, 2 + n); + n -= NR_REG_ARGUMENTS; + return regs_get_kernel_stack_nth(regs, argoffset + n); +} + static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) { return regs->gprs[15]; diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index e3ae937bef1c..c68ea35de498 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -117,6 +117,7 @@ struct zpci_report_error_header { extern char *sclp_early_sccb; +void sclp_early_adjust_va(void); void sclp_early_set_buffer(void *sccb); int sclp_early_read_info(void); int sclp_early_read_storage_info(void); diff --git a/arch/s390/include/asm/sections.h b/arch/s390/include/asm/sections.h index 85881dd48022..3fecaa4e8b74 100644 --- a/arch/s390/include/asm/sections.h +++ b/arch/s390/include/asm/sections.h @@ -2,20 +2,8 @@ #ifndef _S390_SECTIONS_H #define _S390_SECTIONS_H -#define arch_is_kernel_initmem_freed arch_is_kernel_initmem_freed - #include <asm-generic/sections.h> -extern bool initmem_freed; - -static inline int arch_is_kernel_initmem_freed(unsigned long addr) -{ - if (!initmem_freed) - return 0; - return addr >= (unsigned long)__init_begin && - addr < (unsigned long)__init_end; -} - /* * .boot.data section contains variables "shared" between the decompressor and * the decompressed kernel. The decompressor will store values in them, and diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index b6606ffd85d8..77e6506898f5 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -11,8 +11,8 @@ #include <linux/build_bug.h> #define PARMAREA 0x10400 -#define HEAD_END 0x11000 +#define COMMAND_LINE_SIZE CONFIG_COMMAND_LINE_SIZE /* * Machine features detected in early.c */ @@ -43,6 +43,8 @@ #define STARTUP_NORMAL_OFFSET 0x10000 #define STARTUP_KDUMP_OFFSET 0x10010 +#define LEGACY_COMMAND_LINE_SIZE 896 + #ifndef __ASSEMBLY__ #include <asm/lowcore.h> @@ -55,8 +57,9 @@ struct parmarea { unsigned long oldmem_base; /* 0x10418 */ unsigned long oldmem_size; /* 0x10420 */ unsigned long kernel_version; /* 0x10428 */ - char pad1[0x10480 - 0x10430]; /* 0x10430 - 0x10480 */ - char command_line[ARCH_COMMAND_LINE_SIZE]; /* 0x10480 */ + unsigned long max_command_line_size; /* 0x10430 */ + char pad1[0x10480-0x10438]; /* 0x10438 - 0x10480 */ + char command_line[COMMAND_LINE_SIZE]; /* 0x10480 */ }; extern struct parmarea parmarea; diff --git a/arch/s390/include/asm/string.h b/arch/s390/include/asm/string.h index 4fd66c5e8934..3fae93ddb322 100644 --- a/arch/s390/include/asm/string.h +++ b/arch/s390/include/asm/string.h @@ -31,22 +31,18 @@ void *memmove(void *dest, const void *src, size_t n); #define __HAVE_ARCH_STRCMP /* arch function */ #define __HAVE_ARCH_STRCPY /* inline & arch function */ #define __HAVE_ARCH_STRLCAT /* arch function */ -#define __HAVE_ARCH_STRLCPY /* arch function */ #define __HAVE_ARCH_STRLEN /* inline & arch function */ #define __HAVE_ARCH_STRNCAT /* arch function */ #define __HAVE_ARCH_STRNCPY /* arch function */ #define __HAVE_ARCH_STRNLEN /* inline & arch function */ -#define __HAVE_ARCH_STRRCHR /* arch function */ #define __HAVE_ARCH_STRSTR /* arch function */ /* Prototypes for non-inlined arch strings functions. */ int memcmp(const void *s1, const void *s2, size_t n); int strcmp(const char *s1, const char *s2); size_t strlcat(char *dest, const char *src, size_t n); -size_t strlcpy(char *dest, const char *src, size_t size); char *strncat(char *dest, const char *src, size_t n); char *strncpy(char *dest, const char *src, size_t n); -char *strrchr(const char *s, int c); char *strstr(const char *s1, const char *s2); #endif /* !CONFIG_KASAN */ diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index b3dd883699e7..27e3d804b311 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -78,18 +78,6 @@ static inline void syscall_get_arguments(struct task_struct *task, args[0] = regs->orig_gpr2 & mask; } -static inline void syscall_set_arguments(struct task_struct *task, - struct pt_regs *regs, - const unsigned long *args) -{ - unsigned int n = 6; - - while (n-- > 0) - if (n > 0) - regs->gprs[2 + n] = args[n]; - regs->orig_gpr2 = args[0]; -} - static inline int syscall_get_arch(struct task_struct *task) { #ifdef CONFIG_COMPAT diff --git a/arch/s390/include/asm/text-patching.h b/arch/s390/include/asm/text-patching.h new file mode 100644 index 000000000000..b219056a8817 --- /dev/null +++ b/arch/s390/include/asm/text-patching.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _ASM_S390_TEXT_PATCHING_H +#define _ASM_S390_TEXT_PATCHING_H + +#include <asm/barrier.h> + +static __always_inline void sync_core(void) +{ + bcr_serialize(); +} + +void text_poke_sync(void); +void text_poke_sync_lock(void); + +#endif /* _ASM_S390_TEXT_PATCHING_H */ diff --git a/arch/s390/include/uapi/asm/setup.h b/arch/s390/include/uapi/asm/setup.h index 1f8803a31079..598d769e76df 100644 --- a/arch/s390/include/uapi/asm/setup.h +++ b/arch/s390/include/uapi/asm/setup.h @@ -1,14 +1 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * S390 version - * Copyright IBM Corp. 1999, 2010 - */ - -#ifndef _UAPI_ASM_S390_SETUP_H -#define _UAPI_ASM_S390_SETUP_H - -#define COMMAND_LINE_SIZE 4096 - -#define ARCH_COMMAND_LINE_SIZE 896 - -#endif /* _UAPI_ASM_S390_SETUP_H */ diff --git a/arch/s390/kernel/alternative.c b/arch/s390/kernel/alternative.c index c22ea1c3ef84..cce0ddee2d02 100644 --- a/arch/s390/kernel/alternative.c +++ b/arch/s390/kernel/alternative.c @@ -1,5 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/module.h> +#include <linux/cpu.h> +#include <linux/smp.h> +#include <asm/text-patching.h> #include <asm/alternative.h> #include <asm/facility.h> #include <asm/nospec-branch.h> @@ -110,3 +113,20 @@ void __init apply_alternative_instructions(void) { apply_alternatives(__alt_instructions, __alt_instructions_end); } + +static void do_sync_core(void *info) +{ + sync_core(); +} + +void text_poke_sync(void) +{ + on_each_cpu(do_sync_core, NULL, 1); +} + +void text_poke_sync_lock(void) +{ + cpus_read_lock(); + text_poke_sync(); + cpus_read_unlock(); +} diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index b57da9338588..8e00bb228662 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -35,6 +35,7 @@ int main(void) OFFSET(__PT_ORIG_GPR2, pt_regs, orig_gpr2); OFFSET(__PT_FLAGS, pt_regs, flags); OFFSET(__PT_CR1, pt_regs, cr1); + OFFSET(__PT_LAST_BREAK, pt_regs, last_break); DEFINE(__PT_SIZE, sizeof(struct pt_regs)); BLANK(); /* stack_frame offsets */ @@ -45,6 +46,7 @@ int main(void) OFFSET(__SF_SIE_SAVEAREA, stack_frame, empty1[2]); OFFSET(__SF_SIE_REASON, stack_frame, empty1[3]); OFFSET(__SF_SIE_FLAGS, stack_frame, empty1[4]); + DEFINE(STACK_FRAME_OVERHEAD, sizeof(struct stack_frame)); BLANK(); /* idle data offsets */ OFFSET(__CLOCK_IDLE_ENTER, s390_idle_data, clock_idle_enter); @@ -77,7 +79,7 @@ int main(void) OFFSET(__LC_MCCK_CODE, lowcore, mcck_interruption_code); OFFSET(__LC_EXT_DAMAGE_CODE, lowcore, external_damage_code); OFFSET(__LC_MCCK_FAIL_STOR_ADDR, lowcore, failing_storage_address); - OFFSET(__LC_LAST_BREAK, lowcore, breaking_event_addr); + OFFSET(__LC_PGM_LAST_BREAK, lowcore, pgm_last_break); OFFSET(__LC_RETURN_LPSWE, lowcore, return_lpswe); OFFSET(__LC_RETURN_MCCK_LPSWE, lowcore, return_mcck_lpswe); OFFSET(__LC_RST_OLD_PSW, lowcore, restart_old_psw); @@ -126,6 +128,7 @@ int main(void) OFFSET(__LC_PREEMPT_COUNT, lowcore, preempt_count); OFFSET(__LC_GMAP, lowcore, gmap); OFFSET(__LC_BR_R1, lowcore, br_r1_trampoline); + OFFSET(__LC_LAST_BREAK, lowcore, last_break); /* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */ OFFSET(__LC_DUMP_REIPL, lowcore, ipib); /* hardware defined lowcore locations 0x1000 - 0x18ff */ @@ -139,6 +142,7 @@ int main(void) OFFSET(__LC_TOD_PROGREG_SAVE_AREA, lowcore, tod_progreg_save_area); OFFSET(__LC_CPU_TIMER_SAVE_AREA, lowcore, cpu_timer_save_area); OFFSET(__LC_CLOCK_COMP_SAVE_AREA, lowcore, clock_comp_save_area); + OFFSET(__LC_LAST_BREAK_SAVE_AREA, lowcore, last_break_save_area); OFFSET(__LC_AREGS_SAVE_AREA, lowcore, access_regs_save_area); OFFSET(__LC_CREGS_SAVE_AREA, lowcore, cregs_save_area); OFFSET(__LC_PGM_TDB, lowcore, pgm_tdb); @@ -160,5 +164,6 @@ int main(void) DEFINE(OLDMEM_BASE, PARMAREA + offsetof(struct parmarea, oldmem_base)); DEFINE(OLDMEM_SIZE, PARMAREA + offsetof(struct parmarea, oldmem_size)); DEFINE(COMMAND_LINE, PARMAREA + offsetof(struct parmarea, command_line)); + DEFINE(MAX_COMMAND_LINE_SIZE, PARMAREA + offsetof(struct parmarea, max_command_line_size)); return 0; } diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c index 54efc279f54e..72e106cfd8c7 100644 --- a/arch/s390/kernel/cpcmd.c +++ b/arch/s390/kernel/cpcmd.c @@ -29,7 +29,7 @@ static int diag8_noresponse(int cmdlen) asm volatile( " diag %[rx],%[ry],0x8\n" : [ry] "+&d" (cmdlen) - : [rx] "d" ((addr_t) cpcmd_buf) + : [rx] "d" (__pa(cpcmd_buf)) : "cc"); return cmdlen; } @@ -39,8 +39,8 @@ static int diag8_response(int cmdlen, char *response, int *rlen) union register_pair rx, ry; int cc; - rx.even = (addr_t) cpcmd_buf; - rx.odd = (addr_t) response; + rx.even = __pa(cpcmd_buf); + rx.odd = __pa(response); ry.even = cmdlen | 0x40000000L; ry.odd = *rlen; asm volatile( diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index d72a6df058d7..785d54c9350c 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -191,8 +191,8 @@ static int copy_oldmem_user(void __user *dst, void *src, size_t count) return rc; } else { /* Check for swapped kdump oldmem areas */ - if (oldmem_data.start && from - oldmem_data.size < oldmem_data.size) { - from -= oldmem_data.size; + if (oldmem_data.start && from - oldmem_data.start < oldmem_data.size) { + from -= oldmem_data.start; len = min(count, oldmem_data.size - from); } else if (oldmem_data.start && from < oldmem_data.size) { len = min(count, oldmem_data.size - from); diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index db1bc00229ca..0681c55e831d 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -152,7 +152,7 @@ void show_stack(struct task_struct *task, unsigned long *stack, static void show_last_breaking_event(struct pt_regs *regs) { printk("Last Breaking-Event-Address:\n"); - printk(" [<%016lx>] %pSR\n", regs->args[0], (void *)regs->args[0]); + printk(" [<%016lx>] %pSR\n", regs->last_break, (void *)regs->last_break); } void show_registers(struct pt_regs *regs) @@ -192,7 +192,7 @@ void show_regs(struct pt_regs *regs) static DEFINE_SPINLOCK(die_lock); -void die(struct pt_regs *regs, const char *str) +void __noreturn die(struct pt_regs *regs, const char *str) { static int die_counter; diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 9857cb046726..3cdf68c53614 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -280,7 +280,7 @@ char __bootdata(early_command_line)[COMMAND_LINE_SIZE]; static void __init setup_boot_command_line(void) { /* copy arch command line */ - strlcpy(boot_command_line, early_command_line, ARCH_COMMAND_LINE_SIZE); + strlcpy(boot_command_line, early_command_line, COMMAND_LINE_SIZE); } static void __init check_image_bootable(void) @@ -296,6 +296,7 @@ static void __init check_image_bootable(void) void __init startup_init(void) { + sclp_early_adjust_va(); reset_tod_clock(); check_image_bootable(); time_early_init(); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 4c9b967290ae..01bae1d51113 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -52,6 +52,22 @@ STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE _LPP_OFFSET = __LC_LPP + .macro STBEAR address + ALTERNATIVE "", ".insn s,0xb2010000,\address", 193 + .endm + + .macro LBEAR address + ALTERNATIVE "", ".insn s,0xb2000000,\address", 193 + .endm + + .macro LPSWEY address,lpswe + ALTERNATIVE "b \lpswe", ".insn siy,0xeb0000000071,\address,0", 193 + .endm + + .macro MBEAR reg + ALTERNATIVE "", __stringify(mvc __PT_LAST_BREAK(8,\reg),__LC_LAST_BREAK), 193 + .endm + .macro CHECK_STACK savearea #ifdef CONFIG_CHECK_STACK tml %r15,STACK_SIZE - CONFIG_STACK_GUARD @@ -302,6 +318,7 @@ ENTRY(system_call) BPOFF lghi %r14,0 .Lsysc_per: + STBEAR __LC_LAST_BREAK lctlg %c1,%c1,__LC_KERNEL_ASCE lg %r12,__LC_CURRENT lg %r15,__LC_KERNEL_STACK @@ -321,14 +338,16 @@ ENTRY(system_call) xgr %r11,%r11 la %r2,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs mvc __PT_R8(64,%r2),__LC_SAVE_AREA_SYNC + MBEAR %r2 lgr %r3,%r14 brasl %r14,__do_syscall lctlg %c1,%c1,__LC_USER_ASCE mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP + LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15) lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15) stpt __LC_EXIT_TIMER - b __LC_RETURN_LPSWE + LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE ENDPROC(system_call) # @@ -340,9 +359,10 @@ ENTRY(ret_from_fork) lctlg %c1,%c1,__LC_USER_ASCE mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP + LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15) lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15) stpt __LC_EXIT_TIMER - b __LC_RETURN_LPSWE + LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE ENDPROC(ret_from_fork) /* @@ -382,6 +402,7 @@ ENTRY(pgm_check_handler) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) stmg %r0,%r7,__PT_R0(%r11) mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC + mvc __PT_LAST_BREAK(8,%r11),__LC_PGM_LAST_BREAK stmg %r8,%r9,__PT_PSW(%r11) # clear user controlled registers to prevent speculative use @@ -401,8 +422,9 @@ ENTRY(pgm_check_handler) stpt __LC_EXIT_TIMER .Lpgm_exit_kernel: mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) + LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15) lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15) - b __LC_RETURN_LPSWE + LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE # # single stepped system call @@ -412,7 +434,8 @@ ENTRY(pgm_check_handler) larl %r14,.Lsysc_per stg %r14,__LC_RETURN_PSW+8 lghi %r14,1 - lpswe __LC_RETURN_PSW # branch to .Lsysc_per + LBEAR __LC_PGM_LAST_BREAK + LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE # branch to .Lsysc_per ENDPROC(pgm_check_handler) /* @@ -422,6 +445,7 @@ ENDPROC(pgm_check_handler) ENTRY(\name) STCK __LC_INT_CLOCK stpt __LC_SYS_ENTER_TIMER + STBEAR __LC_LAST_BREAK BPOFF stmg %r8,%r15,__LC_SAVE_AREA_ASYNC lg %r12,__LC_CURRENT @@ -453,6 +477,7 @@ ENTRY(\name) xgr %r10,%r10 xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC + MBEAR %r11 stmg %r8,%r9,__PT_PSW(%r11) tm %r8,0x0001 # coming from user space? jno 1f @@ -465,8 +490,9 @@ ENTRY(\name) lctlg %c1,%c1,__LC_USER_ASCE BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP stpt __LC_EXIT_TIMER -2: lmg %r0,%r15,__PT_R0(%r11) - b __LC_RETURN_LPSWE +2: LBEAR __PT_LAST_BREAK(%r11) + lmg %r0,%r15,__PT_R0(%r11) + LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE ENDPROC(\name) .endm @@ -505,6 +531,7 @@ ENTRY(mcck_int_handler) BPOFF la %r1,4095 # validate r1 spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # validate cpu timer + LBEAR __LC_LAST_BREAK_SAVE_AREA-4095(%r1) # validate bear lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# validate gprs lg %r12,__LC_CURRENT lmg %r8,%r9,__LC_MCK_OLD_PSW @@ -591,8 +618,10 @@ ENTRY(mcck_int_handler) jno 0f BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP stpt __LC_EXIT_TIMER -0: lmg %r11,%r15,__PT_R11(%r11) - b __LC_RETURN_MCCK_LPSWE +0: ALTERNATIVE "", __stringify(lghi %r12,__LC_LAST_BREAK_SAVE_AREA),193 + LBEAR 0(%r12) + lmg %r11,%r15,__PT_R11(%r11) + LPSWEY __LC_RETURN_MCCK_PSW,__LC_RETURN_MCCK_LPSWE .Lmcck_panic: /* diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 7f2696e8d511..6083090be1f4 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -70,5 +70,6 @@ extern struct exception_table_entry _stop_amode31_ex_table[]; #define __amode31_data __section(".amode31.data") #define __amode31_ref __section(".amode31.refs") extern long _start_amode31_refs[], _end_amode31_refs[]; +extern unsigned long __amode31_base; #endif /* _ENTRY_H */ diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 5165bf344f95..5510c7d10ddc 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -17,6 +17,7 @@ #include <linux/kprobes.h> #include <trace/syscall.h> #include <asm/asm-offsets.h> +#include <asm/text-patching.h> #include <asm/cacheflush.h> #include <asm/ftrace.lds.h> #include <asm/nospec-branch.h> @@ -80,17 +81,6 @@ asm( #ifdef CONFIG_MODULES static char *ftrace_plt; - -asm( - " .data\n" - "ftrace_plt_template:\n" - " basr %r1,%r0\n" - " lg %r1,0f-.(%r1)\n" - " br %r1\n" - "0: .quad ftrace_caller\n" - "ftrace_plt_template_end:\n" - " .previous\n" -); #endif /* CONFIG_MODULES */ static const char *ftrace_shared_hotpatch_trampoline(const char **end) @@ -116,7 +106,7 @@ static const char *ftrace_shared_hotpatch_trampoline(const char **end) bool ftrace_need_init_nop(void) { - return ftrace_shared_hotpatch_trampoline(NULL); + return true; } int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) @@ -175,28 +165,6 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, return 0; } -static void ftrace_generate_nop_insn(struct ftrace_insn *insn) -{ - /* brcl 0,0 */ - insn->opc = 0xc004; - insn->disp = 0; -} - -static void ftrace_generate_call_insn(struct ftrace_insn *insn, - unsigned long ip) -{ - unsigned long target; - - /* brasl r0,ftrace_caller */ - target = FTRACE_ADDR; -#ifdef CONFIG_MODULES - if (is_module_addr((void *)ip)) - target = (unsigned long)ftrace_plt; -#endif /* CONFIG_MODULES */ - insn->opc = 0xc005; - insn->disp = (target - ip) / 2; -} - static void brcl_disable(void *brcl) { u8 op = 0x04; /* set mask field to zero */ @@ -207,23 +175,7 @@ static void brcl_disable(void *brcl) int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { - struct ftrace_insn orig, new, old; - - if (ftrace_shared_hotpatch_trampoline(NULL)) { - brcl_disable((void *)rec->ip); - return 0; - } - - if (copy_from_kernel_nofault(&old, (void *) rec->ip, sizeof(old))) - return -EFAULT; - /* Replace ftrace call with a nop. */ - ftrace_generate_call_insn(&orig, rec->ip); - ftrace_generate_nop_insn(&new); - - /* Verify that the to be replaced code matches what we expect. */ - if (memcmp(&orig, &old, sizeof(old))) - return -EINVAL; - s390_kernel_write((void *) rec->ip, &new, sizeof(new)); + brcl_disable((void *)rec->ip); return 0; } @@ -236,23 +188,7 @@ static void brcl_enable(void *brcl) int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { - struct ftrace_insn orig, new, old; - - if (ftrace_shared_hotpatch_trampoline(NULL)) { - brcl_enable((void *)rec->ip); - return 0; - } - - if (copy_from_kernel_nofault(&old, (void *) rec->ip, sizeof(old))) - return -EFAULT; - /* Replace nop with an ftrace call. */ - ftrace_generate_nop_insn(&orig); - ftrace_generate_call_insn(&new, rec->ip); - - /* Verify that the to be replaced code matches what we expect. */ - if (memcmp(&orig, &old, sizeof(old))) - return -EINVAL; - s390_kernel_write((void *) rec->ip, &new, sizeof(new)); + brcl_enable((void *)rec->ip); return 0; } @@ -264,22 +200,16 @@ int ftrace_update_ftrace_func(ftrace_func_t func) void arch_ftrace_update_code(int command) { - if (ftrace_shared_hotpatch_trampoline(NULL)) - ftrace_modify_all_code(command); - else - ftrace_run_stop_machine(command); -} - -static void __ftrace_sync(void *dummy) -{ + ftrace_modify_all_code(command); } int ftrace_arch_code_modify_post_process(void) { - if (ftrace_shared_hotpatch_trampoline(NULL)) { - /* Send SIGP to the other CPUs, so they see the new code. */ - smp_call_function(__ftrace_sync, NULL, 1); - } + /* + * Flush any pre-fetched instructions on all + * CPUs to make the new code visible. + */ + text_poke_sync_lock(); return 0; } @@ -294,10 +224,6 @@ static int __init ftrace_plt_init(void) panic("cannot allocate ftrace plt\n"); start = ftrace_shared_hotpatch_trampoline(&end); - if (!start) { - start = ftrace_plt_template; - end = ftrace_plt_template_end; - } memcpy(ftrace_plt, start, end - start); set_memory_ro((unsigned long)ftrace_plt, 1); return 0; @@ -337,12 +263,14 @@ NOKPROBE_SYMBOL(prepare_ftrace_return); int ftrace_enable_ftrace_graph_caller(void) { brcl_disable(ftrace_graph_caller); + text_poke_sync_lock(); return 0; } int ftrace_disable_ftrace_graph_caller(void) { brcl_enable(ftrace_graph_caller); + text_poke_sync_lock(); return 0; } diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 114b5490ad8e..42f9a325a257 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -20,8 +20,6 @@ __HEAD ENTRY(startup_continue) larl %r1,tod_clock_base mvc 0(16,%r1),__LC_BOOT_CLOCK - larl %r13,.LPG1 # get base - lctlg %c0,%c15,.Lctl-.LPG1(%r13) # load control registers # # Setup stack # @@ -42,19 +40,3 @@ ENTRY(startup_continue) .align 16 .LPG1: .Ldw: .quad 0x0002000180000000,0x0000000000000000 -.Lctl: .quad 0x04040000 # cr0: AFP registers & secondary space - .quad 0 # cr1: primary space segment table - .quad 0 # cr2: dispatchable unit control table - .quad 0 # cr3: instruction authorization - .quad 0xffff # cr4: instruction authorization - .quad 0 # cr5: primary-aste origin - .quad 0 # cr6: I/O interrupts - .quad 0 # cr7: secondary space segment table - .quad 0x0000000000008000 # cr8: access registers translation - .quad 0 # cr9: tracing off - .quad 0 # cr10: tracing off - .quad 0 # cr11: tracing off - .quad 0 # cr12: tracing off - .quad 0 # cr13: home space segment table - .quad 0xc0000000 # cr14: machine check handling off - .quad 0 # cr15: linkage stack operations diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index e2cc35775b99..5ad1dde23dc5 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -2156,7 +2156,7 @@ void *ipl_report_finish(struct ipl_report *report) buf = vzalloc(report->size); if (!buf) - return ERR_PTR(-ENOMEM); + goto out; ptr = buf; memcpy(ptr, report->ipib, report->ipib->hdr.len); @@ -2195,6 +2195,7 @@ void *ipl_report_finish(struct ipl_report *report) } BUG_ON(ptr > buf + report->size); +out: return buf; } diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 3a3145c4a3ba..0df83ecaa2e0 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -140,8 +140,11 @@ void noinstr do_io_irq(struct pt_regs *regs) irq_enter(); - if (user_mode(regs)) + if (user_mode(regs)) { update_timer_sys(); + if (static_branch_likely(&cpu_has_bear)) + current->thread.last_break = regs->last_break; + } from_idle = !user_mode(regs) && regs->psw.addr == (unsigned long)psw_idle_exit; if (from_idle) @@ -171,8 +174,11 @@ void noinstr do_ext_irq(struct pt_regs *regs) irq_enter(); - if (user_mode(regs)) + if (user_mode(regs)) { update_timer_sys(); + if (static_branch_likely(&cpu_has_bear)) + current->thread.last_break = regs->last_break; + } regs->int_code = S390_lowcore.ext_int_code_addr; regs->int_parm = S390_lowcore.ext_params; diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c index 9156653b56f6..6bec000c6c1c 100644 --- a/arch/s390/kernel/jump_label.c +++ b/arch/s390/kernel/jump_label.c @@ -6,8 +6,9 @@ * Author(s): Jan Glauber <jang@linux.vnet.ibm.com> */ #include <linux/uaccess.h> -#include <linux/stop_machine.h> #include <linux/jump_label.h> +#include <linux/module.h> +#include <asm/text-patching.h> #include <asm/ipl.h> struct insn { @@ -48,9 +49,9 @@ static struct insn orignop = { .offset = JUMP_LABEL_NOP_OFFSET >> 1, }; -static void __jump_label_transform(struct jump_entry *entry, - enum jump_label_type type, - int init) +static void jump_label_transform(struct jump_entry *entry, + enum jump_label_type type, + int init) { void *code = (void *)jump_entry_code(entry); struct insn old, new; @@ -72,19 +73,28 @@ static void __jump_label_transform(struct jump_entry *entry, s390_kernel_write(code, &new, sizeof(new)); } -static void __jump_label_sync(void *dummy) +void arch_jump_label_transform(struct jump_entry *entry, + enum jump_label_type type) { + jump_label_transform(entry, type, 0); + text_poke_sync(); } -void arch_jump_label_transform(struct jump_entry *entry, - enum jump_label_type type) +bool arch_jump_label_transform_queue(struct jump_entry *entry, + enum jump_label_type type) +{ + jump_label_transform(entry, type, 0); + return true; +} + +void arch_jump_label_transform_apply(void) { - __jump_label_transform(entry, type, 0); - smp_call_function(__jump_label_sync, NULL, 1); + text_poke_sync(); } -void arch_jump_label_transform_static(struct jump_entry *entry, - enum jump_label_type type) +void __init_or_module arch_jump_label_transform_static(struct jump_entry *entry, + enum jump_label_type type) { - __jump_label_transform(entry, type, 1); + jump_label_transform(entry, type, 1); + text_poke_sync(); } diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index c505c0ee5f47..e27a7d3b0364 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -122,9 +122,55 @@ static void s390_free_insn_slot(struct kprobe *p) } NOKPROBE_SYMBOL(s390_free_insn_slot); +/* Check if paddr is at an instruction boundary */ +static bool can_probe(unsigned long paddr) +{ + unsigned long addr, offset = 0; + kprobe_opcode_t insn; + struct kprobe *kp; + + if (paddr & 0x01) + return false; + + if (!kallsyms_lookup_size_offset(paddr, NULL, &offset)) + return false; + + /* Decode instructions */ + addr = paddr - offset; + while (addr < paddr) { + if (copy_from_kernel_nofault(&insn, (void *)addr, sizeof(insn))) + return false; + + if (insn >> 8 == 0) { + if (insn != BREAKPOINT_INSTRUCTION) { + /* + * Note that QEMU inserts opcode 0x0000 to implement + * software breakpoints for guests. Since the size of + * the original instruction is unknown, stop following + * instructions and prevent setting a kprobe. + */ + return false; + } + /* + * Check if the instruction has been modified by another + * kprobe, in which case the original instruction is + * decoded. + */ + kp = get_kprobe((void *)addr); + if (!kp) { + /* not a kprobe */ + return false; + } + insn = kp->opcode; + } + addr += insn_length(insn >> 8); + } + return addr == paddr; +} + int arch_prepare_kprobe(struct kprobe *p) { - if ((unsigned long) p->addr & 0x01) + if (!can_probe((unsigned long)p->addr)) return -EINVAL; /* Make sure the probe isn't going on a difficult instruction */ if (probe_is_prohibited_opcode(p->addr)) diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c index f9e4baa64b67..9975ad200d74 100644 --- a/arch/s390/kernel/machine_kexec_file.c +++ b/arch/s390/kernel/machine_kexec_file.c @@ -12,6 +12,7 @@ #include <linux/kexec.h> #include <linux/module_signature.h> #include <linux/verification.h> +#include <linux/vmalloc.h> #include <asm/boot_data.h> #include <asm/ipl.h> #include <asm/setup.h> @@ -170,6 +171,7 @@ static int kexec_file_add_ipl_report(struct kimage *image, struct kexec_buf buf; unsigned long addr; void *ptr, *end; + int ret; buf.image = image; @@ -199,9 +201,13 @@ static int kexec_file_add_ipl_report(struct kimage *image, ptr += len; } + ret = -ENOMEM; buf.buffer = ipl_report_finish(data->report); + if (!buf.buffer) + goto out; buf.bufsz = data->report->size; buf.memsz = buf.bufsz; + image->arch.ipl_buf = buf.buffer; data->memsz += buf.memsz; @@ -209,14 +215,18 @@ static int kexec_file_add_ipl_report(struct kimage *image, data->kernel_buf + offsetof(struct lowcore, ipl_parmblock_ptr); *lc_ipl_parmblock_ptr = (__u32)buf.mem; - return kexec_add_buffer(&buf); + ret = kexec_add_buffer(&buf); +out: + return ret; } void *kexec_file_add_components(struct kimage *image, int (*add_kernel)(struct kimage *image, struct s390_load_data *data)) { + unsigned long max_command_line_size = LEGACY_COMMAND_LINE_SIZE; struct s390_load_data data = {0}; + unsigned long minsize; int ret; data.report = ipl_report_init(&ipl_block); @@ -227,10 +237,23 @@ void *kexec_file_add_components(struct kimage *image, if (ret) goto out; - if (image->cmdline_buf_len >= ARCH_COMMAND_LINE_SIZE) { - ret = -EINVAL; + ret = -EINVAL; + minsize = PARMAREA + offsetof(struct parmarea, command_line); + if (image->kernel_buf_len < minsize) goto out; - } + + if (data.parm->max_command_line_size) + max_command_line_size = data.parm->max_command_line_size; + + if (minsize + max_command_line_size < minsize) + goto out; + + if (image->kernel_buf_len < minsize + max_command_line_size) + goto out; + + if (image->cmdline_buf_len >= max_command_line_size) + goto out; + memcpy(data.parm->command_line, image->cmdline_buf, image->cmdline_buf_len); @@ -308,16 +331,10 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, return 0; } -int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, - unsigned long buf_len) +int arch_kimage_file_post_load_cleanup(struct kimage *image) { - /* A kernel must be at least large enough to contain head.S. During - * load memory in head.S will be accessed, e.g. to register the next - * command line. If the next kernel were smaller the current kernel - * will panic at load. - */ - if (buf_len < HEAD_END) - return -ENOEXEC; - - return kexec_image_probe_default(image, buf, buf_len); + vfree(image->arch.ipl_buf); + image->arch.ipl_buf = NULL; + + return kexec_image_post_load_cleanup_default(image); } diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 6b13797143a7..39bcc0e39a10 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -22,10 +22,11 @@ ENTRY(ftrace_stub) BR_EX %r14 ENDPROC(ftrace_stub) -#define STACK_FRAME_SIZE (STACK_FRAME_OVERHEAD + __PT_SIZE) -#define STACK_PTREGS (STACK_FRAME_OVERHEAD) -#define STACK_PTREGS_GPRS (STACK_PTREGS + __PT_GPRS) -#define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW) +#define STACK_FRAME_SIZE (STACK_FRAME_OVERHEAD + __PT_SIZE) +#define STACK_PTREGS (STACK_FRAME_OVERHEAD) +#define STACK_PTREGS_GPRS (STACK_PTREGS + __PT_GPRS) +#define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW) +#define STACK_PTREGS_ORIG_GPR2 (STACK_PTREGS + __PT_ORIG_GPR2) #ifdef __PACK_STACK /* allocate just enough for r14, r15 and backchain */ #define TRACED_FUNC_FRAME_SIZE 24 @@ -33,13 +34,15 @@ ENDPROC(ftrace_stub) #define TRACED_FUNC_FRAME_SIZE STACK_FRAME_OVERHEAD #endif -ENTRY(ftrace_caller) - .globl ftrace_regs_caller - .set ftrace_regs_caller,ftrace_caller + .macro ftrace_regs_entry, allregs=0 stg %r14,(__SF_GPRS+8*8)(%r15) # save traced function caller + + .if \allregs == 1 lghi %r14,0 # save condition code ipm %r14 # don't put any instructions sllg %r14,%r14,16 # clobbering CC before this point + .endif + lgr %r1,%r15 # allocate stack frame for ftrace_caller to contain traced function aghi %r15,-TRACED_FUNC_FRAME_SIZE @@ -49,13 +52,31 @@ ENTRY(ftrace_caller) # allocate pt_regs and stack frame for ftrace_trace_function aghi %r15,-STACK_FRAME_SIZE stg %r1,(STACK_PTREGS_GPRS+15*8)(%r15) + xc STACK_PTREGS_ORIG_GPR2(8,%r15),STACK_PTREGS_ORIG_GPR2(%r15) + + .if \allregs == 1 stg %r14,(STACK_PTREGS_PSW)(%r15) - lg %r14,(__SF_GPRS+8*8)(%r1) # restore original return address stosm (STACK_PTREGS_PSW)(%r15),0 + .endif + + lg %r14,(__SF_GPRS+8*8)(%r1) # restore original return address aghi %r1,-TRACED_FUNC_FRAME_SIZE stg %r1,__SF_BACKCHAIN(%r15) stg %r0,(STACK_PTREGS_PSW+8)(%r15) stmg %r2,%r14,(STACK_PTREGS_GPRS+2*8)(%r15) + .endm + +SYM_CODE_START(ftrace_regs_caller) + ftrace_regs_entry 1 + j ftrace_common +SYM_CODE_END(ftrace_regs_caller) + +SYM_CODE_START(ftrace_caller) + ftrace_regs_entry 0 + j ftrace_common +SYM_CODE_END(ftrace_caller) + +SYM_CODE_START(ftrace_common) #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES aghik %r2,%r0,-MCOUNT_INSN_SIZE lgrl %r4,function_trace_op @@ -74,24 +95,31 @@ ENTRY(ftrace_caller) #ifdef CONFIG_FUNCTION_GRAPH_TRACER # The j instruction gets runtime patched to a nop instruction. # See ftrace_enable_ftrace_graph_caller. - .globl ftrace_graph_caller -ftrace_graph_caller: - j ftrace_graph_caller_end +SYM_INNER_LABEL(ftrace_graph_caller, SYM_L_GLOBAL) + j .Lftrace_graph_caller_end lmg %r2,%r3,(STACK_PTREGS_GPRS+14*8)(%r15) lg %r4,(STACK_PTREGS_PSW+8)(%r15) brasl %r14,prepare_ftrace_return stg %r2,(STACK_PTREGS_GPRS+14*8)(%r15) -ftrace_graph_caller_end: - .globl ftrace_graph_caller_end +.Lftrace_graph_caller_end: +#endif + lg %r0,(STACK_PTREGS_PSW+8)(%r15) +#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES + ltg %r1,STACK_PTREGS_ORIG_GPR2(%r15) + locgrz %r1,%r0 +#else + lg %r1,STACK_PTREGS_ORIG_GPR2(%r15) + ltgr %r1,%r1 + jnz 0f + lgr %r1,%r0 #endif - lg %r1,(STACK_PTREGS_PSW+8)(%r15) - lmg %r2,%r15,(STACK_PTREGS_GPRS+2*8)(%r15) +0: lmg %r2,%r15,(STACK_PTREGS_GPRS+2*8)(%r15) BR_EX %r1 -ENDPROC(ftrace_caller) +SYM_CODE_END(ftrace_common) #ifdef CONFIG_FUNCTION_GRAPH_TRACER -ENTRY(return_to_handler) +SYM_FUNC_START(return_to_handler) stmg %r2,%r5,32(%r15) lgr %r1,%r15 aghi %r15,-STACK_FRAME_OVERHEAD @@ -101,6 +129,6 @@ ENTRY(return_to_handler) lgr %r14,%r2 lmg %r2,%r5,32(%r15) BR_EX %r14 -ENDPROC(return_to_handler) +SYM_FUNC_END(return_to_handler) #endif diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c index 250e4dbf653c..60e6fec27bba 100644 --- a/arch/s390/kernel/nospec-branch.c +++ b/arch/s390/kernel/nospec-branch.c @@ -38,7 +38,7 @@ static int __init nospec_report(void) { if (test_facility(156)) pr_info("Spectre V2 mitigation: etokens\n"); - if (__is_defined(CC_USING_EXPOLINE) && !nospec_disable) + if (nospec_uses_trampoline()) pr_info("Spectre V2 mitigation: execute trampolines\n"); if (__test_facility(82, alt_stfle_fac_list)) pr_info("Spectre V2 mitigation: limited branch prediction\n"); diff --git a/arch/s390/kernel/nospec-sysfs.c b/arch/s390/kernel/nospec-sysfs.c index b4b5c8c21166..52d4353188ad 100644 --- a/arch/s390/kernel/nospec-sysfs.c +++ b/arch/s390/kernel/nospec-sysfs.c @@ -15,7 +15,7 @@ ssize_t cpu_show_spectre_v2(struct device *dev, { if (test_facility(156)) return sprintf(buf, "Mitigation: etokens\n"); - if (__is_defined(CC_USING_EXPOLINE) && !nospec_disable) + if (nospec_uses_trampoline()) return sprintf(buf, "Mitigation: execute trampolines\n"); if (__test_facility(82, alt_stfle_fac_list)) return sprintf(buf, "Mitigation: limited branch prediction\n"); diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 4a99154fe651..ee8707abdb6a 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -687,8 +687,10 @@ static void cpumf_pmu_stop(struct perf_event *event, int flags) false); if (cfdiag_diffctr(cpuhw, event->hw.config_base)) cfdiag_push_sample(event, cpuhw); - } else + } else if (cpuhw->flags & PMU_F_RESERVED) { + /* Only update when PMU not hotplugged off */ hw_perf_event_update(event); + } hwc->state |= PERF_HES_UPTODATE; } } @@ -773,22 +775,46 @@ static int __init cpumf_pmu_init(void) * counter set via normal file operations. */ -static atomic_t cfset_opencnt = ATOMIC_INIT(0); /* Excl. access */ +static atomic_t cfset_opencnt = ATOMIC_INIT(0); /* Access count */ static DEFINE_MUTEX(cfset_ctrset_mutex);/* Synchronize access to hardware */ struct cfset_call_on_cpu_parm { /* Parm struct for smp_call_on_cpu */ unsigned int sets; /* Counter set bit mask */ atomic_t cpus_ack; /* # CPUs successfully executed func */ }; -static struct cfset_request { /* CPUs and counter set bit mask */ +static struct cfset_session { /* CPUs and counter set bit mask */ + struct list_head head; /* Head of list of active processes */ +} cfset_session = { + .head = LIST_HEAD_INIT(cfset_session.head) +}; + +struct cfset_request { /* CPUs and counter set bit mask */ unsigned long ctrset; /* Bit mask of counter set to read */ cpumask_t mask; /* CPU mask to read from */ -} cfset_request; + struct list_head node; /* Chain to cfset_session.head */ +}; + +static void cfset_session_init(void) +{ + INIT_LIST_HEAD(&cfset_session.head); +} + +/* Remove current request from global bookkeeping. Maintain a counter set bit + * mask on a per CPU basis. + * Done in process context under mutex protection. + */ +static void cfset_session_del(struct cfset_request *p) +{ + list_del(&p->node); +} -static void cfset_ctrset_clear(void) +/* Add current request to global bookkeeping. Maintain a counter set bit mask + * on a per CPU basis. + * Done in process context under mutex protection. + */ +static void cfset_session_add(struct cfset_request *p) { - cpumask_clear(&cfset_request.mask); - cfset_request.ctrset = 0; + list_add(&p->node, &cfset_session.head); } /* The /dev/hwctr device access uses PMU_F_IN_USE to mark the device access @@ -827,15 +853,23 @@ static void cfset_ioctl_off(void *parm) struct cfset_call_on_cpu_parm *p = parm; int rc; - cpuhw->dev_state = 0; + /* Check if any counter set used by /dev/hwc */ for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc) - if ((p->sets & cpumf_ctr_ctl[rc])) - atomic_dec(&cpuhw->ctr_set[rc]); - rc = lcctl(cpuhw->state); /* Keep perf_event_open counter sets */ + if ((p->sets & cpumf_ctr_ctl[rc])) { + if (!atomic_dec_return(&cpuhw->ctr_set[rc])) { + ctr_set_disable(&cpuhw->dev_state, + cpumf_ctr_ctl[rc]); + ctr_set_stop(&cpuhw->dev_state, + cpumf_ctr_ctl[rc]); + } + } + /* Keep perf_event_open counter sets */ + rc = lcctl(cpuhw->dev_state | cpuhw->state); if (rc) pr_err("Counter set stop %#llx of /dev/%s failed rc=%i\n", cpuhw->state, S390_HWCTR_DEVICE, rc); - cpuhw->flags &= ~PMU_F_IN_USE; + if (!cpuhw->dev_state) + cpuhw->flags &= ~PMU_F_IN_USE; debug_sprintf_event(cf_dbg, 4, "%s rc %d state %#llx dev_state %#llx\n", __func__, rc, cpuhw->state, cpuhw->dev_state); } @@ -870,11 +904,26 @@ static void cfset_release_cpu(void *p) debug_sprintf_event(cf_dbg, 4, "%s state %#llx dev_state %#llx\n", __func__, cpuhw->state, cpuhw->dev_state); + cpuhw->dev_state = 0; rc = lcctl(cpuhw->state); /* Keep perf_event_open counter sets */ if (rc) pr_err("Counter set release %#llx of /dev/%s failed rc=%i\n", cpuhw->state, S390_HWCTR_DEVICE, rc); - cpuhw->dev_state = 0; +} + +/* This modifies the process CPU mask to adopt it to the currently online + * CPUs. Offline CPUs can not be addresses. This call terminates the access + * and is usually followed by close() or a new iotcl(..., START, ...) which + * creates a new request structure. + */ +static void cfset_all_stop(struct cfset_request *req) +{ + struct cfset_call_on_cpu_parm p = { + .sets = req->ctrset, + }; + + cpumask_and(&req->mask, &req->mask, cpu_online_mask); + on_each_cpu_mask(&req->mask, cfset_ioctl_off, &p, 1); } /* Release function is also called when application gets terminated without @@ -882,10 +931,19 @@ static void cfset_release_cpu(void *p) */ static int cfset_release(struct inode *inode, struct file *file) { - on_each_cpu(cfset_release_cpu, NULL, 1); + mutex_lock(&cfset_ctrset_mutex); + /* Open followed by close/exit has no private_data */ + if (file->private_data) { + cfset_all_stop(file->private_data); + cfset_session_del(file->private_data); + kfree(file->private_data); + file->private_data = NULL; + } + if (!atomic_dec_return(&cfset_opencnt)) + on_each_cpu(cfset_release_cpu, NULL, 1); + mutex_unlock(&cfset_ctrset_mutex); + hw_perf_event_destroy(NULL); - cfset_ctrset_clear(); - atomic_set(&cfset_opencnt, 0); return 0; } @@ -893,9 +951,10 @@ static int cfset_open(struct inode *inode, struct file *file) { if (!capable(CAP_SYS_ADMIN)) return -EPERM; - /* Only one user space program can open /dev/hwctr */ - if (atomic_xchg(&cfset_opencnt, 1)) - return -EBUSY; + mutex_lock(&cfset_ctrset_mutex); + if (atomic_inc_return(&cfset_opencnt) == 1) + cfset_session_init(); + mutex_unlock(&cfset_ctrset_mutex); cpumf_hw_inuse(); file->private_data = NULL; @@ -903,25 +962,10 @@ static int cfset_open(struct inode *inode, struct file *file) return nonseekable_open(inode, file); } -static int cfset_all_stop(void) -{ - struct cfset_call_on_cpu_parm p = { - .sets = cfset_request.ctrset, - }; - cpumask_var_t mask; - - if (!alloc_cpumask_var(&mask, GFP_KERNEL)) - return -ENOMEM; - cpumask_and(mask, &cfset_request.mask, cpu_online_mask); - on_each_cpu_mask(mask, cfset_ioctl_off, &p, 1); - free_cpumask_var(mask); - return 0; -} - -static int cfset_all_start(void) +static int cfset_all_start(struct cfset_request *req) { struct cfset_call_on_cpu_parm p = { - .sets = cfset_request.ctrset, + .sets = req->ctrset, .cpus_ack = ATOMIC_INIT(0), }; cpumask_var_t mask; @@ -929,7 +973,7 @@ static int cfset_all_start(void) if (!alloc_cpumask_var(&mask, GFP_KERNEL)) return -ENOMEM; - cpumask_and(mask, &cfset_request.mask, cpu_online_mask); + cpumask_and(mask, &req->mask, cpu_online_mask); on_each_cpu_mask(mask, cfset_ioctl_on, &p, 1); if (atomic_read(&p.cpus_ack) != cpumask_weight(mask)) { on_each_cpu_mask(mask, cfset_ioctl_off, &p, 1); @@ -1045,7 +1089,7 @@ static void cfset_cpu_read(void *parm) cpuhw->sets, cpuhw->used); } -static int cfset_all_read(unsigned long arg) +static int cfset_all_read(unsigned long arg, struct cfset_request *req) { struct cfset_call_on_cpu_parm p; cpumask_var_t mask; @@ -1054,46 +1098,53 @@ static int cfset_all_read(unsigned long arg) if (!alloc_cpumask_var(&mask, GFP_KERNEL)) return -ENOMEM; - p.sets = cfset_request.ctrset; - cpumask_and(mask, &cfset_request.mask, cpu_online_mask); + p.sets = req->ctrset; + cpumask_and(mask, &req->mask, cpu_online_mask); on_each_cpu_mask(mask, cfset_cpu_read, &p, 1); rc = cfset_all_copy(arg, mask); free_cpumask_var(mask); return rc; } -static long cfset_ioctl_read(unsigned long arg) +static long cfset_ioctl_read(unsigned long arg, struct cfset_request *req) { struct s390_ctrset_read read; - int ret = 0; + int ret = -ENODATA; - if (copy_from_user(&read, (char __user *)arg, sizeof(read))) - return -EFAULT; - ret = cfset_all_read(arg); + if (req && req->ctrset) { + if (copy_from_user(&read, (char __user *)arg, sizeof(read))) + return -EFAULT; + ret = cfset_all_read(arg, req); + } return ret; } -static long cfset_ioctl_stop(void) +static long cfset_ioctl_stop(struct file *file) { - int ret = ENXIO; - - if (cfset_request.ctrset) { - ret = cfset_all_stop(); - cfset_ctrset_clear(); + struct cfset_request *req = file->private_data; + int ret = -ENXIO; + + if (req) { + cfset_all_stop(req); + cfset_session_del(req); + kfree(req); + file->private_data = NULL; + ret = 0; } return ret; } -static long cfset_ioctl_start(unsigned long arg) +static long cfset_ioctl_start(unsigned long arg, struct file *file) { struct s390_ctrset_start __user *ustart; struct s390_ctrset_start start; + struct cfset_request *preq; void __user *umask; unsigned int len; int ret = 0; size_t need; - if (cfset_request.ctrset) + if (file->private_data) return -EBUSY; ustart = (struct s390_ctrset_start __user *)arg; if (copy_from_user(&start, ustart, sizeof(start))) @@ -1108,25 +1159,36 @@ static long cfset_ioctl_start(unsigned long arg) return -EINVAL; /* Invalid counter set */ if (!start.counter_sets) return -EINVAL; /* No counter set at all? */ - cpumask_clear(&cfset_request.mask); + + preq = kzalloc(sizeof(*preq), GFP_KERNEL); + if (!preq) + return -ENOMEM; + cpumask_clear(&preq->mask); len = min_t(u64, start.cpumask_len, cpumask_size()); umask = (void __user *)start.cpumask; - if (copy_from_user(&cfset_request.mask, umask, len)) + if (copy_from_user(&preq->mask, umask, len)) { + kfree(preq); return -EFAULT; - if (cpumask_empty(&cfset_request.mask)) + } + if (cpumask_empty(&preq->mask)) { + kfree(preq); return -EINVAL; + } need = cfset_needspace(start.counter_sets); - if (put_user(need, &ustart->data_bytes)) - ret = -EFAULT; - if (ret) - goto out; - cfset_request.ctrset = start.counter_sets; - ret = cfset_all_start(); -out: - if (ret) - cfset_ctrset_clear(); - debug_sprintf_event(cf_dbg, 4, "%s sets %#lx need %ld ret %d\n", - __func__, cfset_request.ctrset, need, ret); + if (put_user(need, &ustart->data_bytes)) { + kfree(preq); + return -EFAULT; + } + preq->ctrset = start.counter_sets; + ret = cfset_all_start(preq); + if (!ret) { + cfset_session_add(preq); + file->private_data = preq; + debug_sprintf_event(cf_dbg, 4, "%s set %#lx need %ld ret %d\n", + __func__, preq->ctrset, need, ret); + } else { + kfree(preq); + } return ret; } @@ -1136,7 +1198,7 @@ out: * counter set keeps running until explicitly stopped. Returns the number * of bytes needed to store the counter values. If another S390_HWCTR_START * ioctl subcommand is called without a previous S390_HWCTR_STOP stop - * command, -EBUSY is returned. + * command on the same file descriptor, -EBUSY is returned. * S390_HWCTR_READ: Read the counter set values from specified CPU list given * with the S390_HWCTR_START command. * S390_HWCTR_STOP: Stops the counter sets on the CPU list given with the @@ -1150,13 +1212,13 @@ static long cfset_ioctl(struct file *file, unsigned int cmd, unsigned long arg) mutex_lock(&cfset_ctrset_mutex); switch (cmd) { case S390_HWCTR_START: - ret = cfset_ioctl_start(arg); + ret = cfset_ioctl_start(arg, file); break; case S390_HWCTR_STOP: - ret = cfset_ioctl_stop(); + ret = cfset_ioctl_stop(file); break; case S390_HWCTR_READ: - ret = cfset_ioctl_read(arg); + ret = cfset_ioctl_read(arg, file->private_data); break; default: ret = -ENOTTY; @@ -1182,29 +1244,41 @@ static struct miscdevice cfset_dev = { .fops = &cfset_fops, }; +/* Hotplug add of a CPU. Scan through all active processes and add + * that CPU to the list of CPUs supplied with ioctl(..., START, ...). + */ int cfset_online_cpu(unsigned int cpu) { struct cfset_call_on_cpu_parm p; + struct cfset_request *rp; mutex_lock(&cfset_ctrset_mutex); - if (cfset_request.ctrset) { - p.sets = cfset_request.ctrset; - cfset_ioctl_on(&p); - cpumask_set_cpu(cpu, &cfset_request.mask); + if (!list_empty(&cfset_session.head)) { + list_for_each_entry(rp, &cfset_session.head, node) { + p.sets = rp->ctrset; + cfset_ioctl_on(&p); + cpumask_set_cpu(cpu, &rp->mask); + } } mutex_unlock(&cfset_ctrset_mutex); return 0; } +/* Hotplug remove of a CPU. Scan through all active processes and clear + * that CPU from the list of CPUs supplied with ioctl(..., START, ...). + */ int cfset_offline_cpu(unsigned int cpu) { struct cfset_call_on_cpu_parm p; + struct cfset_request *rp; mutex_lock(&cfset_ctrset_mutex); - if (cfset_request.ctrset) { - p.sets = cfset_request.ctrset; - cfset_ioctl_off(&p); - cpumask_clear_cpu(cpu, &cfset_request.mask); + if (!list_empty(&cfset_session.head)) { + list_for_each_entry(rp, &cfset_session.head, node) { + p.sets = rp->ctrset; + cfset_ioctl_off(&p); + cpumask_clear_cpu(cpu, &rp->mask); + } } mutex_unlock(&cfset_ctrset_mutex); return 0; diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index e5dd46b1bff8..e8858b2de24b 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -141,7 +141,7 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp, frame->childregs.gprs[10] = arg; frame->childregs.gprs[11] = (unsigned long)do_exit; frame->childregs.orig_gpr2 = -1; - + frame->childregs.last_break = 1; return 0; } frame->childregs = *current_pt_regs(); diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 67e5fff96ee0..225ab2d0a4c6 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -95,10 +95,10 @@ EXPORT_SYMBOL(console_irq); * relocated above 2 GB, because it has to use 31 bit addresses. * Such code and data is part of the .amode31 section. */ -unsigned long __amode31_ref __samode31 = __pa(&_samode31); -unsigned long __amode31_ref __eamode31 = __pa(&_eamode31); -unsigned long __amode31_ref __stext_amode31 = __pa(&_stext_amode31); -unsigned long __amode31_ref __etext_amode31 = __pa(&_etext_amode31); +unsigned long __amode31_ref __samode31 = (unsigned long)&_samode31; +unsigned long __amode31_ref __eamode31 = (unsigned long)&_eamode31; +unsigned long __amode31_ref __stext_amode31 = (unsigned long)&_stext_amode31; +unsigned long __amode31_ref __etext_amode31 = (unsigned long)&_etext_amode31; struct exception_table_entry __amode31_ref *__start_amode31_ex_table = _start_amode31_ex_table; struct exception_table_entry __amode31_ref *__stop_amode31_ex_table = _stop_amode31_ex_table; @@ -149,6 +149,7 @@ struct mem_detect_info __bootdata(mem_detect); struct initrd_data __bootdata(initrd_data); unsigned long __bootdata_preserved(__kaslr_offset); +unsigned long __bootdata(__amode31_base); unsigned int __bootdata_preserved(zlib_dfltcc_support); EXPORT_SYMBOL(zlib_dfltcc_support); u64 __bootdata_preserved(stfle_fac_list[16]); @@ -173,6 +174,8 @@ unsigned long MODULES_END; struct lowcore *lowcore_ptr[NR_CPUS]; EXPORT_SYMBOL(lowcore_ptr); +DEFINE_STATIC_KEY_FALSE(cpu_has_bear); + /* * The Write Back bit position in the physaddr is given by the SLPC PCI. * Leaving the mask zero always uses write through which is safe @@ -593,7 +596,8 @@ static void __init setup_resources(void) * part of the System RAM resource. */ if (crashk_res.end) { - memblock_add_node(crashk_res.start, resource_size(&crashk_res), 0); + memblock_add_node(crashk_res.start, resource_size(&crashk_res), + 0, MEMBLOCK_NONE); memblock_reserve(crashk_res.start, resource_size(&crashk_res)); insert_resource(&iomem_resource, &crashk_res); } @@ -602,7 +606,7 @@ static void __init setup_resources(void) static void __init setup_memory_end(void) { - memblock_remove(ident_map_size, ULONG_MAX); + memblock_remove(ident_map_size, PHYS_ADDR_MAX - ident_map_size); max_pfn = max_low_pfn = PFN_DOWN(ident_map_size); pr_notice("The maximum memory size is %luMB\n", ident_map_size >> 20); } @@ -634,14 +638,6 @@ static struct notifier_block kdump_mem_nb = { #endif /* - * Make sure that the area above identity mapping is protected - */ -static void __init reserve_above_ident_map(void) -{ - memblock_reserve(ident_map_size, ULONG_MAX); -} - -/* * Reserve memory for kdump kernel to be loaded with kexec */ static void __init reserve_crashkernel(void) @@ -693,7 +689,7 @@ static void __init reserve_crashkernel(void) } if (register_memory_notifier(&kdump_mem_nb)) { - memblock_free(crash_base, crash_size); + memblock_phys_free(crash_base, crash_size); return; } @@ -718,7 +714,7 @@ static void __init reserve_initrd(void) #ifdef CONFIG_BLK_DEV_INITRD if (!initrd_data.start || !initrd_data.size) return; - initrd_start = initrd_data.start; + initrd_start = (unsigned long)__va(initrd_data.start); initrd_end = initrd_start + initrd_data.size; memblock_reserve(initrd_data.start, initrd_data.size); #endif @@ -748,7 +744,7 @@ static void __init free_mem_detect_info(void) get_mem_detect_reserved(&start, &size); if (size) - memblock_free(start, size); + memblock_phys_free(start, size); } static const char * __init get_mem_info_source(void) @@ -781,7 +777,6 @@ static void __init memblock_add_mem_detect_info(void) } memblock_set_bottom_up(false); memblock_set_node(0, ULONG_MAX, &memblock.memory, 0); - memblock_dump_all(); } /* @@ -793,7 +788,7 @@ static void __init check_initrd(void) if (initrd_data.start && initrd_data.size && !memblock_is_region_memory(initrd_data.start, initrd_data.size)) { pr_err("The initial RAM disk does not fit into the memory\n"); - memblock_free(initrd_data.start, initrd_data.size); + memblock_phys_free(initrd_data.start, initrd_data.size); initrd_start = initrd_end = 0; } #endif @@ -804,12 +799,10 @@ static void __init check_initrd(void) */ static void __init reserve_kernel(void) { - unsigned long start_pfn = PFN_UP(__pa(_end)); - memblock_reserve(0, STARTUP_NORMAL_OFFSET); - memblock_reserve((unsigned long)sclp_early_sccb, EXT_SCCB_READ_SCP); - memblock_reserve((unsigned long)_stext, PFN_PHYS(start_pfn) - - (unsigned long)_stext); + memblock_reserve(__amode31_base, __eamode31 - __samode31); + memblock_reserve(__pa(sclp_early_sccb), EXT_SCCB_READ_SCP); + memblock_reserve(__pa(_stext), _end - _stext); } static void __init setup_memory(void) @@ -824,27 +817,18 @@ static void __init setup_memory(void) storage_key_init_range(start, end); psw_set_key(PAGE_DEFAULT_KEY); - - /* Only cosmetics */ - memblock_enforce_memory_limit(memblock_end_of_DRAM()); } static void __init relocate_amode31_section(void) { - unsigned long amode31_addr, amode31_size; - long amode31_offset; + unsigned long amode31_size = __eamode31 - __samode31; + long amode31_offset = __amode31_base - __samode31; long *ptr; - /* Allocate a new AMODE31 capable memory region */ - amode31_size = __eamode31 - __samode31; pr_info("Relocating AMODE31 section of size 0x%08lx\n", amode31_size); - amode31_addr = (unsigned long)memblock_alloc_low(amode31_size, PAGE_SIZE); - if (!amode31_addr) - panic("Failed to allocate memory for AMODE31 section\n"); - amode31_offset = amode31_addr - __samode31; /* Move original AMODE31 section to the new one */ - memmove((void *)amode31_addr, (void *)__samode31, amode31_size); + memmove((void *)__amode31_base, (void *)__samode31, amode31_size); /* Zero out the old AMODE31 section to catch invalid accesses within it */ memset((void *)__samode31, 0, amode31_size); @@ -883,14 +867,12 @@ static void __init setup_randomness(void) { struct sysinfo_3_2_2 *vmms; - vmms = (struct sysinfo_3_2_2 *) memblock_phys_alloc(PAGE_SIZE, - PAGE_SIZE); + vmms = memblock_alloc(PAGE_SIZE, PAGE_SIZE); if (!vmms) panic("Failed to allocate memory for sysinfo structure\n"); - if (stsi(vmms, 3, 2, 2) == 0 && vmms->count) add_device_randomness(&vmms->vm, sizeof(vmms->vm[0]) * vmms->count); - memblock_free((unsigned long) vmms, PAGE_SIZE); + memblock_free(vmms, PAGE_SIZE); } /* @@ -1005,24 +987,24 @@ void __init setup_arch(char **cmdline_p) setup_control_program_code(); /* Do some memory reservations *before* memory is added to memblock */ - reserve_above_ident_map(); reserve_kernel(); reserve_initrd(); reserve_certificate_list(); reserve_mem_detect_info(); + memblock_set_current_limit(ident_map_size); memblock_allow_resize(); /* Get information about *all* installed memory */ memblock_add_mem_detect_info(); free_mem_detect_info(); + setup_memory_end(); + memblock_dump_all(); + setup_memory(); relocate_amode31_section(); setup_cr(); - setup_uv(); - setup_memory_end(); - setup_memory(); dma_contiguous_reserve(ident_map_size); vmcp_cma_reserve(); if (MACHINE_HAS_EDAT2) @@ -1047,6 +1029,9 @@ void __init setup_arch(char **cmdline_p) smp_detect_cpus(); topology_init_early(); + if (test_facility(193)) + static_branch_enable(&cpu_has_bear); + /* * Create kernel page tables and switch to virtual addressing. */ diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 1a04e5bdf655..78a8ea6fd582 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -723,7 +723,7 @@ void __init smp_save_dump_cpus(void) /* Get the CPU registers */ smp_save_cpu_regs(sa, addr, is_boot_cpu, page); } - memblock_free(page, PAGE_SIZE); + memblock_phys_free(page, PAGE_SIZE); diag_amode31_ops.diag308_reset(); pcpu_set_smt(0); } @@ -880,7 +880,7 @@ void __init smp_detect_cpus(void) /* Add CPUs present at boot */ __smp_rescan_cpus(info, true); - memblock_free_early((unsigned long)info, sizeof(*info)); + memblock_phys_free((unsigned long)info, sizeof(*info)); } /* diff --git a/arch/s390/kernel/syscall.c b/arch/s390/kernel/syscall.c index 8fe2d23b64f4..dc2355c623d6 100644 --- a/arch/s390/kernel/syscall.c +++ b/arch/s390/kernel/syscall.c @@ -154,6 +154,8 @@ void noinstr __do_syscall(struct pt_regs *regs, int per_trap) regs->psw = S390_lowcore.svc_old_psw; regs->int_code = S390_lowcore.svc_int_code; update_timer_sys(); + if (static_branch_likely(&cpu_has_bear)) + current->thread.last_break = regs->last_break; local_irq_enable(); regs->orig_gpr2 = regs->gprs[2]; diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index df5261e5cfe1..ed9c5c2eafad 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -451,3 +451,4 @@ 446 common landlock_restrict_self sys_landlock_restrict_self sys_landlock_restrict_self # 447 reserved for memfd_secret 448 common process_mrelease sys_process_mrelease sys_process_mrelease +449 common futex_waitv sys_futex_waitv sys_futex_waitv diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index bcefc2173de4..2b780786fc68 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -84,7 +84,7 @@ static void default_trap_handler(struct pt_regs *regs) { if (user_mode(regs)) { report_user_fault(regs, SIGSEGV, 0); - do_exit(SIGSEGV); + force_exit_sig(SIGSEGV); } else die(regs, "Unknown program exception"); } @@ -300,7 +300,6 @@ static void (*pgm_check_table[128])(struct pt_regs *regs); void noinstr __do_pgm_check(struct pt_regs *regs) { - unsigned long last_break = S390_lowcore.breaking_event_addr; unsigned int trapnr; irqentry_state_t state; @@ -311,10 +310,11 @@ void noinstr __do_pgm_check(struct pt_regs *regs) if (user_mode(regs)) { update_timer_sys(); - if (last_break < 4096) - last_break = 1; - current->thread.last_break = last_break; - regs->args[0] = last_break; + if (!static_branch_likely(&cpu_has_bear)) { + if (regs->last_break < 4096) + regs->last_break = 1; + } + current->thread.last_break = regs->last_break; } if (S390_lowcore.pgm_code & 0x0200) { diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index 8b0e62507d62..386d4e42b8d3 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -64,7 +64,7 @@ void __init setup_uv(void) } if (uv_init(uv_stor_base, uv_info.uv_base_stor_len)) { - memblock_free(uv_stor_base, uv_info.uv_base_stor_len); + memblock_phys_free(uv_stor_base, uv_info.uv_base_stor_len); goto fail; } diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile index e3e6ac5686df..245bddfe9bc0 100644 --- a/arch/s390/kernel/vdso32/Makefile +++ b/arch/s390/kernel/vdso32/Makefile @@ -22,7 +22,7 @@ KBUILD_AFLAGS_32 += -m31 -s KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS)) KBUILD_CFLAGS_32 += -m31 -fPIC -shared -fno-common -fno-builtin -LDFLAGS_vdso32.so.dbg += -fPIC -shared -nostdlib -soname=linux-vdso32.so.1 \ +LDFLAGS_vdso32.so.dbg += -fPIC -shared -soname=linux-vdso32.so.1 \ --hash-style=both --build-id=sha1 -melf_s390 -T $(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32) diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index 6568de236701..9e2b95a222a9 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -8,8 +8,9 @@ ARCH_REL_TYPE_ABS += R_390_GOT|R_390_PLT include $(srctree)/lib/vdso/Makefile obj-vdso64 = vdso_user_wrapper.o note.o obj-cvdso64 = vdso64_generic.o getcpu.o -CFLAGS_REMOVE_getcpu.o = -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE) -CFLAGS_REMOVE_vdso64_generic.o = -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE) +VDSO_CFLAGS_REMOVE := -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE) $(CC_FLAGS_CHECK_STACK) +CFLAGS_REMOVE_getcpu.o = $(VDSO_CFLAGS_REMOVE) +CFLAGS_REMOVE_vdso64_generic.o = $(VDSO_CFLAGS_REMOVE) # Build rules @@ -25,7 +26,7 @@ KBUILD_AFLAGS_64 += -m64 -s KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS)) KBUILD_CFLAGS_64 += -m64 -fPIC -shared -fno-common -fno-builtin -ldflags-y := -fPIC -shared -nostdlib -soname=linux-vdso64.so.1 \ +ldflags-y := -fPIC -shared -soname=linux-vdso64.so.1 \ --hash-style=both --build-id=sha1 -T $(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64) diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 63bdb9e1bfc1..42c43521878f 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -212,6 +212,7 @@ SECTIONS QUAD(__dynsym_start) /* dynsym_start */ QUAD(__rela_dyn_start) /* rela_dyn_start */ QUAD(__rela_dyn_end) /* rela_dyn_end */ + QUAD(_eamode31 - _samode31) /* amode31_size */ } :NONE /* Debugging sections. */ diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 2245f4b8d362..c3bd993fdd0c 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -960,7 +960,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu) /* bit 1+2 of the target are the ilc, so we can directly use ilen */ rc |= put_guest_lc(vcpu, ilen, (u16 *) __LC_PGM_ILC); rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->gbea, - (u64 *) __LC_LAST_BREAK); + (u64 *) __LC_PGM_LAST_BREAK); rc |= put_guest_lc(vcpu, pgm_info.code, (u16 *)__LC_PGM_INT_CODE); rc |= write_guest_lc(vcpu, __LC_PGM_OLD_PSW, diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index c6257f625929..14a18ba5ff2c 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -585,6 +585,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = KVM_MAX_VCPUS; else if (sclp.has_esca && sclp.has_64bscao) r = KVM_S390_ESCA_CPU_SLOTS; + if (ext == KVM_CAP_NR_VCPUS) + r = min_t(unsigned int, num_online_cpus(), r); break; case KVM_CAP_S390_COW: r = MACHINE_HAS_ESOP; diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile index 678333936f78..707cd4622c13 100644 --- a/arch/s390/lib/Makefile +++ b/arch/s390/lib/Makefile @@ -7,6 +7,8 @@ lib-y += delay.o string.o uaccess.o find.o spinlock.o obj-y += mem.o xor.o lib-$(CONFIG_KPROBES) += probes.o lib-$(CONFIG_UPROBES) += probes.o +obj-$(CONFIG_S390_KPROBES_SANITY_TEST) += test_kprobes_s390.o +test_kprobes_s390-objs += test_kprobes_asm.o test_kprobes.o # Instrumenting memory accesses to __user data (in different address space) # produce false positives diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index 9b2dab5a69f9..692dc84cd19c 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c @@ -26,7 +26,7 @@ static int __init spin_retry_init(void) } early_initcall(spin_retry_init); -/** +/* * spin_retry= parameter */ static int __init spin_retry_setup(char *str) diff --git a/arch/s390/lib/string.c b/arch/s390/lib/string.c index 47080560e0d8..7d8741818239 100644 --- a/arch/s390/lib/string.c +++ b/arch/s390/lib/string.c @@ -101,32 +101,6 @@ EXPORT_SYMBOL(strcpy); #endif /** - * strlcpy - Copy a %NUL terminated string into a sized buffer - * @dest: Where to copy the string to - * @src: Where to copy the string from - * @size: size of destination buffer - * - * Compatible with *BSD: the result is always a valid - * NUL-terminated string that fits in the buffer (unless, - * of course, the buffer size is zero). It does not pad - * out the result like strncpy() does. - */ -#ifdef __HAVE_ARCH_STRLCPY -size_t strlcpy(char *dest, const char *src, size_t size) -{ - size_t ret = __strend(src) - src; - - if (size) { - size_t len = (ret >= size) ? size-1 : ret; - dest[len] = '\0'; - memcpy(dest, src, len); - } - return ret; -} -EXPORT_SYMBOL(strlcpy); -#endif - -/** * strncpy - Copy a length-limited, %NUL-terminated string * @dest: Where to copy the string to * @src: Where to copy the string from @@ -254,25 +228,6 @@ int strcmp(const char *s1, const char *s2) EXPORT_SYMBOL(strcmp); #endif -/** - * strrchr - Find the last occurrence of a character in a string - * @s: The string to be searched - * @c: The character to search for - */ -#ifdef __HAVE_ARCH_STRRCHR -char *strrchr(const char *s, int c) -{ - ssize_t len = __strend(s) - s; - - do { - if (s[len] == (char)c) - return (char *)s + len; - } while (--len >= 0); - return NULL; -} -EXPORT_SYMBOL(strrchr); -#endif - static inline int clcle(const char *s1, unsigned long l1, const char *s2, unsigned long l2) { diff --git a/arch/s390/lib/test_kprobes.c b/arch/s390/lib/test_kprobes.c new file mode 100644 index 000000000000..9e62d62812e5 --- /dev/null +++ b/arch/s390/lib/test_kprobes.c @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include <linux/kernel.h> +#include <linux/kprobes.h> +#include <linux/random.h> +#include <kunit/test.h> +#include "test_kprobes.h" + +static struct kprobe kp; + +static void setup_kprobe(struct kunit *test, struct kprobe *kp, + const char *symbol, int offset) +{ + kp->offset = offset; + kp->addr = NULL; + kp->symbol_name = symbol; +} + +static void test_kprobe_offset(struct kunit *test, struct kprobe *kp, + const char *target, int offset) +{ + int ret; + + setup_kprobe(test, kp, target, 0); + ret = register_kprobe(kp); + if (!ret) + unregister_kprobe(kp); + KUNIT_EXPECT_EQ(test, 0, ret); + setup_kprobe(test, kp, target, offset); + ret = register_kprobe(kp); + KUNIT_EXPECT_EQ(test, -EINVAL, ret); + if (!ret) + unregister_kprobe(kp); +} + +static void test_kprobe_odd(struct kunit *test) +{ + test_kprobe_offset(test, &kp, "kprobes_target_odd", + kprobes_target_odd_offs); +} + +static void test_kprobe_in_insn4(struct kunit *test) +{ + test_kprobe_offset(test, &kp, "kprobes_target_in_insn4", + kprobes_target_in_insn4_offs); +} + +static void test_kprobe_in_insn6_lo(struct kunit *test) +{ + test_kprobe_offset(test, &kp, "kprobes_target_in_insn6_lo", + kprobes_target_in_insn6_lo_offs); +} + +static void test_kprobe_in_insn6_hi(struct kunit *test) +{ + test_kprobe_offset(test, &kp, "kprobes_target_in_insn6_hi", + kprobes_target_in_insn6_hi_offs); +} + +static struct kunit_case kprobes_testcases[] = { + KUNIT_CASE(test_kprobe_odd), + KUNIT_CASE(test_kprobe_in_insn4), + KUNIT_CASE(test_kprobe_in_insn6_lo), + KUNIT_CASE(test_kprobe_in_insn6_hi), + {} +}; + +static struct kunit_suite kprobes_test_suite = { + .name = "kprobes_test_s390", + .test_cases = kprobes_testcases, +}; + +kunit_test_suites(&kprobes_test_suite); + +MODULE_LICENSE("GPL"); diff --git a/arch/s390/lib/test_kprobes.h b/arch/s390/lib/test_kprobes.h new file mode 100644 index 000000000000..2b4c9bc337f1 --- /dev/null +++ b/arch/s390/lib/test_kprobes.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +#ifndef TEST_KPROBES_H +#define TEST_KPROBES_H + +extern unsigned long kprobes_target_odd_offs; +extern unsigned long kprobes_target_in_insn4_offs; +extern unsigned long kprobes_target_in_insn6_lo_offs; +extern unsigned long kprobes_target_in_insn6_hi_offs; + +#endif diff --git a/arch/s390/lib/test_kprobes_asm.S b/arch/s390/lib/test_kprobes_asm.S new file mode 100644 index 000000000000..ade7a3042334 --- /dev/null +++ b/arch/s390/lib/test_kprobes_asm.S @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +#include <linux/linkage.h> +#include <asm/ftrace.h> + +#define KPROBES_TARGET_START(name) \ + SYM_FUNC_START(name); \ + FTRACE_GEN_NOP_ASM(name) + +#define KPROBES_TARGET_END(name) \ + SYM_FUNC_END(name); \ + SYM_DATA(name##_offs, .quad 1b - name) + +KPROBES_TARGET_START(kprobes_target_in_insn4) + .word 0x4700 // bc 0,0 +1: .word 0x0000 + br %r14 +KPROBES_TARGET_END(kprobes_target_in_insn4) + +KPROBES_TARGET_START(kprobes_target_in_insn6_lo) + .word 0xe310 // ly 1,0 +1: .word 0x0000 + .word 0x0058 + br %r14 +KPROBES_TARGET_END(kprobes_target_in_insn6_lo) + +KPROBES_TARGET_START(kprobes_target_in_insn6_hi) + .word 0xe310 // ly 1,0 + .word 0x0000 +1: .word 0x0058 + br %r14 +KPROBES_TARGET_END(kprobes_target_in_insn6_hi) + +KPROBES_TARGET_START(kprobes_target_bp) + nop + .word 0x0000 + nop +1: br %r14 +KPROBES_TARGET_END(kprobes_target_bp) + +KPROBES_TARGET_START(kprobes_target_odd) + .byte 0x07 +1: .byte 0x07 + br %r14 +KPROBES_TARGET_END(kprobes_target_odd) diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c index ecf327d743a0..cfc5f5557c06 100644 --- a/arch/s390/lib/test_unwind.c +++ b/arch/s390/lib/test_unwind.c @@ -3,7 +3,7 @@ * Test module for unwind_for_each_frame */ -#define pr_fmt(fmt) "test_unwind: " fmt +#include <kunit/test.h> #include <asm/unwind.h> #include <linux/completion.h> #include <linux/kallsyms.h> @@ -16,6 +16,8 @@ #include <linux/wait.h> #include <asm/irq.h> +struct kunit *current_test; + #define BT_BUF_SIZE (PAGE_SIZE * 4) /* @@ -29,7 +31,7 @@ static void print_backtrace(char *bt) p = strsep(&bt, "\n"); if (!p) break; - pr_err("%s\n", p); + kunit_err(current_test, "%s\n", p); } } @@ -49,7 +51,7 @@ static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs, bt = kmalloc(BT_BUF_SIZE, GFP_ATOMIC); if (!bt) { - pr_err("failed to allocate backtrace buffer\n"); + kunit_err(current_test, "failed to allocate backtrace buffer\n"); return -ENOMEM; } /* Unwind. */ @@ -63,7 +65,7 @@ static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs, if (frame_count++ == max_frames) break; if (state.reliable && !addr) { - pr_err("unwind state reliable but addr is 0\n"); + kunit_err(current_test, "unwind state reliable but addr is 0\n"); ret = -EINVAL; break; } @@ -75,7 +77,7 @@ static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs, stack_type_name(state.stack_info.type), (void *)state.sp, (void *)state.ip); if (bt_pos >= BT_BUF_SIZE) - pr_err("backtrace buffer is too small\n"); + kunit_err(current_test, "backtrace buffer is too small\n"); } frame_count += 1; if (prev_is_func2 && str_has_prefix(sym, "unwindme_func1")) @@ -85,15 +87,15 @@ static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs, /* Check the results. */ if (unwind_error(&state)) { - pr_err("unwind error\n"); + kunit_err(current_test, "unwind error\n"); ret = -EINVAL; } if (!seen_func2_func1) { - pr_err("unwindme_func2 and unwindme_func1 not found\n"); + kunit_err(current_test, "unwindme_func2 and unwindme_func1 not found\n"); ret = -EINVAL; } if (frame_count == max_frames) { - pr_err("Maximum number of frames exceeded\n"); + kunit_err(current_test, "Maximum number of frames exceeded\n"); ret = -EINVAL; } if (ret) @@ -166,7 +168,7 @@ static noinline int unwindme_func4(struct unwindme *u) kp.pre_handler = pgm_pre_handler; ret = register_kprobe(&kp); if (ret < 0) { - pr_err("register_kprobe failed %d\n", ret); + kunit_err(current_test, "register_kprobe failed %d\n", ret); return -EINVAL; } @@ -252,7 +254,7 @@ static int test_unwind_irq(struct unwindme *u) } /* Spawns a task and passes it to test_unwind(). */ -static int test_unwind_task(struct unwindme *u) +static int test_unwind_task(struct kunit *test, struct unwindme *u) { struct task_struct *task; int ret; @@ -267,7 +269,7 @@ static int test_unwind_task(struct unwindme *u) */ task = kthread_run(unwindme_func1, u, "%s", __func__); if (IS_ERR(task)) { - pr_err("kthread_run() failed\n"); + kunit_err(test, "kthread_run() failed\n"); return PTR_ERR(task); } /* @@ -282,77 +284,98 @@ static int test_unwind_task(struct unwindme *u) return ret; } -static int test_unwind_flags(int flags) +struct test_params { + int flags; + char *name; +}; + +/* + * Create required parameter list for tests + */ +static const struct test_params param_list[] = { + {.flags = UWM_DEFAULT, .name = "UWM_DEFAULT"}, + {.flags = UWM_SP, .name = "UWM_SP"}, + {.flags = UWM_REGS, .name = "UWM_REGS"}, + {.flags = UWM_SWITCH_STACK, + .name = "UWM_SWITCH_STACK"}, + {.flags = UWM_SP | UWM_REGS, + .name = "UWM_SP | UWM_REGS"}, + {.flags = UWM_CALLER | UWM_SP, + .name = "WM_CALLER | UWM_SP"}, + {.flags = UWM_CALLER | UWM_SP | UWM_REGS, + .name = "UWM_CALLER | UWM_SP | UWM_REGS"}, + {.flags = UWM_CALLER | UWM_SP | UWM_REGS | UWM_SWITCH_STACK, + .name = "UWM_CALLER | UWM_SP | UWM_REGS | UWM_SWITCH_STACK"}, + {.flags = UWM_THREAD, .name = "UWM_THREAD"}, + {.flags = UWM_THREAD | UWM_SP, + .name = "UWM_THREAD | UWM_SP"}, + {.flags = UWM_THREAD | UWM_CALLER | UWM_SP, + .name = "UWM_THREAD | UWM_CALLER | UWM_SP"}, + {.flags = UWM_IRQ, .name = "UWM_IRQ"}, + {.flags = UWM_IRQ | UWM_SWITCH_STACK, + .name = "UWM_IRQ | UWM_SWITCH_STACK"}, + {.flags = UWM_IRQ | UWM_SP, + .name = "UWM_IRQ | UWM_SP"}, + {.flags = UWM_IRQ | UWM_REGS, + .name = "UWM_IRQ | UWM_REGS"}, + {.flags = UWM_IRQ | UWM_SP | UWM_REGS, + .name = "UWM_IRQ | UWM_SP | UWM_REGS"}, + {.flags = UWM_IRQ | UWM_CALLER | UWM_SP, + .name = "UWM_IRQ | UWM_CALLER | UWM_SP"}, + {.flags = UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS, + .name = "UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS"}, + {.flags = UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS | UWM_SWITCH_STACK, + .name = "UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS | UWM_SWITCH_STACK"}, + #ifdef CONFIG_KPROBES + {.flags = UWM_PGM, .name = "UWM_PGM"}, + {.flags = UWM_PGM | UWM_SP, + .name = "UWM_PGM | UWM_SP"}, + {.flags = UWM_PGM | UWM_REGS, + .name = "UWM_PGM | UWM_REGS"}, + {.flags = UWM_PGM | UWM_SP | UWM_REGS, + .name = "UWM_PGM | UWM_SP | UWM_REGS"}, + #endif +}; + +/* + * Parameter description generator: required for KUNIT_ARRAY_PARAM() + */ +static void get_desc(const struct test_params *params, char *desc) +{ + strscpy(desc, params->name, KUNIT_PARAM_DESC_SIZE); +} + +/* + * Create test_unwind_gen_params + */ +KUNIT_ARRAY_PARAM(test_unwind, param_list, get_desc); + +static void test_unwind_flags(struct kunit *test) { struct unwindme u; + const struct test_params *params; - u.flags = flags; + current_test = test; + params = (const struct test_params *)test->param_value; + u.flags = params->flags; if (u.flags & UWM_THREAD) - return test_unwind_task(&u); + KUNIT_EXPECT_EQ(test, 0, test_unwind_task(test, &u)); else if (u.flags & UWM_IRQ) - return test_unwind_irq(&u); + KUNIT_EXPECT_EQ(test, 0, test_unwind_irq(&u)); else - return unwindme_func1(&u); + KUNIT_EXPECT_EQ(test, 0, unwindme_func1(&u)); } -static int test_unwind_init(void) -{ - int failed = 0; - int total = 0; - -#define TEST(flags) \ -do { \ - pr_info("[ RUN ] " #flags "\n"); \ - total++; \ - if (!test_unwind_flags((flags))) { \ - pr_info("[ OK ] " #flags "\n"); \ - } else { \ - pr_err("[ FAILED ] " #flags "\n"); \ - failed++; \ - } \ -} while (0) - - pr_info("running stack unwinder tests"); - TEST(UWM_DEFAULT); - TEST(UWM_SP); - TEST(UWM_REGS); - TEST(UWM_SWITCH_STACK); - TEST(UWM_SP | UWM_REGS); - TEST(UWM_CALLER | UWM_SP); - TEST(UWM_CALLER | UWM_SP | UWM_REGS); - TEST(UWM_CALLER | UWM_SP | UWM_REGS | UWM_SWITCH_STACK); - TEST(UWM_THREAD); - TEST(UWM_THREAD | UWM_SP); - TEST(UWM_THREAD | UWM_CALLER | UWM_SP); - TEST(UWM_IRQ); - TEST(UWM_IRQ | UWM_SWITCH_STACK); - TEST(UWM_IRQ | UWM_SP); - TEST(UWM_IRQ | UWM_REGS); - TEST(UWM_IRQ | UWM_SP | UWM_REGS); - TEST(UWM_IRQ | UWM_CALLER | UWM_SP); - TEST(UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS); - TEST(UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS | UWM_SWITCH_STACK); -#ifdef CONFIG_KPROBES - TEST(UWM_PGM); - TEST(UWM_PGM | UWM_SP); - TEST(UWM_PGM | UWM_REGS); - TEST(UWM_PGM | UWM_SP | UWM_REGS); -#endif -#undef TEST - if (failed) { - pr_err("%d of %d stack unwinder tests failed", failed, total); - WARN(1, "%d of %d stack unwinder tests failed", failed, total); - } else { - pr_info("all %d stack unwinder tests passed", total); - } +static struct kunit_case unwind_test_cases[] = { + KUNIT_CASE_PARAM(test_unwind_flags, test_unwind_gen_params), + {} +}; - return failed ? -EINVAL : 0; -} +static struct kunit_suite test_unwind_suite = { + .name = "test_unwind", + .test_cases = unwind_test_cases, +}; -static void test_unwind_exit(void) -{ -} +kunit_test_suites(&test_unwind_suite); -module_init(test_unwind_init); -module_exit(test_unwind_exit); MODULE_LICENSE("GPL"); diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index 1141c8d5c0d0..2203164b39da 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -14,8 +14,8 @@ #include <linux/moduleparam.h> #include <linux/gfp.h> #include <linux/sched.h> +#include <linux/string_helpers.h> #include <linux/sysctl.h> -#include <linux/ctype.h> #include <linux/swap.h> #include <linux/kthread.h> #include <linux/oom.h> @@ -394,13 +394,10 @@ static int __init cmm_init(void) goto out_sysctl; #ifdef CONFIG_CMM_IUCV /* convert sender to uppercase characters */ - if (sender) { - int len = strlen(sender); - while (len--) - sender[len] = toupper(sender[len]); - } else { + if (sender) + string_upper(sender, sender); + else sender = cmm_default_sender; - } rc = smsg_register_callback(SMSG_PREFIX, cmm_smsg_target); if (rc < 0) diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index 0b0c8c284953..9f9af5298dd6 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -8,6 +8,7 @@ #include <linux/kasan.h> #include <asm/ptdump.h> #include <asm/kasan.h> +#include <asm/nospec-branch.h> #include <asm/sections.h> static unsigned long max_addr; @@ -116,8 +117,13 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr) return; if (st->current_prot & _PAGE_NOEXEC) return; - /* The first lowcore page is currently still W+X. */ - if (addr == PAGE_SIZE) + /* + * The first lowcore page is W+X if spectre mitigations are using + * trampolines or the BEAR enhancements facility is not installed, + * in which case we have two lpswe instructions in lowcore that need + * to be executable. + */ + if (addr == PAGE_SIZE && (nospec_uses_trampoline() || !static_key_enabled(&cpu_has_bear))) return; WARN_ONCE(1, "s390/mm: Found insecure W+X mapping at address %pS\n", (void *)st->start_address); @@ -203,7 +209,9 @@ void ptdump_check_wx(void) if (st.wx_pages) pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found\n", st.wx_pages); else - pr_info("Checked W+X mappings: passed, no unexpected W+X pages found\n"); + pr_info("Checked W+X mappings: passed, no %sW+X pages found\n", + (nospec_uses_trampoline() || !static_key_enabled(&cpu_has_bear)) ? + "unexpected " : ""); } #endif /* CONFIG_DEBUG_WX */ diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 212632d57db9..d30f5986fa85 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -260,7 +260,6 @@ static noinline void do_no_context(struct pt_regs *regs) " in virtual user address space\n"); dump_fault_info(regs); die(regs, "Oops"); - do_exit(SIGKILL); } static noinline void do_low_address(struct pt_regs *regs) @@ -270,7 +269,6 @@ static noinline void do_low_address(struct pt_regs *regs) if (regs->psw.mask & PSW_MASK_PSTATE) { /* Low-address protection hit in user mode 'cannot happen'. */ die (regs, "Low-address protection"); - do_exit(SIGKILL); } do_no_context(regs); diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index a04faf49001a..8c6f258a6183 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -58,8 +58,6 @@ unsigned long empty_zero_page, zero_page_mask; EXPORT_SYMBOL(empty_zero_page); EXPORT_SYMBOL(zero_page_mask); -bool initmem_freed; - static void __init setup_zero_pages(void) { unsigned int order; @@ -214,7 +212,6 @@ void __init mem_init(void) void free_initmem(void) { - initmem_freed = true; __set_memory((unsigned long)_sinittext, (unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT, SET_MEMORY_RW | SET_MEMORY_NX); diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c index 3e4735168019..483b9dbe0970 100644 --- a/arch/s390/mm/kasan_init.c +++ b/arch/s390/mm/kasan_init.c @@ -399,5 +399,5 @@ void __init kasan_copy_shadow_mapping(void) void __init kasan_free_early_identity(void) { - memblock_free(pgalloc_pos, pgalloc_freeable - pgalloc_pos); + memblock_phys_free(pgalloc_pos, pgalloc_freeable - pgalloc_pos); } diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index fdc86c0e4e6c..654019181a37 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -57,7 +57,7 @@ void arch_report_meminfo(struct seq_file *m) static void pgt_set(unsigned long *old, unsigned long new, unsigned long addr, unsigned long dtt) { - unsigned long table, mask; + unsigned long *table, mask; mask = 0; if (MACHINE_HAS_EDAT2) { @@ -72,7 +72,7 @@ static void pgt_set(unsigned long *old, unsigned long new, unsigned long addr, mask = ~(PTRS_PER_PTE * sizeof(pte_t) - 1); break; } - table = (unsigned long)old & mask; + table = (unsigned long *)((unsigned long)old & mask); crdte(*old, new, table, dtt, addr, S390_lowcore.kernel_asce); } else if (MACHINE_HAS_IDTE) { cspg(old, *old, new); diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 2b1c6d916cf9..7d9705eeb02f 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -13,6 +13,7 @@ #include <linux/hugetlb.h> #include <linux/slab.h> #include <asm/cacheflush.h> +#include <asm/nospec-branch.h> #include <asm/pgalloc.h> #include <asm/setup.h> #include <asm/tlbflush.h> @@ -584,8 +585,13 @@ void __init vmem_map_init(void) __set_memory(__stext_amode31, (__etext_amode31 - __stext_amode31) >> PAGE_SHIFT, SET_MEMORY_RO | SET_MEMORY_X); - /* we need lowcore executable for our LPSWE instructions */ - set_memory_x(0, 1); + if (nospec_uses_trampoline() || !static_key_enabled(&cpu_has_bear)) { + /* + * Lowcore must be executable for LPSWE + * and expoline trampoline branch instructions. + */ + set_memory_x(0, 1); + } pr_info("Write protected kernel read-only data: %luk\n", (unsigned long)(__end_rodata - _stext) >> 10); diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 1a374d021e25..233cc9bcd652 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -567,7 +567,7 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth) EMIT4(0xb9040000, REG_2, BPF_REG_0); /* Restore registers */ save_restore_regs(jit, REGS_RESTORE, stack_depth); - if (__is_defined(CC_USING_EXPOLINE) && !nospec_disable) { + if (nospec_uses_trampoline()) { jit->r14_thunk_ip = jit->prg; /* Generate __s390_indirect_jump_r14 thunk */ if (test_facility(35)) { @@ -585,7 +585,7 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth) /* br %r14 */ _EMIT2(0x07fe); - if (__is_defined(CC_USING_EXPOLINE) && !nospec_disable && + if ((nospec_uses_trampoline()) && (is_first_pass(jit) || (jit->seen & SEEN_FUNC))) { jit->r1_thunk_ip = jit->prg; /* Generate __s390_indirect_jump_r1 thunk */ @@ -1332,7 +1332,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, jit->seen |= SEEN_FUNC; /* lgrl %w1,func */ EMIT6_PCREL_RILB(0xc4080000, REG_W1, _EMIT_CONST_U64(func)); - if (__is_defined(CC_USING_EXPOLINE) && !nospec_disable) { + if (nospec_uses_trampoline()) { /* brasl %r14,__s390_indirect_jump_r1 */ EMIT6_PCREL_RILB(0xc0050000, REG_14, jit->r1_thunk_ip); } else { diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index b833155ce838..2f9b78fa82a5 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -481,6 +481,34 @@ static void zpci_free_iomap(struct zpci_dev *zdev, int entry) spin_unlock(&zpci_iomap_lock); } +static void zpci_do_update_iomap_fh(struct zpci_dev *zdev, u32 fh) +{ + int bar, idx; + + spin_lock(&zpci_iomap_lock); + for (bar = 0; bar < PCI_STD_NUM_BARS; bar++) { + if (!zdev->bars[bar].size) + continue; + idx = zdev->bars[bar].map_idx; + if (!zpci_iomap_start[idx].count) + continue; + WRITE_ONCE(zpci_iomap_start[idx].fh, zdev->fh); + } + spin_unlock(&zpci_iomap_lock); +} + +void zpci_update_fh(struct zpci_dev *zdev, u32 fh) +{ + if (!fh || zdev->fh == fh) + return; + + zdev->fh = fh; + if (zpci_use_mio(zdev)) + return; + if (zdev->has_resources && zdev_enabled(zdev)) + zpci_do_update_iomap_fh(zdev, fh); +} + static struct resource *__alloc_res(struct zpci_dev *zdev, unsigned long start, unsigned long size, unsigned long flags) { @@ -561,7 +589,7 @@ static void zpci_cleanup_bus_resources(struct zpci_dev *zdev) zdev->has_resources = 0; } -int pcibios_add_device(struct pci_dev *pdev) +int pcibios_device_add(struct pci_dev *pdev) { struct zpci_dev *zdev = to_zpci(pdev); struct resource *res; @@ -668,7 +696,7 @@ int zpci_enable_device(struct zpci_dev *zdev) if (clp_enable_fh(zdev, &fh, ZPCI_NR_DMA_SPACES)) rc = -EIO; else - zdev->fh = fh; + zpci_update_fh(zdev, fh); return rc; } @@ -679,14 +707,14 @@ int zpci_disable_device(struct zpci_dev *zdev) cc = clp_disable_fh(zdev, &fh); if (!cc) { - zdev->fh = fh; + zpci_update_fh(zdev, fh); } else if (cc == CLP_RC_SETPCIFN_ALRDY) { pr_info("Disabling PCI function %08x had no effect as it was already disabled\n", zdev->fid); /* Function is already disabled - update handle */ rc = clp_refresh_fh(zdev->fid, &fh); if (!rc) { - zdev->fh = fh; + zpci_update_fh(zdev, fh); rc = -EINVAL; } } else { @@ -696,6 +724,65 @@ int zpci_disable_device(struct zpci_dev *zdev) } /** + * zpci_hot_reset_device - perform a reset of the given zPCI function + * @zdev: the slot which should be reset + * + * Performs a low level reset of the zPCI function. The reset is low level in + * the sense that the zPCI function can be reset without detaching it from the + * common PCI subsystem. The reset may be performed while under control of + * either DMA or IOMMU APIs in which case the existing DMA/IOMMU translation + * table is reinstated at the end of the reset. + * + * After the reset the functions internal state is reset to an initial state + * equivalent to its state during boot when first probing a driver. + * Consequently after reset the PCI function requires re-initialization via the + * common PCI code including re-enabling IRQs via pci_alloc_irq_vectors() + * and enabling the function via e.g.pci_enablde_device_flags().The caller + * must guard against concurrent reset attempts. + * + * In most cases this function should not be called directly but through + * pci_reset_function() or pci_reset_bus() which handle the save/restore and + * locking. + * + * Return: 0 on success and an error value otherwise + */ +int zpci_hot_reset_device(struct zpci_dev *zdev) +{ + int rc; + + zpci_dbg(3, "rst fid:%x, fh:%x\n", zdev->fid, zdev->fh); + if (zdev_enabled(zdev)) { + /* Disables device access, DMAs and IRQs (reset state) */ + rc = zpci_disable_device(zdev); + /* + * Due to a z/VM vs LPAR inconsistency in the error state the + * FH may indicate an enabled device but disable says the + * device is already disabled don't treat it as an error here. + */ + if (rc == -EINVAL) + rc = 0; + if (rc) + return rc; + } + + rc = zpci_enable_device(zdev); + if (rc) + return rc; + + if (zdev->dma_table) + rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, + (u64)zdev->dma_table); + else + rc = zpci_dma_init_device(zdev); + if (rc) { + zpci_disable_device(zdev); + return rc; + } + + return 0; +} + +/** * zpci_create_device() - Create a new zpci_dev and add it to the zbus * @fid: Function ID of the device to be created * @fh: Current Function Handle of the device to be created @@ -776,7 +863,7 @@ int zpci_scan_configured_device(struct zpci_dev *zdev, u32 fh) { int rc; - zdev->fh = fh; + zpci_update_fh(zdev, fh); /* the PCI function will be scanned once function 0 appears */ if (!zdev->zbus->bus) return 0; @@ -903,6 +990,59 @@ int zpci_report_error(struct pci_dev *pdev, } EXPORT_SYMBOL(zpci_report_error); +/** + * zpci_clear_error_state() - Clears the zPCI error state of the device + * @zdev: The zdev for which the zPCI error state should be reset + * + * Clear the zPCI error state of the device. If clearing the zPCI error state + * fails the device is left in the error state. In this case it may make sense + * to call zpci_io_perm_failure() on the associated pdev if it exists. + * + * Returns: 0 on success, -EIO otherwise + */ +int zpci_clear_error_state(struct zpci_dev *zdev) +{ + u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_RESET_ERROR); + struct zpci_fib fib = {0}; + u8 status; + int cc; + + cc = zpci_mod_fc(req, &fib, &status); + if (cc) { + zpci_dbg(3, "ces fid:%x, cc:%d, status:%x\n", zdev->fid, cc, status); + return -EIO; + } + + return 0; +} + +/** + * zpci_reset_load_store_blocked() - Re-enables L/S from error state + * @zdev: The zdev for which to unblock load/store access + * + * Re-enables load/store access for a PCI function in the error state while + * keeping DMA blocked. In this state drivers can poke MMIO space to determine + * if error recovery is possible while catching any rogue DMA access from the + * device. + * + * Returns: 0 on success, -EIO otherwise + */ +int zpci_reset_load_store_blocked(struct zpci_dev *zdev) +{ + u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_RESET_BLOCK); + struct zpci_fib fib = {0}; + u8 status; + int cc; + + cc = zpci_mod_fc(req, &fib, &status); + if (cc) { + zpci_dbg(3, "rls fid:%x, cc:%d, status:%x\n", zdev->fid, cc, status); + return -EIO; + } + + return 0; +} + static int zpci_mem_init(void) { BUILD_BUG_ON(!is_power_of_2(__alignof__(struct zpci_fmb)) || diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 93223bd110c3..1f4540d6bd2d 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -18,6 +18,8 @@ static struct kmem_cache *dma_region_table_cache; static struct kmem_cache *dma_page_table_cache; static int s390_iommu_strict; +static u64 s390_iommu_aperture; +static u32 s390_iommu_aperture_factor = 1; static int zpci_refresh_global(struct zpci_dev *zdev) { @@ -565,15 +567,19 @@ int zpci_dma_init_device(struct zpci_dev *zdev) /* * Restrict the iommu bitmap size to the minimum of the following: - * - main memory size + * - s390_iommu_aperture which defaults to high_memory * - 3-level pagetable address limit minus start_dma offset * - DMA address range allowed by the hardware (clp query pci fn) * * Also set zdev->end_dma to the actual end address of the usable * range, instead of the theoretical maximum as reported by hardware. + * + * This limits the number of concurrently usable DMA mappings since + * for each DMA mapped memory address we need a DMA address including + * extra DMA addresses for multiple mappings of the same memory address. */ zdev->start_dma = PAGE_ALIGN(zdev->start_dma); - zdev->iommu_size = min3((u64) high_memory, + zdev->iommu_size = min3(s390_iommu_aperture, ZPCI_TABLE_SIZE_RT - zdev->start_dma, zdev->end_dma - zdev->start_dma + 1); zdev->end_dma = zdev->start_dma + zdev->iommu_size - 1; @@ -660,6 +666,12 @@ static int __init dma_alloc_cpu_table_caches(void) int __init zpci_dma_init(void) { + s390_iommu_aperture = (u64)high_memory; + if (!s390_iommu_aperture_factor) + s390_iommu_aperture = ULONG_MAX; + else + s390_iommu_aperture *= s390_iommu_aperture_factor; + return dma_alloc_cpu_table_caches(); } @@ -692,3 +704,12 @@ static int __init s390_iommu_setup(char *str) } __setup("s390_iommu=", s390_iommu_setup); + +static int __init s390_iommu_aperture_setup(char *str) +{ + if (kstrtou32(str, 10, &s390_iommu_aperture_factor)) + s390_iommu_aperture_factor = 1; + return 1; +} + +__setup("s390_iommu_aperture=", s390_iommu_aperture_setup); diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index 5b8d647523f9..2e3e5b278925 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -47,16 +47,223 @@ struct zpci_ccdf_avail { u16 pec; /* PCI event code */ } __packed; +static inline bool ers_result_indicates_abort(pci_ers_result_t ers_res) +{ + switch (ers_res) { + case PCI_ERS_RESULT_CAN_RECOVER: + case PCI_ERS_RESULT_RECOVERED: + case PCI_ERS_RESULT_NEED_RESET: + return false; + default: + return true; + } +} + +static bool is_passed_through(struct zpci_dev *zdev) +{ + return zdev->s390_domain; +} + +static bool is_driver_supported(struct pci_driver *driver) +{ + if (!driver || !driver->err_handler) + return false; + if (!driver->err_handler->error_detected) + return false; + if (!driver->err_handler->slot_reset) + return false; + if (!driver->err_handler->resume) + return false; + return true; +} + +static pci_ers_result_t zpci_event_notify_error_detected(struct pci_dev *pdev, + struct pci_driver *driver) +{ + pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT; + + ers_res = driver->err_handler->error_detected(pdev, pdev->error_state); + if (ers_result_indicates_abort(ers_res)) + pr_info("%s: Automatic recovery failed after initial reporting\n", pci_name(pdev)); + else if (ers_res == PCI_ERS_RESULT_NEED_RESET) + pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev)); + + return ers_res; +} + +static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev, + struct pci_driver *driver) +{ + pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT; + struct zpci_dev *zdev = to_zpci(pdev); + int rc; + + pr_info("%s: Unblocking device access for examination\n", pci_name(pdev)); + rc = zpci_reset_load_store_blocked(zdev); + if (rc) { + pr_err("%s: Unblocking device access failed\n", pci_name(pdev)); + /* Let's try a full reset instead */ + return PCI_ERS_RESULT_NEED_RESET; + } + + if (driver->err_handler->mmio_enabled) { + ers_res = driver->err_handler->mmio_enabled(pdev); + if (ers_result_indicates_abort(ers_res)) { + pr_info("%s: Automatic recovery failed after MMIO re-enable\n", + pci_name(pdev)); + return ers_res; + } else if (ers_res == PCI_ERS_RESULT_NEED_RESET) { + pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev)); + return ers_res; + } + } + + pr_debug("%s: Unblocking DMA\n", pci_name(pdev)); + rc = zpci_clear_error_state(zdev); + if (!rc) { + pdev->error_state = pci_channel_io_normal; + } else { + pr_err("%s: Unblocking DMA failed\n", pci_name(pdev)); + /* Let's try a full reset instead */ + return PCI_ERS_RESULT_NEED_RESET; + } + + return ers_res; +} + +static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev, + struct pci_driver *driver) +{ + pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT; + + pr_info("%s: Initiating reset\n", pci_name(pdev)); + if (zpci_hot_reset_device(to_zpci(pdev))) { + pr_err("%s: The reset request failed\n", pci_name(pdev)); + return ers_res; + } + pdev->error_state = pci_channel_io_normal; + ers_res = driver->err_handler->slot_reset(pdev); + if (ers_result_indicates_abort(ers_res)) { + pr_info("%s: Automatic recovery failed after slot reset\n", pci_name(pdev)); + return ers_res; + } + + return ers_res; +} + +/* zpci_event_attempt_error_recovery - Try to recover the given PCI function + * @pdev: PCI function to recover currently in the error state + * + * We follow the scheme outlined in Documentation/PCI/pci-error-recovery.rst. + * With the simplification that recovery always happens per function + * and the platform determines which functions are affected for + * multi-function devices. + */ +static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev) +{ + pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT; + struct pci_driver *driver; + + /* + * Ensure that the PCI function is not removed concurrently, no driver + * is unbound or probed and that userspace can't access its + * configuration space while we perform recovery. + */ + pci_dev_lock(pdev); + if (pdev->error_state == pci_channel_io_perm_failure) { + ers_res = PCI_ERS_RESULT_DISCONNECT; + goto out_unlock; + } + pdev->error_state = pci_channel_io_frozen; + + if (is_passed_through(to_zpci(pdev))) { + pr_info("%s: Cannot be recovered in the host because it is a pass-through device\n", + pci_name(pdev)); + goto out_unlock; + } + + driver = to_pci_driver(pdev->dev.driver); + if (!is_driver_supported(driver)) { + if (!driver) + pr_info("%s: Cannot be recovered because no driver is bound to the device\n", + pci_name(pdev)); + else + pr_info("%s: The %s driver bound to the device does not support error recovery\n", + pci_name(pdev), + driver->name); + goto out_unlock; + } + + ers_res = zpci_event_notify_error_detected(pdev, driver); + if (ers_result_indicates_abort(ers_res)) + goto out_unlock; + + if (ers_res == PCI_ERS_RESULT_CAN_RECOVER) { + ers_res = zpci_event_do_error_state_clear(pdev, driver); + if (ers_result_indicates_abort(ers_res)) + goto out_unlock; + } + + if (ers_res == PCI_ERS_RESULT_NEED_RESET) + ers_res = zpci_event_do_reset(pdev, driver); + + if (ers_res != PCI_ERS_RESULT_RECOVERED) { + pr_err("%s: Automatic recovery failed; operator intervention is required\n", + pci_name(pdev)); + goto out_unlock; + } + + pr_info("%s: The device is ready to resume operations\n", pci_name(pdev)); + if (driver->err_handler->resume) + driver->err_handler->resume(pdev); +out_unlock: + pci_dev_unlock(pdev); + + return ers_res; +} + +/* zpci_event_io_failure - Report PCI channel failure state to driver + * @pdev: PCI function for which to report + * @es: PCI channel failure state to report + */ +static void zpci_event_io_failure(struct pci_dev *pdev, pci_channel_state_t es) +{ + struct pci_driver *driver; + + pci_dev_lock(pdev); + pdev->error_state = es; + /** + * While vfio-pci's error_detected callback notifies user-space QEMU + * reacts to this by freezing the guest. In an s390 environment PCI + * errors are rarely fatal so this is overkill. Instead in the future + * we will inject the error event and let the guest recover the device + * itself. + */ + if (is_passed_through(to_zpci(pdev))) + goto out; + driver = to_pci_driver(pdev->dev.driver); + if (driver && driver->err_handler && driver->err_handler->error_detected) + driver->err_handler->error_detected(pdev, pdev->error_state); +out: + pci_dev_unlock(pdev); +} + static void __zpci_event_error(struct zpci_ccdf_err *ccdf) { struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid); struct pci_dev *pdev = NULL; + pci_ers_result_t ers_res; + zpci_dbg(3, "err fid:%x, fh:%x, pec:%x\n", + ccdf->fid, ccdf->fh, ccdf->pec); zpci_err("error CCDF:\n"); zpci_err_hex(ccdf, sizeof(*ccdf)); - if (zdev) - pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn); + if (zdev) { + zpci_update_fh(zdev, ccdf->fh); + if (zdev->zbus->bus) + pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn); + } pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n", pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid); @@ -64,7 +271,20 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf) if (!pdev) return; - pdev->error_state = pci_channel_io_perm_failure; + switch (ccdf->pec) { + case 0x003a: /* Service Action or Error Recovery Successful */ + ers_res = zpci_event_attempt_error_recovery(pdev); + if (ers_res != PCI_ERS_RESULT_RECOVERED) + zpci_event_io_failure(pdev, pci_channel_io_perm_failure); + break; + default: + /* + * Mark as frozen not permanently failed because the device + * could be subsequently recovered by the platform. + */ + zpci_event_io_failure(pdev, pci_channel_io_frozen); + break; + } pci_dev_put(pdev); } @@ -76,7 +296,7 @@ void zpci_event_error(void *data) static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh) { - zdev->fh = fh; + zpci_update_fh(zdev, fh); /* Give the driver a hint that the function is * already unusable. */ @@ -96,6 +316,8 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid); enum zpci_state state; + zpci_dbg(3, "avl fid:%x, fh:%x, pec:%x\n", + ccdf->fid, ccdf->fh, ccdf->pec); zpci_err("avail CCDF:\n"); zpci_err_hex(ccdf, sizeof(*ccdf)); @@ -117,7 +339,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) if (!zdev) zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_STANDBY); else - zdev->fh = ccdf->fh; + zpci_update_fh(zdev, ccdf->fh); break; case 0x0303: /* Deconfiguration requested */ if (zdev) { @@ -126,7 +348,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) */ if (zdev->state != ZPCI_FN_STATE_CONFIGURED) break; - zdev->fh = ccdf->fh; + zpci_update_fh(zdev, ccdf->fh); zpci_deconfigure_device(zdev); } break; diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c index 2e43996159f0..28d863aaafea 100644 --- a/arch/s390/pci/pci_insn.c +++ b/arch/s390/pci/pci_insn.c @@ -163,7 +163,7 @@ static inline int zpci_load_fh(u64 *data, const volatile void __iomem *addr, unsigned long len) { struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(addr)]; - u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, len); + u64 req = ZPCI_CREATE_REQ(READ_ONCE(entry->fh), entry->bar, len); return __zpci_load(data, req, ZPCI_OFFSET(addr)); } @@ -244,7 +244,7 @@ static inline int zpci_store_fh(const volatile void __iomem *addr, u64 data, unsigned long len) { struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(addr)]; - u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, len); + u64 req = ZPCI_CREATE_REQ(READ_ONCE(entry->fh), entry->bar, len); return __zpci_store(data, req, ZPCI_OFFSET(addr)); } diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c index 3823e159bf74..954bb7a83124 100644 --- a/arch/s390/pci/pci_irq.c +++ b/arch/s390/pci/pci_irq.c @@ -387,6 +387,15 @@ void arch_teardown_msi_irqs(struct pci_dev *pdev) airq_iv_free(zpci_ibv[0], zdev->msi_first_bit, zdev->msi_nr_irqs); } +void arch_restore_msi_irqs(struct pci_dev *pdev) +{ + struct zpci_dev *zdev = to_zpci(pdev); + + if (!zdev->irqs_registered) + zpci_set_irq(zdev); + default_restore_msi_irqs(pdev); +} + static struct airq_struct zpci_airq = { .handler = zpci_floating_irq_handler, .isc = PCI_ISC, diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c index 335c281811c7..cae280e5c047 100644 --- a/arch/s390/pci/pci_sysfs.c +++ b/arch/s390/pci/pci_sysfs.c @@ -90,6 +90,14 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr, if (zdev_enabled(zdev)) { ret = zpci_disable_device(zdev); + /* + * Due to a z/VM vs LPAR inconsistency in the error + * state the FH may indicate an enabled device but + * disable says the device is already disabled don't + * treat it as an error here. + */ + if (ret == -EINVAL) + ret = 0; if (ret) goto out; } diff --git a/arch/sh/Kbuild b/arch/sh/Kbuild index 48c2a091a072..be171880977e 100644 --- a/arch/sh/Kbuild +++ b/arch/sh/Kbuild @@ -2,3 +2,6 @@ obj-y += kernel/ mm/ boards/ obj-$(CONFIG_SH_FPU_EMU) += math-emu/ obj-$(CONFIG_USE_BUILTIN_DTB) += boot/dts/ + +# for cleaning +subdir- += boot diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 6904f4bdbf00..70afb30e0b32 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -56,7 +56,6 @@ config SUPERH select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS select IRQ_FORCED_THREADING - select MAY_HAVE_SPARSE_IRQ select MODULES_USE_ELF_RELA select NEED_SG_DMA_LENGTH select NO_DMA if !MMU && !DMA_COHERENT diff --git a/arch/sh/Kconfig.debug b/arch/sh/Kconfig.debug index 958f790273ab..10290e5c1f43 100644 --- a/arch/sh/Kconfig.debug +++ b/arch/sh/Kconfig.debug @@ -54,6 +54,7 @@ config DUMP_CODE config DWARF_UNWINDER bool "Enable the DWARF unwinder for stacktraces" + depends on DEBUG_KERNEL select FRAME_POINTER default n help diff --git a/arch/sh/Makefile b/arch/sh/Makefile index 88ddb6f1c75b..b39412bf91fb 100644 --- a/arch/sh/Makefile +++ b/arch/sh/Makefile @@ -198,10 +198,6 @@ compressed: zImage archprepare: $(Q)$(MAKE) $(build)=arch/sh/tools include/generated/machtypes.h -archclean: - $(Q)$(MAKE) $(clean)=$(boot) - $(Q)$(MAKE) $(clean)=arch/sh/kernel/vsyscall - archheaders: $(Q)$(MAKE) $(build)=arch/sh/kernel/syscalls all diff --git a/arch/sh/boards/mach-ap325rxa/setup.c b/arch/sh/boards/mach-ap325rxa/setup.c index bac8a058ebd7..c77b5f00a66a 100644 --- a/arch/sh/boards/mach-ap325rxa/setup.c +++ b/arch/sh/boards/mach-ap325rxa/setup.c @@ -560,7 +560,7 @@ static void __init ap325rxa_mv_mem_reserve(void) if (!phys) panic("Failed to allocate CEU memory\n"); - memblock_free(phys, size); + memblock_phys_free(phys, size); memblock_remove(phys, size); ceu_dma_membase = phys; diff --git a/arch/sh/boards/mach-ecovec24/setup.c b/arch/sh/boards/mach-ecovec24/setup.c index 97c5703b1818..4c9522dd351f 100644 --- a/arch/sh/boards/mach-ecovec24/setup.c +++ b/arch/sh/boards/mach-ecovec24/setup.c @@ -1502,7 +1502,7 @@ static void __init ecovec_mv_mem_reserve(void) if (!phys) panic("Failed to allocate CEU0 memory\n"); - memblock_free(phys, size); + memblock_phys_free(phys, size); memblock_remove(phys, size); ceu0_dma_membase = phys; @@ -1510,7 +1510,7 @@ static void __init ecovec_mv_mem_reserve(void) if (!phys) panic("Failed to allocate CEU1 memory\n"); - memblock_free(phys, size); + memblock_phys_free(phys, size); memblock_remove(phys, size); ceu1_dma_membase = phys; } diff --git a/arch/sh/boards/mach-kfr2r09/setup.c b/arch/sh/boards/mach-kfr2r09/setup.c index eeb5ce341efd..20f4db778ed6 100644 --- a/arch/sh/boards/mach-kfr2r09/setup.c +++ b/arch/sh/boards/mach-kfr2r09/setup.c @@ -633,7 +633,7 @@ static void __init kfr2r09_mv_mem_reserve(void) if (!phys) panic("Failed to allocate CEU memory\n"); - memblock_free(phys, size); + memblock_phys_free(phys, size); memblock_remove(phys, size); ceu_dma_membase = phys; diff --git a/arch/sh/boards/mach-landisk/irq.c b/arch/sh/boards/mach-landisk/irq.c index 29b8b1f85246..0b672b80c561 100644 --- a/arch/sh/boards/mach-landisk/irq.c +++ b/arch/sh/boards/mach-landisk/irq.c @@ -26,8 +26,8 @@ enum { PCI_INTD, /* PCI int D */ ATA, /* ATA */ FATA, /* CF */ - POWER, /* Power swtich */ - BUTTON, /* Button swtich */ + POWER, /* Power switch */ + BUTTON, /* Button switch */ }; /* Vectors for LANDISK */ diff --git a/arch/sh/boards/mach-migor/setup.c b/arch/sh/boards/mach-migor/setup.c index 6703a2122c0d..f60061283c48 100644 --- a/arch/sh/boards/mach-migor/setup.c +++ b/arch/sh/boards/mach-migor/setup.c @@ -633,7 +633,7 @@ static void __init migor_mv_mem_reserve(void) if (!phys) panic("Failed to allocate CEU memory\n"); - memblock_free(phys, size); + memblock_phys_free(phys, size); memblock_remove(phys, size); ceu_dma_membase = phys; diff --git a/arch/sh/boards/mach-se/7724/setup.c b/arch/sh/boards/mach-se/7724/setup.c index d9b31d4560c0..b60a2626e18b 100644 --- a/arch/sh/boards/mach-se/7724/setup.c +++ b/arch/sh/boards/mach-se/7724/setup.c @@ -966,7 +966,7 @@ static void __init ms7724se_mv_mem_reserve(void) if (!phys) panic("Failed to allocate CEU0 memory\n"); - memblock_free(phys, size); + memblock_phys_free(phys, size); memblock_remove(phys, size); ceu0_dma_membase = phys; @@ -974,7 +974,7 @@ static void __init ms7724se_mv_mem_reserve(void) if (!phys) panic("Failed to allocate CEU1 memory\n"); - memblock_free(phys, size); + memblock_phys_free(phys, size); memblock_remove(phys, size); ceu1_dma_membase = phys; } diff --git a/arch/sh/boot/Makefile b/arch/sh/boot/Makefile index c081e7e2d6e7..5c123f5b2797 100644 --- a/arch/sh/boot/Makefile +++ b/arch/sh/boot/Makefile @@ -27,8 +27,8 @@ suffix-$(CONFIG_KERNEL_XZ) := xz suffix-$(CONFIG_KERNEL_LZO) := lzo targets := zImage vmlinux.srec romImage uImage uImage.srec uImage.gz \ - uImage.bz2 uImage.lzma uImage.xz uImage.lzo uImage.bin -extra-y += vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \ + uImage.bz2 uImage.lzma uImage.xz uImage.lzo uImage.bin \ + vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \ vmlinux.bin.xz vmlinux.bin.lzo subdir- := compressed romimage diff --git a/arch/sh/boot/compressed/.gitignore b/arch/sh/boot/compressed/.gitignore index 37aa53057369..cd16663bc7c8 100644 --- a/arch/sh/boot/compressed/.gitignore +++ b/arch/sh/boot/compressed/.gitignore @@ -1,7 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only -ashiftrt.S -ashldi3.c -ashlsi3.S -ashrsi3.S -lshrsi3.S vmlinux.bin.* diff --git a/arch/sh/boot/compressed/Makefile b/arch/sh/boot/compressed/Makefile index 589d2d8a573d..cf3174df7859 100644 --- a/arch/sh/boot/compressed/Makefile +++ b/arch/sh/boot/compressed/Makefile @@ -5,12 +5,18 @@ # create a compressed vmlinux image from the original vmlinux # -targets := vmlinux vmlinux.bin vmlinux.bin.gz \ - vmlinux.bin.bz2 vmlinux.bin.lzma \ - vmlinux.bin.xz vmlinux.bin.lzo \ - head_32.o misc.o piggy.o +OBJECTS := head_32.o misc.o cache.o piggy.o \ + ashiftrt.o ashldi3.o ashrsi3.o ashlsi3.o lshrsi3.o + +# These were previously generated files. When you are building the kernel +# with O=, make sure to remove the stale files in the output tree. Otherwise, +# the build system wrongly compiles the stale ones. +ifdef building_out_of_srctree +$(shell rm -f $(addprefix $(obj)/, ashiftrt.S ashldi3.c ashrsi3.S ashlsi3.S lshrsi3.S)) +endif -OBJECTS = $(obj)/head_32.o $(obj)/misc.o $(obj)/cache.o +targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 \ + vmlinux.bin.lzma vmlinux.bin.xz vmlinux.bin.lzo $(OBJECTS) GCOV_PROFILE := n @@ -33,21 +39,9 @@ ccflags-remove-$(CONFIG_MCOUNT) += -pg LDFLAGS_vmlinux := --oformat $(ld-bfd) -Ttext $(IMAGE_OFFSET) -e startup \ -T $(obj)/../../kernel/vmlinux.lds -# -# Pull in the necessary libgcc bits from the in-kernel implementation. -# -lib1funcs-y := ashiftrt.S ashldi3.c ashrsi3.S ashlsi3.S lshrsi3.S -lib1funcs-obj := \ - $(addsuffix .o, $(basename $(addprefix $(obj)/, $(lib1funcs-y)))) - -lib1funcs-dir := $(srctree)/arch/$(SRCARCH)/lib - -KBUILD_CFLAGS += -I$(lib1funcs-dir) -DDISABLE_BRANCH_PROFILING - -$(addprefix $(obj)/,$(lib1funcs-y)): $(obj)/%: $(lib1funcs-dir)/% FORCE - $(call cmd,shipped) +KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING -$(obj)/vmlinux: $(OBJECTS) $(obj)/piggy.o $(lib1funcs-obj) FORCE +$(obj)/vmlinux: $(addprefix $(obj)/, $(OBJECTS)) FORCE $(call if_changed,ld) $(obj)/vmlinux.bin: vmlinux FORCE diff --git a/arch/sh/boot/compressed/ashiftrt.S b/arch/sh/boot/compressed/ashiftrt.S new file mode 100644 index 000000000000..0f3b291a3f4b --- /dev/null +++ b/arch/sh/boot/compressed/ashiftrt.S @@ -0,0 +1,2 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#include "../../lib/ashiftrt.S" diff --git a/arch/sh/boot/compressed/ashldi3.c b/arch/sh/boot/compressed/ashldi3.c new file mode 100644 index 000000000000..7cebd646df83 --- /dev/null +++ b/arch/sh/boot/compressed/ashldi3.c @@ -0,0 +1,2 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include "../../lib/ashldi3.c" diff --git a/arch/sh/boot/compressed/ashlsi3.S b/arch/sh/boot/compressed/ashlsi3.S new file mode 100644 index 000000000000..e354262b275f --- /dev/null +++ b/arch/sh/boot/compressed/ashlsi3.S @@ -0,0 +1,2 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#include "../../lib/ashlsi3.S" diff --git a/arch/sh/boot/compressed/ashrsi3.S b/arch/sh/boot/compressed/ashrsi3.S new file mode 100644 index 000000000000..e564be9a4dcd --- /dev/null +++ b/arch/sh/boot/compressed/ashrsi3.S @@ -0,0 +1,2 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#include "../../lib/ashrsi3.S" diff --git a/arch/sh/boot/compressed/lshrsi3.S b/arch/sh/boot/compressed/lshrsi3.S new file mode 100644 index 000000000000..5a8281b7e516 --- /dev/null +++ b/arch/sh/boot/compressed/lshrsi3.S @@ -0,0 +1,2 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#include "../../lib/lshrsi3.S" diff --git a/arch/sh/include/asm/checksum_32.h b/arch/sh/include/asm/checksum_32.h index 1a391e3a7659..a6501b856f3e 100644 --- a/arch/sh/include/asm/checksum_32.h +++ b/arch/sh/include/asm/checksum_32.h @@ -84,7 +84,8 @@ static inline __sum16 csum_fold(__wsum sum) */ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) { - unsigned int sum, __dummy0, __dummy1; + __wsum sum; + unsigned int __dummy0, __dummy1; __asm__ __volatile__( "mov.l @%1+, %0\n\t" @@ -197,6 +198,6 @@ static inline __wsum csum_and_copy_to_user(const void *src, { if (!access_ok(dst, len)) return 0; - return csum_partial_copy_generic((__force const void *)src, dst, len); + return csum_partial_copy_generic(src, (__force void *)dst, len); } #endif /* __ASM_SH_CHECKSUM_H */ diff --git a/arch/sh/include/asm/irq.h b/arch/sh/include/asm/irq.h index 839551ce398c..1c4923502fd4 100644 --- a/arch/sh/include/asm/irq.h +++ b/arch/sh/include/asm/irq.h @@ -6,17 +6,6 @@ #include <asm/machvec.h> /* - * Only legacy non-sparseirq platforms have to set a reasonably sane - * value here. sparseirq platforms allocate their irq_descs on the fly, - * so will expand automatically based on the number of registered IRQs. - */ -#ifdef CONFIG_SPARSE_IRQ -# define NR_IRQS 8 -#else -# define NR_IRQS 512 -#endif - -/* * This is a special IRQ number for indicating that no IRQ has been * triggered and to simply ignore the IRQ dispatch. This is a special * case that can happen with IRQ auto-distribution when multiple CPUs diff --git a/arch/sh/include/asm/sfp-machine.h b/arch/sh/include/asm/sfp-machine.h index cbc7cf8c97ce..2d2423478b71 100644 --- a/arch/sh/include/asm/sfp-machine.h +++ b/arch/sh/include/asm/sfp-machine.h @@ -13,6 +13,14 @@ #ifndef _SFP_MACHINE_H #define _SFP_MACHINE_H +#ifdef __BIG_ENDIAN__ +#define __BYTE_ORDER __BIG_ENDIAN +#define __LITTLE_ENDIAN 0 +#else +#define __BYTE_ORDER __LITTLE_ENDIAN +#define __BIG_ENDIAN 0 +#endif + #define _FP_W_TYPE_SIZE 32 #define _FP_W_TYPE unsigned long #define _FP_WS_TYPE signed long diff --git a/arch/sh/include/asm/syscall_32.h b/arch/sh/include/asm/syscall_32.h index cb51a7528384..d87738eebe30 100644 --- a/arch/sh/include/asm/syscall_32.h +++ b/arch/sh/include/asm/syscall_32.h @@ -57,18 +57,6 @@ static inline void syscall_get_arguments(struct task_struct *task, args[0] = regs->regs[4]; } -static inline void syscall_set_arguments(struct task_struct *task, - struct pt_regs *regs, - const unsigned long *args) -{ - regs->regs[1] = args[5]; - regs->regs[0] = args[4]; - regs->regs[7] = args[3]; - regs->regs[6] = args[2]; - regs->regs[5] = args[1]; - regs->regs[4] = args[0]; -} - static inline int syscall_get_arch(struct task_struct *task) { int arch = AUDIT_ARCH_SH; diff --git a/arch/sh/include/asm/uaccess.h b/arch/sh/include/asm/uaccess.h index 73f3b48d4a34..8867bb04b00e 100644 --- a/arch/sh/include/asm/uaccess.h +++ b/arch/sh/include/asm/uaccess.h @@ -68,7 +68,7 @@ struct __large_struct { unsigned long buf[100]; }; ({ \ long __gu_err = -EFAULT; \ unsigned long __gu_val = 0; \ - const __typeof__(*(ptr)) *__gu_addr = (ptr); \ + const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ if (likely(access_ok(__gu_addr, (size)))) \ __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ @@ -124,7 +124,7 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n) * Clear the area and return remaining number of bytes * (on failure. Usually it's 0.) */ -__kernel_size_t __clear_user(void *addr, __kernel_size_t size); +__kernel_size_t __clear_user(void __user *addr, __kernel_size_t size); #define clear_user(addr,n) \ ({ \ diff --git a/arch/sh/kernel/cpu/fpu.c b/arch/sh/kernel/cpu/fpu.c index ae354a2931e7..fd6db0ab1928 100644 --- a/arch/sh/kernel/cpu/fpu.c +++ b/arch/sh/kernel/cpu/fpu.c @@ -62,18 +62,20 @@ void fpu_state_restore(struct pt_regs *regs) } if (!tsk_used_math(tsk)) { - local_irq_enable(); + int ret; /* * does a slab alloc which can sleep */ - if (init_fpu(tsk)) { + local_irq_enable(); + ret = init_fpu(tsk); + local_irq_disable(); + if (ret) { /* * ran out of memory! */ - do_group_exit(SIGKILL); + force_sig(SIGKILL); return; } - local_irq_disable(); } grab_fpu(regs); diff --git a/arch/sh/kernel/cpu/sh4a/smp-shx3.c b/arch/sh/kernel/cpu/sh4a/smp-shx3.c index f8a2bec0f260..1261dc7b84e8 100644 --- a/arch/sh/kernel/cpu/sh4a/smp-shx3.c +++ b/arch/sh/kernel/cpu/sh4a/smp-shx3.c @@ -73,8 +73,9 @@ static void shx3_prepare_cpus(unsigned int max_cpus) BUILD_BUG_ON(SMP_MSG_NR >= 8); for (i = 0; i < SMP_MSG_NR; i++) - request_irq(104 + i, ipi_interrupt_handler, - IRQF_PERCPU, "IPI", (void *)(long)i); + if (request_irq(104 + i, ipi_interrupt_handler, + IRQF_PERCPU, "IPI", (void *)(long)i)) + pr_err("Failed to request irq %d\n", i); for (i = 0; i < max_cpus; i++) set_cpu_present(i, true); diff --git a/arch/sh/kernel/crash_dump.c b/arch/sh/kernel/crash_dump.c index a9086127b16d..5b41b59698c1 100644 --- a/arch/sh/kernel/crash_dump.c +++ b/arch/sh/kernel/crash_dump.c @@ -26,7 +26,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, size_t csize, unsigned long offset, int userbuf) { - void *vaddr; + void __iomem *vaddr; if (!csize) return 0; @@ -34,7 +34,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, vaddr = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE); if (userbuf) { - if (copy_to_user(buf, (vaddr + offset), csize)) { + if (copy_to_user((void __user *)buf, (vaddr + offset), csize)) { iounmap(vaddr); return -EFAULT; } diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c index e76b22157099..cbe3201d4f21 100644 --- a/arch/sh/kernel/traps.c +++ b/arch/sh/kernel/traps.c @@ -20,7 +20,7 @@ static DEFINE_SPINLOCK(die_lock); -void die(const char *str, struct pt_regs *regs, long err) +void __noreturn die(const char *str, struct pt_regs *regs, long err) { static int die_counter; diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c index b62ad0ba2395..b3c715bc254b 100644 --- a/arch/sh/kernel/traps_32.c +++ b/arch/sh/kernel/traps_32.c @@ -490,7 +490,7 @@ asmlinkage void do_address_error(struct pt_regs *regs, inc_unaligned_user_access(); oldfs = force_uaccess_begin(); - if (copy_from_user(&instruction, (insn_size_t *)(regs->pc & ~1), + if (copy_from_user(&instruction, (insn_size_t __user *)(regs->pc & ~1), sizeof(instruction))) { force_uaccess_end(oldfs); goto uspace_segv; @@ -614,7 +614,7 @@ asmlinkage void do_reserved_inst(void) unsigned short inst = 0; int err; - get_user(inst, (unsigned short*)regs->pc); + get_user(inst, (unsigned short __user *)regs->pc); err = do_fpu_inst(inst, regs); if (!err) { @@ -699,9 +699,9 @@ asmlinkage void do_illegal_slot_inst(void) return; #ifdef CONFIG_SH_FPU_EMU - get_user(inst, (unsigned short *)regs->pc + 1); + get_user(inst, (unsigned short __user *)regs->pc + 1); if (!do_fpu_inst(inst, regs)) { - get_user(inst, (unsigned short *)regs->pc); + get_user(inst, (unsigned short __user *)regs->pc); if (!emulate_branch(inst, regs)) return; /* fault in branch.*/ diff --git a/arch/sh/math-emu/math.c b/arch/sh/math-emu/math.c index e8be0eca0444..cdaef6501d76 100644 --- a/arch/sh/math-emu/math.c +++ b/arch/sh/math-emu/math.c @@ -51,8 +51,8 @@ #define Rn (regs->regs[n]) #define Rm (regs->regs[m]) -#define WRITE(d,a) ({if(put_user(d, (typeof (d)*)a)) return -EFAULT;}) -#define READ(d,a) ({if(get_user(d, (typeof (d)*)a)) return -EFAULT;}) +#define MWRITE(d,a) ({if(put_user(d, (typeof (d) __user *)a)) return -EFAULT;}) +#define MREAD(d,a) ({if(get_user(d, (typeof (d) __user *)a)) return -EFAULT;}) #define PACK_S(r,f) FP_PACK_SP(&r,f) #define UNPACK_S(f,r) FP_UNPACK_SP(f,&r) @@ -157,11 +157,11 @@ fmov_idx_reg(struct sh_fpu_soft_struct *fregs, struct pt_regs *regs, int m, { if (FPSCR_SZ) { FMOV_EXT(n); - READ(FRn, Rm + R0 + 4); + MREAD(FRn, Rm + R0 + 4); n++; - READ(FRn, Rm + R0); + MREAD(FRn, Rm + R0); } else { - READ(FRn, Rm + R0); + MREAD(FRn, Rm + R0); } return 0; @@ -173,11 +173,11 @@ fmov_mem_reg(struct sh_fpu_soft_struct *fregs, struct pt_regs *regs, int m, { if (FPSCR_SZ) { FMOV_EXT(n); - READ(FRn, Rm + 4); + MREAD(FRn, Rm + 4); n++; - READ(FRn, Rm); + MREAD(FRn, Rm); } else { - READ(FRn, Rm); + MREAD(FRn, Rm); } return 0; @@ -189,12 +189,12 @@ fmov_inc_reg(struct sh_fpu_soft_struct *fregs, struct pt_regs *regs, int m, { if (FPSCR_SZ) { FMOV_EXT(n); - READ(FRn, Rm + 4); + MREAD(FRn, Rm + 4); n++; - READ(FRn, Rm); + MREAD(FRn, Rm); Rm += 8; } else { - READ(FRn, Rm); + MREAD(FRn, Rm); Rm += 4; } @@ -207,11 +207,11 @@ fmov_reg_idx(struct sh_fpu_soft_struct *fregs, struct pt_regs *regs, int m, { if (FPSCR_SZ) { FMOV_EXT(m); - WRITE(FRm, Rn + R0 + 4); + MWRITE(FRm, Rn + R0 + 4); m++; - WRITE(FRm, Rn + R0); + MWRITE(FRm, Rn + R0); } else { - WRITE(FRm, Rn + R0); + MWRITE(FRm, Rn + R0); } return 0; @@ -223,11 +223,11 @@ fmov_reg_mem(struct sh_fpu_soft_struct *fregs, struct pt_regs *regs, int m, { if (FPSCR_SZ) { FMOV_EXT(m); - WRITE(FRm, Rn + 4); + MWRITE(FRm, Rn + 4); m++; - WRITE(FRm, Rn); + MWRITE(FRm, Rn); } else { - WRITE(FRm, Rn); + MWRITE(FRm, Rn); } return 0; @@ -240,12 +240,12 @@ fmov_reg_dec(struct sh_fpu_soft_struct *fregs, struct pt_regs *regs, int m, if (FPSCR_SZ) { FMOV_EXT(m); Rn -= 8; - WRITE(FRm, Rn + 4); + MWRITE(FRm, Rn + 4); m++; - WRITE(FRm, Rn); + MWRITE(FRm, Rn); } else { Rn -= 4; - WRITE(FRm, Rn); + MWRITE(FRm, Rn); } return 0; @@ -445,11 +445,11 @@ id_sys(struct sh_fpu_soft_struct *fregs, struct pt_regs *regs, u16 code) case 0x4052: case 0x4062: Rn -= 4; - WRITE(*reg, Rn); + MWRITE(*reg, Rn); break; case 0x4056: case 0x4066: - READ(*reg, Rn); + MREAD(*reg, Rn); Rn += 4; break; default: @@ -468,109 +468,6 @@ static int fpu_emulate(u16 code, struct sh_fpu_soft_struct *fregs, struct pt_reg } /** - * denormal_to_double - Given denormalized float number, - * store double float - * - * @fpu: Pointer to sh_fpu_soft structure - * @n: Index to FP register - */ -static void denormal_to_double(struct sh_fpu_soft_struct *fpu, int n) -{ - unsigned long du, dl; - unsigned long x = fpu->fpul; - int exp = 1023 - 126; - - if (x != 0 && (x & 0x7f800000) == 0) { - du = (x & 0x80000000); - while ((x & 0x00800000) == 0) { - x <<= 1; - exp--; - } - x &= 0x007fffff; - du |= (exp << 20) | (x >> 3); - dl = x << 29; - - fpu->fp_regs[n] = du; - fpu->fp_regs[n+1] = dl; - } -} - -/** - * ieee_fpe_handler - Handle denormalized number exception - * - * @regs: Pointer to register structure - * - * Returns 1 when it's handled (should not cause exception). - */ -static int ieee_fpe_handler(struct pt_regs *regs) -{ - unsigned short insn = *(unsigned short *)regs->pc; - unsigned short finsn; - unsigned long nextpc; - int nib[4] = { - (insn >> 12) & 0xf, - (insn >> 8) & 0xf, - (insn >> 4) & 0xf, - insn & 0xf}; - - if (nib[0] == 0xb || - (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb)) /* bsr & jsr */ - regs->pr = regs->pc + 4; - - if (nib[0] == 0xa || nib[0] == 0xb) { /* bra & bsr */ - nextpc = regs->pc + 4 + ((short) ((insn & 0xfff) << 4) >> 3); - finsn = *(unsigned short *) (regs->pc + 2); - } else if (nib[0] == 0x8 && nib[1] == 0xd) { /* bt/s */ - if (regs->sr & 1) - nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1); - else - nextpc = regs->pc + 4; - finsn = *(unsigned short *) (regs->pc + 2); - } else if (nib[0] == 0x8 && nib[1] == 0xf) { /* bf/s */ - if (regs->sr & 1) - nextpc = regs->pc + 4; - else - nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1); - finsn = *(unsigned short *) (regs->pc + 2); - } else if (nib[0] == 0x4 && nib[3] == 0xb && - (nib[2] == 0x0 || nib[2] == 0x2)) { /* jmp & jsr */ - nextpc = regs->regs[nib[1]]; - finsn = *(unsigned short *) (regs->pc + 2); - } else if (nib[0] == 0x0 && nib[3] == 0x3 && - (nib[2] == 0x0 || nib[2] == 0x2)) { /* braf & bsrf */ - nextpc = regs->pc + 4 + regs->regs[nib[1]]; - finsn = *(unsigned short *) (regs->pc + 2); - } else if (insn == 0x000b) { /* rts */ - nextpc = regs->pr; - finsn = *(unsigned short *) (regs->pc + 2); - } else { - nextpc = regs->pc + 2; - finsn = insn; - } - - if ((finsn & 0xf1ff) == 0xf0ad) { /* fcnvsd */ - struct task_struct *tsk = current; - - if ((tsk->thread.xstate->softfpu.fpscr & (1 << 17))) { - /* FPU error */ - denormal_to_double (&tsk->thread.xstate->softfpu, - (finsn >> 8) & 0xf); - tsk->thread.xstate->softfpu.fpscr &= - ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK); - task_thread_info(tsk)->status |= TS_USEDFPU; - } else { - force_sig_fault(SIGFPE, FPE_FLTINV, - (void __user *)regs->pc); - } - - regs->pc = nextpc; - return 1; - } - - return 0; -} - -/** * fpu_init - Initialize FPU registers * @fpu: Pointer to software emulated FPU registers. */ diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c index 88a1f453d73e..1e1aa75df3ca 100644 --- a/arch/sh/mm/fault.c +++ b/arch/sh/mm/fault.c @@ -238,8 +238,6 @@ no_context(struct pt_regs *regs, unsigned long error_code, show_fault_oops(regs, address); die("Oops", regs, error_code); - bust_spinlocks(0); - do_exit(SIGKILL); } static void diff --git a/arch/sh/mm/nommu.c b/arch/sh/mm/nommu.c index 8b4504413c5f..78c4b6e6d33b 100644 --- a/arch/sh/mm/nommu.c +++ b/arch/sh/mm/nommu.c @@ -28,9 +28,9 @@ __kernel_size_t __copy_user(void *to, const void *from, __kernel_size_t n) return 0; } -__kernel_size_t __clear_user(void *to, __kernel_size_t n) +__kernel_size_t __clear_user(void __user *to, __kernel_size_t n) { - memset(to, 0, n); + memset((__force void *)to, 0, n); return 0; } diff --git a/arch/sparc/Kbuild b/arch/sparc/Kbuild index c9e574906a9b..71cb3d934bf6 100644 --- a/arch/sparc/Kbuild +++ b/arch/sparc/Kbuild @@ -9,3 +9,6 @@ obj-y += math-emu/ obj-y += net/ obj-y += crypto/ obj-$(CONFIG_SPARC64) += vdso/ + +# for cleaning +subdir- += boot diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index b120ed947f50..66fc08646be5 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -53,8 +53,9 @@ config SPARC32 def_bool !64BIT select ARCH_32BIT_OFF_T select ARCH_HAS_SYNC_DMA_FOR_CPU - select GENERIC_ATOMIC64 select CLZ_TAB + select DMA_DIRECT_REMAP + select GENERIC_ATOMIC64 select HAVE_UID16 select OLD_SIGACTION select ZONE_DMA diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile index 24fb5a99f439..c7008bbebc4c 100644 --- a/arch/sparc/Makefile +++ b/arch/sparc/Makefile @@ -75,9 +75,6 @@ install: sh $(srctree)/$(boot)/install.sh $(KERNELRELEASE) $(KBUILD_IMAGE) \ System.map "$(INSTALL_PATH)" -archclean: - $(Q)$(MAKE) $(clean)=$(boot) - archheaders: $(Q)$(MAKE) $(build)=arch/sparc/kernel/syscalls all diff --git a/arch/sparc/boot/Makefile b/arch/sparc/boot/Makefile index 849236d4eca4..45e5c76d449e 100644 --- a/arch/sparc/boot/Makefile +++ b/arch/sparc/boot/Makefile @@ -22,7 +22,7 @@ ifeq ($(CONFIG_SPARC64),y) # Actual linking -$(obj)/zImage: $(obj)/image +$(obj)/zImage: $(obj)/image FORCE $(call if_changed,gzip) @echo ' kernel: $@ is ready' @@ -31,7 +31,7 @@ $(obj)/vmlinux.aout: vmlinux FORCE @echo ' kernel: $@ is ready' else -$(obj)/zImage: $(obj)/image +$(obj)/zImage: $(obj)/image FORCE $(call if_changed,strip) @echo ' kernel: $@ is ready' @@ -44,7 +44,7 @@ OBJCOPYFLAGS_image.bin := -S -O binary -R .note -R .comment $(obj)/image.bin: $(obj)/image FORCE $(call if_changed,objcopy) -$(obj)/image.gz: $(obj)/image.bin +$(obj)/image.gz: $(obj)/image.bin FORCE $(call if_changed,gzip) UIMAGE_LOADADDR = $(CONFIG_UBOOT_LOAD_ADDR) @@ -56,7 +56,7 @@ quiet_cmd_uimage.o = UIMAGE.O $@ -r -b binary $@ -o $@.o targets += uImage -$(obj)/uImage: $(obj)/image.gz +$(obj)/uImage: $(obj)/image.gz FORCE $(call if_changed,uimage) $(call if_changed,uimage.o) @echo ' Image $@ is ready' diff --git a/arch/sparc/include/asm/syscall.h b/arch/sparc/include/asm/syscall.h index 62a5a78804c4..20c109ac8cc9 100644 --- a/arch/sparc/include/asm/syscall.h +++ b/arch/sparc/include/asm/syscall.h @@ -117,16 +117,6 @@ static inline void syscall_get_arguments(struct task_struct *task, } } -static inline void syscall_set_arguments(struct task_struct *task, - struct pt_regs *regs, - const unsigned long *args) -{ - unsigned int i; - - for (i = 0; i < 6; i++) - regs->u_regs[UREG_I0 + i] = args[i]; -} - static inline int syscall_get_arch(struct task_struct *task) { #if defined(CONFIG_SPARC64) && defined(CONFIG_COMPAT) diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c index 7ceae24b0ca9..57a72c46eddb 100644 --- a/arch/sparc/kernel/ioport.c +++ b/arch/sparc/kernel/ioport.c @@ -52,17 +52,6 @@ #include <asm/io-unit.h> #include <asm/leon.h> -/* This function must make sure that caches and memory are coherent after DMA - * On LEON systems without cache snooping it flushes the entire D-CACHE. - */ -static inline void dma_make_coherent(unsigned long pa, unsigned long len) -{ - if (sparc_cpu_model == sparc_leon) { - if (!sparc_leon3_snooping_enabled()) - leon_flush_dcache_all(); - } -} - static void __iomem *_sparc_ioremap(struct resource *res, u32 bus, u32 pa, int sz); static void __iomem *_sparc_alloc_io(unsigned int busno, unsigned long phys, unsigned long size, char *name); @@ -311,68 +300,19 @@ arch_initcall(sparc_register_ioport); #endif /* CONFIG_SBUS */ - -/* Allocate and map kernel buffer using consistent mode DMA for a device. - * hwdev should be valid struct pci_dev pointer for PCI devices. - */ -void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, - gfp_t gfp, unsigned long attrs) -{ - unsigned long addr; - void *va; - - if (!size || size > 256 * 1024) /* __get_free_pages() limit */ - return NULL; - - size = PAGE_ALIGN(size); - va = (void *) __get_free_pages(gfp | __GFP_ZERO, get_order(size)); - if (!va) { - printk("%s: no %zd pages\n", __func__, size >> PAGE_SHIFT); - return NULL; - } - - addr = sparc_dma_alloc_resource(dev, size); - if (!addr) - goto err_nomem; - - srmmu_mapiorange(0, virt_to_phys(va), addr, size); - - *dma_handle = virt_to_phys(va); - return (void *)addr; - -err_nomem: - free_pages((unsigned long)va, get_order(size)); - return NULL; -} - -/* Free and unmap a consistent DMA buffer. - * cpu_addr is what was returned arch_dma_alloc, size must be the same as what - * was passed into arch_dma_alloc, and likewise dma_addr must be the same as - * what *dma_ndler was set to. +/* + * IIep is write-through, not flushing on cpu to device transfer. * - * References to the memory and mappings associated with cpu_addr/dma_addr - * past this call are illegal. + * On LEON systems without cache snooping, the entire D-CACHE must be flushed to + * make DMA to cacheable memory coherent. */ -void arch_dma_free(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t dma_addr, unsigned long attrs) -{ - size = PAGE_ALIGN(size); - - if (!sparc_dma_free_resource(cpu_addr, size)) - return; - - dma_make_coherent(dma_addr, size); - srmmu_unmapiorange((unsigned long)cpu_addr, size); - free_pages((unsigned long)phys_to_virt(dma_addr), get_order(size)); -} - -/* IIep is write-through, not flushing on cpu to device transfer. */ - void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, enum dma_data_direction dir) { - if (dir != PCI_DMA_TODEVICE) - dma_make_coherent(paddr, PAGE_ALIGN(size)); + if (dir != PCI_DMA_TODEVICE && + sparc_cpu_model == sparc_leon && + !sparc_leon3_snooping_enabled()) + leon_flush_dcache_all(); } #ifdef CONFIG_PROC_FS diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c index 9c2b720bfd20..31b0c1983286 100644 --- a/arch/sparc/kernel/pci.c +++ b/arch/sparc/kernel/pci.c @@ -1010,7 +1010,7 @@ void pcibios_set_master(struct pci_dev *dev) } #ifdef CONFIG_PCI_IOV -int pcibios_add_device(struct pci_dev *dev) +int pcibios_device_add(struct pci_dev *dev) { struct pci_dev *pdev; diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c index 02f3ad55dfe3..ffab16369bea 100644 --- a/arch/sparc/kernel/signal_32.c +++ b/arch/sparc/kernel/signal_32.c @@ -244,7 +244,7 @@ static int setup_frame(struct ksignal *ksig, struct pt_regs *regs, get_sigframe(ksig, regs, sigframe_size); if (invalid_frame_pointer(sf, sigframe_size)) { - do_exit(SIGILL); + force_exit_sig(SIGILL); return -EINVAL; } @@ -336,7 +336,7 @@ static int setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs, sf = (struct rt_signal_frame __user *) get_sigframe(ksig, regs, sigframe_size); if (invalid_frame_pointer(sf, sigframe_size)) { - do_exit(SIGILL); + force_exit_sig(SIGILL); return -EINVAL; } diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 0224d8f19ed6..b98a7bbe6728 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1567,7 +1567,7 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size, static void __init pcpu_free_bootmem(void *ptr, size_t size) { - memblock_free(__pa(ptr), size); + memblock_free(ptr, size); } static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) diff --git a/arch/sparc/kernel/windows.c b/arch/sparc/kernel/windows.c index 69a6ba6e9293..8f20862ccc83 100644 --- a/arch/sparc/kernel/windows.c +++ b/arch/sparc/kernel/windows.c @@ -121,8 +121,10 @@ void try_to_clear_window_buffer(struct pt_regs *regs, int who) if ((sp & 7) || copy_to_user((char __user *) sp, &tp->reg_window[window], - sizeof(struct reg_window32))) - do_exit(SIGILL); + sizeof(struct reg_window32))) { + force_exit_sig(SIGILL); + return; + } } tp->w_saved = 0; } diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c index fa858626b85b..90dc4ae315c8 100644 --- a/arch/sparc/mm/fault_32.c +++ b/arch/sparc/mm/fault_32.c @@ -248,7 +248,6 @@ no_context: } unhandled_fault(address, tsk, regs); - do_exit(SIGKILL); /* * We ran out of memory, or some other thing happened to us that made diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c index 0dce4b7ff73e..912205787161 100644 --- a/arch/sparc/mm/tsb.c +++ b/arch/sparc/mm/tsb.c @@ -266,7 +266,7 @@ static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsign default: printk(KERN_ERR "TSB[%s:%d]: Impossible TSB size %lu, killing process.\n", current->comm, current->pid, tsb_bytes); - do_exit(SIGSEGV); + BUG(); } tte |= pte_sz_bits(page_sz); diff --git a/arch/um/include/asm/syscall-generic.h b/arch/um/include/asm/syscall-generic.h index 2984feb9d576..172b74143c4b 100644 --- a/arch/um/include/asm/syscall-generic.h +++ b/arch/um/include/asm/syscall-generic.h @@ -62,20 +62,6 @@ static inline void syscall_get_arguments(struct task_struct *task, *args = UPT_SYSCALL_ARG6(r); } -static inline void syscall_set_arguments(struct task_struct *task, - struct pt_regs *regs, - const unsigned long *args) -{ - struct uml_pt_regs *r = ®s->regs; - - UPT_SYSCALL_ARG1(r) = *args++; - UPT_SYSCALL_ARG2(r) = *args++; - UPT_SYSCALL_ARG3(r) = *args++; - UPT_SYSCALL_ARG4(r) = *args++; - UPT_SYSCALL_ARG5(r) = *args++; - UPT_SYSCALL_ARG6(r) = *args; -} - /* See arch/x86/um/asm/syscall.h for syscall_get_arch() definition. */ #endif /* __UM_SYSCALL_GENERIC_H */ diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index 8e636ce02949..0039771eb01c 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -47,7 +47,7 @@ void __init mem_init(void) */ brk_end = (unsigned long) UML_ROUND_UP(sbrk(0)); map_memory(brk_end, __pa(brk_end), uml_reserved - brk_end, 1, 1, 0); - memblock_free(__pa(brk_end), uml_reserved - brk_end); + memblock_free((void *)brk_end, uml_reserved - brk_end); uml_reserved = brk_end; /* this will put all low memory onto the freelists */ diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index 3198c4767387..c32efb09db21 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -158,7 +158,7 @@ static void bad_segv(struct faultinfo fi, unsigned long ip) void fatal_sigsegv(void) { - force_sigsegv(SIGSEGV); + force_fatal_sig(SIGSEGV); do_signal(¤t->thread.regs); /* * This is to tell gcc that we're not returning - do_signal diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild index 30dec019756b..f384cb1a4f7a 100644 --- a/arch/x86/Kbuild +++ b/arch/x86/Kbuild @@ -25,3 +25,6 @@ obj-y += platform/ obj-y += net/ obj-$(CONFIG_KEXEC_FILE) += purgatory/ + +# for cleaning +subdir- += boot tools diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b1d4b481fcdd..7399327d1eff 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -63,7 +63,7 @@ config X86 select ARCH_CLOCKSOURCE_INIT select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE select ARCH_ENABLE_HUGEPAGE_MIGRATION if X86_64 && HUGETLB_PAGE && MIGRATION - select ARCH_ENABLE_MEMORY_HOTPLUG if X86_64 || (X86_32 && HIGHMEM) + select ARCH_ENABLE_MEMORY_HOTPLUG if X86_64 select ARCH_ENABLE_MEMORY_HOTREMOVE if MEMORY_HOTPLUG select ARCH_ENABLE_SPLIT_PMD_PTLOCK if (PGTABLE_LEVELS > 2) && (X86_64 || X86_PAE) select ARCH_ENABLE_THP_MIGRATION if X86_64 && TRANSPARENT_HUGEPAGE @@ -192,6 +192,8 @@ config X86 select HAVE_DYNAMIC_FTRACE_WITH_REGS select HAVE_DYNAMIC_FTRACE_WITH_ARGS if X86_64 select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + select HAVE_SAMPLE_FTRACE_DIRECT if X86_64 + select HAVE_SAMPLE_FTRACE_DIRECT_MULTI if X86_64 select HAVE_EBPF_JIT select HAVE_EFFICIENT_UNALIGNED_ACCESS select HAVE_EISA @@ -1627,7 +1629,7 @@ config ARCH_SELECT_MEMORY_MODEL config ARCH_MEMORY_PROBE bool "Enable sysfs memory/probe interface" - depends on X86_64 && MEMORY_HOTPLUG + depends on MEMORY_HOTPLUG help This option enables a sysfs memory/probe interface for testing. See Documentation/admin-guide/mm/memory-hotplug.rst for more information. @@ -2423,7 +2425,7 @@ endmenu config ARCH_HAS_ADD_PAGES def_bool y - depends on X86_64 && ARCH_ENABLE_MEMORY_HOTPLUG + depends on ARCH_ENABLE_MEMORY_HOTPLUG config ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE def_bool y diff --git a/arch/x86/Makefile b/arch/x86/Makefile index aab70413ae7a..42243869216d 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -283,8 +283,6 @@ endif archclean: $(Q)rm -rf $(objtree)/arch/i386 $(Q)rm -rf $(objtree)/arch/x86_64 - $(Q)$(MAKE) $(clean)=$(boot) - $(Q)$(MAKE) $(clean)=arch/x86/tools define archhelp echo '* bzImage - Compressed kernel image (arch/x86/boot/bzImage)' diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index 1b40b9297083..fd2ee9408e91 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -226,7 +226,8 @@ bool emulate_vsyscall(unsigned long error_code, if ((!tmp && regs->orig_ax != syscall_nr) || regs->ip != address) { warn_bad_vsyscall(KERN_DEBUG, regs, "seccomp tried to change syscall nr or ip"); - do_exit(SIGSYS); + force_exit_sig(SIGSYS); + return true; } regs->orig_ax = -1; if (tmp) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 603964408d2d..ec6444f2c9dc 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2211,7 +2211,6 @@ intel_pmu_snapshot_branch_stack(struct perf_branch_entry *entries, unsigned int /* must not have branches... */ local_irq_save(flags); __intel_pmu_disable_all(false); /* we don't care about BTS */ - __intel_pmu_pebs_disable_all(); __intel_pmu_lbr_disable(); /* ... until here */ return __intel_pmu_snapshot_branch_stack(entries, cnt, flags); @@ -2225,7 +2224,6 @@ intel_pmu_snapshot_arch_branch_stack(struct perf_branch_entry *entries, unsigned /* must not have branches... */ local_irq_save(flags); __intel_pmu_disable_all(false); /* we don't care about BTS */ - __intel_pmu_pebs_disable_all(); __intel_pmu_arch_lbr_disable(); /* ... until here */ return __intel_pmu_snapshot_branch_stack(entries, cnt, flags); @@ -3048,8 +3046,10 @@ intel_vlbr_constraints(struct perf_event *event) { struct event_constraint *c = &vlbr_constraint; - if (unlikely(constraint_match(c, event->hw.config))) + if (unlikely(constraint_match(c, event->hw.config))) { + event->hw.flags |= c->flags; return c; + } return NULL; } diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 6b72e9b55c69..8043213b75a5 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -265,6 +265,8 @@ void intel_pmu_lbr_reset(void) cpuc->last_task_ctx = NULL; cpuc->last_log_id = 0; + if (!static_cpu_has(X86_FEATURE_ARCH_LBR) && cpuc->lbr_select) + wrmsrl(MSR_LBR_SELECT, 0); } /* diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index c72e368dd164..f1ba6ab2e97e 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1187,7 +1187,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id * PCI slot and func to indicate the uncore box. */ if (id->driver_data & ~0xffff) { - struct pci_driver *pci_drv = pdev->driver; + struct pci_driver *pci_drv = to_pci_driver(pdev->dev.driver); pmu = uncore_pci_find_dev_pmu(pdev, pci_drv->id_table); if (pmu == NULL) diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index eb2c6cea9d0d..3660f698fb2a 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -3608,6 +3608,9 @@ static int skx_cha_hw_config(struct intel_uncore_box *box, struct perf_event *ev struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; struct extra_reg *er; int idx = 0; + /* Any of the CHA events may be filtered by Thread/Core-ID.*/ + if (event->hw.config & SNBEP_CBO_PMON_CTL_TID_EN) + idx = SKX_CHA_MSR_PMON_BOX_FILTER_TID; for (er = skx_uncore_cha_extra_regs; er->msr; er++) { if (er->event != (event->hw.config & er->config_mask)) @@ -3675,6 +3678,7 @@ static struct event_constraint skx_uncore_iio_constraints[] = { UNCORE_EVENT_CONSTRAINT(0xc0, 0xc), UNCORE_EVENT_CONSTRAINT(0xc5, 0xc), UNCORE_EVENT_CONSTRAINT(0xd4, 0xc), + UNCORE_EVENT_CONSTRAINT(0xd5, 0xc), EVENT_CONSTRAINT_END }; @@ -4525,6 +4529,13 @@ static void snr_iio_cleanup_mapping(struct intel_uncore_type *type) pmu_iio_cleanup_mapping(type, &snr_iio_mapping_group); } +static struct event_constraint snr_uncore_iio_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x83, 0x3), + UNCORE_EVENT_CONSTRAINT(0xc0, 0xc), + UNCORE_EVENT_CONSTRAINT(0xd5, 0xc), + EVENT_CONSTRAINT_END +}; + static struct intel_uncore_type snr_uncore_iio = { .name = "iio", .num_counters = 4, @@ -4536,6 +4547,7 @@ static struct intel_uncore_type snr_uncore_iio = { .event_mask_ext = SNR_IIO_PMON_RAW_EVENT_MASK_EXT, .box_ctl = SNR_IIO_MSR_PMON_BOX_CTL, .msr_offset = SNR_IIO_MSR_OFFSET, + .constraints = snr_uncore_iio_constraints, .ops = &ivbep_uncore_msr_ops, .format_group = &snr_uncore_iio_format_group, .attr_update = snr_iio_attr_update, diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 24f4a06ac46a..96eb7db31c8e 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -177,6 +177,9 @@ void set_hv_tscchange_cb(void (*cb)(void)) return; } + if (!hv_vp_index) + return; + hv_reenlightenment_cb = cb; /* Make sure callback is registered before we write to MSRs */ @@ -383,20 +386,13 @@ static void __init hv_get_partition_id(void) */ void __init hyperv_init(void) { - u64 guest_id, required_msrs; + u64 guest_id; union hv_x64_msr_hypercall_contents hypercall_msr; int cpuhp; if (x86_hyper_type != X86_HYPER_MS_HYPERV) return; - /* Absolutely required MSRs */ - required_msrs = HV_MSR_HYPERCALL_AVAILABLE | - HV_MSR_VP_INDEX_AVAILABLE; - - if ((ms_hyperv.features & required_msrs) != required_msrs) - return; - if (hv_common_init()) return; diff --git a/arch/x86/include/asm/fpu/xcr.h b/arch/x86/include/asm/fpu/xcr.h index 79f95d3787e2..9656a5bc6fea 100644 --- a/arch/x86/include/asm/fpu/xcr.h +++ b/arch/x86/include/asm/fpu/xcr.h @@ -3,6 +3,7 @@ #define _ASM_X86_FPU_XCR_H #define XCR_XFEATURE_ENABLED_MASK 0x00000000 +#define XCR_XFEATURE_IN_USE_MASK 0x00000001 static inline u64 xgetbv(u32 index) { @@ -20,4 +21,15 @@ static inline void xsetbv(u32 index, u64 value) asm volatile("xsetbv" :: "a" (eax), "d" (edx), "c" (index)); } +/* + * Return a mask of xfeatures which are currently being tracked + * by the processor as being in the initial configuration. + * + * Callers should check X86_FEATURE_XGETBV1. + */ +static inline u64 xfeatures_in_use(void) +{ + return xgetbv(XCR_XFEATURE_IN_USE_MASK); +} + #endif /* _ASM_X86_FPU_XCR_H */ diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 0f8b90ab18c9..cd3dd170e23a 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -92,6 +92,13 @@ #define XFEATURE_MASK_FPSTATE (XFEATURE_MASK_USER_RESTORE | \ XFEATURE_MASK_SUPERVISOR_SUPPORTED) +/* + * Features in this mask have space allocated in the signal frame, but may not + * have that space initialized when the feature is in its init state. + */ +#define XFEATURE_MASK_SIGFRAME_INITOPT (XFEATURE_MASK_XTILE | \ + XFEATURE_MASK_USER_DYNAMIC) + extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; extern void __init update_regset_xstate_info(unsigned int size, diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 27158436f322..5a0bcf8b78d7 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -108,6 +108,8 @@ #define INTEL_FAM6_ALDERLAKE 0x97 /* Golden Cove / Gracemont */ #define INTEL_FAM6_ALDERLAKE_L 0x9A /* Golden Cove / Gracemont */ +#define INTEL_FAM6_RAPTOR_LAKE 0xB7 + /* "Small Core" Processors (Atom) */ #define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 2acf37cc1991..6ac61f85e07b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -38,7 +38,6 @@ #define __KVM_HAVE_ARCH_VCPU_DEBUGFS #define KVM_MAX_VCPUS 1024 -#define KVM_SOFT_MAX_VCPUS 710 /* * In x86, the VCPU ID corresponds to the APIC ID, and APIC IDs @@ -364,6 +363,7 @@ union kvm_mmu_extended_role { unsigned int cr4_smap:1; unsigned int cr4_smep:1; unsigned int cr4_la57:1; + unsigned int efer_lma:1; }; }; @@ -725,6 +725,7 @@ struct kvm_vcpu_arch { int cpuid_nent; struct kvm_cpuid_entry2 *cpuid_entries; + u32 kvm_cpuid_base; u64 reserved_gpa_bits; int maxphyaddr; @@ -748,7 +749,7 @@ struct kvm_vcpu_arch { u8 preempted; u64 msr_val; u64 last_steal; - struct gfn_to_pfn_cache cache; + struct gfn_to_hva_cache cache; } st; u64 l1_tsc_offset; @@ -1034,6 +1035,7 @@ struct kvm_x86_msr_filter { #define APICV_INHIBIT_REASON_IRQWIN 3 #define APICV_INHIBIT_REASON_PIT_REINJ 4 #define APICV_INHIBIT_REASON_X2APIC 5 +#define APICV_INHIBIT_REASON_BLOCKIRQ 6 struct kvm_arch { unsigned long n_used_mmu_pages; @@ -1476,6 +1478,7 @@ struct kvm_x86_ops { int (*mem_enc_reg_region)(struct kvm *kvm, struct kvm_enc_region *argp); int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp); int (*vm_copy_enc_context_from)(struct kvm *kvm, unsigned int source_fd); + int (*vm_move_enc_context_from)(struct kvm *kvm, unsigned int source_fd); int (*get_msr_feature)(struct kvm_msr_entry *entry); diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 69299878b200..56935ebb1dfe 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -83,6 +83,18 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, return ret; } +static inline long kvm_sev_hypercall3(unsigned int nr, unsigned long p1, + unsigned long p2, unsigned long p3) +{ + long ret; + + asm volatile("vmmcall" + : "=a"(ret) + : "a"(nr), "b"(p1), "c"(p2), "d"(p3) + : "memory"); + return ret; +} + #ifdef CONFIG_KVM_GUEST void kvmclock_init(void); void kvmclock_disable(void); diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h index 2d4f5c17d79c..e2c6f433ed10 100644 --- a/arch/x86/include/asm/mem_encrypt.h +++ b/arch/x86/include/asm/mem_encrypt.h @@ -44,6 +44,8 @@ void __init sme_enable(struct boot_params *bp); int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size); int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size); +void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages, + bool enc); void __init mem_encrypt_free_decrypted_mem(void); @@ -78,6 +80,8 @@ static inline int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size) { return 0; } static inline int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; } +static inline void __init +early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages, bool enc) {} static inline void mem_encrypt_free_decrypted_mem(void) { } diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index cebec95a7124..21c4a694ca11 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -97,6 +97,12 @@ static inline void paravirt_arch_exit_mmap(struct mm_struct *mm) PVOP_VCALL1(mmu.exit_mmap, mm); } +static inline void notify_page_enc_status_changed(unsigned long pfn, + int npages, bool enc) +{ + PVOP_VCALL3(mmu.notify_page_enc_status_changed, pfn, npages, enc); +} + #ifdef CONFIG_PARAVIRT_XXL static inline void load_sp0(unsigned long sp0) { diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index d9d6b0203ec4..a69012e1903f 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -168,6 +168,7 @@ struct pv_mmu_ops { /* Hook for intercepting the destruction of an mm_struct. */ void (*exit_mmap)(struct mm_struct *mm); + void (*notify_page_enc_status_changed)(unsigned long pfn, int npages, bool enc); #ifdef CONFIG_PARAVIRT_XXL struct paravirt_callee_save read_cr2; @@ -577,7 +578,9 @@ void paravirt_leave_lazy_mmu(void); void paravirt_flush_lazy_mmu(void); void _paravirt_nop(void); +void paravirt_BUG(void); u64 _paravirt_ident_64(u64); +unsigned long paravirt_ret0(void); #define paravirt_nop ((void *)_paravirt_nop) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 191878a65c61..355d38c0cf60 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -806,11 +806,14 @@ static inline u32 amd_get_nodes_per_socket(void) { return 0; } static inline u32 amd_get_highest_perf(void) { return 0; } #endif +#define for_each_possible_hypervisor_cpuid_base(function) \ + for (function = 0x40000000; function < 0x40010000; function += 0x100) + static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves) { uint32_t base, eax, signature[3]; - for (base = 0x40000000; base < 0x40010000; base += 0x100) { + for_each_possible_hypervisor_cpuid_base(base) { cpuid(base, &eax, &signature[0], &signature[1], &signature[2]); if (!memcmp(sig, signature, 12) && diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h index 43fa081a1adb..872617542bbc 100644 --- a/arch/x86/include/asm/set_memory.h +++ b/arch/x86/include/asm/set_memory.h @@ -83,6 +83,7 @@ int set_pages_rw(struct page *page, int numpages); int set_direct_map_invalid_noflush(struct page *page); int set_direct_map_default_noflush(struct page *page); bool kernel_page_present(struct page *page); +void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc); extern int kernel_set_to_readonly; diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 08b0e90623ad..81a0211a372d 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -126,6 +126,7 @@ static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask) void cpu_disable_common(void); void native_smp_prepare_boot_cpu(void); +void smp_prepare_cpus_common(void); void native_smp_prepare_cpus(unsigned int max_cpus); void calculate_max_logical_packages(void); void native_smp_cpus_done(unsigned int max_cpus); diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h index cbb67b6030f9..39ebe0511869 100644 --- a/arch/x86/include/asm/static_call.h +++ b/arch/x86/include/asm/static_call.h @@ -27,6 +27,7 @@ ".globl " STATIC_CALL_TRAMP_STR(name) " \n" \ STATIC_CALL_TRAMP_STR(name) ": \n" \ insns " \n" \ + ".byte 0x53, 0x43, 0x54 \n" \ ".type " STATIC_CALL_TRAMP_STR(name) ", @function \n" \ ".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \ ".popsection \n") diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index f7e2d82d24fb..5b85987a5e97 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -87,15 +87,6 @@ static inline void syscall_get_arguments(struct task_struct *task, memcpy(args, ®s->bx, 6 * sizeof(args[0])); } -static inline void syscall_set_arguments(struct task_struct *task, - struct pt_regs *regs, - unsigned int i, unsigned int n, - const unsigned long *args) -{ - BUG_ON(i + n > 6); - memcpy(®s->bx + i, args, n * sizeof(args[0])); -} - static inline int syscall_get_arch(struct task_struct *task) { return AUDIT_ARCH_I386; @@ -127,30 +118,6 @@ static inline void syscall_get_arguments(struct task_struct *task, } } -static inline void syscall_set_arguments(struct task_struct *task, - struct pt_regs *regs, - const unsigned long *args) -{ -# ifdef CONFIG_IA32_EMULATION - if (task->thread_info.status & TS_COMPAT) { - regs->bx = *args++; - regs->cx = *args++; - regs->dx = *args++; - regs->si = *args++; - regs->di = *args++; - regs->bp = *args; - } else -# endif - { - regs->di = *args++; - regs->si = *args++; - regs->dx = *args++; - regs->r10 = *args++; - regs->r8 = *args++; - regs->r9 = *args; - } -} - static inline int syscall_get_arch(struct task_struct *task) { /* x32 tasks should be considered AUDIT_ARCH_X86_64. */ diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index 4a7ff8b0db20..0575f5863b7f 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -248,6 +248,7 @@ privcmd_call(unsigned int call, return res; } +#ifdef CONFIG_XEN_PV static inline int HYPERVISOR_set_trap_table(struct trap_info *table) { @@ -281,6 +282,107 @@ HYPERVISOR_callback_op(int cmd, void *arg) } static inline int +HYPERVISOR_set_debugreg(int reg, unsigned long value) +{ + return _hypercall2(int, set_debugreg, reg, value); +} + +static inline unsigned long +HYPERVISOR_get_debugreg(int reg) +{ + return _hypercall1(unsigned long, get_debugreg, reg); +} + +static inline int +HYPERVISOR_update_descriptor(u64 ma, u64 desc) +{ + return _hypercall2(int, update_descriptor, ma, desc); +} + +static inline int +HYPERVISOR_update_va_mapping(unsigned long va, pte_t new_val, + unsigned long flags) +{ + return _hypercall3(int, update_va_mapping, va, new_val.pte, flags); +} + +static inline int +HYPERVISOR_set_segment_base(int reg, unsigned long value) +{ + return _hypercall2(int, set_segment_base, reg, value); +} + +static inline void +MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set) +{ + mcl->op = __HYPERVISOR_fpu_taskswitch; + mcl->args[0] = set; + + trace_xen_mc_entry(mcl, 1); +} + +static inline void +MULTI_update_va_mapping(struct multicall_entry *mcl, unsigned long va, + pte_t new_val, unsigned long flags) +{ + mcl->op = __HYPERVISOR_update_va_mapping; + mcl->args[0] = va; + mcl->args[1] = new_val.pte; + mcl->args[2] = flags; + + trace_xen_mc_entry(mcl, 3); +} + +static inline void +MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr, + struct desc_struct desc) +{ + mcl->op = __HYPERVISOR_update_descriptor; + mcl->args[0] = maddr; + mcl->args[1] = *(unsigned long *)&desc; + + trace_xen_mc_entry(mcl, 2); +} + +static inline void +MULTI_mmu_update(struct multicall_entry *mcl, struct mmu_update *req, + int count, int *success_count, domid_t domid) +{ + mcl->op = __HYPERVISOR_mmu_update; + mcl->args[0] = (unsigned long)req; + mcl->args[1] = count; + mcl->args[2] = (unsigned long)success_count; + mcl->args[3] = domid; + + trace_xen_mc_entry(mcl, 4); +} + +static inline void +MULTI_mmuext_op(struct multicall_entry *mcl, struct mmuext_op *op, int count, + int *success_count, domid_t domid) +{ + mcl->op = __HYPERVISOR_mmuext_op; + mcl->args[0] = (unsigned long)op; + mcl->args[1] = count; + mcl->args[2] = (unsigned long)success_count; + mcl->args[3] = domid; + + trace_xen_mc_entry(mcl, 4); +} + +static inline void +MULTI_stack_switch(struct multicall_entry *mcl, + unsigned long ss, unsigned long esp) +{ + mcl->op = __HYPERVISOR_stack_switch; + mcl->args[0] = ss; + mcl->args[1] = esp; + + trace_xen_mc_entry(mcl, 2); +} +#endif + +static inline int HYPERVISOR_sched_op(int cmd, void *arg) { return _hypercall2(int, sched_op, cmd, arg); @@ -308,26 +410,6 @@ HYPERVISOR_platform_op(struct xen_platform_op *op) return _hypercall1(int, platform_op, op); } -static __always_inline int -HYPERVISOR_set_debugreg(int reg, unsigned long value) -{ - return _hypercall2(int, set_debugreg, reg, value); -} - -static __always_inline unsigned long -HYPERVISOR_get_debugreg(int reg) -{ - return _hypercall1(unsigned long, get_debugreg, reg); -} - -static inline int -HYPERVISOR_update_descriptor(u64 ma, u64 desc) -{ - if (sizeof(u64) == sizeof(long)) - return _hypercall2(int, update_descriptor, ma, desc); - return _hypercall4(int, update_descriptor, ma, ma>>32, desc, desc>>32); -} - static inline long HYPERVISOR_memory_op(unsigned int cmd, void *arg) { @@ -341,18 +423,6 @@ HYPERVISOR_multicall(void *call_list, uint32_t nr_calls) } static inline int -HYPERVISOR_update_va_mapping(unsigned long va, pte_t new_val, - unsigned long flags) -{ - if (sizeof(new_val) == sizeof(long)) - return _hypercall3(int, update_va_mapping, va, - new_val.pte, flags); - else - return _hypercall4(int, update_va_mapping, va, - new_val.pte, new_val.pte >> 32, flags); -} - -static inline int HYPERVISOR_event_channel_op(int cmd, void *arg) { return _hypercall2(int, event_channel_op, cmd, arg); @@ -394,14 +464,6 @@ HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args) return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args); } -#ifdef CONFIG_X86_64 -static inline int -HYPERVISOR_set_segment_base(int reg, unsigned long value) -{ - return _hypercall2(int, set_segment_base, reg, value); -} -#endif - static inline int HYPERVISOR_suspend(unsigned long start_info_mfn) { @@ -423,13 +485,6 @@ HYPERVISOR_hvm_op(int op, void *arg) } static inline int -HYPERVISOR_tmem_op( - struct tmem_op *op) -{ - return _hypercall1(int, tmem_op, op); -} - -static inline int HYPERVISOR_xenpmu_op(unsigned int op, void *arg) { return _hypercall2(int, xenpmu_op, op, arg); @@ -446,88 +501,4 @@ HYPERVISOR_dm_op( return ret; } -static inline void -MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set) -{ - mcl->op = __HYPERVISOR_fpu_taskswitch; - mcl->args[0] = set; - - trace_xen_mc_entry(mcl, 1); -} - -static inline void -MULTI_update_va_mapping(struct multicall_entry *mcl, unsigned long va, - pte_t new_val, unsigned long flags) -{ - mcl->op = __HYPERVISOR_update_va_mapping; - mcl->args[0] = va; - if (sizeof(new_val) == sizeof(long)) { - mcl->args[1] = new_val.pte; - mcl->args[2] = flags; - } else { - mcl->args[1] = new_val.pte; - mcl->args[2] = new_val.pte >> 32; - mcl->args[3] = flags; - } - - trace_xen_mc_entry(mcl, sizeof(new_val) == sizeof(long) ? 3 : 4); -} - -static inline void -MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr, - struct desc_struct desc) -{ - mcl->op = __HYPERVISOR_update_descriptor; - if (sizeof(maddr) == sizeof(long)) { - mcl->args[0] = maddr; - mcl->args[1] = *(unsigned long *)&desc; - } else { - u32 *p = (u32 *)&desc; - - mcl->args[0] = maddr; - mcl->args[1] = maddr >> 32; - mcl->args[2] = *p++; - mcl->args[3] = *p; - } - - trace_xen_mc_entry(mcl, sizeof(maddr) == sizeof(long) ? 2 : 4); -} - -static inline void -MULTI_mmu_update(struct multicall_entry *mcl, struct mmu_update *req, - int count, int *success_count, domid_t domid) -{ - mcl->op = __HYPERVISOR_mmu_update; - mcl->args[0] = (unsigned long)req; - mcl->args[1] = count; - mcl->args[2] = (unsigned long)success_count; - mcl->args[3] = domid; - - trace_xen_mc_entry(mcl, 4); -} - -static inline void -MULTI_mmuext_op(struct multicall_entry *mcl, struct mmuext_op *op, int count, - int *success_count, domid_t domid) -{ - mcl->op = __HYPERVISOR_mmuext_op; - mcl->args[0] = (unsigned long)op; - mcl->args[1] = count; - mcl->args[2] = (unsigned long)success_count; - mcl->args[3] = domid; - - trace_xen_mc_entry(mcl, 4); -} - -static inline void -MULTI_stack_switch(struct multicall_entry *mcl, - unsigned long ss, unsigned long esp) -{ - mcl->op = __HYPERVISOR_stack_switch; - mcl->args[0] = ss; - mcl->args[1] = esp; - - trace_xen_mc_entry(mcl, 2); -} - #endif /* _ASM_X86_XEN_HYPERCALL_H */ diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h index ff4b52e37e60..4957f59deb40 100644 --- a/arch/x86/include/asm/xen/hypervisor.h +++ b/arch/x86/include/asm/xen/hypervisor.h @@ -62,4 +62,8 @@ void xen_arch_register_cpu(int num); void xen_arch_unregister_cpu(int num); #endif +#ifdef CONFIG_PVH +void __init xen_pvh_init(struct boot_params *boot_params); +#endif + #endif /* _ASM_X86_XEN_HYPERVISOR_H */ diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h index 4557f7cb0fa6..9015b888edd6 100644 --- a/arch/x86/include/asm/xen/pci.h +++ b/arch/x86/include/asm/xen/pci.h @@ -22,25 +22,6 @@ static inline int __init pci_xen_initial_domain(void) return -1; } #endif -#ifdef CONFIG_XEN_DOM0 -int xen_find_device_domain_owner(struct pci_dev *dev); -int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain); -int xen_unregister_device_domain_owner(struct pci_dev *dev); -#else -static inline int xen_find_device_domain_owner(struct pci_dev *dev) -{ - return -1; -} -static inline int xen_register_device_domain_owner(struct pci_dev *dev, - uint16_t domain) -{ - return -1; -} -static inline int xen_unregister_device_domain_owner(struct pci_dev *dev) -{ - return -1; -} -#endif #if defined(CONFIG_PCI_MSI) #if defined(CONFIG_PCI_XEN) diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 5146bbab84d4..6e64b27b2c1e 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h @@ -8,6 +8,7 @@ * should be used to determine that a VM is running under KVM. */ #define KVM_CPUID_SIGNATURE 0x40000000 +#define KVM_SIGNATURE "KVMKVMKVM\0\0\0" /* This CPUID returns two feature bitmaps in eax, edx. Before enabling * a particular paravirtualization, the appropriate feature bit should diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 10562885f5fc..af3ba08b684b 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -73,12 +73,23 @@ static int gart_mem_pfn_is_ram(unsigned long pfn) (pfn >= aperture_pfn_start + aperture_page_count)); } +#ifdef CONFIG_PROC_VMCORE +static bool gart_oldmem_pfn_is_ram(struct vmcore_cb *cb, unsigned long pfn) +{ + return !!gart_mem_pfn_is_ram(pfn); +} + +static struct vmcore_cb gart_vmcore_cb = { + .pfn_is_ram = gart_oldmem_pfn_is_ram, +}; +#endif + static void __init exclude_from_core(u64 aper_base, u32 aper_order) { aperture_pfn_start = aper_base >> PAGE_SHIFT; aperture_page_count = (32 * 1024 * 1024) << aper_order >> PAGE_SHIFT; #ifdef CONFIG_PROC_VMCORE - WARN_ON(register_oldmem_pfn_is_ram(&gart_mem_pfn_is_ram)); + register_vmcore_cb(&gart_vmcore_cb); #endif #ifdef CONFIG_PROC_KCORE WARN_ON(register_mem_pfn_is_ram(&gart_mem_pfn_is_ram)); diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c index cb2fdd130aae..c881bcafba7d 100644 --- a/arch/x86/kernel/cpu/cpuid-deps.c +++ b/arch/x86/kernel/cpu/cpuid-deps.c @@ -76,6 +76,7 @@ static const struct cpuid_dep cpuid_deps[] = { { X86_FEATURE_SGX1, X86_FEATURE_SGX }, { X86_FEATURE_SGX2, X86_FEATURE_SGX1 }, { X86_FEATURE_XFD, X86_FEATURE_XSAVES }, + { X86_FEATURE_XFD, X86_FEATURE_XGETBV1 }, { X86_FEATURE_AMX_TILE, X86_FEATURE_XFD }, {} }; diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c index acfd5d9f93c6..bb9a46a804bf 100644 --- a/arch/x86/kernel/cpu/mce/intel.c +++ b/arch/x86/kernel/cpu/mce/intel.c @@ -547,12 +547,13 @@ bool intel_filter_mce(struct mce *m) { struct cpuinfo_x86 *c = &boot_cpu_data; - /* MCE errata HSD131, HSM142, HSW131, BDM48, and HSM142 */ + /* MCE errata HSD131, HSM142, HSW131, BDM48, HSM142 and SKX37 */ if ((c->x86 == 6) && ((c->x86_model == INTEL_FAM6_HASWELL) || (c->x86_model == INTEL_FAM6_HASWELL_L) || (c->x86_model == INTEL_FAM6_BROADWELL) || - (c->x86_model == INTEL_FAM6_HASWELL_G)) && + (c->x86_model == INTEL_FAM6_HASWELL_G) || + (c->x86_model == INTEL_FAM6_SKYLAKE_X)) && (m->bank == 0) && ((m->status & 0xa0000000ffffffff) == 0x80000000000f0005)) return true; diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 4794b716ec79..ff55df60228f 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -163,12 +163,22 @@ static uint32_t __init ms_hyperv_platform(void) cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS, &eax, &hyp_signature[0], &hyp_signature[1], &hyp_signature[2]); - if (eax >= HYPERV_CPUID_MIN && - eax <= HYPERV_CPUID_MAX && - !memcmp("Microsoft Hv", hyp_signature, 12)) - return HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS; + if (eax < HYPERV_CPUID_MIN || eax > HYPERV_CPUID_MAX || + memcmp("Microsoft Hv", hyp_signature, 12)) + return 0; - return 0; + /* HYPERCALL and VP_INDEX MSRs are mandatory for all features. */ + eax = cpuid_eax(HYPERV_CPUID_FEATURES); + if (!(eax & HV_MSR_HYPERCALL_AVAILABLE)) { + pr_warn("x86/hyperv: HYPERCALL MSR not available.\n"); + return 0; + } + if (!(eax & HV_MSR_VP_INDEX_AVAILABLE)) { + pr_warn("x86/hyperv: VP_INDEX MSR not available.\n"); + return 0; + } + + return HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS; } static unsigned char hv_get_nmi_reason(void) diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c index 63d3de02bbcc..8471a8b9b48e 100644 --- a/arch/x86/kernel/cpu/sgx/main.c +++ b/arch/x86/kernel/cpu/sgx/main.c @@ -28,8 +28,7 @@ static DECLARE_WAIT_QUEUE_HEAD(ksgxd_waitq); static LIST_HEAD(sgx_active_page_list); static DEFINE_SPINLOCK(sgx_reclaimer_lock); -/* The free page list lock protected variables prepend the lock. */ -static unsigned long sgx_nr_free_pages; +static atomic_long_t sgx_nr_free_pages = ATOMIC_LONG_INIT(0); /* Nodes with one or more EPC sections. */ static nodemask_t sgx_numa_mask; @@ -403,14 +402,15 @@ skip: spin_lock(&node->lock); list_add_tail(&epc_page->list, &node->free_page_list); - sgx_nr_free_pages++; spin_unlock(&node->lock); + atomic_long_inc(&sgx_nr_free_pages); } } static bool sgx_should_reclaim(unsigned long watermark) { - return sgx_nr_free_pages < watermark && !list_empty(&sgx_active_page_list); + return atomic_long_read(&sgx_nr_free_pages) < watermark && + !list_empty(&sgx_active_page_list); } static int ksgxd(void *p) @@ -471,9 +471,9 @@ static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid) page = list_first_entry(&node->free_page_list, struct sgx_epc_page, list); list_del_init(&page->list); - sgx_nr_free_pages--; spin_unlock(&node->lock); + atomic_long_dec(&sgx_nr_free_pages); return page; } @@ -625,9 +625,9 @@ void sgx_free_epc_page(struct sgx_epc_page *page) spin_lock(&node->lock); list_add_tail(&page->list, &node->free_page_list); - sgx_nr_free_pages++; spin_unlock(&node->lock); + atomic_long_inc(&sgx_nr_free_pages); } static bool __init sgx_setup_epc_section(u64 phys_addr, u64 size, diff --git a/arch/x86/kernel/doublefault_32.c b/arch/x86/kernel/doublefault_32.c index d1d49e3d536b..3b58d8703094 100644 --- a/arch/x86/kernel/doublefault_32.c +++ b/arch/x86/kernel/doublefault_32.c @@ -77,9 +77,6 @@ asmlinkage noinstr void __noreturn doublefault_shim(void) * some way to reconstruct CR3. We could make a credible guess based * on cpu_tlbstate, but that would be racy and would not account for * PTI. - * - * Instead, don't bother. We can return through - * rewind_stack_do_exit() instead. */ panic("cannot return from double fault\n"); } diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h index e18210dff88c..86ea7c0fa2f6 100644 --- a/arch/x86/kernel/fpu/xstate.h +++ b/arch/x86/kernel/fpu/xstate.h @@ -4,6 +4,7 @@ #include <asm/cpufeature.h> #include <asm/fpu/xstate.h> +#include <asm/fpu/xcr.h> #ifdef CONFIG_X86_64 DECLARE_PER_CPU(u64, xfd_state); @@ -199,6 +200,32 @@ static inline void os_xrstor_supervisor(struct fpstate *fpstate) } /* + * XSAVE itself always writes all requested xfeatures. Removing features + * from the request bitmap reduces the features which are written. + * Generate a mask of features which must be written to a sigframe. The + * unset features can be optimized away and not written. + * + * This optimization is user-visible. Only use for states where + * uninitialized sigframe contents are tolerable, like dynamic features. + * + * Users of buffers produced with this optimization must check XSTATE_BV + * to determine which features have been optimized out. + */ +static inline u64 xfeatures_need_sigframe_write(void) +{ + u64 xfeaures_to_write; + + /* In-use features must be written: */ + xfeaures_to_write = xfeatures_in_use(); + + /* Also write all non-optimizable sigframe features: */ + xfeaures_to_write |= XFEATURE_MASK_USER_SUPPORTED & + ~XFEATURE_MASK_SIGFRAME_INITOPT; + + return xfeaures_to_write; +} + +/* * Save xstate to user space xsave area. * * We don't use modified optimization because xrstor/xrstors might track @@ -220,10 +247,16 @@ static inline int xsave_to_user_sigframe(struct xregs_state __user *buf) */ struct fpstate *fpstate = current->thread.fpu.fpstate; u64 mask = fpstate->user_xfeatures; - u32 lmask = mask; - u32 hmask = mask >> 32; + u32 lmask; + u32 hmask; int err; + /* Optimize away writing unnecessary xfeatures: */ + if (fpu_state_size_dynamic()) + mask &= xfeatures_need_sigframe_write(); + + lmask = mask; + hmask = mask >> 32; xfd_validate_state(fpstate, mask, false); stac(); diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 8863d1941f1b..59abbdad7729 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -28,6 +28,7 @@ #include <linux/swait.h> #include <linux/syscore_ops.h> #include <linux/cc_platform.h> +#include <linux/efi.h> #include <asm/timer.h> #include <asm/cpu.h> #include <asm/traps.h> @@ -41,6 +42,7 @@ #include <asm/ptrace.h> #include <asm/reboot.h> #include <asm/svm.h> +#include <asm/e820/api.h> DEFINE_STATIC_KEY_FALSE(kvm_async_pf_enabled); @@ -434,6 +436,8 @@ static void kvm_guest_cpu_offline(bool shutdown) kvm_disable_steal_time(); if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) wrmsrl(MSR_KVM_PV_EOI_EN, 0); + if (kvm_para_has_feature(KVM_FEATURE_MIGRATION_CONTROL)) + wrmsrl(MSR_KVM_MIGRATION_CONTROL, 0); kvm_pv_disable_apf(); if (!shutdown) apf_task_wake_all(); @@ -548,6 +552,55 @@ static void kvm_send_ipi_mask_allbutself(const struct cpumask *mask, int vector) __send_ipi_mask(local_mask, vector); } +static int __init setup_efi_kvm_sev_migration(void) +{ + efi_char16_t efi_sev_live_migration_enabled[] = L"SevLiveMigrationEnabled"; + efi_guid_t efi_variable_guid = AMD_SEV_MEM_ENCRYPT_GUID; + efi_status_t status; + unsigned long size; + bool enabled; + + if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT) || + !kvm_para_has_feature(KVM_FEATURE_MIGRATION_CONTROL)) + return 0; + + if (!efi_enabled(EFI_BOOT)) + return 0; + + if (!efi_enabled(EFI_RUNTIME_SERVICES)) { + pr_info("%s : EFI runtime services are not enabled\n", __func__); + return 0; + } + + size = sizeof(enabled); + + /* Get variable contents into buffer */ + status = efi.get_variable(efi_sev_live_migration_enabled, + &efi_variable_guid, NULL, &size, &enabled); + + if (status == EFI_NOT_FOUND) { + pr_info("%s : EFI live migration variable not found\n", __func__); + return 0; + } + + if (status != EFI_SUCCESS) { + pr_info("%s : EFI variable retrieval failed\n", __func__); + return 0; + } + + if (enabled == 0) { + pr_info("%s: live migration disabled in EFI\n", __func__); + return 0; + } + + pr_info("%s : live migration enabled in EFI\n", __func__); + wrmsrl(MSR_KVM_MIGRATION_CONTROL, KVM_MIGRATION_READY); + + return 1; +} + +late_initcall(setup_efi_kvm_sev_migration); + /* * Set the IPI entry points */ @@ -756,7 +809,7 @@ static noinline uint32_t __kvm_cpuid_base(void) return 0; /* So we don't blow up on old processors */ if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) - return hypervisor_cpuid_base("KVMKVMKVM\0\0\0", 0); + return hypervisor_cpuid_base(KVM_SIGNATURE, 0); return 0; } @@ -806,8 +859,62 @@ static bool __init kvm_msi_ext_dest_id(void) return kvm_para_has_feature(KVM_FEATURE_MSI_EXT_DEST_ID); } +static void kvm_sev_hc_page_enc_status(unsigned long pfn, int npages, bool enc) +{ + kvm_sev_hypercall3(KVM_HC_MAP_GPA_RANGE, pfn << PAGE_SHIFT, npages, + KVM_MAP_GPA_RANGE_ENC_STAT(enc) | KVM_MAP_GPA_RANGE_PAGE_SZ_4K); +} + static void __init kvm_init_platform(void) { + if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT) && + kvm_para_has_feature(KVM_FEATURE_MIGRATION_CONTROL)) { + unsigned long nr_pages; + int i; + + pv_ops.mmu.notify_page_enc_status_changed = + kvm_sev_hc_page_enc_status; + + /* + * Reset the host's shared pages list related to kernel + * specific page encryption status settings before we load a + * new kernel by kexec. Reset the page encryption status + * during early boot intead of just before kexec to avoid SMP + * races during kvm_pv_guest_cpu_reboot(). + * NOTE: We cannot reset the complete shared pages list + * here as we need to retain the UEFI/OVMF firmware + * specific settings. + */ + + for (i = 0; i < e820_table->nr_entries; i++) { + struct e820_entry *entry = &e820_table->entries[i]; + + if (entry->type != E820_TYPE_RAM) + continue; + + nr_pages = DIV_ROUND_UP(entry->size, PAGE_SIZE); + + kvm_sev_hypercall3(KVM_HC_MAP_GPA_RANGE, entry->addr, + nr_pages, + KVM_MAP_GPA_RANGE_ENCRYPTED | KVM_MAP_GPA_RANGE_PAGE_SZ_4K); + } + + /* + * Ensure that _bss_decrypted section is marked as decrypted in the + * shared pages list. + */ + nr_pages = DIV_ROUND_UP(__end_bss_decrypted - __start_bss_decrypted, + PAGE_SIZE); + early_set_mem_enc_dec_hypercall((unsigned long)__start_bss_decrypted, + nr_pages, 0); + + /* + * If not booted using EFI, enable Live migration support. + */ + if (!efi_enabled(EFI_BOOT)) + wrmsrl(MSR_KVM_MIGRATION_CONTROL, + KVM_MIGRATION_READY); + } kvmclock_init(); x86_platform.apic_post_init = kvm_apic_init; } diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index ebc45360ffd4..7f7636aac620 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -46,6 +46,17 @@ asm (".pushsection .entry.text, \"ax\"\n" ".type _paravirt_nop, @function\n\t" ".popsection"); +/* stub always returning 0. */ +asm (".pushsection .entry.text, \"ax\"\n" + ".global paravirt_ret0\n" + "paravirt_ret0:\n\t" + "xor %" _ASM_AX ", %" _ASM_AX ";\n\t" + "ret\n\t" + ".size paravirt_ret0, . - paravirt_ret0\n\t" + ".type paravirt_ret0, @function\n\t" + ".popsection"); + + void __init default_banner(void) { printk(KERN_INFO "Booting paravirtualized kernel on %s\n", @@ -53,7 +64,7 @@ void __init default_banner(void) } /* Undefined instruction for dealing with missing ops pointers. */ -static void paravirt_BUG(void) +noinstr void paravirt_BUG(void) { BUG(); } @@ -326,6 +337,7 @@ struct paravirt_patch_template pv_ops = { (void (*)(struct mmu_gather *, void *))tlb_remove_page, .mmu.exit_mmap = paravirt_nop, + .mmu.notify_page_enc_status_changed = paravirt_nop, #ifdef CONFIG_PARAVIRT_XXL .mmu.read_cr2 = __PV_IS_CALLEE_SAVE(pv_native_read_cr2), diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c index 9e1def3744f2..36e84d904260 100644 --- a/arch/x86/kernel/probe_roms.c +++ b/arch/x86/kernel/probe_roms.c @@ -80,7 +80,7 @@ static struct resource video_rom_resource = { */ static bool match_id(struct pci_dev *pdev, unsigned short vendor, unsigned short device) { - struct pci_driver *drv = pdev->driver; + struct pci_driver *drv = to_pci_driver(pdev->dev.driver); const struct pci_device_id *id; if (pdev->vendor == vendor && pdev->device == device) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index e9ee8b526319..04143a653a8a 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -964,6 +964,9 @@ unsigned long __get_wchan(struct task_struct *p) struct unwind_state state; unsigned long addr = 0; + if (!try_get_task_stack(p)) + return 0; + for (unwind_start(&state, p, NULL, NULL); !unwind_done(&state); unwind_next_frame(&state)) { addr = unwind_get_return_address(&state); @@ -974,6 +977,8 @@ unsigned long __get_wchan(struct task_struct *p) break; } + put_task_stack(p); + return addr; } diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 40ed44ead063..c410be738ae7 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -322,7 +322,7 @@ static void __init reserve_initrd(void) relocate_initrd(); - memblock_free(ramdisk_image, ramdisk_end - ramdisk_image); + memblock_phys_free(ramdisk_image, ramdisk_end - ramdisk_image); } #else @@ -521,7 +521,7 @@ static void __init reserve_crashkernel(void) } if (crash_base >= (1ULL << 32) && reserve_crashkernel_low()) { - memblock_free(crash_base, crash_size); + memblock_phys_free(crash_base, crash_size); return; } @@ -742,6 +742,28 @@ dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p) return 0; } +static char *prepare_command_line(void) +{ +#ifdef CONFIG_CMDLINE_BOOL +#ifdef CONFIG_CMDLINE_OVERRIDE + strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); +#else + if (builtin_cmdline[0]) { + /* append boot loader cmdline to builtin */ + strlcat(builtin_cmdline, " ", COMMAND_LINE_SIZE); + strlcat(builtin_cmdline, boot_command_line, COMMAND_LINE_SIZE); + strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); + } +#endif +#endif + + strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); + + parse_early_param(); + + return command_line; +} + /* * Determine if we were loaded by an EFI loader. If so, then we have also been * passed the efi memmap, systab, etc., so we should use these data structures @@ -831,6 +853,23 @@ void __init setup_arch(char **cmdline_p) x86_init.oem.arch_setup(); /* + * x86_configure_nx() is called before parse_early_param() (called by + * prepare_command_line()) to detect whether hardware doesn't support + * NX (so that the early EHCI debug console setup can safely call + * set_fixmap()). It may then be called again from within noexec_setup() + * during parsing early parameters to honor the respective command line + * option. + */ + x86_configure_nx(); + + /* + * This parses early params and it needs to run before + * early_reserve_memory() because latter relies on such settings + * supplied as early params. + */ + *cmdline_p = prepare_command_line(); + + /* * Do some memory reservations *before* memory is added to memblock, so * memblock allocations won't overwrite it. * @@ -863,33 +902,6 @@ void __init setup_arch(char **cmdline_p) bss_resource.start = __pa_symbol(__bss_start); bss_resource.end = __pa_symbol(__bss_stop)-1; -#ifdef CONFIG_CMDLINE_BOOL -#ifdef CONFIG_CMDLINE_OVERRIDE - strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); -#else - if (builtin_cmdline[0]) { - /* append boot loader cmdline to builtin */ - strlcat(builtin_cmdline, " ", COMMAND_LINE_SIZE); - strlcat(builtin_cmdline, boot_command_line, COMMAND_LINE_SIZE); - strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); - } -#endif -#endif - - strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); - *cmdline_p = command_line; - - /* - * x86_configure_nx() is called before parse_early_param() to detect - * whether hardware doesn't support NX (so that the early EHCI debug - * console setup can safely call set_fixmap()). It may then be called - * again from within noexec_setup() during parsing early parameters - * to honor the respective command line option. - */ - x86_configure_nx(); - - parse_early_param(); - #ifdef CONFIG_MEMORY_HOTPLUG /* * Memory used by the kernel cannot be hot-removed because Linux diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 5afd98559193..7b65275544b2 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -135,7 +135,7 @@ static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align) static void __init pcpu_fc_free(void *ptr, size_t size) { - memblock_free_ptr(ptr, size); + memblock_free(ptr, size); } static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 8241927addff..ac2909f0cab3 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1350,12 +1350,7 @@ static void __init smp_get_logical_apicid(void) cpu0_logical_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); } -/* - * Prepare for SMP bootup. - * @max_cpus: configured maximum number of CPUs, It is a legacy parameter - * for common interface support. - */ -void __init native_smp_prepare_cpus(unsigned int max_cpus) +void __init smp_prepare_cpus_common(void) { unsigned int i; @@ -1386,6 +1381,17 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) set_sched_topology(x86_topology); set_cpu_sibling_map(0); +} + +/* + * Prepare for SMP bootup. + * @max_cpus: configured maximum number of CPUs, It is a legacy parameter + * for common interface support. + */ +void __init native_smp_prepare_cpus(unsigned int max_cpus) +{ + smp_prepare_cpus_common(); + init_freq_invariance(false, false); smp_sanity_check(); diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c index ea028e736831..9c407a33a774 100644 --- a/arch/x86/kernel/static_call.c +++ b/arch/x86/kernel/static_call.c @@ -56,10 +56,15 @@ static void __ref __static_call_transform(void *insn, enum insn_type type, void text_poke_bp(insn, code, size, emulate); } -static void __static_call_validate(void *insn, bool tail) +static void __static_call_validate(void *insn, bool tail, bool tramp) { u8 opcode = *(u8 *)insn; + if (tramp && memcmp(insn+5, "SCT", 3)) { + pr_err("trampoline signature fail"); + BUG(); + } + if (tail) { if (opcode == JMP32_INSN_OPCODE || opcode == RET_INSN_OPCODE) @@ -74,7 +79,8 @@ static void __static_call_validate(void *insn, bool tail) /* * If we ever trigger this, our text is corrupt, we'll probably not live long. */ - WARN_ONCE(1, "unexpected static_call insn opcode 0x%x at %pS\n", opcode, insn); + pr_err("unexpected static_call insn opcode 0x%x at %pS\n", opcode, insn); + BUG(); } static inline enum insn_type __sc_insn(bool null, bool tail) @@ -97,12 +103,12 @@ void arch_static_call_transform(void *site, void *tramp, void *func, bool tail) mutex_lock(&text_mutex); if (tramp) { - __static_call_validate(tramp, true); + __static_call_validate(tramp, true, true); __static_call_transform(tramp, __sc_insn(!func, true), func); } if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site) { - __static_call_validate(site, tail); + __static_call_validate(site, tail, false); __static_call_transform(site, __sc_insn(!func, tail), func); } diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index e6f7592790af..2de3c8c5eba9 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -175,7 +175,7 @@ static struct orc_entry *orc_find(unsigned long ip) } /* vmlinux .init slow lookup: */ - if (init_kernel_text(ip)) + if (is_kernel_inittext(ip)) return __orc_find(__start_orc_unwind_ip, __start_orc_unwind, __stop_orc_unwind_ip - __start_orc_unwind_ip, ip); diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index e5a7a10a0164..c21bcd668284 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -106,10 +106,8 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval) */ local_irq_enable(); - if (!vm86 || !vm86->user_vm86) { - pr_alert("no user_vm86: BAD\n"); - do_exit(SIGSEGV); - } + BUG_ON(!vm86); + set_flags(regs->pt.flags, VEFLAGS, X86_EFLAGS_VIF | vm86->veflags_mask); user = vm86->user_vm86; @@ -142,6 +140,7 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval) user_access_end(); +exit_vm86: preempt_disable(); tsk->thread.sp0 = vm86->saved_sp0; tsk->thread.sysenter_cs = __KERNEL_CS; @@ -161,7 +160,8 @@ Efault_end: user_access_end(); Efault: pr_alert("could not access userspace vm86 info\n"); - do_exit(SIGSEGV); + force_exit_sig(SIGSEGV); + goto exit_vm86; } static int do_vm86_irq_handling(int subfunction, int irqnumber); diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 2d70edb0f323..07e9215e911d 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -99,11 +99,45 @@ static int kvm_check_cpuid(struct kvm_cpuid_entry2 *entries, int nent) return 0; } -void kvm_update_pv_runtime(struct kvm_vcpu *vcpu) +static void kvm_update_kvm_cpuid_base(struct kvm_vcpu *vcpu) { - struct kvm_cpuid_entry2 *best; + u32 function; + struct kvm_cpuid_entry2 *entry; + + vcpu->arch.kvm_cpuid_base = 0; + + for_each_possible_hypervisor_cpuid_base(function) { + entry = kvm_find_cpuid_entry(vcpu, function, 0); - best = kvm_find_cpuid_entry(vcpu, KVM_CPUID_FEATURES, 0); + if (entry) { + u32 signature[3]; + + signature[0] = entry->ebx; + signature[1] = entry->ecx; + signature[2] = entry->edx; + + BUILD_BUG_ON(sizeof(signature) > sizeof(KVM_SIGNATURE)); + if (!memcmp(signature, KVM_SIGNATURE, sizeof(signature))) { + vcpu->arch.kvm_cpuid_base = function; + break; + } + } + } +} + +static struct kvm_cpuid_entry2 *kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu) +{ + u32 base = vcpu->arch.kvm_cpuid_base; + + if (!base) + return NULL; + + return kvm_find_cpuid_entry(vcpu, base | KVM_CPUID_FEATURES, 0); +} + +void kvm_update_pv_runtime(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best = kvm_find_kvm_cpuid_features(vcpu); /* * save the feature bitmap to avoid cpuid lookup for every PV @@ -142,7 +176,7 @@ void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu) cpuid_entry_has(best, X86_FEATURE_XSAVEC))) best->ebx = xstate_required_size(vcpu->arch.xcr0, true); - best = kvm_find_cpuid_entry(vcpu, KVM_CPUID_FEATURES, 0); + best = kvm_find_kvm_cpuid_features(vcpu); if (kvm_hlt_in_guest(vcpu->kvm) && best && (best->eax & (1 << KVM_FEATURE_PV_UNHALT))) best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT); @@ -239,6 +273,26 @@ u64 kvm_vcpu_reserved_gpa_bits_raw(struct kvm_vcpu *vcpu) return rsvd_bits(cpuid_maxphyaddr(vcpu), 63); } +static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2, + int nent) +{ + int r; + + r = kvm_check_cpuid(e2, nent); + if (r) + return r; + + kvfree(vcpu->arch.cpuid_entries); + vcpu->arch.cpuid_entries = e2; + vcpu->arch.cpuid_nent = nent; + + kvm_update_kvm_cpuid_base(vcpu); + kvm_update_cpuid_runtime(vcpu); + kvm_vcpu_after_set_cpuid(vcpu); + + return 0; +} + /* when an old userspace process fills a new kernel module */ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid *cpuid, @@ -275,18 +329,9 @@ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, e2[i].padding[2] = 0; } - r = kvm_check_cpuid(e2, cpuid->nent); - if (r) { + r = kvm_set_cpuid(vcpu, e2, cpuid->nent); + if (r) kvfree(e2); - goto out_free_cpuid; - } - - kvfree(vcpu->arch.cpuid_entries); - vcpu->arch.cpuid_entries = e2; - vcpu->arch.cpuid_nent = cpuid->nent; - - kvm_update_cpuid_runtime(vcpu); - kvm_vcpu_after_set_cpuid(vcpu); out_free_cpuid: kvfree(e); @@ -310,20 +355,11 @@ int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu, return PTR_ERR(e2); } - r = kvm_check_cpuid(e2, cpuid->nent); - if (r) { + r = kvm_set_cpuid(vcpu, e2, cpuid->nent); + if (r) kvfree(e2); - return r; - } - kvfree(vcpu->arch.cpuid_entries); - vcpu->arch.cpuid_entries = e2; - vcpu->arch.cpuid_nent = cpuid->nent; - - kvm_update_cpuid_runtime(vcpu); - kvm_vcpu_after_set_cpuid(vcpu); - - return 0; + return r; } int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, @@ -871,8 +907,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) } break; case KVM_CPUID_SIGNATURE: { - static const char signature[12] = "KVMKVMKVM\0\0"; - const u32 *sigptr = (const u32 *)signature; + const u32 *sigptr = (const u32 *)KVM_SIGNATURE; entry->eax = KVM_CPUID_FEATURES; entry->ebx = sigptr[0]; entry->ecx = sigptr[1]; diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 4f15c0165c05..5e19e6e4c2ce 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1472,7 +1472,7 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) if (!(data & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE)) { hv_vcpu->hv_vapic = data; - if (kvm_lapic_enable_pv_eoi(vcpu, 0, 0)) + if (kvm_lapic_set_pv_eoi(vcpu, 0, 0)) return 1; break; } @@ -1490,7 +1490,7 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) return 1; hv_vcpu->hv_vapic = data; kvm_vcpu_mark_page_dirty(vcpu, gfn); - if (kvm_lapic_enable_pv_eoi(vcpu, + if (kvm_lapic_set_pv_eoi(vcpu, gfn_to_gpa(gfn) | KVM_MSR_ENABLED, sizeof(struct hv_vp_assist_page))) return 1; @@ -2022,7 +2022,7 @@ static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result) { bool longmode; - longmode = is_64_bit_mode(vcpu); + longmode = is_64_bit_hypercall(vcpu); if (longmode) kvm_rax_write(vcpu, result); else { @@ -2171,7 +2171,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) } #ifdef CONFIG_X86_64 - if (is_64_bit_mode(vcpu)) { + if (is_64_bit_hypercall(vcpu)) { hc.param = kvm_rcx_read(vcpu); hc.ingpa = kvm_rdx_read(vcpu); hc.outgpa = kvm_r8_read(vcpu); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index d6ac32f3f650..759952dd1222 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2856,25 +2856,30 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data) return 0; } -int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len) +int kvm_lapic_set_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len) { u64 addr = data & ~KVM_MSR_ENABLED; struct gfn_to_hva_cache *ghc = &vcpu->arch.pv_eoi.data; unsigned long new_len; + int ret; if (!IS_ALIGNED(addr, 4)) return 1; - vcpu->arch.pv_eoi.msr_val = data; - if (!pv_eoi_enabled(vcpu)) - return 0; + if (data & KVM_MSR_ENABLED) { + if (addr == ghc->gpa && len <= ghc->len) + new_len = ghc->len; + else + new_len = len; - if (addr == ghc->gpa && len <= ghc->len) - new_len = ghc->len; - else - new_len = len; + ret = kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, addr, new_len); + if (ret) + return ret; + } + + vcpu->arch.pv_eoi.msr_val = data; - return kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, addr, new_len); + return 0; } int kvm_apic_accept_events(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index d7c25d0c1354..2b44e533fc8d 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -127,7 +127,7 @@ int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data); int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data); int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data); -int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len); +int kvm_lapic_set_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len); void kvm_lapic_exit(void); #define VEC_POS(v) ((v) & (32 - 1)) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 323b5057d08f..3be9beea838d 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3191,17 +3191,17 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) new_spte |= PT_WRITABLE_MASK; /* - * Do not fix write-permission on the large spte. Since - * we only dirty the first page into the dirty-bitmap in + * Do not fix write-permission on the large spte when + * dirty logging is enabled. Since we only dirty the + * first page into the dirty-bitmap in * fast_pf_fix_direct_spte(), other pages are missed * if its slot has dirty logging enabled. * * Instead, we let the slow page fault path create a * normal spte to fix the access. - * - * See the comments in kvm_arch_commit_memory_region(). */ - if (sp->role.level > PG_LEVEL_4K) + if (sp->role.level > PG_LEVEL_4K && + kvm_slot_dirty_track_enabled(fault->slot)) break; } @@ -4682,6 +4682,7 @@ static union kvm_mmu_extended_role kvm_calc_mmu_role_ext(struct kvm_vcpu *vcpu, /* PKEY and LA57 are active iff long mode is active. */ ext.cr4_pke = ____is_efer_lma(regs) && ____is_cr4_pke(regs); ext.cr4_la57 = ____is_efer_lma(regs) && ____is_cr4_la57(regs); + ext.efer_lma = ____is_efer_lma(regs); } ext.valid = 1; diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 7c5dd83e52de..a54c3491af42 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -897,7 +897,7 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, struct tdp_iter *iter) { - struct kvm_mmu_page *sp = sptep_to_sp(iter->sptep); + struct kvm_mmu_page *sp = sptep_to_sp(rcu_dereference(iter->sptep)); u64 new_spte; int ret = RET_PF_FIXED; bool wrprot = false; diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 0772bad9165c..09873f6488f7 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -319,7 +319,7 @@ void kvm_pmu_handle_event(struct kvm_vcpu *vcpu) } /* check if idx is a valid index to access PMU */ -int kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx) +bool kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx) { return kvm_x86_ops.pmu_ops->is_valid_rdpmc_ecx(vcpu, idx); } diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h index 0e4f2b1fa9fb..59d6b76203d5 100644 --- a/arch/x86/kvm/pmu.h +++ b/arch/x86/kvm/pmu.h @@ -32,7 +32,7 @@ struct kvm_pmu_ops { struct kvm_pmc *(*rdpmc_ecx_to_pmc)(struct kvm_vcpu *vcpu, unsigned int idx, u64 *mask); struct kvm_pmc *(*msr_idx_to_pmc)(struct kvm_vcpu *vcpu, u32 msr); - int (*is_valid_rdpmc_ecx)(struct kvm_vcpu *vcpu, unsigned int idx); + bool (*is_valid_rdpmc_ecx)(struct kvm_vcpu *vcpu, unsigned int idx); bool (*is_valid_msr)(struct kvm_vcpu *vcpu, u32 msr); int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr_info); int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr_info); @@ -149,7 +149,7 @@ void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx); void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu); void kvm_pmu_handle_event(struct kvm_vcpu *vcpu); int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data); -int kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx); +bool kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx); bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr); int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info); int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info); diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index 8052d92069e0..affc0ea98d30 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -904,7 +904,8 @@ bool svm_check_apicv_inhibit_reasons(ulong bit) BIT(APICV_INHIBIT_REASON_NESTED) | BIT(APICV_INHIBIT_REASON_IRQWIN) | BIT(APICV_INHIBIT_REASON_PIT_REINJ) | - BIT(APICV_INHIBIT_REASON_X2APIC); + BIT(APICV_INHIBIT_REASON_X2APIC) | + BIT(APICV_INHIBIT_REASON_BLOCKIRQ); return supported & BIT(bit); } diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c index fdf587f19c5f..871c426ec389 100644 --- a/arch/x86/kvm/svm/pmu.c +++ b/arch/x86/kvm/svm/pmu.c @@ -181,14 +181,13 @@ static struct kvm_pmc *amd_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx) return get_gp_pmc_amd(pmu, base + pmc_idx, PMU_TYPE_COUNTER); } -/* returns 0 if idx's corresponding MSR exists; otherwise returns 1. */ -static int amd_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx) +static bool amd_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx) { struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); idx &= ~(3u << 30); - return (idx >= pmu->nr_arch_gp_counters); + return idx < pmu->nr_arch_gp_counters; } /* idx is the ECX register of RDPMC instruction */ diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 1964b9a174be..21ac0a5de4e0 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -120,16 +120,26 @@ static bool __sev_recycle_asids(int min_asid, int max_asid) return true; } +static int sev_misc_cg_try_charge(struct kvm_sev_info *sev) +{ + enum misc_res_type type = sev->es_active ? MISC_CG_RES_SEV_ES : MISC_CG_RES_SEV; + return misc_cg_try_charge(type, sev->misc_cg, 1); +} + +static void sev_misc_cg_uncharge(struct kvm_sev_info *sev) +{ + enum misc_res_type type = sev->es_active ? MISC_CG_RES_SEV_ES : MISC_CG_RES_SEV; + misc_cg_uncharge(type, sev->misc_cg, 1); +} + static int sev_asid_new(struct kvm_sev_info *sev) { int asid, min_asid, max_asid, ret; bool retry = true; - enum misc_res_type type; - type = sev->es_active ? MISC_CG_RES_SEV_ES : MISC_CG_RES_SEV; WARN_ON(sev->misc_cg); sev->misc_cg = get_current_misc_cg(); - ret = misc_cg_try_charge(type, sev->misc_cg, 1); + ret = sev_misc_cg_try_charge(sev); if (ret) { put_misc_cg(sev->misc_cg); sev->misc_cg = NULL; @@ -162,7 +172,7 @@ again: return asid; e_uncharge: - misc_cg_uncharge(type, sev->misc_cg, 1); + sev_misc_cg_uncharge(sev); put_misc_cg(sev->misc_cg); sev->misc_cg = NULL; return ret; @@ -179,7 +189,6 @@ static void sev_asid_free(struct kvm_sev_info *sev) { struct svm_cpu_data *sd; int cpu; - enum misc_res_type type; mutex_lock(&sev_bitmap_lock); @@ -192,8 +201,7 @@ static void sev_asid_free(struct kvm_sev_info *sev) mutex_unlock(&sev_bitmap_lock); - type = sev->es_active ? MISC_CG_RES_SEV_ES : MISC_CG_RES_SEV; - misc_cg_uncharge(type, sev->misc_cg, 1); + sev_misc_cg_uncharge(sev); put_misc_cg(sev->misc_cg); sev->misc_cg = NULL; } @@ -229,7 +237,6 @@ static void sev_unbind_asid(struct kvm *kvm, unsigned int handle) static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) { struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; - bool es_active = argp->id == KVM_SEV_ES_INIT; int asid, ret; if (kvm->created_vcpus) @@ -239,7 +246,8 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) if (unlikely(sev->active)) return ret; - sev->es_active = es_active; + sev->active = true; + sev->es_active = argp->id == KVM_SEV_ES_INIT; asid = sev_asid_new(sev); if (asid < 0) goto e_no_asid; @@ -249,8 +257,6 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) if (ret) goto e_free; - sev->active = true; - sev->asid = asid; INIT_LIST_HEAD(&sev->regions_list); return 0; @@ -260,6 +266,7 @@ e_free: sev->asid = 0; e_no_asid: sev->es_active = false; + sev->active = false; return ret; } @@ -590,7 +597,7 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm) * traditional VMSA as it has been built so far (in prep * for LAUNCH_UPDATE_VMSA) to be the initial SEV-ES state. */ - memcpy(svm->vmsa, save, sizeof(*save)); + memcpy(svm->sev_es.vmsa, save, sizeof(*save)); return 0; } @@ -612,11 +619,11 @@ static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu, * the VMSA memory content (i.e it will write the same memory region * with the guest's key), so invalidate it first. */ - clflush_cache_range(svm->vmsa, PAGE_SIZE); + clflush_cache_range(svm->sev_es.vmsa, PAGE_SIZE); vmsa.reserved = 0; vmsa.handle = to_kvm_svm(kvm)->sev_info.handle; - vmsa.address = __sme_pa(svm->vmsa); + vmsa.address = __sme_pa(svm->sev_es.vmsa); vmsa.len = PAGE_SIZE; ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, &vmsa, error); if (ret) @@ -1522,7 +1529,7 @@ static int sev_receive_finish(struct kvm *kvm, struct kvm_sev_cmd *argp) return sev_issue_cmd(kvm, SEV_CMD_RECEIVE_FINISH, &data, &argp->error); } -static bool cmd_allowed_from_miror(u32 cmd_id) +static bool is_cmd_allowed_from_mirror(u32 cmd_id) { /* * Allow mirrors VM to call KVM_SEV_LAUNCH_UPDATE_VMSA to enable SEV-ES @@ -1536,6 +1543,201 @@ static bool cmd_allowed_from_miror(u32 cmd_id) return false; } +static int sev_lock_for_migration(struct kvm *kvm) +{ + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; + + /* + * Bail if this VM is already involved in a migration to avoid deadlock + * between two VMs trying to migrate to/from each other. + */ + if (atomic_cmpxchg_acquire(&sev->migration_in_progress, 0, 1)) + return -EBUSY; + + mutex_lock(&kvm->lock); + + return 0; +} + +static void sev_unlock_after_migration(struct kvm *kvm) +{ + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; + + mutex_unlock(&kvm->lock); + atomic_set_release(&sev->migration_in_progress, 0); +} + + +static int sev_lock_vcpus_for_migration(struct kvm *kvm) +{ + struct kvm_vcpu *vcpu; + int i, j; + + kvm_for_each_vcpu(i, vcpu, kvm) { + if (mutex_lock_killable(&vcpu->mutex)) + goto out_unlock; + } + + return 0; + +out_unlock: + kvm_for_each_vcpu(j, vcpu, kvm) { + if (i == j) + break; + + mutex_unlock(&vcpu->mutex); + } + return -EINTR; +} + +static void sev_unlock_vcpus_for_migration(struct kvm *kvm) +{ + struct kvm_vcpu *vcpu; + int i; + + kvm_for_each_vcpu(i, vcpu, kvm) { + mutex_unlock(&vcpu->mutex); + } +} + +static void sev_migrate_from(struct kvm_sev_info *dst, + struct kvm_sev_info *src) +{ + dst->active = true; + dst->asid = src->asid; + dst->handle = src->handle; + dst->pages_locked = src->pages_locked; + + src->asid = 0; + src->active = false; + src->handle = 0; + src->pages_locked = 0; + + INIT_LIST_HEAD(&dst->regions_list); + list_replace_init(&src->regions_list, &dst->regions_list); +} + +static int sev_es_migrate_from(struct kvm *dst, struct kvm *src) +{ + int i; + struct kvm_vcpu *dst_vcpu, *src_vcpu; + struct vcpu_svm *dst_svm, *src_svm; + + if (atomic_read(&src->online_vcpus) != atomic_read(&dst->online_vcpus)) + return -EINVAL; + + kvm_for_each_vcpu(i, src_vcpu, src) { + if (!src_vcpu->arch.guest_state_protected) + return -EINVAL; + } + + kvm_for_each_vcpu(i, src_vcpu, src) { + src_svm = to_svm(src_vcpu); + dst_vcpu = kvm_get_vcpu(dst, i); + dst_svm = to_svm(dst_vcpu); + + /* + * Transfer VMSA and GHCB state to the destination. Nullify and + * clear source fields as appropriate, the state now belongs to + * the destination. + */ + memcpy(&dst_svm->sev_es, &src_svm->sev_es, sizeof(src_svm->sev_es)); + dst_svm->vmcb->control.ghcb_gpa = src_svm->vmcb->control.ghcb_gpa; + dst_svm->vmcb->control.vmsa_pa = src_svm->vmcb->control.vmsa_pa; + dst_vcpu->arch.guest_state_protected = true; + + memset(&src_svm->sev_es, 0, sizeof(src_svm->sev_es)); + src_svm->vmcb->control.ghcb_gpa = INVALID_PAGE; + src_svm->vmcb->control.vmsa_pa = INVALID_PAGE; + src_vcpu->arch.guest_state_protected = false; + } + to_kvm_svm(src)->sev_info.es_active = false; + to_kvm_svm(dst)->sev_info.es_active = true; + + return 0; +} + +int svm_vm_migrate_from(struct kvm *kvm, unsigned int source_fd) +{ + struct kvm_sev_info *dst_sev = &to_kvm_svm(kvm)->sev_info; + struct kvm_sev_info *src_sev, *cg_cleanup_sev; + struct file *source_kvm_file; + struct kvm *source_kvm; + bool charged = false; + int ret; + + ret = sev_lock_for_migration(kvm); + if (ret) + return ret; + + if (sev_guest(kvm)) { + ret = -EINVAL; + goto out_unlock; + } + + source_kvm_file = fget(source_fd); + if (!file_is_kvm(source_kvm_file)) { + ret = -EBADF; + goto out_fput; + } + + source_kvm = source_kvm_file->private_data; + ret = sev_lock_for_migration(source_kvm); + if (ret) + goto out_fput; + + if (!sev_guest(source_kvm)) { + ret = -EINVAL; + goto out_source; + } + + src_sev = &to_kvm_svm(source_kvm)->sev_info; + dst_sev->misc_cg = get_current_misc_cg(); + cg_cleanup_sev = dst_sev; + if (dst_sev->misc_cg != src_sev->misc_cg) { + ret = sev_misc_cg_try_charge(dst_sev); + if (ret) + goto out_dst_cgroup; + charged = true; + } + + ret = sev_lock_vcpus_for_migration(kvm); + if (ret) + goto out_dst_cgroup; + ret = sev_lock_vcpus_for_migration(source_kvm); + if (ret) + goto out_dst_vcpu; + + if (sev_es_guest(source_kvm)) { + ret = sev_es_migrate_from(kvm, source_kvm); + if (ret) + goto out_source_vcpu; + } + sev_migrate_from(dst_sev, src_sev); + kvm_vm_dead(source_kvm); + cg_cleanup_sev = src_sev; + ret = 0; + +out_source_vcpu: + sev_unlock_vcpus_for_migration(source_kvm); +out_dst_vcpu: + sev_unlock_vcpus_for_migration(kvm); +out_dst_cgroup: + /* Operates on the source on success, on the destination on failure. */ + if (charged) + sev_misc_cg_uncharge(cg_cleanup_sev); + put_misc_cg(cg_cleanup_sev->misc_cg); + cg_cleanup_sev->misc_cg = NULL; +out_source: + sev_unlock_after_migration(source_kvm); +out_fput: + if (source_kvm_file) + fput(source_kvm_file); +out_unlock: + sev_unlock_after_migration(kvm); + return ret; +} + int svm_mem_enc_op(struct kvm *kvm, void __user *argp) { struct kvm_sev_cmd sev_cmd; @@ -1554,7 +1756,7 @@ int svm_mem_enc_op(struct kvm *kvm, void __user *argp) /* Only the enc_context_owner handles some memory enc operations. */ if (is_mirroring_enc_context(kvm) && - !cmd_allowed_from_miror(sev_cmd.id)) { + !is_cmd_allowed_from_mirror(sev_cmd.id)) { r = -EINVAL; goto out; } @@ -1787,7 +1989,12 @@ int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd) mutex_unlock(&source_kvm->lock); mutex_lock(&kvm->lock); - if (sev_guest(kvm)) { + /* + * Disallow out-of-band SEV/SEV-ES init if the target is already an + * SEV guest, or if vCPUs have been created. KVM relies on vCPUs being + * created after SEV/SEV-ES initialization, e.g. to init intercepts. + */ + if (sev_guest(kvm) || kvm->created_vcpus) { ret = -EINVAL; goto e_mirror_unlock; } @@ -2038,16 +2245,16 @@ void sev_free_vcpu(struct kvm_vcpu *vcpu) svm = to_svm(vcpu); if (vcpu->arch.guest_state_protected) - sev_flush_guest_memory(svm, svm->vmsa, PAGE_SIZE); - __free_page(virt_to_page(svm->vmsa)); + sev_flush_guest_memory(svm, svm->sev_es.vmsa, PAGE_SIZE); + __free_page(virt_to_page(svm->sev_es.vmsa)); - if (svm->ghcb_sa_free) - kfree(svm->ghcb_sa); + if (svm->sev_es.ghcb_sa_free) + kfree(svm->sev_es.ghcb_sa); } static void dump_ghcb(struct vcpu_svm *svm) { - struct ghcb *ghcb = svm->ghcb; + struct ghcb *ghcb = svm->sev_es.ghcb; unsigned int nbits; /* Re-use the dump_invalid_vmcb module parameter */ @@ -2073,7 +2280,7 @@ static void dump_ghcb(struct vcpu_svm *svm) static void sev_es_sync_to_ghcb(struct vcpu_svm *svm) { struct kvm_vcpu *vcpu = &svm->vcpu; - struct ghcb *ghcb = svm->ghcb; + struct ghcb *ghcb = svm->sev_es.ghcb; /* * The GHCB protocol so far allows for the following data @@ -2093,7 +2300,7 @@ static void sev_es_sync_from_ghcb(struct vcpu_svm *svm) { struct vmcb_control_area *control = &svm->vmcb->control; struct kvm_vcpu *vcpu = &svm->vcpu; - struct ghcb *ghcb = svm->ghcb; + struct ghcb *ghcb = svm->sev_es.ghcb; u64 exit_code; /* @@ -2140,7 +2347,7 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm) struct ghcb *ghcb; u64 exit_code = 0; - ghcb = svm->ghcb; + ghcb = svm->sev_es.ghcb; /* Only GHCB Usage code 0 is supported */ if (ghcb->ghcb_usage) @@ -2258,33 +2465,34 @@ vmgexit_err: void sev_es_unmap_ghcb(struct vcpu_svm *svm) { - if (!svm->ghcb) + if (!svm->sev_es.ghcb) return; - if (svm->ghcb_sa_free) { + if (svm->sev_es.ghcb_sa_free) { /* * The scratch area lives outside the GHCB, so there is a * buffer that, depending on the operation performed, may * need to be synced, then freed. */ - if (svm->ghcb_sa_sync) { + if (svm->sev_es.ghcb_sa_sync) { kvm_write_guest(svm->vcpu.kvm, - ghcb_get_sw_scratch(svm->ghcb), - svm->ghcb_sa, svm->ghcb_sa_len); - svm->ghcb_sa_sync = false; + ghcb_get_sw_scratch(svm->sev_es.ghcb), + svm->sev_es.ghcb_sa, + svm->sev_es.ghcb_sa_len); + svm->sev_es.ghcb_sa_sync = false; } - kfree(svm->ghcb_sa); - svm->ghcb_sa = NULL; - svm->ghcb_sa_free = false; + kfree(svm->sev_es.ghcb_sa); + svm->sev_es.ghcb_sa = NULL; + svm->sev_es.ghcb_sa_free = false; } - trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, svm->ghcb); + trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, svm->sev_es.ghcb); sev_es_sync_to_ghcb(svm); - kvm_vcpu_unmap(&svm->vcpu, &svm->ghcb_map, true); - svm->ghcb = NULL; + kvm_vcpu_unmap(&svm->vcpu, &svm->sev_es.ghcb_map, true); + svm->sev_es.ghcb = NULL; } void pre_sev_run(struct vcpu_svm *svm, int cpu) @@ -2314,7 +2522,7 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu) static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len) { struct vmcb_control_area *control = &svm->vmcb->control; - struct ghcb *ghcb = svm->ghcb; + struct ghcb *ghcb = svm->sev_es.ghcb; u64 ghcb_scratch_beg, ghcb_scratch_end; u64 scratch_gpa_beg, scratch_gpa_end; void *scratch_va; @@ -2350,7 +2558,7 @@ static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len) return false; } - scratch_va = (void *)svm->ghcb; + scratch_va = (void *)svm->sev_es.ghcb; scratch_va += (scratch_gpa_beg - control->ghcb_gpa); } else { /* @@ -2380,12 +2588,12 @@ static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len) * the vCPU next time (i.e. a read was requested so the data * must be written back to the guest memory). */ - svm->ghcb_sa_sync = sync; - svm->ghcb_sa_free = true; + svm->sev_es.ghcb_sa_sync = sync; + svm->sev_es.ghcb_sa_free = true; } - svm->ghcb_sa = scratch_va; - svm->ghcb_sa_len = len; + svm->sev_es.ghcb_sa = scratch_va; + svm->sev_es.ghcb_sa_len = len; return true; } @@ -2504,15 +2712,15 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) return -EINVAL; } - if (kvm_vcpu_map(vcpu, ghcb_gpa >> PAGE_SHIFT, &svm->ghcb_map)) { + if (kvm_vcpu_map(vcpu, ghcb_gpa >> PAGE_SHIFT, &svm->sev_es.ghcb_map)) { /* Unable to map GHCB from guest */ vcpu_unimpl(vcpu, "vmgexit: error mapping GHCB [%#llx] from guest\n", ghcb_gpa); return -EINVAL; } - svm->ghcb = svm->ghcb_map.hva; - ghcb = svm->ghcb_map.hva; + svm->sev_es.ghcb = svm->sev_es.ghcb_map.hva; + ghcb = svm->sev_es.ghcb_map.hva; trace_kvm_vmgexit_enter(vcpu->vcpu_id, ghcb); @@ -2535,7 +2743,7 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) ret = kvm_sev_es_mmio_read(vcpu, control->exit_info_1, control->exit_info_2, - svm->ghcb_sa); + svm->sev_es.ghcb_sa); break; case SVM_VMGEXIT_MMIO_WRITE: if (!setup_vmgexit_scratch(svm, false, control->exit_info_2)) @@ -2544,7 +2752,7 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) ret = kvm_sev_es_mmio_write(vcpu, control->exit_info_1, control->exit_info_2, - svm->ghcb_sa); + svm->sev_es.ghcb_sa); break; case SVM_VMGEXIT_NMI_COMPLETE: ret = svm_invoke_exit_handler(vcpu, SVM_EXIT_IRET); @@ -2604,7 +2812,8 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in) if (!setup_vmgexit_scratch(svm, in, bytes)) return -EINVAL; - return kvm_sev_es_string_io(&svm->vcpu, size, port, svm->ghcb_sa, count, in); + return kvm_sev_es_string_io(&svm->vcpu, size, port, svm->sev_es.ghcb_sa, + count, in); } void sev_es_init_vmcb(struct vcpu_svm *svm) @@ -2619,7 +2828,7 @@ void sev_es_init_vmcb(struct vcpu_svm *svm) * VMCB page. Do not include the encryption mask on the VMSA physical * address since hardware will access it using the guest key. */ - svm->vmcb->control.vmsa_pa = __pa(svm->vmsa); + svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa); /* Can't intercept CR register access, HV can't modify CR registers */ svm_clr_intercept(svm, INTERCEPT_CR0_READ); @@ -2691,8 +2900,8 @@ void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector) struct vcpu_svm *svm = to_svm(vcpu); /* First SIPI: Use the values as initially set by the VMM */ - if (!svm->received_first_sipi) { - svm->received_first_sipi = true; + if (!svm->sev_es.received_first_sipi) { + svm->sev_es.received_first_sipi = true; return; } @@ -2701,8 +2910,8 @@ void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector) * the guest will set the CS and RIP. Set SW_EXIT_INFO_2 to a * non-zero value. */ - if (!svm->ghcb) + if (!svm->sev_es.ghcb) return; - ghcb_set_sw_exit_info_2(svm->ghcb, 1); + ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, 1); } diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index b36ca4e476c2..5630c241d5f6 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1452,7 +1452,7 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) svm_switch_vmcb(svm, &svm->vmcb01); if (vmsa_page) - svm->vmsa = page_address(vmsa_page); + svm->sev_es.vmsa = page_address(vmsa_page); svm->guest_state_loaded = false; @@ -2835,11 +2835,11 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) static int svm_complete_emulated_msr(struct kvm_vcpu *vcpu, int err) { struct vcpu_svm *svm = to_svm(vcpu); - if (!err || !sev_es_guest(vcpu->kvm) || WARN_ON_ONCE(!svm->ghcb)) + if (!err || !sev_es_guest(vcpu->kvm) || WARN_ON_ONCE(!svm->sev_es.ghcb)) return kvm_complete_insn_gp(vcpu, err); - ghcb_set_sw_exit_info_1(svm->ghcb, 1); - ghcb_set_sw_exit_info_2(svm->ghcb, + ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 1); + ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, X86_TRAP_GP | SVM_EVTINJ_TYPE_EXEPT | SVM_EVTINJ_VALID); @@ -3121,11 +3121,6 @@ static int invpcid_interception(struct kvm_vcpu *vcpu) type = svm->vmcb->control.exit_info_2; gva = svm->vmcb->control.exit_info_1; - if (type > 3) { - kvm_inject_gp(vcpu, 0); - return 1; - } - return kvm_handle_invpcid(vcpu, type, gva); } @@ -4701,6 +4696,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .mem_enc_unreg_region = svm_unregister_enc_region, .vm_copy_enc_context_from = svm_vm_copy_asid_from, + .vm_move_enc_context_from = svm_vm_migrate_from, .can_emulate_instruction = svm_can_emulate_instruction, diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 5e9510d4574e..5faad3dc10e2 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -80,6 +80,7 @@ struct kvm_sev_info { u64 ap_jump_table; /* SEV-ES AP Jump Table address */ struct kvm *enc_context_owner; /* Owner of copied encryption context */ struct misc_cg *misc_cg; /* For misc cgroup accounting */ + atomic_t migration_in_progress; }; struct kvm_svm { @@ -123,6 +124,20 @@ struct svm_nested_state { bool initialized; }; +struct vcpu_sev_es_state { + /* SEV-ES support */ + struct vmcb_save_area *vmsa; + struct ghcb *ghcb; + struct kvm_host_map ghcb_map; + bool received_first_sipi; + + /* SEV-ES scratch area support */ + void *ghcb_sa; + u32 ghcb_sa_len; + bool ghcb_sa_sync; + bool ghcb_sa_free; +}; + struct vcpu_svm { struct kvm_vcpu vcpu; /* vmcb always points at current_vmcb->ptr, it's purely a shorthand. */ @@ -186,17 +201,7 @@ struct vcpu_svm { DECLARE_BITMAP(write, MAX_DIRECT_ACCESS_MSRS); } shadow_msr_intercept; - /* SEV-ES support */ - struct vmcb_save_area *vmsa; - struct ghcb *ghcb; - struct kvm_host_map ghcb_map; - bool received_first_sipi; - - /* SEV-ES scratch area support */ - void *ghcb_sa; - u32 ghcb_sa_len; - bool ghcb_sa_sync; - bool ghcb_sa_free; + struct vcpu_sev_es_state sev_es; bool guest_state_loaded; }; @@ -242,7 +247,7 @@ static __always_inline bool sev_es_guest(struct kvm *kvm) #ifdef CONFIG_KVM_AMD_SEV struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; - return sev_guest(kvm) && sev->es_active; + return sev->es_active && !WARN_ON_ONCE(!sev->active); #else return false; #endif @@ -558,6 +563,7 @@ int svm_register_enc_region(struct kvm *kvm, int svm_unregister_enc_region(struct kvm *kvm, struct kvm_enc_region *range); int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd); +int svm_vm_migrate_from(struct kvm *kvm, unsigned int source_fd); void pre_sev_run(struct vcpu_svm *svm, int cpu); void __init sev_set_cpu_caps(void); void __init sev_hardware_setup(void); diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index b4ee5e9f9e20..1e2f66951566 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -525,67 +525,19 @@ static int nested_vmx_check_tpr_shadow_controls(struct kvm_vcpu *vcpu, } /* - * Check if MSR is intercepted for L01 MSR bitmap. + * For x2APIC MSRs, ignore the vmcs01 bitmap. L1 can enable x2APIC without L1 + * itself utilizing x2APIC. All MSRs were previously set to be intercepted, + * only the "disable intercept" case needs to be handled. */ -static bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr) +static void nested_vmx_disable_intercept_for_x2apic_msr(unsigned long *msr_bitmap_l1, + unsigned long *msr_bitmap_l0, + u32 msr, int type) { - unsigned long *msr_bitmap; - int f = sizeof(unsigned long); + if (type & MSR_TYPE_R && !vmx_test_msr_bitmap_read(msr_bitmap_l1, msr)) + vmx_clear_msr_bitmap_read(msr_bitmap_l0, msr); - if (!cpu_has_vmx_msr_bitmap()) - return true; - - msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap; - - if (msr <= 0x1fff) { - return !!test_bit(msr, msr_bitmap + 0x800 / f); - } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { - msr &= 0x1fff; - return !!test_bit(msr, msr_bitmap + 0xc00 / f); - } - - return true; -} - -/* - * If a msr is allowed by L0, we should check whether it is allowed by L1. - * The corresponding bit will be cleared unless both of L0 and L1 allow it. - */ -static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1, - unsigned long *msr_bitmap_nested, - u32 msr, int type) -{ - int f = sizeof(unsigned long); - - /* - * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals - * have the write-low and read-high bitmap offsets the wrong way round. - * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff. - */ - if (msr <= 0x1fff) { - if (type & MSR_TYPE_R && - !test_bit(msr, msr_bitmap_l1 + 0x000 / f)) - /* read-low */ - __clear_bit(msr, msr_bitmap_nested + 0x000 / f); - - if (type & MSR_TYPE_W && - !test_bit(msr, msr_bitmap_l1 + 0x800 / f)) - /* write-low */ - __clear_bit(msr, msr_bitmap_nested + 0x800 / f); - - } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { - msr &= 0x1fff; - if (type & MSR_TYPE_R && - !test_bit(msr, msr_bitmap_l1 + 0x400 / f)) - /* read-high */ - __clear_bit(msr, msr_bitmap_nested + 0x400 / f); - - if (type & MSR_TYPE_W && - !test_bit(msr, msr_bitmap_l1 + 0xc00 / f)) - /* write-high */ - __clear_bit(msr, msr_bitmap_nested + 0xc00 / f); - - } + if (type & MSR_TYPE_W && !vmx_test_msr_bitmap_write(msr_bitmap_l1, msr)) + vmx_clear_msr_bitmap_write(msr_bitmap_l0, msr); } static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap) @@ -600,6 +552,34 @@ static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap) } } +#define BUILD_NVMX_MSR_INTERCEPT_HELPER(rw) \ +static inline \ +void nested_vmx_set_msr_##rw##_intercept(struct vcpu_vmx *vmx, \ + unsigned long *msr_bitmap_l1, \ + unsigned long *msr_bitmap_l0, u32 msr) \ +{ \ + if (vmx_test_msr_bitmap_##rw(vmx->vmcs01.msr_bitmap, msr) || \ + vmx_test_msr_bitmap_##rw(msr_bitmap_l1, msr)) \ + vmx_set_msr_bitmap_##rw(msr_bitmap_l0, msr); \ + else \ + vmx_clear_msr_bitmap_##rw(msr_bitmap_l0, msr); \ +} +BUILD_NVMX_MSR_INTERCEPT_HELPER(read) +BUILD_NVMX_MSR_INTERCEPT_HELPER(write) + +static inline void nested_vmx_set_intercept_for_msr(struct vcpu_vmx *vmx, + unsigned long *msr_bitmap_l1, + unsigned long *msr_bitmap_l0, + u32 msr, int types) +{ + if (types & MSR_TYPE_R) + nested_vmx_set_msr_read_intercept(vmx, msr_bitmap_l1, + msr_bitmap_l0, msr); + if (types & MSR_TYPE_W) + nested_vmx_set_msr_write_intercept(vmx, msr_bitmap_l1, + msr_bitmap_l0, msr); +} + /* * Merge L0's and L1's MSR bitmap, return false to indicate that * we do not use the hardware. @@ -607,10 +587,11 @@ static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap) static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { + struct vcpu_vmx *vmx = to_vmx(vcpu); int msr; unsigned long *msr_bitmap_l1; - unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap; - struct kvm_host_map *map = &to_vmx(vcpu)->nested.msr_bitmap_map; + unsigned long *msr_bitmap_l0 = vmx->nested.vmcs02.msr_bitmap; + struct kvm_host_map *map = &vmx->nested.msr_bitmap_map; /* Nothing to do if the MSR bitmap is not in use. */ if (!cpu_has_vmx_msr_bitmap() || @@ -625,7 +606,7 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, /* * To keep the control flow simple, pay eight 8-byte writes (sixteen * 4-byte writes on 32-bit systems) up front to enable intercepts for - * the x2APIC MSR range and selectively disable them below. + * the x2APIC MSR range and selectively toggle those relevant to L2. */ enable_x2apic_msr_intercepts(msr_bitmap_l0); @@ -644,61 +625,44 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, } } - nested_vmx_disable_intercept_for_msr( + nested_vmx_disable_intercept_for_x2apic_msr( msr_bitmap_l1, msr_bitmap_l0, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_R | MSR_TYPE_W); if (nested_cpu_has_vid(vmcs12)) { - nested_vmx_disable_intercept_for_msr( + nested_vmx_disable_intercept_for_x2apic_msr( msr_bitmap_l1, msr_bitmap_l0, X2APIC_MSR(APIC_EOI), MSR_TYPE_W); - nested_vmx_disable_intercept_for_msr( + nested_vmx_disable_intercept_for_x2apic_msr( msr_bitmap_l1, msr_bitmap_l0, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W); } } - /* KVM unconditionally exposes the FS/GS base MSRs to L1. */ + /* + * Always check vmcs01's bitmap to honor userspace MSR filters and any + * other runtime changes to vmcs01's bitmap, e.g. dynamic pass-through. + */ #ifdef CONFIG_X86_64 - nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0, - MSR_FS_BASE, MSR_TYPE_RW); + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, + MSR_FS_BASE, MSR_TYPE_RW); - nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0, - MSR_GS_BASE, MSR_TYPE_RW); + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, + MSR_GS_BASE, MSR_TYPE_RW); - nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0, - MSR_KERNEL_GS_BASE, MSR_TYPE_RW); + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, + MSR_KERNEL_GS_BASE, MSR_TYPE_RW); #endif + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, + MSR_IA32_SPEC_CTRL, MSR_TYPE_RW); - /* - * Checking the L0->L1 bitmap is trying to verify two things: - * - * 1. L0 gave a permission to L1 to actually passthrough the MSR. This - * ensures that we do not accidentally generate an L02 MSR bitmap - * from the L12 MSR bitmap that is too permissive. - * 2. That L1 or L2s have actually used the MSR. This avoids - * unnecessarily merging of the bitmap if the MSR is unused. This - * works properly because we only update the L01 MSR bitmap lazily. - * So even if L0 should pass L1 these MSRs, the L01 bitmap is only - * updated to reflect this when L1 (or its L2s) actually write to - * the MSR. - */ - if (!msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL)) - nested_vmx_disable_intercept_for_msr( - msr_bitmap_l1, msr_bitmap_l0, - MSR_IA32_SPEC_CTRL, - MSR_TYPE_R | MSR_TYPE_W); - - if (!msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD)) - nested_vmx_disable_intercept_for_msr( - msr_bitmap_l1, msr_bitmap_l0, - MSR_IA32_PRED_CMD, - MSR_TYPE_W); + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, + MSR_IA32_PRED_CMD, MSR_TYPE_W); - kvm_vcpu_unmap(vcpu, &to_vmx(vcpu)->nested.msr_bitmap_map, false); + kvm_vcpu_unmap(vcpu, &vmx->nested.msr_bitmap_map, false); return true; } @@ -706,33 +670,39 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, static void nested_cache_shadow_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { - struct kvm_host_map map; - struct vmcs12 *shadow; + struct vcpu_vmx *vmx = to_vmx(vcpu); + struct gfn_to_hva_cache *ghc = &vmx->nested.shadow_vmcs12_cache; if (!nested_cpu_has_shadow_vmcs(vmcs12) || vmcs12->vmcs_link_pointer == INVALID_GPA) return; - shadow = get_shadow_vmcs12(vcpu); - - if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->vmcs_link_pointer), &map)) + if (ghc->gpa != vmcs12->vmcs_link_pointer && + kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, + vmcs12->vmcs_link_pointer, VMCS12_SIZE)) return; - memcpy(shadow, map.hva, VMCS12_SIZE); - kvm_vcpu_unmap(vcpu, &map, false); + kvm_read_guest_cached(vmx->vcpu.kvm, ghc, get_shadow_vmcs12(vcpu), + VMCS12_SIZE); } static void nested_flush_cached_shadow_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { struct vcpu_vmx *vmx = to_vmx(vcpu); + struct gfn_to_hva_cache *ghc = &vmx->nested.shadow_vmcs12_cache; if (!nested_cpu_has_shadow_vmcs(vmcs12) || vmcs12->vmcs_link_pointer == INVALID_GPA) return; - kvm_write_guest(vmx->vcpu.kvm, vmcs12->vmcs_link_pointer, - get_shadow_vmcs12(vcpu), VMCS12_SIZE); + if (ghc->gpa != vmcs12->vmcs_link_pointer && + kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, + vmcs12->vmcs_link_pointer, VMCS12_SIZE)) + return; + + kvm_write_guest_cached(vmx->vcpu.kvm, ghc, get_shadow_vmcs12(vcpu), + VMCS12_SIZE); } /* @@ -2866,6 +2836,17 @@ static int nested_vmx_check_controls(struct kvm_vcpu *vcpu, return 0; } +static int nested_vmx_check_address_space_size(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) +{ +#ifdef CONFIG_X86_64 + if (CC(!!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) != + !!(vcpu->arch.efer & EFER_LMA))) + return -EINVAL; +#endif + return 0; +} + static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { @@ -2890,18 +2871,16 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu, return -EINVAL; #ifdef CONFIG_X86_64 - ia32e = !!(vcpu->arch.efer & EFER_LMA); + ia32e = !!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE); #else ia32e = false; #endif if (ia32e) { - if (CC(!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)) || - CC(!(vmcs12->host_cr4 & X86_CR4_PAE))) + if (CC(!(vmcs12->host_cr4 & X86_CR4_PAE))) return -EINVAL; } else { - if (CC(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) || - CC(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) || + if (CC(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) || CC(vmcs12->host_cr4 & X86_CR4_PCIDE) || CC((vmcs12->host_rip) >> 32)) return -EINVAL; @@ -2946,9 +2925,9 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu, static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { - int r = 0; - struct vmcs12 *shadow; - struct kvm_host_map map; + struct vcpu_vmx *vmx = to_vmx(vcpu); + struct gfn_to_hva_cache *ghc = &vmx->nested.shadow_vmcs12_cache; + struct vmcs_hdr hdr; if (vmcs12->vmcs_link_pointer == INVALID_GPA) return 0; @@ -2956,17 +2935,21 @@ static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu, if (CC(!page_address_valid(vcpu, vmcs12->vmcs_link_pointer))) return -EINVAL; - if (CC(kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->vmcs_link_pointer), &map))) - return -EINVAL; + if (ghc->gpa != vmcs12->vmcs_link_pointer && + CC(kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, + vmcs12->vmcs_link_pointer, VMCS12_SIZE))) + return -EINVAL; - shadow = map.hva; + if (CC(kvm_read_guest_offset_cached(vcpu->kvm, ghc, &hdr, + offsetof(struct vmcs12, hdr), + sizeof(hdr)))) + return -EINVAL; - if (CC(shadow->hdr.revision_id != VMCS12_REVISION) || - CC(shadow->hdr.shadow_vmcs != nested_cpu_has_shadow_vmcs(vmcs12))) - r = -EINVAL; + if (CC(hdr.revision_id != VMCS12_REVISION) || + CC(hdr.shadow_vmcs != nested_cpu_has_shadow_vmcs(vmcs12))) + return -EINVAL; - kvm_vcpu_unmap(vcpu, &map, false); - return r; + return 0; } /* @@ -3571,6 +3554,9 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) if (nested_vmx_check_controls(vcpu, vmcs12)) return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); + if (nested_vmx_check_address_space_size(vcpu, vmcs12)) + return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_HOST_STATE_FIELD); + if (nested_vmx_check_host_state(vcpu, vmcs12)) return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_HOST_STATE_FIELD); @@ -5300,10 +5286,11 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) return 1; if (vmx->nested.current_vmptr != vmptr) { - struct kvm_host_map map; - struct vmcs12 *new_vmcs12; + struct gfn_to_hva_cache *ghc = &vmx->nested.vmcs12_cache; + struct vmcs_hdr hdr; - if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmptr), &map)) { + if (ghc->gpa != vmptr && + kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, vmptr, VMCS12_SIZE)) { /* * Reads from an unbacked page return all 1s, * which means that the 32 bits located at the @@ -5314,12 +5301,16 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID); } - new_vmcs12 = map.hva; + if (kvm_read_guest_offset_cached(vcpu->kvm, ghc, &hdr, + offsetof(struct vmcs12, hdr), + sizeof(hdr))) { + return nested_vmx_fail(vcpu, + VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID); + } - if (new_vmcs12->hdr.revision_id != VMCS12_REVISION || - (new_vmcs12->hdr.shadow_vmcs && + if (hdr.revision_id != VMCS12_REVISION || + (hdr.shadow_vmcs && !nested_cpu_has_vmx_shadow_vmcs(vcpu))) { - kvm_vcpu_unmap(vcpu, &map, false); return nested_vmx_fail(vcpu, VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID); } @@ -5330,8 +5321,11 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) * Load VMCS12 from guest memory since it is not already * cached. */ - memcpy(vmx->nested.cached_vmcs12, new_vmcs12, VMCS12_SIZE); - kvm_vcpu_unmap(vcpu, &map, false); + if (kvm_read_guest_cached(vcpu->kvm, ghc, vmx->nested.cached_vmcs12, + VMCS12_SIZE)) { + return nested_vmx_fail(vcpu, + VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID); + } set_current_vmptr(vmx, vmptr); } @@ -5379,7 +5373,7 @@ static int handle_invept(struct kvm_vcpu *vcpu) struct { u64 eptp, gpa; } operand; - int i, r; + int i, r, gpr_index; if (!(vmx->nested.msrs.secondary_ctls_high & SECONDARY_EXEC_ENABLE_EPT) || @@ -5392,7 +5386,8 @@ static int handle_invept(struct kvm_vcpu *vcpu) return 1; vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); - type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf); + gpr_index = vmx_get_instr_info_reg2(vmx_instruction_info); + type = kvm_register_read(vcpu, gpr_index); types = (vmx->nested.msrs.ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6; @@ -5459,7 +5454,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) u64 gla; } operand; u16 vpid02; - int r; + int r, gpr_index; if (!(vmx->nested.msrs.secondary_ctls_high & SECONDARY_EXEC_ENABLE_VPID) || @@ -5472,7 +5467,8 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) return 1; vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); - type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf); + gpr_index = vmx_get_instr_info_reg2(vmx_instruction_info); + type = kvm_register_read(vcpu, gpr_index); types = (vmx->nested.msrs.vpid_caps & VMX_VPID_EXTENT_SUPPORTED_MASK) >> 8; diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index b8e0d21b7c8a..1b7456b2177b 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -118,16 +118,15 @@ static struct kvm_pmc *intel_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx) } } -/* returns 0 if idx's corresponding MSR exists; otherwise returns 1. */ -static int intel_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx) +static bool intel_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx) { struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); bool fixed = idx & (1u << 30); idx &= ~(3u << 30); - return (!fixed && idx >= pmu->nr_arch_gp_counters) || - (fixed && idx >= pmu->nr_arch_fixed_counters); + return fixed ? idx < pmu->nr_arch_fixed_counters + : idx < pmu->nr_arch_gp_counters; } static struct kvm_pmc *intel_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu, diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 76861b66bbcf..ba66c171d951 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -769,24 +769,13 @@ void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu) /* * Check if MSR is intercepted for currently loaded MSR bitmap. */ -static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr) +static bool msr_write_intercepted(struct vcpu_vmx *vmx, u32 msr) { - unsigned long *msr_bitmap; - int f = sizeof(unsigned long); - - if (!cpu_has_vmx_msr_bitmap()) + if (!(exec_controls_get(vmx) & CPU_BASED_USE_MSR_BITMAPS)) return true; - msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap; - - if (msr <= 0x1fff) { - return !!test_bit(msr, msr_bitmap + 0x800 / f); - } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { - msr &= 0x1fff; - return !!test_bit(msr, msr_bitmap + 0xc00 / f); - } - - return true; + return vmx_test_msr_bitmap_write(vmx->loaded_vmcs->msr_bitmap, + MSR_IA32_SPEC_CTRL); } static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx, @@ -3697,46 +3686,6 @@ void free_vpid(int vpid) spin_unlock(&vmx_vpid_lock); } -static void vmx_clear_msr_bitmap_read(ulong *msr_bitmap, u32 msr) -{ - int f = sizeof(unsigned long); - - if (msr <= 0x1fff) - __clear_bit(msr, msr_bitmap + 0x000 / f); - else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) - __clear_bit(msr & 0x1fff, msr_bitmap + 0x400 / f); -} - -static void vmx_clear_msr_bitmap_write(ulong *msr_bitmap, u32 msr) -{ - int f = sizeof(unsigned long); - - if (msr <= 0x1fff) - __clear_bit(msr, msr_bitmap + 0x800 / f); - else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) - __clear_bit(msr & 0x1fff, msr_bitmap + 0xc00 / f); -} - -static void vmx_set_msr_bitmap_read(ulong *msr_bitmap, u32 msr) -{ - int f = sizeof(unsigned long); - - if (msr <= 0x1fff) - __set_bit(msr, msr_bitmap + 0x000 / f); - else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) - __set_bit(msr & 0x1fff, msr_bitmap + 0x400 / f); -} - -static void vmx_set_msr_bitmap_write(ulong *msr_bitmap, u32 msr) -{ - int f = sizeof(unsigned long); - - if (msr <= 0x1fff) - __set_bit(msr, msr_bitmap + 0x800 / f); - else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) - __set_bit(msr & 0x1fff, msr_bitmap + 0xc00 / f); -} - void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -5494,6 +5443,7 @@ static int handle_invpcid(struct kvm_vcpu *vcpu) u64 pcid; u64 gla; } operand; + int gpr_index; if (!guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) { kvm_queue_exception(vcpu, UD_VECTOR); @@ -5501,12 +5451,8 @@ static int handle_invpcid(struct kvm_vcpu *vcpu) } vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); - type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf); - - if (type > 3) { - kvm_inject_gp(vcpu, 0); - return 1; - } + gpr_index = vmx_get_instr_info_reg2(vmx_instruction_info); + type = kvm_register_read(vcpu, gpr_index); /* According to the Intel instruction reference, the memory operand * is read even if it isn't needed (e.g., for type==all) @@ -6749,7 +6695,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu) * If the L02 MSR bitmap does not intercept the MSR, then we need to * save it. */ - if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) + if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL))) vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0); @@ -7563,7 +7509,8 @@ static void hardware_unsetup(void) static bool vmx_check_apicv_inhibit_reasons(ulong bit) { ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) | - BIT(APICV_INHIBIT_REASON_HYPERV); + BIT(APICV_INHIBIT_REASON_HYPERV) | + BIT(APICV_INHIBIT_REASON_BLOCKIRQ); return supported & BIT(bit); } diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index e7db42e3b0ce..4df2ac24ffc1 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -142,6 +142,16 @@ struct nested_vmx { struct vmcs12 *cached_shadow_vmcs12; /* + * GPA to HVA cache for accessing vmcs12->vmcs_link_pointer + */ + struct gfn_to_hva_cache shadow_vmcs12_cache; + + /* + * GPA to HVA cache for VMCS12 + */ + struct gfn_to_hva_cache vmcs12_cache; + + /* * Indicates if the shadow vmcs or enlightened vmcs must be updated * with the data held by struct vmcs12. */ @@ -400,6 +410,34 @@ static inline void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu); +/* + * Note, early Intel manuals have the write-low and read-high bitmap offsets + * the wrong way round. The bitmaps control MSRs 0x00000000-0x00001fff and + * 0xc0000000-0xc0001fff. The former (low) uses bytes 0-0x3ff for reads and + * 0x800-0xbff for writes. The latter (high) uses 0x400-0x7ff for reads and + * 0xc00-0xfff for writes. MSRs not covered by either of the ranges always + * VM-Exit. + */ +#define __BUILD_VMX_MSR_BITMAP_HELPER(rtype, action, bitop, access, base) \ +static inline rtype vmx_##action##_msr_bitmap_##access(unsigned long *bitmap, \ + u32 msr) \ +{ \ + int f = sizeof(unsigned long); \ + \ + if (msr <= 0x1fff) \ + return bitop##_bit(msr, bitmap + base / f); \ + else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) \ + return bitop##_bit(msr & 0x1fff, bitmap + (base + 0x400) / f); \ + return (rtype)true; \ +} +#define BUILD_VMX_MSR_BITMAP_HELPERS(ret_type, action, bitop) \ + __BUILD_VMX_MSR_BITMAP_HELPER(ret_type, action, bitop, read, 0x0) \ + __BUILD_VMX_MSR_BITMAP_HELPER(ret_type, action, bitop, write, 0x800) + +BUILD_VMX_MSR_BITMAP_HELPERS(bool, test, test) +BUILD_VMX_MSR_BITMAP_HELPERS(void, clear, __clear) +BUILD_VMX_MSR_BITMAP_HELPERS(void, set, __set) + static inline u8 vmx_get_rvi(void) { return vmcs_read16(GUEST_INTR_STATUS) & 0xff; @@ -522,4 +560,9 @@ static inline bool vmx_guest_state_valid(struct kvm_vcpu *vcpu) void dump_vmcs(struct kvm_vcpu *vcpu); +static inline int vmx_get_instr_info_reg2(u32 vmx_instr_info) +{ + return (vmx_instr_info >> 28) & 0xf; +} + #endif /* __KVM_X86_VMX_H */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c1c4e2b05a63..5a403d92833f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3260,8 +3260,11 @@ static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu) static void record_steal_time(struct kvm_vcpu *vcpu) { - struct kvm_host_map map; - struct kvm_steal_time *st; + struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache; + struct kvm_steal_time __user *st; + struct kvm_memslots *slots; + u64 steal; + u32 version; if (kvm_xen_msr_enabled(vcpu->kvm)) { kvm_xen_runstate_set_running(vcpu); @@ -3271,47 +3274,86 @@ static void record_steal_time(struct kvm_vcpu *vcpu) if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) return; - /* -EAGAIN is returned in atomic context so we can just return. */ - if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT, - &map, &vcpu->arch.st.cache, false)) + if (WARN_ON_ONCE(current->mm != vcpu->kvm->mm)) return; - st = map.hva + - offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS); + slots = kvm_memslots(vcpu->kvm); + + if (unlikely(slots->generation != ghc->generation || + kvm_is_error_hva(ghc->hva) || !ghc->memslot)) { + gfn_t gfn = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS; + + /* We rely on the fact that it fits in a single page. */ + BUILD_BUG_ON((sizeof(*st) - 1) & KVM_STEAL_VALID_BITS); + + if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gfn, sizeof(*st)) || + kvm_is_error_hva(ghc->hva) || !ghc->memslot) + return; + } + st = (struct kvm_steal_time __user *)ghc->hva; /* * Doing a TLB flush here, on the guest's behalf, can avoid * expensive IPIs. */ if (guest_pv_has(vcpu, KVM_FEATURE_PV_TLB_FLUSH)) { - u8 st_preempted = xchg(&st->preempted, 0); + u8 st_preempted = 0; + int err = -EFAULT; + + if (!user_access_begin(st, sizeof(*st))) + return; + + asm volatile("1: xchgb %0, %2\n" + "xor %1, %1\n" + "2:\n" + _ASM_EXTABLE_UA(1b, 2b) + : "+q" (st_preempted), + "+&r" (err), + "+m" (st->preempted)); + if (err) + goto out; + + user_access_end(); + + vcpu->arch.st.preempted = 0; trace_kvm_pv_tlb_flush(vcpu->vcpu_id, st_preempted & KVM_VCPU_FLUSH_TLB); if (st_preempted & KVM_VCPU_FLUSH_TLB) kvm_vcpu_flush_tlb_guest(vcpu); + + if (!user_access_begin(st, sizeof(*st))) + goto dirty; } else { - st->preempted = 0; - } + if (!user_access_begin(st, sizeof(*st))) + return; - vcpu->arch.st.preempted = 0; + unsafe_put_user(0, &st->preempted, out); + vcpu->arch.st.preempted = 0; + } - if (st->version & 1) - st->version += 1; /* first time write, random junk */ + unsafe_get_user(version, &st->version, out); + if (version & 1) + version += 1; /* first time write, random junk */ - st->version += 1; + version += 1; + unsafe_put_user(version, &st->version, out); smp_wmb(); - st->steal += current->sched_info.run_delay - + unsafe_get_user(steal, &st->steal, out); + steal += current->sched_info.run_delay - vcpu->arch.st.last_steal; vcpu->arch.st.last_steal = current->sched_info.run_delay; + unsafe_put_user(steal, &st->steal, out); - smp_wmb(); + version += 1; + unsafe_put_user(version, &st->version, out); - st->version += 1; - - kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, false); + out: + user_access_end(); + dirty: + mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa)); } int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) @@ -3517,7 +3559,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (!guest_pv_has(vcpu, KVM_FEATURE_PV_EOI)) return 1; - if (kvm_lapic_enable_pv_eoi(vcpu, data, sizeof(u8))) + if (kvm_lapic_set_pv_eoi(vcpu, data, sizeof(u8))) return 1; break; @@ -4137,7 +4179,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = !static_call(kvm_x86_cpu_has_accelerated_tpr)(); break; case KVM_CAP_NR_VCPUS: - r = KVM_SOFT_MAX_VCPUS; + r = min_t(unsigned int, num_online_cpus(), KVM_MAX_VCPUS); break; case KVM_CAP_MAX_VCPUS: r = KVM_MAX_VCPUS; @@ -4351,8 +4393,10 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu) { - struct kvm_host_map map; - struct kvm_steal_time *st; + struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache; + struct kvm_steal_time __user *st; + struct kvm_memslots *slots; + static const u8 preempted = KVM_VCPU_PREEMPTED; if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) return; @@ -4360,16 +4404,23 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu) if (vcpu->arch.st.preempted) return; - if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT, &map, - &vcpu->arch.st.cache, true)) + /* This happens on process exit */ + if (unlikely(current->mm != vcpu->kvm->mm)) + return; + + slots = kvm_memslots(vcpu->kvm); + + if (unlikely(slots->generation != ghc->generation || + kvm_is_error_hva(ghc->hva) || !ghc->memslot)) return; - st = map.hva + - offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS); + st = (struct kvm_steal_time __user *)ghc->hva; + BUILD_BUG_ON(sizeof(st->preempted) != sizeof(preempted)); - st->preempted = vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED; + if (!copy_to_user_nofault(&st->preempted, &preempted, sizeof(preempted))) + vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED; - kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, true); + mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa)); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) @@ -5728,6 +5779,12 @@ split_irqchip_unlock: if (kvm_x86_ops.vm_copy_enc_context_from) r = kvm_x86_ops.vm_copy_enc_context_from(kvm, cap->args[0]); return r; + case KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM: + r = -EINVAL; + if (kvm_x86_ops.vm_move_enc_context_from) + r = kvm_x86_ops.vm_move_enc_context_from( + kvm, cap->args[0]); + return r; case KVM_CAP_EXIT_HYPERCALL: if (cap->args[0] & ~KVM_EXIT_HYPERCALL_VALID_MASK) { r = -EINVAL; @@ -7328,7 +7385,9 @@ static void emulator_set_smbase(struct x86_emulate_ctxt *ctxt, u64 smbase) static int emulator_check_pmc(struct x86_emulate_ctxt *ctxt, u32 pmc) { - return kvm_pmu_is_valid_rdpmc_ecx(emul_to_vcpu(ctxt), pmc); + if (kvm_pmu_is_valid_rdpmc_ecx(emul_to_vcpu(ctxt), pmc)) + return 0; + return -EINVAL; } static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt, @@ -8789,7 +8848,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) trace_kvm_hypercall(nr, a0, a1, a2, a3); - op_64_bit = is_64_bit_mode(vcpu); + op_64_bit = is_64_bit_hypercall(vcpu); if (!op_64_bit) { nr &= 0xFFFFFFFF; a0 &= 0xFFFFFFFF; @@ -9488,12 +9547,16 @@ static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu) if (!kvm_apic_hw_enabled(vcpu->arch.apic)) return; - if (to_hv_vcpu(vcpu)) + if (to_hv_vcpu(vcpu)) { bitmap_or((ulong *)eoi_exit_bitmap, vcpu->arch.ioapic_handled_vectors, to_hv_synic(vcpu)->vec_bitmap, 256); + static_call(kvm_x86_load_eoi_exitmap)(vcpu, eoi_exit_bitmap); + return; + } - static_call(kvm_x86_load_eoi_exitmap)(vcpu, eoi_exit_bitmap); + static_call(kvm_x86_load_eoi_exitmap)( + vcpu, (u64 *)vcpu->arch.ioapic_handled_vectors); } void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, @@ -9552,7 +9615,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) } if (kvm_request_pending(vcpu)) { - if (kvm_check_request(KVM_REQ_VM_BUGGED, vcpu)) { + if (kvm_check_request(KVM_REQ_VM_DEAD, vcpu)) { r = -EIO; goto out; } @@ -10564,6 +10627,24 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, return ret; } +static void kvm_arch_vcpu_guestdbg_update_apicv_inhibit(struct kvm *kvm) +{ + bool inhibit = false; + struct kvm_vcpu *vcpu; + int i; + + down_write(&kvm->arch.apicv_update_lock); + + kvm_for_each_vcpu(i, vcpu, kvm) { + if (vcpu->guest_debug & KVM_GUESTDBG_BLOCKIRQ) { + inhibit = true; + break; + } + } + __kvm_request_apicv_update(kvm, !inhibit, APICV_INHIBIT_REASON_BLOCKIRQ); + up_write(&kvm->arch.apicv_update_lock); +} + int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) { @@ -10616,6 +10697,8 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, static_call(kvm_x86_update_exception_bitmap)(vcpu); + kvm_arch_vcpu_guestdbg_update_apicv_inhibit(vcpu->kvm); + r = 0; out: @@ -10859,11 +10942,8 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) { - struct gfn_to_pfn_cache *cache = &vcpu->arch.st.cache; int idx; - kvm_release_pfn(cache->pfn, cache->dirty, cache); - kvmclock_reset(vcpu); static_call(kvm_x86_vcpu_free)(vcpu); @@ -12275,7 +12355,8 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva) return kvm_skip_emulated_instruction(vcpu); default: - BUG(); /* We have already checked above that type <= 3 */ + kvm_inject_gp(vcpu, 0); + return 1; } } EXPORT_SYMBOL_GPL(kvm_handle_invpcid); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index ea264c4502e4..997669ae9caa 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -153,12 +153,24 @@ static inline bool is_64_bit_mode(struct kvm_vcpu *vcpu) { int cs_db, cs_l; + WARN_ON_ONCE(vcpu->arch.guest_state_protected); + if (!is_long_mode(vcpu)) return false; static_call(kvm_x86_get_cs_db_l_bits)(vcpu, &cs_db, &cs_l); return cs_l; } +static inline bool is_64_bit_hypercall(struct kvm_vcpu *vcpu) +{ + /* + * If running with protected guest state, the CS register is not + * accessible. The hypercall register values will have had to been + * provided in 64-bit mode, so assume the guest is in 64-bit. + */ + return vcpu->arch.guest_state_protected || is_64_bit_mode(vcpu); +} + static inline bool x86_exception_has_error_code(unsigned int vector) { static u32 exception_has_error_code = BIT(DF_VECTOR) | BIT(TS_VECTOR) | diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index 8f62baebd028..dff2bdf9507a 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -127,9 +127,9 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state) state_entry_time = vx->runstate_entry_time; state_entry_time |= XEN_RUNSTATE_UPDATE; - BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state_entry_time) != + BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, state_entry_time) != sizeof(state_entry_time)); - BUILD_BUG_ON(sizeof(((struct compat_vcpu_runstate_info *)0)->state_entry_time) != + BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state_entry_time) != sizeof(state_entry_time)); if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache, @@ -144,9 +144,9 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state) */ BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) != offsetof(struct compat_vcpu_runstate_info, state)); - BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state) != + BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, state) != sizeof(vx->current_runstate)); - BUILD_BUG_ON(sizeof(((struct compat_vcpu_runstate_info *)0)->state) != + BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state) != sizeof(vx->current_runstate)); if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache, @@ -163,9 +163,9 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state) offsetof(struct vcpu_runstate_info, time) - sizeof(u64)); BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state_entry_time) != offsetof(struct compat_vcpu_runstate_info, time) - sizeof(u64)); - BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->time) != - sizeof(((struct compat_vcpu_runstate_info *)0)->time)); - BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->time) != + BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) != + sizeof_field(struct compat_vcpu_runstate_info, time)); + BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) != sizeof(vx->runstate_times)); if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache, @@ -205,9 +205,9 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v) BUILD_BUG_ON(offsetof(struct vcpu_info, evtchn_upcall_pending) != offsetof(struct compat_vcpu_info, evtchn_upcall_pending)); BUILD_BUG_ON(sizeof(rc) != - sizeof(((struct vcpu_info *)0)->evtchn_upcall_pending)); + sizeof_field(struct vcpu_info, evtchn_upcall_pending)); BUILD_BUG_ON(sizeof(rc) != - sizeof(((struct compat_vcpu_info *)0)->evtchn_upcall_pending)); + sizeof_field(struct compat_vcpu_info, evtchn_upcall_pending)); /* * For efficiency, this mirrors the checks for using the valid @@ -299,7 +299,7 @@ int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data) break; case KVM_XEN_ATTR_TYPE_SHARED_INFO: - data->u.shared_info.gfn = gpa_to_gfn(kvm->arch.xen.shinfo_gfn); + data->u.shared_info.gfn = kvm->arch.xen.shinfo_gfn; r = 0; break; @@ -698,7 +698,7 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu) kvm_hv_hypercall_enabled(vcpu)) return kvm_hv_hypercall(vcpu); - longmode = is_64_bit_mode(vcpu); + longmode = is_64_bit_hypercall(vcpu); if (!longmode) { params[0] = (u32)kvm_rbx_read(vcpu); params[1] = (u32)kvm_rcx_read(vcpu); diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 23a14d82e783..1895986842b9 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -618,7 +618,7 @@ static void __init memory_map_top_down(unsigned long map_start, */ addr = memblock_phys_alloc_range(PMD_SIZE, PMD_SIZE, map_start, map_end); - memblock_free(addr, PMD_SIZE); + memblock_phys_free(addr, PMD_SIZE); real_end = addr + PMD_SIZE; /* step_size need to be small so pgt_buf from BRK could cover it */ diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index bd90b8fe81e4..d4e2648a1dfb 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -238,11 +238,7 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base) } } -/* - * The <linux/kallsyms.h> already defines is_kernel_text, - * using '__' prefix not to get in conflict. - */ -static inline int __is_kernel_text(unsigned long addr) +static inline int is_x86_32_kernel_text(unsigned long addr) { if (addr >= (unsigned long)_text && addr <= (unsigned long)__init_end) return 1; @@ -333,8 +329,8 @@ repeat: addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1; - if (__is_kernel_text(addr) || - __is_kernel_text(addr2)) + if (is_x86_32_kernel_text(addr) || + is_x86_32_kernel_text(addr2)) prot = PAGE_KERNEL_LARGE_EXEC; pages_2m++; @@ -359,7 +355,7 @@ repeat: */ pgprot_t init_prot = __pgprot(PTE_IDENT_ATTR); - if (__is_kernel_text(addr)) + if (is_x86_32_kernel_text(addr)) prot = PAGE_KERNEL_EXEC; pages_4k++; @@ -779,37 +775,6 @@ void __init mem_init(void) test_wp_bit(); } -#ifdef CONFIG_MEMORY_HOTPLUG -int arch_add_memory(int nid, u64 start, u64 size, - struct mhp_params *params) -{ - unsigned long start_pfn = start >> PAGE_SHIFT; - unsigned long nr_pages = size >> PAGE_SHIFT; - int ret; - - /* - * The page tables were already mapped at boot so if the caller - * requests a different mapping type then we must change all the - * pages with __set_memory_prot(). - */ - if (params->pgprot.pgprot != PAGE_KERNEL.pgprot) { - ret = __set_memory_prot(start, nr_pages, params->pgprot); - if (ret) - return ret; - } - - return __add_pages(nid, start_pfn, nr_pages, params); -} - -void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) -{ - unsigned long start_pfn = start >> PAGE_SHIFT; - unsigned long nr_pages = size >> PAGE_SHIFT; - - __remove_pages(start_pfn, nr_pages, altmap); -} -#endif - int kernel_set_to_readonly __read_mostly; static void mark_nxdata_nx(void) @@ -820,7 +785,7 @@ static void mark_nxdata_nx(void) */ unsigned long start = PFN_ALIGN(_etext); /* - * This comes from __is_kernel_text upper limit. Also HPAGE where used: + * This comes from is_x86_32_kernel_text upper limit. Also HPAGE where used: */ unsigned long size = (((unsigned long)__init_end + HPAGE_SIZE) & HPAGE_MASK) - start; diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index ef885370719a..e7b9b464a82f 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -49,7 +49,7 @@ static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr, p = early_alloc(PMD_SIZE, nid, false); if (p && pmd_set_huge(pmd, __pa(p), PAGE_KERNEL)) return; - memblock_free_ptr(p, PMD_SIZE); + memblock_free(p, PMD_SIZE); } p = early_alloc(PAGE_SIZE, nid, true); @@ -85,7 +85,7 @@ static void __init kasan_populate_pud(pud_t *pud, unsigned long addr, p = early_alloc(PUD_SIZE, nid, false); if (p && pud_set_huge(pud, __pa(p), PAGE_KERNEL)) return; - memblock_free_ptr(p, PUD_SIZE); + memblock_free(p, PUD_SIZE); } p = early_alloc(PAGE_SIZE, nid, true); diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index 23d54b810f08..35487305d8af 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -229,28 +229,75 @@ void __init sev_setup_arch(void) swiotlb_adjust_size(size); } -static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc) +static unsigned long pg_level_to_pfn(int level, pte_t *kpte, pgprot_t *ret_prot) { - pgprot_t old_prot, new_prot; - unsigned long pfn, pa, size; - pte_t new_pte; + unsigned long pfn = 0; + pgprot_t prot; switch (level) { case PG_LEVEL_4K: pfn = pte_pfn(*kpte); - old_prot = pte_pgprot(*kpte); + prot = pte_pgprot(*kpte); break; case PG_LEVEL_2M: pfn = pmd_pfn(*(pmd_t *)kpte); - old_prot = pmd_pgprot(*(pmd_t *)kpte); + prot = pmd_pgprot(*(pmd_t *)kpte); break; case PG_LEVEL_1G: pfn = pud_pfn(*(pud_t *)kpte); - old_prot = pud_pgprot(*(pud_t *)kpte); + prot = pud_pgprot(*(pud_t *)kpte); break; default: - return; + WARN_ONCE(1, "Invalid level for kpte\n"); + return 0; + } + + if (ret_prot) + *ret_prot = prot; + + return pfn; +} + +void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc) +{ +#ifdef CONFIG_PARAVIRT + unsigned long sz = npages << PAGE_SHIFT; + unsigned long vaddr_end = vaddr + sz; + + while (vaddr < vaddr_end) { + int psize, pmask, level; + unsigned long pfn; + pte_t *kpte; + + kpte = lookup_address(vaddr, &level); + if (!kpte || pte_none(*kpte)) { + WARN_ONCE(1, "kpte lookup for vaddr\n"); + return; + } + + pfn = pg_level_to_pfn(level, kpte, NULL); + if (!pfn) + continue; + + psize = page_level_size(level); + pmask = page_level_mask(level); + + notify_page_enc_status_changed(pfn, psize >> PAGE_SHIFT, enc); + + vaddr = (vaddr & pmask) + psize; } +#endif +} + +static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc) +{ + pgprot_t old_prot, new_prot; + unsigned long pfn, pa, size; + pte_t new_pte; + + pfn = pg_level_to_pfn(level, kpte, &old_prot); + if (!pfn) + return; new_prot = old_prot; if (enc) @@ -286,12 +333,13 @@ static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc) static int __init early_set_memory_enc_dec(unsigned long vaddr, unsigned long size, bool enc) { - unsigned long vaddr_end, vaddr_next; + unsigned long vaddr_end, vaddr_next, start; unsigned long psize, pmask; int split_page_size_mask; int level, ret; pte_t *kpte; + start = vaddr; vaddr_next = vaddr; vaddr_end = vaddr + size; @@ -346,6 +394,7 @@ static int __init early_set_memory_enc_dec(unsigned long vaddr, ret = 0; + notify_range_enc_status_changed(start, PAGE_ALIGN(size) >> PAGE_SHIFT, enc); out: __flush_tlb_all(); return ret; @@ -361,6 +410,11 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size) return early_set_memory_enc_dec(vaddr, size, true); } +void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages, bool enc) +{ + notify_range_enc_status_changed(vaddr, npages, enc); +} + /* Override for DMA direct allocation check - ARCH_HAS_FORCE_DMA_UNENCRYPTED */ bool force_dma_unencrypted(struct device *dev) { diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 1e9b93b088db..c6b1213086d6 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -355,7 +355,7 @@ void __init numa_reset_distance(void) /* numa_distance could be 1LU marking allocation failure, test cnt */ if (numa_distance_cnt) - memblock_free_ptr(numa_distance, size); + memblock_free(numa_distance, size); numa_distance_cnt = 0; numa_distance = NULL; /* enable table creation */ } diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c index e801e30089c4..1a02b791d273 100644 --- a/arch/x86/mm/numa_emulation.c +++ b/arch/x86/mm/numa_emulation.c @@ -517,7 +517,7 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt) } /* free the copied physical distance table */ - memblock_free_ptr(phys_dist, phys_size); + memblock_free(phys_dist, phys_size); return; no_emu: diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 934dc5b2df36..b4072115c8ef 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -2023,6 +2023,12 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc) */ cpa_flush(&cpa, 0); + /* + * Notify hypervisor that a given memory range is mapped encrypted + * or decrypted. + */ + notify_range_enc_status_changed(addr, numpages, enc); + return ret; } diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 3507f456fcd0..9e1e6b8d8876 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -632,7 +632,7 @@ static void set_dev_domain_options(struct pci_dev *pdev) pdev->hotplug_user_indicators = 1; } -int pcibios_add_device(struct pci_dev *dev) +int pcibios_device_add(struct pci_dev *dev) { struct pci_setup_rom *rom; struct irq_domain *msidom; diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index 5debe4ac6f81..12da00558631 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -23,6 +23,7 @@ #include <xen/features.h> #include <xen/events.h> +#include <xen/pci.h> #include <asm/xen/pci.h> #include <asm/xen/cpuid.h> #include <asm/apic.h> @@ -585,78 +586,3 @@ int __init pci_xen_initial_domain(void) } #endif -#ifdef CONFIG_XEN_DOM0 - -struct xen_device_domain_owner { - domid_t domain; - struct pci_dev *dev; - struct list_head list; -}; - -static DEFINE_SPINLOCK(dev_domain_list_spinlock); -static struct list_head dev_domain_list = LIST_HEAD_INIT(dev_domain_list); - -static struct xen_device_domain_owner *find_device(struct pci_dev *dev) -{ - struct xen_device_domain_owner *owner; - - list_for_each_entry(owner, &dev_domain_list, list) { - if (owner->dev == dev) - return owner; - } - return NULL; -} - -int xen_find_device_domain_owner(struct pci_dev *dev) -{ - struct xen_device_domain_owner *owner; - int domain = -ENODEV; - - spin_lock(&dev_domain_list_spinlock); - owner = find_device(dev); - if (owner) - domain = owner->domain; - spin_unlock(&dev_domain_list_spinlock); - return domain; -} -EXPORT_SYMBOL_GPL(xen_find_device_domain_owner); - -int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain) -{ - struct xen_device_domain_owner *owner; - - owner = kzalloc(sizeof(struct xen_device_domain_owner), GFP_KERNEL); - if (!owner) - return -ENODEV; - - spin_lock(&dev_domain_list_spinlock); - if (find_device(dev)) { - spin_unlock(&dev_domain_list_spinlock); - kfree(owner); - return -EEXIST; - } - owner->domain = domain; - owner->dev = dev; - list_add_tail(&owner->list, &dev_domain_list); - spin_unlock(&dev_domain_list_spinlock); - return 0; -} -EXPORT_SYMBOL_GPL(xen_register_device_domain_owner); - -int xen_unregister_device_domain_owner(struct pci_dev *dev) -{ - struct xen_device_domain_owner *owner; - - spin_lock(&dev_domain_list_spinlock); - owner = find_device(dev); - if (!owner) { - spin_unlock(&dev_domain_list_spinlock); - return -ENODEV; - } - list_del(&owner->list); - spin_unlock(&dev_domain_list_spinlock); - kfree(owner); - return 0; -} -EXPORT_SYMBOL_GPL(xen_unregister_device_domain_owner); -#endif /* CONFIG_XEN_DOM0 */ diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 95d970359e17..30c6e986a6cd 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -31,25 +31,10 @@ EXPORT_SYMBOL_GPL(hypercall_page); * Pointer to the xen_vcpu_info structure or * &HYPERVISOR_shared_info->vcpu_info[cpu]. See xen_hvm_init_shared_info * and xen_vcpu_setup for details. By default it points to share_info->vcpu_info - * but if the hypervisor supports VCPUOP_register_vcpu_info then it can point - * to xen_vcpu_info. The pointer is used in __xen_evtchn_do_upcall to - * acknowledge pending events. - * Also more subtly it is used by the patched version of irq enable/disable - * e.g. xen_irq_enable_direct and xen_iret in PV mode. - * - * The desire to be able to do those mask/unmask operations as a single - * instruction by using the per-cpu offset held in %gs is the real reason - * vcpu info is in a per-cpu pointer and the original reason for this - * hypercall. - * + * but during boot it is switched to point to xen_vcpu_info. + * The pointer is used in __xen_evtchn_do_upcall to acknowledge pending events. */ DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); - -/* - * Per CPU pages used if hypervisor supports VCPUOP_register_vcpu_info - * hypercall. This can be used both in PV and PVHVM mode. The structure - * overrides the default per_cpu(xen_vcpu, cpu) value. - */ DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); /* Linux <-> Xen vCPU id mapping */ @@ -84,21 +69,6 @@ EXPORT_SYMBOL(xen_start_flags); */ struct shared_info *HYPERVISOR_shared_info = &xen_dummy_shared_info; -/* - * Flag to determine whether vcpu info placement is available on all - * VCPUs. We assume it is to start with, and then set it to zero on - * the first failure. This is because it can succeed on some VCPUs - * and not others, since it can involve hypervisor memory allocation, - * or because the guest failed to guarantee all the appropriate - * constraints on all VCPUs (ie buffer can't cross a page boundary). - * - * Note that any particular CPU may be using a placed vcpu structure, - * but we can only optimise if the all are. - * - * 0: not available, 1: available - */ -int xen_have_vcpu_info_placement = 1; - static int xen_cpu_up_online(unsigned int cpu) { xen_init_lock_cpu(cpu); @@ -124,10 +94,8 @@ int xen_cpuhp_setup(int (*cpu_up_prepare_cb)(unsigned int), return rc >= 0 ? 0 : rc; } -static int xen_vcpu_setup_restore(int cpu) +static void xen_vcpu_setup_restore(int cpu) { - int rc = 0; - /* Any per_cpu(xen_vcpu) is stale, so reset it */ xen_vcpu_info_reset(cpu); @@ -136,11 +104,8 @@ static int xen_vcpu_setup_restore(int cpu) * be handled by hotplug. */ if (xen_pv_domain() || - (xen_hvm_domain() && cpu_online(cpu))) { - rc = xen_vcpu_setup(cpu); - } - - return rc; + (xen_hvm_domain() && cpu_online(cpu))) + xen_vcpu_setup(cpu); } /* @@ -150,7 +115,7 @@ static int xen_vcpu_setup_restore(int cpu) */ void xen_vcpu_restore(void) { - int cpu, rc; + int cpu; for_each_possible_cpu(cpu) { bool other_cpu = (cpu != smp_processor_id()); @@ -170,20 +135,9 @@ void xen_vcpu_restore(void) if (xen_pv_domain() || xen_feature(XENFEAT_hvm_safe_pvclock)) xen_setup_runstate_info(cpu); - rc = xen_vcpu_setup_restore(cpu); - if (rc) - pr_emerg_once("vcpu restore failed for cpu=%d err=%d. " - "System will hang.\n", cpu, rc); - /* - * In case xen_vcpu_setup_restore() fails, do not bring up the - * VCPU. This helps us avoid the resulting OOPS when the VCPU - * accesses pvclock_vcpu_time via xen_vcpu (which is NULL.) - * Note that this does not improve the situation much -- now the - * VM hangs instead of OOPSing -- with the VCPUs that did not - * fail, spinning in stop_machine(), waiting for the failed - * VCPUs to come up. - */ - if (other_cpu && is_up && (rc == 0) && + xen_vcpu_setup_restore(cpu); + + if (other_cpu && is_up && HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL)) BUG(); } @@ -200,7 +154,7 @@ void xen_vcpu_info_reset(int cpu) } } -int xen_vcpu_setup(int cpu) +void xen_vcpu_setup(int cpu) { struct vcpu_register_vcpu_info info; int err; @@ -221,44 +175,26 @@ int xen_vcpu_setup(int cpu) */ if (xen_hvm_domain()) { if (per_cpu(xen_vcpu, cpu) == &per_cpu(xen_vcpu_info, cpu)) - return 0; + return; } - if (xen_have_vcpu_info_placement) { - vcpup = &per_cpu(xen_vcpu_info, cpu); - info.mfn = arbitrary_virt_to_mfn(vcpup); - info.offset = offset_in_page(vcpup); - - /* - * Check to see if the hypervisor will put the vcpu_info - * structure where we want it, which allows direct access via - * a percpu-variable. - * N.B. This hypercall can _only_ be called once per CPU. - * Subsequent calls will error out with -EINVAL. This is due to - * the fact that hypervisor has no unregister variant and this - * hypercall does not allow to over-write info.mfn and - * info.offset. - */ - err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, - xen_vcpu_nr(cpu), &info); - - if (err) { - pr_warn_once("register_vcpu_info failed: cpu=%d err=%d\n", - cpu, err); - xen_have_vcpu_info_placement = 0; - } else { - /* - * This cpu is using the registered vcpu info, even if - * later ones fail to. - */ - per_cpu(xen_vcpu, cpu) = vcpup; - } - } + vcpup = &per_cpu(xen_vcpu_info, cpu); + info.mfn = arbitrary_virt_to_mfn(vcpup); + info.offset = offset_in_page(vcpup); - if (!xen_have_vcpu_info_placement) - xen_vcpu_info_reset(cpu); + /* + * N.B. This hypercall can _only_ be called once per CPU. + * Subsequent calls will error out with -EINVAL. This is due to + * the fact that hypervisor has no unregister variant and this + * hypercall does not allow to over-write info.mfn and + * info.offset. + */ + err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, xen_vcpu_nr(cpu), + &info); + if (err) + panic("register_vcpu_info failed: cpu=%d err=%d\n", cpu, err); - return ((per_cpu(xen_vcpu, cpu) == NULL) ? -ENODEV : 0); + per_cpu(xen_vcpu, cpu) = vcpup; } void __init xen_banner(void) diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index e68ea5f4ad1c..42300941ec29 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -163,9 +163,9 @@ static int xen_cpu_up_prepare_hvm(unsigned int cpu) per_cpu(xen_vcpu_id, cpu) = cpu_acpi_id(cpu); else per_cpu(xen_vcpu_id, cpu) = cpu; - rc = xen_vcpu_setup(cpu); - if (rc || !xen_have_vector_callback) - return rc; + xen_vcpu_setup(cpu); + if (!xen_have_vector_callback) + return 0; if (xen_feature(XENFEAT_hvm_safe_pvclock)) xen_setup_timer(cpu); diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 4f63117f09bb..5004feb16783 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -27,7 +27,6 @@ #include <linux/export.h> #include <linux/mm.h> #include <linux/page-flags.h> -#include <linux/highmem.h> #include <linux/pci.h> #include <linux/gfp.h> #include <linux/edd.h> @@ -993,31 +992,13 @@ void __init xen_setup_vcpu_info_placement(void) for_each_possible_cpu(cpu) { /* Set up direct vCPU id mapping for PV guests. */ per_cpu(xen_vcpu_id, cpu) = cpu; - - /* - * xen_vcpu_setup(cpu) can fail -- in which case it - * falls back to the shared_info version for cpus - * where xen_vcpu_nr(cpu) < MAX_VIRT_CPUS. - * - * xen_cpu_up_prepare_pv() handles the rest by failing - * them in hotplug. - */ - (void) xen_vcpu_setup(cpu); + xen_vcpu_setup(cpu); } - /* - * xen_vcpu_setup managed to place the vcpu_info within the - * percpu area for all cpus, so make use of it. - */ - if (xen_have_vcpu_info_placement) { - pv_ops.irq.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct); - pv_ops.irq.irq_disable = - __PV_IS_CALLEE_SAVE(xen_irq_disable_direct); - pv_ops.irq.irq_enable = - __PV_IS_CALLEE_SAVE(xen_irq_enable_direct); - pv_ops.mmu.read_cr2 = - __PV_IS_CALLEE_SAVE(xen_read_cr2_direct); - } + pv_ops.irq.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct); + pv_ops.irq.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct); + pv_ops.irq.irq_enable = __PV_IS_CALLEE_SAVE(xen_irq_enable_direct); + pv_ops.mmu.read_cr2 = __PV_IS_CALLEE_SAVE(xen_read_cr2_direct); } static const struct pv_info xen_info __initconst = { @@ -1247,12 +1228,6 @@ asmlinkage __visible void __init xen_start_kernel(void) __supported_pte_mask &= ~_PAGE_GLOBAL; __default_kernel_pte_mask &= ~_PAGE_GLOBAL; - /* - * Prevent page tables from being allocated in highmem, even - * if CONFIG_HIGHPTE is enabled. - */ - __userpte_alloc_gfp &= ~__GFP_HIGHMEM; - /* Get mfn list */ xen_build_dynamic_phys_to_machine(); diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index 4fe387e520af..06c3c2fb4b06 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c @@ -24,60 +24,6 @@ noinstr void xen_force_evtchn_callback(void) (void)HYPERVISOR_xen_version(0, NULL); } -asmlinkage __visible noinstr unsigned long xen_save_fl(void) -{ - struct vcpu_info *vcpu; - unsigned long flags; - - vcpu = this_cpu_read(xen_vcpu); - - /* flag has opposite sense of mask */ - flags = !vcpu->evtchn_upcall_mask; - - /* convert to IF type flag - -0 -> 0x00000000 - -1 -> 0xffffffff - */ - return (-flags) & X86_EFLAGS_IF; -} -__PV_CALLEE_SAVE_REGS_THUNK(xen_save_fl, ".noinstr.text"); - -asmlinkage __visible noinstr void xen_irq_disable(void) -{ - /* There's a one instruction preempt window here. We need to - make sure we're don't switch CPUs between getting the vcpu - pointer and updating the mask. */ - preempt_disable(); - this_cpu_read(xen_vcpu)->evtchn_upcall_mask = 1; - preempt_enable_no_resched(); -} -__PV_CALLEE_SAVE_REGS_THUNK(xen_irq_disable, ".noinstr.text"); - -asmlinkage __visible noinstr void xen_irq_enable(void) -{ - struct vcpu_info *vcpu; - - /* - * We may be preempted as soon as vcpu->evtchn_upcall_mask is - * cleared, so disable preemption to ensure we check for - * events on the VCPU we are still running on. - */ - preempt_disable(); - - vcpu = this_cpu_read(xen_vcpu); - vcpu->evtchn_upcall_mask = 0; - - /* Doesn't matter if we get preempted here, because any - pending event will get dealt with anyway. */ - - barrier(); /* unmask then check (avoid races) */ - if (unlikely(vcpu->evtchn_upcall_pending)) - xen_force_evtchn_callback(); - - preempt_enable(); -} -__PV_CALLEE_SAVE_REGS_THUNK(xen_irq_enable, ".noinstr.text"); - static void xen_safe_halt(void) { /* Blocking includes an implicit local_irq_enable(). */ @@ -96,10 +42,10 @@ static void xen_halt(void) static const typeof(pv_ops) xen_irq_ops __initconst = { .irq = { - - .save_fl = PV_CALLEE_SAVE(xen_save_fl), - .irq_disable = PV_CALLEE_SAVE(xen_irq_disable), - .irq_enable = PV_CALLEE_SAVE(xen_irq_enable), + /* Initial interrupt flag handling only called while interrupts off. */ + .save_fl = __PV_IS_CALLEE_SAVE(paravirt_ret0), + .irq_disable = __PV_IS_CALLEE_SAVE(paravirt_nop), + .irq_enable = __PV_IS_CALLEE_SAVE(paravirt_BUG), .safe_halt = xen_safe_halt, .halt = xen_halt, diff --git a/arch/x86/xen/mmu_hvm.c b/arch/x86/xen/mmu_hvm.c index 57409373750f..509bdee3ab90 100644 --- a/arch/x86/xen/mmu_hvm.c +++ b/arch/x86/xen/mmu_hvm.c @@ -9,39 +9,28 @@ #ifdef CONFIG_PROC_VMCORE /* - * This function is used in two contexts: - * - the kdump kernel has to check whether a pfn of the crashed kernel - * was a ballooned page. vmcore is using this function to decide - * whether to access a pfn of the crashed kernel. - * - the kexec kernel has to check whether a pfn was ballooned by the - * previous kernel. If the pfn is ballooned, handle it properly. - * Returns 0 if the pfn is not backed by a RAM page, the caller may + * The kdump kernel has to check whether a pfn of the crashed kernel + * was a ballooned page. vmcore is using this function to decide + * whether to access a pfn of the crashed kernel. + * Returns "false" if the pfn is not backed by a RAM page, the caller may * handle the pfn special in this case. */ -static int xen_oldmem_pfn_is_ram(unsigned long pfn) +static bool xen_vmcore_pfn_is_ram(struct vmcore_cb *cb, unsigned long pfn) { struct xen_hvm_get_mem_type a = { .domid = DOMID_SELF, .pfn = pfn, }; - int ram; - if (HYPERVISOR_hvm_op(HVMOP_get_mem_type, &a)) - return -ENXIO; - - switch (a.mem_type) { - case HVMMEM_mmio_dm: - ram = 0; - break; - case HVMMEM_ram_rw: - case HVMMEM_ram_ro: - default: - ram = 1; - break; + if (HYPERVISOR_hvm_op(HVMOP_get_mem_type, &a)) { + pr_warn_once("Unexpected HVMOP_get_mem_type failure\n"); + return true; } - - return ram; + return a.mem_type != HVMMEM_mmio_dm; } +static struct vmcore_cb xen_vmcore_cb = { + .pfn_is_ram = xen_vmcore_pfn_is_ram, +}; #endif static void xen_hvm_exit_mmap(struct mm_struct *mm) @@ -75,6 +64,6 @@ void __init xen_hvm_init_mmu_ops(void) if (is_pagetable_dying_supported()) pv_ops.mmu.exit_mmap = xen_hvm_exit_mmap; #ifdef CONFIG_PROC_VMCORE - WARN_ON(register_oldmem_pfn_is_ram(&xen_oldmem_pfn_is_ram)); + register_vmcore_cb(&xen_vmcore_cb); #endif } diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 1ce436eeda15..00354866921b 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -41,7 +41,6 @@ * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007 */ #include <linux/sched/mm.h> -#include <linux/highmem.h> #include <linux/debugfs.h> #include <linux/bug.h> #include <linux/vmalloc.h> @@ -86,8 +85,10 @@ #include "mmu.h" #include "debugfs.h" +#ifdef CONFIG_X86_VSYSCALL_EMULATION /* l3 pud for userspace vsyscall mapping */ static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss; +#endif /* * Protects atomic reservation decrease/increase against concurrent increases. @@ -241,9 +242,11 @@ static void xen_set_pmd(pmd_t *ptr, pmd_t val) * Associate a virtual page frame with a given physical page frame * and protection flags for that frame. */ -void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags) +void __init set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags) { - set_pte_vaddr(vaddr, mfn_pte(mfn, flags)); + if (HYPERVISOR_update_va_mapping(vaddr, mfn_pte(mfn, flags), + UVMF_INVLPG)) + BUG(); } static bool xen_batched_set_pte(pte_t *ptep, pte_t pteval) @@ -789,7 +792,9 @@ static void __init xen_mark_pinned(struct mm_struct *mm, struct page *page, static void __init xen_after_bootmem(void) { static_branch_enable(&xen_struct_pages_ready); +#ifdef CONFIG_X86_VSYSCALL_EMULATION SetPagePinned(virt_to_page(level3_user_vsyscall)); +#endif xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP); } @@ -1025,7 +1030,7 @@ static void __init xen_free_ro_pages(unsigned long paddr, unsigned long size) for (; vaddr < vaddr_end; vaddr += PAGE_SIZE) make_lowmem_page_readwrite(vaddr); - memblock_free(paddr, size); + memblock_phys_free(paddr, size); } static void __init xen_cleanmfnmap_free_pgtbl(void *pgtbl, bool unpin) @@ -1151,7 +1156,7 @@ static void __init xen_pagetable_p2m_free(void) xen_cleanhighmap(addr, addr + size); size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); - memblock_free(__pa(addr), size); + memblock_free((void *)addr, size); } else { xen_cleanmfnmap(addr); } @@ -1192,6 +1197,13 @@ static void __init xen_pagetable_p2m_setup(void) static void __init xen_pagetable_init(void) { + /* + * The majority of further PTE writes is to pagetables already + * announced as such to Xen. Hence it is more efficient to use + * hypercalls for these updates. + */ + pv_ops.mmu.set_pte = __xen_set_pte; + paging_init(); xen_post_allocator_init(); @@ -1421,10 +1433,18 @@ static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd) * * Many of these PTE updates are done on unpinned and writable pages * and doing a hypercall for these is unnecessary and expensive. At - * this point it is not possible to tell if a page is pinned or not, - * so always write the PTE directly and rely on Xen trapping and + * this point it is rarely possible to tell if a page is pinned, so + * mostly write the PTE directly and rely on Xen trapping and * emulating any updates as necessary. */ +static void __init xen_set_pte_init(pte_t *ptep, pte_t pte) +{ + if (unlikely(is_early_ioremap_ptep(ptep))) + __xen_set_pte(ptep, pte); + else + native_set_pte(ptep, pte); +} + __visible pte_t xen_make_pte_init(pteval_t pte) { unsigned long pfn; @@ -1446,11 +1466,6 @@ __visible pte_t xen_make_pte_init(pteval_t pte) } PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_init); -static void __init xen_set_pte_init(pte_t *ptep, pte_t pte) -{ - __xen_set_pte(ptep, pte); -} - /* Early in boot, while setting up the initial pagetable, assume everything is pinned. */ static void __init xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn) @@ -1750,7 +1765,6 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) set_page_prot(init_top_pgt, PAGE_KERNEL_RO); set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO); - set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO); set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO); set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); @@ -1767,6 +1781,13 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) /* Unpin Xen-provided one */ pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); +#ifdef CONFIG_X86_VSYSCALL_EMULATION + /* Pin user vsyscall L3 */ + set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO); + pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, + PFN_DOWN(__pa_symbol(level3_user_vsyscall))); +#endif + /* * At this stage there can be no user pgd, and no page structure to * attach it to, so make sure we just set kernel pgd. @@ -1956,7 +1977,7 @@ void __init xen_relocate_p2m(void) pfn_end = p2m_pfn_end; } - memblock_free(PFN_PHYS(pfn), PAGE_SIZE * (pfn_end - pfn)); + memblock_phys_free(PFN_PHYS(pfn), PAGE_SIZE * (pfn_end - pfn)); while (pfn < pfn_end) { if (pfn == p2m_pfn) { pfn = p2m_pfn_end; @@ -1999,6 +2020,7 @@ static unsigned char dummy_mapping[PAGE_SIZE] __page_aligned_bss; static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) { pte_t pte; + unsigned long vaddr; phys >>= PAGE_SHIFT; @@ -2039,15 +2061,15 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) break; } - __native_set_fixmap(idx, pte); + vaddr = __fix_to_virt(idx); + if (HYPERVISOR_update_va_mapping(vaddr, pte, UVMF_INVLPG)) + BUG(); #ifdef CONFIG_X86_VSYSCALL_EMULATION /* Replicate changes to map the vsyscall page into the user pagetable vsyscall mapping. */ - if (idx == VSYSCALL_PAGE) { - unsigned long vaddr = __fix_to_virt(idx); + if (idx == VSYSCALL_PAGE) set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte); - } #endif } diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 5e6e236977c7..58db86f7b384 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -197,7 +197,7 @@ static void * __ref alloc_p2m_page(void) static void __ref free_p2m_page(void *p) { if (unlikely(!slab_is_available())) { - memblock_free((unsigned long)p, PAGE_SIZE); + memblock_free(p, PAGE_SIZE); return; } diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 8bfc10330107..af216feb63d9 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -153,7 +153,7 @@ static void __init xen_del_extra_mem(unsigned long start_pfn, break; } } - memblock_free(PFN_PHYS(start_pfn), PFN_PHYS(n_pfns)); + memblock_phys_free(PFN_PHYS(start_pfn), PFN_PHYS(n_pfns)); } /* @@ -306,10 +306,6 @@ static void __init xen_update_mem_tables(unsigned long pfn, unsigned long mfn) BUG(); } - /* Update kernel mapping, but not for highmem. */ - if (pfn >= PFN_UP(__pa(high_memory - 1))) - return; - if (HYPERVISOR_update_va_mapping((unsigned long)__va(pfn << PAGE_SHIFT), mfn_pte(mfn, PAGE_KERNEL), 0)) { WARN(1, "Failed to update kernel mapping for mfn=%ld pfn=%ld\n", @@ -429,13 +425,13 @@ static unsigned long __init xen_set_identity_and_remap_chunk( } /* - * If the PFNs are currently mapped, the VA mapping also needs - * to be updated to be 1:1. + * If the PFNs are currently mapped, their VA mappings need to be + * zapped. */ for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) (void)HYPERVISOR_update_va_mapping( (unsigned long)__va(pfn << PAGE_SHIFT), - mfn_pte(pfn, PAGE_KERNEL_IO), 0); + native_make_pte(0), 0); return remap_pfn; } @@ -719,7 +715,7 @@ static void __init xen_reserve_xen_mfnlist(void) return; xen_relocate_p2m(); - memblock_free(start, size); + memblock_phys_free(start, size); } /** @@ -885,7 +881,7 @@ char * __init xen_memory_setup(void) xen_phys_memcpy(new_area, start, size); pr_info("initrd moved from [mem %#010llx-%#010llx] to [mem %#010llx-%#010llx]\n", start, start + size, new_area, new_area + size); - memblock_free(start, size); + memblock_phys_free(start, size); boot_params.hdr.ramdisk_image = new_area; boot_params.ext_ramdisk_image = new_area >> 32; } diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index c1b2f764b29a..c3e1f9a7d43a 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -121,34 +121,10 @@ int xen_smp_intr_init(unsigned int cpu) void __init xen_smp_cpus_done(unsigned int max_cpus) { - int cpu, rc, count = 0; - if (xen_hvm_domain()) native_smp_cpus_done(max_cpus); else calculate_max_logical_packages(); - - if (xen_have_vcpu_info_placement) - return; - - for_each_online_cpu(cpu) { - if (xen_vcpu_nr(cpu) < MAX_VIRT_CPUS) - continue; - - rc = remove_cpu(cpu); - - if (rc == 0) { - /* - * Reset vcpu_info so this cpu cannot be onlined again. - */ - xen_vcpu_info_reset(cpu); - count++; - } else { - pr_warn("%s: failed to bring CPU %d down, error %d\n", - __func__, cpu, rc); - } - } - WARN(count, "%s: brought %d CPUs offline\n", __func__, count); } void xen_smp_send_reschedule(int cpu) @@ -268,20 +244,16 @@ void xen_send_IPI_allbutself(int vector) static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id) { - irq_enter(); generic_smp_call_function_interrupt(); inc_irq_stat(irq_call_count); - irq_exit(); return IRQ_HANDLED; } static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id) { - irq_enter(); generic_smp_call_function_single_interrupt(); inc_irq_stat(irq_call_count); - irq_exit(); return IRQ_HANDLED; } diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 7ed56c6075b0..6a8f3b53ab83 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -225,7 +225,6 @@ static void __init xen_pv_smp_prepare_boot_cpu(void) static void __init xen_pv_smp_prepare_cpus(unsigned int max_cpus) { unsigned cpu; - unsigned int i; if (skip_ioapic_setup) { char *m = (max_cpus == 0) ? @@ -238,16 +237,9 @@ static void __init xen_pv_smp_prepare_cpus(unsigned int max_cpus) } xen_init_lock_cpu(0); - smp_store_boot_cpu_info(); - cpu_data(0).x86_max_cores = 1; + smp_prepare_cpus_common(); - for_each_possible_cpu(i) { - zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL); - zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL); - zalloc_cpumask_var(&per_cpu(cpu_die_map, i), GFP_KERNEL); - zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL); - } - set_cpu_sibling_map(0); + cpu_data(0).x86_max_cores = 1; speculative_store_bypass_ht_init(); @@ -458,10 +450,8 @@ static void xen_pv_stop_other_cpus(int wait) static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id) { - irq_enter(); irq_work_run(); inc_irq_stat(apic_irq_work_irqs); - irq_exit(); return IRQ_HANDLED; } diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 9e27b86a0c31..6a64496edefb 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -45,13 +45,13 @@ SYM_CODE_START(startup_xen) /* Clear .bss */ xor %eax,%eax - mov $__bss_start, %_ASM_DI - mov $__bss_stop, %_ASM_CX - sub %_ASM_DI, %_ASM_CX - shr $__ASM_SEL(2, 3), %_ASM_CX - rep __ASM_SIZE(stos) + mov $__bss_start, %rdi + mov $__bss_stop, %rcx + sub %rdi, %rcx + shr $3, %rcx + rep stosq - mov %_ASM_SI, xen_start_info + mov %rsi, xen_start_info mov initial_stack(%rip), %rsp /* Set up %gs. diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 8bc8b72a205d..fd0fec6e92f4 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -76,9 +76,7 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id); bool xen_vcpu_stolen(int vcpu); -extern int xen_have_vcpu_info_placement; - -int xen_vcpu_setup(int cpu); +void xen_vcpu_setup(int cpu); void xen_vcpu_info_reset(int cpu); void xen_setup_vcpu_info_placement(void); diff --git a/arch/xtensa/Makefile b/arch/xtensa/Makefile index 96714ef7c89e..9778216d6e09 100644 --- a/arch/xtensa/Makefile +++ b/arch/xtensa/Makefile @@ -7,9 +7,7 @@ # Copyright (C) 2014 Cadence Design Systems Inc. # # This file is included by the global makefile so that you can add your own -# architecture-specific flags and dependencies. Remember to do have actions -# for "archclean" and "archdep" for cleaning up and making dependencies for -# this architecture +# architecture-specific flags and dependencies. # Core configuration. # (Use VAR=<xtensa_config> to use another default compiler.) diff --git a/arch/xtensa/boot/boot-elf/bootstrap.S b/arch/xtensa/boot/boot-elf/bootstrap.S index 99e98c9bae41..2dd28931d699 100644 --- a/arch/xtensa/boot/boot-elf/bootstrap.S +++ b/arch/xtensa/boot/boot-elf/bootstrap.S @@ -42,12 +42,14 @@ _bootparam: .align 4 _SetupMMU: +#if XCHAL_HAVE_WINDOWED movi a0, 0 wsr a0, windowbase rsync movi a0, 1 wsr a0, windowstart rsync +#endif movi a0, 0x1F wsr a0, ps rsync diff --git a/arch/xtensa/boot/boot-redboot/bootstrap.S b/arch/xtensa/boot/boot-redboot/bootstrap.S index 48ba5a232d94..3ed94ad35000 100644 --- a/arch/xtensa/boot/boot-redboot/bootstrap.S +++ b/arch/xtensa/boot/boot-redboot/bootstrap.S @@ -3,6 +3,7 @@ #include <asm/regs.h> #include <asm/asmmacro.h> #include <asm/cacheasm.h> +#include <asm/processor.h> /* * RB-Data: RedBoot data/bss * P: Boot-Parameters @@ -36,7 +37,7 @@ .globl __start /* this must be the first byte of the loader! */ __start: - entry sp, 32 # we do not intend to return + abi_entry(32) # we do not intend to return _call0 _start __start_a0: .align 4 @@ -55,17 +56,19 @@ _start: movi a4, 1 wsr a4, ps rsync - +#if XCHAL_HAVE_WINDOWED rsr a5, windowbase ssl a5 sll a4, a4 wsr a4, windowstart rsync - - movi a4, 0x00040000 +#endif + movi a4, KERNEL_PS_WOE_MASK wsr a4, ps rsync +KABI_C0 mov abi_saved0, abi_arg0 + /* copy the loader to its address * Note: The loader itself is a very small piece, so we assume we * don't partially overlap. We also assume (even more important) @@ -168,52 +171,52 @@ _reloc: movi a3, __image_load sub a4, a3, a4 - add a8, a0, a4 + add abi_arg2, a0, a4 # a1 Stack # a8(a4) Load address of the image - movi a6, _image_start - movi a10, _image_end - movi a7, 0x1000000 - sub a11, a10, a6 - movi a9, complen - s32i a11, a9, 0 + movi abi_arg0, _image_start + movi abi_arg4, _image_end + movi abi_arg1, 0x1000000 + sub abi_tmp0, abi_arg4, abi_arg0 + movi abi_arg3, complen + s32i abi_tmp0, abi_arg3, 0 movi a0, 0 - # a6 destination - # a7 maximum size of destination - # a8 source - # a9 ptr to length + # abi_arg0 destination + # abi_arg1 maximum size of destination + # abi_arg2 source + # abi_arg3 ptr to length .extern gunzip - movi a4, gunzip - beqz a4, 1f + movi abi_tmp0, gunzip + beqz abi_tmp0, 1f - callx4 a4 + abi_callx abi_tmp0 j 2f - # a6 destination start - # a7 maximum size of destination - # a8 source start - # a9 ptr to length - # a10 destination end + # abi_arg0 destination start + # abi_arg1 maximum size of destination + # abi_arg2 source start + # abi_arg3 ptr to length + # abi_arg4 destination end 1: - l32i a9, a8, 0 - l32i a11, a8, 4 - s32i a9, a6, 0 - s32i a11, a6, 4 - l32i a9, a8, 8 - l32i a11, a8, 12 - s32i a9, a6, 8 - s32i a11, a6, 12 - addi a6, a6, 16 - addi a8, a8, 16 - blt a6, a10, 1b + l32i abi_tmp0, abi_arg2, 0 + l32i abi_tmp1, abi_arg2, 4 + s32i abi_tmp0, abi_arg0, 0 + s32i abi_tmp1, abi_arg0, 4 + l32i abi_tmp0, abi_arg2, 8 + l32i abi_tmp1, abi_arg2, 12 + s32i abi_tmp0, abi_arg0, 8 + s32i abi_tmp1, abi_arg0, 12 + addi abi_arg0, abi_arg0, 16 + addi abi_arg2, abi_arg2, 16 + blt abi_arg0, abi_arg4, 1b /* jump to the kernel */ @@ -230,6 +233,7 @@ _reloc: # a2 Boot parameter list +KABI_C0 mov abi_arg0, abi_saved0 movi a0, _image_start jx a0 diff --git a/arch/xtensa/include/asm/asmmacro.h b/arch/xtensa/include/asm/asmmacro.h index bfc89e11f469..809c507d1825 100644 --- a/arch/xtensa/include/asm/asmmacro.h +++ b/arch/xtensa/include/asm/asmmacro.h @@ -194,6 +194,12 @@ #define XTENSA_STACK_ALIGNMENT 16 #if defined(__XTENSA_WINDOWED_ABI__) + +/* Assembly instructions for windowed kernel ABI. */ +#define KABI_W +/* Assembly instructions for call0 kernel ABI (will be ignored). */ +#define KABI_C0 # + #define XTENSA_FRAME_SIZE_RESERVE 16 #define XTENSA_SPILL_STACK_RESERVE 32 @@ -206,8 +212,34 @@ #define abi_ret(frame_size) retw #define abi_ret_default retw + /* direct call */ +#define abi_call call4 + /* indirect call */ +#define abi_callx callx4 + /* outgoing call argument registers */ +#define abi_arg0 a6 +#define abi_arg1 a7 +#define abi_arg2 a8 +#define abi_arg3 a9 +#define abi_arg4 a10 +#define abi_arg5 a11 + /* return value */ +#define abi_rv a6 + /* registers preserved across call */ +#define abi_saved0 a2 +#define abi_saved1 a3 + + /* none of the above */ +#define abi_tmp0 a4 +#define abi_tmp1 a5 + #elif defined(__XTENSA_CALL0_ABI__) +/* Assembly instructions for windowed kernel ABI (will be ignored). */ +#define KABI_W # +/* Assembly instructions for call0 kernel ABI. */ +#define KABI_C0 + #define XTENSA_SPILL_STACK_RESERVE 0 #define abi_entry(frame_size) __abi_entry (frame_size) @@ -233,10 +265,43 @@ #define abi_ret_default ret + /* direct call */ +#define abi_call call0 + /* indirect call */ +#define abi_callx callx0 + /* outgoing call argument registers */ +#define abi_arg0 a2 +#define abi_arg1 a3 +#define abi_arg2 a4 +#define abi_arg3 a5 +#define abi_arg4 a6 +#define abi_arg5 a7 + /* return value */ +#define abi_rv a2 + /* registers preserved across call */ +#define abi_saved0 a12 +#define abi_saved1 a13 + + /* none of the above */ +#define abi_tmp0 a8 +#define abi_tmp1 a9 + #else #error Unsupported Xtensa ABI #endif +#if defined(USER_SUPPORT_WINDOWED) +/* Assembly instructions for windowed user ABI. */ +#define UABI_W +/* Assembly instructions for call0 user ABI (will be ignored). */ +#define UABI_C0 # +#else +/* Assembly instructions for windowed user ABI (will be ignored). */ +#define UABI_W # +/* Assembly instructions for call0 user ABI. */ +#define UABI_C0 +#endif + #define __XTENSA_HANDLER .section ".exception.text", "ax" #endif /* _XTENSA_ASMMACRO_H */ diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h index 4361fe4247e3..52da614f953c 100644 --- a/arch/xtensa/include/asm/atomic.h +++ b/arch/xtensa/include/asm/atomic.h @@ -25,15 +25,15 @@ * * Locking interrupts looks like this: * - * rsil a15, TOPLEVEL + * rsil a14, TOPLEVEL * <code> - * wsr a15, PS + * wsr a14, PS * rsync * - * Note that a15 is used here because the register allocation + * Note that a14 is used here because the register allocation * done by the compiler is not guaranteed and a window overflow * may not occur between the rsil and wsr instructions. By using - * a15 in the rsil, the machine is guaranteed to be in a state + * a14 in the rsil, the machine is guaranteed to be in a state * where no register reference will cause an overflow. */ @@ -185,15 +185,15 @@ static inline void arch_atomic_##op(int i, atomic_t * v) \ unsigned int vval; \ \ __asm__ __volatile__( \ - " rsil a15, "__stringify(TOPLEVEL)"\n" \ + " rsil a14, "__stringify(TOPLEVEL)"\n" \ " l32i %[result], %[mem]\n" \ " " #op " %[result], %[result], %[i]\n" \ " s32i %[result], %[mem]\n" \ - " wsr a15, ps\n" \ + " wsr a14, ps\n" \ " rsync\n" \ : [result] "=&a" (vval), [mem] "+m" (*v) \ : [i] "a" (i) \ - : "a15", "memory" \ + : "a14", "memory" \ ); \ } \ @@ -203,15 +203,15 @@ static inline int arch_atomic_##op##_return(int i, atomic_t * v) \ unsigned int vval; \ \ __asm__ __volatile__( \ - " rsil a15,"__stringify(TOPLEVEL)"\n" \ + " rsil a14,"__stringify(TOPLEVEL)"\n" \ " l32i %[result], %[mem]\n" \ " " #op " %[result], %[result], %[i]\n" \ " s32i %[result], %[mem]\n" \ - " wsr a15, ps\n" \ + " wsr a14, ps\n" \ " rsync\n" \ : [result] "=&a" (vval), [mem] "+m" (*v) \ : [i] "a" (i) \ - : "a15", "memory" \ + : "a14", "memory" \ ); \ \ return vval; \ @@ -223,16 +223,16 @@ static inline int arch_atomic_fetch_##op(int i, atomic_t * v) \ unsigned int tmp, vval; \ \ __asm__ __volatile__( \ - " rsil a15,"__stringify(TOPLEVEL)"\n" \ + " rsil a14,"__stringify(TOPLEVEL)"\n" \ " l32i %[result], %[mem]\n" \ " " #op " %[tmp], %[result], %[i]\n" \ " s32i %[tmp], %[mem]\n" \ - " wsr a15, ps\n" \ + " wsr a14, ps\n" \ " rsync\n" \ : [result] "=&a" (vval), [tmp] "=&a" (tmp), \ [mem] "+m" (*v) \ : [i] "a" (i) \ - : "a15", "memory" \ + : "a14", "memory" \ ); \ \ return vval; \ diff --git a/arch/xtensa/include/asm/cmpxchg.h b/arch/xtensa/include/asm/cmpxchg.h index 3699e2818efb..eb87810357ad 100644 --- a/arch/xtensa/include/asm/cmpxchg.h +++ b/arch/xtensa/include/asm/cmpxchg.h @@ -52,16 +52,16 @@ __cmpxchg_u32(volatile int *p, int old, int new) return new; #else __asm__ __volatile__( - " rsil a15, "__stringify(TOPLEVEL)"\n" + " rsil a14, "__stringify(TOPLEVEL)"\n" " l32i %[old], %[mem]\n" " bne %[old], %[cmp], 1f\n" " s32i %[new], %[mem]\n" "1:\n" - " wsr a15, ps\n" + " wsr a14, ps\n" " rsync\n" : [old] "=&a" (old), [mem] "+m" (*p) : [cmp] "a" (old), [new] "r" (new) - : "a15", "memory"); + : "a14", "memory"); return old; #endif } @@ -116,10 +116,10 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr, /* * xchg_u32 * - * Note that a15 is used here because the register allocation + * Note that a14 is used here because the register allocation * done by the compiler is not guaranteed and a window overflow * may not occur between the rsil and wsr instructions. By using - * a15 in the rsil, the machine is guaranteed to be in a state + * a14 in the rsil, the machine is guaranteed to be in a state * where no register reference will cause an overflow. */ @@ -157,14 +157,14 @@ static inline unsigned long xchg_u32(volatile int * m, unsigned long val) #else unsigned long tmp; __asm__ __volatile__( - " rsil a15, "__stringify(TOPLEVEL)"\n" + " rsil a14, "__stringify(TOPLEVEL)"\n" " l32i %[tmp], %[mem]\n" " s32i %[val], %[mem]\n" - " wsr a15, ps\n" + " wsr a14, ps\n" " rsync\n" : [tmp] "=&a" (tmp), [mem] "+m" (*m) : [val] "a" (val) - : "a15", "memory"); + : "a14", "memory"); return tmp; #endif } diff --git a/arch/xtensa/include/asm/core.h b/arch/xtensa/include/asm/core.h index 5590b0f68837..9138077e567d 100644 --- a/arch/xtensa/include/asm/core.h +++ b/arch/xtensa/include/asm/core.h @@ -26,4 +26,15 @@ #define XCHAL_SPANNING_WAY 0 #endif +#if XCHAL_HAVE_WINDOWED +#if defined(CONFIG_USER_ABI_DEFAULT) || defined(CONFIG_USER_ABI_CALL0_PROBE) +/* Whether windowed ABI is supported in userspace. */ +#define USER_SUPPORT_WINDOWED +#endif +#if defined(__XTENSA_WINDOWED_ABI__) || defined(USER_SUPPORT_WINDOWED) +/* Whether windowed ABI is supported either in userspace or in the kernel. */ +#define SUPPORT_WINDOWED +#endif +#endif + #endif diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h index ad15fbc57283..37d3e9887fe7 100644 --- a/arch/xtensa/include/asm/processor.h +++ b/arch/xtensa/include/asm/processor.h @@ -18,12 +18,6 @@ #include <asm/types.h> #include <asm/regs.h> -/* Assertions. */ - -#if (XCHAL_HAVE_WINDOWED != 1) -# error Linux requires the Xtensa Windowed Registers Option. -#endif - /* Xtensa ABI requires stack alignment to be at least 16 */ #define STACK_ALIGN (XCHAL_DATA_WIDTH > 16 ? XCHAL_DATA_WIDTH : 16) @@ -105,8 +99,18 @@ #define WSBITS (XCHAL_NUM_AREGS / 4) /* width of WINDOWSTART in bits */ #define WBBITS (XCHAL_NUM_AREGS_LOG2 - 2) /* width of WINDOWBASE in bits */ +#if defined(__XTENSA_WINDOWED_ABI__) +#define KERNEL_PS_WOE_MASK PS_WOE_MASK +#elif defined(__XTENSA_CALL0_ABI__) +#define KERNEL_PS_WOE_MASK 0 +#else +#error Unsupported xtensa ABI +#endif + #ifndef __ASSEMBLY__ +#if defined(__XTENSA_WINDOWED_ABI__) + /* Build a valid return address for the specified call winsize. * winsize must be 1 (call4), 2 (call8), or 3 (call12) */ @@ -117,6 +121,22 @@ */ #define MAKE_PC_FROM_RA(ra,sp) (((ra) & 0x3fffffff) | ((sp) & 0xc0000000)) +#elif defined(__XTENSA_CALL0_ABI__) + +/* Build a valid return address for the specified call winsize. + * winsize must be 1 (call4), 2 (call8), or 3 (call12) + */ +#define MAKE_RA_FOR_CALL(ra, ws) (ra) + +/* Convert return address to a valid pc + * Note: We assume that the stack pointer is in the same 1GB ranges as the ra + */ +#define MAKE_PC_FROM_RA(ra, sp) (ra) + +#else +#error Unsupported Xtensa ABI +#endif + /* Spill slot location for the register reg in the spill area under the stack * pointer sp. reg must be in the range [0..4). */ diff --git a/arch/xtensa/include/asm/sections.h b/arch/xtensa/include/asm/sections.h new file mode 100644 index 000000000000..a8c42d08e281 --- /dev/null +++ b/arch/xtensa/include/asm/sections.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _XTENSA_SECTIONS_H +#define _XTENSA_SECTIONS_H + +#include <asm-generic/sections.h> + +#ifdef CONFIG_VECTORS_ADDR +extern char _WindowVectors_text_start[]; +extern char _WindowVectors_text_end[]; +extern char _DebugInterruptVector_text_start[]; +extern char _DebugInterruptVector_text_end[]; +extern char _KernelExceptionVector_text_start[]; +extern char _KernelExceptionVector_text_end[]; +extern char _UserExceptionVector_text_start[]; +extern char _UserExceptionVector_text_end[]; +extern char _DoubleExceptionVector_text_start[]; +extern char _DoubleExceptionVector_text_end[]; +extern char _exception_text_start[]; +extern char _exception_text_end[]; +extern char _Level2InterruptVector_text_start[]; +extern char _Level2InterruptVector_text_end[]; +extern char _Level3InterruptVector_text_start[]; +extern char _Level3InterruptVector_text_end[]; +extern char _Level4InterruptVector_text_start[]; +extern char _Level4InterruptVector_text_end[]; +extern char _Level5InterruptVector_text_start[]; +extern char _Level5InterruptVector_text_end[]; +extern char _Level6InterruptVector_text_start[]; +extern char _Level6InterruptVector_text_end[]; +#endif +#ifdef CONFIG_SMP +extern char _SecondaryResetVector_text_start[]; +extern char _SecondaryResetVector_text_end[]; +#endif +#ifdef CONFIG_XIP_KERNEL +extern char _xip_start[]; +extern char _xip_end[]; +#endif + +#endif diff --git a/arch/xtensa/include/asm/syscall.h b/arch/xtensa/include/asm/syscall.h index f9a671cbf933..5ee974bf8330 100644 --- a/arch/xtensa/include/asm/syscall.h +++ b/arch/xtensa/include/asm/syscall.h @@ -68,17 +68,6 @@ static inline void syscall_get_arguments(struct task_struct *task, args[i] = regs->areg[reg[i]]; } -static inline void syscall_set_arguments(struct task_struct *task, - struct pt_regs *regs, - const unsigned long *args) -{ - static const unsigned int reg[] = XTENSA_SYSCALL_ARGUMENT_REGS; - unsigned int i; - - for (i = 0; i < 6; ++i) - regs->areg[reg[i]] = args[i]; -} - asmlinkage long xtensa_rt_sigreturn(void); asmlinkage long xtensa_shmat(int, char __user *, int); asmlinkage long xtensa_fadvise64_64(int, int, diff --git a/arch/xtensa/include/asm/traps.h b/arch/xtensa/include/asm/traps.h index f720a57d0a5b..6fa47cd8e02d 100644 --- a/arch/xtensa/include/asm/traps.h +++ b/arch/xtensa/include/asm/traps.h @@ -56,6 +56,7 @@ void secondary_trap_init(void); static inline void spill_registers(void) { +#if defined(__XTENSA_WINDOWED_ABI__) #if XCHAL_NUM_AREGS > 16 __asm__ __volatile__ ( " call8 1f\n" @@ -96,6 +97,7 @@ static inline void spill_registers(void) " mov a12, a12\n" : : : "memory"); #endif +#endif } struct debug_table { diff --git a/arch/xtensa/kernel/align.S b/arch/xtensa/kernel/align.S index 9301452e521e..d062c732ef18 100644 --- a/arch/xtensa/kernel/align.S +++ b/arch/xtensa/kernel/align.S @@ -58,7 +58,9 @@ * BE shift left / mask 0 0 X X */ +#if XCHAL_HAVE_WINDOWED #define UNALIGNED_USER_EXCEPTION +#endif #if XCHAL_HAVE_BE diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S index 647b162f959b..99ab3c1a3387 100644 --- a/arch/xtensa/kernel/entry.S +++ b/arch/xtensa/kernel/entry.S @@ -158,6 +158,7 @@ _user_exception: /* Rotate ws so that the current windowbase is at bit0. */ /* Assume ws = xxwww1yyyy. Rotate ws right, so that a2 = yyyyxxwww1 */ +#if defined(USER_SUPPORT_WINDOWED) rsr a2, windowbase rsr a3, windowstart ssr a2 @@ -167,24 +168,33 @@ _user_exception: src a2, a3, a2 srli a2, a2, 32-WSBITS s32i a2, a1, PT_WMASK # needed for restoring registers +#else + movi a2, 0 + movi a3, 1 + s32i a2, a1, PT_WINDOWBASE + s32i a3, a1, PT_WINDOWSTART + s32i a3, a1, PT_WMASK +#endif /* Save only live registers. */ - _bbsi.l a2, 1, 1f +UABI_W _bbsi.l a2, 1, 1f s32i a4, a1, PT_AREG4 s32i a5, a1, PT_AREG5 s32i a6, a1, PT_AREG6 s32i a7, a1, PT_AREG7 - _bbsi.l a2, 2, 1f +UABI_W _bbsi.l a2, 2, 1f s32i a8, a1, PT_AREG8 s32i a9, a1, PT_AREG9 s32i a10, a1, PT_AREG10 s32i a11, a1, PT_AREG11 - _bbsi.l a2, 3, 1f +UABI_W _bbsi.l a2, 3, 1f s32i a12, a1, PT_AREG12 s32i a13, a1, PT_AREG13 s32i a14, a1, PT_AREG14 s32i a15, a1, PT_AREG15 + +#if defined(USER_SUPPORT_WINDOWED) _bnei a2, 1, 1f # only one valid frame? /* Only one valid frame, skip saving regs. */ @@ -239,7 +249,7 @@ _user_exception: rsync /* We are back to the original stack pointer (a1) */ - +#endif 2: /* Now, jump to the common exception handler. */ j common_exception @@ -295,6 +305,7 @@ _kernel_exception: s32i a3, a1, PT_SAR s32i a2, a1, PT_ICOUNTLEVEL +#if defined(__XTENSA_WINDOWED_ABI__) /* Rotate ws so that the current windowbase is at bit0. */ /* Assume ws = xxwww1yyyy. Rotate ws right, so that a2 = yyyyxxwww1 */ @@ -305,27 +316,28 @@ _kernel_exception: src a2, a3, a2 srli a2, a2, 32-WSBITS s32i a2, a1, PT_WMASK # needed for kernel_exception_exit +#endif /* Save only the live window-frame */ - _bbsi.l a2, 1, 1f +KABI_W _bbsi.l a2, 1, 1f s32i a4, a1, PT_AREG4 s32i a5, a1, PT_AREG5 s32i a6, a1, PT_AREG6 s32i a7, a1, PT_AREG7 - _bbsi.l a2, 2, 1f +KABI_W _bbsi.l a2, 2, 1f s32i a8, a1, PT_AREG8 s32i a9, a1, PT_AREG9 s32i a10, a1, PT_AREG10 s32i a11, a1, PT_AREG11 - _bbsi.l a2, 3, 1f +KABI_W _bbsi.l a2, 3, 1f s32i a12, a1, PT_AREG12 s32i a13, a1, PT_AREG13 s32i a14, a1, PT_AREG14 s32i a15, a1, PT_AREG15 +#ifdef __XTENSA_WINDOWED_ABI__ _bnei a2, 1, 1f - /* Copy spill slots of a0 and a1 to imitate movsp * in order to keep exception stack continuous */ @@ -333,6 +345,7 @@ _kernel_exception: l32i a0, a1, PT_SIZE + 4 s32e a3, a1, -16 s32e a0, a1, -12 +#endif 1: l32i a0, a1, PT_AREG0 # restore saved a0 wsr a0, depc @@ -419,16 +432,16 @@ common_exception: movi a3, LOCKLEVEL .Lexception: - movi a0, PS_WOE_MASK - or a3, a3, a0 +KABI_W movi a0, PS_WOE_MASK +KABI_W or a3, a3, a0 #else addi a2, a2, -EXCCAUSE_LEVEL1_INTERRUPT movi a0, LOCKLEVEL extui a3, a3, PS_INTLEVEL_SHIFT, PS_INTLEVEL_WIDTH # a3 = PS.INTLEVEL moveqz a3, a0, a2 # a3 = LOCKLEVEL iff interrupt - movi a2, PS_WOE_MASK - or a3, a3, a2 +KABI_W movi a2, PS_WOE_MASK +KABI_W or a3, a3, a2 rsr a2, exccause #endif @@ -461,14 +474,14 @@ common_exception: */ rsr a4, excsave1 - mov a6, a1 # pass stack frame - mov a7, a2 # pass EXCCAUSE addx4 a4, a2, a4 l32i a4, a4, EXC_TABLE_DEFAULT # load handler + mov abi_arg1, a2 # pass EXCCAUSE + mov abi_arg0, a1 # pass stack frame /* Call the second-level handler */ - callx4 a4 + abi_callx a4 /* Jump here for exception exit */ .global common_exception_return @@ -482,15 +495,15 @@ common_exception_return: 1: irq_save a2, a3 #ifdef CONFIG_TRACE_IRQFLAGS - call4 trace_hardirqs_off + abi_call trace_hardirqs_off #endif /* Jump if we are returning from kernel exceptions. */ - l32i a3, a1, PT_PS + l32i abi_saved1, a1, PT_PS GET_THREAD_INFO(a2, a1) l32i a4, a2, TI_FLAGS - _bbci.l a3, PS_UM_BIT, 6f + _bbci.l abi_saved1, PS_UM_BIT, 6f /* Specific to a user exception exit: * We need to check some flags for signal handling and rescheduling, @@ -509,20 +522,20 @@ common_exception_return: /* Call do_signal() */ #ifdef CONFIG_TRACE_IRQFLAGS - call4 trace_hardirqs_on + abi_call trace_hardirqs_on #endif rsil a2, 0 - mov a6, a1 - call4 do_notify_resume # int do_notify_resume(struct pt_regs*) + mov abi_arg0, a1 + abi_call do_notify_resume # int do_notify_resume(struct pt_regs*) j 1b 3: /* Reschedule */ #ifdef CONFIG_TRACE_IRQFLAGS - call4 trace_hardirqs_on + abi_call trace_hardirqs_on #endif rsil a2, 0 - call4 schedule # void schedule (void) + abi_call schedule # void schedule (void) j 1b #ifdef CONFIG_PREEMPTION @@ -533,33 +546,33 @@ common_exception_return: l32i a4, a2, TI_PRE_COUNT bnez a4, 4f - call4 preempt_schedule_irq + abi_call preempt_schedule_irq j 4f #endif #if XTENSA_FAKE_NMI .LNMIexit: - l32i a3, a1, PT_PS - _bbci.l a3, PS_UM_BIT, 4f + l32i abi_saved1, a1, PT_PS + _bbci.l abi_saved1, PS_UM_BIT, 4f #endif 5: #ifdef CONFIG_HAVE_HW_BREAKPOINT _bbci.l a4, TIF_DB_DISABLED, 7f - call4 restore_dbreak + abi_call restore_dbreak 7: #endif #ifdef CONFIG_DEBUG_TLB_SANITY l32i a4, a1, PT_DEPC bgeui a4, VALID_DOUBLE_EXCEPTION_ADDRESS, 4f - call4 check_tlb_sanity + abi_call check_tlb_sanity #endif 6: 4: #ifdef CONFIG_TRACE_IRQFLAGS - extui a4, a3, PS_INTLEVEL_SHIFT, PS_INTLEVEL_WIDTH + extui a4, abi_saved1, PS_INTLEVEL_SHIFT, PS_INTLEVEL_WIDTH bgei a4, LOCKLEVEL, 1f - call4 trace_hardirqs_on + abi_call trace_hardirqs_on 1: #endif /* Restore optional registers. */ @@ -572,14 +585,15 @@ common_exception_return: l32i a2, a1, PT_SCOMPARE1 wsr a2, scompare1 #endif - wsr a3, ps /* disable interrupts */ + wsr abi_saved1, ps /* disable interrupts */ - _bbci.l a3, PS_UM_BIT, kernel_exception_exit + _bbci.l abi_saved1, PS_UM_BIT, kernel_exception_exit user_exception_exit: /* Restore the state of the task and return from the exception. */ +#if defined(USER_SUPPORT_WINDOWED) /* Switch to the user thread WINDOWBASE. Save SP temporarily in DEPC */ l32i a2, a1, PT_WINDOWBASE @@ -634,8 +648,10 @@ user_exception_exit: * frame where we had loaded a2), or at least the lower 4 bits * (if we have restored WSBITS-1 frames). */ - 2: +#else + movi a2, 1 +#endif #if XCHAL_HAVE_THREADPTR l32i a3, a1, PT_THREADPTR wur a3, threadptr @@ -650,6 +666,7 @@ user_exception_exit: kernel_exception_exit: +#if defined(__XTENSA_WINDOWED_ABI__) /* Check if we have to do a movsp. * * We only have to do a movsp if the previous window-frame has @@ -702,6 +719,9 @@ kernel_exception_exit: * * Note: We expect a2 to hold PT_WMASK */ +#else + movi a2, 1 +#endif common_exception_exit: @@ -920,14 +940,16 @@ unrecoverable_text: ENTRY(unrecoverable_exception) +#if XCHAL_HAVE_WINDOWED movi a0, 1 movi a1, 0 wsr a0, windowstart wsr a1, windowbase rsync +#endif - movi a1, PS_WOE_MASK | LOCKLEVEL + movi a1, KERNEL_PS_WOE_MASK | LOCKLEVEL wsr a1, ps rsync @@ -935,8 +957,8 @@ ENTRY(unrecoverable_exception) movi a0, 0 addi a1, a1, PT_REGS_OFFSET - movi a6, unrecoverable_text - call4 panic + movi abi_arg0, unrecoverable_text + abi_call panic 1: j 1b @@ -947,6 +969,7 @@ ENDPROC(unrecoverable_exception) __XTENSA_HANDLER .literal_position +#ifdef SUPPORT_WINDOWED /* * Fast-handler for alloca exceptions * @@ -1010,6 +1033,7 @@ ENTRY(fast_alloca) 8: j _WindowUnderflow8 4: j _WindowUnderflow4 ENDPROC(fast_alloca) +#endif #ifdef CONFIG_USER_ABI_CALL0_PROBE /* @@ -1206,7 +1230,8 @@ ENDPROC(fast_syscall_xtensa) * Note: We assume the stack pointer is EXC_TABLE_KSTK in the fixup handler. */ -#ifdef CONFIG_FAST_SYSCALL_SPILL_REGISTERS +#if defined(CONFIG_FAST_SYSCALL_SPILL_REGISTERS) && \ + defined(USER_SUPPORT_WINDOWED) ENTRY(fast_syscall_spill_registers) @@ -1403,12 +1428,12 @@ ENTRY(fast_syscall_spill_registers) rsr a3, excsave1 l32i a1, a3, EXC_TABLE_KSTK - movi a4, PS_WOE_MASK | LOCKLEVEL + movi a4, KERNEL_PS_WOE_MASK | LOCKLEVEL wsr a4, ps rsync - movi a6, SIGSEGV - call4 do_exit + movi abi_arg0, SIGSEGV + abi_call do_exit /* shouldn't return, so panic */ @@ -1887,57 +1912,77 @@ ENDPROC(fast_store_prohibited) ENTRY(system_call) +#if defined(__XTENSA_WINDOWED_ABI__) abi_entry_default +#elif defined(__XTENSA_CALL0_ABI__) + abi_entry(12) + + s32i a0, sp, 0 + s32i abi_saved0, sp, 4 + s32i abi_saved1, sp, 8 + mov abi_saved0, a2 +#else +#error Unsupported Xtensa ABI +#endif /* regs->syscall = regs->areg[2] */ - l32i a7, a2, PT_AREG2 - s32i a7, a2, PT_SYSCALL + l32i a7, abi_saved0, PT_AREG2 + s32i a7, abi_saved0, PT_SYSCALL GET_THREAD_INFO(a4, a1) - l32i a3, a4, TI_FLAGS + l32i abi_saved1, a4, TI_FLAGS movi a4, _TIF_WORK_MASK - and a3, a3, a4 - beqz a3, 1f + and abi_saved1, abi_saved1, a4 + beqz abi_saved1, 1f - mov a6, a2 - call4 do_syscall_trace_enter - beqz a6, .Lsyscall_exit - l32i a7, a2, PT_SYSCALL + mov abi_arg0, abi_saved0 + abi_call do_syscall_trace_enter + beqz abi_rv, .Lsyscall_exit + l32i a7, abi_saved0, PT_SYSCALL 1: /* syscall = sys_call_table[syscall_nr] */ movi a4, sys_call_table movi a5, __NR_syscalls - movi a6, -ENOSYS + movi abi_rv, -ENOSYS bgeu a7, a5, 1f addx4 a4, a7, a4 - l32i a4, a4, 0 + l32i abi_tmp0, a4, 0 /* Load args: arg0 - arg5 are passed via regs. */ - l32i a6, a2, PT_AREG6 - l32i a7, a2, PT_AREG3 - l32i a8, a2, PT_AREG4 - l32i a9, a2, PT_AREG5 - l32i a10, a2, PT_AREG8 - l32i a11, a2, PT_AREG9 + l32i abi_arg0, abi_saved0, PT_AREG6 + l32i abi_arg1, abi_saved0, PT_AREG3 + l32i abi_arg2, abi_saved0, PT_AREG4 + l32i abi_arg3, abi_saved0, PT_AREG5 + l32i abi_arg4, abi_saved0, PT_AREG8 + l32i abi_arg5, abi_saved0, PT_AREG9 - callx4 a4 + abi_callx abi_tmp0 1: /* regs->areg[2] = return_value */ - s32i a6, a2, PT_AREG2 - bnez a3, 1f + s32i abi_rv, abi_saved0, PT_AREG2 + bnez abi_saved1, 1f .Lsyscall_exit: +#if defined(__XTENSA_WINDOWED_ABI__) abi_ret_default +#elif defined(__XTENSA_CALL0_ABI__) + l32i a0, sp, 0 + l32i abi_saved0, sp, 4 + l32i abi_saved1, sp, 8 + abi_ret(12) +#else +#error Unsupported Xtensa ABI +#endif 1: - mov a6, a2 - call4 do_syscall_trace_leave - abi_ret_default + mov abi_arg0, abi_saved0 + abi_call do_syscall_trace_leave + j .Lsyscall_exit ENDPROC(system_call) @@ -1988,8 +2033,18 @@ ENDPROC(system_call) ENTRY(_switch_to) +#if defined(__XTENSA_WINDOWED_ABI__) abi_entry(XTENSA_SPILL_STACK_RESERVE) +#elif defined(__XTENSA_CALL0_ABI__) + abi_entry(16) + s32i a12, sp, 0 + s32i a13, sp, 4 + s32i a14, sp, 8 + s32i a15, sp, 12 +#else +#error Unsupported Xtensa ABI +#endif mov a11, a3 # and 'next' (a3) l32i a4, a2, TASK_THREAD_INFO @@ -2033,7 +2088,9 @@ ENTRY(_switch_to) /* Flush register file. */ +#if defined(__XTENSA_WINDOWED_ABI__) spill_registers_kernel +#endif /* Set kernel stack (and leave critical section) * Note: It's save to set it here. The stack will not be overwritten @@ -2055,34 +2112,43 @@ ENTRY(_switch_to) wsr a14, ps rsync +#if defined(__XTENSA_WINDOWED_ABI__) abi_ret(XTENSA_SPILL_STACK_RESERVE) +#elif defined(__XTENSA_CALL0_ABI__) + l32i a12, sp, 0 + l32i a13, sp, 4 + l32i a14, sp, 8 + l32i a15, sp, 12 + abi_ret(16) +#else +#error Unsupported Xtensa ABI +#endif ENDPROC(_switch_to) ENTRY(ret_from_fork) /* void schedule_tail (struct task_struct *prev) - * Note: prev is still in a6 (return value from fake call4 frame) + * Note: prev is still in abi_arg0 (return value from fake call frame) */ - call4 schedule_tail - - mov a6, a1 - call4 do_syscall_trace_leave + abi_call schedule_tail - j common_exception_return + mov abi_arg0, a1 + abi_call do_syscall_trace_leave + j common_exception_return ENDPROC(ret_from_fork) /* * Kernel thread creation helper - * On entry, set up by copy_thread: a2 = thread_fn, a3 = thread_fn arg - * left from _switch_to: a6 = prev + * On entry, set up by copy_thread: abi_saved0 = thread_fn, + * abi_saved1 = thread_fn arg. Left from _switch_to: abi_arg0 = prev */ ENTRY(ret_from_kernel_thread) - call4 schedule_tail - mov a6, a3 - callx4 a2 - j common_exception_return + abi_call schedule_tail + mov abi_arg0, abi_saved1 + abi_callx abi_saved0 + j common_exception_return ENDPROC(ret_from_kernel_thread) diff --git a/arch/xtensa/kernel/head.S b/arch/xtensa/kernel/head.S index b9b81e76beea..8484294bc623 100644 --- a/arch/xtensa/kernel/head.S +++ b/arch/xtensa/kernel/head.S @@ -15,6 +15,7 @@ * Kevin Chea */ +#include <asm/asmmacro.h> #include <asm/processor.h> #include <asm/page.h> #include <asm/cacheasm.h> @@ -66,11 +67,13 @@ _SetupOCD: * xt-gdb to single step via DEBUG exceptions received directly * by ocd. */ +#if XCHAL_HAVE_WINDOWED movi a1, 1 movi a0, 0 wsr a1, windowstart wsr a0, windowbase rsync +#endif movi a1, LOCKLEVEL wsr a1, ps @@ -193,9 +196,10 @@ ENTRY(_startup) movi a1, start_info l32i a1, a1, 0 - movi a2, PS_WOE_MASK | LOCKLEVEL - # WOE=1, INTLEVEL=LOCKLEVEL, UM=0 - wsr a2, ps # (enable reg-windows; progmode stack) + /* Disable interrupts. */ + /* Enable window exceptions if kernel is built with windowed ABI. */ + movi a2, KERNEL_PS_WOE_MASK | LOCKLEVEL + wsr a2, ps rsync #ifdef CONFIG_SMP @@ -267,13 +271,13 @@ ENTRY(_startup) l32i a1, a1, 0 #endif - movi a6, 0 - xsr a6, excsave1 + movi abi_arg0, 0 + xsr abi_arg0, excsave1 /* init_arch kick-starts the linux kernel */ - call4 init_arch - call4 start_kernel + abi_call init_arch + abi_call start_kernel should_never_return: j should_never_return @@ -297,10 +301,10 @@ should_never_return: s32i a3, a2, 0 memw - movi a6, 0 - wsr a6, excsave1 + movi abi_arg0, 0 + wsr abi_arg0, excsave1 - call4 secondary_start_kernel + abi_call secondary_start_kernel j should_never_return #endif /* CONFIG_SMP */ diff --git a/arch/xtensa/kernel/mcount.S b/arch/xtensa/kernel/mcount.S index 5e4619f52858..51daaf4e0b82 100644 --- a/arch/xtensa/kernel/mcount.S +++ b/arch/xtensa/kernel/mcount.S @@ -17,11 +17,16 @@ /* * Entry condition: * - * a2: a0 of the caller + * a2: a0 of the caller in windowed ABI + * a10: a0 of the caller in call0 ABI + * + * In call0 ABI the function _mcount is called with the special ABI: + * its argument is in a10 and all the usual argument registers (a2 - a7) + * must be preserved in addition to callee-saved a12 - a15. */ ENTRY(_mcount) - +#if defined(__XTENSA_WINDOWED_ABI__) abi_entry_default movi a4, ftrace_trace_function @@ -42,7 +47,36 @@ ENTRY(_mcount) callx4 a4 abi_ret_default +#elif defined(__XTENSA_CALL0_ABI__) + abi_entry_default + + movi a9, ftrace_trace_function + l32i a9, a9, 0 + movi a11, ftrace_stub + bne a9, a11, 1f + abi_ret_default +1: abi_entry(28) + s32i a0, sp, 0 + s32i a2, sp, 4 + s32i a3, sp, 8 + s32i a4, sp, 12 + s32i a5, sp, 16 + s32i a6, sp, 20 + s32i a7, sp, 24 + addi a2, a10, -MCOUNT_INSN_SIZE + callx0 a9 + l32i a0, sp, 0 + l32i a2, sp, 4 + l32i a3, sp, 8 + l32i a4, sp, 12 + l32i a5, sp, 16 + l32i a6, sp, 20 + l32i a7, sp, 24 + abi_ret(28) +#else +#error Unsupported Xtensa ABI +#endif ENDPROC(_mcount) ENTRY(ftrace_stub) diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c index 47f933fed870..bd80df890b1e 100644 --- a/arch/xtensa/kernel/process.c +++ b/arch/xtensa/kernel/process.c @@ -211,11 +211,18 @@ int copy_thread(unsigned long clone_flags, unsigned long usp_thread_fn, struct thread_info *ti; #endif +#if defined(__XTENSA_WINDOWED_ABI__) /* Create a call4 dummy-frame: a0 = 0, a1 = childregs. */ SPILL_SLOT(childregs, 1) = (unsigned long)childregs; SPILL_SLOT(childregs, 0) = 0; p->thread.sp = (unsigned long)childregs; +#elif defined(__XTENSA_CALL0_ABI__) + /* Reserve 16 bytes for the _switch_to stack frame. */ + p->thread.sp = (unsigned long)childregs - 16; +#else +#error Unsupported Xtensa ABI +#endif if (!(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { struct pt_regs *regs = current_pt_regs(); @@ -272,11 +279,25 @@ int copy_thread(unsigned long clone_flags, unsigned long usp_thread_fn, p->thread.ra = MAKE_RA_FOR_CALL( (unsigned long)ret_from_kernel_thread, 1); - /* pass parameters to ret_from_kernel_thread: - * a2 = thread_fn, a3 = thread_fn arg + /* pass parameters to ret_from_kernel_thread: */ +#if defined(__XTENSA_WINDOWED_ABI__) + /* + * a2 = thread_fn, a3 = thread_fn arg. + * Window underflow will load registers from the + * spill slots on the stack on return from _switch_to. */ - SPILL_SLOT(childregs, 3) = thread_fn_arg; SPILL_SLOT(childregs, 2) = usp_thread_fn; + SPILL_SLOT(childregs, 3) = thread_fn_arg; +#elif defined(__XTENSA_CALL0_ABI__) + /* + * a12 = thread_fn, a13 = thread_fn arg. + * _switch_to epilogue will load registers from the stack. + */ + ((unsigned long *)p->thread.sp)[0] = usp_thread_fn; + ((unsigned long *)p->thread.sp)[1] = thread_fn_arg; +#else +#error Unsupported Xtensa ABI +#endif /* Childregs are only used when we're going to userspace * in which case start_thread will set them up. diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index ee9082a142fe..8db20cfb44ab 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -37,14 +37,15 @@ #include <asm/bootparam.h> #include <asm/kasan.h> #include <asm/mmu_context.h> -#include <asm/processor.h> -#include <asm/timex.h> -#include <asm/platform.h> #include <asm/page.h> -#include <asm/setup.h> #include <asm/param.h> +#include <asm/platform.h> +#include <asm/processor.h> +#include <asm/sections.h> +#include <asm/setup.h> #include <asm/smp.h> #include <asm/sysmem.h> +#include <asm/timex.h> #if defined(CONFIG_VGA_CONSOLE) || defined(CONFIG_DUMMY_CONSOLE) struct screen_info screen_info = { @@ -271,49 +272,6 @@ void __init init_arch(bp_tag_t *bp_start) * Initialize system. Setup memory and reserve regions. */ -extern char _end[]; -extern char _stext[]; -extern char _WindowVectors_text_start; -extern char _WindowVectors_text_end; -extern char _DebugInterruptVector_text_start; -extern char _DebugInterruptVector_text_end; -extern char _KernelExceptionVector_text_start; -extern char _KernelExceptionVector_text_end; -extern char _UserExceptionVector_text_start; -extern char _UserExceptionVector_text_end; -extern char _DoubleExceptionVector_text_start; -extern char _DoubleExceptionVector_text_end; -extern char _exception_text_start; -extern char _exception_text_end; -#if XCHAL_EXCM_LEVEL >= 2 -extern char _Level2InterruptVector_text_start; -extern char _Level2InterruptVector_text_end; -#endif -#if XCHAL_EXCM_LEVEL >= 3 -extern char _Level3InterruptVector_text_start; -extern char _Level3InterruptVector_text_end; -#endif -#if XCHAL_EXCM_LEVEL >= 4 -extern char _Level4InterruptVector_text_start; -extern char _Level4InterruptVector_text_end; -#endif -#if XCHAL_EXCM_LEVEL >= 5 -extern char _Level5InterruptVector_text_start; -extern char _Level5InterruptVector_text_end; -#endif -#if XCHAL_EXCM_LEVEL >= 6 -extern char _Level6InterruptVector_text_start; -extern char _Level6InterruptVector_text_end; -#endif -#ifdef CONFIG_SMP -extern char _SecondaryResetVector_text_start; -extern char _SecondaryResetVector_text_end; -#endif -#ifdef CONFIG_XIP_KERNEL -extern char _xip_start[]; -extern char _xip_end[]; -#endif - static inline int __init_memblock mem_reserve(unsigned long start, unsigned long end) { @@ -349,49 +307,51 @@ void __init setup_arch(char **cmdline_p) #endif #ifdef CONFIG_VECTORS_ADDR - mem_reserve(__pa(&_WindowVectors_text_start), - __pa(&_WindowVectors_text_end)); +#ifdef SUPPORT_WINDOWED + mem_reserve(__pa(_WindowVectors_text_start), + __pa(_WindowVectors_text_end)); +#endif - mem_reserve(__pa(&_DebugInterruptVector_text_start), - __pa(&_DebugInterruptVector_text_end)); + mem_reserve(__pa(_DebugInterruptVector_text_start), + __pa(_DebugInterruptVector_text_end)); - mem_reserve(__pa(&_KernelExceptionVector_text_start), - __pa(&_KernelExceptionVector_text_end)); + mem_reserve(__pa(_KernelExceptionVector_text_start), + __pa(_KernelExceptionVector_text_end)); - mem_reserve(__pa(&_UserExceptionVector_text_start), - __pa(&_UserExceptionVector_text_end)); + mem_reserve(__pa(_UserExceptionVector_text_start), + __pa(_UserExceptionVector_text_end)); - mem_reserve(__pa(&_DoubleExceptionVector_text_start), - __pa(&_DoubleExceptionVector_text_end)); + mem_reserve(__pa(_DoubleExceptionVector_text_start), + __pa(_DoubleExceptionVector_text_end)); - mem_reserve(__pa(&_exception_text_start), - __pa(&_exception_text_end)); + mem_reserve(__pa(_exception_text_start), + __pa(_exception_text_end)); #if XCHAL_EXCM_LEVEL >= 2 - mem_reserve(__pa(&_Level2InterruptVector_text_start), - __pa(&_Level2InterruptVector_text_end)); + mem_reserve(__pa(_Level2InterruptVector_text_start), + __pa(_Level2InterruptVector_text_end)); #endif #if XCHAL_EXCM_LEVEL >= 3 - mem_reserve(__pa(&_Level3InterruptVector_text_start), - __pa(&_Level3InterruptVector_text_end)); + mem_reserve(__pa(_Level3InterruptVector_text_start), + __pa(_Level3InterruptVector_text_end)); #endif #if XCHAL_EXCM_LEVEL >= 4 - mem_reserve(__pa(&_Level4InterruptVector_text_start), - __pa(&_Level4InterruptVector_text_end)); + mem_reserve(__pa(_Level4InterruptVector_text_start), + __pa(_Level4InterruptVector_text_end)); #endif #if XCHAL_EXCM_LEVEL >= 5 - mem_reserve(__pa(&_Level5InterruptVector_text_start), - __pa(&_Level5InterruptVector_text_end)); + mem_reserve(__pa(_Level5InterruptVector_text_start), + __pa(_Level5InterruptVector_text_end)); #endif #if XCHAL_EXCM_LEVEL >= 6 - mem_reserve(__pa(&_Level6InterruptVector_text_start), - __pa(&_Level6InterruptVector_text_end)); + mem_reserve(__pa(_Level6InterruptVector_text_start), + __pa(_Level6InterruptVector_text_end)); #endif #endif /* CONFIG_VECTORS_ADDR */ #ifdef CONFIG_SMP - mem_reserve(__pa(&_SecondaryResetVector_text_start), - __pa(&_SecondaryResetVector_text_end)); + mem_reserve(__pa(_SecondaryResetVector_text_start), + __pa(_SecondaryResetVector_text_end)); #endif parse_early_param(); bootmem_init(); diff --git a/arch/xtensa/kernel/signal.c b/arch/xtensa/kernel/signal.c index c4d77dbfb61a..f6c949895b3e 100644 --- a/arch/xtensa/kernel/signal.c +++ b/arch/xtensa/kernel/signal.c @@ -45,12 +45,13 @@ struct rt_sigframe unsigned int window[4]; }; -/* +#if defined(USER_SUPPORT_WINDOWED) +/* * Flush register windows stored in pt_regs to stack. * Returns 1 for errors. */ -int +static int flush_window_regs_user(struct pt_regs *regs) { const unsigned long ws = regs->windowstart; @@ -121,6 +122,13 @@ flush_window_regs_user(struct pt_regs *regs) errout: return err; } +#else +static int +flush_window_regs_user(struct pt_regs *regs) +{ + return 0; +} +#endif /* * Note: We don't copy double exception 'regs', we have to finish double exc. diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c index 874b6efc6fb3..4b4dbeb2d612 100644 --- a/arch/xtensa/kernel/traps.c +++ b/arch/xtensa/kernel/traps.c @@ -97,7 +97,9 @@ static dispatch_init_table_t __initdata dispatch_init_table[] = { /* EXCCAUSE_INSTRUCTION_FETCH unhandled */ /* EXCCAUSE_LOAD_STORE_ERROR unhandled*/ { EXCCAUSE_LEVEL1_INTERRUPT, 0, do_interrupt }, +#ifdef SUPPORT_WINDOWED { EXCCAUSE_ALLOCA, USER|KRNL, fast_alloca }, +#endif /* EXCCAUSE_INTEGER_DIVIDE_BY_ZERO unhandled */ /* EXCCAUSE_PRIVILEGED unhandled */ #if XCHAL_UNALIGNED_LOAD_EXCEPTION || XCHAL_UNALIGNED_STORE_EXCEPTION @@ -462,12 +464,10 @@ void secondary_trap_init(void) void show_regs(struct pt_regs * regs) { - int i, wmask; + int i; show_regs_print_info(KERN_DEFAULT); - wmask = regs->wmask & ~1; - for (i = 0; i < 16; i++) { if ((i % 8) == 0) pr_info("a%02d:", i); @@ -527,7 +527,7 @@ void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl) DEFINE_SPINLOCK(die_lock); -void die(const char * str, struct pt_regs * regs, long err) +void __noreturn die(const char * str, struct pt_regs * regs, long err) { static int die_counter; const char *pr = ""; diff --git a/arch/xtensa/kernel/vectors.S b/arch/xtensa/kernel/vectors.S index 1a7538ccfc5a..407ece204e7c 100644 --- a/arch/xtensa/kernel/vectors.S +++ b/arch/xtensa/kernel/vectors.S @@ -226,6 +226,7 @@ ENTRY(_DoubleExceptionVector) xsr a0, depc # get DEPC, save a0 +#ifdef SUPPORT_WINDOWED movi a2, WINDOW_VECTORS_VADDR _bltu a0, a2, .Lfixup addi a2, a2, WINDOW_VECTORS_SIZE @@ -275,6 +276,10 @@ _DoubleExceptionVector_WindowUnderflow: l32i a0, a0, EXC_TABLE_FAST_USER jx a0 +#else + j .Lfixup +#endif + /* * We only allow the ITLB miss exception if we are in kernel space. * All other exceptions are unexpected and thus unrecoverable! @@ -343,6 +348,7 @@ _DoubleExceptionVector_WindowUnderflow: l32i a0, a0, EXC_TABLE_FAST_USER jx a0 +#ifdef SUPPORT_WINDOWED /* * Restart window OVERFLOW exception. * Currently: @@ -475,9 +481,12 @@ _DoubleExceptionVector_handle_exception: rsr a0, depc rotw -3 j 1b +#endif ENDPROC(_DoubleExceptionVector) +#ifdef SUPPORT_WINDOWED + /* * Fixup handler for TLB miss in double exception handler for window owerflow. * We get here with windowbase set to the window that was being spilled and @@ -590,6 +599,8 @@ ENTRY(window_overflow_restore_a0_fixup) ENDPROC(window_overflow_restore_a0_fixup) +#endif + /* * Debug interrupt vector * @@ -650,6 +661,25 @@ ENTRY(_Level\level\()InterruptVector) irq_entry_level 5 irq_entry_level 6 +#if XCHAL_EXCM_LEVEL >= 2 + /* + * Continuation of medium priority interrupt dispatch code. + * On entry here, a0 contains PS, and EPC2 contains saved a0: + */ + __XTENSA_HANDLER + .align 4 +_SimulateUserKernelVectorException: + addi a0, a0, (1 << PS_EXCM_BIT) +#if !XTENSA_FAKE_NMI + wsr a0, ps +#endif + bbsi.l a0, PS_UM_BIT, 1f # branch if user mode + xsr a0, excsave2 # restore a0 + j _KernelExceptionVector # simulate kernel vector exception +1: xsr a0, excsave2 # restore a0 + j _UserExceptionVector # simulate user vector exception +#endif + /* Window overflow and underflow handlers. * The handlers must be 64 bytes apart, first starting with the underflow @@ -668,6 +698,8 @@ ENTRY(_Level\level\()InterruptVector) .section .WindowVectors.text, "ax" +#ifdef SUPPORT_WINDOWED + /* 4-Register Window Overflow Vector (Handler) */ ENTRY_ALIGN64(_WindowOverflow4) @@ -680,27 +712,6 @@ ENTRY_ALIGN64(_WindowOverflow4) ENDPROC(_WindowOverflow4) - -#if XCHAL_EXCM_LEVEL >= 2 - /* Not a window vector - but a convenient location - * (where we know there's space) for continuation of - * medium priority interrupt dispatch code. - * On entry here, a0 contains PS, and EPC2 contains saved a0: - */ - .align 4 -_SimulateUserKernelVectorException: - addi a0, a0, (1 << PS_EXCM_BIT) -#if !XTENSA_FAKE_NMI - wsr a0, ps -#endif - bbsi.l a0, PS_UM_BIT, 1f # branch if user mode - xsr a0, excsave2 # restore a0 - j _KernelExceptionVector # simulate kernel vector exception -1: xsr a0, excsave2 # restore a0 - j _UserExceptionVector # simulate user vector exception -#endif - - /* 4-Register Window Underflow Vector (Handler) */ ENTRY_ALIGN64(_WindowUnderflow4) @@ -789,4 +800,6 @@ ENTRY_ALIGN64(_WindowUnderflow12) ENDPROC(_WindowUnderflow12) +#endif + .text diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S index d23a6e38f062..eee270a039a4 100644 --- a/arch/xtensa/kernel/vmlinux.lds.S +++ b/arch/xtensa/kernel/vmlinux.lds.S @@ -94,7 +94,9 @@ SECTIONS . = ALIGN(PAGE_SIZE); _vecbase = .; +#ifdef SUPPORT_WINDOWED SECTION_VECTOR2 (.WindowVectors.text, WINDOW_VECTORS_VADDR) +#endif #if XCHAL_EXCM_LEVEL >= 2 SECTION_VECTOR2 (.Level2InterruptVector.text, INTLEVEL2_VECTOR_VADDR) #endif @@ -166,8 +168,10 @@ SECTIONS __boot_reloc_table_start = ABSOLUTE(.); #if !MERGED_VECTORS +#ifdef SUPPORT_WINDOWED RELOCATE_ENTRY(_WindowVectors_text, .WindowVectors.text); +#endif #if XCHAL_EXCM_LEVEL >= 2 RELOCATE_ENTRY(_Level2InterruptVector_text, .Level2InterruptVector.text); @@ -229,14 +233,18 @@ SECTIONS #if !MERGED_VECTORS /* The vectors are relocated to the real position at startup time */ +#ifdef SUPPORT_WINDOWED SECTION_VECTOR4 (_WindowVectors_text, .WindowVectors.text, WINDOW_VECTORS_VADDR, - .dummy) + LAST) +#undef LAST +#define LAST .WindowVectors.text +#endif SECTION_VECTOR4 (_DebugInterruptVector_text, .DebugInterruptVector.text, DEBUG_VECTOR_VADDR, - .WindowVectors.text) + LAST) #undef LAST #define LAST .DebugInterruptVector.text #if XCHAL_EXCM_LEVEL >= 2 diff --git a/arch/xtensa/lib/strncpy_user.S b/arch/xtensa/lib/strncpy_user.S index 4faf46fe3f38..0731912227d3 100644 --- a/arch/xtensa/lib/strncpy_user.S +++ b/arch/xtensa/lib/strncpy_user.S @@ -45,7 +45,6 @@ # a9/ tmp # a10/ tmp # a11/ dst -# a12/ tmp .text ENTRY(__strncpy_user) @@ -61,7 +60,7 @@ ENTRY(__strncpy_user) bbsi.l a3, 0, .Lsrc1mod2 # if only 8-bit aligned bbsi.l a3, 1, .Lsrc2mod4 # if only 16-bit aligned .Lsrcaligned: # return here when src is word-aligned - srli a12, a4, 2 # number of loop iterations with 4B per loop + srli a10, a4, 2 # number of loop iterations with 4B per loop movi a9, 3 bnone a11, a9, .Laligned j .Ldstunaligned @@ -102,11 +101,11 @@ EX(10f) s8i a9, a11, 0 # store byte 0 .byte 0 # (0 mod 4 alignment for LBEG) .Laligned: #if XCHAL_HAVE_LOOPS - loopnez a12, .Loop1done + loopnez a10, .Loop1done #else - beqz a12, .Loop1done - slli a12, a12, 2 - add a12, a12, a11 # a12 = end of last 4B chunck + beqz a10, .Loop1done + slli a10, a10, 2 + add a10, a10, a11 # a10 = end of last 4B chunck #endif .Loop1: EX(11f) l32i a9, a3, 0 # get word from src @@ -118,7 +117,7 @@ EX(10f) s32i a9, a11, 0 # store word to dst bnone a9, a8, .Lz3 # if byte 3 is zero addi a11, a11, 4 # advance dst pointer #if !XCHAL_HAVE_LOOPS - blt a11, a12, .Loop1 + blt a11, a10, .Loop1 #endif .Loop1done: @@ -185,7 +184,7 @@ EX(10f) s8i a9, a11, 2 loopnez a4, .Lunalignedend #else beqz a4, .Lunalignedend - add a12, a11, a4 # a12 = ending address + add a10, a11, a4 # a10 = ending address #endif /* XCHAL_HAVE_LOOPS */ .Lnextbyte: EX(11f) l8ui a9, a3, 0 @@ -194,7 +193,7 @@ EX(10f) s8i a9, a11, 0 beqz a9, .Lunalignedend addi a11, a11, 1 #if !XCHAL_HAVE_LOOPS - blt a11, a12, .Lnextbyte + blt a11, a10, .Lnextbyte #endif .Lunalignedend: diff --git a/arch/xtensa/lib/usercopy.S b/arch/xtensa/lib/usercopy.S index a0aa4047f94a..16128c094c62 100644 --- a/arch/xtensa/lib/usercopy.S +++ b/arch/xtensa/lib/usercopy.S @@ -60,7 +60,12 @@ .text ENTRY(__xtensa_copy_user) - abi_entry_default +#if !XCHAL_HAVE_LOOPS && defined(__XTENSA_CALL0_ABI__) +#define STACK_SIZE 4 +#else +#define STACK_SIZE 0 +#endif + abi_entry(STACK_SIZE) # a2/ dst, a3/ src, a4/ len mov a5, a2 # copy dst so that a2 is return value mov a11, a4 # preserve original len for error case @@ -75,7 +80,7 @@ ENTRY(__xtensa_copy_user) __ssa8 a3 # set shift amount from byte offset bnez a4, .Lsrcunaligned movi a2, 0 # return success for len==0 - abi_ret_default + abi_ret(STACK_SIZE) /* * Destination is unaligned @@ -127,7 +132,7 @@ EX(10f) s8i a6, a5, 0 #endif /* !XCHAL_HAVE_LOOPS */ .Lbytecopydone: movi a2, 0 # return success for len bytes copied - abi_ret_default + abi_ret(STACK_SIZE) /* * Destination and source are word-aligned. @@ -187,7 +192,7 @@ EX(10f) l8ui a6, a3, 0 EX(10f) s8i a6, a5, 0 .L5: movi a2, 0 # return success for len bytes copied - abi_ret_default + abi_ret(STACK_SIZE) /* * Destination is aligned, Source is unaligned @@ -205,8 +210,14 @@ EX(10f) l32i a6, a3, 0 # load first word loopnez a7, .Loop2done #else /* !XCHAL_HAVE_LOOPS */ beqz a7, .Loop2done +#if defined(__XTENSA_CALL0_ABI__) + s32i a10, a1, 0 + slli a10, a7, 4 + add a10, a10, a3 # a10 = end of last 16B source chunk +#else slli a12, a7, 4 add a12, a12, a3 # a12 = end of last 16B source chunk +#endif #endif /* !XCHAL_HAVE_LOOPS */ .Loop2: EX(10f) l32i a7, a3, 4 @@ -224,7 +235,12 @@ EX(10f) s32i a8, a5, 8 EX(10f) s32i a9, a5, 12 addi a5, a5, 16 #if !XCHAL_HAVE_LOOPS +#if defined(__XTENSA_CALL0_ABI__) + blt a3, a10, .Loop2 + l32i a10, a1, 0 +#else blt a3, a12, .Loop2 +#endif #endif /* !XCHAL_HAVE_LOOPS */ .Loop2done: bbci.l a4, 3, .L12 @@ -264,7 +280,7 @@ EX(10f) l8ui a6, a3, 0 EX(10f) s8i a6, a5, 0 .L15: movi a2, 0 # return success for len bytes copied - abi_ret_default + abi_ret(STACK_SIZE) ENDPROC(__xtensa_copy_user) @@ -281,4 +297,4 @@ ENDPROC(__xtensa_copy_user) 10: sub a2, a5, a2 /* a2 <-- bytes copied */ sub a2, a11, a2 /* a2 <-- bytes not copied */ - abi_ret_default + abi_ret(STACK_SIZE) diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c index 95a74890c7e9..fd6a70635962 100644 --- a/arch/xtensa/mm/fault.c +++ b/arch/xtensa/mm/fault.c @@ -238,7 +238,7 @@ bad_page_fault: void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) { - extern void die(const char*, struct pt_regs*, long); + extern void __noreturn die(const char*, struct pt_regs*, long); const struct exception_table_entry *entry; /* Are we prepared to handle this kernel fault? */ @@ -257,5 +257,4 @@ bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) "address %08lx\n pc = %08lx, ra = %08lx\n", address, regs->pc, regs->areg[0]); die("Oops", regs, sig); - do_exit(sig); } |
