diff options
Diffstat (limited to 'arch/arm64')
25 files changed, 234 insertions, 233 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index de238b59d9eb..0b30e884e088 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -104,6 +104,7 @@ config ARM64 select GENERIC_IRQ_SHOW select GENERIC_IRQ_SHOW_LEVEL select GENERIC_PCI_IOMAP + select GENERIC_PTDUMP select GENERIC_SCHED_CLOCK select GENERIC_SMP_IDLE_THREAD select GENERIC_STRNCPY_FROM_USER @@ -164,7 +165,7 @@ config ARM64 select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_FUNCTION_ARG_ACCESS_API select HAVE_FUTEX_CMPXCHG if FUTEX - select HAVE_RCU_TABLE_FREE + select MMU_GATHER_RCU_TABLE_FREE select HAVE_RSEQ select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug index cf09010d825f..1c906d932d6b 100644 --- a/arch/arm64/Kconfig.debug +++ b/arch/arm64/Kconfig.debug @@ -1,22 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -config ARM64_PTDUMP_CORE - def_bool n - -config ARM64_PTDUMP_DEBUGFS - bool "Export kernel pagetable layout to userspace via debugfs" - depends on DEBUG_KERNEL - select ARM64_PTDUMP_CORE - select DEBUG_FS - help - Say Y here if you want to show the kernel pagetable layout in a - debugfs file. This information is only useful for kernel developers - who are working in architecture specific areas of the kernel. - It is probably not a good idea to enable this feature in a production - kernel. - - If in doubt, say N. - config PID_IN_CONTEXTIDR bool "Write the current PID to the CONTEXTIDR register" help @@ -42,7 +25,7 @@ config ARM64_RANDOMIZE_TEXT_OFFSET config DEBUG_WX bool "Warn on W+X mappings at boot" - select ARM64_PTDUMP_CORE + select PTDUMP_CORE ---help--- Generate a warning if any W+X mappings are found at boot. diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index bd23f87d6c55..d3077c991962 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -3,7 +3,6 @@ generic-y += bugs.h generic-y += delay.h generic-y += div64.h generic-y += dma.h -generic-y += dma-contiguous.h generic-y += dma-mapping.h generic-y += early_ioremap.h generic-y += emergency-restart.h diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h index b0d53a265f1d..935d2aa231bf 100644 --- a/arch/arm64/include/asm/compat.h +++ b/arch/arm64/include/asm/compat.h @@ -4,6 +4,9 @@ */ #ifndef __ASM_COMPAT_H #define __ASM_COMPAT_H + +#include <asm-generic/compat.h> + #ifdef CONFIG_COMPAT /* @@ -13,8 +16,6 @@ #include <linux/sched.h> #include <linux/sched/task_stack.h> -#include <asm-generic/compat.h> - #define COMPAT_USER_HZ 100 #ifdef __AARCH64EB__ #define COMPAT_UTS_MACHINE "armv8b\0\0" @@ -113,23 +114,6 @@ typedef u32 compat_sigset_word; #define COMPAT_OFF_T_MAX 0x7fffffff -/* - * A pointer passed in from user mode. This should not - * be used for syscall parameters, just declare them - * as pointers because the syscall entry code will have - * appropriately converted them already. - */ - -static inline void __user *compat_ptr(compat_uptr_t uptr) -{ - return (void __user *)(unsigned long)uptr; -} - -static inline compat_uptr_t ptr_to_compat(void __user *uptr) -{ - return (u32)(unsigned long)uptr; -} - #define compat_user_stack_pointer() (user_stack_pointer(task_pt_regs(current))) #define COMPAT_MINSIGSTKSZ 2048 diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 5efe5ca8fecf..688c63412cc2 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -17,7 +17,6 @@ #include <asm/esr.h> #include <asm/kvm_arm.h> #include <asm/kvm_hyp.h> -#include <asm/kvm_mmio.h> #include <asm/ptrace.h> #include <asm/cputype.h> #include <asm/virt.h> @@ -219,6 +218,38 @@ static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v) vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1] = v; } +/* + * The layout of SPSR for an AArch32 state is different when observed from an + * AArch64 SPSR_ELx or an AArch32 SPSR_*. This function generates the AArch32 + * view given an AArch64 view. + * + * In ARM DDI 0487E.a see: + * + * - The AArch64 view (SPSR_EL2) in section C5.2.18, page C5-426 + * - The AArch32 view (SPSR_abt) in section G8.2.126, page G8-6256 + * - The AArch32 view (SPSR_und) in section G8.2.132, page G8-6280 + * + * Which show the following differences: + * + * | Bit | AA64 | AA32 | Notes | + * +-----+------+------+-----------------------------| + * | 24 | DIT | J | J is RES0 in ARMv8 | + * | 21 | SS | DIT | SS doesn't exist in AArch32 | + * + * ... and all other bits are (currently) common. + */ +static inline unsigned long host_spsr_to_spsr32(unsigned long spsr) +{ + const unsigned long overlap = BIT(24) | BIT(21); + unsigned long dit = !!(spsr & PSR_AA32_DIT_BIT); + + spsr &= ~overlap; + + spsr |= dit << 21; + + return spsr; +} + static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu) { u32 mode; @@ -283,6 +314,11 @@ static inline bool kvm_vcpu_dabt_issext(const struct kvm_vcpu *vcpu) return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SSE); } +static inline bool kvm_vcpu_dabt_issf(const struct kvm_vcpu *vcpu) +{ + return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SF); +} + static inline int kvm_vcpu_dabt_get_rd(const struct kvm_vcpu *vcpu) { return (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT; @@ -304,7 +340,7 @@ static inline bool kvm_vcpu_dabt_is_cm(const struct kvm_vcpu *vcpu) return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_CM); } -static inline int kvm_vcpu_dabt_get_as(const struct kvm_vcpu *vcpu) +static inline unsigned int kvm_vcpu_dabt_get_as(const struct kvm_vcpu *vcpu) { return 1 << ((kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT); } diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index f5acdde17f3b..d87aa609d2b6 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -24,7 +24,6 @@ #include <asm/fpsimd.h> #include <asm/kvm.h> #include <asm/kvm_asm.h> -#include <asm/kvm_mmio.h> #include <asm/thread_info.h> #define __KVM_HAVE_ARCH_INTC_INITIALIZED @@ -53,7 +52,7 @@ int kvm_arm_init_sve(void); int __attribute_const__ kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); -void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu); +void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu); int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext); void __extended_idmap_trampoline(phys_addr_t boot_pgd, phys_addr_t idmap_start); @@ -325,9 +324,6 @@ struct kvm_vcpu_arch { /* Don't run the guest (internal implementation need) */ bool pause; - /* IO related fields */ - struct kvm_decode mmio_decode; - /* Cache some mmu pages needed inside spinlock regions */ struct kvm_mmu_memory_cache mmu_page_cache; @@ -446,8 +442,6 @@ int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); -struct kvm_vcpu *kvm_arm_get_running_vcpu(void); -struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void); void kvm_arm_halt_guest(struct kvm *kvm); void kvm_arm_resume_guest(struct kvm *kvm); @@ -491,6 +485,14 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run, int exception_index); +/* MMIO helpers */ +void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data); +unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len); + +int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run); +int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, + phys_addr_t fault_ipa); + int kvm_perf_init(void); int kvm_perf_teardown(void); diff --git a/arch/arm64/include/asm/kvm_mmio.h b/arch/arm64/include/asm/kvm_mmio.h deleted file mode 100644 index 02b5c48fd467..000000000000 --- a/arch/arm64/include/asm/kvm_mmio.h +++ /dev/null @@ -1,29 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2012 - Virtual Open Systems and Columbia University - * Author: Christoffer Dall <c.dall@virtualopensystems.com> - */ - -#ifndef __ARM64_KVM_MMIO_H__ -#define __ARM64_KVM_MMIO_H__ - -#include <linux/kvm_host.h> -#include <asm/kvm_arm.h> - -/* - * This is annoying. The mmio code requires this, even if we don't - * need any decoding. To be fixed. - */ -struct kvm_decode { - unsigned long rt; - bool sign_extend; -}; - -void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data); -unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len); - -int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run); -int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, - phys_addr_t fault_ipa); - -#endif /* __ARM64_KVM_MMIO_H__ */ diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index cd5de0e40bfa..538c85e62f86 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -441,6 +441,7 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, PMD_TYPE_TABLE) #define pmd_sect(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \ PMD_TYPE_SECT) +#define pmd_leaf(pmd) pmd_sect(pmd) #if defined(CONFIG_ARM64_64K_PAGES) || CONFIG_PGTABLE_LEVELS < 3 static inline bool pud_sect(pud_t pud) { return false; } @@ -525,6 +526,7 @@ static inline void pte_unmap(pte_t *pte) { } #define pud_none(pud) (!pud_val(pud)) #define pud_bad(pud) (!(pud_val(pud) & PUD_TABLE_BIT)) #define pud_present(pud) pte_present(pud_pte(pud)) +#define pud_leaf(pud) pud_sect(pud) #define pud_valid(pud) pte_valid(pud_pte(pud)) static inline void set_pud(pud_t *pudp, pud_t pud) diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h index 0b8e7269ec82..38187f74e089 100644 --- a/arch/arm64/include/asm/ptdump.h +++ b/arch/arm64/include/asm/ptdump.h @@ -5,7 +5,7 @@ #ifndef __ASM_PTDUMP_H #define __ASM_PTDUMP_H -#ifdef CONFIG_ARM64_PTDUMP_CORE +#ifdef CONFIG_PTDUMP_CORE #include <linux/mm_types.h> #include <linux/seq_file.h> @@ -21,15 +21,15 @@ struct ptdump_info { unsigned long base_addr; }; -void ptdump_walk_pgd(struct seq_file *s, struct ptdump_info *info); -#ifdef CONFIG_ARM64_PTDUMP_DEBUGFS +void ptdump_walk(struct seq_file *s, struct ptdump_info *info); +#ifdef CONFIG_PTDUMP_DEBUGFS void ptdump_debugfs_register(struct ptdump_info *info, const char *name); #else static inline void ptdump_debugfs_register(struct ptdump_info *info, const char *name) { } #endif void ptdump_check_wx(void); -#endif /* CONFIG_ARM64_PTDUMP_CORE */ +#endif /* CONFIG_PTDUMP_CORE */ #ifdef CONFIG_DEBUG_WX #define debug_checkwx() ptdump_check_wx() diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index fbebb411ae20..bf57308fcd63 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -62,6 +62,7 @@ #define PSR_AA32_I_BIT 0x00000080 #define PSR_AA32_A_BIT 0x00000100 #define PSR_AA32_E_BIT 0x00000200 +#define PSR_AA32_PAN_BIT 0x00400000 #define PSR_AA32_SSBS_BIT 0x00800000 #define PSR_AA32_DIT_BIT 0x01000000 #define PSR_AA32_Q_BIT 0x08000000 diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 5af82587909e..1dd22da1c3a9 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -38,7 +38,7 @@ #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800) -#define __NR_compat_syscalls 436 +#define __NR_compat_syscalls 439 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 94ab29cf4f00..c1c61635f89c 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -879,6 +879,10 @@ __SYSCALL(__NR_fspick, sys_fspick) __SYSCALL(__NR_pidfd_open, sys_pidfd_open) #define __NR_clone3 435 __SYSCALL(__NR_clone3, sys_clone3) +#define __NR_openat2 437 +__SYSCALL(__NR_openat2, sys_openat2) +#define __NR_pidfd_getfd 438 +__SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd) /* * Please add new compat syscalls above this comment and update diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 820e5751ada7..ba85bb23f060 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -220,10 +220,18 @@ struct kvm_vcpu_events { #define KVM_REG_ARM_PTIMER_CVAL ARM64_SYS_REG(3, 3, 14, 2, 2) #define KVM_REG_ARM_PTIMER_CNT ARM64_SYS_REG(3, 3, 14, 0, 1) -/* EL0 Virtual Timer Registers */ +/* + * EL0 Virtual Timer Registers + * + * WARNING: + * KVM_REG_ARM_TIMER_CVAL and KVM_REG_ARM_TIMER_CNT are not defined + * with the appropriate register encodings. Their values have been + * accidentally swapped. As this is set API, the definitions here + * must be used, rather than ones derived from the encodings. + */ #define KVM_REG_ARM_TIMER_CTL ARM64_SYS_REG(3, 3, 14, 3, 1) -#define KVM_REG_ARM_TIMER_CNT ARM64_SYS_REG(3, 3, 14, 3, 2) #define KVM_REG_ARM_TIMER_CVAL ARM64_SYS_REG(3, 3, 14, 0, 2) +#define KVM_REG_ARM_TIMER_CNT ARM64_SYS_REG(3, 3, 14, 3, 2) /* KVM-as-firmware specific pseudo-registers */ #define KVM_REG_ARM_FW (0x0014 << KVM_REG_ARM_COPROC_SHIFT) diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index 7ed9294e2004..d1bb5b69f1ce 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -49,6 +49,7 @@ #define PSR_SSBS_BIT 0x00001000 #define PSR_PAN_BIT 0x00400000 #define PSR_UAO_BIT 0x00800000 +#define PSR_DIT_BIT 0x01000000 #define PSR_V_BIT 0x10000000 #define PSR_C_BIT 0x20000000 #define PSR_Z_BIT 0x40000000 diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index b6f9455d7ca3..a34890bf309f 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -360,9 +360,6 @@ void __init setup_arch(char **cmdline_p) init_task.thread_info.ttbr0 = __pa_symbol(empty_zero_page); #endif -#ifdef CONFIG_VT - conswitchp = &dummy_con; -#endif if (boot_args[1] || boot_args[2] || boot_args[3]) { pr_err("WARNING: x1-x3 nonzero in violation of boot protocol:\n" "\tx1: %016llx\n\tx2: %016llx\n\tx3: %016llx\n" diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c index 43487f035385..7a7e425616b5 100644 --- a/arch/arm64/kvm/debug.c +++ b/arch/arm64/kvm/debug.c @@ -101,7 +101,7 @@ void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) { bool trap_debug = !(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY); - unsigned long mdscr; + unsigned long mdscr, orig_mdcr_el2 = vcpu->arch.mdcr_el2; trace_kvm_arm_setup_debug(vcpu, vcpu->guest_debug); @@ -197,6 +197,10 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) if (vcpu_read_sys_reg(vcpu, MDSCR_EL1) & (DBG_MDSCR_KDE | DBG_MDSCR_MDE)) vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY; + /* Write mdcr_el2 changes since vcpu_load on VHE systems */ + if (has_vhe() && orig_mdcr_el2 != vcpu->arch.mdcr_el2) + write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); + trace_kvm_arm_set_dreg32("MDCR_EL2", vcpu->arch.mdcr_el2); trace_kvm_arm_set_dreg32("MDSCR_EL1", vcpu_read_sys_reg(vcpu, MDSCR_EL1)); } diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 2fff06114a8f..2bd92301d32f 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -47,11 +47,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { NULL } }; -int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) -{ - return 0; -} - static bool core_reg_offset_is_vreg(u64 off) { return off >= KVM_REG_ARM_CORE_REG(fp_regs.vregs) && diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index 0c6832ec52b1..d22d0534dd60 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -51,7 +51,7 @@ * u64 __guest_enter(struct kvm_vcpu *vcpu, * struct kvm_cpu_context *host_ctxt); */ -ENTRY(__guest_enter) +SYM_FUNC_START(__guest_enter) // x0: vcpu // x1: host context // x2-x17: clobbered by macros @@ -100,9 +100,8 @@ alternative_else_nop_endif // Do not touch any register after this! eret sb -ENDPROC(__guest_enter) -ENTRY(__guest_exit) +SYM_INNER_LABEL(__guest_exit, SYM_L_GLOBAL) // x0: return code // x1: vcpu // x2-x29,lr: vcpu regs @@ -195,4 +194,4 @@ abort_guest_exit_end: msr spsr_el2, x4 orr x0, x0, x5 1: ret -ENDPROC(__guest_exit) +SYM_FUNC_END(__guest_enter) diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index ccdb6a051ab2..6aafc2825c1c 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -14,9 +14,6 @@ #include <asm/kvm_emulate.h> #include <asm/esr.h> -#define PSTATE_FAULT_BITS_64 (PSR_MODE_EL1h | PSR_A_BIT | PSR_F_BIT | \ - PSR_I_BIT | PSR_D_BIT) - #define CURRENT_EL_SP_EL0_VECTOR 0x0 #define CURRENT_EL_SP_ELx_VECTOR 0x200 #define LOWER_EL_AArch64_VECTOR 0x400 @@ -50,6 +47,69 @@ static u64 get_except_vector(struct kvm_vcpu *vcpu, enum exception_type type) return vcpu_read_sys_reg(vcpu, VBAR_EL1) + exc_offset + type; } +/* + * When an exception is taken, most PSTATE fields are left unchanged in the + * handler. However, some are explicitly overridden (e.g. M[4:0]). Luckily all + * of the inherited bits have the same position in the AArch64/AArch32 SPSR_ELx + * layouts, so we don't need to shuffle these for exceptions from AArch32 EL0. + * + * For the SPSR_ELx layout for AArch64, see ARM DDI 0487E.a page C5-429. + * For the SPSR_ELx layout for AArch32, see ARM DDI 0487E.a page C5-426. + * + * Here we manipulate the fields in order of the AArch64 SPSR_ELx layout, from + * MSB to LSB. + */ +static unsigned long get_except64_pstate(struct kvm_vcpu *vcpu) +{ + unsigned long sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1); + unsigned long old, new; + + old = *vcpu_cpsr(vcpu); + new = 0; + + new |= (old & PSR_N_BIT); + new |= (old & PSR_Z_BIT); + new |= (old & PSR_C_BIT); + new |= (old & PSR_V_BIT); + + // TODO: TCO (if/when ARMv8.5-MemTag is exposed to guests) + + new |= (old & PSR_DIT_BIT); + + // PSTATE.UAO is set to zero upon any exception to AArch64 + // See ARM DDI 0487E.a, page D5-2579. + + // PSTATE.PAN is unchanged unless SCTLR_ELx.SPAN == 0b0 + // SCTLR_ELx.SPAN is RES1 when ARMv8.1-PAN is not implemented + // See ARM DDI 0487E.a, page D5-2578. + new |= (old & PSR_PAN_BIT); + if (!(sctlr & SCTLR_EL1_SPAN)) + new |= PSR_PAN_BIT; + + // PSTATE.SS is set to zero upon any exception to AArch64 + // See ARM DDI 0487E.a, page D2-2452. + + // PSTATE.IL is set to zero upon any exception to AArch64 + // See ARM DDI 0487E.a, page D1-2306. + + // PSTATE.SSBS is set to SCTLR_ELx.DSSBS upon any exception to AArch64 + // See ARM DDI 0487E.a, page D13-3258 + if (sctlr & SCTLR_ELx_DSSBS) + new |= PSR_SSBS_BIT; + + // PSTATE.BTYPE is set to zero upon any exception to AArch64 + // See ARM DDI 0487E.a, pages D1-2293 to D1-2294. + + new |= PSR_D_BIT; + new |= PSR_A_BIT; + new |= PSR_I_BIT; + new |= PSR_F_BIT; + + new |= PSR_MODE_EL1h; + + return new; +} + static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr) { unsigned long cpsr = *vcpu_cpsr(vcpu); @@ -59,7 +119,7 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu)); *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync); - *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64; + *vcpu_cpsr(vcpu) = get_except64_pstate(vcpu); vcpu_write_spsr(vcpu, cpsr); vcpu_write_sys_reg(vcpu, addr, FAR_EL1); @@ -94,7 +154,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu) vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu)); *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync); - *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64; + *vcpu_cpsr(vcpu) = get_except64_pstate(vcpu); vcpu_write_spsr(vcpu, cpsr); /* diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index f4a8ae918827..30b7ea680f66 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -204,7 +204,7 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu) return true; } -void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) +void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu) { kfree(vcpu->arch.sve_state); } diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c index dab1fea4752a..a4f48c1ac28c 100644 --- a/arch/arm64/kvm/va_layout.c +++ b/arch/arm64/kvm/va_layout.c @@ -13,52 +13,46 @@ #include <asm/kvm_mmu.h> /* - * The LSB of the random hyp VA tag or 0 if no randomization is used. + * The LSB of the HYP VA tag */ static u8 tag_lsb; /* - * The random hyp VA tag value with the region bit if hyp randomization is used + * The HYP VA tag value with the region bit */ static u64 tag_val; static u64 va_mask; +/* + * We want to generate a hyp VA with the following format (with V == + * vabits_actual): + * + * 63 ... V | V-1 | V-2 .. tag_lsb | tag_lsb - 1 .. 0 + * --------------------------------------------------------- + * | 0000000 | hyp_va_msb | random tag | kern linear VA | + * |--------- tag_val -----------|----- va_mask ---| + * + * which does not conflict with the idmap regions. + */ __init void kvm_compute_layout(void) { phys_addr_t idmap_addr = __pa_symbol(__hyp_idmap_text_start); u64 hyp_va_msb; - int kva_msb; /* Where is my RAM region? */ hyp_va_msb = idmap_addr & BIT(vabits_actual - 1); hyp_va_msb ^= BIT(vabits_actual - 1); - kva_msb = fls64((u64)phys_to_virt(memblock_start_of_DRAM()) ^ + tag_lsb = fls64((u64)phys_to_virt(memblock_start_of_DRAM()) ^ (u64)(high_memory - 1)); - if (kva_msb == (vabits_actual - 1)) { - /* - * No space in the address, let's compute the mask so - * that it covers (vabits_actual - 1) bits, and the region - * bit. The tag stays set to zero. - */ - va_mask = BIT(vabits_actual - 1) - 1; - va_mask |= hyp_va_msb; - } else { - /* - * We do have some free bits to insert a random tag. - * Hyp VAs are now created from kernel linear map VAs - * using the following formula (with V == vabits_actual): - * - * 63 ... V | V-1 | V-2 .. tag_lsb | tag_lsb - 1 .. 0 - * --------------------------------------------------------- - * | 0000000 | hyp_va_msb | random tag | kern linear VA | - */ - tag_lsb = kva_msb; - va_mask = GENMASK_ULL(tag_lsb - 1, 0); - tag_val = get_random_long() & GENMASK_ULL(vabits_actual - 2, tag_lsb); - tag_val |= hyp_va_msb; - tag_val >>= tag_lsb; + va_mask = GENMASK_ULL(tag_lsb - 1, 0); + tag_val = hyp_va_msb; + + if (tag_lsb != (vabits_actual - 1)) { + /* We have some free bits to insert a random tag. */ + tag_val |= get_random_long() & GENMASK_ULL(vabits_actual - 2, tag_lsb); } + tag_val >>= tag_lsb; } static u32 compute_instruction(int n, u32 rd, u32 rn) @@ -117,11 +111,11 @@ void __init kvm_update_va_mask(struct alt_instr *alt, * VHE doesn't need any address translation, let's NOP * everything. * - * Alternatively, if we don't have any spare bits in - * the address, NOP everything after masking that - * kernel VA. + * Alternatively, if the tag is zero (because the layout + * dictates it and we don't have any spare bits in the + * address), NOP everything after masking the kernel VA. */ - if (has_vhe() || (!tag_lsb && i > 0)) { + if (has_vhe() || (!tag_val && i > 0)) { updptr[i] = cpu_to_le32(aarch64_insn_gen_nop()); continue; } diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile index 849c1df3d214..d91030f0ffee 100644 --- a/arch/arm64/mm/Makefile +++ b/arch/arm64/mm/Makefile @@ -4,8 +4,8 @@ obj-y := dma-mapping.o extable.o fault.o init.o \ ioremap.o mmap.o pgd.o mmu.o \ context.o proc.o pageattr.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o -obj-$(CONFIG_ARM64_PTDUMP_CORE) += dump.o -obj-$(CONFIG_ARM64_PTDUMP_DEBUGFS) += ptdump_debugfs.o +obj-$(CONFIG_PTDUMP_CORE) += dump.o +obj-$(CONFIG_PTDUMP_DEBUGFS) += ptdump_debugfs.o obj-$(CONFIG_NUMA) += numa.o obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o KASAN_SANITIZE_physaddr.o += n diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index 0a920b538a89..860c00ec8bd3 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -15,6 +15,7 @@ #include <linux/io.h> #include <linux/init.h> #include <linux/mm.h> +#include <linux/ptdump.h> #include <linux/sched.h> #include <linux/seq_file.h> @@ -75,10 +76,11 @@ static struct addr_marker address_markers[] = { * dumps out a description of the range. */ struct pg_state { + struct ptdump_state ptdump; struct seq_file *seq; const struct addr_marker *marker; unsigned long start_address; - unsigned level; + int level; u64 current_prot; bool check_wx; unsigned long wx_pages; @@ -174,11 +176,14 @@ struct pg_level { }; static struct pg_level pg_level[] = { - { - }, { /* pgd */ + { /* pgd */ .name = "PGD", .bits = pte_bits, .num = ARRAY_SIZE(pte_bits), + }, { /* p4d */ + .name = "P4D", + .bits = pte_bits, + .num = ARRAY_SIZE(pte_bits), }, { /* pud */ .name = (CONFIG_PGTABLE_LEVELS > 3) ? "PUD" : "PGD", .bits = pte_bits, @@ -241,13 +246,17 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr) st->wx_pages += (addr - st->start_address) / PAGE_SIZE; } -static void note_page(struct pg_state *st, unsigned long addr, unsigned level, - u64 val) +static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, + unsigned long val) { + struct pg_state *st = container_of(pt_st, struct pg_state, ptdump); static const char units[] = "KMGTPE"; - u64 prot = val & pg_level[level].mask; + u64 prot = 0; + + if (level >= 0) + prot = val & pg_level[level].mask; - if (!st->level) { + if (st->level == -1) { st->level = level; st->current_prot = prot; st->start_address = addr; @@ -260,21 +269,22 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level, if (st->current_prot) { note_prot_uxn(st, addr); note_prot_wx(st, addr); - pt_dump_seq_printf(st->seq, "0x%016lx-0x%016lx ", + } + + pt_dump_seq_printf(st->seq, "0x%016lx-0x%016lx ", st->start_address, addr); - delta = (addr - st->start_address) >> 10; - while (!(delta & 1023) && unit[1]) { - delta >>= 10; - unit++; - } - pt_dump_seq_printf(st->seq, "%9lu%c %s", delta, *unit, - pg_level[st->level].name); - if (pg_level[st->level].bits) - dump_prot(st, pg_level[st->level].bits, - pg_level[st->level].num); - pt_dump_seq_puts(st->seq, "\n"); + delta = (addr - st->start_address) >> 10; + while (!(delta & 1023) && unit[1]) { + delta >>= 10; + unit++; } + pt_dump_seq_printf(st->seq, "%9lu%c %s", delta, *unit, + pg_level[st->level].name); + if (st->current_prot && pg_level[st->level].bits) + dump_prot(st, pg_level[st->level].bits, + pg_level[st->level].num); + pt_dump_seq_puts(st->seq, "\n"); if (addr >= st->marker[1].start_address) { st->marker++; @@ -293,85 +303,27 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level, } -static void walk_pte(struct pg_state *st, pmd_t *pmdp, unsigned long start, - unsigned long end) -{ - unsigned long addr = start; - pte_t *ptep = pte_offset_kernel(pmdp, start); - - do { - note_page(st, addr, 4, READ_ONCE(pte_val(*ptep))); - } while (ptep++, addr += PAGE_SIZE, addr != end); -} - -static void walk_pmd(struct pg_state *st, pud_t *pudp, unsigned long start, - unsigned long end) -{ - unsigned long next, addr = start; - pmd_t *pmdp = pmd_offset(pudp, start); - - do { - pmd_t pmd = READ_ONCE(*pmdp); - next = pmd_addr_end(addr, end); - - if (pmd_none(pmd) || pmd_sect(pmd)) { - note_page(st, addr, 3, pmd_val(pmd)); - } else { - BUG_ON(pmd_bad(pmd)); - walk_pte(st, pmdp, addr, next); - } - } while (pmdp++, addr = next, addr != end); -} - -static void walk_pud(struct pg_state *st, pgd_t *pgdp, unsigned long start, - unsigned long end) +void ptdump_walk(struct seq_file *s, struct ptdump_info *info) { - unsigned long next, addr = start; - pud_t *pudp = pud_offset(pgdp, start); - - do { - pud_t pud = READ_ONCE(*pudp); - next = pud_addr_end(addr, end); - - if (pud_none(pud) || pud_sect(pud)) { - note_page(st, addr, 2, pud_val(pud)); - } else { - BUG_ON(pud_bad(pud)); - walk_pmd(st, pudp, addr, next); - } - } while (pudp++, addr = next, addr != end); -} + unsigned long end = ~0UL; + struct pg_state st; -static void walk_pgd(struct pg_state *st, struct mm_struct *mm, - unsigned long start) -{ - unsigned long end = (start < TASK_SIZE_64) ? TASK_SIZE_64 : 0; - unsigned long next, addr = start; - pgd_t *pgdp = pgd_offset(mm, start); - - do { - pgd_t pgd = READ_ONCE(*pgdp); - next = pgd_addr_end(addr, end); - - if (pgd_none(pgd)) { - note_page(st, addr, 1, pgd_val(pgd)); - } else { - BUG_ON(pgd_bad(pgd)); - walk_pud(st, pgdp, addr, next); - } - } while (pgdp++, addr = next, addr != end); -} + if (info->base_addr < TASK_SIZE_64) + end = TASK_SIZE_64; -void ptdump_walk_pgd(struct seq_file *m, struct ptdump_info *info) -{ - struct pg_state st = { - .seq = m, + st = (struct pg_state){ + .seq = s, .marker = info->markers, + .ptdump = { + .note_page = note_page, + .range = (struct ptdump_range[]){ + {info->base_addr, end}, + {0, 0} + } + } }; - walk_pgd(&st, info->mm, info->base_addr); - - note_page(&st, 0, 0, 0); + ptdump_walk_pgd(&st.ptdump, info->mm, NULL); } static void ptdump_initialize(void) @@ -398,11 +350,19 @@ void ptdump_check_wx(void) { 0, NULL}, { -1, NULL}, }, + .level = -1, .check_wx = true, + .ptdump = { + .note_page = note_page, + .range = (struct ptdump_range[]) { + {PAGE_OFFSET, ~0UL}, + {0, 0} + } + } }; - walk_pgd(&st, &init_mm, PAGE_OFFSET); - note_page(&st, 0, 0, 0); + ptdump_walk_pgd(&st.ptdump, &init_mm, NULL); + if (st.wx_pages || st.uxn_pages) pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found, %lu non-UXN pages found\n", st.wx_pages, st.uxn_pages); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 40797cbfba2d..128f70852bf3 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -943,13 +943,13 @@ int __init arch_ioremap_pud_supported(void) * SW table walks can't handle removal of intermediate entries. */ return IS_ENABLED(CONFIG_ARM64_4K_PAGES) && - !IS_ENABLED(CONFIG_ARM64_PTDUMP_DEBUGFS); + !IS_ENABLED(CONFIG_PTDUMP_DEBUGFS); } int __init arch_ioremap_pmd_supported(void) { /* See arch_ioremap_pud_supported() */ - return !IS_ENABLED(CONFIG_ARM64_PTDUMP_DEBUGFS); + return !IS_ENABLED(CONFIG_PTDUMP_DEBUGFS); } int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot) diff --git a/arch/arm64/mm/ptdump_debugfs.c b/arch/arm64/mm/ptdump_debugfs.c index 064163f25592..1f2eae3e988b 100644 --- a/arch/arm64/mm/ptdump_debugfs.c +++ b/arch/arm64/mm/ptdump_debugfs.c @@ -7,7 +7,7 @@ static int ptdump_show(struct seq_file *m, void *v) { struct ptdump_info *info = m->private; - ptdump_walk_pgd(m, info); + ptdump_walk(m, info); return 0; } DEFINE_SHOW_ATTRIBUTE(ptdump); |