diff options
Diffstat (limited to 'arch')
71 files changed, 782 insertions, 506 deletions
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 2f2394cce24e..2b07f0a27a7d 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1160,115 +1160,8 @@ u64 kvm_vcpu_apply_reg_masks(const struct kvm_vcpu *, enum vcpu_sysreg, u64); __v; \ }) -u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg); -void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg); - -static inline bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val) -{ - /* - * *** VHE ONLY *** - * - * System registers listed in the switch are not saved on every - * exit from the guest but are only saved on vcpu_put. - * - * SYSREGS_ON_CPU *MUST* be checked before using this helper. - * - * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but - * should never be listed below, because the guest cannot modify its - * own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's - * thread when emulating cross-VCPU communication. - */ - if (!has_vhe()) - return false; - - switch (reg) { - case SCTLR_EL1: *val = read_sysreg_s(SYS_SCTLR_EL12); break; - case CPACR_EL1: *val = read_sysreg_s(SYS_CPACR_EL12); break; - case TTBR0_EL1: *val = read_sysreg_s(SYS_TTBR0_EL12); break; - case TTBR1_EL1: *val = read_sysreg_s(SYS_TTBR1_EL12); break; - case TCR_EL1: *val = read_sysreg_s(SYS_TCR_EL12); break; - case TCR2_EL1: *val = read_sysreg_s(SYS_TCR2_EL12); break; - case PIR_EL1: *val = read_sysreg_s(SYS_PIR_EL12); break; - case PIRE0_EL1: *val = read_sysreg_s(SYS_PIRE0_EL12); break; - case POR_EL1: *val = read_sysreg_s(SYS_POR_EL12); break; - case ESR_EL1: *val = read_sysreg_s(SYS_ESR_EL12); break; - case AFSR0_EL1: *val = read_sysreg_s(SYS_AFSR0_EL12); break; - case AFSR1_EL1: *val = read_sysreg_s(SYS_AFSR1_EL12); break; - case FAR_EL1: *val = read_sysreg_s(SYS_FAR_EL12); break; - case MAIR_EL1: *val = read_sysreg_s(SYS_MAIR_EL12); break; - case VBAR_EL1: *val = read_sysreg_s(SYS_VBAR_EL12); break; - case CONTEXTIDR_EL1: *val = read_sysreg_s(SYS_CONTEXTIDR_EL12);break; - case TPIDR_EL0: *val = read_sysreg_s(SYS_TPIDR_EL0); break; - case TPIDRRO_EL0: *val = read_sysreg_s(SYS_TPIDRRO_EL0); break; - case TPIDR_EL1: *val = read_sysreg_s(SYS_TPIDR_EL1); break; - case AMAIR_EL1: *val = read_sysreg_s(SYS_AMAIR_EL12); break; - case CNTKCTL_EL1: *val = read_sysreg_s(SYS_CNTKCTL_EL12); break; - case ELR_EL1: *val = read_sysreg_s(SYS_ELR_EL12); break; - case SPSR_EL1: *val = read_sysreg_s(SYS_SPSR_EL12); break; - case PAR_EL1: *val = read_sysreg_par(); break; - case DACR32_EL2: *val = read_sysreg_s(SYS_DACR32_EL2); break; - case IFSR32_EL2: *val = read_sysreg_s(SYS_IFSR32_EL2); break; - case DBGVCR32_EL2: *val = read_sysreg_s(SYS_DBGVCR32_EL2); break; - case ZCR_EL1: *val = read_sysreg_s(SYS_ZCR_EL12); break; - case SCTLR2_EL1: *val = read_sysreg_s(SYS_SCTLR2_EL12); break; - default: return false; - } - - return true; -} - -static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg) -{ - /* - * *** VHE ONLY *** - * - * System registers listed in the switch are not restored on every - * entry to the guest but are only restored on vcpu_load. - * - * SYSREGS_ON_CPU *MUST* be checked before using this helper. - * - * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but - * should never be listed below, because the MPIDR should only be set - * once, before running the VCPU, and never changed later. - */ - if (!has_vhe()) - return false; - - switch (reg) { - case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); break; - case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); break; - case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); break; - case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); break; - case TCR_EL1: write_sysreg_s(val, SYS_TCR_EL12); break; - case TCR2_EL1: write_sysreg_s(val, SYS_TCR2_EL12); break; - case PIR_EL1: write_sysreg_s(val, SYS_PIR_EL12); break; - case PIRE0_EL1: write_sysreg_s(val, SYS_PIRE0_EL12); break; - case POR_EL1: write_sysreg_s(val, SYS_POR_EL12); break; - case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); break; - case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); break; - case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); break; - case FAR_EL1: write_sysreg_s(val, SYS_FAR_EL12); break; - case MAIR_EL1: write_sysreg_s(val, SYS_MAIR_EL12); break; - case VBAR_EL1: write_sysreg_s(val, SYS_VBAR_EL12); break; - case CONTEXTIDR_EL1: write_sysreg_s(val, SYS_CONTEXTIDR_EL12);break; - case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); break; - case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); break; - case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); break; - case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); break; - case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); break; - case ELR_EL1: write_sysreg_s(val, SYS_ELR_EL12); break; - case SPSR_EL1: write_sysreg_s(val, SYS_SPSR_EL12); break; - case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); break; - case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); break; - case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break; - case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); break; - case ZCR_EL1: write_sysreg_s(val, SYS_ZCR_EL12); break; - case SCTLR2_EL1: write_sysreg_s(val, SYS_SCTLR2_EL12); break; - default: return false; - } - - return true; -} +u64 vcpu_read_sys_reg(const struct kvm_vcpu *, enum vcpu_sysreg); +void vcpu_write_sys_reg(struct kvm_vcpu *, u64, enum vcpu_sysreg); struct kvm_vm_stat { struct kvm_vm_stat_generic generic; diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index ae563ebd6aee..e4069f2ce642 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -180,6 +180,7 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu); int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, phys_addr_t pa, unsigned long size, bool writable); +int kvm_handle_guest_sea(struct kvm_vcpu *vcpu); int kvm_handle_guest_abort(struct kvm_vcpu *vcpu); phys_addr_t kvm_mmu_get_httbr(void); diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index 2888b5d03757..1246216616b5 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -355,6 +355,11 @@ static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walke return pteref; } +static inline kvm_pte_t *kvm_dereference_pteref_raw(kvm_pteref_t pteref) +{ + return pteref; +} + static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker) { /* @@ -384,6 +389,11 @@ static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walke return rcu_dereference_check(pteref, !(walker->flags & KVM_PGTABLE_WALK_SHARED)); } +static inline kvm_pte_t *kvm_dereference_pteref_raw(kvm_pteref_t pteref) +{ + return rcu_dereference_raw(pteref); +} + static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker) { if (walker->flags & KVM_PGTABLE_WALK_SHARED) @@ -552,6 +562,26 @@ static inline int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2 void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt); /** + * kvm_pgtable_stage2_destroy_range() - Destroy the unlinked range of addresses. + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). + * @addr: Intermediate physical address at which to place the mapping. + * @size: Size of the mapping. + * + * The page-table is assumed to be unreachable by any hardware walkers prior + * to freeing and therefore no TLB invalidation is performed. + */ +void kvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt, + u64 addr, u64 size); + +/** + * kvm_pgtable_stage2_destroy_pgd() - Destroy the PGD of guest stage-2 page-table. + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). + * + * It is assumed that the rest of the page-table is freed before this operation. + */ +void kvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt); + +/** * kvm_pgtable_stage2_free_unlinked() - Free an unlinked stage-2 paging structure. * @mm_ops: Memory management callbacks. * @pgtable: Unlinked stage-2 paging structure to be freed. diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h index ea58282f59bb..35f9d9478004 100644 --- a/arch/arm64/include/asm/kvm_pkvm.h +++ b/arch/arm64/include/asm/kvm_pkvm.h @@ -179,7 +179,9 @@ struct pkvm_mapping { int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu, struct kvm_pgtable_mm_ops *mm_ops); -void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt); +void pkvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt, + u64 addr, u64 size); +void pkvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt); int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, enum kvm_pgtable_prot prot, void *mc, enum kvm_pgtable_walk_flags flags); diff --git a/arch/arm64/include/asm/kvm_ras.h b/arch/arm64/include/asm/kvm_ras.h deleted file mode 100644 index 9398ade632aa..000000000000 --- a/arch/arm64/include/asm/kvm_ras.h +++ /dev/null @@ -1,25 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2018 - Arm Ltd */ - -#ifndef __ARM64_KVM_RAS_H__ -#define __ARM64_KVM_RAS_H__ - -#include <linux/acpi.h> -#include <linux/errno.h> -#include <linux/types.h> - -#include <asm/acpi.h> - -/* - * Was this synchronous external abort a RAS notification? - * Returns '0' for errors handled by some RAS subsystem, or -ENOENT. - */ -static inline int kvm_handle_guest_sea(void) -{ - /* apei_claim_sea(NULL) expects to mask interrupts itself */ - lockdep_assert_irqs_enabled(); - - return apei_claim_sea(NULL); -} - -#endif /* __ARM64_KVM_RAS_H__ */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index d5b5f2ae1afa..6604fd6f33f4 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -1142,9 +1142,6 @@ #define ARM64_FEATURE_FIELD_BITS 4 -/* Defined for compatibility only, do not add new users. */ -#define ARM64_FEATURE_MASK(x) (x##_MASK) - #ifdef __ASSEMBLY__ .macro mrs_s, rt, sreg diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 9ad065f15f1d..0d45c5e9b4da 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -2269,6 +2269,24 @@ static void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused) /* Firmware may have left a deferred SError in this register. */ write_sysreg_s(0, SYS_DISR_EL1); } +static bool has_rasv1p1(const struct arm64_cpu_capabilities *__unused, int scope) +{ + const struct arm64_cpu_capabilities rasv1p1_caps[] = { + { + ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, RAS, V1P1) + }, + { + ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, RAS, IMP) + }, + { + ARM64_CPUID_FIELDS(ID_AA64PFR1_EL1, RAS_frac, RASv1p1) + }, + }; + + return (has_cpuid_feature(&rasv1p1_caps[0], scope) || + (has_cpuid_feature(&rasv1p1_caps[1], scope) && + has_cpuid_feature(&rasv1p1_caps[2], scope))); +} #endif /* CONFIG_ARM64_RAS_EXTN */ #ifdef CONFIG_ARM64_PTR_AUTH @@ -2687,6 +2705,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .cpu_enable = cpu_clear_disr, ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, RAS, IMP) }, + { + .desc = "RASv1p1 Extension Support", + .capability = ARM64_HAS_RASV1P1_EXTN, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_rasv1p1, + }, #endif /* CONFIG_ARM64_RAS_EXTN */ #ifdef CONFIG_ARM64_AMU_EXTN { diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 888f7c7abf54..5bf101c869c9 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -2408,12 +2408,12 @@ static u64 get_hyp_id_aa64pfr0_el1(void) */ u64 val = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); - val &= ~(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2) | - ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3)); + val &= ~(ID_AA64PFR0_EL1_CSV2 | + ID_AA64PFR0_EL1_CSV3); - val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2), + val |= FIELD_PREP(ID_AA64PFR0_EL1_CSV2, arm64_get_spectre_v2_state() == SPECTRE_UNAFFECTED); - val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3), + val |= FIELD_PREP(ID_AA64PFR0_EL1_CSV3, arm64_get_meltdown_state() == SPECTRE_UNAFFECTED); return val; diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index 0e5610533949..d71ca4ddc9d1 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -1420,10 +1420,10 @@ void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) return; /* - * If we only have a single stage of translation (E2H=0 or - * TGE=1), exit early. Same thing if {VM,DC}=={0,0}. + * If we only have a single stage of translation (EL2&0), exit + * early. Same thing if {VM,DC}=={0,0}. */ - if (!vcpu_el2_e2h_is_set(vcpu) || vcpu_el2_tge_is_set(vcpu) || + if (compute_translation_regime(vcpu, op) == TR_EL20 || !(vcpu_read_sys_reg(vcpu, HCR_EL2) & (HCR_VM | HCR_DC))) return; diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c index 90cb4b7ae0ff..af69c897c2c3 100644 --- a/arch/arm64/kvm/emulate-nested.c +++ b/arch/arm64/kvm/emulate-nested.c @@ -2833,7 +2833,7 @@ int kvm_inject_nested_sea(struct kvm_vcpu *vcpu, bool iabt, u64 addr) iabt ? ESR_ELx_EC_IABT_LOW : ESR_ELx_EC_DABT_LOW); esr |= ESR_ELx_FSC_EXTABT | ESR_ELx_IL; - vcpu_write_sys_reg(vcpu, FAR_EL2, addr); + vcpu_write_sys_reg(vcpu, addr, FAR_EL2); if (__vcpu_sys_reg(vcpu, SCTLR2_EL2) & SCTLR2_EL1_EASE) return kvm_inject_nested(vcpu, esr, except_type_serror); diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c index 95d186e0bf54..bef40ddb16db 100644 --- a/arch/arm64/kvm/hyp/exception.c +++ b/arch/arm64/kvm/hyp/exception.c @@ -22,36 +22,28 @@ static inline u64 __vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg) { - u64 val; - - if (unlikely(vcpu_has_nv(vcpu))) + if (has_vhe()) return vcpu_read_sys_reg(vcpu, reg); - else if (vcpu_get_flag(vcpu, SYSREGS_ON_CPU) && - __vcpu_read_sys_reg_from_cpu(reg, &val)) - return val; return __vcpu_sys_reg(vcpu, reg); } static inline void __vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) { - if (unlikely(vcpu_has_nv(vcpu))) + if (has_vhe()) vcpu_write_sys_reg(vcpu, val, reg); - else if (!vcpu_get_flag(vcpu, SYSREGS_ON_CPU) || - !__vcpu_write_sys_reg_to_cpu(val, reg)) + else __vcpu_assign_sys_reg(vcpu, reg, val); } static void __vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long target_mode, u64 val) { - if (unlikely(vcpu_has_nv(vcpu))) { + if (has_vhe()) { if (target_mode == PSR_MODE_EL1h) vcpu_write_sys_reg(vcpu, val, SPSR_EL1); else vcpu_write_sys_reg(vcpu, val, SPSR_EL2); - } else if (has_vhe()) { - write_sysreg_el1(val, SYS_SPSR); } else { __vcpu_assign_sys_reg(vcpu, SPSR_EL1, val); } @@ -59,7 +51,7 @@ static void __vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long target_mode, static void __vcpu_write_spsr_abt(struct kvm_vcpu *vcpu, u64 val) { - if (has_vhe()) + if (has_vhe() && vcpu_get_flag(vcpu, SYSREGS_ON_CPU)) write_sysreg(val, spsr_abt); else vcpu->arch.ctxt.spsr_abt = val; @@ -67,7 +59,7 @@ static void __vcpu_write_spsr_abt(struct kvm_vcpu *vcpu, u64 val) static void __vcpu_write_spsr_und(struct kvm_vcpu *vcpu, u64 val) { - if (has_vhe()) + if (has_vhe() && vcpu_get_flag(vcpu, SYSREGS_ON_CPU)) write_sysreg(val, spsr_und); else vcpu->arch.ctxt.spsr_und = val; diff --git a/arch/arm64/kvm/hyp/nvhe/list_debug.c b/arch/arm64/kvm/hyp/nvhe/list_debug.c index 46a2d4f2b3c6..baa6260f88dc 100644 --- a/arch/arm64/kvm/hyp/nvhe/list_debug.c +++ b/arch/arm64/kvm/hyp/nvhe/list_debug.c @@ -17,7 +17,7 @@ static inline __must_check bool nvhe_check_data_corruption(bool v) bool corruption = unlikely(condition); \ if (corruption) { \ if (IS_ENABLED(CONFIG_BUG_ON_DATA_CORRUPTION)) { \ - BUG_ON(1); \ + BUG(); \ } else \ WARN_ON(1); \ } \ diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c index 1ddd9ed3cbb3..71d2fc97f004 100644 --- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c +++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c @@ -253,6 +253,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu) *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR); *vcpu_cpsr(vcpu) = read_sysreg_el2(SYS_SPSR); + __vcpu_assign_sys_reg(vcpu, read_sysreg_el1(SYS_VBAR), VBAR_EL1); kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC); @@ -372,6 +373,9 @@ static const struct sys_reg_desc pvm_sys_reg_descs[] = { /* Debug and Trace Registers are restricted. */ + /* Group 1 ID registers */ + HOST_HANDLED(SYS_REVIDR_EL1), + /* AArch64 mappings of the AArch32 ID registers */ /* CRm=1 */ AARCH32(SYS_ID_PFR0_EL1), @@ -460,6 +464,7 @@ static const struct sys_reg_desc pvm_sys_reg_descs[] = { HOST_HANDLED(SYS_CCSIDR_EL1), HOST_HANDLED(SYS_CLIDR_EL1), + HOST_HANDLED(SYS_AIDR_EL1), HOST_HANDLED(SYS_CSSELR_EL1), HOST_HANDLED(SYS_CTR_EL0), diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index c351b4abd5db..c36f282a175d 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -1551,21 +1551,38 @@ static int stage2_free_walker(const struct kvm_pgtable_visit_ctx *ctx, return 0; } -void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt) +void kvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt, + u64 addr, u64 size) { - size_t pgd_sz; struct kvm_pgtable_walker walker = { .cb = stage2_free_walker, .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST, }; - WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker)); + WARN_ON(kvm_pgtable_walk(pgt, addr, size, &walker)); +} + +void kvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt) +{ + size_t pgd_sz; + pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level) * PAGE_SIZE; - pgt->mm_ops->free_pages_exact(kvm_dereference_pteref(&walker, pgt->pgd), pgd_sz); + + /* + * Since the pgtable is unlinked at this point, and not shared with + * other walkers, safely deference pgd with kvm_dereference_pteref_raw() + */ + pgt->mm_ops->free_pages_exact(kvm_dereference_pteref_raw(pgt->pgd), pgd_sz); pgt->pgd = NULL; } +void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt) +{ + kvm_pgtable_stage2_destroy_range(pgt, 0, BIT(pgt->ia_bits)); + kvm_pgtable_stage2_destroy_pgd(pgt); +} + void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level) { kvm_pteref_t ptep = (kvm_pteref_t)pgtable; diff --git a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c index 87a54375bd6e..78579b31a420 100644 --- a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c +++ b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c @@ -20,7 +20,7 @@ static bool __is_be(struct kvm_vcpu *vcpu) if (vcpu_mode_is_32bit(vcpu)) return !!(read_sysreg_el2(SYS_SPSR) & PSR_AA32_E_BIT); - return !!(read_sysreg(SCTLR_EL1) & SCTLR_ELx_EE); + return !!(read_sysreg_el1(SYS_SCTLR) & SCTLR_ELx_EE); } /* diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index e482181c6632..0998ad4a2552 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -43,8 +43,11 @@ DEFINE_PER_CPU(unsigned long, kvm_hyp_vector); * * - API/APK: they are already accounted for by vcpu_load(), and can * only take effect across a load/put cycle (such as ERET) + * + * - FIEN: no way we let a guest have access to the RAS "Common Fault + * Injection" thing, whatever that does */ -#define NV_HCR_GUEST_EXCLUDE (HCR_TGE | HCR_API | HCR_APK) +#define NV_HCR_GUEST_EXCLUDE (HCR_TGE | HCR_API | HCR_APK | HCR_FIEN) static u64 __compute_hcr(struct kvm_vcpu *vcpu) { diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 1c78864767c5..86f3d80daf37 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -4,19 +4,20 @@ * Author: Christoffer Dall <c.dall@virtualopensystems.com> */ +#include <linux/acpi.h> #include <linux/mman.h> #include <linux/kvm_host.h> #include <linux/io.h> #include <linux/hugetlb.h> #include <linux/sched/signal.h> #include <trace/events/kvm.h> +#include <asm/acpi.h> #include <asm/pgalloc.h> #include <asm/cacheflush.h> #include <asm/kvm_arm.h> #include <asm/kvm_mmu.h> #include <asm/kvm_pgtable.h> #include <asm/kvm_pkvm.h> -#include <asm/kvm_ras.h> #include <asm/kvm_asm.h> #include <asm/kvm_emulate.h> #include <asm/virt.h> @@ -903,6 +904,38 @@ static int kvm_init_ipa_range(struct kvm_s2_mmu *mmu, unsigned long type) return 0; } +/* + * Assume that @pgt is valid and unlinked from the KVM MMU to free the + * page-table without taking the kvm_mmu_lock and without performing any + * TLB invalidations. + * + * Also, the range of addresses can be large enough to cause need_resched + * warnings, for instance on CONFIG_PREEMPT_NONE kernels. Hence, invoke + * cond_resched() periodically to prevent hogging the CPU for a long time + * and schedule something else, if required. + */ +static void stage2_destroy_range(struct kvm_pgtable *pgt, phys_addr_t addr, + phys_addr_t end) +{ + u64 next; + + do { + next = stage2_range_addr_end(addr, end); + KVM_PGT_FN(kvm_pgtable_stage2_destroy_range)(pgt, addr, + next - addr); + if (next != end) + cond_resched(); + } while (addr = next, addr != end); +} + +static void kvm_stage2_destroy(struct kvm_pgtable *pgt) +{ + unsigned int ia_bits = VTCR_EL2_IPA(pgt->mmu->vtcr); + + stage2_destroy_range(pgt, 0, BIT(ia_bits)); + KVM_PGT_FN(kvm_pgtable_stage2_destroy_pgd)(pgt); +} + /** * kvm_init_stage2_mmu - Initialise a S2 MMU structure * @kvm: The pointer to the KVM structure @@ -979,7 +1012,7 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t return 0; out_destroy_pgtable: - KVM_PGT_FN(kvm_pgtable_stage2_destroy)(pgt); + kvm_stage2_destroy(pgt); out_free_pgtable: kfree(pgt); return err; @@ -1076,7 +1109,7 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu) write_unlock(&kvm->mmu_lock); if (pgt) { - KVM_PGT_FN(kvm_pgtable_stage2_destroy)(pgt); + kvm_stage2_destroy(pgt); kfree(pgt); } } @@ -1811,6 +1844,19 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) read_unlock(&vcpu->kvm->mmu_lock); } +int kvm_handle_guest_sea(struct kvm_vcpu *vcpu) +{ + /* + * Give APEI the opportunity to claim the abort before handling it + * within KVM. apei_claim_sea() expects to be called with IRQs enabled. + */ + lockdep_assert_irqs_enabled(); + if (apei_claim_sea(NULL) == 0) + return 1; + + return kvm_inject_serror(vcpu); +} + /** * kvm_handle_guest_abort - handles all 2nd stage aborts * @vcpu: the VCPU pointer @@ -1834,17 +1880,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) gfn_t gfn; int ret, idx; - /* Synchronous External Abort? */ - if (kvm_vcpu_abt_issea(vcpu)) { - /* - * For RAS the host kernel may handle this abort. - * There is no need to pass the error into the guest. - */ - if (kvm_handle_guest_sea()) - return kvm_inject_serror(vcpu); - - return 1; - } + if (kvm_vcpu_abt_issea(vcpu)) + return kvm_handle_guest_sea(vcpu); esr = kvm_vcpu_get_esr(vcpu); diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 153b3e11b115..77db81bae86f 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -1287,7 +1287,10 @@ int kvm_handle_vncr_abort(struct kvm_vcpu *vcpu) struct vncr_tlb *vt = vcpu->arch.vncr_tlb; u64 esr = kvm_vcpu_get_esr(vcpu); - BUG_ON(!(esr & ESR_ELx_VNCR_SHIFT)); + WARN_ON_ONCE(!(esr & ESR_ELx_VNCR)); + + if (kvm_vcpu_abt_issea(vcpu)) + return kvm_handle_guest_sea(vcpu); if (esr_fsc_is_permission_fault(esr)) { inject_vncr_perm(vcpu); diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c index fcd70bfe44fb..61827cf6fea4 100644 --- a/arch/arm64/kvm/pkvm.c +++ b/arch/arm64/kvm/pkvm.c @@ -316,9 +316,16 @@ static int __pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 start, u64 e return 0; } -void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt) +void pkvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt, + u64 addr, u64 size) { - __pkvm_pgtable_stage2_unmap(pgt, 0, ~(0ULL)); + __pkvm_pgtable_stage2_unmap(pgt, addr, addr + size); +} + +void pkvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt) +{ + /* Expected to be called after all pKVM mappings have been released. */ + WARN_ON_ONCE(!RB_EMPTY_ROOT(&pgt->pkvm_mappings.rb_root)); } int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 82ffb3b3b3cf..b29f72478a50 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -82,43 +82,105 @@ static bool write_to_read_only(struct kvm_vcpu *vcpu, "sys_reg write to read-only register"); } -#define PURE_EL2_SYSREG(el2) \ - case el2: { \ - *el1r = el2; \ - return true; \ +enum sr_loc_attr { + SR_LOC_MEMORY = 0, /* Register definitely in memory */ + SR_LOC_LOADED = BIT(0), /* Register on CPU, unless it cannot */ + SR_LOC_MAPPED = BIT(1), /* Register in a different CPU register */ + SR_LOC_XLATED = BIT(2), /* Register translated to fit another reg */ + SR_LOC_SPECIAL = BIT(3), /* Demanding register, implies loaded */ +}; + +struct sr_loc { + enum sr_loc_attr loc; + enum vcpu_sysreg map_reg; + u64 (*xlate)(u64); +}; + +static enum sr_loc_attr locate_direct_register(const struct kvm_vcpu *vcpu, + enum vcpu_sysreg reg) +{ + switch (reg) { + case SCTLR_EL1: + case CPACR_EL1: + case TTBR0_EL1: + case TTBR1_EL1: + case TCR_EL1: + case TCR2_EL1: + case PIR_EL1: + case PIRE0_EL1: + case POR_EL1: + case ESR_EL1: + case AFSR0_EL1: + case AFSR1_EL1: + case FAR_EL1: + case MAIR_EL1: + case VBAR_EL1: + case CONTEXTIDR_EL1: + case AMAIR_EL1: + case CNTKCTL_EL1: + case ELR_EL1: + case SPSR_EL1: + case ZCR_EL1: + case SCTLR2_EL1: + /* + * EL1 registers which have an ELx2 mapping are loaded if + * we're not in hypervisor context. + */ + return is_hyp_ctxt(vcpu) ? SR_LOC_MEMORY : SR_LOC_LOADED; + + case TPIDR_EL0: + case TPIDRRO_EL0: + case TPIDR_EL1: + case PAR_EL1: + case DACR32_EL2: + case IFSR32_EL2: + case DBGVCR32_EL2: + /* These registers are always loaded, no matter what */ + return SR_LOC_LOADED; + + default: + /* Non-mapped EL2 registers are by definition in memory. */ + return SR_LOC_MEMORY; } +} -#define MAPPED_EL2_SYSREG(el2, el1, fn) \ - case el2: { \ - *xlate = fn; \ - *el1r = el1; \ - return true; \ +static void locate_mapped_el2_register(const struct kvm_vcpu *vcpu, + enum vcpu_sysreg reg, + enum vcpu_sysreg map_reg, + u64 (*xlate)(u64), + struct sr_loc *loc) +{ + if (!is_hyp_ctxt(vcpu)) { + loc->loc = SR_LOC_MEMORY; + return; + } + + loc->loc = SR_LOC_LOADED | SR_LOC_MAPPED; + loc->map_reg = map_reg; + + WARN_ON(locate_direct_register(vcpu, map_reg) != SR_LOC_MEMORY); + + if (xlate != NULL && !vcpu_el2_e2h_is_set(vcpu)) { + loc->loc |= SR_LOC_XLATED; + loc->xlate = xlate; } +} -static bool get_el2_to_el1_mapping(unsigned int reg, - unsigned int *el1r, u64 (**xlate)(u64)) +#define MAPPED_EL2_SYSREG(r, m, t) \ + case r: { \ + locate_mapped_el2_register(vcpu, r, m, t, loc); \ + break; \ + } + +static void locate_register(const struct kvm_vcpu *vcpu, enum vcpu_sysreg reg, + struct sr_loc *loc) { + if (!vcpu_get_flag(vcpu, SYSREGS_ON_CPU)) { + loc->loc = SR_LOC_MEMORY; + return; + } + switch (reg) { - PURE_EL2_SYSREG( VPIDR_EL2 ); - PURE_EL2_SYSREG( VMPIDR_EL2 ); - PURE_EL2_SYSREG( ACTLR_EL2 ); - PURE_EL2_SYSREG( HCR_EL2 ); - PURE_EL2_SYSREG( MDCR_EL2 ); - PURE_EL2_SYSREG( HSTR_EL2 ); - PURE_EL2_SYSREG( HACR_EL2 ); - PURE_EL2_SYSREG( VTTBR_EL2 ); - PURE_EL2_SYSREG( VTCR_EL2 ); - PURE_EL2_SYSREG( TPIDR_EL2 ); - PURE_EL2_SYSREG( HPFAR_EL2 ); - PURE_EL2_SYSREG( HCRX_EL2 ); - PURE_EL2_SYSREG( HFGRTR_EL2 ); - PURE_EL2_SYSREG( HFGWTR_EL2 ); - PURE_EL2_SYSREG( HFGITR_EL2 ); - PURE_EL2_SYSREG( HDFGRTR_EL2 ); - PURE_EL2_SYSREG( HDFGWTR_EL2 ); - PURE_EL2_SYSREG( HAFGRTR_EL2 ); - PURE_EL2_SYSREG( CNTVOFF_EL2 ); - PURE_EL2_SYSREG( CNTHCTL_EL2 ); MAPPED_EL2_SYSREG(SCTLR_EL2, SCTLR_EL1, translate_sctlr_el2_to_sctlr_el1 ); MAPPED_EL2_SYSREG(CPTR_EL2, CPACR_EL1, @@ -144,125 +206,189 @@ static bool get_el2_to_el1_mapping(unsigned int reg, MAPPED_EL2_SYSREG(ZCR_EL2, ZCR_EL1, NULL ); MAPPED_EL2_SYSREG(CONTEXTIDR_EL2, CONTEXTIDR_EL1, NULL ); MAPPED_EL2_SYSREG(SCTLR2_EL2, SCTLR2_EL1, NULL ); + case CNTHCTL_EL2: + /* CNTHCTL_EL2 is super special, until we support NV2.1 */ + loc->loc = ((is_hyp_ctxt(vcpu) && vcpu_el2_e2h_is_set(vcpu)) ? + SR_LOC_SPECIAL : SR_LOC_MEMORY); + break; default: - return false; + loc->loc = locate_direct_register(vcpu, reg); } } -u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg) +static u64 read_sr_from_cpu(enum vcpu_sysreg reg) { u64 val = 0x8badf00d8badf00d; - u64 (*xlate)(u64) = NULL; - unsigned int el1r; - if (!vcpu_get_flag(vcpu, SYSREGS_ON_CPU)) - goto memory_read; + switch (reg) { + case SCTLR_EL1: val = read_sysreg_s(SYS_SCTLR_EL12); break; + case CPACR_EL1: val = read_sysreg_s(SYS_CPACR_EL12); break; + case TTBR0_EL1: val = read_sysreg_s(SYS_TTBR0_EL12); break; + case TTBR1_EL1: val = read_sysreg_s(SYS_TTBR1_EL12); break; + case TCR_EL1: val = read_sysreg_s(SYS_TCR_EL12); break; + case TCR2_EL1: val = read_sysreg_s(SYS_TCR2_EL12); break; + case PIR_EL1: val = read_sysreg_s(SYS_PIR_EL12); break; + case PIRE0_EL1: val = read_sysreg_s(SYS_PIRE0_EL12); break; + case POR_EL1: val = read_sysreg_s(SYS_POR_EL12); break; + case ESR_EL1: val = read_sysreg_s(SYS_ESR_EL12); break; + case AFSR0_EL1: val = read_sysreg_s(SYS_AFSR0_EL12); break; + case AFSR1_EL1: val = read_sysreg_s(SYS_AFSR1_EL12); break; + case FAR_EL1: val = read_sysreg_s(SYS_FAR_EL12); break; + case MAIR_EL1: val = read_sysreg_s(SYS_MAIR_EL12); break; + case VBAR_EL1: val = read_sysreg_s(SYS_VBAR_EL12); break; + case CONTEXTIDR_EL1: val = read_sysreg_s(SYS_CONTEXTIDR_EL12);break; + case AMAIR_EL1: val = read_sysreg_s(SYS_AMAIR_EL12); break; + case CNTKCTL_EL1: val = read_sysreg_s(SYS_CNTKCTL_EL12); break; + case ELR_EL1: val = read_sysreg_s(SYS_ELR_EL12); break; + case SPSR_EL1: val = read_sysreg_s(SYS_SPSR_EL12); break; + case ZCR_EL1: val = read_sysreg_s(SYS_ZCR_EL12); break; + case SCTLR2_EL1: val = read_sysreg_s(SYS_SCTLR2_EL12); break; + case TPIDR_EL0: val = read_sysreg_s(SYS_TPIDR_EL0); break; + case TPIDRRO_EL0: val = read_sysreg_s(SYS_TPIDRRO_EL0); break; + case TPIDR_EL1: val = read_sysreg_s(SYS_TPIDR_EL1); break; + case PAR_EL1: val = read_sysreg_par(); break; + case DACR32_EL2: val = read_sysreg_s(SYS_DACR32_EL2); break; + case IFSR32_EL2: val = read_sysreg_s(SYS_IFSR32_EL2); break; + case DBGVCR32_EL2: val = read_sysreg_s(SYS_DBGVCR32_EL2); break; + default: WARN_ON_ONCE(1); + } - if (unlikely(get_el2_to_el1_mapping(reg, &el1r, &xlate))) { - if (!is_hyp_ctxt(vcpu)) - goto memory_read; + return val; +} + +static void write_sr_to_cpu(enum vcpu_sysreg reg, u64 val) +{ + switch (reg) { + case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); break; + case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); break; + case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); break; + case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); break; + case TCR_EL1: write_sysreg_s(val, SYS_TCR_EL12); break; + case TCR2_EL1: write_sysreg_s(val, SYS_TCR2_EL12); break; + case PIR_EL1: write_sysreg_s(val, SYS_PIR_EL12); break; + case PIRE0_EL1: write_sysreg_s(val, SYS_PIRE0_EL12); break; + case POR_EL1: write_sysreg_s(val, SYS_POR_EL12); break; + case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); break; + case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); break; + case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); break; + case FAR_EL1: write_sysreg_s(val, SYS_FAR_EL12); break; + case MAIR_EL1: write_sysreg_s(val, SYS_MAIR_EL12); break; + case VBAR_EL1: write_sysreg_s(val, SYS_VBAR_EL12); break; + case CONTEXTIDR_EL1: write_sysreg_s(val, SYS_CONTEXTIDR_EL12);break; + case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); break; + case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); break; + case ELR_EL1: write_sysreg_s(val, SYS_ELR_EL12); break; + case SPSR_EL1: write_sysreg_s(val, SYS_SPSR_EL12); break; + case ZCR_EL1: write_sysreg_s(val, SYS_ZCR_EL12); break; + case SCTLR2_EL1: write_sysreg_s(val, SYS_SCTLR2_EL12); break; + case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); break; + case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); break; + case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); break; + case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); break; + case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); break; + case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break; + case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); break; + default: WARN_ON_ONCE(1); + } +} + +u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, enum vcpu_sysreg reg) +{ + struct sr_loc loc = {}; + + locate_register(vcpu, reg, &loc); + + WARN_ON_ONCE(!has_vhe() && loc.loc != SR_LOC_MEMORY); + + if (loc.loc & SR_LOC_SPECIAL) { + u64 val; + + WARN_ON_ONCE(loc.loc & ~SR_LOC_SPECIAL); /* - * CNTHCTL_EL2 requires some special treatment to - * account for the bits that can be set via CNTKCTL_EL1. + * CNTHCTL_EL2 requires some special treatment to account + * for the bits that can be set via CNTKCTL_EL1 when E2H==1. */ switch (reg) { case CNTHCTL_EL2: - if (vcpu_el2_e2h_is_set(vcpu)) { - val = read_sysreg_el1(SYS_CNTKCTL); - val &= CNTKCTL_VALID_BITS; - val |= __vcpu_sys_reg(vcpu, reg) & ~CNTKCTL_VALID_BITS; - return val; - } - break; + val = read_sysreg_el1(SYS_CNTKCTL); + val &= CNTKCTL_VALID_BITS; + val |= __vcpu_sys_reg(vcpu, reg) & ~CNTKCTL_VALID_BITS; + return val; + default: + WARN_ON_ONCE(1); } + } - /* - * If this register does not have an EL1 counterpart, - * then read the stored EL2 version. - */ - if (reg == el1r) - goto memory_read; + if (loc.loc & SR_LOC_LOADED) { + enum vcpu_sysreg map_reg = reg; - /* - * If we have a non-VHE guest and that the sysreg - * requires translation to be used at EL1, use the - * in-memory copy instead. - */ - if (!vcpu_el2_e2h_is_set(vcpu) && xlate) - goto memory_read; + if (loc.loc & SR_LOC_MAPPED) + map_reg = loc.map_reg; - /* Get the current version of the EL1 counterpart. */ - WARN_ON(!__vcpu_read_sys_reg_from_cpu(el1r, &val)); - if (reg >= __SANITISED_REG_START__) - val = kvm_vcpu_apply_reg_masks(vcpu, reg, val); - - return val; - } + if (!(loc.loc & SR_LOC_XLATED)) { + u64 val = read_sr_from_cpu(map_reg); - /* EL1 register can't be on the CPU if the guest is in vEL2. */ - if (unlikely(is_hyp_ctxt(vcpu))) - goto memory_read; + if (reg >= __SANITISED_REG_START__) + val = kvm_vcpu_apply_reg_masks(vcpu, reg, val); - if (__vcpu_read_sys_reg_from_cpu(reg, &val)) - return val; + return val; + } + } -memory_read: return __vcpu_sys_reg(vcpu, reg); } -void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) +void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, enum vcpu_sysreg reg) { - u64 (*xlate)(u64) = NULL; - unsigned int el1r; + struct sr_loc loc = {}; - if (!vcpu_get_flag(vcpu, SYSREGS_ON_CPU)) - goto memory_write; + locate_register(vcpu, reg, &loc); - if (unlikely(get_el2_to_el1_mapping(reg, &el1r, &xlate))) { - if (!is_hyp_ctxt(vcpu)) - goto memory_write; + WARN_ON_ONCE(!has_vhe() && loc.loc != SR_LOC_MEMORY); - /* - * Always store a copy of the write to memory to avoid having - * to reverse-translate virtual EL2 system registers for a - * non-VHE guest hypervisor. - */ - __vcpu_assign_sys_reg(vcpu, reg, val); + if (loc.loc & SR_LOC_SPECIAL) { + + WARN_ON_ONCE(loc.loc & ~SR_LOC_SPECIAL); switch (reg) { case CNTHCTL_EL2: /* - * If E2H=0, CNHTCTL_EL2 is a pure shadow register. - * Otherwise, some of the bits are backed by + * If E2H=1, some of the bits are backed by * CNTKCTL_EL1, while the rest is kept in memory. * Yes, this is fun stuff. */ - if (vcpu_el2_e2h_is_set(vcpu)) - write_sysreg_el1(val, SYS_CNTKCTL); - return; + write_sysreg_el1(val, SYS_CNTKCTL); + break; + default: + WARN_ON_ONCE(1); } + } - /* No EL1 counterpart? We're done here.? */ - if (reg == el1r) - return; + if (loc.loc & SR_LOC_LOADED) { + enum vcpu_sysreg map_reg = reg; + u64 xlated_val; - if (!vcpu_el2_e2h_is_set(vcpu) && xlate) - val = xlate(val); + if (reg >= __SANITISED_REG_START__) + val = kvm_vcpu_apply_reg_masks(vcpu, reg, val); - /* Redirect this to the EL1 version of the register. */ - WARN_ON(!__vcpu_write_sys_reg_to_cpu(val, el1r)); - return; - } + if (loc.loc & SR_LOC_MAPPED) + map_reg = loc.map_reg; - /* EL1 register can't be on the CPU if the guest is in vEL2. */ - if (unlikely(is_hyp_ctxt(vcpu))) - goto memory_write; + if (loc.loc & SR_LOC_XLATED) + xlated_val = loc.xlate(val); + else + xlated_val = val; - if (__vcpu_write_sys_reg_to_cpu(val, reg)) - return; + write_sr_to_cpu(map_reg, xlated_val); + + /* + * Fall through to write the backing store anyway, which + * allows translated registers to be directly read without a + * reverse translation. + */ + } -memory_write: __vcpu_assign_sys_reg(vcpu, reg, val); } @@ -1584,6 +1710,7 @@ static u8 pmuver_to_perfmon(u8 pmuver) } static u64 sanitise_id_aa64pfr0_el1(const struct kvm_vcpu *vcpu, u64 val); +static u64 sanitise_id_aa64pfr1_el1(const struct kvm_vcpu *vcpu, u64 val); static u64 sanitise_id_aa64dfr0_el1(const struct kvm_vcpu *vcpu, u64 val); /* Read a sanitised cpufeature ID register by sys_reg_desc */ @@ -1606,19 +1733,7 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu, val = sanitise_id_aa64pfr0_el1(vcpu, val); break; case SYS_ID_AA64PFR1_EL1: - if (!kvm_has_mte(vcpu->kvm)) { - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE_frac); - } - - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_SME); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_RNDR_trap); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_NMI); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_GCS); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_THE); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTEX); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_PFAR); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MPAM_frac); + val = sanitise_id_aa64pfr1_el1(vcpu, val); break; case SYS_ID_AA64PFR2_EL1: val &= ID_AA64PFR2_EL1_FPMR | @@ -1628,18 +1743,18 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu, break; case SYS_ID_AA64ISAR1_EL1: if (!vcpu_has_ptrauth(vcpu)) - val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA) | - ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API) | - ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPA) | - ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPI)); + val &= ~(ID_AA64ISAR1_EL1_APA | + ID_AA64ISAR1_EL1_API | + ID_AA64ISAR1_EL1_GPA | + ID_AA64ISAR1_EL1_GPI); break; case SYS_ID_AA64ISAR2_EL1: if (!vcpu_has_ptrauth(vcpu)) - val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3) | - ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3)); + val &= ~(ID_AA64ISAR2_EL1_APA3 | + ID_AA64ISAR2_EL1_GPA3); if (!cpus_have_final_cap(ARM64_HAS_WFXT) || has_broken_cntvoff()) - val &= ~ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_WFxT); + val &= ~ID_AA64ISAR2_EL1_WFxT; break; case SYS_ID_AA64ISAR3_EL1: val &= ID_AA64ISAR3_EL1_FPRCVT | ID_AA64ISAR3_EL1_FAMINMAX; @@ -1655,7 +1770,7 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu, ID_AA64MMFR3_EL1_S1PIE; break; case SYS_ID_MMFR4_EL1: - val &= ~ARM64_FEATURE_MASK(ID_MMFR4_EL1_CCIDX); + val &= ~ID_MMFR4_EL1_CCIDX; break; } @@ -1836,6 +1951,31 @@ static u64 sanitise_id_aa64pfr0_el1(const struct kvm_vcpu *vcpu, u64 val) return val; } +static u64 sanitise_id_aa64pfr1_el1(const struct kvm_vcpu *vcpu, u64 val) +{ + u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); + + if (!kvm_has_mte(vcpu->kvm)) { + val &= ~ID_AA64PFR1_EL1_MTE; + val &= ~ID_AA64PFR1_EL1_MTE_frac; + } + + if (!(cpus_have_final_cap(ARM64_HAS_RASV1P1_EXTN) && + SYS_FIELD_GET(ID_AA64PFR0_EL1, RAS, pfr0) == ID_AA64PFR0_EL1_RAS_IMP)) + val &= ~ID_AA64PFR1_EL1_RAS_frac; + + val &= ~ID_AA64PFR1_EL1_SME; + val &= ~ID_AA64PFR1_EL1_RNDR_trap; + val &= ~ID_AA64PFR1_EL1_NMI; + val &= ~ID_AA64PFR1_EL1_GCS; + val &= ~ID_AA64PFR1_EL1_THE; + val &= ~ID_AA64PFR1_EL1_MTEX; + val &= ~ID_AA64PFR1_EL1_PFAR; + val &= ~ID_AA64PFR1_EL1_MPAM_frac; + + return val; +} + static u64 sanitise_id_aa64dfr0_el1(const struct kvm_vcpu *vcpu, u64 val) { val = ID_REG_LIMIT_FIELD_ENUM(val, ID_AA64DFR0_EL1, DebugVer, V8P8); @@ -2697,6 +2837,18 @@ static bool access_ras(struct kvm_vcpu *vcpu, struct kvm *kvm = vcpu->kvm; switch(reg_to_encoding(r)) { + case SYS_ERXPFGCDN_EL1: + case SYS_ERXPFGCTL_EL1: + case SYS_ERXPFGF_EL1: + case SYS_ERXMISC2_EL1: + case SYS_ERXMISC3_EL1: + if (!(kvm_has_feat(kvm, ID_AA64PFR0_EL1, RAS, V1P1) || + (kvm_has_feat_enum(kvm, ID_AA64PFR0_EL1, RAS, IMP) && + kvm_has_feat(kvm, ID_AA64PFR1_EL1, RAS_frac, RASv1p1)))) { + kvm_inject_undefined(vcpu); + return false; + } + break; default: if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, RAS, IMP)) { kvm_inject_undefined(vcpu); @@ -2929,7 +3081,6 @@ static const struct sys_reg_desc sys_reg_descs[] = { ~(ID_AA64PFR0_EL1_AMU | ID_AA64PFR0_EL1_MPAM | ID_AA64PFR0_EL1_SVE | - ID_AA64PFR0_EL1_RAS | ID_AA64PFR0_EL1_AdvSIMD | ID_AA64PFR0_EL1_FP)), ID_FILTERED(ID_AA64PFR1_EL1, id_aa64pfr1_el1, @@ -2943,7 +3094,6 @@ static const struct sys_reg_desc sys_reg_descs[] = { ID_AA64PFR1_EL1_SME | ID_AA64PFR1_EL1_RES0 | ID_AA64PFR1_EL1_MPAM_frac | - ID_AA64PFR1_EL1_RAS_frac | ID_AA64PFR1_EL1_MTE)), ID_WRITABLE(ID_AA64PFR2_EL1, ID_AA64PFR2_EL1_FPMR | @@ -3063,8 +3213,13 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_ERXCTLR_EL1), access_ras }, { SYS_DESC(SYS_ERXSTATUS_EL1), access_ras }, { SYS_DESC(SYS_ERXADDR_EL1), access_ras }, + { SYS_DESC(SYS_ERXPFGF_EL1), access_ras }, + { SYS_DESC(SYS_ERXPFGCTL_EL1), access_ras }, + { SYS_DESC(SYS_ERXPFGCDN_EL1), access_ras }, { SYS_DESC(SYS_ERXMISC0_EL1), access_ras }, { SYS_DESC(SYS_ERXMISC1_EL1), access_ras }, + { SYS_DESC(SYS_ERXMISC2_EL1), access_ras }, + { SYS_DESC(SYS_ERXMISC3_EL1), access_ras }, MTE_REG(TFSR_EL1), MTE_REG(TFSRE0_EL1), diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c index a3ef185209e9..70d50c77e5dc 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c @@ -50,6 +50,14 @@ bool vgic_has_its(struct kvm *kvm) bool vgic_supports_direct_msis(struct kvm *kvm) { + /* + * Deliberately conflate vLPI and vSGI support on GICv4.1 hardware, + * indirectly allowing userspace to control whether or not vPEs are + * allocated for the VM. + */ + if (system_supports_direct_sgis() && !vgic_supports_direct_sgis(kvm)) + return false; + return kvm_vgic_global_state.has_gicv4 && vgic_has_its(kvm); } diff --git a/arch/arm64/kvm/vgic/vgic-mmio.c b/arch/arm64/kvm/vgic/vgic-mmio.c index e416e433baff..a573b1f0c6cb 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio.c +++ b/arch/arm64/kvm/vgic/vgic-mmio.c @@ -1091,7 +1091,7 @@ int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address, len = vgic_v3_init_dist_iodev(io_device); break; default: - BUG_ON(1); + BUG(); } io_device->base_addr = dist_base_address; diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index 1384a04c0784..de1c1d3261c3 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -396,15 +396,7 @@ bool vgic_supports_direct_sgis(struct kvm *kvm); static inline bool vgic_supports_direct_irqs(struct kvm *kvm) { - /* - * Deliberately conflate vLPI and vSGI support on GICv4.1 hardware, - * indirectly allowing userspace to control whether or not vPEs are - * allocated for the VM. - */ - if (system_supports_direct_sgis()) - return vgic_supports_direct_sgis(kvm); - - return vgic_supports_direct_msis(kvm); + return vgic_supports_direct_msis(kvm) || vgic_supports_direct_sgis(kvm); } int vgic_v4_init(struct kvm *kvm); diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index ef0b7946f5a4..9ff5cdbd2759 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -53,6 +53,7 @@ HAS_S1PIE HAS_S1POE HAS_SCTLR2 HAS_RAS_EXTN +HAS_RASV1P1_EXTN HAS_RNG HAS_SB HAS_STAGE2_FWB diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index b0703a4e02a2..a3a9759414f4 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -102,7 +102,13 @@ KBUILD_CFLAGS += $(call cc-option,-mthin-add-sub) $(call cc-option,-Wa$(comma) ifdef CONFIG_OBJTOOL ifdef CONFIG_CC_HAS_ANNOTATE_TABLEJUMP +# The annotate-tablejump option can not be passed to LLVM backend when LTO is enabled. +# Ensure it is aware of linker with LTO, '--loongarch-annotate-tablejump' also needs to +# be passed via '-mllvm' to ld.lld. KBUILD_CFLAGS += -mannotate-tablejump +ifdef CONFIG_LTO_CLANG +KBUILD_LDFLAGS += -mllvm --loongarch-annotate-tablejump +endif else KBUILD_CFLAGS += -fno-jump-tables # keep compatibility with older compilers endif diff --git a/arch/loongarch/include/asm/stackframe.h b/arch/loongarch/include/asm/stackframe.h index 3eda298702b1..5cb568a60cf8 100644 --- a/arch/loongarch/include/asm/stackframe.h +++ b/arch/loongarch/include/asm/stackframe.h @@ -58,7 +58,7 @@ .endm .macro STACKLEAK_ERASE -#ifdef CONFIG_GCC_PLUGIN_STACKLEAK +#ifdef CONFIG_KSTACK_ERASE bl stackleak_erase_on_task_stack #endif .endm diff --git a/arch/loongarch/include/uapi/asm/setup.h b/arch/loongarch/include/uapi/asm/setup.h new file mode 100644 index 000000000000..d46363ce3e02 --- /dev/null +++ b/arch/loongarch/include/uapi/asm/setup.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + +#ifndef _UAPI_ASM_LOONGARCH_SETUP_H +#define _UAPI_ASM_LOONGARCH_SETUP_H + +#define COMMAND_LINE_SIZE 4096 + +#endif /* _UAPI_ASM_LOONGARCH_SETUP_H */ diff --git a/arch/loongarch/kernel/module-sections.c b/arch/loongarch/kernel/module-sections.c index e2f30ff9afde..a43ba7f9f987 100644 --- a/arch/loongarch/kernel/module-sections.c +++ b/arch/loongarch/kernel/module-sections.c @@ -8,6 +8,7 @@ #include <linux/module.h> #include <linux/moduleloader.h> #include <linux/ftrace.h> +#include <linux/sort.h> Elf_Addr module_emit_got_entry(struct module *mod, Elf_Shdr *sechdrs, Elf_Addr val) { @@ -61,39 +62,38 @@ Elf_Addr module_emit_plt_entry(struct module *mod, Elf_Shdr *sechdrs, Elf_Addr v return (Elf_Addr)&plt[nr]; } -static int is_rela_equal(const Elf_Rela *x, const Elf_Rela *y) -{ - return x->r_info == y->r_info && x->r_addend == y->r_addend; -} +#define cmp_3way(a, b) ((a) < (b) ? -1 : (a) > (b)) -static bool duplicate_rela(const Elf_Rela *rela, int idx) +static int compare_rela(const void *x, const void *y) { - int i; + int ret; + const Elf_Rela *rela_x = x, *rela_y = y; - for (i = 0; i < idx; i++) { - if (is_rela_equal(&rela[i], &rela[idx])) - return true; - } + ret = cmp_3way(rela_x->r_info, rela_y->r_info); + if (ret == 0) + ret = cmp_3way(rela_x->r_addend, rela_y->r_addend); - return false; + return ret; } static void count_max_entries(Elf_Rela *relas, int num, unsigned int *plts, unsigned int *gots) { - unsigned int i, type; + unsigned int i; + + sort(relas, num, sizeof(Elf_Rela), compare_rela, NULL); for (i = 0; i < num; i++) { - type = ELF_R_TYPE(relas[i].r_info); - switch (type) { + if (i && !compare_rela(&relas[i-1], &relas[i])) + continue; + + switch (ELF_R_TYPE(relas[i].r_info)) { case R_LARCH_SOP_PUSH_PLT_PCREL: case R_LARCH_B26: - if (!duplicate_rela(relas, i)) - (*plts)++; + (*plts)++; break; case R_LARCH_GOT_PC_HI20: - if (!duplicate_rela(relas, i)) - (*gots)++; + (*gots)++; break; default: break; /* Do nothing. */ diff --git a/arch/loongarch/kernel/signal.c b/arch/loongarch/kernel/signal.c index 4740cb5b2388..c9f7ca778364 100644 --- a/arch/loongarch/kernel/signal.c +++ b/arch/loongarch/kernel/signal.c @@ -677,6 +677,11 @@ static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, for (i = 1; i < 32; i++) err |= __put_user(regs->regs[i], &sc->sc_regs[i]); +#ifdef CONFIG_CPU_HAS_LBT + if (extctx->lbt.addr) + err |= protected_save_lbt_context(extctx); +#endif + if (extctx->lasx.addr) err |= protected_save_lasx_context(extctx); else if (extctx->lsx.addr) @@ -684,11 +689,6 @@ static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, else if (extctx->fpu.addr) err |= protected_save_fpu_context(extctx); -#ifdef CONFIG_CPU_HAS_LBT - if (extctx->lbt.addr) - err |= protected_save_lbt_context(extctx); -#endif - /* Set the "end" magic */ info = (struct sctx_info *)extctx->end.addr; err |= __put_user(0, &info->magic); diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c index 367906b10f81..f3092f2de8b5 100644 --- a/arch/loongarch/kernel/time.c +++ b/arch/loongarch/kernel/time.c @@ -5,6 +5,7 @@ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited */ #include <linux/clockchips.h> +#include <linux/cpuhotplug.h> #include <linux/delay.h> #include <linux/export.h> #include <linux/init.h> @@ -102,6 +103,23 @@ static int constant_timer_next_event(unsigned long delta, struct clock_event_dev return 0; } +static int arch_timer_starting(unsigned int cpu) +{ + set_csr_ecfg(ECFGF_TIMER); + + return 0; +} + +static int arch_timer_dying(unsigned int cpu) +{ + constant_set_state_shutdown(this_cpu_ptr(&constant_clockevent_device)); + + /* Clear Timer Interrupt */ + write_csr_tintclear(CSR_TINTCLR_TI); + + return 0; +} + static unsigned long get_loops_per_jiffy(void) { unsigned long lpj = (unsigned long)const_clock_freq; @@ -172,6 +190,10 @@ int constant_clockevent_init(void) lpj_fine = get_loops_per_jiffy(); pr_info("Constant clock event device register\n"); + cpuhp_setup_state(CPUHP_AP_LOONGARCH_ARCH_TIMER_STARTING, + "clockevents/loongarch/timer:starting", + arch_timer_starting, arch_timer_dying); + return 0; } diff --git a/arch/loongarch/kvm/intc/eiointc.c b/arch/loongarch/kvm/intc/eiointc.c index a3a12af9ecbf..026b139dcff2 100644 --- a/arch/loongarch/kvm/intc/eiointc.c +++ b/arch/loongarch/kvm/intc/eiointc.c @@ -45,7 +45,12 @@ static void eiointc_update_irq(struct loongarch_eiointc *s, int irq, int level) } cpu = s->sw_coremap[irq]; - vcpu = kvm_get_vcpu(s->kvm, cpu); + vcpu = kvm_get_vcpu_by_id(s->kvm, cpu); + if (unlikely(vcpu == NULL)) { + kvm_err("%s: invalid target cpu: %d\n", __func__, cpu); + return; + } + if (level) { /* if not enable return false */ if (!test_bit(irq, (unsigned long *)s->enable.reg_u32)) diff --git a/arch/loongarch/kvm/intc/ipi.c b/arch/loongarch/kvm/intc/ipi.c index e658d5b37c04..5a8481dda052 100644 --- a/arch/loongarch/kvm/intc/ipi.c +++ b/arch/loongarch/kvm/intc/ipi.c @@ -99,7 +99,7 @@ static void write_mailbox(struct kvm_vcpu *vcpu, int offset, uint64_t data, int static int send_ipi_data(struct kvm_vcpu *vcpu, gpa_t addr, uint64_t data) { int i, idx, ret; - uint32_t val = 0, mask = 0; + uint64_t val = 0, mask = 0; /* * Bit 27-30 is mask for byte writing. @@ -108,7 +108,7 @@ static int send_ipi_data(struct kvm_vcpu *vcpu, gpa_t addr, uint64_t data) if ((data >> 27) & 0xf) { /* Read the old val */ idx = srcu_read_lock(&vcpu->kvm->srcu); - ret = kvm_io_bus_read(vcpu, KVM_IOCSR_BUS, addr, sizeof(val), &val); + ret = kvm_io_bus_read(vcpu, KVM_IOCSR_BUS, addr, 4, &val); srcu_read_unlock(&vcpu->kvm->srcu, idx); if (unlikely(ret)) { kvm_err("%s: : read data from addr %llx failed\n", __func__, addr); @@ -124,7 +124,7 @@ static int send_ipi_data(struct kvm_vcpu *vcpu, gpa_t addr, uint64_t data) } val |= ((uint32_t)(data >> 32) & ~mask); idx = srcu_read_lock(&vcpu->kvm->srcu); - ret = kvm_io_bus_write(vcpu, KVM_IOCSR_BUS, addr, sizeof(val), &val); + ret = kvm_io_bus_write(vcpu, KVM_IOCSR_BUS, addr, 4, &val); srcu_read_unlock(&vcpu->kvm->srcu, idx); if (unlikely(ret)) kvm_err("%s: : write data to addr %llx failed\n", __func__, addr); @@ -298,7 +298,7 @@ static int kvm_ipi_regs_access(struct kvm_device *dev, cpu = (attr->attr >> 16) & 0x3ff; addr = attr->attr & 0xff; - vcpu = kvm_get_vcpu(dev->kvm, cpu); + vcpu = kvm_get_vcpu_by_id(dev->kvm, cpu); if (unlikely(vcpu == NULL)) { kvm_err("%s: invalid target cpu: %d\n", __func__, cpu); return -EINVAL; diff --git a/arch/loongarch/kvm/intc/pch_pic.c b/arch/loongarch/kvm/intc/pch_pic.c index 6f00ffe05c54..119290bcea79 100644 --- a/arch/loongarch/kvm/intc/pch_pic.c +++ b/arch/loongarch/kvm/intc/pch_pic.c @@ -195,6 +195,11 @@ static int kvm_pch_pic_read(struct kvm_vcpu *vcpu, return -EINVAL; } + if (addr & (len - 1)) { + kvm_err("%s: pch pic not aligned addr %llx len %d\n", __func__, addr, len); + return -EINVAL; + } + /* statistics of pch pic reading */ vcpu->stat.pch_pic_read_exits++; ret = loongarch_pch_pic_read(s, addr, len, val); @@ -302,6 +307,11 @@ static int kvm_pch_pic_write(struct kvm_vcpu *vcpu, return -EINVAL; } + if (addr & (len - 1)) { + kvm_err("%s: pch pic not aligned addr %llx len %d\n", __func__, addr, len); + return -EINVAL; + } + /* statistics of pch pic writing */ vcpu->stat.pch_pic_write_exits++; ret = loongarch_pch_pic_write(s, addr, len, val); diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index d1b8c50941ca..ce478151466c 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -1283,9 +1283,11 @@ int kvm_own_lbt(struct kvm_vcpu *vcpu) return -EINVAL; preempt_disable(); - set_csr_euen(CSR_EUEN_LBTEN); - _restore_lbt(&vcpu->arch.lbt); - vcpu->arch.aux_inuse |= KVM_LARCH_LBT; + if (!(vcpu->arch.aux_inuse & KVM_LARCH_LBT)) { + set_csr_euen(CSR_EUEN_LBTEN); + _restore_lbt(&vcpu->arch.lbt); + vcpu->arch.aux_inuse |= KVM_LARCH_LBT; + } preempt_enable(); return 0; diff --git a/arch/mips/boot/dts/lantiq/danube_easy50712.dts b/arch/mips/boot/dts/lantiq/danube_easy50712.dts index 1ce20b7d05cb..c4d7aa5753b0 100644 --- a/arch/mips/boot/dts/lantiq/danube_easy50712.dts +++ b/arch/mips/boot/dts/lantiq/danube_easy50712.dts @@ -82,13 +82,16 @@ }; }; - etop@e180000 { + ethernet@e180000 { compatible = "lantiq,etop-xway"; reg = <0xe180000 0x40000>; interrupt-parent = <&icu0>; interrupts = <73 78>; + interrupt-names = "tx", "rx"; phy-mode = "rmii"; mac-address = [ 00 11 22 33 44 55 ]; + lantiq,rx-burst-length = <4>; + lantiq,tx-burst-length = <4>; }; stp0: stp@e100bb0 { diff --git a/arch/mips/lantiq/xway/sysctrl.c b/arch/mips/lantiq/xway/sysctrl.c index 5a75283d17f1..6031a0272d87 100644 --- a/arch/mips/lantiq/xway/sysctrl.c +++ b/arch/mips/lantiq/xway/sysctrl.c @@ -497,7 +497,7 @@ void __init ltq_soc_init(void) ifccr = CGU_IFCCR_VR9; pcicr = CGU_PCICR_VR9; } else { - clkdev_add_pmu("1e180000.etop", NULL, 1, 0, PMU_PPE); + clkdev_add_pmu("1e180000.ethernet", NULL, 1, 0, PMU_PPE); } if (!of_machine_is_compatible("lantiq,ase")) @@ -531,9 +531,9 @@ void __init ltq_soc_init(void) CLOCK_133M, CLOCK_133M); clkdev_add_pmu("1e101000.usb", "otg", 1, 0, PMU_USB0); clkdev_add_pmu("1f203018.usb2-phy", "phy", 1, 0, PMU_USB0_P); - clkdev_add_pmu("1e180000.etop", "ppe", 1, 0, PMU_PPE); - clkdev_add_cgu("1e180000.etop", "ephycgu", CGU_EPHY); - clkdev_add_pmu("1e180000.etop", "ephy", 1, 0, PMU_EPHY); + clkdev_add_pmu("1e180000.ethernet", "ppe", 1, 0, PMU_PPE); + clkdev_add_cgu("1e180000.ethernet", "ephycgu", CGU_EPHY); + clkdev_add_pmu("1e180000.ethernet", "ephy", 1, 0, PMU_EPHY); clkdev_add_pmu("1e103000.sdio", NULL, 1, 0, PMU_ASE_SDIO); clkdev_add_pmu("1e116000.mei", "dfe", 1, 0, PMU_DFE); } else if (of_machine_is_compatible("lantiq,grx390")) { @@ -592,7 +592,7 @@ void __init ltq_soc_init(void) clkdev_add_pmu("1e101000.usb", "otg", 1, 0, PMU_USB0 | PMU_AHBM); clkdev_add_pmu("1f203034.usb2-phy", "phy", 1, 0, PMU_USB1_P); clkdev_add_pmu("1e106000.usb", "otg", 1, 0, PMU_USB1 | PMU_AHBM); - clkdev_add_pmu("1e180000.etop", "switch", 1, 0, PMU_SWITCH); + clkdev_add_pmu("1e180000.ethernet", "switch", 1, 0, PMU_SWITCH); clkdev_add_pmu("1e103000.sdio", NULL, 1, 0, PMU_SDIO); clkdev_add_pmu("1e103100.deu", NULL, 1, 0, PMU_DEU); clkdev_add_pmu("1e116000.mei", "dfe", 1, 0, PMU_DFE); diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index a7ab087d412c..c47b78c1d3e7 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -243,13 +243,13 @@ $(obj)/wrapper.a: $(obj-wlib) FORCE hostprogs := addnote hack-coff mktree targets += $(patsubst $(obj)/%,%,$(obj-boot) wrapper.a) zImage.lds -extra-y := $(obj)/wrapper.a $(obj-plat) $(obj)/empty.o \ +always-y := $(obj)/wrapper.a $(obj-plat) $(obj)/empty.o \ $(obj)/zImage.lds $(obj)/zImage.coff.lds $(obj)/zImage.ps3.lds dtstree := $(src)/dts wrapper := $(src)/wrapper -wrapperbits := $(extra-y) $(addprefix $(obj)/,addnote hack-coff mktree) \ +wrapperbits := $(always-y) $(addprefix $(obj)/,addnote hack-coff mktree) \ $(wrapper) FORCE ############# @@ -456,7 +456,7 @@ WRAPPER_DTSDIR := /usr/lib/kernel-wrapper/dts WRAPPER_BINDIR := /usr/sbin INSTALL := install -extra-installed := $(patsubst $(obj)/%, $(DESTDIR)$(WRAPPER_OBJDIR)/%, $(extra-y)) +extra-installed := $(patsubst $(obj)/%, $(DESTDIR)$(WRAPPER_OBJDIR)/%, $(always-y)) hostprogs-installed := $(patsubst %, $(DESTDIR)$(WRAPPER_BINDIR)/%, $(hostprogs)) wrapper-installed := $(DESTDIR)$(WRAPPER_BINDIR)/wrapper dts-installed := $(patsubst $(dtstree)/%, $(DESTDIR)$(WRAPPER_DTSDIR)/%, $(wildcard $(dtstree)/*.dts)) diff --git a/arch/powerpc/boot/install.sh b/arch/powerpc/boot/install.sh index 101fcb397a0f..c3df6c27ce75 100755 --- a/arch/powerpc/boot/install.sh +++ b/arch/powerpc/boot/install.sh @@ -19,19 +19,19 @@ set -e # this should work for both the pSeries zImage and the iSeries vmlinux.sm -image_name=`basename $2` +image_name=$(basename "$2") echo "Warning: '${INSTALLKERNEL}' command not available... Copying" \ "directly to $4/$image_name-$1" >&2 -if [ -f $4/$image_name-$1 ]; then - mv $4/$image_name-$1 $4/$image_name-$1.old +if [ -f "$4"/"$image_name"-"$1" ]; then + mv "$4"/"$image_name"-"$1" "$4"/"$image_name"-"$1".old fi -if [ -f $4/System.map-$1 ]; then - mv $4/System.map-$1 $4/System-$1.old +if [ -f "$4"/System.map-"$1" ]; then + mv "$4"/System.map-"$1" "$4"/System-"$1".old fi -cat $2 > $4/$image_name-$1 -cp $3 $4/System.map-$1 +cat "$2" > "$4"/"$image_name"-"$1" +cp "$3" "$4"/System.map-"$1" diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index fb2b95267022..2f0a2e69c607 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -199,7 +199,9 @@ obj-$(CONFIG_ALTIVEC) += vector.o obj-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_init.o obj64-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_entry_64.o -extra-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_init_check +ifdef KBUILD_BUILTIN +always-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_init_check +endif obj-$(CONFIG_PPC64) += $(obj64-y) obj-$(CONFIG_PPC32) += $(obj32-y) diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 5b3c093611ba..7209d00a9c25 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -632,19 +632,19 @@ static void __init kvm_check_ins(u32 *inst, u32 features) #endif } - switch (inst_no_rt & ~KVM_MASK_RB) { #ifdef CONFIG_PPC_BOOK3S_32 + switch (inst_no_rt & ~KVM_MASK_RB) { case KVM_INST_MTSRIN: if (features & KVM_MAGIC_FEAT_SR) { u32 inst_rb = _inst & KVM_MASK_RB; kvm_patch_ins_mtsrin(inst, inst_rt, inst_rb); } break; -#endif } +#endif - switch (_inst) { #ifdef CONFIG_BOOKE + switch (_inst) { case KVM_INST_WRTEEI_0: kvm_patch_ins_wrteei_0(inst); break; @@ -652,8 +652,8 @@ static void __init kvm_check_ins(u32 *inst, u32 features) case KVM_INST_WRTEEI_1: kvm_patch_ins_wrtee(inst, 0, 1); break; -#endif } +#endif } extern u32 kvm_template_start[]; diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh index 69623b9045d5..3090b97258ae 100644 --- a/arch/powerpc/kernel/prom_init_check.sh +++ b/arch/powerpc/kernel/prom_init_check.sh @@ -15,8 +15,8 @@ has_renamed_memintrinsics() { - grep -q "^CONFIG_KASAN=y$" ${KCONFIG_CONFIG} && \ - ! grep -q "^CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX=y" ${KCONFIG_CONFIG} + grep -q "^CONFIG_KASAN=y$" "${KCONFIG_CONFIG}" && \ + ! grep -q "^CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX=y" "${KCONFIG_CONFIG}" } if has_renamed_memintrinsics @@ -42,15 +42,15 @@ check_section() { file=$1 section=$2 - size=$(objdump -h -j $section $file 2>/dev/null | awk "\$2 == \"$section\" {print \$3}") + size=$(objdump -h -j "$section" "$file" 2>/dev/null | awk "\$2 == \"$section\" {print \$3}") size=${size:-0} - if [ $size -ne 0 ]; then + if [ "$size" -ne 0 ]; then ERROR=1 echo "Error: Section $section not empty in prom_init.c" >&2 fi } -for UNDEF in $($NM -u $OBJ | awk '{print $2}') +for UNDEF in $($NM -u "$OBJ" | awk '{print $2}') do # On 64-bit nm gives us the function descriptors, which have # a leading . on the name, so strip it off here. @@ -87,8 +87,8 @@ do fi done -check_section $OBJ .data -check_section $OBJ .bss -check_section $OBJ .init.data +check_section "$OBJ" .data +check_section "$OBJ" .bss +check_section "$OBJ" .init.data exit $ERROR diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 7284c8021eeb..8fd7cbf3bd04 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -141,10 +141,7 @@ void __init check_smt_enabled(void) smt_enabled_at_boot = 0; else { int smt; - int rc; - - rc = kstrtoint(smt_enabled_cmdline, 10, &smt); - if (!rc) + if (!kstrtoint(smt_enabled_cmdline, 10, &smt)) smt_enabled_at_boot = min(threads_per_core, smt); } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 153587741864..2ba057171ebe 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -69,7 +69,7 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) /* * Common checks before entering the guest world. Call with interrupts - * disabled. + * enabled. * * returns: * diff --git a/arch/powerpc/platforms/8xx/cpm1-ic.c b/arch/powerpc/platforms/8xx/cpm1-ic.c index a49d4a9ab3bc..3292071e4da3 100644 --- a/arch/powerpc/platforms/8xx/cpm1-ic.c +++ b/arch/powerpc/platforms/8xx/cpm1-ic.c @@ -110,8 +110,7 @@ static int cpm_pic_probe(struct platform_device *pdev) out_be32(&data->reg->cpic_cimr, 0); - data->host = irq_domain_create_linear(of_fwnode_handle(dev->of_node), - 64, &cpm_pic_host_ops, data); + data->host = irq_domain_create_linear(dev_fwnode(dev), 64, &cpm_pic_host_ops, data); if (!data->host) return -ENODEV; diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 613b383ed8b3..7b527d18aa5e 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -122,16 +122,11 @@ choice If unsure, select Generic. config POWERPC64_CPU - bool "Generic (POWER5 and PowerPC 970 and above)" - depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN - select PPC_64S_HASH_MMU - -config POWERPC64_CPU - bool "Generic (POWER8 and above)" - depends on PPC_BOOK3S_64 && CPU_LITTLE_ENDIAN - select ARCH_HAS_FAST_MULTIPLIER + bool "Generic 64 bits powerpc" + depends on PPC_BOOK3S_64 + select ARCH_HAS_FAST_MULTIPLIER if CPU_LITTLE_ENDIAN select PPC_64S_HASH_MMU - select PPC_HAS_LBARX_LHARX + select PPC_HAS_LBARX_LHARX if CPU_LITTLE_ENDIAN config POWERPC_CPU bool "Generic 32 bits powerpc" diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index 4fe8a7b1b288..2a007bfb038d 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -412,9 +412,8 @@ static int fsl_of_msi_probe(struct platform_device *dev) } platform_set_drvdata(dev, msi); - msi->irqhost = irq_domain_create_linear(of_fwnode_handle(dev->dev.of_node), - NR_MSI_IRQS_MAX, &fsl_msi_host_ops, msi); - + msi->irqhost = irq_domain_create_linear(dev_fwnode(&dev->dev), NR_MSI_IRQS_MAX, + &fsl_msi_host_ops, msi); if (msi->irqhost == NULL) { dev_err(&dev->dev, "No memory for MSI irqhost\n"); err = -ENOMEM; diff --git a/arch/riscv/boot/dts/thead/th1520.dtsi b/arch/riscv/boot/dts/thead/th1520.dtsi index 42724bf7e90e..03f1d7319049 100644 --- a/arch/riscv/boot/dts/thead/th1520.dtsi +++ b/arch/riscv/boot/dts/thead/th1520.dtsi @@ -297,8 +297,9 @@ reg-names = "dwmac", "apb"; interrupts = <67 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "macirq"; - clocks = <&clk CLK_GMAC_AXI>, <&clk CLK_GMAC1>; - clock-names = "stmmaceth", "pclk"; + clocks = <&clk CLK_GMAC_AXI>, <&clk CLK_GMAC1>, + <&clk CLK_PERISYS_APB4_HCLK>; + clock-names = "stmmaceth", "pclk", "apb"; snps,pbl = <32>; snps,fixed-burst; snps,multicast-filter-bins = <64>; @@ -319,8 +320,9 @@ reg-names = "dwmac", "apb"; interrupts = <66 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "macirq"; - clocks = <&clk CLK_GMAC_AXI>, <&clk CLK_GMAC0>; - clock-names = "stmmaceth", "pclk"; + clocks = <&clk CLK_GMAC_AXI>, <&clk CLK_GMAC0>, + <&clk CLK_PERISYS_APB4_HCLK>; + clock-names = "stmmaceth", "pclk", "apb"; snps,pbl = <32>; snps,fixed-burst; snps,multicast-filter-bins = <64>; diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c index a1c3b2ec1dde..525fb5a330c0 100644 --- a/arch/riscv/kvm/mmu.c +++ b/arch/riscv/kvm/mmu.c @@ -39,6 +39,7 @@ int kvm_riscv_mmu_ioremap(struct kvm *kvm, gpa_t gpa, phys_addr_t hpa, unsigned long size, bool writable, bool in_atomic) { int ret = 0; + pgprot_t prot; unsigned long pfn; phys_addr_t addr, end; struct kvm_mmu_memory_cache pcache = { @@ -55,10 +56,12 @@ int kvm_riscv_mmu_ioremap(struct kvm *kvm, gpa_t gpa, phys_addr_t hpa, end = (gpa + size + PAGE_SIZE - 1) & PAGE_MASK; pfn = __phys_to_pfn(hpa); + prot = pgprot_noncached(PAGE_WRITE); for (addr = gpa; addr < end; addr += PAGE_SIZE) { map.addr = addr; - map.pte = pfn_pte(pfn, PAGE_KERNEL_IO); + map.pte = pfn_pte(pfn, prot); + map.pte = pte_mkdirty(map.pte); map.level = 0; if (!writable) diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index f001e56403f9..3ebcfffaa978 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -683,7 +683,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) } /** - * check_vcpu_requests - check and handle pending vCPU requests + * kvm_riscv_check_vcpu_requests - check and handle pending vCPU requests * @vcpu: the VCPU pointer * * Return: 1 if we should enter the guest diff --git a/arch/riscv/kvm/vcpu_vector.c b/arch/riscv/kvm/vcpu_vector.c index a5f88cb717f3..05f3cc2d8e31 100644 --- a/arch/riscv/kvm/vcpu_vector.c +++ b/arch/riscv/kvm/vcpu_vector.c @@ -182,6 +182,8 @@ int kvm_riscv_vcpu_set_reg_vector(struct kvm_vcpu *vcpu, struct kvm_cpu_context *cntx = &vcpu->arch.guest_context; unsigned long reg_val; + if (reg_size != sizeof(reg_val)) + return -EINVAL; if (copy_from_user(®_val, uaddr, reg_size)) return -EFAULT; if (reg_val != cntx->vector.vlenb) diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c index 1d073acd05a7..cea3de4dce8c 100644 --- a/arch/s390/boot/vmem.c +++ b/arch/s390/boot/vmem.c @@ -530,6 +530,9 @@ void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned l lowcore_address + sizeof(struct lowcore), POPULATE_LOWCORE); for_each_physmem_usable_range(i, &start, &end) { + /* Do not map lowcore with identity mapping */ + if (!start) + start = sizeof(struct lowcore); pgtable_populate((unsigned long)__identity_va(start), (unsigned long)__identity_va(end), POPULATE_IDENTITY); diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 6b33429f1c4d..5e616bc988ac 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -5,6 +5,7 @@ CONFIG_WATCH_QUEUE=y CONFIG_AUDIT=y CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y +CONFIG_POSIX_AUX_CLOCKS=y CONFIG_BPF_SYSCALL=y CONFIG_BPF_JIT=y CONFIG_BPF_JIT_ALWAYS_ON=y @@ -19,6 +20,7 @@ CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y +CONFIG_SCHED_PROXY_EXEC=y CONFIG_NUMA_BALANCING=y CONFIG_MEMCG=y CONFIG_BLK_CGROUP=y @@ -42,6 +44,7 @@ CONFIG_PROFILING=y CONFIG_KEXEC=y CONFIG_KEXEC_FILE=y CONFIG_KEXEC_SIG=y +CONFIG_CRASH_DM_CRYPT=y CONFIG_LIVEPATCH=y CONFIG_MARCH_Z13=y CONFIG_NR_CPUS=512 @@ -105,6 +108,7 @@ CONFIG_CMA_AREAS=7 CONFIG_MEM_SOFT_DIRTY=y CONFIG_DEFERRED_STRUCT_PAGE_INIT=y CONFIG_IDLE_PAGE_TRACKING=y +CONFIG_ZONE_DEVICE=y CONFIG_PERCPU_STATS=y CONFIG_GUP_TEST=y CONFIG_ANON_VMA_NAME=y @@ -223,17 +227,19 @@ CONFIG_NETFILTER_XT_TARGET_CONNMARK=m CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m CONFIG_NETFILTER_XT_TARGET_CT=m CONFIG_NETFILTER_XT_TARGET_DSCP=m +CONFIG_NETFILTER_XT_TARGET_HL=m CONFIG_NETFILTER_XT_TARGET_HMARK=m CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m CONFIG_NETFILTER_XT_TARGET_LOG=m CONFIG_NETFILTER_XT_TARGET_MARK=m +CONFIG_NETFILTER_XT_NAT=m CONFIG_NETFILTER_XT_TARGET_NETMAP=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m CONFIG_NETFILTER_XT_TARGET_REDIRECT=m +CONFIG_NETFILTER_XT_TARGET_MASQUERADE=m CONFIG_NETFILTER_XT_TARGET_TEE=m CONFIG_NETFILTER_XT_TARGET_TPROXY=m -CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_SECMARK=m CONFIG_NETFILTER_XT_TARGET_TCPMSS=m CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m @@ -248,6 +254,7 @@ CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m CONFIG_NETFILTER_XT_MATCH_CONNMARK=m CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m CONFIG_NETFILTER_XT_MATCH_CPU=m +CONFIG_NETFILTER_XT_MATCH_DCCP=m CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m CONFIG_NETFILTER_XT_MATCH_DSCP=m CONFIG_NETFILTER_XT_MATCH_ESP=m @@ -318,16 +325,8 @@ CONFIG_IP_NF_MATCH_AH=m CONFIG_IP_NF_MATCH_ECN=m CONFIG_IP_NF_MATCH_RPFILTER=m CONFIG_IP_NF_MATCH_TTL=m -CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_NAT=m -CONFIG_IP_NF_TARGET_MASQUERADE=m -CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_ECN=m -CONFIG_IP_NF_TARGET_TTL=m -CONFIG_IP_NF_RAW=m -CONFIG_IP_NF_SECURITY=m -CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_FIB_IPV6=m CONFIG_IP6_NF_IPTABLES=m @@ -340,15 +339,9 @@ CONFIG_IP6_NF_MATCH_IPV6HEADER=m CONFIG_IP6_NF_MATCH_MH=m CONFIG_IP6_NF_MATCH_RPFILTER=m CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_TARGET_HL=m -CONFIG_IP6_NF_FILTER=m CONFIG_IP6_NF_TARGET_REJECT=m -CONFIG_IP6_NF_MANGLE=m -CONFIG_IP6_NF_RAW=m -CONFIG_IP6_NF_SECURITY=m -CONFIG_IP6_NF_NAT=m -CONFIG_IP6_NF_TARGET_MASQUERADE=m CONFIG_NF_TABLES_BRIDGE=m +CONFIG_IP_SCTP=m CONFIG_RDS=m CONFIG_RDS_RDMA=m CONFIG_RDS_TCP=m @@ -383,6 +376,7 @@ CONFIG_NET_SCH_FQ_CODEL=m CONFIG_NET_SCH_INGRESS=m CONFIG_NET_SCH_PLUG=m CONFIG_NET_SCH_ETS=m +CONFIG_NET_SCH_DUALPI2=m CONFIG_NET_CLS_BASIC=m CONFIG_NET_CLS_ROUTE4=m CONFIG_NET_CLS_FW=m @@ -504,6 +498,7 @@ CONFIG_DM_VDO=m CONFIG_NETDEVICES=y CONFIG_BONDING=m CONFIG_DUMMY=m +CONFIG_OVPN=m CONFIG_EQUALIZER=m CONFIG_IFB=m CONFIG_MACVLAN=m @@ -641,6 +636,7 @@ CONFIG_VP_VDPA=m CONFIG_VHOST_NET=m CONFIG_VHOST_VSOCK=m CONFIG_VHOST_VDPA=m +CONFIG_DEV_DAX=m CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y @@ -665,6 +661,7 @@ CONFIG_NILFS2_FS=m CONFIG_BCACHEFS_FS=y CONFIG_BCACHEFS_QUOTA=y CONFIG_BCACHEFS_POSIX_ACL=y +CONFIG_FS_DAX=y CONFIG_EXPORTFS_BLOCK_OPS=y CONFIG_FS_ENCRYPTION=y CONFIG_FS_VERITY=y @@ -755,6 +752,8 @@ CONFIG_HARDENED_USERCOPY=y CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_SELFTESTS=y +CONFIG_CRYPTO_SELFTESTS_FULL=y +CONFIG_CRYPTO_NULL=y CONFIG_CRYPTO_PCRYPT=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_BENCHMARK=m @@ -783,7 +782,6 @@ CONFIG_CRYPTO_HCTR2=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_GCM=y CONFIG_CRYPTO_SEQIV=y CONFIG_CRYPTO_MD4=m @@ -822,6 +820,7 @@ CONFIG_SYSTEM_BLACKLIST_KEYRING=y CONFIG_CRYPTO_KRB5=m CONFIG_CRYPTO_KRB5_SELFTESTS=y CONFIG_CORDIC=m +CONFIG_TRACE_MMIO_ACCESS=y CONFIG_RANDOM32_SELFTEST=y CONFIG_XZ_DEC_MICROLZMA=y CONFIG_DMA_CMA=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index b75eb2775850..094599cdaf4d 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -4,6 +4,7 @@ CONFIG_WATCH_QUEUE=y CONFIG_AUDIT=y CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y +CONFIG_POSIX_AUX_CLOCKS=y CONFIG_BPF_SYSCALL=y CONFIG_BPF_JIT=y CONFIG_BPF_JIT_ALWAYS_ON=y @@ -17,6 +18,7 @@ CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y +CONFIG_SCHED_PROXY_EXEC=y CONFIG_NUMA_BALANCING=y CONFIG_MEMCG=y CONFIG_BLK_CGROUP=y @@ -40,11 +42,12 @@ CONFIG_PROFILING=y CONFIG_KEXEC=y CONFIG_KEXEC_FILE=y CONFIG_KEXEC_SIG=y +CONFIG_CRASH_DM_CRYPT=y CONFIG_LIVEPATCH=y CONFIG_MARCH_Z13=y CONFIG_NR_CPUS=512 CONFIG_NUMA=y -CONFIG_HZ_100=y +CONFIG_HZ_1000=y CONFIG_CERT_STORE=y CONFIG_EXPOLINE=y CONFIG_EXPOLINE_AUTO=y @@ -97,6 +100,7 @@ CONFIG_CMA_AREAS=7 CONFIG_MEM_SOFT_DIRTY=y CONFIG_DEFERRED_STRUCT_PAGE_INIT=y CONFIG_IDLE_PAGE_TRACKING=y +CONFIG_ZONE_DEVICE=y CONFIG_PERCPU_STATS=y CONFIG_ANON_VMA_NAME=y CONFIG_USERFAULTFD=y @@ -214,17 +218,19 @@ CONFIG_NETFILTER_XT_TARGET_CONNMARK=m CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m CONFIG_NETFILTER_XT_TARGET_CT=m CONFIG_NETFILTER_XT_TARGET_DSCP=m +CONFIG_NETFILTER_XT_TARGET_HL=m CONFIG_NETFILTER_XT_TARGET_HMARK=m CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m CONFIG_NETFILTER_XT_TARGET_LOG=m CONFIG_NETFILTER_XT_TARGET_MARK=m +CONFIG_NETFILTER_XT_NAT=m CONFIG_NETFILTER_XT_TARGET_NETMAP=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m CONFIG_NETFILTER_XT_TARGET_REDIRECT=m +CONFIG_NETFILTER_XT_TARGET_MASQUERADE=m CONFIG_NETFILTER_XT_TARGET_TEE=m CONFIG_NETFILTER_XT_TARGET_TPROXY=m -CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_SECMARK=m CONFIG_NETFILTER_XT_TARGET_TCPMSS=m CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m @@ -239,6 +245,7 @@ CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m CONFIG_NETFILTER_XT_MATCH_CONNMARK=m CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m CONFIG_NETFILTER_XT_MATCH_CPU=m +CONFIG_NETFILTER_XT_MATCH_DCCP=m CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m CONFIG_NETFILTER_XT_MATCH_DSCP=m CONFIG_NETFILTER_XT_MATCH_ESP=m @@ -309,16 +316,8 @@ CONFIG_IP_NF_MATCH_AH=m CONFIG_IP_NF_MATCH_ECN=m CONFIG_IP_NF_MATCH_RPFILTER=m CONFIG_IP_NF_MATCH_TTL=m -CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_NAT=m -CONFIG_IP_NF_TARGET_MASQUERADE=m -CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_ECN=m -CONFIG_IP_NF_TARGET_TTL=m -CONFIG_IP_NF_RAW=m -CONFIG_IP_NF_SECURITY=m -CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_FIB_IPV6=m CONFIG_IP6_NF_IPTABLES=m @@ -331,15 +330,9 @@ CONFIG_IP6_NF_MATCH_IPV6HEADER=m CONFIG_IP6_NF_MATCH_MH=m CONFIG_IP6_NF_MATCH_RPFILTER=m CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_TARGET_HL=m -CONFIG_IP6_NF_FILTER=m CONFIG_IP6_NF_TARGET_REJECT=m -CONFIG_IP6_NF_MANGLE=m -CONFIG_IP6_NF_RAW=m -CONFIG_IP6_NF_SECURITY=m -CONFIG_IP6_NF_NAT=m -CONFIG_IP6_NF_TARGET_MASQUERADE=m CONFIG_NF_TABLES_BRIDGE=m +CONFIG_IP_SCTP=m CONFIG_RDS=m CONFIG_RDS_RDMA=m CONFIG_RDS_TCP=m @@ -373,6 +366,7 @@ CONFIG_NET_SCH_FQ_CODEL=m CONFIG_NET_SCH_INGRESS=m CONFIG_NET_SCH_PLUG=m CONFIG_NET_SCH_ETS=m +CONFIG_NET_SCH_DUALPI2=m CONFIG_NET_CLS_BASIC=m CONFIG_NET_CLS_ROUTE4=m CONFIG_NET_CLS_FW=m @@ -494,6 +488,7 @@ CONFIG_DM_VDO=m CONFIG_NETDEVICES=y CONFIG_BONDING=m CONFIG_DUMMY=m +CONFIG_OVPN=m CONFIG_EQUALIZER=m CONFIG_IFB=m CONFIG_MACVLAN=m @@ -631,6 +626,7 @@ CONFIG_VP_VDPA=m CONFIG_VHOST_NET=m CONFIG_VHOST_VSOCK=m CONFIG_VHOST_VDPA=m +CONFIG_DEV_DAX=m CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y @@ -652,6 +648,7 @@ CONFIG_NILFS2_FS=m CONFIG_BCACHEFS_FS=m CONFIG_BCACHEFS_QUOTA=y CONFIG_BCACHEFS_POSIX_ACL=y +CONFIG_FS_DAX=y CONFIG_EXPORTFS_BLOCK_OPS=y CONFIG_FS_ENCRYPTION=y CONFIG_FS_VERITY=y @@ -683,7 +680,6 @@ CONFIG_TMPFS_POSIX_ACL=y CONFIG_TMPFS_INODE64=y CONFIG_TMPFS_QUOTA=y CONFIG_HUGETLBFS=y -CONFIG_CONFIGFS_FS=m CONFIG_ECRYPT_FS=m CONFIG_CRAMFS=m CONFIG_SQUASHFS=m @@ -741,6 +737,7 @@ CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_CRYPTO_FIPS=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_SELFTESTS=y +CONFIG_CRYPTO_NULL=y CONFIG_CRYPTO_PCRYPT=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_BENCHMARK=m @@ -769,7 +766,6 @@ CONFIG_CRYPTO_HCTR2=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_GCM=y CONFIG_CRYPTO_SEQIV=y CONFIG_CRYPTO_MD4=m diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index 8163c1702720..ed0b137353ad 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -1,5 +1,6 @@ CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y +CONFIG_POSIX_AUX_CLOCKS=y CONFIG_BPF_SYSCALL=y # CONFIG_CPU_ISOLATION is not set # CONFIG_UTS_NS is not set @@ -11,7 +12,7 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_KEXEC=y CONFIG_MARCH_Z13=y CONFIG_NR_CPUS=2 -CONFIG_HZ_100=y +CONFIG_HZ_1000=y # CONFIG_CHSC_SCH is not set # CONFIG_SCM_BUS is not set # CONFIG_AP is not set diff --git a/arch/s390/hypfs/hypfs_dbfs.c b/arch/s390/hypfs/hypfs_dbfs.c index 5d9effb0867c..41a0d2066fa0 100644 --- a/arch/s390/hypfs/hypfs_dbfs.c +++ b/arch/s390/hypfs/hypfs_dbfs.c @@ -6,6 +6,7 @@ * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com> */ +#include <linux/security.h> #include <linux/slab.h> #include "hypfs.h" @@ -66,23 +67,27 @@ static long dbfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) long rc; mutex_lock(&df->lock); - if (df->unlocked_ioctl) - rc = df->unlocked_ioctl(file, cmd, arg); - else - rc = -ENOTTY; + rc = df->unlocked_ioctl(file, cmd, arg); mutex_unlock(&df->lock); return rc; } -static const struct file_operations dbfs_ops = { +static const struct file_operations dbfs_ops_ioctl = { .read = dbfs_read, .unlocked_ioctl = dbfs_ioctl, }; +static const struct file_operations dbfs_ops = { + .read = dbfs_read, +}; + void hypfs_dbfs_create_file(struct hypfs_dbfs_file *df) { - df->dentry = debugfs_create_file(df->name, 0400, dbfs_dir, df, - &dbfs_ops); + const struct file_operations *fops = &dbfs_ops; + + if (df->unlocked_ioctl && !security_locked_down(LOCKDOWN_DEBUGFS)) + fops = &dbfs_ops_ioctl; + df->dentry = debugfs_create_file(df->name, 0400, dbfs_dir, df, fops); mutex_init(&df->lock); } diff --git a/arch/x86/boot/cpuflags.c b/arch/x86/boot/cpuflags.c index 916bac09b464..63e037e94e4c 100644 --- a/arch/x86/boot/cpuflags.c +++ b/arch/x86/boot/cpuflags.c @@ -106,5 +106,18 @@ void get_cpuflags(void) cpuid(0x80000001, &ignored, &ignored, &cpu.flags[6], &cpu.flags[1]); } + + if (max_amd_level >= 0x8000001f) { + u32 ebx; + + /* + * The X86_FEATURE_COHERENCY_SFW_NO feature bit is in + * the virtualization flags entry (word 8) and set by + * scattered.c, so the bit needs to be explicitly set. + */ + cpuid(0x8000001f, &ignored, &ebx, &ignored, &ignored); + if (ebx & BIT(31)) + set_bit(X86_FEATURE_COHERENCY_SFW_NO, cpu.flags); + } } } diff --git a/arch/x86/boot/startup/sev-shared.c b/arch/x86/boot/startup/sev-shared.c index 7a706db87b93..a34cd19796f9 100644 --- a/arch/x86/boot/startup/sev-shared.c +++ b/arch/x86/boot/startup/sev-shared.c @@ -785,6 +785,7 @@ static void __head svsm_pval_4k_page(unsigned long paddr, bool validate) pc->entry[0].page_size = RMP_PG_SIZE_4K; pc->entry[0].action = validate; pc->entry[0].ignore_cf = 0; + pc->entry[0].rsvd = 0; pc->entry[0].pfn = paddr >> PAGE_SHIFT; /* Protocol 0, Call ID 1 */ @@ -810,6 +811,13 @@ static void __head pvalidate_4k_page(unsigned long vaddr, unsigned long paddr, if (ret) sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE); } + + /* + * If validating memory (making it private) and affected by the + * cache-coherency vulnerability, perform the cache eviction mitigation. + */ + if (validate && !has_cpuflag(X86_FEATURE_COHERENCY_SFW_NO)) + sev_evict_cache((void *)vaddr, 1); } /* diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c index fc59ce78c477..14ef5908fb27 100644 --- a/arch/x86/coco/sev/core.c +++ b/arch/x86/coco/sev/core.c @@ -227,6 +227,7 @@ static u64 svsm_build_ca_from_pfn_range(u64 pfn, u64 pfn_end, bool action, pe->page_size = RMP_PG_SIZE_4K; pe->action = action; pe->ignore_cf = 0; + pe->rsvd = 0; pe->pfn = pfn; pe++; @@ -257,6 +258,7 @@ static int svsm_build_ca_from_psc_desc(struct snp_psc_desc *desc, unsigned int d pe->page_size = e->pagesize ? RMP_PG_SIZE_2M : RMP_PG_SIZE_4K; pe->action = e->operation == SNP_PAGE_STATE_PRIVATE; pe->ignore_cf = 0; + pe->rsvd = 0; pe->pfn = e->gfn; pe++; @@ -358,10 +360,31 @@ static void svsm_pval_pages(struct snp_psc_desc *desc) static void pvalidate_pages(struct snp_psc_desc *desc) { + struct psc_entry *e; + unsigned int i; + if (snp_vmpl) svsm_pval_pages(desc); else pval_pages(desc); + + /* + * If not affected by the cache-coherency vulnerability there is no need + * to perform the cache eviction mitigation. + */ + if (cpu_feature_enabled(X86_FEATURE_COHERENCY_SFW_NO)) + return; + + for (i = 0; i <= desc->hdr.end_entry; i++) { + e = &desc->entries[i]; + + /* + * If validating memory (making it private) perform the cache + * eviction mitigation. + */ + if (e->operation == SNP_PAGE_STATE_PRIVATE) + sev_evict_cache(pfn_to_kaddr(e->gfn), e->pagesize ? 512 : 1); + } } static int vmgexit_psc(struct ghcb *ghcb, struct snp_psc_desc *desc) diff --git a/arch/x86/coco/sev/vc-handle.c b/arch/x86/coco/sev/vc-handle.c index faf1fce89ed4..c3b4acbde0d8 100644 --- a/arch/x86/coco/sev/vc-handle.c +++ b/arch/x86/coco/sev/vc-handle.c @@ -371,29 +371,30 @@ static enum es_result __vc_handle_msr_caa(struct pt_regs *regs, bool write) * executing with Secure TSC enabled, so special handling is required for * accesses of MSR_IA32_TSC and MSR_AMD64_GUEST_TSC_FREQ. */ -static enum es_result __vc_handle_secure_tsc_msrs(struct pt_regs *regs, bool write) +static enum es_result __vc_handle_secure_tsc_msrs(struct es_em_ctxt *ctxt, bool write) { + struct pt_regs *regs = ctxt->regs; u64 tsc; /* - * GUEST_TSC_FREQ should not be intercepted when Secure TSC is enabled. - * Terminate the SNP guest when the interception is enabled. + * Writing to MSR_IA32_TSC can cause subsequent reads of the TSC to + * return undefined values, and GUEST_TSC_FREQ is read-only. Generate + * a #GP on all writes. */ - if (regs->cx == MSR_AMD64_GUEST_TSC_FREQ) - return ES_VMM_ERROR; + if (write) { + ctxt->fi.vector = X86_TRAP_GP; + ctxt->fi.error_code = 0; + return ES_EXCEPTION; + } /* - * Writes: Writing to MSR_IA32_TSC can cause subsequent reads of the TSC - * to return undefined values, so ignore all writes. - * - * Reads: Reads of MSR_IA32_TSC should return the current TSC value, use - * the value returned by rdtsc_ordered(). + * GUEST_TSC_FREQ read should not be intercepted when Secure TSC is + * enabled. Terminate the guest if a read is attempted. */ - if (write) { - WARN_ONCE(1, "TSC MSR writes are verboten!\n"); - return ES_OK; - } + if (regs->cx == MSR_AMD64_GUEST_TSC_FREQ) + return ES_VMM_ERROR; + /* Reads of MSR_IA32_TSC should return the current TSC value. */ tsc = rdtsc_ordered(); regs->ax = lower_32_bits(tsc); regs->dx = upper_32_bits(tsc); @@ -416,7 +417,7 @@ static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt) case MSR_IA32_TSC: case MSR_AMD64_GUEST_TSC_FREQ: if (sev_status & MSR_AMD64_SNP_SECURE_TSC) - return __vc_handle_secure_tsc_msrs(regs, write); + return __vc_handle_secure_tsc_msrs(ctxt, write); break; default: break; diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 602957dd2609..06fc0479a23f 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -218,6 +218,7 @@ #define X86_FEATURE_FLEXPRIORITY ( 8*32+ 1) /* "flexpriority" Intel FlexPriority */ #define X86_FEATURE_EPT ( 8*32+ 2) /* "ept" Intel Extended Page Table */ #define X86_FEATURE_VPID ( 8*32+ 3) /* "vpid" Intel Virtual Processor ID */ +#define X86_FEATURE_COHERENCY_SFW_NO ( 8*32+ 4) /* SNP cache coherency software work around not needed */ #define X86_FEATURE_VMMCALL ( 8*32+15) /* "vmmcall" Prefer VMMCALL to VMCALL */ #define X86_FEATURE_XENPV ( 8*32+16) /* Xen paravirtual guest */ diff --git a/arch/x86/include/asm/cpuid.h b/arch/x86/include/asm/cpuid.h deleted file mode 100644 index d5749b25fa10..000000000000 --- a/arch/x86/include/asm/cpuid.h +++ /dev/null @@ -1,8 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -#ifndef _ASM_X86_CPUID_H -#define _ASM_X86_CPUID_H - -#include <asm/cpuid/api.h> - -#endif /* _ASM_X86_CPUID_H */ diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 89075ff19afa..02236962fdb1 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -619,6 +619,24 @@ int rmp_make_shared(u64 pfn, enum pg_level level); void snp_leak_pages(u64 pfn, unsigned int npages); void kdump_sev_callback(void); void snp_fixup_e820_tables(void); + +static inline void sev_evict_cache(void *va, int npages) +{ + volatile u8 val __always_unused; + u8 *bytes = va; + int page_idx; + + /* + * For SEV guests, a read from the first/last cache-lines of a 4K page + * using the guest key is sufficient to cause a flush of all cache-lines + * associated with that 4K page without incurring all the overhead of a + * full CLFLUSH sequence. + */ + for (page_idx = 0; page_idx < npages; page_idx++) { + val = bytes[page_idx * PAGE_SIZE]; + val = bytes[page_idx * PAGE_SIZE + PAGE_SIZE - 1]; + } +} #else static inline bool snp_probe_rmptable_info(void) { return false; } static inline int snp_rmptable_init(void) { return -ENOSYS; } @@ -634,6 +652,7 @@ static inline int rmp_make_shared(u64 pfn, enum pg_level level) { return -ENODEV static inline void snp_leak_pages(u64 pfn, unsigned int npages) {} static inline void kdump_sev_callback(void) { } static inline void snp_fixup_e820_tables(void) {} +static inline void sev_evict_cache(void *va, int npages) {} #endif #endif diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index 59a62c3780a2..a16d4631547c 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -94,12 +94,13 @@ DECLARE_STATIC_CALL(xen_hypercall, xen_hypercall_func); #ifdef MODULE #define __ADDRESSABLE_xen_hypercall #else -#define __ADDRESSABLE_xen_hypercall __ADDRESSABLE_ASM_STR(__SCK__xen_hypercall) +#define __ADDRESSABLE_xen_hypercall \ + __stringify(.global STATIC_CALL_KEY(xen_hypercall);) #endif #define __HYPERCALL \ __ADDRESSABLE_xen_hypercall \ - "call __SCT__xen_hypercall" + __stringify(call STATIC_CALL_TRAMP(xen_hypercall)) #define __HYPERCALL_ENTRY(x) "a" (x) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index a5ece6ebe8a7..a6f88ca1a6b4 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -1326,8 +1326,8 @@ static const char * const s5_reset_reason_txt[] = { static __init int print_s5_reset_status_mmio(void) { - unsigned long value; void __iomem *addr; + u32 value; int i; if (!cpu_feature_enabled(X86_FEATURE_ZEN)) @@ -1340,12 +1340,16 @@ static __init int print_s5_reset_status_mmio(void) value = ioread32(addr); iounmap(addr); + /* Value with "all bits set" is an error response and should be ignored. */ + if (value == U32_MAX) + return 0; + for (i = 0; i < ARRAY_SIZE(s5_reset_reason_txt); i++) { if (!(value & BIT(i))) continue; if (s5_reset_reason_txt[i]) { - pr_info("x86/amd: Previous system reset reason [0x%08lx]: %s\n", + pr_info("x86/amd: Previous system reset reason [0x%08x]: %s\n", value, s5_reset_reason_txt[i]); } } diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index b74bf937cd9f..49ef1b832c1a 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -386,7 +386,6 @@ static bool __init should_mitigate_vuln(unsigned int bug) case X86_BUG_SPECTRE_V2: case X86_BUG_RETBLEED: - case X86_BUG_SRSO: case X86_BUG_L1TF: case X86_BUG_ITS: return cpu_attack_vector_mitigated(CPU_MITIGATE_USER_KERNEL) || @@ -1069,10 +1068,8 @@ static void __init gds_select_mitigation(void) if (gds_mitigation == GDS_MITIGATION_AUTO) { if (should_mitigate_vuln(X86_BUG_GDS)) gds_mitigation = GDS_MITIGATION_FULL; - else { + else gds_mitigation = GDS_MITIGATION_OFF; - return; - } } /* No microcode */ @@ -3184,8 +3181,18 @@ static void __init srso_select_mitigation(void) } if (srso_mitigation == SRSO_MITIGATION_AUTO) { - if (should_mitigate_vuln(X86_BUG_SRSO)) { + /* + * Use safe-RET if user->kernel or guest->host protection is + * required. Otherwise the 'microcode' mitigation is sufficient + * to protect the user->user and guest->guest vectors. + */ + if (cpu_attack_vector_mitigated(CPU_MITIGATE_GUEST_HOST) || + (cpu_attack_vector_mitigated(CPU_MITIGATE_USER_KERNEL) && + !boot_cpu_has(X86_FEATURE_SRSO_USER_KERNEL_NO))) { srso_mitigation = SRSO_MITIGATION_SAFE_RET; + } else if (cpu_attack_vector_mitigated(CPU_MITIGATE_USER_USER) || + cpu_attack_vector_mitigated(CPU_MITIGATE_GUEST_GUEST)) { + srso_mitigation = SRSO_MITIGATION_MICROCODE; } else { srso_mitigation = SRSO_MITIGATION_NONE; return; diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c index 2154f12766fb..1fda6c3a2b65 100644 --- a/arch/x86/kernel/cpu/hygon.c +++ b/arch/x86/kernel/cpu/hygon.c @@ -16,6 +16,7 @@ #include <asm/spec-ctrl.h> #include <asm/delay.h> #include <asm/msr.h> +#include <asm/resctrl.h> #include "cpu.h" @@ -117,6 +118,8 @@ static void bsp_init_hygon(struct cpuinfo_x86 *c) x86_amd_ls_cfg_ssbd_mask = 1ULL << 10; } } + + resctrl_cpu_detect(c); } static void early_init_hygon(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index b4a1f6732a3a..6b868afb26c3 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -48,6 +48,7 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 }, { X86_FEATURE_AMD_FAST_CPPC, CPUID_EDX, 15, 0x80000007, 0 }, { X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 }, + { X86_FEATURE_COHERENCY_SFW_NO, CPUID_EBX, 31, 0x8000001f, 0 }, { X86_FEATURE_SMBA, CPUID_EBX, 2, 0x80000020, 0 }, { X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 }, { X86_FEATURE_TSA_SQ_NO, CPUID_ECX, 1, 0x80000021, 0 }, diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 12ed75c1b567..28e4fd65c9da 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1881,19 +1881,20 @@ long fpu_xstate_prctl(int option, unsigned long arg2) #ifdef CONFIG_PROC_PID_ARCH_STATUS /* * Report the amount of time elapsed in millisecond since last AVX512 - * use in the task. + * use in the task. Report -1 if no AVX-512 usage. */ static void avx512_status(struct seq_file *m, struct task_struct *task) { - unsigned long timestamp = READ_ONCE(x86_task_fpu(task)->avx512_timestamp); - long delta; + unsigned long timestamp; + long delta = -1; - if (!timestamp) { - /* - * Report -1 if no AVX512 usage - */ - delta = -1; - } else { + /* AVX-512 usage is not tracked for kernel threads. Don't report anything. */ + if (task->flags & (PF_KTHREAD | PF_USER_WORKER)) + return; + + timestamp = READ_ONCE(x86_task_fpu(task)->avx512_timestamp); + + if (timestamp) { delta = (long)(jiffies - timestamp); /* * Cap to LONG_MAX if time difference > LONG_MAX diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 8172c2042dd6..5fc437341e03 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -810,6 +810,8 @@ static int __pv_send_ipi(unsigned long *ipi_bitmap, struct kvm_apic_map *map, if (min > map->max_apic_id) return 0; + min = array_index_nospec(min, map->max_apic_id + 1); + for_each_set_bit(i, ipi_bitmap, min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) { if (map->phys_map[min + i]) { diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 2fbdebf79fbb..0635bd71c10e 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -719,13 +719,6 @@ static void sev_clflush_pages(struct page *pages[], unsigned long npages) static void sev_writeback_caches(struct kvm *kvm) { /* - * Note, the caller is responsible for ensuring correctness if the mask - * can be modified, e.g. if a CPU could be doing VMRUN. - */ - if (cpumask_empty(to_kvm_sev_info(kvm)->have_run_cpus)) - return; - - /* * Ensure that all dirty guest tagged cache entries are written back * before releasing the pages back to the system for use. CLFLUSH will * not do this without SME_COHERENT, and flushing many cache lines @@ -739,6 +732,9 @@ static void sev_writeback_caches(struct kvm *kvm) * serializing multiple calls and having responding CPUs (to the IPI) * mark themselves as still running if they are running (or about to * run) a vCPU for the VM. + * + * Note, the caller is responsible for ensuring correctness if the mask + * can be modified, e.g. if a CPU could be doing VMRUN. */ wbnoinvd_on_cpus_mask(to_kvm_sev_info(kvm)->have_run_cpus); } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a1c49bc681c4..604490b1cb19 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9908,8 +9908,11 @@ static void kvm_sched_yield(struct kvm_vcpu *vcpu, unsigned long dest_id) rcu_read_lock(); map = rcu_dereference(vcpu->kvm->arch.apic_map); - if (likely(map) && dest_id <= map->max_apic_id && map->phys_map[dest_id]) - target = map->phys_map[dest_id]->vcpu; + if (likely(map) && dest_id <= map->max_apic_id) { + dest_id = array_index_nospec(dest_id, map->max_apic_id + 1); + if (map->phys_map[dest_id]) + target = map->phys_map[dest_id]->vcpu; + } rcu_read_unlock(); |