Diffstat (limited to 'arch')
183 files changed, 3353 insertions, 1424 deletions
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 3bb5b513d5ae..91f3093eee6a 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -71,6 +71,7 @@ config CRYPTO_POLYVAL_ARM64_CE config CRYPTO_AES_ARM64 tristate "Ciphers: AES, modes: ECB, CBC, CTR, CTS, XCTR, XTS" select CRYPTO_AES + select CRYPTO_LIB_SHA256 help Block ciphers: AES cipher algorithms (FIPS-197) Length-preserving ciphers: AES with ECB, CBC, CTR, CTS, diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index 81560f722b9d..5e207ff34482 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -122,7 +122,6 @@ struct crypto_aes_xts_ctx { struct crypto_aes_essiv_cbc_ctx { struct crypto_aes_ctx key1; struct crypto_aes_ctx __aligned(8) key2; - struct crypto_shash *hash; }; struct mac_tfm_ctx { @@ -171,7 +170,7 @@ static int __maybe_unused essiv_cbc_set_key(struct crypto_skcipher *tfm, if (ret) return ret; - crypto_shash_tfm_digest(ctx->hash, in_key, key_len, digest); + sha256(in_key, key_len, digest); return aes_expandkey(&ctx->key2, digest, sizeof(digest)); } @@ -388,22 +387,6 @@ static int cts_cbc_decrypt(struct skcipher_request *req) return skcipher_walk_done(&walk, 0); } -static int __maybe_unused essiv_cbc_init_tfm(struct crypto_skcipher *tfm) -{ - struct crypto_aes_essiv_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); - - ctx->hash = crypto_alloc_shash("sha256", 0, 0); - - return PTR_ERR_OR_ZERO(ctx->hash); -} - -static void __maybe_unused essiv_cbc_exit_tfm(struct crypto_skcipher *tfm) -{ - struct crypto_aes_essiv_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); - - crypto_free_shash(ctx->hash); -} - static int __maybe_unused essiv_cbc_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); @@ -793,8 +776,6 @@ static struct skcipher_alg aes_algs[] = { { .setkey = essiv_cbc_set_key, .encrypt = essiv_cbc_encrypt, .decrypt = essiv_cbc_decrypt, - .init = essiv_cbc_init_tfm, - .exit = essiv_cbc_exit_tfm, } }; static int cbcmac_setkey(struct crypto_shash *tfm, const u8 *in_key, diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index bec227f9500a..9da54d4ee49e 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -81,6 +81,8 @@ enum __kvm_host_smccc_func { __KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff, __KVM_HOST_SMCCC_FUNC___vgic_v3_save_vmcr_aprs, __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_vmcr_aprs, + __KVM_HOST_SMCCC_FUNC___pkvm_reserve_vm, + __KVM_HOST_SMCCC_FUNC___pkvm_unreserve_vm, __KVM_HOST_SMCCC_FUNC___pkvm_init_vm, __KVM_HOST_SMCCC_FUNC___pkvm_init_vcpu, __KVM_HOST_SMCCC_FUNC___pkvm_teardown_vm, diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index fa8a08a1ccd5..c9eab316398e 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -220,6 +220,20 @@ static inline bool vcpu_el2_tge_is_set(const struct kvm_vcpu *vcpu) static inline bool vcpu_el2_amo_is_set(const struct kvm_vcpu *vcpu) { + /* + * DDI0487L.b Known Issue D22105 + * + * When executing at EL2 and HCR_EL2.{E2H,TGE} = {1, 0} it is + * IMPLEMENTATION DEFINED whether the effective value of HCR_EL2.AMO + * is the value programmed or 1. + * + * Make the implementation choice of treating the effective value as 1 as + * we cannot subsequently catch changes to TGE or AMO that would + * otherwise lead to the SError becoming deliverable. 
+ */ + if (vcpu_is_el2(vcpu) && vcpu_el2_e2h_is_set(vcpu) && !vcpu_el2_tge_is_set(vcpu)) + return true; + return ctxt_sys_reg(&vcpu->arch.ctxt, HCR_EL2) & HCR_AMO; } @@ -511,21 +525,29 @@ static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu) if (vcpu_mode_is_32bit(vcpu)) { *vcpu_cpsr(vcpu) |= PSR_AA32_E_BIT; } else { - u64 sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1); + enum vcpu_sysreg r; + u64 sctlr; + + r = vcpu_has_nv(vcpu) ? SCTLR_EL2 : SCTLR_EL1; + + sctlr = vcpu_read_sys_reg(vcpu, r); sctlr |= SCTLR_ELx_EE; - vcpu_write_sys_reg(vcpu, sctlr, SCTLR_EL1); + vcpu_write_sys_reg(vcpu, sctlr, r); } } static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu) { + enum vcpu_sysreg r; + u64 bit; + if (vcpu_mode_is_32bit(vcpu)) return !!(*vcpu_cpsr(vcpu) & PSR_AA32_E_BIT); - if (vcpu_mode_priv(vcpu)) - return !!(vcpu_read_sys_reg(vcpu, SCTLR_EL1) & SCTLR_ELx_EE); - else - return !!(vcpu_read_sys_reg(vcpu, SCTLR_EL1) & SCTLR_EL1_E0E); + r = is_hyp_ctxt(vcpu) ? SCTLR_EL2 : SCTLR_EL1; + bit = vcpu_mode_priv(vcpu) ? SCTLR_ELx_EE : SCTLR_EL1_E0E; + + return vcpu_read_sys_reg(vcpu, r) & bit; } static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu, diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 0ee4f6fa3a17..b763293281c8 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -252,7 +252,8 @@ struct kvm_protected_vm { pkvm_handle_t handle; struct kvm_hyp_memcache teardown_mc; struct kvm_hyp_memcache stage2_teardown_mc; - bool enabled; + bool is_protected; + bool is_created; }; struct kvm_mpidr_data { @@ -1442,7 +1443,7 @@ struct kvm *kvm_arch_alloc_vm(void); #define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE -#define kvm_vm_is_protected(kvm) (is_protected_kvm_enabled() && (kvm)->arch.pkvm.enabled) +#define kvm_vm_is_protected(kvm) (is_protected_kvm_enabled() && (kvm)->arch.pkvm.is_protected) #define vcpu_is_protected(vcpu) kvm_vm_is_protected((vcpu)->kvm) diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h index 7fd76f41c296..f7c06a840963 100644 --- a/arch/arm64/include/asm/kvm_nested.h +++ b/arch/arm64/include/asm/kvm_nested.h @@ -83,6 +83,8 @@ extern void check_nested_vcpu_requests(struct kvm_vcpu *vcpu); extern void kvm_nested_flush_hwstate(struct kvm_vcpu *vcpu); extern void kvm_nested_sync_hwstate(struct kvm_vcpu *vcpu); +extern void kvm_nested_setup_mdcr_el2(struct kvm_vcpu *vcpu); + struct kvm_s2_trans { phys_addr_t output; unsigned long block_size; @@ -265,7 +267,7 @@ static inline u64 decode_range_tlbi(u64 val, u64 *range, u16 *asid) return base; } -static inline unsigned int ps_to_output_size(unsigned int ps) +static inline unsigned int ps_to_output_size(unsigned int ps, bool pa52bit) { switch (ps) { case 0: return 32; @@ -273,7 +275,10 @@ static inline unsigned int ps_to_output_size(unsigned int ps) case 2: return 40; case 3: return 42; case 4: return 44; - case 5: + case 5: return 48; + case 6: if (pa52bit) + return 52; + fallthrough; default: return 48; } @@ -285,13 +290,28 @@ enum trans_regime { TR_EL2, }; +struct s1_walk_info; + +struct s1_walk_context { + struct s1_walk_info *wi; + u64 table_ipa; + int level; +}; + +struct s1_walk_filter { + int (*fn)(struct s1_walk_context *, void *); + void *priv; +}; + struct s1_walk_info { + struct s1_walk_filter *filter; u64 baddr; enum trans_regime regime; unsigned int max_oa_bits; unsigned int pgshift; unsigned int txsz; int sl; + u8 sh; bool as_el0; bool hpd; bool e0poe; @@ -299,6 +319,7 @@ struct s1_walk_info { bool 
pan; bool be; bool s2; + bool pa52bit; }; struct s1_walk_result { @@ -334,6 +355,8 @@ struct s1_walk_result { int __kvm_translate_va(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, struct s1_walk_result *wr, u64 va); +int __kvm_find_s1_desc_level(struct kvm_vcpu *vcpu, u64 va, u64 ipa, + int *level); /* VNCR management */ int kvm_vcpu_allocate_vncr_tlb(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h index ea58282f59bb..08be89c95466 100644 --- a/arch/arm64/include/asm/kvm_pkvm.h +++ b/arch/arm64/include/asm/kvm_pkvm.h @@ -18,6 +18,7 @@ int pkvm_init_host_vm(struct kvm *kvm); int pkvm_create_hyp_vm(struct kvm *kvm); +bool pkvm_hyp_vm_is_created(struct kvm *kvm); void pkvm_destroy_hyp_vm(struct kvm *kvm); int pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h index e3e8944a71c3..e92e4a0e48fc 100644 --- a/arch/arm64/include/asm/traps.h +++ b/arch/arm64/include/asm/traps.h @@ -36,6 +36,7 @@ int kasan_brk_handler(struct pt_regs *regs, unsigned long esr); int ubsan_brk_handler(struct pt_regs *regs, unsigned long esr); int early_brk64(unsigned long addr, unsigned long esr, struct pt_regs *regs); +void dump_kernel_instr(unsigned long kaddr); /* * Move regs->pc to next instruction and do necessary setup before it diff --git a/arch/arm64/include/asm/vncr_mapping.h b/arch/arm64/include/asm/vncr_mapping.h index f6ec500ad3fa..c2485a862e69 100644 --- a/arch/arm64/include/asm/vncr_mapping.h +++ b/arch/arm64/include/asm/vncr_mapping.h @@ -94,6 +94,8 @@ #define VNCR_PMSICR_EL1 0x838 #define VNCR_PMSIRR_EL1 0x840 #define VNCR_PMSLATFR_EL1 0x848 +#define VNCR_PMSNEVFR_EL1 0x850 +#define VNCR_PMSDSFR_EL1 0x858 #define VNCR_TRFCR_EL1 0x880 #define VNCR_MPAM1_EL1 0x900 #define VNCR_MPAMHCR_EL2 0x930 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index af6fd64a8a19..3917ad897801 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -2550,6 +2550,15 @@ test_has_mpam_hcr(const struct arm64_cpu_capabilities *entry, int scope) return idr & MPAMIDR_EL1_HAS_HCR; } +static bool +test_has_gicv5_legacy(const struct arm64_cpu_capabilities *entry, int scope) +{ + if (!this_cpu_has_cap(ARM64_HAS_GICV5_CPUIF)) + return false; + + return !!(read_sysreg_s(SYS_ICC_IDR0_EL1) & ICC_IDR0_EL1_GCIE_LEGACY); +} + static const struct arm64_cpu_capabilities arm64_features[] = { { .capability = ARM64_ALWAYS_BOOT, @@ -3167,6 +3176,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, ARM64_CPUID_FIELDS(ID_AA64PFR2_EL1, GCIE, IMP) }, + { + .desc = "GICv5 Legacy vCPU interface", + .type = ARM64_CPUCAP_EARLY_LOCAL_CPU_FEATURE, + .capability = ARM64_HAS_GICV5_LEGACY, + .matches = test_has_gicv5_legacy, + }, {}, }; diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 714b0b5ec5ac..5369763606e7 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -105,6 +105,9 @@ KVM_NVHE_ALIAS(__hyp_stub_vectors); KVM_NVHE_ALIAS(vgic_v2_cpuif_trap); KVM_NVHE_ALIAS(vgic_v3_cpuif_trap); +/* Static key indicating whether GICv3 has GICv2 compatibility */ +KVM_NVHE_ALIAS(vgic_v3_has_v2_compat); + /* Static key which is set if CNTVOFF_EL2 is unusable */ KVM_NVHE_ALIAS(broken_cntvoff_key); diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 5041817af267..681939ef5d16 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -149,19 +149,18 
@@ pstate_check_t * const aarch32_opcode_cond_checks[16] = { int show_unhandled_signals = 0; -static void dump_kernel_instr(const char *lvl, struct pt_regs *regs) +void dump_kernel_instr(unsigned long kaddr) { - unsigned long addr = instruction_pointer(regs); char str[sizeof("00000000 ") * 5 + 2 + 1], *p = str; int i; - if (user_mode(regs)) + if (!is_ttbr1_addr(kaddr)) return; for (i = -4; i < 1; i++) { unsigned int val, bad; - bad = aarch64_insn_read(&((u32 *)addr)[i], &val); + bad = aarch64_insn_read(&((u32 *)kaddr)[i], &val); if (!bad) p += sprintf(p, i == 0 ? "(%08x) " : "%08x ", val); @@ -169,7 +168,7 @@ static void dump_kernel_instr(const char *lvl, struct pt_regs *regs) p += sprintf(p, i == 0 ? "(????????) " : "???????? "); } - printk("%sCode: %s\n", lvl, str); + printk(KERN_EMERG "Code: %s\n", str); } #define S_SMP " SMP" @@ -178,6 +177,7 @@ static int __die(const char *str, long err, struct pt_regs *regs) { static int die_counter; int ret; + unsigned long addr = instruction_pointer(regs); pr_emerg("Internal error: %s: %016lx [#%d] " S_SMP "\n", str, err, ++die_counter); @@ -190,7 +190,10 @@ static int __die(const char *str, long err, struct pt_regs *regs) print_modules(); show_regs(regs); - dump_kernel_instr(KERN_EMERG, regs); + if (user_mode(regs)) + return ret; + + dump_kernel_instr(addr); return ret; } diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 713248f240e0..bff62e75d681 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -37,6 +37,7 @@ menuconfig KVM select HAVE_KVM_VCPU_RUN_PID_CHANGE select SCHED_INFO select GUEST_PERF_EVENTS if PERF_EVENTS + select KVM_GUEST_MEMFD help Support hosting virtualized guest machines. diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index bd6b6a620a09..fa79744290f3 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -170,10 +170,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (ret) return ret; - ret = pkvm_init_host_vm(kvm); - if (ret) - goto err_unshare_kvm; - if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL_ACCOUNT)) { ret = -ENOMEM; goto err_unshare_kvm; @@ -184,6 +180,16 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (ret) goto err_free_cpumask; + if (is_protected_kvm_enabled()) { + /* + * If any failures occur after this is successful, make sure to + * call __pkvm_unreserve_vm to unreserve the VM in hyp. 
+ */ + ret = pkvm_init_host_vm(kvm); + if (ret) + goto err_free_cpumask; + } + kvm_vgic_early_init(kvm); kvm_timer_init_vm(kvm); @@ -2317,8 +2323,9 @@ static int __init init_subsystems(void) } if (kvm_mode == KVM_MODE_NV && - !(vgic_present && kvm_vgic_global_state.type == VGIC_V3)) { - kvm_err("NV support requires GICv3, giving up\n"); + !(vgic_present && (kvm_vgic_global_state.type == VGIC_V3 || + kvm_vgic_global_state.has_gcie_v3_compat))) { + kvm_err("NV support requires GICv3 or GICv5 with legacy support, giving up\n"); err = -EINVAL; goto out; } diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index d71ca4ddc9d1..20bb9af125b1 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -28,9 +28,57 @@ static int get_ia_size(struct s1_walk_info *wi) /* Return true if the IPA is out of the OA range */ static bool check_output_size(u64 ipa, struct s1_walk_info *wi) { + if (wi->pa52bit) + return wi->max_oa_bits < 52 && (ipa & GENMASK_ULL(51, wi->max_oa_bits)); return wi->max_oa_bits < 48 && (ipa & GENMASK_ULL(47, wi->max_oa_bits)); } +static bool has_52bit_pa(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, u64 tcr) +{ + switch (BIT(wi->pgshift)) { + case SZ_64K: + default: /* IMPDEF: treat any other value as 64k */ + if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, PARANGE, 52)) + return false; + return ((wi->regime == TR_EL2 ? + FIELD_GET(TCR_EL2_PS_MASK, tcr) : + FIELD_GET(TCR_IPS_MASK, tcr)) == 0b0110); + case SZ_16K: + if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN16, 52_BIT)) + return false; + break; + case SZ_4K: + if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN4, 52_BIT)) + return false; + break; + } + + return (tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS)); +} + +static u64 desc_to_oa(struct s1_walk_info *wi, u64 desc) +{ + u64 addr; + + if (!wi->pa52bit) + return desc & GENMASK_ULL(47, wi->pgshift); + + switch (BIT(wi->pgshift)) { + case SZ_4K: + case SZ_16K: + addr = desc & GENMASK_ULL(49, wi->pgshift); + addr |= FIELD_GET(KVM_PTE_ADDR_51_50_LPA2, desc) << 50; + break; + case SZ_64K: + default: /* IMPDEF: treat any other value as 64k */ + addr = desc & GENMASK_ULL(47, wi->pgshift); + addr |= FIELD_GET(KVM_PTE_ADDR_51_48, desc) << 48; + break; + } + + return addr; +} + /* Return the translation regime that applies to an AT instruction */ static enum trans_regime compute_translation_regime(struct kvm_vcpu *vcpu, u32 op) { @@ -50,21 +98,26 @@ static enum trans_regime compute_translation_regime(struct kvm_vcpu *vcpu, u32 o } } +static u64 effective_tcr2(struct kvm_vcpu *vcpu, enum trans_regime regime) +{ + if (regime == TR_EL10) { + if (vcpu_has_nv(vcpu) && + !(__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En)) + return 0; + + return vcpu_read_sys_reg(vcpu, TCR2_EL1); + } + + return vcpu_read_sys_reg(vcpu, TCR2_EL2); +} + static bool s1pie_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime) { if (!kvm_has_s1pie(vcpu->kvm)) return false; - switch (regime) { - case TR_EL2: - case TR_EL20: - return vcpu_read_sys_reg(vcpu, TCR2_EL2) & TCR2_EL2_PIE; - case TR_EL10: - return (__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En) && - (__vcpu_sys_reg(vcpu, TCR2_EL1) & TCR2_EL1_PIE); - default: - BUG(); - } + /* Abuse TCR2_EL1_PIE and use it for EL2 as well */ + return effective_tcr2(vcpu, regime) & TCR2_EL1_PIE; } static void compute_s1poe(struct kvm_vcpu *vcpu, struct s1_walk_info *wi) @@ -76,23 +129,11 @@ static void compute_s1poe(struct kvm_vcpu *vcpu, struct s1_walk_info *wi) return; } - switch (wi->regime) { - case TR_EL2: - case TR_EL20: - val = 
vcpu_read_sys_reg(vcpu, TCR2_EL2); - wi->poe = val & TCR2_EL2_POE; - wi->e0poe = (wi->regime == TR_EL20) && (val & TCR2_EL2_E0POE); - break; - case TR_EL10: - if (__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En) { - wi->poe = wi->e0poe = false; - return; - } + val = effective_tcr2(vcpu, wi->regime); - val = __vcpu_sys_reg(vcpu, TCR2_EL1); - wi->poe = val & TCR2_EL1_POE; - wi->e0poe = val & TCR2_EL1_E0POE; - } + /* Abuse TCR2_EL1_* for EL2 */ + wi->poe = val & TCR2_EL1_POE; + wi->e0poe = (wi->regime != TR_EL2) && (val & TCR2_EL1_E0POE); } static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, @@ -102,14 +143,16 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, unsigned int stride, x; bool va55, tbi, lva; - hcr = __vcpu_sys_reg(vcpu, HCR_EL2); - va55 = va & BIT(55); - if (wi->regime == TR_EL2 && va55) - goto addrsz; - - wi->s2 = wi->regime == TR_EL10 && (hcr & (HCR_VM | HCR_DC)); + if (vcpu_has_nv(vcpu)) { + hcr = __vcpu_sys_reg(vcpu, HCR_EL2); + wi->s2 = wi->regime == TR_EL10 && (hcr & (HCR_VM | HCR_DC)); + } else { + WARN_ON_ONCE(wi->regime != TR_EL10); + wi->s2 = false; + hcr = 0; + } switch (wi->regime) { case TR_EL10: @@ -131,6 +174,46 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, BUG(); } + /* Someone was silly enough to encode TG0/TG1 differently */ + if (va55 && wi->regime != TR_EL2) { + wi->txsz = FIELD_GET(TCR_T1SZ_MASK, tcr); + tg = FIELD_GET(TCR_TG1_MASK, tcr); + + switch (tg << TCR_TG1_SHIFT) { + case TCR_TG1_4K: + wi->pgshift = 12; break; + case TCR_TG1_16K: + wi->pgshift = 14; break; + case TCR_TG1_64K: + default: /* IMPDEF: treat any other value as 64k */ + wi->pgshift = 16; break; + } + } else { + wi->txsz = FIELD_GET(TCR_T0SZ_MASK, tcr); + tg = FIELD_GET(TCR_TG0_MASK, tcr); + + switch (tg << TCR_TG0_SHIFT) { + case TCR_TG0_4K: + wi->pgshift = 12; break; + case TCR_TG0_16K: + wi->pgshift = 14; break; + case TCR_TG0_64K: + default: /* IMPDEF: treat any other value as 64k */ + wi->pgshift = 16; break; + } + } + + wi->pa52bit = has_52bit_pa(vcpu, wi, tcr); + + ia_bits = get_ia_size(wi); + + /* AArch64.S1StartLevel() */ + stride = wi->pgshift - 3; + wi->sl = 3 - (((ia_bits - 1) - wi->pgshift) / stride); + + if (wi->regime == TR_EL2 && va55) + goto addrsz; + tbi = (wi->regime == TR_EL2 ? FIELD_GET(TCR_EL2_TBI, tcr) : (va55 ? @@ -140,6 +223,12 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, if (!tbi && (u64)sign_extend64(va, 55) != va) goto addrsz; + wi->sh = (wi->regime == TR_EL2 ? + FIELD_GET(TCR_EL2_SH0_MASK, tcr) : + (va55 ? 
+ FIELD_GET(TCR_SH1_MASK, tcr) : + FIELD_GET(TCR_SH0_MASK, tcr))); + va = (u64)sign_extend64(va, 55); /* Let's put the MMU disabled case aside immediately */ @@ -194,53 +283,20 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, /* R_BVXDG */ wi->hpd |= (wi->poe || wi->e0poe); - /* Someone was silly enough to encode TG0/TG1 differently */ - if (va55) { - wi->txsz = FIELD_GET(TCR_T1SZ_MASK, tcr); - tg = FIELD_GET(TCR_TG1_MASK, tcr); - - switch (tg << TCR_TG1_SHIFT) { - case TCR_TG1_4K: - wi->pgshift = 12; break; - case TCR_TG1_16K: - wi->pgshift = 14; break; - case TCR_TG1_64K: - default: /* IMPDEF: treat any other value as 64k */ - wi->pgshift = 16; break; - } - } else { - wi->txsz = FIELD_GET(TCR_T0SZ_MASK, tcr); - tg = FIELD_GET(TCR_TG0_MASK, tcr); - - switch (tg << TCR_TG0_SHIFT) { - case TCR_TG0_4K: - wi->pgshift = 12; break; - case TCR_TG0_16K: - wi->pgshift = 14; break; - case TCR_TG0_64K: - default: /* IMPDEF: treat any other value as 64k */ - wi->pgshift = 16; break; - } - } - /* R_PLCGL, R_YXNYW */ if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR2_EL1, ST, 48_47)) { if (wi->txsz > 39) - goto transfault_l0; + goto transfault; } else { if (wi->txsz > 48 || (BIT(wi->pgshift) == SZ_64K && wi->txsz > 47)) - goto transfault_l0; + goto transfault; } /* R_GTJBY, R_SXWGM */ switch (BIT(wi->pgshift)) { case SZ_4K: - lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN4, 52_BIT); - lva &= tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS); - break; case SZ_16K: - lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN16, 52_BIT); - lva &= tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS); + lva = wi->pa52bit; break; case SZ_64K: lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, VARange, 52); @@ -248,38 +304,42 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, } if ((lva && wi->txsz < 12) || (!lva && wi->txsz < 16)) - goto transfault_l0; - - ia_bits = get_ia_size(wi); + goto transfault; /* R_YYVYV, I_THCZK */ if ((!va55 && va > GENMASK(ia_bits - 1, 0)) || (va55 && va < GENMASK(63, ia_bits))) - goto transfault_l0; + goto transfault; /* I_ZFSYQ */ if (wi->regime != TR_EL2 && (tcr & (va55 ? TCR_EPD1_MASK : TCR_EPD0_MASK))) - goto transfault_l0; + goto transfault; /* R_BNDVG and following statements */ if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, E0PD, IMP) && wi->as_el0 && (tcr & (va55 ? TCR_E0PD1 : TCR_E0PD0))) - goto transfault_l0; - - /* AArch64.S1StartLevel() */ - stride = wi->pgshift - 3; - wi->sl = 3 - (((ia_bits - 1) - wi->pgshift) / stride); + goto transfault; ps = (wi->regime == TR_EL2 ? FIELD_GET(TCR_EL2_PS_MASK, tcr) : FIELD_GET(TCR_IPS_MASK, tcr)); - wi->max_oa_bits = min(get_kvm_ipa_limit(), ps_to_output_size(ps)); + wi->max_oa_bits = min(get_kvm_ipa_limit(), ps_to_output_size(ps, wi->pa52bit)); /* Compute minimal alignment */ x = 3 + ia_bits - ((3 - wi->sl) * stride + wi->pgshift); wi->baddr = ttbr & TTBRx_EL1_BADDR; + if (wi->pa52bit) { + /* + * Force the alignment on 64 bytes for top-level tables + * smaller than 8 entries, since TTBR.BADDR[5:2] are used to + * store bits [51:48] of the first level of lookup. + */ + x = max(x, 6); + + wi->baddr |= FIELD_GET(GENMASK_ULL(5, 2), ttbr) << 48; + } /* R_VPBBF */ if (check_output_size(wi->baddr, wi)) @@ -289,12 +349,17 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, return 0; -addrsz: /* Address Size Fault level 0 */ +addrsz: + /* + * Address Size Fault level 0 to indicate it comes from TTBR. + * yes, this is an oddity. 
+ */ fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(0), false); return -EFAULT; -transfault_l0: /* Translation Fault level 0 */ - fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(0), false); +transfault: + /* Translation Fault on start level */ + fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(wi->sl), false); return -EFAULT; } @@ -339,6 +404,17 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, ipa = kvm_s2_trans_output(&s2_trans); } + if (wi->filter) { + ret = wi->filter->fn(&(struct s1_walk_context) + { + .wi = wi, + .table_ipa = baddr, + .level = level, + }, wi->filter->priv); + if (ret) + return ret; + } + ret = kvm_read_guest(vcpu->kvm, ipa, &desc, sizeof(desc)); if (ret) { fail_s1_walk(wr, ESR_ELx_FSC_SEA_TTW(level), false); @@ -369,7 +445,7 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, wr->PXNTable |= FIELD_GET(PMD_TABLE_PXN, desc); } - baddr = desc & GENMASK_ULL(47, wi->pgshift); + baddr = desc_to_oa(wi, desc); /* Check for out-of-range OA */ if (check_output_size(baddr, wi)) @@ -386,11 +462,11 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, switch (BIT(wi->pgshift)) { case SZ_4K: - valid_block = level == 1 || level == 2; + valid_block = level == 1 || level == 2 || (wi->pa52bit && level == 0); break; case SZ_16K: case SZ_64K: - valid_block = level == 2; + valid_block = level == 2 || (wi->pa52bit && level == 1); break; } @@ -398,7 +474,8 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, goto transfault; } - if (check_output_size(desc & GENMASK(47, va_bottom), wi)) + baddr = desc_to_oa(wi, desc); + if (check_output_size(baddr & GENMASK(52, va_bottom), wi)) goto addrsz; if (!(desc & PTE_AF)) { @@ -411,7 +488,7 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, wr->failed = false; wr->level = level; wr->desc = desc; - wr->pa = desc & GENMASK(47, va_bottom); + wr->pa = baddr & GENMASK(52, va_bottom); wr->pa |= va & GENMASK_ULL(va_bottom - 1, 0); wr->nG = (wi->regime != TR_EL2) && (desc & PTE_NG); @@ -640,21 +717,36 @@ static u8 combine_s1_s2_attr(u8 s1, u8 s2) #define ATTR_OSH 0b10 #define ATTR_ISH 0b11 -static u8 compute_sh(u8 attr, u64 desc) +static u8 compute_final_sh(u8 attr, u8 sh) { - u8 sh; - /* Any form of device, as well as NC has SH[1:0]=0b10 */ if (MEMATTR_IS_DEVICE(attr) || attr == MEMATTR(NC, NC)) return ATTR_OSH; - sh = FIELD_GET(PTE_SHARED, desc); if (sh == ATTR_RSV) /* Reserved, mapped to NSH */ sh = ATTR_NSH; return sh; } +static u8 compute_s1_sh(struct s1_walk_info *wi, struct s1_walk_result *wr, + u8 attr) +{ + u8 sh; + + /* + * non-52bit and LPA have their basic shareability described in the + * descriptor. LPA2 gets it from the corresponding field in TCR, + * conveniently recorded in the walk info. 
+ */ + if (!wi->pa52bit || BIT(wi->pgshift) == SZ_64K) + sh = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S1_SH, wr->desc); + else + sh = wi->sh; + + return compute_final_sh(attr, sh); +} + static u8 combine_sh(u8 s1_sh, u8 s2_sh) { if (s1_sh == ATTR_OSH || s2_sh == ATTR_OSH) @@ -668,7 +760,7 @@ static u8 combine_sh(u8 s1_sh, u8 s2_sh) static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par, struct kvm_s2_trans *tr) { - u8 s1_parattr, s2_memattr, final_attr; + u8 s1_parattr, s2_memattr, final_attr, s2_sh; u64 par; /* If S2 has failed to translate, report the damage */ @@ -741,17 +833,19 @@ static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par, !MEMATTR_IS_DEVICE(final_attr)) final_attr = MEMATTR(NC, NC); + s2_sh = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S2_SH, tr->desc); + par = FIELD_PREP(SYS_PAR_EL1_ATTR, final_attr); par |= tr->output & GENMASK(47, 12); par |= FIELD_PREP(SYS_PAR_EL1_SH, combine_sh(FIELD_GET(SYS_PAR_EL1_SH, s1_par), - compute_sh(final_attr, tr->desc))); + compute_final_sh(final_attr, s2_sh))); return par; } -static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_result *wr, - enum trans_regime regime) +static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, + struct s1_walk_result *wr) { u64 par; @@ -764,9 +858,9 @@ static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_result *wr, } else if (wr->level == S1_MMU_DISABLED) { /* MMU off or HCR_EL2.DC == 1 */ par = SYS_PAR_EL1_NSE; - par |= wr->pa & GENMASK_ULL(47, 12); + par |= wr->pa & SYS_PAR_EL1_PA; - if (regime == TR_EL10 && + if (wi->regime == TR_EL10 && vcpu_has_nv(vcpu) && (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_DC)) { par |= FIELD_PREP(SYS_PAR_EL1_ATTR, MEMATTR(WbRaWa, WbRaWa)); @@ -781,14 +875,14 @@ static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_result *wr, par = SYS_PAR_EL1_NSE; - mair = (regime == TR_EL10 ? + mair = (wi->regime == TR_EL10 ? vcpu_read_sys_reg(vcpu, MAIR_EL1) : vcpu_read_sys_reg(vcpu, MAIR_EL2)); mair >>= FIELD_GET(PTE_ATTRINDX_MASK, wr->desc) * 8; mair &= 0xff; - sctlr = (regime == TR_EL10 ? + sctlr = (wi->regime == TR_EL10 ? vcpu_read_sys_reg(vcpu, SCTLR_EL1) : vcpu_read_sys_reg(vcpu, SCTLR_EL2)); @@ -797,9 +891,9 @@ static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_result *wr, mair = MEMATTR(NC, NC); par |= FIELD_PREP(SYS_PAR_EL1_ATTR, mair); - par |= wr->pa & GENMASK_ULL(47, 12); + par |= wr->pa & SYS_PAR_EL1_PA; - sh = compute_sh(mair, wr->desc); + sh = compute_s1_sh(wi, wr, mair); par |= FIELD_PREP(SYS_PAR_EL1_SH, sh); } @@ -873,7 +967,7 @@ static void compute_s1_direct_permissions(struct kvm_vcpu *vcpu, wxn = (vcpu_read_sys_reg(vcpu, SCTLR_EL2) & SCTLR_ELx_WXN); break; case TR_EL10: - wxn = (__vcpu_sys_reg(vcpu, SCTLR_EL1) & SCTLR_ELx_WXN); + wxn = (vcpu_read_sys_reg(vcpu, SCTLR_EL1) & SCTLR_ELx_WXN); break; } @@ -1186,7 +1280,7 @@ static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) fail_s1_walk(&wr, ESR_ELx_FSC_PERM_L(wr.level), false); compute_par: - return compute_par_s1(vcpu, &wr, wi.regime); + return compute_par_s1(vcpu, &wi, &wr); } /* @@ -1202,7 +1296,7 @@ static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) { struct mmu_config config; struct kvm_s2_mmu *mmu; - bool fail; + bool fail, mmu_cs; u64 par; par = SYS_PAR_EL1_F; @@ -1218,8 +1312,13 @@ static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) * If HCR_EL2.{E2H,TGE} == {1,1}, the MMU context is already * the right one (as we trapped from vEL2). If not, save the * full MMU context. 
+ * + * We are also guaranteed to be in the correct context if + * we're not in a nested VM. */ - if (vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)) + mmu_cs = (vcpu_has_nv(vcpu) && + !(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu))); + if (!mmu_cs) goto skip_mmu_switch; /* @@ -1287,7 +1386,7 @@ skip_mmu_switch: write_sysreg_hcr(HCR_HOST_VHE_FLAGS); - if (!(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu))) + if (mmu_cs) __mmu_config_restore(&config); return par; @@ -1470,3 +1569,68 @@ int __kvm_translate_va(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, return 0; } + +struct desc_match { + u64 ipa; + int level; +}; + +static int match_s1_desc(struct s1_walk_context *ctxt, void *priv) +{ + struct desc_match *dm = priv; + u64 ipa = dm->ipa; + + /* Use S1 granule alignment */ + ipa &= GENMASK(51, ctxt->wi->pgshift); + + /* Not the IPA we're looking for? Continue. */ + if (ipa != ctxt->table_ipa) + return 0; + + /* Note the level and interrupt the walk */ + dm->level = ctxt->level; + return -EINTR; +} + +int __kvm_find_s1_desc_level(struct kvm_vcpu *vcpu, u64 va, u64 ipa, int *level) +{ + struct desc_match dm = { + .ipa = ipa, + }; + struct s1_walk_info wi = { + .filter = &(struct s1_walk_filter){ + .fn = match_s1_desc, + .priv = &dm, + }, + .regime = TR_EL10, + .as_el0 = false, + .pan = false, + }; + struct s1_walk_result wr = {}; + int ret; + + ret = setup_s1_walk(vcpu, &wi, &wr, va); + if (ret) + return ret; + + /* We really expect the S1 MMU to be on here... */ + if (WARN_ON_ONCE(wr.level == S1_MMU_DISABLED)) { + *level = 0; + return 0; + } + + /* Walk the guest's PT, looking for a match along the way */ + ret = walk_s1(vcpu, &wi, &wr, va); + switch (ret) { + case -EINTR: + /* We interrupted the walk on a match, return the level */ + *level = dm.level; + return 0; + case 0: + /* The walk completed, we failed to find the entry */ + return -ENOENT; + default: + /* Any other error... */ + return ret; + } +} diff --git a/arch/arm64/kvm/config.c b/arch/arm64/kvm/config.c index da66c4a14775..fbd8944a3dea 100644 --- a/arch/arm64/kvm/config.c +++ b/arch/arm64/kvm/config.c @@ -7,12 +7,22 @@ #include <linux/kvm_host.h> #include <asm/sysreg.h> +/* + * Describes the dependencies between a set of bits (or the negation + * of a set of RES0 bits) and a feature. The flags indicate how the + * data is interpreted. + */ struct reg_bits_to_feat_map { - u64 bits; + union { + u64 bits; + u64 *res0p; + }; #define NEVER_FGU BIT(0) /* Can trap, but never UNDEF */ #define CALL_FUNC BIT(1) /* Needs to evaluate tons of crap */ #define FIXED_VALUE BIT(2) /* RAZ/WI or RAO/WI in KVM */ +#define RES0_POINTER BIT(3) /* Pointer to RES0 value instead of bits */ + unsigned long flags; union { @@ -28,9 +38,27 @@ struct reg_bits_to_feat_map { }; }; -#define __NEEDS_FEAT_3(m, f, id, fld, lim) \ +/* + * Describes the dependencies for a given register: + * + * @feat_map describes the dependency for the whole register. If the + * features the register depends on are not present, the whole + * register is effectively RES0. + * + * @bit_feat_map describes the dependencies for a set of bits in that + * register. If the features these bits depend on are not present, the + * bits are effectively RES0. 
+ */ +struct reg_feat_map_desc { + const char *name; + const struct reg_bits_to_feat_map feat_map; + const struct reg_bits_to_feat_map *bit_feat_map; + const unsigned int bit_feat_map_sz; +}; + +#define __NEEDS_FEAT_3(m, f, w, id, fld, lim) \ { \ - .bits = (m), \ + .w = (m), \ .flags = (f), \ .regidx = IDREG_IDX(SYS_ ## id), \ .shift = id ##_## fld ## _SHIFT, \ @@ -39,28 +67,63 @@ struct reg_bits_to_feat_map { .lo_lim = id ##_## fld ##_## lim \ } -#define __NEEDS_FEAT_2(m, f, fun, dummy) \ +#define __NEEDS_FEAT_2(m, f, w, fun, dummy) \ { \ - .bits = (m), \ + .w = (m), \ .flags = (f) | CALL_FUNC, \ .fval = (fun), \ } -#define __NEEDS_FEAT_1(m, f, fun) \ +#define __NEEDS_FEAT_1(m, f, w, fun) \ { \ - .bits = (m), \ + .w = (m), \ .flags = (f) | CALL_FUNC, \ .match = (fun), \ } +#define __NEEDS_FEAT_FLAG(m, f, w, ...) \ + CONCATENATE(__NEEDS_FEAT_, COUNT_ARGS(__VA_ARGS__))(m, f, w, __VA_ARGS__) + #define NEEDS_FEAT_FLAG(m, f, ...) \ - CONCATENATE(__NEEDS_FEAT_, COUNT_ARGS(__VA_ARGS__))(m, f, __VA_ARGS__) + __NEEDS_FEAT_FLAG(m, f, bits, __VA_ARGS__) #define NEEDS_FEAT_FIXED(m, ...) \ - NEEDS_FEAT_FLAG(m, FIXED_VALUE, __VA_ARGS__, 0) + __NEEDS_FEAT_FLAG(m, FIXED_VALUE, bits, __VA_ARGS__, 0) +#define NEEDS_FEAT_RES0(p, ...) \ + __NEEDS_FEAT_FLAG(p, RES0_POINTER, res0p, __VA_ARGS__) + +/* + * Declare the dependency between a set of bits and a set of features, + * generating a struct reg_bit_to_feat_map. + */ #define NEEDS_FEAT(m, ...) NEEDS_FEAT_FLAG(m, 0, __VA_ARGS__) +/* + * Declare the dependency between a non-FGT register, a set of + * feature, and the set of individual bits it contains. This generates + * a struct reg_feat_map_desc. + */ +#define DECLARE_FEAT_MAP(n, r, m, f) \ + struct reg_feat_map_desc n = { \ + .name = #r, \ + .feat_map = NEEDS_FEAT(~r##_RES0, f), \ + .bit_feat_map = m, \ + .bit_feat_map_sz = ARRAY_SIZE(m), \ + } + +/* + * Specialised version of the above for FGT registers that have their + * RES0 masks described as struct fgt_masks. 
+ */ +#define DECLARE_FEAT_MAP_FGT(n, msk, m, f) \ + struct reg_feat_map_desc n = { \ + .name = #msk, \ + .feat_map = NEEDS_FEAT_RES0(&msk.res0, f),\ + .bit_feat_map = m, \ + .bit_feat_map_sz = ARRAY_SIZE(m), \ + } + #define FEAT_SPE ID_AA64DFR0_EL1, PMSVer, IMP #define FEAT_SPE_FnE ID_AA64DFR0_EL1, PMSVer, V1P2 #define FEAT_BRBE ID_AA64DFR0_EL1, BRBE, IMP @@ -73,6 +136,7 @@ struct reg_bits_to_feat_map { #define FEAT_AA32EL0 ID_AA64PFR0_EL1, EL0, AARCH32 #define FEAT_AA32EL1 ID_AA64PFR0_EL1, EL1, AARCH32 #define FEAT_AA64EL1 ID_AA64PFR0_EL1, EL1, IMP +#define FEAT_AA64EL2 ID_AA64PFR0_EL1, EL2, IMP #define FEAT_AA64EL3 ID_AA64PFR0_EL1, EL3, IMP #define FEAT_AIE ID_AA64MMFR3_EL1, AIE, IMP #define FEAT_S2POE ID_AA64MMFR3_EL1, S2POE, IMP @@ -131,7 +195,6 @@ struct reg_bits_to_feat_map { #define FEAT_SPMU ID_AA64DFR1_EL1, SPMU, IMP #define FEAT_SPE_nVM ID_AA64DFR2_EL1, SPE_nVM, IMP #define FEAT_STEP2 ID_AA64DFR2_EL1, STEP, IMP -#define FEAT_SYSREG128 ID_AA64ISAR2_EL1, SYSREG_128, IMP #define FEAT_CPA2 ID_AA64ISAR3_EL1, CPA, CPA2 #define FEAT_ASID2 ID_AA64MMFR4_EL1, ASID2, IMP #define FEAT_MEC ID_AA64MMFR3_EL1, MEC, IMP @@ -143,7 +206,6 @@ struct reg_bits_to_feat_map { #define FEAT_LSMAOC ID_AA64MMFR2_EL1, LSM, IMP #define FEAT_MixedEnd ID_AA64MMFR0_EL1, BIGEND, IMP #define FEAT_MixedEndEL0 ID_AA64MMFR0_EL1, BIGENDEL0, IMP -#define FEAT_MTE2 ID_AA64PFR1_EL1, MTE, MTE2 #define FEAT_MTE_ASYNC ID_AA64PFR1_EL1, MTE_frac, ASYNC #define FEAT_MTE_STORE_ONLY ID_AA64PFR2_EL1, MTESTOREONLY, IMP #define FEAT_PAN ID_AA64MMFR1_EL1, PAN, IMP @@ -151,7 +213,9 @@ struct reg_bits_to_feat_map { #define FEAT_SSBS ID_AA64PFR1_EL1, SSBS, IMP #define FEAT_TIDCP1 ID_AA64MMFR1_EL1, TIDCP1, IMP #define FEAT_FGT ID_AA64MMFR0_EL1, FGT, IMP +#define FEAT_FGT2 ID_AA64MMFR0_EL1, FGT, FGT2 #define FEAT_MTPMU ID_AA64DFR0_EL1, MTPMU, IMP +#define FEAT_HCX ID_AA64MMFR1_EL1, HCX, IMP static bool not_feat_aa64el3(struct kvm *kvm) { @@ -397,6 +461,10 @@ static const struct reg_bits_to_feat_map hfgrtr_feat_map[] = { NEVER_FGU, FEAT_AA64EL1), }; + +static const DECLARE_FEAT_MAP_FGT(hfgrtr_desc, hfgrtr_masks, + hfgrtr_feat_map, FEAT_FGT); + static const struct reg_bits_to_feat_map hfgwtr_feat_map[] = { NEEDS_FEAT(HFGWTR_EL2_nAMAIR2_EL1 | HFGWTR_EL2_nMAIR2_EL1, @@ -461,6 +529,9 @@ static const struct reg_bits_to_feat_map hfgwtr_feat_map[] = { NEVER_FGU, FEAT_AA64EL1), }; +static const DECLARE_FEAT_MAP_FGT(hfgwtr_desc, hfgwtr_masks, + hfgwtr_feat_map, FEAT_FGT); + static const struct reg_bits_to_feat_map hdfgrtr_feat_map[] = { NEEDS_FEAT(HDFGRTR_EL2_PMBIDR_EL1 | HDFGRTR_EL2_PMSLATFR_EL1 | @@ -528,6 +599,9 @@ static const struct reg_bits_to_feat_map hdfgrtr_feat_map[] = { NEVER_FGU, FEAT_AA64EL1) }; +static const DECLARE_FEAT_MAP_FGT(hdfgrtr_desc, hdfgrtr_masks, + hdfgrtr_feat_map, FEAT_FGT); + static const struct reg_bits_to_feat_map hdfgwtr_feat_map[] = { NEEDS_FEAT(HDFGWTR_EL2_PMSLATFR_EL1 | HDFGWTR_EL2_PMSIRR_EL1 | @@ -588,6 +662,8 @@ static const struct reg_bits_to_feat_map hdfgwtr_feat_map[] = { NEEDS_FEAT(HDFGWTR_EL2_TRFCR_EL1, FEAT_TRF), }; +static const DECLARE_FEAT_MAP_FGT(hdfgwtr_desc, hdfgwtr_masks, + hdfgwtr_feat_map, FEAT_FGT); static const struct reg_bits_to_feat_map hfgitr_feat_map[] = { NEEDS_FEAT(HFGITR_EL2_PSBCSYNC, FEAT_SPEv1p5), @@ -662,6 +738,9 @@ static const struct reg_bits_to_feat_map hfgitr_feat_map[] = { NEVER_FGU, FEAT_AA64EL1), }; +static const DECLARE_FEAT_MAP_FGT(hfgitr_desc, hfgitr_masks, + hfgitr_feat_map, FEAT_FGT); + static const struct reg_bits_to_feat_map hafgrtr_feat_map[] = { 
NEEDS_FEAT(HAFGRTR_EL2_AMEVTYPER115_EL0 | HAFGRTR_EL2_AMEVTYPER114_EL0 | @@ -704,11 +783,17 @@ static const struct reg_bits_to_feat_map hafgrtr_feat_map[] = { FEAT_AMUv1), }; +static const DECLARE_FEAT_MAP_FGT(hafgrtr_desc, hafgrtr_masks, + hafgrtr_feat_map, FEAT_FGT); + static const struct reg_bits_to_feat_map hfgitr2_feat_map[] = { NEEDS_FEAT(HFGITR2_EL2_nDCCIVAPS, FEAT_PoPS), NEEDS_FEAT(HFGITR2_EL2_TSBCSYNC, FEAT_TRBEv1p1) }; +static const DECLARE_FEAT_MAP_FGT(hfgitr2_desc, hfgitr2_masks, + hfgitr2_feat_map, FEAT_FGT2); + static const struct reg_bits_to_feat_map hfgrtr2_feat_map[] = { NEEDS_FEAT(HFGRTR2_EL2_nPFAR_EL1, FEAT_PFAR), NEEDS_FEAT(HFGRTR2_EL2_nERXGSR_EL1, FEAT_RASv2), @@ -728,6 +813,9 @@ static const struct reg_bits_to_feat_map hfgrtr2_feat_map[] = { NEEDS_FEAT(HFGRTR2_EL2_nRCWSMASK_EL1, FEAT_THE), }; +static const DECLARE_FEAT_MAP_FGT(hfgrtr2_desc, hfgrtr2_masks, + hfgrtr2_feat_map, FEAT_FGT2); + static const struct reg_bits_to_feat_map hfgwtr2_feat_map[] = { NEEDS_FEAT(HFGWTR2_EL2_nPFAR_EL1, FEAT_PFAR), NEEDS_FEAT(HFGWTR2_EL2_nACTLRALIAS_EL1 | @@ -746,6 +834,9 @@ static const struct reg_bits_to_feat_map hfgwtr2_feat_map[] = { NEEDS_FEAT(HFGWTR2_EL2_nRCWSMASK_EL1, FEAT_THE), }; +static const DECLARE_FEAT_MAP_FGT(hfgwtr2_desc, hfgwtr2_masks, + hfgwtr2_feat_map, FEAT_FGT2); + static const struct reg_bits_to_feat_map hdfgrtr2_feat_map[] = { NEEDS_FEAT(HDFGRTR2_EL2_nMDSELR_EL1, FEAT_Debugv8p9), NEEDS_FEAT(HDFGRTR2_EL2_nPMECR_EL1, feat_ebep_pmuv3_ss), @@ -776,6 +867,9 @@ static const struct reg_bits_to_feat_map hdfgrtr2_feat_map[] = { NEEDS_FEAT(HDFGRTR2_EL2_nTRBMPAM_EL1, feat_trbe_mpam), }; +static const DECLARE_FEAT_MAP_FGT(hdfgrtr2_desc, hdfgrtr2_masks, + hdfgrtr2_feat_map, FEAT_FGT2); + static const struct reg_bits_to_feat_map hdfgwtr2_feat_map[] = { NEEDS_FEAT(HDFGWTR2_EL2_nMDSELR_EL1, FEAT_Debugv8p9), NEEDS_FEAT(HDFGWTR2_EL2_nPMECR_EL1, feat_ebep_pmuv3_ss), @@ -804,6 +898,10 @@ static const struct reg_bits_to_feat_map hdfgwtr2_feat_map[] = { NEEDS_FEAT(HDFGWTR2_EL2_nTRBMPAM_EL1, feat_trbe_mpam), }; +static const DECLARE_FEAT_MAP_FGT(hdfgwtr2_desc, hdfgwtr2_masks, + hdfgwtr2_feat_map, FEAT_FGT2); + + static const struct reg_bits_to_feat_map hcrx_feat_map[] = { NEEDS_FEAT(HCRX_EL2_PACMEn, feat_pauth_lr), NEEDS_FEAT(HCRX_EL2_EnFPM, FEAT_FPMR), @@ -833,6 +931,10 @@ static const struct reg_bits_to_feat_map hcrx_feat_map[] = { NEEDS_FEAT(HCRX_EL2_EnAS0, FEAT_LS64_ACCDATA), }; + +static const DECLARE_FEAT_MAP(hcrx_desc, __HCRX_EL2, + hcrx_feat_map, FEAT_HCX); + static const struct reg_bits_to_feat_map hcr_feat_map[] = { NEEDS_FEAT(HCR_EL2_TID0, FEAT_AA32EL0), NEEDS_FEAT_FIXED(HCR_EL2_RW, compute_hcr_rw), @@ -904,6 +1006,9 @@ static const struct reg_bits_to_feat_map hcr_feat_map[] = { NEEDS_FEAT_FIXED(HCR_EL2_E2H, compute_hcr_e2h), }; +static const DECLARE_FEAT_MAP(hcr_desc, HCR_EL2, + hcr_feat_map, FEAT_AA64EL2); + static const struct reg_bits_to_feat_map sctlr2_feat_map[] = { NEEDS_FEAT(SCTLR2_EL1_NMEA | SCTLR2_EL1_EASE, @@ -921,6 +1026,9 @@ static const struct reg_bits_to_feat_map sctlr2_feat_map[] = { FEAT_CPA2), }; +static const DECLARE_FEAT_MAP(sctlr2_desc, SCTLR2_EL1, + sctlr2_feat_map, FEAT_SCTLR2); + static const struct reg_bits_to_feat_map tcr2_el2_feat_map[] = { NEEDS_FEAT(TCR2_EL2_FNG1 | TCR2_EL2_FNG0 | @@ -943,6 +1051,9 @@ static const struct reg_bits_to_feat_map tcr2_el2_feat_map[] = { NEEDS_FEAT(TCR2_EL2_PIE, FEAT_S1PIE), }; +static const DECLARE_FEAT_MAP(tcr2_el2_desc, TCR2_EL2, + tcr2_el2_feat_map, FEAT_TCR2); + static const struct reg_bits_to_feat_map 
sctlr_el1_feat_map[] = { NEEDS_FEAT(SCTLR_EL1_CP15BEN | SCTLR_EL1_ITD | @@ -1017,6 +1128,9 @@ static const struct reg_bits_to_feat_map sctlr_el1_feat_map[] = { FEAT_AA64EL1), }; +static const DECLARE_FEAT_MAP(sctlr_el1_desc, SCTLR_EL1, + sctlr_el1_feat_map, FEAT_AA64EL1); + static const struct reg_bits_to_feat_map mdcr_el2_feat_map[] = { NEEDS_FEAT(MDCR_EL2_EBWE, FEAT_Debugv8p9), NEEDS_FEAT(MDCR_EL2_TDOSA, FEAT_DoubleLock), @@ -1048,6 +1162,9 @@ static const struct reg_bits_to_feat_map mdcr_el2_feat_map[] = { FEAT_AA64EL1), }; +static const DECLARE_FEAT_MAP(mdcr_el2_desc, MDCR_EL2, + mdcr_el2_feat_map, FEAT_AA64EL2); + static void __init check_feat_map(const struct reg_bits_to_feat_map *map, int map_size, u64 res0, const char *str) { @@ -1061,32 +1178,36 @@ static void __init check_feat_map(const struct reg_bits_to_feat_map *map, str, mask ^ ~res0); } +static u64 reg_feat_map_bits(const struct reg_bits_to_feat_map *map) +{ + return map->flags & RES0_POINTER ? ~(*map->res0p) : map->bits; +} + +static void __init check_reg_desc(const struct reg_feat_map_desc *r) +{ + check_feat_map(r->bit_feat_map, r->bit_feat_map_sz, + ~reg_feat_map_bits(&r->feat_map), r->name); +} + void __init check_feature_map(void) { - check_feat_map(hfgrtr_feat_map, ARRAY_SIZE(hfgrtr_feat_map), - hfgrtr_masks.res0, hfgrtr_masks.str); - check_feat_map(hfgwtr_feat_map, ARRAY_SIZE(hfgwtr_feat_map), - hfgwtr_masks.res0, hfgwtr_masks.str); - check_feat_map(hfgitr_feat_map, ARRAY_SIZE(hfgitr_feat_map), - hfgitr_masks.res0, hfgitr_masks.str); - check_feat_map(hdfgrtr_feat_map, ARRAY_SIZE(hdfgrtr_feat_map), - hdfgrtr_masks.res0, hdfgrtr_masks.str); - check_feat_map(hdfgwtr_feat_map, ARRAY_SIZE(hdfgwtr_feat_map), - hdfgwtr_masks.res0, hdfgwtr_masks.str); - check_feat_map(hafgrtr_feat_map, ARRAY_SIZE(hafgrtr_feat_map), - hafgrtr_masks.res0, hafgrtr_masks.str); - check_feat_map(hcrx_feat_map, ARRAY_SIZE(hcrx_feat_map), - __HCRX_EL2_RES0, "HCRX_EL2"); - check_feat_map(hcr_feat_map, ARRAY_SIZE(hcr_feat_map), - HCR_EL2_RES0, "HCR_EL2"); - check_feat_map(sctlr2_feat_map, ARRAY_SIZE(sctlr2_feat_map), - SCTLR2_EL1_RES0, "SCTLR2_EL1"); - check_feat_map(tcr2_el2_feat_map, ARRAY_SIZE(tcr2_el2_feat_map), - TCR2_EL2_RES0, "TCR2_EL2"); - check_feat_map(sctlr_el1_feat_map, ARRAY_SIZE(sctlr_el1_feat_map), - SCTLR_EL1_RES0, "SCTLR_EL1"); - check_feat_map(mdcr_el2_feat_map, ARRAY_SIZE(mdcr_el2_feat_map), - MDCR_EL2_RES0, "MDCR_EL2"); + check_reg_desc(&hfgrtr_desc); + check_reg_desc(&hfgwtr_desc); + check_reg_desc(&hfgitr_desc); + check_reg_desc(&hdfgrtr_desc); + check_reg_desc(&hdfgwtr_desc); + check_reg_desc(&hafgrtr_desc); + check_reg_desc(&hfgrtr2_desc); + check_reg_desc(&hfgwtr2_desc); + check_reg_desc(&hfgitr2_desc); + check_reg_desc(&hdfgrtr2_desc); + check_reg_desc(&hdfgwtr2_desc); + check_reg_desc(&hcrx_desc); + check_reg_desc(&hcr_desc); + check_reg_desc(&sctlr2_desc); + check_reg_desc(&tcr2_el2_desc); + check_reg_desc(&sctlr_el1_desc); + check_reg_desc(&mdcr_el2_desc); } static bool idreg_feat_match(struct kvm *kvm, const struct reg_bits_to_feat_map *map) @@ -1129,7 +1250,7 @@ static u64 __compute_fixed_bits(struct kvm *kvm, match = idreg_feat_match(kvm, &map[i]); if (!match || (map[i].flags & FIXED_VALUE)) - val |= map[i].bits; + val |= reg_feat_map_bits(&map[i]); } return val; @@ -1145,15 +1266,36 @@ static u64 compute_res0_bits(struct kvm *kvm, require, exclude | FIXED_VALUE); } -static u64 compute_fixed_bits(struct kvm *kvm, - const struct reg_bits_to_feat_map *map, - int map_size, - u64 *fixed_bits, - unsigned long require, - 
unsigned long exclude) +static u64 compute_reg_res0_bits(struct kvm *kvm, + const struct reg_feat_map_desc *r, + unsigned long require, unsigned long exclude) + +{ + u64 res0; + + res0 = compute_res0_bits(kvm, r->bit_feat_map, r->bit_feat_map_sz, + require, exclude); + + /* + * If computing FGUs, don't take RES0 or register existence + * into account -- we're not computing bits for the register + * itself. + */ + if (!(exclude & NEVER_FGU)) { + res0 |= compute_res0_bits(kvm, &r->feat_map, 1, require, exclude); + res0 |= ~reg_feat_map_bits(&r->feat_map); + } + + return res0; +} + +static u64 compute_reg_fixed_bits(struct kvm *kvm, + const struct reg_feat_map_desc *r, + u64 *fixed_bits, unsigned long require, + unsigned long exclude) { - return __compute_fixed_bits(kvm, map, map_size, fixed_bits, - require | FIXED_VALUE, exclude); + return __compute_fixed_bits(kvm, r->bit_feat_map, r->bit_feat_map_sz, + fixed_bits, require | FIXED_VALUE, exclude); } void compute_fgu(struct kvm *kvm, enum fgt_group_id fgt) @@ -1162,51 +1304,40 @@ void compute_fgu(struct kvm *kvm, enum fgt_group_id fgt) switch (fgt) { case HFGRTR_GROUP: - val |= compute_res0_bits(kvm, hfgrtr_feat_map, - ARRAY_SIZE(hfgrtr_feat_map), - 0, NEVER_FGU); - val |= compute_res0_bits(kvm, hfgwtr_feat_map, - ARRAY_SIZE(hfgwtr_feat_map), - 0, NEVER_FGU); + val |= compute_reg_res0_bits(kvm, &hfgrtr_desc, + 0, NEVER_FGU); + val |= compute_reg_res0_bits(kvm, &hfgwtr_desc, + 0, NEVER_FGU); break; case HFGITR_GROUP: - val |= compute_res0_bits(kvm, hfgitr_feat_map, - ARRAY_SIZE(hfgitr_feat_map), - 0, NEVER_FGU); + val |= compute_reg_res0_bits(kvm, &hfgitr_desc, + 0, NEVER_FGU); break; case HDFGRTR_GROUP: - val |= compute_res0_bits(kvm, hdfgrtr_feat_map, - ARRAY_SIZE(hdfgrtr_feat_map), - 0, NEVER_FGU); - val |= compute_res0_bits(kvm, hdfgwtr_feat_map, - ARRAY_SIZE(hdfgwtr_feat_map), - 0, NEVER_FGU); + val |= compute_reg_res0_bits(kvm, &hdfgrtr_desc, + 0, NEVER_FGU); + val |= compute_reg_res0_bits(kvm, &hdfgwtr_desc, + 0, NEVER_FGU); break; case HAFGRTR_GROUP: - val |= compute_res0_bits(kvm, hafgrtr_feat_map, - ARRAY_SIZE(hafgrtr_feat_map), - 0, NEVER_FGU); + val |= compute_reg_res0_bits(kvm, &hafgrtr_desc, + 0, NEVER_FGU); break; case HFGRTR2_GROUP: - val |= compute_res0_bits(kvm, hfgrtr2_feat_map, - ARRAY_SIZE(hfgrtr2_feat_map), - 0, NEVER_FGU); - val |= compute_res0_bits(kvm, hfgwtr2_feat_map, - ARRAY_SIZE(hfgwtr2_feat_map), - 0, NEVER_FGU); + val |= compute_reg_res0_bits(kvm, &hfgrtr2_desc, + 0, NEVER_FGU); + val |= compute_reg_res0_bits(kvm, &hfgwtr2_desc, + 0, NEVER_FGU); break; case HFGITR2_GROUP: - val |= compute_res0_bits(kvm, hfgitr2_feat_map, - ARRAY_SIZE(hfgitr2_feat_map), - 0, NEVER_FGU); + val |= compute_reg_res0_bits(kvm, &hfgitr2_desc, + 0, NEVER_FGU); break; case HDFGRTR2_GROUP: - val |= compute_res0_bits(kvm, hdfgrtr2_feat_map, - ARRAY_SIZE(hdfgrtr2_feat_map), - 0, NEVER_FGU); - val |= compute_res0_bits(kvm, hdfgwtr2_feat_map, - ARRAY_SIZE(hdfgwtr2_feat_map), - 0, NEVER_FGU); + val |= compute_reg_res0_bits(kvm, &hdfgrtr2_desc, + 0, NEVER_FGU); + val |= compute_reg_res0_bits(kvm, &hdfgwtr2_desc, + 0, NEVER_FGU); break; default: BUG(); @@ -1221,109 +1352,74 @@ void get_reg_fixed_bits(struct kvm *kvm, enum vcpu_sysreg reg, u64 *res0, u64 *r switch (reg) { case HFGRTR_EL2: - *res0 = compute_res0_bits(kvm, hfgrtr_feat_map, - ARRAY_SIZE(hfgrtr_feat_map), 0, 0); - *res0 |= hfgrtr_masks.res0; + *res0 = compute_reg_res0_bits(kvm, &hfgrtr_desc, 0, 0); *res1 = HFGRTR_EL2_RES1; break; case HFGWTR_EL2: - *res0 = compute_res0_bits(kvm, 
hfgwtr_feat_map, - ARRAY_SIZE(hfgwtr_feat_map), 0, 0); - *res0 |= hfgwtr_masks.res0; + *res0 = compute_reg_res0_bits(kvm, &hfgwtr_desc, 0, 0); *res1 = HFGWTR_EL2_RES1; break; case HFGITR_EL2: - *res0 = compute_res0_bits(kvm, hfgitr_feat_map, - ARRAY_SIZE(hfgitr_feat_map), 0, 0); - *res0 |= hfgitr_masks.res0; + *res0 = compute_reg_res0_bits(kvm, &hfgitr_desc, 0, 0); *res1 = HFGITR_EL2_RES1; break; case HDFGRTR_EL2: - *res0 = compute_res0_bits(kvm, hdfgrtr_feat_map, - ARRAY_SIZE(hdfgrtr_feat_map), 0, 0); - *res0 |= hdfgrtr_masks.res0; + *res0 = compute_reg_res0_bits(kvm, &hdfgrtr_desc, 0, 0); *res1 = HDFGRTR_EL2_RES1; break; case HDFGWTR_EL2: - *res0 = compute_res0_bits(kvm, hdfgwtr_feat_map, - ARRAY_SIZE(hdfgwtr_feat_map), 0, 0); - *res0 |= hdfgwtr_masks.res0; + *res0 = compute_reg_res0_bits(kvm, &hdfgwtr_desc, 0, 0); *res1 = HDFGWTR_EL2_RES1; break; case HAFGRTR_EL2: - *res0 = compute_res0_bits(kvm, hafgrtr_feat_map, - ARRAY_SIZE(hafgrtr_feat_map), 0, 0); - *res0 |= hafgrtr_masks.res0; + *res0 = compute_reg_res0_bits(kvm, &hafgrtr_desc, 0, 0); *res1 = HAFGRTR_EL2_RES1; break; case HFGRTR2_EL2: - *res0 = compute_res0_bits(kvm, hfgrtr2_feat_map, - ARRAY_SIZE(hfgrtr2_feat_map), 0, 0); - *res0 |= hfgrtr2_masks.res0; + *res0 = compute_reg_res0_bits(kvm, &hfgrtr2_desc, 0, 0); *res1 = HFGRTR2_EL2_RES1; break; case HFGWTR2_EL2: - *res0 = compute_res0_bits(kvm, hfgwtr2_feat_map, - ARRAY_SIZE(hfgwtr2_feat_map), 0, 0); - *res0 |= hfgwtr2_masks.res0; + *res0 = compute_reg_res0_bits(kvm, &hfgwtr2_desc, 0, 0); *res1 = HFGWTR2_EL2_RES1; break; case HFGITR2_EL2: - *res0 = compute_res0_bits(kvm, hfgitr2_feat_map, - ARRAY_SIZE(hfgitr2_feat_map), 0, 0); - *res0 |= hfgitr2_masks.res0; + *res0 = compute_reg_res0_bits(kvm, &hfgitr2_desc, 0, 0); *res1 = HFGITR2_EL2_RES1; break; case HDFGRTR2_EL2: - *res0 = compute_res0_bits(kvm, hdfgrtr2_feat_map, - ARRAY_SIZE(hdfgrtr2_feat_map), 0, 0); - *res0 |= hdfgrtr2_masks.res0; + *res0 = compute_reg_res0_bits(kvm, &hdfgrtr2_desc, 0, 0); *res1 = HDFGRTR2_EL2_RES1; break; case HDFGWTR2_EL2: - *res0 = compute_res0_bits(kvm, hdfgwtr2_feat_map, - ARRAY_SIZE(hdfgwtr2_feat_map), 0, 0); - *res0 |= hdfgwtr2_masks.res0; + *res0 = compute_reg_res0_bits(kvm, &hdfgwtr2_desc, 0, 0); *res1 = HDFGWTR2_EL2_RES1; break; case HCRX_EL2: - *res0 = compute_res0_bits(kvm, hcrx_feat_map, - ARRAY_SIZE(hcrx_feat_map), 0, 0); - *res0 |= __HCRX_EL2_RES0; + *res0 = compute_reg_res0_bits(kvm, &hcrx_desc, 0, 0); *res1 = __HCRX_EL2_RES1; break; case HCR_EL2: - mask = compute_fixed_bits(kvm, hcr_feat_map, - ARRAY_SIZE(hcr_feat_map), &fixed, - 0, 0); - *res0 = compute_res0_bits(kvm, hcr_feat_map, - ARRAY_SIZE(hcr_feat_map), 0, 0); - *res0 |= HCR_EL2_RES0 | (mask & ~fixed); + mask = compute_reg_fixed_bits(kvm, &hcr_desc, &fixed, 0, 0); + *res0 = compute_reg_res0_bits(kvm, &hcr_desc, 0, 0); + *res0 |= (mask & ~fixed); *res1 = HCR_EL2_RES1 | (mask & fixed); break; case SCTLR2_EL1: case SCTLR2_EL2: - *res0 = compute_res0_bits(kvm, sctlr2_feat_map, - ARRAY_SIZE(sctlr2_feat_map), 0, 0); - *res0 |= SCTLR2_EL1_RES0; + *res0 = compute_reg_res0_bits(kvm, &sctlr2_desc, 0, 0); *res1 = SCTLR2_EL1_RES1; break; case TCR2_EL2: - *res0 = compute_res0_bits(kvm, tcr2_el2_feat_map, - ARRAY_SIZE(tcr2_el2_feat_map), 0, 0); - *res0 |= TCR2_EL2_RES0; + *res0 = compute_reg_res0_bits(kvm, &tcr2_el2_desc, 0, 0); *res1 = TCR2_EL2_RES1; break; case SCTLR_EL1: - *res0 = compute_res0_bits(kvm, sctlr_el1_feat_map, - ARRAY_SIZE(sctlr_el1_feat_map), 0, 0); - *res0 |= SCTLR_EL1_RES0; + *res0 = compute_reg_res0_bits(kvm, &sctlr_el1_desc, 0, 
0); *res1 = SCTLR_EL1_RES1; break; case MDCR_EL2: - *res0 = compute_res0_bits(kvm, mdcr_el2_feat_map, - ARRAY_SIZE(mdcr_el2_feat_map), 0, 0); - *res0 |= MDCR_EL2_RES0; + *res0 = compute_reg_res0_bits(kvm, &mdcr_el2_desc, 0, 0); *res1 = MDCR_EL2_RES1; break; default: diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c index e027d9c32b0d..3515a273eaa2 100644 --- a/arch/arm64/kvm/debug.c +++ b/arch/arm64/kvm/debug.c @@ -56,6 +56,9 @@ static void kvm_arm_setup_mdcr_el2(struct kvm_vcpu *vcpu) if (!kvm_guest_owns_debug_regs(vcpu)) vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA; + if (vcpu_has_nv(vcpu)) + kvm_nested_setup_mdcr_el2(vcpu); + /* Write MDCR_EL2 directly if we're already at EL2 */ if (has_vhe()) write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); @@ -243,29 +246,29 @@ void kvm_debug_handle_oslar(struct kvm_vcpu *vcpu, u64 val) preempt_enable(); } -void kvm_enable_trbe(void) +static bool skip_trbe_access(bool skip_condition) { - if (has_vhe() || is_protected_kvm_enabled() || - WARN_ON_ONCE(preemptible())) - return; + return (WARN_ON_ONCE(preemptible()) || skip_condition || + is_protected_kvm_enabled() || !is_kvm_arm_initialised()); +} - host_data_set_flag(TRBE_ENABLED); +void kvm_enable_trbe(void) +{ + if (!skip_trbe_access(has_vhe())) + host_data_set_flag(TRBE_ENABLED); } EXPORT_SYMBOL_GPL(kvm_enable_trbe); void kvm_disable_trbe(void) { - if (has_vhe() || is_protected_kvm_enabled() || - WARN_ON_ONCE(preemptible())) - return; - - host_data_clear_flag(TRBE_ENABLED); + if (!skip_trbe_access(has_vhe())) + host_data_clear_flag(TRBE_ENABLED); } EXPORT_SYMBOL_GPL(kvm_disable_trbe); void kvm_tracing_set_el1_configuration(u64 trfcr_while_in_guest) { - if (is_protected_kvm_enabled() || WARN_ON_ONCE(preemptible())) + if (skip_trbe_access(false)) return; if (has_vhe()) { diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c index af69c897c2c3..834f13fb1fb7 100644 --- a/arch/arm64/kvm/emulate-nested.c +++ b/arch/arm64/kvm/emulate-nested.c @@ -1185,6 +1185,7 @@ static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = { SR_TRAP(SYS_PMSIRR_EL1, CGT_MDCR_TPMS), SR_TRAP(SYS_PMSLATFR_EL1, CGT_MDCR_TPMS), SR_TRAP(SYS_PMSNEVFR_EL1, CGT_MDCR_TPMS), + SR_TRAP(SYS_PMSDSFR_EL1, CGT_MDCR_TPMS), SR_TRAP(SYS_TRFCR_EL1, CGT_MDCR_TTRF), SR_TRAP(SYS_TRBBASER_EL1, CGT_MDCR_E2TB), SR_TRAP(SYS_TRBLIMITR_EL1, CGT_MDCR_E2TB), diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 8bdb1eed090a..bca8c80e11da 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -559,6 +559,9 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, /* Dump the nVHE hypervisor backtrace */ kvm_nvhe_dump_backtrace(hyp_offset); + /* Dump the faulting instruction */ + dump_kernel_instr(panic_addr + kaslr_offset()); + /* * Hyp has panicked and we're going to handle that by panicking the * kernel. The kernel offset will be revealed in the panic so we're diff --git a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h index ce31d3b73603..184ad7a39950 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h +++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h @@ -29,7 +29,7 @@ struct pkvm_hyp_vcpu { }; /* - * Holds the relevant data for running a protected vm. + * Holds the relevant data for running a vm in protected mode. 
*/ struct pkvm_hyp_vm { struct kvm kvm; @@ -67,6 +67,8 @@ static inline bool pkvm_hyp_vm_is_protected(struct pkvm_hyp_vm *hyp_vm) void pkvm_hyp_vm_table_init(void *tbl); +int __pkvm_reserve_vm(void); +void __pkvm_unreserve_vm(pkvm_handle_t handle); int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva, unsigned long pgd_hva); int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu, diff --git a/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h b/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h index 1e6d995968a1..ba5382c12787 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h +++ b/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h @@ -12,7 +12,8 @@ #include <asm/kvm_host.h> #define cpu_reg(ctxt, r) (ctxt)->regs.regs[r] -#define DECLARE_REG(type, name, ctxt, reg) \ +#define DECLARE_REG(type, name, ctxt, reg) \ + __always_unused int ___check_reg_ ## reg; \ type name = (type)cpu_reg(ctxt, (reg)) #endif /* __ARM64_KVM_NVHE_TRAP_HANDLER_H__ */ diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index 0b0a68b663d4..a244ec25f8c5 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -27,6 +27,7 @@ hyp-obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o stacktrace.o ffa.o hyp-obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ ../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o +hyp-obj-y += ../../../kernel/smccc-call.o hyp-obj-$(CONFIG_LIST_HARDENED) += list_debug.o hyp-obj-y += $(lib-objs) diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c index 3369dd0c4009..4e16f9b96f63 100644 --- a/arch/arm64/kvm/hyp/nvhe/ffa.c +++ b/arch/arm64/kvm/hyp/nvhe/ffa.c @@ -71,36 +71,68 @@ static u32 hyp_ffa_version; static bool has_version_negotiated; static hyp_spinlock_t version_lock; -static void ffa_to_smccc_error(struct arm_smccc_res *res, u64 ffa_errno) +static void ffa_to_smccc_error(struct arm_smccc_1_2_regs *res, u64 ffa_errno) { - *res = (struct arm_smccc_res) { + *res = (struct arm_smccc_1_2_regs) { .a0 = FFA_ERROR, .a2 = ffa_errno, }; } -static void ffa_to_smccc_res_prop(struct arm_smccc_res *res, int ret, u64 prop) +static void ffa_to_smccc_res_prop(struct arm_smccc_1_2_regs *res, int ret, u64 prop) { if (ret == FFA_RET_SUCCESS) { - *res = (struct arm_smccc_res) { .a0 = FFA_SUCCESS, - .a2 = prop }; + *res = (struct arm_smccc_1_2_regs) { .a0 = FFA_SUCCESS, + .a2 = prop }; } else { ffa_to_smccc_error(res, ret); } } -static void ffa_to_smccc_res(struct arm_smccc_res *res, int ret) +static void ffa_to_smccc_res(struct arm_smccc_1_2_regs *res, int ret) { ffa_to_smccc_res_prop(res, ret, 0); } static void ffa_set_retval(struct kvm_cpu_context *ctxt, - struct arm_smccc_res *res) + struct arm_smccc_1_2_regs *res) { cpu_reg(ctxt, 0) = res->a0; cpu_reg(ctxt, 1) = res->a1; cpu_reg(ctxt, 2) = res->a2; cpu_reg(ctxt, 3) = res->a3; + cpu_reg(ctxt, 4) = res->a4; + cpu_reg(ctxt, 5) = res->a5; + cpu_reg(ctxt, 6) = res->a6; + cpu_reg(ctxt, 7) = res->a7; + + /* + * DEN0028C 2.6: SMC32/HVC32 call from aarch64 must preserve x8-x30. + * + * In FF-A 1.2, we cannot rely on the function ID sent by the caller to + * detect 32-bit calls because the CPU cycle management interfaces (e.g. + * FFA_MSG_WAIT, FFA_RUN) are 32-bit only but can have 64-bit responses. + * + * FFA-1.3 introduces 64-bit variants of the CPU cycle management + * interfaces. 
Moreover, FF-A 1.3 clarifies that SMC32 direct requests + * complete with SMC32 direct responses which *should* allow us to use the + * function ID sent by the caller to determine whether to return x8-x17. + * + * Note that we also cannot rely on function IDs in the response. + * + * Given the above, assume SMC64 and send back x0-x17 unconditionally + * as the passthrough code (__kvm_hyp_host_forward_smc) does the same. + */ + cpu_reg(ctxt, 8) = res->a8; + cpu_reg(ctxt, 9) = res->a9; + cpu_reg(ctxt, 10) = res->a10; + cpu_reg(ctxt, 11) = res->a11; + cpu_reg(ctxt, 12) = res->a12; + cpu_reg(ctxt, 13) = res->a13; + cpu_reg(ctxt, 14) = res->a14; + cpu_reg(ctxt, 15) = res->a15; + cpu_reg(ctxt, 16) = res->a16; + cpu_reg(ctxt, 17) = res->a17; } static bool is_ffa_call(u64 func_id) @@ -113,82 +145,92 @@ static bool is_ffa_call(u64 func_id) static int ffa_map_hyp_buffers(u64 ffa_page_count) { - struct arm_smccc_res res; + struct arm_smccc_1_2_regs res; - arm_smccc_1_1_smc(FFA_FN64_RXTX_MAP, - hyp_virt_to_phys(hyp_buffers.tx), - hyp_virt_to_phys(hyp_buffers.rx), - ffa_page_count, - 0, 0, 0, 0, - &res); + arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) { + .a0 = FFA_FN64_RXTX_MAP, + .a1 = hyp_virt_to_phys(hyp_buffers.tx), + .a2 = hyp_virt_to_phys(hyp_buffers.rx), + .a3 = ffa_page_count, + }, &res); return res.a0 == FFA_SUCCESS ? FFA_RET_SUCCESS : res.a2; } static int ffa_unmap_hyp_buffers(void) { - struct arm_smccc_res res; + struct arm_smccc_1_2_regs res; - arm_smccc_1_1_smc(FFA_RXTX_UNMAP, - HOST_FFA_ID, - 0, 0, 0, 0, 0, 0, - &res); + arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) { + .a0 = FFA_RXTX_UNMAP, + .a1 = HOST_FFA_ID, + }, &res); return res.a0 == FFA_SUCCESS ? FFA_RET_SUCCESS : res.a2; } -static void ffa_mem_frag_tx(struct arm_smccc_res *res, u32 handle_lo, +static void ffa_mem_frag_tx(struct arm_smccc_1_2_regs *res, u32 handle_lo, u32 handle_hi, u32 fraglen, u32 endpoint_id) { - arm_smccc_1_1_smc(FFA_MEM_FRAG_TX, - handle_lo, handle_hi, fraglen, endpoint_id, - 0, 0, 0, - res); + arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) { + .a0 = FFA_MEM_FRAG_TX, + .a1 = handle_lo, + .a2 = handle_hi, + .a3 = fraglen, + .a4 = endpoint_id, + }, res); } -static void ffa_mem_frag_rx(struct arm_smccc_res *res, u32 handle_lo, +static void ffa_mem_frag_rx(struct arm_smccc_1_2_regs *res, u32 handle_lo, u32 handle_hi, u32 fragoff) { - arm_smccc_1_1_smc(FFA_MEM_FRAG_RX, - handle_lo, handle_hi, fragoff, HOST_FFA_ID, - 0, 0, 0, - res); + arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) { + .a0 = FFA_MEM_FRAG_RX, + .a1 = handle_lo, + .a2 = handle_hi, + .a3 = fragoff, + .a4 = HOST_FFA_ID, + }, res); } -static void ffa_mem_xfer(struct arm_smccc_res *res, u64 func_id, u32 len, +static void ffa_mem_xfer(struct arm_smccc_1_2_regs *res, u64 func_id, u32 len, u32 fraglen) { - arm_smccc_1_1_smc(func_id, len, fraglen, - 0, 0, 0, 0, 0, - res); + arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) { + .a0 = func_id, + .a1 = len, + .a2 = fraglen, + }, res); } -static void ffa_mem_reclaim(struct arm_smccc_res *res, u32 handle_lo, +static void ffa_mem_reclaim(struct arm_smccc_1_2_regs *res, u32 handle_lo, u32 handle_hi, u32 flags) { - arm_smccc_1_1_smc(FFA_MEM_RECLAIM, - handle_lo, handle_hi, flags, - 0, 0, 0, 0, - res); + arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) { + .a0 = FFA_MEM_RECLAIM, + .a1 = handle_lo, + .a2 = handle_hi, + .a3 = flags, + }, res); } -static void ffa_retrieve_req(struct arm_smccc_res *res, u32 len) +static void ffa_retrieve_req(struct arm_smccc_1_2_regs *res, u32 len) { -
arm_smccc_1_1_smc(FFA_FN64_MEM_RETRIEVE_REQ, - len, len, - 0, 0, 0, 0, 0, - res); + arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) { + .a0 = FFA_FN64_MEM_RETRIEVE_REQ, + .a1 = len, + .a2 = len, + }, res); } -static void ffa_rx_release(struct arm_smccc_res *res) +static void ffa_rx_release(struct arm_smccc_1_2_regs *res) { - arm_smccc_1_1_smc(FFA_RX_RELEASE, - 0, 0, - 0, 0, 0, 0, 0, - res); + arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) { + .a0 = FFA_RX_RELEASE, + }, res); } -static void do_ffa_rxtx_map(struct arm_smccc_res *res, +static void do_ffa_rxtx_map(struct arm_smccc_1_2_regs *res, struct kvm_cpu_context *ctxt) { DECLARE_REG(phys_addr_t, tx, ctxt, 1); @@ -267,7 +309,7 @@ err_unmap: goto out_unlock; } -static void do_ffa_rxtx_unmap(struct arm_smccc_res *res, +static void do_ffa_rxtx_unmap(struct arm_smccc_1_2_regs *res, struct kvm_cpu_context *ctxt) { DECLARE_REG(u32, id, ctxt, 1); @@ -368,7 +410,7 @@ static int ffa_host_unshare_ranges(struct ffa_mem_region_addr_range *ranges, return ret; } -static void do_ffa_mem_frag_tx(struct arm_smccc_res *res, +static void do_ffa_mem_frag_tx(struct arm_smccc_1_2_regs *res, struct kvm_cpu_context *ctxt) { DECLARE_REG(u32, handle_lo, ctxt, 1); @@ -427,7 +469,7 @@ out: } static void __do_ffa_mem_xfer(const u64 func_id, - struct arm_smccc_res *res, + struct arm_smccc_1_2_regs *res, struct kvm_cpu_context *ctxt) { DECLARE_REG(u32, len, ctxt, 1); @@ -521,7 +563,7 @@ err_unshare: __do_ffa_mem_xfer((fid), (res), (ctxt)); \ } while (0); -static void do_ffa_mem_reclaim(struct arm_smccc_res *res, +static void do_ffa_mem_reclaim(struct arm_smccc_1_2_regs *res, struct kvm_cpu_context *ctxt) { DECLARE_REG(u32, handle_lo, ctxt, 1); @@ -628,13 +670,26 @@ static bool ffa_call_supported(u64 func_id) case FFA_RXTX_MAP: case FFA_MEM_DONATE: case FFA_MEM_RETRIEVE_REQ: + /* Optional notification interfaces added in FF-A 1.1 */ + case FFA_NOTIFICATION_BITMAP_CREATE: + case FFA_NOTIFICATION_BITMAP_DESTROY: + case FFA_NOTIFICATION_BIND: + case FFA_NOTIFICATION_UNBIND: + case FFA_NOTIFICATION_SET: + case FFA_NOTIFICATION_GET: + case FFA_NOTIFICATION_INFO_GET: + /* Optional interfaces added in FF-A 1.2 */ + case FFA_MSG_SEND_DIRECT_REQ2: /* Optional per 7.5.1 */ + case FFA_MSG_SEND_DIRECT_RESP2: /* Optional per 7.5.1 */ + case FFA_CONSOLE_LOG: /* Optional per 13.1: not in Table 13.1 */ + case FFA_PARTITION_INFO_GET_REGS: /* Optional for virtual instances per 13.1 */ return false; } return true; } -static bool do_ffa_features(struct arm_smccc_res *res, +static bool do_ffa_features(struct arm_smccc_1_2_regs *res, struct kvm_cpu_context *ctxt) { DECLARE_REG(u32, id, ctxt, 1); @@ -666,21 +721,25 @@ out_handled: static int hyp_ffa_post_init(void) { size_t min_rxtx_sz; - struct arm_smccc_res res; + struct arm_smccc_1_2_regs res; - arm_smccc_1_1_smc(FFA_ID_GET, 0, 0, 0, 0, 0, 0, 0, &res); + arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs){ + .a0 = FFA_ID_GET, + }, &res); if (res.a0 != FFA_SUCCESS) return -EOPNOTSUPP; if (res.a2 != HOST_FFA_ID) return -EINVAL; - arm_smccc_1_1_smc(FFA_FEATURES, FFA_FN64_RXTX_MAP, - 0, 0, 0, 0, 0, 0, &res); + arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs){ + .a0 = FFA_FEATURES, + .a1 = FFA_FN64_RXTX_MAP, + }, &res); if (res.a0 != FFA_SUCCESS) return -EOPNOTSUPP; - switch (res.a2) { + switch (res.a2 & FFA_FEAT_RXTX_MIN_SZ_MASK) { case FFA_FEAT_RXTX_MIN_SZ_4K: min_rxtx_sz = SZ_4K; break; @@ -700,7 +759,7 @@ static int hyp_ffa_post_init(void) return 0; } -static void do_ffa_version(struct arm_smccc_res *res, +static void do_ffa_version(struct 
arm_smccc_1_2_regs *res, struct kvm_cpu_context *ctxt) { DECLARE_REG(u32, ffa_req_version, ctxt, 1); @@ -712,7 +771,10 @@ static void do_ffa_version(struct arm_smccc_res *res, hyp_spin_lock(&version_lock); if (has_version_negotiated) { - res->a0 = hyp_ffa_version; + if (FFA_MINOR_VERSION(ffa_req_version) < FFA_MINOR_VERSION(hyp_ffa_version)) + res->a0 = FFA_RET_NOT_SUPPORTED; + else + res->a0 = hyp_ffa_version; goto unlock; } @@ -721,9 +783,10 @@ static void do_ffa_version(struct arm_smccc_res *res, * first if TEE supports it. */ if (FFA_MINOR_VERSION(ffa_req_version) < FFA_MINOR_VERSION(hyp_ffa_version)) { - arm_smccc_1_1_smc(FFA_VERSION, ffa_req_version, 0, - 0, 0, 0, 0, 0, - res); + arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) { + .a0 = FFA_VERSION, + .a1 = ffa_req_version, + }, res); if (res->a0 == FFA_RET_NOT_SUPPORTED) goto unlock; @@ -740,7 +803,7 @@ unlock: hyp_spin_unlock(&version_lock); } -static void do_ffa_part_get(struct arm_smccc_res *res, +static void do_ffa_part_get(struct arm_smccc_1_2_regs *res, struct kvm_cpu_context *ctxt) { DECLARE_REG(u32, uuid0, ctxt, 1); @@ -756,9 +819,14 @@ static void do_ffa_part_get(struct arm_smccc_res *res, goto out_unlock; } - arm_smccc_1_1_smc(FFA_PARTITION_INFO_GET, uuid0, uuid1, - uuid2, uuid3, flags, 0, 0, - res); + arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) { + .a0 = FFA_PARTITION_INFO_GET, + .a1 = uuid0, + .a2 = uuid1, + .a3 = uuid2, + .a4 = uuid3, + .a5 = flags, + }, res); if (res->a0 != FFA_SUCCESS) goto out_unlock; @@ -791,7 +859,7 @@ out_unlock: bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt, u32 func_id) { - struct arm_smccc_res res; + struct arm_smccc_1_2_regs res; /* * There's no way we can tell what a non-standard SMC call might @@ -860,13 +928,16 @@ out_handled: int hyp_ffa_init(void *pages) { - struct arm_smccc_res res; + struct arm_smccc_1_2_regs res; void *tx, *rx; if (kvm_host_psci_config.smccc_version < ARM_SMCCC_VERSION_1_2) return 0; - arm_smccc_1_1_smc(FFA_VERSION, FFA_VERSION_1_1, 0, 0, 0, 0, 0, 0, &res); + arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) { + .a0 = FFA_VERSION, + .a1 = FFA_VERSION_1_2, + }, &res); if (res.a0 == FFA_RET_NOT_SUPPORTED) return 0; @@ -886,10 +957,10 @@ int hyp_ffa_init(void *pages) if (FFA_MAJOR_VERSION(res.a0) != 1) return -EOPNOTSUPP; - if (FFA_MINOR_VERSION(res.a0) < FFA_MINOR_VERSION(FFA_VERSION_1_1)) + if (FFA_MINOR_VERSION(res.a0) < FFA_MINOR_VERSION(FFA_VERSION_1_2)) hyp_ffa_version = res.a0; else - hyp_ffa_version = FFA_VERSION_1_1; + hyp_ffa_version = FFA_VERSION_1_2; tx = pages; pages += KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE; diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 3206b2c07f82..29430c031095 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -546,6 +546,18 @@ static void handle___pkvm_prot_finalize(struct kvm_cpu_context *host_ctxt) cpu_reg(host_ctxt, 1) = __pkvm_prot_finalize(); } +static void handle___pkvm_reserve_vm(struct kvm_cpu_context *host_ctxt) +{ + cpu_reg(host_ctxt, 1) = __pkvm_reserve_vm(); +} + +static void handle___pkvm_unreserve_vm(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1); + + __pkvm_unreserve_vm(handle); +} + static void handle___pkvm_init_vm(struct kvm_cpu_context *host_ctxt) { DECLARE_REG(struct kvm *, host_kvm, host_ctxt, 1); @@ -606,6 +618,8 @@ static const hcall_t host_hcall[] = { HANDLE_FUNC(__kvm_timer_set_cntvoff), HANDLE_FUNC(__vgic_v3_save_vmcr_aprs), HANDLE_FUNC(__vgic_v3_restore_vmcr_aprs), + 
HANDLE_FUNC(__pkvm_reserve_vm), + HANDLE_FUNC(__pkvm_unreserve_vm), HANDLE_FUNC(__pkvm_init_vm), HANDLE_FUNC(__pkvm_init_vcpu), HANDLE_FUNC(__pkvm_teardown_vm), diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 8957734d6183..ddc8beb55eee 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -1010,9 +1010,12 @@ static int __check_host_shared_guest(struct pkvm_hyp_vm *vm, u64 *__phys, u64 ip return ret; if (!kvm_pte_valid(pte)) return -ENOENT; - if (kvm_granule_size(level) != size) + if (size && kvm_granule_size(level) != size) return -E2BIG; + if (!size) + size = kvm_granule_size(level); + state = guest_get_page_state(pte, ipa); if (state != PKVM_PAGE_SHARED_BORROWED) return -EPERM; @@ -1100,7 +1103,7 @@ int __pkvm_host_relax_perms_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu, enum kvm_ if (prot & ~KVM_PGTABLE_PROT_RWX) return -EINVAL; - assert_host_shared_guest(vm, ipa, PAGE_SIZE); + assert_host_shared_guest(vm, ipa, 0); guest_lock_component(vm); ret = kvm_pgtable_stage2_relax_perms(&vm->pgt, ipa, prot, 0); guest_unlock_component(vm); @@ -1156,7 +1159,7 @@ int __pkvm_host_mkyoung_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu) if (pkvm_hyp_vm_is_protected(vm)) return -EPERM; - assert_host_shared_guest(vm, ipa, PAGE_SIZE); + assert_host_shared_guest(vm, ipa, 0); guest_lock_component(vm); kvm_pgtable_stage2_mkyoung(&vm->pgt, ipa, 0); guest_unlock_component(vm); diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 338505cb0171..05774aed09cb 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -23,8 +23,8 @@ unsigned int kvm_arm_vmid_bits; unsigned int kvm_host_sve_max_vl; /* - * The currently loaded hyp vCPU for each physical CPU. Used only when - * protected KVM is enabled, but for both protected and non-protected VMs. + * The currently loaded hyp vCPU for each physical CPU. Used in protected mode + * for both protected and non-protected VMs. */ static DEFINE_PER_CPU(struct pkvm_hyp_vcpu *, loaded_hyp_vcpu); @@ -135,7 +135,7 @@ static int pkvm_check_pvm_cpu_features(struct kvm_vcpu *vcpu) { struct kvm *kvm = vcpu->kvm; - /* Protected KVM does not support AArch32 guests. */ + /* No AArch32 support for protected guests. */ if (kvm_has_feat(kvm, ID_AA64PFR0_EL1, EL0, AARCH32) || kvm_has_feat(kvm, ID_AA64PFR0_EL1, EL1, AARCH32)) return -EINVAL; @@ -192,6 +192,11 @@ static int pkvm_vcpu_init_traps(struct pkvm_hyp_vcpu *hyp_vcpu) */ #define HANDLE_OFFSET 0x1000 +/* + * Marks a reserved but not yet used entry in the VM table. + */ +#define RESERVED_ENTRY ((void *)0xa110ca7ed) + static unsigned int vm_handle_to_idx(pkvm_handle_t handle) { return handle - HANDLE_OFFSET; @@ -210,8 +215,8 @@ static pkvm_handle_t idx_to_vm_handle(unsigned int idx) DEFINE_HYP_SPINLOCK(vm_table_lock); /* - * The table of VM entries for protected VMs in hyp. - * Allocated at hyp initialization and setup. + * A table that tracks all VMs in protected mode. + * Allocated during hyp initialization and setup. */ static struct pkvm_hyp_vm **vm_table; @@ -231,6 +236,10 @@ static struct pkvm_hyp_vm *get_vm_by_handle(pkvm_handle_t handle) if (unlikely(idx >= KVM_MAX_PVMS)) return NULL; + /* A reserved entry doesn't represent an initialized VM. 
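For context on the RESERVED_ENTRY scheme above (0xa110ca7ed reads as hex-speak for "allocated"): a reserved slot already owns its handle but must stay invisible to lookups until the VM is fully initialised. A toy user-space model of the table logic, with the hypervisor locking stripped out (all of this is illustrative, not kernel API):

#include <stddef.h>
#include <stdio.h>

#define HANDLE_OFFSET   0x1000
#define KVM_MAX_PVMS    255                     /* illustrative capacity */
#define RESERVED_ENTRY  ((void *)0xa110ca7ed)   /* hex-speak: "allocated" */

static void *vm_table[KVM_MAX_PVMS];

static unsigned int vm_handle_to_idx(unsigned int handle)
{
        return handle - HANDLE_OFFSET;
}

static unsigned int idx_to_vm_handle(unsigned int idx)
{
        return idx + HANDLE_OFFSET;
}

static void *get_vm_by_handle(unsigned int handle)
{
        unsigned int idx = vm_handle_to_idx(handle);

        if (idx >= KVM_MAX_PVMS)
                return NULL;
        /* A reserved slot is not yet an initialised VM: hide it. */
        if (vm_table[idx] == RESERVED_ENTRY)
                return NULL;
        return vm_table[idx];
}

int main(void)
{
        vm_table[0] = RESERVED_ENTRY;
        printf("%p\n", get_vm_by_handle(idx_to_vm_handle(0))); /* (nil) */
        return 0;
}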
*/ + if (unlikely(vm_table[idx] == RESERVED_ENTRY)) + return NULL; + return vm_table[idx]; } @@ -401,14 +410,26 @@ static void unpin_host_vcpus(struct pkvm_hyp_vcpu *hyp_vcpus[], } static void init_pkvm_hyp_vm(struct kvm *host_kvm, struct pkvm_hyp_vm *hyp_vm, - unsigned int nr_vcpus) + unsigned int nr_vcpus, pkvm_handle_t handle) { + struct kvm_s2_mmu *mmu = &hyp_vm->kvm.arch.mmu; + int idx = vm_handle_to_idx(handle); + + hyp_vm->kvm.arch.pkvm.handle = handle; + hyp_vm->host_kvm = host_kvm; hyp_vm->kvm.created_vcpus = nr_vcpus; - hyp_vm->kvm.arch.mmu.vtcr = host_mmu.arch.mmu.vtcr; - hyp_vm->kvm.arch.pkvm.enabled = READ_ONCE(host_kvm->arch.pkvm.enabled); + hyp_vm->kvm.arch.pkvm.is_protected = READ_ONCE(host_kvm->arch.pkvm.is_protected); + hyp_vm->kvm.arch.pkvm.is_created = true; hyp_vm->kvm.arch.flags = 0; pkvm_init_features_from_host(hyp_vm, host_kvm); + + /* VMID 0 is reserved for the host */ + atomic64_set(&mmu->vmid.id, idx + 1); + + mmu->vtcr = host_mmu.arch.mmu.vtcr; + mmu->arch = &hyp_vm->kvm.arch; + mmu->pgt = &hyp_vm->pgt; } static int pkvm_vcpu_init_sve(struct pkvm_hyp_vcpu *hyp_vcpu, struct kvm_vcpu *host_vcpu) @@ -480,7 +501,7 @@ done: return ret; } -static int find_free_vm_table_entry(struct kvm *host_kvm) +static int find_free_vm_table_entry(void) { int i; @@ -493,15 +514,13 @@ static int find_free_vm_table_entry(struct kvm *host_kvm) } /* - * Allocate a VM table entry and insert a pointer to the new vm. + * Reserve a VM table entry. * - * Return a unique handle to the protected VM on success, + * Return a unique handle to the VM on success, * negative error code on failure. */ -static pkvm_handle_t insert_vm_table_entry(struct kvm *host_kvm, - struct pkvm_hyp_vm *hyp_vm) +static int allocate_vm_table_entry(void) { - struct kvm_s2_mmu *mmu = &hyp_vm->kvm.arch.mmu; int idx; hyp_assert_lock_held(&vm_table_lock); @@ -514,20 +533,57 @@ static pkvm_handle_t insert_vm_table_entry(struct kvm *host_kvm, if (unlikely(!vm_table)) return -EINVAL; - idx = find_free_vm_table_entry(host_kvm); - if (idx < 0) + idx = find_free_vm_table_entry(); + if (unlikely(idx < 0)) return idx; - hyp_vm->kvm.arch.pkvm.handle = idx_to_vm_handle(idx); + vm_table[idx] = RESERVED_ENTRY; - /* VMID 0 is reserved for the host */ - atomic64_set(&mmu->vmid.id, idx + 1); + return idx; +} - mmu->arch = &hyp_vm->kvm.arch; - mmu->pgt = &hyp_vm->pgt; +static int __insert_vm_table_entry(pkvm_handle_t handle, + struct pkvm_hyp_vm *hyp_vm) +{ + unsigned int idx; + + hyp_assert_lock_held(&vm_table_lock); + + /* + * Initializing protected state might have failed, yet a malicious + * host could trigger this function. Thus, ensure that 'vm_table' + * exists. + */ + if (unlikely(!vm_table)) + return -EINVAL; + + idx = vm_handle_to_idx(handle); + if (unlikely(idx >= KVM_MAX_PVMS)) + return -EINVAL; + + if (unlikely(vm_table[idx] != RESERVED_ENTRY)) + return -EINVAL; vm_table[idx] = hyp_vm; - return hyp_vm->kvm.arch.pkvm.handle; + + return 0; +} + +/* + * Insert a pointer to the initialized VM into the VM table. + * + * Return 0 on success, or negative error code on failure. + */ +static int insert_vm_table_entry(pkvm_handle_t handle, + struct pkvm_hyp_vm *hyp_vm) +{ + int ret; + + hyp_spin_lock(&vm_table_lock); + ret = __insert_vm_table_entry(handle, hyp_vm); + hyp_spin_unlock(&vm_table_lock); + + return ret; } /* @@ -594,10 +650,45 @@ static void unmap_donated_memory_noclear(void *va, size_t size) } /* - * Initialize the hypervisor copy of the protected VM state using the - * memory donated by the host. 
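Extending the toy model above (same definitions assumed): reservation claims a free slot with the sentinel, and publication later swaps in the real pointer only if the slot is still in the reserved state, mirroring allocate_vm_table_entry() and __insert_vm_table_entry(). Locking is elided; this is a sketch only:

/* Reserve: claim a free slot; the handle is valid but the VM is invisible. */
static int allocate_vm_table_entry(void)
{
        for (unsigned int i = 0; i < KVM_MAX_PVMS; i++) {
                if (!vm_table[i]) {
                        vm_table[i] = RESERVED_ENTRY;
                        return (int)i;
                }
        }
        return -1;      /* table full */
}

/* Publish: the slot must still be reserved; then lookups may return it. */
static int insert_vm_table_entry(unsigned int handle, void *hyp_vm)
{
        unsigned int idx = vm_handle_to_idx(handle);

        if (idx >= KVM_MAX_PVMS || vm_table[idx] != RESERVED_ENTRY)
                return -1;
        vm_table[idx] = hyp_vm;
        return 0;
}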
+ * Reserves an entry in the hypervisor for a new VM in protected mode. * - * Unmaps the donated memory from the host at stage 2. + * Return a unique handle to the VM on success, negative error code on failure. + */ +int __pkvm_reserve_vm(void) +{ + int ret; + + hyp_spin_lock(&vm_table_lock); + ret = allocate_vm_table_entry(); + hyp_spin_unlock(&vm_table_lock); + + if (ret < 0) + return ret; + + return idx_to_vm_handle(ret); +} + +/* + * Removes a reserved entry, but only if it hasn't been used yet. + * Otherwise, the VM needs to be destroyed. + */ +void __pkvm_unreserve_vm(pkvm_handle_t handle) +{ + unsigned int idx = vm_handle_to_idx(handle); + + if (unlikely(!vm_table)) + return; + + hyp_spin_lock(&vm_table_lock); + if (likely(idx < KVM_MAX_PVMS && vm_table[idx] == RESERVED_ENTRY)) + remove_vm_table_entry(handle); + hyp_spin_unlock(&vm_table_lock); +} + +/* + * Initialize the hypervisor copy of the VM state using host-donated memory. + * + * Unmap the donated memory from the host at stage 2. * * host_kvm: A pointer to the host's struct kvm. * vm_hva: The host va of the area being donated for the VM state. @@ -606,8 +697,7 @@ static void unmap_donated_memory_noclear(void *va, size_t size) * the VM. Must be page aligned. Its size is implied by the VM's * VTCR. * - * Return a unique handle to the protected VM on success, - * negative error code on failure. + * Return 0 on success, negative error code on failure. */ int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva, unsigned long pgd_hva) @@ -615,6 +705,7 @@ int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva, struct pkvm_hyp_vm *hyp_vm = NULL; size_t vm_size, pgd_size; unsigned int nr_vcpus; + pkvm_handle_t handle; void *pgd = NULL; int ret; @@ -628,6 +719,12 @@ int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva, goto err_unpin_kvm; } + handle = READ_ONCE(host_kvm->arch.pkvm.handle); + if (unlikely(handle < HANDLE_OFFSET)) { + ret = -EINVAL; + goto err_unpin_kvm; + } + vm_size = pkvm_get_hyp_vm_size(nr_vcpus); pgd_size = kvm_pgtable_stage2_pgd_size(host_mmu.arch.mmu.vtcr); @@ -641,24 +738,19 @@ int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva, if (!pgd) goto err_remove_mappings; - init_pkvm_hyp_vm(host_kvm, hyp_vm, nr_vcpus); - - hyp_spin_lock(&vm_table_lock); - ret = insert_vm_table_entry(host_kvm, hyp_vm); - if (ret < 0) - goto err_unlock; + init_pkvm_hyp_vm(host_kvm, hyp_vm, nr_vcpus, handle); ret = kvm_guest_prepare_stage2(hyp_vm, pgd); if (ret) - goto err_remove_vm_table_entry; - hyp_spin_unlock(&vm_table_lock); + goto err_remove_mappings; - return hyp_vm->kvm.arch.pkvm.handle; + /* Must be called last since this publishes the VM. */ + ret = insert_vm_table_entry(handle, hyp_vm); + if (ret) + goto err_remove_mappings; + + return 0; -err_remove_vm_table_entry: - remove_vm_table_entry(hyp_vm->kvm.arch.pkvm.handle); -err_unlock: - hyp_spin_unlock(&vm_table_lock); err_remove_mappings: unmap_donated_memory(hyp_vm, vm_size); unmap_donated_memory(pgd, pgd_size); @@ -668,10 +760,9 @@ err_unpin_kvm: return ret; } /* - * Initialize the hypervisor copy of the protected vCPU state using the - * memory donated by the host. + * Initialize the hypervisor copy of the vCPU state using host-donated memory. * - * handle: The handle for the protected vm. + * handle: The hypervisor handle for the vm. * host_vcpu: A pointer to the corresponding host vcpu. * vcpu_hva: The host va of the area being donated for the vcpu state. * Must be page aligned.
The size of the area must be equal to diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index a48d3f5a5afb..90bd014e952f 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -192,6 +192,7 @@ static int fix_host_ownership_walker(const struct kvm_pgtable_visit_ctx *ctx, enum pkvm_page_state state; struct hyp_page *page; phys_addr_t phys; + enum kvm_pgtable_prot prot; if (!kvm_pte_valid(ctx->old)) return 0; @@ -210,11 +211,18 @@ static int fix_host_ownership_walker(const struct kvm_pgtable_visit_ctx *ctx, * configured in the hypervisor stage-1, and make sure to propagate them * to the hyp_vmemmap state. */ - state = pkvm_getstate(kvm_pgtable_hyp_pte_prot(ctx->old)); + prot = kvm_pgtable_hyp_pte_prot(ctx->old); + state = pkvm_getstate(prot); switch (state) { case PKVM_PAGE_OWNED: set_hyp_state(page, PKVM_PAGE_OWNED); - return host_stage2_set_owner_locked(phys, PAGE_SIZE, PKVM_ID_HYP); + /* hyp text is RO in the host stage-2 to be inspected on panic. */ + if (prot == PAGE_HYP_EXEC) { + set_host_state(page, PKVM_NOPAGE); + return host_stage2_idmap_locked(phys, PAGE_SIZE, KVM_PGTABLE_PROT_R); + } else { + return host_stage2_set_owner_locked(phys, PAGE_SIZE, PKVM_ID_HYP); + } case PKVM_PAGE_SHARED_OWNED: set_hyp_state(page, PKVM_PAGE_SHARED_OWNED); set_host_state(page, PKVM_PAGE_SHARED_BORROWED); diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c index d81275790e69..acd909b7f225 100644 --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -295,12 +295,8 @@ void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if) } } - /* - * GICv5 BET0 FEAT_GCIE_LEGACY doesn't include ICC_SRE_EL2. This is due - * to be relaxed in a future spec release, at which point this in - * condition can be dropped. - */ - if (!cpus_have_final_cap(ARM64_HAS_GICV5_CPUIF)) { + /* Only disable SRE if the host implements the GICv2 interface */ + if (static_branch_unlikely(&vgic_v3_has_v2_compat)) { /* * Prevent the guest from touching the ICC_SRE_EL1 system * register. Note that this may not have any effect, as @@ -329,19 +325,16 @@ void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if) cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2); } - /* - * Can be dropped in the future when GICv5 spec is relaxed. See comment - * above. - */ - if (!cpus_have_final_cap(ARM64_HAS_GICV5_CPUIF)) { + /* Only restore SRE if the host implements the GICv2 interface */ + if (static_branch_unlikely(&vgic_v3_has_v2_compat)) { val = read_gicreg(ICC_SRE_EL2); write_gicreg(val | ICC_SRE_EL2_ENABLE, ICC_SRE_EL2); - } - if (!cpu_if->vgic_sre) { - /* Make sure ENABLE is set at EL2 before setting SRE at EL1 */ - isb(); - write_gicreg(1, ICC_SRE_EL1); + if (!cpu_if->vgic_sre) { + /* Make sure ENABLE is set at EL2 before setting SRE at EL1 */ + isb(); + write_gicreg(1, ICC_SRE_EL1); + } } /* diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 0998ad4a2552..9984c492305a 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -95,6 +95,13 @@ static u64 __compute_hcr(struct kvm_vcpu *vcpu) /* Force NV2 in case the guest is forgetful... */ guest_hcr |= HCR_NV2; } + + /* + * Exclude the guest's TWED configuration if it hasn't set TWE + * to avoid potentially delaying traps for the host. 
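The comment above explains the TWED hunk: a guest hypervisor may program a WFE trap delay (FEAT_TWED) without setting HCR_EL2.TWE, and honouring the delay fields in that case would only slow the host's own traps. The masking that follows reduces to this user-space model; the bit positions (TWE at bit 14, TWEDEn at 59, TWEDEL at 63:60) are quoted from the Arm ARM for illustration and should be double-checked:

#include <stdint.h>
#include <stdio.h>

#define HCR_TWE         (UINT64_C(1) << 14)
#define HCR_TWEDEn      (UINT64_C(1) << 59)
#define HCR_TWEDEL      (UINT64_C(0xf) << 60)

static uint64_t sanitise_twed(uint64_t guest_hcr)
{
        /* No WFE trap requested -> the delay fields must not leak through. */
        if (!(guest_hcr & HCR_TWE))
                guest_hcr &= ~(HCR_TWEDEn | HCR_TWEDEL);
        return guest_hcr;
}

int main(void)
{
        uint64_t hcr = HCR_TWEDEn | (UINT64_C(3) << 60); /* TWED set, TWE clear */
        printf("%#llx\n", (unsigned long long)sanitise_twed(hcr)); /* 0 */
        return 0;
}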
+ */ + if (!(guest_hcr & HCR_TWE)) + guest_hcr &= ~(HCR_EL2_TWEDEn | HCR_EL2_TWEDEL); } BUG_ON(host_data_test_flag(VCPU_IN_HYP_CONTEXT) && diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index 6745f38b64f9..dfcd66c65517 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -106,7 +106,30 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr { unsigned long cpsr = *vcpu_cpsr(vcpu); bool is_aarch32 = vcpu_mode_is_32bit(vcpu); - u64 esr = 0; + u64 esr = 0, fsc; + int level; + + /* + * If injecting an abort from a failed S1PTW, rewalk the S1 PTs to + * find the failing level. If we can't find it, assume the error was + * transient and restart without changing the state. + */ + if (kvm_vcpu_abt_iss1tw(vcpu)) { + u64 hpfar = kvm_vcpu_get_fault_ipa(vcpu); + int ret; + + if (hpfar == INVALID_GPA) + return; + + ret = __kvm_find_s1_desc_level(vcpu, addr, hpfar, &level); + if (ret) + return; + + WARN_ON_ONCE(level < -1 || level > 3); + fsc = ESR_ELx_FSC_SEA_TTW(level); + } else { + fsc = ESR_ELx_FSC_EXTABT; + } /* This delight is brought to you by FEAT_DoubleFault2. */ if (effective_sctlr2_ease(vcpu)) @@ -133,7 +156,7 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr if (!is_iabt) esr |= ESR_ELx_EC_DABT_LOW << ESR_ELx_EC_SHIFT; - esr |= ESR_ELx_FSC_EXTABT; + esr |= fsc; vcpu_write_sys_reg(vcpu, addr, exception_far_elx(vcpu)); vcpu_write_sys_reg(vcpu, esr, exception_esr_elx(vcpu)); diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 736394292503..7cc964af8d30 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1431,11 +1431,8 @@ static int get_vma_page_shift(struct vm_area_struct *vma, unsigned long hva) * able to see the page's tags and therefore they must be initialised first. If * PG_mte_tagged is set, tags have already been initialised. * - * The race in the test/set of the PG_mte_tagged flag is handled by: - * - preventing VM_SHARED mappings in a memslot with MTE preventing two VMs - * racing to santise the same page - * - mmap_lock protects between a VM faulting a page in and the VMM performing - * an mprotect() to add VM_MTE + * Must be called with kvm->mmu_lock held to ensure the memory remains mapped + * while the tags are zeroed. */ static void sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn, unsigned long size) @@ -1482,13 +1479,132 @@ static bool kvm_vma_is_cacheable(struct vm_area_struct *vma) } } +static int prepare_mmu_memcache(struct kvm_vcpu *vcpu, bool topup_memcache, + void **memcache) +{ + int min_pages; + + if (!is_protected_kvm_enabled()) + *memcache = &vcpu->arch.mmu_page_cache; + else + *memcache = &vcpu->arch.pkvm_memcache; + + if (!topup_memcache) + return 0; + + min_pages = kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu); + + if (!is_protected_kvm_enabled()) + return kvm_mmu_topup_memory_cache(*memcache, min_pages); + + return topup_hyp_memcache(*memcache, min_pages); +} + +/* + * Potentially reduce shadow S2 permissions to match the guest's own S2. For + * exec faults, we'd only reach this point if the guest actually allowed it (see + * kvm_s2_handle_perm_fault). + * + * Also encode the level of the original translation in the SW bits of the leaf + * entry as a proxy for the span of that translation. This will be retrieved on + * TLB invalidation from the guest and used to limit the invalidation scope if a + * TTL hint or a range isn't provided. 
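The inject_fault.c hunk above stops reporting every injected abort as a plain external abort: when the guest's fault came from a stage-1 page-table walk, KVM re-walks the guest's S1 tables and encodes the failing level into the fault status code. Reduced to its core, the selection looks like the sketch below; the FSC encodings are quoted from the architecture for illustration (SEA-on-TTW at level n is 0x14 + n, so level -1 lands on 0x13, and a plain SEA is 0x10):

#include <stdint.h>
#include <stdio.h>

#define ESR_ELx_FSC_EXTABT      0x10
#define ESR_ELx_FSC_SEA_TTW(n)  (0x14 + (n))    /* n = -1 maps to 0x13 */

static uint64_t abort_fsc(int from_s1ptw, int level)
{
        return from_s1ptw ? (uint64_t)ESR_ELx_FSC_SEA_TTW(level)
                          : ESR_ELx_FSC_EXTABT;
}

int main(void)
{
        printf("%#llx\n", (unsigned long long)abort_fsc(1, -1)); /* 0x13 */
        printf("%#llx\n", (unsigned long long)abort_fsc(1, 3));  /* 0x17 */
        printf("%#llx\n", (unsigned long long)abort_fsc(0, 0));  /* 0x10 */
        return 0;
}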
+ */ +static void adjust_nested_fault_perms(struct kvm_s2_trans *nested, + enum kvm_pgtable_prot *prot, + bool *writable) +{ + *writable &= kvm_s2_trans_writable(nested); + if (!kvm_s2_trans_readable(nested)) + *prot &= ~KVM_PGTABLE_PROT_R; + + *prot |= kvm_encode_nested_level(nested); +} + +#define KVM_PGTABLE_WALK_MEMABORT_FLAGS (KVM_PGTABLE_WALK_HANDLE_FAULT | KVM_PGTABLE_WALK_SHARED) + +static int gmem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, + struct kvm_s2_trans *nested, + struct kvm_memory_slot *memslot, bool is_perm) +{ + bool write_fault, exec_fault, writable; + enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_MEMABORT_FLAGS; + enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R; + struct kvm_pgtable *pgt = vcpu->arch.hw_mmu->pgt; + unsigned long mmu_seq; + struct page *page; + struct kvm *kvm = vcpu->kvm; + void *memcache; + kvm_pfn_t pfn; + gfn_t gfn; + int ret; + + ret = prepare_mmu_memcache(vcpu, true, &memcache); + if (ret) + return ret; + + if (nested) + gfn = kvm_s2_trans_output(nested) >> PAGE_SHIFT; + else + gfn = fault_ipa >> PAGE_SHIFT; + + write_fault = kvm_is_write_fault(vcpu); + exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu); + + VM_WARN_ON_ONCE(write_fault && exec_fault); + + mmu_seq = kvm->mmu_invalidate_seq; + /* Pairs with the smp_wmb() in kvm_mmu_invalidate_end(). */ + smp_rmb(); + + ret = kvm_gmem_get_pfn(kvm, memslot, gfn, &pfn, &page, NULL); + if (ret) { + kvm_prepare_memory_fault_exit(vcpu, fault_ipa, PAGE_SIZE, + write_fault, exec_fault, false); + return ret; + } + + writable = !(memslot->flags & KVM_MEM_READONLY); + + if (nested) + adjust_nested_fault_perms(nested, &prot, &writable); + + if (writable) + prot |= KVM_PGTABLE_PROT_W; + + if (exec_fault || + (cpus_have_final_cap(ARM64_HAS_CACHE_DIC) && + (!nested || kvm_s2_trans_executable(nested)))) + prot |= KVM_PGTABLE_PROT_X; + + kvm_fault_lock(kvm); + if (mmu_invalidate_retry(kvm, mmu_seq)) { + ret = -EAGAIN; + goto out_unlock; + } + + ret = KVM_PGT_FN(kvm_pgtable_stage2_map)(pgt, fault_ipa, PAGE_SIZE, + __pfn_to_phys(pfn), prot, + memcache, flags); + +out_unlock: + kvm_release_faultin_page(kvm, page, !!ret, writable); + kvm_fault_unlock(kvm); + + if (writable && !ret) + mark_page_dirty_in_slot(kvm, memslot, gfn); + + return ret != -EAGAIN ? 
ret : 0; +} + static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct kvm_s2_trans *nested, struct kvm_memory_slot *memslot, unsigned long hva, bool fault_is_perm) { int ret = 0; - bool write_fault, writable, force_pte = false; + bool topup_memcache; + bool write_fault, writable; bool exec_fault, mte_allowed, is_vma_cacheable; bool s2_force_noncacheable = false, vfio_allow_any_uc = false; unsigned long mmu_seq; @@ -1500,23 +1616,19 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, gfn_t gfn; kvm_pfn_t pfn; bool logging_active = memslot_is_logging(memslot); + bool force_pte = logging_active; long vma_pagesize, fault_granule; enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R; struct kvm_pgtable *pgt; struct page *page; vm_flags_t vm_flags; - enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_HANDLE_FAULT | KVM_PGTABLE_WALK_SHARED; + enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_MEMABORT_FLAGS; if (fault_is_perm) fault_granule = kvm_vcpu_trap_get_perm_fault_granule(vcpu); write_fault = kvm_is_write_fault(vcpu); exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu); - VM_BUG_ON(write_fault && exec_fault); - - if (!is_protected_kvm_enabled()) - memcache = &vcpu->arch.mmu_page_cache; - else - memcache = &vcpu->arch.pkvm_memcache; + VM_WARN_ON_ONCE(write_fault && exec_fault); /* * Permission faults just need to update the existing leaf entry, @@ -1524,17 +1636,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, * only exception to this is when dirty logging is enabled at runtime * and a write fault needs to collapse a block entry into a table. */ - if (!fault_is_perm || (logging_active && write_fault)) { - int min_pages = kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu); - - if (!is_protected_kvm_enabled()) - ret = kvm_mmu_topup_memory_cache(memcache, min_pages); - else - ret = topup_hyp_memcache(memcache, min_pages); - - if (ret) - return ret; - } + topup_memcache = !fault_is_perm || (logging_active && write_fault); + ret = prepare_mmu_memcache(vcpu, topup_memcache, &memcache); + if (ret) + return ret; /* * Let's check if we will get back a huge page backed by hugetlbfs, or @@ -1548,16 +1653,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, return -EFAULT; } - /* - * logging_active is guaranteed to never be true for VM_PFNMAP - * memslots. - */ - if (logging_active) { - force_pte = true; + if (force_pte) vma_shift = PAGE_SHIFT; - } else { + else vma_shift = get_vma_page_shift(vma, hva); - } switch (vma_shift) { #ifndef __PAGETABLE_PMD_FOLDED @@ -1609,7 +1708,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, max_map_size = PAGE_SIZE; force_pte = (max_map_size == PAGE_SIZE); - vma_pagesize = min(vma_pagesize, (long)max_map_size); + vma_pagesize = min_t(long, vma_pagesize, max_map_size); } /* @@ -1642,7 +1741,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, * Rely on mmap_read_unlock() for an implicit smp_rmb(), which pairs * with the smp_wmb() in kvm_mmu_invalidate_end(). */ - mmu_seq = vcpu->kvm->mmu_invalidate_seq; + mmu_seq = kvm->mmu_invalidate_seq; mmap_read_unlock(current->mm); pfn = __kvm_faultin_pfn(memslot, gfn, write_fault ? FOLL_WRITE : 0, @@ -1673,7 +1772,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, * cache maintenance. 
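gmem_abort() above follows KVM's usual invalidation dance: snapshot mmu_invalidate_seq before resolving the pfn, re-check it under the fault lock, and fold -EAGAIN into 0 so the vCPU simply re-executes the faulting access. Stripped of locks and kernel types, the shape is roughly as follows (all names are toy stand-ins):

#include <stdint.h>
#include <stdio.h>

struct toy_kvm {
        unsigned long mmu_invalidate_seq;
        uint64_t mapped_ipa;
};

static uint64_t resolve_pfn(uint64_t ipa) { return ipa >> 12; } /* may sleep */

static int toy_fault(struct toy_kvm *kvm, uint64_t ipa)
{
        /* Snapshot before the sleepable pfn lookup; pairs with a read
         * barrier against the invalidation path in the real code. */
        unsigned long seq = kvm->mmu_invalidate_seq;
        uint64_t pfn = resolve_pfn(ipa);

        /* ...take the fault lock here... */
        if (kvm->mmu_invalidate_seq != seq)
                return 0;       /* -EAGAIN folded to 0: vCPU re-faults */
        kvm->mapped_ipa = pfn << 12;    /* install the mapping */
        return 0;
}

int main(void)
{
        struct toy_kvm kvm = { .mmu_invalidate_seq = 1 };
        printf("%d\n", toy_fault(&kvm, 0x1000));
        return 0;
}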
*/ if (!kvm_supports_cacheable_pfnmap()) - return -EFAULT; + ret = -EFAULT; } else { /* * If the page was identified as device early by looking at @@ -1696,27 +1795,16 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, } if (exec_fault && s2_force_noncacheable) - return -ENOEXEC; + ret = -ENOEXEC; - /* - * Potentially reduce shadow S2 permissions to match the guest's own - * S2. For exec faults, we'd only reach this point if the guest - * actually allowed it (see kvm_s2_handle_perm_fault). - * - * Also encode the level of the original translation in the SW bits - * of the leaf entry as a proxy for the span of that translation. - * This will be retrieved on TLB invalidation from the guest and - * used to limit the invalidation scope if a TTL hint or a range - * isn't provided. - */ - if (nested) { - writable &= kvm_s2_trans_writable(nested); - if (!kvm_s2_trans_readable(nested)) - prot &= ~KVM_PGTABLE_PROT_R; - - prot |= kvm_encode_nested_level(nested); + if (ret) { + kvm_release_page_unused(page); + return ret; } + if (nested) + adjust_nested_fault_perms(nested, &prot, &writable); + kvm_fault_lock(kvm); pgt = vcpu->arch.hw_mmu->pgt; if (mmu_invalidate_retry(kvm, mmu_seq)) { @@ -1985,8 +2073,15 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) goto out_unlock; } - ret = user_mem_abort(vcpu, fault_ipa, nested, memslot, hva, - esr_fsc_is_permission_fault(esr)); + VM_WARN_ON_ONCE(kvm_vcpu_trap_is_permission_fault(vcpu) && + !write_fault && !kvm_vcpu_trap_is_exec_fault(vcpu)); + + if (kvm_slot_has_gmem(memslot)) + ret = gmem_abort(vcpu, fault_ipa, nested, memslot, + esr_fsc_is_permission_fault(esr)); + else + ret = user_mem_abort(vcpu, fault_ipa, nested, memslot, hva, + esr_fsc_is_permission_fault(esr)); if (ret == 0) ret = 1; out: @@ -2218,6 +2313,13 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, if ((new->base_gfn + new->npages) > (kvm_phys_size(&kvm->arch.mmu) >> PAGE_SHIFT)) return -EFAULT; + /* + * Only support guest_memfd backed memslots with mappable memory, since + * there aren't any CoCo VMs that support only private memory on arm64. 
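A small but deliberate detail in the user_mem_abort() hunk above: the early 'return -EFAULT' and 'return -ENOEXEC' become 'ret = ...' assignments so that all failures funnel through one exit that calls kvm_release_page_unused() on the freshly grabbed page. The generic shape of that pattern, as a self-contained sketch:

#include <stdio.h>

struct page { int refs; };

static void get_page(struct page *p)        { p->refs++; }
static void put_page_unused(struct page *p) { p->refs--; }

static int handle_fault(struct page *p, int bad_mapping, int bad_exec)
{
        int ret = 0;

        get_page(p);

        /* An early 'return' here would leak the reference, so errors
         * accumulate and fall through to a single release point. */
        if (bad_mapping)
                ret = -14;      /* -EFAULT */
        else if (bad_exec)
                ret = -8;       /* -ENOEXEC */

        if (ret) {
                put_page_unused(p);     /* kvm_release_page_unused() */
                return ret;
        }

        /* ...install the mapping, then release as used... */
        put_page_unused(p);
        return 0;
}

int main(void)
{
        struct page p = { 0 };
        handle_fault(&p, 1, 0);
        printf("refs after failure: %d\n", p.refs);     /* 0: no leak */
        return 0;
}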
+ */ + if (kvm_slot_has_gmem(new) && !kvm_memslot_is_gmem_only(new)) + return -EINVAL; + hva = new->userspace_addr; reg_end = hva + (new->npages << PAGE_SHIFT); diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 50d559248a1f..7a045cad6bdf 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -349,7 +349,7 @@ static void vtcr_to_walk_info(u64 vtcr, struct s2_walk_info *wi) wi->sl = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr); /* Global limit for now, should eventually be per-VM */ wi->max_oa_bits = min(get_kvm_ipa_limit(), - ps_to_output_size(FIELD_GET(VTCR_EL2_PS_MASK, vtcr))); + ps_to_output_size(FIELD_GET(VTCR_EL2_PS_MASK, vtcr), false)); } int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa, @@ -1172,8 +1172,9 @@ static u64 read_vncr_el2(struct kvm_vcpu *vcpu) return (u64)sign_extend64(__vcpu_sys_reg(vcpu, VNCR_EL2), 48); } -static int kvm_translate_vncr(struct kvm_vcpu *vcpu) +static int kvm_translate_vncr(struct kvm_vcpu *vcpu, bool *is_gmem) { + struct kvm_memory_slot *memslot; bool write_fault, writable; unsigned long mmu_seq; struct vncr_tlb *vt; @@ -1216,10 +1217,25 @@ static int kvm_translate_vncr(struct kvm_vcpu *vcpu) smp_rmb(); gfn = vt->wr.pa >> PAGE_SHIFT; - pfn = kvm_faultin_pfn(vcpu, gfn, write_fault, &writable, &page); - if (is_error_noslot_pfn(pfn) || (write_fault && !writable)) + memslot = gfn_to_memslot(vcpu->kvm, gfn); + if (!memslot) return -EFAULT; + *is_gmem = kvm_slot_has_gmem(memslot); + if (!*is_gmem) { + pfn = __kvm_faultin_pfn(memslot, gfn, write_fault ? FOLL_WRITE : 0, + &writable, &page); + if (is_error_noslot_pfn(pfn) || (write_fault && !writable)) + return -EFAULT; + } else { + ret = kvm_gmem_get_pfn(vcpu->kvm, memslot, gfn, &pfn, &page, NULL); + if (ret) { + kvm_prepare_memory_fault_exit(vcpu, vt->wr.pa, PAGE_SIZE, + write_fault, false, false); + return ret; + } + } + scoped_guard(write_lock, &vcpu->kvm->mmu_lock) { if (mmu_invalidate_retry(vcpu->kvm, mmu_seq)) return -EAGAIN; @@ -1295,23 +1311,36 @@ int kvm_handle_vncr_abort(struct kvm_vcpu *vcpu) if (esr_fsc_is_permission_fault(esr)) { inject_vncr_perm(vcpu); } else if (esr_fsc_is_translation_fault(esr)) { - bool valid; + bool valid, is_gmem = false; int ret; scoped_guard(read_lock, &vcpu->kvm->mmu_lock) valid = kvm_vncr_tlb_lookup(vcpu); if (!valid) - ret = kvm_translate_vncr(vcpu); + ret = kvm_translate_vncr(vcpu, &is_gmem); else ret = -EPERM; switch (ret) { case -EAGAIN: - case -ENOMEM: /* Let's try again... */ break; + case -ENOMEM: + /* + * For guest_memfd, this indicates that it failed to + * create a folio to back the memory. Inform userspace. + */ + if (is_gmem) + return 0; + /* Otherwise, let's try again... 
*/ + break; case -EFAULT: + case -EIO: + case -EHWPOISON: + if (is_gmem) + return 0; + fallthrough; case -EINVAL: case -ENOENT: case -EACCES: @@ -1462,9 +1491,16 @@ u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val) case SYS_ID_AA64PFR1_EL1: /* Only support BTI, SSBS, CSV2_frac */ - val &= (ID_AA64PFR1_EL1_BT | - ID_AA64PFR1_EL1_SSBS | - ID_AA64PFR1_EL1_CSV2_frac); + val &= ~(ID_AA64PFR1_EL1_PFAR | + ID_AA64PFR1_EL1_MTEX | + ID_AA64PFR1_EL1_THE | + ID_AA64PFR1_EL1_GCS | + ID_AA64PFR1_EL1_MTE_frac | + ID_AA64PFR1_EL1_NMI | + ID_AA64PFR1_EL1_SME | + ID_AA64PFR1_EL1_RES0 | + ID_AA64PFR1_EL1_MPAM_frac | + ID_AA64PFR1_EL1_MTE); break; case SYS_ID_AA64MMFR0_EL1: @@ -1517,12 +1553,11 @@ u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val) break; case SYS_ID_AA64MMFR1_EL1: - val &= (ID_AA64MMFR1_EL1_HCX | - ID_AA64MMFR1_EL1_PAN | - ID_AA64MMFR1_EL1_LO | - ID_AA64MMFR1_EL1_HPDS | - ID_AA64MMFR1_EL1_VH | - ID_AA64MMFR1_EL1_VMIDBits); + val &= ~(ID_AA64MMFR1_EL1_CMOW | + ID_AA64MMFR1_EL1_nTLBPA | + ID_AA64MMFR1_EL1_ETS | + ID_AA64MMFR1_EL1_XNX | + ID_AA64MMFR1_EL1_HAFDBS); /* FEAT_E2H0 implies no VHE */ if (test_bit(KVM_ARM_VCPU_HAS_EL2_E2H0, kvm->arch.vcpu_features)) val &= ~ID_AA64MMFR1_EL1_VH; @@ -1564,14 +1599,22 @@ u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val) case SYS_ID_AA64DFR0_EL1: /* Only limited support for PMU, Debug, BPs, WPs, and HPMN0 */ - val &= (ID_AA64DFR0_EL1_PMUVer | - ID_AA64DFR0_EL1_WRPs | - ID_AA64DFR0_EL1_BRPs | - ID_AA64DFR0_EL1_DebugVer| - ID_AA64DFR0_EL1_HPMN0); - - /* Cap Debug to ARMv8.1 */ - val = ID_REG_LIMIT_FIELD_ENUM(val, ID_AA64DFR0_EL1, DebugVer, VHE); + val &= ~(ID_AA64DFR0_EL1_ExtTrcBuff | + ID_AA64DFR0_EL1_BRBE | + ID_AA64DFR0_EL1_MTPMU | + ID_AA64DFR0_EL1_TraceBuffer | + ID_AA64DFR0_EL1_TraceFilt | + ID_AA64DFR0_EL1_PMSVer | + ID_AA64DFR0_EL1_CTX_CMPs | + ID_AA64DFR0_EL1_SEBEP | + ID_AA64DFR0_EL1_PMSS | + ID_AA64DFR0_EL1_TraceVer); + + /* + * FEAT_Debugv8p9 requires support for extended breakpoints / + * watchpoints. + */ + val = ID_REG_LIMIT_FIELD_ENUM(val, ID_AA64DFR0_EL1, DebugVer, V8P8); break; } @@ -1796,3 +1839,33 @@ void kvm_nested_sync_hwstate(struct kvm_vcpu *vcpu) if (unlikely(vcpu_test_and_clear_flag(vcpu, NESTED_SERROR_PENDING))) kvm_inject_serror_esr(vcpu, vcpu_get_vsesr(vcpu)); } + +/* + * KVM unconditionally sets most of these traps anyway but use an allowlist + * to document the guest hypervisor traps that may take precedence and guard + * against future changes to the non-nested trap configuration. + */ +#define NV_MDCR_GUEST_INCLUDE (MDCR_EL2_TDE | \ + MDCR_EL2_TDA | \ + MDCR_EL2_TDRA | \ + MDCR_EL2_TTRF | \ + MDCR_EL2_TPMS | \ + MDCR_EL2_TPM | \ + MDCR_EL2_TPMCR | \ + MDCR_EL2_TDCC | \ + MDCR_EL2_TDOSA) + +void kvm_nested_setup_mdcr_el2(struct kvm_vcpu *vcpu) +{ + u64 guest_mdcr = __vcpu_sys_reg(vcpu, MDCR_EL2); + + /* + * In yet another example where FEAT_NV2 is fscking broken, accesses + * to MDSCR_EL1 are redirected to the VNCR despite having an effect + * at EL2. Use a big hammer to apply sanity. 
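The NV_MDCR_GUEST_INCLUDE allowlist above feeds the merge that follows: the guest hypervisor's MDCR_EL2 bits are honoured only if allowlisted, except in hyp context where debug accesses are trapped wholesale. A reduced model with two representative trap bits (positions per the architecture, quoted for illustration only):

#include <stdint.h>
#include <stdio.h>

#define MDCR_EL2_TPM    (UINT64_C(1) << 6)
#define MDCR_EL2_TDA    (UINT64_C(1) << 9)
#define NV_MDCR_GUEST_INCLUDE   (MDCR_EL2_TPM | MDCR_EL2_TDA)

static uint64_t nested_mdcr(uint64_t host_mdcr, uint64_t guest_mdcr,
                            int is_hyp_ctxt)
{
        if (is_hyp_ctxt)
                return host_mdcr | MDCR_EL2_TDA;        /* the "big hammer" */
        /* Only allowlisted guest-hypervisor traps are honoured. */
        return host_mdcr | (guest_mdcr & NV_MDCR_GUEST_INCLUDE);
}

int main(void)
{
        /* A guest bit off the allowlist (here bit 17) has no effect: */
        printf("%#llx\n", (unsigned long long)
               nested_mdcr(0, MDCR_EL2_TPM | (UINT64_C(1) << 17), 0));
        return 0;
}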
+ */ + if (is_hyp_ctxt(vcpu)) + vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA; + else + vcpu->arch.mdcr_el2 |= (guest_mdcr & NV_MDCR_GUEST_INCLUDE); +} diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c index fcd70bfe44fb..24f0f8a8c943 100644 --- a/arch/arm64/kvm/pkvm.c +++ b/arch/arm64/kvm/pkvm.c @@ -85,16 +85,23 @@ void __init kvm_hyp_reserve(void) hyp_mem_base); } -static void __pkvm_destroy_hyp_vm(struct kvm *host_kvm) +static void __pkvm_destroy_hyp_vm(struct kvm *kvm) { - if (host_kvm->arch.pkvm.handle) { + if (pkvm_hyp_vm_is_created(kvm)) { WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_vm, - host_kvm->arch.pkvm.handle)); + kvm->arch.pkvm.handle)); + } else if (kvm->arch.pkvm.handle) { + /* + * The VM could have been reserved but hyp initialization has + * failed. Make sure to unreserve it. + */ + kvm_call_hyp_nvhe(__pkvm_unreserve_vm, kvm->arch.pkvm.handle); } - host_kvm->arch.pkvm.handle = 0; - free_hyp_memcache(&host_kvm->arch.pkvm.teardown_mc); - free_hyp_memcache(&host_kvm->arch.pkvm.stage2_teardown_mc); + kvm->arch.pkvm.handle = 0; + kvm->arch.pkvm.is_created = false; + free_hyp_memcache(&kvm->arch.pkvm.teardown_mc); + free_hyp_memcache(&kvm->arch.pkvm.stage2_teardown_mc); } static int __pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu) @@ -129,16 +136,16 @@ static int __pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu) * * Return 0 on success, negative error code on failure. */ -static int __pkvm_create_hyp_vm(struct kvm *host_kvm) +static int __pkvm_create_hyp_vm(struct kvm *kvm) { size_t pgd_sz, hyp_vm_sz; void *pgd, *hyp_vm; int ret; - if (host_kvm->created_vcpus < 1) + if (kvm->created_vcpus < 1) return -EINVAL; - pgd_sz = kvm_pgtable_stage2_pgd_size(host_kvm->arch.mmu.vtcr); + pgd_sz = kvm_pgtable_stage2_pgd_size(kvm->arch.mmu.vtcr); /* * The PGD pages will be reclaimed using a hyp_memcache which implies @@ -152,7 +159,7 @@ static int __pkvm_create_hyp_vm(struct kvm *host_kvm) /* Allocate memory to donate to hyp for vm and vcpu pointers. */ hyp_vm_sz = PAGE_ALIGN(size_add(PKVM_HYP_VM_SIZE, size_mul(sizeof(void *), - host_kvm->created_vcpus))); + kvm->created_vcpus))); hyp_vm = alloc_pages_exact(hyp_vm_sz, GFP_KERNEL_ACCOUNT); if (!hyp_vm) { ret = -ENOMEM; @@ -160,12 +167,12 @@ static int __pkvm_create_hyp_vm(struct kvm *host_kvm) } /* Donate the VM memory to hyp and let hyp initialize it. 
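With reservation now split from initialisation, the host side has to tell "reserved but never created" apart from "fully created" at teardown, which is what the is_created flag and the new unreserve branch above implement. A toy model of the whole lifecycle with every hypercall stubbed out (names are illustrative):

#include <stdbool.h>
#include <stdio.h>

struct toy_pkvm { int handle; bool is_created; };

static int  hyp_reserve(void)                { return 0x1000; }
static void hyp_unreserve(int h)             { (void)h; }
static int  hyp_init_vm(struct toy_pkvm *vm) { (void)vm; return 0; }
static void hyp_teardown(int h)              { (void)h; }

static int create(struct toy_pkvm *vm)
{
        if (!vm->handle)
                vm->handle = hyp_reserve();     /* pkvm_init_host_vm() */
        if (hyp_init_vm(vm))
                return -1;                      /* handle stays reserved */
        vm->is_created = true;                  /* __pkvm_create_hyp_vm() */
        return 0;
}

static void destroy(struct toy_pkvm *vm)
{
        if (vm->is_created)
                hyp_teardown(vm->handle);       /* fully created: tear down */
        else if (vm->handle)
                hyp_unreserve(vm->handle);      /* reserved only: give back */
        vm->handle = 0;
        vm->is_created = false;
}

int main(void)
{
        struct toy_pkvm vm = { 0 };
        create(&vm);
        destroy(&vm);
        printf("handle=%d created=%d\n", vm.handle, vm.is_created);
        return 0;
}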
*/ - ret = kvm_call_hyp_nvhe(__pkvm_init_vm, host_kvm, hyp_vm, pgd); - if (ret < 0) + ret = kvm_call_hyp_nvhe(__pkvm_init_vm, kvm, hyp_vm, pgd); + if (ret) goto free_vm; - host_kvm->arch.pkvm.handle = ret; - host_kvm->arch.pkvm.stage2_teardown_mc.flags |= HYP_MEMCACHE_ACCOUNT_STAGE2; + kvm->arch.pkvm.is_created = true; + kvm->arch.pkvm.stage2_teardown_mc.flags |= HYP_MEMCACHE_ACCOUNT_STAGE2; kvm_account_pgtable_pages(pgd, pgd_sz / PAGE_SIZE); return 0; @@ -176,14 +183,19 @@ free_pgd: return ret; } -int pkvm_create_hyp_vm(struct kvm *host_kvm) +bool pkvm_hyp_vm_is_created(struct kvm *kvm) +{ + return READ_ONCE(kvm->arch.pkvm.is_created); +} + +int pkvm_create_hyp_vm(struct kvm *kvm) { int ret = 0; - mutex_lock(&host_kvm->arch.config_lock); - if (!host_kvm->arch.pkvm.handle) - ret = __pkvm_create_hyp_vm(host_kvm); - mutex_unlock(&host_kvm->arch.config_lock); + mutex_lock(&kvm->arch.config_lock); + if (!pkvm_hyp_vm_is_created(kvm)) + ret = __pkvm_create_hyp_vm(kvm); + mutex_unlock(&kvm->arch.config_lock); return ret; } @@ -200,15 +212,31 @@ int pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu) return ret; } -void pkvm_destroy_hyp_vm(struct kvm *host_kvm) +void pkvm_destroy_hyp_vm(struct kvm *kvm) { - mutex_lock(&host_kvm->arch.config_lock); - __pkvm_destroy_hyp_vm(host_kvm); - mutex_unlock(&host_kvm->arch.config_lock); + mutex_lock(&kvm->arch.config_lock); + __pkvm_destroy_hyp_vm(kvm); + mutex_unlock(&kvm->arch.config_lock); } -int pkvm_init_host_vm(struct kvm *host_kvm) +int pkvm_init_host_vm(struct kvm *kvm) { + int ret; + + if (pkvm_hyp_vm_is_created(kvm)) + return -EINVAL; + + /* VM is already reserved, no need to proceed. */ + if (kvm->arch.pkvm.handle) + return 0; + + /* Reserve the VM in hyp and obtain a hyp handle for the VM. */ + ret = kvm_call_hyp_nvhe(__pkvm_reserve_vm); + if (ret < 0) + return ret; + + kvm->arch.pkvm.handle = ret; + return 0; } diff --git a/arch/arm64/kvm/ptdump.c b/arch/arm64/kvm/ptdump.c index 098416d7e5c2..dc5acfb00af9 100644 --- a/arch/arm64/kvm/ptdump.c +++ b/arch/arm64/kvm/ptdump.c @@ -32,23 +32,23 @@ static const struct ptdump_prot_bits stage2_pte_bits[] = { .set = " ", .clear = "F", }, { - .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID, - .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID, + .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R, + .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R, .set = "R", .clear = " ", }, { - .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | PTE_VALID, - .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | PTE_VALID, + .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W, + .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W, .set = "W", .clear = " ", }, { - .mask = KVM_PTE_LEAF_ATTR_HI_S2_XN | PTE_VALID, - .val = PTE_VALID, - .set = " ", - .clear = "X", + .mask = KVM_PTE_LEAF_ATTR_HI_S2_XN, + .val = KVM_PTE_LEAF_ATTR_HI_S2_XN, + .set = "NX", + .clear = "x ", }, { - .mask = KVM_PTE_LEAF_ATTR_LO_S2_AF | PTE_VALID, - .val = KVM_PTE_LEAF_ATTR_LO_S2_AF | PTE_VALID, + .mask = KVM_PTE_LEAF_ATTR_LO_S2_AF, + .val = KVM_PTE_LEAF_ATTR_LO_S2_AF, .set = "AF", .clear = " ", }, { diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index b29f72478a50..91053aa832d0 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1757,7 +1757,8 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu, val &= ~ID_AA64ISAR2_EL1_WFxT; break; case SYS_ID_AA64ISAR3_EL1: - val &= ID_AA64ISAR3_EL1_FPRCVT | ID_AA64ISAR3_EL1_FAMINMAX; + val &= ID_AA64ISAR3_EL1_FPRCVT | ID_AA64ISAR3_EL1_LSFE | + ID_AA64ISAR3_EL1_FAMINMAX; break; case SYS_ID_AA64MMFR2_EL1: val &= 
~ID_AA64MMFR2_EL1_CCIDX_MASK; @@ -1997,6 +1998,26 @@ static u64 sanitise_id_aa64dfr0_el1(const struct kvm_vcpu *vcpu, u64 val) return val; } +/* + * Older versions of KVM erroneously claim support for FEAT_DoubleLock with + * NV-enabled VMs on unsupporting hardware. Silently ignore the incorrect + * value if it is consistent with the bug. + */ +static bool ignore_feat_doublelock(struct kvm_vcpu *vcpu, u64 val) +{ + u8 host, user; + + if (!vcpu_has_nv(vcpu)) + return false; + + host = SYS_FIELD_GET(ID_AA64DFR0_EL1, DoubleLock, + read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1)); + user = SYS_FIELD_GET(ID_AA64DFR0_EL1, DoubleLock, val); + + return host == ID_AA64DFR0_EL1_DoubleLock_NI && + user == ID_AA64DFR0_EL1_DoubleLock_IMP; +} + static int set_id_aa64dfr0_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, u64 val) @@ -2028,6 +2049,11 @@ static int set_id_aa64dfr0_el1(struct kvm_vcpu *vcpu, if (debugver < ID_AA64DFR0_EL1_DebugVer_IMP) return -EINVAL; + if (ignore_feat_doublelock(vcpu, val)) { + val &= ~ID_AA64DFR0_EL1_DoubleLock; + val |= SYS_FIELD_PREP_ENUM(ID_AA64DFR0_EL1, DoubleLock, NI); + } + return set_id_reg(vcpu, rd, val); } @@ -2148,16 +2174,29 @@ static int set_id_aa64pfr1_el1(struct kvm_vcpu *vcpu, return set_id_reg(vcpu, rd, user_val); } +/* + * Allow userspace to de-feature a stage-2 translation granule but prevent it + * from claiming the impossible. + */ +#define tgran2_val_allowed(tg, safe, user) \ +({ \ + u8 __s = SYS_FIELD_GET(ID_AA64MMFR0_EL1, tg, safe); \ + u8 __u = SYS_FIELD_GET(ID_AA64MMFR0_EL1, tg, user); \ + \ + __s == __u || __u == ID_AA64MMFR0_EL1_##tg##_NI; \ +}) + static int set_id_aa64mmfr0_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, u64 user_val) { u64 sanitized_val = kvm_read_sanitised_id_reg(vcpu, rd); - u64 tgran2_mask = ID_AA64MMFR0_EL1_TGRAN4_2_MASK | - ID_AA64MMFR0_EL1_TGRAN16_2_MASK | - ID_AA64MMFR0_EL1_TGRAN64_2_MASK; - if (vcpu_has_nv(vcpu) && - ((sanitized_val & tgran2_mask) != (user_val & tgran2_mask))) + if (!vcpu_has_nv(vcpu)) + return set_id_reg(vcpu, rd, user_val); + + if (!tgran2_val_allowed(TGRAN4_2, sanitized_val, user_val) || + !tgran2_val_allowed(TGRAN16_2, sanitized_val, user_val) || + !tgran2_val_allowed(TGRAN64_2, sanitized_val, user_val)) return -EINVAL; return set_id_reg(vcpu, rd, user_val); @@ -3141,6 +3180,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { ID_AA64ISAR2_EL1_APA3 | ID_AA64ISAR2_EL1_GPA3)), ID_WRITABLE(ID_AA64ISAR3_EL1, (ID_AA64ISAR3_EL1_FPRCVT | + ID_AA64ISAR3_EL1_LSFE | ID_AA64ISAR3_EL1_FAMINMAX)), ID_UNALLOCATED(6,4), ID_UNALLOCATED(6,5), @@ -3152,8 +3192,6 @@ static const struct sys_reg_desc sys_reg_descs[] = { ~(ID_AA64MMFR0_EL1_RES0 | ID_AA64MMFR0_EL1_ASIDBITS)), ID_WRITABLE(ID_AA64MMFR1_EL1, ~(ID_AA64MMFR1_EL1_RES0 | - ID_AA64MMFR1_EL1_HCX | - ID_AA64MMFR1_EL1_TWED | ID_AA64MMFR1_EL1_XNX | ID_AA64MMFR1_EL1_VH | ID_AA64MMFR1_EL1_VMIDBits)), @@ -3238,6 +3276,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_PMBLIMITR_EL1), undef_access }, { SYS_DESC(SYS_PMBPTR_EL1), undef_access }, { SYS_DESC(SYS_PMBSR_EL1), undef_access }, + { SYS_DESC(SYS_PMSDSFR_EL1), undef_access }, /* PMBIDR_EL1 is not trapped */ { PMU_SYS_REG(PMINTENSET_EL1), diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index 4c3c0d82e476..1796b1a22a72 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -554,7 +554,6 @@ int vgic_lazy_init(struct kvm *kvm) * Also map the virtual CPU interface into the VM. 
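The tgran2_val_allowed() macro above relaxes the old exact-match rule for NV guests: userspace may now de-feature a stage-2 granule by writing the not-implemented value, but still cannot claim support the host lacks. Boiled down (the field values here are illustrative placeholders, not the architectural encodings):

#include <stdio.h>

#define TGRAN2_NI       1       /* granule not implemented */
#define TGRAN2_IMP      2       /* granule implemented */

/* Keep the host's value, or declare the granule absent: nothing else. */
static int tgran2_val_allowed(unsigned int safe, unsigned int user)
{
        return user == safe || user == TGRAN2_NI;
}

int main(void)
{
        printf("%d\n", tgran2_val_allowed(TGRAN2_IMP, TGRAN2_IMP)); /* 1 */
        printf("%d\n", tgran2_val_allowed(TGRAN2_IMP, TGRAN2_NI));  /* 1 */
        printf("%d\n", tgran2_val_allowed(TGRAN2_NI, TGRAN2_IMP));  /* 0 */
        return 0;
}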
* v2 calls vgic_init() if not already done. * v3 and derivatives return an error if the VGIC is not initialized. - * vgic_ready() returns true if this function has succeeded. */ int kvm_vgic_map_resources(struct kvm *kvm) { @@ -563,12 +562,12 @@ int kvm_vgic_map_resources(struct kvm *kvm) gpa_t dist_base; int ret = 0; - if (likely(vgic_ready(kvm))) + if (likely(smp_load_acquire(&dist->ready))) return 0; mutex_lock(&kvm->slots_lock); mutex_lock(&kvm->arch.config_lock); - if (vgic_ready(kvm)) + if (dist->ready) goto out; if (!irqchip_in_kernel(kvm)) @@ -594,14 +593,7 @@ int kvm_vgic_map_resources(struct kvm *kvm) goto out_slots; } - /* - * kvm_io_bus_register_dev() guarantees all readers see the new MMIO - * registration before returning through synchronize_srcu(), which also - * implies a full memory barrier. As such, marking the distributor as - * 'ready' here is guaranteed to be ordered after all vCPUs having seen - * a completely configured distributor. - */ - dist->ready = true; + smp_store_release(&dist->ready, true); goto out_slots; out: mutex_unlock(&kvm->arch.config_lock); diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index b9ad7c42c5b0..f1c153106c56 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -588,6 +588,7 @@ int vgic_v3_map_resources(struct kvm *kvm) } DEFINE_STATIC_KEY_FALSE(vgic_v3_cpuif_trap); +DEFINE_STATIC_KEY_FALSE(vgic_v3_has_v2_compat); static int __init early_group0_trap_cfg(char *buf) { @@ -697,6 +698,13 @@ int vgic_v3_probe(const struct gic_kvm_info *info) if (kvm_vgic_global_state.vcpu_base == 0) kvm_info("disabling GICv2 emulation\n"); + /* + * Flip the static branch if the HW supports v2, even if we're + * not using it (such as in protected mode). + */ + if (has_v2) + static_branch_enable(&vgic_v3_has_v2_compat); + if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_30115)) { group0_trap = true; group1_trap = true; diff --git a/arch/arm64/kvm/vgic/vgic-v5.c b/arch/arm64/kvm/vgic/vgic-v5.c index 6bdbb221bcde..2d3811f4e117 100644 --- a/arch/arm64/kvm/vgic/vgic-v5.c +++ b/arch/arm64/kvm/vgic/vgic-v5.c @@ -15,7 +15,7 @@ int vgic_v5_probe(const struct gic_kvm_info *info) u64 ich_vtr_el2; int ret; - if (!info->has_gcie_v3_compat) + if (!cpus_have_final_cap(ARM64_HAS_GICV5_LEGACY)) return -ENODEV; kvm_vgic_global_state.type = VGIC_V5; diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 9ff5cdbd2759..1b32c1232d28 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -37,6 +37,7 @@ HAS_GENERIC_AUTH_ARCH_QARMA5 HAS_GENERIC_AUTH_IMP_DEF HAS_GICV3_CPUIF HAS_GICV5_CPUIF +HAS_GICV5_LEGACY HAS_GIC_PRIO_MASKING HAS_GIC_PRIO_RELAXED_SYNC HAS_HCR_NV1 diff --git a/arch/loongarch/include/asm/kvm_pch_pic.h b/arch/loongarch/include/asm/kvm_pch_pic.h index e6df6a4c1c70..7f33a3039272 100644 --- a/arch/loongarch/include/asm/kvm_pch_pic.h +++ b/arch/loongarch/include/asm/kvm_pch_pic.h @@ -34,13 +34,26 @@ #define PCH_PIC_INT_ISR_END 0x3af #define PCH_PIC_POLARITY_START 0x3e0 #define PCH_PIC_POLARITY_END 0x3e7 -#define PCH_PIC_INT_ID_VAL 0x7000000UL +#define PCH_PIC_INT_ID_VAL 0x7UL #define PCH_PIC_INT_ID_VER 0x1UL +union pch_pic_id { + struct { + uint8_t reserved_0[3]; + uint8_t id; + uint8_t version; + uint8_t reserved_1; + uint8_t irq_num; + uint8_t reserved_2; + } desc; + uint64_t data; +}; + struct loongarch_pch_pic { spinlock_t lock; struct kvm *kvm; struct kvm_io_device device; + union pch_pic_id id; uint64_t mask; /* 1:disable irq, 0:enable irq */ uint64_t htmsi_en; /* 1:msi */ uint64_t 
edge; /* 1:edge triggered, 0:level triggered */ diff --git a/arch/loongarch/include/uapi/asm/kvm.h b/arch/loongarch/include/uapi/asm/kvm.h index 5f354f5c6847..57ba1a563bb1 100644 --- a/arch/loongarch/include/uapi/asm/kvm.h +++ b/arch/loongarch/include/uapi/asm/kvm.h @@ -103,6 +103,7 @@ struct kvm_fpu { #define KVM_LOONGARCH_VM_FEAT_PMU 5 #define KVM_LOONGARCH_VM_FEAT_PV_IPI 6 #define KVM_LOONGARCH_VM_FEAT_PV_STEALTIME 7 +#define KVM_LOONGARCH_VM_FEAT_PTW 8 /* Device Control API on vcpu fd */ #define KVM_LOONGARCH_VCPU_CPUCFG 0 diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c index 6c9c7de7226b..cb493980d874 100644 --- a/arch/loongarch/kvm/exit.c +++ b/arch/loongarch/kvm/exit.c @@ -218,16 +218,16 @@ int kvm_emu_iocsr(larch_inst inst, struct kvm_run *run, struct kvm_vcpu *vcpu) } trace_kvm_iocsr(KVM_TRACE_IOCSR_WRITE, run->iocsr_io.len, addr, val); } else { + vcpu->arch.io_gpr = rd; /* Set register id for iocsr read completion */ idx = srcu_read_lock(&vcpu->kvm->srcu); - ret = kvm_io_bus_read(vcpu, KVM_IOCSR_BUS, addr, run->iocsr_io.len, val); + ret = kvm_io_bus_read(vcpu, KVM_IOCSR_BUS, addr, + run->iocsr_io.len, run->iocsr_io.data); srcu_read_unlock(&vcpu->kvm->srcu, idx); - if (ret == 0) + if (ret == 0) { + kvm_complete_iocsr_read(vcpu, run); ret = EMULATE_DONE; - else { + } else ret = EMULATE_DO_IOCSR; - /* Save register id for iocsr read completion */ - vcpu->arch.io_gpr = rd; - } trace_kvm_iocsr(KVM_TRACE_IOCSR_READ, run->iocsr_io.len, addr, NULL); } @@ -468,6 +468,8 @@ int kvm_emu_mmio_read(struct kvm_vcpu *vcpu, larch_inst inst) if (ret == EMULATE_DO_MMIO) { trace_kvm_mmio(KVM_TRACE_MMIO_READ, run->mmio.len, run->mmio.phys_addr, NULL); + vcpu->arch.io_gpr = rd; /* Set for kvm_complete_mmio_read() use */ + /* * If mmio device such as PCH-PIC is emulated in KVM, * it need not return to user space to handle the mmio @@ -475,16 +477,15 @@ int kvm_emu_mmio_read(struct kvm_vcpu *vcpu, larch_inst inst) */ idx = srcu_read_lock(&vcpu->kvm->srcu); ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, vcpu->arch.badv, - run->mmio.len, &vcpu->arch.gprs[rd]); + run->mmio.len, run->mmio.data); srcu_read_unlock(&vcpu->kvm->srcu, idx); if (!ret) { + kvm_complete_mmio_read(vcpu, run); update_pc(&vcpu->arch); vcpu->mmio_needed = 0; return EMULATE_DONE; } - /* Set for kvm_complete_mmio_read() use */ - vcpu->arch.io_gpr = rd; run->mmio.is_write = 0; vcpu->mmio_is_write = 0; return EMULATE_DO_MMIO; diff --git a/arch/loongarch/kvm/intc/ipi.c b/arch/loongarch/kvm/intc/ipi.c index 5a8481dda052..05cefd29282e 100644 --- a/arch/loongarch/kvm/intc/ipi.c +++ b/arch/loongarch/kvm/intc/ipi.c @@ -7,13 +7,26 @@ #include <asm/kvm_ipi.h> #include <asm/kvm_vcpu.h> -static void ipi_send(struct kvm *kvm, uint64_t data) +static void ipi_set(struct kvm_vcpu *vcpu, uint32_t data) { - int cpu, action; uint32_t status; - struct kvm_vcpu *vcpu; struct kvm_interrupt irq; + spin_lock(&vcpu->arch.ipi_state.lock); + status = vcpu->arch.ipi_state.status; + vcpu->arch.ipi_state.status |= data; + spin_unlock(&vcpu->arch.ipi_state.lock); + if ((status == 0) && data) { + irq.irq = LARCH_INT_IPI; + kvm_vcpu_ioctl_interrupt(vcpu, &irq); + } +} + +static void ipi_send(struct kvm *kvm, uint64_t data) +{ + int cpu; + struct kvm_vcpu *vcpu; + cpu = ((data & 0xffffffff) >> 16) & 0x3ff; vcpu = kvm_get_vcpu_by_cpuid(kvm, cpu); if (unlikely(vcpu == NULL)) { @@ -21,15 +34,7 @@ static void ipi_send(struct kvm *kvm, uint64_t data) return; } - action = BIT(data & 0x1f); - spin_lock(&vcpu->arch.ipi_state.lock); - status = 
vcpu->arch.ipi_state.status; - vcpu->arch.ipi_state.status |= action; - spin_unlock(&vcpu->arch.ipi_state.lock); - if (status == 0) { - irq.irq = LARCH_INT_IPI; - kvm_vcpu_ioctl_interrupt(vcpu, &irq); - } + ipi_set(vcpu, BIT(data & 0x1f)); } static void ipi_clear(struct kvm_vcpu *vcpu, uint64_t data) @@ -96,6 +101,34 @@ static void write_mailbox(struct kvm_vcpu *vcpu, int offset, uint64_t data, int spin_unlock(&vcpu->arch.ipi_state.lock); } +static int mail_send(struct kvm *kvm, uint64_t data) +{ + int i, cpu, mailbox, offset; + uint32_t val = 0, mask = 0; + struct kvm_vcpu *vcpu; + + cpu = ((data & 0xffffffff) >> 16) & 0x3ff; + vcpu = kvm_get_vcpu_by_cpuid(kvm, cpu); + if (unlikely(vcpu == NULL)) { + kvm_err("%s: invalid target cpu: %d\n", __func__, cpu); + return -EINVAL; + } + mailbox = ((data & 0xffffffff) >> 2) & 0x7; + offset = IOCSR_IPI_BUF_20 + mailbox * 4; + if ((data >> 27) & 0xf) { + val = read_mailbox(vcpu, offset, 4); + for (i = 0; i < 4; i++) + if (data & (BIT(27 + i))) + mask |= (0xff << (i * 8)); + val &= mask; + } + + val |= ((uint32_t)(data >> 32) & ~mask); + write_mailbox(vcpu, offset, val, 4); + + return 0; +} + static int send_ipi_data(struct kvm_vcpu *vcpu, gpa_t addr, uint64_t data) { int i, idx, ret; @@ -132,23 +165,6 @@ static int send_ipi_data(struct kvm_vcpu *vcpu, gpa_t addr, uint64_t data) return ret; } -static int mail_send(struct kvm *kvm, uint64_t data) -{ - int cpu, mailbox, offset; - struct kvm_vcpu *vcpu; - - cpu = ((data & 0xffffffff) >> 16) & 0x3ff; - vcpu = kvm_get_vcpu_by_cpuid(kvm, cpu); - if (unlikely(vcpu == NULL)) { - kvm_err("%s: invalid target cpu: %d\n", __func__, cpu); - return -EINVAL; - } - mailbox = ((data & 0xffffffff) >> 2) & 0x7; - offset = IOCSR_IPI_BASE + IOCSR_IPI_BUF_20 + mailbox * 4; - - return send_ipi_data(vcpu, offset, data); -} - static int any_send(struct kvm *kvm, uint64_t data) { int cpu, offset; @@ -231,7 +247,7 @@ static int loongarch_ipi_writel(struct kvm_vcpu *vcpu, gpa_t addr, int len, cons spin_unlock(&vcpu->arch.ipi_state.lock); break; case IOCSR_IPI_SET: - ret = -EINVAL; + ipi_set(vcpu, data); break; case IOCSR_IPI_CLEAR: /* Just clear the status of the current vcpu */ @@ -250,10 +266,10 @@ static int loongarch_ipi_writel(struct kvm_vcpu *vcpu, gpa_t addr, int len, cons ipi_send(vcpu->kvm, data); break; case IOCSR_MAIL_SEND: - ret = mail_send(vcpu->kvm, *(uint64_t *)val); + ret = mail_send(vcpu->kvm, data); break; case IOCSR_ANY_SEND: - ret = any_send(vcpu->kvm, *(uint64_t *)val); + ret = any_send(vcpu->kvm, data); break; default: kvm_err("%s: unknown addr: %llx\n", __func__, addr); diff --git a/arch/loongarch/kvm/intc/pch_pic.c b/arch/loongarch/kvm/intc/pch_pic.c index baf3b4faf7ea..a698a73de399 100644 --- a/arch/loongarch/kvm/intc/pch_pic.c +++ b/arch/loongarch/kvm/intc/pch_pic.c @@ -35,16 +35,11 @@ static void pch_pic_update_irq(struct loongarch_pch_pic *s, int irq, int level) /* update batch irqs, the irq_mask is a bitmap of irqs */ static void pch_pic_update_batch_irqs(struct loongarch_pch_pic *s, u64 irq_mask, int level) { - int irq, bits; + unsigned int irq; + DECLARE_BITMAP(irqs, 64) = { BITMAP_FROM_U64(irq_mask) }; - /* find each irq by irqs bitmap and update each irq */ - bits = sizeof(irq_mask) * 8; - irq = find_first_bit((void *)&irq_mask, bits); - while (irq < bits) { + for_each_set_bit(irq, irqs, 64) pch_pic_update_irq(s, irq, level); - bitmap_clear((void *)&irq_mask, irq, 1); - irq = find_first_bit((void *)&irq_mask, bits); - } } /* called when a irq is triggered in pch pic */ @@ -77,109 +72,65 @@ void 
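pch_pic_update_batch_irqs() now walks the 64-bit mask with DECLARE_BITMAP()/for_each_set_bit() instead of repeatedly clearing bits out of the caller's copy. The same iteration can be sketched in plain C with a count-trailing-zeros loop; the GCC/Clang builtin below stands in for the kernel helpers.

#include <stdint.h>
#include <stdio.h>

/* Visit each set bit of a 64-bit IRQ mask, lowest first, the way
 * for_each_set_bit() walks the bitmap in the patch. */
static void update_batch_irqs(uint64_t irq_mask, int level)
{
    while (irq_mask) {
        int irq = __builtin_ctzll(irq_mask);   /* lowest set bit */
        printf("irq %d -> level %d\n", irq, level);
        irq_mask &= irq_mask - 1;              /* clear that bit */
    }
}

int main(void)
{
    update_batch_irqs(0x8001000000000005ULL, 1);   /* irqs 0, 2, 48, 63 */
    return 0;
}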
pch_msi_set_irq(struct kvm *kvm, int irq, int level) eiointc_set_irq(kvm->arch.eiointc, irq, level); } -/* - * pch pic register is 64-bit, but it is accessed by 32-bit, - * so we use high to get whether low or high 32 bits we want - * to read. - */ -static u32 pch_pic_read_reg(u64 *s, int high) -{ - u64 val = *s; - - /* read the high 32 bits when high is 1 */ - return high ? (u32)(val >> 32) : (u32)val; -} - -/* - * pch pic register is 64-bit, but it is accessed by 32-bit, - * so we use high to get whether low or high 32 bits we want - * to write. - */ -static u32 pch_pic_write_reg(u64 *s, int high, u32 v) -{ - u64 val = *s, data = v; - - if (high) { - /* - * Clear val high 32 bits - * Write the high 32 bits when the high is 1 - */ - *s = (val << 32 >> 32) | (data << 32); - val >>= 32; - } else - /* - * Clear val low 32 bits - * Write the low 32 bits when the high is 0 - */ - *s = (val >> 32 << 32) | v; - - return (u32)val; -} - static int loongarch_pch_pic_read(struct loongarch_pch_pic *s, gpa_t addr, int len, void *val) { - int offset, index, ret = 0; - u32 data = 0; - u64 int_id = 0; + int ret = 0, offset; + u64 data = 0; + void *ptemp; offset = addr - s->pch_pic_base; + offset -= offset & 7; spin_lock(&s->lock); switch (offset) { case PCH_PIC_INT_ID_START ... PCH_PIC_INT_ID_END: - /* int id version */ - int_id |= (u64)PCH_PIC_INT_ID_VER << 32; - /* irq number */ - int_id |= (u64)31 << (32 + 16); - /* int id value */ - int_id |= PCH_PIC_INT_ID_VAL; - *(u64 *)val = int_id; + data = s->id.data; break; case PCH_PIC_MASK_START ... PCH_PIC_MASK_END: - offset -= PCH_PIC_MASK_START; - index = offset >> 2; - /* read mask reg */ - data = pch_pic_read_reg(&s->mask, index); - *(u32 *)val = data; + data = s->mask; break; case PCH_PIC_HTMSI_EN_START ... PCH_PIC_HTMSI_EN_END: - offset -= PCH_PIC_HTMSI_EN_START; - index = offset >> 2; /* read htmsi enable reg */ - data = pch_pic_read_reg(&s->htmsi_en, index); - *(u32 *)val = data; + data = s->htmsi_en; break; case PCH_PIC_EDGE_START ... PCH_PIC_EDGE_END: - offset -= PCH_PIC_EDGE_START; - index = offset >> 2; /* read edge enable reg */ - data = pch_pic_read_reg(&s->edge, index); - *(u32 *)val = data; + data = s->edge; break; case PCH_PIC_AUTO_CTRL0_START ... PCH_PIC_AUTO_CTRL0_END: case PCH_PIC_AUTO_CTRL1_START ... PCH_PIC_AUTO_CTRL1_END: /* we only use default mode: fixed interrupt distribution mode */ - *(u32 *)val = 0; break; case PCH_PIC_ROUTE_ENTRY_START ... PCH_PIC_ROUTE_ENTRY_END: /* only route to int0: eiointc */ - *(u8 *)val = 1; + ptemp = s->route_entry + (offset - PCH_PIC_ROUTE_ENTRY_START); + data = *(u64 *)ptemp; break; case PCH_PIC_HTMSI_VEC_START ... PCH_PIC_HTMSI_VEC_END: - offset -= PCH_PIC_HTMSI_VEC_START; /* read htmsi vector */ - data = s->htmsi_vector[offset]; - *(u8 *)val = data; + ptemp = s->htmsi_vector + (offset - PCH_PIC_HTMSI_VEC_START); + data = *(u64 *)ptemp; break; case PCH_PIC_POLARITY_START ... 
PCH_PIC_POLARITY_END: - /* we only use defalut value 0: high level triggered */ - *(u32 *)val = 0; + data = s->polarity; + break; + case PCH_PIC_INT_IRR_START: + data = s->irr; + break; + case PCH_PIC_INT_ISR_START: + data = s->isr; break; default: ret = -EINVAL; } spin_unlock(&s->lock); + if (ret == 0) { + offset = (addr - s->pch_pic_base) & 7; + data = data >> (offset * 8); + memcpy(val, &data, len); + } + return ret; } @@ -210,81 +161,69 @@ static int kvm_pch_pic_read(struct kvm_vcpu *vcpu, static int loongarch_pch_pic_write(struct loongarch_pch_pic *s, gpa_t addr, int len, const void *val) { - int ret; - u32 old, data, offset, index; - u64 irq; + int ret = 0, offset; + u64 old, data, mask; + void *ptemp; + + switch (len) { + case 1: + data = *(u8 *)val; + mask = 0xFF; + break; + case 2: + data = *(u16 *)val; + mask = USHRT_MAX; + break; + case 4: + data = *(u32 *)val; + mask = UINT_MAX; + break; + case 8: + default: + data = *(u64 *)val; + mask = ULONG_MAX; + break; + } - ret = 0; - data = *(u32 *)val; - offset = addr - s->pch_pic_base; + offset = (addr - s->pch_pic_base) & 7; + mask = mask << (offset * 8); + data = data << (offset * 8); + offset = (addr - s->pch_pic_base) - offset; spin_lock(&s->lock); switch (offset) { - case PCH_PIC_MASK_START ... PCH_PIC_MASK_END: - offset -= PCH_PIC_MASK_START; - /* get whether high or low 32 bits we want to write */ - index = offset >> 2; - old = pch_pic_write_reg(&s->mask, index, data); - /* enable irq when mask value change to 0 */ - irq = (old & ~data) << (32 * index); - pch_pic_update_batch_irqs(s, irq, 1); - /* disable irq when mask value change to 1 */ - irq = (~old & data) << (32 * index); - pch_pic_update_batch_irqs(s, irq, 0); - break; - case PCH_PIC_HTMSI_EN_START ... PCH_PIC_HTMSI_EN_END: - offset -= PCH_PIC_HTMSI_EN_START; - index = offset >> 2; - pch_pic_write_reg(&s->htmsi_en, index, data); + case PCH_PIC_MASK_START: + old = s->mask; + s->mask = (old & ~mask) | data; + if (old & ~data) + pch_pic_update_batch_irqs(s, old & ~data, 1); + if (~old & data) + pch_pic_update_batch_irqs(s, ~old & data, 0); break; - case PCH_PIC_EDGE_START ... PCH_PIC_EDGE_END: - offset -= PCH_PIC_EDGE_START; - index = offset >> 2; - /* 1: edge triggered, 0: level triggered */ - pch_pic_write_reg(&s->edge, index, data); - break; - case PCH_PIC_CLEAR_START ... PCH_PIC_CLEAR_END: - offset -= PCH_PIC_CLEAR_START; - index = offset >> 2; - /* write 1 to clear edge irq */ - old = pch_pic_read_reg(&s->irr, index); - /* - * get the irq bitmap which is edge triggered and - * already set and to be cleared - */ - irq = old & pch_pic_read_reg(&s->edge, index) & data; - /* write irr to the new state where irqs have been cleared */ - pch_pic_write_reg(&s->irr, index, old & ~irq); - /* update cleared irqs */ - pch_pic_update_batch_irqs(s, irq, 0); + case PCH_PIC_HTMSI_EN_START: + s->htmsi_en = (s->htmsi_en & ~mask) | data; break; - case PCH_PIC_AUTO_CTRL0_START ... PCH_PIC_AUTO_CTRL0_END: - offset -= PCH_PIC_AUTO_CTRL0_START; - index = offset >> 2; - /* we only use default mode: fixed interrupt distribution mode */ - pch_pic_write_reg(&s->auto_ctrl0, index, 0); + case PCH_PIC_EDGE_START: + s->edge = (s->edge & ~mask) | data; break; - case PCH_PIC_AUTO_CTRL1_START ... 
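The reworked read and write paths treat every pch-pic register as a full 64-bit value and derive narrow accesses from the byte offset inside it: reads shift the register down and copy len bytes out, writes build a byte-lane mask from the access size and read-modify-write the register under the lock. A small stand-alone sketch of that arithmetic, with illustrative names and a little-endian host assumed:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void reg64_read(uint64_t reg, uint64_t addr, int len, void *val)
{
    int byte = addr & 7;                       /* offset inside the register */
    uint64_t data = reg >> (byte * 8);

    memcpy(val, &data, len);
}

static void reg64_write(uint64_t *reg, uint64_t addr, int len, uint64_t v)
{
    int byte = addr & 7;
    uint64_t mask = (len == 8) ? ~0ULL : ((1ULL << (len * 8)) - 1);

    mask <<= byte * 8;
    v = (v << (byte * 8)) & mask;
    *reg = (*reg & ~mask) | v;                 /* read-modify-write the lanes */
}

int main(void)
{
    uint64_t reg = 0x1122334455667788ULL;
    uint32_t out;

    reg64_read(reg, 0x24, 4, &out);            /* bytes 4..7 of the register */
    reg64_write(&reg, 0x22, 2, 0xbeef);        /* bytes 2..3 of the register */
    printf("out=%#x reg=%#llx\n", out, (unsigned long long)reg);
    return 0;
}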
PCH_PIC_AUTO_CTRL1_END: - offset -= PCH_PIC_AUTO_CTRL1_START; - index = offset >> 2; - /* we only use default mode: fixed interrupt distribution mode */ - pch_pic_write_reg(&s->auto_ctrl1, index, 0); + case PCH_PIC_POLARITY_START: + s->polarity = (s->polarity & ~mask) | data; break; - case PCH_PIC_ROUTE_ENTRY_START ... PCH_PIC_ROUTE_ENTRY_END: - offset -= PCH_PIC_ROUTE_ENTRY_START; - /* only route to int0: eiointc */ - s->route_entry[offset] = 1; + case PCH_PIC_CLEAR_START: + old = s->irr & s->edge & data; + if (old) { + s->irr &= ~old; + pch_pic_update_batch_irqs(s, old, 0); + } break; case PCH_PIC_HTMSI_VEC_START ... PCH_PIC_HTMSI_VEC_END: - /* route table to eiointc */ - offset -= PCH_PIC_HTMSI_VEC_START; - s->htmsi_vector[offset] = (u8)data; + ptemp = s->htmsi_vector + (offset - PCH_PIC_HTMSI_VEC_START); + *(u64 *)ptemp = (*(u64 *)ptemp & ~mask) | data; break; - case PCH_PIC_POLARITY_START ... PCH_PIC_POLARITY_END: - offset -= PCH_PIC_POLARITY_START; - index = offset >> 2; - /* we only use defalut value 0: high level triggered */ - pch_pic_write_reg(&s->polarity, index, 0); + /* Not implemented */ + case PCH_PIC_AUTO_CTRL0_START: + case PCH_PIC_AUTO_CTRL1_START: + case PCH_PIC_ROUTE_ENTRY_START ... PCH_PIC_ROUTE_ENTRY_END: break; default: ret = -EINVAL; @@ -484,7 +423,7 @@ static int kvm_setup_default_irq_routing(struct kvm *kvm) static int kvm_pch_pic_create(struct kvm_device *dev, u32 type) { - int ret; + int i, ret, irq_num; struct kvm *kvm = dev->kvm; struct loongarch_pch_pic *s; @@ -500,6 +439,22 @@ static int kvm_pch_pic_create(struct kvm_device *dev, u32 type) if (!s) return -ENOMEM; + /* + * Interrupt controller identification register 1 + * Bit 24-31 Interrupt Controller ID + * Interrupt controller identification register 2 + * Bit 0-7 Interrupt Controller version number + * Bit 16-23 The number of interrupt sources supported + */ + irq_num = 32; + s->mask = -1UL; + s->id.desc.id = PCH_PIC_INT_ID_VAL; + s->id.desc.version = PCH_PIC_INT_ID_VER; + s->id.desc.irq_num = irq_num - 1; + for (i = 0; i < irq_num; i++) { + s->route_entry[i] = 1; + s->htmsi_vector[i] = i; + } spin_lock_init(&s->lock); s->kvm = kvm; kvm->arch.pch_pic = s; diff --git a/arch/loongarch/kvm/trace.h b/arch/loongarch/kvm/trace.h index 145514dab6d5..3467ee22b704 100644 --- a/arch/loongarch/kvm/trace.h +++ b/arch/loongarch/kvm/trace.h @@ -161,6 +161,41 @@ TRACE_EVENT(kvm_aux, __entry->pc) ); +#define KVM_TRACE_IOCSR_READ_UNSATISFIED 0 +#define KVM_TRACE_IOCSR_READ 1 +#define KVM_TRACE_IOCSR_WRITE 2 + +#define kvm_trace_symbol_iocsr \ + { KVM_TRACE_IOCSR_READ_UNSATISFIED, "unsatisfied-read" }, \ + { KVM_TRACE_IOCSR_READ, "read" }, \ + { KVM_TRACE_IOCSR_WRITE, "write" } + +TRACE_EVENT(kvm_iocsr, + TP_PROTO(int type, int len, u64 gpa, void *val), + TP_ARGS(type, len, gpa, val), + + TP_STRUCT__entry( + __field( u32, type ) + __field( u32, len ) + __field( u64, gpa ) + __field( u64, val ) + ), + + TP_fast_assign( + __entry->type = type; + __entry->len = len; + __entry->gpa = gpa; + __entry->val = 0; + if (val) + memcpy(&__entry->val, val, + min_t(u32, sizeof(__entry->val), len)); + ), + + TP_printk("iocsr %s len %u gpa 0x%llx val 0x%llx", + __print_symbolic(__entry->type, kvm_trace_symbol_iocsr), + __entry->len, __entry->gpa, __entry->val) +); + TRACE_EVENT(kvm_vpid_change, TP_PROTO(struct kvm_vcpu *vcpu, unsigned long vpid), TP_ARGS(vcpu, vpid), diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index ce478151466c..9c802f7103c6 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ 
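The identification value returned for PCH_PIC_INT_ID_START is now kept in the pch_pic_id union that kvm_pch_pic_create() fills in, rather than being assembled with shifts on every read. A user-space sketch showing that the byte-wise union layout reproduces the previously composed constant (little-endian assumed; field values taken from the patch):

#include <stdint.h>
#include <stdio.h>

union pic_id {
    struct {
        uint8_t reserved_0[3];
        uint8_t id;          /* byte 3: controller ID */
        uint8_t version;     /* byte 4: version number */
        uint8_t reserved_1;
        uint8_t irq_num;     /* byte 6: supported sources minus one */
        uint8_t reserved_2;
    } desc;
    uint64_t data;
};

int main(void)
{
    union pic_id v = { .data = 0 };

    v.desc.id = 0x7;          /* PCH_PIC_INT_ID_VAL */
    v.desc.version = 0x1;     /* PCH_PIC_INT_ID_VER */
    v.desc.irq_num = 32 - 1;  /* 32 sources, encoded as N-1 */

    /* On little-endian this matches the old shift-built value
     * (0x7000000 | ver << 32 | 31 << 48). */
    printf("id reg = %#llx\n", (unsigned long long)v.data);
    return 0;
}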
-680,6 +680,8 @@ static int _kvm_get_cpucfg_mask(int id, u64 *v) *v |= CPUCFG2_ARMBT; if (cpu_has_lbt_mips) *v |= CPUCFG2_MIPSBT; + if (cpu_has_ptw) + *v |= CPUCFG2_PTW; return 0; case LOONGARCH_CPUCFG3: diff --git a/arch/loongarch/kvm/vm.c b/arch/loongarch/kvm/vm.c index edccfc8c9cd8..a49b1c1a3dd1 100644 --- a/arch/loongarch/kvm/vm.c +++ b/arch/loongarch/kvm/vm.c @@ -146,6 +146,10 @@ static int kvm_vm_feature_has_attr(struct kvm *kvm, struct kvm_device_attr *attr if (kvm_pvtime_supported()) return 0; return -ENXIO; + case KVM_LOONGARCH_VM_FEAT_PTW: + if (cpu_has_ptw) + return 0; + return -ENXIO; default: return -ENXIO; } diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index b9ef2da15fb2..fc2725cbca18 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -140,6 +140,8 @@ CONFIG_MICREL_PHY=y CONFIG_MICROSEMI_PHY=y CONFIG_MOTORCOMM_PHY=y CONFIG_INPUT_MOUSEDEV=y +CONFIG_INPUT_EVDEV=y +CONFIG_KEYBOARD_GPIO=y CONFIG_KEYBOARD_SUN4I_LRADC=m CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y diff --git a/arch/riscv/include/asm/irq.h b/arch/riscv/include/asm/irq.h index 59c975f750c9..e29ded3416b4 100644 --- a/arch/riscv/include/asm/irq.h +++ b/arch/riscv/include/asm/irq.h @@ -32,6 +32,7 @@ enum riscv_irqchip_type { ACPI_RISCV_IRQCHIP_IMSIC = 0x01, ACPI_RISCV_IRQCHIP_PLIC = 0x02, ACPI_RISCV_IRQCHIP_APLIC = 0x03, + ACPI_RISCV_IRQCHIP_SMSI = 0x04, }; int riscv_acpi_get_gsi_info(struct fwnode_handle *fwnode, u32 *gsi_base, @@ -42,6 +43,7 @@ unsigned long acpi_rintc_ext_parent_to_hartid(unsigned int plic_id, unsigned int unsigned int acpi_rintc_get_plic_nr_contexts(unsigned int plic_id); unsigned int acpi_rintc_get_plic_context(unsigned int plic_id, unsigned int ctxt_idx); int __init acpi_rintc_get_imsic_mmio_info(u32 index, struct resource *res); +int riscv_acpi_update_gsi_range(u32 gsi_base, u32 nr_irqs); #else static inline int riscv_acpi_get_gsi_info(struct fwnode_handle *fwnode, u32 *gsi_base, @@ -76,6 +78,10 @@ static inline int __init acpi_rintc_get_imsic_mmio_info(u32 index, struct resour return 0; } +static inline int riscv_acpi_update_gsi_range(u32 gsi_base, u32 nr_irqs) +{ + return -ENODEV; +} #endif /* CONFIG_ACPI */ #endif /* _ASM_RISCV_IRQ_H */ diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h index d71d3299a335..4d794573e3db 100644 --- a/arch/riscv/include/asm/kvm_host.h +++ b/arch/riscv/include/asm/kvm_host.h @@ -21,6 +21,7 @@ #include <asm/kvm_vcpu_fp.h> #include <asm/kvm_vcpu_insn.h> #include <asm/kvm_vcpu_sbi.h> +#include <asm/kvm_vcpu_sbi_fwft.h> #include <asm/kvm_vcpu_timer.h> #include <asm/kvm_vcpu_pmu.h> @@ -263,6 +264,9 @@ struct kvm_vcpu_arch { /* Performance monitoring context */ struct kvm_pmu pmu_context; + /* Firmware feature SBI extension context */ + struct kvm_sbi_fwft fwft_context; + /* 'static' configurations which are set only once */ struct kvm_vcpu_config cfg; diff --git a/arch/riscv/include/asm/kvm_vcpu_pmu.h b/arch/riscv/include/asm/kvm_vcpu_pmu.h index 1d85b6617508..9a930afc8f57 100644 --- a/arch/riscv/include/asm/kvm_vcpu_pmu.h +++ b/arch/riscv/include/asm/kvm_vcpu_pmu.h @@ -98,6 +98,9 @@ void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu); int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long saddr_low, unsigned long saddr_high, unsigned long flags, struct kvm_vcpu_sbi_return *retdata); +int kvm_riscv_vcpu_pmu_event_info(struct kvm_vcpu *vcpu, unsigned long saddr_low, + unsigned long saddr_high, unsigned long num_events, + unsigned long flags, struct 
kvm_vcpu_sbi_return *retdata); void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu); void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu); diff --git a/arch/riscv/include/asm/kvm_vcpu_sbi.h b/arch/riscv/include/asm/kvm_vcpu_sbi.h index d678fd7e5973..3497489e04db 100644 --- a/arch/riscv/include/asm/kvm_vcpu_sbi.h +++ b/arch/riscv/include/asm/kvm_vcpu_sbi.h @@ -11,7 +11,7 @@ #define KVM_SBI_IMPID 3 -#define KVM_SBI_VERSION_MAJOR 2 +#define KVM_SBI_VERSION_MAJOR 3 #define KVM_SBI_VERSION_MINOR 0 enum kvm_riscv_sbi_ext_status { @@ -59,6 +59,14 @@ struct kvm_vcpu_sbi_extension { void (*deinit)(struct kvm_vcpu *vcpu); void (*reset)(struct kvm_vcpu *vcpu); + + unsigned long state_reg_subtype; + unsigned long (*get_state_reg_count)(struct kvm_vcpu *vcpu); + int (*get_state_reg_id)(struct kvm_vcpu *vcpu, int index, u64 *reg_id); + int (*get_state_reg)(struct kvm_vcpu *vcpu, unsigned long reg_num, + unsigned long reg_size, void *reg_val); + int (*set_state_reg)(struct kvm_vcpu *vcpu, unsigned long reg_num, + unsigned long reg_size, const void *reg_val); }; void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu, struct kvm_run *run); @@ -69,27 +77,21 @@ void kvm_riscv_vcpu_sbi_request_reset(struct kvm_vcpu *vcpu, unsigned long pc, unsigned long a1); void kvm_riscv_vcpu_sbi_load_reset_state(struct kvm_vcpu *vcpu); int kvm_riscv_vcpu_sbi_return(struct kvm_vcpu *vcpu, struct kvm_run *run); +int kvm_riscv_vcpu_reg_indices_sbi_ext(struct kvm_vcpu *vcpu, u64 __user *uindices); int kvm_riscv_vcpu_set_reg_sbi_ext(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); int kvm_riscv_vcpu_get_reg_sbi_ext(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); -int kvm_riscv_vcpu_set_reg_sbi(struct kvm_vcpu *vcpu, - const struct kvm_one_reg *reg); -int kvm_riscv_vcpu_get_reg_sbi(struct kvm_vcpu *vcpu, - const struct kvm_one_reg *reg); +int kvm_riscv_vcpu_reg_indices_sbi(struct kvm_vcpu *vcpu, u64 __user *uindices); +int kvm_riscv_vcpu_set_reg_sbi(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); +int kvm_riscv_vcpu_get_reg_sbi(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); const struct kvm_vcpu_sbi_extension *kvm_vcpu_sbi_find_ext( struct kvm_vcpu *vcpu, unsigned long extid); -bool riscv_vcpu_supports_sbi_ext(struct kvm_vcpu *vcpu, int idx); int kvm_riscv_vcpu_sbi_ecall(struct kvm_vcpu *vcpu, struct kvm_run *run); void kvm_riscv_vcpu_sbi_init(struct kvm_vcpu *vcpu); void kvm_riscv_vcpu_sbi_deinit(struct kvm_vcpu *vcpu); void kvm_riscv_vcpu_sbi_reset(struct kvm_vcpu *vcpu); -int kvm_riscv_vcpu_get_reg_sbi_sta(struct kvm_vcpu *vcpu, unsigned long reg_num, - unsigned long *reg_val); -int kvm_riscv_vcpu_set_reg_sbi_sta(struct kvm_vcpu *vcpu, unsigned long reg_num, - unsigned long reg_val); - #ifdef CONFIG_RISCV_SBI_V01 extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_v01; #endif @@ -102,6 +104,7 @@ extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_hsm; extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_dbcn; extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_susp; extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_sta; +extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_fwft; extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_experimental; extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_vendor; diff --git a/arch/riscv/include/asm/kvm_vcpu_sbi_fwft.h b/arch/riscv/include/asm/kvm_vcpu_sbi_fwft.h new file mode 100644 index 000000000000..5604cec79902 --- /dev/null +++ b/arch/riscv/include/asm/kvm_vcpu_sbi_fwft.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: 
GPL-2.0-only */ +/* + * Copyright (c) 2025 Rivos Inc. + * + * Authors: + * Clément Léger <cleger@rivosinc.com> + */ + +#ifndef __KVM_VCPU_RISCV_FWFT_H +#define __KVM_VCPU_RISCV_FWFT_H + +#include <asm/sbi.h> + +struct kvm_sbi_fwft_feature; + +struct kvm_sbi_fwft_config { + const struct kvm_sbi_fwft_feature *feature; + bool supported; + bool enabled; + unsigned long flags; +}; + +/* FWFT data structure per vcpu */ +struct kvm_sbi_fwft { + struct kvm_sbi_fwft_config *configs; +#ifndef CONFIG_32BIT + bool have_vs_pmlen_7; + bool have_vs_pmlen_16; +#endif +}; + +#define vcpu_to_fwft(vcpu) (&(vcpu)->arch.fwft_context) + +#endif /* !__KVM_VCPU_RISCV_FWFT_H */ diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h index 341e74238aa0..ccc77a89b1e2 100644 --- a/arch/riscv/include/asm/sbi.h +++ b/arch/riscv/include/asm/sbi.h @@ -36,6 +36,7 @@ enum sbi_ext_id { SBI_EXT_STA = 0x535441, SBI_EXT_NACL = 0x4E41434C, SBI_EXT_FWFT = 0x46574654, + SBI_EXT_MPXY = 0x4D505859, /* Experimentals extensions must lie within this range */ SBI_EXT_EXPERIMENTAL_START = 0x08000000, @@ -136,6 +137,7 @@ enum sbi_ext_pmu_fid { SBI_EXT_PMU_COUNTER_FW_READ, SBI_EXT_PMU_COUNTER_FW_READ_HI, SBI_EXT_PMU_SNAPSHOT_SET_SHMEM, + SBI_EXT_PMU_EVENT_GET_INFO, }; union sbi_pmu_ctr_info { @@ -159,9 +161,20 @@ struct riscv_pmu_snapshot_data { u64 reserved[447]; }; +struct riscv_pmu_event_info { + u32 event_idx; + u32 output; + u64 event_data; +}; + +#define RISCV_PMU_EVENT_INFO_OUTPUT_MASK 0x01 + #define RISCV_PMU_RAW_EVENT_MASK GENMASK_ULL(47, 0) #define RISCV_PMU_PLAT_FW_EVENT_MASK GENMASK_ULL(61, 0) +/* SBI v3.0 allows extended hpmeventX width value */ +#define RISCV_PMU_RAW_EVENT_V2_MASK GENMASK_ULL(55, 0) #define RISCV_PMU_RAW_EVENT_IDX 0x20000 +#define RISCV_PMU_RAW_EVENT_V2_IDX 0x30000 #define RISCV_PLAT_FW_EVENT 0xFFFF /** General pmu event codes specified in SBI PMU extension */ @@ -219,6 +232,7 @@ enum sbi_pmu_event_type { SBI_PMU_EVENT_TYPE_HW = 0x0, SBI_PMU_EVENT_TYPE_CACHE = 0x1, SBI_PMU_EVENT_TYPE_RAW = 0x2, + SBI_PMU_EVENT_TYPE_RAW_V2 = 0x3, SBI_PMU_EVENT_TYPE_FW = 0xf, }; @@ -430,6 +444,67 @@ enum sbi_fwft_feature_t { #define SBI_FWFT_SET_FLAG_LOCK BIT(0) +enum sbi_ext_mpxy_fid { + SBI_EXT_MPXY_GET_SHMEM_SIZE, + SBI_EXT_MPXY_SET_SHMEM, + SBI_EXT_MPXY_GET_CHANNEL_IDS, + SBI_EXT_MPXY_READ_ATTRS, + SBI_EXT_MPXY_WRITE_ATTRS, + SBI_EXT_MPXY_SEND_MSG_WITH_RESP, + SBI_EXT_MPXY_SEND_MSG_WITHOUT_RESP, + SBI_EXT_MPXY_GET_NOTIFICATION_EVENTS, +}; + +enum sbi_mpxy_attribute_id { + /* Standard channel attributes managed by MPXY framework */ + SBI_MPXY_ATTR_MSG_PROT_ID = 0x00000000, + SBI_MPXY_ATTR_MSG_PROT_VER = 0x00000001, + SBI_MPXY_ATTR_MSG_MAX_LEN = 0x00000002, + SBI_MPXY_ATTR_MSG_SEND_TIMEOUT = 0x00000003, + SBI_MPXY_ATTR_MSG_COMPLETION_TIMEOUT = 0x00000004, + SBI_MPXY_ATTR_CHANNEL_CAPABILITY = 0x00000005, + SBI_MPXY_ATTR_SSE_EVENT_ID = 0x00000006, + SBI_MPXY_ATTR_MSI_CONTROL = 0x00000007, + SBI_MPXY_ATTR_MSI_ADDR_LO = 0x00000008, + SBI_MPXY_ATTR_MSI_ADDR_HI = 0x00000009, + SBI_MPXY_ATTR_MSI_DATA = 0x0000000A, + SBI_MPXY_ATTR_EVENTS_STATE_CONTROL = 0x0000000B, + SBI_MPXY_ATTR_STD_ATTR_MAX_IDX, + /* + * Message protocol specific attributes, managed by + * the message protocol specification. 
+ */ + SBI_MPXY_ATTR_MSGPROTO_ATTR_START = 0x80000000, + SBI_MPXY_ATTR_MSGPROTO_ATTR_END = 0xffffffff +}; + +/* Possible values of MSG_PROT_ID attribute as-per SBI v3.0 (or higher) */ +enum sbi_mpxy_msgproto_id { + SBI_MPXY_MSGPROTO_RPMI_ID = 0x0, +}; + +/* RPMI message protocol specific MPXY attributes */ +enum sbi_mpxy_rpmi_attribute_id { + SBI_MPXY_RPMI_ATTR_SERVICEGROUP_ID = SBI_MPXY_ATTR_MSGPROTO_ATTR_START, + SBI_MPXY_RPMI_ATTR_SERVICEGROUP_VERSION, + SBI_MPXY_RPMI_ATTR_IMPL_ID, + SBI_MPXY_RPMI_ATTR_IMPL_VERSION, + SBI_MPXY_RPMI_ATTR_MAX_ID +}; + +/* Encoding of MSG_PROT_VER attribute */ +#define SBI_MPXY_MSG_PROT_VER_MAJOR(__ver) upper_16_bits(__ver) +#define SBI_MPXY_MSG_PROT_VER_MINOR(__ver) lower_16_bits(__ver) +#define SBI_MPXY_MSG_PROT_MKVER(__maj, __min) (((u32)(__maj) << 16) | (u16)(__min)) + +/* Capabilities available through CHANNEL_CAPABILITY attribute */ +#define SBI_MPXY_CHAN_CAP_MSI BIT(0) +#define SBI_MPXY_CHAN_CAP_SSE BIT(1) +#define SBI_MPXY_CHAN_CAP_EVENTS_STATE BIT(2) +#define SBI_MPXY_CHAN_CAP_SEND_WITH_RESP BIT(3) +#define SBI_MPXY_CHAN_CAP_SEND_WITHOUT_RESP BIT(4) +#define SBI_MPXY_CHAN_CAP_GET_NOTIFICATIONS BIT(5) + /* SBI spec version fields */ #define SBI_SPEC_VERSION_DEFAULT 0x1 #define SBI_SPEC_VERSION_MAJOR_SHIFT 24 diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h index 251099d860aa..759a4852c09a 100644 --- a/arch/riscv/include/uapi/asm/kvm.h +++ b/arch/riscv/include/uapi/asm/kvm.h @@ -56,6 +56,7 @@ struct kvm_riscv_config { unsigned long mimpid; unsigned long zicboz_block_size; unsigned long satp_mode; + unsigned long zicbop_block_size; }; /* CORE registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ @@ -185,6 +186,10 @@ enum KVM_RISCV_ISA_EXT_ID { KVM_RISCV_ISA_EXT_ZICCRSE, KVM_RISCV_ISA_EXT_ZAAMO, KVM_RISCV_ISA_EXT_ZALRSC, + KVM_RISCV_ISA_EXT_ZICBOP, + KVM_RISCV_ISA_EXT_ZFBFMIN, + KVM_RISCV_ISA_EXT_ZVFBFMIN, + KVM_RISCV_ISA_EXT_ZVFBFWMA, KVM_RISCV_ISA_EXT_MAX, }; @@ -205,6 +210,7 @@ enum KVM_RISCV_SBI_EXT_ID { KVM_RISCV_SBI_EXT_DBCN, KVM_RISCV_SBI_EXT_STA, KVM_RISCV_SBI_EXT_SUSP, + KVM_RISCV_SBI_EXT_FWFT, KVM_RISCV_SBI_EXT_MAX, }; @@ -214,6 +220,18 @@ struct kvm_riscv_sbi_sta { unsigned long shmem_hi; }; +struct kvm_riscv_sbi_fwft_feature { + unsigned long enable; + unsigned long flags; + unsigned long value; +}; + +/* SBI FWFT extension registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ +struct kvm_riscv_sbi_fwft { + struct kvm_riscv_sbi_fwft_feature misaligned_deleg; + struct kvm_riscv_sbi_fwft_feature pointer_masking; +}; + /* Possible states for kvm_riscv_timer */ #define KVM_RISCV_TIMER_STATE_OFF 0 #define KVM_RISCV_TIMER_STATE_ON 1 @@ -297,6 +315,9 @@ struct kvm_riscv_sbi_sta { #define KVM_REG_RISCV_SBI_STA (0x0 << KVM_REG_RISCV_SUBTYPE_SHIFT) #define KVM_REG_RISCV_SBI_STA_REG(name) \ (offsetof(struct kvm_riscv_sbi_sta, name) / sizeof(unsigned long)) +#define KVM_REG_RISCV_SBI_FWFT (0x1 << KVM_REG_RISCV_SUBTYPE_SHIFT) +#define KVM_REG_RISCV_SBI_FWFT_REG(name) \ + (offsetof(struct kvm_riscv_sbi_fwft, name) / sizeof(unsigned long)) /* Device Control API: RISC-V AIA */ #define KVM_DEV_RISCV_APLIC_ALIGN 0x1000 diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile index 4b199dc3e58b..07197395750e 100644 --- a/arch/riscv/kvm/Makefile +++ b/arch/riscv/kvm/Makefile @@ -27,6 +27,7 @@ kvm-y += vcpu_onereg.o kvm-$(CONFIG_RISCV_PMU_SBI) += vcpu_pmu.o kvm-y += vcpu_sbi.o kvm-y += vcpu_sbi_base.o +kvm-y += vcpu_sbi_fwft.o kvm-y += vcpu_sbi_hsm.o kvm-$(CONFIG_RISCV_PMU_SBI) += vcpu_sbi_pmu.o kvm-y += vcpu_sbi_replace.o 
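The new KVM_REG_RISCV_SBI_FWFT_REG() macro follows the existing STA convention: a state register's number is the corresponding field's offset in the uapi struct divided by the register size, so each FWFT feature's enable/flags/value triple lands on three consecutive register numbers. A minimal sketch of that derivation, using stand-in struct and macro names rather than the uapi ones:

#include <stddef.h>
#include <stdio.h>

struct sbi_fwft_feature_regs {
    unsigned long enable;
    unsigned long flags;
    unsigned long value;
};

struct sbi_fwft_regs {
    struct sbi_fwft_feature_regs misaligned_deleg;
    struct sbi_fwft_feature_regs pointer_masking;
};

/* Register number = field offset / register size, as in the uapi macro. */
#define FWFT_REG(name) \
    (offsetof(struct sbi_fwft_regs, name) / sizeof(unsigned long))

int main(void)
{
    printf("misaligned_deleg.enable -> reg %zu\n", FWFT_REG(misaligned_deleg.enable));
    printf("pointer_masking.value   -> reg %zu\n", FWFT_REG(pointer_masking.value));
    return 0;
}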
diff --git a/arch/riscv/kvm/gstage.c b/arch/riscv/kvm/gstage.c index 24c270d6d0e2..b67d60d722c2 100644 --- a/arch/riscv/kvm/gstage.c +++ b/arch/riscv/kvm/gstage.c @@ -321,7 +321,7 @@ void __init kvm_riscv_gstage_mode_detect(void) if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV57X4) { kvm_riscv_gstage_mode = HGATP_MODE_SV57X4; kvm_riscv_gstage_pgd_levels = 5; - goto skip_sv48x4_test; + goto done; } /* Try Sv48x4 G-stage mode */ @@ -329,10 +329,31 @@ void __init kvm_riscv_gstage_mode_detect(void) if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV48X4) { kvm_riscv_gstage_mode = HGATP_MODE_SV48X4; kvm_riscv_gstage_pgd_levels = 4; + goto done; } -skip_sv48x4_test: + /* Try Sv39x4 G-stage mode */ + csr_write(CSR_HGATP, HGATP_MODE_SV39X4 << HGATP_MODE_SHIFT); + if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV39X4) { + kvm_riscv_gstage_mode = HGATP_MODE_SV39X4; + kvm_riscv_gstage_pgd_levels = 3; + goto done; + } +#else /* CONFIG_32BIT */ + /* Try Sv32x4 G-stage mode */ + csr_write(CSR_HGATP, HGATP_MODE_SV32X4 << HGATP_MODE_SHIFT); + if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV32X4) { + kvm_riscv_gstage_mode = HGATP_MODE_SV32X4; + kvm_riscv_gstage_pgd_levels = 2; + goto done; + } +#endif + + /* KVM depends on !HGATP_MODE_OFF */ + kvm_riscv_gstage_mode = HGATP_MODE_OFF; + kvm_riscv_gstage_pgd_levels = 0; + +done: csr_write(CSR_HGATP, 0); kvm_riscv_local_hfence_gvma_all(); -#endif } diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c index 67c876de74ef..77dc1655b442 100644 --- a/arch/riscv/kvm/main.c +++ b/arch/riscv/kvm/main.c @@ -93,6 +93,23 @@ static int __init riscv_kvm_init(void) return rc; kvm_riscv_gstage_mode_detect(); + switch (kvm_riscv_gstage_mode) { + case HGATP_MODE_SV32X4: + str = "Sv32x4"; + break; + case HGATP_MODE_SV39X4: + str = "Sv39x4"; + break; + case HGATP_MODE_SV48X4: + str = "Sv48x4"; + break; + case HGATP_MODE_SV57X4: + str = "Sv57x4"; + break; + default: + kvm_riscv_nacl_exit(); + return -ENODEV; + } kvm_riscv_gstage_vmid_detect(); @@ -135,22 +152,6 @@ static int __init riscv_kvm_init(void) (rc) ? 
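The rewritten kvm_riscv_gstage_mode_detect() probes G-stage modes from largest to smallest by writing the candidate into HGATP and checking whether it reads back, and now records HGATP_MODE_OFF when nothing sticks so that module init can refuse to continue. The loop below is a simplified user-space model of that probe order, under the assumption that an unsupported mode simply does not stick; it is not the CSR access itself, and the 32-bit Sv32x4 branch is omitted.

#include <stdio.h>

enum mode { MODE_OFF, SV39X4, SV48X4, SV57X4 };

static enum mode hw_supported_max = SV48X4;    /* pretend-hardware capability */

static enum mode csr_write_read_back(enum mode m)
{
    /* Model: an unsupported mode does not take effect. */
    return (m <= hw_supported_max) ? m : MODE_OFF;
}

static void detect(enum mode *mode, int *levels)
{
    static const struct { enum mode m; int levels; } candidates[] = {
        { SV57X4, 5 }, { SV48X4, 4 }, { SV39X4, 3 },
    };

    for (unsigned int i = 0; i < sizeof(candidates) / sizeof(candidates[0]); i++) {
        if (csr_write_read_back(candidates[i].m) == candidates[i].m) {
            *mode = candidates[i].m;
            *levels = candidates[i].levels;
            return;
        }
    }
    *mode = MODE_OFF;                          /* caller then bails out */
    *levels = 0;
}

int main(void)
{
    enum mode m;
    int levels;

    detect(&m, &levels);
    printf("mode=%d levels=%d\n", m, levels);
    return 0;
}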
slist : "no features"); } - switch (kvm_riscv_gstage_mode) { - case HGATP_MODE_SV32X4: - str = "Sv32x4"; - break; - case HGATP_MODE_SV39X4: - str = "Sv39x4"; - break; - case HGATP_MODE_SV48X4: - str = "Sv48x4"; - break; - case HGATP_MODE_SV57X4: - str = "Sv57x4"; - break; - default: - return -ENODEV; - } kvm_info("using %s G-stage page table format\n", str); kvm_info("VMID %ld bits available\n", kvm_riscv_gstage_vmid_bits()); diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index 3ebcfffaa978..47bcf190ccc5 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -133,6 +133,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) /* Mark this VCPU never ran */ vcpu->arch.ran_atleast_once = false; + + vcpu->arch.cfg.hedeleg = KVM_HEDELEG_DEFAULT; vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO; bitmap_zero(vcpu->arch.isa, RISCV_ISA_EXT_MAX); @@ -570,7 +572,6 @@ static void kvm_riscv_vcpu_setup_config(struct kvm_vcpu *vcpu) cfg->hstateen0 |= SMSTATEEN0_SSTATEEN0; } - cfg->hedeleg = KVM_HEDELEG_DEFAULT; if (vcpu->guest_debug) cfg->hedeleg &= ~BIT(EXC_BREAKPOINT); } diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c index cce6a38ea54f..865dae903aa0 100644 --- a/arch/riscv/kvm/vcpu_onereg.c +++ b/arch/riscv/kvm/vcpu_onereg.c @@ -65,9 +65,11 @@ static const unsigned long kvm_isa_ext_arr[] = { KVM_ISA_EXT_ARR(ZCF), KVM_ISA_EXT_ARR(ZCMOP), KVM_ISA_EXT_ARR(ZFA), + KVM_ISA_EXT_ARR(ZFBFMIN), KVM_ISA_EXT_ARR(ZFH), KVM_ISA_EXT_ARR(ZFHMIN), KVM_ISA_EXT_ARR(ZICBOM), + KVM_ISA_EXT_ARR(ZICBOP), KVM_ISA_EXT_ARR(ZICBOZ), KVM_ISA_EXT_ARR(ZICCRSE), KVM_ISA_EXT_ARR(ZICNTR), @@ -88,6 +90,8 @@ static const unsigned long kvm_isa_ext_arr[] = { KVM_ISA_EXT_ARR(ZTSO), KVM_ISA_EXT_ARR(ZVBB), KVM_ISA_EXT_ARR(ZVBC), + KVM_ISA_EXT_ARR(ZVFBFMIN), + KVM_ISA_EXT_ARR(ZVFBFWMA), KVM_ISA_EXT_ARR(ZVFH), KVM_ISA_EXT_ARR(ZVFHMIN), KVM_ISA_EXT_ARR(ZVKB), @@ -173,7 +177,6 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext) case KVM_RISCV_ISA_EXT_C: case KVM_RISCV_ISA_EXT_I: case KVM_RISCV_ISA_EXT_M: - case KVM_RISCV_ISA_EXT_SMNPM: /* There is not architectural config bit to disable sscofpmf completely */ case KVM_RISCV_ISA_EXT_SSCOFPMF: case KVM_RISCV_ISA_EXT_SSNPM: @@ -199,8 +202,10 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext) case KVM_RISCV_ISA_EXT_ZCF: case KVM_RISCV_ISA_EXT_ZCMOP: case KVM_RISCV_ISA_EXT_ZFA: + case KVM_RISCV_ISA_EXT_ZFBFMIN: case KVM_RISCV_ISA_EXT_ZFH: case KVM_RISCV_ISA_EXT_ZFHMIN: + case KVM_RISCV_ISA_EXT_ZICBOP: case KVM_RISCV_ISA_EXT_ZICCRSE: case KVM_RISCV_ISA_EXT_ZICNTR: case KVM_RISCV_ISA_EXT_ZICOND: @@ -220,6 +225,8 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext) case KVM_RISCV_ISA_EXT_ZTSO: case KVM_RISCV_ISA_EXT_ZVBB: case KVM_RISCV_ISA_EXT_ZVBC: + case KVM_RISCV_ISA_EXT_ZVFBFMIN: + case KVM_RISCV_ISA_EXT_ZVFBFWMA: case KVM_RISCV_ISA_EXT_ZVFH: case KVM_RISCV_ISA_EXT_ZVFHMIN: case KVM_RISCV_ISA_EXT_ZVKB: @@ -277,15 +284,20 @@ static int kvm_riscv_vcpu_get_reg_config(struct kvm_vcpu *vcpu, reg_val = vcpu->arch.isa[0] & KVM_RISCV_BASE_ISA_MASK; break; case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size): - if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOM)) + if (!riscv_isa_extension_available(NULL, ZICBOM)) return -ENOENT; reg_val = riscv_cbom_block_size; break; case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size): - if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOZ)) + if (!riscv_isa_extension_available(NULL, ZICBOZ)) return -ENOENT; reg_val = riscv_cboz_block_size; break; + case 
KVM_REG_RISCV_CONFIG_REG(zicbop_block_size): + if (!riscv_isa_extension_available(NULL, ZICBOP)) + return -ENOENT; + reg_val = riscv_cbop_block_size; + break; case KVM_REG_RISCV_CONFIG_REG(mvendorid): reg_val = vcpu->arch.mvendorid; break; @@ -366,17 +378,23 @@ static int kvm_riscv_vcpu_set_reg_config(struct kvm_vcpu *vcpu, } break; case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size): - if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOM)) + if (!riscv_isa_extension_available(NULL, ZICBOM)) return -ENOENT; if (reg_val != riscv_cbom_block_size) return -EINVAL; break; case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size): - if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOZ)) + if (!riscv_isa_extension_available(NULL, ZICBOZ)) return -ENOENT; if (reg_val != riscv_cboz_block_size) return -EINVAL; break; + case KVM_REG_RISCV_CONFIG_REG(zicbop_block_size): + if (!riscv_isa_extension_available(NULL, ZICBOP)) + return -ENOENT; + if (reg_val != riscv_cbop_block_size) + return -EINVAL; + break; case KVM_REG_RISCV_CONFIG_REG(mvendorid): if (reg_val == vcpu->arch.mvendorid) break; @@ -817,10 +835,13 @@ static int copy_config_reg_indices(const struct kvm_vcpu *vcpu, * was not available. */ if (i == KVM_REG_RISCV_CONFIG_REG(zicbom_block_size) && - !riscv_isa_extension_available(vcpu->arch.isa, ZICBOM)) + !riscv_isa_extension_available(NULL, ZICBOM)) continue; else if (i == KVM_REG_RISCV_CONFIG_REG(zicboz_block_size) && - !riscv_isa_extension_available(vcpu->arch.isa, ZICBOZ)) + !riscv_isa_extension_available(NULL, ZICBOZ)) + continue; + else if (i == KVM_REG_RISCV_CONFIG_REG(zicbop_block_size) && + !riscv_isa_extension_available(NULL, ZICBOP)) continue; size = IS_ENABLED(CONFIG_32BIT) ? KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64; @@ -1061,66 +1082,14 @@ static inline unsigned long num_isa_ext_regs(const struct kvm_vcpu *vcpu) return copy_isa_ext_reg_indices(vcpu, NULL); } -static int copy_sbi_ext_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) -{ - unsigned int n = 0; - - for (int i = 0; i < KVM_RISCV_SBI_EXT_MAX; i++) { - u64 size = IS_ENABLED(CONFIG_32BIT) ? - KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64; - u64 reg = KVM_REG_RISCV | size | KVM_REG_RISCV_SBI_EXT | - KVM_REG_RISCV_SBI_SINGLE | i; - - if (!riscv_vcpu_supports_sbi_ext(vcpu, i)) - continue; - - if (uindices) { - if (put_user(reg, uindices)) - return -EFAULT; - uindices++; - } - - n++; - } - - return n; -} - static unsigned long num_sbi_ext_regs(struct kvm_vcpu *vcpu) { - return copy_sbi_ext_reg_indices(vcpu, NULL); -} - -static int copy_sbi_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) -{ - struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context; - int total = 0; - - if (scontext->ext_status[KVM_RISCV_SBI_EXT_STA] == KVM_RISCV_SBI_EXT_STATUS_ENABLED) { - u64 size = IS_ENABLED(CONFIG_32BIT) ? 
KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64; - int n = sizeof(struct kvm_riscv_sbi_sta) / sizeof(unsigned long); - - for (int i = 0; i < n; i++) { - u64 reg = KVM_REG_RISCV | size | - KVM_REG_RISCV_SBI_STATE | - KVM_REG_RISCV_SBI_STA | i; - - if (uindices) { - if (put_user(reg, uindices)) - return -EFAULT; - uindices++; - } - } - - total += n; - } - - return total; + return kvm_riscv_vcpu_reg_indices_sbi_ext(vcpu, NULL); } static inline unsigned long num_sbi_regs(struct kvm_vcpu *vcpu) { - return copy_sbi_reg_indices(vcpu, NULL); + return kvm_riscv_vcpu_reg_indices_sbi(vcpu, NULL); } static inline unsigned long num_vector_regs(const struct kvm_vcpu *vcpu) @@ -1243,12 +1212,12 @@ int kvm_riscv_vcpu_copy_reg_indices(struct kvm_vcpu *vcpu, return ret; uindices += ret; - ret = copy_sbi_ext_reg_indices(vcpu, uindices); + ret = kvm_riscv_vcpu_reg_indices_sbi_ext(vcpu, uindices); if (ret < 0) return ret; uindices += ret; - ret = copy_sbi_reg_indices(vcpu, uindices); + ret = kvm_riscv_vcpu_reg_indices_sbi(vcpu, uindices); if (ret < 0) return ret; uindices += ret; diff --git a/arch/riscv/kvm/vcpu_pmu.c b/arch/riscv/kvm/vcpu_pmu.c index 78ac3216a54d..a2fae70ee174 100644 --- a/arch/riscv/kvm/vcpu_pmu.c +++ b/arch/riscv/kvm/vcpu_pmu.c @@ -60,6 +60,7 @@ static u32 kvm_pmu_get_perf_event_type(unsigned long eidx) type = PERF_TYPE_HW_CACHE; break; case SBI_PMU_EVENT_TYPE_RAW: + case SBI_PMU_EVENT_TYPE_RAW_V2: case SBI_PMU_EVENT_TYPE_FW: type = PERF_TYPE_RAW; break; @@ -128,6 +129,9 @@ static u64 kvm_pmu_get_perf_event_config(unsigned long eidx, uint64_t evt_data) case SBI_PMU_EVENT_TYPE_RAW: config = evt_data & RISCV_PMU_RAW_EVENT_MASK; break; + case SBI_PMU_EVENT_TYPE_RAW_V2: + config = evt_data & RISCV_PMU_RAW_EVENT_V2_MASK; + break; case SBI_PMU_EVENT_TYPE_FW: if (ecode < SBI_PMU_FW_MAX) config = (1ULL << 63) | ecode; @@ -405,8 +409,6 @@ int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long s int snapshot_area_size = sizeof(struct riscv_pmu_snapshot_data); int sbiret = 0; gpa_t saddr; - unsigned long hva; - bool writable; if (!kvpmu || flags) { sbiret = SBI_ERR_INVALID_PARAM; @@ -428,19 +430,14 @@ int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long s goto out; } - hva = kvm_vcpu_gfn_to_hva_prot(vcpu, saddr >> PAGE_SHIFT, &writable); - if (kvm_is_error_hva(hva) || !writable) { - sbiret = SBI_ERR_INVALID_ADDRESS; - goto out; - } - kvpmu->sdata = kzalloc(snapshot_area_size, GFP_ATOMIC); if (!kvpmu->sdata) return -ENOMEM; + /* No need to check writable slot explicitly as kvm_vcpu_write_guest does it internally */ if (kvm_vcpu_write_guest(vcpu, saddr, kvpmu->sdata, snapshot_area_size)) { kfree(kvpmu->sdata); - sbiret = SBI_ERR_FAILURE; + sbiret = SBI_ERR_INVALID_ADDRESS; goto out; } @@ -452,6 +449,65 @@ out: return 0; } +int kvm_riscv_vcpu_pmu_event_info(struct kvm_vcpu *vcpu, unsigned long saddr_low, + unsigned long saddr_high, unsigned long num_events, + unsigned long flags, struct kvm_vcpu_sbi_return *retdata) +{ + struct riscv_pmu_event_info *einfo = NULL; + int shmem_size = num_events * sizeof(*einfo); + gpa_t shmem; + u32 eidx, etype; + u64 econfig; + int ret; + + if (flags != 0 || (saddr_low & (SZ_16 - 1) || num_events == 0)) { + ret = SBI_ERR_INVALID_PARAM; + goto out; + } + + shmem = saddr_low; + if (saddr_high != 0) { + if (IS_ENABLED(CONFIG_32BIT)) { + shmem |= ((gpa_t)saddr_high << 32); + } else { + ret = SBI_ERR_INVALID_ADDRESS; + goto out; + } + } + + einfo = kzalloc(shmem_size, GFP_KERNEL); + if (!einfo) + return -ENOMEM; + + ret = 
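Raw events gain a second encoding here: the original SBI raw event carries a 48-bit payload, while the SBI v3.0 raw v2 event widens it to 56 bits, and kvm_pmu_get_perf_event_config() masks the event data accordingly before handing it to perf. A small sketch of that masking with both widths spelled out (GENMASK_ULL() expands to the same constants):

#include <stdint.h>
#include <stdio.h>

#define RAW_EVENT_MASK     ((1ULL << 48) - 1)   /* GENMASK_ULL(47, 0) */
#define RAW_EVENT_V2_MASK  ((1ULL << 56) - 1)   /* GENMASK_ULL(55, 0) */

enum { EVENT_TYPE_RAW = 0x2, EVENT_TYPE_RAW_V2 = 0x3 };

static uint64_t perf_event_config(int type, uint64_t evt_data)
{
    switch (type) {
    case EVENT_TYPE_RAW:
        return evt_data & RAW_EVENT_MASK;       /* 48-bit payload */
    case EVENT_TYPE_RAW_V2:
        return evt_data & RAW_EVENT_V2_MASK;    /* 56-bit payload */
    default:
        return 0;
    }
}

int main(void)
{
    uint64_t data = 0x00ff123456789abcULL;

    printf("raw    %#llx\n", (unsigned long long)perf_event_config(EVENT_TYPE_RAW, data));
    printf("raw v2 %#llx\n", (unsigned long long)perf_event_config(EVENT_TYPE_RAW_V2, data));
    return 0;
}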
kvm_vcpu_read_guest(vcpu, shmem, einfo, shmem_size); + if (ret) { + ret = SBI_ERR_FAILURE; + goto free_mem; + } + + for (int i = 0; i < num_events; i++) { + eidx = einfo[i].event_idx; + etype = kvm_pmu_get_perf_event_type(eidx); + econfig = kvm_pmu_get_perf_event_config(eidx, einfo[i].event_data); + ret = riscv_pmu_get_event_info(etype, econfig, NULL); + einfo[i].output = (ret > 0) ? 1 : 0; + } + + ret = kvm_vcpu_write_guest(vcpu, shmem, einfo, shmem_size); + if (ret) { + ret = SBI_ERR_INVALID_ADDRESS; + goto free_mem; + } + + ret = 0; +free_mem: + kfree(einfo); +out: + retdata->err_val = ret; + + return 0; +} + int kvm_riscv_vcpu_pmu_num_ctrs(struct kvm_vcpu *vcpu, struct kvm_vcpu_sbi_return *retdata) { diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c index a56c4959f9ad..1b13623380e1 100644 --- a/arch/riscv/kvm/vcpu_sbi.c +++ b/arch/riscv/kvm/vcpu_sbi.c @@ -79,6 +79,10 @@ static const struct kvm_riscv_sbi_extension_entry sbi_ext[] = { .ext_ptr = &vcpu_sbi_ext_sta, }, { + .ext_idx = KVM_RISCV_SBI_EXT_FWFT, + .ext_ptr = &vcpu_sbi_ext_fwft, + }, + { .ext_idx = KVM_RISCV_SBI_EXT_EXPERIMENTAL, .ext_ptr = &vcpu_sbi_ext_experimental, }, @@ -106,7 +110,7 @@ riscv_vcpu_get_sbi_ext(struct kvm_vcpu *vcpu, unsigned long idx) return sext; } -bool riscv_vcpu_supports_sbi_ext(struct kvm_vcpu *vcpu, int idx) +static bool riscv_vcpu_supports_sbi_ext(struct kvm_vcpu *vcpu, int idx) { struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context; const struct kvm_riscv_sbi_extension_entry *sext; @@ -284,6 +288,31 @@ static int riscv_vcpu_get_sbi_ext_multi(struct kvm_vcpu *vcpu, return 0; } +int kvm_riscv_vcpu_reg_indices_sbi_ext(struct kvm_vcpu *vcpu, u64 __user *uindices) +{ + unsigned int n = 0; + + for (int i = 0; i < KVM_RISCV_SBI_EXT_MAX; i++) { + u64 size = IS_ENABLED(CONFIG_32BIT) ? + KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64; + u64 reg = KVM_REG_RISCV | size | KVM_REG_RISCV_SBI_EXT | + KVM_REG_RISCV_SBI_SINGLE | i; + + if (!riscv_vcpu_supports_sbi_ext(vcpu, i)) + continue; + + if (uindices) { + if (put_user(reg, uindices)) + return -EFAULT; + uindices++; + } + + n++; + } + + return n; +} + int kvm_riscv_vcpu_set_reg_sbi_ext(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { @@ -360,64 +389,163 @@ int kvm_riscv_vcpu_get_reg_sbi_ext(struct kvm_vcpu *vcpu, return 0; } -int kvm_riscv_vcpu_set_reg_sbi(struct kvm_vcpu *vcpu, - const struct kvm_one_reg *reg) +int kvm_riscv_vcpu_reg_indices_sbi(struct kvm_vcpu *vcpu, u64 __user *uindices) +{ + struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context; + const struct kvm_riscv_sbi_extension_entry *entry; + const struct kvm_vcpu_sbi_extension *ext; + unsigned long state_reg_count; + int i, j, rc, count = 0; + u64 reg; + + for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) { + entry = &sbi_ext[i]; + ext = entry->ext_ptr; + + if (!ext->get_state_reg_count || + scontext->ext_status[entry->ext_idx] != KVM_RISCV_SBI_EXT_STATUS_ENABLED) + continue; + + state_reg_count = ext->get_state_reg_count(vcpu); + if (!uindices) + goto skip_put_user; + + for (j = 0; j < state_reg_count; j++) { + if (ext->get_state_reg_id) { + rc = ext->get_state_reg_id(vcpu, j, ®); + if (rc) + return rc; + } else { + reg = KVM_REG_RISCV | + (IS_ENABLED(CONFIG_32BIT) ? 
+ KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64) | + KVM_REG_RISCV_SBI_STATE | + ext->state_reg_subtype | j; + } + + if (put_user(reg, uindices)) + return -EFAULT; + uindices++; + } + +skip_put_user: + count += state_reg_count; + } + + return count; +} + +static const struct kvm_vcpu_sbi_extension *kvm_vcpu_sbi_find_ext_withstate(struct kvm_vcpu *vcpu, + unsigned long subtype) +{ + struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context; + const struct kvm_riscv_sbi_extension_entry *entry; + const struct kvm_vcpu_sbi_extension *ext; + int i; + + for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) { + entry = &sbi_ext[i]; + ext = entry->ext_ptr; + + if (ext->get_state_reg_count && + ext->state_reg_subtype == subtype && + scontext->ext_status[entry->ext_idx] == KVM_RISCV_SBI_EXT_STATUS_ENABLED) + return ext; + } + + return NULL; +} + +int kvm_riscv_vcpu_set_reg_sbi(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { unsigned long __user *uaddr = (unsigned long __user *)(unsigned long)reg->addr; unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_RISCV_SBI_STATE); - unsigned long reg_subtype, reg_val; - - if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long)) + const struct kvm_vcpu_sbi_extension *ext; + unsigned long reg_subtype; + void *reg_val; + u64 data64; + u32 data32; + u16 data16; + u8 data8; + + switch (KVM_REG_SIZE(reg->id)) { + case 1: + reg_val = &data8; + break; + case 2: + reg_val = &data16; + break; + case 4: + reg_val = &data32; + break; + case 8: + reg_val = &data64; + break; + default: return -EINVAL; + } - if (copy_from_user(®_val, uaddr, KVM_REG_SIZE(reg->id))) + if (copy_from_user(reg_val, uaddr, KVM_REG_SIZE(reg->id))) return -EFAULT; reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK; reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK; - switch (reg_subtype) { - case KVM_REG_RISCV_SBI_STA: - return kvm_riscv_vcpu_set_reg_sbi_sta(vcpu, reg_num, reg_val); - default: + ext = kvm_vcpu_sbi_find_ext_withstate(vcpu, reg_subtype); + if (!ext || !ext->set_state_reg) return -EINVAL; - } - return 0; + return ext->set_state_reg(vcpu, reg_num, KVM_REG_SIZE(reg->id), reg_val); } -int kvm_riscv_vcpu_get_reg_sbi(struct kvm_vcpu *vcpu, - const struct kvm_one_reg *reg) +int kvm_riscv_vcpu_get_reg_sbi(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { unsigned long __user *uaddr = (unsigned long __user *)(unsigned long)reg->addr; unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_RISCV_SBI_STATE); - unsigned long reg_subtype, reg_val; + const struct kvm_vcpu_sbi_extension *ext; + unsigned long reg_subtype; + void *reg_val; + u64 data64; + u32 data32; + u16 data16; + u8 data8; int ret; - if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long)) + switch (KVM_REG_SIZE(reg->id)) { + case 1: + reg_val = &data8; + break; + case 2: + reg_val = &data16; + break; + case 4: + reg_val = &data32; + break; + case 8: + reg_val = &data64; + break; + default: return -EINVAL; + } reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK; reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK; - switch (reg_subtype) { - case KVM_REG_RISCV_SBI_STA: - ret = kvm_riscv_vcpu_get_reg_sbi_sta(vcpu, reg_num, ®_val); - break; - default: + ext = kvm_vcpu_sbi_find_ext_withstate(vcpu, reg_subtype); + if (!ext || !ext->get_state_reg) return -EINVAL; - } + ret = ext->get_state_reg(vcpu, reg_num, KVM_REG_SIZE(reg->id), reg_val); if (ret) return ret; - if (copy_to_user(uaddr, ®_val, KVM_REG_SIZE(reg->id))) + if (copy_to_user(uaddr, reg_val, KVM_REG_SIZE(reg->id))) return -EFAULT; return 0; diff --git 
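kvm_riscv_vcpu_set_reg_sbi()/get_reg_sbi() no longer hard-code the STA registers; the subtype encoded in the register id selects whichever registered extension declares that state_reg_subtype, and the extension's own callback handles the remaining register index. A simplified stand-alone model of that dispatch, with made-up subtype values and callbacks standing in for the real register encoding:

#include <stdio.h>

#define SUBTYPE_MASK   0xf0u
#define SUBTYPE_STA    0x00u
#define SUBTYPE_FWFT   0x10u

struct sbi_ext {
    unsigned int subtype;
    int (*set_state_reg)(unsigned int reg_num, unsigned long val);
};

static int sta_set(unsigned int reg_num, unsigned long val)
{
    printf("STA reg %u = %#lx\n", reg_num, val);
    return 0;
}

static int fwft_set(unsigned int reg_num, unsigned long val)
{
    printf("FWFT reg %u = %#lx\n", reg_num, val);
    return 0;
}

static const struct sbi_ext exts[] = {
    { SUBTYPE_STA,  sta_set },
    { SUBTYPE_FWFT, fwft_set },
};

static int set_reg(unsigned int reg_num, unsigned long val)
{
    unsigned int subtype = reg_num & SUBTYPE_MASK;

    for (unsigned int i = 0; i < sizeof(exts) / sizeof(exts[0]); i++)
        if (exts[i].subtype == subtype)
            return exts[i].set_state_reg(reg_num & ~SUBTYPE_MASK, val);

    return -1;                                 /* no extension owns this subtype */
}

int main(void)
{
    set_reg(SUBTYPE_FWFT | 2, 7);              /* dispatched to the FWFT callback */
    return 0;
}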
a/arch/riscv/kvm/vcpu_sbi_fwft.c b/arch/riscv/kvm/vcpu_sbi_fwft.c new file mode 100644 index 000000000000..62cc9c3d5759 --- /dev/null +++ b/arch/riscv/kvm/vcpu_sbi_fwft.c @@ -0,0 +1,544 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2025 Rivos Inc. + * + * Authors: + * Clément Léger <cleger@rivosinc.com> + */ + +#include <linux/errno.h> +#include <linux/err.h> +#include <linux/kvm_host.h> +#include <asm/cpufeature.h> +#include <asm/sbi.h> +#include <asm/kvm_vcpu_sbi.h> +#include <asm/kvm_vcpu_sbi_fwft.h> + +#define MIS_DELEG (BIT_ULL(EXC_LOAD_MISALIGNED) | BIT_ULL(EXC_STORE_MISALIGNED)) + +struct kvm_sbi_fwft_feature { + /** + * @id: Feature ID + */ + enum sbi_fwft_feature_t id; + + /** + * @first_reg_num: ONE_REG index of the first ONE_REG register + */ + unsigned long first_reg_num; + + /** + * @supported: Check if the feature is supported on the vcpu + * + * This callback is optional, if not provided the feature is assumed to + * be supported + */ + bool (*supported)(struct kvm_vcpu *vcpu); + + /** + * @reset: Reset the feature value irrespective whether feature is supported or not + * + * This callback is mandatory + */ + void (*reset)(struct kvm_vcpu *vcpu); + + /** + * @set: Set the feature value + * + * Return SBI_SUCCESS on success or an SBI error (SBI_ERR_*) + * + * This callback is mandatory + */ + long (*set)(struct kvm_vcpu *vcpu, struct kvm_sbi_fwft_config *conf, + bool one_reg_access, unsigned long value); + + /** + * @get: Get the feature current value + * + * Return SBI_SUCCESS on success or an SBI error (SBI_ERR_*) + * + * This callback is mandatory + */ + long (*get)(struct kvm_vcpu *vcpu, struct kvm_sbi_fwft_config *conf, + bool one_reg_access, unsigned long *value); +}; + +static const enum sbi_fwft_feature_t kvm_fwft_defined_features[] = { + SBI_FWFT_MISALIGNED_EXC_DELEG, + SBI_FWFT_LANDING_PAD, + SBI_FWFT_SHADOW_STACK, + SBI_FWFT_DOUBLE_TRAP, + SBI_FWFT_PTE_AD_HW_UPDATING, + SBI_FWFT_POINTER_MASKING_PMLEN, +}; + +static bool kvm_fwft_is_defined_feature(enum sbi_fwft_feature_t feature) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(kvm_fwft_defined_features); i++) { + if (kvm_fwft_defined_features[i] == feature) + return true; + } + + return false; +} + +static bool kvm_sbi_fwft_misaligned_delegation_supported(struct kvm_vcpu *vcpu) +{ + return misaligned_traps_can_delegate(); +} + +static void kvm_sbi_fwft_reset_misaligned_delegation(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_config *cfg = &vcpu->arch.cfg; + + cfg->hedeleg &= ~MIS_DELEG; +} + +static long kvm_sbi_fwft_set_misaligned_delegation(struct kvm_vcpu *vcpu, + struct kvm_sbi_fwft_config *conf, + bool one_reg_access, unsigned long value) +{ + struct kvm_vcpu_config *cfg = &vcpu->arch.cfg; + + if (value == 1) { + cfg->hedeleg |= MIS_DELEG; + if (!one_reg_access) + csr_set(CSR_HEDELEG, MIS_DELEG); + } else if (value == 0) { + cfg->hedeleg &= ~MIS_DELEG; + if (!one_reg_access) + csr_clear(CSR_HEDELEG, MIS_DELEG); + } else { + return SBI_ERR_INVALID_PARAM; + } + + return SBI_SUCCESS; +} + +static long kvm_sbi_fwft_get_misaligned_delegation(struct kvm_vcpu *vcpu, + struct kvm_sbi_fwft_config *conf, + bool one_reg_access, unsigned long *value) +{ + struct kvm_vcpu_config *cfg = &vcpu->arch.cfg; + + *value = (cfg->hedeleg & MIS_DELEG) == MIS_DELEG; + return SBI_SUCCESS; +} + +#ifndef CONFIG_32BIT + +static bool try_to_set_pmm(unsigned long value) +{ + csr_set(CSR_HENVCFG, value); + return (csr_read_clear(CSR_HENVCFG, ENVCFG_PMM) & ENVCFG_PMM) == value; +} + +static bool 
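The new FWFT extension models each feature as a value plus a flags word, where SBI_FWFT_SET_FLAG_LOCK makes the current value sticky: once a guest sets a feature with the lock flag, later SET calls are refused. A compact user-space model of that set path follows; the return codes are illustrative placeholders, not the SBI-defined numeric values.

#include <stdio.h>

enum { FWFT_OK = 0, FWFT_EINVAL = -1, FWFT_ELOCKED = -2 }; /* placeholders */
#define FWFT_FLAG_LOCK 0x1UL

struct feature { unsigned long value, flags; };

static long fwft_set(struct feature *f, unsigned long value, unsigned long flags)
{
    if (flags & ~FWFT_FLAG_LOCK)
        return FWFT_EINVAL;                    /* unknown flag bits */
    if (f->flags & FWFT_FLAG_LOCK)
        return FWFT_ELOCKED;                   /* feature was locked earlier */
    f->flags = flags;
    f->value = value;
    return FWFT_OK;
}

int main(void)
{
    struct feature misaligned_deleg = {0};

    printf("%ld\n", fwft_set(&misaligned_deleg, 1, FWFT_FLAG_LOCK)); /* succeeds */
    printf("%ld\n", fwft_set(&misaligned_deleg, 0, 0));              /* refused */
    return 0;
}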
kvm_sbi_fwft_pointer_masking_pmlen_supported(struct kvm_vcpu *vcpu) +{ + struct kvm_sbi_fwft *fwft = vcpu_to_fwft(vcpu); + + if (!riscv_isa_extension_available(vcpu->arch.isa, SMNPM)) + return false; + + fwft->have_vs_pmlen_7 = try_to_set_pmm(ENVCFG_PMM_PMLEN_7); + fwft->have_vs_pmlen_16 = try_to_set_pmm(ENVCFG_PMM_PMLEN_16); + + return fwft->have_vs_pmlen_7 || fwft->have_vs_pmlen_16; +} + +static void kvm_sbi_fwft_reset_pointer_masking_pmlen(struct kvm_vcpu *vcpu) +{ + vcpu->arch.cfg.henvcfg &= ~ENVCFG_PMM; +} + +static long kvm_sbi_fwft_set_pointer_masking_pmlen(struct kvm_vcpu *vcpu, + struct kvm_sbi_fwft_config *conf, + bool one_reg_access, unsigned long value) +{ + struct kvm_sbi_fwft *fwft = vcpu_to_fwft(vcpu); + unsigned long pmm; + + switch (value) { + case 0: + pmm = ENVCFG_PMM_PMLEN_0; + break; + case 7: + if (!fwft->have_vs_pmlen_7) + return SBI_ERR_INVALID_PARAM; + pmm = ENVCFG_PMM_PMLEN_7; + break; + case 16: + if (!fwft->have_vs_pmlen_16) + return SBI_ERR_INVALID_PARAM; + pmm = ENVCFG_PMM_PMLEN_16; + break; + default: + return SBI_ERR_INVALID_PARAM; + } + + vcpu->arch.cfg.henvcfg &= ~ENVCFG_PMM; + vcpu->arch.cfg.henvcfg |= pmm; + + /* + * Instead of waiting for vcpu_load/put() to update HENVCFG CSR, + * update here so that VCPU see's pointer masking mode change + * immediately. + */ + if (!one_reg_access) + csr_write(CSR_HENVCFG, vcpu->arch.cfg.henvcfg); + + return SBI_SUCCESS; +} + +static long kvm_sbi_fwft_get_pointer_masking_pmlen(struct kvm_vcpu *vcpu, + struct kvm_sbi_fwft_config *conf, + bool one_reg_access, unsigned long *value) +{ + switch (vcpu->arch.cfg.henvcfg & ENVCFG_PMM) { + case ENVCFG_PMM_PMLEN_0: + *value = 0; + break; + case ENVCFG_PMM_PMLEN_7: + *value = 7; + break; + case ENVCFG_PMM_PMLEN_16: + *value = 16; + break; + default: + return SBI_ERR_FAILURE; + } + + return SBI_SUCCESS; +} + +#endif + +static const struct kvm_sbi_fwft_feature features[] = { + { + .id = SBI_FWFT_MISALIGNED_EXC_DELEG, + .first_reg_num = offsetof(struct kvm_riscv_sbi_fwft, misaligned_deleg.enable) / + sizeof(unsigned long), + .supported = kvm_sbi_fwft_misaligned_delegation_supported, + .reset = kvm_sbi_fwft_reset_misaligned_delegation, + .set = kvm_sbi_fwft_set_misaligned_delegation, + .get = kvm_sbi_fwft_get_misaligned_delegation, + }, +#ifndef CONFIG_32BIT + { + .id = SBI_FWFT_POINTER_MASKING_PMLEN, + .first_reg_num = offsetof(struct kvm_riscv_sbi_fwft, pointer_masking.enable) / + sizeof(unsigned long), + .supported = kvm_sbi_fwft_pointer_masking_pmlen_supported, + .reset = kvm_sbi_fwft_reset_pointer_masking_pmlen, + .set = kvm_sbi_fwft_set_pointer_masking_pmlen, + .get = kvm_sbi_fwft_get_pointer_masking_pmlen, + }, +#endif +}; + +static const struct kvm_sbi_fwft_feature *kvm_sbi_fwft_regnum_to_feature(unsigned long reg_num) +{ + const struct kvm_sbi_fwft_feature *feature; + int i; + + for (i = 0; i < ARRAY_SIZE(features); i++) { + feature = &features[i]; + if (feature->first_reg_num <= reg_num && reg_num < (feature->first_reg_num + 3)) + return feature; + } + + return NULL; +} + +static struct kvm_sbi_fwft_config * +kvm_sbi_fwft_get_config(struct kvm_vcpu *vcpu, enum sbi_fwft_feature_t feature) +{ + int i; + struct kvm_sbi_fwft *fwft = vcpu_to_fwft(vcpu); + + for (i = 0; i < ARRAY_SIZE(features); i++) { + if (fwft->configs[i].feature->id == feature) + return &fwft->configs[i]; + } + + return NULL; +} + +static int kvm_fwft_get_feature(struct kvm_vcpu *vcpu, u32 feature, + struct kvm_sbi_fwft_config **conf) +{ + struct kvm_sbi_fwft_config *tconf; + + tconf = 
kvm_sbi_fwft_get_config(vcpu, feature); + if (!tconf) { + if (kvm_fwft_is_defined_feature(feature)) + return SBI_ERR_NOT_SUPPORTED; + + return SBI_ERR_DENIED; + } + + if (!tconf->supported || !tconf->enabled) + return SBI_ERR_NOT_SUPPORTED; + + *conf = tconf; + + return SBI_SUCCESS; +} + +static int kvm_sbi_fwft_set(struct kvm_vcpu *vcpu, u32 feature, + unsigned long value, unsigned long flags) +{ + int ret; + struct kvm_sbi_fwft_config *conf; + + ret = kvm_fwft_get_feature(vcpu, feature, &conf); + if (ret) + return ret; + + if ((flags & ~SBI_FWFT_SET_FLAG_LOCK) != 0) + return SBI_ERR_INVALID_PARAM; + + if (conf->flags & SBI_FWFT_SET_FLAG_LOCK) + return SBI_ERR_DENIED_LOCKED; + + conf->flags = flags; + + return conf->feature->set(vcpu, conf, false, value); +} + +static int kvm_sbi_fwft_get(struct kvm_vcpu *vcpu, unsigned long feature, + unsigned long *value) +{ + int ret; + struct kvm_sbi_fwft_config *conf; + + ret = kvm_fwft_get_feature(vcpu, feature, &conf); + if (ret) + return ret; + + return conf->feature->get(vcpu, conf, false, value); +} + +static int kvm_sbi_ext_fwft_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, + struct kvm_vcpu_sbi_return *retdata) +{ + int ret; + struct kvm_cpu_context *cp = &vcpu->arch.guest_context; + unsigned long funcid = cp->a6; + + switch (funcid) { + case SBI_EXT_FWFT_SET: + ret = kvm_sbi_fwft_set(vcpu, cp->a0, cp->a1, cp->a2); + break; + case SBI_EXT_FWFT_GET: + ret = kvm_sbi_fwft_get(vcpu, cp->a0, &retdata->out_val); + break; + default: + ret = SBI_ERR_NOT_SUPPORTED; + break; + } + + retdata->err_val = ret; + + return 0; +} + +static int kvm_sbi_ext_fwft_init(struct kvm_vcpu *vcpu) +{ + struct kvm_sbi_fwft *fwft = vcpu_to_fwft(vcpu); + const struct kvm_sbi_fwft_feature *feature; + struct kvm_sbi_fwft_config *conf; + int i; + + fwft->configs = kcalloc(ARRAY_SIZE(features), sizeof(struct kvm_sbi_fwft_config), + GFP_KERNEL); + if (!fwft->configs) + return -ENOMEM; + + for (i = 0; i < ARRAY_SIZE(features); i++) { + feature = &features[i]; + conf = &fwft->configs[i]; + if (feature->supported) + conf->supported = feature->supported(vcpu); + else + conf->supported = true; + + conf->enabled = conf->supported; + conf->feature = feature; + } + + return 0; +} + +static void kvm_sbi_ext_fwft_deinit(struct kvm_vcpu *vcpu) +{ + struct kvm_sbi_fwft *fwft = vcpu_to_fwft(vcpu); + + kfree(fwft->configs); +} + +static void kvm_sbi_ext_fwft_reset(struct kvm_vcpu *vcpu) +{ + struct kvm_sbi_fwft *fwft = vcpu_to_fwft(vcpu); + const struct kvm_sbi_fwft_feature *feature; + int i; + + for (i = 0; i < ARRAY_SIZE(features); i++) { + fwft->configs[i].flags = 0; + feature = &features[i]; + if (feature->reset) + feature->reset(vcpu); + } +} + +static unsigned long kvm_sbi_ext_fwft_get_reg_count(struct kvm_vcpu *vcpu) +{ + unsigned long max_reg_count = sizeof(struct kvm_riscv_sbi_fwft) / sizeof(unsigned long); + const struct kvm_sbi_fwft_feature *feature; + struct kvm_sbi_fwft_config *conf; + unsigned long reg, ret = 0; + + for (reg = 0; reg < max_reg_count; reg++) { + feature = kvm_sbi_fwft_regnum_to_feature(reg); + if (!feature) + continue; + + conf = kvm_sbi_fwft_get_config(vcpu, feature->id); + if (!conf || !conf->supported) + continue; + + ret++; + } + + return ret; +} + +static int kvm_sbi_ext_fwft_get_reg_id(struct kvm_vcpu *vcpu, int index, u64 *reg_id) +{ + int reg, max_reg_count = sizeof(struct kvm_riscv_sbi_fwft) / sizeof(unsigned long); + const struct kvm_sbi_fwft_feature *feature; + struct kvm_sbi_fwft_config *conf; + int idx = 0; + + for (reg = 0; reg < 
max_reg_count; reg++) { + feature = kvm_sbi_fwft_regnum_to_feature(reg); + if (!feature) + continue; + + conf = kvm_sbi_fwft_get_config(vcpu, feature->id); + if (!conf || !conf->supported) + continue; + + if (index == idx) { + *reg_id = KVM_REG_RISCV | + (IS_ENABLED(CONFIG_32BIT) ? + KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64) | + KVM_REG_RISCV_SBI_STATE | + KVM_REG_RISCV_SBI_FWFT | reg; + return 0; + } + + idx++; + } + + return -ENOENT; +} + +static int kvm_sbi_ext_fwft_get_reg(struct kvm_vcpu *vcpu, unsigned long reg_num, + unsigned long reg_size, void *reg_val) +{ + const struct kvm_sbi_fwft_feature *feature; + struct kvm_sbi_fwft_config *conf; + unsigned long *value; + int ret = 0; + + if (reg_size != sizeof(unsigned long)) + return -EINVAL; + value = reg_val; + + feature = kvm_sbi_fwft_regnum_to_feature(reg_num); + if (!feature) + return -ENOENT; + + conf = kvm_sbi_fwft_get_config(vcpu, feature->id); + if (!conf || !conf->supported) + return -ENOENT; + + switch (reg_num - feature->first_reg_num) { + case 0: + *value = conf->enabled; + break; + case 1: + *value = conf->flags; + break; + case 2: + ret = conf->feature->get(vcpu, conf, true, value); + break; + default: + return -ENOENT; + } + + return sbi_err_map_linux_errno(ret); +} + +static int kvm_sbi_ext_fwft_set_reg(struct kvm_vcpu *vcpu, unsigned long reg_num, + unsigned long reg_size, const void *reg_val) +{ + const struct kvm_sbi_fwft_feature *feature; + struct kvm_sbi_fwft_config *conf; + unsigned long value; + int ret = 0; + + if (reg_size != sizeof(unsigned long)) + return -EINVAL; + value = *(const unsigned long *)reg_val; + + feature = kvm_sbi_fwft_regnum_to_feature(reg_num); + if (!feature) + return -ENOENT; + + conf = kvm_sbi_fwft_get_config(vcpu, feature->id); + if (!conf || !conf->supported) + return -ENOENT; + + switch (reg_num - feature->first_reg_num) { + case 0: + switch (value) { + case 0: + conf->enabled = false; + break; + case 1: + conf->enabled = true; + break; + default: + return -EINVAL; + } + break; + case 1: + conf->flags = value & SBI_FWFT_SET_FLAG_LOCK; + break; + case 2: + ret = conf->feature->set(vcpu, conf, true, value); + break; + default: + return -ENOENT; + } + + return sbi_err_map_linux_errno(ret); +} + +const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_fwft = { + .extid_start = SBI_EXT_FWFT, + .extid_end = SBI_EXT_FWFT, + .handler = kvm_sbi_ext_fwft_handler, + .init = kvm_sbi_ext_fwft_init, + .deinit = kvm_sbi_ext_fwft_deinit, + .reset = kvm_sbi_ext_fwft_reset, + .state_reg_subtype = KVM_REG_RISCV_SBI_FWFT, + .get_state_reg_count = kvm_sbi_ext_fwft_get_reg_count, + .get_state_reg_id = kvm_sbi_ext_fwft_get_reg_id, + .get_state_reg = kvm_sbi_ext_fwft_get_reg, + .set_state_reg = kvm_sbi_ext_fwft_set_reg, +}; diff --git a/arch/riscv/kvm/vcpu_sbi_pmu.c b/arch/riscv/kvm/vcpu_sbi_pmu.c index e4be34e03e83..a020d979d179 100644 --- a/arch/riscv/kvm/vcpu_sbi_pmu.c +++ b/arch/riscv/kvm/vcpu_sbi_pmu.c @@ -73,6 +73,9 @@ static int kvm_sbi_ext_pmu_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, case SBI_EXT_PMU_SNAPSHOT_SET_SHMEM: ret = kvm_riscv_vcpu_pmu_snapshot_set_shmem(vcpu, cp->a0, cp->a1, cp->a2, retdata); break; + case SBI_EXT_PMU_EVENT_GET_INFO: + ret = kvm_riscv_vcpu_pmu_event_info(vcpu, cp->a0, cp->a1, cp->a2, cp->a3, retdata); + break; default: retdata->err_val = SBI_ERR_NOT_SUPPORTED; } diff --git a/arch/riscv/kvm/vcpu_sbi_sta.c b/arch/riscv/kvm/vcpu_sbi_sta.c index cc6cb7c8f0e4..afa0545c3bcf 100644 --- a/arch/riscv/kvm/vcpu_sbi_sta.c +++ b/arch/riscv/kvm/vcpu_sbi_sta.c @@ -85,8 +85,6 @@ static int 
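The FWFT ONE_REG accessors map a register number back onto a feature by checking which feature's three-slot window (enable, flags, value) contains it, then switch on the offset within that window. A small sketch of that reverse lookup with two hypothetical features:

#include <stdio.h>

struct feature { const char *name; unsigned long first_reg_num; };

static const struct feature features[] = {
    { "misaligned_deleg", 0 },
    { "pointer_masking",  3 },
};

/* Each feature owns the three registers starting at first_reg_num. */
static const struct feature *regnum_to_feature(unsigned long reg_num)
{
    for (unsigned int i = 0; i < sizeof(features) / sizeof(features[0]); i++)
        if (features[i].first_reg_num <= reg_num &&
            reg_num < features[i].first_reg_num + 3)
            return &features[i];
    return NULL;
}

int main(void)
{
    static const char *slot[] = { "enable", "flags", "value" };

    for (unsigned long reg = 0; reg < 6; reg++) {
        const struct feature *f = regnum_to_feature(reg);

        printf("reg %lu -> %s.%s\n", reg, f->name, slot[reg - f->first_reg_num]);
    }
    return 0;
}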
kvm_sbi_sta_steal_time_set_shmem(struct kvm_vcpu *vcpu) unsigned long shmem_phys_hi = cp->a1; u32 flags = cp->a2; struct sbi_sta_struct zero_sta = {0}; - unsigned long hva; - bool writable; gpa_t shmem; int ret; @@ -111,13 +109,10 @@ static int kvm_sbi_sta_steal_time_set_shmem(struct kvm_vcpu *vcpu) return SBI_ERR_INVALID_ADDRESS; } - hva = kvm_vcpu_gfn_to_hva_prot(vcpu, shmem >> PAGE_SHIFT, &writable); - if (kvm_is_error_hva(hva) || !writable) - return SBI_ERR_INVALID_ADDRESS; - + /* No need to check writable slot explicitly as kvm_vcpu_write_guest does it internally */ ret = kvm_vcpu_write_guest(vcpu, shmem, &zero_sta, sizeof(zero_sta)); if (ret) - return SBI_ERR_FAILURE; + return SBI_ERR_INVALID_ADDRESS; vcpu->arch.sta.shmem = shmem; vcpu->arch.sta.last_steal = current->sched_info.run_delay; @@ -151,63 +146,82 @@ static unsigned long kvm_sbi_ext_sta_probe(struct kvm_vcpu *vcpu) return !!sched_info_on(); } -const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_sta = { - .extid_start = SBI_EXT_STA, - .extid_end = SBI_EXT_STA, - .handler = kvm_sbi_ext_sta_handler, - .probe = kvm_sbi_ext_sta_probe, - .reset = kvm_riscv_vcpu_sbi_sta_reset, -}; +static unsigned long kvm_sbi_ext_sta_get_state_reg_count(struct kvm_vcpu *vcpu) +{ + return sizeof(struct kvm_riscv_sbi_sta) / sizeof(unsigned long); +} -int kvm_riscv_vcpu_get_reg_sbi_sta(struct kvm_vcpu *vcpu, - unsigned long reg_num, - unsigned long *reg_val) +static int kvm_sbi_ext_sta_get_reg(struct kvm_vcpu *vcpu, unsigned long reg_num, + unsigned long reg_size, void *reg_val) { + unsigned long *value; + + if (reg_size != sizeof(unsigned long)) + return -EINVAL; + value = reg_val; + switch (reg_num) { case KVM_REG_RISCV_SBI_STA_REG(shmem_lo): - *reg_val = (unsigned long)vcpu->arch.sta.shmem; + *value = (unsigned long)vcpu->arch.sta.shmem; break; case KVM_REG_RISCV_SBI_STA_REG(shmem_hi): if (IS_ENABLED(CONFIG_32BIT)) - *reg_val = upper_32_bits(vcpu->arch.sta.shmem); + *value = upper_32_bits(vcpu->arch.sta.shmem); else - *reg_val = 0; + *value = 0; break; default: - return -EINVAL; + return -ENOENT; } return 0; } -int kvm_riscv_vcpu_set_reg_sbi_sta(struct kvm_vcpu *vcpu, - unsigned long reg_num, - unsigned long reg_val) +static int kvm_sbi_ext_sta_set_reg(struct kvm_vcpu *vcpu, unsigned long reg_num, + unsigned long reg_size, const void *reg_val) { + unsigned long value; + + if (reg_size != sizeof(unsigned long)) + return -EINVAL; + value = *(const unsigned long *)reg_val; + switch (reg_num) { case KVM_REG_RISCV_SBI_STA_REG(shmem_lo): if (IS_ENABLED(CONFIG_32BIT)) { gpa_t hi = upper_32_bits(vcpu->arch.sta.shmem); - vcpu->arch.sta.shmem = reg_val; + vcpu->arch.sta.shmem = value; vcpu->arch.sta.shmem |= hi << 32; } else { - vcpu->arch.sta.shmem = reg_val; + vcpu->arch.sta.shmem = value; } break; case KVM_REG_RISCV_SBI_STA_REG(shmem_hi): if (IS_ENABLED(CONFIG_32BIT)) { gpa_t lo = lower_32_bits(vcpu->arch.sta.shmem); - vcpu->arch.sta.shmem = ((gpa_t)reg_val << 32); + vcpu->arch.sta.shmem = ((gpa_t)value << 32); vcpu->arch.sta.shmem |= lo; - } else if (reg_val != 0) { + } else if (value != 0) { return -EINVAL; } break; default: - return -EINVAL; + return -ENOENT; } return 0; } + +const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_sta = { + .extid_start = SBI_EXT_STA, + .extid_end = SBI_EXT_STA, + .handler = kvm_sbi_ext_sta_handler, + .probe = kvm_sbi_ext_sta_probe, + .reset = kvm_riscv_vcpu_sbi_sta_reset, + .state_reg_subtype = KVM_REG_RISCV_SBI_STA, + .get_state_reg_count = kvm_sbi_ext_sta_get_state_reg_count, + .get_state_reg = kvm_sbi_ext_sta_get_reg, + 
.set_state_reg = kvm_sbi_ext_sta_set_reg, +}; diff --git a/arch/riscv/kvm/vmid.c b/arch/riscv/kvm/vmid.c index 3b426c800480..abb1c2bf2542 100644 --- a/arch/riscv/kvm/vmid.c +++ b/arch/riscv/kvm/vmid.c @@ -14,6 +14,7 @@ #include <linux/smp.h> #include <linux/kvm_host.h> #include <asm/csr.h> +#include <asm/kvm_mmu.h> #include <asm/kvm_tlb.h> #include <asm/kvm_vmid.h> @@ -24,15 +25,12 @@ static DEFINE_SPINLOCK(vmid_lock); void __init kvm_riscv_gstage_vmid_detect(void) { - unsigned long old; - /* Figure-out number of VMID bits in HW */ - old = csr_read(CSR_HGATP); - csr_write(CSR_HGATP, old | HGATP_VMID); + csr_write(CSR_HGATP, (kvm_riscv_gstage_mode << HGATP_MODE_SHIFT) | HGATP_VMID); vmid_bits = csr_read(CSR_HGATP); vmid_bits = (vmid_bits & HGATP_VMID) >> HGATP_VMID_SHIFT; vmid_bits = fls_long(vmid_bits); - csr_write(CSR_HGATP, old); + csr_write(CSR_HGATP, 0); /* We polluted local TLB so flush all guest TLB */ kvm_riscv_local_hfence_gvma_all(); diff --git a/arch/s390/crypto/sha.h b/arch/s390/crypto/sha.h index cadb4b13622a..b9cd9572dd35 100644 --- a/arch/s390/crypto/sha.h +++ b/arch/s390/crypto/sha.h @@ -10,14 +10,15 @@ #ifndef _CRYPTO_ARCH_S390_SHA_H #define _CRYPTO_ARCH_S390_SHA_H +#include <crypto/hash.h> #include <crypto/sha2.h> #include <crypto/sha3.h> +#include <linux/build_bug.h> #include <linux/types.h> /* must be big enough for the largest SHA variant */ #define CPACF_MAX_PARMBLOCK_SIZE SHA3_STATE_SIZE #define SHA_MAX_BLOCK_SIZE SHA3_224_BLOCK_SIZE -#define S390_SHA_CTX_SIZE sizeof(struct s390_sha_ctx) struct s390_sha_ctx { u64 count; /* message length in bytes */ @@ -42,4 +43,9 @@ int s390_sha_update_blocks(struct shash_desc *desc, const u8 *data, int s390_sha_finup(struct shash_desc *desc, const u8 *src, unsigned int len, u8 *out); +static inline void __check_s390_sha_ctx_size(void) +{ + BUILD_BUG_ON(S390_SHA_CTX_SIZE != sizeof(struct s390_sha_ctx)); +} + #endif diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index f870d09515cc..95d15416c39d 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -356,7 +356,7 @@ struct kvm_s390_float_interrupt { int counters[FIRQ_MAX_COUNT]; struct kvm_s390_mchk_info mchk; struct kvm_s390_ext_info srv_signal; - int next_rr_cpu; + int last_sleep_cpu; struct mutex ais_lock; u8 simm; u8 nimm; diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index c1a7a92f0575..b7100c6a4054 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -2055,4 +2055,26 @@ static inline unsigned long gmap_pgste_get_pgt_addr(unsigned long *pgt) return res; } +static inline pgste_t pgste_get_lock(pte_t *ptep) +{ + unsigned long value = 0; +#ifdef CONFIG_PGSTE + unsigned long *ptr = (unsigned long *)(ptep + PTRS_PER_PTE); + + do { + value = __atomic64_or_barrier(PGSTE_PCL_BIT, ptr); + } while (value & PGSTE_PCL_BIT); + value |= PGSTE_PCL_BIT; +#endif + return __pgste(value); +} + +static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste) +{ +#ifdef CONFIG_PGSTE + barrier(); + WRITE_ONCE(*(unsigned long *)(ptep + PTRS_PER_PTE), pgste_val(pgste) & ~PGSTE_PCL_BIT); +#endif +} + #endif /* _S390_PAGE_H */ diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 9384572ffa7b..c62a868cf2b6 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1323,6 +1323,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) VCPU_EVENT(vcpu, 4, "enabled wait: %llu ns", sltime); no_timer: kvm_vcpu_srcu_read_unlock(vcpu); + 
vcpu->kvm->arch.float_int.last_sleep_cpu = vcpu->vcpu_idx; kvm_vcpu_halt(vcpu); vcpu->valid_wakeup = false; __unset_cpu_idle(vcpu); @@ -1949,18 +1950,15 @@ static void __floating_irq_kick(struct kvm *kvm, u64 type) if (!online_vcpus) return; - /* find idle VCPUs first, then round robin */ - sigcpu = find_first_bit(kvm->arch.idle_mask, online_vcpus); - if (sigcpu == online_vcpus) { - do { - sigcpu = kvm->arch.float_int.next_rr_cpu++; - kvm->arch.float_int.next_rr_cpu %= online_vcpus; - /* avoid endless loops if all vcpus are stopped */ - if (nr_tries++ >= online_vcpus) - return; - } while (is_vcpu_stopped(kvm_get_vcpu(kvm, sigcpu))); + for (sigcpu = kvm->arch.float_int.last_sleep_cpu; ; sigcpu++) { + sigcpu %= online_vcpus; + dst_vcpu = kvm_get_vcpu(kvm, sigcpu); + if (!is_vcpu_stopped(dst_vcpu)) + break; + /* avoid endless loops if all vcpus are stopped */ + if (nr_tries++ >= online_vcpus) + return; } - dst_vcpu = kvm_get_vcpu(kvm, sigcpu); /* make the VCPU drop out of the SIE, or wake it up if sleeping */ switch (type) { diff --git a/arch/s390/mm/gmap_helpers.c b/arch/s390/mm/gmap_helpers.c index b63f427e7289..d4c3c36855e2 100644 --- a/arch/s390/mm/gmap_helpers.c +++ b/arch/s390/mm/gmap_helpers.c @@ -15,6 +15,7 @@ #include <linux/pagewalk.h> #include <linux/ksm.h> #include <asm/gmap_helpers.h> +#include <asm/pgtable.h> /** * ptep_zap_swap_entry() - discard a swap entry. @@ -47,6 +48,7 @@ void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr) { struct vm_area_struct *vma; spinlock_t *ptl; + pgste_t pgste; pte_t *ptep; mmap_assert_locked(mm); @@ -60,8 +62,16 @@ void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr) ptep = get_locked_pte(mm, vmaddr, &ptl); if (unlikely(!ptep)) return; - if (pte_swap(*ptep)) + if (pte_swap(*ptep)) { + preempt_disable(); + pgste = pgste_get_lock(ptep); + ptep_zap_swap_entry(mm, pte_to_swp_entry(*ptep)); + pte_clear(mm, vmaddr, ptep); + + pgste_set_unlock(ptep, pgste); + preempt_enable(); + } pte_unmap_unlock(ptep, ptl); } EXPORT_SYMBOL_GPL(gmap_helper_zap_one_page); diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 50eb57c976bc..0fde20bbc50b 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -24,6 +24,7 @@ #include <asm/tlbflush.h> #include <asm/mmu_context.h> #include <asm/page-states.h> +#include <asm/pgtable.h> #include <asm/machine.h> pgprot_t pgprot_writecombine(pgprot_t prot) @@ -115,28 +116,6 @@ static inline pte_t ptep_flush_lazy(struct mm_struct *mm, return old; } -static inline pgste_t pgste_get_lock(pte_t *ptep) -{ - unsigned long value = 0; -#ifdef CONFIG_PGSTE - unsigned long *ptr = (unsigned long *)(ptep + PTRS_PER_PTE); - - do { - value = __atomic64_or_barrier(PGSTE_PCL_BIT, ptr); - } while (value & PGSTE_PCL_BIT); - value |= PGSTE_PCL_BIT; -#endif - return __pgste(value); -} - -static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste) -{ -#ifdef CONFIG_PGSTE - barrier(); - WRITE_ONCE(*(unsigned long *)(ptep + PTRS_PER_PTE), pgste_val(pgste) & ~PGSTE_PCL_BIT); -#endif -} - static inline pgste_t pgste_get(pte_t *ptep) { unsigned long pgste = 0; diff --git a/arch/sparc/include/asm/adi_64.h b/arch/sparc/include/asm/adi_64.h index 4301c6fd87f7..0c066fdab696 100644 --- a/arch/sparc/include/asm/adi_64.h +++ b/arch/sparc/include/asm/adi_64.h @@ -9,7 +9,7 @@ #include <linux/types.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct adi_caps { __u64 blksz; @@ -41,6 +41,6 @@ static inline unsigned long adi_nbits(void) return adi_state.caps.nbits; } -#endif /* __ASSEMBLY__ */ 
+#endif /* __ASSEMBLER__ */ #endif /* !(__ASM_SPARC64_ADI_H) */ diff --git a/arch/sparc/include/asm/auxio.h b/arch/sparc/include/asm/auxio.h index a2681052e900..d0a933ed0d04 100644 --- a/arch/sparc/include/asm/auxio.h +++ b/arch/sparc/include/asm/auxio.h @@ -2,11 +2,11 @@ #ifndef ___ASM_SPARC_AUXIO_H #define ___ASM_SPARC_AUXIO_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ extern void __iomem *auxio_register; -#endif /* ifndef __ASSEMBLY__ */ +#endif /* ifndef __ASSEMBLER__ */ #if defined(__sparc__) && defined(__arch64__) #include <asm/auxio_64.h> diff --git a/arch/sparc/include/asm/auxio_32.h b/arch/sparc/include/asm/auxio_32.h index 852457c7a265..db58fa28de9e 100644 --- a/arch/sparc/include/asm/auxio_32.h +++ b/arch/sparc/include/asm/auxio_32.h @@ -29,7 +29,7 @@ #define AUXIO_FLPY_EJCT 0x02 /* Eject floppy disk. Write only. */ #define AUXIO_LED 0x01 /* On if set, off if unset. Read/Write */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * NOTE: these routines are implementation dependent-- @@ -75,7 +75,7 @@ do { \ } \ } while (0) -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ /* AUXIO2 (Power Off Control) */ diff --git a/arch/sparc/include/asm/auxio_64.h b/arch/sparc/include/asm/auxio_64.h index ae1ed41987db..8a4ae07daf16 100644 --- a/arch/sparc/include/asm/auxio_64.h +++ b/arch/sparc/include/asm/auxio_64.h @@ -74,7 +74,7 @@ #define AUXIO_PCIO_CPWR_OFF 0x02 /* Courtesy Power Off */ #define AUXIO_PCIO_SPWR_OFF 0x01 /* System Power Off */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define AUXIO_LTE_ON 1 #define AUXIO_LTE_OFF 0 @@ -94,6 +94,6 @@ void auxio_set_lte(int on); */ void auxio_set_led(int on); -#endif /* ifndef __ASSEMBLY__ */ +#endif /* ifndef __ASSEMBLER__ */ #endif /* !(_SPARC64_AUXIO_H) */ diff --git a/arch/sparc/include/asm/cacheflush_64.h b/arch/sparc/include/asm/cacheflush_64.h index 2b1261b77ecd..06092572c045 100644 --- a/arch/sparc/include/asm/cacheflush_64.h +++ b/arch/sparc/include/asm/cacheflush_64.h @@ -4,7 +4,7 @@ #include <asm/page.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/mm.h> @@ -78,6 +78,6 @@ void flush_ptrace_access(struct vm_area_struct *, struct page *, #define flush_cache_vmap_early(start, end) do { } while (0) #define flush_cache_vunmap(start, end) do { } while (0) -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _SPARC64_CACHEFLUSH_H */ diff --git a/arch/sparc/include/asm/cpudata.h b/arch/sparc/include/asm/cpudata.h index d213165ee713..67022a153023 100644 --- a/arch/sparc/include/asm/cpudata.h +++ b/arch/sparc/include/asm/cpudata.h @@ -2,14 +2,14 @@ #ifndef ___ASM_SPARC_CPUDATA_H #define ___ASM_SPARC_CPUDATA_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/threads.h> #include <linux/percpu.h> extern const struct seq_operations cpuinfo_op; -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ #if defined(__sparc__) && defined(__arch64__) #include <asm/cpudata_64.h> diff --git a/arch/sparc/include/asm/cpudata_64.h b/arch/sparc/include/asm/cpudata_64.h index 9c3fc03abe9a..056b3c0e7ef9 100644 --- a/arch/sparc/include/asm/cpudata_64.h +++ b/arch/sparc/include/asm/cpudata_64.h @@ -7,7 +7,7 @@ #ifndef _SPARC64_CPUDATA_H #define _SPARC64_CPUDATA_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ typedef struct { /* Dcache line 1 */ @@ -35,7 +35,7 @@ DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data); #define cpu_data(__cpu) per_cpu(__cpu_data, (__cpu)) #define local_cpu_data() (*this_cpu_ptr(&__cpu_data)) -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ #include <asm/trap_block.h> diff 
--git a/arch/sparc/include/asm/delay_64.h b/arch/sparc/include/asm/delay_64.h index 22213b1c119d..5de5b5f23188 100644 --- a/arch/sparc/include/asm/delay_64.h +++ b/arch/sparc/include/asm/delay_64.h @@ -7,12 +7,12 @@ #ifndef _SPARC64_DELAY_H #define _SPARC64_DELAY_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ void __delay(unsigned long loops); void udelay(unsigned long usecs); #define mdelay(n) udelay((n) * 1000) -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _SPARC64_DELAY_H */ diff --git a/arch/sparc/include/asm/elf_64.h b/arch/sparc/include/asm/elf_64.h index 8fb09eec8c3e..694ed081cf8d 100644 --- a/arch/sparc/include/asm/elf_64.h +++ b/arch/sparc/include/asm/elf_64.h @@ -58,6 +58,7 @@ #define R_SPARC_7 43 #define R_SPARC_5 44 #define R_SPARC_6 45 +#define R_SPARC_UA64 54 /* Bits present in AT_HWCAP, primarily for Sparc32. */ #define HWCAP_SPARC_FLUSH 0x00000001 diff --git a/arch/sparc/include/asm/floppy_64.h b/arch/sparc/include/asm/floppy_64.h index 135f9a49b6ba..d1bb0f13352c 100644 --- a/arch/sparc/include/asm/floppy_64.h +++ b/arch/sparc/include/asm/floppy_64.h @@ -13,6 +13,7 @@ #include <linux/of.h> #include <linux/of_platform.h> #include <linux/dma-mapping.h> +#include <linux/string.h> #include <asm/auxio.h> @@ -615,7 +616,7 @@ static unsigned long __init sun_floppy_init(void) sun_pci_fd_ebus_dma.callback = sun_pci_fd_dma_callback; sun_pci_fd_ebus_dma.client_cookie = NULL; sun_pci_fd_ebus_dma.irq = FLOPPY_IRQ; - strcpy(sun_pci_fd_ebus_dma.name, "floppy"); + strscpy(sun_pci_fd_ebus_dma.name, "floppy"); if (ebus_dma_register(&sun_pci_fd_ebus_dma)) return 0; diff --git a/arch/sparc/include/asm/ftrace.h b/arch/sparc/include/asm/ftrace.h index e284394cb3aa..f7c9036199c5 100644 --- a/arch/sparc/include/asm/ftrace.h +++ b/arch/sparc/include/asm/ftrace.h @@ -6,7 +6,7 @@ #define MCOUNT_ADDR ((unsigned long)(_mcount)) #define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ void _mcount(void); #endif diff --git a/arch/sparc/include/asm/hvtramp.h b/arch/sparc/include/asm/hvtramp.h index ce2453ea4f2b..8cf7a54fa528 100644 --- a/arch/sparc/include/asm/hvtramp.h +++ b/arch/sparc/include/asm/hvtramp.h @@ -2,7 +2,7 @@ #ifndef _SPARC64_HVTRAP_H #define _SPARC64_HVTRAP_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> diff --git a/arch/sparc/include/asm/hypervisor.h b/arch/sparc/include/asm/hypervisor.h index f220edcf17c7..94ac56d43746 100644 --- a/arch/sparc/include/asm/hypervisor.h +++ b/arch/sparc/include/asm/hypervisor.h @@ -102,7 +102,7 @@ */ #define HV_FAST_MACH_EXIT 0x00 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ void sun4v_mach_exit(unsigned long exit_code); #endif @@ -131,7 +131,7 @@ void sun4v_mach_exit(unsigned long exit_code); */ #define HV_FAST_MACH_DESC 0x01 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_mach_desc(unsigned long buffer_pa, unsigned long buf_len, unsigned long *real_buf_len); @@ -152,7 +152,7 @@ unsigned long sun4v_mach_desc(unsigned long buffer_pa, */ #define HV_FAST_MACH_SIR 0x02 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ void sun4v_mach_sir(void); #endif @@ -208,7 +208,7 @@ void sun4v_mach_sir(void); */ #define HV_FAST_MACH_SET_WATCHDOG 0x05 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_mach_set_watchdog(unsigned long timeout, unsigned long *orig_timeout); #endif @@ -254,7 +254,7 @@ unsigned long sun4v_mach_set_watchdog(unsigned long timeout, */ #define HV_FAST_CPU_START 0x10 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long 
sun4v_cpu_start(unsigned long cpuid, unsigned long pc, unsigned long rtba, @@ -282,7 +282,7 @@ unsigned long sun4v_cpu_start(unsigned long cpuid, */ #define HV_FAST_CPU_STOP 0x11 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_cpu_stop(unsigned long cpuid); #endif @@ -299,7 +299,7 @@ unsigned long sun4v_cpu_stop(unsigned long cpuid); */ #define HV_FAST_CPU_YIELD 0x12 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_cpu_yield(void); #endif @@ -317,7 +317,7 @@ unsigned long sun4v_cpu_yield(void); */ #define HV_FAST_CPU_POKE 0x13 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_cpu_poke(unsigned long cpuid); #endif @@ -363,7 +363,7 @@ unsigned long sun4v_cpu_poke(unsigned long cpuid); #define HV_CPU_QUEUE_RES_ERROR 0x3e #define HV_CPU_QUEUE_NONRES_ERROR 0x3f -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_cpu_qconf(unsigned long type, unsigned long queue_paddr, unsigned long num_queue_entries); @@ -416,7 +416,7 @@ unsigned long sun4v_cpu_qconf(unsigned long type, */ #define HV_FAST_CPU_MONDO_SEND 0x42 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_cpu_mondo_send(unsigned long cpu_count, unsigned long cpu_list_pa, unsigned long mondo_block_pa); @@ -449,7 +449,7 @@ unsigned long sun4v_cpu_mondo_send(unsigned long cpu_count, #define HV_CPU_STATE_RUNNING 0x02 #define HV_CPU_STATE_ERROR 0x03 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ long sun4v_cpu_state(unsigned long cpuid); #endif @@ -485,7 +485,7 @@ long sun4v_cpu_state(unsigned long cpuid); * * Layout of a TSB description for mmu_tsb_ctx{,non}0() calls. */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct hv_tsb_descr { unsigned short pgsz_idx; unsigned short assoc; @@ -536,7 +536,7 @@ struct hv_tsb_descr { * The fault status block is a multiple of 64-bytes and must be aligned * on a 64-byte boundary. 
*/ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct hv_fault_status { unsigned long i_fault_type; unsigned long i_fault_addr; @@ -651,7 +651,7 @@ struct hv_fault_status { */ #define HV_FAST_MMU_TSB_CTX0 0x20 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_mmu_tsb_ctx0(unsigned long num_descriptions, unsigned long tsb_desc_ra); #endif @@ -736,7 +736,7 @@ unsigned long sun4v_mmu_tsb_ctx0(unsigned long num_descriptions, */ #define HV_FAST_MMU_DEMAP_ALL 0x24 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ void sun4v_mmu_demap_all(void); #endif @@ -766,7 +766,7 @@ void sun4v_mmu_demap_all(void); */ #define HV_FAST_MMU_MAP_PERM_ADDR 0x25 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_mmu_map_perm_addr(unsigned long vaddr, unsigned long set_to_zero, unsigned long tte, @@ -990,7 +990,7 @@ unsigned long sun4v_mmu_map_perm_addr(unsigned long vaddr, */ #define HV_CCB_SUBMIT 0x34 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_ccb_submit(unsigned long ccb_buf, unsigned long len, unsigned long flags, @@ -1035,7 +1035,7 @@ unsigned long sun4v_ccb_submit(unsigned long ccb_buf, */ #define HV_CCB_INFO 0x35 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_ccb_info(unsigned long ca, void *info_arr); #endif @@ -1069,7 +1069,7 @@ unsigned long sun4v_ccb_info(unsigned long ca, */ #define HV_CCB_KILL 0x36 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_ccb_kill(unsigned long ca, void *kill_status); #endif @@ -1104,7 +1104,7 @@ unsigned long sun4v_ccb_kill(unsigned long ca, */ #define HV_FAST_TOD_GET 0x50 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_tod_get(unsigned long *time); #endif @@ -1121,7 +1121,7 @@ unsigned long sun4v_tod_get(unsigned long *time); */ #define HV_FAST_TOD_SET 0x51 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_tod_set(unsigned long time); #endif @@ -1197,7 +1197,7 @@ unsigned long sun4v_tod_set(unsigned long time); */ #define HV_FAST_CONS_WRITE 0x63 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ long sun4v_con_getchar(long *status); long sun4v_con_putchar(long c); long sun4v_con_read(unsigned long buffer, @@ -1239,7 +1239,7 @@ unsigned long sun4v_con_write(unsigned long buffer, #define HV_SOFT_STATE_NORMAL 0x01 #define HV_SOFT_STATE_TRANSITION 0x02 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_mach_set_soft_state(unsigned long soft_state, unsigned long msg_string_ra); #endif @@ -1318,7 +1318,7 @@ unsigned long sun4v_mach_set_soft_state(unsigned long soft_state, */ #define HV_FAST_SVC_CLRSTATUS 0x84 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_svc_send(unsigned long svc_id, unsigned long buffer, unsigned long buffer_size, @@ -1348,7 +1348,7 @@ unsigned long sun4v_svc_clrstatus(unsigned long svc_id, * start (offset 0) of the trap trace buffer, and is described as * follows: */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct hv_trap_trace_control { unsigned long head_offset; unsigned long tail_offset; @@ -1367,7 +1367,7 @@ struct hv_trap_trace_control { * * Each trap trace buffer entry is laid out as follows: */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct hv_trap_trace_entry { unsigned char type; /* Hypervisor or guest entry? 
*/ unsigned char hpstate; /* Hyper-privileged state */ @@ -1617,7 +1617,7 @@ struct hv_trap_trace_entry { */ #define HV_FAST_INTR_DEVINO2SYSINO 0xa0 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_devino_to_sysino(unsigned long devhandle, unsigned long devino); #endif @@ -1635,7 +1635,7 @@ unsigned long sun4v_devino_to_sysino(unsigned long devhandle, */ #define HV_FAST_INTR_GETENABLED 0xa1 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_intr_getenabled(unsigned long sysino); #endif @@ -1651,7 +1651,7 @@ unsigned long sun4v_intr_getenabled(unsigned long sysino); */ #define HV_FAST_INTR_SETENABLED 0xa2 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_intr_setenabled(unsigned long sysino, unsigned long intr_enabled); #endif @@ -1668,7 +1668,7 @@ unsigned long sun4v_intr_setenabled(unsigned long sysino, */ #define HV_FAST_INTR_GETSTATE 0xa3 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_intr_getstate(unsigned long sysino); #endif @@ -1688,7 +1688,7 @@ unsigned long sun4v_intr_getstate(unsigned long sysino); */ #define HV_FAST_INTR_SETSTATE 0xa4 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_intr_setstate(unsigned long sysino, unsigned long intr_state); #endif @@ -1706,7 +1706,7 @@ unsigned long sun4v_intr_setstate(unsigned long sysino, unsigned long intr_state */ #define HV_FAST_INTR_GETTARGET 0xa5 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_intr_gettarget(unsigned long sysino); #endif @@ -1723,7 +1723,7 @@ unsigned long sun4v_intr_gettarget(unsigned long sysino); */ #define HV_FAST_INTR_SETTARGET 0xa6 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_intr_settarget(unsigned long sysino, unsigned long cpuid); #endif @@ -1807,7 +1807,7 @@ unsigned long sun4v_intr_settarget(unsigned long sysino, unsigned long cpuid); */ #define HV_FAST_VINTR_SET_TARGET 0xae -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_vintr_get_cookie(unsigned long dev_handle, unsigned long dev_ino, unsigned long *cookie); @@ -3047,7 +3047,7 @@ unsigned long sun4v_vintr_set_target(unsigned long dev_handle, #define LDC_MTE_SZ64K 0x0000000000000001 /* 64K page */ #define LDC_MTE_SZ8K 0x0000000000000000 /* 8K page */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct ldc_mtable_entry { unsigned long mte; unsigned long cookie; @@ -3130,7 +3130,7 @@ struct ldc_mtable_entry { */ #define HV_FAST_LDC_REVOKE 0xef -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_ldc_tx_qconf(unsigned long channel, unsigned long ra, unsigned long num_entries); @@ -3230,7 +3230,7 @@ unsigned long sun4v_ldc_revoke(unsigned long channel, #define HV_FAST_N2_GET_PERFREG 0x104 #define HV_FAST_N2_SET_PERFREG 0x105 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_niagara_getperf(unsigned long reg, unsigned long *val); unsigned long sun4v_niagara_setperf(unsigned long reg, @@ -3247,7 +3247,7 @@ unsigned long sun4v_niagara2_setperf(unsigned long reg, * a buffer where these statistics can be collected. It is continually * updated once configured. 
The layout is as follows: */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct hv_mmu_statistics { unsigned long immu_tsb_hits_ctx0_8k_tte; unsigned long immu_tsb_ticks_ctx0_8k_tte; @@ -3332,7 +3332,7 @@ struct hv_mmu_statistics { */ #define HV_FAST_MMUSTAT_INFO 0x103 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_mmustat_conf(unsigned long ra, unsigned long *orig_ra); unsigned long sun4v_mmustat_info(unsigned long *ra); #endif @@ -3343,7 +3343,7 @@ unsigned long sun4v_mmustat_info(unsigned long *ra); #define HV_NCS_QCONF 0x01 #define HV_NCS_QTAIL_UPDATE 0x02 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct hv_ncs_queue_entry { /* MAU Control Register */ unsigned long mau_control; @@ -3422,7 +3422,7 @@ struct hv_ncs_qtail_update_arg { */ #define HV_FAST_NCS_REQUEST 0x110 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_ncs_request(unsigned long request, unsigned long arg_ra, unsigned long arg_size); @@ -3433,7 +3433,7 @@ unsigned long sun4v_ncs_request(unsigned long request, #define HV_FAST_REBOOT_DATA_SET 0x172 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_reboot_data_set(unsigned long ra, unsigned long len); #endif @@ -3441,7 +3441,7 @@ unsigned long sun4v_reboot_data_set(unsigned long ra, #define HV_FAST_VT_GET_PERFREG 0x184 #define HV_FAST_VT_SET_PERFREG 0x185 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_vt_get_perfreg(unsigned long reg_num, unsigned long *reg_val); unsigned long sun4v_vt_set_perfreg(unsigned long reg_num, @@ -3451,7 +3451,7 @@ unsigned long sun4v_vt_set_perfreg(unsigned long reg_num, #define HV_FAST_T5_GET_PERFREG 0x1a8 #define HV_FAST_T5_SET_PERFREG 0x1a9 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_t5_get_perfreg(unsigned long reg_num, unsigned long *reg_val); unsigned long sun4v_t5_set_perfreg(unsigned long reg_num, @@ -3462,7 +3462,7 @@ unsigned long sun4v_t5_set_perfreg(unsigned long reg_num, #define HV_FAST_M7_GET_PERFREG 0x43 #define HV_FAST_M7_SET_PERFREG 0x44 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_m7_get_perfreg(unsigned long reg_num, unsigned long *reg_val); unsigned long sun4v_m7_set_perfreg(unsigned long reg_num, @@ -3506,7 +3506,7 @@ unsigned long sun4v_m7_set_perfreg(unsigned long reg_num, #define HV_GRP_T5_CPU 0x0211 #define HV_GRP_DIAG 0x0300 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_get_version(unsigned long group, unsigned long *major, unsigned long *minor); diff --git a/arch/sparc/include/asm/io_64.h b/arch/sparc/include/asm/io_64.h index c9528e4719cd..d8ed296624af 100644 --- a/arch/sparc/include/asm/io_64.h +++ b/arch/sparc/include/asm/io_64.h @@ -250,19 +250,19 @@ void insl(unsigned long, void *, unsigned long); #define insw insw #define insl insl -static inline void readsb(void __iomem *port, void *buf, unsigned long count) +static inline void readsb(const volatile void __iomem *port, void *buf, unsigned long count) { insb((unsigned long __force)port, buf, count); } #define readsb readsb -static inline void readsw(void __iomem *port, void *buf, unsigned long count) +static inline void readsw(const volatile void __iomem *port, void *buf, unsigned long count) { insw((unsigned long __force)port, buf, count); } #define readsw readsw -static inline void readsl(void __iomem *port, void *buf, unsigned long count) +static inline void readsl(const volatile void __iomem *port, void *buf, unsigned long count) { insl((unsigned long __force)port, buf, count); } diff --git a/arch/sparc/include/asm/irqflags_32.h 
b/arch/sparc/include/asm/irqflags_32.h index 7ca3eaf3dace..f5f20774faac 100644 --- a/arch/sparc/include/asm/irqflags_32.h +++ b/arch/sparc/include/asm/irqflags_32.h @@ -11,7 +11,7 @@ #ifndef _ASM_IRQFLAGS_H #define _ASM_IRQFLAGS_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> #include <asm/psr.h> @@ -43,6 +43,6 @@ static inline notrace bool arch_irqs_disabled(void) return arch_irqs_disabled_flags(arch_local_save_flags()); } -#endif /* (__ASSEMBLY__) */ +#endif /* (__ASSEMBLER__) */ #endif /* !(_ASM_IRQFLAGS_H) */ diff --git a/arch/sparc/include/asm/irqflags_64.h b/arch/sparc/include/asm/irqflags_64.h index c29ed571ae49..0071566c2c22 100644 --- a/arch/sparc/include/asm/irqflags_64.h +++ b/arch/sparc/include/asm/irqflags_64.h @@ -13,7 +13,7 @@ #include <asm/pil.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ static inline notrace unsigned long arch_local_save_flags(void) { @@ -93,6 +93,6 @@ static inline notrace unsigned long arch_local_irq_save(void) return flags; } -#endif /* (__ASSEMBLY__) */ +#endif /* (__ASSEMBLER__) */ #endif /* !(_ASM_IRQFLAGS_H) */ diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h index 2718cbea826a..f49d1e6104e1 100644 --- a/arch/sparc/include/asm/jump_label.h +++ b/arch/sparc/include/asm/jump_label.h @@ -2,7 +2,7 @@ #ifndef _ASM_SPARC_JUMP_LABEL_H #define _ASM_SPARC_JUMP_LABEL_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> @@ -48,5 +48,5 @@ struct jump_entry { jump_label_t key; }; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/sparc/include/asm/kdebug_32.h b/arch/sparc/include/asm/kdebug_32.h index 763d423823bd..7627701a032c 100644 --- a/arch/sparc/include/asm/kdebug_32.h +++ b/arch/sparc/include/asm/kdebug_32.h @@ -19,7 +19,7 @@ #define DEBUG_BP_TRAP 126 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* The debug vector is passed in %o1 at boot time. It is a pointer to * a structure in the debuggers address space. Here is its format. */ @@ -64,7 +64,7 @@ enum die_val { DIE_OOPS, }; -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ /* Some nice offset defines for assembler code. */ #define KDEBUG_ENTRY_OFF 0x0 diff --git a/arch/sparc/include/asm/leon.h b/arch/sparc/include/asm/leon.h index c1e05e4ab9e3..053a24b67aed 100644 --- a/arch/sparc/include/asm/leon.h +++ b/arch/sparc/include/asm/leon.h @@ -59,7 +59,7 @@ #define ASI_LEON3_SYSCTRL_CFG_SNOOPING (1 << 27) #define ASI_LEON3_SYSCTRL_CFG_SSIZE(c) (1 << ((c >> 20) & 0xf)) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* do a physical address bypass write, i.e. 
for 0x80000000 */ static inline void leon_store_reg(unsigned long paddr, unsigned long value) @@ -132,7 +132,7 @@ static inline int sparc_leon3_cpuid(void) return sparc_leon3_asr17() >> 28; } -#endif /*!__ASSEMBLY__*/ +#endif /*!__ASSEMBLER__*/ #ifdef CONFIG_SMP # define LEON3_IRQ_IPI_DEFAULT 13 @@ -194,7 +194,7 @@ static inline int sparc_leon3_cpuid(void) #define LEON2_CCR_DSETS_MASK 0x03000000UL #define LEON2_CFG_SSIZE_MASK 0x00007000UL -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct vm_area_struct; unsigned long leon_swprobe(unsigned long vaddr, unsigned long *paddr); @@ -247,7 +247,7 @@ extern int leon_ipi_irq; #endif /* CONFIG_SMP */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ /* macros used in leon_mm.c */ #define PFN(x) ((x) >> PAGE_SHIFT) diff --git a/arch/sparc/include/asm/leon_amba.h b/arch/sparc/include/asm/leon_amba.h index 6433a93f5126..2ff5714d7a63 100644 --- a/arch/sparc/include/asm/leon_amba.h +++ b/arch/sparc/include/asm/leon_amba.h @@ -8,7 +8,7 @@ #ifndef LEON_AMBA_H_INCLUDE #define LEON_AMBA_H_INCLUDE -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct amba_prom_registers { unsigned int phys_addr; /* The physical address of this register */ @@ -89,7 +89,7 @@ struct amba_prom_registers { #define LEON3_GPTIMER_CONFIG_NRTIMERS(c) ((c)->config & 0x7) #define LEON3_GPTIMER_CTRL_ISPENDING(r) (((r)&LEON3_GPTIMER_CTRL_PENDING) ? 1 : 0) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct leon3_irqctrl_regs_map { u32 ilevel; @@ -189,7 +189,7 @@ extern int leon_debug_irqout; extern unsigned long leon3_gptimer_irq; extern unsigned int sparc_leon_eirq; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #define LEON3_IO_AREA 0xfff00000 #define LEON3_CONF_AREA 0xff000 diff --git a/arch/sparc/include/asm/mman.h b/arch/sparc/include/asm/mman.h index 3e4bac33be81..a8bae8ad243a 100644 --- a/arch/sparc/include/asm/mman.h +++ b/arch/sparc/include/asm/mman.h @@ -4,7 +4,7 @@ #include <uapi/asm/mman.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define arch_mmap_check(addr,len,flags) sparc_mmap_check(addr,len) int sparc_mmap_check(unsigned long addr, unsigned long len); @@ -87,5 +87,5 @@ static inline bool arch_validate_flags(vm_flags_t vm_flags) } #endif /* CONFIG_SPARC64 */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __SPARC_MMAN_H__ */ diff --git a/arch/sparc/include/asm/mmu_64.h b/arch/sparc/include/asm/mmu_64.h index 7e2704c770e9..4eeb938f3e61 100644 --- a/arch/sparc/include/asm/mmu_64.h +++ b/arch/sparc/include/asm/mmu_64.h @@ -59,7 +59,7 @@ #define CTX_HWBITS(__ctx) ((__ctx.sparc64_ctx_val) & CTX_HW_MASK) #define CTX_NRBITS(__ctx) ((__ctx.sparc64_ctx_val) & CTX_NR_MASK) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define TSB_ENTRY_ALIGNMENT 16 @@ -117,7 +117,7 @@ typedef struct { spinlock_t tag_lock; } mm_context_t; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #define TSB_CONFIG_TSB 0x00 #define TSB_CONFIG_RSS_LIMIT 0x08 diff --git a/arch/sparc/include/asm/mmu_context_32.h b/arch/sparc/include/asm/mmu_context_32.h index 509043f81560..d9ff73f776f9 100644 --- a/arch/sparc/include/asm/mmu_context_32.h +++ b/arch/sparc/include/asm/mmu_context_32.h @@ -2,7 +2,7 @@ #ifndef __SPARC_MMU_CONTEXT_H #define __SPARC_MMU_CONTEXT_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm-generic/mm_hooks.h> @@ -29,6 +29,6 @@ void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, #include <asm-generic/mmu_context.h> -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ #endif /* !(__SPARC_MMU_CONTEXT_H) */ diff --git 
a/arch/sparc/include/asm/mmu_context_64.h b/arch/sparc/include/asm/mmu_context_64.h index 08160bf9a0f4..78bbacc14d2d 100644 --- a/arch/sparc/include/asm/mmu_context_64.h +++ b/arch/sparc/include/asm/mmu_context_64.h @@ -4,7 +4,7 @@ /* Derived heavily from Linus's Alpha/AXP ASN code... */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/spinlock.h> #include <linux/mm_types.h> @@ -193,6 +193,6 @@ static inline unsigned long mm_untag_mask(struct mm_struct *mm) #include <asm-generic/mmu_context.h> -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ #endif /* !(__SPARC64_MMU_CONTEXT_H) */ diff --git a/arch/sparc/include/asm/mxcc.h b/arch/sparc/include/asm/mxcc.h index 3a2561bea4dd..bd6339dcf693 100644 --- a/arch/sparc/include/asm/mxcc.h +++ b/arch/sparc/include/asm/mxcc.h @@ -84,7 +84,7 @@ * MID: The moduleID of the cpu your read this from. */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ static inline void mxcc_set_stream_src(unsigned long *paddr) { @@ -133,6 +133,6 @@ static inline void mxcc_set_creg(unsigned long mxcc_control) "i" (ASI_M_MXCC)); } -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* !(_SPARC_MXCC_H) */ diff --git a/arch/sparc/include/asm/obio.h b/arch/sparc/include/asm/obio.h index 1b151f738b00..f1ad7f7bcac2 100644 --- a/arch/sparc/include/asm/obio.h +++ b/arch/sparc/include/asm/obio.h @@ -97,7 +97,7 @@ #define CC_EREG 0x1F00E00 /* Error code register */ #define CC_CID 0x1F00F04 /* Component ID */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ static inline int bw_get_intr_mask(int sbus_level) { @@ -221,6 +221,6 @@ static inline void cc_set_igen(unsigned int gen) "i" (ASI_M_MXCC)); } -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* !(_SPARC_OBIO_H) */ diff --git a/arch/sparc/include/asm/openprom.h b/arch/sparc/include/asm/openprom.h index 69545b3e5454..ce68000dffac 100644 --- a/arch/sparc/include/asm/openprom.h +++ b/arch/sparc/include/asm/openprom.h @@ -11,7 +11,7 @@ /* Empirical constants... */ #define LINUX_OPPROM_MAGIC 0x10010407 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/of.h> /* V0 prom device operations. 
*/ @@ -275,6 +275,6 @@ struct linux_prom_pci_intmask { unsigned int interrupt; }; -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ #endif /* !(__SPARC_OPENPROM_H) */ diff --git a/arch/sparc/include/asm/page_32.h b/arch/sparc/include/asm/page_32.h index 9954254ea569..c1bccbedf567 100644 --- a/arch/sparc/include/asm/page_32.h +++ b/arch/sparc/include/asm/page_32.h @@ -13,7 +13,7 @@ #include <vdso/page.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define clear_page(page) memset((void *)(page), 0, PAGE_SIZE) #define copy_page(to,from) memcpy((void *)(to), (void *)(from), PAGE_SIZE) @@ -108,14 +108,14 @@ typedef pte_t *pgtable_t; #define TASK_UNMAPPED_BASE 0x50000000 -#else /* !(__ASSEMBLY__) */ +#else /* !(__ASSEMBLER__) */ #define __pgprot(x) (x) -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ #define PAGE_OFFSET 0xf0000000 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ extern unsigned long phys_base; extern unsigned long pfn_base; #endif diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h index 2a68ff5b6eab..d764d8a8586b 100644 --- a/arch/sparc/include/asm/page_64.h +++ b/arch/sparc/include/asm/page_64.h @@ -30,7 +30,7 @@ #define HUGE_MAX_HSTATE 5 #endif -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) struct pt_regs; @@ -128,7 +128,7 @@ extern unsigned long sparc64_va_hole_bottom; extern unsigned long PAGE_OFFSET; -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ /* The maximum number of physical memory address bits we support. The * largest value we can support is whatever "KPGD_SHIFT + KPTE_BITS" @@ -139,7 +139,7 @@ extern unsigned long PAGE_OFFSET; #define ILOG2_4MB 22 #define ILOG2_256MB 28 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define __pa(x) ((unsigned long)(x) - PAGE_OFFSET) #define __va(x) ((void *)((unsigned long) (x) + PAGE_OFFSET)) @@ -153,7 +153,7 @@ extern unsigned long PAGE_OFFSET; #define virt_to_phys __pa #define phys_to_virt __va -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ #include <asm-generic/getorder.h> diff --git a/arch/sparc/include/asm/parport_64.h b/arch/sparc/include/asm/parport_64.h index 3068809ef9ad..78f14d6620bf 100644 --- a/arch/sparc/include/asm/parport_64.h +++ b/arch/sparc/include/asm/parport_64.h @@ -9,6 +9,7 @@ #include <linux/of.h> #include <linux/platform_device.h> +#include <linux/string.h> #include <asm/ebus_dma.h> #include <asm/ns87303.h> @@ -149,7 +150,7 @@ static int ecpp_probe(struct platform_device *op) sparc_ebus_dmas[slot].info.callback = NULL; sparc_ebus_dmas[slot].info.client_cookie = NULL; sparc_ebus_dmas[slot].info.irq = 0xdeadbeef; - strcpy(sparc_ebus_dmas[slot].info.name, "parport"); + strscpy(sparc_ebus_dmas[slot].info.name, "parport"); if (ebus_dma_register(&sparc_ebus_dmas[slot].info)) goto out_unmap_regs; diff --git a/arch/sparc/include/asm/pcic.h b/arch/sparc/include/asm/pcic.h index 238376b1ffcc..fb5ed6a59535 100644 --- a/arch/sparc/include/asm/pcic.h +++ b/arch/sparc/include/asm/pcic.h @@ -8,7 +8,7 @@ #ifndef __SPARC_PCIC_H #define __SPARC_PCIC_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> #include <linux/smp.h> diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h index 7c199c003ffe..f1538a48484a 100644 --- a/arch/sparc/include/asm/pgtable_32.h +++ b/arch/sparc/include/asm/pgtable_32.h @@ -21,7 +21,7 @@ #define PGDIR_MASK (~(PGDIR_SIZE-1)) #define PGDIR_ALIGN(__addr) (((__addr) + ~PGDIR_MASK) & PGDIR_MASK) -#ifndef 
__ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm-generic/pgtable-nopud.h> #include <linux/spinlock.h> @@ -423,7 +423,7 @@ static inline int io_remap_pfn_range(struct vm_area_struct *vma, __changed; \ }) -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ #define VMALLOC_START _AC(0xfe600000,UL) #define VMALLOC_END _AC(0xffc00000,UL) diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 669cd02469a1..64b85ff9c766 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -79,7 +79,7 @@ #error PMD_SHIFT must equal HPAGE_SHIFT for transparent huge pages. #endif -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ extern unsigned long VMALLOC_END; @@ -106,7 +106,7 @@ bool kern_addr_valid(unsigned long addr); pr_err("%s:%d: bad pgd %p(%016lx) seen at (%pS)\n", \ __FILE__, __LINE__, &(e), pgd_val(e), __builtin_return_address(0)) -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ /* PTE bits which are the same in SUN4U and SUN4V format. */ #define _PAGE_VALID _AC(0x8000000000000000,UL) /* Valid TTE */ @@ -191,7 +191,7 @@ bool kern_addr_valid(unsigned long addr); /* We borrow bit 20 to store the exclusive marker in swap PTEs. */ #define _PAGE_SWP_EXCLUSIVE _AC(0x0000000000100000, UL) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ pte_t mk_pte_io(unsigned long, pgprot_t, int, unsigned long); @@ -1177,6 +1177,6 @@ extern unsigned long pte_leaf_size(pte_t pte); #endif /* CONFIG_HUGETLB_PAGE */ -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ #endif /* !(_SPARC64_PGTABLE_H) */ diff --git a/arch/sparc/include/asm/pgtsrmmu.h b/arch/sparc/include/asm/pgtsrmmu.h index 18e68d43f036..a265822a475e 100644 --- a/arch/sparc/include/asm/pgtsrmmu.h +++ b/arch/sparc/include/asm/pgtsrmmu.h @@ -10,7 +10,7 @@ #include <asm/page.h> -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #include <asm/thread_info.h> /* TI_UWINMASK for WINDOW_FLUSH */ #endif @@ -97,7 +97,7 @@ bne 99b; \ restore %g0, %g0, %g0; -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ extern unsigned long last_valid_pfn; /* This makes sense. Honest it does - Anton */ @@ -136,6 +136,6 @@ srmmu_get_pte (unsigned long addr) return entry; } -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ #endif /* !(_SPARC_PGTSRMMU_H) */ diff --git a/arch/sparc/include/asm/processor_64.h b/arch/sparc/include/asm/processor_64.h index 0a0d5c3d184c..321859454ca4 100644 --- a/arch/sparc/include/asm/processor_64.h +++ b/arch/sparc/include/asm/processor_64.h @@ -21,7 +21,7 @@ * XXX No longer using virtual page tables, kill this upper limit... */ #define VA_BITS 44 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define VPTE_SIZE (1UL << (VA_BITS - PAGE_SHIFT + 3)) #else #define VPTE_SIZE (1 << (VA_BITS - PAGE_SHIFT + 3)) @@ -45,7 +45,7 @@ #endif -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* The Sparc processor specific thread struct. */ /* XXX This should die, everything can go into thread_info now. 
*/ @@ -62,7 +62,7 @@ struct thread_struct { #endif }; -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ #ifndef CONFIG_DEBUG_SPINLOCK #define INIT_THREAD { \ @@ -75,7 +75,7 @@ struct thread_struct { } #endif /* !(CONFIG_DEBUG_SPINLOCK) */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> #include <asm/fpumacro.h> @@ -242,6 +242,6 @@ static inline void prefetchw(const void *x) int do_mathemu(struct pt_regs *regs, struct fpustate *f, bool illegal_insn_trap); -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ #endif /* !(__ASM_SPARC64_PROCESSOR_H) */ diff --git a/arch/sparc/include/asm/psr.h b/arch/sparc/include/asm/psr.h index 65127ce565ab..5af50ccda023 100644 --- a/arch/sparc/include/asm/psr.h +++ b/arch/sparc/include/asm/psr.h @@ -14,7 +14,7 @@ #include <uapi/asm/psr.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* Get the %psr register. */ static inline unsigned int get_psr(void) { @@ -63,6 +63,6 @@ static inline unsigned int get_fsr(void) return fsr; } -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ #endif /* !(__LINUX_SPARC_PSR_H) */ diff --git a/arch/sparc/include/asm/ptrace.h b/arch/sparc/include/asm/ptrace.h index d1419e669027..8adf3fd2f00f 100644 --- a/arch/sparc/include/asm/ptrace.h +++ b/arch/sparc/include/asm/ptrace.h @@ -5,7 +5,7 @@ #include <uapi/asm/ptrace.h> #if defined(__sparc__) && defined(__arch64__) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/compiler.h> #include <linux/threads.h> @@ -113,10 +113,10 @@ static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) { return regs->u_regs[UREG_I6]; } -#else /* __ASSEMBLY__ */ -#endif /* __ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ +#endif /* __ASSEMBLER__ */ #else /* (defined(__sparc__) && defined(__arch64__)) */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/switch_to.h> static inline bool pt_regs_is_syscall(struct pt_regs *regs) @@ -144,8 +144,8 @@ static inline bool pt_regs_clear_syscall(struct pt_regs *regs) #define instruction_pointer(regs) ((regs)->pc) #define user_stack_pointer(regs) ((regs)->u_regs[UREG_FP]) unsigned long profile_pc(struct pt_regs *); -#else /* (!__ASSEMBLY__) */ -#endif /* (!__ASSEMBLY__) */ +#else /* (!__ASSEMBLER__) */ +#endif /* (!__ASSEMBLER__) */ #endif /* (defined(__sparc__) && defined(__arch64__)) */ #define STACK_BIAS 2047 diff --git a/arch/sparc/include/asm/ross.h b/arch/sparc/include/asm/ross.h index 79a54d66a2c0..53a42b37495d 100644 --- a/arch/sparc/include/asm/ross.h +++ b/arch/sparc/include/asm/ross.h @@ -95,7 +95,7 @@ #define HYPERSPARC_ICCR_FTD 0x00000002 #define HYPERSPARC_ICCR_ICE 0x00000001 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ static inline unsigned int get_ross_icr(void) { @@ -187,6 +187,6 @@ static inline void hyper_flush_cache_page(unsigned long page) } } -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ #endif /* !(_SPARC_ROSS_H) */ diff --git a/arch/sparc/include/asm/sbi.h b/arch/sparc/include/asm/sbi.h index 4d6026c1e446..861f85b5bf9b 100644 --- a/arch/sparc/include/asm/sbi.h +++ b/arch/sparc/include/asm/sbi.h @@ -64,7 +64,7 @@ struct sbi_regs { */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ static inline int acquire_sbi(int devid, int mask) { @@ -111,6 +111,6 @@ static inline void set_sbi_ctl(int devid, int cfgno, int cfg) "i" (ASI_M_CTL)); } -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* !(_SPARC_SBI_H) */ diff --git a/arch/sparc/include/asm/sigcontext.h b/arch/sparc/include/asm/sigcontext.h index ee05f9d2bcf2..200f95144fd2 100644 --- 
a/arch/sparc/include/asm/sigcontext.h +++ b/arch/sparc/include/asm/sigcontext.h @@ -5,7 +5,7 @@ #include <asm/ptrace.h> #include <uapi/asm/sigcontext.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define __SUNOS_MAXWIN 31 @@ -104,6 +104,6 @@ typedef struct { #endif /* (CONFIG_SPARC64) */ -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ #endif /* !(__SPARC_SIGCONTEXT_H) */ diff --git a/arch/sparc/include/asm/signal.h b/arch/sparc/include/asm/signal.h index 28f81081e37d..d93fe93544ec 100644 --- a/arch/sparc/include/asm/signal.h +++ b/arch/sparc/include/asm/signal.h @@ -2,16 +2,16 @@ #ifndef __SPARC_SIGNAL_H #define __SPARC_SIGNAL_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/personality.h> #include <linux/types.h> #endif #include <uapi/asm/signal.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define __ARCH_HAS_KA_RESTORER #define __ARCH_HAS_SA_RESTORER -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ #endif /* !(__SPARC_SIGNAL_H) */ diff --git a/arch/sparc/include/asm/smp_32.h b/arch/sparc/include/asm/smp_32.h index 2cf7971d7f6c..9c6ed98fbaf1 100644 --- a/arch/sparc/include/asm/smp_32.h +++ b/arch/sparc/include/asm/smp_32.h @@ -10,15 +10,15 @@ #include <linux/threads.h> #include <asm/head.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/cpumask.h> -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #ifdef CONFIG_SMP -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/ptrace.h> #include <asm/asi.h> @@ -105,7 +105,7 @@ int hard_smp_processor_id(void); void smp_setup_cpu_possible_map(void); -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ /* Sparc specific messages. */ #define MSG_CROSS_CALL 0x0005 /* run func on cpus */ diff --git a/arch/sparc/include/asm/smp_64.h b/arch/sparc/include/asm/smp_64.h index 0964fede0b2c..759fb4a9530e 100644 --- a/arch/sparc/include/asm/smp_64.h +++ b/arch/sparc/include/asm/smp_64.h @@ -12,16 +12,16 @@ #include <asm/starfire.h> #include <asm/spitfire.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/cpumask.h> #include <linux/cache.h> -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ #ifdef CONFIG_SMP -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * Private routines/data @@ -68,7 +68,7 @@ int __cpu_disable(void); void __cpu_die(unsigned int cpu); #endif -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ #else diff --git a/arch/sparc/include/asm/spinlock_32.h b/arch/sparc/include/asm/spinlock_32.h index bc5aa6f61676..6d6d261bf8d2 100644 --- a/arch/sparc/include/asm/spinlock_32.h +++ b/arch/sparc/include/asm/spinlock_32.h @@ -7,7 +7,7 @@ #ifndef __SPARC_SPINLOCK_H #define __SPARC_SPINLOCK_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/psr.h> #include <asm/barrier.h> @@ -183,6 +183,6 @@ static inline int __arch_read_trylock(arch_rwlock_t *rw) res; \ }) -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ #endif /* __SPARC_SPINLOCK_H */ diff --git a/arch/sparc/include/asm/spinlock_64.h b/arch/sparc/include/asm/spinlock_64.h index 3a9a0b0c7465..13cd15d346be 100644 --- a/arch/sparc/include/asm/spinlock_64.h +++ b/arch/sparc/include/asm/spinlock_64.h @@ -7,13 +7,13 @@ #ifndef __SPARC64_SPINLOCK_H #define __SPARC64_SPINLOCK_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/processor.h> #include <asm/barrier.h> #include <asm/qspinlock.h> #include <asm/qrwlock.h> -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ #endif /* !(__SPARC64_SPINLOCK_H) */ diff --git a/arch/sparc/include/asm/spitfire.h 
b/arch/sparc/include/asm/spitfire.h index e9b7d25b29fa..79b9dd5e9ac6 100644 --- a/arch/sparc/include/asm/spitfire.h +++ b/arch/sparc/include/asm/spitfire.h @@ -68,7 +68,7 @@ #define CPU_ID_M8 ('8') #define CPU_ID_SONOMA1 ('N') -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ enum ultra_tlb_layout { spitfire = 0, @@ -363,6 +363,6 @@ static inline void cheetah_put_itlb_data(int entry, unsigned long data) "i" (ASI_ITLB_DATA_ACCESS)); } -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ #endif /* CONFIG_SPARC64 */ #endif /* !(_SPARC64_SPITFIRE_H) */ diff --git a/arch/sparc/include/asm/starfire.h b/arch/sparc/include/asm/starfire.h index fb1a8c499cb0..8e511ed78775 100644 --- a/arch/sparc/include/asm/starfire.h +++ b/arch/sparc/include/asm/starfire.h @@ -8,7 +8,7 @@ #ifndef _SPARC64_STARFIRE_H #define _SPARC64_STARFIRE_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ extern int this_is_starfire; diff --git a/arch/sparc/include/asm/thread_info_32.h b/arch/sparc/include/asm/thread_info_32.h index 45b4955b253f..fdaf7b171e0a 100644 --- a/arch/sparc/include/asm/thread_info_32.h +++ b/arch/sparc/include/asm/thread_info_32.h @@ -14,7 +14,7 @@ #ifdef __KERNEL__ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/ptrace.h> #include <asm/page.h> @@ -72,7 +72,7 @@ register struct thread_info *current_thread_info_reg asm("g6"); */ #define THREAD_SIZE_ORDER 1 -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ /* Size of kernel stack for each process */ #define THREAD_SIZE (2 * PAGE_SIZE) diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h index 1a44372e2bc0..c8a73dff27f8 100644 --- a/arch/sparc/include/asm/thread_info_64.h +++ b/arch/sparc/include/asm/thread_info_64.h @@ -26,7 +26,7 @@ #include <asm/page.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/ptrace.h> #include <asm/types.h> @@ -64,7 +64,7 @@ struct thread_info { __attribute__ ((aligned(64))); }; -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ /* offsets into the thread_info struct for assembly code access */ #define TI_TASK 0x00000000 @@ -110,7 +110,7 @@ struct thread_info { /* * macros/functions for gaining access to the thread information structure */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define INIT_THREAD_INFO(tsk) \ { \ @@ -150,7 +150,7 @@ extern struct thread_info *current_thread_info(void); #define set_thread_fpdepth(val) (__cur_thread_flag_byte_ptr[TI_FLAG_BYTE_FPDEPTH] = (val)) #define get_thread_wsaved() (__cur_thread_flag_byte_ptr[TI_FLAG_BYTE_WSAVED]) #define set_thread_wsaved(val) (__cur_thread_flag_byte_ptr[TI_FLAG_BYTE_WSAVED] = (val)) -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ /* * Thread information flags, only 16 bits are available as we encode @@ -228,14 +228,14 @@ extern struct thread_info *current_thread_info(void); * Note that there are only 8 bits available. */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define thread32_stack_is_64bit(__SP) (((__SP) & 0x1) != 0) #define test_thread_64bit_stack(__SP) \ ((test_thread_flag(TIF_32BIT) && !thread32_stack_is_64bit(__SP)) ? 
\ false : true) -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __KERNEL__ */ diff --git a/arch/sparc/include/asm/trap_block.h b/arch/sparc/include/asm/trap_block.h index ace0d48e837e..6cf2a60a0156 100644 --- a/arch/sparc/include/asm/trap_block.h +++ b/arch/sparc/include/asm/trap_block.h @@ -7,7 +7,7 @@ #include <asm/hypervisor.h> #include <asm/asi.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* Trap handling code needs to get at a few critical values upon * trap entry and to process TSB misses. These cannot be in the @@ -91,7 +91,7 @@ extern struct sun4v_2insn_patch_entry __sun_m7_2insn_patch, __sun_m7_2insn_patch_end; -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ #define TRAP_PER_CPU_THREAD 0x00 #define TRAP_PER_CPU_PGD_PADDR 0x08 diff --git a/arch/sparc/include/asm/traps.h b/arch/sparc/include/asm/traps.h index 2fba2602ba69..e4e10b0e7887 100644 --- a/arch/sparc/include/asm/traps.h +++ b/arch/sparc/include/asm/traps.h @@ -9,7 +9,7 @@ #include <uapi/asm/traps.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* This is for V8 compliant Sparc CPUS */ struct tt_entry { unsigned long inst_one; @@ -21,5 +21,5 @@ struct tt_entry { /* We set this to _start in system setup. */ extern struct tt_entry *sparc_ttable; -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ #endif /* !(_SPARC_TRAPS_H) */ diff --git a/arch/sparc/include/asm/tsb.h b/arch/sparc/include/asm/tsb.h index 522a677e050d..239be259e166 100644 --- a/arch/sparc/include/asm/tsb.h +++ b/arch/sparc/include/asm/tsb.h @@ -59,7 +59,7 @@ * The kernel TSB is locked into the TLB by virtue of being in the * kernel image, so we don't play these games for swapper_tsb access. */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct tsb_ldquad_phys_patch_entry { unsigned int addr; unsigned int sun4u_insn; diff --git a/arch/sparc/include/asm/ttable.h b/arch/sparc/include/asm/ttable.h index 8f6469408019..b32d3068cce1 100644 --- a/arch/sparc/include/asm/ttable.h +++ b/arch/sparc/include/asm/ttable.h @@ -5,7 +5,7 @@ #include <asm/utrap.h> #include <asm/pil.h> -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #include <asm/thread_info.h> #endif diff --git a/arch/sparc/include/asm/turbosparc.h b/arch/sparc/include/asm/turbosparc.h index 23df777f9cea..5f73263b6ded 100644 --- a/arch/sparc/include/asm/turbosparc.h +++ b/arch/sparc/include/asm/turbosparc.h @@ -57,7 +57,7 @@ #define TURBOSPARC_WTENABLE 0x00000020 /* Write thru for dcache */ #define TURBOSPARC_SNENABLE 0x40000000 /* DVMA snoop enable */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* Bits [13:5] select one of 512 instruction cache tags */ static inline void turbosparc_inv_insn_tag(unsigned long addr) @@ -121,6 +121,6 @@ static inline unsigned long turbosparc_get_ccreg(void) return regval; } -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* !(_SPARC_TURBOSPARC_H) */ diff --git a/arch/sparc/include/asm/upa.h b/arch/sparc/include/asm/upa.h index 782691b30f54..b1df3a7f40ed 100644 --- a/arch/sparc/include/asm/upa.h +++ b/arch/sparc/include/asm/upa.h @@ -24,7 +24,7 @@ #define UPA_PORTID_ID 0x000000000000ffff /* Module Identification bits */ /* UPA I/O space accessors */ -#if defined(__KERNEL__) && !defined(__ASSEMBLY__) +#if defined(__KERNEL__) && !defined(__ASSEMBLER__) static inline unsigned char _upa_readb(unsigned long addr) { unsigned char ret; @@ -105,6 +105,6 @@ static inline void _upa_writeq(unsigned long q, unsigned long addr) #define upa_writew(__w, __addr) (_upa_writew((__w), (unsigned long)(__addr))) #define upa_writel(__l, __addr) 
(_upa_writel((__l), (unsigned long)(__addr))) #define upa_writeq(__q, __addr) (_upa_writeq((__q), (unsigned long)(__addr))) -#endif /* __KERNEL__ && !__ASSEMBLY__ */ +#endif /* __KERNEL__ && !__ASSEMBLER__ */ #endif /* !(_SPARC64_UPA_H) */ diff --git a/arch/sparc/include/asm/vaddrs.h b/arch/sparc/include/asm/vaddrs.h index 4fec0341e2a8..da567600c897 100644 --- a/arch/sparc/include/asm/vaddrs.h +++ b/arch/sparc/include/asm/vaddrs.h @@ -31,7 +31,7 @@ */ #define SRMMU_NOCACHE_ALCRATIO 64 /* 256 pages per 64MB of system RAM */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/kmap_size.h> enum fixed_addresses { diff --git a/arch/sparc/include/asm/viking.h b/arch/sparc/include/asm/viking.h index 08ffc605035f..bbb714de43c4 100644 --- a/arch/sparc/include/asm/viking.h +++ b/arch/sparc/include/asm/viking.h @@ -110,7 +110,7 @@ #define VIKING_PTAG_DIRTY 0x00010000 /* Block has been modified */ #define VIKING_PTAG_SHARED 0x00000100 /* Shared with some other cache */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ static inline void viking_flush_icache(void) { @@ -250,6 +250,6 @@ static inline unsigned long viking_hwprobe(unsigned long vaddr) return val; } -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* !(_SPARC_VIKING_H) */ diff --git a/arch/sparc/include/asm/visasm.h b/arch/sparc/include/asm/visasm.h index 7903e84e09e0..71eb4e9afb3e 100644 --- a/arch/sparc/include/asm/visasm.h +++ b/arch/sparc/include/asm/visasm.h @@ -45,7 +45,7 @@ #define VISExitHalfFast \ wr %o5, 0, %fprs; -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ static inline void save_and_clear_fpu(void) { __asm__ __volatile__ ( " rd %%fprs, %%o5\n" diff --git a/arch/sparc/include/uapi/asm/ptrace.h b/arch/sparc/include/uapi/asm/ptrace.h index abe640037a55..2eb677f4eb6a 100644 --- a/arch/sparc/include/uapi/asm/ptrace.h +++ b/arch/sparc/include/uapi/asm/ptrace.h @@ -15,7 +15,7 @@ */ #define PT_REGS_MAGIC 0x57ac6c00 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> @@ -88,7 +88,7 @@ struct sparc_trapf { unsigned long _unused; struct pt_regs *regs; }; -#endif /* (!__ASSEMBLY__) */ +#endif /* (!__ASSEMBLER__) */ #else /* 32 bit sparc */ @@ -97,7 +97,7 @@ struct sparc_trapf { /* This struct defines the way the registers are stored on the * stack during a system call and basically all traps. */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> @@ -125,11 +125,11 @@ struct sparc_stackf { unsigned long xargs[6]; unsigned long xxargs[1]; }; -#endif /* (!__ASSEMBLY__) */ +#endif /* (!__ASSEMBLER__) */ #endif /* (defined(__sparc__) && defined(__arch64__))*/ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define TRACEREG_SZ sizeof(struct pt_regs) #define STACKFRAME_SZ sizeof(struct sparc_stackf) @@ -137,7 +137,7 @@ struct sparc_stackf { #define TRACEREG32_SZ sizeof(struct pt_regs32) #define STACKFRAME32_SZ sizeof(struct sparc_stackf32) -#endif /* (!__ASSEMBLY__) */ +#endif /* (!__ASSEMBLER__) */ #define UREG_G0 0 #define UREG_G1 1 @@ -161,30 +161,30 @@ struct sparc_stackf { #if defined(__sparc__) && defined(__arch64__) /* 64 bit sparc */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ -#else /* __ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ /* For assembly code. 
*/ #define TRACEREG_SZ 0xa0 #define STACKFRAME_SZ 0xc0 #define TRACEREG32_SZ 0x50 #define STACKFRAME32_SZ 0x60 -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #else /* (defined(__sparc__) && defined(__arch64__)) */ /* 32 bit sparc */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ -#else /* (!__ASSEMBLY__) */ +#else /* (!__ASSEMBLER__) */ /* For assembly code. */ #define TRACEREG_SZ 0x50 #define STACKFRAME_SZ 0x60 -#endif /* (!__ASSEMBLY__) */ +#endif /* (!__ASSEMBLER__) */ #endif /* (defined(__sparc__) && defined(__arch64__)) */ diff --git a/arch/sparc/include/uapi/asm/signal.h b/arch/sparc/include/uapi/asm/signal.h index b61382924725..9c64d7cb85c2 100644 --- a/arch/sparc/include/uapi/asm/signal.h +++ b/arch/sparc/include/uapi/asm/signal.h @@ -105,7 +105,7 @@ #define __old_sigaction32 sigaction32 #endif -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ typedef unsigned long __old_sigset_t; /* at least 32 bits */ @@ -176,6 +176,6 @@ typedef struct sigaltstack { } stack_t; -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ #endif /* _UAPI__SPARC_SIGNAL_H */ diff --git a/arch/sparc/include/uapi/asm/traps.h b/arch/sparc/include/uapi/asm/traps.h index 930db746f8bd..43fe5b8fe8be 100644 --- a/arch/sparc/include/uapi/asm/traps.h +++ b/arch/sparc/include/uapi/asm/traps.h @@ -10,8 +10,8 @@ #define NUM_SPARC_TRAPS 255 -#ifndef __ASSEMBLY__ -#endif /* !(__ASSEMBLY__) */ +#ifndef __ASSEMBLER__ +#endif /* !(__ASSEMBLER__) */ /* For patching the trap table at boot time, we need to know how to * form various common Sparc instructions. Thus these macros... diff --git a/arch/sparc/include/uapi/asm/utrap.h b/arch/sparc/include/uapi/asm/utrap.h index d890b7fc6e83..a489b08b6a33 100644 --- a/arch/sparc/include/uapi/asm/utrap.h +++ b/arch/sparc/include/uapi/asm/utrap.h @@ -44,9 +44,9 @@ #define UTH_NOCHANGE (-1) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ typedef int utrap_entry_t; typedef void *utrap_handler_t; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* !(__ASM_SPARC64_PROCESSOR_H) */ diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile index 36f2727e1445..22170d4f8e06 100644 --- a/arch/sparc/kernel/Makefile +++ b/arch/sparc/kernel/Makefile @@ -4,8 +4,6 @@ # Makefile for the linux kernel. 
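A note on the sparc conversions above: they are mechanical, because __ASSEMBLER__ is predefined by both GCC and Clang themselves whenever an assembly source is preprocessed, so headers no longer depend on the build system passing -D__ASSEMBLY__. A minimal sketch of the guard pattern these headers share (hypothetical header, not from this series):

	/* example.h - hypothetical header shared between C and .S files */
	#ifndef _EXAMPLE_H
	#define _EXAMPLE_H

	#define EXAMPLE_FLAG	0x10	/* usable from both C and assembly */

	#ifndef __ASSEMBLER__	/* predefined by the compiler for .S files */
	struct example_state {
		unsigned long flags;
	};

	static inline int example_flag_set(const struct example_state *s)
	{
		return (s->flags & EXAMPLE_FLAG) != 0;
	}
	#endif /* !__ASSEMBLER__ */

	#endif /* _EXAMPLE_H */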
# -asflags-y := -ansi - # Undefine sparc when processing vmlinux.lds - it is used # And teach CPP we are doing $(BITS) builds (for this case) CPPFLAGS_vmlinux.lds := -Usparc -m$(BITS) diff --git a/arch/sparc/kernel/adi_64.c b/arch/sparc/kernel/adi_64.c index e0e4fc527b24..18036a43cf56 100644 --- a/arch/sparc/kernel/adi_64.c +++ b/arch/sparc/kernel/adi_64.c @@ -202,7 +202,7 @@ static tag_storage_desc_t *alloc_tag_store(struct mm_struct *mm, } else { size = sizeof(tag_storage_desc_t)*max_desc; - mm->context.tag_store = kzalloc(size, GFP_NOWAIT|__GFP_NOWARN); + mm->context.tag_store = kzalloc(size, GFP_NOWAIT); if (mm->context.tag_store == NULL) { tag_desc = NULL; goto out; @@ -281,7 +281,7 @@ static tag_storage_desc_t *alloc_tag_store(struct mm_struct *mm, size = (size + (PAGE_SIZE-adi_blksize()))/PAGE_SIZE; size = size * PAGE_SIZE; } - tags = kzalloc(size, GFP_NOWAIT|__GFP_NOWARN); + tags = kzalloc(size, GFP_NOWAIT); if (tags == NULL) { tag_desc->tag_users = 0; tag_desc = NULL; diff --git a/arch/sparc/kernel/ds.c b/arch/sparc/kernel/ds.c index ffdc15588ac2..f7fc6f2af2f2 100644 --- a/arch/sparc/kernel/ds.c +++ b/arch/sparc/kernel/ds.c @@ -781,14 +781,17 @@ void ldom_set_var(const char *var, const char *value) } pkt; char *base, *p; int msg_len, loops; + size_t var_len, value_len; - if (strlen(var) + strlen(value) + 2 > - sizeof(pkt) - sizeof(pkt.header)) { - printk(KERN_ERR PFX - "contents length: %zu, which more than max: %lu," - "so could not set (%s) variable to (%s).\n", - strlen(var) + strlen(value) + 2, - sizeof(pkt) - sizeof(pkt.header), var, value); + var_len = strlen(var) + 1; + value_len = strlen(value) + 1; + + if (var_len + value_len > sizeof(pkt) - sizeof(pkt.header)) { + pr_err(PFX + "contents length: %zu, which is more than max: %lu, " + "so could not set (%s) variable to (%s).\n", + var_len + value_len, + sizeof(pkt) - sizeof(pkt.header), var, value); return; } @@ -797,10 +800,10 @@ void ldom_set_var(const char *var, const char *value) pkt.header.data.handle = cp->handle; pkt.header.msg.hdr.type = DS_VAR_SET_REQ; base = p = &pkt.header.msg.name_and_value[0]; - strcpy(p, var); - p += strlen(var) + 1; - strcpy(p, value); - p += strlen(value) + 1; + strscpy(p, var, var_len); + p += var_len; + strscpy(p, value, value_len); + p += value_len; msg_len = (sizeof(struct ds_data) + sizeof(struct ds_var_set_msg) + @@ -910,7 +913,7 @@ static int register_services(struct ds_info *dp) pbuf.req.handle = cp->handle; pbuf.req.major = 1; pbuf.req.minor = 0; - strcpy(pbuf.id_buf, cp->service_id); + strscpy(pbuf.id_buf, cp->service_id); err = __ds_send(lp, &pbuf, msg_len); if (err > 0) diff --git a/arch/sparc/kernel/module.c b/arch/sparc/kernel/module.c index b8c51cc23d96..49740450a685 100644 --- a/arch/sparc/kernel/module.c +++ b/arch/sparc/kernel/module.c @@ -87,6 +87,7 @@ int apply_relocate_add(Elf_Shdr *sechdrs, break; #ifdef CONFIG_SPARC64 case R_SPARC_64: + case R_SPARC_UA64: location[0] = v >> 56; location[1] = v >> 48; location[2] = v >> 40; @@ -141,7 +142,7 @@ int apply_relocate_add(Elf_Shdr *sechdrs, break; default: - printk(KERN_ERR "module %s: Unknown relocation: %x\n", + printk(KERN_ERR "module %s: Unknown relocation: 0x%x\n", me->name, (int) (ELF_R_TYPE(rel[i].r_info) & 0xff)); return -ENOEXEC; diff --git a/arch/sparc/kernel/of_device_32.c b/arch/sparc/kernel/of_device_32.c index 06012e68bdca..284a4cafa432 100644 --- a/arch/sparc/kernel/of_device_32.c +++ b/arch/sparc/kernel/of_device_32.c @@ -387,6 +387,7 @@ static struct platform_device * __init scan_one_device(struct device_node
*dp, if (of_device_register(op)) { printk("%pOF: Could not register of device.\n", dp); + put_device(&op->dev); kfree(op); op = NULL; } diff --git a/arch/sparc/kernel/of_device_64.c b/arch/sparc/kernel/of_device_64.c index f98c2901f335..f53092b07b9e 100644 --- a/arch/sparc/kernel/of_device_64.c +++ b/arch/sparc/kernel/of_device_64.c @@ -677,6 +677,7 @@ static struct platform_device * __init scan_one_device(struct device_node *dp, if (of_device_register(op)) { printk("%pOF: Could not register of device.\n", dp); + put_device(&op->dev); kfree(op); op = NULL; } diff --git a/arch/sparc/kernel/pcic.c b/arch/sparc/kernel/pcic.c index 25fe0a061732..f894ae79e78a 100644 --- a/arch/sparc/kernel/pcic.c +++ b/arch/sparc/kernel/pcic.c @@ -16,6 +16,7 @@ #include <linux/init.h> #include <linux/mm.h> #include <linux/slab.h> +#include <linux/string.h> #include <linux/jiffies.h> #include <asm/swift.h> /* for cache flushing. */ @@ -352,7 +353,7 @@ int __init pcic_probe(void) pbm = &pcic->pbm; pbm->prom_node = node; prom_getstring(node, "name", namebuf, 63); namebuf[63] = 0; - strcpy(pbm->prom_name, namebuf); + strscpy(pbm->prom_name, namebuf); { extern int pcic_nmi_trap_patch[4]; @@ -477,7 +478,7 @@ static void pcic_map_pci_device(struct linux_pcic *pcic, int j; if (node == 0 || node == -1) { - strcpy(namebuf, "???"); + strscpy(namebuf, "???"); } else { prom_getstring(node, "name", namebuf, 63); namebuf[63] = 0; } @@ -536,7 +537,7 @@ pcic_fill_irq(struct linux_pcic *pcic, struct pci_dev *dev, int node) char namebuf[64]; if (node == 0 || node == -1) { - strcpy(namebuf, "???"); + strscpy(namebuf, "???"); } else { prom_getstring(node, "name", namebuf, sizeof(namebuf)); } diff --git a/arch/sparc/kernel/prom_32.c b/arch/sparc/kernel/prom_32.c index a67dd67f10c8..cd94f1e8d644 100644 --- a/arch/sparc/kernel/prom_32.c +++ b/arch/sparc/kernel/prom_32.c @@ -187,14 +187,16 @@ char * __init build_path_component(struct device_node *dp) { const char *name = of_get_property(dp, "name", NULL); char tmp_buf[64], *n; + size_t n_sz; tmp_buf[0] = '\0'; __build_path_component(dp, tmp_buf); if (tmp_buf[0] == '\0') - strcpy(tmp_buf, name); + strscpy(tmp_buf, name); - n = prom_early_alloc(strlen(tmp_buf) + 1); - strcpy(n, tmp_buf); + n_sz = strlen(tmp_buf) + 1; + n = prom_early_alloc(n_sz); + strscpy(n, tmp_buf, n_sz); return n; } @@ -204,13 +206,14 @@ extern void restore_current(void); void __init of_console_init(void) { char *msg = "OF stdout device is: %s\n"; + const size_t of_console_path_sz = 256; struct device_node *dp; unsigned long flags; const char *type; phandle node; int skip, tmp, fd; - of_console_path = prom_early_alloc(256); + of_console_path = prom_early_alloc(of_console_path_sz); switch (prom_vers) { case PROM_V0: @@ -297,7 +300,7 @@ void __init of_console_init(void) prom_printf("No stdout-path in root node.\n"); prom_halt(); } - strcpy(of_console_path, path); + strscpy(of_console_path, path, of_console_path_sz); } break; } diff --git a/arch/sparc/kernel/prom_64.c b/arch/sparc/kernel/prom_64.c index ba82884cb92a..aa4799cbb9c1 100644 --- a/arch/sparc/kernel/prom_64.c +++ b/arch/sparc/kernel/prom_64.c @@ -361,14 +361,16 @@ char * __init build_path_component(struct device_node *dp) { const char *name = of_get_property(dp, "name", NULL); char tmp_buf[64], *n; + size_t n_sz; tmp_buf[0] = '\0'; __build_path_component(dp, tmp_buf); if (tmp_buf[0] == '\0') - strcpy(tmp_buf, name); + strscpy(tmp_buf, name); - n = prom_early_alloc(strlen(tmp_buf) + 1); - strcpy(n, tmp_buf); + n_sz = strlen(tmp_buf) + 1; + n = 
prom_early_alloc(n_sz); + strscpy(n, tmp_buf, n_sz); return n; } diff --git a/arch/sparc/kernel/prom_common.c b/arch/sparc/kernel/prom_common.c index c9ec70888a39..d258fd10db01 100644 --- a/arch/sparc/kernel/prom_common.c +++ b/arch/sparc/kernel/prom_common.c @@ -120,11 +120,14 @@ EXPORT_SYMBOL(of_find_in_proplist); */ static int __init handle_nextprop_quirks(char *buf, const char *name) { - if (!name || strlen(name) == 0) + size_t name_len; + + name_len = name ? strlen(name) : 0; + if (name_len == 0) return -1; #ifdef CONFIG_SPARC32 - strcpy(buf, name); + strscpy(buf, name, name_len + 1); #endif return 0; } diff --git a/arch/sparc/lib/M7memcpy.S b/arch/sparc/lib/M7memcpy.S index cbd42ea7c3f7..99357bfa8e82 100644 --- a/arch/sparc/lib/M7memcpy.S +++ b/arch/sparc/lib/M7memcpy.S @@ -696,16 +696,16 @@ FUNC_NAME: EX_LD_FP(LOAD(ldd, %o4+40, %f26), memcpy_retl_o2_plus_o5_plus_40) faligndata %f24, %f26, %f10 EX_ST_FP(STORE(std, %f6, %o0+24), memcpy_retl_o2_plus_o5_plus_40) - EX_LD_FP(LOAD(ldd, %o4+48, %f28), memcpy_retl_o2_plus_o5_plus_40) + EX_LD_FP(LOAD(ldd, %o4+48, %f28), memcpy_retl_o2_plus_o5_plus_32) faligndata %f26, %f28, %f12 - EX_ST_FP(STORE(std, %f8, %o0+32), memcpy_retl_o2_plus_o5_plus_40) + EX_ST_FP(STORE(std, %f8, %o0+32), memcpy_retl_o2_plus_o5_plus_32) add %o4, 64, %o4 - EX_LD_FP(LOAD(ldd, %o4-8, %f30), memcpy_retl_o2_plus_o5_plus_40) + EX_LD_FP(LOAD(ldd, %o4-8, %f30), memcpy_retl_o2_plus_o5_plus_24) faligndata %f28, %f30, %f14 - EX_ST_FP(STORE(std, %f10, %o0+40), memcpy_retl_o2_plus_o5_plus_40) - EX_ST_FP(STORE(std, %f12, %o0+48), memcpy_retl_o2_plus_o5_plus_40) + EX_ST_FP(STORE(std, %f10, %o0+40), memcpy_retl_o2_plus_o5_plus_24) + EX_ST_FP(STORE(std, %f12, %o0+48), memcpy_retl_o2_plus_o5_plus_16) add %o0, 64, %o0 - EX_ST_FP(STORE(std, %f14, %o0-8), memcpy_retl_o2_plus_o5_plus_40) + EX_ST_FP(STORE(std, %f14, %o0-8), memcpy_retl_o2_plus_o5_plus_8) fsrc2 %f30, %f14 bgu,pt %xcc, .Lunalign_sloop prefetch [%o4 + (8 * BLOCK_SIZE)], 20 @@ -728,7 +728,7 @@ FUNC_NAME: add %o4, 8, %o4 faligndata %f0, %f2, %f16 subcc %o5, 8, %o5 - EX_ST_FP(STORE(std, %f16, %o0), memcpy_retl_o2_plus_o5) + EX_ST_FP(STORE(std, %f16, %o0), memcpy_retl_o2_plus_o5_plus_8) fsrc2 %f2, %f0 bgu,pt %xcc, .Lunalign_by8 add %o0, 8, %o0 @@ -772,7 +772,7 @@ FUNC_NAME: subcc %o5, 0x20, %o5 EX_ST(STORE(stx, %o3, %o0 + 0x00), memcpy_retl_o2_plus_o5_plus_32) EX_ST(STORE(stx, %g2, %o0 + 0x08), memcpy_retl_o2_plus_o5_plus_24) - EX_ST(STORE(stx, %g7, %o0 + 0x10), memcpy_retl_o2_plus_o5_plus_24) + EX_ST(STORE(stx, %g7, %o0 + 0x10), memcpy_retl_o2_plus_o5_plus_16) EX_ST(STORE(stx, %o4, %o0 + 0x18), memcpy_retl_o2_plus_o5_plus_8) bne,pt %xcc, 1b add %o0, 0x20, %o0 @@ -804,12 +804,12 @@ FUNC_NAME: brz,pt %o3, 2f sub %o2, %o3, %o2 -1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2), memcpy_retl_o2_plus_g1) +1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2), memcpy_retl_o2_plus_o3) add %o1, 1, %o1 subcc %o3, 1, %o3 add %o0, 1, %o0 bne,pt %xcc, 1b - EX_ST(STORE(stb, %g2, %o0 - 0x01), memcpy_retl_o2_plus_g1_plus_1) + EX_ST(STORE(stb, %g2, %o0 - 0x01), memcpy_retl_o2_plus_o3_plus_1) 2: and %o1, 0x7, %o3 brz,pn %o3, .Lmedium_noprefetch_cp diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index ee5091dd67ed..783bdec0d7be 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -2,7 +2,7 @@ # Makefile for Sparc library files.. 
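All of the strcpy() to strscpy() conversions above (ds.c, pcic.c, prom_32.c, prom_64.c, prom_common.c) rely on the same contract: the copy is bounded, the destination is always NUL-terminated, and truncation is reported instead of silently overflowing. A rough userspace model of that contract (a sketch of the semantics only, not the kernel's implementation in lib/string.c):

	#include <stddef.h>
	#include <string.h>

	#define E2BIG 7

	/* Sketch: copy at most size-1 bytes, always NUL-terminate,
	 * return the number of bytes copied or -E2BIG on truncation. */
	static long strscpy_model(char *dst, const char *src, size_t size)
	{
		size_t len;

		if (size == 0)
			return -E2BIG;

		len = strnlen(src, size);
		if (len == size) {		/* src does not fit */
			memcpy(dst, src, size - 1);
			dst[size - 1] = '\0';
			return -E2BIG;
		}
		memcpy(dst, src, len + 1);	/* includes the NUL */
		return (long)len;
	}

The two-argument calls such as strscpy(pbuf.id_buf, cp->service_id) lean on the kernel macro deducing the bound from the array type of the destination, which is why they pass no explicit size.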
# -asflags-y := -ansi -DST_DIV0=0x02 +asflags-y := -DST_DIV0=0x02 lib-$(CONFIG_SPARC32) += ashrdi3.o lib-$(CONFIG_SPARC32) += memcpy.o memset.o diff --git a/arch/sparc/lib/Memcpy_utils.S b/arch/sparc/lib/Memcpy_utils.S index 64fbac28b3db..207343367bb2 100644 --- a/arch/sparc/lib/Memcpy_utils.S +++ b/arch/sparc/lib/Memcpy_utils.S @@ -137,6 +137,15 @@ ENTRY(memcpy_retl_o2_plus_63_8) ba,pt %xcc, __restore_asi add %o2, 8, %o0 ENDPROC(memcpy_retl_o2_plus_63_8) +ENTRY(memcpy_retl_o2_plus_o3) + ba,pt %xcc, __restore_asi + add %o2, %o3, %o0 +ENDPROC(memcpy_retl_o2_plus_o3) +ENTRY(memcpy_retl_o2_plus_o3_plus_1) + add %o3, 1, %o3 + ba,pt %xcc, __restore_asi + add %o2, %o3, %o0 +ENDPROC(memcpy_retl_o2_plus_o3_plus_1) ENTRY(memcpy_retl_o2_plus_o5) ba,pt %xcc, __restore_asi add %o2, %o5, %o0 diff --git a/arch/sparc/lib/NG4memcpy.S b/arch/sparc/lib/NG4memcpy.S index 7ad58ebe0d00..df0ec1bd1948 100644 --- a/arch/sparc/lib/NG4memcpy.S +++ b/arch/sparc/lib/NG4memcpy.S @@ -281,7 +281,7 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ subcc %o5, 0x20, %o5 EX_ST(STORE(stx, %g1, %o0 + 0x00), memcpy_retl_o2_plus_o5_plus_32) EX_ST(STORE(stx, %g2, %o0 + 0x08), memcpy_retl_o2_plus_o5_plus_24) - EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10), memcpy_retl_o2_plus_o5_plus_24) + EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10), memcpy_retl_o2_plus_o5_plus_16) EX_ST(STORE(stx, %o4, %o0 + 0x18), memcpy_retl_o2_plus_o5_plus_8) bne,pt %icc, 1b add %o0, 0x20, %o0 diff --git a/arch/sparc/lib/NGmemcpy.S b/arch/sparc/lib/NGmemcpy.S index ee51c1230689..bbd3ea0a6482 100644 --- a/arch/sparc/lib/NGmemcpy.S +++ b/arch/sparc/lib/NGmemcpy.S @@ -79,8 +79,8 @@ #ifndef EX_RETVAL #define EX_RETVAL(x) x __restore_asi: - ret wr %g0, ASI_AIUS, %asi + ret restore ENTRY(NG_ret_i2_plus_i4_plus_1) ba,pt %xcc, __restore_asi add %i2, %i4, %i0 @@ -125,15 +125,16 @@ ENTRY(NG_ret_i2_plus_g1_minus_56) ba,pt %xcc, __restore_asi add %i2, %g1, %i0 ENDPROC(NG_ret_i2_plus_g1_minus_56) -ENTRY(NG_ret_i2_plus_i4) +ENTRY(NG_ret_i2_plus_i4_plus_16) + add %i4, 16, %i4 ba,pt %xcc, __restore_asi add %i2, %i4, %i0 -ENDPROC(NG_ret_i2_plus_i4) -ENTRY(NG_ret_i2_plus_i4_minus_8) - sub %i4, 8, %i4 +ENDPROC(NG_ret_i2_plus_i4_plus_16) +ENTRY(NG_ret_i2_plus_i4_plus_8) + add %i4, 8, %i4 ba,pt %xcc, __restore_asi add %i2, %i4, %i0 -ENDPROC(NG_ret_i2_plus_i4_minus_8) +ENDPROC(NG_ret_i2_plus_i4_plus_8) ENTRY(NG_ret_i2_plus_8) ba,pt %xcc, __restore_asi add %i2, 8, %i0 @@ -160,6 +161,12 @@ ENTRY(NG_ret_i2_and_7_plus_i4) ba,pt %xcc, __restore_asi add %i2, %i4, %i0 ENDPROC(NG_ret_i2_and_7_plus_i4) +ENTRY(NG_ret_i2_and_7_plus_i4_plus_8) + and %i2, 7, %i2 + add %i4, 8, %i4 + ba,pt %xcc, __restore_asi + add %i2, %i4, %i0 +ENDPROC(NG_ret_i2_and_7_plus_i4_plus_8) #endif .align 64 @@ -405,13 +412,13 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ andn %i2, 0xf, %i4 and %i2, 0xf, %i2 1: subcc %i4, 0x10, %i4 - EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_i4) + EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_i4_plus_16) add %i1, 0x08, %i1 - EX_LD(LOAD(ldx, %i1, %g1), NG_ret_i2_plus_i4) + EX_LD(LOAD(ldx, %i1, %g1), NG_ret_i2_plus_i4_plus_16) sub %i1, 0x08, %i1 - EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_i4) + EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_i4_plus_16) add %i1, 0x8, %i1 - EX_ST(STORE(stx, %g1, %i1 + %i3), NG_ret_i2_plus_i4_minus_8) + EX_ST(STORE(stx, %g1, %i1 + %i3), NG_ret_i2_plus_i4_plus_8) bgu,pt %XCC, 1b add %i1, 0x8, %i1 73: andcc %i2, 0x8, %g0 @@ -468,7 +475,7 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ subcc %i4, 0x8, %i4 srlx %g3, %i3, %i5 or %i5, %g2, %i5 - EX_ST(STORE(stx, %i5, %o0),
NG_ret_i2_and_7_plus_i4) + EX_ST(STORE(stx, %i5, %o0), NG_ret_i2_and_7_plus_i4_plus_8) add %o0, 0x8, %o0 bgu,pt %icc, 1b sllx %g3, %g1, %g2 diff --git a/arch/sparc/lib/U1memcpy.S b/arch/sparc/lib/U1memcpy.S index 635398ec7540..154fbd35400c 100644 --- a/arch/sparc/lib/U1memcpy.S +++ b/arch/sparc/lib/U1memcpy.S @@ -164,17 +164,18 @@ ENTRY(U1_gs_40_fp) retl add %o0, %o2, %o0 ENDPROC(U1_gs_40_fp) -ENTRY(U1_g3_0_fp) - VISExitHalf - retl - add %g3, %o2, %o0 -ENDPROC(U1_g3_0_fp) ENTRY(U1_g3_8_fp) VISExitHalf add %g3, 8, %g3 retl add %g3, %o2, %o0 ENDPROC(U1_g3_8_fp) +ENTRY(U1_g3_16_fp) + VISExitHalf + add %g3, 16, %g3 + retl + add %g3, %o2, %o0 +ENDPROC(U1_g3_16_fp) ENTRY(U1_o2_0_fp) VISExitHalf retl @@ -547,18 +548,18 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 62: FINISH_VISCHUNK(o0, f44, f46) 63: UNEVEN_VISCHUNK_LAST(o0, f46, f0) -93: EX_LD_FP(LOAD(ldd, %o1, %f2), U1_g3_0_fp) +93: EX_LD_FP(LOAD(ldd, %o1, %f2), U1_g3_8_fp) add %o1, 8, %o1 subcc %g3, 8, %g3 faligndata %f0, %f2, %f8 - EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp) + EX_ST_FP(STORE(std, %f8, %o0), U1_g3_16_fp) bl,pn %xcc, 95f add %o0, 8, %o0 - EX_LD_FP(LOAD(ldd, %o1, %f0), U1_g3_0_fp) + EX_LD_FP(LOAD(ldd, %o1, %f0), U1_g3_8_fp) add %o1, 8, %o1 subcc %g3, 8, %g3 faligndata %f2, %f0, %f8 - EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp) + EX_ST_FP(STORE(std, %f8, %o0), U1_g3_16_fp) bge,pt %xcc, 93b add %o0, 8, %o0 diff --git a/arch/sparc/lib/U3memcpy.S b/arch/sparc/lib/U3memcpy.S index 9248d59c734c..bace3a18f836 100644 --- a/arch/sparc/lib/U3memcpy.S +++ b/arch/sparc/lib/U3memcpy.S @@ -267,6 +267,7 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ faligndata %f10, %f12, %f26 EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2) + and %o2, 0x3f, %o2 subcc GLOBAL_SPARE, 0x80, GLOBAL_SPARE add %o1, 0x40, %o1 bgu,pt %XCC, 1f @@ -336,7 +337,6 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ * Also notice how this code is careful not to perform a * load past the end of the src buffer. */ - and %o2, 0x3f, %o2 andcc %o2, 0x38, %g2 be,pn %XCC, 2f subcc %g2, 0x8, %g2 diff --git a/arch/sparc/mm/Makefile b/arch/sparc/mm/Makefile index 2d1752108d77..e9d232561c82 100644 --- a/arch/sparc/mm/Makefile +++ b/arch/sparc/mm/Makefile @@ -2,8 +2,6 @@ # Makefile for the linux Sparc-specific parts of the memory manager. # -asflags-y := -ansi - obj-$(CONFIG_SPARC64) += ultra.o tlb.o tsb.o obj-y += fault_$(BITS).o obj-y += init_$(BITS).o diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c index 4b9431311e05..4652e868663b 100644 --- a/arch/sparc/mm/hugetlbpage.c +++ b/arch/sparc/mm/hugetlbpage.c @@ -22,6 +22,26 @@ static pte_t sun4u_hugepage_shift_to_tte(pte_t entry, unsigned int shift) { + unsigned long hugepage_size = _PAGE_SZ4MB_4U; + + pte_val(entry) = pte_val(entry) & ~_PAGE_SZALL_4U; + + switch (shift) { + case HPAGE_256MB_SHIFT: + hugepage_size = _PAGE_SZ256MB_4U; + pte_val(entry) |= _PAGE_PMD_HUGE; + break; + case HPAGE_SHIFT: + pte_val(entry) |= _PAGE_PMD_HUGE; + break; + case HPAGE_64K_SHIFT: + hugepage_size = _PAGE_SZ64K_4U; + break; + default: + WARN_ONCE(1, "unsupported hugepage shift=%u\n", shift); + } + + pte_val(entry) = pte_val(entry) | hugepage_size; return entry; } diff --git a/arch/sparc/prom/Makefile b/arch/sparc/prom/Makefile index a1adc75d8055..92db8bb4ad4c 100644 --- a/arch/sparc/prom/Makefile +++ b/arch/sparc/prom/Makefile @@ -2,7 +2,6 @@ # Makefile for the Sun Boot PROM interface library under # Linux. 
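The label renames in M7memcpy.S, NG4memcpy.S, NGmemcpy.S and U1memcpy.S above all fix one class of bug: when a user access faults mid-loop, the exception fixup must report exactly how many bytes remain uncopied, and that depends on how far into the unrolled iteration the fault happened. A compact C model of the accounting (illustrative only; the register names in the comments refer to the assembly above, and 64 bytes per iteration is just an example):

	#include <stddef.h>

	/* Model of the fixup-label arithmetic: a fault at byte offset
	 * 'done' of a 64-byte unrolled iteration leaves (64 - done)
	 * bytes of that iteration, plus the outstanding loop count and
	 * the tail, still uncopied.  The _plus_8/_plus_16/... suffixes
	 * on the labels encode (64 - done) for each load/store slot. */
	static size_t bytes_not_copied(size_t tail /* %o2 */,
				       size_t loop_remaining /* %o5 */,
				       size_t done)
	{
		return tail + loop_remaining + (64 - done);
	}

Using one shared label for every slot, as the old code did, over- or under-reports the remainder for all but one slot, which is exactly what the per-slot labels above correct.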
# -asflags := -ansi lib-y := bootstr_$(BITS).o lib-y += init_$(BITS).o diff --git a/arch/sparc/prom/tree_64.c b/arch/sparc/prom/tree_64.c index 88793e5b0ab5..7388339bbd7e 100644 --- a/arch/sparc/prom/tree_64.c +++ b/arch/sparc/prom/tree_64.c @@ -272,7 +272,7 @@ char *prom_nextprop(phandle node, const char *oprop, char *buffer) return buffer; } if (oprop == buffer) { - strcpy (buf, oprop); + strscpy(buf, oprop); oprop = buf; } diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e3cccf4256ca..9d034a987c6e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1902,7 +1902,6 @@ config INTEL_TDX_HOST depends on X86_X2APIC select ARCH_KEEP_MEMBLOCK depends on CONTIG_ALLOC - depends on !KEXEC_CORE depends on X86_MCE help Intel Trust Domain Extensions (TDX) protects guest VMs from malicious diff --git a/arch/x86/entry/entry_64_fred.S b/arch/x86/entry/entry_64_fred.S index 29c5c32c16c3..907bd233c6c1 100644 --- a/arch/x86/entry/entry_64_fred.S +++ b/arch/x86/entry/entry_64_fred.S @@ -16,7 +16,7 @@ .macro FRED_ENTER UNWIND_HINT_END_OF_STACK - ENDBR + ANNOTATE_NOENDBR PUSH_AND_CLEAR_REGS movq %rsp, %rdi /* %rdi -> pt_regs */ .endm diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index c9103a6fa06e..6e6c0a740837 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -124,7 +124,12 @@ bool emulate_vsyscall(unsigned long error_code, if ((error_code & (X86_PF_WRITE | X86_PF_USER)) != X86_PF_USER) return false; - if (!(error_code & X86_PF_INSTR)) { + /* + * Assume that faults at regs->ip are because of an + * instruction fetch. Return early and avoid + * emulation for faults during data accesses: + */ + if (address != regs->ip) { /* Failed vsyscall read */ if (vsyscall_mode == EMULATE) return false; @@ -137,12 +142,18 @@ bool emulate_vsyscall(unsigned long error_code, } /* + * X86_PF_INSTR is only set when NX is supported. When + * available, use it to double-check that the emulation code + * is only being used for instruction fetches: + */ + if (cpu_feature_enabled(X86_FEATURE_NX)) + WARN_ON_ONCE(!(error_code & X86_PF_INSTR)); + + /* * No point in checking CS -- the only way to get here is a user mode * trap to a high address, which means that we're in 64-bit user code. 
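The vsyscall change above replaces a hard dependency on X86_PF_INSTR, which is only reported when NX is supported, with a heuristic that works everywhere: a fault at regs->ip is treated as an instruction fetch, anything else as a data access. Condensed into plain C (a sketch of the decision, not the full emulate_vsyscall()):

	#include <assert.h>
	#include <stdbool.h>

	#define X86_PF_INSTR	(1UL << 4)

	/* Sketch: classify a vsyscall-page fault without relying on NX.
	 * A fault at the instruction pointer is an instruction fetch;
	 * anything else is a data access and is not emulated. */
	static bool is_vsyscall_fetch(unsigned long address, unsigned long ip,
				      unsigned long error_code, bool have_nx)
	{
		if (address != ip)
			return false;

		/* When NX exists, hardware should agree it was a fetch
		 * (the kernel uses WARN_ON_ONCE() rather than assert). */
		if (have_nx)
			assert(error_code & X86_PF_INSTR);

		return true;
	}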
*/ - WARN_ON_ONCE(address != regs->ip); - if (vsyscall_mode == NONE) { warn_bad_vsyscall(KERN_INFO, regs, "vsyscall attempted with vsyscall=none"); diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index f2ad77929d6e..5cfb27f26583 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -13,6 +13,15 @@ # define KEXEC_DEBUG_EXC_HANDLER_SIZE 6 /* PUSHI, PUSHI, 2-byte JMP */ #endif +#ifdef CONFIG_X86_64 + +#include <linux/bits.h> + +#define RELOC_KERNEL_PRESERVE_CONTEXT BIT(0) +#define RELOC_KERNEL_CACHE_INCOHERENT BIT(1) + +#endif + # define KEXEC_CONTROL_PAGE_SIZE 4096 # define KEXEC_CONTROL_CODE_MAX_SIZE 2048 @@ -121,8 +130,7 @@ typedef unsigned long relocate_kernel_fn(unsigned long indirection_page, unsigned long pa_control_page, unsigned long start_address, - unsigned int preserve_context, - unsigned int host_mem_enc_active); + unsigned int flags); #endif extern relocate_kernel_fn relocate_kernel; #define ARCH_HAS_KIMAGE_ARCH diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index 18a5c3119e1a..62c3e4de3303 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -145,7 +145,7 @@ KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons); KVM_X86_OP_OPTIONAL(get_untagged_addr) KVM_X86_OP_OPTIONAL(alloc_apic_backing_page) KVM_X86_OP_OPTIONAL_RET0(gmem_prepare) -KVM_X86_OP_OPTIONAL_RET0(private_max_mapping_level) +KVM_X86_OP_OPTIONAL_RET0(gmem_max_mapping_level) KVM_X86_OP_OPTIONAL(gmem_invalidate) #undef KVM_X86_OP diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f19a76d3ca0e..c56cc54d682a 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1922,7 +1922,7 @@ struct kvm_x86_ops { void *(*alloc_apic_backing_page)(struct kvm_vcpu *vcpu); int (*gmem_prepare)(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, int max_order); void (*gmem_invalidate)(kvm_pfn_t start, kvm_pfn_t end); - int (*private_max_mapping_level)(struct kvm *kvm, kvm_pfn_t pfn); + int (*gmem_max_mapping_level)(struct kvm *kvm, kvm_pfn_t pfn, bool is_private); }; struct kvm_x86_nested_ops { @@ -2276,10 +2276,8 @@ void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level, int tdp_max_root_level, int tdp_huge_page_level); -#ifdef CONFIG_KVM_PRIVATE_MEM +#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES #define kvm_arch_has_private_mem(kvm) ((kvm)->arch.has_private_mem) -#else -#define kvm_arch_has_private_mem(kvm) false #endif #define kvm_arch_has_readonly_mem(kvm) (!(kvm)->arch.has_protected_state) diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 57bc74e112f2..4a47c16e2df8 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -124,7 +124,6 @@ bool kvm_para_available(void); unsigned int kvm_arch_para_features(void); unsigned int kvm_arch_para_hints(void); void kvm_async_pf_task_wait_schedule(u32 token); -void kvm_async_pf_task_wake(u32 token); u32 kvm_read_and_reset_apf_flags(void); bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token); @@ -148,7 +147,6 @@ static inline void kvm_spinlock_init(void) #else /* CONFIG_KVM_GUEST */ #define kvm_async_pf_task_wait_schedule(T) do {} while(0) -#define kvm_async_pf_task_wake(T) do {} while(0) static inline bool kvm_para_available(void) { diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index bde58f6510ac..a24c7805acdb 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ 
-731,6 +731,8 @@ void __noreturn stop_this_cpu(void *dummy); void microcode_check(struct cpuinfo_x86 *prev_info); void store_cpu_caps(struct cpuinfo_x86 *info); +DECLARE_PER_CPU(bool, cache_state_incoherent); + enum l1tf_mitigations { L1TF_MITIGATION_OFF, L1TF_MITIGATION_AUTO, diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h index 7ddef3a69866..6b338d7f01b7 100644 --- a/arch/x86/include/asm/tdx.h +++ b/arch/x86/include/asm/tdx.h @@ -102,10 +102,31 @@ u64 __seamcall_ret(u64 fn, struct tdx_module_args *args); u64 __seamcall_saved_ret(u64 fn, struct tdx_module_args *args); void tdx_init(void); +#include <linux/preempt.h> #include <asm/archrandom.h> +#include <asm/processor.h> typedef u64 (*sc_func_t)(u64 fn, struct tdx_module_args *args); +static __always_inline u64 __seamcall_dirty_cache(sc_func_t func, u64 fn, + struct tdx_module_args *args) +{ + lockdep_assert_preemption_disabled(); + + /* + * SEAMCALLs are made to the TDX module and can generate dirty + * cachelines of TDX private memory. Mark cache state incoherent + * so that the cache can be flushed during kexec. + * + * This needs to be done before actually making the SEAMCALL, + * because the kexec-ing CPU could send an NMI to stop remote CPUs, + * in which case even disabling IRQs won't help here. + */ + this_cpu_write(cache_state_incoherent, true); + + return func(fn, args); +} + static __always_inline u64 sc_retry(sc_func_t func, u64 fn, struct tdx_module_args *args) { @@ -113,7 +134,9 @@ static __always_inline u64 sc_retry(sc_func_t func, u64 fn, u64 ret; do { - ret = func(fn, args); + preempt_disable(); + ret = __seamcall_dirty_cache(func, fn, args); + preempt_enable(); } while (ret == TDX_RND_NO_ENTROPY && --retry); return ret; @@ -131,6 +154,8 @@ int tdx_guest_keyid_alloc(void); u32 tdx_get_nr_guest_keyids(void); void tdx_guest_keyid_free(unsigned int keyid); +void tdx_quirk_reset_page(struct page *page); + struct tdx_td { /* TD root structure: */ struct page *tdr_page; @@ -146,6 +171,8 @@ struct tdx_td { struct tdx_vp { /* TDVP root page */ struct page *tdvpr_page; + /* precalculated page_to_phys(tdvpr_page) for use in noinstr code */ + phys_addr_t tdvpr_pa; /* TD vCPU control structure: */ struct page **tdcx_pages; @@ -203,5 +230,11 @@ static inline const char *tdx_dump_mce_info(struct mce *m) { return NULL; } static inline const struct tdx_sys_info *tdx_get_sysinfo(void) { return NULL; } #endif /* CONFIG_INTEL_TDX_HOST */ +#ifdef CONFIG_KEXEC_CORE +void tdx_cpu_flush_cache_for_kexec(void); +#else +static inline void tdx_cpu_flush_cache_for_kexec(void) { } +#endif + #endif /* !__ASSEMBLER__ */ #endif /* _ASM_X86_TDX_H */ diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index a6f88ca1a6b4..5398db4dedb4 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -546,6 +546,23 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c) u64 msr; /* + * Mark that WBINVD is needed during kexec on processors that + * support SME. This provides support for performing a successful + * kexec when going from SME inactive to SME active (or vice-versa). + * + * The cache must be cleared so that if there are entries with the + * same physical address, both with and without the encryption bit, + * they don't race each other when flushed and potentially end up + * with the wrong entry being committed to memory. + * + * Test the CPUID bit directly because with mem_encrypt=off the + * BSP will clear the X86_FEATURE_SME bit and the APs will not + * see it set after that.
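The per-CPU cache_state_incoherent flag introduced above turns scattered heuristics into one producer/consumer protocol: SME detection and every SEAMCALL set the flag, and the kexec/shutdown paths test it to decide whether WBINVD is required. A condensed sketch of the pattern (kernel-style, using the real percpu API; the function names are hypothetical):

	#include <linux/percpu.h>
	#include <linux/preempt.h>
	#include <asm/special_insns.h>	/* wbinvd() */

	DEFINE_PER_CPU(bool, cache_state_incoherent_example);

	/* Producer: mark before the operation that can dirty private
	 * memory; preemption stays off so the mark and the operation
	 * happen on the same CPU. */
	static void example_make_seamcall(void)
	{
		preempt_disable();
		this_cpu_write(cache_state_incoherent_example, true);
		/* ... issue the SEAMCALL here ... */
		preempt_enable();
	}

	/* Consumer: only pay for WBINVD when something dirtied caches. */
	static void example_stop_this_cpu(void)
	{
		if (this_cpu_read(cache_state_incoherent_example))
			wbinvd();
	}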
+ */ + if (c->extended_cpuid_level >= 0x8000001f && (cpuid_eax(0x8000001f) & BIT(0))) + __this_cpu_write(cache_state_incoherent, true); + + /* * BIOS support is required for SME and SEV. * For SME: If BIOS has enabled SME then adjust x86_phys_bits by * the SME physical address space reduction value. diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 8ae750cde0c6..b67d7c59dca0 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -190,7 +190,7 @@ static void apf_task_wake_all(void) } } -void kvm_async_pf_task_wake(u32 token) +static void kvm_async_pf_task_wake(u32 token) { u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS); struct kvm_task_sleep_head *b = &async_pf_sleepers[key]; @@ -241,7 +241,6 @@ again: /* A dummy token might be allocated and ultimately not used. */ kfree(dummy); } -EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake); noinstr u32 kvm_read_and_reset_apf_flags(void) { @@ -933,6 +932,19 @@ static void kvm_sev_hc_page_enc_status(unsigned long pfn, int npages, bool enc) static void __init kvm_init_platform(void) { + u64 tolud = PFN_PHYS(e820__end_of_low_ram_pfn()); + /* + * Note, hardware requires variable MTRR ranges to be power-of-2 sized + * and naturally aligned. But when forcing guest MTRR state, Linux + * doesn't program the forced ranges into hardware. Don't bother doing + * the math to generate a technically-legal range. + */ + struct mtrr_var_range pci_hole = { + .base_lo = tolud | X86_MEMTYPE_UC, + .mask_lo = (u32)(~(SZ_4G - tolud - 1)) | MTRR_PHYSMASK_V, + .mask_hi = (BIT_ULL(boot_cpu_data.x86_phys_bits) - 1) >> 32, + }; + if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT) && kvm_para_has_feature(KVM_FEATURE_MIGRATION_CONTROL)) { unsigned long nr_pages; @@ -982,8 +994,12 @@ static void __init kvm_init_platform(void) kvmclock_init(); x86_platform.apic_post_init = kvm_apic_init; - /* Set WB as the default cache mode for SEV-SNP and TDX */ - guest_force_mtrr_state(NULL, 0, MTRR_TYPE_WRBACK); + /* + * Set WB as the default cache mode for SEV-SNP and TDX, with a single + * UC range for the legacy PCI hole, e.g. so that devices that expect + * to get UC/WC mappings don't get surprised with WB. + */ + guest_force_mtrr_state(&pci_hole, 1, MTRR_TYPE_WRBACK); } #if defined(CONFIG_AMD_MEM_ENCRYPT) @@ -1073,16 +1089,6 @@ static void kvm_wait(u8 *ptr, u8 val) void __init kvm_spinlock_init(void) { /* - * In case host doesn't support KVM_FEATURE_PV_UNHALT there is still an - * advantage of keeping virt_spin_lock_key enabled: virt_spin_lock() is - * preferred over native qspinlock when vCPU is preempted. - */ - if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) { - pr_info("PV spinlocks disabled, no host support\n"); - return; - } - - /* * Disable PV spinlocks and use native qspinlock when dedicated pCPUs * are available. */ @@ -1101,6 +1107,16 @@ void __init kvm_spinlock_init(void) goto out; } + /* + * In case host doesn't support KVM_FEATURE_PV_UNHALT there is still an + * advantage of keeping virt_spin_lock_key enabled: virt_spin_lock() is + * preferred over native qspinlock when vCPU is preempted. 
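The pci_hole range above is only consumed by guest_force_mtrr_state(), never written to hardware, which is why the comment can wave off the power-of-2 requirement. The encoding itself is ordinary variable-MTRR layout; a worked example, assuming tolud = 0xC0000000 (3 GiB of low RAM) and 48 physical address bits:

	#include <stdint.h>
	#include <stdio.h>

	#define X86_MEMTYPE_UC	0x0ULL
	#define MTRR_PHYSMASK_V	(1ULL << 11)
	#define SZ_4G		(1ULL << 32)

	int main(void)
	{
		uint64_t tolud = 0xC0000000ULL;	/* assumed top of low RAM */
		int phys_bits = 48;		/* assumed CPU phys bits */

		uint32_t base_lo = (uint32_t)(tolud | X86_MEMTYPE_UC);
		uint32_t mask_lo = (uint32_t)~(SZ_4G - tolud - 1) | MTRR_PHYSMASK_V;
		uint32_t mask_hi = (uint32_t)(((1ULL << phys_bits) - 1) >> 32);

		/* base_lo=0xc0000000 mask_lo=0xc0000800 mask_hi=0xffff:
		 * an (approximately encoded) UC range for [3G, 4G). */
		printf("base_lo=%#x mask_lo=%#x mask_hi=%#x\n",
		       base_lo, mask_lo, mask_hi);
		return 0;
	}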
+ */ + if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) { + pr_info("PV spinlocks disabled, no host support\n"); + return; + } + pr_info("PV spinlocks enabled\n"); __pv_init_lock_hash(); diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 697fb99406e6..15088d14904f 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -29,6 +29,7 @@ #include <asm/set_memory.h> #include <asm/cpu.h> #include <asm/efi.h> +#include <asm/processor.h> #ifdef CONFIG_ACPI /* @@ -346,6 +347,22 @@ int machine_kexec_prepare(struct kimage *image) unsigned long reloc_end = (unsigned long)__relocate_kernel_end; int result; + /* + * Some early TDX-capable platforms have an erratum. A kernel + * partial write (a write transaction of less than a cacheline + * landing at the memory controller) to TDX private memory poisons + * that memory, and a subsequent read triggers a machine check. + * + * On those platforms the old kernel must reset TDX private + * memory before jumping to the new kernel, otherwise the new + * kernel may see an unexpected machine check. For simplicity + * just fail kexec/kdump on those platforms. + */ + if (boot_cpu_has_bug(X86_BUG_TDX_PW_MCE)) { + pr_info_once("Not allowed on platform with tdx_pw_mce bug\n"); + return -EOPNOTSUPP; + } + /* Setup the identity mapped 64bit page table */ result = init_pgtable(image, __pa(control_page)); if (result) @@ -384,16 +401,10 @@ void __nocfi machine_kexec(struct kimage *image) { unsigned long reloc_start = (unsigned long)__relocate_kernel_start; relocate_kernel_fn *relocate_kernel_ptr; - unsigned int host_mem_enc_active; + unsigned int relocate_kernel_flags; int save_ftrace_enabled; void *control_page; - /* - * This must be done before load_segments() since if call depth tracking - * is used then GS must be valid to make any function calls. - */ - host_mem_enc_active = cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT); - #ifdef CONFIG_KEXEC_JUMP if (image->preserve_context) save_processor_state(); @@ -427,6 +438,17 @@ void __nocfi machine_kexec(struct kimage *image) */ relocate_kernel_ptr = control_page + (unsigned long)relocate_kernel - reloc_start; + relocate_kernel_flags = 0; + if (image->preserve_context) + relocate_kernel_flags |= RELOC_KERNEL_PRESERVE_CONTEXT; + + /* + * This must be done before load_segments() since it resets + * GS to 0 and percpu data needs the correct GS to work. + */ + if (this_cpu_read(cache_state_incoherent)) + relocate_kernel_flags |= RELOC_KERNEL_CACHE_INCOHERENT; + /* * The segment registers are funny things, they have both a * visible and an invisible part. Whenever the visible part is * set to a specific selector, the invisible part is loaded from * a table in memory. At no other time is the descriptor table * in memory accessed. * * Take advantage of this here by force loading the segments, * before the GDT is zapped with an invalid value. + * + * load_segments() resets GS to 0. Don't make any function call + * after here since call depth tracking uses percpu variables to + * operate (relocate_kernel() is explicitly ignored by call depth + * tracking).
*/ load_segments(); @@ -443,8 +470,7 @@ void __nocfi machine_kexec(struct kimage *image) image->start = relocate_kernel_ptr((unsigned long)image->head, virt_to_phys(control_page), image->start, - image->preserve_context, - host_mem_enc_active); + relocate_kernel_flags); #ifdef CONFIG_KEXEC_JUMP if (image->preserve_context) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index e3a3987b0c4f..4c718f8adc59 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -89,6 +89,16 @@ DEFINE_PER_CPU(bool, __tss_limit_invalid); EXPORT_PER_CPU_SYMBOL_GPL(__tss_limit_invalid); /* + * The cache may be in an incoherent state and needs flushing during kexec. + * E.g., on SME/TDX platforms, dirty cacheline aliases with and without + * encryption bit(s) can coexist and the cache needs to be flushed before + * booting to the new kernel to avoid silent memory corruption due to + * dirty cachelines with different encryption properties being written back + * to memory. + */ +DEFINE_PER_CPU(bool, cache_state_incoherent); + +/* * this gets called so that we can store lazy state into memory and copy the * current task into the new thread. */ @@ -827,19 +837,7 @@ void __noreturn stop_this_cpu(void *dummy) disable_local_APIC(); mcheck_cpu_clear(c); - /* - * Use wbinvd on processors that support SME. This provides support - * for performing a successful kexec when going from SME inactive - * to SME active (or vice-versa). The cache must be cleared so that - * if there are entries with the same physical address, both with and - * without the encryption bit, they don't race each other when flushed - * and potentially end up with the wrong entry being committed to - * memory. - * - * Test the CPUID bit directly because the machine might've cleared - * X86_FEATURE_SME due to cmdline options. - */ - if (c->extended_cpuid_level >= 0x8000001f && (cpuid_eax(0x8000001f) & BIT(0))) + if (this_cpu_read(cache_state_incoherent)) wbinvd(); /* diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index ea604f4d0b52..11e20bb13aca 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S @@ -66,8 +66,7 @@ SYM_CODE_START_NOALIGN(relocate_kernel) * %rdi indirection_page * %rsi pa_control_page * %rdx start address - * %rcx preserve_context - * %r8 host_mem_enc_active + * %rcx flags: RELOC_KERNEL_* */ /* Save the CPU context, used for jumping back */ @@ -111,7 +110,7 @@ SYM_CODE_START_NOALIGN(relocate_kernel) /* save indirection list for jumping back */ movq %rdi, pa_backup_pages_map(%rip) - /* Save the preserve_context to %r11 as swap_pages clobbers %rcx. */ + /* Save the flags to %r11 as swap_pages clobbers %rcx. */ movq %rcx, %r11 /* setup a new stack at the end of the physical control page */ @@ -129,9 +128,8 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) /* * %rdi indirection page * %rdx start address - * %r8 host_mem_enc_active * %r9 page table page - * %r11 preserve_context + * %r11 flags: RELOC_KERNEL_* * %r13 original CR4 when relocate_kernel() was invoked */ @@ -200,14 +198,21 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) movq %r9, %cr3 /* + * If the memory cache is in an incoherent state, e.g., due to + * memory encryption, do WBINVD to flush the cache. + * * If SME is active, there could be old encrypted cache line * entries that will conflict with the now unencrypted memory * used by kexec. Flush the caches before copying the kernel.
+ * + * Note SME sets this flag to true when the platform supports + * SME, so the WBINVD is performed even if SME is not activated + * by the kernel, but this does no harm. */ - testq %r8, %r8 - jz .Lsme_off + testb $RELOC_KERNEL_CACHE_INCOHERENT, %r11b + jz .Lnowbinvd wbinvd -.Lsme_off: +.Lnowbinvd: call swap_pages @@ -220,7 +225,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) movq %cr3, %rax movq %rax, %cr3 - testq %r11, %r11 /* preserve_context */ + testb $RELOC_KERNEL_PRESERVE_CONTEXT, %r11b jnz .Lrelocate /* @@ -273,7 +278,13 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) ANNOTATE_NOENDBR andq $PAGE_MASK, %r8 lea PAGE_SIZE(%r8), %rsp - movl $1, %r11d /* Ensure preserve_context flag is set */ + /* + * Ensure RELOC_KERNEL_PRESERVE_CONTEXT flag is set so that + * swap_pages() can swap pages correctly. Note all other + * RELOC_KERNEL_* flags passed to relocate_kernel() are not + * restored. + */ + movl $RELOC_KERNEL_PRESERVE_CONTEXT, %r11d call swap_pages movq kexec_va_control_page(%rip), %rax 0: addq $virtual_mapped - 0b, %rax @@ -321,7 +332,7 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages) UNWIND_HINT_END_OF_STACK /* * %rdi indirection page - * %r11 preserve_context + * %r11 flags: RELOC_KERNEL_* */ movq %rdi, %rcx /* Put the indirection_page in %rcx */ xorl %edi, %edi @@ -357,7 +368,8 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages) movq %rdi, %rdx /* Save destination page to %rdx */ movq %rsi, %rax /* Save source page to %rax */ - testq %r11, %r11 /* Only actually swap for ::preserve_context */ + /* Only actually swap for ::preserve_context */ + testb $RELOC_KERNEL_PRESERVE_CONTEXT, %r11b jz .Lnoswap /* copy source page to swap page */ diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 2c86673155c9..4e43923656d0 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -46,8 +46,8 @@ config KVM_X86 select HAVE_KVM_PM_NOTIFIER if PM select KVM_GENERIC_HARDWARE_ENABLING select KVM_GENERIC_PRE_FAULT_MEMORY - select KVM_GENERIC_PRIVATE_MEM if KVM_SW_PROTECTED_VM select KVM_WERROR if WERROR + select KVM_GUEST_MEMFD if X86_64 config KVM tristate "Kernel-based Virtual Machine (KVM) support" @@ -74,7 +74,7 @@ config KVM_WERROR # FRAME_WARN, i.e. KVM_WERROR=y with KASAN=y requires special tuning. # Building KVM with -Werror and KASAN is still doable via enabling # the kernel-wide WERROR=y. - depends on KVM && ((EXPERT && !KASAN) || WERROR) + depends on KVM_X86 && ((EXPERT && !KASAN) || WERROR) help Add -Werror to the build flags for KVM. @@ -83,7 +83,8 @@ config KVM_WERROR config KVM_SW_PROTECTED_VM bool "Enable support for KVM software-protected VMs" depends on EXPERT - depends on KVM && X86_64 + depends on KVM_X86 && X86_64 + select KVM_GENERIC_MEMORY_ATTRIBUTES help Enable support for KVM software-protected VMs. Currently, software- protected VMs are purely a development and testing vehicle for @@ -95,8 +96,6 @@ config KVM_SW_PROTECTED_VM config KVM_INTEL tristate "KVM for Intel (and compatible) processors support" depends on KVM && IA32_FEAT_CTL - select KVM_GENERIC_PRIVATE_MEM if INTEL_TDX_HOST - select KVM_GENERIC_MEMORY_ATTRIBUTES if INTEL_TDX_HOST help Provides support for KVM on processors equipped with Intel's VT extensions, a.k.a. Virtual Machine Extensions (VMX). @@ -135,6 +134,8 @@ config KVM_INTEL_TDX bool "Intel Trust Domain Extensions (TDX) support" default y depends on INTEL_TDX_HOST + select KVM_GENERIC_MEMORY_ATTRIBUTES + select HAVE_KVM_ARCH_GMEM_POPULATE help Provides support for launching Intel Trust Domain Extensions (TDX) confidential VMs on Intel processors.
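Folding preserve_context and host_mem_enc_active into a single flags argument, as the hunks above do, keeps the assembly interface stable when more conditions appear. A condensed sketch of the C side (mirroring the machine_kexec() hunk, with RELOC_KERNEL_* values matching the kexec.h definitions above):

	#include <stdbool.h>

	#define RELOC_KERNEL_PRESERVE_CONTEXT	(1U << 0)
	#define RELOC_KERNEL_CACHE_INCOHERENT	(1U << 1)

	/* Sketch of the flag assembly done in machine_kexec(): both
	 * inputs must be sampled while percpu access still works,
	 * i.e. before load_segments() resets GS. */
	static unsigned int build_relocate_kernel_flags(bool preserve_context,
							bool cache_incoherent)
	{
		unsigned int flags = 0;

		if (preserve_context)
			flags |= RELOC_KERNEL_PRESERVE_CONTEXT;
		if (cache_incoherent)
			flags |= RELOC_KERNEL_CACHE_INCOHERENT;

		return flags;
	}

On the assembly side a single testb $RELOC_KERNEL_..., %r11b per decision point then replaces the old one-register-per-boolean convention.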
@@ -157,9 +158,10 @@ config KVM_AMD_SEV depends on KVM_AMD && X86_64 depends on CRYPTO_DEV_SP_PSP && !(KVM_AMD=y && CRYPTO_DEV_CCP_DD=m) select ARCH_HAS_CC_PLATFORM - select KVM_GENERIC_PRIVATE_MEM + select KVM_GENERIC_MEMORY_ATTRIBUTES select HAVE_KVM_ARCH_GMEM_PREPARE select HAVE_KVM_ARCH_GMEM_INVALIDATE + select HAVE_KVM_ARCH_GMEM_POPULATE help Provides support for launching encrypted VMs which use Secure Encrypted Virtualization (SEV), Secure Encrypted Virtualization with @@ -169,7 +171,7 @@ config KVM_AMD_SEV config KVM_IOAPIC bool "I/O APIC, PIC, and PIT emulation" default y - depends on KVM + depends on KVM_X86 help Provides support for KVM to emulate an I/O APIC, PIC, and PIT, i.e. for full in-kernel APIC emulation. @@ -179,7 +181,7 @@ config KVM_IOAPIC config KVM_SMM bool "System Management Mode emulation" default y - depends on KVM + depends on KVM_X86 help Provides support for KVM to emulate System Management Mode (SMM) in virtual machines. This can be used by the virtual machine @@ -189,7 +191,7 @@ config KVM_SMM config KVM_HYPERV bool "Support for Microsoft Hyper-V emulation" - depends on KVM + depends on KVM_X86 default y help Provides KVM support for emulating Microsoft Hyper-V. This allows KVM @@ -203,7 +205,7 @@ config KVM_HYPERV config KVM_XEN bool "Support for Xen hypercall interface" - depends on KVM + depends on KVM_X86 help Provides KVM support for the hosting Xen HVM guests and passing Xen hypercalls to userspace. @@ -213,7 +215,7 @@ config KVM_XEN config KVM_PROVE_MMU bool "Prove KVM MMU correctness" depends on DEBUG_KERNEL - depends on KVM + depends on KVM_X86 depends on EXPERT help Enables runtime assertions in KVM's MMU that are too costly to enable @@ -228,7 +230,7 @@ config KVM_EXTERNAL_WRITE_TRACKING config KVM_MAX_NR_VCPUS int "Maximum number of vCPUs per KVM guest" - depends on KVM + depends on KVM_X86 range 1024 4096 default 4096 if MAXSMP default 1024 diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 6e838cb6c9e1..56c80588efa0 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3285,12 +3285,72 @@ out: return level; } -static int __kvm_mmu_max_mapping_level(struct kvm *kvm, - const struct kvm_memory_slot *slot, - gfn_t gfn, int max_level, bool is_private) +static u8 kvm_max_level_for_order(int order) +{ + BUILD_BUG_ON(KVM_MAX_HUGEPAGE_LEVEL > PG_LEVEL_1G); + + KVM_MMU_WARN_ON(order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G) && + order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M) && + order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_4K)); + + if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G)) + return PG_LEVEL_1G; + + if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M)) + return PG_LEVEL_2M; + + return PG_LEVEL_4K; +} + +static u8 kvm_gmem_max_mapping_level(struct kvm *kvm, struct kvm_page_fault *fault, + const struct kvm_memory_slot *slot, gfn_t gfn, + bool is_private) +{ + u8 max_level, coco_level; + kvm_pfn_t pfn; + + /* For faults, use the gmem information that was resolved earlier. */ + if (fault) { + pfn = fault->pfn; + max_level = fault->max_level; + } else { + /* TODO: Call into guest_memfd once hugepages are supported. */ + WARN_ONCE(1, "Get pfn+order from guest_memfd"); + pfn = KVM_PFN_ERR_FAULT; + max_level = PG_LEVEL_4K; + } + + if (max_level == PG_LEVEL_4K) + return max_level; + + /* + * CoCo may influence the max mapping level, e.g. due to RMP or S-EPT + * restrictions. A return of '0' means "no additional restrictions", to + * allow for using an optional "ret0" static call. 
+ */ + coco_level = kvm_x86_call(gmem_max_mapping_level)(kvm, pfn, is_private); + if (coco_level) + max_level = min(max_level, coco_level); + + return max_level; +} + +int kvm_mmu_max_mapping_level(struct kvm *kvm, struct kvm_page_fault *fault, + const struct kvm_memory_slot *slot, gfn_t gfn) { struct kvm_lpage_info *linfo; - int host_level; + int host_level, max_level; + bool is_private; + + lockdep_assert_held(&kvm->mmu_lock); + + if (fault) { + max_level = fault->max_level; + is_private = fault->is_private; + } else { + max_level = PG_LEVEL_NUM; + is_private = kvm_mem_is_private(kvm, gfn); + } max_level = min(max_level, max_huge_page_level); for ( ; max_level > PG_LEVEL_4K; max_level--) { @@ -3299,25 +3359,17 @@ static int __kvm_mmu_max_mapping_level(struct kvm *kvm, break; } - if (is_private) - return max_level; - if (max_level == PG_LEVEL_4K) return PG_LEVEL_4K; - host_level = host_pfn_mapping_level(kvm, gfn, slot); + if (is_private || kvm_memslot_is_gmem_only(slot)) + host_level = kvm_gmem_max_mapping_level(kvm, fault, slot, gfn, + is_private); + else + host_level = host_pfn_mapping_level(kvm, gfn, slot); return min(host_level, max_level); } -int kvm_mmu_max_mapping_level(struct kvm *kvm, - const struct kvm_memory_slot *slot, gfn_t gfn) -{ - bool is_private = kvm_slot_can_be_private(slot) && - kvm_mem_is_private(kvm, gfn); - - return __kvm_mmu_max_mapping_level(kvm, slot, gfn, PG_LEVEL_NUM, is_private); -} - void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { struct kvm_memory_slot *slot = fault->slot; @@ -3338,9 +3390,8 @@ void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault * Enforce the iTLB multihit workaround after capturing the requested * level, which will be used to do precise, accurate accounting. 
*/ - fault->req_level = __kvm_mmu_max_mapping_level(vcpu->kvm, slot, - fault->gfn, fault->max_level, - fault->is_private); + fault->req_level = kvm_mmu_max_mapping_level(vcpu->kvm, fault, + fault->slot, fault->gfn); if (fault->req_level == PG_LEVEL_4K || fault->huge_page_disallowed) return; @@ -4503,42 +4554,6 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work) vcpu->stat.pf_fixed++; } -static inline u8 kvm_max_level_for_order(int order) -{ - BUILD_BUG_ON(KVM_MAX_HUGEPAGE_LEVEL > PG_LEVEL_1G); - - KVM_MMU_WARN_ON(order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G) && - order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M) && - order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_4K)); - - if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G)) - return PG_LEVEL_1G; - - if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M)) - return PG_LEVEL_2M; - - return PG_LEVEL_4K; -} - -static u8 kvm_max_private_mapping_level(struct kvm *kvm, kvm_pfn_t pfn, - u8 max_level, int gmem_order) -{ - u8 req_max_level; - - if (max_level == PG_LEVEL_4K) - return PG_LEVEL_4K; - - max_level = min(kvm_max_level_for_order(gmem_order), max_level); - if (max_level == PG_LEVEL_4K) - return PG_LEVEL_4K; - - req_max_level = kvm_x86_call(private_max_mapping_level)(kvm, pfn); - if (req_max_level) - max_level = min(max_level, req_max_level); - - return max_level; -} - static void kvm_mmu_finish_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, int r) { @@ -4546,12 +4561,12 @@ static void kvm_mmu_finish_page_fault(struct kvm_vcpu *vcpu, r == RET_PF_RETRY, fault->map_writable); } -static int kvm_mmu_faultin_pfn_private(struct kvm_vcpu *vcpu, - struct kvm_page_fault *fault) +static int kvm_mmu_faultin_pfn_gmem(struct kvm_vcpu *vcpu, + struct kvm_page_fault *fault) { int max_order, r; - if (!kvm_slot_can_be_private(fault->slot)) { + if (!kvm_slot_has_gmem(fault->slot)) { kvm_mmu_prepare_memory_fault_exit(vcpu, fault); return -EFAULT; } @@ -4564,8 +4579,7 @@ static int kvm_mmu_faultin_pfn_private(struct kvm_vcpu *vcpu, } fault->map_writable = !(fault->slot->flags & KVM_MEM_READONLY); - fault->max_level = kvm_max_private_mapping_level(vcpu->kvm, fault->pfn, - fault->max_level, max_order); + fault->max_level = kvm_max_level_for_order(max_order); return RET_PF_CONTINUE; } @@ -4575,8 +4589,8 @@ static int __kvm_mmu_faultin_pfn(struct kvm_vcpu *vcpu, { unsigned int foll = fault->write ? FOLL_WRITE : 0; - if (fault->is_private) - return kvm_mmu_faultin_pfn_private(vcpu, fault); + if (fault->is_private || kvm_memslot_is_gmem_only(fault->slot)) + return kvm_mmu_faultin_pfn_gmem(vcpu, fault); foll |= FOLL_NOWAIT; fault->pfn = __kvm_faultin_pfn(fault->slot, fault->gfn, foll, @@ -7165,7 +7179,7 @@ restart: * mapping if the indirect sp has level = 1. 
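kvm_max_level_for_order() and the clamping in kvm_gmem_max_mapping_level() above reduce to arithmetic on gfn shifts: on x86, KVM_HPAGE_GFN_SHIFT() adds 9 bits per level above 4K. A self-contained model (a sketch; the real helpers also WARN on unexpected orders):

	enum { PG_LEVEL_4K = 1, PG_LEVEL_2M = 2, PG_LEVEL_1G = 3 };

	/* gfn bits covered by a mapping above PG_LEVEL_4K: 9 per level */
	static int hpage_gfn_shift(int level)
	{
		return 9 * (level - 1);
	}

	static int max_level_for_order(int order)
	{
		if (order >= hpage_gfn_shift(PG_LEVEL_1G))
			return PG_LEVEL_1G;
		if (order >= hpage_gfn_shift(PG_LEVEL_2M))
			return PG_LEVEL_2M;
		return PG_LEVEL_4K;
	}

	/* A CoCo backend (RMP/S-EPT) returning 0 imposes no additional
	 * restriction; otherwise it caps the level. */
	static int clamp_with_coco(int max_level, int coco_level)
	{
		if (coco_level && coco_level < max_level)
			return coco_level;
		return max_level;
	}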
*/ if (sp->role.direct && - sp->role.level < kvm_mmu_max_mapping_level(kvm, slot, sp->gfn)) { + sp->role.level < kvm_mmu_max_mapping_level(kvm, NULL, slot, sp->gfn)) { kvm_zap_one_rmap_spte(kvm, rmap_head, sptep); if (kvm_available_flush_remote_tlbs_range()) diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h index 65f3c89d7c5d..b776be783a2f 100644 --- a/arch/x86/kvm/mmu/mmu_internal.h +++ b/arch/x86/kvm/mmu/mmu_internal.h @@ -411,7 +411,7 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, return r; } -int kvm_mmu_max_mapping_level(struct kvm *kvm, +int kvm_mmu_max_mapping_level(struct kvm *kvm, struct kvm_page_fault *fault, const struct kvm_memory_slot *slot, gfn_t gfn); void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault); void disallowed_hugepage_adjust(struct kvm_page_fault *fault, u64 spte, int cur_level); diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 7f3d7229b2c1..740cb06accdb 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -1813,7 +1813,7 @@ retry: if (iter.gfn < start || iter.gfn >= end) continue; - max_mapping_level = kvm_mmu_max_mapping_level(kvm, slot, iter.gfn); + max_mapping_level = kvm_mmu_max_mapping_level(kvm, NULL, slot, iter.gfn); if (max_mapping_level < iter.level) continue; diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 0635bd71c10e..5bac4d20aec0 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -2361,7 +2361,7 @@ static int snp_launch_update(struct kvm *kvm, struct kvm_sev_cmd *argp) mutex_lock(&kvm->slots_lock); memslot = gfn_to_memslot(kvm, params.gfn_start); - if (!kvm_slot_can_be_private(memslot)) { + if (!kvm_slot_has_gmem(memslot)) { ret = -EINVAL; goto out; } @@ -4715,7 +4715,7 @@ void sev_handle_rmp_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code) } slot = gfn_to_memslot(kvm, gfn); - if (!kvm_slot_can_be_private(slot)) { + if (!kvm_slot_has_gmem(slot)) { pr_warn_ratelimited("SEV: Unexpected RMP fault, non-private slot for GPA 0x%llx\n", gpa); return; @@ -4943,7 +4943,7 @@ next_pfn: } } -int sev_private_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn) +int sev_gmem_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn, bool is_private) { int level, rc; bool assigned; diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 1bfebe40854f..3a9fe0a8b78c 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -5179,7 +5179,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .gmem_prepare = sev_gmem_prepare, .gmem_invalidate = sev_gmem_invalidate, - .private_max_mapping_level = sev_private_max_mapping_level, + .gmem_max_mapping_level = sev_gmem_max_mapping_level, }; /* diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 58b9d168e0c8..70df7c6413cf 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -866,7 +866,7 @@ void sev_handle_rmp_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code); void sev_snp_init_protected_guest_state(struct kvm_vcpu *vcpu); int sev_gmem_prepare(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, int max_order); void sev_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end); -int sev_private_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn); +int sev_gmem_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn, bool is_private); struct vmcb_save_area *sev_decrypt_vmsa(struct kvm_vcpu *vcpu); void sev_free_decrypted_vmsa(struct kvm_vcpu *vcpu, struct vmcb_save_area *vmsa); #else @@ -895,7 +895,7 @@ static inline int 
sev_gmem_prepare(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, int max_order) { return 0; } static inline void sev_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end) {} -static inline int sev_private_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn) +static inline int sev_gmem_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn, bool is_private) { return 0; } diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c index dbab1c15b0cd..bb5f182f6788 100644 --- a/arch/x86/kvm/vmx/main.c +++ b/arch/x86/kvm/vmx/main.c @@ -831,10 +831,11 @@ static int vt_vcpu_mem_enc_ioctl(struct kvm_vcpu *vcpu, void __user *argp) return tdx_vcpu_ioctl(vcpu, argp); } -static int vt_gmem_private_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn) +static int vt_gmem_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn, + bool is_private) { if (is_td(kvm)) - return tdx_gmem_private_max_mapping_level(kvm, pfn); + return tdx_gmem_max_mapping_level(kvm, pfn, is_private); return 0; } @@ -1005,7 +1006,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = { .mem_enc_ioctl = vt_op_tdx_only(mem_enc_ioctl), .vcpu_mem_enc_ioctl = vt_op_tdx_only(vcpu_mem_enc_ioctl), - .private_max_mapping_level = vt_op_tdx_only(gmem_private_max_mapping_level) + .gmem_max_mapping_level = vt_op_tdx_only(gmem_max_mapping_level) }; struct kvm_x86_init_ops vt_init_ops __initdata = { diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c index 66744f5768c8..00f8bfd2330d 100644 --- a/arch/x86/kvm/vmx/tdx.c +++ b/arch/x86/kvm/vmx/tdx.c @@ -281,25 +281,6 @@ static inline void tdx_disassociate_vp(struct kvm_vcpu *vcpu) vcpu->cpu = -1; } -static void tdx_clear_page(struct page *page) -{ - const void *zero_page = (const void *) page_to_virt(ZERO_PAGE(0)); - void *dest = page_to_virt(page); - unsigned long i; - - /* - * The page could have been poisoned. MOVDIR64B also clears - * the poison bit so the kernel can safely use the page again. - */ - for (i = 0; i < PAGE_SIZE; i += 64) - movdir64b(dest + i, zero_page); - /* - * MOVDIR64B store uses WC buffer. Prevent following memory reads - * from seeing potentially poisoned cache. - */ - __mb(); -} - static void tdx_no_vcpus_enter_start(struct kvm *kvm) { struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm); @@ -345,7 +326,7 @@ static int tdx_reclaim_page(struct page *page) r = __tdx_reclaim_page(page); if (!r) - tdx_clear_page(page); + tdx_quirk_reset_page(page); return r; } @@ -442,6 +423,16 @@ void tdx_disable_virtualization_cpu(void) tdx_flush_vp(&arg); } local_irq_restore(flags); + + /* + * Flush cache now if kexec is possible: this is necessary to avoid + * having dirty private memory cachelines when the new kernel boots, + * but WBINVD is a relatively expensive operation and doing it during + * kexec can exacerbate races in native_stop_other_cpus(). Do it + * now, since this is a safe moment and there is going to be no more + * TDX activity on this CPU from this point on. 
+ */ + tdx_cpu_flush_cache_for_kexec(); } #define TDX_SEAMCALL_RETRIES 10000 @@ -593,7 +584,7 @@ static void tdx_reclaim_td_control_pages(struct kvm *kvm) pr_tdx_error(TDH_PHYMEM_PAGE_WBINVD, err); return; } - tdx_clear_page(kvm_tdx->td.tdr_page); + tdx_quirk_reset_page(kvm_tdx->td.tdr_page); __free_page(kvm_tdx->td.tdr_page); kvm_tdx->td.tdr_page = NULL; @@ -861,6 +852,7 @@ void tdx_vcpu_free(struct kvm_vcpu *vcpu) if (tdx->vp.tdvpr_page) { tdx_reclaim_control_page(tdx->vp.tdvpr_page); tdx->vp.tdvpr_page = 0; + tdx->vp.tdvpr_pa = 0; } tdx->state = VCPU_TD_STATE_UNINITIALIZED; @@ -1714,7 +1706,7 @@ static int tdx_sept_drop_private_spte(struct kvm *kvm, gfn_t gfn, pr_tdx_error(TDH_PHYMEM_PAGE_WBINVD, err); return -EIO; } - tdx_clear_page(page); + tdx_quirk_reset_page(page); tdx_unpin(kvm, page); return 0; } @@ -2940,6 +2932,13 @@ static int tdx_td_vcpu_init(struct kvm_vcpu *vcpu, u64 vcpu_rcx) return -ENOMEM; tdx->vp.tdvpr_page = page; + /* + * page_to_phys() does not work in 'noinstr' code, like guest + * entry via tdh_vp_enter(). Precalculate and store it instead + * of doing it at runtime later. + */ + tdx->vp.tdvpr_pa = page_to_phys(tdx->vp.tdvpr_page); + tdx->vp.tdcx_pages = kcalloc(kvm_tdx->td.tdcx_nr_pages, sizeof(*tdx->vp.tdcx_pages), GFP_KERNEL); if (!tdx->vp.tdcx_pages) { @@ -3002,6 +3001,7 @@ free_tdvpr: if (tdx->vp.tdvpr_page) __free_page(tdx->vp.tdvpr_page); tdx->vp.tdvpr_page = 0; + tdx->vp.tdvpr_pa = 0; return ret; } @@ -3318,8 +3318,11 @@ int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp) return ret; } -int tdx_gmem_private_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn) +int tdx_gmem_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn, bool is_private) { + if (!is_private) + return 0; + return PG_LEVEL_4K; } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index aa157fe5b7b3..0bdf9405969a 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -5785,6 +5785,13 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) if (kvm_test_request(KVM_REQ_EVENT, vcpu)) return 1; + /* + * Ensure that any updates to kvm->buses[] observed by the + * previous instruction (emulated or otherwise) are also + * visible to the instruction KVM is about to emulate. + */ + smp_rmb(); + if (!kvm_emulate_instruction(vcpu, 0)) return 0; diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h index 2b3424f638db..4c70f56c57c8 100644 --- a/arch/x86/kvm/vmx/x86_ops.h +++ b/arch/x86/kvm/vmx/x86_ops.h @@ -153,7 +153,7 @@ int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp); void tdx_flush_tlb_current(struct kvm_vcpu *vcpu); void tdx_flush_tlb_all(struct kvm_vcpu *vcpu); void tdx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, int root_level); -int tdx_gmem_private_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn); +int tdx_gmem_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn, bool is_private); #endif #endif /* __KVM_X86_VMX_X86_OPS_H */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 706b6fd56d3c..f122906ed9f3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -13530,6 +13530,16 @@ bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_arch_no_poll); +#ifdef CONFIG_KVM_GUEST_MEMFD +/* + * KVM doesn't yet support mmap() on guest_memfd for VMs with private memory + * (the private vs. shared tracking needs to be moved into guest_memfd). 
+ */ +bool kvm_arch_supports_gmem_mmap(struct kvm *kvm) +{ + return !kvm_arch_has_private_mem(kvm); +} + #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order) { @@ -13543,6 +13553,7 @@ void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end) kvm_x86_call(gmem_invalidate)(start, end); } #endif +#endif int kvm_spec_ctrl_test_value(u64 value) { diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 8834c76f91c9..d2d54b8c4dbb 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -399,15 +399,6 @@ static void cpa_flush_all(unsigned long cache) on_each_cpu(__cpa_flush_all, (void *) cache, 1); } -static void __cpa_flush_tlb(void *data) -{ - struct cpa_data *cpa = data; - unsigned int i; - - for (i = 0; i < cpa->numpages; i++) - flush_tlb_one_kernel(fix_addr(__cpa_addr(cpa, i))); -} - static int collapse_large_pages(unsigned long addr, struct list_head *pgtables); static void cpa_collapse_large_pages(struct cpa_data *cpa) @@ -444,6 +435,7 @@ static void cpa_collapse_large_pages(struct cpa_data *cpa) static void cpa_flush(struct cpa_data *cpa, int cache) { + unsigned long start, end; unsigned int i; BUG_ON(irqs_disabled() && !early_boot_irqs_disabled); @@ -453,10 +445,12 @@ static void cpa_flush(struct cpa_data *cpa, int cache) goto collapse_large_pages; } - if (cpa->force_flush_all || cpa->numpages > tlb_single_page_flush_ceiling) - flush_tlb_all(); - else - on_each_cpu(__cpa_flush_tlb, cpa, 1); + start = fix_addr(__cpa_addr(cpa, 0)); + end = fix_addr(__cpa_addr(cpa, cpa->numpages)); + if (cpa->force_flush_all) + end = TLB_FLUSH_ALL; + + flush_tlb_kernel_range(start, end); if (!cache) goto collapse_large_pages; diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c index c7a9a087ccaf..eac403248462 100644 --- a/arch/x86/virt/vmx/tdx/tdx.c +++ b/arch/x86/virt/vmx/tdx/tdx.c @@ -633,15 +633,19 @@ err: } /* - * Convert TDX private pages back to normal by using MOVDIR64B to - * clear these pages. Note this function doesn't flush cache of - * these TDX private pages. The caller should make sure of that. + * Convert TDX private pages back to normal by using MOVDIR64B to clear these + * pages. Typically, any write to the page will convert it from TDX private back + * to normal kernel memory. Systems with the X86_BUG_TDX_PW_MCE erratum need to + * do the conversion explicitly via MOVDIR64B. 
*/ -static void reset_tdx_pages(unsigned long base, unsigned long size) +static void tdx_quirk_reset_paddr(unsigned long base, unsigned long size) { const void *zero_page = (const void *)page_address(ZERO_PAGE(0)); unsigned long phys, end; + if (!boot_cpu_has_bug(X86_BUG_TDX_PW_MCE)) + return; + end = base + size; for (phys = base; phys < end; phys += 64) movdir64b(__va(phys), zero_page); @@ -654,17 +658,23 @@ static void reset_tdx_pages(unsigned long base, unsigned long size) mb(); } -static void tdmr_reset_pamt(struct tdmr_info *tdmr) +void tdx_quirk_reset_page(struct page *page) +{ + tdx_quirk_reset_paddr(page_to_phys(page), PAGE_SIZE); +} +EXPORT_SYMBOL_GPL(tdx_quirk_reset_page); + +static void tdmr_quirk_reset_pamt(struct tdmr_info *tdmr) { - tdmr_do_pamt_func(tdmr, reset_tdx_pages); + tdmr_do_pamt_func(tdmr, tdx_quirk_reset_paddr); } -static void tdmrs_reset_pamt_all(struct tdmr_info_list *tdmr_list) +static void tdmrs_quirk_reset_pamt_all(struct tdmr_info_list *tdmr_list) { int i; for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++) - tdmr_reset_pamt(tdmr_entry(tdmr_list, i)); + tdmr_quirk_reset_pamt(tdmr_entry(tdmr_list, i)); } static unsigned long tdmrs_count_pamt_kb(struct tdmr_info_list *tdmr_list) @@ -1136,15 +1146,7 @@ err_reset_pamts: * to the kernel. */ wbinvd_on_all_cpus(); - /* - * According to the TDX hardware spec, if the platform - * doesn't have the "partial write machine check" - * erratum, any kernel read/write will never cause #MC - * in kernel space, thus it's OK to not convert PAMTs - * back to normal. But do the conversion anyway here - * as suggested by the TDX spec. - */ - tdmrs_reset_pamt_all(&tdx_tdmr_list); + tdmrs_quirk_reset_pamt_all(&tdx_tdmr_list); err_free_pamts: tdmrs_free_pamt_all(&tdx_tdmr_list); err_free_tdmrs: @@ -1266,7 +1268,7 @@ static bool paddr_is_tdx_private(unsigned long phys) return false; /* Get page type from the TDX module */ - sret = __seamcall_ret(TDH_PHYMEM_PAGE_RDMD, &args); + sret = __seamcall_dirty_cache(__seamcall_ret, TDH_PHYMEM_PAGE_RDMD, &args); /* * The SEAMCALL will not return success unless there is a @@ -1502,11 +1504,6 @@ static inline u64 tdx_tdr_pa(struct tdx_td *td) return page_to_phys(td->tdr_page); } -static inline u64 tdx_tdvpr_pa(struct tdx_vp *td) -{ - return page_to_phys(td->tdvpr_page); -} - /* * The TDX module exposes a CLFLUSH_BEFORE_ALLOC bit to specify whether * a CLFLUSH of pages is required before handing them to the TDX module. 
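The __seamcall_dirty_cache() wrapper used for TDH_PHYMEM_PAGE_RDMD above, and for TDH_VP_ENTER in the next hunk, is not defined anywhere in these hunks. As a minimal sketch only — assuming it merely needs to pair with the per-CPU cache_state_incoherent flag that tdx_cpu_flush_cache_for_kexec() consumes at the end of this file, and ignoring noinstr instrumentation constraints — it could look like:

#define __seamcall_dirty_cache(seamcall_func, fn, args)			\
({									\
	/*								\
	 * Any SEAMCALL may dirty cachelines with the TDX private	\
	 * HKID; record that before making the call so a later kexec	\
	 * knows this CPU still owes a WBINVD.				\
	 */								\
	this_cpu_write(cache_state_incoherent, true);			\
	seamcall_func((fn), (args));					\
})

The flag is set before the SEAMCALL rather than after so that the pending-flush state cannot be missed if execution is torn away between the two steps by a crash kexec.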
@@ -1518,11 +1515,11 @@ static void tdx_clflush_page(struct page *page) clflush_cache_range(page_to_virt(page), PAGE_SIZE); } -noinstr __flatten u64 tdh_vp_enter(struct tdx_vp *td, struct tdx_module_args *args) +noinstr u64 tdh_vp_enter(struct tdx_vp *td, struct tdx_module_args *args) { - args->rcx = tdx_tdvpr_pa(td); + args->rcx = td->tdvpr_pa; - return __seamcall_saved_ret(TDH_VP_ENTER, args); + return __seamcall_dirty_cache(__seamcall_saved_ret, TDH_VP_ENTER, args); } EXPORT_SYMBOL_GPL(tdh_vp_enter); @@ -1581,7 +1578,7 @@ u64 tdh_vp_addcx(struct tdx_vp *vp, struct page *tdcx_page) { struct tdx_module_args args = { .rcx = page_to_phys(tdcx_page), - .rdx = tdx_tdvpr_pa(vp), + .rdx = vp->tdvpr_pa, }; tdx_clflush_page(tdcx_page); @@ -1650,7 +1647,7 @@ EXPORT_SYMBOL_GPL(tdh_mng_create); u64 tdh_vp_create(struct tdx_td *td, struct tdx_vp *vp) { struct tdx_module_args args = { - .rcx = tdx_tdvpr_pa(vp), + .rcx = vp->tdvpr_pa, .rdx = tdx_tdr_pa(td), }; @@ -1706,7 +1703,7 @@ EXPORT_SYMBOL_GPL(tdh_mr_finalize); u64 tdh_vp_flush(struct tdx_vp *vp) { struct tdx_module_args args = { - .rcx = tdx_tdvpr_pa(vp), + .rcx = vp->tdvpr_pa, }; return seamcall(TDH_VP_FLUSH, &args); @@ -1752,7 +1749,7 @@ EXPORT_SYMBOL_GPL(tdh_mng_init); u64 tdh_vp_rd(struct tdx_vp *vp, u64 field, u64 *data) { struct tdx_module_args args = { - .rcx = tdx_tdvpr_pa(vp), + .rcx = vp->tdvpr_pa, .rdx = field, }; u64 ret; @@ -1769,7 +1766,7 @@ EXPORT_SYMBOL_GPL(tdh_vp_rd); u64 tdh_vp_wr(struct tdx_vp *vp, u64 field, u64 data, u64 mask) { struct tdx_module_args args = { - .rcx = tdx_tdvpr_pa(vp), + .rcx = vp->tdvpr_pa, .rdx = field, .r8 = data, .r9 = mask, @@ -1782,7 +1779,7 @@ EXPORT_SYMBOL_GPL(tdh_vp_wr); u64 tdh_vp_init(struct tdx_vp *vp, u64 initial_rcx, u32 x2apicid) { struct tdx_module_args args = { - .rcx = tdx_tdvpr_pa(vp), + .rcx = vp->tdvpr_pa, .rdx = initial_rcx, .r8 = x2apicid, }; @@ -1870,3 +1867,22 @@ u64 tdh_phymem_page_wbinvd_hkid(u64 hkid, struct page *page) return seamcall(TDH_PHYMEM_PAGE_WBINVD, &args); } EXPORT_SYMBOL_GPL(tdh_phymem_page_wbinvd_hkid); + +#ifdef CONFIG_KEXEC_CORE +void tdx_cpu_flush_cache_for_kexec(void) +{ + lockdep_assert_preemption_disabled(); + + if (!this_cpu_read(cache_state_incoherent)) + return; + + /* + * Private memory cachelines need to be clean at the time of + * kexec. Write them back now, as the caller promises that + * there should be no more SEAMCALLs on this CPU. + */ + wbinvd(); + this_cpu_write(cache_state_incoherent, false); +} +EXPORT_SYMBOL_GPL(tdx_cpu_flush_cache_for_kexec); +#endif
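For context on the tdvpr_pa conversions above, the resulting VP structure can be sketched as follows. The tdvpr_page and tdcx_pages members appear in the tdx.c hunks earlier; the exact type of the new field and the comments are assumptions:

struct tdx_vp {
	struct page *tdvpr_page;	/* TDVPR root page */
	phys_addr_t tdvpr_pa;		/* cached page_to_phys(tdvpr_page) */
	struct page **tdcx_pages;	/* TDCX control pages */
};

Caching the physical address at vCPU init keeps the noinstr entry path of tdh_vp_enter() free of page_to_phys(), which cannot be called there, and is why tdx_vcpu_free() and the tdvpr error path clear tdvpr_pa together with tdvpr_page.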