author		Marc Zyngier <maz@kernel.org>	2025-05-23 10:59:43 +0100
committer	Marc Zyngier <maz@kernel.org>	2025-05-23 10:59:43 +0100
commit		1b85d923ba8c9e6afaf19e26708411adde94fba8 (patch)
tree		bdc5e5c9e698d60c064fa9c5cab4c687f980e397
parent		7f3225fe8bc2b4a406160d3efdb900bd01037aea (diff)
parent		bf809a0aab2f3a3a97264f217f2dbee0913b2ed7 (diff)
Merge branch kvm-arm64/misc-6.16 into kvmarm-master/next
* kvm-arm64/misc-6.16:
: .
: Misc changes and improvements for 6.16:
:
: - Add a new selftest checking whether the host's SVE state is corrupted by a guest
:
: - Keep HCR_EL2.xMO set at all times for systems running with the kernel at EL2,
: ensuring that the window for interrupts is slightly bigger, and avoiding
: a pretty bad erratum on the AmpereOne HW
:
: - Replace a couple of open-coded on/off strings with str_on_off()
:
: - Get rid of the pKVM memblock sorting, which now appears to be superfluous
:
: - Drop superfluous clearing of ICH_LR_EOI in the LR when nesting
:
: - Add a workaround for AmpereOne's erratum AC04_CPU_23, under which the CPU
: suffers a pretty bad case of TLB corruption unless accesses to HCR_EL2 are
: heavily synchronised
:
: - Add a per-VM, per-ITS debugfs entry to dump the state of the ITS tables
: in a human-friendly fashion
: .
KVM: arm64: Fix documentation for vgic_its_iter_next()
KVM: arm64: vgic-its: Add debugfs interface to expose ITS tables
arm64: errata: Work around AmpereOne's erratum AC04_CPU_23
KVM: arm64: nv: Remove clearing of ICH_LR<n>.EOI if ICH_LR<n>.HW == 1
KVM: arm64: Drop sort_memblock_regions()
KVM: arm64: selftests: Add test for SVE host corruption
KVM: arm64: Force HCR_EL2.xMO to 1 at all times in VHE mode
KVM: arm64: Replace ternary flags with str_on_off() helper
Signed-off-by: Marc Zyngier <maz@kernel.org>
26 files changed, 496 insertions(+), 89 deletions(-)
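As an aside, the per-VM, per-ITS debugfs entry added by this series is a regular seq_file, so it can be consumed from userspace like any other debugfs file. The sketch below is a hypothetical minimal reader, not part of the series: the file name follows the kasprintf() in vgic_its_debug_init() further down, while the per-VM directory path is an assumption based on KVM's usual /sys/kernel/debug/kvm/<pid>-<vm-fd>/ layout.

/*
 * Hypothetical userspace reader for the new vgic-its-state debugfs dump.
 * Path layout is an assumption; pass the full file path on the command line,
 * e.g. /sys/kernel/debug/kvm/<pid>-<vm-fd>/vgic-its-state@<its-base>.
 */
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
	char line[256];
	FILE *f;

	if (argc < 2) {
		fprintf(stderr, "usage: %s <vgic-its-state file>\n", argv[0]);
		return EXIT_FAILURE;
	}

	f = fopen(argv[1], "r");
	if (!f) {
		perror("fopen");
		return EXIT_FAILURE;
	}

	/*
	 * Per the seq_printf() calls in vgic_its_debug_show() below, each
	 * device prints a header followed by one row per ITE:
	 * EVENT_ID    INTID  HWINTID   TARGET   COL_ID HW
	 */
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);

	fclose(f);
	return EXIT_SUCCESS;
}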
diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst
index f968c13b46a7..b18ef4064bc0 100644
--- a/Documentation/arch/arm64/silicon-errata.rst
+++ b/Documentation/arch/arm64/silicon-errata.rst
@@ -57,6 +57,8 @@ stable kernels.
 +----------------+-----------------+-----------------+-----------------------------+
 | Ampere         | AmpereOne AC04  | AC04_CPU_10     | AMPERE_ERRATUM_AC03_CPU_38  |
 +----------------+-----------------+-----------------+-----------------------------+
+| Ampere         | AmpereOne AC04  | AC04_CPU_23     | AMPERE_ERRATUM_AC04_CPU_23  |
++----------------+-----------------+-----------------+-----------------------------+
 +----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-A510     | #2457168        | ARM64_ERRATUM_2457168       |
 +----------------+-----------------+-----------------+-----------------------------+
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index a182295e6f08..3ae4e80e3002 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -464,6 +464,23 @@ config AMPERE_ERRATUM_AC03_CPU_38
 
 	  If unsure, say Y.
 
+config AMPERE_ERRATUM_AC04_CPU_23
+	bool "AmpereOne: AC04_CPU_23: Failure to synchronize writes to HCR_EL2 may corrupt address translations."
+	default y
+	help
+	  This option adds an alternative code sequence to work around Ampere
+	  errata AC04_CPU_23 on AmpereOne.
+
+	  Updates to HCR_EL2 can rarely corrupt simultaneous translations for
+	  data addresses initiated by load/store instructions. Only
+	  instruction initiated translations are vulnerable, not translations
+	  from prefetches for example. A DSB before the store to HCR_EL2 is
+	  sufficient to prevent older instructions from hitting the window
+	  for corruption, and an ISB after is sufficient to prevent younger
+	  instructions from hitting the window for corruption.
+
+	  If unsure, say Y.
+
 config ARM64_WORKAROUND_CLEAN_CACHE
 	bool
diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
index 055e69a4184c..e42228d2c889 100644
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -38,7 +38,7 @@
 	orr	x0, x0, #HCR_E2H
 
 .LnVHE_\@:
-	msr	hcr_el2, x0
+	msr_hcr_el2 x0
 	isb
 .endm
diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h
index cbfa7b6f2e09..77d6b8c63d4e 100644
--- a/arch/arm64/include/asm/hardirq.h
+++ b/arch/arm64/include/asm/hardirq.h
@@ -41,7 +41,7 @@ do {							\
							\
	___hcr = read_sysreg(hcr_el2);			\
	if (!(___hcr & HCR_TGE)) {			\
-		write_sysreg(___hcr | HCR_TGE, hcr_el2);	\
+		write_sysreg_hcr(___hcr | HCR_TGE);	\
		isb();					\
	}						\
	/*						\
@@ -82,7 +82,7 @@ do {							\
	 */						\
	barrier();					\
	if (!___ctx->cnt && !(___hcr & HCR_TGE))	\
-		write_sysreg(___hcr, hcr_el2);		\
+		write_sysreg_hcr(___hcr);		\
 } while (0)
 
 static inline void ack_bad_irq(unsigned int irq)
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 52b3aeb19efc..9a839563584d 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -103,7 +103,7 @@
			 HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3 | HCR_TID1)
 #define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA)
 #define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC)
-#define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
+#define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H | HCR_AMO | HCR_IMO | HCR_FMO)
 
 #define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En | HCRX_EL2_EnFPM)
 #define MPAMHCR_HOST_FLAGS	0
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index ad00e8f1e561..cd853801a8f7 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -1093,6 +1093,15 @@
	__emit_inst(0xd5000000|(\sreg)|(.L__gpr_num_\rt))
 	.endm
 
+	.macro	msr_hcr_el2, reg
+#if IS_ENABLED(CONFIG_AMPERE_ERRATUM_AC04_CPU_23)
+	dsb	nsh
+	msr	hcr_el2, \reg
+	isb
+#else
+	msr	hcr_el2, \reg
+#endif
+	.endm
 #else
 
 #include <linux/bitfield.h>
@@ -1180,6 +1189,13 @@
		write_sysreg(__scs_new, sysreg);		\
 } while (0)
 
+#define sysreg_clear_set_hcr(clear, set) do {			\
+	u64 __scs_val = read_sysreg(hcr_el2);			\
+	u64 __scs_new = (__scs_val & ~(u64)(clear)) | (set);	\
+	if (__scs_new != __scs_val)				\
+		write_sysreg_hcr(__scs_new);			\
+} while (0)
+
 #define sysreg_clear_set_s(sysreg, clear, set) do {		\
	u64 __scs_val = read_sysreg_s(sysreg);			\
	u64 __scs_new = (__scs_val & ~(u64)(clear)) | (set);	\
@@ -1187,6 +1203,17 @@
		write_sysreg_s(__scs_new, sysreg);		\
 } while (0)
 
+#define write_sysreg_hcr(__val) do {				\
+	if (IS_ENABLED(CONFIG_AMPERE_ERRATUM_AC04_CPU_23) &&	\
+	    (!system_capabilities_finalized() ||		\
+	     alternative_has_cap_unlikely(ARM64_WORKAROUND_AMPERE_AC04_CPU_23))) \
+		asm volatile("dsb nsh; msr hcr_el2, %x0; isb"	\
+			     : : "rZ" (__val));			\
+	else							\
+		asm volatile("msr hcr_el2, %x0"			\
+			     : : "rZ" (__val));			\
+} while (0)
+
 #define read_sysreg_par() ({					\
	u64 par;						\
	asm(ALTERNATIVE("nop", "dmb sy", ARM64_WORKAROUND_1508412)); \
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 6b0ad5070d3e..59d723c9ab8f 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -557,6 +557,13 @@ static const struct midr_range erratum_ac03_cpu_38_list[] = {
 };
 #endif
 
+#ifdef CONFIG_AMPERE_ERRATUM_AC04_CPU_23
+static const struct midr_range erratum_ac04_cpu_23_list[] = {
+	MIDR_ALL_VERSIONS(MIDR_AMPERE1A),
+	{},
+};
+#endif
+
 const struct arm64_cpu_capabilities arm64_errata[] = {
 #ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE
 	{
@@ -876,6 +883,13 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
		ERRATA_MIDR_RANGE_LIST(erratum_ac03_cpu_38_list),
	},
 #endif
+#ifdef CONFIG_AMPERE_ERRATUM_AC04_CPU_23
+	{
+		.desc = "AmpereOne erratum AC04_CPU_23",
+		.capability = ARM64_WORKAROUND_AMPERE_AC04_CPU_23,
+		ERRATA_MIDR_RANGE_LIST(erratum_ac04_cpu_23_list),
+	},
+#endif
	{
		.desc = "Broken CNTVOFF_EL2",
		.capability = ARM64_WORKAROUND_QCOM_ORYON_CNTVOFF,
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index ae990da1eae5..36e2d26b54f5 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -97,7 +97,7 @@ SYM_CODE_START_LOCAL(__finalise_el2)
 2:	// Engage the VHE magic!
	mov_q	x0, HCR_HOST_VHE_FLAGS
-	msr	hcr_el2, x0
+	msr_hcr_el2 x0
	isb
 
	// Use the EL1 allocated stack, per-cpu offset
diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c
index da5359668b9c..a25be111cd8f 100644
--- a/arch/arm64/kvm/at.c
+++ b/arch/arm64/kvm/at.c
@@ -1245,7 +1245,7 @@ static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
 
 skip_mmu_switch:
	/* Temporarily switch back to guest context */
-	write_sysreg(vcpu->arch.hcr_el2, hcr_el2);
+	write_sysreg_hcr(vcpu->arch.hcr_el2);
	isb();
 
	switch (op) {
@@ -1277,7 +1277,7 @@ skip_mmu_switch:
	if (!fail)
		par = read_sysreg_par();
 
-	write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
+	write_sysreg_hcr(HCR_HOST_VHE_FLAGS);
 
	if (!(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)))
		__mmu_config_restore(&config);
@@ -1340,7 +1340,7 @@ void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
		if (!vcpu_el2_e2h_is_set(vcpu))
			val |= HCR_NV | HCR_NV1;
 
-		write_sysreg(val, hcr_el2);
+		write_sysreg_hcr(val);
		isb();
 
		par = SYS_PAR_EL1_F;
@@ -1365,7 +1365,7 @@ void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
	if (!fail)
		par = read_sysreg_par();
 
-	write_sysreg(hcr, hcr_el2);
+	write_sysreg_hcr(hcr);
	isb();
 }
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index f131bca36bd3..eef310cdbdbd 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -345,7 +345,7 @@ static inline void ___activate_traps(struct kvm_vcpu *vcpu, u64 hcr)
	if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM))
		hcr |= HCR_TVM;
 
-	write_sysreg(hcr, hcr_el2);
+	write_sysreg_hcr(hcr);
 
	if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE))
		write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S
index 58f0cb2298cc..eef15b374abb 100644
--- a/arch/arm64/kvm/hyp/nvhe/host.S
+++ b/arch/arm64/kvm/hyp/nvhe/host.S
@@ -124,7 +124,7 @@ SYM_FUNC_START(__hyp_do_panic)
	/* Ensure host stage-2 is disabled */
	mrs	x0, hcr_el2
	bic	x0, x0, #HCR_VM
-	msr	hcr_el2, x0
+	msr_hcr_el2 x0
	isb
	tlbi	vmalls12e1
	dsb	nsh
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S
index f8af11189572..aada42522e7b 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S
@@ -100,7 +100,7 @@ SYM_CODE_START_LOCAL(___kvm_hyp_init)
	msr	mair_el2, x1
 
	ldr	x1, [x0, #NVHE_INIT_HCR_EL2]
-	msr	hcr_el2, x1
+	msr_hcr_el2 x1
 
	mov	x2, #HCR_E2H
	and	x2, x1, x2
@@ -262,7 +262,7 @@ reset:
 
 alternative_if ARM64_KVM_PROTECTED_MODE
	mov_q	x5, HCR_HOST_NVHE_FLAGS
-	msr	hcr_el2, x5
+	msr_hcr_el2 x5
 alternative_else_nop_endif
 
	/* Install stub vectors */
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index e359cd24ecf0..f67c1a91e4eb 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -333,7 +333,7 @@ int __pkvm_prot_finalize(void)
	 */
	kvm_flush_dcache_to_poc(params, sizeof(*params));
 
-	write_sysreg(params->hcr_el2, hcr_el2);
+	write_sysreg_hcr(params->hcr_el2);
	__load_stage2(&host_mmu.arch.mmu, &host_mmu.arch);
 
	/*
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index 6947aaf117f6..73affe1333a4 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -154,7 +154,7 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
 
	__deactivate_traps_common(vcpu);
 
-	write_sysreg(this_cpu_ptr(&kvm_init_params)->hcr_el2, hcr_el2);
+	write_sysreg_hcr(this_cpu_ptr(&kvm_init_params)->hcr_el2);
 
	__deactivate_cptr_traps(vcpu);
	write_sysreg(__kvm_hyp_host_vector, vbar_el2);
diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c
index f38565e28a23..f162b0df5cae 100644
--- a/arch/arm64/kvm/hyp/vgic-v3-sr.c
+++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c
@@ -429,23 +429,27 @@ u64 __vgic_v3_get_gic_config(void)
	/*
	 * To check whether we have a MMIO-based (GICv2 compatible)
	 * CPU interface, we need to disable the system register
-	 * view. To do that safely, we have to prevent any interrupt
-	 * from firing (which would be deadly).
+	 * view.
	 *
-	 * Note that this only makes sense on VHE, as interrupts are
-	 * already masked for nVHE as part of the exception entry to
-	 * EL2.
-	 */
-	if (has_vhe())
-		flags = local_daif_save();
-
-	/*
	 * Table 11-2 "Permitted ICC_SRE_ELx.SRE settings" indicates
	 * that to be able to set ICC_SRE_EL1.SRE to 0, all the
	 * interrupt overrides must be set. You've got to love this.
+	 *
+	 * As we always run VHE with HCR_xMO set, no extra xMO
+	 * manipulation is required in that case.
+	 *
+	 * To safely disable SRE, we have to prevent any interrupt
+	 * from firing (which would be deadly). This only makes sense
+	 * on VHE, as interrupts are already masked for nVHE as part
+	 * of the exception entry to EL2.
	 */
-	sysreg_clear_set(hcr_el2, 0, HCR_AMO | HCR_FMO | HCR_IMO);
-	isb();
+	if (has_vhe()) {
+		flags = local_daif_save();
+	} else {
+		sysreg_clear_set_hcr(0, HCR_AMO | HCR_FMO | HCR_IMO);
+		isb();
+	}
+
	write_gicreg(0, ICC_SRE_EL1);
	isb();
@@ -453,11 +457,13 @@ u64 __vgic_v3_get_gic_config(void)
	write_gicreg(sre, ICC_SRE_EL1);
	isb();
 
-	sysreg_clear_set(hcr_el2, HCR_AMO | HCR_FMO | HCR_IMO, 0);
-	isb();
-
-	if (has_vhe())
+	if (has_vhe()) {
		local_daif_restore(flags);
+	} else {
+		sysreg_clear_set_hcr(HCR_AMO | HCR_FMO | HCR_IMO, 0);
+		isb();
+	}
 
	val = (val & ICC_SRE_EL1_SRE) ? 0 : (1ULL << 63);
	val |= read_gicreg(ICH_VTR_EL2);
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 5902e5f15d09..c9b330dc2066 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -209,7 +209,7 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
 
	___deactivate_traps(vcpu);
 
-	write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
+	write_sysreg_hcr(HCR_HOST_VHE_FLAGS);
 
	if (has_cntpoff()) {
		struct timer_map map;
diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c
index 3d50a1bd2bdb..ec2569818629 100644
--- a/arch/arm64/kvm/hyp/vhe/tlb.c
+++ b/arch/arm64/kvm/hyp/vhe/tlb.c
@@ -63,7 +63,7 @@ static void enter_vmid_context(struct kvm_s2_mmu *mmu,
		__load_stage2(mmu, mmu->arch);
		val = read_sysreg(hcr_el2);
		val &= ~HCR_TGE;
-		write_sysreg(val, hcr_el2);
+		write_sysreg_hcr(val);
		isb();
	}
 
@@ -73,7 +73,7 @@ static void exit_vmid_context(struct tlb_inv_context *cxt)
	 * We're done with the TLB operation, let's restore the host's
	 * view of HCR_EL2.
	 */
-	write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
+	write_sysreg_hcr(HCR_HOST_VHE_FLAGS);
	isb();
 
	/* ... and the stage-2 MMU context that we switched away from */
diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
index 793089ba23c5..fcd70bfe44fb 100644
--- a/arch/arm64/kvm/pkvm.c
+++ b/arch/arm64/kvm/pkvm.c
@@ -11,7 +11,6 @@
 #include <asm/kvm_mmu.h>
 #include <linux/memblock.h>
 #include <linux/mutex.h>
-#include <linux/sort.h>
 
 #include <asm/kvm_pkvm.h>
 
@@ -25,23 +24,6 @@ static unsigned int *hyp_memblock_nr_ptr = &kvm_nvhe_sym(hyp_memblock_nr);
 phys_addr_t hyp_mem_base;
 phys_addr_t hyp_mem_size;
 
-static int cmp_hyp_memblock(const void *p1, const void *p2)
-{
-	const struct memblock_region *r1 = p1;
-	const struct memblock_region *r2 = p2;
-
-	return r1->base < r2->base ? -1 : (r1->base > r2->base);
-}
-
-static void __init sort_memblock_regions(void)
-{
-	sort(hyp_memory,
-	     *hyp_memblock_nr_ptr,
-	     sizeof(struct memblock_region),
-	     cmp_hyp_memblock,
-	     NULL);
-}
-
 static int __init register_memblock_regions(void)
 {
	struct memblock_region *reg;
@@ -53,7 +35,6 @@ static int __init register_memblock_regions(void)
		hyp_memory[*hyp_memblock_nr_ptr] = *reg;
		(*hyp_memblock_nr_ptr)++;
	}
-	sort_memblock_regions();
 
	return 0;
 }
diff --git a/arch/arm64/kvm/trace_arm.h b/arch/arm64/kvm/trace_arm.h
index c18c1a95831e..9c60f6465c78 100644
--- a/arch/arm64/kvm/trace_arm.h
+++ b/arch/arm64/kvm/trace_arm.h
@@ -176,7 +176,7 @@ TRACE_EVENT(kvm_set_way_flush,
	),
 
	TP_printk("S/W flush at 0x%016lx (cache %s)",
-		  __entry->vcpu_pc, __entry->cache ? "on" : "off")
+		  __entry->vcpu_pc, str_on_off(__entry->cache))
 );
 
 TRACE_EVENT(kvm_toggle_cache,
@@ -196,8 +196,8 @@ TRACE_EVENT(kvm_toggle_cache,
	),
 
	TP_printk("VM op at 0x%016lx (cache was %s, now %s)",
-		  __entry->vcpu_pc, __entry->was ? "on" : "off",
-		  __entry->now ? "on" : "off")
+		  __entry->vcpu_pc, str_on_off(__entry->was),
+		  str_on_off(__entry->now))
 );
 
 /*
diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c
index afb018528bc3..f8425f381de9 100644
--- a/arch/arm64/kvm/vgic/vgic-debug.c
+++ b/arch/arm64/kvm/vgic/vgic-debug.c
@@ -320,3 +320,227 @@ void vgic_debug_init(struct kvm *kvm)
 void vgic_debug_destroy(struct kvm *kvm)
 {
 }
+
+/**
+ * struct vgic_its_iter - Iterator for traversing VGIC ITS device tables.
+ * @dev: Pointer to the current its_device being processed.
+ * @ite: Pointer to the current its_ite within the device being processed.
+ *
+ * This structure is used to maintain the current position during iteration
+ * over the ITS device tables. It holds pointers to both the current device
+ * and the current ITE within that device.
+ */
+struct vgic_its_iter {
+	struct its_device *dev;
+	struct its_ite *ite;
+};
+
+/**
+ * end_of_iter - Checks if the iterator has reached the end.
+ * @iter: The iterator to check.
+ *
+ * When the iterator completed processing the final ITE in the last device
+ * table, it was marked to indicate the end of iteration by setting its
+ * device and ITE pointers to NULL.
+ * This function checks whether the iterator was marked as end.
+ *
+ * Return: True if the iterator is marked as end, false otherwise.
+ */
+static inline bool end_of_iter(struct vgic_its_iter *iter)
+{
+	return !iter->dev && !iter->ite;
+}
+
+/**
+ * vgic_its_iter_next - Advances the iterator to the next entry in the ITS tables.
+ * @its: The VGIC ITS structure.
+ * @iter: The iterator to advance.
+ *
+ * This function moves the iterator to the next ITE within the current device,
+ * or to the first ITE of the next device if the current ITE is the last in
+ * the device. If the current device is the last device, the iterator is set
+ * to indicate the end of iteration.
+ */
+static void vgic_its_iter_next(struct vgic_its *its, struct vgic_its_iter *iter)
+{
+	struct its_device *dev = iter->dev;
+	struct its_ite *ite = iter->ite;
+
+	if (!ite || list_is_last(&ite->ite_list, &dev->itt_head)) {
+		if (list_is_last(&dev->dev_list, &its->device_list)) {
+			dev = NULL;
+			ite = NULL;
+		} else {
+			dev = list_next_entry(dev, dev_list);
+			ite = list_first_entry_or_null(&dev->itt_head,
+						       struct its_ite,
+						       ite_list);
+		}
+	} else {
+		ite = list_next_entry(ite, ite_list);
+	}
+
+	iter->dev = dev;
+	iter->ite = ite;
+}
+
+/**
+ * vgic_its_debug_start - Start function for the seq_file interface.
+ * @s: The seq_file structure.
+ * @pos: The starting position (offset).
+ *
+ * This function initializes the iterator to the beginning of the ITS tables
+ * and advances it to the specified position. It acquires the its_lock mutex
+ * to protect shared data.
+ *
+ * Return: An iterator pointer on success, NULL if no devices are found or
+ *         the end of the list is reached, or ERR_PTR(-ENOMEM) on memory
+ *         allocation failure.
+ */
+static void *vgic_its_debug_start(struct seq_file *s, loff_t *pos)
+{
+	struct vgic_its *its = s->private;
+	struct vgic_its_iter *iter;
+	struct its_device *dev;
+	loff_t offset = *pos;
+
+	mutex_lock(&its->its_lock);
+
+	dev = list_first_entry_or_null(&its->device_list,
+				       struct its_device, dev_list);
+	if (!dev)
+		return NULL;
+
+	iter = kmalloc(sizeof(*iter), GFP_KERNEL);
+	if (!iter)
+		return ERR_PTR(-ENOMEM);
+
+	iter->dev = dev;
+	iter->ite = list_first_entry_or_null(&dev->itt_head,
+					     struct its_ite, ite_list);
+
+	while (!end_of_iter(iter) && offset--)
+		vgic_its_iter_next(its, iter);
+
+	if (end_of_iter(iter)) {
+		kfree(iter);
+		return NULL;
+	}
+
+	return iter;
+}
+
+/**
+ * vgic_its_debug_next - Next function for the seq_file interface.
+ * @s: The seq_file structure.
+ * @v: The current iterator.
+ * @pos: The current position (offset).
+ *
+ * This function advances the iterator to the next entry and increments the
+ * position.
+ *
+ * Return: An iterator pointer on success, or NULL if the end of the list is
+ *         reached.
+ */
+static void *vgic_its_debug_next(struct seq_file *s, void *v, loff_t *pos)
+{
+	struct vgic_its *its = s->private;
+	struct vgic_its_iter *iter = v;
+
+	++*pos;
+	vgic_its_iter_next(its, iter);
+
+	if (end_of_iter(iter)) {
+		kfree(iter);
+		return NULL;
+	}
+	return iter;
+}
+
+/**
+ * vgic_its_debug_stop - Stop function for the seq_file interface.
+ * @s: The seq_file structure.
+ * @v: The current iterator.
+ *
+ * This function frees the iterator and releases the its_lock mutex.
+ */
+static void vgic_its_debug_stop(struct seq_file *s, void *v)
+{
+	struct vgic_its *its = s->private;
+	struct vgic_its_iter *iter = v;
+
+	if (!IS_ERR_OR_NULL(iter))
+		kfree(iter);
+	mutex_unlock(&its->its_lock);
+}
+
+/**
+ * vgic_its_debug_show - Show function for the seq_file interface.
+ * @s: The seq_file structure.
+ * @v: The current iterator.
+ *
+ * This function formats and prints the ITS table entry information to the
+ * seq_file output.
+ *
+ * Return: 0 on success.
+ */
+static int vgic_its_debug_show(struct seq_file *s, void *v)
+{
+	struct vgic_its_iter *iter = v;
+	struct its_device *dev = iter->dev;
+	struct its_ite *ite = iter->ite;
+
+	if (list_is_first(&ite->ite_list, &dev->itt_head)) {
+		seq_printf(s, "\n");
+		seq_printf(s, "Device ID: 0x%x, Event ID Range: [0 - %llu]\n",
+			   dev->device_id, BIT_ULL(dev->num_eventid_bits) - 1);
+		seq_printf(s, "EVENT_ID    INTID  HWINTID   TARGET   COL_ID HW\n");
+		seq_printf(s, "-----------------------------------------------\n");
+	}
+
+	if (ite && ite->irq && ite->collection) {
+		seq_printf(s, "%8u %8u %8u %8u %8u %2d\n",
+			   ite->event_id, ite->irq->intid, ite->irq->hwintid,
+			   ite->collection->target_addr,
+			   ite->collection->collection_id, ite->irq->hw);
+	}
+
+	return 0;
+}
+
+static const struct seq_operations vgic_its_debug_sops = {
+	.start = vgic_its_debug_start,
+	.next = vgic_its_debug_next,
+	.stop = vgic_its_debug_stop,
+	.show = vgic_its_debug_show
+};
+
+DEFINE_SEQ_ATTRIBUTE(vgic_its_debug);
+
+/**
+ * vgic_its_debug_init - Initializes the debugfs interface for VGIC ITS.
+ * @dev: The KVM device structure.
+ *
+ * This function creates a debugfs file named "vgic-its-state@%its_base"
+ * to expose the ITS table information.
+ *
+ * Return: 0 on success.
+ */
+int vgic_its_debug_init(struct kvm_device *dev)
+{
+	struct vgic_its *its = dev->private;
+	char *name;
+
+	name = kasprintf(GFP_KERNEL, "vgic-its-state@%llx", (u64)its->vgic_its_base);
+	if (!name)
+		return -ENOMEM;
+
+	debugfs_create_file(name, 0444, dev->kvm->debugfs_dentry, its, &vgic_its_debug_fops);
+
+	kfree(name);
+	return 0;
+}
+
+void vgic_its_debug_destroy(struct kvm_device *dev)
+{
+}
diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index fb96802799c6..569f9da9049f 100644
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -154,36 +154,6 @@ out_unlock:
	return irq;
 }
 
-struct its_device {
-	struct list_head dev_list;
-
-	/* the head for the list of ITTEs */
-	struct list_head itt_head;
-	u32 num_eventid_bits;
-	gpa_t itt_addr;
-	u32 device_id;
-};
-
-#define COLLECTION_NOT_MAPPED ((u32)~0)
-
-struct its_collection {
-	struct list_head coll_list;
-
-	u32 collection_id;
-	u32 target_addr;
-};
-
-#define its_is_collection_mapped(coll) ((coll) && \
-	((coll)->target_addr != COLLECTION_NOT_MAPPED))
-
-struct its_ite {
-	struct list_head ite_list;
-
-	struct vgic_irq *irq;
-	struct its_collection *collection;
-	u32 event_id;
-};
-
 /**
  * struct vgic_its_abi - ITS abi ops and settings
  * @cte_esz: collection table entry size
@@ -1938,6 +1908,8 @@ static void vgic_its_destroy(struct kvm_device *kvm_dev)
 
	mutex_lock(&its->its_lock);
 
+	vgic_its_debug_destroy(kvm_dev);
+
	vgic_its_free_device_list(kvm, its);
	vgic_its_free_collection_list(kvm, its);
	vgic_its_invalidate_cache(its);
@@ -2771,7 +2743,12 @@ static int vgic_its_set_attr(struct kvm_device *dev,
		if (ret)
			return ret;
 
-		return vgic_register_its_iodev(dev->kvm, its, addr);
+		ret = vgic_register_its_iodev(dev->kvm, its, addr);
+		if (ret)
+			return ret;
+
+		return vgic_its_debug_init(dev);
+
	}
	case KVM_DEV_ARM_VGIC_GRP_CTRL:
		return vgic_its_ctrl(dev->kvm, its, attr->attr);
diff --git a/arch/arm64/kvm/vgic/vgic-v3-nested.c b/arch/arm64/kvm/vgic/vgic-v3-nested.c
index bfa5bde1f106..4f6954c30674 100644
--- a/arch/arm64/kvm/vgic/vgic-v3-nested.c
+++ b/arch/arm64/kvm/vgic/vgic-v3-nested.c
@@ -240,9 +240,6 @@ static void vgic_v3_create_shadow_lr(struct kvm_vcpu *vcpu,
			goto next;
		}
 
-		/* It is illegal to have the EOI bit set with HW */
-		lr &= ~ICH_LR_EOI;
-
		/* Translate the virtual mapping to the real one */
		lr &= ~ICH_LR_PHYS_ID_MASK;
		lr |= FIELD_PREP(ICH_LR_PHYS_ID_MASK, (u64)irq->hwintid);
diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h
index 0c5a63712702..4349084cb9a6 100644
--- a/arch/arm64/kvm/vgic/vgic.h
+++ b/arch/arm64/kvm/vgic/vgic.h
@@ -172,6 +172,36 @@ struct vgic_reg_attr {
	gpa_t addr;
 };
 
+struct its_device {
+	struct list_head dev_list;
+
+	/* the head for the list of ITTEs */
+	struct list_head itt_head;
+	u32 num_eventid_bits;
+	gpa_t itt_addr;
+	u32 device_id;
+};
+
+#define COLLECTION_NOT_MAPPED ((u32)~0)
+
+struct its_collection {
+	struct list_head coll_list;
+
+	u32 collection_id;
+	u32 target_addr;
+};
+
+#define its_is_collection_mapped(coll) ((coll) && \
+	((coll)->target_addr != COLLECTION_NOT_MAPPED))
+
+struct its_ite {
+	struct list_head ite_list;
+
+	struct vgic_irq *irq;
+	struct its_collection *collection;
+	u32 event_id;
+};
+
 int vgic_v3_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr,
		       struct vgic_reg_attr *reg_attr);
 int vgic_v2_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr,
@@ -359,4 +389,7 @@ void vgic_v3_put_nested(struct kvm_vcpu *vcpu);
 void vgic_v3_handle_nested_maint_irq(struct kvm_vcpu *vcpu);
 void vgic_v3_nested_update_mi(struct kvm_vcpu *vcpu);
 
+int vgic_its_debug_init(struct kvm_device *dev);
+void vgic_its_debug_destroy(struct kvm_device *dev);
+
 #endif
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index 39b154d2198f..10effd4cff6b 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -95,6 +95,7 @@ WORKAROUND_2457168
 WORKAROUND_2645198
 WORKAROUND_2658417
 WORKAROUND_AMPERE_AC03_CPU_38
+WORKAROUND_AMPERE_AC04_CPU_23
 WORKAROUND_TRBE_OVERWRITE_FILL_MODE
 WORKAROUND_TSB_FLUSH_FAILURE
 WORKAROUND_TRBE_WRITE_OUT_OF_RANGE
diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm
index f62b0a5aba35..d37072054a3d 100644
--- a/tools/testing/selftests/kvm/Makefile.kvm
+++ b/tools/testing/selftests/kvm/Makefile.kvm
@@ -147,6 +147,7 @@ TEST_GEN_PROGS_arm64 = $(TEST_GEN_PROGS_COMMON)
 TEST_GEN_PROGS_arm64 += arm64/aarch32_id_regs
 TEST_GEN_PROGS_arm64 += arm64/arch_timer_edge_cases
 TEST_GEN_PROGS_arm64 += arm64/debug-exceptions
+TEST_GEN_PROGS_arm64 += arm64/host_sve
 TEST_GEN_PROGS_arm64 += arm64/hypercalls
 TEST_GEN_PROGS_arm64 += arm64/mmio_abort
 TEST_GEN_PROGS_arm64 += arm64/page_fault_test
diff --git a/tools/testing/selftests/kvm/arm64/host_sve.c b/tools/testing/selftests/kvm/arm64/host_sve.c
new file mode 100644
index 000000000000..3826772fd470
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/host_sve.c
@@ -0,0 +1,127 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Host SVE: Check FPSIMD/SVE/SME save/restore over KVM_RUN ioctls.
+ *
+ * Copyright 2025 Arm, Ltd
+ */
+
+#include <errno.h>
+#include <signal.h>
+#include <sys/auxv.h>
+#include <asm/kvm.h>
+#include <kvm_util.h>
+
+#include "ucall_common.h"
+
+static void guest_code(void)
+{
+	for (int i = 0; i < 10; i++) {
+		GUEST_UCALL_NONE();
+	}
+
+	GUEST_DONE();
+}
+
+void handle_sigill(int sig, siginfo_t *info, void *ctx)
+{
+	ucontext_t *uctx = ctx;
+
+	printf(" < host signal %d >\n", sig);
+
+	/*
+	 * Skip the UDF
+	 */
+	uctx->uc_mcontext.pc += 4;
+}
+
+void register_sigill_handler(void)
+{
+	struct sigaction sa = {
+		.sa_sigaction = handle_sigill,
+		.sa_flags = SA_SIGINFO,
+	};
+	sigaction(SIGILL, &sa, NULL);
+}
+
+static void do_sve_roundtrip(void)
+{
+	unsigned long before, after;
+
+	/*
+	 * Set all bits in a predicate register, force a save/restore via a
+	 * SIGILL (which handle_sigill() will recover from), then report
+	 * whether the value has changed.
+	 */
+	asm volatile(
+	"	.arch_extension sve\n"
+	"	ptrue	p0.B\n"
+	"	cntp	%[before], p0, p0.B\n"
+	"	udf #0\n"
+	"	cntp	%[after], p0, p0.B\n"
+	: [before] "=r" (before),
+	  [after] "=r" (after)
+	:
+	: "p0"
+	);
+
+	if (before != after) {
+		TEST_FAIL("Signal roundtrip discarded predicate bits (%ld => %ld)\n",
+			  before, after);
+	} else {
+		printf("Signal roundtrip preserved predicate bits (%ld => %ld)\n",
+		       before, after);
+	}
+}
+
+static void test_run(void)
+{
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+	struct ucall uc;
+	bool guest_done = false;
+
+	register_sigill_handler();
+
+	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+	do_sve_roundtrip();
+
+	while (!guest_done) {
+
+		printf("Running VCPU...\n");
+		vcpu_run(vcpu);
+
+		switch (get_ucall(vcpu, &uc)) {
+		case UCALL_NONE:
+			do_sve_roundtrip();
+			do_sve_roundtrip();
+			break;
+		case UCALL_DONE:
+			guest_done = true;
+			break;
+		case UCALL_ABORT:
+			REPORT_GUEST_ASSERT(uc);
+			break;
+		default:
+			TEST_FAIL("Unexpected guest exit");
+		}
+	}
+
+	kvm_vm_free(vm);
+}
+
+int main(void)
+{
+	/*
+	 * This is testing the host environment, we don't care about
+	 * guest SVE support.
+	 */
+	if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) {
+		printf("SVE not supported\n");
+		return KSFT_SKIP;
+	}
+
+	test_run();
+	return 0;
+}