Diffstat (limited to 'arch/riscv/kvm')
28 files changed, 2015 insertions, 1038 deletions
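A recurring pattern in the hunks below is that the g-stage page-table helpers moved into gstage.c no longer reach into kvm->arch directly; each caller (mmu.c, aia_imsic.c, tlb.c) first fills a struct kvm_gstage with the kvm pointer, flags, VMID and PGD and passes it to kvm_riscv_gstage_*(). The following standalone C sketch only models that calling convention with simplified placeholder types; it is not kernel code, and the field names are taken from the hunks below.

/*
 * Illustration only: minimal model of the kvm_gstage caller pattern
 * introduced by this series. Types are simplified placeholders.
 */
#include <stdint.h>
#include <stdio.h>

struct mock_kvm { unsigned long vmid; void *pgd; };

struct mock_gstage {
	struct mock_kvm *kvm;
	unsigned long flags;	/* e.g. KVM_GSTAGE_FLAGS_LOCAL in the patch */
	unsigned long vmid;
	void *pgd;
};

/* Stand-in for helpers such as kvm_riscv_gstage_wp_range(). */
static void mock_gstage_wp_range(struct mock_gstage *gstage,
				 uint64_t start, uint64_t end)
{
	printf("write-protect GPA 0x%llx-0x%llx for VMID %lu\n",
	       (unsigned long long)start, (unsigned long long)end,
	       gstage->vmid);
}

int main(void)
{
	struct mock_kvm kvm = { .vmid = 1, .pgd = 0 };
	struct mock_gstage gstage = {
		.kvm = &kvm,
		.flags = 0,
		.vmid = kvm.vmid,	/* the patch reads this with READ_ONCE() */
		.pgd = kvm.pgd,
	};

	mock_gstage_wp_range(&gstage, 0x80000000ULL, 0x80200000ULL);
	return 0;
}

The same explicit-context idea shows up in tlb.c, where the hfence helpers now carry the VMID in the request itself instead of re-reading kvm->arch.vmid at processing time.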
diff --git a/arch/riscv/kvm/Kconfig b/arch/riscv/kvm/Kconfig index 0c3cbb0915ff..77379f77840a 100644 --- a/arch/riscv/kvm/Kconfig +++ b/arch/riscv/kvm/Kconfig @@ -18,18 +18,18 @@ menuconfig VIRTUALIZATION if VIRTUALIZATION config KVM - tristate "Kernel-based Virtual Machine (KVM) support (EXPERIMENTAL)" + tristate "Kernel-based Virtual Machine (KVM) support" depends on RISCV_SBI && MMU select HAVE_KVM_IRQCHIP select HAVE_KVM_IRQ_ROUTING select HAVE_KVM_MSI - select HAVE_KVM_VCPU_ASYNC_IOCTL select HAVE_KVM_READONLY_MEM + select HAVE_KVM_DIRTY_RING_ACQ_REL select KVM_COMMON select KVM_GENERIC_DIRTYLOG_READ_PROTECT select KVM_GENERIC_HARDWARE_ENABLING select KVM_MMIO - select KVM_XFER_TO_GUEST_WORK + select VIRT_XFER_TO_GUEST_WORK select KVM_GENERIC_MMU_NOTIFIER select SCHED_INFO select GUEST_PERF_EVENTS if PERF_EVENTS diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile index 0fb1840c3e0a..3b8afb038b35 100644 --- a/arch/riscv/kvm/Makefile +++ b/arch/riscv/kvm/Makefile @@ -14,6 +14,7 @@ kvm-y += aia.o kvm-y += aia_aplic.o kvm-y += aia_device.o kvm-y += aia_imsic.o +kvm-y += gstage.o kvm-y += main.o kvm-y += mmu.o kvm-y += nacl.o @@ -26,10 +27,13 @@ kvm-y += vcpu_onereg.o kvm-$(CONFIG_RISCV_PMU_SBI) += vcpu_pmu.o kvm-y += vcpu_sbi.o kvm-y += vcpu_sbi_base.o +kvm-y += vcpu_sbi_forward.o +kvm-y += vcpu_sbi_fwft.o kvm-y += vcpu_sbi_hsm.o kvm-$(CONFIG_RISCV_PMU_SBI) += vcpu_sbi_pmu.o kvm-y += vcpu_sbi_replace.o kvm-y += vcpu_sbi_sta.o +kvm-y += vcpu_sbi_system.o kvm-$(CONFIG_RISCV_SBI_V01) += vcpu_sbi_v01.o kvm-y += vcpu_switch.o kvm-y += vcpu_timer.o diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c index dcced4db7fe8..dad318185660 100644 --- a/arch/riscv/kvm/aia.c +++ b/arch/riscv/kvm/aia.c @@ -30,28 +30,6 @@ unsigned int kvm_riscv_aia_nr_hgei; unsigned int kvm_riscv_aia_max_ids; DEFINE_STATIC_KEY_FALSE(kvm_riscv_aia_available); -static int aia_find_hgei(struct kvm_vcpu *owner) -{ - int i, hgei; - unsigned long flags; - struct aia_hgei_control *hgctrl = get_cpu_ptr(&aia_hgei); - - raw_spin_lock_irqsave(&hgctrl->lock, flags); - - hgei = -1; - for (i = 1; i <= kvm_riscv_aia_nr_hgei; i++) { - if (hgctrl->owners[i] == owner) { - hgei = i; - break; - } - } - - raw_spin_unlock_irqrestore(&hgctrl->lock, flags); - - put_cpu_ptr(&aia_hgei); - return hgei; -} - static inline unsigned long aia_hvictl_value(bool ext_irq_pending) { unsigned long hvictl; @@ -95,7 +73,6 @@ void kvm_riscv_vcpu_aia_sync_interrupts(struct kvm_vcpu *vcpu) bool kvm_riscv_vcpu_aia_has_interrupts(struct kvm_vcpu *vcpu, u64 mask) { - int hgei; unsigned long seip; if (!kvm_riscv_aia_available()) @@ -114,11 +91,7 @@ bool kvm_riscv_vcpu_aia_has_interrupts(struct kvm_vcpu *vcpu, u64 mask) if (!kvm_riscv_aia_initialized(vcpu->kvm) || !seip) return false; - hgei = aia_find_hgei(vcpu); - if (hgei > 0) - return !!(ncsr_read(CSR_HGEIP) & BIT(hgei)); - - return false; + return kvm_riscv_vcpu_aia_imsic_has_interrupt(vcpu); } void kvm_riscv_vcpu_aia_update_hvip(struct kvm_vcpu *vcpu) @@ -164,6 +137,9 @@ void kvm_riscv_vcpu_aia_load(struct kvm_vcpu *vcpu, int cpu) csr_write(CSR_HVIPRIO2H, csr->hviprio2h); #endif } + + if (kvm_riscv_aia_initialized(vcpu->kvm)) + kvm_riscv_vcpu_aia_imsic_load(vcpu, cpu); } void kvm_riscv_vcpu_aia_put(struct kvm_vcpu *vcpu) @@ -174,6 +150,9 @@ void kvm_riscv_vcpu_aia_put(struct kvm_vcpu *vcpu) if (!kvm_riscv_aia_available()) return; + if (kvm_riscv_aia_initialized(vcpu->kvm)) + kvm_riscv_vcpu_aia_imsic_put(vcpu); + if (kvm_riscv_nacl_available()) { nsh = nacl_shmem(); csr->vsiselect = 
nacl_csr_read(nsh, CSR_VSISELECT); @@ -472,22 +451,6 @@ void kvm_riscv_aia_free_hgei(int cpu, int hgei) raw_spin_unlock_irqrestore(&hgctrl->lock, flags); } -void kvm_riscv_aia_wakeon_hgei(struct kvm_vcpu *owner, bool enable) -{ - int hgei; - - if (!kvm_riscv_aia_available()) - return; - - hgei = aia_find_hgei(owner); - if (hgei > 0) { - if (enable) - csr_set(CSR_HGEIE, BIT(hgei)); - else - csr_clear(CSR_HGEIE, BIT(hgei)); - } -} - static irqreturn_t hgei_interrupt(int irq, void *dev_id) { int i; @@ -590,7 +553,7 @@ void kvm_riscv_aia_enable(void) csr_set(CSR_HIE, BIT(IRQ_S_GEXT)); /* Enable IRQ filtering for overflow interrupt only if sscofpmf is present */ if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSCOFPMF)) - csr_write(CSR_HVIEN, BIT(IRQ_PMU_OVF)); + csr_set(CSR_HVIEN, BIT(IRQ_PMU_OVF)); } void kvm_riscv_aia_disable(void) diff --git a/arch/riscv/kvm/aia_device.c b/arch/riscv/kvm/aia_device.c index 39cd26af5a69..b195a93add1c 100644 --- a/arch/riscv/kvm/aia_device.c +++ b/arch/riscv/kvm/aia_device.c @@ -12,36 +12,6 @@ #include <linux/kvm_host.h> #include <linux/uaccess.h> -static void unlock_vcpus(struct kvm *kvm, int vcpu_lock_idx) -{ - struct kvm_vcpu *tmp_vcpu; - - for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) { - tmp_vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx); - mutex_unlock(&tmp_vcpu->mutex); - } -} - -static void unlock_all_vcpus(struct kvm *kvm) -{ - unlock_vcpus(kvm, atomic_read(&kvm->online_vcpus) - 1); -} - -static bool lock_all_vcpus(struct kvm *kvm) -{ - struct kvm_vcpu *tmp_vcpu; - unsigned long c; - - kvm_for_each_vcpu(c, tmp_vcpu, kvm) { - if (!mutex_trylock(&tmp_vcpu->mutex)) { - unlock_vcpus(kvm, c - 1); - return false; - } - } - - return true; -} - static int aia_create(struct kvm_device *dev, u32 type) { int ret; @@ -53,7 +23,7 @@ static int aia_create(struct kvm_device *dev, u32 type) return -EEXIST; ret = -EBUSY; - if (!lock_all_vcpus(kvm)) + if (kvm_trylock_all_vcpus(kvm)) return ret; kvm_for_each_vcpu(i, vcpu, kvm) { @@ -65,7 +35,7 @@ static int aia_create(struct kvm_device *dev, u32 type) kvm->arch.aia.in_kernel = true; out_unlock: - unlock_all_vcpus(kvm); + kvm_unlock_all_vcpus(kvm); return ret; } @@ -526,12 +496,10 @@ int kvm_riscv_vcpu_aia_update(struct kvm_vcpu *vcpu) void kvm_riscv_vcpu_aia_reset(struct kvm_vcpu *vcpu) { struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr; - struct kvm_vcpu_aia_csr *reset_csr = - &vcpu->arch.aia_context.guest_reset_csr; if (!kvm_riscv_aia_available()) return; - memcpy(csr, reset_csr, sizeof(*csr)); + memset(csr, 0, sizeof(*csr)); /* Proceed only if AIA was initialized successfully */ if (!kvm_riscv_aia_initialized(vcpu->kvm)) @@ -541,12 +509,12 @@ void kvm_riscv_vcpu_aia_reset(struct kvm_vcpu *vcpu) kvm_riscv_vcpu_aia_imsic_reset(vcpu); } -int kvm_riscv_vcpu_aia_init(struct kvm_vcpu *vcpu) +void kvm_riscv_vcpu_aia_init(struct kvm_vcpu *vcpu) { struct kvm_vcpu_aia *vaia = &vcpu->arch.aia_context; if (!kvm_riscv_aia_available()) - return 0; + return; /* * We don't do any memory allocations over here because these @@ -558,8 +526,6 @@ int kvm_riscv_vcpu_aia_init(struct kvm_vcpu *vcpu) /* Initialize default values in AIA vcpu context */ vaia->imsic_addr = KVM_RISCV_AIA_UNDEF_ADDR; vaia->hart_index = vcpu->vcpu_idx; - - return 0; } void kvm_riscv_vcpu_aia_deinit(struct kvm_vcpu *vcpu) diff --git a/arch/riscv/kvm/aia_imsic.c b/arch/riscv/kvm/aia_imsic.c index a8085cd8215e..e597e86491c3 100644 --- a/arch/riscv/kvm/aia_imsic.c +++ b/arch/riscv/kvm/aia_imsic.c @@ -16,6 +16,7 @@ #include <linux/swab.h> #include 
<kvm/iodev.h> #include <asm/csr.h> +#include <asm/kvm_mmu.h> #define IMSIC_MAX_EIX (IMSIC_MAX_ID / BITS_PER_TYPE(u64)) @@ -676,6 +677,60 @@ static void imsic_swfile_update(struct kvm_vcpu *vcpu, imsic_swfile_extirq_update(vcpu); } +bool kvm_riscv_vcpu_aia_imsic_has_interrupt(struct kvm_vcpu *vcpu) +{ + struct imsic *imsic = vcpu->arch.aia_context.imsic_state; + unsigned long flags; + bool ret = false; + + /* + * The IMSIC SW-file directly injects interrupt via hvip so + * only check for interrupt when IMSIC VS-file is being used. + */ + + read_lock_irqsave(&imsic->vsfile_lock, flags); + if (imsic->vsfile_cpu > -1) { + /* + * This function is typically called from kvm_vcpu_block() via + * kvm_arch_vcpu_runnable() upon WFI trap. The kvm_vcpu_block() + * can be preempted and the blocking VCPU might resume on a + * different CPU. This means it is possible that current CPU + * does not match the imsic->vsfile_cpu hence this function + * must check imsic->vsfile_cpu before accessing HGEIP CSR. + */ + if (imsic->vsfile_cpu != vcpu->cpu) + ret = true; + else + ret = !!(csr_read(CSR_HGEIP) & BIT(imsic->vsfile_hgei)); + } + read_unlock_irqrestore(&imsic->vsfile_lock, flags); + + return ret; +} + +void kvm_riscv_vcpu_aia_imsic_load(struct kvm_vcpu *vcpu, int cpu) +{ + /* + * No need to explicitly clear HGEIE CSR bits because the + * hgei interrupt handler (aka hgei_interrupt()) will always + * clear it for us. + */ +} + +void kvm_riscv_vcpu_aia_imsic_put(struct kvm_vcpu *vcpu) +{ + struct imsic *imsic = vcpu->arch.aia_context.imsic_state; + unsigned long flags; + + if (!kvm_vcpu_is_blocking(vcpu)) + return; + + read_lock_irqsave(&imsic->vsfile_lock, flags); + if (imsic->vsfile_cpu > -1) + csr_set(CSR_HGEIE, BIT(imsic->vsfile_hgei)); + read_unlock_irqrestore(&imsic->vsfile_lock, flags); +} + void kvm_riscv_vcpu_aia_imsic_release(struct kvm_vcpu *vcpu) { unsigned long flags; @@ -703,9 +758,8 @@ void kvm_riscv_vcpu_aia_imsic_release(struct kvm_vcpu *vcpu) */ /* Purge the G-stage mapping */ - kvm_riscv_gstage_iounmap(vcpu->kvm, - vcpu->arch.aia_context.imsic_addr, - IMSIC_MMIO_PAGE_SZ); + kvm_riscv_mmu_iounmap(vcpu->kvm, vcpu->arch.aia_context.imsic_addr, + IMSIC_MMIO_PAGE_SZ); /* TODO: Purge the IOMMU mapping ??? */ @@ -760,7 +814,7 @@ int kvm_riscv_vcpu_aia_imsic_update(struct kvm_vcpu *vcpu) /* For HW acceleration mode, we can't continue */ if (kvm->arch.aia.mode == KVM_DEV_RISCV_AIA_MODE_HWACCEL) { run->fail_entry.hardware_entry_failure_reason = - CSR_HSTATUS; + KVM_EXIT_FAIL_ENTRY_NO_VSFILE; run->fail_entry.cpu = vcpu->cpu; run->exit_reason = KVM_EXIT_FAIL_ENTRY; return 0; @@ -781,13 +835,16 @@ int kvm_riscv_vcpu_aia_imsic_update(struct kvm_vcpu *vcpu) * producers to the new IMSIC VS-file. 
*/ + /* Ensure HGEIE CSR bit is zero before using the new IMSIC VS-file */ + csr_clear(CSR_HGEIE, BIT(new_vsfile_hgei)); + /* Zero-out new IMSIC VS-file */ imsic_vsfile_local_clear(new_vsfile_hgei, imsic->nr_hw_eix); /* Update G-stage mapping for the new IMSIC VS-file */ - ret = kvm_riscv_gstage_ioremap(kvm, vcpu->arch.aia_context.imsic_addr, - new_vsfile_pa, IMSIC_MMIO_PAGE_SZ, - true, true); + ret = kvm_riscv_mmu_ioremap(kvm, vcpu->arch.aia_context.imsic_addr, + new_vsfile_pa, IMSIC_MMIO_PAGE_SZ, + true, true); if (ret) goto fail_free_vsfile_hgei; @@ -974,7 +1031,6 @@ int kvm_riscv_vcpu_aia_imsic_inject(struct kvm_vcpu *vcpu, if (imsic->vsfile_cpu >= 0) { writel(iid, imsic->vsfile_va + IMSIC_MMIO_SETIPNUM_LE); - kvm_vcpu_kick(vcpu); } else { eix = &imsic->swfile->eix[iid / BITS_PER_TYPE(u64)]; set_bit(iid & (BITS_PER_TYPE(u64) - 1), eix->eip); diff --git a/arch/riscv/kvm/gstage.c b/arch/riscv/kvm/gstage.c new file mode 100644 index 000000000000..b67d60d722c2 --- /dev/null +++ b/arch/riscv/kvm/gstage.c @@ -0,0 +1,359 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 Western Digital Corporation or its affiliates. + * Copyright (c) 2025 Ventana Micro Systems Inc. + */ + +#include <linux/bitops.h> +#include <linux/errno.h> +#include <linux/kvm_host.h> +#include <linux/module.h> +#include <linux/pgtable.h> +#include <asm/kvm_gstage.h> + +#ifdef CONFIG_64BIT +unsigned long kvm_riscv_gstage_mode __ro_after_init = HGATP_MODE_SV39X4; +unsigned long kvm_riscv_gstage_pgd_levels __ro_after_init = 3; +#else +unsigned long kvm_riscv_gstage_mode __ro_after_init = HGATP_MODE_SV32X4; +unsigned long kvm_riscv_gstage_pgd_levels __ro_after_init = 2; +#endif + +#define gstage_pte_leaf(__ptep) \ + (pte_val(*(__ptep)) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)) + +static inline unsigned long gstage_pte_index(gpa_t addr, u32 level) +{ + unsigned long mask; + unsigned long shift = HGATP_PAGE_SHIFT + (kvm_riscv_gstage_index_bits * level); + + if (level == (kvm_riscv_gstage_pgd_levels - 1)) + mask = (PTRS_PER_PTE * (1UL << kvm_riscv_gstage_pgd_xbits)) - 1; + else + mask = PTRS_PER_PTE - 1; + + return (addr >> shift) & mask; +} + +static inline unsigned long gstage_pte_page_vaddr(pte_t pte) +{ + return (unsigned long)pfn_to_virt(__page_val_to_pfn(pte_val(pte))); +} + +static int gstage_page_size_to_level(unsigned long page_size, u32 *out_level) +{ + u32 i; + unsigned long psz = 1UL << 12; + + for (i = 0; i < kvm_riscv_gstage_pgd_levels; i++) { + if (page_size == (psz << (i * kvm_riscv_gstage_index_bits))) { + *out_level = i; + return 0; + } + } + + return -EINVAL; +} + +static int gstage_level_to_page_order(u32 level, unsigned long *out_pgorder) +{ + if (kvm_riscv_gstage_pgd_levels < level) + return -EINVAL; + + *out_pgorder = 12 + (level * kvm_riscv_gstage_index_bits); + return 0; +} + +static int gstage_level_to_page_size(u32 level, unsigned long *out_pgsize) +{ + int rc; + unsigned long page_order = PAGE_SHIFT; + + rc = gstage_level_to_page_order(level, &page_order); + if (rc) + return rc; + + *out_pgsize = BIT(page_order); + return 0; +} + +bool kvm_riscv_gstage_get_leaf(struct kvm_gstage *gstage, gpa_t addr, + pte_t **ptepp, u32 *ptep_level) +{ + pte_t *ptep; + u32 current_level = kvm_riscv_gstage_pgd_levels - 1; + + *ptep_level = current_level; + ptep = (pte_t *)gstage->pgd; + ptep = &ptep[gstage_pte_index(addr, current_level)]; + while (ptep && pte_val(ptep_get(ptep))) { + if (gstage_pte_leaf(ptep)) { + *ptep_level = current_level; + *ptepp = ptep; + return true; + } + + if (current_level) { 
+ current_level--; + *ptep_level = current_level; + ptep = (pte_t *)gstage_pte_page_vaddr(ptep_get(ptep)); + ptep = &ptep[gstage_pte_index(addr, current_level)]; + } else { + ptep = NULL; + } + } + + return false; +} + +static void gstage_tlb_flush(struct kvm_gstage *gstage, u32 level, gpa_t addr) +{ + unsigned long order = PAGE_SHIFT; + + if (gstage_level_to_page_order(level, &order)) + return; + addr &= ~(BIT(order) - 1); + + if (gstage->flags & KVM_GSTAGE_FLAGS_LOCAL) + kvm_riscv_local_hfence_gvma_vmid_gpa(gstage->vmid, addr, BIT(order), order); + else + kvm_riscv_hfence_gvma_vmid_gpa(gstage->kvm, -1UL, 0, addr, BIT(order), order, + gstage->vmid); +} + +int kvm_riscv_gstage_set_pte(struct kvm_gstage *gstage, + struct kvm_mmu_memory_cache *pcache, + const struct kvm_gstage_mapping *map) +{ + u32 current_level = kvm_riscv_gstage_pgd_levels - 1; + pte_t *next_ptep = (pte_t *)gstage->pgd; + pte_t *ptep = &next_ptep[gstage_pte_index(map->addr, current_level)]; + + if (current_level < map->level) + return -EINVAL; + + while (current_level != map->level) { + if (gstage_pte_leaf(ptep)) + return -EEXIST; + + if (!pte_val(ptep_get(ptep))) { + if (!pcache) + return -ENOMEM; + next_ptep = kvm_mmu_memory_cache_alloc(pcache); + if (!next_ptep) + return -ENOMEM; + set_pte(ptep, pfn_pte(PFN_DOWN(__pa(next_ptep)), + __pgprot(_PAGE_TABLE))); + } else { + if (gstage_pte_leaf(ptep)) + return -EEXIST; + next_ptep = (pte_t *)gstage_pte_page_vaddr(ptep_get(ptep)); + } + + current_level--; + ptep = &next_ptep[gstage_pte_index(map->addr, current_level)]; + } + + if (pte_val(*ptep) != pte_val(map->pte)) { + set_pte(ptep, map->pte); + if (gstage_pte_leaf(ptep)) + gstage_tlb_flush(gstage, current_level, map->addr); + } + + return 0; +} + +int kvm_riscv_gstage_map_page(struct kvm_gstage *gstage, + struct kvm_mmu_memory_cache *pcache, + gpa_t gpa, phys_addr_t hpa, unsigned long page_size, + bool page_rdonly, bool page_exec, + struct kvm_gstage_mapping *out_map) +{ + pgprot_t prot; + int ret; + + out_map->addr = gpa; + out_map->level = 0; + + ret = gstage_page_size_to_level(page_size, &out_map->level); + if (ret) + return ret; + + /* + * A RISC-V implementation can choose to either: + * 1) Update 'A' and 'D' PTE bits in hardware + * 2) Generate page fault when 'A' and/or 'D' bits are not set + * PTE so that software can update these bits. + * + * We support both options mentioned above. To achieve this, we + * always set 'A' and 'D' PTE bits at time of creating G-stage + * mapping. To support KVM dirty page logging with both options + * mentioned above, we will write-protect G-stage PTEs to track + * dirty pages. 
+ */ + + if (page_exec) { + if (page_rdonly) + prot = PAGE_READ_EXEC; + else + prot = PAGE_WRITE_EXEC; + } else { + if (page_rdonly) + prot = PAGE_READ; + else + prot = PAGE_WRITE; + } + out_map->pte = pfn_pte(PFN_DOWN(hpa), prot); + out_map->pte = pte_mkdirty(out_map->pte); + + return kvm_riscv_gstage_set_pte(gstage, pcache, out_map); +} + +void kvm_riscv_gstage_op_pte(struct kvm_gstage *gstage, gpa_t addr, + pte_t *ptep, u32 ptep_level, enum kvm_riscv_gstage_op op) +{ + int i, ret; + pte_t old_pte, *next_ptep; + u32 next_ptep_level; + unsigned long next_page_size, page_size; + + ret = gstage_level_to_page_size(ptep_level, &page_size); + if (ret) + return; + + WARN_ON(addr & (page_size - 1)); + + if (!pte_val(ptep_get(ptep))) + return; + + if (ptep_level && !gstage_pte_leaf(ptep)) { + next_ptep = (pte_t *)gstage_pte_page_vaddr(ptep_get(ptep)); + next_ptep_level = ptep_level - 1; + ret = gstage_level_to_page_size(next_ptep_level, &next_page_size); + if (ret) + return; + + if (op == GSTAGE_OP_CLEAR) + set_pte(ptep, __pte(0)); + for (i = 0; i < PTRS_PER_PTE; i++) + kvm_riscv_gstage_op_pte(gstage, addr + i * next_page_size, + &next_ptep[i], next_ptep_level, op); + if (op == GSTAGE_OP_CLEAR) + put_page(virt_to_page(next_ptep)); + } else { + old_pte = *ptep; + if (op == GSTAGE_OP_CLEAR) + set_pte(ptep, __pte(0)); + else if (op == GSTAGE_OP_WP) + set_pte(ptep, __pte(pte_val(ptep_get(ptep)) & ~_PAGE_WRITE)); + if (pte_val(*ptep) != pte_val(old_pte)) + gstage_tlb_flush(gstage, ptep_level, addr); + } +} + +void kvm_riscv_gstage_unmap_range(struct kvm_gstage *gstage, + gpa_t start, gpa_t size, bool may_block) +{ + int ret; + pte_t *ptep; + u32 ptep_level; + bool found_leaf; + unsigned long page_size; + gpa_t addr = start, end = start + size; + + while (addr < end) { + found_leaf = kvm_riscv_gstage_get_leaf(gstage, addr, &ptep, &ptep_level); + ret = gstage_level_to_page_size(ptep_level, &page_size); + if (ret) + break; + + if (!found_leaf) + goto next; + + if (!(addr & (page_size - 1)) && ((end - addr) >= page_size)) + kvm_riscv_gstage_op_pte(gstage, addr, ptep, + ptep_level, GSTAGE_OP_CLEAR); + +next: + addr += page_size; + + /* + * If the range is too large, release the kvm->mmu_lock + * to prevent starvation and lockup detector warnings. 
+ */ + if (!(gstage->flags & KVM_GSTAGE_FLAGS_LOCAL) && may_block && addr < end) + cond_resched_lock(&gstage->kvm->mmu_lock); + } +} + +void kvm_riscv_gstage_wp_range(struct kvm_gstage *gstage, gpa_t start, gpa_t end) +{ + int ret; + pte_t *ptep; + u32 ptep_level; + bool found_leaf; + gpa_t addr = start; + unsigned long page_size; + + while (addr < end) { + found_leaf = kvm_riscv_gstage_get_leaf(gstage, addr, &ptep, &ptep_level); + ret = gstage_level_to_page_size(ptep_level, &page_size); + if (ret) + break; + + if (!found_leaf) + goto next; + + if (!(addr & (page_size - 1)) && ((end - addr) >= page_size)) + kvm_riscv_gstage_op_pte(gstage, addr, ptep, + ptep_level, GSTAGE_OP_WP); + +next: + addr += page_size; + } +} + +void __init kvm_riscv_gstage_mode_detect(void) +{ +#ifdef CONFIG_64BIT + /* Try Sv57x4 G-stage mode */ + csr_write(CSR_HGATP, HGATP_MODE_SV57X4 << HGATP_MODE_SHIFT); + if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV57X4) { + kvm_riscv_gstage_mode = HGATP_MODE_SV57X4; + kvm_riscv_gstage_pgd_levels = 5; + goto done; + } + + /* Try Sv48x4 G-stage mode */ + csr_write(CSR_HGATP, HGATP_MODE_SV48X4 << HGATP_MODE_SHIFT); + if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV48X4) { + kvm_riscv_gstage_mode = HGATP_MODE_SV48X4; + kvm_riscv_gstage_pgd_levels = 4; + goto done; + } + + /* Try Sv39x4 G-stage mode */ + csr_write(CSR_HGATP, HGATP_MODE_SV39X4 << HGATP_MODE_SHIFT); + if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV39X4) { + kvm_riscv_gstage_mode = HGATP_MODE_SV39X4; + kvm_riscv_gstage_pgd_levels = 3; + goto done; + } +#else /* CONFIG_32BIT */ + /* Try Sv32x4 G-stage mode */ + csr_write(CSR_HGATP, HGATP_MODE_SV32X4 << HGATP_MODE_SHIFT); + if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV32X4) { + kvm_riscv_gstage_mode = HGATP_MODE_SV32X4; + kvm_riscv_gstage_pgd_levels = 2; + goto done; + } +#endif + + /* KVM depends on !HGATP_MODE_OFF */ + kvm_riscv_gstage_mode = HGATP_MODE_OFF; + kvm_riscv_gstage_pgd_levels = 0; + +done: + csr_write(CSR_HGATP, 0); + kvm_riscv_local_hfence_gvma_all(); +} diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c index 1fa8be5ee509..45536af521f0 100644 --- a/arch/riscv/kvm/main.c +++ b/arch/riscv/kvm/main.c @@ -11,9 +11,22 @@ #include <linux/module.h> #include <linux/kvm_host.h> #include <asm/cpufeature.h> +#include <asm/kvm_mmu.h> #include <asm/kvm_nacl.h> #include <asm/sbi.h> +DEFINE_STATIC_KEY_FALSE(kvm_riscv_vsstage_tlb_no_gpa); + +static void kvm_riscv_setup_vendor_features(void) +{ + /* Andes AX66: split two-stage TLBs */ + if (riscv_cached_mvendorid(0) == ANDES_VENDOR_ID && + (riscv_cached_marchid(0) & 0xFFFF) == 0x8A66) { + static_branch_enable(&kvm_riscv_vsstage_tlb_no_gpa); + kvm_info("VS-stage TLB does not cache guest physical address and VMID\n"); + } +} + long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -92,6 +105,23 @@ static int __init riscv_kvm_init(void) return rc; kvm_riscv_gstage_mode_detect(); + switch (kvm_riscv_gstage_mode) { + case HGATP_MODE_SV32X4: + str = "Sv32x4"; + break; + case HGATP_MODE_SV39X4: + str = "Sv39x4"; + break; + case HGATP_MODE_SV48X4: + str = "Sv48x4"; + break; + case HGATP_MODE_SV57X4: + str = "Sv57x4"; + break; + default: + kvm_riscv_nacl_exit(); + return -ENODEV; + } kvm_riscv_gstage_vmid_detect(); @@ -134,22 +164,6 @@ static int __init riscv_kvm_init(void) (rc) ? 
slist : "no features"); } - switch (kvm_riscv_gstage_mode()) { - case HGATP_MODE_SV32X4: - str = "Sv32x4"; - break; - case HGATP_MODE_SV39X4: - str = "Sv39x4"; - break; - case HGATP_MODE_SV48X4: - str = "Sv48x4"; - break; - case HGATP_MODE_SV57X4: - str = "Sv57x4"; - break; - default: - return -ENODEV; - } kvm_info("using %s G-stage page table format\n", str); kvm_info("VMID %ld bits available\n", kvm_riscv_gstage_vmid_bits()); @@ -158,6 +172,8 @@ static int __init riscv_kvm_init(void) kvm_info("AIA available with %d guest external interrupts\n", kvm_riscv_aia_nr_hgei); + kvm_riscv_setup_vendor_features(); + kvm_register_perf_callbacks(NULL); rc = kvm_init(sizeof(struct kvm_vcpu), 0, THIS_MODULE); @@ -172,8 +188,8 @@ module_init(riscv_kvm_init); static void __exit riscv_kvm_exit(void) { - kvm_riscv_teardown(); - kvm_exit(); + + kvm_riscv_teardown(); } module_exit(riscv_kvm_exit); diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c index 1087ea74567b..4ab06697bfc0 100644 --- a/arch/riscv/kvm/mmu.c +++ b/arch/riscv/kvm/mmu.c @@ -6,371 +6,73 @@ * Anup Patel <anup.patel@wdc.com> */ -#include <linux/bitops.h> #include <linux/errno.h> -#include <linux/err.h> #include <linux/hugetlb.h> #include <linux/module.h> #include <linux/uaccess.h> #include <linux/vmalloc.h> #include <linux/kvm_host.h> #include <linux/sched/signal.h> +#include <asm/kvm_mmu.h> #include <asm/kvm_nacl.h> -#include <asm/page.h> -#include <asm/pgtable.h> - -#ifdef CONFIG_64BIT -static unsigned long gstage_mode __ro_after_init = (HGATP_MODE_SV39X4 << HGATP_MODE_SHIFT); -static unsigned long gstage_pgd_levels __ro_after_init = 3; -#define gstage_index_bits 9 -#else -static unsigned long gstage_mode __ro_after_init = (HGATP_MODE_SV32X4 << HGATP_MODE_SHIFT); -static unsigned long gstage_pgd_levels __ro_after_init = 2; -#define gstage_index_bits 10 -#endif - -#define gstage_pgd_xbits 2 -#define gstage_pgd_size (1UL << (HGATP_PAGE_SHIFT + gstage_pgd_xbits)) -#define gstage_gpa_bits (HGATP_PAGE_SHIFT + \ - (gstage_pgd_levels * gstage_index_bits) + \ - gstage_pgd_xbits) -#define gstage_gpa_size ((gpa_t)(1ULL << gstage_gpa_bits)) - -#define gstage_pte_leaf(__ptep) \ - (pte_val(*(__ptep)) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)) - -static inline unsigned long gstage_pte_index(gpa_t addr, u32 level) -{ - unsigned long mask; - unsigned long shift = HGATP_PAGE_SHIFT + (gstage_index_bits * level); - - if (level == (gstage_pgd_levels - 1)) - mask = (PTRS_PER_PTE * (1UL << gstage_pgd_xbits)) - 1; - else - mask = PTRS_PER_PTE - 1; - - return (addr >> shift) & mask; -} - -static inline unsigned long gstage_pte_page_vaddr(pte_t pte) -{ - return (unsigned long)pfn_to_virt(__page_val_to_pfn(pte_val(pte))); -} - -static int gstage_page_size_to_level(unsigned long page_size, u32 *out_level) -{ - u32 i; - unsigned long psz = 1UL << 12; - - for (i = 0; i < gstage_pgd_levels; i++) { - if (page_size == (psz << (i * gstage_index_bits))) { - *out_level = i; - return 0; - } - } - - return -EINVAL; -} - -static int gstage_level_to_page_order(u32 level, unsigned long *out_pgorder) -{ - if (gstage_pgd_levels < level) - return -EINVAL; - - *out_pgorder = 12 + (level * gstage_index_bits); - return 0; -} - -static int gstage_level_to_page_size(u32 level, unsigned long *out_pgsize) -{ - int rc; - unsigned long page_order = PAGE_SHIFT; - - rc = gstage_level_to_page_order(level, &page_order); - if (rc) - return rc; - - *out_pgsize = BIT(page_order); - return 0; -} - -static bool gstage_get_leaf_entry(struct kvm *kvm, gpa_t addr, - pte_t **ptepp, u32 
*ptep_level) -{ - pte_t *ptep; - u32 current_level = gstage_pgd_levels - 1; - - *ptep_level = current_level; - ptep = (pte_t *)kvm->arch.pgd; - ptep = &ptep[gstage_pte_index(addr, current_level)]; - while (ptep && pte_val(ptep_get(ptep))) { - if (gstage_pte_leaf(ptep)) { - *ptep_level = current_level; - *ptepp = ptep; - return true; - } - - if (current_level) { - current_level--; - *ptep_level = current_level; - ptep = (pte_t *)gstage_pte_page_vaddr(ptep_get(ptep)); - ptep = &ptep[gstage_pte_index(addr, current_level)]; - } else { - ptep = NULL; - } - } - - return false; -} - -static void gstage_remote_tlb_flush(struct kvm *kvm, u32 level, gpa_t addr) -{ - unsigned long order = PAGE_SHIFT; - - if (gstage_level_to_page_order(level, &order)) - return; - addr &= ~(BIT(order) - 1); - - kvm_riscv_hfence_gvma_vmid_gpa(kvm, -1UL, 0, addr, BIT(order), order); -} - -static int gstage_set_pte(struct kvm *kvm, u32 level, - struct kvm_mmu_memory_cache *pcache, - gpa_t addr, const pte_t *new_pte) -{ - u32 current_level = gstage_pgd_levels - 1; - pte_t *next_ptep = (pte_t *)kvm->arch.pgd; - pte_t *ptep = &next_ptep[gstage_pte_index(addr, current_level)]; - - if (current_level < level) - return -EINVAL; - - while (current_level != level) { - if (gstage_pte_leaf(ptep)) - return -EEXIST; - - if (!pte_val(ptep_get(ptep))) { - if (!pcache) - return -ENOMEM; - next_ptep = kvm_mmu_memory_cache_alloc(pcache); - if (!next_ptep) - return -ENOMEM; - set_pte(ptep, pfn_pte(PFN_DOWN(__pa(next_ptep)), - __pgprot(_PAGE_TABLE))); - } else { - if (gstage_pte_leaf(ptep)) - return -EEXIST; - next_ptep = (pte_t *)gstage_pte_page_vaddr(ptep_get(ptep)); - } - - current_level--; - ptep = &next_ptep[gstage_pte_index(addr, current_level)]; - } - - set_pte(ptep, *new_pte); - if (gstage_pte_leaf(ptep)) - gstage_remote_tlb_flush(kvm, current_level, addr); - - return 0; -} - -static int gstage_map_page(struct kvm *kvm, - struct kvm_mmu_memory_cache *pcache, - gpa_t gpa, phys_addr_t hpa, - unsigned long page_size, - bool page_rdonly, bool page_exec) -{ - int ret; - u32 level = 0; - pte_t new_pte; - pgprot_t prot; - - ret = gstage_page_size_to_level(page_size, &level); - if (ret) - return ret; - - /* - * A RISC-V implementation can choose to either: - * 1) Update 'A' and 'D' PTE bits in hardware - * 2) Generate page fault when 'A' and/or 'D' bits are not set - * PTE so that software can update these bits. - * - * We support both options mentioned above. To achieve this, we - * always set 'A' and 'D' PTE bits at time of creating G-stage - * mapping. To support KVM dirty page logging with both options - * mentioned above, we will write-protect G-stage PTEs to track - * dirty pages. 
- */ - - if (page_exec) { - if (page_rdonly) - prot = PAGE_READ_EXEC; - else - prot = PAGE_WRITE_EXEC; - } else { - if (page_rdonly) - prot = PAGE_READ; - else - prot = PAGE_WRITE; - } - new_pte = pfn_pte(PFN_DOWN(hpa), prot); - new_pte = pte_mkdirty(new_pte); - - return gstage_set_pte(kvm, level, pcache, gpa, &new_pte); -} -enum gstage_op { - GSTAGE_OP_NOP = 0, /* Nothing */ - GSTAGE_OP_CLEAR, /* Clear/Unmap */ - GSTAGE_OP_WP, /* Write-protect */ -}; - -static void gstage_op_pte(struct kvm *kvm, gpa_t addr, - pte_t *ptep, u32 ptep_level, enum gstage_op op) -{ - int i, ret; - pte_t *next_ptep; - u32 next_ptep_level; - unsigned long next_page_size, page_size; - - ret = gstage_level_to_page_size(ptep_level, &page_size); - if (ret) - return; - - BUG_ON(addr & (page_size - 1)); - - if (!pte_val(ptep_get(ptep))) - return; - - if (ptep_level && !gstage_pte_leaf(ptep)) { - next_ptep = (pte_t *)gstage_pte_page_vaddr(ptep_get(ptep)); - next_ptep_level = ptep_level - 1; - ret = gstage_level_to_page_size(next_ptep_level, - &next_page_size); - if (ret) - return; - - if (op == GSTAGE_OP_CLEAR) - set_pte(ptep, __pte(0)); - for (i = 0; i < PTRS_PER_PTE; i++) - gstage_op_pte(kvm, addr + i * next_page_size, - &next_ptep[i], next_ptep_level, op); - if (op == GSTAGE_OP_CLEAR) - put_page(virt_to_page(next_ptep)); - } else { - if (op == GSTAGE_OP_CLEAR) - set_pte(ptep, __pte(0)); - else if (op == GSTAGE_OP_WP) - set_pte(ptep, __pte(pte_val(ptep_get(ptep)) & ~_PAGE_WRITE)); - gstage_remote_tlb_flush(kvm, ptep_level, addr); - } -} - -static void gstage_unmap_range(struct kvm *kvm, gpa_t start, - gpa_t size, bool may_block) -{ - int ret; - pte_t *ptep; - u32 ptep_level; - bool found_leaf; - unsigned long page_size; - gpa_t addr = start, end = start + size; - - while (addr < end) { - found_leaf = gstage_get_leaf_entry(kvm, addr, - &ptep, &ptep_level); - ret = gstage_level_to_page_size(ptep_level, &page_size); - if (ret) - break; - - if (!found_leaf) - goto next; - - if (!(addr & (page_size - 1)) && ((end - addr) >= page_size)) - gstage_op_pte(kvm, addr, ptep, - ptep_level, GSTAGE_OP_CLEAR); - -next: - addr += page_size; - - /* - * If the range is too large, release the kvm->mmu_lock - * to prevent starvation and lockup detector warnings. 
- */ - if (may_block && addr < end) - cond_resched_lock(&kvm->mmu_lock); - } -} - -static void gstage_wp_range(struct kvm *kvm, gpa_t start, gpa_t end) -{ - int ret; - pte_t *ptep; - u32 ptep_level; - bool found_leaf; - gpa_t addr = start; - unsigned long page_size; - - while (addr < end) { - found_leaf = gstage_get_leaf_entry(kvm, addr, - &ptep, &ptep_level); - ret = gstage_level_to_page_size(ptep_level, &page_size); - if (ret) - break; - - if (!found_leaf) - goto next; - - if (!(addr & (page_size - 1)) && ((end - addr) >= page_size)) - gstage_op_pte(kvm, addr, ptep, - ptep_level, GSTAGE_OP_WP); - -next: - addr += page_size; - } -} - -static void gstage_wp_memory_region(struct kvm *kvm, int slot) +static void mmu_wp_memory_region(struct kvm *kvm, int slot) { struct kvm_memslots *slots = kvm_memslots(kvm); struct kvm_memory_slot *memslot = id_to_memslot(slots, slot); phys_addr_t start = memslot->base_gfn << PAGE_SHIFT; phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT; + struct kvm_gstage gstage; + + gstage.kvm = kvm; + gstage.flags = 0; + gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid); + gstage.pgd = kvm->arch.pgd; spin_lock(&kvm->mmu_lock); - gstage_wp_range(kvm, start, end); + kvm_riscv_gstage_wp_range(&gstage, start, end); spin_unlock(&kvm->mmu_lock); - kvm_flush_remote_tlbs(kvm); + kvm_flush_remote_tlbs_memslot(kvm, memslot); } -int kvm_riscv_gstage_ioremap(struct kvm *kvm, gpa_t gpa, - phys_addr_t hpa, unsigned long size, - bool writable, bool in_atomic) +int kvm_riscv_mmu_ioremap(struct kvm *kvm, gpa_t gpa, phys_addr_t hpa, + unsigned long size, bool writable, bool in_atomic) { - pte_t pte; int ret = 0; + pgprot_t prot; unsigned long pfn; phys_addr_t addr, end; struct kvm_mmu_memory_cache pcache = { .gfp_custom = (in_atomic) ? 
GFP_ATOMIC | __GFP_ACCOUNT : 0, .gfp_zero = __GFP_ZERO, }; + struct kvm_gstage_mapping map; + struct kvm_gstage gstage; + + gstage.kvm = kvm; + gstage.flags = 0; + gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid); + gstage.pgd = kvm->arch.pgd; end = (gpa + size + PAGE_SIZE - 1) & PAGE_MASK; pfn = __phys_to_pfn(hpa); + prot = pgprot_noncached(PAGE_WRITE); for (addr = gpa; addr < end; addr += PAGE_SIZE) { - pte = pfn_pte(pfn, PAGE_KERNEL_IO); + map.addr = addr; + map.pte = pfn_pte(pfn, prot); + map.pte = pte_mkdirty(map.pte); + map.level = 0; if (!writable) - pte = pte_wrprotect(pte); + map.pte = pte_wrprotect(map.pte); - ret = kvm_mmu_topup_memory_cache(&pcache, gstage_pgd_levels); + ret = kvm_mmu_topup_memory_cache(&pcache, kvm_riscv_gstage_pgd_levels); if (ret) goto out; spin_lock(&kvm->mmu_lock); - ret = gstage_set_pte(kvm, 0, &pcache, addr, &pte); + ret = kvm_riscv_gstage_set_pte(&gstage, &pcache, &map); spin_unlock(&kvm->mmu_lock); if (ret) goto out; @@ -383,10 +85,17 @@ out: return ret; } -void kvm_riscv_gstage_iounmap(struct kvm *kvm, gpa_t gpa, unsigned long size) +void kvm_riscv_mmu_iounmap(struct kvm *kvm, gpa_t gpa, unsigned long size) { + struct kvm_gstage gstage; + + gstage.kvm = kvm; + gstage.flags = 0; + gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid); + gstage.pgd = kvm->arch.pgd; + spin_lock(&kvm->mmu_lock); - gstage_unmap_range(kvm, gpa, size, false); + kvm_riscv_gstage_unmap_range(&gstage, gpa, size, false); spin_unlock(&kvm->mmu_lock); } @@ -398,8 +107,14 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, phys_addr_t base_gfn = slot->base_gfn + gfn_offset; phys_addr_t start = (base_gfn + __ffs(mask)) << PAGE_SHIFT; phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT; + struct kvm_gstage gstage; - gstage_wp_range(kvm, start, end); + gstage.kvm = kvm; + gstage.flags = 0; + gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid); + gstage.pgd = kvm->arch.pgd; + + kvm_riscv_gstage_wp_range(&gstage, start, end); } void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot) @@ -416,7 +131,7 @@ void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) void kvm_arch_flush_shadow_all(struct kvm *kvm) { - kvm_riscv_gstage_free_pgd(kvm); + kvm_riscv_mmu_free_pgd(kvm); } void kvm_arch_flush_shadow_memslot(struct kvm *kvm, @@ -424,9 +139,15 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm, { gpa_t gpa = slot->base_gfn << PAGE_SHIFT; phys_addr_t size = slot->npages << PAGE_SHIFT; + struct kvm_gstage gstage; + + gstage.kvm = kvm; + gstage.flags = 0; + gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid); + gstage.pgd = kvm->arch.pgd; spin_lock(&kvm->mmu_lock); - gstage_unmap_range(kvm, gpa, size, false); + kvm_riscv_gstage_unmap_range(&gstage, gpa, size, false); spin_unlock(&kvm->mmu_lock); } @@ -440,8 +161,11 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, * allocated dirty_bitmap[], dirty pages will be tracked while * the memory slot is write protected. 
*/ - if (change != KVM_MR_DELETE && new->flags & KVM_MEM_LOG_DIRTY_PAGES) - gstage_wp_memory_region(kvm, new->id); + if (change != KVM_MR_DELETE && new->flags & KVM_MEM_LOG_DIRTY_PAGES) { + if (kvm_dirty_log_manual_protect_and_init_set(kvm)) + return; + mmu_wp_memory_region(kvm, new->id); + } } int kvm_arch_prepare_memory_region(struct kvm *kvm, @@ -450,7 +174,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, enum kvm_mr_change change) { hva_t hva, reg_end, size; - gpa_t base_gpa; bool writable; int ret = 0; @@ -463,21 +186,19 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, * space addressable by the KVM guest GPA space. */ if ((new->base_gfn + new->npages) >= - (gstage_gpa_size >> PAGE_SHIFT)) + (kvm_riscv_gstage_gpa_size >> PAGE_SHIFT)) return -EFAULT; hva = new->userspace_addr; size = new->npages << PAGE_SHIFT; reg_end = hva + size; - base_gpa = new->base_gfn << PAGE_SHIFT; writable = !(new->flags & KVM_MEM_READONLY); mmap_read_lock(current->mm); /* * A memory region could potentially cover multiple VMAs, and - * any holes between them, so iterate over all of them to find - * out if we can map any of them right now. + * any holes between them, so iterate over all of them. * * +--------------------------------------------+ * +---------------+----------------+ +----------------+ @@ -487,10 +208,11 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, * +--------------------------------------------+ */ do { - struct vm_area_struct *vma = find_vma(current->mm, hva); - hva_t vm_start, vm_end; + struct vm_area_struct *vma; + hva_t vm_end; - if (!vma || vma->vm_start >= reg_end) + vma = find_vma_intersection(current->mm, hva, reg_end); + if (!vma) break; /* @@ -503,37 +225,18 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, } /* Take the intersection of this VMA with the memory region */ - vm_start = max(hva, vma->vm_start); vm_end = min(reg_end, vma->vm_end); if (vma->vm_flags & VM_PFNMAP) { - gpa_t gpa = base_gpa + (vm_start - hva); - phys_addr_t pa; - - pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT; - pa += vm_start - vma->vm_start; - /* IO region dirty page logging not allowed */ if (new->flags & KVM_MEM_LOG_DIRTY_PAGES) { ret = -EINVAL; goto out; } - - ret = kvm_riscv_gstage_ioremap(kvm, gpa, pa, - vm_end - vm_start, - writable, false); - if (ret) - break; } hva = vm_end; } while (hva < reg_end); - if (change == KVM_MR_FLAGS_ONLY) - goto out; - - if (ret) - kvm_riscv_gstage_iounmap(kvm, base_gpa, size); - out: mmap_read_unlock(current->mm); return ret; @@ -541,12 +244,18 @@ out: bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) { + struct kvm_gstage gstage; + if (!kvm->arch.pgd) return false; - gstage_unmap_range(kvm, range->start << PAGE_SHIFT, - (range->end - range->start) << PAGE_SHIFT, - range->may_block); + gstage.kvm = kvm; + gstage.flags = 0; + gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid); + gstage.pgd = kvm->arch.pgd; + kvm_riscv_gstage_unmap_range(&gstage, range->start << PAGE_SHIFT, + (range->end - range->start) << PAGE_SHIFT, + range->may_block); return false; } @@ -555,14 +264,19 @@ bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) pte_t *ptep; u32 ptep_level = 0; u64 size = (range->end - range->start) << PAGE_SHIFT; + struct kvm_gstage gstage; if (!kvm->arch.pgd) return false; WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE); - if (!gstage_get_leaf_entry(kvm, range->start << PAGE_SHIFT, - &ptep, &ptep_level)) + gstage.kvm = kvm; + gstage.flags = 0; + gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid); + 
gstage.pgd = kvm->arch.pgd; + if (!kvm_riscv_gstage_get_leaf(&gstage, range->start << PAGE_SHIFT, + &ptep, &ptep_level)) return false; return ptep_test_and_clear_young(NULL, 0, ptep); @@ -573,22 +287,27 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) pte_t *ptep; u32 ptep_level = 0; u64 size = (range->end - range->start) << PAGE_SHIFT; + struct kvm_gstage gstage; if (!kvm->arch.pgd) return false; WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE); - if (!gstage_get_leaf_entry(kvm, range->start << PAGE_SHIFT, - &ptep, &ptep_level)) + gstage.kvm = kvm; + gstage.flags = 0; + gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid); + gstage.pgd = kvm->arch.pgd; + if (!kvm_riscv_gstage_get_leaf(&gstage, range->start << PAGE_SHIFT, + &ptep, &ptep_level)) return false; return pte_young(ptep_get(ptep)); } -int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu, - struct kvm_memory_slot *memslot, - gpa_t gpa, unsigned long hva, bool is_write) +int kvm_riscv_mmu_map(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot, + gpa_t gpa, unsigned long hva, bool is_write, + struct kvm_gstage_mapping *out_map) { int ret; kvm_pfn_t hfn; @@ -601,10 +320,19 @@ int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu, bool logging = (memslot->dirty_bitmap && !(memslot->flags & KVM_MEM_READONLY)) ? true : false; unsigned long vma_pagesize, mmu_seq; + struct kvm_gstage gstage; struct page *page; + gstage.kvm = kvm; + gstage.flags = 0; + gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid); + gstage.pgd = kvm->arch.pgd; + + /* Setup initial state of output mapping */ + memset(out_map, 0, sizeof(*out_map)); + /* We need minimum second+third level pages */ - ret = kvm_mmu_topup_memory_cache(pcache, gstage_pgd_levels); + ret = kvm_mmu_topup_memory_cache(pcache, kvm_riscv_gstage_pgd_levels); if (ret) { kvm_err("Failed to topup G-stage cache\n"); return ret; @@ -648,7 +376,8 @@ int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu, return -EFAULT; } - hfn = kvm_faultin_pfn(vcpu, gfn, is_write, &writable, &page); + hfn = __kvm_faultin_pfn(memslot, gfn, is_write ? 
FOLL_WRITE : 0, + &writable, &page); if (hfn == KVM_PFN_ERR_HWPOISON) { send_sig_mceerr(BUS_MCEERR_AR, (void __user *)hva, vma_pageshift, current); @@ -670,12 +399,12 @@ int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu, goto out_unlock; if (writable) { - mark_page_dirty(kvm, gfn); - ret = gstage_map_page(kvm, pcache, gpa, hfn << PAGE_SHIFT, - vma_pagesize, false, true); + mark_page_dirty_in_slot(kvm, memslot, gfn); + ret = kvm_riscv_gstage_map_page(&gstage, pcache, gpa, hfn << PAGE_SHIFT, + vma_pagesize, false, true, out_map); } else { - ret = gstage_map_page(kvm, pcache, gpa, hfn << PAGE_SHIFT, - vma_pagesize, true, true); + ret = kvm_riscv_gstage_map_page(&gstage, pcache, gpa, hfn << PAGE_SHIFT, + vma_pagesize, true, true, out_map); } if (ret) @@ -687,7 +416,7 @@ out_unlock: return ret; } -int kvm_riscv_gstage_alloc_pgd(struct kvm *kvm) +int kvm_riscv_mmu_alloc_pgd(struct kvm *kvm) { struct page *pgd_page; @@ -697,7 +426,7 @@ int kvm_riscv_gstage_alloc_pgd(struct kvm *kvm) } pgd_page = alloc_pages(GFP_KERNEL | __GFP_ZERO, - get_order(gstage_pgd_size)); + get_order(kvm_riscv_gstage_pgd_size)); if (!pgd_page) return -ENOMEM; kvm->arch.pgd = page_to_virt(pgd_page); @@ -706,13 +435,18 @@ int kvm_riscv_gstage_alloc_pgd(struct kvm *kvm) return 0; } -void kvm_riscv_gstage_free_pgd(struct kvm *kvm) +void kvm_riscv_mmu_free_pgd(struct kvm *kvm) { + struct kvm_gstage gstage; void *pgd = NULL; spin_lock(&kvm->mmu_lock); if (kvm->arch.pgd) { - gstage_unmap_range(kvm, 0UL, gstage_gpa_size, false); + gstage.kvm = kvm; + gstage.flags = 0; + gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid); + gstage.pgd = kvm->arch.pgd; + kvm_riscv_gstage_unmap_range(&gstage, 0UL, kvm_riscv_gstage_gpa_size, false); pgd = READ_ONCE(kvm->arch.pgd); kvm->arch.pgd = NULL; kvm->arch.pgd_phys = 0; @@ -720,12 +454,12 @@ void kvm_riscv_gstage_free_pgd(struct kvm *kvm) spin_unlock(&kvm->mmu_lock); if (pgd) - free_pages((unsigned long)pgd, get_order(gstage_pgd_size)); + free_pages((unsigned long)pgd, get_order(kvm_riscv_gstage_pgd_size)); } -void kvm_riscv_gstage_update_hgatp(struct kvm_vcpu *vcpu) +void kvm_riscv_mmu_update_hgatp(struct kvm_vcpu *vcpu) { - unsigned long hgatp = gstage_mode; + unsigned long hgatp = kvm_riscv_gstage_mode << HGATP_MODE_SHIFT; struct kvm_arch *k = &vcpu->kvm->arch; hgatp |= (READ_ONCE(k->vmid.vmid) << HGATP_VMID_SHIFT) & HGATP_VMID; @@ -736,37 +470,3 @@ void kvm_riscv_gstage_update_hgatp(struct kvm_vcpu *vcpu) if (!kvm_riscv_gstage_vmid_bits()) kvm_riscv_local_hfence_gvma_all(); } - -void __init kvm_riscv_gstage_mode_detect(void) -{ -#ifdef CONFIG_64BIT - /* Try Sv57x4 G-stage mode */ - csr_write(CSR_HGATP, HGATP_MODE_SV57X4 << HGATP_MODE_SHIFT); - if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV57X4) { - gstage_mode = (HGATP_MODE_SV57X4 << HGATP_MODE_SHIFT); - gstage_pgd_levels = 5; - goto skip_sv48x4_test; - } - - /* Try Sv48x4 G-stage mode */ - csr_write(CSR_HGATP, HGATP_MODE_SV48X4 << HGATP_MODE_SHIFT); - if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV48X4) { - gstage_mode = (HGATP_MODE_SV48X4 << HGATP_MODE_SHIFT); - gstage_pgd_levels = 4; - } -skip_sv48x4_test: - - csr_write(CSR_HGATP, 0); - kvm_riscv_local_hfence_gvma_all(); -#endif -} - -unsigned long __init kvm_riscv_gstage_mode(void) -{ - return gstage_mode >> HGATP_MODE_SHIFT; -} - -int kvm_riscv_gstage_gpa_bits(void) -{ - return gstage_gpa_bits; -} diff --git a/arch/riscv/kvm/tlb.c b/arch/riscv/kvm/tlb.c index 2f91ea5f8493..ff1aeac4eb8e 100644 --- a/arch/riscv/kvm/tlb.c +++ b/arch/riscv/kvm/tlb.c @@ -15,6 +15,8 @@ 
#include <asm/cpufeature.h> #include <asm/insn-def.h> #include <asm/kvm_nacl.h> +#include <asm/kvm_tlb.h> +#include <asm/kvm_vmid.h> #define has_svinval() riscv_has_extension_unlikely(RISCV_ISA_EXT_SVINVAL) @@ -177,6 +179,13 @@ void kvm_riscv_local_tlb_sanitize(struct kvm_vcpu *vcpu) vmid = READ_ONCE(vcpu->kvm->arch.vmid.vmid); kvm_riscv_local_hfence_gvma_vmid_all(vmid); + + /* + * Flush VS-stage TLB entries for implementation where VS-stage + * TLB does not cahce guest physical address and VMID. + */ + if (static_branch_unlikely(&kvm_riscv_vsstage_tlb_no_gpa)) + kvm_riscv_local_hfence_vvma_all(vmid); } void kvm_riscv_fence_i_process(struct kvm_vcpu *vcpu) @@ -185,7 +194,7 @@ void kvm_riscv_fence_i_process(struct kvm_vcpu *vcpu) local_flush_icache_all(); } -void kvm_riscv_hfence_gvma_vmid_all_process(struct kvm_vcpu *vcpu) +void kvm_riscv_tlb_flush_process(struct kvm_vcpu *vcpu) { struct kvm_vmid *v = &vcpu->kvm->arch.vmid; unsigned long vmid = READ_ONCE(v->vmid); @@ -258,51 +267,58 @@ static bool vcpu_hfence_enqueue(struct kvm_vcpu *vcpu, void kvm_riscv_hfence_process(struct kvm_vcpu *vcpu) { - unsigned long vmid; struct kvm_riscv_hfence d = { 0 }; - struct kvm_vmid *v = &vcpu->kvm->arch.vmid; while (vcpu_hfence_dequeue(vcpu, &d)) { switch (d.type) { case KVM_RISCV_HFENCE_UNKNOWN: break; case KVM_RISCV_HFENCE_GVMA_VMID_GPA: - vmid = READ_ONCE(v->vmid); if (kvm_riscv_nacl_available()) - nacl_hfence_gvma_vmid(nacl_shmem(), vmid, + nacl_hfence_gvma_vmid(nacl_shmem(), d.vmid, d.addr, d.size, d.order); else - kvm_riscv_local_hfence_gvma_vmid_gpa(vmid, d.addr, + kvm_riscv_local_hfence_gvma_vmid_gpa(d.vmid, d.addr, d.size, d.order); break; + case KVM_RISCV_HFENCE_GVMA_VMID_ALL: + if (kvm_riscv_nacl_available()) + nacl_hfence_gvma_vmid_all(nacl_shmem(), d.vmid); + else + kvm_riscv_local_hfence_gvma_vmid_all(d.vmid); + break; case KVM_RISCV_HFENCE_VVMA_ASID_GVA: kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_HFENCE_VVMA_ASID_RCVD); - vmid = READ_ONCE(v->vmid); if (kvm_riscv_nacl_available()) - nacl_hfence_vvma_asid(nacl_shmem(), vmid, d.asid, + nacl_hfence_vvma_asid(nacl_shmem(), d.vmid, d.asid, d.addr, d.size, d.order); else - kvm_riscv_local_hfence_vvma_asid_gva(vmid, d.asid, d.addr, + kvm_riscv_local_hfence_vvma_asid_gva(d.vmid, d.asid, d.addr, d.size, d.order); break; case KVM_RISCV_HFENCE_VVMA_ASID_ALL: kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_HFENCE_VVMA_ASID_RCVD); - vmid = READ_ONCE(v->vmid); if (kvm_riscv_nacl_available()) - nacl_hfence_vvma_asid_all(nacl_shmem(), vmid, d.asid); + nacl_hfence_vvma_asid_all(nacl_shmem(), d.vmid, d.asid); else - kvm_riscv_local_hfence_vvma_asid_all(vmid, d.asid); + kvm_riscv_local_hfence_vvma_asid_all(d.vmid, d.asid); break; case KVM_RISCV_HFENCE_VVMA_GVA: kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_HFENCE_VVMA_RCVD); - vmid = READ_ONCE(v->vmid); if (kvm_riscv_nacl_available()) - nacl_hfence_vvma(nacl_shmem(), vmid, + nacl_hfence_vvma(nacl_shmem(), d.vmid, d.addr, d.size, d.order); else - kvm_riscv_local_hfence_vvma_gva(vmid, d.addr, + kvm_riscv_local_hfence_vvma_gva(d.vmid, d.addr, d.size, d.order); break; + case KVM_RISCV_HFENCE_VVMA_ALL: + kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_HFENCE_VVMA_RCVD); + if (kvm_riscv_nacl_available()) + nacl_hfence_vvma_all(nacl_shmem(), d.vmid); + else + kvm_riscv_local_hfence_vvma_all(d.vmid); + break; default: break; } @@ -355,35 +371,43 @@ void kvm_riscv_fence_i(struct kvm *kvm, void kvm_riscv_hfence_gvma_vmid_gpa(struct kvm *kvm, unsigned long hbase, unsigned long hmask, gpa_t gpa, gpa_t gpsz, - unsigned long order) 
+ unsigned long order, unsigned long vmid) { struct kvm_riscv_hfence data; data.type = KVM_RISCV_HFENCE_GVMA_VMID_GPA; data.asid = 0; + data.vmid = vmid; data.addr = gpa; data.size = gpsz; data.order = order; make_xfence_request(kvm, hbase, hmask, KVM_REQ_HFENCE, - KVM_REQ_HFENCE_GVMA_VMID_ALL, &data); + KVM_REQ_TLB_FLUSH, &data); } void kvm_riscv_hfence_gvma_vmid_all(struct kvm *kvm, - unsigned long hbase, unsigned long hmask) + unsigned long hbase, unsigned long hmask, + unsigned long vmid) { - make_xfence_request(kvm, hbase, hmask, KVM_REQ_HFENCE_GVMA_VMID_ALL, - KVM_REQ_HFENCE_GVMA_VMID_ALL, NULL); + struct kvm_riscv_hfence data = {0}; + + data.type = KVM_RISCV_HFENCE_GVMA_VMID_ALL; + data.vmid = vmid; + make_xfence_request(kvm, hbase, hmask, KVM_REQ_HFENCE, + KVM_REQ_TLB_FLUSH, &data); } void kvm_riscv_hfence_vvma_asid_gva(struct kvm *kvm, unsigned long hbase, unsigned long hmask, unsigned long gva, unsigned long gvsz, - unsigned long order, unsigned long asid) + unsigned long order, unsigned long asid, + unsigned long vmid) { struct kvm_riscv_hfence data; data.type = KVM_RISCV_HFENCE_VVMA_ASID_GVA; data.asid = asid; + data.vmid = vmid; data.addr = gva; data.size = gvsz; data.order = order; @@ -393,13 +417,13 @@ void kvm_riscv_hfence_vvma_asid_gva(struct kvm *kvm, void kvm_riscv_hfence_vvma_asid_all(struct kvm *kvm, unsigned long hbase, unsigned long hmask, - unsigned long asid) + unsigned long asid, unsigned long vmid) { - struct kvm_riscv_hfence data; + struct kvm_riscv_hfence data = {0}; data.type = KVM_RISCV_HFENCE_VVMA_ASID_ALL; data.asid = asid; - data.addr = data.size = data.order = 0; + data.vmid = vmid; make_xfence_request(kvm, hbase, hmask, KVM_REQ_HFENCE, KVM_REQ_HFENCE_VVMA_ALL, &data); } @@ -407,12 +431,13 @@ void kvm_riscv_hfence_vvma_asid_all(struct kvm *kvm, void kvm_riscv_hfence_vvma_gva(struct kvm *kvm, unsigned long hbase, unsigned long hmask, unsigned long gva, unsigned long gvsz, - unsigned long order) + unsigned long order, unsigned long vmid) { struct kvm_riscv_hfence data; data.type = KVM_RISCV_HFENCE_VVMA_GVA; data.asid = 0; + data.vmid = vmid; data.addr = gva; data.size = gvsz; data.order = order; @@ -421,8 +446,21 @@ void kvm_riscv_hfence_vvma_gva(struct kvm *kvm, } void kvm_riscv_hfence_vvma_all(struct kvm *kvm, - unsigned long hbase, unsigned long hmask) + unsigned long hbase, unsigned long hmask, + unsigned long vmid) +{ + struct kvm_riscv_hfence data = {0}; + + data.type = KVM_RISCV_HFENCE_VVMA_ALL; + data.vmid = vmid; + make_xfence_request(kvm, hbase, hmask, KVM_REQ_HFENCE, + KVM_REQ_HFENCE_VVMA_ALL, &data); +} + +int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages) { - make_xfence_request(kvm, hbase, hmask, KVM_REQ_HFENCE_VVMA_ALL, - KVM_REQ_HFENCE_VVMA_ALL, NULL); + kvm_riscv_hfence_gvma_vmid_gpa(kvm, -1UL, 0, + gfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT, + PAGE_SHIFT, READ_ONCE(kvm->arch.vmid.vmid)); + return 0; } diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index dc3f76f6e46c..a55a95da54d0 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -7,7 +7,6 @@ */ #include <linux/bitops.h> -#include <linux/entry-kvm.h> #include <linux/errno.h> #include <linux/err.h> #include <linux/kdebug.h> @@ -18,6 +17,7 @@ #include <linux/fs.h> #include <linux/kvm_host.h> #include <asm/cacheflush.h> +#include <asm/kvm_mmu.h> #include <asm/kvm_nacl.h> #include <asm/kvm_vcpu_vector.h> @@ -34,7 +34,12 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { STATS_DESC_COUNTER(VCPU, csr_exit_user), 
STATS_DESC_COUNTER(VCPU, csr_exit_kernel), STATS_DESC_COUNTER(VCPU, signal_exits), - STATS_DESC_COUNTER(VCPU, exits) + STATS_DESC_COUNTER(VCPU, exits), + STATS_DESC_COUNTER(VCPU, instr_illegal_exits), + STATS_DESC_COUNTER(VCPU, load_misaligned_exits), + STATS_DESC_COUNTER(VCPU, store_misaligned_exits), + STATS_DESC_COUNTER(VCPU, load_access_exits), + STATS_DESC_COUNTER(VCPU, store_access_exits), }; const struct kvm_stats_header kvm_vcpu_stats_header = { @@ -46,12 +51,33 @@ const struct kvm_stats_header kvm_vcpu_stats_header = { sizeof(kvm_vcpu_stats_desc), }; -static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu) +static void kvm_riscv_vcpu_context_reset(struct kvm_vcpu *vcpu, + bool kvm_sbi_reset) { struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; - struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr; struct kvm_cpu_context *cntx = &vcpu->arch.guest_context; - struct kvm_cpu_context *reset_cntx = &vcpu->arch.guest_reset_context; + void *vector_datap = cntx->vector.datap; + + memset(cntx, 0, sizeof(*cntx)); + memset(csr, 0, sizeof(*csr)); + memset(&vcpu->arch.smstateen_csr, 0, sizeof(vcpu->arch.smstateen_csr)); + + /* Restore datap as it's not a part of the guest context. */ + cntx->vector.datap = vector_datap; + + if (kvm_sbi_reset) + kvm_riscv_vcpu_sbi_load_reset_state(vcpu); + + /* Setup reset state of shadow SSTATUS and HSTATUS CSRs */ + cntx->sstatus = SR_SPP | SR_SPIE; + + cntx->hstatus |= HSTATUS_VTW; + cntx->hstatus |= HSTATUS_SPVP; + cntx->hstatus |= HSTATUS_SPV; +} + +static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu, bool kvm_sbi_reset) +{ bool loaded; /** @@ -66,11 +92,7 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu) vcpu->arch.last_exit_cpu = -1; - memcpy(csr, reset_csr, sizeof(*csr)); - - spin_lock(&vcpu->arch.reset_cntx_lock); - memcpy(cntx, reset_cntx, sizeof(*cntx)); - spin_unlock(&vcpu->arch.reset_cntx_lock); + kvm_riscv_vcpu_context_reset(vcpu, kvm_sbi_reset); kvm_riscv_vcpu_fp_reset(vcpu); @@ -89,7 +111,7 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu) vcpu->arch.hfence_tail = 0; memset(vcpu->arch.hfence_queue, 0, sizeof(vcpu->arch.hfence_queue)); - kvm_riscv_vcpu_sbi_sta_reset(vcpu); + kvm_riscv_vcpu_sbi_reset(vcpu); /* Reset the guest CSRs for hotplug usecase */ if (loaded) @@ -105,13 +127,13 @@ int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) { int rc; - struct kvm_cpu_context *cntx; - struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr; spin_lock_init(&vcpu->arch.mp_state_lock); /* Mark this VCPU never ran */ vcpu->arch.ran_atleast_once = false; + + vcpu->arch.cfg.hedeleg = KVM_HEDELEG_DEFAULT; vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO; bitmap_zero(vcpu->arch.isa, RISCV_ISA_EXT_MAX); @@ -126,23 +148,11 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) /* Setup VCPU hfence queue */ spin_lock_init(&vcpu->arch.hfence_lock); - /* Setup reset state of shadow SSTATUS and HSTATUS CSRs */ - spin_lock_init(&vcpu->arch.reset_cntx_lock); - - spin_lock(&vcpu->arch.reset_cntx_lock); - cntx = &vcpu->arch.guest_reset_context; - cntx->sstatus = SR_SPP | SR_SPIE; - cntx->hstatus = 0; - cntx->hstatus |= HSTATUS_VTW; - cntx->hstatus |= HSTATUS_SPVP; - cntx->hstatus |= HSTATUS_SPV; - spin_unlock(&vcpu->arch.reset_cntx_lock); - - if (kvm_riscv_vcpu_alloc_vector_context(vcpu, cntx)) - return -ENOMEM; + spin_lock_init(&vcpu->arch.reset_state.lock); - /* By default, make CY, TM, and IR counters accessible in VU mode */ - reset_csr->scounteren = 0x7; + rc = 
kvm_riscv_vcpu_alloc_vector_context(vcpu); + if (rc) + return rc; /* Setup VCPU timer */ kvm_riscv_vcpu_timer_init(vcpu); @@ -151,9 +161,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) kvm_riscv_vcpu_pmu_init(vcpu); /* Setup VCPU AIA */ - rc = kvm_riscv_vcpu_aia_init(vcpu); - if (rc) - return rc; + kvm_riscv_vcpu_aia_init(vcpu); /* * Setup SBI extensions @@ -162,7 +170,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) kvm_riscv_vcpu_sbi_init(vcpu); /* Reset VCPU */ - kvm_riscv_reset_vcpu(vcpu); + kvm_riscv_reset_vcpu(vcpu, false); return 0; } @@ -180,6 +188,8 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) { + kvm_riscv_vcpu_sbi_deinit(vcpu); + /* Cleanup VCPU AIA context */ kvm_riscv_vcpu_aia_deinit(vcpu); @@ -200,19 +210,9 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) return kvm_riscv_vcpu_timer_pending(vcpu); } -void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) -{ - kvm_riscv_aia_wakeon_hgei(vcpu, true); -} - -void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) -{ - kvm_riscv_aia_wakeon_hgei(vcpu, false); -} - int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { - return (kvm_riscv_vcpu_has_interrupts(vcpu, -1UL) && + return (kvm_riscv_vcpu_has_interrupts(vcpu, -1ULL) && !kvm_riscv_vcpu_stopped(vcpu) && !vcpu->arch.pause); } @@ -238,8 +238,8 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } -long kvm_arch_vcpu_async_ioctl(struct file *filp, - unsigned int ioctl, unsigned long arg) +long kvm_arch_vcpu_unlocked_ioctl(struct file *filp, unsigned int ioctl, + unsigned long arg) { struct kvm_vcpu *vcpu = filp->private_data; void __user *argp = (void __user *)arg; @@ -511,6 +511,12 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, case KVM_MP_STATE_STOPPED: __kvm_riscv_vcpu_power_off(vcpu); break; + case KVM_MP_STATE_INIT_RECEIVED: + if (vcpu->kvm->arch.mp_state_reset) + kvm_riscv_reset_vcpu(vcpu, false); + else + ret = -EINVAL; + break; default: ret = -EINVAL; } @@ -551,6 +557,10 @@ static void kvm_riscv_vcpu_setup_config(struct kvm_vcpu *vcpu) if (riscv_isa_extension_available(isa, ZICBOZ)) cfg->henvcfg |= ENVCFG_CBZE; + if (riscv_isa_extension_available(isa, SVADU) && + !riscv_isa_extension_available(isa, SVADE)) + cfg->henvcfg |= ENVCFG_ADUE; + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) { cfg->hstateen0 |= SMSTATEEN0_HSENVCFG; if (riscv_isa_extension_available(isa, SSAIA)) @@ -561,7 +571,6 @@ static void kvm_riscv_vcpu_setup_config(struct kvm_vcpu *vcpu) cfg->hstateen0 |= SMSTATEEN0_SSTATEEN0; } - cfg->hedeleg = KVM_HEDELEG_DEFAULT; if (vcpu->guest_debug) cfg->hedeleg &= ~BIT(EXC_BREAKPOINT); } @@ -613,7 +622,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) } } - kvm_riscv_gstage_update_hgatp(vcpu); + kvm_riscv_mmu_update_hgatp(vcpu); kvm_riscv_vcpu_timer_restore(vcpu); @@ -673,7 +682,14 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) } } -static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu) +/** + * kvm_riscv_check_vcpu_requests - check and handle pending vCPU requests + * @vcpu: the VCPU pointer + * + * Return: 1 if we should enter the guest + * 0 if we should exit to userspace + */ +static int kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu) { struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu); @@ -695,20 +711,16 @@ static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu) } if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu)) - kvm_riscv_reset_vcpu(vcpu); + kvm_riscv_reset_vcpu(vcpu, true); if 
(kvm_check_request(KVM_REQ_UPDATE_HGATP, vcpu)) - kvm_riscv_gstage_update_hgatp(vcpu); + kvm_riscv_mmu_update_hgatp(vcpu); if (kvm_check_request(KVM_REQ_FENCE_I, vcpu)) kvm_riscv_fence_i_process(vcpu); - /* - * The generic KVM_REQ_TLB_FLUSH is same as - * KVM_REQ_HFENCE_GVMA_VMID_ALL - */ - if (kvm_check_request(KVM_REQ_HFENCE_GVMA_VMID_ALL, vcpu)) - kvm_riscv_hfence_gvma_vmid_all_process(vcpu); + if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) + kvm_riscv_tlb_flush_process(vcpu); if (kvm_check_request(KVM_REQ_HFENCE_VVMA_ALL, vcpu)) kvm_riscv_hfence_vvma_all_process(vcpu); @@ -718,7 +730,12 @@ static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu) if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu)) kvm_riscv_vcpu_record_steal_time(vcpu); + + if (kvm_dirty_ring_check_request(vcpu)) + return 0; } + + return 1; } static void kvm_riscv_update_hvip(struct kvm_vcpu *vcpu) @@ -893,14 +910,16 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) run->exit_reason = KVM_EXIT_UNKNOWN; while (ret > 0) { /* Check conditions before entering the guest */ - ret = xfer_to_guest_mode_handle_work(vcpu); + ret = kvm_xfer_to_guest_mode_handle_work(vcpu); if (ret) continue; ret = 1; kvm_riscv_gstage_vmid_update(vcpu); - kvm_riscv_check_vcpu_requests(vcpu); + ret = kvm_riscv_check_vcpu_requests(vcpu); + if (ret <= 0) + continue; preempt_disable(); @@ -944,7 +963,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) } /* - * Cleanup stale TLB enteries + * Sanitize VMID mappings cached (TLB) on current CPU * * Note: This should be done after G-stage VMID has been * updated using kvm_riscv_gstage_vmid_ver_changed() diff --git a/arch/riscv/kvm/vcpu_exit.c b/arch/riscv/kvm/vcpu_exit.c index fa98e5c024b2..0bb0c51e3c89 100644 --- a/arch/riscv/kvm/vcpu_exit.c +++ b/arch/riscv/kvm/vcpu_exit.c @@ -9,10 +9,13 @@ #include <linux/kvm_host.h> #include <asm/csr.h> #include <asm/insn-def.h> +#include <asm/kvm_mmu.h> +#include <asm/kvm_nacl.h> static int gstage_page_fault(struct kvm_vcpu *vcpu, struct kvm_run *run, struct kvm_cpu_trap *trap) { + struct kvm_gstage_mapping host_map; struct kvm_memory_slot *memslot; unsigned long hva, fault_addr; bool writable; @@ -40,8 +43,9 @@ static int gstage_page_fault(struct kvm_vcpu *vcpu, struct kvm_run *run, }; } - ret = kvm_riscv_gstage_map(vcpu, memslot, fault_addr, hva, - (trap->scause == EXC_STORE_GUEST_PAGE_FAULT) ? true : false); + ret = kvm_riscv_mmu_map(vcpu, memslot, fault_addr, hva, + (trap->scause == EXC_STORE_GUEST_PAGE_FAULT) ? 
true : false, + &host_map); if (ret < 0) return ret; @@ -135,7 +139,7 @@ unsigned long kvm_riscv_vcpu_unpriv_read(struct kvm_vcpu *vcpu, void kvm_riscv_vcpu_trap_redirect(struct kvm_vcpu *vcpu, struct kvm_cpu_trap *trap) { - unsigned long vsstatus = csr_read(CSR_VSSTATUS); + unsigned long vsstatus = ncsr_read(CSR_VSSTATUS); /* Change Guest SSTATUS.SPP bit */ vsstatus &= ~SR_SPP; @@ -151,20 +155,31 @@ void kvm_riscv_vcpu_trap_redirect(struct kvm_vcpu *vcpu, vsstatus &= ~SR_SIE; /* Update Guest SSTATUS */ - csr_write(CSR_VSSTATUS, vsstatus); + ncsr_write(CSR_VSSTATUS, vsstatus); /* Update Guest SCAUSE, STVAL, and SEPC */ - csr_write(CSR_VSCAUSE, trap->scause); - csr_write(CSR_VSTVAL, trap->stval); - csr_write(CSR_VSEPC, trap->sepc); + ncsr_write(CSR_VSCAUSE, trap->scause); + ncsr_write(CSR_VSTVAL, trap->stval); + ncsr_write(CSR_VSEPC, trap->sepc); /* Set Guest PC to Guest exception vector */ - vcpu->arch.guest_context.sepc = csr_read(CSR_VSTVEC); + vcpu->arch.guest_context.sepc = ncsr_read(CSR_VSTVEC); /* Set Guest privilege mode to supervisor */ vcpu->arch.guest_context.sstatus |= SR_SPP; } +static inline int vcpu_redirect(struct kvm_vcpu *vcpu, struct kvm_cpu_trap *trap) +{ + int ret = -EFAULT; + + if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV) { + kvm_riscv_vcpu_trap_redirect(vcpu, trap); + ret = 1; + } + return ret; +} + /* * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on * proper exit to userspace. @@ -183,14 +198,32 @@ int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, run->exit_reason = KVM_EXIT_UNKNOWN; switch (trap->scause) { case EXC_INST_ILLEGAL: + kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_ILLEGAL_INSN); + vcpu->stat.instr_illegal_exits++; + ret = vcpu_redirect(vcpu, trap); + break; case EXC_LOAD_MISALIGNED: + kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_MISALIGNED_LOAD); + vcpu->stat.load_misaligned_exits++; + ret = vcpu_redirect(vcpu, trap); + break; case EXC_STORE_MISALIGNED: + kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_MISALIGNED_STORE); + vcpu->stat.store_misaligned_exits++; + ret = vcpu_redirect(vcpu, trap); + break; case EXC_LOAD_ACCESS: + kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_ACCESS_LOAD); + vcpu->stat.load_access_exits++; + ret = vcpu_redirect(vcpu, trap); + break; case EXC_STORE_ACCESS: - if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV) { - kvm_riscv_vcpu_trap_redirect(vcpu, trap); - ret = 1; - } + kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_ACCESS_STORE); + vcpu->stat.store_access_exits++; + ret = vcpu_redirect(vcpu, trap); + break; + case EXC_INST_ACCESS: + ret = vcpu_redirect(vcpu, trap); break; case EXC_VIRTUAL_INST_FAULT: if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV) diff --git a/arch/riscv/kvm/vcpu_insn.c b/arch/riscv/kvm/vcpu_insn.c index 97dec18e6989..4d89b94128ae 100644 --- a/arch/riscv/kvm/vcpu_insn.c +++ b/arch/riscv/kvm/vcpu_insn.c @@ -8,133 +8,7 @@ #include <linux/kvm_host.h> #include <asm/cpufeature.h> - -#define INSN_OPCODE_MASK 0x007c -#define INSN_OPCODE_SHIFT 2 -#define INSN_OPCODE_SYSTEM 28 - -#define INSN_MASK_WFI 0xffffffff -#define INSN_MATCH_WFI 0x10500073 - -#define INSN_MASK_WRS 0xffffffff -#define INSN_MATCH_WRS 0x00d00073 - -#define INSN_MATCH_CSRRW 0x1073 -#define INSN_MASK_CSRRW 0x707f -#define INSN_MATCH_CSRRS 0x2073 -#define INSN_MASK_CSRRS 0x707f -#define INSN_MATCH_CSRRC 0x3073 -#define INSN_MASK_CSRRC 0x707f -#define INSN_MATCH_CSRRWI 0x5073 -#define INSN_MASK_CSRRWI 0x707f -#define INSN_MATCH_CSRRSI 0x6073 -#define INSN_MASK_CSRRSI 0x707f -#define INSN_MATCH_CSRRCI 0x7073 
-#define INSN_MASK_CSRRCI 0x707f - -#define INSN_MATCH_LB 0x3 -#define INSN_MASK_LB 0x707f -#define INSN_MATCH_LH 0x1003 -#define INSN_MASK_LH 0x707f -#define INSN_MATCH_LW 0x2003 -#define INSN_MASK_LW 0x707f -#define INSN_MATCH_LD 0x3003 -#define INSN_MASK_LD 0x707f -#define INSN_MATCH_LBU 0x4003 -#define INSN_MASK_LBU 0x707f -#define INSN_MATCH_LHU 0x5003 -#define INSN_MASK_LHU 0x707f -#define INSN_MATCH_LWU 0x6003 -#define INSN_MASK_LWU 0x707f -#define INSN_MATCH_SB 0x23 -#define INSN_MASK_SB 0x707f -#define INSN_MATCH_SH 0x1023 -#define INSN_MASK_SH 0x707f -#define INSN_MATCH_SW 0x2023 -#define INSN_MASK_SW 0x707f -#define INSN_MATCH_SD 0x3023 -#define INSN_MASK_SD 0x707f - -#define INSN_MATCH_C_LD 0x6000 -#define INSN_MASK_C_LD 0xe003 -#define INSN_MATCH_C_SD 0xe000 -#define INSN_MASK_C_SD 0xe003 -#define INSN_MATCH_C_LW 0x4000 -#define INSN_MASK_C_LW 0xe003 -#define INSN_MATCH_C_SW 0xc000 -#define INSN_MASK_C_SW 0xe003 -#define INSN_MATCH_C_LDSP 0x6002 -#define INSN_MASK_C_LDSP 0xe003 -#define INSN_MATCH_C_SDSP 0xe002 -#define INSN_MASK_C_SDSP 0xe003 -#define INSN_MATCH_C_LWSP 0x4002 -#define INSN_MASK_C_LWSP 0xe003 -#define INSN_MATCH_C_SWSP 0xc002 -#define INSN_MASK_C_SWSP 0xe003 - -#define INSN_16BIT_MASK 0x3 - -#define INSN_IS_16BIT(insn) (((insn) & INSN_16BIT_MASK) != INSN_16BIT_MASK) - -#define INSN_LEN(insn) (INSN_IS_16BIT(insn) ? 2 : 4) - -#ifdef CONFIG_64BIT -#define LOG_REGBYTES 3 -#else -#define LOG_REGBYTES 2 -#endif -#define REGBYTES (1 << LOG_REGBYTES) - -#define SH_RD 7 -#define SH_RS1 15 -#define SH_RS2 20 -#define SH_RS2C 2 -#define MASK_RX 0x1f - -#define RV_X(x, s, n) (((x) >> (s)) & ((1 << (n)) - 1)) -#define RVC_LW_IMM(x) ((RV_X(x, 6, 1) << 2) | \ - (RV_X(x, 10, 3) << 3) | \ - (RV_X(x, 5, 1) << 6)) -#define RVC_LD_IMM(x) ((RV_X(x, 10, 3) << 3) | \ - (RV_X(x, 5, 2) << 6)) -#define RVC_LWSP_IMM(x) ((RV_X(x, 4, 3) << 2) | \ - (RV_X(x, 12, 1) << 5) | \ - (RV_X(x, 2, 2) << 6)) -#define RVC_LDSP_IMM(x) ((RV_X(x, 5, 2) << 3) | \ - (RV_X(x, 12, 1) << 5) | \ - (RV_X(x, 2, 3) << 6)) -#define RVC_SWSP_IMM(x) ((RV_X(x, 9, 4) << 2) | \ - (RV_X(x, 7, 2) << 6)) -#define RVC_SDSP_IMM(x) ((RV_X(x, 10, 3) << 3) | \ - (RV_X(x, 7, 3) << 6)) -#define RVC_RS1S(insn) (8 + RV_X(insn, SH_RD, 3)) -#define RVC_RS2S(insn) (8 + RV_X(insn, SH_RS2C, 3)) -#define RVC_RS2(insn) RV_X(insn, SH_RS2C, 5) - -#define SHIFT_RIGHT(x, y) \ - ((y) < 0 ? ((x) << -(y)) : ((x) >> (y))) - -#define REG_MASK \ - ((1 << (5 + LOG_REGBYTES)) - (1 << LOG_REGBYTES)) - -#define REG_OFFSET(insn, pos) \ - (SHIFT_RIGHT((insn), (pos) - LOG_REGBYTES) & REG_MASK) - -#define REG_PTR(insn, pos, regs) \ - ((ulong *)((ulong)(regs) + REG_OFFSET(insn, pos))) - -#define GET_FUNCT3(insn) (((insn) >> 12) & 7) - -#define GET_RS1(insn, regs) (*REG_PTR(insn, SH_RS1, regs)) -#define GET_RS2(insn, regs) (*REG_PTR(insn, SH_RS2, regs)) -#define GET_RS1S(insn, regs) (*REG_PTR(RVC_RS1S(insn), 0, regs)) -#define GET_RS2S(insn, regs) (*REG_PTR(RVC_RS2S(insn), 0, regs)) -#define GET_RS2C(insn, regs) (*REG_PTR(insn, SH_RS2C, regs)) -#define GET_SP(regs) (*REG_PTR(2, 0, regs)) -#define SET_RD(insn, regs, val) (*REG_PTR(insn, SH_RD, regs) = (val)) -#define IMM_I(insn) ((s32)(insn) >> 20) -#define IMM_S(insn) (((s32)(insn) >> 25 << 5) | \ - (s32)(((insn) >> 7) & 0x1f)) +#include <asm/insn.h> struct insn_func { unsigned long mask; @@ -424,6 +298,22 @@ static int system_opcode_insn(struct kvm_vcpu *vcpu, struct kvm_run *run, return (rc <= 0) ? 
rc : 1; } +static bool is_load_guest_page_fault(unsigned long scause) +{ + /** + * If a g-stage page fault occurs, the direct approach + * is to let the g-stage page fault handler handle it + * naturally, however, calling the g-stage page fault + * handler here seems rather strange. + * Considering this is a corner case, we can directly + * return to the guest and re-execute the same PC, this + * will trigger a g-stage page fault again and then the + * regular g-stage page fault handler will populate + * g-stage page table. + */ + return (scause == EXC_LOAD_GUEST_PAGE_FAULT); +} + /** * kvm_riscv_vcpu_virtual_insn -- Handle virtual instruction trap * @@ -449,6 +339,8 @@ int kvm_riscv_vcpu_virtual_insn(struct kvm_vcpu *vcpu, struct kvm_run *run, ct->sepc, &utrap); if (utrap.scause) { + if (is_load_guest_page_fault(utrap.scause)) + return 1; utrap.sepc = ct->sepc; kvm_riscv_vcpu_trap_redirect(vcpu, &utrap); return 1; @@ -504,6 +396,8 @@ int kvm_riscv_vcpu_mmio_load(struct kvm_vcpu *vcpu, struct kvm_run *run, insn = kvm_riscv_vcpu_unpriv_read(vcpu, true, ct->sepc, &utrap); if (utrap.scause) { + if (is_load_guest_page_fault(utrap.scause)) + return 1; /* Redirect trap if we failed to read instruction */ utrap.sepc = ct->sepc; kvm_riscv_vcpu_trap_redirect(vcpu, &utrap); @@ -630,6 +524,8 @@ int kvm_riscv_vcpu_mmio_store(struct kvm_vcpu *vcpu, struct kvm_run *run, insn = kvm_riscv_vcpu_unpriv_read(vcpu, true, ct->sepc, &utrap); if (utrap.scause) { + if (is_load_guest_page_fault(utrap.scause)) + return 1; /* Redirect trap if we failed to read instruction */ utrap.sepc = ct->sepc; kvm_riscv_vcpu_trap_redirect(vcpu, &utrap); diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c index b319c4c13c54..865dae903aa0 100644 --- a/arch/riscv/kvm/vcpu_onereg.c +++ b/arch/riscv/kvm/vcpu_onereg.c @@ -15,6 +15,7 @@ #include <asm/cacheflush.h> #include <asm/cpufeature.h> #include <asm/kvm_vcpu_vector.h> +#include <asm/pgtable.h> #include <asm/vector.h> #define KVM_RISCV_BASE_ISA_MASK GENMASK(25, 0) @@ -22,7 +23,7 @@ #define KVM_ISA_EXT_ARR(ext) \ [KVM_RISCV_ISA_EXT_##ext] = RISCV_ISA_EXT_##ext -/* Mapping between KVM ISA Extension ID & Host ISA extension ID */ +/* Mapping between KVM ISA Extension ID & guest ISA extension ID */ static const unsigned long kvm_isa_ext_arr[] = { /* Single letter extensions (alphabetically sorted) */ [KVM_RISCV_ISA_EXT_A] = RISCV_ISA_EXT_a, @@ -34,14 +35,22 @@ static const unsigned long kvm_isa_ext_arr[] = { [KVM_RISCV_ISA_EXT_M] = RISCV_ISA_EXT_m, [KVM_RISCV_ISA_EXT_V] = RISCV_ISA_EXT_v, /* Multi letter extensions (alphabetically sorted) */ + KVM_ISA_EXT_ARR(SMNPM), KVM_ISA_EXT_ARR(SMSTATEEN), KVM_ISA_EXT_ARR(SSAIA), KVM_ISA_EXT_ARR(SSCOFPMF), + KVM_ISA_EXT_ARR(SSNPM), KVM_ISA_EXT_ARR(SSTC), + KVM_ISA_EXT_ARR(SVADE), + KVM_ISA_EXT_ARR(SVADU), KVM_ISA_EXT_ARR(SVINVAL), KVM_ISA_EXT_ARR(SVNAPOT), KVM_ISA_EXT_ARR(SVPBMT), + KVM_ISA_EXT_ARR(SVVPTC), + KVM_ISA_EXT_ARR(ZAAMO), + KVM_ISA_EXT_ARR(ZABHA), KVM_ISA_EXT_ARR(ZACAS), + KVM_ISA_EXT_ARR(ZALRSC), KVM_ISA_EXT_ARR(ZAWRS), KVM_ISA_EXT_ARR(ZBA), KVM_ISA_EXT_ARR(ZBB), @@ -56,10 +65,13 @@ static const unsigned long kvm_isa_ext_arr[] = { KVM_ISA_EXT_ARR(ZCF), KVM_ISA_EXT_ARR(ZCMOP), KVM_ISA_EXT_ARR(ZFA), + KVM_ISA_EXT_ARR(ZFBFMIN), KVM_ISA_EXT_ARR(ZFH), KVM_ISA_EXT_ARR(ZFHMIN), KVM_ISA_EXT_ARR(ZICBOM), + KVM_ISA_EXT_ARR(ZICBOP), KVM_ISA_EXT_ARR(ZICBOZ), + KVM_ISA_EXT_ARR(ZICCRSE), KVM_ISA_EXT_ARR(ZICNTR), KVM_ISA_EXT_ARR(ZICOND), KVM_ISA_EXT_ARR(ZICSR), @@ -78,6 +90,8 @@ static const unsigned long kvm_isa_ext_arr[] = { 
KVM_ISA_EXT_ARR(ZTSO), KVM_ISA_EXT_ARR(ZVBB), KVM_ISA_EXT_ARR(ZVBC), + KVM_ISA_EXT_ARR(ZVFBFMIN), + KVM_ISA_EXT_ARR(ZVFBFWMA), KVM_ISA_EXT_ARR(ZVFH), KVM_ISA_EXT_ARR(ZVFHMIN), KVM_ISA_EXT_ARR(ZVKB), @@ -102,6 +116,36 @@ static unsigned long kvm_riscv_vcpu_base2isa_ext(unsigned long base_ext) return KVM_RISCV_ISA_EXT_MAX; } +static int kvm_riscv_vcpu_isa_check_host(unsigned long kvm_ext, unsigned long *guest_ext) +{ + unsigned long host_ext; + + if (kvm_ext >= KVM_RISCV_ISA_EXT_MAX || + kvm_ext >= ARRAY_SIZE(kvm_isa_ext_arr)) + return -ENOENT; + + *guest_ext = kvm_isa_ext_arr[kvm_ext]; + switch (*guest_ext) { + case RISCV_ISA_EXT_SMNPM: + /* + * Pointer masking effective in (H)S-mode is provided by the + * Smnpm extension, so that extension is reported to the guest, + * even though the CSR bits for configuring VS-mode pointer + * masking on the host side are part of the Ssnpm extension. + */ + host_ext = RISCV_ISA_EXT_SSNPM; + break; + default: + host_ext = *guest_ext; + break; + } + + if (!__riscv_isa_extension_available(NULL, host_ext)) + return -ENOENT; + + return 0; +} + static bool kvm_riscv_vcpu_isa_enable_allowed(unsigned long ext) { switch (ext) { @@ -110,6 +154,12 @@ static bool kvm_riscv_vcpu_isa_enable_allowed(unsigned long ext) case KVM_RISCV_ISA_EXT_SSCOFPMF: /* Sscofpmf depends on interrupt filtering defined in ssaia */ return __riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSAIA); + case KVM_RISCV_ISA_EXT_SVADU: + /* + * The henvcfg.ADUE is read-only zero if menvcfg.ADUE is zero. + * The guest OS can use Svadu only when the host OS enables Svadu. + */ + return arch_has_hw_pte_young(); case KVM_RISCV_ISA_EXT_V: return riscv_v_vstate_ctrl_user_allowed(); default: @@ -129,10 +179,15 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext) case KVM_RISCV_ISA_EXT_M: /* There is no architectural config bit to disable sscofpmf completely */ case KVM_RISCV_ISA_EXT_SSCOFPMF: + case KVM_RISCV_ISA_EXT_SSNPM: case KVM_RISCV_ISA_EXT_SSTC: case KVM_RISCV_ISA_EXT_SVINVAL: case KVM_RISCV_ISA_EXT_SVNAPOT: + case KVM_RISCV_ISA_EXT_SVVPTC: + case KVM_RISCV_ISA_EXT_ZAAMO: + case KVM_RISCV_ISA_EXT_ZABHA: case KVM_RISCV_ISA_EXT_ZACAS: + case KVM_RISCV_ISA_EXT_ZALRSC: case KVM_RISCV_ISA_EXT_ZAWRS: case KVM_RISCV_ISA_EXT_ZBA: case KVM_RISCV_ISA_EXT_ZBB: @@ -147,8 +202,11 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext) case KVM_RISCV_ISA_EXT_ZCF: case KVM_RISCV_ISA_EXT_ZCMOP: case KVM_RISCV_ISA_EXT_ZFA: + case KVM_RISCV_ISA_EXT_ZFBFMIN: case KVM_RISCV_ISA_EXT_ZFH: case KVM_RISCV_ISA_EXT_ZFHMIN: + case KVM_RISCV_ISA_EXT_ZICBOP: + case KVM_RISCV_ISA_EXT_ZICCRSE: case KVM_RISCV_ISA_EXT_ZICNTR: case KVM_RISCV_ISA_EXT_ZICOND: case KVM_RISCV_ISA_EXT_ZICSR: @@ -167,6 +225,8 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext) case KVM_RISCV_ISA_EXT_ZTSO: case KVM_RISCV_ISA_EXT_ZVBB: case KVM_RISCV_ISA_EXT_ZVBC: + case KVM_RISCV_ISA_EXT_ZVFBFMIN: + case KVM_RISCV_ISA_EXT_ZVFBFWMA: case KVM_RISCV_ISA_EXT_ZVFH: case KVM_RISCV_ISA_EXT_ZVFHMIN: case KVM_RISCV_ISA_EXT_ZVKB: @@ -181,6 +241,12 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext) /* Extensions which can be disabled using Smstateen */ case KVM_RISCV_ISA_EXT_SSAIA: return riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN); + case KVM_RISCV_ISA_EXT_SVADE: + /* + * The henvcfg.ADUE is read-only zero if menvcfg.ADUE is zero. + * Svade can't be disabled unless we support Svadu. 
+ */ + return arch_has_hw_pte_young(); default: break; } @@ -190,13 +256,13 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext) void kvm_riscv_vcpu_setup_isa(struct kvm_vcpu *vcpu) { - unsigned long host_isa, i; + unsigned long guest_ext, i; for (i = 0; i < ARRAY_SIZE(kvm_isa_ext_arr); i++) { - host_isa = kvm_isa_ext_arr[i]; - if (__riscv_isa_extension_available(NULL, host_isa) && - kvm_riscv_vcpu_isa_enable_allowed(i)) - set_bit(host_isa, vcpu->arch.isa); + if (kvm_riscv_vcpu_isa_check_host(i, &guest_ext)) + continue; + if (kvm_riscv_vcpu_isa_enable_allowed(i)) + set_bit(guest_ext, vcpu->arch.isa); } } @@ -218,15 +284,20 @@ static int kvm_riscv_vcpu_get_reg_config(struct kvm_vcpu *vcpu, reg_val = vcpu->arch.isa[0] & KVM_RISCV_BASE_ISA_MASK; break; case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size): - if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOM)) + if (!riscv_isa_extension_available(NULL, ZICBOM)) return -ENOENT; reg_val = riscv_cbom_block_size; break; case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size): - if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOZ)) + if (!riscv_isa_extension_available(NULL, ZICBOZ)) return -ENOENT; reg_val = riscv_cboz_block_size; break; + case KVM_REG_RISCV_CONFIG_REG(zicbop_block_size): + if (!riscv_isa_extension_available(NULL, ZICBOP)) + return -ENOENT; + reg_val = riscv_cbop_block_size; + break; case KVM_REG_RISCV_CONFIG_REG(mvendorid): reg_val = vcpu->arch.mvendorid; break; @@ -307,17 +378,23 @@ static int kvm_riscv_vcpu_set_reg_config(struct kvm_vcpu *vcpu, } break; case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size): - if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOM)) + if (!riscv_isa_extension_available(NULL, ZICBOM)) return -ENOENT; if (reg_val != riscv_cbom_block_size) return -EINVAL; break; case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size): - if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOZ)) + if (!riscv_isa_extension_available(NULL, ZICBOZ)) return -ENOENT; if (reg_val != riscv_cboz_block_size) return -EINVAL; break; + case KVM_REG_RISCV_CONFIG_REG(zicbop_block_size): + if (!riscv_isa_extension_available(NULL, ZICBOP)) + return -ENOENT; + if (reg_val != riscv_cbop_block_size) + return -EINVAL; + break; case KVM_REG_RISCV_CONFIG_REG(mvendorid): if (reg_val == vcpu->arch.mvendorid) break; @@ -578,18 +655,15 @@ static int riscv_vcpu_get_isa_ext_single(struct kvm_vcpu *vcpu, unsigned long reg_num, unsigned long *reg_val) { - unsigned long host_isa_ext; - - if (reg_num >= KVM_RISCV_ISA_EXT_MAX || - reg_num >= ARRAY_SIZE(kvm_isa_ext_arr)) - return -ENOENT; + unsigned long guest_ext; + int ret; - host_isa_ext = kvm_isa_ext_arr[reg_num]; - if (!__riscv_isa_extension_available(NULL, host_isa_ext)) - return -ENOENT; + ret = kvm_riscv_vcpu_isa_check_host(reg_num, &guest_ext); + if (ret) + return ret; *reg_val = 0; - if (__riscv_isa_extension_available(vcpu->arch.isa, host_isa_ext)) + if (__riscv_isa_extension_available(vcpu->arch.isa, guest_ext)) *reg_val = 1; /* Mark the given extension as available */ return 0; @@ -599,17 +673,14 @@ static int riscv_vcpu_set_isa_ext_single(struct kvm_vcpu *vcpu, unsigned long reg_num, unsigned long reg_val) { - unsigned long host_isa_ext; - - if (reg_num >= KVM_RISCV_ISA_EXT_MAX || - reg_num >= ARRAY_SIZE(kvm_isa_ext_arr)) - return -ENOENT; + unsigned long guest_ext; + int ret; - host_isa_ext = kvm_isa_ext_arr[reg_num]; - if (!__riscv_isa_extension_available(NULL, host_isa_ext)) - return -ENOENT; + ret = kvm_riscv_vcpu_isa_check_host(reg_num, &guest_ext); + if (ret) + return 
ret; - if (reg_val == test_bit(host_isa_ext, vcpu->arch.isa)) + if (reg_val == test_bit(guest_ext, vcpu->arch.isa)) return 0; if (!vcpu->arch.ran_atleast_once) { @@ -619,10 +690,10 @@ static int riscv_vcpu_set_isa_ext_single(struct kvm_vcpu *vcpu, */ if (reg_val == 1 && kvm_riscv_vcpu_isa_enable_allowed(reg_num)) - set_bit(host_isa_ext, vcpu->arch.isa); + set_bit(guest_ext, vcpu->arch.isa); else if (!reg_val && kvm_riscv_vcpu_isa_disable_allowed(reg_num)) - clear_bit(host_isa_ext, vcpu->arch.isa); + clear_bit(guest_ext, vcpu->arch.isa); else return -EINVAL; kvm_riscv_vcpu_fp_reset(vcpu); @@ -764,10 +835,13 @@ static int copy_config_reg_indices(const struct kvm_vcpu *vcpu, * was not available. */ if (i == KVM_REG_RISCV_CONFIG_REG(zicbom_block_size) && - !riscv_isa_extension_available(vcpu->arch.isa, ZICBOM)) + !riscv_isa_extension_available(NULL, ZICBOM)) continue; else if (i == KVM_REG_RISCV_CONFIG_REG(zicboz_block_size) && - !riscv_isa_extension_available(vcpu->arch.isa, ZICBOZ)) + !riscv_isa_extension_available(NULL, ZICBOZ)) + continue; + else if (i == KVM_REG_RISCV_CONFIG_REG(zicbop_block_size) && + !riscv_isa_extension_available(NULL, ZICBOP)) continue; size = IS_ENABLED(CONFIG_32BIT) ? KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64; @@ -980,16 +1054,15 @@ static int copy_fp_d_reg_indices(const struct kvm_vcpu *vcpu, static int copy_isa_ext_reg_indices(const struct kvm_vcpu *vcpu, u64 __user *uindices) { + unsigned long guest_ext; unsigned int n = 0; - unsigned long isa_ext; for (int i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++) { u64 size = IS_ENABLED(CONFIG_32BIT) ? KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64; u64 reg = KVM_REG_RISCV | size | KVM_REG_RISCV_ISA_EXT | i; - isa_ext = kvm_isa_ext_arr[i]; - if (!__riscv_isa_extension_available(NULL, isa_ext)) + if (kvm_riscv_vcpu_isa_check_host(i, &guest_ext)) continue; if (uindices) { @@ -1009,66 +1082,14 @@ static inline unsigned long num_isa_ext_regs(const struct kvm_vcpu *vcpu) return copy_isa_ext_reg_indices(vcpu, NULL); } -static int copy_sbi_ext_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) -{ - unsigned int n = 0; - - for (int i = 0; i < KVM_RISCV_SBI_EXT_MAX; i++) { - u64 size = IS_ENABLED(CONFIG_32BIT) ? - KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64; - u64 reg = KVM_REG_RISCV | size | KVM_REG_RISCV_SBI_EXT | - KVM_REG_RISCV_SBI_SINGLE | i; - - if (!riscv_vcpu_supports_sbi_ext(vcpu, i)) - continue; - - if (uindices) { - if (put_user(reg, uindices)) - return -EFAULT; - uindices++; - } - - n++; - } - - return n; -} - static unsigned long num_sbi_ext_regs(struct kvm_vcpu *vcpu) { - return copy_sbi_ext_reg_indices(vcpu, NULL); -} - -static int copy_sbi_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) -{ - struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context; - int total = 0; - - if (scontext->ext_status[KVM_RISCV_SBI_EXT_STA] == KVM_RISCV_SBI_EXT_STATUS_ENABLED) { - u64 size = IS_ENABLED(CONFIG_32BIT) ? 
KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64; - int n = sizeof(struct kvm_riscv_sbi_sta) / sizeof(unsigned long); - - for (int i = 0; i < n; i++) { - u64 reg = KVM_REG_RISCV | size | - KVM_REG_RISCV_SBI_STATE | - KVM_REG_RISCV_SBI_STA | i; - - if (uindices) { - if (put_user(reg, uindices)) - return -EFAULT; - uindices++; - } - } - - total += n; - } - - return total; + return kvm_riscv_vcpu_reg_indices_sbi_ext(vcpu, NULL); } static inline unsigned long num_sbi_regs(struct kvm_vcpu *vcpu) { - return copy_sbi_reg_indices(vcpu, NULL); + return kvm_riscv_vcpu_reg_indices_sbi(vcpu, NULL); } static inline unsigned long num_vector_regs(const struct kvm_vcpu *vcpu) @@ -1191,12 +1212,12 @@ int kvm_riscv_vcpu_copy_reg_indices(struct kvm_vcpu *vcpu, return ret; uindices += ret; - ret = copy_sbi_ext_reg_indices(vcpu, uindices); + ret = kvm_riscv_vcpu_reg_indices_sbi_ext(vcpu, uindices); if (ret < 0) return ret; uindices += ret; - ret = copy_sbi_reg_indices(vcpu, uindices); + ret = kvm_riscv_vcpu_reg_indices_sbi(vcpu, uindices); if (ret < 0) return ret; uindices += ret; diff --git a/arch/riscv/kvm/vcpu_pmu.c b/arch/riscv/kvm/vcpu_pmu.c index 2707a51b082c..a2fae70ee174 100644 --- a/arch/riscv/kvm/vcpu_pmu.c +++ b/arch/riscv/kvm/vcpu_pmu.c @@ -60,6 +60,7 @@ static u32 kvm_pmu_get_perf_event_type(unsigned long eidx) type = PERF_TYPE_HW_CACHE; break; case SBI_PMU_EVENT_TYPE_RAW: + case SBI_PMU_EVENT_TYPE_RAW_V2: case SBI_PMU_EVENT_TYPE_FW: type = PERF_TYPE_RAW; break; @@ -128,6 +129,9 @@ static u64 kvm_pmu_get_perf_event_config(unsigned long eidx, uint64_t evt_data) case SBI_PMU_EVENT_TYPE_RAW: config = evt_data & RISCV_PMU_RAW_EVENT_MASK; break; + case SBI_PMU_EVENT_TYPE_RAW_V2: + config = evt_data & RISCV_PMU_RAW_EVENT_V2_MASK; + break; case SBI_PMU_EVENT_TYPE_FW: if (ecode < SBI_PMU_FW_MAX) config = (1ULL << 63) | ecode; @@ -405,8 +409,6 @@ int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long s int snapshot_area_size = sizeof(struct riscv_pmu_snapshot_data); int sbiret = 0; gpa_t saddr; - unsigned long hva; - bool writable; if (!kvpmu || flags) { sbiret = SBI_ERR_INVALID_PARAM; @@ -428,19 +430,14 @@ int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long s goto out; } - hva = kvm_vcpu_gfn_to_hva_prot(vcpu, saddr >> PAGE_SHIFT, &writable); - if (kvm_is_error_hva(hva) || !writable) { - sbiret = SBI_ERR_INVALID_ADDRESS; - goto out; - } - kvpmu->sdata = kzalloc(snapshot_area_size, GFP_ATOMIC); if (!kvpmu->sdata) return -ENOMEM; + /* No need to check writable slot explicitly as kvm_vcpu_write_guest does it internally */ if (kvm_vcpu_write_guest(vcpu, saddr, kvpmu->sdata, snapshot_area_size)) { kfree(kvpmu->sdata); - sbiret = SBI_ERR_FAILURE; + sbiret = SBI_ERR_INVALID_ADDRESS; goto out; } @@ -452,6 +449,65 @@ out: return 0; } +int kvm_riscv_vcpu_pmu_event_info(struct kvm_vcpu *vcpu, unsigned long saddr_low, + unsigned long saddr_high, unsigned long num_events, + unsigned long flags, struct kvm_vcpu_sbi_return *retdata) +{ + struct riscv_pmu_event_info *einfo = NULL; + int shmem_size = num_events * sizeof(*einfo); + gpa_t shmem; + u32 eidx, etype; + u64 econfig; + int ret; + + if (flags != 0 || (saddr_low & (SZ_16 - 1) || num_events == 0)) { + ret = SBI_ERR_INVALID_PARAM; + goto out; + } + + shmem = saddr_low; + if (saddr_high != 0) { + if (IS_ENABLED(CONFIG_32BIT)) { + shmem |= ((gpa_t)saddr_high << 32); + } else { + ret = SBI_ERR_INVALID_ADDRESS; + goto out; + } + } + + einfo = kzalloc(shmem_size, GFP_KERNEL); + if (!einfo) + return -ENOMEM; + + ret = 
kvm_vcpu_read_guest(vcpu, shmem, einfo, shmem_size); + if (ret) { + ret = SBI_ERR_FAILURE; + goto free_mem; + } + + for (int i = 0; i < num_events; i++) { + eidx = einfo[i].event_idx; + etype = kvm_pmu_get_perf_event_type(eidx); + econfig = kvm_pmu_get_perf_event_config(eidx, einfo[i].event_data); + ret = riscv_pmu_get_event_info(etype, econfig, NULL); + einfo[i].output = (ret > 0) ? 1 : 0; + } + + ret = kvm_vcpu_write_guest(vcpu, shmem, einfo, shmem_size); + if (ret) { + ret = SBI_ERR_INVALID_ADDRESS; + goto free_mem; + } + + ret = 0; +free_mem: + kfree(einfo); +out: + retdata->err_val = ret; + + return 0; +} + int kvm_riscv_vcpu_pmu_num_ctrs(struct kvm_vcpu *vcpu, struct kvm_vcpu_sbi_return *retdata) { @@ -666,6 +722,7 @@ int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba .type = etype, .size = sizeof(struct perf_event_attr), .pinned = true, + .disabled = true, /* * It should never reach here if the platform doesn't support the sscofpmf * extension as mode filtering won't work without it. diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c index 6e704ed86a83..46ab7b989432 100644 --- a/arch/riscv/kvm/vcpu_sbi.c +++ b/arch/riscv/kvm/vcpu_sbi.c @@ -71,10 +71,22 @@ static const struct kvm_riscv_sbi_extension_entry sbi_ext[] = { .ext_ptr = &vcpu_sbi_ext_dbcn, }, { + .ext_idx = KVM_RISCV_SBI_EXT_SUSP, + .ext_ptr = &vcpu_sbi_ext_susp, + }, + { .ext_idx = KVM_RISCV_SBI_EXT_STA, .ext_ptr = &vcpu_sbi_ext_sta, }, { + .ext_idx = KVM_RISCV_SBI_EXT_FWFT, + .ext_ptr = &vcpu_sbi_ext_fwft, + }, + { + .ext_idx = KVM_RISCV_SBI_EXT_MPXY, + .ext_ptr = &vcpu_sbi_ext_mpxy, + }, + { .ext_idx = KVM_RISCV_SBI_EXT_EXPERIMENTAL, .ext_ptr = &vcpu_sbi_ext_experimental, }, @@ -102,7 +114,7 @@ riscv_vcpu_get_sbi_ext(struct kvm_vcpu *vcpu, unsigned long idx) return sext; } -bool riscv_vcpu_supports_sbi_ext(struct kvm_vcpu *vcpu, int idx) +static bool riscv_vcpu_supports_sbi_ext(struct kvm_vcpu *vcpu, int idx) { struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context; const struct kvm_riscv_sbi_extension_entry *sext; @@ -112,7 +124,9 @@ bool riscv_vcpu_supports_sbi_ext(struct kvm_vcpu *vcpu, int idx) return sext && scontext->ext_status[sext->ext_idx] != KVM_RISCV_SBI_EXT_STATUS_UNAVAILABLE; } -void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu, struct kvm_run *run) +int kvm_riscv_vcpu_sbi_forward_handler(struct kvm_vcpu *vcpu, + struct kvm_run *run, + struct kvm_vcpu_sbi_return *retdata) { struct kvm_cpu_context *cp = &vcpu->arch.guest_context; @@ -129,6 +143,8 @@ void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu, struct kvm_run *run) run->riscv_sbi.args[5] = cp->a5; run->riscv_sbi.ret[0] = SBI_ERR_NOT_SUPPORTED; run->riscv_sbi.ret[1] = 0; + retdata->uexit = true; + return 0; } void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu, @@ -139,9 +155,9 @@ void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu, struct kvm_vcpu *tmp; kvm_for_each_vcpu(i, tmp, vcpu->kvm) { - spin_lock(&vcpu->arch.mp_state_lock); + spin_lock(&tmp->arch.mp_state_lock); WRITE_ONCE(tmp->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED); - spin_unlock(&vcpu->arch.mp_state_lock); + spin_unlock(&tmp->arch.mp_state_lock); } kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP); @@ -152,6 +168,34 @@ void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu, run->exit_reason = KVM_EXIT_SYSTEM_EVENT; } +void kvm_riscv_vcpu_sbi_request_reset(struct kvm_vcpu *vcpu, + unsigned long pc, unsigned long a1) +{ + spin_lock(&vcpu->arch.reset_state.lock); + vcpu->arch.reset_state.pc = pc; + 
vcpu->arch.reset_state.a1 = a1; + spin_unlock(&vcpu->arch.reset_state.lock); + + kvm_make_request(KVM_REQ_VCPU_RESET, vcpu); +} + +void kvm_riscv_vcpu_sbi_load_reset_state(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; + struct kvm_cpu_context *cntx = &vcpu->arch.guest_context; + struct kvm_vcpu_reset_state *reset_state = &vcpu->arch.reset_state; + + cntx->a0 = vcpu->vcpu_id; + + spin_lock(&vcpu->arch.reset_state.lock); + cntx->sepc = reset_state->pc; + cntx->a1 = reset_state->a1; + spin_unlock(&vcpu->arch.reset_state.lock); + + cntx->sstatus &= ~SR_SIE; + csr->vsatp = 0; +} + int kvm_riscv_vcpu_sbi_return(struct kvm_vcpu *vcpu, struct kvm_run *run) { struct kvm_cpu_context *cp = &vcpu->arch.guest_context; @@ -252,6 +296,31 @@ static int riscv_vcpu_get_sbi_ext_multi(struct kvm_vcpu *vcpu, return 0; } +int kvm_riscv_vcpu_reg_indices_sbi_ext(struct kvm_vcpu *vcpu, u64 __user *uindices) +{ + unsigned int n = 0; + + for (int i = 0; i < KVM_RISCV_SBI_EXT_MAX; i++) { + u64 size = IS_ENABLED(CONFIG_32BIT) ? + KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64; + u64 reg = KVM_REG_RISCV | size | KVM_REG_RISCV_SBI_EXT | + KVM_REG_RISCV_SBI_SINGLE | i; + + if (!riscv_vcpu_supports_sbi_ext(vcpu, i)) + continue; + + if (uindices) { + if (put_user(reg, uindices)) + return -EFAULT; + uindices++; + } + + n++; + } + + return n; +} + int kvm_riscv_vcpu_set_reg_sbi_ext(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { @@ -328,64 +397,163 @@ int kvm_riscv_vcpu_get_reg_sbi_ext(struct kvm_vcpu *vcpu, return 0; } -int kvm_riscv_vcpu_set_reg_sbi(struct kvm_vcpu *vcpu, - const struct kvm_one_reg *reg) +int kvm_riscv_vcpu_reg_indices_sbi(struct kvm_vcpu *vcpu, u64 __user *uindices) +{ + struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context; + const struct kvm_riscv_sbi_extension_entry *entry; + const struct kvm_vcpu_sbi_extension *ext; + unsigned long state_reg_count; + int i, j, rc, count = 0; + u64 reg; + + for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) { + entry = &sbi_ext[i]; + ext = entry->ext_ptr; + + if (!ext->get_state_reg_count || + scontext->ext_status[entry->ext_idx] != KVM_RISCV_SBI_EXT_STATUS_ENABLED) + continue; + + state_reg_count = ext->get_state_reg_count(vcpu); + if (!uindices) + goto skip_put_user; + + for (j = 0; j < state_reg_count; j++) { + if (ext->get_state_reg_id) { + rc = ext->get_state_reg_id(vcpu, j, ®); + if (rc) + return rc; + } else { + reg = KVM_REG_RISCV | + (IS_ENABLED(CONFIG_32BIT) ? 
+ KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64) | + KVM_REG_RISCV_SBI_STATE | + ext->state_reg_subtype | j; + } + + if (put_user(reg, uindices)) + return -EFAULT; + uindices++; + } + +skip_put_user: + count += state_reg_count; + } + + return count; +} + +static const struct kvm_vcpu_sbi_extension *kvm_vcpu_sbi_find_ext_withstate(struct kvm_vcpu *vcpu, + unsigned long subtype) +{ + struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context; + const struct kvm_riscv_sbi_extension_entry *entry; + const struct kvm_vcpu_sbi_extension *ext; + int i; + + for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) { + entry = &sbi_ext[i]; + ext = entry->ext_ptr; + + if (ext->get_state_reg_count && + ext->state_reg_subtype == subtype && + scontext->ext_status[entry->ext_idx] == KVM_RISCV_SBI_EXT_STATUS_ENABLED) + return ext; + } + + return NULL; +} + +int kvm_riscv_vcpu_set_reg_sbi(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { unsigned long __user *uaddr = (unsigned long __user *)(unsigned long)reg->addr; unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_RISCV_SBI_STATE); - unsigned long reg_subtype, reg_val; - - if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long)) + const struct kvm_vcpu_sbi_extension *ext; + unsigned long reg_subtype; + void *reg_val; + u64 data64; + u32 data32; + u16 data16; + u8 data8; + + switch (KVM_REG_SIZE(reg->id)) { + case 1: + reg_val = &data8; + break; + case 2: + reg_val = &data16; + break; + case 4: + reg_val = &data32; + break; + case 8: + reg_val = &data64; + break; + default: return -EINVAL; + } - if (copy_from_user(®_val, uaddr, KVM_REG_SIZE(reg->id))) + if (copy_from_user(reg_val, uaddr, KVM_REG_SIZE(reg->id))) return -EFAULT; reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK; reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK; - switch (reg_subtype) { - case KVM_REG_RISCV_SBI_STA: - return kvm_riscv_vcpu_set_reg_sbi_sta(vcpu, reg_num, reg_val); - default: + ext = kvm_vcpu_sbi_find_ext_withstate(vcpu, reg_subtype); + if (!ext || !ext->set_state_reg) return -EINVAL; - } - return 0; + return ext->set_state_reg(vcpu, reg_num, KVM_REG_SIZE(reg->id), reg_val); } -int kvm_riscv_vcpu_get_reg_sbi(struct kvm_vcpu *vcpu, - const struct kvm_one_reg *reg) +int kvm_riscv_vcpu_get_reg_sbi(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { unsigned long __user *uaddr = (unsigned long __user *)(unsigned long)reg->addr; unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_RISCV_SBI_STATE); - unsigned long reg_subtype, reg_val; + const struct kvm_vcpu_sbi_extension *ext; + unsigned long reg_subtype; + void *reg_val; + u64 data64; + u32 data32; + u16 data16; + u8 data8; int ret; - if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long)) + switch (KVM_REG_SIZE(reg->id)) { + case 1: + reg_val = &data8; + break; + case 2: + reg_val = &data16; + break; + case 4: + reg_val = &data32; + break; + case 8: + reg_val = &data64; + break; + default: return -EINVAL; + } reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK; reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK; - switch (reg_subtype) { - case KVM_REG_RISCV_SBI_STA: - ret = kvm_riscv_vcpu_get_reg_sbi_sta(vcpu, reg_num, ®_val); - break; - default: + ext = kvm_vcpu_sbi_find_ext_withstate(vcpu, reg_subtype); + if (!ext || !ext->get_state_reg) return -EINVAL; - } + ret = ext->get_state_reg(vcpu, reg_num, KVM_REG_SIZE(reg->id), reg_val); if (ret) return ret; - if (copy_to_user(uaddr, ®_val, KVM_REG_SIZE(reg->id))) + if (copy_to_user(uaddr, reg_val, KVM_REG_SIZE(reg->id))) return -EFAULT; return 0; @@ -504,5 
+672,54 @@ void kvm_riscv_vcpu_sbi_init(struct kvm_vcpu *vcpu) scontext->ext_status[idx] = ext->default_disabled ? KVM_RISCV_SBI_EXT_STATUS_DISABLED : KVM_RISCV_SBI_EXT_STATUS_ENABLED; + + if (ext->init && ext->init(vcpu) != 0) + scontext->ext_status[idx] = KVM_RISCV_SBI_EXT_STATUS_UNAVAILABLE; + } +} + +void kvm_riscv_vcpu_sbi_deinit(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context; + const struct kvm_riscv_sbi_extension_entry *entry; + const struct kvm_vcpu_sbi_extension *ext; + int idx, i; + + for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) { + entry = &sbi_ext[i]; + ext = entry->ext_ptr; + idx = entry->ext_idx; + + if (idx < 0 || idx >= ARRAY_SIZE(scontext->ext_status)) + continue; + + if (scontext->ext_status[idx] == KVM_RISCV_SBI_EXT_STATUS_UNAVAILABLE || + !ext->deinit) + continue; + + ext->deinit(vcpu); + } +} + +void kvm_riscv_vcpu_sbi_reset(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context; + const struct kvm_riscv_sbi_extension_entry *entry; + const struct kvm_vcpu_sbi_extension *ext; + int idx, i; + + for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) { + entry = &sbi_ext[i]; + ext = entry->ext_ptr; + idx = entry->ext_idx; + + if (idx < 0 || idx >= ARRAY_SIZE(scontext->ext_status)) + continue; + + if (scontext->ext_status[idx] != KVM_RISCV_SBI_EXT_STATUS_ENABLED || + !ext->reset) + continue; + + ext->reset(vcpu); } } diff --git a/arch/riscv/kvm/vcpu_sbi_base.c b/arch/riscv/kvm/vcpu_sbi_base.c index 5bc570b984f4..06fdd5f69364 100644 --- a/arch/riscv/kvm/vcpu_sbi_base.c +++ b/arch/riscv/kvm/vcpu_sbi_base.c @@ -41,8 +41,7 @@ static int kvm_sbi_ext_base_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, * For experimental/vendor extensions * forward it to the userspace */ - kvm_riscv_vcpu_sbi_forward(vcpu, run); - retdata->uexit = true; + return kvm_riscv_vcpu_sbi_forward_handler(vcpu, run, retdata); } else { sbi_ext = kvm_vcpu_sbi_find_ext(vcpu, cp->a0); *out_val = sbi_ext && sbi_ext->probe ? @@ -71,28 +70,3 @@ const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_base = { .extid_end = SBI_EXT_BASE, .handler = kvm_sbi_ext_base_handler, }; - -static int kvm_sbi_ext_forward_handler(struct kvm_vcpu *vcpu, - struct kvm_run *run, - struct kvm_vcpu_sbi_return *retdata) -{ - /* - * Both SBI experimental and vendor extensions are - * unconditionally forwarded to userspace. - */ - kvm_riscv_vcpu_sbi_forward(vcpu, run); - retdata->uexit = true; - return 0; -} - -const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_experimental = { - .extid_start = SBI_EXT_EXPERIMENTAL_START, - .extid_end = SBI_EXT_EXPERIMENTAL_END, - .handler = kvm_sbi_ext_forward_handler, -}; - -const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_vendor = { - .extid_start = SBI_EXT_VENDOR_START, - .extid_end = SBI_EXT_VENDOR_END, - .handler = kvm_sbi_ext_forward_handler, -}; diff --git a/arch/riscv/kvm/vcpu_sbi_forward.c b/arch/riscv/kvm/vcpu_sbi_forward.c new file mode 100644 index 000000000000..5a3c75eb23c5 --- /dev/null +++ b/arch/riscv/kvm/vcpu_sbi_forward.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2025 Ventana Micro Systems Inc. 
+ */ + +#include <linux/kvm_host.h> +#include <asm/kvm_vcpu_sbi.h> +#include <asm/sbi.h> + +const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_experimental = { + .extid_start = SBI_EXT_EXPERIMENTAL_START, + .extid_end = SBI_EXT_EXPERIMENTAL_END, + .handler = kvm_riscv_vcpu_sbi_forward_handler, +}; + +const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_vendor = { + .extid_start = SBI_EXT_VENDOR_START, + .extid_end = SBI_EXT_VENDOR_END, + .handler = kvm_riscv_vcpu_sbi_forward_handler, +}; + +const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_dbcn = { + .extid_start = SBI_EXT_DBCN, + .extid_end = SBI_EXT_DBCN, + .default_disabled = true, + .handler = kvm_riscv_vcpu_sbi_forward_handler, +}; + +const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_mpxy = { + .extid_start = SBI_EXT_MPXY, + .extid_end = SBI_EXT_MPXY, + .default_disabled = true, + .handler = kvm_riscv_vcpu_sbi_forward_handler, +}; diff --git a/arch/riscv/kvm/vcpu_sbi_fwft.c b/arch/riscv/kvm/vcpu_sbi_fwft.c new file mode 100644 index 000000000000..62cc9c3d5759 --- /dev/null +++ b/arch/riscv/kvm/vcpu_sbi_fwft.c @@ -0,0 +1,544 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2025 Rivos Inc. + * + * Authors: + * Clément Léger <cleger@rivosinc.com> + */ + +#include <linux/errno.h> +#include <linux/err.h> +#include <linux/kvm_host.h> +#include <asm/cpufeature.h> +#include <asm/sbi.h> +#include <asm/kvm_vcpu_sbi.h> +#include <asm/kvm_vcpu_sbi_fwft.h> + +#define MIS_DELEG (BIT_ULL(EXC_LOAD_MISALIGNED) | BIT_ULL(EXC_STORE_MISALIGNED)) + +struct kvm_sbi_fwft_feature { + /** + * @id: Feature ID + */ + enum sbi_fwft_feature_t id; + + /** + * @first_reg_num: ONE_REG index of the first ONE_REG register + */ + unsigned long first_reg_num; + + /** + * @supported: Check if the feature is supported on the vcpu + * + * This callback is optional; if not provided, the feature is assumed to + * be supported + */ + bool (*supported)(struct kvm_vcpu *vcpu); + + /** + * @reset: Reset the feature value irrespective of whether the feature is supported or not + * + * This callback is mandatory + */ + void (*reset)(struct kvm_vcpu *vcpu); + + /** + * @set: Set the feature value + * + * Return SBI_SUCCESS on success or an SBI error (SBI_ERR_*) + * + * This callback is mandatory + */ + long (*set)(struct kvm_vcpu *vcpu, struct kvm_sbi_fwft_config *conf, + bool one_reg_access, unsigned long value); + + /** + * @get: Get the feature's current value + * + * Return SBI_SUCCESS on success or an SBI error (SBI_ERR_*) + * + * This callback is mandatory + */ + long (*get)(struct kvm_vcpu *vcpu, struct kvm_sbi_fwft_config *conf, + bool one_reg_access, unsigned long *value); +}; + +static const enum sbi_fwft_feature_t kvm_fwft_defined_features[] = { + SBI_FWFT_MISALIGNED_EXC_DELEG, + SBI_FWFT_LANDING_PAD, + SBI_FWFT_SHADOW_STACK, + SBI_FWFT_DOUBLE_TRAP, + SBI_FWFT_PTE_AD_HW_UPDATING, + SBI_FWFT_POINTER_MASKING_PMLEN, +}; + +static bool kvm_fwft_is_defined_feature(enum sbi_fwft_feature_t feature) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(kvm_fwft_defined_features); i++) { + if (kvm_fwft_defined_features[i] == feature) + return true; + } + + return false; +} + +static bool kvm_sbi_fwft_misaligned_delegation_supported(struct kvm_vcpu *vcpu) +{ + return misaligned_traps_can_delegate(); +} + +static void kvm_sbi_fwft_reset_misaligned_delegation(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_config *cfg = &vcpu->arch.cfg; + + cfg->hedeleg &= ~MIS_DELEG; +} + +static long kvm_sbi_fwft_set_misaligned_delegation(struct kvm_vcpu *vcpu, + struct kvm_sbi_fwft_config *conf, + bool 
one_reg_access, unsigned long value) +{ + struct kvm_vcpu_config *cfg = &vcpu->arch.cfg; + + if (value == 1) { + cfg->hedeleg |= MIS_DELEG; + if (!one_reg_access) + csr_set(CSR_HEDELEG, MIS_DELEG); + } else if (value == 0) { + cfg->hedeleg &= ~MIS_DELEG; + if (!one_reg_access) + csr_clear(CSR_HEDELEG, MIS_DELEG); + } else { + return SBI_ERR_INVALID_PARAM; + } + + return SBI_SUCCESS; +} + +static long kvm_sbi_fwft_get_misaligned_delegation(struct kvm_vcpu *vcpu, + struct kvm_sbi_fwft_config *conf, + bool one_reg_access, unsigned long *value) +{ + struct kvm_vcpu_config *cfg = &vcpu->arch.cfg; + + *value = (cfg->hedeleg & MIS_DELEG) == MIS_DELEG; + return SBI_SUCCESS; +} + +#ifndef CONFIG_32BIT + +static bool try_to_set_pmm(unsigned long value) +{ + csr_set(CSR_HENVCFG, value); + return (csr_read_clear(CSR_HENVCFG, ENVCFG_PMM) & ENVCFG_PMM) == value; +} + +static bool kvm_sbi_fwft_pointer_masking_pmlen_supported(struct kvm_vcpu *vcpu) +{ + struct kvm_sbi_fwft *fwft = vcpu_to_fwft(vcpu); + + if (!riscv_isa_extension_available(vcpu->arch.isa, SMNPM)) + return false; + + fwft->have_vs_pmlen_7 = try_to_set_pmm(ENVCFG_PMM_PMLEN_7); + fwft->have_vs_pmlen_16 = try_to_set_pmm(ENVCFG_PMM_PMLEN_16); + + return fwft->have_vs_pmlen_7 || fwft->have_vs_pmlen_16; +} + +static void kvm_sbi_fwft_reset_pointer_masking_pmlen(struct kvm_vcpu *vcpu) +{ + vcpu->arch.cfg.henvcfg &= ~ENVCFG_PMM; +} + +static long kvm_sbi_fwft_set_pointer_masking_pmlen(struct kvm_vcpu *vcpu, + struct kvm_sbi_fwft_config *conf, + bool one_reg_access, unsigned long value) +{ + struct kvm_sbi_fwft *fwft = vcpu_to_fwft(vcpu); + unsigned long pmm; + + switch (value) { + case 0: + pmm = ENVCFG_PMM_PMLEN_0; + break; + case 7: + if (!fwft->have_vs_pmlen_7) + return SBI_ERR_INVALID_PARAM; + pmm = ENVCFG_PMM_PMLEN_7; + break; + case 16: + if (!fwft->have_vs_pmlen_16) + return SBI_ERR_INVALID_PARAM; + pmm = ENVCFG_PMM_PMLEN_16; + break; + default: + return SBI_ERR_INVALID_PARAM; + } + + vcpu->arch.cfg.henvcfg &= ~ENVCFG_PMM; + vcpu->arch.cfg.henvcfg |= pmm; + + /* + * Instead of waiting for vcpu_load/put() to update the HENVCFG CSR, + * update it here so that the VCPU sees the pointer masking mode change + * immediately. 
+ */ + if (!one_reg_access) + csr_write(CSR_HENVCFG, vcpu->arch.cfg.henvcfg); + + return SBI_SUCCESS; +} + +static long kvm_sbi_fwft_get_pointer_masking_pmlen(struct kvm_vcpu *vcpu, + struct kvm_sbi_fwft_config *conf, + bool one_reg_access, unsigned long *value) +{ + switch (vcpu->arch.cfg.henvcfg & ENVCFG_PMM) { + case ENVCFG_PMM_PMLEN_0: + *value = 0; + break; + case ENVCFG_PMM_PMLEN_7: + *value = 7; + break; + case ENVCFG_PMM_PMLEN_16: + *value = 16; + break; + default: + return SBI_ERR_FAILURE; + } + + return SBI_SUCCESS; +} + +#endif + +static const struct kvm_sbi_fwft_feature features[] = { + { + .id = SBI_FWFT_MISALIGNED_EXC_DELEG, + .first_reg_num = offsetof(struct kvm_riscv_sbi_fwft, misaligned_deleg.enable) / + sizeof(unsigned long), + .supported = kvm_sbi_fwft_misaligned_delegation_supported, + .reset = kvm_sbi_fwft_reset_misaligned_delegation, + .set = kvm_sbi_fwft_set_misaligned_delegation, + .get = kvm_sbi_fwft_get_misaligned_delegation, + }, +#ifndef CONFIG_32BIT + { + .id = SBI_FWFT_POINTER_MASKING_PMLEN, + .first_reg_num = offsetof(struct kvm_riscv_sbi_fwft, pointer_masking.enable) / + sizeof(unsigned long), + .supported = kvm_sbi_fwft_pointer_masking_pmlen_supported, + .reset = kvm_sbi_fwft_reset_pointer_masking_pmlen, + .set = kvm_sbi_fwft_set_pointer_masking_pmlen, + .get = kvm_sbi_fwft_get_pointer_masking_pmlen, + }, +#endif +}; + +static const struct kvm_sbi_fwft_feature *kvm_sbi_fwft_regnum_to_feature(unsigned long reg_num) +{ + const struct kvm_sbi_fwft_feature *feature; + int i; + + for (i = 0; i < ARRAY_SIZE(features); i++) { + feature = &features[i]; + if (feature->first_reg_num <= reg_num && reg_num < (feature->first_reg_num + 3)) + return feature; + } + + return NULL; +} + +static struct kvm_sbi_fwft_config * +kvm_sbi_fwft_get_config(struct kvm_vcpu *vcpu, enum sbi_fwft_feature_t feature) +{ + int i; + struct kvm_sbi_fwft *fwft = vcpu_to_fwft(vcpu); + + for (i = 0; i < ARRAY_SIZE(features); i++) { + if (fwft->configs[i].feature->id == feature) + return &fwft->configs[i]; + } + + return NULL; +} + +static int kvm_fwft_get_feature(struct kvm_vcpu *vcpu, u32 feature, + struct kvm_sbi_fwft_config **conf) +{ + struct kvm_sbi_fwft_config *tconf; + + tconf = kvm_sbi_fwft_get_config(vcpu, feature); + if (!tconf) { + if (kvm_fwft_is_defined_feature(feature)) + return SBI_ERR_NOT_SUPPORTED; + + return SBI_ERR_DENIED; + } + + if (!tconf->supported || !tconf->enabled) + return SBI_ERR_NOT_SUPPORTED; + + *conf = tconf; + + return SBI_SUCCESS; +} + +static int kvm_sbi_fwft_set(struct kvm_vcpu *vcpu, u32 feature, + unsigned long value, unsigned long flags) +{ + int ret; + struct kvm_sbi_fwft_config *conf; + + ret = kvm_fwft_get_feature(vcpu, feature, &conf); + if (ret) + return ret; + + if ((flags & ~SBI_FWFT_SET_FLAG_LOCK) != 0) + return SBI_ERR_INVALID_PARAM; + + if (conf->flags & SBI_FWFT_SET_FLAG_LOCK) + return SBI_ERR_DENIED_LOCKED; + + conf->flags = flags; + + return conf->feature->set(vcpu, conf, false, value); +} + +static int kvm_sbi_fwft_get(struct kvm_vcpu *vcpu, unsigned long feature, + unsigned long *value) +{ + int ret; + struct kvm_sbi_fwft_config *conf; + + ret = kvm_fwft_get_feature(vcpu, feature, &conf); + if (ret) + return ret; + + return conf->feature->get(vcpu, conf, false, value); +} + +static int kvm_sbi_ext_fwft_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, + struct kvm_vcpu_sbi_return *retdata) +{ + int ret; + struct kvm_cpu_context *cp = &vcpu->arch.guest_context; + unsigned long funcid = cp->a6; + + switch (funcid) { + case SBI_EXT_FWFT_SET: + 
ret = kvm_sbi_fwft_set(vcpu, cp->a0, cp->a1, cp->a2); + break; + case SBI_EXT_FWFT_GET: + ret = kvm_sbi_fwft_get(vcpu, cp->a0, &retdata->out_val); + break; + default: + ret = SBI_ERR_NOT_SUPPORTED; + break; + } + + retdata->err_val = ret; + + return 0; +} + +static int kvm_sbi_ext_fwft_init(struct kvm_vcpu *vcpu) +{ + struct kvm_sbi_fwft *fwft = vcpu_to_fwft(vcpu); + const struct kvm_sbi_fwft_feature *feature; + struct kvm_sbi_fwft_config *conf; + int i; + + fwft->configs = kcalloc(ARRAY_SIZE(features), sizeof(struct kvm_sbi_fwft_config), + GFP_KERNEL); + if (!fwft->configs) + return -ENOMEM; + + for (i = 0; i < ARRAY_SIZE(features); i++) { + feature = &features[i]; + conf = &fwft->configs[i]; + if (feature->supported) + conf->supported = feature->supported(vcpu); + else + conf->supported = true; + + conf->enabled = conf->supported; + conf->feature = feature; + } + + return 0; +} + +static void kvm_sbi_ext_fwft_deinit(struct kvm_vcpu *vcpu) +{ + struct kvm_sbi_fwft *fwft = vcpu_to_fwft(vcpu); + + kfree(fwft->configs); +} + +static void kvm_sbi_ext_fwft_reset(struct kvm_vcpu *vcpu) +{ + struct kvm_sbi_fwft *fwft = vcpu_to_fwft(vcpu); + const struct kvm_sbi_fwft_feature *feature; + int i; + + for (i = 0; i < ARRAY_SIZE(features); i++) { + fwft->configs[i].flags = 0; + feature = &features[i]; + if (feature->reset) + feature->reset(vcpu); + } +} + +static unsigned long kvm_sbi_ext_fwft_get_reg_count(struct kvm_vcpu *vcpu) +{ + unsigned long max_reg_count = sizeof(struct kvm_riscv_sbi_fwft) / sizeof(unsigned long); + const struct kvm_sbi_fwft_feature *feature; + struct kvm_sbi_fwft_config *conf; + unsigned long reg, ret = 0; + + for (reg = 0; reg < max_reg_count; reg++) { + feature = kvm_sbi_fwft_regnum_to_feature(reg); + if (!feature) + continue; + + conf = kvm_sbi_fwft_get_config(vcpu, feature->id); + if (!conf || !conf->supported) + continue; + + ret++; + } + + return ret; +} + +static int kvm_sbi_ext_fwft_get_reg_id(struct kvm_vcpu *vcpu, int index, u64 *reg_id) +{ + int reg, max_reg_count = sizeof(struct kvm_riscv_sbi_fwft) / sizeof(unsigned long); + const struct kvm_sbi_fwft_feature *feature; + struct kvm_sbi_fwft_config *conf; + int idx = 0; + + for (reg = 0; reg < max_reg_count; reg++) { + feature = kvm_sbi_fwft_regnum_to_feature(reg); + if (!feature) + continue; + + conf = kvm_sbi_fwft_get_config(vcpu, feature->id); + if (!conf || !conf->supported) + continue; + + if (index == idx) { + *reg_id = KVM_REG_RISCV | + (IS_ENABLED(CONFIG_32BIT) ? 
+ KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64) | + KVM_REG_RISCV_SBI_STATE | + KVM_REG_RISCV_SBI_FWFT | reg; + return 0; + } + + idx++; + } + + return -ENOENT; +} + +static int kvm_sbi_ext_fwft_get_reg(struct kvm_vcpu *vcpu, unsigned long reg_num, + unsigned long reg_size, void *reg_val) +{ + const struct kvm_sbi_fwft_feature *feature; + struct kvm_sbi_fwft_config *conf; + unsigned long *value; + int ret = 0; + + if (reg_size != sizeof(unsigned long)) + return -EINVAL; + value = reg_val; + + feature = kvm_sbi_fwft_regnum_to_feature(reg_num); + if (!feature) + return -ENOENT; + + conf = kvm_sbi_fwft_get_config(vcpu, feature->id); + if (!conf || !conf->supported) + return -ENOENT; + + switch (reg_num - feature->first_reg_num) { + case 0: + *value = conf->enabled; + break; + case 1: + *value = conf->flags; + break; + case 2: + ret = conf->feature->get(vcpu, conf, true, value); + break; + default: + return -ENOENT; + } + + return sbi_err_map_linux_errno(ret); +} + +static int kvm_sbi_ext_fwft_set_reg(struct kvm_vcpu *vcpu, unsigned long reg_num, + unsigned long reg_size, const void *reg_val) +{ + const struct kvm_sbi_fwft_feature *feature; + struct kvm_sbi_fwft_config *conf; + unsigned long value; + int ret = 0; + + if (reg_size != sizeof(unsigned long)) + return -EINVAL; + value = *(const unsigned long *)reg_val; + + feature = kvm_sbi_fwft_regnum_to_feature(reg_num); + if (!feature) + return -ENOENT; + + conf = kvm_sbi_fwft_get_config(vcpu, feature->id); + if (!conf || !conf->supported) + return -ENOENT; + + switch (reg_num - feature->first_reg_num) { + case 0: + switch (value) { + case 0: + conf->enabled = false; + break; + case 1: + conf->enabled = true; + break; + default: + return -EINVAL; + } + break; + case 1: + conf->flags = value & SBI_FWFT_SET_FLAG_LOCK; + break; + case 2: + ret = conf->feature->set(vcpu, conf, true, value); + break; + default: + return -ENOENT; + } + + return sbi_err_map_linux_errno(ret); +} + +const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_fwft = { + .extid_start = SBI_EXT_FWFT, + .extid_end = SBI_EXT_FWFT, + .handler = kvm_sbi_ext_fwft_handler, + .init = kvm_sbi_ext_fwft_init, + .deinit = kvm_sbi_ext_fwft_deinit, + .reset = kvm_sbi_ext_fwft_reset, + .state_reg_subtype = KVM_REG_RISCV_SBI_FWFT, + .get_state_reg_count = kvm_sbi_ext_fwft_get_reg_count, + .get_state_reg_id = kvm_sbi_ext_fwft_get_reg_id, + .get_state_reg = kvm_sbi_ext_fwft_get_reg, + .set_state_reg = kvm_sbi_ext_fwft_set_reg, +}; diff --git a/arch/riscv/kvm/vcpu_sbi_hsm.c b/arch/riscv/kvm/vcpu_sbi_hsm.c index dce667f4b6ab..f26207f84bab 100644 --- a/arch/riscv/kvm/vcpu_sbi_hsm.c +++ b/arch/riscv/kvm/vcpu_sbi_hsm.c @@ -9,12 +9,12 @@ #include <linux/errno.h> #include <linux/err.h> #include <linux/kvm_host.h> +#include <linux/wordpart.h> #include <asm/sbi.h> #include <asm/kvm_vcpu_sbi.h> static int kvm_sbi_hsm_vcpu_start(struct kvm_vcpu *vcpu) { - struct kvm_cpu_context *reset_cntx; struct kvm_cpu_context *cp = &vcpu->arch.guest_context; struct kvm_vcpu *target_vcpu; unsigned long target_vcpuid = cp->a0; @@ -31,17 +31,7 @@ static int kvm_sbi_hsm_vcpu_start(struct kvm_vcpu *vcpu) goto out; } - spin_lock(&target_vcpu->arch.reset_cntx_lock); - reset_cntx = &target_vcpu->arch.guest_reset_context; - /* start address */ - reset_cntx->sepc = cp->a1; - /* target vcpu id to start */ - reset_cntx->a0 = target_vcpuid; - /* private data passed from kernel */ - reset_cntx->a1 = cp->a2; - spin_unlock(&target_vcpu->arch.reset_cntx_lock); - - kvm_make_request(KVM_REQ_VCPU_RESET, target_vcpu); + 
kvm_riscv_vcpu_sbi_request_reset(target_vcpu, cp->a1, cp->a2); __kvm_riscv_vcpu_power_on(target_vcpu); @@ -79,12 +69,12 @@ static int kvm_sbi_hsm_vcpu_get_status(struct kvm_vcpu *vcpu) target_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, target_vcpuid); if (!target_vcpu) return SBI_ERR_INVALID_PARAM; - if (!kvm_riscv_vcpu_stopped(target_vcpu)) - return SBI_HSM_STATE_STARTED; - else if (vcpu->stat.generic.blocking) + if (kvm_riscv_vcpu_stopped(target_vcpu)) + return SBI_HSM_STATE_STOPPED; + else if (target_vcpu->stat.generic.blocking) return SBI_HSM_STATE_SUSPENDED; else - return SBI_HSM_STATE_STOPPED; + return SBI_HSM_STATE_STARTED; } static int kvm_sbi_ext_hsm_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, @@ -109,7 +99,7 @@ static int kvm_sbi_ext_hsm_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, } return 0; case SBI_EXT_HSM_HART_SUSPEND: - switch (cp->a0) { + switch (lower_32_bits(cp->a0)) { case SBI_HSM_SUSPEND_RET_DEFAULT: kvm_riscv_vcpu_wfi(vcpu); break; diff --git a/arch/riscv/kvm/vcpu_sbi_pmu.c b/arch/riscv/kvm/vcpu_sbi_pmu.c index e4be34e03e83..a020d979d179 100644 --- a/arch/riscv/kvm/vcpu_sbi_pmu.c +++ b/arch/riscv/kvm/vcpu_sbi_pmu.c @@ -73,6 +73,9 @@ static int kvm_sbi_ext_pmu_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, case SBI_EXT_PMU_SNAPSHOT_SET_SHMEM: ret = kvm_riscv_vcpu_pmu_snapshot_set_shmem(vcpu, cp->a0, cp->a1, cp->a2, retdata); break; + case SBI_EXT_PMU_EVENT_GET_INFO: + ret = kvm_riscv_vcpu_pmu_event_info(vcpu, cp->a0, cp->a1, cp->a2, cp->a3, retdata); + break; default: retdata->err_val = SBI_ERR_NOT_SUPPORTED; } diff --git a/arch/riscv/kvm/vcpu_sbi_replace.c b/arch/riscv/kvm/vcpu_sbi_replace.c index 9c2ab3dfa93a..506a510b6bff 100644 --- a/arch/riscv/kvm/vcpu_sbi_replace.c +++ b/arch/riscv/kvm/vcpu_sbi_replace.c @@ -21,7 +21,7 @@ static int kvm_sbi_ext_time_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, u64 next_cycle; if (cp->a6 != SBI_EXT_TIME_SET_TIMER) { - retdata->err_val = SBI_ERR_INVALID_PARAM; + retdata->err_val = SBI_ERR_NOT_SUPPORTED; return 0; } @@ -51,9 +51,10 @@ static int kvm_sbi_ext_ipi_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, struct kvm_cpu_context *cp = &vcpu->arch.guest_context; unsigned long hmask = cp->a0; unsigned long hbase = cp->a1; + unsigned long hart_bit = 0, sentmask = 0; if (cp->a6 != SBI_EXT_IPI_SEND_IPI) { - retdata->err_val = SBI_ERR_INVALID_PARAM; + retdata->err_val = SBI_ERR_NOT_SUPPORTED; return 0; } @@ -62,15 +63,23 @@ static int kvm_sbi_ext_ipi_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, if (hbase != -1UL) { if (tmp->vcpu_id < hbase) continue; - if (!(hmask & (1UL << (tmp->vcpu_id - hbase)))) + hart_bit = tmp->vcpu_id - hbase; + if (hart_bit >= __riscv_xlen) + goto done; + if (!(hmask & (1UL << hart_bit))) continue; } ret = kvm_riscv_vcpu_set_interrupt(tmp, IRQ_VS_SOFT); if (ret < 0) break; + sentmask |= 1UL << hart_bit; kvm_riscv_vcpu_pmu_incr_fw(tmp, SBI_PMU_FW_IPI_RCVD); } +done: + if (hbase != -1UL && (hmask ^ sentmask)) + retdata->err_val = SBI_ERR_INVALID_PARAM; + return ret; } @@ -87,6 +96,7 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run unsigned long hmask = cp->a0; unsigned long hbase = cp->a1; unsigned long funcid = cp->a6; + unsigned long vmid; switch (funcid) { case SBI_EXT_RFENCE_REMOTE_FENCE_I: @@ -94,22 +104,22 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_FENCE_I_SENT); break; case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA: - if (cp->a2 == 0 && cp->a3 == 0) - 
kvm_riscv_hfence_vvma_all(vcpu->kvm, hbase, hmask); + vmid = READ_ONCE(vcpu->kvm->arch.vmid.vmid); + if ((cp->a2 == 0 && cp->a3 == 0) || cp->a3 == -1UL) + kvm_riscv_hfence_vvma_all(vcpu->kvm, hbase, hmask, vmid); else kvm_riscv_hfence_vvma_gva(vcpu->kvm, hbase, hmask, - cp->a2, cp->a3, PAGE_SHIFT); + cp->a2, cp->a3, PAGE_SHIFT, vmid); kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_HFENCE_VVMA_SENT); break; case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID: - if (cp->a2 == 0 && cp->a3 == 0) - kvm_riscv_hfence_vvma_asid_all(vcpu->kvm, - hbase, hmask, cp->a4); + vmid = READ_ONCE(vcpu->kvm->arch.vmid.vmid); + if ((cp->a2 == 0 && cp->a3 == 0) || cp->a3 == -1UL) + kvm_riscv_hfence_vvma_asid_all(vcpu->kvm, hbase, hmask, + cp->a4, vmid); else - kvm_riscv_hfence_vvma_asid_gva(vcpu->kvm, - hbase, hmask, - cp->a2, cp->a3, - PAGE_SHIFT, cp->a4); + kvm_riscv_hfence_vvma_asid_gva(vcpu->kvm, hbase, hmask, cp->a2, + cp->a3, PAGE_SHIFT, cp->a4, vmid); kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_HFENCE_VVMA_ASID_SENT); break; case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA: @@ -118,9 +128,9 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID: /* * Until nested virtualization is implemented, the - * SBI HFENCE calls should be treated as NOPs + * SBI HFENCE calls should return not supported + * hence fallthrough. */ - break; default: retdata->err_val = SBI_ERR_NOT_SUPPORTED; } @@ -175,35 +185,3 @@ const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_srst = { .extid_end = SBI_EXT_SRST, .handler = kvm_sbi_ext_srst_handler, }; - -static int kvm_sbi_ext_dbcn_handler(struct kvm_vcpu *vcpu, - struct kvm_run *run, - struct kvm_vcpu_sbi_return *retdata) -{ - struct kvm_cpu_context *cp = &vcpu->arch.guest_context; - unsigned long funcid = cp->a6; - - switch (funcid) { - case SBI_EXT_DBCN_CONSOLE_WRITE: - case SBI_EXT_DBCN_CONSOLE_READ: - case SBI_EXT_DBCN_CONSOLE_WRITE_BYTE: - /* - * The SBI debug console functions are unconditionally - * forwarded to the userspace. 
- */ - kvm_riscv_vcpu_sbi_forward(vcpu, run); - retdata->uexit = true; - break; - default: - retdata->err_val = SBI_ERR_NOT_SUPPORTED; - } - - return 0; -} - -const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_dbcn = { - .extid_start = SBI_EXT_DBCN, - .extid_end = SBI_EXT_DBCN, - .default_disabled = true, - .handler = kvm_sbi_ext_dbcn_handler, -}; diff --git a/arch/riscv/kvm/vcpu_sbi_sta.c b/arch/riscv/kvm/vcpu_sbi_sta.c index 5f35427114c1..afa0545c3bcf 100644 --- a/arch/riscv/kvm/vcpu_sbi_sta.c +++ b/arch/riscv/kvm/vcpu_sbi_sta.c @@ -16,7 +16,7 @@ #include <asm/sbi.h> #include <asm/uaccess.h> -void kvm_riscv_vcpu_sbi_sta_reset(struct kvm_vcpu *vcpu) +static void kvm_riscv_vcpu_sbi_sta_reset(struct kvm_vcpu *vcpu) { vcpu->arch.sta.shmem = INVALID_GPA; vcpu->arch.sta.last_steal = 0; @@ -85,8 +85,6 @@ static int kvm_sbi_sta_steal_time_set_shmem(struct kvm_vcpu *vcpu) unsigned long shmem_phys_hi = cp->a1; u32 flags = cp->a2; struct sbi_sta_struct zero_sta = {0}; - unsigned long hva; - bool writable; gpa_t shmem; int ret; @@ -111,13 +109,10 @@ static int kvm_sbi_sta_steal_time_set_shmem(struct kvm_vcpu *vcpu) return SBI_ERR_INVALID_ADDRESS; } - hva = kvm_vcpu_gfn_to_hva_prot(vcpu, shmem >> PAGE_SHIFT, &writable); - if (kvm_is_error_hva(hva) || !writable) - return SBI_ERR_INVALID_ADDRESS; - + /* No need to check writable slot explicitly as kvm_vcpu_write_guest does it internally */ ret = kvm_vcpu_write_guest(vcpu, shmem, &zero_sta, sizeof(zero_sta)); if (ret) - return SBI_ERR_FAILURE; + return SBI_ERR_INVALID_ADDRESS; vcpu->arch.sta.shmem = shmem; vcpu->arch.sta.last_steal = current->sched_info.run_delay; @@ -151,62 +146,82 @@ static unsigned long kvm_sbi_ext_sta_probe(struct kvm_vcpu *vcpu) return !!sched_info_on(); } -const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_sta = { - .extid_start = SBI_EXT_STA, - .extid_end = SBI_EXT_STA, - .handler = kvm_sbi_ext_sta_handler, - .probe = kvm_sbi_ext_sta_probe, -}; +static unsigned long kvm_sbi_ext_sta_get_state_reg_count(struct kvm_vcpu *vcpu) +{ + return sizeof(struct kvm_riscv_sbi_sta) / sizeof(unsigned long); +} -int kvm_riscv_vcpu_get_reg_sbi_sta(struct kvm_vcpu *vcpu, - unsigned long reg_num, - unsigned long *reg_val) +static int kvm_sbi_ext_sta_get_reg(struct kvm_vcpu *vcpu, unsigned long reg_num, + unsigned long reg_size, void *reg_val) { + unsigned long *value; + + if (reg_size != sizeof(unsigned long)) + return -EINVAL; + value = reg_val; + switch (reg_num) { case KVM_REG_RISCV_SBI_STA_REG(shmem_lo): - *reg_val = (unsigned long)vcpu->arch.sta.shmem; + *value = (unsigned long)vcpu->arch.sta.shmem; break; case KVM_REG_RISCV_SBI_STA_REG(shmem_hi): if (IS_ENABLED(CONFIG_32BIT)) - *reg_val = upper_32_bits(vcpu->arch.sta.shmem); + *value = upper_32_bits(vcpu->arch.sta.shmem); else - *reg_val = 0; + *value = 0; break; default: - return -EINVAL; + return -ENOENT; } return 0; } -int kvm_riscv_vcpu_set_reg_sbi_sta(struct kvm_vcpu *vcpu, - unsigned long reg_num, - unsigned long reg_val) +static int kvm_sbi_ext_sta_set_reg(struct kvm_vcpu *vcpu, unsigned long reg_num, + unsigned long reg_size, const void *reg_val) { + unsigned long value; + + if (reg_size != sizeof(unsigned long)) + return -EINVAL; + value = *(const unsigned long *)reg_val; + switch (reg_num) { case KVM_REG_RISCV_SBI_STA_REG(shmem_lo): if (IS_ENABLED(CONFIG_32BIT)) { gpa_t hi = upper_32_bits(vcpu->arch.sta.shmem); - vcpu->arch.sta.shmem = reg_val; + vcpu->arch.sta.shmem = value; vcpu->arch.sta.shmem |= hi << 32; } else { - vcpu->arch.sta.shmem = reg_val; + vcpu->arch.sta.shmem = value; } 
break; case KVM_REG_RISCV_SBI_STA_REG(shmem_hi): if (IS_ENABLED(CONFIG_32BIT)) { gpa_t lo = lower_32_bits(vcpu->arch.sta.shmem); - vcpu->arch.sta.shmem = ((gpa_t)reg_val << 32); + vcpu->arch.sta.shmem = ((gpa_t)value << 32); vcpu->arch.sta.shmem |= lo; - } else if (reg_val != 0) { + } else if (value != 0) { return -EINVAL; } break; default: - return -EINVAL; + return -ENOENT; } return 0; } + +const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_sta = { + .extid_start = SBI_EXT_STA, + .extid_end = SBI_EXT_STA, + .handler = kvm_sbi_ext_sta_handler, + .probe = kvm_sbi_ext_sta_probe, + .reset = kvm_riscv_vcpu_sbi_sta_reset, + .state_reg_subtype = KVM_REG_RISCV_SBI_STA, + .get_state_reg_count = kvm_sbi_ext_sta_get_state_reg_count, + .get_state_reg = kvm_sbi_ext_sta_get_reg, + .set_state_reg = kvm_sbi_ext_sta_set_reg, +}; diff --git a/arch/riscv/kvm/vcpu_sbi_system.c b/arch/riscv/kvm/vcpu_sbi_system.c new file mode 100644 index 000000000000..c6f7e609ac79 --- /dev/null +++ b/arch/riscv/kvm/vcpu_sbi_system.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2024 Ventana Micro Systems Inc. + */ + +#include <linux/kvm_host.h> +#include <linux/wordpart.h> + +#include <asm/kvm_vcpu_sbi.h> +#include <asm/sbi.h> + +static int kvm_sbi_ext_susp_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, + struct kvm_vcpu_sbi_return *retdata) +{ + struct kvm_cpu_context *cp = &vcpu->arch.guest_context; + unsigned long funcid = cp->a6; + unsigned long hva, i; + struct kvm_vcpu *tmp; + + switch (funcid) { + case SBI_EXT_SUSP_SYSTEM_SUSPEND: + if (lower_32_bits(cp->a0) != SBI_SUSP_SLEEP_TYPE_SUSPEND_TO_RAM) { + retdata->err_val = SBI_ERR_INVALID_PARAM; + return 0; + } + + if (!(cp->sstatus & SR_SPP)) { + retdata->err_val = SBI_ERR_FAILURE; + return 0; + } + + hva = kvm_vcpu_gfn_to_hva_prot(vcpu, cp->a1 >> PAGE_SHIFT, NULL); + if (kvm_is_error_hva(hva)) { + retdata->err_val = SBI_ERR_INVALID_ADDRESS; + return 0; + } + + kvm_for_each_vcpu(i, tmp, vcpu->kvm) { + if (tmp == vcpu) + continue; + if (!kvm_riscv_vcpu_stopped(tmp)) { + retdata->err_val = SBI_ERR_DENIED; + return 0; + } + } + + kvm_riscv_vcpu_sbi_request_reset(vcpu, cp->a1, cp->a2); + + /* userspace provides the suspend implementation */ + return kvm_riscv_vcpu_sbi_forward_handler(vcpu, run, retdata); + default: + retdata->err_val = SBI_ERR_NOT_SUPPORTED; + break; + } + + return 0; +} + +const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_susp = { + .extid_start = SBI_EXT_SUSP, + .extid_end = SBI_EXT_SUSP, + .default_disabled = true, + .handler = kvm_sbi_ext_susp_handler, +}; diff --git a/arch/riscv/kvm/vcpu_sbi_v01.c b/arch/riscv/kvm/vcpu_sbi_v01.c index 8f4c4fa16227..188d5ea5b3b8 100644 --- a/arch/riscv/kvm/vcpu_sbi_v01.c +++ b/arch/riscv/kvm/vcpu_sbi_v01.c @@ -23,6 +23,7 @@ static int kvm_sbi_ext_v01_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, struct kvm *kvm = vcpu->kvm; struct kvm_cpu_context *cp = &vcpu->arch.guest_context; struct kvm_cpu_trap *utrap = retdata->utrap; + unsigned long vmid; switch (cp->a7) { case SBI_EXT_0_1_CONSOLE_GETCHAR: @@ -31,8 +32,7 @@ static int kvm_sbi_ext_v01_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, * The CONSOLE_GETCHAR/CONSOLE_PUTCHAR SBI calls cannot be * handled in kernel so we forward these to user-space */ - kvm_riscv_vcpu_sbi_forward(vcpu, run); - retdata->uexit = true; + ret = kvm_riscv_vcpu_sbi_forward_handler(vcpu, run, retdata); break; case SBI_EXT_0_1_SET_TIMER: #if __riscv_xlen == 32 @@ -78,25 +78,21 @@ static int kvm_sbi_ext_v01_handler(struct kvm_vcpu *vcpu, struct kvm_run 
*run, if (cp->a7 == SBI_EXT_0_1_REMOTE_FENCE_I) kvm_riscv_fence_i(vcpu->kvm, 0, hmask); else if (cp->a7 == SBI_EXT_0_1_REMOTE_SFENCE_VMA) { + vmid = READ_ONCE(vcpu->kvm->arch.vmid.vmid); if (cp->a1 == 0 && cp->a2 == 0) - kvm_riscv_hfence_vvma_all(vcpu->kvm, - 0, hmask); + kvm_riscv_hfence_vvma_all(vcpu->kvm, 0, hmask, vmid); else - kvm_riscv_hfence_vvma_gva(vcpu->kvm, - 0, hmask, - cp->a1, cp->a2, - PAGE_SHIFT); + kvm_riscv_hfence_vvma_gva(vcpu->kvm, 0, hmask, cp->a1, + cp->a2, PAGE_SHIFT, vmid); } else { + vmid = READ_ONCE(vcpu->kvm->arch.vmid.vmid); if (cp->a1 == 0 && cp->a2 == 0) - kvm_riscv_hfence_vvma_asid_all(vcpu->kvm, - 0, hmask, - cp->a3); + kvm_riscv_hfence_vvma_asid_all(vcpu->kvm, 0, hmask, + cp->a3, vmid); else - kvm_riscv_hfence_vvma_asid_gva(vcpu->kvm, - 0, hmask, - cp->a1, cp->a2, - PAGE_SHIFT, - cp->a3); + kvm_riscv_hfence_vvma_asid_gva(vcpu->kvm, 0, hmask, + cp->a1, cp->a2, PAGE_SHIFT, + cp->a3, vmid); } break; default: diff --git a/arch/riscv/kvm/vcpu_timer.c b/arch/riscv/kvm/vcpu_timer.c index 96e7a4e463f7..85a7262115e1 100644 --- a/arch/riscv/kvm/vcpu_timer.c +++ b/arch/riscv/kvm/vcpu_timer.c @@ -248,18 +248,19 @@ int kvm_riscv_vcpu_timer_init(struct kvm_vcpu *vcpu) if (t->init_done) return -EINVAL; - hrtimer_init(&t->hrt, CLOCK_MONOTONIC, HRTIMER_MODE_REL); t->init_done = true; t->next_set = false; /* Enable sstc for every vcpu if available in hardware */ if (riscv_isa_extension_available(NULL, SSTC)) { t->sstc_enabled = true; - t->hrt.function = kvm_riscv_vcpu_vstimer_expired; + hrtimer_setup(&t->hrt, kvm_riscv_vcpu_vstimer_expired, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); t->timer_next_event = kvm_riscv_vcpu_update_vstimecmp; } else { t->sstc_enabled = false; - t->hrt.function = kvm_riscv_vcpu_hrtimer_expired; + hrtimer_setup(&t->hrt, kvm_riscv_vcpu_hrtimer_expired, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); t->timer_next_event = kvm_riscv_vcpu_update_hrtimer; } @@ -344,8 +345,24 @@ void kvm_riscv_vcpu_timer_save(struct kvm_vcpu *vcpu) /* * The vstimecmp CSRs are saved by kvm_riscv_vcpu_timer_sync() * upon every VM exit so no need to save here. + * + * If the VS-timer expires when no VCPU is running on a host CPU then + * WFI executed by such host CPU will be an effective NOP resulting + * in no power savings. This is because as per the RISC-V Privileged + * specification: "WFI is also required to resume execution for + * locally enabled interrupts pending at any privilege level, + * regardless of the global interrupt enable at each privilege + * level." + * + * To address the above issue, the vstimecmp CSR must be set to -1UL + * over here when the VCPU is scheduled-out or exits to user space. 
*/ + csr_write(CSR_VSTIMECMP, -1UL); +#if defined(CONFIG_32BIT) + csr_write(CSR_VSTIMECMPH, -1UL); +#endif + /* timer should be enabled for the remaining operations */ if (unlikely(!t->init_done)) return; diff --git a/arch/riscv/kvm/vcpu_vector.c b/arch/riscv/kvm/vcpu_vector.c index d92d1348045c..05f3cc2d8e31 100644 --- a/arch/riscv/kvm/vcpu_vector.c +++ b/arch/riscv/kvm/vcpu_vector.c @@ -22,6 +22,9 @@ void kvm_riscv_vcpu_vector_reset(struct kvm_vcpu *vcpu) struct kvm_cpu_context *cntx = &vcpu->arch.guest_context; cntx->sstatus &= ~SR_VS; + + cntx->vector.vlenb = riscv_v_vsize / 32; + if (riscv_isa_extension_available(isa, v)) { cntx->sstatus |= SR_VS_INITIAL; WARN_ON(!cntx->vector.datap); @@ -70,13 +73,11 @@ void kvm_riscv_vcpu_host_vector_restore(struct kvm_cpu_context *cntx) __kvm_riscv_vector_restore(cntx); } -int kvm_riscv_vcpu_alloc_vector_context(struct kvm_vcpu *vcpu, - struct kvm_cpu_context *cntx) +int kvm_riscv_vcpu_alloc_vector_context(struct kvm_vcpu *vcpu) { - cntx->vector.datap = kmalloc(riscv_v_vsize, GFP_KERNEL); - if (!cntx->vector.datap) + vcpu->arch.guest_context.vector.datap = kzalloc(riscv_v_vsize, GFP_KERNEL); + if (!vcpu->arch.guest_context.vector.datap) return -ENOMEM; - cntx->vector.vlenb = riscv_v_vsize / 32; vcpu->arch.host_context.vector.datap = kzalloc(riscv_v_vsize, GFP_KERNEL); if (!vcpu->arch.host_context.vector.datap) @@ -87,7 +88,7 @@ int kvm_riscv_vcpu_alloc_vector_context(struct kvm_vcpu *vcpu, void kvm_riscv_vcpu_free_vector_context(struct kvm_vcpu *vcpu) { - kfree(vcpu->arch.guest_reset_context.vector.datap); + kfree(vcpu->arch.guest_context.vector.datap); kfree(vcpu->arch.host_context.vector.datap); } #endif @@ -181,6 +182,8 @@ int kvm_riscv_vcpu_set_reg_vector(struct kvm_vcpu *vcpu, struct kvm_cpu_context *cntx = &vcpu->arch.guest_context; unsigned long reg_val; + if (reg_size != sizeof(reg_val)) + return -EINVAL; if (copy_from_user(®_val, uaddr, reg_size)) return -EFAULT; if (reg_val != cntx->vector.vlenb) diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c index 7396b8654f45..66d91ae6e9b2 100644 --- a/arch/riscv/kvm/vm.c +++ b/arch/riscv/kvm/vm.c @@ -11,6 +11,7 @@ #include <linux/module.h> #include <linux/uaccess.h> #include <linux/kvm_host.h> +#include <asm/kvm_mmu.h> const struct _kvm_stats_desc kvm_vm_stats_desc[] = { KVM_GENERIC_VM_STATS() @@ -31,13 +32,13 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { int r; - r = kvm_riscv_gstage_alloc_pgd(kvm); + r = kvm_riscv_mmu_alloc_pgd(kvm); if (r) return r; r = kvm_riscv_gstage_vmid_init(kvm); if (r) { - kvm_riscv_gstage_free_pgd(kvm); + kvm_riscv_mmu_free_pgd(kvm); return r; } @@ -199,7 +200,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = KVM_USER_MEM_SLOTS; break; case KVM_CAP_VM_GPA_BITS: - r = kvm_riscv_gstage_gpa_bits(); + r = kvm_riscv_gstage_gpa_bits; break; default: r = 0; @@ -209,6 +210,19 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) return r; } +int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) +{ + switch (cap->cap) { + case KVM_CAP_RISCV_MP_STATE_RESET: + if (cap->flags) + return -EINVAL; + kvm->arch.mp_state_reset = true; + return 0; + default: + return -EINVAL; + } +} + int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { return -EINVAL; diff --git a/arch/riscv/kvm/vmid.c b/arch/riscv/kvm/vmid.c index ddc98714ce8e..cf34d448289d 100644 --- a/arch/riscv/kvm/vmid.c +++ b/arch/riscv/kvm/vmid.c @@ -14,6 +14,9 @@ #include <linux/smp.h> #include <linux/kvm_host.h> #include <asm/csr.h> 
+#include <asm/kvm_mmu.h> +#include <asm/kvm_tlb.h> +#include <asm/kvm_vmid.h> static unsigned long vmid_version = 1; static unsigned long vmid_next; @@ -22,15 +25,12 @@ static DEFINE_SPINLOCK(vmid_lock); void __init kvm_riscv_gstage_vmid_detect(void) { - unsigned long old; - /* Figure-out number of VMID bits in HW */ - old = csr_read(CSR_HGATP); - csr_write(CSR_HGATP, old | HGATP_VMID); + csr_write(CSR_HGATP, (kvm_riscv_gstage_mode << HGATP_MODE_SHIFT) | HGATP_VMID); vmid_bits = csr_read(CSR_HGATP); vmid_bits = (vmid_bits & HGATP_VMID) >> HGATP_VMID_SHIFT; vmid_bits = fls_long(vmid_bits); - csr_write(CSR_HGATP, old); + csr_write(CSR_HGATP, 0); /* We polluted local TLB so flush all guest TLB */ kvm_riscv_local_hfence_gvma_all();
