Diffstat (limited to 'arch/riscv/kvm')
-rw-r--r--  arch/riscv/kvm/Kconfig            |   1
-rw-r--r--  arch/riscv/kvm/Makefile           |   1
-rw-r--r--  arch/riscv/kvm/aia.c              |  51
-rw-r--r--  arch/riscv/kvm/aia_device.c       |   6
-rw-r--r--  arch/riscv/kvm/aia_imsic.c        |  57
-rw-r--r--  arch/riscv/kvm/gstage.c           | 338
-rw-r--r--  arch/riscv/kvm/main.c             |   3
-rw-r--r--  arch/riscv/kvm/mmu.c              | 509
-rw-r--r--  arch/riscv/kvm/tlb.c              | 110
-rw-r--r--  arch/riscv/kvm/vcpu.c             |  58
-rw-r--r--  arch/riscv/kvm/vcpu_exit.c        |  20
-rw-r--r--  arch/riscv/kvm/vcpu_onereg.c      |  83
-rw-r--r--  arch/riscv/kvm/vcpu_sbi.c         |  49
-rw-r--r--  arch/riscv/kvm/vcpu_sbi_replace.c |  25
-rw-r--r--  arch/riscv/kvm/vcpu_sbi_sta.c     |   3
-rw-r--r--  arch/riscv/kvm/vcpu_sbi_v01.c     |  25
-rw-r--r--  arch/riscv/kvm/vcpu_timer.c       |  16
-rw-r--r--  arch/riscv/kvm/vm.c               |   7
-rw-r--r--  arch/riscv/kvm/vmid.c             |  25
19 files changed, 788 insertions, 599 deletions
diff --git a/arch/riscv/kvm/Kconfig b/arch/riscv/kvm/Kconfig
index 704c2899197e..5a62091b0809 100644
--- a/arch/riscv/kvm/Kconfig
+++ b/arch/riscv/kvm/Kconfig
@@ -25,6 +25,7 @@ config KVM
select HAVE_KVM_MSI
select HAVE_KVM_VCPU_ASYNC_IOCTL
select HAVE_KVM_READONLY_MEM
+ select HAVE_KVM_DIRTY_RING_ACQ_REL
select KVM_COMMON
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
select KVM_GENERIC_HARDWARE_ENABLING
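
The new HAVE_KVM_DIRTY_RING_ACQ_REL select lets userspace track dirty pages through the per-vCPU dirty ring (with acquire/release ordering) instead of only the dirty bitmap. A minimal userspace sketch of turning it on, using generic KVM UAPI names rather than anything added by this series:

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* Sketch: enable the dirty ring on an existing VM fd. ring_bytes is the
 * per-vCPU ring size in bytes and must be accepted by the kernel
 * (power-of-two, within the advertised maximum). */
static int enable_dirty_ring(int vm_fd, unsigned long ring_bytes)
{
        struct kvm_enable_cap cap = {
                .cap = KVM_CAP_DIRTY_LOG_RING_ACQ_REL,
                .args = { ring_bytes },
        };

        /* Must be done before any vCPU is created. */
        return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}
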
diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile
index 4e0bba91d284..4b199dc3e58b 100644
--- a/arch/riscv/kvm/Makefile
+++ b/arch/riscv/kvm/Makefile
@@ -14,6 +14,7 @@ kvm-y += aia.o
kvm-y += aia_aplic.o
kvm-y += aia_device.o
kvm-y += aia_imsic.o
+kvm-y += gstage.o
kvm-y += main.o
kvm-y += mmu.o
kvm-y += nacl.o
diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c
index 19afd1f23537..dad318185660 100644
--- a/arch/riscv/kvm/aia.c
+++ b/arch/riscv/kvm/aia.c
@@ -30,28 +30,6 @@ unsigned int kvm_riscv_aia_nr_hgei;
unsigned int kvm_riscv_aia_max_ids;
DEFINE_STATIC_KEY_FALSE(kvm_riscv_aia_available);
-static int aia_find_hgei(struct kvm_vcpu *owner)
-{
- int i, hgei;
- unsigned long flags;
- struct aia_hgei_control *hgctrl = get_cpu_ptr(&aia_hgei);
-
- raw_spin_lock_irqsave(&hgctrl->lock, flags);
-
- hgei = -1;
- for (i = 1; i <= kvm_riscv_aia_nr_hgei; i++) {
- if (hgctrl->owners[i] == owner) {
- hgei = i;
- break;
- }
- }
-
- raw_spin_unlock_irqrestore(&hgctrl->lock, flags);
-
- put_cpu_ptr(&aia_hgei);
- return hgei;
-}
-
static inline unsigned long aia_hvictl_value(bool ext_irq_pending)
{
unsigned long hvictl;
@@ -95,7 +73,6 @@ void kvm_riscv_vcpu_aia_sync_interrupts(struct kvm_vcpu *vcpu)
bool kvm_riscv_vcpu_aia_has_interrupts(struct kvm_vcpu *vcpu, u64 mask)
{
- int hgei;
unsigned long seip;
if (!kvm_riscv_aia_available())
@@ -114,11 +91,7 @@ bool kvm_riscv_vcpu_aia_has_interrupts(struct kvm_vcpu *vcpu, u64 mask)
if (!kvm_riscv_aia_initialized(vcpu->kvm) || !seip)
return false;
- hgei = aia_find_hgei(vcpu);
- if (hgei > 0)
- return !!(ncsr_read(CSR_HGEIP) & BIT(hgei));
-
- return false;
+ return kvm_riscv_vcpu_aia_imsic_has_interrupt(vcpu);
}
void kvm_riscv_vcpu_aia_update_hvip(struct kvm_vcpu *vcpu)
@@ -164,6 +137,9 @@ void kvm_riscv_vcpu_aia_load(struct kvm_vcpu *vcpu, int cpu)
csr_write(CSR_HVIPRIO2H, csr->hviprio2h);
#endif
}
+
+ if (kvm_riscv_aia_initialized(vcpu->kvm))
+ kvm_riscv_vcpu_aia_imsic_load(vcpu, cpu);
}
void kvm_riscv_vcpu_aia_put(struct kvm_vcpu *vcpu)
@@ -174,6 +150,9 @@ void kvm_riscv_vcpu_aia_put(struct kvm_vcpu *vcpu)
if (!kvm_riscv_aia_available())
return;
+ if (kvm_riscv_aia_initialized(vcpu->kvm))
+ kvm_riscv_vcpu_aia_imsic_put(vcpu);
+
if (kvm_riscv_nacl_available()) {
nsh = nacl_shmem();
csr->vsiselect = nacl_csr_read(nsh, CSR_VSISELECT);
@@ -472,22 +451,6 @@ void kvm_riscv_aia_free_hgei(int cpu, int hgei)
raw_spin_unlock_irqrestore(&hgctrl->lock, flags);
}
-void kvm_riscv_aia_wakeon_hgei(struct kvm_vcpu *owner, bool enable)
-{
- int hgei;
-
- if (!kvm_riscv_aia_available())
- return;
-
- hgei = aia_find_hgei(owner);
- if (hgei > 0) {
- if (enable)
- csr_set(CSR_HGEIE, BIT(hgei));
- else
- csr_clear(CSR_HGEIE, BIT(hgei));
- }
-}
-
static irqreturn_t hgei_interrupt(int irq, void *dev_id)
{
int i;
diff --git a/arch/riscv/kvm/aia_device.c b/arch/riscv/kvm/aia_device.c
index 806c41931cde..b195a93add1c 100644
--- a/arch/riscv/kvm/aia_device.c
+++ b/arch/riscv/kvm/aia_device.c
@@ -509,12 +509,12 @@ void kvm_riscv_vcpu_aia_reset(struct kvm_vcpu *vcpu)
kvm_riscv_vcpu_aia_imsic_reset(vcpu);
}
-int kvm_riscv_vcpu_aia_init(struct kvm_vcpu *vcpu)
+void kvm_riscv_vcpu_aia_init(struct kvm_vcpu *vcpu)
{
struct kvm_vcpu_aia *vaia = &vcpu->arch.aia_context;
if (!kvm_riscv_aia_available())
- return 0;
+ return;
/*
* We don't do any memory allocations over here because these
@@ -526,8 +526,6 @@ int kvm_riscv_vcpu_aia_init(struct kvm_vcpu *vcpu)
/* Initialize default values in AIA vcpu context */
vaia->imsic_addr = KVM_RISCV_AIA_UNDEF_ADDR;
vaia->hart_index = vcpu->vcpu_idx;
-
- return 0;
}
void kvm_riscv_vcpu_aia_deinit(struct kvm_vcpu *vcpu)
diff --git a/arch/riscv/kvm/aia_imsic.c b/arch/riscv/kvm/aia_imsic.c
index 29ef9c2133a9..fda0346f0ea1 100644
--- a/arch/riscv/kvm/aia_imsic.c
+++ b/arch/riscv/kvm/aia_imsic.c
@@ -16,6 +16,7 @@
#include <linux/swab.h>
#include <kvm/iodev.h>
#include <asm/csr.h>
+#include <asm/kvm_mmu.h>
#define IMSIC_MAX_EIX (IMSIC_MAX_ID / BITS_PER_TYPE(u64))
@@ -676,6 +677,48 @@ static void imsic_swfile_update(struct kvm_vcpu *vcpu,
imsic_swfile_extirq_update(vcpu);
}
+bool kvm_riscv_vcpu_aia_imsic_has_interrupt(struct kvm_vcpu *vcpu)
+{
+ struct imsic *imsic = vcpu->arch.aia_context.imsic_state;
+ unsigned long flags;
+ bool ret = false;
+
+ /*
+ * The IMSIC SW-file directly injects interrupt via hvip so
+ * only check for interrupt when IMSIC VS-file is being used.
+ */
+
+ read_lock_irqsave(&imsic->vsfile_lock, flags);
+ if (imsic->vsfile_cpu > -1)
+ ret = !!(csr_read(CSR_HGEIP) & BIT(imsic->vsfile_hgei));
+ read_unlock_irqrestore(&imsic->vsfile_lock, flags);
+
+ return ret;
+}
+
+void kvm_riscv_vcpu_aia_imsic_load(struct kvm_vcpu *vcpu, int cpu)
+{
+ /*
+ * No need to explicitly clear HGEIE CSR bits because the
+ * hgei interrupt handler (aka hgei_interrupt()) will always
+ * clear it for us.
+ */
+}
+
+void kvm_riscv_vcpu_aia_imsic_put(struct kvm_vcpu *vcpu)
+{
+ struct imsic *imsic = vcpu->arch.aia_context.imsic_state;
+ unsigned long flags;
+
+ if (!kvm_vcpu_is_blocking(vcpu))
+ return;
+
+ read_lock_irqsave(&imsic->vsfile_lock, flags);
+ if (imsic->vsfile_cpu > -1)
+ csr_set(CSR_HGEIE, BIT(imsic->vsfile_hgei));
+ read_unlock_irqrestore(&imsic->vsfile_lock, flags);
+}
+
void kvm_riscv_vcpu_aia_imsic_release(struct kvm_vcpu *vcpu)
{
unsigned long flags;
@@ -703,9 +746,8 @@ void kvm_riscv_vcpu_aia_imsic_release(struct kvm_vcpu *vcpu)
*/
/* Purge the G-stage mapping */
- kvm_riscv_gstage_iounmap(vcpu->kvm,
- vcpu->arch.aia_context.imsic_addr,
- IMSIC_MMIO_PAGE_SZ);
+ kvm_riscv_mmu_iounmap(vcpu->kvm, vcpu->arch.aia_context.imsic_addr,
+ IMSIC_MMIO_PAGE_SZ);
/* TODO: Purge the IOMMU mapping ??? */
@@ -781,13 +823,16 @@ int kvm_riscv_vcpu_aia_imsic_update(struct kvm_vcpu *vcpu)
* producers to the new IMSIC VS-file.
*/
+ /* Ensure HGEIE CSR bit is zero before using the new IMSIC VS-file */
+ csr_clear(CSR_HGEIE, BIT(new_vsfile_hgei));
+
/* Zero-out new IMSIC VS-file */
imsic_vsfile_local_clear(new_vsfile_hgei, imsic->nr_hw_eix);
/* Update G-stage mapping for the new IMSIC VS-file */
- ret = kvm_riscv_gstage_ioremap(kvm, vcpu->arch.aia_context.imsic_addr,
- new_vsfile_pa, IMSIC_MMIO_PAGE_SZ,
- true, true);
+ ret = kvm_riscv_mmu_ioremap(kvm, vcpu->arch.aia_context.imsic_addr,
+ new_vsfile_pa, IMSIC_MMIO_PAGE_SZ,
+ true, true);
if (ret)
goto fail_free_vsfile_hgei;
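
The new load/put hooks replace the HGEIE toggling that kvm_arch_vcpu_blocking()/unblocking() used to do from vcpu.c: the VS-file's HGEIE bit is only armed when a blocking vCPU is put, and, per the comment above, hgei_interrupt() disarms it again when the guest external interrupt fires. A condensed sketch of that wake-up path (not the literal handler in aia.c, which also takes hgctrl->lock and updates statistics):

/* Condensed sketch of the hgei wake-up path. */
static irqreturn_t hgei_interrupt_sketch(int irq, void *dev_id)
{
        struct aia_hgei_control *hgctrl = this_cpu_ptr(&aia_hgei);
        unsigned long pending = csr_read(CSR_HGEIP) & csr_read(CSR_HGEIE);
        int i;

        /* Disarm first: the wake-up is one-shot, re-armed on the next put. */
        csr_clear(CSR_HGEIE, pending);

        for_each_set_bit(i, &pending, BITS_PER_LONG) {
                if (hgctrl->owners[i])
                        kvm_vcpu_kick(hgctrl->owners[i]);
        }

        return IRQ_HANDLED;
}
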
diff --git a/arch/riscv/kvm/gstage.c b/arch/riscv/kvm/gstage.c
new file mode 100644
index 000000000000..24c270d6d0e2
--- /dev/null
+++ b/arch/riscv/kvm/gstage.c
@@ -0,0 +1,338 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Western Digital Corporation or its affiliates.
+ * Copyright (c) 2025 Ventana Micro Systems Inc.
+ */
+
+#include <linux/bitops.h>
+#include <linux/errno.h>
+#include <linux/kvm_host.h>
+#include <linux/module.h>
+#include <linux/pgtable.h>
+#include <asm/kvm_gstage.h>
+
+#ifdef CONFIG_64BIT
+unsigned long kvm_riscv_gstage_mode __ro_after_init = HGATP_MODE_SV39X4;
+unsigned long kvm_riscv_gstage_pgd_levels __ro_after_init = 3;
+#else
+unsigned long kvm_riscv_gstage_mode __ro_after_init = HGATP_MODE_SV32X4;
+unsigned long kvm_riscv_gstage_pgd_levels __ro_after_init = 2;
+#endif
+
+#define gstage_pte_leaf(__ptep) \
+ (pte_val(*(__ptep)) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC))
+
+static inline unsigned long gstage_pte_index(gpa_t addr, u32 level)
+{
+ unsigned long mask;
+ unsigned long shift = HGATP_PAGE_SHIFT + (kvm_riscv_gstage_index_bits * level);
+
+ if (level == (kvm_riscv_gstage_pgd_levels - 1))
+ mask = (PTRS_PER_PTE * (1UL << kvm_riscv_gstage_pgd_xbits)) - 1;
+ else
+ mask = PTRS_PER_PTE - 1;
+
+ return (addr >> shift) & mask;
+}
+
+static inline unsigned long gstage_pte_page_vaddr(pte_t pte)
+{
+ return (unsigned long)pfn_to_virt(__page_val_to_pfn(pte_val(pte)));
+}
+
+static int gstage_page_size_to_level(unsigned long page_size, u32 *out_level)
+{
+ u32 i;
+ unsigned long psz = 1UL << 12;
+
+ for (i = 0; i < kvm_riscv_gstage_pgd_levels; i++) {
+ if (page_size == (psz << (i * kvm_riscv_gstage_index_bits))) {
+ *out_level = i;
+ return 0;
+ }
+ }
+
+ return -EINVAL;
+}
+
+static int gstage_level_to_page_order(u32 level, unsigned long *out_pgorder)
+{
+ if (kvm_riscv_gstage_pgd_levels < level)
+ return -EINVAL;
+
+ *out_pgorder = 12 + (level * kvm_riscv_gstage_index_bits);
+ return 0;
+}
+
+static int gstage_level_to_page_size(u32 level, unsigned long *out_pgsize)
+{
+ int rc;
+ unsigned long page_order = PAGE_SHIFT;
+
+ rc = gstage_level_to_page_order(level, &page_order);
+ if (rc)
+ return rc;
+
+ *out_pgsize = BIT(page_order);
+ return 0;
+}
+
+bool kvm_riscv_gstage_get_leaf(struct kvm_gstage *gstage, gpa_t addr,
+ pte_t **ptepp, u32 *ptep_level)
+{
+ pte_t *ptep;
+ u32 current_level = kvm_riscv_gstage_pgd_levels - 1;
+
+ *ptep_level = current_level;
+ ptep = (pte_t *)gstage->pgd;
+ ptep = &ptep[gstage_pte_index(addr, current_level)];
+ while (ptep && pte_val(ptep_get(ptep))) {
+ if (gstage_pte_leaf(ptep)) {
+ *ptep_level = current_level;
+ *ptepp = ptep;
+ return true;
+ }
+
+ if (current_level) {
+ current_level--;
+ *ptep_level = current_level;
+ ptep = (pte_t *)gstage_pte_page_vaddr(ptep_get(ptep));
+ ptep = &ptep[gstage_pte_index(addr, current_level)];
+ } else {
+ ptep = NULL;
+ }
+ }
+
+ return false;
+}
+
+static void gstage_tlb_flush(struct kvm_gstage *gstage, u32 level, gpa_t addr)
+{
+ unsigned long order = PAGE_SHIFT;
+
+ if (gstage_level_to_page_order(level, &order))
+ return;
+ addr &= ~(BIT(order) - 1);
+
+ if (gstage->flags & KVM_GSTAGE_FLAGS_LOCAL)
+ kvm_riscv_local_hfence_gvma_vmid_gpa(gstage->vmid, addr, BIT(order), order);
+ else
+ kvm_riscv_hfence_gvma_vmid_gpa(gstage->kvm, -1UL, 0, addr, BIT(order), order,
+ gstage->vmid);
+}
+
+int kvm_riscv_gstage_set_pte(struct kvm_gstage *gstage,
+ struct kvm_mmu_memory_cache *pcache,
+ const struct kvm_gstage_mapping *map)
+{
+ u32 current_level = kvm_riscv_gstage_pgd_levels - 1;
+ pte_t *next_ptep = (pte_t *)gstage->pgd;
+ pte_t *ptep = &next_ptep[gstage_pte_index(map->addr, current_level)];
+
+ if (current_level < map->level)
+ return -EINVAL;
+
+ while (current_level != map->level) {
+ if (gstage_pte_leaf(ptep))
+ return -EEXIST;
+
+ if (!pte_val(ptep_get(ptep))) {
+ if (!pcache)
+ return -ENOMEM;
+ next_ptep = kvm_mmu_memory_cache_alloc(pcache);
+ if (!next_ptep)
+ return -ENOMEM;
+ set_pte(ptep, pfn_pte(PFN_DOWN(__pa(next_ptep)),
+ __pgprot(_PAGE_TABLE)));
+ } else {
+ if (gstage_pte_leaf(ptep))
+ return -EEXIST;
+ next_ptep = (pte_t *)gstage_pte_page_vaddr(ptep_get(ptep));
+ }
+
+ current_level--;
+ ptep = &next_ptep[gstage_pte_index(map->addr, current_level)];
+ }
+
+ if (pte_val(*ptep) != pte_val(map->pte)) {
+ set_pte(ptep, map->pte);
+ if (gstage_pte_leaf(ptep))
+ gstage_tlb_flush(gstage, current_level, map->addr);
+ }
+
+ return 0;
+}
+
+int kvm_riscv_gstage_map_page(struct kvm_gstage *gstage,
+ struct kvm_mmu_memory_cache *pcache,
+ gpa_t gpa, phys_addr_t hpa, unsigned long page_size,
+ bool page_rdonly, bool page_exec,
+ struct kvm_gstage_mapping *out_map)
+{
+ pgprot_t prot;
+ int ret;
+
+ out_map->addr = gpa;
+ out_map->level = 0;
+
+ ret = gstage_page_size_to_level(page_size, &out_map->level);
+ if (ret)
+ return ret;
+
+ /*
+ * A RISC-V implementation can choose to either:
+ * 1) Update 'A' and 'D' PTE bits in hardware
+ * 2) Generate page fault when 'A' and/or 'D' bits are not set
+ * PTE so that software can update these bits.
+ *
+ * We support both options mentioned above. To achieve this, we
+ * always set 'A' and 'D' PTE bits at time of creating G-stage
+ * mapping. To support KVM dirty page logging with both options
+ * mentioned above, we will write-protect G-stage PTEs to track
+ * dirty pages.
+ */
+
+ if (page_exec) {
+ if (page_rdonly)
+ prot = PAGE_READ_EXEC;
+ else
+ prot = PAGE_WRITE_EXEC;
+ } else {
+ if (page_rdonly)
+ prot = PAGE_READ;
+ else
+ prot = PAGE_WRITE;
+ }
+ out_map->pte = pfn_pte(PFN_DOWN(hpa), prot);
+ out_map->pte = pte_mkdirty(out_map->pte);
+
+ return kvm_riscv_gstage_set_pte(gstage, pcache, out_map);
+}
+
+void kvm_riscv_gstage_op_pte(struct kvm_gstage *gstage, gpa_t addr,
+ pte_t *ptep, u32 ptep_level, enum kvm_riscv_gstage_op op)
+{
+ int i, ret;
+ pte_t old_pte, *next_ptep;
+ u32 next_ptep_level;
+ unsigned long next_page_size, page_size;
+
+ ret = gstage_level_to_page_size(ptep_level, &page_size);
+ if (ret)
+ return;
+
+ WARN_ON(addr & (page_size - 1));
+
+ if (!pte_val(ptep_get(ptep)))
+ return;
+
+ if (ptep_level && !gstage_pte_leaf(ptep)) {
+ next_ptep = (pte_t *)gstage_pte_page_vaddr(ptep_get(ptep));
+ next_ptep_level = ptep_level - 1;
+ ret = gstage_level_to_page_size(next_ptep_level, &next_page_size);
+ if (ret)
+ return;
+
+ if (op == GSTAGE_OP_CLEAR)
+ set_pte(ptep, __pte(0));
+ for (i = 0; i < PTRS_PER_PTE; i++)
+ kvm_riscv_gstage_op_pte(gstage, addr + i * next_page_size,
+ &next_ptep[i], next_ptep_level, op);
+ if (op == GSTAGE_OP_CLEAR)
+ put_page(virt_to_page(next_ptep));
+ } else {
+ old_pte = *ptep;
+ if (op == GSTAGE_OP_CLEAR)
+ set_pte(ptep, __pte(0));
+ else if (op == GSTAGE_OP_WP)
+ set_pte(ptep, __pte(pte_val(ptep_get(ptep)) & ~_PAGE_WRITE));
+ if (pte_val(*ptep) != pte_val(old_pte))
+ gstage_tlb_flush(gstage, ptep_level, addr);
+ }
+}
+
+void kvm_riscv_gstage_unmap_range(struct kvm_gstage *gstage,
+ gpa_t start, gpa_t size, bool may_block)
+{
+ int ret;
+ pte_t *ptep;
+ u32 ptep_level;
+ bool found_leaf;
+ unsigned long page_size;
+ gpa_t addr = start, end = start + size;
+
+ while (addr < end) {
+ found_leaf = kvm_riscv_gstage_get_leaf(gstage, addr, &ptep, &ptep_level);
+ ret = gstage_level_to_page_size(ptep_level, &page_size);
+ if (ret)
+ break;
+
+ if (!found_leaf)
+ goto next;
+
+ if (!(addr & (page_size - 1)) && ((end - addr) >= page_size))
+ kvm_riscv_gstage_op_pte(gstage, addr, ptep,
+ ptep_level, GSTAGE_OP_CLEAR);
+
+next:
+ addr += page_size;
+
+ /*
+ * If the range is too large, release the kvm->mmu_lock
+ * to prevent starvation and lockup detector warnings.
+ */
+ if (!(gstage->flags & KVM_GSTAGE_FLAGS_LOCAL) && may_block && addr < end)
+ cond_resched_lock(&gstage->kvm->mmu_lock);
+ }
+}
+
+void kvm_riscv_gstage_wp_range(struct kvm_gstage *gstage, gpa_t start, gpa_t end)
+{
+ int ret;
+ pte_t *ptep;
+ u32 ptep_level;
+ bool found_leaf;
+ gpa_t addr = start;
+ unsigned long page_size;
+
+ while (addr < end) {
+ found_leaf = kvm_riscv_gstage_get_leaf(gstage, addr, &ptep, &ptep_level);
+ ret = gstage_level_to_page_size(ptep_level, &page_size);
+ if (ret)
+ break;
+
+ if (!found_leaf)
+ goto next;
+
+ if (!(addr & (page_size - 1)) && ((end - addr) >= page_size))
+ kvm_riscv_gstage_op_pte(gstage, addr, ptep,
+ ptep_level, GSTAGE_OP_WP);
+
+next:
+ addr += page_size;
+ }
+}
+
+void __init kvm_riscv_gstage_mode_detect(void)
+{
+#ifdef CONFIG_64BIT
+ /* Try Sv57x4 G-stage mode */
+ csr_write(CSR_HGATP, HGATP_MODE_SV57X4 << HGATP_MODE_SHIFT);
+ if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV57X4) {
+ kvm_riscv_gstage_mode = HGATP_MODE_SV57X4;
+ kvm_riscv_gstage_pgd_levels = 5;
+ goto skip_sv48x4_test;
+ }
+
+ /* Try Sv48x4 G-stage mode */
+ csr_write(CSR_HGATP, HGATP_MODE_SV48X4 << HGATP_MODE_SHIFT);
+ if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV48X4) {
+ kvm_riscv_gstage_mode = HGATP_MODE_SV48X4;
+ kvm_riscv_gstage_pgd_levels = 4;
+ }
+skip_sv48x4_test:
+
+ csr_write(CSR_HGATP, 0);
+ kvm_riscv_local_hfence_gvma_all();
+#endif
+}
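
The walkers above operate on an explicit struct kvm_gstage instead of reaching into kvm->arch, so the same code can describe whichever G-stage table the caller is working on. A minimal caller sketch that mirrors the pattern used by mmu.c further down (error handling trimmed, and the caller is expected to hold kvm->mmu_lock just like the mmu.c paths do):

/* Sketch: map one 4 KiB writable+executable page into a VM's G-stage. */
static int map_one_page(struct kvm *kvm, struct kvm_mmu_memory_cache *pcache,
                        gpa_t gpa, phys_addr_t hpa)
{
        struct kvm_gstage_mapping map;
        struct kvm_gstage gstage = {
                .kvm   = kvm,
                .flags = 0,     /* use remote (not local) fences */
                .vmid  = READ_ONCE(kvm->arch.vmid.vmid),
                .pgd   = kvm->arch.pgd,
        };

        return kvm_riscv_gstage_map_page(&gstage, pcache, gpa, hpa,
                                         PAGE_SIZE, false /* rdonly */,
                                         true /* exec */, &map);
}
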
diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c
index 4b24705dc63a..67c876de74ef 100644
--- a/arch/riscv/kvm/main.c
+++ b/arch/riscv/kvm/main.c
@@ -11,6 +11,7 @@
#include <linux/module.h>
#include <linux/kvm_host.h>
#include <asm/cpufeature.h>
+#include <asm/kvm_mmu.h>
#include <asm/kvm_nacl.h>
#include <asm/sbi.h>
@@ -134,7 +135,7 @@ static int __init riscv_kvm_init(void)
(rc) ? slist : "no features");
}
- switch (kvm_riscv_gstage_mode()) {
+ switch (kvm_riscv_gstage_mode) {
case HGATP_MODE_SV32X4:
str = "Sv32x4";
break;
diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c
index 1087ea74567b..a1c3b2ec1dde 100644
--- a/arch/riscv/kvm/mmu.c
+++ b/arch/riscv/kvm/mmu.c
@@ -6,348 +6,38 @@
* Anup Patel <anup.patel@wdc.com>
*/
-#include <linux/bitops.h>
#include <linux/errno.h>
-#include <linux/err.h>
#include <linux/hugetlb.h>
#include <linux/module.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
#include <linux/kvm_host.h>
#include <linux/sched/signal.h>
+#include <asm/kvm_mmu.h>
#include <asm/kvm_nacl.h>
-#include <asm/page.h>
-#include <asm/pgtable.h>
-
-#ifdef CONFIG_64BIT
-static unsigned long gstage_mode __ro_after_init = (HGATP_MODE_SV39X4 << HGATP_MODE_SHIFT);
-static unsigned long gstage_pgd_levels __ro_after_init = 3;
-#define gstage_index_bits 9
-#else
-static unsigned long gstage_mode __ro_after_init = (HGATP_MODE_SV32X4 << HGATP_MODE_SHIFT);
-static unsigned long gstage_pgd_levels __ro_after_init = 2;
-#define gstage_index_bits 10
-#endif
-
-#define gstage_pgd_xbits 2
-#define gstage_pgd_size (1UL << (HGATP_PAGE_SHIFT + gstage_pgd_xbits))
-#define gstage_gpa_bits (HGATP_PAGE_SHIFT + \
- (gstage_pgd_levels * gstage_index_bits) + \
- gstage_pgd_xbits)
-#define gstage_gpa_size ((gpa_t)(1ULL << gstage_gpa_bits))
-
-#define gstage_pte_leaf(__ptep) \
- (pte_val(*(__ptep)) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC))
-
-static inline unsigned long gstage_pte_index(gpa_t addr, u32 level)
-{
- unsigned long mask;
- unsigned long shift = HGATP_PAGE_SHIFT + (gstage_index_bits * level);
-
- if (level == (gstage_pgd_levels - 1))
- mask = (PTRS_PER_PTE * (1UL << gstage_pgd_xbits)) - 1;
- else
- mask = PTRS_PER_PTE - 1;
-
- return (addr >> shift) & mask;
-}
-
-static inline unsigned long gstage_pte_page_vaddr(pte_t pte)
-{
- return (unsigned long)pfn_to_virt(__page_val_to_pfn(pte_val(pte)));
-}
-
-static int gstage_page_size_to_level(unsigned long page_size, u32 *out_level)
-{
- u32 i;
- unsigned long psz = 1UL << 12;
-
- for (i = 0; i < gstage_pgd_levels; i++) {
- if (page_size == (psz << (i * gstage_index_bits))) {
- *out_level = i;
- return 0;
- }
- }
-
- return -EINVAL;
-}
-
-static int gstage_level_to_page_order(u32 level, unsigned long *out_pgorder)
-{
- if (gstage_pgd_levels < level)
- return -EINVAL;
-
- *out_pgorder = 12 + (level * gstage_index_bits);
- return 0;
-}
-
-static int gstage_level_to_page_size(u32 level, unsigned long *out_pgsize)
-{
- int rc;
- unsigned long page_order = PAGE_SHIFT;
-
- rc = gstage_level_to_page_order(level, &page_order);
- if (rc)
- return rc;
-
- *out_pgsize = BIT(page_order);
- return 0;
-}
-
-static bool gstage_get_leaf_entry(struct kvm *kvm, gpa_t addr,
- pte_t **ptepp, u32 *ptep_level)
-{
- pte_t *ptep;
- u32 current_level = gstage_pgd_levels - 1;
-
- *ptep_level = current_level;
- ptep = (pte_t *)kvm->arch.pgd;
- ptep = &ptep[gstage_pte_index(addr, current_level)];
- while (ptep && pte_val(ptep_get(ptep))) {
- if (gstage_pte_leaf(ptep)) {
- *ptep_level = current_level;
- *ptepp = ptep;
- return true;
- }
-
- if (current_level) {
- current_level--;
- *ptep_level = current_level;
- ptep = (pte_t *)gstage_pte_page_vaddr(ptep_get(ptep));
- ptep = &ptep[gstage_pte_index(addr, current_level)];
- } else {
- ptep = NULL;
- }
- }
-
- return false;
-}
-
-static void gstage_remote_tlb_flush(struct kvm *kvm, u32 level, gpa_t addr)
-{
- unsigned long order = PAGE_SHIFT;
-
- if (gstage_level_to_page_order(level, &order))
- return;
- addr &= ~(BIT(order) - 1);
-
- kvm_riscv_hfence_gvma_vmid_gpa(kvm, -1UL, 0, addr, BIT(order), order);
-}
-
-static int gstage_set_pte(struct kvm *kvm, u32 level,
- struct kvm_mmu_memory_cache *pcache,
- gpa_t addr, const pte_t *new_pte)
-{
- u32 current_level = gstage_pgd_levels - 1;
- pte_t *next_ptep = (pte_t *)kvm->arch.pgd;
- pte_t *ptep = &next_ptep[gstage_pte_index(addr, current_level)];
-
- if (current_level < level)
- return -EINVAL;
-
- while (current_level != level) {
- if (gstage_pte_leaf(ptep))
- return -EEXIST;
-
- if (!pte_val(ptep_get(ptep))) {
- if (!pcache)
- return -ENOMEM;
- next_ptep = kvm_mmu_memory_cache_alloc(pcache);
- if (!next_ptep)
- return -ENOMEM;
- set_pte(ptep, pfn_pte(PFN_DOWN(__pa(next_ptep)),
- __pgprot(_PAGE_TABLE)));
- } else {
- if (gstage_pte_leaf(ptep))
- return -EEXIST;
- next_ptep = (pte_t *)gstage_pte_page_vaddr(ptep_get(ptep));
- }
-
- current_level--;
- ptep = &next_ptep[gstage_pte_index(addr, current_level)];
- }
-
- set_pte(ptep, *new_pte);
- if (gstage_pte_leaf(ptep))
- gstage_remote_tlb_flush(kvm, current_level, addr);
-
- return 0;
-}
-
-static int gstage_map_page(struct kvm *kvm,
- struct kvm_mmu_memory_cache *pcache,
- gpa_t gpa, phys_addr_t hpa,
- unsigned long page_size,
- bool page_rdonly, bool page_exec)
-{
- int ret;
- u32 level = 0;
- pte_t new_pte;
- pgprot_t prot;
-
- ret = gstage_page_size_to_level(page_size, &level);
- if (ret)
- return ret;
-
- /*
- * A RISC-V implementation can choose to either:
- * 1) Update 'A' and 'D' PTE bits in hardware
- * 2) Generate page fault when 'A' and/or 'D' bits are not set
- * PTE so that software can update these bits.
- *
- * We support both options mentioned above. To achieve this, we
- * always set 'A' and 'D' PTE bits at time of creating G-stage
- * mapping. To support KVM dirty page logging with both options
- * mentioned above, we will write-protect G-stage PTEs to track
- * dirty pages.
- */
- if (page_exec) {
- if (page_rdonly)
- prot = PAGE_READ_EXEC;
- else
- prot = PAGE_WRITE_EXEC;
- } else {
- if (page_rdonly)
- prot = PAGE_READ;
- else
- prot = PAGE_WRITE;
- }
- new_pte = pfn_pte(PFN_DOWN(hpa), prot);
- new_pte = pte_mkdirty(new_pte);
-
- return gstage_set_pte(kvm, level, pcache, gpa, &new_pte);
-}
-
-enum gstage_op {
- GSTAGE_OP_NOP = 0, /* Nothing */
- GSTAGE_OP_CLEAR, /* Clear/Unmap */
- GSTAGE_OP_WP, /* Write-protect */
-};
-
-static void gstage_op_pte(struct kvm *kvm, gpa_t addr,
- pte_t *ptep, u32 ptep_level, enum gstage_op op)
-{
- int i, ret;
- pte_t *next_ptep;
- u32 next_ptep_level;
- unsigned long next_page_size, page_size;
-
- ret = gstage_level_to_page_size(ptep_level, &page_size);
- if (ret)
- return;
-
- BUG_ON(addr & (page_size - 1));
-
- if (!pte_val(ptep_get(ptep)))
- return;
-
- if (ptep_level && !gstage_pte_leaf(ptep)) {
- next_ptep = (pte_t *)gstage_pte_page_vaddr(ptep_get(ptep));
- next_ptep_level = ptep_level - 1;
- ret = gstage_level_to_page_size(next_ptep_level,
- &next_page_size);
- if (ret)
- return;
-
- if (op == GSTAGE_OP_CLEAR)
- set_pte(ptep, __pte(0));
- for (i = 0; i < PTRS_PER_PTE; i++)
- gstage_op_pte(kvm, addr + i * next_page_size,
- &next_ptep[i], next_ptep_level, op);
- if (op == GSTAGE_OP_CLEAR)
- put_page(virt_to_page(next_ptep));
- } else {
- if (op == GSTAGE_OP_CLEAR)
- set_pte(ptep, __pte(0));
- else if (op == GSTAGE_OP_WP)
- set_pte(ptep, __pte(pte_val(ptep_get(ptep)) & ~_PAGE_WRITE));
- gstage_remote_tlb_flush(kvm, ptep_level, addr);
- }
-}
-
-static void gstage_unmap_range(struct kvm *kvm, gpa_t start,
- gpa_t size, bool may_block)
-{
- int ret;
- pte_t *ptep;
- u32 ptep_level;
- bool found_leaf;
- unsigned long page_size;
- gpa_t addr = start, end = start + size;
-
- while (addr < end) {
- found_leaf = gstage_get_leaf_entry(kvm, addr,
- &ptep, &ptep_level);
- ret = gstage_level_to_page_size(ptep_level, &page_size);
- if (ret)
- break;
-
- if (!found_leaf)
- goto next;
-
- if (!(addr & (page_size - 1)) && ((end - addr) >= page_size))
- gstage_op_pte(kvm, addr, ptep,
- ptep_level, GSTAGE_OP_CLEAR);
-
-next:
- addr += page_size;
-
- /*
- * If the range is too large, release the kvm->mmu_lock
- * to prevent starvation and lockup detector warnings.
- */
- if (may_block && addr < end)
- cond_resched_lock(&kvm->mmu_lock);
- }
-}
-
-static void gstage_wp_range(struct kvm *kvm, gpa_t start, gpa_t end)
-{
- int ret;
- pte_t *ptep;
- u32 ptep_level;
- bool found_leaf;
- gpa_t addr = start;
- unsigned long page_size;
-
- while (addr < end) {
- found_leaf = gstage_get_leaf_entry(kvm, addr,
- &ptep, &ptep_level);
- ret = gstage_level_to_page_size(ptep_level, &page_size);
- if (ret)
- break;
-
- if (!found_leaf)
- goto next;
-
- if (!(addr & (page_size - 1)) && ((end - addr) >= page_size))
- gstage_op_pte(kvm, addr, ptep,
- ptep_level, GSTAGE_OP_WP);
-
-next:
- addr += page_size;
- }
-}
-
-static void gstage_wp_memory_region(struct kvm *kvm, int slot)
+static void mmu_wp_memory_region(struct kvm *kvm, int slot)
{
struct kvm_memslots *slots = kvm_memslots(kvm);
struct kvm_memory_slot *memslot = id_to_memslot(slots, slot);
phys_addr_t start = memslot->base_gfn << PAGE_SHIFT;
phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;
+ struct kvm_gstage gstage;
+
+ gstage.kvm = kvm;
+ gstage.flags = 0;
+ gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid);
+ gstage.pgd = kvm->arch.pgd;
spin_lock(&kvm->mmu_lock);
- gstage_wp_range(kvm, start, end);
+ kvm_riscv_gstage_wp_range(&gstage, start, end);
spin_unlock(&kvm->mmu_lock);
- kvm_flush_remote_tlbs(kvm);
+ kvm_flush_remote_tlbs_memslot(kvm, memslot);
}
-int kvm_riscv_gstage_ioremap(struct kvm *kvm, gpa_t gpa,
- phys_addr_t hpa, unsigned long size,
- bool writable, bool in_atomic)
+int kvm_riscv_mmu_ioremap(struct kvm *kvm, gpa_t gpa, phys_addr_t hpa,
+ unsigned long size, bool writable, bool in_atomic)
{
- pte_t pte;
int ret = 0;
unsigned long pfn;
phys_addr_t addr, end;
@@ -355,22 +45,31 @@ int kvm_riscv_gstage_ioremap(struct kvm *kvm, gpa_t gpa,
.gfp_custom = (in_atomic) ? GFP_ATOMIC | __GFP_ACCOUNT : 0,
.gfp_zero = __GFP_ZERO,
};
+ struct kvm_gstage_mapping map;
+ struct kvm_gstage gstage;
+
+ gstage.kvm = kvm;
+ gstage.flags = 0;
+ gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid);
+ gstage.pgd = kvm->arch.pgd;
end = (gpa + size + PAGE_SIZE - 1) & PAGE_MASK;
pfn = __phys_to_pfn(hpa);
for (addr = gpa; addr < end; addr += PAGE_SIZE) {
- pte = pfn_pte(pfn, PAGE_KERNEL_IO);
+ map.addr = addr;
+ map.pte = pfn_pte(pfn, PAGE_KERNEL_IO);
+ map.level = 0;
if (!writable)
- pte = pte_wrprotect(pte);
+ map.pte = pte_wrprotect(map.pte);
- ret = kvm_mmu_topup_memory_cache(&pcache, gstage_pgd_levels);
+ ret = kvm_mmu_topup_memory_cache(&pcache, kvm_riscv_gstage_pgd_levels);
if (ret)
goto out;
spin_lock(&kvm->mmu_lock);
- ret = gstage_set_pte(kvm, 0, &pcache, addr, &pte);
+ ret = kvm_riscv_gstage_set_pte(&gstage, &pcache, &map);
spin_unlock(&kvm->mmu_lock);
if (ret)
goto out;
@@ -383,10 +82,17 @@ out:
return ret;
}
-void kvm_riscv_gstage_iounmap(struct kvm *kvm, gpa_t gpa, unsigned long size)
+void kvm_riscv_mmu_iounmap(struct kvm *kvm, gpa_t gpa, unsigned long size)
{
+ struct kvm_gstage gstage;
+
+ gstage.kvm = kvm;
+ gstage.flags = 0;
+ gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid);
+ gstage.pgd = kvm->arch.pgd;
+
spin_lock(&kvm->mmu_lock);
- gstage_unmap_range(kvm, gpa, size, false);
+ kvm_riscv_gstage_unmap_range(&gstage, gpa, size, false);
spin_unlock(&kvm->mmu_lock);
}
@@ -398,8 +104,14 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
phys_addr_t base_gfn = slot->base_gfn + gfn_offset;
phys_addr_t start = (base_gfn + __ffs(mask)) << PAGE_SHIFT;
phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT;
+ struct kvm_gstage gstage;
- gstage_wp_range(kvm, start, end);
+ gstage.kvm = kvm;
+ gstage.flags = 0;
+ gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid);
+ gstage.pgd = kvm->arch.pgd;
+
+ kvm_riscv_gstage_wp_range(&gstage, start, end);
}
void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
@@ -416,7 +128,7 @@ void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen)
void kvm_arch_flush_shadow_all(struct kvm *kvm)
{
- kvm_riscv_gstage_free_pgd(kvm);
+ kvm_riscv_mmu_free_pgd(kvm);
}
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
@@ -424,9 +136,15 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
{
gpa_t gpa = slot->base_gfn << PAGE_SHIFT;
phys_addr_t size = slot->npages << PAGE_SHIFT;
+ struct kvm_gstage gstage;
+
+ gstage.kvm = kvm;
+ gstage.flags = 0;
+ gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid);
+ gstage.pgd = kvm->arch.pgd;
spin_lock(&kvm->mmu_lock);
- gstage_unmap_range(kvm, gpa, size, false);
+ kvm_riscv_gstage_unmap_range(&gstage, gpa, size, false);
spin_unlock(&kvm->mmu_lock);
}
@@ -441,7 +159,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
* the memory slot is write protected.
*/
if (change != KVM_MR_DELETE && new->flags & KVM_MEM_LOG_DIRTY_PAGES)
- gstage_wp_memory_region(kvm, new->id);
+ mmu_wp_memory_region(kvm, new->id);
}
int kvm_arch_prepare_memory_region(struct kvm *kvm,
@@ -463,7 +181,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
* space addressable by the KVM guest GPA space.
*/
if ((new->base_gfn + new->npages) >=
- (gstage_gpa_size >> PAGE_SHIFT))
+ (kvm_riscv_gstage_gpa_size >> PAGE_SHIFT))
return -EFAULT;
hva = new->userspace_addr;
@@ -487,10 +205,11 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
* +--------------------------------------------+
*/
do {
- struct vm_area_struct *vma = find_vma(current->mm, hva);
+ struct vm_area_struct *vma;
hva_t vm_start, vm_end;
- if (!vma || vma->vm_start >= reg_end)
+ vma = find_vma_intersection(current->mm, hva, reg_end);
+ if (!vma)
break;
/*
@@ -519,9 +238,8 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
goto out;
}
- ret = kvm_riscv_gstage_ioremap(kvm, gpa, pa,
- vm_end - vm_start,
- writable, false);
+ ret = kvm_riscv_mmu_ioremap(kvm, gpa, pa, vm_end - vm_start,
+ writable, false);
if (ret)
break;
}
@@ -532,7 +250,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
goto out;
if (ret)
- kvm_riscv_gstage_iounmap(kvm, base_gpa, size);
+ kvm_riscv_mmu_iounmap(kvm, base_gpa, size);
out:
mmap_read_unlock(current->mm);
@@ -541,12 +259,18 @@ out:
bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
{
+ struct kvm_gstage gstage;
+
if (!kvm->arch.pgd)
return false;
- gstage_unmap_range(kvm, range->start << PAGE_SHIFT,
- (range->end - range->start) << PAGE_SHIFT,
- range->may_block);
+ gstage.kvm = kvm;
+ gstage.flags = 0;
+ gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid);
+ gstage.pgd = kvm->arch.pgd;
+ kvm_riscv_gstage_unmap_range(&gstage, range->start << PAGE_SHIFT,
+ (range->end - range->start) << PAGE_SHIFT,
+ range->may_block);
return false;
}
@@ -555,14 +279,19 @@ bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
pte_t *ptep;
u32 ptep_level = 0;
u64 size = (range->end - range->start) << PAGE_SHIFT;
+ struct kvm_gstage gstage;
if (!kvm->arch.pgd)
return false;
WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE);
- if (!gstage_get_leaf_entry(kvm, range->start << PAGE_SHIFT,
- &ptep, &ptep_level))
+ gstage.kvm = kvm;
+ gstage.flags = 0;
+ gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid);
+ gstage.pgd = kvm->arch.pgd;
+ if (!kvm_riscv_gstage_get_leaf(&gstage, range->start << PAGE_SHIFT,
+ &ptep, &ptep_level))
return false;
return ptep_test_and_clear_young(NULL, 0, ptep);
@@ -573,22 +302,27 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
pte_t *ptep;
u32 ptep_level = 0;
u64 size = (range->end - range->start) << PAGE_SHIFT;
+ struct kvm_gstage gstage;
if (!kvm->arch.pgd)
return false;
WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE);
- if (!gstage_get_leaf_entry(kvm, range->start << PAGE_SHIFT,
- &ptep, &ptep_level))
+ gstage.kvm = kvm;
+ gstage.flags = 0;
+ gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid);
+ gstage.pgd = kvm->arch.pgd;
+ if (!kvm_riscv_gstage_get_leaf(&gstage, range->start << PAGE_SHIFT,
+ &ptep, &ptep_level))
return false;
return pte_young(ptep_get(ptep));
}
-int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu,
- struct kvm_memory_slot *memslot,
- gpa_t gpa, unsigned long hva, bool is_write)
+int kvm_riscv_mmu_map(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
+ gpa_t gpa, unsigned long hva, bool is_write,
+ struct kvm_gstage_mapping *out_map)
{
int ret;
kvm_pfn_t hfn;
@@ -601,10 +335,19 @@ int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu,
bool logging = (memslot->dirty_bitmap &&
!(memslot->flags & KVM_MEM_READONLY)) ? true : false;
unsigned long vma_pagesize, mmu_seq;
+ struct kvm_gstage gstage;
struct page *page;
+ gstage.kvm = kvm;
+ gstage.flags = 0;
+ gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid);
+ gstage.pgd = kvm->arch.pgd;
+
+ /* Setup initial state of output mapping */
+ memset(out_map, 0, sizeof(*out_map));
+
/* We need minimum second+third level pages */
- ret = kvm_mmu_topup_memory_cache(pcache, gstage_pgd_levels);
+ ret = kvm_mmu_topup_memory_cache(pcache, kvm_riscv_gstage_pgd_levels);
if (ret) {
kvm_err("Failed to topup G-stage cache\n");
return ret;
@@ -648,7 +391,8 @@ int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu,
return -EFAULT;
}
- hfn = kvm_faultin_pfn(vcpu, gfn, is_write, &writable, &page);
+ hfn = __kvm_faultin_pfn(memslot, gfn, is_write ? FOLL_WRITE : 0,
+ &writable, &page);
if (hfn == KVM_PFN_ERR_HWPOISON) {
send_sig_mceerr(BUS_MCEERR_AR, (void __user *)hva,
vma_pageshift, current);
@@ -670,12 +414,12 @@ int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu,
goto out_unlock;
if (writable) {
- mark_page_dirty(kvm, gfn);
- ret = gstage_map_page(kvm, pcache, gpa, hfn << PAGE_SHIFT,
- vma_pagesize, false, true);
+ mark_page_dirty_in_slot(kvm, memslot, gfn);
+ ret = kvm_riscv_gstage_map_page(&gstage, pcache, gpa, hfn << PAGE_SHIFT,
+ vma_pagesize, false, true, out_map);
} else {
- ret = gstage_map_page(kvm, pcache, gpa, hfn << PAGE_SHIFT,
- vma_pagesize, true, true);
+ ret = kvm_riscv_gstage_map_page(&gstage, pcache, gpa, hfn << PAGE_SHIFT,
+ vma_pagesize, true, true, out_map);
}
if (ret)
@@ -687,7 +431,7 @@ out_unlock:
return ret;
}
-int kvm_riscv_gstage_alloc_pgd(struct kvm *kvm)
+int kvm_riscv_mmu_alloc_pgd(struct kvm *kvm)
{
struct page *pgd_page;
@@ -697,7 +441,7 @@ int kvm_riscv_gstage_alloc_pgd(struct kvm *kvm)
}
pgd_page = alloc_pages(GFP_KERNEL | __GFP_ZERO,
- get_order(gstage_pgd_size));
+ get_order(kvm_riscv_gstage_pgd_size));
if (!pgd_page)
return -ENOMEM;
kvm->arch.pgd = page_to_virt(pgd_page);
@@ -706,13 +450,18 @@ int kvm_riscv_gstage_alloc_pgd(struct kvm *kvm)
return 0;
}
-void kvm_riscv_gstage_free_pgd(struct kvm *kvm)
+void kvm_riscv_mmu_free_pgd(struct kvm *kvm)
{
+ struct kvm_gstage gstage;
void *pgd = NULL;
spin_lock(&kvm->mmu_lock);
if (kvm->arch.pgd) {
- gstage_unmap_range(kvm, 0UL, gstage_gpa_size, false);
+ gstage.kvm = kvm;
+ gstage.flags = 0;
+ gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid);
+ gstage.pgd = kvm->arch.pgd;
+ kvm_riscv_gstage_unmap_range(&gstage, 0UL, kvm_riscv_gstage_gpa_size, false);
pgd = READ_ONCE(kvm->arch.pgd);
kvm->arch.pgd = NULL;
kvm->arch.pgd_phys = 0;
@@ -720,12 +469,12 @@ void kvm_riscv_gstage_free_pgd(struct kvm *kvm)
spin_unlock(&kvm->mmu_lock);
if (pgd)
- free_pages((unsigned long)pgd, get_order(gstage_pgd_size));
+ free_pages((unsigned long)pgd, get_order(kvm_riscv_gstage_pgd_size));
}
-void kvm_riscv_gstage_update_hgatp(struct kvm_vcpu *vcpu)
+void kvm_riscv_mmu_update_hgatp(struct kvm_vcpu *vcpu)
{
- unsigned long hgatp = gstage_mode;
+ unsigned long hgatp = kvm_riscv_gstage_mode << HGATP_MODE_SHIFT;
struct kvm_arch *k = &vcpu->kvm->arch;
hgatp |= (READ_ONCE(k->vmid.vmid) << HGATP_VMID_SHIFT) & HGATP_VMID;
@@ -736,37 +485,3 @@ void kvm_riscv_gstage_update_hgatp(struct kvm_vcpu *vcpu)
if (!kvm_riscv_gstage_vmid_bits())
kvm_riscv_local_hfence_gvma_all();
}
-
-void __init kvm_riscv_gstage_mode_detect(void)
-{
-#ifdef CONFIG_64BIT
- /* Try Sv57x4 G-stage mode */
- csr_write(CSR_HGATP, HGATP_MODE_SV57X4 << HGATP_MODE_SHIFT);
- if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV57X4) {
- gstage_mode = (HGATP_MODE_SV57X4 << HGATP_MODE_SHIFT);
- gstage_pgd_levels = 5;
- goto skip_sv48x4_test;
- }
-
- /* Try Sv48x4 G-stage mode */
- csr_write(CSR_HGATP, HGATP_MODE_SV48X4 << HGATP_MODE_SHIFT);
- if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV48X4) {
- gstage_mode = (HGATP_MODE_SV48X4 << HGATP_MODE_SHIFT);
- gstage_pgd_levels = 4;
- }
-skip_sv48x4_test:
-
- csr_write(CSR_HGATP, 0);
- kvm_riscv_local_hfence_gvma_all();
-#endif
-}
-
-unsigned long __init kvm_riscv_gstage_mode(void)
-{
- return gstage_mode >> HGATP_MODE_SHIFT;
-}
-
-int kvm_riscv_gstage_gpa_bits(void)
-{
- return gstage_gpa_bits;
-}
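
Every mmu.c path above now fills the same four struct kvm_gstage fields from kvm->arch before calling into gstage.c. If that repetition keeps growing it could be folded into a small helper; the one below is purely illustrative and not part of this patch:

/* Hypothetical helper (not in this patch): describe a VM's own G-stage. */
static void kvm_gstage_from_kvm(struct kvm *kvm, struct kvm_gstage *gstage)
{
        gstage->kvm   = kvm;
        gstage->flags = 0;
        gstage->vmid  = READ_ONCE(kvm->arch.vmid.vmid);
        gstage->pgd   = kvm->arch.pgd;
}
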
diff --git a/arch/riscv/kvm/tlb.c b/arch/riscv/kvm/tlb.c
index 2f91ea5f8493..3c5a70a2b927 100644
--- a/arch/riscv/kvm/tlb.c
+++ b/arch/riscv/kvm/tlb.c
@@ -15,6 +15,8 @@
#include <asm/cpufeature.h>
#include <asm/insn-def.h>
#include <asm/kvm_nacl.h>
+#include <asm/kvm_tlb.h>
+#include <asm/kvm_vmid.h>
#define has_svinval() riscv_has_extension_unlikely(RISCV_ISA_EXT_SVINVAL)
@@ -156,36 +158,13 @@ void kvm_riscv_local_hfence_vvma_all(unsigned long vmid)
csr_write(CSR_HGATP, hgatp);
}
-void kvm_riscv_local_tlb_sanitize(struct kvm_vcpu *vcpu)
-{
- unsigned long vmid;
-
- if (!kvm_riscv_gstage_vmid_bits() ||
- vcpu->arch.last_exit_cpu == vcpu->cpu)
- return;
-
- /*
- * On RISC-V platforms with hardware VMID support, we share same
- * VMID for all VCPUs of a particular Guest/VM. This means we might
- * have stale G-stage TLB entries on the current Host CPU due to
- * some other VCPU of the same Guest which ran previously on the
- * current Host CPU.
- *
- * To cleanup stale TLB entries, we simply flush all G-stage TLB
- * entries by VMID whenever underlying Host CPU changes for a VCPU.
- */
-
- vmid = READ_ONCE(vcpu->kvm->arch.vmid.vmid);
- kvm_riscv_local_hfence_gvma_vmid_all(vmid);
-}
-
void kvm_riscv_fence_i_process(struct kvm_vcpu *vcpu)
{
kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_FENCE_I_RCVD);
local_flush_icache_all();
}
-void kvm_riscv_hfence_gvma_vmid_all_process(struct kvm_vcpu *vcpu)
+void kvm_riscv_tlb_flush_process(struct kvm_vcpu *vcpu)
{
struct kvm_vmid *v = &vcpu->kvm->arch.vmid;
unsigned long vmid = READ_ONCE(v->vmid);
@@ -258,51 +237,58 @@ static bool vcpu_hfence_enqueue(struct kvm_vcpu *vcpu,
void kvm_riscv_hfence_process(struct kvm_vcpu *vcpu)
{
- unsigned long vmid;
struct kvm_riscv_hfence d = { 0 };
- struct kvm_vmid *v = &vcpu->kvm->arch.vmid;
while (vcpu_hfence_dequeue(vcpu, &d)) {
switch (d.type) {
case KVM_RISCV_HFENCE_UNKNOWN:
break;
case KVM_RISCV_HFENCE_GVMA_VMID_GPA:
- vmid = READ_ONCE(v->vmid);
if (kvm_riscv_nacl_available())
- nacl_hfence_gvma_vmid(nacl_shmem(), vmid,
+ nacl_hfence_gvma_vmid(nacl_shmem(), d.vmid,
d.addr, d.size, d.order);
else
- kvm_riscv_local_hfence_gvma_vmid_gpa(vmid, d.addr,
+ kvm_riscv_local_hfence_gvma_vmid_gpa(d.vmid, d.addr,
d.size, d.order);
break;
+ case KVM_RISCV_HFENCE_GVMA_VMID_ALL:
+ if (kvm_riscv_nacl_available())
+ nacl_hfence_gvma_vmid_all(nacl_shmem(), d.vmid);
+ else
+ kvm_riscv_local_hfence_gvma_vmid_all(d.vmid);
+ break;
case KVM_RISCV_HFENCE_VVMA_ASID_GVA:
kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_HFENCE_VVMA_ASID_RCVD);
- vmid = READ_ONCE(v->vmid);
if (kvm_riscv_nacl_available())
- nacl_hfence_vvma_asid(nacl_shmem(), vmid, d.asid,
+ nacl_hfence_vvma_asid(nacl_shmem(), d.vmid, d.asid,
d.addr, d.size, d.order);
else
- kvm_riscv_local_hfence_vvma_asid_gva(vmid, d.asid, d.addr,
+ kvm_riscv_local_hfence_vvma_asid_gva(d.vmid, d.asid, d.addr,
d.size, d.order);
break;
case KVM_RISCV_HFENCE_VVMA_ASID_ALL:
kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_HFENCE_VVMA_ASID_RCVD);
- vmid = READ_ONCE(v->vmid);
if (kvm_riscv_nacl_available())
- nacl_hfence_vvma_asid_all(nacl_shmem(), vmid, d.asid);
+ nacl_hfence_vvma_asid_all(nacl_shmem(), d.vmid, d.asid);
else
- kvm_riscv_local_hfence_vvma_asid_all(vmid, d.asid);
+ kvm_riscv_local_hfence_vvma_asid_all(d.vmid, d.asid);
break;
case KVM_RISCV_HFENCE_VVMA_GVA:
kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_HFENCE_VVMA_RCVD);
- vmid = READ_ONCE(v->vmid);
if (kvm_riscv_nacl_available())
- nacl_hfence_vvma(nacl_shmem(), vmid,
+ nacl_hfence_vvma(nacl_shmem(), d.vmid,
d.addr, d.size, d.order);
else
- kvm_riscv_local_hfence_vvma_gva(vmid, d.addr,
+ kvm_riscv_local_hfence_vvma_gva(d.vmid, d.addr,
d.size, d.order);
break;
+ case KVM_RISCV_HFENCE_VVMA_ALL:
+ kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_HFENCE_VVMA_RCVD);
+ if (kvm_riscv_nacl_available())
+ nacl_hfence_vvma_all(nacl_shmem(), d.vmid);
+ else
+ kvm_riscv_local_hfence_vvma_all(d.vmid);
+ break;
default:
break;
}
@@ -355,35 +341,43 @@ void kvm_riscv_fence_i(struct kvm *kvm,
void kvm_riscv_hfence_gvma_vmid_gpa(struct kvm *kvm,
unsigned long hbase, unsigned long hmask,
gpa_t gpa, gpa_t gpsz,
- unsigned long order)
+ unsigned long order, unsigned long vmid)
{
struct kvm_riscv_hfence data;
data.type = KVM_RISCV_HFENCE_GVMA_VMID_GPA;
data.asid = 0;
+ data.vmid = vmid;
data.addr = gpa;
data.size = gpsz;
data.order = order;
make_xfence_request(kvm, hbase, hmask, KVM_REQ_HFENCE,
- KVM_REQ_HFENCE_GVMA_VMID_ALL, &data);
+ KVM_REQ_TLB_FLUSH, &data);
}
void kvm_riscv_hfence_gvma_vmid_all(struct kvm *kvm,
- unsigned long hbase, unsigned long hmask)
+ unsigned long hbase, unsigned long hmask,
+ unsigned long vmid)
{
- make_xfence_request(kvm, hbase, hmask, KVM_REQ_HFENCE_GVMA_VMID_ALL,
- KVM_REQ_HFENCE_GVMA_VMID_ALL, NULL);
+ struct kvm_riscv_hfence data = {0};
+
+ data.type = KVM_RISCV_HFENCE_GVMA_VMID_ALL;
+ data.vmid = vmid;
+ make_xfence_request(kvm, hbase, hmask, KVM_REQ_HFENCE,
+ KVM_REQ_TLB_FLUSH, &data);
}
void kvm_riscv_hfence_vvma_asid_gva(struct kvm *kvm,
unsigned long hbase, unsigned long hmask,
unsigned long gva, unsigned long gvsz,
- unsigned long order, unsigned long asid)
+ unsigned long order, unsigned long asid,
+ unsigned long vmid)
{
struct kvm_riscv_hfence data;
data.type = KVM_RISCV_HFENCE_VVMA_ASID_GVA;
data.asid = asid;
+ data.vmid = vmid;
data.addr = gva;
data.size = gvsz;
data.order = order;
@@ -393,13 +387,13 @@ void kvm_riscv_hfence_vvma_asid_gva(struct kvm *kvm,
void kvm_riscv_hfence_vvma_asid_all(struct kvm *kvm,
unsigned long hbase, unsigned long hmask,
- unsigned long asid)
+ unsigned long asid, unsigned long vmid)
{
- struct kvm_riscv_hfence data;
+ struct kvm_riscv_hfence data = {0};
data.type = KVM_RISCV_HFENCE_VVMA_ASID_ALL;
data.asid = asid;
- data.addr = data.size = data.order = 0;
+ data.vmid = vmid;
make_xfence_request(kvm, hbase, hmask, KVM_REQ_HFENCE,
KVM_REQ_HFENCE_VVMA_ALL, &data);
}
@@ -407,12 +401,13 @@ void kvm_riscv_hfence_vvma_asid_all(struct kvm *kvm,
void kvm_riscv_hfence_vvma_gva(struct kvm *kvm,
unsigned long hbase, unsigned long hmask,
unsigned long gva, unsigned long gvsz,
- unsigned long order)
+ unsigned long order, unsigned long vmid)
{
struct kvm_riscv_hfence data;
data.type = KVM_RISCV_HFENCE_VVMA_GVA;
data.asid = 0;
+ data.vmid = vmid;
data.addr = gva;
data.size = gvsz;
data.order = order;
@@ -421,8 +416,21 @@ void kvm_riscv_hfence_vvma_gva(struct kvm *kvm,
}
void kvm_riscv_hfence_vvma_all(struct kvm *kvm,
- unsigned long hbase, unsigned long hmask)
+ unsigned long hbase, unsigned long hmask,
+ unsigned long vmid)
+{
+ struct kvm_riscv_hfence data = {0};
+
+ data.type = KVM_RISCV_HFENCE_VVMA_ALL;
+ data.vmid = vmid;
+ make_xfence_request(kvm, hbase, hmask, KVM_REQ_HFENCE,
+ KVM_REQ_HFENCE_VVMA_ALL, &data);
+}
+
+int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages)
{
- make_xfence_request(kvm, hbase, hmask, KVM_REQ_HFENCE_VVMA_ALL,
- KVM_REQ_HFENCE_VVMA_ALL, NULL);
+ kvm_riscv_hfence_gvma_vmid_gpa(kvm, -1UL, 0,
+ gfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT,
+ PAGE_SHIFT, READ_ONCE(kvm->arch.vmid.vmid));
+ return 0;
}
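
All remote fence helpers now take the VMID explicitly instead of re-reading it at processing time, and the new kvm_arch_flush_remote_tlbs_range() above shows the canonical calling pattern: read the VMID once, then pass it along with the GPA range. A small sketch of the same pattern for a single page (hbase = -1UL, hmask = 0 matches the "all vCPUs" usage above):

/* Sketch: flush the G-stage mapping of one guest page on all vCPUs. */
static void flush_one_gpa(struct kvm *kvm, gpa_t gpa)
{
        unsigned long vmid = READ_ONCE(kvm->arch.vmid.vmid);

        kvm_riscv_hfence_gvma_vmid_gpa(kvm, -1UL, 0, gpa, PAGE_SIZE,
                                       PAGE_SHIFT, vmid);
}
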
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index e0a01af426ff..f001e56403f9 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -18,6 +18,7 @@
#include <linux/fs.h>
#include <linux/kvm_host.h>
#include <asm/cacheflush.h>
+#include <asm/kvm_mmu.h>
#include <asm/kvm_nacl.h>
#include <asm/kvm_vcpu_vector.h>
@@ -111,7 +112,7 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu, bool kvm_sbi_reset)
vcpu->arch.hfence_tail = 0;
memset(vcpu->arch.hfence_queue, 0, sizeof(vcpu->arch.hfence_queue));
- kvm_riscv_vcpu_sbi_sta_reset(vcpu);
+ kvm_riscv_vcpu_sbi_reset(vcpu);
/* Reset the guest CSRs for hotplug usecase */
if (loaded)
@@ -148,8 +149,9 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
spin_lock_init(&vcpu->arch.reset_state.lock);
- if (kvm_riscv_vcpu_alloc_vector_context(vcpu))
- return -ENOMEM;
+ rc = kvm_riscv_vcpu_alloc_vector_context(vcpu);
+ if (rc)
+ return rc;
/* Setup VCPU timer */
kvm_riscv_vcpu_timer_init(vcpu);
@@ -158,9 +160,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
kvm_riscv_vcpu_pmu_init(vcpu);
/* Setup VCPU AIA */
- rc = kvm_riscv_vcpu_aia_init(vcpu);
- if (rc)
- return rc;
+ kvm_riscv_vcpu_aia_init(vcpu);
/*
* Setup SBI extensions
@@ -187,6 +187,8 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
+ kvm_riscv_vcpu_sbi_deinit(vcpu);
+
/* Cleanup VCPU AIA context */
kvm_riscv_vcpu_aia_deinit(vcpu);
@@ -207,16 +209,6 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
return kvm_riscv_vcpu_timer_pending(vcpu);
}
-void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
-{
- kvm_riscv_aia_wakeon_hgei(vcpu, true);
-}
-
-void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
-{
- kvm_riscv_aia_wakeon_hgei(vcpu, false);
-}
-
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
return (kvm_riscv_vcpu_has_interrupts(vcpu, -1UL) &&
@@ -630,7 +622,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
}
}
- kvm_riscv_gstage_update_hgatp(vcpu);
+ kvm_riscv_mmu_update_hgatp(vcpu);
kvm_riscv_vcpu_timer_restore(vcpu);
@@ -690,7 +682,14 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
}
}
-static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu)
+/**
+ * check_vcpu_requests - check and handle pending vCPU requests
+ * @vcpu: the VCPU pointer
+ *
+ * Return: 1 if we should enter the guest
+ * 0 if we should exit to userspace
+ */
+static int kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu)
{
struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);
@@ -715,17 +714,13 @@ static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu)
kvm_riscv_reset_vcpu(vcpu, true);
if (kvm_check_request(KVM_REQ_UPDATE_HGATP, vcpu))
- kvm_riscv_gstage_update_hgatp(vcpu);
+ kvm_riscv_mmu_update_hgatp(vcpu);
if (kvm_check_request(KVM_REQ_FENCE_I, vcpu))
kvm_riscv_fence_i_process(vcpu);
- /*
- * The generic KVM_REQ_TLB_FLUSH is same as
- * KVM_REQ_HFENCE_GVMA_VMID_ALL
- */
- if (kvm_check_request(KVM_REQ_HFENCE_GVMA_VMID_ALL, vcpu))
- kvm_riscv_hfence_gvma_vmid_all_process(vcpu);
+ if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
+ kvm_riscv_tlb_flush_process(vcpu);
if (kvm_check_request(KVM_REQ_HFENCE_VVMA_ALL, vcpu))
kvm_riscv_hfence_vvma_all_process(vcpu);
@@ -735,7 +730,12 @@ static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu)
if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
kvm_riscv_vcpu_record_steal_time(vcpu);
+
+ if (kvm_dirty_ring_check_request(vcpu))
+ return 0;
}
+
+ return 1;
}
static void kvm_riscv_update_hvip(struct kvm_vcpu *vcpu)
@@ -917,7 +917,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
kvm_riscv_gstage_vmid_update(vcpu);
- kvm_riscv_check_vcpu_requests(vcpu);
+ ret = kvm_riscv_check_vcpu_requests(vcpu);
+ if (ret <= 0)
+ continue;
preempt_disable();
@@ -961,12 +963,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
}
/*
- * Cleanup stale TLB enteries
+ * Sanitize VMID mappings cached (TLB) on current CPU
*
* Note: This should be done after G-stage VMID has been
* updated using kvm_riscv_gstage_vmid_ver_changed()
*/
- kvm_riscv_local_tlb_sanitize(vcpu);
+ kvm_riscv_gstage_vmid_sanitize(vcpu);
trace_kvm_entry(vcpu);
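
kvm_riscv_check_vcpu_requests() now returns 0 when kvm_dirty_ring_check_request() sees a full ring, so the run loop above drops back to userspace with KVM_EXIT_DIRTY_RING_FULL. The matching userspace side, sketched with generic KVM UAPI names (collect_dirty_gfns() stands in for whatever ring harvesting the VMM does):

#include <linux/kvm.h>
#include <sys/ioctl.h>

void collect_dirty_gfns(void);  /* hypothetical VMM helper */

/* Sketch: userspace handling of a dirty-ring-full exit. */
static void handle_dirty_ring_full(int vm_fd, struct kvm_run *run)
{
        if (run->exit_reason == KVM_EXIT_DIRTY_RING_FULL) {
                collect_dirty_gfns();
                ioctl(vm_fd, KVM_RESET_DIRTY_RINGS, 0);
        }
}
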
diff --git a/arch/riscv/kvm/vcpu_exit.c b/arch/riscv/kvm/vcpu_exit.c
index 6e0c18412795..0bb0c51e3c89 100644
--- a/arch/riscv/kvm/vcpu_exit.c
+++ b/arch/riscv/kvm/vcpu_exit.c
@@ -9,10 +9,13 @@
#include <linux/kvm_host.h>
#include <asm/csr.h>
#include <asm/insn-def.h>
+#include <asm/kvm_mmu.h>
+#include <asm/kvm_nacl.h>
static int gstage_page_fault(struct kvm_vcpu *vcpu, struct kvm_run *run,
struct kvm_cpu_trap *trap)
{
+ struct kvm_gstage_mapping host_map;
struct kvm_memory_slot *memslot;
unsigned long hva, fault_addr;
bool writable;
@@ -40,8 +43,9 @@ static int gstage_page_fault(struct kvm_vcpu *vcpu, struct kvm_run *run,
};
}
- ret = kvm_riscv_gstage_map(vcpu, memslot, fault_addr, hva,
- (trap->scause == EXC_STORE_GUEST_PAGE_FAULT) ? true : false);
+ ret = kvm_riscv_mmu_map(vcpu, memslot, fault_addr, hva,
+ (trap->scause == EXC_STORE_GUEST_PAGE_FAULT) ? true : false,
+ &host_map);
if (ret < 0)
return ret;
@@ -135,7 +139,7 @@ unsigned long kvm_riscv_vcpu_unpriv_read(struct kvm_vcpu *vcpu,
void kvm_riscv_vcpu_trap_redirect(struct kvm_vcpu *vcpu,
struct kvm_cpu_trap *trap)
{
- unsigned long vsstatus = csr_read(CSR_VSSTATUS);
+ unsigned long vsstatus = ncsr_read(CSR_VSSTATUS);
/* Change Guest SSTATUS.SPP bit */
vsstatus &= ~SR_SPP;
@@ -151,15 +155,15 @@ void kvm_riscv_vcpu_trap_redirect(struct kvm_vcpu *vcpu,
vsstatus &= ~SR_SIE;
/* Update Guest SSTATUS */
- csr_write(CSR_VSSTATUS, vsstatus);
+ ncsr_write(CSR_VSSTATUS, vsstatus);
/* Update Guest SCAUSE, STVAL, and SEPC */
- csr_write(CSR_VSCAUSE, trap->scause);
- csr_write(CSR_VSTVAL, trap->stval);
- csr_write(CSR_VSEPC, trap->sepc);
+ ncsr_write(CSR_VSCAUSE, trap->scause);
+ ncsr_write(CSR_VSTVAL, trap->stval);
+ ncsr_write(CSR_VSEPC, trap->sepc);
/* Set Guest PC to Guest exception vector */
- vcpu->arch.guest_context.sepc = csr_read(CSR_VSTVEC);
+ vcpu->arch.guest_context.sepc = ncsr_read(CSR_VSTVEC);
/* Set Guest privilege mode to supervisor */
vcpu->arch.guest_context.sstatus |= SR_SPP;
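
Trap redirection now uses the nacl-aware ncsr_read()/ncsr_write() accessors so that, when SBI nested acceleration is active, the VS CSR updates go through the NACL shared memory instead of trapping on every CSR instruction. Conceptually the accessor dispatches as in the sketch below; the real definitions live in asm/kvm_nacl.h and the availability-check name here is an assumption, not something shown in this patch:

/* Conceptual sketch only; see asm/kvm_nacl.h for the real accessors. */
#define ncsr_read_sketch(csr)                                           \
        (kvm_riscv_nacl_sync_csr_available() ?                          \
                nacl_csr_read(nacl_shmem(), (csr)) : csr_read(csr))
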
diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c
index 2e1b646f0d61..cce6a38ea54f 100644
--- a/arch/riscv/kvm/vcpu_onereg.c
+++ b/arch/riscv/kvm/vcpu_onereg.c
@@ -23,7 +23,7 @@
#define KVM_ISA_EXT_ARR(ext) \
[KVM_RISCV_ISA_EXT_##ext] = RISCV_ISA_EXT_##ext
-/* Mapping between KVM ISA Extension ID & Host ISA extension ID */
+/* Mapping between KVM ISA Extension ID & guest ISA extension ID */
static const unsigned long kvm_isa_ext_arr[] = {
/* Single letter extensions (alphabetically sorted) */
[KVM_RISCV_ISA_EXT_A] = RISCV_ISA_EXT_a,
@@ -35,7 +35,7 @@ static const unsigned long kvm_isa_ext_arr[] = {
[KVM_RISCV_ISA_EXT_M] = RISCV_ISA_EXT_m,
[KVM_RISCV_ISA_EXT_V] = RISCV_ISA_EXT_v,
/* Multi letter extensions (alphabetically sorted) */
- [KVM_RISCV_ISA_EXT_SMNPM] = RISCV_ISA_EXT_SSNPM,
+ KVM_ISA_EXT_ARR(SMNPM),
KVM_ISA_EXT_ARR(SMSTATEEN),
KVM_ISA_EXT_ARR(SSAIA),
KVM_ISA_EXT_ARR(SSCOFPMF),
@@ -112,6 +112,36 @@ static unsigned long kvm_riscv_vcpu_base2isa_ext(unsigned long base_ext)
return KVM_RISCV_ISA_EXT_MAX;
}
+static int kvm_riscv_vcpu_isa_check_host(unsigned long kvm_ext, unsigned long *guest_ext)
+{
+ unsigned long host_ext;
+
+ if (kvm_ext >= KVM_RISCV_ISA_EXT_MAX ||
+ kvm_ext >= ARRAY_SIZE(kvm_isa_ext_arr))
+ return -ENOENT;
+
+ *guest_ext = kvm_isa_ext_arr[kvm_ext];
+ switch (*guest_ext) {
+ case RISCV_ISA_EXT_SMNPM:
+ /*
+ * Pointer masking effective in (H)S-mode is provided by the
+ * Smnpm extension, so that extension is reported to the guest,
+ * even though the CSR bits for configuring VS-mode pointer
+ * masking on the host side are part of the Ssnpm extension.
+ */
+ host_ext = RISCV_ISA_EXT_SSNPM;
+ break;
+ default:
+ host_ext = *guest_ext;
+ break;
+ }
+
+ if (!__riscv_isa_extension_available(NULL, host_ext))
+ return -ENOENT;
+
+ return 0;
+}
+
static bool kvm_riscv_vcpu_isa_enable_allowed(unsigned long ext)
{
switch (ext) {
@@ -219,13 +249,13 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext)
void kvm_riscv_vcpu_setup_isa(struct kvm_vcpu *vcpu)
{
- unsigned long host_isa, i;
+ unsigned long guest_ext, i;
for (i = 0; i < ARRAY_SIZE(kvm_isa_ext_arr); i++) {
- host_isa = kvm_isa_ext_arr[i];
- if (__riscv_isa_extension_available(NULL, host_isa) &&
- kvm_riscv_vcpu_isa_enable_allowed(i))
- set_bit(host_isa, vcpu->arch.isa);
+ if (kvm_riscv_vcpu_isa_check_host(i, &guest_ext))
+ continue;
+ if (kvm_riscv_vcpu_isa_enable_allowed(i))
+ set_bit(guest_ext, vcpu->arch.isa);
}
}
@@ -607,18 +637,15 @@ static int riscv_vcpu_get_isa_ext_single(struct kvm_vcpu *vcpu,
unsigned long reg_num,
unsigned long *reg_val)
{
- unsigned long host_isa_ext;
-
- if (reg_num >= KVM_RISCV_ISA_EXT_MAX ||
- reg_num >= ARRAY_SIZE(kvm_isa_ext_arr))
- return -ENOENT;
+ unsigned long guest_ext;
+ int ret;
- host_isa_ext = kvm_isa_ext_arr[reg_num];
- if (!__riscv_isa_extension_available(NULL, host_isa_ext))
- return -ENOENT;
+ ret = kvm_riscv_vcpu_isa_check_host(reg_num, &guest_ext);
+ if (ret)
+ return ret;
*reg_val = 0;
- if (__riscv_isa_extension_available(vcpu->arch.isa, host_isa_ext))
+ if (__riscv_isa_extension_available(vcpu->arch.isa, guest_ext))
*reg_val = 1; /* Mark the given extension as available */
return 0;
@@ -628,17 +655,14 @@ static int riscv_vcpu_set_isa_ext_single(struct kvm_vcpu *vcpu,
unsigned long reg_num,
unsigned long reg_val)
{
- unsigned long host_isa_ext;
-
- if (reg_num >= KVM_RISCV_ISA_EXT_MAX ||
- reg_num >= ARRAY_SIZE(kvm_isa_ext_arr))
- return -ENOENT;
+ unsigned long guest_ext;
+ int ret;
- host_isa_ext = kvm_isa_ext_arr[reg_num];
- if (!__riscv_isa_extension_available(NULL, host_isa_ext))
- return -ENOENT;
+ ret = kvm_riscv_vcpu_isa_check_host(reg_num, &guest_ext);
+ if (ret)
+ return ret;
- if (reg_val == test_bit(host_isa_ext, vcpu->arch.isa))
+ if (reg_val == test_bit(guest_ext, vcpu->arch.isa))
return 0;
if (!vcpu->arch.ran_atleast_once) {
@@ -648,10 +672,10 @@ static int riscv_vcpu_set_isa_ext_single(struct kvm_vcpu *vcpu,
*/
if (reg_val == 1 &&
kvm_riscv_vcpu_isa_enable_allowed(reg_num))
- set_bit(host_isa_ext, vcpu->arch.isa);
+ set_bit(guest_ext, vcpu->arch.isa);
else if (!reg_val &&
kvm_riscv_vcpu_isa_disable_allowed(reg_num))
- clear_bit(host_isa_ext, vcpu->arch.isa);
+ clear_bit(guest_ext, vcpu->arch.isa);
else
return -EINVAL;
kvm_riscv_vcpu_fp_reset(vcpu);
@@ -1009,16 +1033,15 @@ static int copy_fp_d_reg_indices(const struct kvm_vcpu *vcpu,
static int copy_isa_ext_reg_indices(const struct kvm_vcpu *vcpu,
u64 __user *uindices)
{
+ unsigned long guest_ext;
unsigned int n = 0;
- unsigned long isa_ext;
for (int i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++) {
u64 size = IS_ENABLED(CONFIG_32BIT) ?
KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64;
u64 reg = KVM_REG_RISCV | size | KVM_REG_RISCV_ISA_EXT | i;
- isa_ext = kvm_isa_ext_arr[i];
- if (!__riscv_isa_extension_available(NULL, isa_ext))
+ if (kvm_riscv_vcpu_isa_check_host(i, &guest_ext))
continue;
if (uindices) {
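
With the Smnpm/Ssnpm split above, the extension reported to the guest is Smnpm even though the host-side capability is Ssnpm. Userspace observes this through the ISA_EXT one-reg interface; a rough sketch follows (the KVM_REG_RISCV_ISA_SINGLE sub-type comes from the generic RISC-V one-reg ABI and is an assumption here, not shown in this hunk):

/* Sketch: ask KVM whether Smnpm is exposed to the guest (rv64 shown;
 * use KVM_REG_SIZE_U32 on rv32). */
static int guest_has_smnpm(int vcpu_fd)
{
        __u64 val = 0;
        struct kvm_one_reg reg = {
                .id   = KVM_REG_RISCV | KVM_REG_SIZE_U64 |
                        KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE |
                        KVM_RISCV_ISA_EXT_SMNPM,
                .addr = (unsigned long)&val,
        };

        return !ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg) && val == 1;
}
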
diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c
index 6e09b518a5d1..a56c4959f9ad 100644
--- a/arch/riscv/kvm/vcpu_sbi.c
+++ b/arch/riscv/kvm/vcpu_sbi.c
@@ -536,5 +536,54 @@ void kvm_riscv_vcpu_sbi_init(struct kvm_vcpu *vcpu)
scontext->ext_status[idx] = ext->default_disabled ?
KVM_RISCV_SBI_EXT_STATUS_DISABLED :
KVM_RISCV_SBI_EXT_STATUS_ENABLED;
+
+ if (ext->init && ext->init(vcpu) != 0)
+ scontext->ext_status[idx] = KVM_RISCV_SBI_EXT_STATUS_UNAVAILABLE;
+ }
+}
+
+void kvm_riscv_vcpu_sbi_deinit(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context;
+ const struct kvm_riscv_sbi_extension_entry *entry;
+ const struct kvm_vcpu_sbi_extension *ext;
+ int idx, i;
+
+ for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) {
+ entry = &sbi_ext[i];
+ ext = entry->ext_ptr;
+ idx = entry->ext_idx;
+
+ if (idx < 0 || idx >= ARRAY_SIZE(scontext->ext_status))
+ continue;
+
+ if (scontext->ext_status[idx] == KVM_RISCV_SBI_EXT_STATUS_UNAVAILABLE ||
+ !ext->deinit)
+ continue;
+
+ ext->deinit(vcpu);
+ }
+}
+
+void kvm_riscv_vcpu_sbi_reset(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context;
+ const struct kvm_riscv_sbi_extension_entry *entry;
+ const struct kvm_vcpu_sbi_extension *ext;
+ int idx, i;
+
+ for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) {
+ entry = &sbi_ext[i];
+ ext = entry->ext_ptr;
+ idx = entry->ext_idx;
+
+ if (idx < 0 || idx >= ARRAY_SIZE(scontext->ext_status))
+ continue;
+
+ if (scontext->ext_status[idx] != KVM_RISCV_SBI_EXT_STATUS_ENABLED ||
+ !ext->reset)
+ continue;
+
+ ext->reset(vcpu);
}
}
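
SBI extensions can now register per-vCPU init/deinit/reset callbacks: a failing init marks the extension UNAVAILABLE for that vCPU, and reset is driven from kvm_riscv_reset_vcpu() via kvm_riscv_vcpu_sbi_reset(). A minimal sketch of an extension using all three hooks (the extension itself is hypothetical; extid/handler/probe are omitted):

/* Hypothetical SBI extension showing the new lifecycle hooks. */
static int foo_init(struct kvm_vcpu *vcpu)
{
        /* Allocate per-vCPU state; a non-zero return marks the
         * extension UNAVAILABLE for this vCPU. */
        return 0;
}

static void foo_deinit(struct kvm_vcpu *vcpu)
{
        /* Free whatever foo_init() allocated. */
}

static void foo_reset(struct kvm_vcpu *vcpu)
{
        /* Return per-vCPU state to its post-creation values. */
}

static const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_foo = {
        /* .extid_start, .extid_end, .handler, .probe omitted in this sketch */
        .init   = foo_init,
        .deinit = foo_deinit,
        .reset  = foo_reset,
};
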
diff --git a/arch/riscv/kvm/vcpu_sbi_replace.c b/arch/riscv/kvm/vcpu_sbi_replace.c
index 5fbf3f94f1e8..b490ed1428a6 100644
--- a/arch/riscv/kvm/vcpu_sbi_replace.c
+++ b/arch/riscv/kvm/vcpu_sbi_replace.c
@@ -96,6 +96,7 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run
unsigned long hmask = cp->a0;
unsigned long hbase = cp->a1;
unsigned long funcid = cp->a6;
+ unsigned long vmid;
switch (funcid) {
case SBI_EXT_RFENCE_REMOTE_FENCE_I:
@@ -103,22 +104,22 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run
kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_FENCE_I_SENT);
break;
case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA:
- if (cp->a2 == 0 && cp->a3 == 0)
- kvm_riscv_hfence_vvma_all(vcpu->kvm, hbase, hmask);
+ vmid = READ_ONCE(vcpu->kvm->arch.vmid.vmid);
+ if ((cp->a2 == 0 && cp->a3 == 0) || cp->a3 == -1UL)
+ kvm_riscv_hfence_vvma_all(vcpu->kvm, hbase, hmask, vmid);
else
kvm_riscv_hfence_vvma_gva(vcpu->kvm, hbase, hmask,
- cp->a2, cp->a3, PAGE_SHIFT);
+ cp->a2, cp->a3, PAGE_SHIFT, vmid);
kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_HFENCE_VVMA_SENT);
break;
case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID:
- if (cp->a2 == 0 && cp->a3 == 0)
- kvm_riscv_hfence_vvma_asid_all(vcpu->kvm,
- hbase, hmask, cp->a4);
+ vmid = READ_ONCE(vcpu->kvm->arch.vmid.vmid);
+ if ((cp->a2 == 0 && cp->a3 == 0) || cp->a3 == -1UL)
+ kvm_riscv_hfence_vvma_asid_all(vcpu->kvm, hbase, hmask,
+ cp->a4, vmid);
else
- kvm_riscv_hfence_vvma_asid_gva(vcpu->kvm,
- hbase, hmask,
- cp->a2, cp->a3,
- PAGE_SHIFT, cp->a4);
+ kvm_riscv_hfence_vvma_asid_gva(vcpu->kvm, hbase, hmask, cp->a2,
+ cp->a3, PAGE_SHIFT, cp->a4, vmid);
kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_HFENCE_VVMA_ASID_SENT);
break;
case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA:
@@ -127,9 +128,9 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run
case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID:
/*
* Until nested virtualization is implemented, the
- * SBI HFENCE calls should be treated as NOPs
+ * SBI HFENCE calls should return SBI_ERR_NOT_SUPPORTED,
+ * hence the fall-through to the default case.
*/
- break;
default:
retdata->err_val = SBI_ERR_NOT_SUPPORTED;
}
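
In both rfence paths the full-flush condition now also accepts cp->a3 == -1UL: a start/size pair of (0, 0) or an all-ones size denotes the entire address space, for which a single flush-all by VMID is cheaper than walking page-sized ranges. An illustrative predicate (helper name hypothetical) capturing the check:

/* Mirrors the inline condition used in the rfence handler above. */
static bool sbi_rfence_covers_all(unsigned long start, unsigned long size)
{
        return (start == 0 && size == 0) || size == -1UL;
}
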
diff --git a/arch/riscv/kvm/vcpu_sbi_sta.c b/arch/riscv/kvm/vcpu_sbi_sta.c
index 5f35427114c1..cc6cb7c8f0e4 100644
--- a/arch/riscv/kvm/vcpu_sbi_sta.c
+++ b/arch/riscv/kvm/vcpu_sbi_sta.c
@@ -16,7 +16,7 @@
#include <asm/sbi.h>
#include <asm/uaccess.h>
-void kvm_riscv_vcpu_sbi_sta_reset(struct kvm_vcpu *vcpu)
+static void kvm_riscv_vcpu_sbi_sta_reset(struct kvm_vcpu *vcpu)
{
vcpu->arch.sta.shmem = INVALID_GPA;
vcpu->arch.sta.last_steal = 0;
@@ -156,6 +156,7 @@ const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_sta = {
.extid_end = SBI_EXT_STA,
.handler = kvm_sbi_ext_sta_handler,
.probe = kvm_sbi_ext_sta_probe,
+ .reset = kvm_riscv_vcpu_sbi_sta_reset,
};
int kvm_riscv_vcpu_get_reg_sbi_sta(struct kvm_vcpu *vcpu,
diff --git a/arch/riscv/kvm/vcpu_sbi_v01.c b/arch/riscv/kvm/vcpu_sbi_v01.c
index 8f4c4fa16227..368dfddd23d9 100644
--- a/arch/riscv/kvm/vcpu_sbi_v01.c
+++ b/arch/riscv/kvm/vcpu_sbi_v01.c
@@ -23,6 +23,7 @@ static int kvm_sbi_ext_v01_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
struct kvm *kvm = vcpu->kvm;
struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
struct kvm_cpu_trap *utrap = retdata->utrap;
+ unsigned long vmid;
switch (cp->a7) {
case SBI_EXT_0_1_CONSOLE_GETCHAR:
@@ -78,25 +79,21 @@ static int kvm_sbi_ext_v01_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
if (cp->a7 == SBI_EXT_0_1_REMOTE_FENCE_I)
kvm_riscv_fence_i(vcpu->kvm, 0, hmask);
else if (cp->a7 == SBI_EXT_0_1_REMOTE_SFENCE_VMA) {
+ vmid = READ_ONCE(vcpu->kvm->arch.vmid.vmid);
if (cp->a1 == 0 && cp->a2 == 0)
- kvm_riscv_hfence_vvma_all(vcpu->kvm,
- 0, hmask);
+ kvm_riscv_hfence_vvma_all(vcpu->kvm, 0, hmask, vmid);
else
- kvm_riscv_hfence_vvma_gva(vcpu->kvm,
- 0, hmask,
- cp->a1, cp->a2,
- PAGE_SHIFT);
+ kvm_riscv_hfence_vvma_gva(vcpu->kvm, 0, hmask, cp->a1,
+ cp->a2, PAGE_SHIFT, vmid);
} else {
+ vmid = READ_ONCE(vcpu->kvm->arch.vmid.vmid);
if (cp->a1 == 0 && cp->a2 == 0)
- kvm_riscv_hfence_vvma_asid_all(vcpu->kvm,
- 0, hmask,
- cp->a3);
+ kvm_riscv_hfence_vvma_asid_all(vcpu->kvm, 0, hmask,
+ cp->a3, vmid);
else
- kvm_riscv_hfence_vvma_asid_gva(vcpu->kvm,
- 0, hmask,
- cp->a1, cp->a2,
- PAGE_SHIFT,
- cp->a3);
+ kvm_riscv_hfence_vvma_asid_gva(vcpu->kvm, 0, hmask,
+ cp->a1, cp->a2, PAGE_SHIFT,
+ cp->a3, vmid);
}
break;
default:
diff --git a/arch/riscv/kvm/vcpu_timer.c b/arch/riscv/kvm/vcpu_timer.c
index ff672fa71fcc..85a7262115e1 100644
--- a/arch/riscv/kvm/vcpu_timer.c
+++ b/arch/riscv/kvm/vcpu_timer.c
@@ -345,8 +345,24 @@ void kvm_riscv_vcpu_timer_save(struct kvm_vcpu *vcpu)
/*
* The vstimecmp CSRs are saved by kvm_riscv_vcpu_timer_sync()
* upon every VM exit so no need to save here.
+ *
+ * If the VS-timer expires while no VCPU is running on a host CPU,
+ * then WFI executed by that host CPU becomes an effective NOP,
+ * yielding no power savings. This is because, as per the RISC-V
+ * Privileged specification: "WFI is also required to resume
+ * execution for locally enabled interrupts pending at any privilege
+ * level, regardless of the global interrupt enable at each privilege
+ * level."
+ *
+ * To address this, the vstimecmp CSR must be set to -1UL here when
+ * the VCPU is scheduled out or exits to user space.
*/
+ csr_write(CSR_VSTIMECMP, -1UL);
+#if defined(CONFIG_32BIT)
+ csr_write(CSR_VSTIMECMPH, -1UL);
+#endif
+
/* timer should be enabled for the remaining operations */
if (unlikely(!t->init_done))
return;
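
Parking vstimecmp at -1UL only needs to hold while the VCPU is off the CPU; the complementary restore path writes the real guest deadline back on the next load. A rough sketch of that counterpart, with the next_cycles field name assumed rather than shown in this hunk:

/*
 * Assumed counterpart of the save path above; not the in-tree
 * kvm_riscv_vcpu_timer_restore().
 */
static void vcpu_timer_restore_sketch(struct kvm_vcpu *vcpu)
{
        struct kvm_vcpu_timer *t = &vcpu->arch.timer;

#if defined(CONFIG_32BIT)
        csr_write(CSR_VSTIMECMP, (u32)t->next_cycles);
        csr_write(CSR_VSTIMECMPH, (u32)(t->next_cycles >> 32));
#else
        csr_write(CSR_VSTIMECMP, t->next_cycles);
#endif
}
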
diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c
index b27ec8f96697..66d91ae6e9b2 100644
--- a/arch/riscv/kvm/vm.c
+++ b/arch/riscv/kvm/vm.c
@@ -11,6 +11,7 @@
#include <linux/module.h>
#include <linux/uaccess.h>
#include <linux/kvm_host.h>
+#include <asm/kvm_mmu.h>
const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
KVM_GENERIC_VM_STATS()
@@ -31,13 +32,13 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
int r;
- r = kvm_riscv_gstage_alloc_pgd(kvm);
+ r = kvm_riscv_mmu_alloc_pgd(kvm);
if (r)
return r;
r = kvm_riscv_gstage_vmid_init(kvm);
if (r) {
- kvm_riscv_gstage_free_pgd(kvm);
+ kvm_riscv_mmu_free_pgd(kvm);
return r;
}
@@ -199,7 +200,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = KVM_USER_MEM_SLOTS;
break;
case KVM_CAP_VM_GPA_BITS:
- r = kvm_riscv_gstage_gpa_bits();
+ r = kvm_riscv_gstage_gpa_bits;
break;
default:
r = 0;
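
The KVM_CAP_VM_GPA_BITS value returned here is consumed from userspace through the generic KVM_CHECK_EXTENSION ioctl; the hunk above only changes the kernel-side source from a function call to an exported variable. A minimal userspace probe (error handling trimmed):

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
        int kvm = open("/dev/kvm", O_RDWR | O_CLOEXEC);
        int vm, gpa_bits;

        if (kvm < 0)
                return 1;

        vm = ioctl(kvm, KVM_CREATE_VM, 0);
        if (vm < 0)
                return 1;

        /* Guest physical address width supported for this VM. */
        gpa_bits = ioctl(vm, KVM_CHECK_EXTENSION, KVM_CAP_VM_GPA_BITS);
        printf("KVM_CAP_VM_GPA_BITS: %d\n", gpa_bits);
        return 0;
}
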
diff --git a/arch/riscv/kvm/vmid.c b/arch/riscv/kvm/vmid.c
index ddc98714ce8e..3b426c800480 100644
--- a/arch/riscv/kvm/vmid.c
+++ b/arch/riscv/kvm/vmid.c
@@ -14,6 +14,8 @@
#include <linux/smp.h>
#include <linux/kvm_host.h>
#include <asm/csr.h>
+#include <asm/kvm_tlb.h>
+#include <asm/kvm_vmid.h>
static unsigned long vmid_version = 1;
static unsigned long vmid_next;
@@ -122,3 +124,26 @@ void kvm_riscv_gstage_vmid_update(struct kvm_vcpu *vcpu)
kvm_for_each_vcpu(i, v, vcpu->kvm)
kvm_make_request(KVM_REQ_UPDATE_HGATP, v);
}
+
+void kvm_riscv_gstage_vmid_sanitize(struct kvm_vcpu *vcpu)
+{
+ unsigned long vmid;
+
+ if (!kvm_riscv_gstage_vmid_bits() ||
+ vcpu->arch.last_exit_cpu == vcpu->cpu)
+ return;
+
+ /*
+ * On RISC-V platforms with hardware VMID support, we share the same
+ * VMID across all VCPUs of a particular Guest/VM. This means we might
+ * have stale G-stage TLB entries on the current Host CPU left behind
+ * by some other VCPU of the same Guest which previously ran on this
+ * Host CPU.
+ *
+ * To clean up such stale entries, we simply flush all G-stage TLB
+ * entries by VMID whenever the underlying Host CPU changes for a VCPU.
+ */
+
+ vmid = READ_ONCE(vcpu->kvm->arch.vmid.vmid);
+ kvm_riscv_local_hfence_gvma_vmid_all(vmid);
+}
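
kvm_riscv_gstage_vmid_sanitize() is meant to be invoked from the VCPU run loop; the exact call site is not part of this diff. A hedged sketch of how it slots in, with the surrounding function name invented purely for illustration:

/* Illustration only: the real call site lives in vcpu.c. */
static void vcpu_run_iteration_sketch(struct kvm_vcpu *vcpu)
{
        /*
         * Before re-entering the guest, drop stale G-stage TLB entries
         * left by another VCPU of the same VM on this host CPU.
         */
        kvm_riscv_gstage_vmid_sanitize(vcpu);

        /* ... switch to guest and run until the next exit ... */

        /*
         * Record where this VCPU last exited so the next sanitize call
         * can be skipped when the host CPU has not changed.
         */
        vcpu->arch.last_exit_cpu = vcpu->cpu;
}
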