summaryrefslogtreecommitdiff
path: root/arch/riscv
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-11-02 11:24:14 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-11-02 11:24:14 -0700
commitd7e0a795bf37a13554c80cfc5ba97abedf53f391 (patch)
tree26f107fbe530b1bd0912a748b808cbe476bfbf49 /arch/riscv
parent44261f8e287d1b02a2e4bfbd7399fb8d37d1ee24 (diff)
parent52cf891d8dbd7592261fa30f373410b97f22b76c (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini: "ARM: - More progress on the protected VM front, now with the full fixed feature set as well as the limitation of some hypercalls after initialisation. - Cleanup of the RAZ/WI sysreg handling, which was pointlessly complicated - Fixes for the vgic placement in the IPA space, together with a bunch of selftests - More memcg accounting of the memory allocated on behalf of a guest - Timer and vgic selftests - Workarounds for the Apple M1 broken vgic implementation - KConfig cleanups - New kvmarm.mode=none option, for those who really dislike us RISC-V: - New KVM port. x86: - New API to control TSC offset from userspace - TSC scaling for nested hypervisors on SVM - Switch masterclock protection from raw_spin_lock to seqcount - Clean up function prototypes in the page fault code and avoid repeated memslot lookups - Convey the exit reason to userspace on emulation failure - Configure time between NX page recovery iterations - Expose Predictive Store Forwarding Disable CPUID leaf - Allocate page tracking data structures lazily (if the i915 KVM-GT functionality is not compiled in) - Cleanups, fixes and optimizations for the shadow MMU code s390: - SIGP Fixes - initial preparations for lazy destroy of secure VMs - storage key improvements/fixes - Log the guest CPNC Starting from this release, KVM-PPC patches will come from Michael Ellerman's PPC tree" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (227 commits) RISC-V: KVM: fix boolreturn.cocci warnings RISC-V: KVM: remove unneeded semicolon RISC-V: KVM: Fix GPA passed to __kvm_riscv_hfence_gvma_xyz() functions RISC-V: KVM: Factor-out FP virtualization into separate sources KVM: s390: add debug statement for diag 318 CPNC data KVM: s390: pv: properly handle page flags for protected guests KVM: s390: Fix handle_sske page fault handling KVM: x86: SGX must obey the KVM_INTERNAL_ERROR_EMULATION protocol KVM: x86: On emulation failure, convey the exit reason, etc. to userspace KVM: x86: Get exit_reason as part of kvm_x86_ops.get_exit_info KVM: x86: Clarify the kvm_run.emulation_failure structure layout KVM: s390: Add a routine for setting userspace CPU state KVM: s390: Simplify SIGP Set Arch handling KVM: s390: pv: avoid stalls when making pages secure KVM: s390: pv: avoid stalls for kvm_s390_pv_init_vm KVM: s390: pv: avoid double free of sida page KVM: s390: pv: add macros for UVC CC values s390/mm: optimize reset_guest_reference_bit() s390/mm: optimize set_guest_storage_key() s390/mm: no need for pte_alloc_map_lock() if we know the pmd is present ...
Diffstat (limited to 'arch/riscv')
-rw-r--r--arch/riscv/Kconfig2
-rw-r--r--arch/riscv/Makefile1
-rw-r--r--arch/riscv/include/asm/csr.h87
-rw-r--r--arch/riscv/include/asm/kvm_host.h264
-rw-r--r--arch/riscv/include/asm/kvm_types.h7
-rw-r--r--arch/riscv/include/asm/kvm_vcpu_fp.h59
-rw-r--r--arch/riscv/include/asm/kvm_vcpu_timer.h44
-rw-r--r--arch/riscv/include/uapi/asm/kvm.h128
-rw-r--r--arch/riscv/kernel/asm-offsets.c156
-rw-r--r--arch/riscv/kvm/Kconfig35
-rw-r--r--arch/riscv/kvm/Makefile26
-rw-r--r--arch/riscv/kvm/main.c118
-rw-r--r--arch/riscv/kvm/mmu.c802
-rw-r--r--arch/riscv/kvm/tlb.S74
-rw-r--r--arch/riscv/kvm/vcpu.c825
-rw-r--r--arch/riscv/kvm/vcpu_exit.c701
-rw-r--r--arch/riscv/kvm/vcpu_fp.c167
-rw-r--r--arch/riscv/kvm/vcpu_sbi.c185
-rw-r--r--arch/riscv/kvm/vcpu_switch.S400
-rw-r--r--arch/riscv/kvm/vcpu_timer.c225
-rw-r--r--arch/riscv/kvm/vm.c97
-rw-r--r--arch/riscv/kvm/vmid.c120
22 files changed, 4523 insertions, 0 deletions
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index c28b743eba57..a34c531be4e7 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -566,3 +566,5 @@ menu "Power management options"
source "kernel/power/Kconfig"
endmenu
+
+source "arch/riscv/kvm/Kconfig"
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index 0eb4568fbd29..58c1a28e20bb 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -100,6 +100,7 @@ endif
head-y := arch/riscv/kernel/head.o
core-$(CONFIG_RISCV_ERRATA_ALTERNATIVE) += arch/riscv/errata/
+core-$(CONFIG_KVM) += arch/riscv/kvm/
libs-y += arch/riscv/lib/
libs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a
diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
index 87ac65696871..5046f431645c 100644
--- a/arch/riscv/include/asm/csr.h
+++ b/arch/riscv/include/asm/csr.h
@@ -58,22 +58,32 @@
/* Interrupt causes (minus the high bit) */
#define IRQ_S_SOFT 1
+#define IRQ_VS_SOFT 2
#define IRQ_M_SOFT 3
#define IRQ_S_TIMER 5
+#define IRQ_VS_TIMER 6
#define IRQ_M_TIMER 7
#define IRQ_S_EXT 9
+#define IRQ_VS_EXT 10
#define IRQ_M_EXT 11
/* Exception causes */
#define EXC_INST_MISALIGNED 0
#define EXC_INST_ACCESS 1
+#define EXC_INST_ILLEGAL 2
#define EXC_BREAKPOINT 3
#define EXC_LOAD_ACCESS 5
#define EXC_STORE_ACCESS 7
#define EXC_SYSCALL 8
+#define EXC_HYPERVISOR_SYSCALL 9
+#define EXC_SUPERVISOR_SYSCALL 10
#define EXC_INST_PAGE_FAULT 12
#define EXC_LOAD_PAGE_FAULT 13
#define EXC_STORE_PAGE_FAULT 15
+#define EXC_INST_GUEST_PAGE_FAULT 20
+#define EXC_LOAD_GUEST_PAGE_FAULT 21
+#define EXC_VIRTUAL_INST_FAULT 22
+#define EXC_STORE_GUEST_PAGE_FAULT 23
/* PMP configuration */
#define PMP_R 0x01
@@ -85,6 +95,58 @@
#define PMP_A_NAPOT 0x18
#define PMP_L 0x80
+/* HSTATUS flags */
+#ifdef CONFIG_64BIT
+#define HSTATUS_VSXL _AC(0x300000000, UL)
+#define HSTATUS_VSXL_SHIFT 32
+#endif
+#define HSTATUS_VTSR _AC(0x00400000, UL)
+#define HSTATUS_VTW _AC(0x00200000, UL)
+#define HSTATUS_VTVM _AC(0x00100000, UL)
+#define HSTATUS_VGEIN _AC(0x0003f000, UL)
+#define HSTATUS_VGEIN_SHIFT 12
+#define HSTATUS_HU _AC(0x00000200, UL)
+#define HSTATUS_SPVP _AC(0x00000100, UL)
+#define HSTATUS_SPV _AC(0x00000080, UL)
+#define HSTATUS_GVA _AC(0x00000040, UL)
+#define HSTATUS_VSBE _AC(0x00000020, UL)
+
+/* HGATP flags */
+#define HGATP_MODE_OFF _AC(0, UL)
+#define HGATP_MODE_SV32X4 _AC(1, UL)
+#define HGATP_MODE_SV39X4 _AC(8, UL)
+#define HGATP_MODE_SV48X4 _AC(9, UL)
+
+#define HGATP32_MODE_SHIFT 31
+#define HGATP32_VMID_SHIFT 22
+#define HGATP32_VMID_MASK _AC(0x1FC00000, UL)
+#define HGATP32_PPN _AC(0x003FFFFF, UL)
+
+#define HGATP64_MODE_SHIFT 60
+#define HGATP64_VMID_SHIFT 44
+#define HGATP64_VMID_MASK _AC(0x03FFF00000000000, UL)
+#define HGATP64_PPN _AC(0x00000FFFFFFFFFFF, UL)
+
+#define HGATP_PAGE_SHIFT 12
+
+#ifdef CONFIG_64BIT
+#define HGATP_PPN HGATP64_PPN
+#define HGATP_VMID_SHIFT HGATP64_VMID_SHIFT
+#define HGATP_VMID_MASK HGATP64_VMID_MASK
+#define HGATP_MODE_SHIFT HGATP64_MODE_SHIFT
+#else
+#define HGATP_PPN HGATP32_PPN
+#define HGATP_VMID_SHIFT HGATP32_VMID_SHIFT
+#define HGATP_VMID_MASK HGATP32_VMID_MASK
+#define HGATP_MODE_SHIFT HGATP32_MODE_SHIFT
+#endif
+
+/* VSIP & HVIP relation */
+#define VSIP_TO_HVIP_SHIFT (IRQ_VS_SOFT - IRQ_S_SOFT)
+#define VSIP_VALID_MASK ((_AC(1, UL) << IRQ_S_SOFT) | \
+ (_AC(1, UL) << IRQ_S_TIMER) | \
+ (_AC(1, UL) << IRQ_S_EXT))
+
/* symbolic CSR names: */
#define CSR_CYCLE 0xc00
#define CSR_TIME 0xc01
@@ -104,6 +166,31 @@
#define CSR_SIP 0x144
#define CSR_SATP 0x180
+#define CSR_VSSTATUS 0x200
+#define CSR_VSIE 0x204
+#define CSR_VSTVEC 0x205
+#define CSR_VSSCRATCH 0x240
+#define CSR_VSEPC 0x241
+#define CSR_VSCAUSE 0x242
+#define CSR_VSTVAL 0x243
+#define CSR_VSIP 0x244
+#define CSR_VSATP 0x280
+
+#define CSR_HSTATUS 0x600
+#define CSR_HEDELEG 0x602
+#define CSR_HIDELEG 0x603
+#define CSR_HIE 0x604
+#define CSR_HTIMEDELTA 0x605
+#define CSR_HCOUNTEREN 0x606
+#define CSR_HGEIE 0x607
+#define CSR_HTIMEDELTAH 0x615
+#define CSR_HTVAL 0x643
+#define CSR_HIP 0x644
+#define CSR_HVIP 0x645
+#define CSR_HTINST 0x64a
+#define CSR_HGATP 0x680
+#define CSR_HGEIP 0xe12
+
#define CSR_MSTATUS 0x300
#define CSR_MISA 0x301
#define CSR_MIE 0x304
diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h
new file mode 100644
index 000000000000..25ba21f98504
--- /dev/null
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -0,0 +1,264 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2019 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Anup Patel <anup.patel@wdc.com>
+ */
+
+#ifndef __RISCV_KVM_HOST_H__
+#define __RISCV_KVM_HOST_H__
+
+#include <linux/types.h>
+#include <linux/kvm.h>
+#include <linux/kvm_types.h>
+#include <asm/kvm_vcpu_fp.h>
+#include <asm/kvm_vcpu_timer.h>
+
+#ifdef CONFIG_64BIT
+#define KVM_MAX_VCPUS (1U << 16)
+#else
+#define KVM_MAX_VCPUS (1U << 9)
+#endif
+
+#define KVM_HALT_POLL_NS_DEFAULT 500000
+
+#define KVM_VCPU_MAX_FEATURES 0
+
+#define KVM_REQ_SLEEP \
+ KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(1)
+#define KVM_REQ_UPDATE_HGATP KVM_ARCH_REQ(2)
+
+struct kvm_vm_stat {
+ struct kvm_vm_stat_generic generic;
+};
+
+struct kvm_vcpu_stat {
+ struct kvm_vcpu_stat_generic generic;
+ u64 ecall_exit_stat;
+ u64 wfi_exit_stat;
+ u64 mmio_exit_user;
+ u64 mmio_exit_kernel;
+ u64 exits;
+};
+
+struct kvm_arch_memory_slot {
+};
+
+struct kvm_vmid {
+ /*
+ * Writes to vmid_version and vmid happen with vmid_lock held
+ * whereas reads happen without any lock held.
+ */
+ unsigned long vmid_version;
+ unsigned long vmid;
+};
+
+struct kvm_arch {
+ /* stage2 vmid */
+ struct kvm_vmid vmid;
+
+ /* stage2 page table */
+ pgd_t *pgd;
+ phys_addr_t pgd_phys;
+
+ /* Guest Timer */
+ struct kvm_guest_timer timer;
+};
+
+struct kvm_mmio_decode {
+ unsigned long insn;
+ int insn_len;
+ int len;
+ int shift;
+ int return_handled;
+};
+
+struct kvm_sbi_context {
+ int return_handled;
+};
+
+#define KVM_MMU_PAGE_CACHE_NR_OBJS 32
+
+struct kvm_mmu_page_cache {
+ int nobjs;
+ void *objects[KVM_MMU_PAGE_CACHE_NR_OBJS];
+};
+
+struct kvm_cpu_trap {
+ unsigned long sepc;
+ unsigned long scause;
+ unsigned long stval;
+ unsigned long htval;
+ unsigned long htinst;
+};
+
+struct kvm_cpu_context {
+ unsigned long zero;
+ unsigned long ra;
+ unsigned long sp;
+ unsigned long gp;
+ unsigned long tp;
+ unsigned long t0;
+ unsigned long t1;
+ unsigned long t2;
+ unsigned long s0;
+ unsigned long s1;
+ unsigned long a0;
+ unsigned long a1;
+ unsigned long a2;
+ unsigned long a3;
+ unsigned long a4;
+ unsigned long a5;
+ unsigned long a6;
+ unsigned long a7;
+ unsigned long s2;
+ unsigned long s3;
+ unsigned long s4;
+ unsigned long s5;
+ unsigned long s6;
+ unsigned long s7;
+ unsigned long s8;
+ unsigned long s9;
+ unsigned long s10;
+ unsigned long s11;
+ unsigned long t3;
+ unsigned long t4;
+ unsigned long t5;
+ unsigned long t6;
+ unsigned long sepc;
+ unsigned long sstatus;
+ unsigned long hstatus;
+ union __riscv_fp_state fp;
+};
+
+struct kvm_vcpu_csr {
+ unsigned long vsstatus;
+ unsigned long vsie;
+ unsigned long vstvec;
+ unsigned long vsscratch;
+ unsigned long vsepc;
+ unsigned long vscause;
+ unsigned long vstval;
+ unsigned long hvip;
+ unsigned long vsatp;
+ unsigned long scounteren;
+};
+
+struct kvm_vcpu_arch {
+ /* VCPU ran at least once */
+ bool ran_atleast_once;
+
+ /* ISA feature bits (similar to MISA) */
+ unsigned long isa;
+
+ /* SSCRATCH, STVEC, and SCOUNTEREN of Host */
+ unsigned long host_sscratch;
+ unsigned long host_stvec;
+ unsigned long host_scounteren;
+
+ /* CPU context of Host */
+ struct kvm_cpu_context host_context;
+
+ /* CPU context of Guest VCPU */
+ struct kvm_cpu_context guest_context;
+
+ /* CPU CSR context of Guest VCPU */
+ struct kvm_vcpu_csr guest_csr;
+
+ /* CPU context upon Guest VCPU reset */
+ struct kvm_cpu_context guest_reset_context;
+
+ /* CPU CSR context upon Guest VCPU reset */
+ struct kvm_vcpu_csr guest_reset_csr;
+
+ /*
+ * VCPU interrupts
+ *
+ * We have a lockless approach for tracking pending VCPU interrupts
+ * implemented using atomic bitops. The irqs_pending bitmap represent
+ * pending interrupts whereas irqs_pending_mask represent bits changed
+ * in irqs_pending. Our approach is modeled around multiple producer
+ * and single consumer problem where the consumer is the VCPU itself.
+ */
+ unsigned long irqs_pending;
+ unsigned long irqs_pending_mask;
+
+ /* VCPU Timer */
+ struct kvm_vcpu_timer timer;
+
+ /* MMIO instruction details */
+ struct kvm_mmio_decode mmio_decode;
+
+ /* SBI context */
+ struct kvm_sbi_context sbi_context;
+
+ /* Cache pages needed to program page tables with spinlock held */
+ struct kvm_mmu_page_cache mmu_page_cache;
+
+ /* VCPU power-off state */
+ bool power_off;
+
+ /* Don't run the VCPU (blocked) */
+ bool pause;
+
+ /* SRCU lock index for in-kernel run loop */
+ int srcu_idx;
+};
+
+static inline void kvm_arch_hardware_unsetup(void) {}
+static inline void kvm_arch_sync_events(struct kvm *kvm) {}
+static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
+
+#define KVM_ARCH_WANT_MMU_NOTIFIER
+
+void __kvm_riscv_hfence_gvma_vmid_gpa(unsigned long gpa_divby_4,
+ unsigned long vmid);
+void __kvm_riscv_hfence_gvma_vmid(unsigned long vmid);
+void __kvm_riscv_hfence_gvma_gpa(unsigned long gpa_divby_4);
+void __kvm_riscv_hfence_gvma_all(void);
+
+int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu,
+ struct kvm_memory_slot *memslot,
+ gpa_t gpa, unsigned long hva, bool is_write);
+void kvm_riscv_stage2_flush_cache(struct kvm_vcpu *vcpu);
+int kvm_riscv_stage2_alloc_pgd(struct kvm *kvm);
+void kvm_riscv_stage2_free_pgd(struct kvm *kvm);
+void kvm_riscv_stage2_update_hgatp(struct kvm_vcpu *vcpu);
+void kvm_riscv_stage2_mode_detect(void);
+unsigned long kvm_riscv_stage2_mode(void);
+
+void kvm_riscv_stage2_vmid_detect(void);
+unsigned long kvm_riscv_stage2_vmid_bits(void);
+int kvm_riscv_stage2_vmid_init(struct kvm *kvm);
+bool kvm_riscv_stage2_vmid_ver_changed(struct kvm_vmid *vmid);
+void kvm_riscv_stage2_vmid_update(struct kvm_vcpu *vcpu);
+
+void __kvm_riscv_unpriv_trap(void);
+
+unsigned long kvm_riscv_vcpu_unpriv_read(struct kvm_vcpu *vcpu,
+ bool read_insn,
+ unsigned long guest_addr,
+ struct kvm_cpu_trap *trap);
+void kvm_riscv_vcpu_trap_redirect(struct kvm_vcpu *vcpu,
+ struct kvm_cpu_trap *trap);
+int kvm_riscv_vcpu_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ struct kvm_cpu_trap *trap);
+
+void __kvm_riscv_switch_to(struct kvm_vcpu_arch *vcpu_arch);
+
+int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq);
+int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq);
+void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu *vcpu);
+void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu);
+bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, unsigned long mask);
+void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu);
+void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu);
+
+int kvm_riscv_vcpu_sbi_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_riscv_vcpu_sbi_ecall(struct kvm_vcpu *vcpu, struct kvm_run *run);
+
+#endif /* __RISCV_KVM_HOST_H__ */
diff --git a/arch/riscv/include/asm/kvm_types.h b/arch/riscv/include/asm/kvm_types.h
new file mode 100644
index 000000000000..e476b404eb67
--- /dev/null
+++ b/arch/riscv/include/asm/kvm_types.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_RISCV_KVM_TYPES_H
+#define _ASM_RISCV_KVM_TYPES_H
+
+#define KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE 40
+
+#endif /* _ASM_RISCV_KVM_TYPES_H */
diff --git a/arch/riscv/include/asm/kvm_vcpu_fp.h b/arch/riscv/include/asm/kvm_vcpu_fp.h
new file mode 100644
index 000000000000..4da9b8e0f050
--- /dev/null
+++ b/arch/riscv/include/asm/kvm_vcpu_fp.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Atish Patra <atish.patra@wdc.com>
+ * Anup Patel <anup.patel@wdc.com>
+ */
+
+#ifndef __KVM_VCPU_RISCV_FP_H
+#define __KVM_VCPU_RISCV_FP_H
+
+#include <linux/types.h>
+
+struct kvm_cpu_context;
+
+#ifdef CONFIG_FPU
+void __kvm_riscv_fp_f_save(struct kvm_cpu_context *context);
+void __kvm_riscv_fp_f_restore(struct kvm_cpu_context *context);
+void __kvm_riscv_fp_d_save(struct kvm_cpu_context *context);
+void __kvm_riscv_fp_d_restore(struct kvm_cpu_context *context);
+
+void kvm_riscv_vcpu_fp_reset(struct kvm_vcpu *vcpu);
+void kvm_riscv_vcpu_guest_fp_save(struct kvm_cpu_context *cntx,
+ unsigned long isa);
+void kvm_riscv_vcpu_guest_fp_restore(struct kvm_cpu_context *cntx,
+ unsigned long isa);
+void kvm_riscv_vcpu_host_fp_save(struct kvm_cpu_context *cntx);
+void kvm_riscv_vcpu_host_fp_restore(struct kvm_cpu_context *cntx);
+#else
+static inline void kvm_riscv_vcpu_fp_reset(struct kvm_vcpu *vcpu)
+{
+}
+static inline void kvm_riscv_vcpu_guest_fp_save(struct kvm_cpu_context *cntx,
+ unsigned long isa)
+{
+}
+static inline void kvm_riscv_vcpu_guest_fp_restore(
+ struct kvm_cpu_context *cntx,
+ unsigned long isa)
+{
+}
+static inline void kvm_riscv_vcpu_host_fp_save(struct kvm_cpu_context *cntx)
+{
+}
+static inline void kvm_riscv_vcpu_host_fp_restore(
+ struct kvm_cpu_context *cntx)
+{
+}
+#endif
+
+int kvm_riscv_vcpu_get_reg_fp(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg,
+ unsigned long rtype);
+int kvm_riscv_vcpu_set_reg_fp(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg,
+ unsigned long rtype);
+
+#endif
diff --git a/arch/riscv/include/asm/kvm_vcpu_timer.h b/arch/riscv/include/asm/kvm_vcpu_timer.h
new file mode 100644
index 000000000000..375281eb49e0
--- /dev/null
+++ b/arch/riscv/include/asm/kvm_vcpu_timer.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2019 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Atish Patra <atish.patra@wdc.com>
+ */
+
+#ifndef __KVM_VCPU_RISCV_TIMER_H
+#define __KVM_VCPU_RISCV_TIMER_H
+
+#include <linux/hrtimer.h>
+
+struct kvm_guest_timer {
+ /* Mult & Shift values to get nanoseconds from cycles */
+ u32 nsec_mult;
+ u32 nsec_shift;
+ /* Time delta value */
+ u64 time_delta;
+};
+
+struct kvm_vcpu_timer {
+ /* Flag for whether init is done */
+ bool init_done;
+ /* Flag for whether timer event is configured */
+ bool next_set;
+ /* Next timer event cycles */
+ u64 next_cycles;
+ /* Underlying hrtimer instance */
+ struct hrtimer hrt;
+};
+
+int kvm_riscv_vcpu_timer_next_event(struct kvm_vcpu *vcpu, u64 ncycles);
+int kvm_riscv_vcpu_get_reg_timer(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg);
+int kvm_riscv_vcpu_set_reg_timer(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg);
+int kvm_riscv_vcpu_timer_init(struct kvm_vcpu *vcpu);
+int kvm_riscv_vcpu_timer_deinit(struct kvm_vcpu *vcpu);
+int kvm_riscv_vcpu_timer_reset(struct kvm_vcpu *vcpu);
+void kvm_riscv_vcpu_timer_restore(struct kvm_vcpu *vcpu);
+int kvm_riscv_guest_timer_init(struct kvm *kvm);
+
+#endif
diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h
new file mode 100644
index 000000000000..f808ad1ce500
--- /dev/null
+++ b/arch/riscv/include/uapi/asm/kvm.h
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright (C) 2019 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Anup Patel <anup.patel@wdc.com>
+ */
+
+#ifndef __LINUX_KVM_RISCV_H
+#define __LINUX_KVM_RISCV_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+#include <asm/ptrace.h>
+
+#define __KVM_HAVE_READONLY_MEM
+
+#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
+
+#define KVM_INTERRUPT_SET -1U
+#define KVM_INTERRUPT_UNSET -2U
+
+/* for KVM_GET_REGS and KVM_SET_REGS */
+struct kvm_regs {
+};
+
+/* for KVM_GET_FPU and KVM_SET_FPU */
+struct kvm_fpu {
+};
+
+/* KVM Debug exit structure */
+struct kvm_debug_exit_arch {
+};
+
+/* for KVM_SET_GUEST_DEBUG */
+struct kvm_guest_debug_arch {
+};
+
+/* definition of registers in kvm_run */
+struct kvm_sync_regs {
+};
+
+/* for KVM_GET_SREGS and KVM_SET_SREGS */
+struct kvm_sregs {
+};
+
+/* CONFIG registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
+struct kvm_riscv_config {
+ unsigned long isa;
+};
+
+/* CORE registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
+struct kvm_riscv_core {
+ struct user_regs_struct regs;
+ unsigned long mode;
+};
+
+/* Possible privilege modes for kvm_riscv_core */
+#define KVM_RISCV_MODE_S 1
+#define KVM_RISCV_MODE_U 0
+
+/* CSR registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
+struct kvm_riscv_csr {
+ unsigned long sstatus;
+ unsigned long sie;
+ unsigned long stvec;
+ unsigned long sscratch;
+ unsigned long sepc;
+ unsigned long scause;
+ unsigned long stval;
+ unsigned long sip;
+ unsigned long satp;
+ unsigned long scounteren;
+};
+
+/* TIMER registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
+struct kvm_riscv_timer {
+ __u64 frequency;
+ __u64 time;
+ __u64 compare;
+ __u64 state;
+};
+
+/* Possible states for kvm_riscv_timer */
+#define KVM_RISCV_TIMER_STATE_OFF 0
+#define KVM_RISCV_TIMER_STATE_ON 1
+
+#define KVM_REG_SIZE(id) \
+ (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
+
+/* If you need to interpret the index values, here is the key: */
+#define KVM_REG_RISCV_TYPE_MASK 0x00000000FF000000
+#define KVM_REG_RISCV_TYPE_SHIFT 24
+
+/* Config registers are mapped as type 1 */
+#define KVM_REG_RISCV_CONFIG (0x01 << KVM_REG_RISCV_TYPE_SHIFT)
+#define KVM_REG_RISCV_CONFIG_REG(name) \
+ (offsetof(struct kvm_riscv_config, name) / sizeof(unsigned long))
+
+/* Core registers are mapped as type 2 */
+#define KVM_REG_RISCV_CORE (0x02 << KVM_REG_RISCV_TYPE_SHIFT)
+#define KVM_REG_RISCV_CORE_REG(name) \
+ (offsetof(struct kvm_riscv_core, name) / sizeof(unsigned long))
+
+/* Control and status registers are mapped as type 3 */
+#define KVM_REG_RISCV_CSR (0x03 << KVM_REG_RISCV_TYPE_SHIFT)
+#define KVM_REG_RISCV_CSR_REG(name) \
+ (offsetof(struct kvm_riscv_csr, name) / sizeof(unsigned long))
+
+/* Timer registers are mapped as type 4 */
+#define KVM_REG_RISCV_TIMER (0x04 << KVM_REG_RISCV_TYPE_SHIFT)
+#define KVM_REG_RISCV_TIMER_REG(name) \
+ (offsetof(struct kvm_riscv_timer, name) / sizeof(__u64))
+
+/* F extension registers are mapped as type 5 */
+#define KVM_REG_RISCV_FP_F (0x05 << KVM_REG_RISCV_TYPE_SHIFT)
+#define KVM_REG_RISCV_FP_F_REG(name) \
+ (offsetof(struct __riscv_f_ext_state, name) / sizeof(__u32))
+
+/* D extension registers are mapped as type 6 */
+#define KVM_REG_RISCV_FP_D (0x06 << KVM_REG_RISCV_TYPE_SHIFT)
+#define KVM_REG_RISCV_FP_D_REG(name) \
+ (offsetof(struct __riscv_d_ext_state, name) / sizeof(__u64))
+
+#endif
+
+#endif /* __LINUX_KVM_RISCV_H */
diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c
index 478d9f02dab5..253126e4beef 100644
--- a/arch/riscv/kernel/asm-offsets.c
+++ b/arch/riscv/kernel/asm-offsets.c
@@ -7,7 +7,9 @@
#define GENERATING_ASM_OFFSETS
#include <linux/kbuild.h>
+#include <linux/mm.h>
#include <linux/sched.h>
+#include <asm/kvm_host.h>
#include <asm/thread_info.h>
#include <asm/ptrace.h>
@@ -110,6 +112,160 @@ void asm_offsets(void)
OFFSET(PT_BADADDR, pt_regs, badaddr);
OFFSET(PT_CAUSE, pt_regs, cause);
+ OFFSET(KVM_ARCH_GUEST_ZERO, kvm_vcpu_arch, guest_context.zero);
+ OFFSET(KVM_ARCH_GUEST_RA, kvm_vcpu_arch, guest_context.ra);
+ OFFSET(KVM_ARCH_GUEST_SP, kvm_vcpu_arch, guest_context.sp);
+ OFFSET(KVM_ARCH_GUEST_GP, kvm_vcpu_arch, guest_context.gp);
+ OFFSET(KVM_ARCH_GUEST_TP, kvm_vcpu_arch, guest_context.tp);
+ OFFSET(KVM_ARCH_GUEST_T0, kvm_vcpu_arch, guest_context.t0);
+ OFFSET(KVM_ARCH_GUEST_T1, kvm_vcpu_arch, guest_context.t1);
+ OFFSET(KVM_ARCH_GUEST_T2, kvm_vcpu_arch, guest_context.t2);
+ OFFSET(KVM_ARCH_GUEST_S0, kvm_vcpu_arch, guest_context.s0);
+ OFFSET(KVM_ARCH_GUEST_S1, kvm_vcpu_arch, guest_context.s1);
+ OFFSET(KVM_ARCH_GUEST_A0, kvm_vcpu_arch, guest_context.a0);
+ OFFSET(KVM_ARCH_GUEST_A1, kvm_vcpu_arch, guest_context.a1);
+ OFFSET(KVM_ARCH_GUEST_A2, kvm_vcpu_arch, guest_context.a2);
+ OFFSET(KVM_ARCH_GUEST_A3, kvm_vcpu_arch, guest_context.a3);
+ OFFSET(KVM_ARCH_GUEST_A4, kvm_vcpu_arch, guest_context.a4);
+ OFFSET(KVM_ARCH_GUEST_A5, kvm_vcpu_arch, guest_context.a5);
+ OFFSET(KVM_ARCH_GUEST_A6, kvm_vcpu_arch, guest_context.a6);
+ OFFSET(KVM_ARCH_GUEST_A7, kvm_vcpu_arch, guest_context.a7);
+ OFFSET(KVM_ARCH_GUEST_S2, kvm_vcpu_arch, guest_context.s2);
+ OFFSET(KVM_ARCH_GUEST_S3, kvm_vcpu_arch, guest_context.s3);
+ OFFSET(KVM_ARCH_GUEST_S4, kvm_vcpu_arch, guest_context.s4);
+ OFFSET(KVM_ARCH_GUEST_S5, kvm_vcpu_arch, guest_context.s5);
+ OFFSET(KVM_ARCH_GUEST_S6, kvm_vcpu_arch, guest_context.s6);
+ OFFSET(KVM_ARCH_GUEST_S7, kvm_vcpu_arch, guest_context.s7);
+ OFFSET(KVM_ARCH_GUEST_S8, kvm_vcpu_arch, guest_context.s8);
+ OFFSET(KVM_ARCH_GUEST_S9, kvm_vcpu_arch, guest_context.s9);
+ OFFSET(KVM_ARCH_GUEST_S10, kvm_vcpu_arch, guest_context.s10);
+ OFFSET(KVM_ARCH_GUEST_S11, kvm_vcpu_arch, guest_context.s11);
+ OFFSET(KVM_ARCH_GUEST_T3, kvm_vcpu_arch, guest_context.t3);
+ OFFSET(KVM_ARCH_GUEST_T4, kvm_vcpu_arch, guest_context.t4);
+ OFFSET(KVM_ARCH_GUEST_T5, kvm_vcpu_arch, guest_context.t5);
+ OFFSET(KVM_ARCH_GUEST_T6, kvm_vcpu_arch, guest_context.t6);
+ OFFSET(KVM_ARCH_GUEST_SEPC, kvm_vcpu_arch, guest_context.sepc);
+ OFFSET(KVM_ARCH_GUEST_SSTATUS, kvm_vcpu_arch, guest_context.sstatus);
+ OFFSET(KVM_ARCH_GUEST_HSTATUS, kvm_vcpu_arch, guest_context.hstatus);
+ OFFSET(KVM_ARCH_GUEST_SCOUNTEREN, kvm_vcpu_arch, guest_csr.scounteren);
+
+ OFFSET(KVM_ARCH_HOST_ZERO, kvm_vcpu_arch, host_context.zero);
+ OFFSET(KVM_ARCH_HOST_RA, kvm_vcpu_arch, host_context.ra);
+ OFFSET(KVM_ARCH_HOST_SP, kvm_vcpu_arch, host_context.sp);
+ OFFSET(KVM_ARCH_HOST_GP, kvm_vcpu_arch, host_context.gp);
+ OFFSET(KVM_ARCH_HOST_TP, kvm_vcpu_arch, host_context.tp);
+ OFFSET(KVM_ARCH_HOST_T0, kvm_vcpu_arch, host_context.t0);
+ OFFSET(KVM_ARCH_HOST_T1, kvm_vcpu_arch, host_context.t1);
+ OFFSET(KVM_ARCH_HOST_T2, kvm_vcpu_arch, host_context.t2);
+ OFFSET(KVM_ARCH_HOST_S0, kvm_vcpu_arch, host_context.s0);
+ OFFSET(KVM_ARCH_HOST_S1, kvm_vcpu_arch, host_context.s1);
+ OFFSET(KVM_ARCH_HOST_A0, kvm_vcpu_arch, host_context.a0);
+ OFFSET(KVM_ARCH_HOST_A1, kvm_vcpu_arch, host_context.a1);
+ OFFSET(KVM_ARCH_HOST_A2, kvm_vcpu_arch, host_context.a2);
+ OFFSET(KVM_ARCH_HOST_A3, kvm_vcpu_arch, host_context.a3);
+ OFFSET(KVM_ARCH_HOST_A4, kvm_vcpu_arch, host_context.a4);
+ OFFSET(KVM_ARCH_HOST_A5, kvm_vcpu_arch, host_context.a5);
+ OFFSET(KVM_ARCH_HOST_A6, kvm_vcpu_arch, host_context.a6);
+ OFFSET(KVM_ARCH_HOST_A7, kvm_vcpu_arch, host_context.a7);
+ OFFSET(KVM_ARCH_HOST_S2, kvm_vcpu_arch, host_context.s2);
+ OFFSET(KVM_ARCH_HOST_S3, kvm_vcpu_arch, host_context.s3);
+ OFFSET(KVM_ARCH_HOST_S4, kvm_vcpu_arch, host_context.s4);
+ OFFSET(KVM_ARCH_HOST_S5, kvm_vcpu_arch, host_context.s5);
+ OFFSET(KVM_ARCH_HOST_S6, kvm_vcpu_arch, host_context.s6);
+ OFFSET(KVM_ARCH_HOST_S7, kvm_vcpu_arch, host_context.s7);
+ OFFSET(KVM_ARCH_HOST_S8, kvm_vcpu_arch, host_context.s8);
+ OFFSET(KVM_ARCH_HOST_S9, kvm_vcpu_arch, host_context.s9);
+ OFFSET(KVM_ARCH_HOST_S10, kvm_vcpu_arch, host_context.s10);
+ OFFSET(KVM_ARCH_HOST_S11, kvm_vcpu_arch, host_context.s11);
+ OFFSET(KVM_ARCH_HOST_T3, kvm_vcpu_arch, host_context.t3);
+ OFFSET(KVM_ARCH_HOST_T4, kvm_vcpu_arch, host_context.t4);
+ OFFSET(KVM_ARCH_HOST_T5, kvm_vcpu_arch, host_context.t5);
+ OFFSET(KVM_ARCH_HOST_T6, kvm_vcpu_arch, host_context.t6);
+ OFFSET(KVM_ARCH_HOST_SEPC, kvm_vcpu_arch, host_context.sepc);
+ OFFSET(KVM_ARCH_HOST_SSTATUS, kvm_vcpu_arch, host_context.sstatus);
+ OFFSET(KVM_ARCH_HOST_HSTATUS, kvm_vcpu_arch, host_context.hstatus);
+ OFFSET(KVM_ARCH_HOST_SSCRATCH, kvm_vcpu_arch, host_sscratch);
+ OFFSET(KVM_ARCH_HOST_STVEC, kvm_vcpu_arch, host_stvec);
+ OFFSET(KVM_ARCH_HOST_SCOUNTEREN, kvm_vcpu_arch, host_scounteren);
+
+ OFFSET(KVM_ARCH_TRAP_SEPC, kvm_cpu_trap, sepc);
+ OFFSET(KVM_ARCH_TRAP_SCAUSE, kvm_cpu_trap, scause);
+ OFFSET(KVM_ARCH_TRAP_STVAL, kvm_cpu_trap, stval);
+ OFFSET(KVM_ARCH_TRAP_HTVAL, kvm_cpu_trap, htval);
+ OFFSET(KVM_ARCH_TRAP_HTINST, kvm_cpu_trap, htinst);
+
+ /* F extension */
+
+ OFFSET(KVM_ARCH_FP_F_F0, kvm_cpu_context, fp.f.f[0]);
+ OFFSET(KVM_ARCH_FP_F_F1, kvm_cpu_context, fp.f.f[1]);
+ OFFSET(KVM_ARCH_FP_F_F2, kvm_cpu_context, fp.f.f[2]);
+ OFFSET(KVM_ARCH_FP_F_F3, kvm_cpu_context, fp.f.f[3]);
+ OFFSET(KVM_ARCH_FP_F_F4, kvm_cpu_context, fp.f.f[4]);
+ OFFSET(KVM_ARCH_FP_F_F5, kvm_cpu_context, fp.f.f[5]);
+ OFFSET(KVM_ARCH_FP_F_F6, kvm_cpu_context, fp.f.f[6]);
+ OFFSET(KVM_ARCH_FP_F_F7, kvm_cpu_context, fp.f.f[7]);
+ OFFSET(KVM_ARCH_FP_F_F8, kvm_cpu_context, fp.f.f[8]);
+ OFFSET(KVM_ARCH_FP_F_F9, kvm_cpu_context, fp.f.f[9]);
+ OFFSET(KVM_ARCH_FP_F_F10, kvm_cpu_context, fp.f.f[10]);
+ OFFSET(KVM_ARCH_FP_F_F11, kvm_cpu_context, fp.f.f[11]);
+ OFFSET(KVM_ARCH_FP_F_F12, kvm_cpu_context, fp.f.f[12]);
+ OFFSET(KVM_ARCH_FP_F_F13, kvm_cpu_context, fp.f.f[13]);
+ OFFSET(KVM_ARCH_FP_F_F14, kvm_cpu_context, fp.f.f[14]);
+ OFFSET(KVM_ARCH_FP_F_F15, kvm_cpu_context, fp.f.f[15]);
+ OFFSET(KVM_ARCH_FP_F_F16, kvm_cpu_context, fp.f.f[16]);
+ OFFSET(KVM_ARCH_FP_F_F17, kvm_cpu_context, fp.f.f[17]);
+ OFFSET(KVM_ARCH_FP_F_F18, kvm_cpu_context, fp.f.f[18]);
+ OFFSET(KVM_ARCH_FP_F_F19, kvm_cpu_context, fp.f.f[19]);
+ OFFSET(KVM_ARCH_FP_F_F20, kvm_cpu_context, fp.f.f[20]);
+ OFFSET(KVM_ARCH_FP_F_F21, kvm_cpu_context, fp.f.f[21]);
+ OFFSET(KVM_ARCH_FP_F_F22, kvm_cpu_context, fp.f.f[22]);
+ OFFSET(KVM_ARCH_FP_F_F23, kvm_cpu_context, fp.f.f[23]);
+ OFFSET(KVM_ARCH_FP_F_F24, kvm_cpu_context, fp.f.f[24]);
+ OFFSET(KVM_ARCH_FP_F_F25, kvm_cpu_context, fp.f.f[25]);
+ OFFSET(KVM_ARCH_FP_F_F26, kvm_cpu_context, fp.f.f[26]);
+ OFFSET(KVM_ARCH_FP_F_F27, kvm_cpu_context, fp.f.f[27]);
+ OFFSET(KVM_ARCH_FP_F_F28, kvm_cpu_context, fp.f.f[28]);
+ OFFSET(KVM_ARCH_FP_F_F29, kvm_cpu_context, fp.f.f[29]);
+ OFFSET(KVM_ARCH_FP_F_F30, kvm_cpu_context, fp.f.f[30]);
+ OFFSET(KVM_ARCH_FP_F_F31, kvm_cpu_context, fp.f.f[31]);
+ OFFSET(KVM_ARCH_FP_F_FCSR, kvm_cpu_context, fp.f.fcsr);
+
+ /* D extension */
+
+ OFFSET(KVM_ARCH_FP_D_F0, kvm_cpu_context, fp.d.f[0]);
+ OFFSET(KVM_ARCH_FP_D_F1, kvm_cpu_context, fp.d.f[1]);
+ OFFSET(KVM_ARCH_FP_D_F2, kvm_cpu_context, fp.d.f[2]);
+ OFFSET(KVM_ARCH_FP_D_F3, kvm_cpu_context, fp.d.f[3]);
+ OFFSET(KVM_ARCH_FP_D_F4, kvm_cpu_context, fp.d.f[4]);
+ OFFSET(KVM_ARCH_FP_D_F5, kvm_cpu_context, fp.d.f[5]);
+ OFFSET(KVM_ARCH_FP_D_F6, kvm_cpu_context, fp.d.f[6]);
+ OFFSET(KVM_ARCH_FP_D_F7, kvm_cpu_context, fp.d.f[7]);
+ OFFSET(KVM_ARCH_FP_D_F8, kvm_cpu_context, fp.d.f[8]);
+ OFFSET(KVM_ARCH_FP_D_F9, kvm_cpu_context, fp.d.f[9]);
+ OFFSET(KVM_ARCH_FP_D_F10, kvm_cpu_context, fp.d.f[10]);
+ OFFSET(KVM_ARCH_FP_D_F11, kvm_cpu_context, fp.d.f[11]);
+ OFFSET(KVM_ARCH_FP_D_F12, kvm_cpu_context, fp.d.f[12]);
+ OFFSET(KVM_ARCH_FP_D_F13, kvm_cpu_context, fp.d.f[13]);
+ OFFSET(KVM_ARCH_FP_D_F14, kvm_cpu_context, fp.d.f[14]);
+ OFFSET(KVM_ARCH_FP_D_F15, kvm_cpu_context, fp.d.f[15]);
+ OFFSET(KVM_ARCH_FP_D_F16, kvm_cpu_context, fp.d.f[16]);
+ OFFSET(KVM_ARCH_FP_D_F17, kvm_cpu_context, fp.d.f[17]);
+ OFFSET(KVM_ARCH_FP_D_F18, kvm_cpu_context, fp.d.f[18]);
+ OFFSET(KVM_ARCH_FP_D_F19, kvm_cpu_context, fp.d.f[19]);
+ OFFSET(KVM_ARCH_FP_D_F20, kvm_cpu_context, fp.d.f[20]);
+ OFFSET(KVM_ARCH_FP_D_F21, kvm_cpu_context, fp.d.f[21]);
+ OFFSET(KVM_ARCH_FP_D_F22, kvm_cpu_context, fp.d.f[22]);
+ OFFSET(KVM_ARCH_FP_D_F23, kvm_cpu_context, fp.d.f[23]);
+ OFFSET(KVM_ARCH_FP_D_F24, kvm_cpu_context, fp.d.f[24]);
+ OFFSET(KVM_ARCH_FP_D_F25, kvm_cpu_context, fp.d.f[25]);
+ OFFSET(KVM_ARCH_FP_D_F26, kvm_cpu_context, fp.d.f[26]);
+ OFFSET(KVM_ARCH_FP_D_F27, kvm_cpu_context, fp.d.f[27]);
+ OFFSET(KVM_ARCH_FP_D_F28, kvm_cpu_context, fp.d.f[28]);
+ OFFSET(KVM_ARCH_FP_D_F29, kvm_cpu_context, fp.d.f[29]);
+ OFFSET(KVM_ARCH_FP_D_F30, kvm_cpu_context, fp.d.f[30]);
+ OFFSET(KVM_ARCH_FP_D_F31, kvm_cpu_context, fp.d.f[31]);
+ OFFSET(KVM_ARCH_FP_D_FCSR, kvm_cpu_context, fp.d.fcsr);
+
/*
* THREAD_{F,X}* might be larger than a S-type offset can handle, but
* these are used in performance-sensitive assembly so we can't resort
diff --git a/arch/riscv/kvm/Kconfig b/arch/riscv/kvm/Kconfig
new file mode 100644
index 000000000000..f5a342fa1b1d
--- /dev/null
+++ b/arch/riscv/kvm/Kconfig
@@ -0,0 +1,35 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# KVM configuration
+#
+
+source "virt/kvm/Kconfig"
+
+menuconfig VIRTUALIZATION
+ bool "Virtualization"
+ help
+ Say Y here to get to see options for using your Linux host to run
+ other operating systems inside virtual machines (guests).
+ This option alone does not add any kernel code.
+
+ If you say N, all options in this submenu will be skipped and
+ disabled.
+
+if VIRTUALIZATION
+
+config KVM
+ tristate "Kernel-based Virtual Machine (KVM) support (EXPERIMENTAL)"
+ depends on RISCV_SBI && MMU
+ select MMU_NOTIFIER
+ select PREEMPT_NOTIFIERS
+ select KVM_MMIO
+ select KVM_GENERIC_DIRTYLOG_READ_PROTECT
+ select HAVE_KVM_VCPU_ASYNC_IOCTL
+ select HAVE_KVM_EVENTFD
+ select SRCU
+ help
+ Support hosting virtualized guest machines.
+
+ If unsure, say N.
+
+endif # VIRTUALIZATION
diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile
new file mode 100644
index 000000000000..30cdd1df0098
--- /dev/null
+++ b/arch/riscv/kvm/Makefile
@@ -0,0 +1,26 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for RISC-V KVM support
+#
+
+ccflags-y += -I $(srctree)/$(src)
+
+KVM := ../../../virt/kvm
+
+obj-$(CONFIG_KVM) += kvm.o
+
+kvm-y += $(KVM)/kvm_main.o
+kvm-y += $(KVM)/coalesced_mmio.o
+kvm-y += $(KVM)/binary_stats.o
+kvm-y += $(KVM)/eventfd.o
+kvm-y += main.o
+kvm-y += vm.o
+kvm-y += vmid.o
+kvm-y += tlb.o
+kvm-y += mmu.o
+kvm-y += vcpu.o
+kvm-y += vcpu_exit.o
+kvm-y += vcpu_fp.o
+kvm-y += vcpu_switch.o
+kvm-y += vcpu_sbi.o
+kvm-y += vcpu_timer.o
diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c
new file mode 100644
index 000000000000..421ecf4e6360
--- /dev/null
+++ b/arch/riscv/kvm/main.c
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Anup Patel <anup.patel@wdc.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/kvm_host.h>
+#include <asm/csr.h>
+#include <asm/hwcap.h>
+#include <asm/sbi.h>
+
+long kvm_arch_dev_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ return -EINVAL;
+}
+
+int kvm_arch_check_processor_compat(void *opaque)
+{
+ return 0;
+}
+
+int kvm_arch_hardware_setup(void *opaque)
+{
+ return 0;
+}
+
+int kvm_arch_hardware_enable(void)
+{
+ unsigned long hideleg, hedeleg;
+
+ hedeleg = 0;
+ hedeleg |= (1UL << EXC_INST_MISALIGNED);
+ hedeleg |= (1UL << EXC_BREAKPOINT);
+ hedeleg |= (1UL << EXC_SYSCALL);
+ hedeleg |= (1UL << EXC_INST_PAGE_FAULT);
+ hedeleg |= (1UL << EXC_LOAD_PAGE_FAULT);
+ hedeleg |= (1UL << EXC_STORE_PAGE_FAULT);
+ csr_write(CSR_HEDELEG, hedeleg);
+
+ hideleg = 0;
+ hideleg |= (1UL << IRQ_VS_SOFT);
+ hideleg |= (1UL << IRQ_VS_TIMER);
+ hideleg |= (1UL << IRQ_VS_EXT);
+ csr_write(CSR_HIDELEG, hideleg);
+
+ csr_write(CSR_HCOUNTEREN, -1UL);
+
+ csr_write(CSR_HVIP, 0);
+
+ return 0;
+}
+
+void kvm_arch_hardware_disable(void)
+{
+ csr_write(CSR_HEDELEG, 0);
+ csr_write(CSR_HIDELEG, 0);
+}
+
+int kvm_arch_init(void *opaque)
+{
+ const char *str;
+
+ if (!riscv_isa_extension_available(NULL, h)) {
+ kvm_info("hypervisor extension not available\n");
+ return -ENODEV;
+ }
+
+ if (sbi_spec_is_0_1()) {
+ kvm_info("require SBI v0.2 or higher\n");
+ return -ENODEV;
+ }
+
+ if (sbi_probe_extension(SBI_EXT_RFENCE) <= 0) {
+ kvm_info("require SBI RFENCE extension\n");
+ return -ENODEV;
+ }
+
+ kvm_riscv_stage2_mode_detect();
+
+ kvm_riscv_stage2_vmid_detect();
+
+ kvm_info("hypervisor extension available\n");
+
+ switch (kvm_riscv_stage2_mode()) {
+ case HGATP_MODE_SV32X4:
+ str = "Sv32x4";
+ break;
+ case HGATP_MODE_SV39X4:
+ str = "Sv39x4";
+ break;
+ case HGATP_MODE_SV48X4:
+ str = "Sv48x4";
+ break;
+ default:
+ return -ENODEV;
+ }
+ kvm_info("using %s G-stage page table format\n", str);
+
+ kvm_info("VMID %ld bits available\n", kvm_riscv_stage2_vmid_bits());
+
+ return 0;
+}
+
+void kvm_arch_exit(void)
+{
+}
+
+static int riscv_kvm_init(void)
+{
+ return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
+}
+module_init(riscv_kvm_init);
diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c
new file mode 100644
index 000000000000..d81bae8eb55e
--- /dev/null
+++ b/arch/riscv/kvm/mmu.c
@@ -0,0 +1,802 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Anup Patel <anup.patel@wdc.com>
+ */
+
+#include <linux/bitops.h>
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/hugetlb.h>
+#include <linux/module.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
+#include <linux/kvm_host.h>
+#include <linux/sched/signal.h>
+#include <asm/csr.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/sbi.h>
+
+#ifdef CONFIG_64BIT
+static unsigned long stage2_mode = (HGATP_MODE_SV39X4 << HGATP_MODE_SHIFT);
+static unsigned long stage2_pgd_levels = 3;
+#define stage2_index_bits 9
+#else
+static unsigned long stage2_mode = (HGATP_MODE_SV32X4 << HGATP_MODE_SHIFT);
+static unsigned long stage2_pgd_levels = 2;
+#define stage2_index_bits 10
+#endif
+
+#define stage2_pgd_xbits 2
+#define stage2_pgd_size (1UL << (HGATP_PAGE_SHIFT + stage2_pgd_xbits))
+#define stage2_gpa_bits (HGATP_PAGE_SHIFT + \
+ (stage2_pgd_levels * stage2_index_bits) + \
+ stage2_pgd_xbits)
+#define stage2_gpa_size ((gpa_t)(1ULL << stage2_gpa_bits))
+
+#define stage2_pte_leaf(__ptep) \
+ (pte_val(*(__ptep)) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC))
+
+static inline unsigned long stage2_pte_index(gpa_t addr, u32 level)
+{
+ unsigned long mask;
+ unsigned long shift = HGATP_PAGE_SHIFT + (stage2_index_bits * level);
+
+ if (level == (stage2_pgd_levels - 1))
+ mask = (PTRS_PER_PTE * (1UL << stage2_pgd_xbits)) - 1;
+ else
+ mask = PTRS_PER_PTE - 1;
+
+ return (addr >> shift) & mask;
+}
+
+static inline unsigned long stage2_pte_page_vaddr(pte_t pte)
+{
+ return (unsigned long)pfn_to_virt(pte_val(pte) >> _PAGE_PFN_SHIFT);
+}
+
+static int stage2_page_size_to_level(unsigned long page_size, u32 *out_level)
+{
+ u32 i;
+ unsigned long psz = 1UL << 12;
+
+ for (i = 0; i < stage2_pgd_levels; i++) {
+ if (page_size == (psz << (i * stage2_index_bits))) {
+ *out_level = i;
+ return 0;
+ }
+ }
+
+ return -EINVAL;
+}
+
+static int stage2_level_to_page_size(u32 level, unsigned long *out_pgsize)
+{
+ if (stage2_pgd_levels < level)
+ return -EINVAL;
+
+ *out_pgsize = 1UL << (12 + (level * stage2_index_bits));
+
+ return 0;
+}
+
+static int stage2_cache_topup(struct kvm_mmu_page_cache *pcache,
+ int min, int max)
+{
+ void *page;
+
+ BUG_ON(max > KVM_MMU_PAGE_CACHE_NR_OBJS);
+ if (pcache->nobjs >= min)
+ return 0;
+ while (pcache->nobjs < max) {
+ page = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+ if (!page)
+ return -ENOMEM;
+ pcache->objects[pcache->nobjs++] = page;
+ }
+
+ return 0;
+}
+
+static void stage2_cache_flush(struct kvm_mmu_page_cache *pcache)
+{
+ while (pcache && pcache->nobjs)
+ free_page((unsigned long)pcache->objects[--pcache->nobjs]);
+}
+
+static void *stage2_cache_alloc(struct kvm_mmu_page_cache *pcache)
+{
+ void *p;
+
+ if (!pcache)
+ return NULL;
+
+ BUG_ON(!pcache->nobjs);
+ p = pcache->objects[--pcache->nobjs];
+
+ return p;
+}
+
+static bool stage2_get_leaf_entry(struct kvm *kvm, gpa_t addr,
+ pte_t **ptepp, u32 *ptep_level)
+{
+ pte_t *ptep;
+ u32 current_level = stage2_pgd_levels - 1;
+
+ *ptep_level = current_level;
+ ptep = (pte_t *)kvm->arch.pgd;
+ ptep = &ptep[stage2_pte_index(addr, current_level)];
+ while (ptep && pte_val(*ptep)) {
+ if (stage2_pte_leaf(ptep)) {
+ *ptep_level = current_level;
+ *ptepp = ptep;
+ return true;
+ }
+
+ if (current_level) {
+ current_level--;
+ *ptep_level = current_level;
+ ptep = (pte_t *)stage2_pte_page_vaddr(*ptep);
+ ptep = &ptep[stage2_pte_index(addr, current_level)];
+ } else {
+ ptep = NULL;
+ }
+ }
+
+ return false;
+}
+
+static void stage2_remote_tlb_flush(struct kvm *kvm, u32 level, gpa_t addr)
+{
+ struct cpumask hmask;
+ unsigned long size = PAGE_SIZE;
+ struct kvm_vmid *vmid = &kvm->arch.vmid;
+
+ if (stage2_level_to_page_size(level, &size))
+ return;
+ addr &= ~(size - 1);
+
+ /*
+ * TODO: Instead of cpu_online_mask, we should only target CPUs
+ * where the Guest/VM is running.
+ */
+ preempt_disable();
+ riscv_cpuid_to_hartid_mask(cpu_online_mask, &hmask);
+ sbi_remote_hfence_gvma_vmid(cpumask_bits(&hmask), addr, size,
+ READ_ONCE(vmid->vmid));
+ preempt_enable();
+}
+
+static int stage2_set_pte(struct kvm *kvm, u32 level,
+ struct kvm_mmu_page_cache *pcache,
+ gpa_t addr, const pte_t *new_pte)
+{
+ u32 current_level = stage2_pgd_levels - 1;
+ pte_t *next_ptep = (pte_t *)kvm->arch.pgd;
+ pte_t *ptep = &next_ptep[stage2_pte_index(addr, current_level)];
+
+ if (current_level < level)
+ return -EINVAL;
+
+ while (current_level != level) {
+ if (stage2_pte_leaf(ptep))
+ return -EEXIST;
+
+ if (!pte_val(*ptep)) {
+ next_ptep = stage2_cache_alloc(pcache);
+ if (!next_ptep)
+ return -ENOMEM;
+ *ptep = pfn_pte(PFN_DOWN(__pa(next_ptep)),
+ __pgprot(_PAGE_TABLE));
+ } else {
+ if (stage2_pte_leaf(ptep))
+ return -EEXIST;
+ next_ptep = (pte_t *)stage2_pte_page_vaddr(*ptep);
+ }
+
+ current_level--;
+ ptep = &next_ptep[stage2_pte_index(addr, current_level)];
+ }
+
+ *ptep = *new_pte;
+ if (stage2_pte_leaf(ptep))
+ stage2_remote_tlb_flush(kvm, current_level, addr);
+
+ return 0;
+}
+
+static int stage2_map_page(struct kvm *kvm,
+ struct kvm_mmu_page_cache *pcache,
+ gpa_t gpa, phys_addr_t hpa,
+ unsigned long page_size,
+ bool page_rdonly, bool page_exec)
+{
+ int ret;
+ u32 level = 0;
+ pte_t new_pte;
+ pgprot_t prot;
+
+ ret = stage2_page_size_to_level(page_size, &level);
+ if (ret)
+ return ret;
+
+ /*
+ * A RISC-V implementation can choose to either:
+ * 1) Update 'A' and 'D' PTE bits in hardware
+ * 2) Generate page fault when 'A' and/or 'D' bits are not set
+ * PTE so that software can update these bits.
+ *
+ * We support both options mentioned above. To achieve this, we
+ * always set 'A' and 'D' PTE bits at time of creating stage2
+ * mapping. To support KVM dirty page logging with both options
+ * mentioned above, we will write-protect stage2 PTEs to track
+ * dirty pages.
+ */
+
+ if (page_exec) {
+ if (page_rdonly)
+ prot = PAGE_READ_EXEC;
+ else
+ prot = PAGE_WRITE_EXEC;
+ } else {
+ if (page_rdonly)
+ prot = PAGE_READ;
+ else
+ prot = PAGE_WRITE;
+ }
+ new_pte = pfn_pte(PFN_DOWN(hpa), prot);
+ new_pte = pte_mkdirty(new_pte);
+
+ return stage2_set_pte(kvm, level, pcache, gpa, &new_pte);
+}
+
+enum stage2_op {
+ STAGE2_OP_NOP = 0, /* Nothing */
+ STAGE2_OP_CLEAR, /* Clear/Unmap */
+ STAGE2_OP_WP, /* Write-protect */
+};
+
+static void stage2_op_pte(struct kvm *kvm, gpa_t addr,
+ pte_t *ptep, u32 ptep_level, enum stage2_op op)
+{
+ int i, ret;
+ pte_t *next_ptep;
+ u32 next_ptep_level;
+ unsigned long next_page_size, page_size;
+
+ ret = stage2_level_to_page_size(ptep_level, &page_size);
+ if (ret)
+ return;
+
+ BUG_ON(addr & (page_size - 1));
+
+ if (!pte_val(*ptep))
+ return;
+
+ if (ptep_level && !stage2_pte_leaf(ptep)) {
+ next_ptep = (pte_t *)stage2_pte_page_vaddr(*ptep);
+ next_ptep_level = ptep_level - 1;
+ ret = stage2_level_to_page_size(next_ptep_level,
+ &next_page_size);
+ if (ret)
+ return;
+
+ if (op == STAGE2_OP_CLEAR)
+ set_pte(ptep, __pte(0));
+ for (i = 0; i < PTRS_PER_PTE; i++)
+ stage2_op_pte(kvm, addr + i * next_page_size,
+ &next_ptep[i], next_ptep_level, op);
+ if (op == STAGE2_OP_CLEAR)
+ put_page(virt_to_page(next_ptep));
+ } else {
+ if (op == STAGE2_OP_CLEAR)
+ set_pte(ptep, __pte(0));
+ else if (op == STAGE2_OP_WP)
+ set_pte(ptep, __pte(pte_val(*ptep) & ~_PAGE_WRITE));
+ stage2_remote_tlb_flush(kvm, ptep_level, addr);
+ }
+}
+
+static void stage2_unmap_range(struct kvm *kvm, gpa_t start,
+ gpa_t size, bool may_block)
+{
+ int ret;
+ pte_t *ptep;
+ u32 ptep_level;
+ bool found_leaf;
+ unsigned long page_size;
+ gpa_t addr = start, end = start + size;
+
+ while (addr < end) {
+ found_leaf = stage2_get_leaf_entry(kvm, addr,
+ &ptep, &ptep_level);
+ ret = stage2_level_to_page_size(ptep_level, &page_size);
+ if (ret)
+ break;
+
+ if (!found_leaf)
+ goto next;
+
+ if (!(addr & (page_size - 1)) && ((end - addr) >= page_size))
+ stage2_op_pte(kvm, addr, ptep,
+ ptep_level, STAGE2_OP_CLEAR);
+
+next:
+ addr += page_size;
+
+ /*
+ * If the range is too large, release the kvm->mmu_lock
+ * to prevent starvation and lockup detector warnings.
+ */
+ if (may_block && addr < end)
+ cond_resched_lock(&kvm->mmu_lock);
+ }
+}
+
+static void stage2_wp_range(struct kvm *kvm, gpa_t start, gpa_t end)
+{
+ int ret;
+ pte_t *ptep;
+ u32 ptep_level;
+ bool found_leaf;
+ gpa_t addr = start;
+ unsigned long page_size;
+
+ while (addr < end) {
+ found_leaf = stage2_get_leaf_entry(kvm, addr,
+ &ptep, &ptep_level);
+ ret = stage2_level_to_page_size(ptep_level, &page_size);
+ if (ret)
+ break;
+
+ if (!found_leaf)
+ goto next;
+
+ if (!(addr & (page_size - 1)) && ((end - addr) >= page_size))
+ stage2_op_pte(kvm, addr, ptep,
+ ptep_level, STAGE2_OP_WP);
+
+next:
+ addr += page_size;
+ }
+}
+
+static void stage2_wp_memory_region(struct kvm *kvm, int slot)
+{
+ struct kvm_memslots *slots = kvm_memslots(kvm);
+ struct kvm_memory_slot *memslot = id_to_memslot(slots, slot);
+ phys_addr_t start = memslot->base_gfn << PAGE_SHIFT;
+ phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;
+
+ spin_lock(&kvm->mmu_lock);
+ stage2_wp_range(kvm, start, end);
+ spin_unlock(&kvm->mmu_lock);
+ kvm_flush_remote_tlbs(kvm);
+}
+
+static int stage2_ioremap(struct kvm *kvm, gpa_t gpa, phys_addr_t hpa,
+ unsigned long size, bool writable)
+{
+ pte_t pte;
+ int ret = 0;
+ unsigned long pfn;
+ phys_addr_t addr, end;
+ struct kvm_mmu_page_cache pcache = { 0, };
+
+ end = (gpa + size + PAGE_SIZE - 1) & PAGE_MASK;
+ pfn = __phys_to_pfn(hpa);
+
+ for (addr = gpa; addr < end; addr += PAGE_SIZE) {
+ pte = pfn_pte(pfn, PAGE_KERNEL);
+
+ if (!writable)
+ pte = pte_wrprotect(pte);
+
+ ret = stage2_cache_topup(&pcache,
+ stage2_pgd_levels,
+ KVM_MMU_PAGE_CACHE_NR_OBJS);
+ if (ret)
+ goto out;
+
+ spin_lock(&kvm->mmu_lock);
+ ret = stage2_set_pte(kvm, 0, &pcache, addr, &pte);
+ spin_unlock(&kvm->mmu_lock);
+ if (ret)
+ goto out;
+
+ pfn++;
+ }
+
+out:
+ stage2_cache_flush(&pcache);
+ return ret;
+}
+
+void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
+ struct kvm_memory_slot *slot,
+ gfn_t gfn_offset,
+ unsigned long mask)
+{
+ phys_addr_t base_gfn = slot->base_gfn + gfn_offset;
+ phys_addr_t start = (base_gfn + __ffs(mask)) << PAGE_SHIFT;
+ phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT;
+
+ stage2_wp_range(kvm, start, end);
+}
+
+void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
+{
+}
+
+void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
+ const struct kvm_memory_slot *memslot)
+{
+ kvm_flush_remote_tlbs(kvm);
+}
+
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free)
+{
+}
+
+void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen)
+{
+}
+
+void kvm_arch_flush_shadow_all(struct kvm *kvm)
+{
+ kvm_riscv_stage2_free_pgd(kvm);
+}
+
+void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *slot)
+{
+}
+
+void kvm_arch_commit_memory_region(struct kvm *kvm,
+ const struct kvm_userspace_memory_region *mem,
+ struct kvm_memory_slot *old,
+ const struct kvm_memory_slot *new,
+ enum kvm_mr_change change)
+{
+ /*
+ * At this point memslot has been committed and there is an
+ * allocated dirty_bitmap[], dirty pages will be tracked while
+ * the memory slot is write protected.
+ */
+ if (change != KVM_MR_DELETE && mem->flags & KVM_MEM_LOG_DIRTY_PAGES)
+ stage2_wp_memory_region(kvm, mem->slot);
+}
+
+int kvm_arch_prepare_memory_region(struct kvm *kvm,
+ struct kvm_memory_slot *memslot,
+ const struct kvm_userspace_memory_region *mem,
+ enum kvm_mr_change change)
+{
+ hva_t hva = mem->userspace_addr;
+ hva_t reg_end = hva + mem->memory_size;
+ bool writable = !(mem->flags & KVM_MEM_READONLY);
+ int ret = 0;
+
+ if (change != KVM_MR_CREATE && change != KVM_MR_MOVE &&
+ change != KVM_MR_FLAGS_ONLY)
+ return 0;
+
+ /*
+ * Prevent userspace from creating a memory region outside of the GPA
+ * space addressable by the KVM guest GPA space.
+ */
+ if ((memslot->base_gfn + memslot->npages) >=
+ (stage2_gpa_size >> PAGE_SHIFT))
+ return -EFAULT;
+
+ mmap_read_lock(current->mm);
+
+ /*
+ * A memory region could potentially cover multiple VMAs, and
+ * any holes between them, so iterate over all of them to find
+ * out if we can map any of them right now.
+ *
+ * +--------------------------------------------+
+ * +---------------+----------------+ +----------------+
+ * | : VMA 1 | VMA 2 | | VMA 3 : |
+ * +---------------+----------------+ +----------------+
+ * | memory region |
+ * +--------------------------------------------+
+ */
+ do {
+ struct vm_area_struct *vma = find_vma(current->mm, hva);
+ hva_t vm_start, vm_end;
+
+ if (!vma || vma->vm_start >= reg_end)
+ break;
+
+ /*
+ * Mapping a read-only VMA is only allowed if the
+ * memory region is configured as read-only.
+ */
+ if (writable && !(vma->vm_flags & VM_WRITE)) {
+ ret = -EPERM;
+ break;
+ }
+
+ /* Take the intersection of this VMA with the memory region */
+ vm_start = max(hva, vma->vm_start);
+ vm_end = min(reg_end, vma->vm_end);
+
+ if (vma->vm_flags & VM_PFNMAP) {
+ gpa_t gpa = mem->guest_phys_addr +
+ (vm_start - mem->userspace_addr);
+ phys_addr_t pa;
+
+ pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT;
+ pa += vm_start - vma->vm_start;
+
+ /* IO region dirty page logging not allowed */
+ if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = stage2_ioremap(kvm, gpa, pa,
+ vm_end - vm_start, writable);
+ if (ret)
+ break;
+ }
+ hva = vm_end;
+ } while (hva < reg_end);
+
+ if (change == KVM_MR_FLAGS_ONLY)
+ goto out;
+
+ spin_lock(&kvm->mmu_lock);
+ if (ret)
+ stage2_unmap_range(kvm, mem->guest_phys_addr,
+ mem->memory_size, false);
+ spin_unlock(&kvm->mmu_lock);
+
+out:
+ mmap_read_unlock(current->mm);
+ return ret;
+}
+
+bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
+{
+ if (!kvm->arch.pgd)
+ return false;
+
+ stage2_unmap_range(kvm, range->start << PAGE_SHIFT,
+ (range->end - range->start) << PAGE_SHIFT,
+ range->may_block);
+ return false;
+}
+
+bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
+{
+ int ret;
+ kvm_pfn_t pfn = pte_pfn(range->pte);
+
+ if (!kvm->arch.pgd)
+ return false;
+
+ WARN_ON(range->end - range->start != 1);
+
+ ret = stage2_map_page(kvm, NULL, range->start << PAGE_SHIFT,
+ __pfn_to_phys(pfn), PAGE_SIZE, true, true);
+ if (ret) {
+ kvm_debug("Failed to map stage2 page (error %d)\n", ret);
+ return true;
+ }
+
+ return false;
+}
+
+bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
+{
+ pte_t *ptep;
+ u32 ptep_level = 0;
+ u64 size = (range->end - range->start) << PAGE_SHIFT;
+
+ if (!kvm->arch.pgd)
+ return false;
+
+ WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PGDIR_SIZE);
+
+ if (!stage2_get_leaf_entry(kvm, range->start << PAGE_SHIFT,
+ &ptep, &ptep_level))
+ return false;
+
+ return ptep_test_and_clear_young(NULL, 0, ptep);
+}
+
+bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
+{
+ pte_t *ptep;
+ u32 ptep_level = 0;
+ u64 size = (range->end - range->start) << PAGE_SHIFT;
+
+ if (!kvm->arch.pgd)
+ return false;
+
+ WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PGDIR_SIZE);
+
+ if (!stage2_get_leaf_entry(kvm, range->start << PAGE_SHIFT,
+ &ptep, &ptep_level))
+ return false;
+
+ return pte_young(*ptep);
+}
+
+int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu,
+ struct kvm_memory_slot *memslot,
+ gpa_t gpa, unsigned long hva, bool is_write)
+{
+ int ret;
+ kvm_pfn_t hfn;
+ bool writeable;
+ short vma_pageshift;
+ gfn_t gfn = gpa >> PAGE_SHIFT;
+ struct vm_area_struct *vma;
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_mmu_page_cache *pcache = &vcpu->arch.mmu_page_cache;
+ bool logging = (memslot->dirty_bitmap &&
+ !(memslot->flags & KVM_MEM_READONLY)) ? true : false;
+ unsigned long vma_pagesize, mmu_seq;
+
+ mmap_read_lock(current->mm);
+
+ vma = find_vma_intersection(current->mm, hva, hva + 1);
+ if (unlikely(!vma)) {
+ kvm_err("Failed to find VMA for hva 0x%lx\n", hva);
+ mmap_read_unlock(current->mm);
+ return -EFAULT;
+ }
+
+ if (is_vm_hugetlb_page(vma))
+ vma_pageshift = huge_page_shift(hstate_vma(vma));
+ else
+ vma_pageshift = PAGE_SHIFT;
+ vma_pagesize = 1ULL << vma_pageshift;
+ if (logging || (vma->vm_flags & VM_PFNMAP))
+ vma_pagesize = PAGE_SIZE;
+
+ if (vma_pagesize == PMD_SIZE || vma_pagesize == PGDIR_SIZE)
+ gfn = (gpa & huge_page_mask(hstate_vma(vma))) >> PAGE_SHIFT;
+
+ mmap_read_unlock(current->mm);
+
+ if (vma_pagesize != PGDIR_SIZE &&
+ vma_pagesize != PMD_SIZE &&
+ vma_pagesize != PAGE_SIZE) {
+ kvm_err("Invalid VMA page size 0x%lx\n", vma_pagesize);
+ return -EFAULT;
+ }
+
+ /* We need minimum second+third level pages */
+ ret = stage2_cache_topup(pcache, stage2_pgd_levels,
+ KVM_MMU_PAGE_CACHE_NR_OBJS);
+ if (ret) {
+ kvm_err("Failed to topup stage2 cache\n");
+ return ret;
+ }
+
+ mmu_seq = kvm->mmu_notifier_seq;
+
+ hfn = gfn_to_pfn_prot(kvm, gfn, is_write, &writeable);
+ if (hfn == KVM_PFN_ERR_HWPOISON) {
+ send_sig_mceerr(BUS_MCEERR_AR, (void __user *)hva,
+ vma_pageshift, current);
+ return 0;
+ }
+ if (is_error_noslot_pfn(hfn))
+ return -EFAULT;
+
+ /*
+ * If logging is active then we allow writable pages only
+ * for write faults.
+ */
+ if (logging && !is_write)
+ writeable = false;
+
+ spin_lock(&kvm->mmu_lock);
+
+ if (mmu_notifier_retry(kvm, mmu_seq))
+ goto out_unlock;
+
+ if (writeable) {
+ kvm_set_pfn_dirty(hfn);
+ mark_page_dirty(kvm, gfn);
+ ret = stage2_map_page(kvm, pcache, gpa, hfn << PAGE_SHIFT,
+ vma_pagesize, false, true);
+ } else {
+ ret = stage2_map_page(kvm, pcache, gpa, hfn << PAGE_SHIFT,
+ vma_pagesize, true, true);
+ }
+
+ if (ret)
+ kvm_err("Failed to map in stage2\n");
+
+out_unlock:
+ spin_unlock(&kvm->mmu_lock);
+ kvm_set_pfn_accessed(hfn);
+ kvm_release_pfn_clean(hfn);
+ return ret;
+}
+
+void kvm_riscv_stage2_flush_cache(struct kvm_vcpu *vcpu)
+{
+ stage2_cache_flush(&vcpu->arch.mmu_page_cache);
+}
+
+int kvm_riscv_stage2_alloc_pgd(struct kvm *kvm)
+{
+ struct page *pgd_page;
+
+ if (kvm->arch.pgd != NULL) {
+ kvm_err("kvm_arch already initialized?\n");
+ return -EINVAL;
+ }
+
+ pgd_page = alloc_pages(GFP_KERNEL | __GFP_ZERO,
+ get_order(stage2_pgd_size));
+ if (!pgd_page)
+ return -ENOMEM;
+ kvm->arch.pgd = page_to_virt(pgd_page);
+ kvm->arch.pgd_phys = page_to_phys(pgd_page);
+
+ return 0;
+}
+
+void kvm_riscv_stage2_free_pgd(struct kvm *kvm)
+{
+ void *pgd = NULL;
+
+ spin_lock(&kvm->mmu_lock);
+ if (kvm->arch.pgd) {
+ stage2_unmap_range(kvm, 0UL, stage2_gpa_size, false);
+ pgd = READ_ONCE(kvm->arch.pgd);
+ kvm->arch.pgd = NULL;
+ kvm->arch.pgd_phys = 0;
+ }
+ spin_unlock(&kvm->mmu_lock);
+
+ if (pgd)
+ free_pages((unsigned long)pgd, get_order(stage2_pgd_size));
+}
+
+void kvm_riscv_stage2_update_hgatp(struct kvm_vcpu *vcpu)
+{
+ unsigned long hgatp = stage2_mode;
+ struct kvm_arch *k = &vcpu->kvm->arch;
+
+ hgatp |= (READ_ONCE(k->vmid.vmid) << HGATP_VMID_SHIFT) &
+ HGATP_VMID_MASK;
+ hgatp |= (k->pgd_phys >> PAGE_SHIFT) & HGATP_PPN;
+
+ csr_write(CSR_HGATP, hgatp);
+
+ if (!kvm_riscv_stage2_vmid_bits())
+ __kvm_riscv_hfence_gvma_all();
+}
+
+void kvm_riscv_stage2_mode_detect(void)
+{
+#ifdef CONFIG_64BIT
+ /* Try Sv48x4 stage2 mode */
+ csr_write(CSR_HGATP, HGATP_MODE_SV48X4 << HGATP_MODE_SHIFT);
+ if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV48X4) {
+ stage2_mode = (HGATP_MODE_SV48X4 << HGATP_MODE_SHIFT);
+ stage2_pgd_levels = 4;
+ }
+ csr_write(CSR_HGATP, 0);
+
+ __kvm_riscv_hfence_gvma_all();
+#endif
+}
+
+unsigned long kvm_riscv_stage2_mode(void)
+{
+ return stage2_mode >> HGATP_MODE_SHIFT;
+}
diff --git a/arch/riscv/kvm/tlb.S b/arch/riscv/kvm/tlb.S
new file mode 100644
index 000000000000..899f75d60bad
--- /dev/null
+++ b/arch/riscv/kvm/tlb.S
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Anup Patel <anup.patel@wdc.com>
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm.h>
+
+ .text
+ .altmacro
+ .option norelax
+
+ /*
+ * Instruction encoding of hfence.gvma is:
+ * HFENCE.GVMA rs1, rs2
+ * HFENCE.GVMA zero, rs2
+ * HFENCE.GVMA rs1
+ * HFENCE.GVMA
+ *
+ * rs1!=zero and rs2!=zero ==> HFENCE.GVMA rs1, rs2
+ * rs1==zero and rs2!=zero ==> HFENCE.GVMA zero, rs2
+ * rs1!=zero and rs2==zero ==> HFENCE.GVMA rs1
+ * rs1==zero and rs2==zero ==> HFENCE.GVMA
+ *
+ * Instruction encoding of HFENCE.GVMA is:
+ * 0110001 rs2(5) rs1(5) 000 00000 1110011
+ */
+
+ENTRY(__kvm_riscv_hfence_gvma_vmid_gpa)
+ /*
+ * rs1 = a0 (GPA >> 2)
+ * rs2 = a1 (VMID)
+ * HFENCE.GVMA a0, a1
+ * 0110001 01011 01010 000 00000 1110011
+ */
+ .word 0x62b50073
+ ret
+ENDPROC(__kvm_riscv_hfence_gvma_vmid_gpa)
+
+ENTRY(__kvm_riscv_hfence_gvma_vmid)
+ /*
+ * rs1 = zero
+ * rs2 = a0 (VMID)
+ * HFENCE.GVMA zero, a0
+ * 0110001 01010 00000 000 00000 1110011
+ */
+ .word 0x62a00073
+ ret
+ENDPROC(__kvm_riscv_hfence_gvma_vmid)
+
+ENTRY(__kvm_riscv_hfence_gvma_gpa)
+ /*
+ * rs1 = a0 (GPA >> 2)
+ * rs2 = zero
+ * HFENCE.GVMA a0
+ * 0110001 00000 01010 000 00000 1110011
+ */
+ .word 0x62050073
+ ret
+ENDPROC(__kvm_riscv_hfence_gvma_gpa)
+
+ENTRY(__kvm_riscv_hfence_gvma_all)
+ /*
+ * rs1 = zero
+ * rs2 = zero
+ * HFENCE.GVMA
+ * 0110001 00000 00000 000 00000 1110011
+ */
+ .word 0x62000073
+ ret
+ENDPROC(__kvm_riscv_hfence_gvma_all)
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
new file mode 100644
index 000000000000..e3d3aed46184
--- /dev/null
+++ b/arch/riscv/kvm/vcpu.c
@@ -0,0 +1,825 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Anup Patel <anup.patel@wdc.com>
+ */
+
+#include <linux/bitops.h>
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kdebug.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
+#include <linux/sched/signal.h>
+#include <linux/fs.h>
+#include <linux/kvm_host.h>
+#include <asm/csr.h>
+#include <asm/hwcap.h>
+
+const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
+ KVM_GENERIC_VCPU_STATS(),
+ STATS_DESC_COUNTER(VCPU, ecall_exit_stat),
+ STATS_DESC_COUNTER(VCPU, wfi_exit_stat),
+ STATS_DESC_COUNTER(VCPU, mmio_exit_user),
+ STATS_DESC_COUNTER(VCPU, mmio_exit_kernel),
+ STATS_DESC_COUNTER(VCPU, exits)
+};
+
+const struct kvm_stats_header kvm_vcpu_stats_header = {
+ .name_size = KVM_STATS_NAME_SIZE,
+ .num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
+ .id_offset = sizeof(struct kvm_stats_header),
+ .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
+ .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
+ sizeof(kvm_vcpu_stats_desc),
+};
+
+#define KVM_RISCV_ISA_ALLOWED (riscv_isa_extension_mask(a) | \
+ riscv_isa_extension_mask(c) | \
+ riscv_isa_extension_mask(d) | \
+ riscv_isa_extension_mask(f) | \
+ riscv_isa_extension_mask(i) | \
+ riscv_isa_extension_mask(m) | \
+ riscv_isa_extension_mask(s) | \
+ riscv_isa_extension_mask(u))
+
+static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
+ struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr;
+ struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
+ struct kvm_cpu_context *reset_cntx = &vcpu->arch.guest_reset_context;
+
+ memcpy(csr, reset_csr, sizeof(*csr));
+
+ memcpy(cntx, reset_cntx, sizeof(*cntx));
+
+ kvm_riscv_vcpu_fp_reset(vcpu);
+
+ kvm_riscv_vcpu_timer_reset(vcpu);
+
+ WRITE_ONCE(vcpu->arch.irqs_pending, 0);
+ WRITE_ONCE(vcpu->arch.irqs_pending_mask, 0);
+}
+
+int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
+{
+ return 0;
+}
+
+int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpu_context *cntx;
+
+ /* Mark this VCPU never ran */
+ vcpu->arch.ran_atleast_once = false;
+
+ /* Setup ISA features available to VCPU */
+ vcpu->arch.isa = riscv_isa_extension_base(NULL) & KVM_RISCV_ISA_ALLOWED;
+
+ /* Setup reset state of shadow SSTATUS and HSTATUS CSRs */
+ cntx = &vcpu->arch.guest_reset_context;
+ cntx->sstatus = SR_SPP | SR_SPIE;
+ cntx->hstatus = 0;
+ cntx->hstatus |= HSTATUS_VTW;
+ cntx->hstatus |= HSTATUS_SPVP;
+ cntx->hstatus |= HSTATUS_SPV;
+
+ /* Setup VCPU timer */
+ kvm_riscv_vcpu_timer_init(vcpu);
+
+ /* Reset VCPU */
+ kvm_riscv_reset_vcpu(vcpu);
+
+ return 0;
+}
+
+void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
+{
+}
+
+void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
+{
+ /* Cleanup VCPU timer */
+ kvm_riscv_vcpu_timer_deinit(vcpu);
+
+ /* Flush the pages pre-allocated for Stage2 page table mappings */
+ kvm_riscv_stage2_flush_cache(vcpu);
+}
+
+int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
+{
+ return kvm_riscv_vcpu_has_interrupts(vcpu, 1UL << IRQ_VS_TIMER);
+}
+
+void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
+{
+}
+
+void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
+{
+}
+
+int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
+{
+ return (kvm_riscv_vcpu_has_interrupts(vcpu, -1UL) &&
+ !vcpu->arch.power_off && !vcpu->arch.pause);
+}
+
+int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
+{
+ return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
+}
+
+bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
+{
+ return (vcpu->arch.guest_context.sstatus & SR_SPP) ? true : false;
+}
+
+vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
+{
+ return VM_FAULT_SIGBUS;
+}
+
+static int kvm_riscv_vcpu_get_reg_config(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg)
+{
+ unsigned long __user *uaddr =
+ (unsigned long __user *)(unsigned long)reg->addr;
+ unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+ KVM_REG_SIZE_MASK |
+ KVM_REG_RISCV_CONFIG);
+ unsigned long reg_val;
+
+ if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+ return -EINVAL;
+
+ switch (reg_num) {
+ case KVM_REG_RISCV_CONFIG_REG(isa):
+ reg_val = vcpu->arch.isa;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int kvm_riscv_vcpu_set_reg_config(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg)
+{
+ unsigned long __user *uaddr =
+ (unsigned long __user *)(unsigned long)reg->addr;
+ unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+ KVM_REG_SIZE_MASK |
+ KVM_REG_RISCV_CONFIG);
+ unsigned long reg_val;
+
+ if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+ return -EINVAL;
+
+ if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
+ return -EFAULT;
+
+ switch (reg_num) {
+ case KVM_REG_RISCV_CONFIG_REG(isa):
+ if (!vcpu->arch.ran_atleast_once) {
+ vcpu->arch.isa = reg_val;
+ vcpu->arch.isa &= riscv_isa_extension_base(NULL);
+ vcpu->arch.isa &= KVM_RISCV_ISA_ALLOWED;
+ kvm_riscv_vcpu_fp_reset(vcpu);
+ } else {
+ return -EOPNOTSUPP;
+ }
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int kvm_riscv_vcpu_get_reg_core(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg)
+{
+ struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
+ unsigned long __user *uaddr =
+ (unsigned long __user *)(unsigned long)reg->addr;
+ unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+ KVM_REG_SIZE_MASK |
+ KVM_REG_RISCV_CORE);
+ unsigned long reg_val;
+
+ if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+ return -EINVAL;
+ if (reg_num >= sizeof(struct kvm_riscv_core) / sizeof(unsigned long))
+ return -EINVAL;
+
+ if (reg_num == KVM_REG_RISCV_CORE_REG(regs.pc))
+ reg_val = cntx->sepc;
+ else if (KVM_REG_RISCV_CORE_REG(regs.pc) < reg_num &&
+ reg_num <= KVM_REG_RISCV_CORE_REG(regs.t6))
+ reg_val = ((unsigned long *)cntx)[reg_num];
+ else if (reg_num == KVM_REG_RISCV_CORE_REG(mode))
+ reg_val = (cntx->sstatus & SR_SPP) ?
+ KVM_RISCV_MODE_S : KVM_RISCV_MODE_U;
+ else
+ return -EINVAL;
+
+ if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int kvm_riscv_vcpu_set_reg_core(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg)
+{
+ struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
+ unsigned long __user *uaddr =
+ (unsigned long __user *)(unsigned long)reg->addr;
+ unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+ KVM_REG_SIZE_MASK |
+ KVM_REG_RISCV_CORE);
+ unsigned long reg_val;
+
+ if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+ return -EINVAL;
+ if (reg_num >= sizeof(struct kvm_riscv_core) / sizeof(unsigned long))
+ return -EINVAL;
+
+ if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
+ return -EFAULT;
+
+ if (reg_num == KVM_REG_RISCV_CORE_REG(regs.pc))
+ cntx->sepc = reg_val;
+ else if (KVM_REG_RISCV_CORE_REG(regs.pc) < reg_num &&
+ reg_num <= KVM_REG_RISCV_CORE_REG(regs.t6))
+ ((unsigned long *)cntx)[reg_num] = reg_val;
+ else if (reg_num == KVM_REG_RISCV_CORE_REG(mode)) {
+ if (reg_val == KVM_RISCV_MODE_S)
+ cntx->sstatus |= SR_SPP;
+ else
+ cntx->sstatus &= ~SR_SPP;
+ } else
+ return -EINVAL;
+
+ return 0;
+}
+
+static int kvm_riscv_vcpu_get_reg_csr(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg)
+{
+ struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
+ unsigned long __user *uaddr =
+ (unsigned long __user *)(unsigned long)reg->addr;
+ unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+ KVM_REG_SIZE_MASK |
+ KVM_REG_RISCV_CSR);
+ unsigned long reg_val;
+
+ if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+ return -EINVAL;
+ if (reg_num >= sizeof(struct kvm_riscv_csr) / sizeof(unsigned long))
+ return -EINVAL;
+
+ if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) {
+ kvm_riscv_vcpu_flush_interrupts(vcpu);
+ reg_val = (csr->hvip >> VSIP_TO_HVIP_SHIFT) & VSIP_VALID_MASK;
+ } else
+ reg_val = ((unsigned long *)csr)[reg_num];
+
+ if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int kvm_riscv_vcpu_set_reg_csr(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg)
+{
+ struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
+ unsigned long __user *uaddr =
+ (unsigned long __user *)(unsigned long)reg->addr;
+ unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+ KVM_REG_SIZE_MASK |
+ KVM_REG_RISCV_CSR);
+ unsigned long reg_val;
+
+ if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+ return -EINVAL;
+ if (reg_num >= sizeof(struct kvm_riscv_csr) / sizeof(unsigned long))
+ return -EINVAL;
+
+ if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
+ return -EFAULT;
+
+ if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) {
+ reg_val &= VSIP_VALID_MASK;
+ reg_val <<= VSIP_TO_HVIP_SHIFT;
+ }
+
+ ((unsigned long *)csr)[reg_num] = reg_val;
+
+ if (reg_num == KVM_REG_RISCV_CSR_REG(sip))
+ WRITE_ONCE(vcpu->arch.irqs_pending_mask, 0);
+
+ return 0;
+}
+
+static int kvm_riscv_vcpu_set_reg(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg)
+{
+ if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CONFIG)
+ return kvm_riscv_vcpu_set_reg_config(vcpu, reg);
+ else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CORE)
+ return kvm_riscv_vcpu_set_reg_core(vcpu, reg);
+ else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CSR)
+ return kvm_riscv_vcpu_set_reg_csr(vcpu, reg);
+ else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_TIMER)
+ return kvm_riscv_vcpu_set_reg_timer(vcpu, reg);
+ else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_FP_F)
+ return kvm_riscv_vcpu_set_reg_fp(vcpu, reg,
+ KVM_REG_RISCV_FP_F);
+ else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_FP_D)
+ return kvm_riscv_vcpu_set_reg_fp(vcpu, reg,
+ KVM_REG_RISCV_FP_D);
+
+ return -EINVAL;
+}
+
+static int kvm_riscv_vcpu_get_reg(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg)
+{
+ if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CONFIG)
+ return kvm_riscv_vcpu_get_reg_config(vcpu, reg);
+ else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CORE)
+ return kvm_riscv_vcpu_get_reg_core(vcpu, reg);
+ else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CSR)
+ return kvm_riscv_vcpu_get_reg_csr(vcpu, reg);
+ else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_TIMER)
+ return kvm_riscv_vcpu_get_reg_timer(vcpu, reg);
+ else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_FP_F)
+ return kvm_riscv_vcpu_get_reg_fp(vcpu, reg,
+ KVM_REG_RISCV_FP_F);
+ else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_FP_D)
+ return kvm_riscv_vcpu_get_reg_fp(vcpu, reg,
+ KVM_REG_RISCV_FP_D);
+
+ return -EINVAL;
+}
+
+long kvm_arch_vcpu_async_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ struct kvm_vcpu *vcpu = filp->private_data;
+ void __user *argp = (void __user *)arg;
+
+ if (ioctl == KVM_INTERRUPT) {
+ struct kvm_interrupt irq;
+
+ if (copy_from_user(&irq, argp, sizeof(irq)))
+ return -EFAULT;
+
+ if (irq.irq == KVM_INTERRUPT_SET)
+ return kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_VS_EXT);
+ else
+ return kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_EXT);
+ }
+
+ return -ENOIOCTLCMD;
+}
+
+long kvm_arch_vcpu_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ struct kvm_vcpu *vcpu = filp->private_data;
+ void __user *argp = (void __user *)arg;
+ long r = -EINVAL;
+
+ switch (ioctl) {
+ case KVM_SET_ONE_REG:
+ case KVM_GET_ONE_REG: {
+ struct kvm_one_reg reg;
+
+ r = -EFAULT;
+ if (copy_from_user(&reg, argp, sizeof(reg)))
+ break;
+
+ if (ioctl == KVM_SET_ONE_REG)
+ r = kvm_riscv_vcpu_set_reg(vcpu, &reg);
+ else
+ r = kvm_riscv_vcpu_get_reg(vcpu, &reg);
+ break;
+ }
+ default:
+ break;
+ }
+
+ return r;
+}
+
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+ return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+ return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
+ struct kvm_translation *tr)
+{
+ return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+ return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+ return -EINVAL;
+}
+
+void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
+ unsigned long mask, val;
+
+ if (READ_ONCE(vcpu->arch.irqs_pending_mask)) {
+ mask = xchg_acquire(&vcpu->arch.irqs_pending_mask, 0);
+ val = READ_ONCE(vcpu->arch.irqs_pending) & mask;
+
+ csr->hvip &= ~mask;
+ csr->hvip |= val;
+ }
+}
+
+void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu)
+{
+ unsigned long hvip;
+ struct kvm_vcpu_arch *v = &vcpu->arch;
+ struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
+
+ /* Read current HVIP and VSIE CSRs */
+ csr->vsie = csr_read(CSR_VSIE);
+
+ /* Sync-up HVIP.VSSIP bit changes does by Guest */
+ hvip = csr_read(CSR_HVIP);
+ if ((csr->hvip ^ hvip) & (1UL << IRQ_VS_SOFT)) {
+ if (hvip & (1UL << IRQ_VS_SOFT)) {
+ if (!test_and_set_bit(IRQ_VS_SOFT,
+ &v->irqs_pending_mask))
+ set_bit(IRQ_VS_SOFT, &v->irqs_pending);
+ } else {
+ if (!test_and_set_bit(IRQ_VS_SOFT,
+ &v->irqs_pending_mask))
+ clear_bit(IRQ_VS_SOFT, &v->irqs_pending);
+ }
+ }
+}
+
+int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
+{
+ if (irq != IRQ_VS_SOFT &&
+ irq != IRQ_VS_TIMER &&
+ irq != IRQ_VS_EXT)
+ return -EINVAL;
+
+ set_bit(irq, &vcpu->arch.irqs_pending);
+ smp_mb__before_atomic();
+ set_bit(irq, &vcpu->arch.irqs_pending_mask);
+
+ kvm_vcpu_kick(vcpu);
+
+ return 0;
+}
+
+int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
+{
+ if (irq != IRQ_VS_SOFT &&
+ irq != IRQ_VS_TIMER &&
+ irq != IRQ_VS_EXT)
+ return -EINVAL;
+
+ clear_bit(irq, &vcpu->arch.irqs_pending);
+ smp_mb__before_atomic();
+ set_bit(irq, &vcpu->arch.irqs_pending_mask);
+
+ return 0;
+}
+
+bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, unsigned long mask)
+{
+ unsigned long ie = ((vcpu->arch.guest_csr.vsie & VSIP_VALID_MASK)
+ << VSIP_TO_HVIP_SHIFT) & mask;
+
+ return (READ_ONCE(vcpu->arch.irqs_pending) & ie) ? true : false;
+}
+
+void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.power_off = true;
+ kvm_make_request(KVM_REQ_SLEEP, vcpu);
+ kvm_vcpu_kick(vcpu);
+}
+
+void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.power_off = false;
+ kvm_vcpu_wake_up(vcpu);
+}
+
+int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
+ struct kvm_mp_state *mp_state)
+{
+ if (vcpu->arch.power_off)
+ mp_state->mp_state = KVM_MP_STATE_STOPPED;
+ else
+ mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
+
+ return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
+ struct kvm_mp_state *mp_state)
+{
+ int ret = 0;
+
+ switch (mp_state->mp_state) {
+ case KVM_MP_STATE_RUNNABLE:
+ vcpu->arch.power_off = false;
+ break;
+ case KVM_MP_STATE_STOPPED:
+ kvm_riscv_vcpu_power_off(vcpu);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug *dbg)
+{
+ /* TODO; To be implemented later. */
+ return -EINVAL;
+}
+
+void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+ struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
+
+ csr_write(CSR_VSSTATUS, csr->vsstatus);
+ csr_write(CSR_VSIE, csr->vsie);
+ csr_write(CSR_VSTVEC, csr->vstvec);
+ csr_write(CSR_VSSCRATCH, csr->vsscratch);
+ csr_write(CSR_VSEPC, csr->vsepc);
+ csr_write(CSR_VSCAUSE, csr->vscause);
+ csr_write(CSR_VSTVAL, csr->vstval);
+ csr_write(CSR_HVIP, csr->hvip);
+ csr_write(CSR_VSATP, csr->vsatp);
+
+ kvm_riscv_stage2_update_hgatp(vcpu);
+
+ kvm_riscv_vcpu_timer_restore(vcpu);
+
+ kvm_riscv_vcpu_host_fp_save(&vcpu->arch.host_context);
+ kvm_riscv_vcpu_guest_fp_restore(&vcpu->arch.guest_context,
+ vcpu->arch.isa);
+
+ vcpu->cpu = cpu;
+}
+
+void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
+
+ vcpu->cpu = -1;
+
+ kvm_riscv_vcpu_guest_fp_save(&vcpu->arch.guest_context,
+ vcpu->arch.isa);
+ kvm_riscv_vcpu_host_fp_restore(&vcpu->arch.host_context);
+
+ csr_write(CSR_HGATP, 0);
+
+ csr->vsstatus = csr_read(CSR_VSSTATUS);
+ csr->vsie = csr_read(CSR_VSIE);
+ csr->vstvec = csr_read(CSR_VSTVEC);
+ csr->vsscratch = csr_read(CSR_VSSCRATCH);
+ csr->vsepc = csr_read(CSR_VSEPC);
+ csr->vscause = csr_read(CSR_VSCAUSE);
+ csr->vstval = csr_read(CSR_VSTVAL);
+ csr->hvip = csr_read(CSR_HVIP);
+ csr->vsatp = csr_read(CSR_VSATP);
+}
+
+static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu)
+{
+ struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);
+
+ if (kvm_request_pending(vcpu)) {
+ if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) {
+ rcuwait_wait_event(wait,
+ (!vcpu->arch.power_off) && (!vcpu->arch.pause),
+ TASK_INTERRUPTIBLE);
+
+ if (vcpu->arch.power_off || vcpu->arch.pause) {
+ /*
+ * Awaken to handle a signal, request to
+ * sleep again later.
+ */
+ kvm_make_request(KVM_REQ_SLEEP, vcpu);
+ }
+ }
+
+ if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu))
+ kvm_riscv_reset_vcpu(vcpu);
+
+ if (kvm_check_request(KVM_REQ_UPDATE_HGATP, vcpu))
+ kvm_riscv_stage2_update_hgatp(vcpu);
+
+ if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
+ __kvm_riscv_hfence_gvma_all();
+ }
+}
+
+static void kvm_riscv_update_hvip(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
+
+ csr_write(CSR_HVIP, csr->hvip);
+}
+
+int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
+{
+ int ret;
+ struct kvm_cpu_trap trap;
+ struct kvm_run *run = vcpu->run;
+
+ /* Mark this VCPU ran at least once */
+ vcpu->arch.ran_atleast_once = true;
+
+ vcpu->arch.srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+ /* Process MMIO value returned from user-space */
+ if (run->exit_reason == KVM_EXIT_MMIO) {
+ ret = kvm_riscv_vcpu_mmio_return(vcpu, vcpu->run);
+ if (ret) {
+ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
+ return ret;
+ }
+ }
+
+ /* Process SBI value returned from user-space */
+ if (run->exit_reason == KVM_EXIT_RISCV_SBI) {
+ ret = kvm_riscv_vcpu_sbi_return(vcpu, vcpu->run);
+ if (ret) {
+ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
+ return ret;
+ }
+ }
+
+ if (run->immediate_exit) {
+ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
+ return -EINTR;
+ }
+
+ vcpu_load(vcpu);
+
+ kvm_sigset_activate(vcpu);
+
+ ret = 1;
+ run->exit_reason = KVM_EXIT_UNKNOWN;
+ while (ret > 0) {
+ /* Check conditions before entering the guest */
+ cond_resched();
+
+ kvm_riscv_stage2_vmid_update(vcpu);
+
+ kvm_riscv_check_vcpu_requests(vcpu);
+
+ preempt_disable();
+
+ local_irq_disable();
+
+ /*
+ * Exit if we have a signal pending so that we can deliver
+ * the signal to user space.
+ */
+ if (signal_pending(current)) {
+ ret = -EINTR;
+ run->exit_reason = KVM_EXIT_INTR;
+ }
+
+ /*
+ * Ensure we set mode to IN_GUEST_MODE after we disable
+ * interrupts and before the final VCPU requests check.
+ * See the comment in kvm_vcpu_exiting_guest_mode() and
+ * Documentation/virtual/kvm/vcpu-requests.rst
+ */
+ vcpu->mode = IN_GUEST_MODE;
+
+ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
+ smp_mb__after_srcu_read_unlock();
+
+ /*
+ * We might have got VCPU interrupts updated asynchronously
+ * so update it in HW.
+ */
+ kvm_riscv_vcpu_flush_interrupts(vcpu);
+
+ /* Update HVIP CSR for current CPU */
+ kvm_riscv_update_hvip(vcpu);
+
+ if (ret <= 0 ||
+ kvm_riscv_stage2_vmid_ver_changed(&vcpu->kvm->arch.vmid) ||
+ kvm_request_pending(vcpu)) {
+ vcpu->mode = OUTSIDE_GUEST_MODE;
+ local_irq_enable();
+ preempt_enable();
+ vcpu->arch.srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+ continue;
+ }
+
+ guest_enter_irqoff();
+
+ __kvm_riscv_switch_to(&vcpu->arch);
+
+ vcpu->mode = OUTSIDE_GUEST_MODE;
+ vcpu->stat.exits++;
+
+ /*
+ * Save SCAUSE, STVAL, HTVAL, and HTINST because we might
+ * get an interrupt between __kvm_riscv_switch_to() and
+ * local_irq_enable() which can potentially change CSRs.
+ */
+ trap.sepc = vcpu->arch.guest_context.sepc;
+ trap.scause = csr_read(CSR_SCAUSE);
+ trap.stval = csr_read(CSR_STVAL);
+ trap.htval = csr_read(CSR_HTVAL);
+ trap.htinst = csr_read(CSR_HTINST);
+
+ /* Syncup interrupts state with HW */
+ kvm_riscv_vcpu_sync_interrupts(vcpu);
+
+ /*
+ * We may have taken a host interrupt in VS/VU-mode (i.e.
+ * while executing the guest). This interrupt is still
+ * pending, as we haven't serviced it yet!
+ *
+ * We're now back in HS-mode with interrupts disabled
+ * so enabling the interrupts now will have the effect
+ * of taking the interrupt again, in HS-mode this time.
+ */
+ local_irq_enable();
+
+ /*
+ * We do local_irq_enable() before calling guest_exit() so
+ * that if a timer interrupt hits while running the guest
+ * we account that tick as being spent in the guest. We
+ * enable preemption after calling guest_exit() so that if
+ * we get preempted we make sure ticks after that is not
+ * counted as guest time.
+ */
+ guest_exit();
+
+ preempt_enable();
+
+ vcpu->arch.srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+ ret = kvm_riscv_vcpu_exit(vcpu, run, &trap);
+ }
+
+ kvm_sigset_deactivate(vcpu);
+
+ vcpu_put(vcpu);
+
+ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
+
+ return ret;
+}
diff --git a/arch/riscv/kvm/vcpu_exit.c b/arch/riscv/kvm/vcpu_exit.c
new file mode 100644
index 000000000000..7f2d742ae4c6
--- /dev/null
+++ b/arch/riscv/kvm/vcpu_exit.c
@@ -0,0 +1,701 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Anup Patel <anup.patel@wdc.com>
+ */
+
+#include <linux/bitops.h>
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <asm/csr.h>
+
+#define INSN_OPCODE_MASK 0x007c
+#define INSN_OPCODE_SHIFT 2
+#define INSN_OPCODE_SYSTEM 28
+
+#define INSN_MASK_WFI 0xffffffff
+#define INSN_MATCH_WFI 0x10500073
+
+#define INSN_MATCH_LB 0x3
+#define INSN_MASK_LB 0x707f
+#define INSN_MATCH_LH 0x1003
+#define INSN_MASK_LH 0x707f
+#define INSN_MATCH_LW 0x2003
+#define INSN_MASK_LW 0x707f
+#define INSN_MATCH_LD 0x3003
+#define INSN_MASK_LD 0x707f
+#define INSN_MATCH_LBU 0x4003
+#define INSN_MASK_LBU 0x707f
+#define INSN_MATCH_LHU 0x5003
+#define INSN_MASK_LHU 0x707f
+#define INSN_MATCH_LWU 0x6003
+#define INSN_MASK_LWU 0x707f
+#define INSN_MATCH_SB 0x23
+#define INSN_MASK_SB 0x707f
+#define INSN_MATCH_SH 0x1023
+#define INSN_MASK_SH 0x707f
+#define INSN_MATCH_SW 0x2023
+#define INSN_MASK_SW 0x707f
+#define INSN_MATCH_SD 0x3023
+#define INSN_MASK_SD 0x707f
+
+#define INSN_MATCH_C_LD 0x6000
+#define INSN_MASK_C_LD 0xe003
+#define INSN_MATCH_C_SD 0xe000
+#define INSN_MASK_C_SD 0xe003
+#define INSN_MATCH_C_LW 0x4000
+#define INSN_MASK_C_LW 0xe003
+#define INSN_MATCH_C_SW 0xc000
+#define INSN_MASK_C_SW 0xe003
+#define INSN_MATCH_C_LDSP 0x6002
+#define INSN_MASK_C_LDSP 0xe003
+#define INSN_MATCH_C_SDSP 0xe002
+#define INSN_MASK_C_SDSP 0xe003
+#define INSN_MATCH_C_LWSP 0x4002
+#define INSN_MASK_C_LWSP 0xe003
+#define INSN_MATCH_C_SWSP 0xc002
+#define INSN_MASK_C_SWSP 0xe003
+
+#define INSN_16BIT_MASK 0x3
+
+#define INSN_IS_16BIT(insn) (((insn) & INSN_16BIT_MASK) != INSN_16BIT_MASK)
+
+#define INSN_LEN(insn) (INSN_IS_16BIT(insn) ? 2 : 4)
+
+#ifdef CONFIG_64BIT
+#define LOG_REGBYTES 3
+#else
+#define LOG_REGBYTES 2
+#endif
+#define REGBYTES (1 << LOG_REGBYTES)
+
+#define SH_RD 7
+#define SH_RS1 15
+#define SH_RS2 20
+#define SH_RS2C 2
+
+#define RV_X(x, s, n) (((x) >> (s)) & ((1 << (n)) - 1))
+#define RVC_LW_IMM(x) ((RV_X(x, 6, 1) << 2) | \
+ (RV_X(x, 10, 3) << 3) | \
+ (RV_X(x, 5, 1) << 6))
+#define RVC_LD_IMM(x) ((RV_X(x, 10, 3) << 3) | \
+ (RV_X(x, 5, 2) << 6))
+#define RVC_LWSP_IMM(x) ((RV_X(x, 4, 3) << 2) | \
+ (RV_X(x, 12, 1) << 5) | \
+ (RV_X(x, 2, 2) << 6))
+#define RVC_LDSP_IMM(x) ((RV_X(x, 5, 2) << 3) | \
+ (RV_X(x, 12, 1) << 5) | \
+ (RV_X(x, 2, 3) << 6))
+#define RVC_SWSP_IMM(x) ((RV_X(x, 9, 4) << 2) | \
+ (RV_X(x, 7, 2) << 6))
+#define RVC_SDSP_IMM(x) ((RV_X(x, 10, 3) << 3) | \
+ (RV_X(x, 7, 3) << 6))
+#define RVC_RS1S(insn) (8 + RV_X(insn, SH_RD, 3))
+#define RVC_RS2S(insn) (8 + RV_X(insn, SH_RS2C, 3))
+#define RVC_RS2(insn) RV_X(insn, SH_RS2C, 5)
+
+#define SHIFT_RIGHT(x, y) \
+ ((y) < 0 ? ((x) << -(y)) : ((x) >> (y)))
+
+#define REG_MASK \
+ ((1 << (5 + LOG_REGBYTES)) - (1 << LOG_REGBYTES))
+
+#define REG_OFFSET(insn, pos) \
+ (SHIFT_RIGHT((insn), (pos) - LOG_REGBYTES) & REG_MASK)
+
+#define REG_PTR(insn, pos, regs) \
+ ((ulong *)((ulong)(regs) + REG_OFFSET(insn, pos)))
+
+#define GET_RM(insn) (((insn) >> 12) & 7)
+
+#define GET_RS1(insn, regs) (*REG_PTR(insn, SH_RS1, regs))
+#define GET_RS2(insn, regs) (*REG_PTR(insn, SH_RS2, regs))
+#define GET_RS1S(insn, regs) (*REG_PTR(RVC_RS1S(insn), 0, regs))
+#define GET_RS2S(insn, regs) (*REG_PTR(RVC_RS2S(insn), 0, regs))
+#define GET_RS2C(insn, regs) (*REG_PTR(insn, SH_RS2C, regs))
+#define GET_SP(regs) (*REG_PTR(2, 0, regs))
+#define SET_RD(insn, regs, val) (*REG_PTR(insn, SH_RD, regs) = (val))
+#define IMM_I(insn) ((s32)(insn) >> 20)
+#define IMM_S(insn) (((s32)(insn) >> 25 << 5) | \
+ (s32)(((insn) >> 7) & 0x1f))
+#define MASK_FUNCT3 0x7000
+
+static int truly_illegal_insn(struct kvm_vcpu *vcpu,
+ struct kvm_run *run,
+ ulong insn)
+{
+ struct kvm_cpu_trap utrap = { 0 };
+
+ /* Redirect trap to Guest VCPU */
+ utrap.sepc = vcpu->arch.guest_context.sepc;
+ utrap.scause = EXC_INST_ILLEGAL;
+ utrap.stval = insn;
+ kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
+
+ return 1;
+}
+
+static int system_opcode_insn(struct kvm_vcpu *vcpu,
+ struct kvm_run *run,
+ ulong insn)
+{
+ if ((insn & INSN_MASK_WFI) == INSN_MATCH_WFI) {
+ vcpu->stat.wfi_exit_stat++;
+ if (!kvm_arch_vcpu_runnable(vcpu)) {
+ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
+ kvm_vcpu_block(vcpu);
+ vcpu->arch.srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+ kvm_clear_request(KVM_REQ_UNHALT, vcpu);
+ }
+ vcpu->arch.guest_context.sepc += INSN_LEN(insn);
+ return 1;
+ }
+
+ return truly_illegal_insn(vcpu, run, insn);
+}
+
+static int virtual_inst_fault(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ struct kvm_cpu_trap *trap)
+{
+ unsigned long insn = trap->stval;
+ struct kvm_cpu_trap utrap = { 0 };
+ struct kvm_cpu_context *ct;
+
+ if (unlikely(INSN_IS_16BIT(insn))) {
+ if (insn == 0) {
+ ct = &vcpu->arch.guest_context;
+ insn = kvm_riscv_vcpu_unpriv_read(vcpu, true,
+ ct->sepc,
+ &utrap);
+ if (utrap.scause) {
+ utrap.sepc = ct->sepc;
+ kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
+ return 1;
+ }
+ }
+ if (INSN_IS_16BIT(insn))
+ return truly_illegal_insn(vcpu, run, insn);
+ }
+
+ switch ((insn & INSN_OPCODE_MASK) >> INSN_OPCODE_SHIFT) {
+ case INSN_OPCODE_SYSTEM:
+ return system_opcode_insn(vcpu, run, insn);
+ default:
+ return truly_illegal_insn(vcpu, run, insn);
+ }
+}
+
+static int emulate_load(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ unsigned long fault_addr, unsigned long htinst)
+{
+ u8 data_buf[8];
+ unsigned long insn;
+ int shift = 0, len = 0, insn_len = 0;
+ struct kvm_cpu_trap utrap = { 0 };
+ struct kvm_cpu_context *ct = &vcpu->arch.guest_context;
+
+ /* Determine trapped instruction */
+ if (htinst & 0x1) {
+ /*
+ * Bit[0] == 1 implies trapped instruction value is
+ * transformed instruction or custom instruction.
+ */
+ insn = htinst | INSN_16BIT_MASK;
+ insn_len = (htinst & BIT(1)) ? INSN_LEN(insn) : 2;
+ } else {
+ /*
+ * Bit[0] == 0 implies trapped instruction value is
+ * zero or special value.
+ */
+ insn = kvm_riscv_vcpu_unpriv_read(vcpu, true, ct->sepc,
+ &utrap);
+ if (utrap.scause) {
+ /* Redirect trap if we failed to read instruction */
+ utrap.sepc = ct->sepc;
+ kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
+ return 1;
+ }
+ insn_len = INSN_LEN(insn);
+ }
+
+ /* Decode length of MMIO and shift */
+ if ((insn & INSN_MASK_LW) == INSN_MATCH_LW) {
+ len = 4;
+ shift = 8 * (sizeof(ulong) - len);
+ } else if ((insn & INSN_MASK_LB) == INSN_MATCH_LB) {
+ len = 1;
+ shift = 8 * (sizeof(ulong) - len);
+ } else if ((insn & INSN_MASK_LBU) == INSN_MATCH_LBU) {
+ len = 1;
+ shift = 8 * (sizeof(ulong) - len);
+#ifdef CONFIG_64BIT
+ } else if ((insn & INSN_MASK_LD) == INSN_MATCH_LD) {
+ len = 8;
+ shift = 8 * (sizeof(ulong) - len);
+ } else if ((insn & INSN_MASK_LWU) == INSN_MATCH_LWU) {
+ len = 4;
+#endif
+ } else if ((insn & INSN_MASK_LH) == INSN_MATCH_LH) {
+ len = 2;
+ shift = 8 * (sizeof(ulong) - len);
+ } else if ((insn & INSN_MASK_LHU) == INSN_MATCH_LHU) {
+ len = 2;
+#ifdef CONFIG_64BIT
+ } else if ((insn & INSN_MASK_C_LD) == INSN_MATCH_C_LD) {
+ len = 8;
+ shift = 8 * (sizeof(ulong) - len);
+ insn = RVC_RS2S(insn) << SH_RD;
+ } else if ((insn & INSN_MASK_C_LDSP) == INSN_MATCH_C_LDSP &&
+ ((insn >> SH_RD) & 0x1f)) {
+ len = 8;
+ shift = 8 * (sizeof(ulong) - len);
+#endif
+ } else if ((insn & INSN_MASK_C_LW) == INSN_MATCH_C_LW) {
+ len = 4;
+ shift = 8 * (sizeof(ulong) - len);
+ insn = RVC_RS2S(insn) << SH_RD;
+ } else if ((insn & INSN_MASK_C_LWSP) == INSN_MATCH_C_LWSP &&
+ ((insn >> SH_RD) & 0x1f)) {
+ len = 4;
+ shift = 8 * (sizeof(ulong) - len);
+ } else {
+ return -EOPNOTSUPP;
+ }
+
+ /* Fault address should be aligned to length of MMIO */
+ if (fault_addr & (len - 1))
+ return -EIO;
+
+ /* Save instruction decode info */
+ vcpu->arch.mmio_decode.insn = insn;
+ vcpu->arch.mmio_decode.insn_len = insn_len;
+ vcpu->arch.mmio_decode.shift = shift;
+ vcpu->arch.mmio_decode.len = len;
+ vcpu->arch.mmio_decode.return_handled = 0;
+
+ /* Update MMIO details in kvm_run struct */
+ run->mmio.is_write = false;
+ run->mmio.phys_addr = fault_addr;
+ run->mmio.len = len;
+
+ /* Try to handle MMIO access in the kernel */
+ if (!kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_addr, len, data_buf)) {
+ /* Successfully handled MMIO access in the kernel so resume */
+ memcpy(run->mmio.data, data_buf, len);
+ vcpu->stat.mmio_exit_kernel++;
+ kvm_riscv_vcpu_mmio_return(vcpu, run);
+ return 1;
+ }
+
+ /* Exit to userspace for MMIO emulation */
+ vcpu->stat.mmio_exit_user++;
+ run->exit_reason = KVM_EXIT_MMIO;
+
+ return 0;
+}
+
+static int emulate_store(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ unsigned long fault_addr, unsigned long htinst)
+{
+ u8 data8;
+ u16 data16;
+ u32 data32;
+ u64 data64;
+ ulong data;
+ unsigned long insn;
+ int len = 0, insn_len = 0;
+ struct kvm_cpu_trap utrap = { 0 };
+ struct kvm_cpu_context *ct = &vcpu->arch.guest_context;
+
+ /* Determine trapped instruction */
+ if (htinst & 0x1) {
+ /*
+ * Bit[0] == 1 implies trapped instruction value is
+ * transformed instruction or custom instruction.
+ */
+ insn = htinst | INSN_16BIT_MASK;
+ insn_len = (htinst & BIT(1)) ? INSN_LEN(insn) : 2;
+ } else {
+ /*
+ * Bit[0] == 0 implies trapped instruction value is
+ * zero or special value.
+ */
+ insn = kvm_riscv_vcpu_unpriv_read(vcpu, true, ct->sepc,
+ &utrap);
+ if (utrap.scause) {
+ /* Redirect trap if we failed to read instruction */
+ utrap.sepc = ct->sepc;
+ kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
+ return 1;
+ }
+ insn_len = INSN_LEN(insn);
+ }
+
+ data = GET_RS2(insn, &vcpu->arch.guest_context);
+ data8 = data16 = data32 = data64 = data;
+
+ if ((insn & INSN_MASK_SW) == INSN_MATCH_SW) {
+ len = 4;
+ } else if ((insn & INSN_MASK_SB) == INSN_MATCH_SB) {
+ len = 1;
+#ifdef CONFIG_64BIT
+ } else if ((insn & INSN_MASK_SD) == INSN_MATCH_SD) {
+ len = 8;
+#endif
+ } else if ((insn & INSN_MASK_SH) == INSN_MATCH_SH) {
+ len = 2;
+#ifdef CONFIG_64BIT
+ } else if ((insn & INSN_MASK_C_SD) == INSN_MATCH_C_SD) {
+ len = 8;
+ data64 = GET_RS2S(insn, &vcpu->arch.guest_context);
+ } else if ((insn & INSN_MASK_C_SDSP) == INSN_MATCH_C_SDSP &&
+ ((insn >> SH_RD) & 0x1f)) {
+ len = 8;
+ data64 = GET_RS2C(insn, &vcpu->arch.guest_context);
+#endif
+ } else if ((insn & INSN_MASK_C_SW) == INSN_MATCH_C_SW) {
+ len = 4;
+ data32 = GET_RS2S(insn, &vcpu->arch.guest_context);
+ } else if ((insn & INSN_MASK_C_SWSP) == INSN_MATCH_C_SWSP &&
+ ((insn >> SH_RD) & 0x1f)) {
+ len = 4;
+ data32 = GET_RS2C(insn, &vcpu->arch.guest_context);
+ } else {
+ return -EOPNOTSUPP;
+ }
+
+ /* Fault address should be aligned to length of MMIO */
+ if (fault_addr & (len - 1))
+ return -EIO;
+
+ /* Save instruction decode info */
+ vcpu->arch.mmio_decode.insn = insn;
+ vcpu->arch.mmio_decode.insn_len = insn_len;
+ vcpu->arch.mmio_decode.shift = 0;
+ vcpu->arch.mmio_decode.len = len;
+ vcpu->arch.mmio_decode.return_handled = 0;
+
+ /* Copy data to kvm_run instance */
+ switch (len) {
+ case 1:
+ *((u8 *)run->mmio.data) = data8;
+ break;
+ case 2:
+ *((u16 *)run->mmio.data) = data16;
+ break;
+ case 4:
+ *((u32 *)run->mmio.data) = data32;
+ break;
+ case 8:
+ *((u64 *)run->mmio.data) = data64;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ /* Update MMIO details in kvm_run struct */
+ run->mmio.is_write = true;
+ run->mmio.phys_addr = fault_addr;
+ run->mmio.len = len;
+
+ /* Try to handle MMIO access in the kernel */
+ if (!kvm_io_bus_write(vcpu, KVM_MMIO_BUS,
+ fault_addr, len, run->mmio.data)) {
+ /* Successfully handled MMIO access in the kernel so resume */
+ vcpu->stat.mmio_exit_kernel++;
+ kvm_riscv_vcpu_mmio_return(vcpu, run);
+ return 1;
+ }
+
+ /* Exit to userspace for MMIO emulation */
+ vcpu->stat.mmio_exit_user++;
+ run->exit_reason = KVM_EXIT_MMIO;
+
+ return 0;
+}
+
+static int stage2_page_fault(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ struct kvm_cpu_trap *trap)
+{
+ struct kvm_memory_slot *memslot;
+ unsigned long hva, fault_addr;
+ bool writeable;
+ gfn_t gfn;
+ int ret;
+
+ fault_addr = (trap->htval << 2) | (trap->stval & 0x3);
+ gfn = fault_addr >> PAGE_SHIFT;
+ memslot = gfn_to_memslot(vcpu->kvm, gfn);
+ hva = gfn_to_hva_memslot_prot(memslot, gfn, &writeable);
+
+ if (kvm_is_error_hva(hva) ||
+ (trap->scause == EXC_STORE_GUEST_PAGE_FAULT && !writeable)) {
+ switch (trap->scause) {
+ case EXC_LOAD_GUEST_PAGE_FAULT:
+ return emulate_load(vcpu, run, fault_addr,
+ trap->htinst);
+ case EXC_STORE_GUEST_PAGE_FAULT:
+ return emulate_store(vcpu, run, fault_addr,
+ trap->htinst);
+ default:
+ return -EOPNOTSUPP;
+ };
+ }
+
+ ret = kvm_riscv_stage2_map(vcpu, memslot, fault_addr, hva,
+ (trap->scause == EXC_STORE_GUEST_PAGE_FAULT) ? true : false);
+ if (ret < 0)
+ return ret;
+
+ return 1;
+}
+
+/**
+ * kvm_riscv_vcpu_unpriv_read -- Read machine word from Guest memory
+ *
+ * @vcpu: The VCPU pointer
+ * @read_insn: Flag representing whether we are reading instruction
+ * @guest_addr: Guest address to read
+ * @trap: Output pointer to trap details
+ */
+unsigned long kvm_riscv_vcpu_unpriv_read(struct kvm_vcpu *vcpu,
+ bool read_insn,
+ unsigned long guest_addr,
+ struct kvm_cpu_trap *trap)
+{
+ register unsigned long taddr asm("a0") = (unsigned long)trap;
+ register unsigned long ttmp asm("a1");
+ register unsigned long val asm("t0");
+ register unsigned long tmp asm("t1");
+ register unsigned long addr asm("t2") = guest_addr;
+ unsigned long flags;
+ unsigned long old_stvec, old_hstatus;
+
+ local_irq_save(flags);
+
+ old_hstatus = csr_swap(CSR_HSTATUS, vcpu->arch.guest_context.hstatus);
+ old_stvec = csr_swap(CSR_STVEC, (ulong)&__kvm_riscv_unpriv_trap);
+
+ if (read_insn) {
+ /*
+ * HLVX.HU instruction
+ * 0110010 00011 rs1 100 rd 1110011
+ */
+ asm volatile ("\n"
+ ".option push\n"
+ ".option norvc\n"
+ "add %[ttmp], %[taddr], 0\n"
+ /*
+ * HLVX.HU %[val], (%[addr])
+ * HLVX.HU t0, (t2)
+ * 0110010 00011 00111 100 00101 1110011
+ */
+ ".word 0x6433c2f3\n"
+ "andi %[tmp], %[val], 3\n"
+ "addi %[tmp], %[tmp], -3\n"
+ "bne %[tmp], zero, 2f\n"
+ "addi %[addr], %[addr], 2\n"
+ /*
+ * HLVX.HU %[tmp], (%[addr])
+ * HLVX.HU t1, (t2)
+ * 0110010 00011 00111 100 00110 1110011
+ */
+ ".word 0x6433c373\n"
+ "sll %[tmp], %[tmp], 16\n"
+ "add %[val], %[val], %[tmp]\n"
+ "2:\n"
+ ".option pop"
+ : [val] "=&r" (val), [tmp] "=&r" (tmp),
+ [taddr] "+&r" (taddr), [ttmp] "+&r" (ttmp),
+ [addr] "+&r" (addr) : : "memory");
+
+ if (trap->scause == EXC_LOAD_PAGE_FAULT)
+ trap->scause = EXC_INST_PAGE_FAULT;
+ } else {
+ /*
+ * HLV.D instruction
+ * 0110110 00000 rs1 100 rd 1110011
+ *
+ * HLV.W instruction
+ * 0110100 00000 rs1 100 rd 1110011
+ */
+ asm volatile ("\n"
+ ".option push\n"
+ ".option norvc\n"
+ "add %[ttmp], %[taddr], 0\n"
+#ifdef CONFIG_64BIT
+ /*
+ * HLV.D %[val], (%[addr])
+ * HLV.D t0, (t2)
+ * 0110110 00000 00111 100 00101 1110011
+ */
+ ".word 0x6c03c2f3\n"
+#else
+ /*
+ * HLV.W %[val], (%[addr])
+ * HLV.W t0, (t2)
+ * 0110100 00000 00111 100 00101 1110011
+ */
+ ".word 0x6803c2f3\n"
+#endif
+ ".option pop"
+ : [val] "=&r" (val),
+ [taddr] "+&r" (taddr), [ttmp] "+&r" (ttmp)
+ : [addr] "r" (addr) : "memory");
+ }
+
+ csr_write(CSR_STVEC, old_stvec);
+ csr_write(CSR_HSTATUS, old_hstatus);
+
+ local_irq_restore(flags);
+
+ return val;
+}
+
+/**
+ * kvm_riscv_vcpu_trap_redirect -- Redirect trap to Guest
+ *
+ * @vcpu: The VCPU pointer
+ * @trap: Trap details
+ */
+void kvm_riscv_vcpu_trap_redirect(struct kvm_vcpu *vcpu,
+ struct kvm_cpu_trap *trap)
+{
+ unsigned long vsstatus = csr_read(CSR_VSSTATUS);
+
+ /* Change Guest SSTATUS.SPP bit */
+ vsstatus &= ~SR_SPP;
+ if (vcpu->arch.guest_context.sstatus & SR_SPP)
+ vsstatus |= SR_SPP;
+
+ /* Change Guest SSTATUS.SPIE bit */
+ vsstatus &= ~SR_SPIE;
+ if (vsstatus & SR_SIE)
+ vsstatus |= SR_SPIE;
+
+ /* Clear Guest SSTATUS.SIE bit */
+ vsstatus &= ~SR_SIE;
+
+ /* Update Guest SSTATUS */
+ csr_write(CSR_VSSTATUS, vsstatus);
+
+ /* Update Guest SCAUSE, STVAL, and SEPC */
+ csr_write(CSR_VSCAUSE, trap->scause);
+ csr_write(CSR_VSTVAL, trap->stval);
+ csr_write(CSR_VSEPC, trap->sepc);
+
+ /* Set Guest PC to Guest exception vector */
+ vcpu->arch.guest_context.sepc = csr_read(CSR_VSTVEC);
+}
+
+/**
+ * kvm_riscv_vcpu_mmio_return -- Handle MMIO loads after user space emulation
+ * or in-kernel IO emulation
+ *
+ * @vcpu: The VCPU pointer
+ * @run: The VCPU run struct containing the mmio data
+ */
+int kvm_riscv_vcpu_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ u8 data8;
+ u16 data16;
+ u32 data32;
+ u64 data64;
+ ulong insn;
+ int len, shift;
+
+ if (vcpu->arch.mmio_decode.return_handled)
+ return 0;
+
+ vcpu->arch.mmio_decode.return_handled = 1;
+ insn = vcpu->arch.mmio_decode.insn;
+
+ if (run->mmio.is_write)
+ goto done;
+
+ len = vcpu->arch.mmio_decode.len;
+ shift = vcpu->arch.mmio_decode.shift;
+
+ switch (len) {
+ case 1:
+ data8 = *((u8 *)run->mmio.data);
+ SET_RD(insn, &vcpu->arch.guest_context,
+ (ulong)data8 << shift >> shift);
+ break;
+ case 2:
+ data16 = *((u16 *)run->mmio.data);
+ SET_RD(insn, &vcpu->arch.guest_context,
+ (ulong)data16 << shift >> shift);
+ break;
+ case 4:
+ data32 = *((u32 *)run->mmio.data);
+ SET_RD(insn, &vcpu->arch.guest_context,
+ (ulong)data32 << shift >> shift);
+ break;
+ case 8:
+ data64 = *((u64 *)run->mmio.data);
+ SET_RD(insn, &vcpu->arch.guest_context,
+ (ulong)data64 << shift >> shift);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+done:
+ /* Move to next instruction */
+ vcpu->arch.guest_context.sepc += vcpu->arch.mmio_decode.insn_len;
+
+ return 0;
+}
+
+/*
+ * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on
+ * proper exit to userspace.
+ */
+int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ struct kvm_cpu_trap *trap)
+{
+ int ret;
+
+ /* If we got host interrupt then do nothing */
+ if (trap->scause & CAUSE_IRQ_FLAG)
+ return 1;
+
+ /* Handle guest traps */
+ ret = -EFAULT;
+ run->exit_reason = KVM_EXIT_UNKNOWN;
+ switch (trap->scause) {
+ case EXC_VIRTUAL_INST_FAULT:
+ if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV)
+ ret = virtual_inst_fault(vcpu, run, trap);
+ break;
+ case EXC_INST_GUEST_PAGE_FAULT:
+ case EXC_LOAD_GUEST_PAGE_FAULT:
+ case EXC_STORE_GUEST_PAGE_FAULT:
+ if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV)
+ ret = stage2_page_fault(vcpu, run, trap);
+ break;
+ case EXC_SUPERVISOR_SYSCALL:
+ if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV)
+ ret = kvm_riscv_vcpu_sbi_ecall(vcpu, run);
+ break;
+ default:
+ break;
+ }
+
+ /* Print details in-case of error */
+ if (ret < 0) {
+ kvm_err("VCPU exit error %d\n", ret);
+ kvm_err("SEPC=0x%lx SSTATUS=0x%lx HSTATUS=0x%lx\n",
+ vcpu->arch.guest_context.sepc,
+ vcpu->arch.guest_context.sstatus,
+ vcpu->arch.guest_context.hstatus);
+ kvm_err("SCAUSE=0x%lx STVAL=0x%lx HTVAL=0x%lx HTINST=0x%lx\n",
+ trap->scause, trap->stval, trap->htval, trap->htinst);
+ }
+
+ return ret;
+}
diff --git a/arch/riscv/kvm/vcpu_fp.c b/arch/riscv/kvm/vcpu_fp.c
new file mode 100644
index 000000000000..1b070152578f
--- /dev/null
+++ b/arch/riscv/kvm/vcpu_fp.c
@@ -0,0 +1,167 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Atish Patra <atish.patra@wdc.com>
+ * Anup Patel <anup.patel@wdc.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <linux/uaccess.h>
+
+#ifdef CONFIG_FPU
+void kvm_riscv_vcpu_fp_reset(struct kvm_vcpu *vcpu)
+{
+ unsigned long isa = vcpu->arch.isa;
+ struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
+
+ cntx->sstatus &= ~SR_FS;
+ if (riscv_isa_extension_available(&isa, f) ||
+ riscv_isa_extension_available(&isa, d))
+ cntx->sstatus |= SR_FS_INITIAL;
+ else
+ cntx->sstatus |= SR_FS_OFF;
+}
+
+void kvm_riscv_vcpu_fp_clean(struct kvm_cpu_context *cntx)
+{
+ cntx->sstatus &= ~SR_FS;
+ cntx->sstatus |= SR_FS_CLEAN;
+}
+
+void kvm_riscv_vcpu_guest_fp_save(struct kvm_cpu_context *cntx,
+ unsigned long isa)
+{
+ if ((cntx->sstatus & SR_FS) == SR_FS_DIRTY) {
+ if (riscv_isa_extension_available(&isa, d))
+ __kvm_riscv_fp_d_save(cntx);
+ else if (riscv_isa_extension_available(&isa, f))
+ __kvm_riscv_fp_f_save(cntx);
+ kvm_riscv_vcpu_fp_clean(cntx);
+ }
+}
+
+void kvm_riscv_vcpu_guest_fp_restore(struct kvm_cpu_context *cntx,
+ unsigned long isa)
+{
+ if ((cntx->sstatus & SR_FS) != SR_FS_OFF) {
+ if (riscv_isa_extension_available(&isa, d))
+ __kvm_riscv_fp_d_restore(cntx);
+ else if (riscv_isa_extension_available(&isa, f))
+ __kvm_riscv_fp_f_restore(cntx);
+ kvm_riscv_vcpu_fp_clean(cntx);
+ }
+}
+
+void kvm_riscv_vcpu_host_fp_save(struct kvm_cpu_context *cntx)
+{
+ /* No need to check host sstatus as it can be modified outside */
+ if (riscv_isa_extension_available(NULL, d))
+ __kvm_riscv_fp_d_save(cntx);
+ else if (riscv_isa_extension_available(NULL, f))
+ __kvm_riscv_fp_f_save(cntx);
+}
+
+void kvm_riscv_vcpu_host_fp_restore(struct kvm_cpu_context *cntx)
+{
+ if (riscv_isa_extension_available(NULL, d))
+ __kvm_riscv_fp_d_restore(cntx);
+ else if (riscv_isa_extension_available(NULL, f))
+ __kvm_riscv_fp_f_restore(cntx);
+}
+#endif
+
+int kvm_riscv_vcpu_get_reg_fp(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg,
+ unsigned long rtype)
+{
+ struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
+ unsigned long isa = vcpu->arch.isa;
+ unsigned long __user *uaddr =
+ (unsigned long __user *)(unsigned long)reg->addr;
+ unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+ KVM_REG_SIZE_MASK |
+ rtype);
+ void *reg_val;
+
+ if ((rtype == KVM_REG_RISCV_FP_F) &&
+ riscv_isa_extension_available(&isa, f)) {
+ if (KVM_REG_SIZE(reg->id) != sizeof(u32))
+ return -EINVAL;
+ if (reg_num == KVM_REG_RISCV_FP_F_REG(fcsr))
+ reg_val = &cntx->fp.f.fcsr;
+ else if ((KVM_REG_RISCV_FP_F_REG(f[0]) <= reg_num) &&
+ reg_num <= KVM_REG_RISCV_FP_F_REG(f[31]))
+ reg_val = &cntx->fp.f.f[reg_num];
+ else
+ return -EINVAL;
+ } else if ((rtype == KVM_REG_RISCV_FP_D) &&
+ riscv_isa_extension_available(&isa, d)) {
+ if (reg_num == KVM_REG_RISCV_FP_D_REG(fcsr)) {
+ if (KVM_REG_SIZE(reg->id) != sizeof(u32))
+ return -EINVAL;
+ reg_val = &cntx->fp.d.fcsr;
+ } else if ((KVM_REG_RISCV_FP_D_REG(f[0]) <= reg_num) &&
+ reg_num <= KVM_REG_RISCV_FP_D_REG(f[31])) {
+ if (KVM_REG_SIZE(reg->id) != sizeof(u64))
+ return -EINVAL;
+ reg_val = &cntx->fp.d.f[reg_num];
+ } else
+ return -EINVAL;
+ } else
+ return -EINVAL;
+
+ if (copy_to_user(uaddr, reg_val, KVM_REG_SIZE(reg->id)))
+ return -EFAULT;
+
+ return 0;
+}
+
+int kvm_riscv_vcpu_set_reg_fp(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg,
+ unsigned long rtype)
+{
+ struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
+ unsigned long isa = vcpu->arch.isa;
+ unsigned long __user *uaddr =
+ (unsigned long __user *)(unsigned long)reg->addr;
+ unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+ KVM_REG_SIZE_MASK |
+ rtype);
+ void *reg_val;
+
+ if ((rtype == KVM_REG_RISCV_FP_F) &&
+ riscv_isa_extension_available(&isa, f)) {
+ if (KVM_REG_SIZE(reg->id) != sizeof(u32))
+ return -EINVAL;
+ if (reg_num == KVM_REG_RISCV_FP_F_REG(fcsr))
+ reg_val = &cntx->fp.f.fcsr;
+ else if ((KVM_REG_RISCV_FP_F_REG(f[0]) <= reg_num) &&
+ reg_num <= KVM_REG_RISCV_FP_F_REG(f[31]))
+ reg_val = &cntx->fp.f.f[reg_num];
+ else
+ return -EINVAL;
+ } else if ((rtype == KVM_REG_RISCV_FP_D) &&
+ riscv_isa_extension_available(&isa, d)) {
+ if (reg_num == KVM_REG_RISCV_FP_D_REG(fcsr)) {
+ if (KVM_REG_SIZE(reg->id) != sizeof(u32))
+ return -EINVAL;
+ reg_val = &cntx->fp.d.fcsr;
+ } else if ((KVM_REG_RISCV_FP_D_REG(f[0]) <= reg_num) &&
+ reg_num <= KVM_REG_RISCV_FP_D_REG(f[31])) {
+ if (KVM_REG_SIZE(reg->id) != sizeof(u64))
+ return -EINVAL;
+ reg_val = &cntx->fp.d.f[reg_num];
+ } else
+ return -EINVAL;
+ } else
+ return -EINVAL;
+
+ if (copy_from_user(reg_val, uaddr, KVM_REG_SIZE(reg->id)))
+ return -EFAULT;
+
+ return 0;
+}
diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c
new file mode 100644
index 000000000000..eb3c045edf11
--- /dev/null
+++ b/arch/riscv/kvm/vcpu_sbi.c
@@ -0,0 +1,185 @@
+// SPDX-License-Identifier: GPL-2.0
+/**
+ * Copyright (c) 2019 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Atish Patra <atish.patra@wdc.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <asm/csr.h>
+#include <asm/sbi.h>
+#include <asm/kvm_vcpu_timer.h>
+
+#define SBI_VERSION_MAJOR 0
+#define SBI_VERSION_MINOR 1
+
+static void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu,
+ struct kvm_run *run)
+{
+ struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+
+ vcpu->arch.sbi_context.return_handled = 0;
+ vcpu->stat.ecall_exit_stat++;
+ run->exit_reason = KVM_EXIT_RISCV_SBI;
+ run->riscv_sbi.extension_id = cp->a7;
+ run->riscv_sbi.function_id = cp->a6;
+ run->riscv_sbi.args[0] = cp->a0;
+ run->riscv_sbi.args[1] = cp->a1;
+ run->riscv_sbi.args[2] = cp->a2;
+ run->riscv_sbi.args[3] = cp->a3;
+ run->riscv_sbi.args[4] = cp->a4;
+ run->riscv_sbi.args[5] = cp->a5;
+ run->riscv_sbi.ret[0] = cp->a0;
+ run->riscv_sbi.ret[1] = cp->a1;
+}
+
+int kvm_riscv_vcpu_sbi_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+
+ /* Handle SBI return only once */
+ if (vcpu->arch.sbi_context.return_handled)
+ return 0;
+ vcpu->arch.sbi_context.return_handled = 1;
+
+ /* Update return values */
+ cp->a0 = run->riscv_sbi.ret[0];
+ cp->a1 = run->riscv_sbi.ret[1];
+
+ /* Move to next instruction */
+ vcpu->arch.guest_context.sepc += 4;
+
+ return 0;
+}
+
+#ifdef CONFIG_RISCV_SBI_V01
+
+static void kvm_sbi_system_shutdown(struct kvm_vcpu *vcpu,
+ struct kvm_run *run, u32 type)
+{
+ int i;
+ struct kvm_vcpu *tmp;
+
+ kvm_for_each_vcpu(i, tmp, vcpu->kvm)
+ tmp->arch.power_off = true;
+ kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP);
+
+ memset(&run->system_event, 0, sizeof(run->system_event));
+ run->system_event.type = type;
+ run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
+}
+
+int kvm_riscv_vcpu_sbi_ecall(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ ulong hmask;
+ int i, ret = 1;
+ u64 next_cycle;
+ struct kvm_vcpu *rvcpu;
+ bool next_sepc = true;
+ struct cpumask cm, hm;
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_cpu_trap utrap = { 0 };
+ struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+
+ if (!cp)
+ return -EINVAL;
+
+ switch (cp->a7) {
+ case SBI_EXT_0_1_CONSOLE_GETCHAR:
+ case SBI_EXT_0_1_CONSOLE_PUTCHAR:
+ /*
+ * The CONSOLE_GETCHAR/CONSOLE_PUTCHAR SBI calls cannot be
+ * handled in kernel so we forward these to user-space
+ */
+ kvm_riscv_vcpu_sbi_forward(vcpu, run);
+ next_sepc = false;
+ ret = 0;
+ break;
+ case SBI_EXT_0_1_SET_TIMER:
+#if __riscv_xlen == 32
+ next_cycle = ((u64)cp->a1 << 32) | (u64)cp->a0;
+#else
+ next_cycle = (u64)cp->a0;
+#endif
+ kvm_riscv_vcpu_timer_next_event(vcpu, next_cycle);
+ break;
+ case SBI_EXT_0_1_CLEAR_IPI:
+ kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_SOFT);
+ break;
+ case SBI_EXT_0_1_SEND_IPI:
+ if (cp->a0)
+ hmask = kvm_riscv_vcpu_unpriv_read(vcpu, false, cp->a0,
+ &utrap);
+ else
+ hmask = (1UL << atomic_read(&kvm->online_vcpus)) - 1;
+ if (utrap.scause) {
+ utrap.sepc = cp->sepc;
+ kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
+ next_sepc = false;
+ break;
+ }
+ for_each_set_bit(i, &hmask, BITS_PER_LONG) {
+ rvcpu = kvm_get_vcpu_by_id(vcpu->kvm, i);
+ kvm_riscv_vcpu_set_interrupt(rvcpu, IRQ_VS_SOFT);
+ }
+ break;
+ case SBI_EXT_0_1_SHUTDOWN:
+ kvm_sbi_system_shutdown(vcpu, run, KVM_SYSTEM_EVENT_SHUTDOWN);
+ next_sepc = false;
+ ret = 0;
+ break;
+ case SBI_EXT_0_1_REMOTE_FENCE_I:
+ case SBI_EXT_0_1_REMOTE_SFENCE_VMA:
+ case SBI_EXT_0_1_REMOTE_SFENCE_VMA_ASID:
+ if (cp->a0)
+ hmask = kvm_riscv_vcpu_unpriv_read(vcpu, false, cp->a0,
+ &utrap);
+ else
+ hmask = (1UL << atomic_read(&kvm->online_vcpus)) - 1;
+ if (utrap.scause) {
+ utrap.sepc = cp->sepc;
+ kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
+ next_sepc = false;
+ break;
+ }
+ cpumask_clear(&cm);
+ for_each_set_bit(i, &hmask, BITS_PER_LONG) {
+ rvcpu = kvm_get_vcpu_by_id(vcpu->kvm, i);
+ if (rvcpu->cpu < 0)
+ continue;
+ cpumask_set_cpu(rvcpu->cpu, &cm);
+ }
+ riscv_cpuid_to_hartid_mask(&cm, &hm);
+ if (cp->a7 == SBI_EXT_0_1_REMOTE_FENCE_I)
+ sbi_remote_fence_i(cpumask_bits(&hm));
+ else if (cp->a7 == SBI_EXT_0_1_REMOTE_SFENCE_VMA)
+ sbi_remote_hfence_vvma(cpumask_bits(&hm),
+ cp->a1, cp->a2);
+ else
+ sbi_remote_hfence_vvma_asid(cpumask_bits(&hm),
+ cp->a1, cp->a2, cp->a3);
+ break;
+ default:
+ /* Return error for unsupported SBI calls */
+ cp->a0 = SBI_ERR_NOT_SUPPORTED;
+ break;
+ }
+
+ if (next_sepc)
+ cp->sepc += 4;
+
+ return ret;
+}
+
+#else
+
+int kvm_riscv_vcpu_sbi_ecall(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ kvm_riscv_vcpu_sbi_forward(vcpu, run);
+ return 0;
+}
+
+#endif
diff --git a/arch/riscv/kvm/vcpu_switch.S b/arch/riscv/kvm/vcpu_switch.S
new file mode 100644
index 000000000000..029a28a195c6
--- /dev/null
+++ b/arch/riscv/kvm/vcpu_switch.S
@@ -0,0 +1,400 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Anup Patel <anup.patel@wdc.com>
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/csr.h>
+
+ .text
+ .altmacro
+ .option norelax
+
+ENTRY(__kvm_riscv_switch_to)
+ /* Save Host GPRs (except A0 and T0-T6) */
+ REG_S ra, (KVM_ARCH_HOST_RA)(a0)
+ REG_S sp, (KVM_ARCH_HOST_SP)(a0)
+ REG_S gp, (KVM_ARCH_HOST_GP)(a0)
+ REG_S tp, (KVM_ARCH_HOST_TP)(a0)
+ REG_S s0, (KVM_ARCH_HOST_S0)(a0)
+ REG_S s1, (KVM_ARCH_HOST_S1)(a0)
+ REG_S a1, (KVM_ARCH_HOST_A1)(a0)
+ REG_S a2, (KVM_ARCH_HOST_A2)(a0)
+ REG_S a3, (KVM_ARCH_HOST_A3)(a0)
+ REG_S a4, (KVM_ARCH_HOST_A4)(a0)
+ REG_S a5, (KVM_ARCH_HOST_A5)(a0)
+ REG_S a6, (KVM_ARCH_HOST_A6)(a0)
+ REG_S a7, (KVM_ARCH_HOST_A7)(a0)
+ REG_S s2, (KVM_ARCH_HOST_S2)(a0)
+ REG_S s3, (KVM_ARCH_HOST_S3)(a0)
+ REG_S s4, (KVM_ARCH_HOST_S4)(a0)
+ REG_S s5, (KVM_ARCH_HOST_S5)(a0)
+ REG_S s6, (KVM_ARCH_HOST_S6)(a0)
+ REG_S s7, (KVM_ARCH_HOST_S7)(a0)
+ REG_S s8, (KVM_ARCH_HOST_S8)(a0)
+ REG_S s9, (KVM_ARCH_HOST_S9)(a0)
+ REG_S s10, (KVM_ARCH_HOST_S10)(a0)
+ REG_S s11, (KVM_ARCH_HOST_S11)(a0)
+
+ /* Save Host and Restore Guest SSTATUS */
+ REG_L t0, (KVM_ARCH_GUEST_SSTATUS)(a0)
+ csrrw t0, CSR_SSTATUS, t0
+ REG_S t0, (KVM_ARCH_HOST_SSTATUS)(a0)
+
+ /* Save Host and Restore Guest HSTATUS */
+ REG_L t1, (KVM_ARCH_GUEST_HSTATUS)(a0)
+ csrrw t1, CSR_HSTATUS, t1
+ REG_S t1, (KVM_ARCH_HOST_HSTATUS)(a0)
+
+ /* Save Host and Restore Guest SCOUNTEREN */
+ REG_L t2, (KVM_ARCH_GUEST_SCOUNTEREN)(a0)
+ csrrw t2, CSR_SCOUNTEREN, t2
+ REG_S t2, (KVM_ARCH_HOST_SCOUNTEREN)(a0)
+
+ /* Save Host SSCRATCH and change it to struct kvm_vcpu_arch pointer */
+ csrrw t3, CSR_SSCRATCH, a0
+ REG_S t3, (KVM_ARCH_HOST_SSCRATCH)(a0)
+
+ /* Save Host STVEC and change it to return path */
+ la t4, __kvm_switch_return
+ csrrw t4, CSR_STVEC, t4
+ REG_S t4, (KVM_ARCH_HOST_STVEC)(a0)
+
+ /* Restore Guest SEPC */
+ REG_L t0, (KVM_ARCH_GUEST_SEPC)(a0)
+ csrw CSR_SEPC, t0
+
+ /* Restore Guest GPRs (except A0) */
+ REG_L ra, (KVM_ARCH_GUEST_RA)(a0)
+ REG_L sp, (KVM_ARCH_GUEST_SP)(a0)
+ REG_L gp, (KVM_ARCH_GUEST_GP)(a0)
+ REG_L tp, (KVM_ARCH_GUEST_TP)(a0)
+ REG_L t0, (KVM_ARCH_GUEST_T0)(a0)
+ REG_L t1, (KVM_ARCH_GUEST_T1)(a0)
+ REG_L t2, (KVM_ARCH_GUEST_T2)(a0)
+ REG_L s0, (KVM_ARCH_GUEST_S0)(a0)
+ REG_L s1, (KVM_ARCH_GUEST_S1)(a0)
+ REG_L a1, (KVM_ARCH_GUEST_A1)(a0)
+ REG_L a2, (KVM_ARCH_GUEST_A2)(a0)
+ REG_L a3, (KVM_ARCH_GUEST_A3)(a0)
+ REG_L a4, (KVM_ARCH_GUEST_A4)(a0)
+ REG_L a5, (KVM_ARCH_GUEST_A5)(a0)
+ REG_L a6, (KVM_ARCH_GUEST_A6)(a0)
+ REG_L a7, (KVM_ARCH_GUEST_A7)(a0)
+ REG_L s2, (KVM_ARCH_GUEST_S2)(a0)
+ REG_L s3, (KVM_ARCH_GUEST_S3)(a0)
+ REG_L s4, (KVM_ARCH_GUEST_S4)(a0)
+ REG_L s5, (KVM_ARCH_GUEST_S5)(a0)
+ REG_L s6, (KVM_ARCH_GUEST_S6)(a0)
+ REG_L s7, (KVM_ARCH_GUEST_S7)(a0)
+ REG_L s8, (KVM_ARCH_GUEST_S8)(a0)
+ REG_L s9, (KVM_ARCH_GUEST_S9)(a0)
+ REG_L s10, (KVM_ARCH_GUEST_S10)(a0)
+ REG_L s11, (KVM_ARCH_GUEST_S11)(a0)
+ REG_L t3, (KVM_ARCH_GUEST_T3)(a0)
+ REG_L t4, (KVM_ARCH_GUEST_T4)(a0)
+ REG_L t5, (KVM_ARCH_GUEST_T5)(a0)
+ REG_L t6, (KVM_ARCH_GUEST_T6)(a0)
+
+ /* Restore Guest A0 */
+ REG_L a0, (KVM_ARCH_GUEST_A0)(a0)
+
+ /* Resume Guest */
+ sret
+
+ /* Back to Host */
+ .align 2
+__kvm_switch_return:
+ /* Swap Guest A0 with SSCRATCH */
+ csrrw a0, CSR_SSCRATCH, a0
+
+ /* Save Guest GPRs (except A0) */
+ REG_S ra, (KVM_ARCH_GUEST_RA)(a0)
+ REG_S sp, (KVM_ARCH_GUEST_SP)(a0)
+ REG_S gp, (KVM_ARCH_GUEST_GP)(a0)
+ REG_S tp, (KVM_ARCH_GUEST_TP)(a0)
+ REG_S t0, (KVM_ARCH_GUEST_T0)(a0)
+ REG_S t1, (KVM_ARCH_GUEST_T1)(a0)
+ REG_S t2, (KVM_ARCH_GUEST_T2)(a0)
+ REG_S s0, (KVM_ARCH_GUEST_S0)(a0)
+ REG_S s1, (KVM_ARCH_GUEST_S1)(a0)
+ REG_S a1, (KVM_ARCH_GUEST_A1)(a0)
+ REG_S a2, (KVM_ARCH_GUEST_A2)(a0)
+ REG_S a3, (KVM_ARCH_GUEST_A3)(a0)
+ REG_S a4, (KVM_ARCH_GUEST_A4)(a0)
+ REG_S a5, (KVM_ARCH_GUEST_A5)(a0)
+ REG_S a6, (KVM_ARCH_GUEST_A6)(a0)
+ REG_S a7, (KVM_ARCH_GUEST_A7)(a0)
+ REG_S s2, (KVM_ARCH_GUEST_S2)(a0)
+ REG_S s3, (KVM_ARCH_GUEST_S3)(a0)
+ REG_S s4, (KVM_ARCH_GUEST_S4)(a0)
+ REG_S s5, (KVM_ARCH_GUEST_S5)(a0)
+ REG_S s6, (KVM_ARCH_GUEST_S6)(a0)
+ REG_S s7, (KVM_ARCH_GUEST_S7)(a0)
+ REG_S s8, (KVM_ARCH_GUEST_S8)(a0)
+ REG_S s9, (KVM_ARCH_GUEST_S9)(a0)
+ REG_S s10, (KVM_ARCH_GUEST_S10)(a0)
+ REG_S s11, (KVM_ARCH_GUEST_S11)(a0)
+ REG_S t3, (KVM_ARCH_GUEST_T3)(a0)
+ REG_S t4, (KVM_ARCH_GUEST_T4)(a0)
+ REG_S t5, (KVM_ARCH_GUEST_T5)(a0)
+ REG_S t6, (KVM_ARCH_GUEST_T6)(a0)
+
+ /* Save Guest SEPC */
+ csrr t0, CSR_SEPC
+ REG_S t0, (KVM_ARCH_GUEST_SEPC)(a0)
+
+ /* Restore Host STVEC */
+ REG_L t1, (KVM_ARCH_HOST_STVEC)(a0)
+ csrw CSR_STVEC, t1
+
+ /* Save Guest A0 and Restore Host SSCRATCH */
+ REG_L t2, (KVM_ARCH_HOST_SSCRATCH)(a0)
+ csrrw t2, CSR_SSCRATCH, t2
+ REG_S t2, (KVM_ARCH_GUEST_A0)(a0)
+
+ /* Save Guest and Restore Host SCOUNTEREN */
+ REG_L t3, (KVM_ARCH_HOST_SCOUNTEREN)(a0)
+ csrrw t3, CSR_SCOUNTEREN, t3
+ REG_S t3, (KVM_ARCH_GUEST_SCOUNTEREN)(a0)
+
+ /* Save Guest and Restore Host HSTATUS */
+ REG_L t4, (KVM_ARCH_HOST_HSTATUS)(a0)
+ csrrw t4, CSR_HSTATUS, t4
+ REG_S t4, (KVM_ARCH_GUEST_HSTATUS)(a0)
+
+ /* Save Guest and Restore Host SSTATUS */
+ REG_L t5, (KVM_ARCH_HOST_SSTATUS)(a0)
+ csrrw t5, CSR_SSTATUS, t5
+ REG_S t5, (KVM_ARCH_GUEST_SSTATUS)(a0)
+
+ /* Restore Host GPRs (except A0 and T0-T6) */
+ REG_L ra, (KVM_ARCH_HOST_RA)(a0)
+ REG_L sp, (KVM_ARCH_HOST_SP)(a0)
+ REG_L gp, (KVM_ARCH_HOST_GP)(a0)
+ REG_L tp, (KVM_ARCH_HOST_TP)(a0)
+ REG_L s0, (KVM_ARCH_HOST_S0)(a0)
+ REG_L s1, (KVM_ARCH_HOST_S1)(a0)
+ REG_L a1, (KVM_ARCH_HOST_A1)(a0)
+ REG_L a2, (KVM_ARCH_HOST_A2)(a0)
+ REG_L a3, (KVM_ARCH_HOST_A3)(a0)
+ REG_L a4, (KVM_ARCH_HOST_A4)(a0)
+ REG_L a5, (KVM_ARCH_HOST_A5)(a0)
+ REG_L a6, (KVM_ARCH_HOST_A6)(a0)
+ REG_L a7, (KVM_ARCH_HOST_A7)(a0)
+ REG_L s2, (KVM_ARCH_HOST_S2)(a0)
+ REG_L s3, (KVM_ARCH_HOST_S3)(a0)
+ REG_L s4, (KVM_ARCH_HOST_S4)(a0)
+ REG_L s5, (KVM_ARCH_HOST_S5)(a0)
+ REG_L s6, (KVM_ARCH_HOST_S6)(a0)
+ REG_L s7, (KVM_ARCH_HOST_S7)(a0)
+ REG_L s8, (KVM_ARCH_HOST_S8)(a0)
+ REG_L s9, (KVM_ARCH_HOST_S9)(a0)
+ REG_L s10, (KVM_ARCH_HOST_S10)(a0)
+ REG_L s11, (KVM_ARCH_HOST_S11)(a0)
+
+ /* Return to C code */
+ ret
+ENDPROC(__kvm_riscv_switch_to)
+
+ENTRY(__kvm_riscv_unpriv_trap)
+ /*
+ * We assume that faulting unpriv load/store instruction is
+ * 4-byte long and blindly increment SEPC by 4.
+ *
+ * The trap details will be saved at address pointed by 'A0'
+ * register and we use 'A1' register as temporary.
+ */
+ csrr a1, CSR_SEPC
+ REG_S a1, (KVM_ARCH_TRAP_SEPC)(a0)
+ addi a1, a1, 4
+ csrw CSR_SEPC, a1
+ csrr a1, CSR_SCAUSE
+ REG_S a1, (KVM_ARCH_TRAP_SCAUSE)(a0)
+ csrr a1, CSR_STVAL
+ REG_S a1, (KVM_ARCH_TRAP_STVAL)(a0)
+ csrr a1, CSR_HTVAL
+ REG_S a1, (KVM_ARCH_TRAP_HTVAL)(a0)
+ csrr a1, CSR_HTINST
+ REG_S a1, (KVM_ARCH_TRAP_HTINST)(a0)
+ sret
+ENDPROC(__kvm_riscv_unpriv_trap)
+
+#ifdef CONFIG_FPU
+ .align 3
+ .global __kvm_riscv_fp_f_save
+__kvm_riscv_fp_f_save:
+ csrr t2, CSR_SSTATUS
+ li t1, SR_FS
+ csrs CSR_SSTATUS, t1
+ frcsr t0
+ fsw f0, KVM_ARCH_FP_F_F0(a0)
+ fsw f1, KVM_ARCH_FP_F_F1(a0)
+ fsw f2, KVM_ARCH_FP_F_F2(a0)
+ fsw f3, KVM_ARCH_FP_F_F3(a0)
+ fsw f4, KVM_ARCH_FP_F_F4(a0)
+ fsw f5, KVM_ARCH_FP_F_F5(a0)
+ fsw f6, KVM_ARCH_FP_F_F6(a0)
+ fsw f7, KVM_ARCH_FP_F_F7(a0)
+ fsw f8, KVM_ARCH_FP_F_F8(a0)
+ fsw f9, KVM_ARCH_FP_F_F9(a0)
+ fsw f10, KVM_ARCH_FP_F_F10(a0)
+ fsw f11, KVM_ARCH_FP_F_F11(a0)
+ fsw f12, KVM_ARCH_FP_F_F12(a0)
+ fsw f13, KVM_ARCH_FP_F_F13(a0)
+ fsw f14, KVM_ARCH_FP_F_F14(a0)
+ fsw f15, KVM_ARCH_FP_F_F15(a0)
+ fsw f16, KVM_ARCH_FP_F_F16(a0)
+ fsw f17, KVM_ARCH_FP_F_F17(a0)
+ fsw f18, KVM_ARCH_FP_F_F18(a0)
+ fsw f19, KVM_ARCH_FP_F_F19(a0)
+ fsw f20, KVM_ARCH_FP_F_F20(a0)
+ fsw f21, KVM_ARCH_FP_F_F21(a0)
+ fsw f22, KVM_ARCH_FP_F_F22(a0)
+ fsw f23, KVM_ARCH_FP_F_F23(a0)
+ fsw f24, KVM_ARCH_FP_F_F24(a0)
+ fsw f25, KVM_ARCH_FP_F_F25(a0)
+ fsw f26, KVM_ARCH_FP_F_F26(a0)
+ fsw f27, KVM_ARCH_FP_F_F27(a0)
+ fsw f28, KVM_ARCH_FP_F_F28(a0)
+ fsw f29, KVM_ARCH_FP_F_F29(a0)
+ fsw f30, KVM_ARCH_FP_F_F30(a0)
+ fsw f31, KVM_ARCH_FP_F_F31(a0)
+ sw t0, KVM_ARCH_FP_F_FCSR(a0)
+ csrw CSR_SSTATUS, t2
+ ret
+
+ .align 3
+ .global __kvm_riscv_fp_d_save
+__kvm_riscv_fp_d_save:
+ csrr t2, CSR_SSTATUS
+ li t1, SR_FS
+ csrs CSR_SSTATUS, t1
+ frcsr t0
+ fsd f0, KVM_ARCH_FP_D_F0(a0)
+ fsd f1, KVM_ARCH_FP_D_F1(a0)
+ fsd f2, KVM_ARCH_FP_D_F2(a0)
+ fsd f3, KVM_ARCH_FP_D_F3(a0)
+ fsd f4, KVM_ARCH_FP_D_F4(a0)
+ fsd f5, KVM_ARCH_FP_D_F5(a0)
+ fsd f6, KVM_ARCH_FP_D_F6(a0)
+ fsd f7, KVM_ARCH_FP_D_F7(a0)
+ fsd f8, KVM_ARCH_FP_D_F8(a0)
+ fsd f9, KVM_ARCH_FP_D_F9(a0)
+ fsd f10, KVM_ARCH_FP_D_F10(a0)
+ fsd f11, KVM_ARCH_FP_D_F11(a0)
+ fsd f12, KVM_ARCH_FP_D_F12(a0)
+ fsd f13, KVM_ARCH_FP_D_F13(a0)
+ fsd f14, KVM_ARCH_FP_D_F14(a0)
+ fsd f15, KVM_ARCH_FP_D_F15(a0)
+ fsd f16, KVM_ARCH_FP_D_F16(a0)
+ fsd f17, KVM_ARCH_FP_D_F17(a0)
+ fsd f18, KVM_ARCH_FP_D_F18(a0)
+ fsd f19, KVM_ARCH_FP_D_F19(a0)
+ fsd f20, KVM_ARCH_FP_D_F20(a0)
+ fsd f21, KVM_ARCH_FP_D_F21(a0)
+ fsd f22, KVM_ARCH_FP_D_F22(a0)
+ fsd f23, KVM_ARCH_FP_D_F23(a0)
+ fsd f24, KVM_ARCH_FP_D_F24(a0)
+ fsd f25, KVM_ARCH_FP_D_F25(a0)
+ fsd f26, KVM_ARCH_FP_D_F26(a0)
+ fsd f27, KVM_ARCH_FP_D_F27(a0)
+ fsd f28, KVM_ARCH_FP_D_F28(a0)
+ fsd f29, KVM_ARCH_FP_D_F29(a0)
+ fsd f30, KVM_ARCH_FP_D_F30(a0)
+ fsd f31, KVM_ARCH_FP_D_F31(a0)
+ sw t0, KVM_ARCH_FP_D_FCSR(a0)
+ csrw CSR_SSTATUS, t2
+ ret
+
+ .align 3
+ .global __kvm_riscv_fp_f_restore
+__kvm_riscv_fp_f_restore:
+ csrr t2, CSR_SSTATUS
+ li t1, SR_FS
+ lw t0, KVM_ARCH_FP_F_FCSR(a0)
+ csrs CSR_SSTATUS, t1
+ flw f0, KVM_ARCH_FP_F_F0(a0)
+ flw f1, KVM_ARCH_FP_F_F1(a0)
+ flw f2, KVM_ARCH_FP_F_F2(a0)
+ flw f3, KVM_ARCH_FP_F_F3(a0)
+ flw f4, KVM_ARCH_FP_F_F4(a0)
+ flw f5, KVM_ARCH_FP_F_F5(a0)
+ flw f6, KVM_ARCH_FP_F_F6(a0)
+ flw f7, KVM_ARCH_FP_F_F7(a0)
+ flw f8, KVM_ARCH_FP_F_F8(a0)
+ flw f9, KVM_ARCH_FP_F_F9(a0)
+ flw f10, KVM_ARCH_FP_F_F10(a0)
+ flw f11, KVM_ARCH_FP_F_F11(a0)
+ flw f12, KVM_ARCH_FP_F_F12(a0)
+ flw f13, KVM_ARCH_FP_F_F13(a0)
+ flw f14, KVM_ARCH_FP_F_F14(a0)
+ flw f15, KVM_ARCH_FP_F_F15(a0)
+ flw f16, KVM_ARCH_FP_F_F16(a0)
+ flw f17, KVM_ARCH_FP_F_F17(a0)
+ flw f18, KVM_ARCH_FP_F_F18(a0)
+ flw f19, KVM_ARCH_FP_F_F19(a0)
+ flw f20, KVM_ARCH_FP_F_F20(a0)
+ flw f21, KVM_ARCH_FP_F_F21(a0)
+ flw f22, KVM_ARCH_FP_F_F22(a0)
+ flw f23, KVM_ARCH_FP_F_F23(a0)
+ flw f24, KVM_ARCH_FP_F_F24(a0)
+ flw f25, KVM_ARCH_FP_F_F25(a0)
+ flw f26, KVM_ARCH_FP_F_F26(a0)
+ flw f27, KVM_ARCH_FP_F_F27(a0)
+ flw f28, KVM_ARCH_FP_F_F28(a0)
+ flw f29, KVM_ARCH_FP_F_F29(a0)
+ flw f30, KVM_ARCH_FP_F_F30(a0)
+ flw f31, KVM_ARCH_FP_F_F31(a0)
+ fscsr t0
+ csrw CSR_SSTATUS, t2
+ ret
+
+ .align 3
+ .global __kvm_riscv_fp_d_restore
+__kvm_riscv_fp_d_restore:
+ csrr t2, CSR_SSTATUS
+ li t1, SR_FS
+ lw t0, KVM_ARCH_FP_D_FCSR(a0)
+ csrs CSR_SSTATUS, t1
+ fld f0, KVM_ARCH_FP_D_F0(a0)
+ fld f1, KVM_ARCH_FP_D_F1(a0)
+ fld f2, KVM_ARCH_FP_D_F2(a0)
+ fld f3, KVM_ARCH_FP_D_F3(a0)
+ fld f4, KVM_ARCH_FP_D_F4(a0)
+ fld f5, KVM_ARCH_FP_D_F5(a0)
+ fld f6, KVM_ARCH_FP_D_F6(a0)
+ fld f7, KVM_ARCH_FP_D_F7(a0)
+ fld f8, KVM_ARCH_FP_D_F8(a0)
+ fld f9, KVM_ARCH_FP_D_F9(a0)
+ fld f10, KVM_ARCH_FP_D_F10(a0)
+ fld f11, KVM_ARCH_FP_D_F11(a0)
+ fld f12, KVM_ARCH_FP_D_F12(a0)
+ fld f13, KVM_ARCH_FP_D_F13(a0)
+ fld f14, KVM_ARCH_FP_D_F14(a0)
+ fld f15, KVM_ARCH_FP_D_F15(a0)
+ fld f16, KVM_ARCH_FP_D_F16(a0)
+ fld f17, KVM_ARCH_FP_D_F17(a0)
+ fld f18, KVM_ARCH_FP_D_F18(a0)
+ fld f19, KVM_ARCH_FP_D_F19(a0)
+ fld f20, KVM_ARCH_FP_D_F20(a0)
+ fld f21, KVM_ARCH_FP_D_F21(a0)
+ fld f22, KVM_ARCH_FP_D_F22(a0)
+ fld f23, KVM_ARCH_FP_D_F23(a0)
+ fld f24, KVM_ARCH_FP_D_F24(a0)
+ fld f25, KVM_ARCH_FP_D_F25(a0)
+ fld f26, KVM_ARCH_FP_D_F26(a0)
+ fld f27, KVM_ARCH_FP_D_F27(a0)
+ fld f28, KVM_ARCH_FP_D_F28(a0)
+ fld f29, KVM_ARCH_FP_D_F29(a0)
+ fld f30, KVM_ARCH_FP_D_F30(a0)
+ fld f31, KVM_ARCH_FP_D_F31(a0)
+ fscsr t0
+ csrw CSR_SSTATUS, t2
+ ret
+#endif
diff --git a/arch/riscv/kvm/vcpu_timer.c b/arch/riscv/kvm/vcpu_timer.c
new file mode 100644
index 000000000000..5c4c37ff2d48
--- /dev/null
+++ b/arch/riscv/kvm/vcpu_timer.c
@@ -0,0 +1,225 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Atish Patra <atish.patra@wdc.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <linux/uaccess.h>
+#include <clocksource/timer-riscv.h>
+#include <asm/csr.h>
+#include <asm/delay.h>
+#include <asm/kvm_vcpu_timer.h>
+
+static u64 kvm_riscv_current_cycles(struct kvm_guest_timer *gt)
+{
+ return get_cycles64() + gt->time_delta;
+}
+
+static u64 kvm_riscv_delta_cycles2ns(u64 cycles,
+ struct kvm_guest_timer *gt,
+ struct kvm_vcpu_timer *t)
+{
+ unsigned long flags;
+ u64 cycles_now, cycles_delta, delta_ns;
+
+ local_irq_save(flags);
+ cycles_now = kvm_riscv_current_cycles(gt);
+ if (cycles_now < cycles)
+ cycles_delta = cycles - cycles_now;
+ else
+ cycles_delta = 0;
+ delta_ns = (cycles_delta * gt->nsec_mult) >> gt->nsec_shift;
+ local_irq_restore(flags);
+
+ return delta_ns;
+}
+
+static enum hrtimer_restart kvm_riscv_vcpu_hrtimer_expired(struct hrtimer *h)
+{
+ u64 delta_ns;
+ struct kvm_vcpu_timer *t = container_of(h, struct kvm_vcpu_timer, hrt);
+ struct kvm_vcpu *vcpu = container_of(t, struct kvm_vcpu, arch.timer);
+ struct kvm_guest_timer *gt = &vcpu->kvm->arch.timer;
+
+ if (kvm_riscv_current_cycles(gt) < t->next_cycles) {
+ delta_ns = kvm_riscv_delta_cycles2ns(t->next_cycles, gt, t);
+ hrtimer_forward_now(&t->hrt, ktime_set(0, delta_ns));
+ return HRTIMER_RESTART;
+ }
+
+ t->next_set = false;
+ kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_VS_TIMER);
+
+ return HRTIMER_NORESTART;
+}
+
+static int kvm_riscv_vcpu_timer_cancel(struct kvm_vcpu_timer *t)
+{
+ if (!t->init_done || !t->next_set)
+ return -EINVAL;
+
+ hrtimer_cancel(&t->hrt);
+ t->next_set = false;
+
+ return 0;
+}
+
+int kvm_riscv_vcpu_timer_next_event(struct kvm_vcpu *vcpu, u64 ncycles)
+{
+ struct kvm_vcpu_timer *t = &vcpu->arch.timer;
+ struct kvm_guest_timer *gt = &vcpu->kvm->arch.timer;
+ u64 delta_ns;
+
+ if (!t->init_done)
+ return -EINVAL;
+
+ kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_TIMER);
+
+ delta_ns = kvm_riscv_delta_cycles2ns(ncycles, gt, t);
+ t->next_cycles = ncycles;
+ hrtimer_start(&t->hrt, ktime_set(0, delta_ns), HRTIMER_MODE_REL);
+ t->next_set = true;
+
+ return 0;
+}
+
+int kvm_riscv_vcpu_get_reg_timer(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg)
+{
+ struct kvm_vcpu_timer *t = &vcpu->arch.timer;
+ struct kvm_guest_timer *gt = &vcpu->kvm->arch.timer;
+ u64 __user *uaddr = (u64 __user *)(unsigned long)reg->addr;
+ unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+ KVM_REG_SIZE_MASK |
+ KVM_REG_RISCV_TIMER);
+ u64 reg_val;
+
+ if (KVM_REG_SIZE(reg->id) != sizeof(u64))
+ return -EINVAL;
+ if (reg_num >= sizeof(struct kvm_riscv_timer) / sizeof(u64))
+ return -EINVAL;
+
+ switch (reg_num) {
+ case KVM_REG_RISCV_TIMER_REG(frequency):
+ reg_val = riscv_timebase;
+ break;
+ case KVM_REG_RISCV_TIMER_REG(time):
+ reg_val = kvm_riscv_current_cycles(gt);
+ break;
+ case KVM_REG_RISCV_TIMER_REG(compare):
+ reg_val = t->next_cycles;
+ break;
+ case KVM_REG_RISCV_TIMER_REG(state):
+ reg_val = (t->next_set) ? KVM_RISCV_TIMER_STATE_ON :
+ KVM_RISCV_TIMER_STATE_OFF;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
+ return -EFAULT;
+
+ return 0;
+}
+
+int kvm_riscv_vcpu_set_reg_timer(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg)
+{
+ struct kvm_vcpu_timer *t = &vcpu->arch.timer;
+ struct kvm_guest_timer *gt = &vcpu->kvm->arch.timer;
+ u64 __user *uaddr = (u64 __user *)(unsigned long)reg->addr;
+ unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+ KVM_REG_SIZE_MASK |
+ KVM_REG_RISCV_TIMER);
+ u64 reg_val;
+ int ret = 0;
+
+ if (KVM_REG_SIZE(reg->id) != sizeof(u64))
+ return -EINVAL;
+ if (reg_num >= sizeof(struct kvm_riscv_timer) / sizeof(u64))
+ return -EINVAL;
+
+ if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
+ return -EFAULT;
+
+ switch (reg_num) {
+ case KVM_REG_RISCV_TIMER_REG(frequency):
+ ret = -EOPNOTSUPP;
+ break;
+ case KVM_REG_RISCV_TIMER_REG(time):
+ gt->time_delta = reg_val - get_cycles64();
+ break;
+ case KVM_REG_RISCV_TIMER_REG(compare):
+ t->next_cycles = reg_val;
+ break;
+ case KVM_REG_RISCV_TIMER_REG(state):
+ if (reg_val == KVM_RISCV_TIMER_STATE_ON)
+ ret = kvm_riscv_vcpu_timer_next_event(vcpu, reg_val);
+ else
+ ret = kvm_riscv_vcpu_timer_cancel(t);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+int kvm_riscv_vcpu_timer_init(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_timer *t = &vcpu->arch.timer;
+
+ if (t->init_done)
+ return -EINVAL;
+
+ hrtimer_init(&t->hrt, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ t->hrt.function = kvm_riscv_vcpu_hrtimer_expired;
+ t->init_done = true;
+ t->next_set = false;
+
+ return 0;
+}
+
+int kvm_riscv_vcpu_timer_deinit(struct kvm_vcpu *vcpu)
+{
+ int ret;
+
+ ret = kvm_riscv_vcpu_timer_cancel(&vcpu->arch.timer);
+ vcpu->arch.timer.init_done = false;
+
+ return ret;
+}
+
+int kvm_riscv_vcpu_timer_reset(struct kvm_vcpu *vcpu)
+{
+ return kvm_riscv_vcpu_timer_cancel(&vcpu->arch.timer);
+}
+
+void kvm_riscv_vcpu_timer_restore(struct kvm_vcpu *vcpu)
+{
+ struct kvm_guest_timer *gt = &vcpu->kvm->arch.timer;
+
+#ifdef CONFIG_64BIT
+ csr_write(CSR_HTIMEDELTA, gt->time_delta);
+#else
+ csr_write(CSR_HTIMEDELTA, (u32)(gt->time_delta));
+ csr_write(CSR_HTIMEDELTAH, (u32)(gt->time_delta >> 32));
+#endif
+}
+
+int kvm_riscv_guest_timer_init(struct kvm *kvm)
+{
+ struct kvm_guest_timer *gt = &kvm->arch.timer;
+
+ riscv_cs_get_mult_shift(&gt->nsec_mult, &gt->nsec_shift);
+ gt->time_delta = -get_cycles64();
+
+ return 0;
+}
diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c
new file mode 100644
index 000000000000..26399df15b63
--- /dev/null
+++ b/arch/riscv/kvm/vm.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Anup Patel <anup.patel@wdc.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/uaccess.h>
+#include <linux/kvm_host.h>
+
+const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
+ KVM_GENERIC_VM_STATS()
+};
+static_assert(ARRAY_SIZE(kvm_vm_stats_desc) ==
+ sizeof(struct kvm_vm_stat) / sizeof(u64));
+
+const struct kvm_stats_header kvm_vm_stats_header = {
+ .name_size = KVM_STATS_NAME_SIZE,
+ .num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
+ .id_offset = sizeof(struct kvm_stats_header),
+ .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
+ .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
+ sizeof(kvm_vm_stats_desc),
+};
+
+int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
+{
+ int r;
+
+ r = kvm_riscv_stage2_alloc_pgd(kvm);
+ if (r)
+ return r;
+
+ r = kvm_riscv_stage2_vmid_init(kvm);
+ if (r) {
+ kvm_riscv_stage2_free_pgd(kvm);
+ return r;
+ }
+
+ return kvm_riscv_guest_timer_init(kvm);
+}
+
+void kvm_arch_destroy_vm(struct kvm *kvm)
+{
+ int i;
+
+ for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+ if (kvm->vcpus[i]) {
+ kvm_vcpu_destroy(kvm->vcpus[i]);
+ kvm->vcpus[i] = NULL;
+ }
+ }
+ atomic_set(&kvm->online_vcpus, 0);
+}
+
+int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
+{
+ int r;
+
+ switch (ext) {
+ case KVM_CAP_IOEVENTFD:
+ case KVM_CAP_DEVICE_CTRL:
+ case KVM_CAP_USER_MEMORY:
+ case KVM_CAP_SYNC_MMU:
+ case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
+ case KVM_CAP_ONE_REG:
+ case KVM_CAP_READONLY_MEM:
+ case KVM_CAP_MP_STATE:
+ case KVM_CAP_IMMEDIATE_EXIT:
+ r = 1;
+ break;
+ case KVM_CAP_NR_VCPUS:
+ r = num_online_cpus();
+ break;
+ case KVM_CAP_MAX_VCPUS:
+ r = KVM_MAX_VCPUS;
+ break;
+ case KVM_CAP_NR_MEMSLOTS:
+ r = KVM_USER_MEM_SLOTS;
+ break;
+ default:
+ r = 0;
+ break;
+ }
+
+ return r;
+}
+
+long kvm_arch_vm_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ return -EINVAL;
+}
diff --git a/arch/riscv/kvm/vmid.c b/arch/riscv/kvm/vmid.c
new file mode 100644
index 000000000000..2c6253b293bc
--- /dev/null
+++ b/arch/riscv/kvm/vmid.c
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Anup Patel <anup.patel@wdc.com>
+ */
+
+#include <linux/bitops.h>
+#include <linux/cpumask.h>
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/kvm_host.h>
+#include <asm/csr.h>
+#include <asm/sbi.h>
+
+static unsigned long vmid_version = 1;
+static unsigned long vmid_next;
+static unsigned long vmid_bits;
+static DEFINE_SPINLOCK(vmid_lock);
+
+void kvm_riscv_stage2_vmid_detect(void)
+{
+ unsigned long old;
+
+ /* Figure-out number of VMID bits in HW */
+ old = csr_read(CSR_HGATP);
+ csr_write(CSR_HGATP, old | HGATP_VMID_MASK);
+ vmid_bits = csr_read(CSR_HGATP);
+ vmid_bits = (vmid_bits & HGATP_VMID_MASK) >> HGATP_VMID_SHIFT;
+ vmid_bits = fls_long(vmid_bits);
+ csr_write(CSR_HGATP, old);
+
+ /* We polluted local TLB so flush all guest TLB */
+ __kvm_riscv_hfence_gvma_all();
+
+ /* We don't use VMID bits if they are not sufficient */
+ if ((1UL << vmid_bits) < num_possible_cpus())
+ vmid_bits = 0;
+}
+
+unsigned long kvm_riscv_stage2_vmid_bits(void)
+{
+ return vmid_bits;
+}
+
+int kvm_riscv_stage2_vmid_init(struct kvm *kvm)
+{
+ /* Mark the initial VMID and VMID version invalid */
+ kvm->arch.vmid.vmid_version = 0;
+ kvm->arch.vmid.vmid = 0;
+
+ return 0;
+}
+
+bool kvm_riscv_stage2_vmid_ver_changed(struct kvm_vmid *vmid)
+{
+ if (!vmid_bits)
+ return false;
+
+ return unlikely(READ_ONCE(vmid->vmid_version) !=
+ READ_ONCE(vmid_version));
+}
+
+void kvm_riscv_stage2_vmid_update(struct kvm_vcpu *vcpu)
+{
+ int i;
+ struct kvm_vcpu *v;
+ struct cpumask hmask;
+ struct kvm_vmid *vmid = &vcpu->kvm->arch.vmid;
+
+ if (!kvm_riscv_stage2_vmid_ver_changed(vmid))
+ return;
+
+ spin_lock(&vmid_lock);
+
+ /*
+ * We need to re-check the vmid_version here to ensure that if
+ * another vcpu already allocated a valid vmid for this vm.
+ */
+ if (!kvm_riscv_stage2_vmid_ver_changed(vmid)) {
+ spin_unlock(&vmid_lock);
+ return;
+ }
+
+ /* First user of a new VMID version? */
+ if (unlikely(vmid_next == 0)) {
+ WRITE_ONCE(vmid_version, READ_ONCE(vmid_version) + 1);
+ vmid_next = 1;
+
+ /*
+ * We ran out of VMIDs so we increment vmid_version and
+ * start assigning VMIDs from 1.
+ *
+ * This also means existing VMIDs assignement to all Guest
+ * instances is invalid and we have force VMID re-assignement
+ * for all Guest instances. The Guest instances that were not
+ * running will automatically pick-up new VMIDs because will
+ * call kvm_riscv_stage2_vmid_update() whenever they enter
+ * in-kernel run loop. For Guest instances that are already
+ * running, we force VM exits on all host CPUs using IPI and
+ * flush all Guest TLBs.
+ */
+ riscv_cpuid_to_hartid_mask(cpu_online_mask, &hmask);
+ sbi_remote_hfence_gvma(cpumask_bits(&hmask), 0, 0);
+ }
+
+ vmid->vmid = vmid_next;
+ vmid_next++;
+ vmid_next &= (1 << vmid_bits) - 1;
+
+ WRITE_ONCE(vmid->vmid_version, READ_ONCE(vmid_version));
+
+ spin_unlock(&vmid_lock);
+
+ /* Request stage2 page table update for all VCPUs */
+ kvm_for_each_vcpu(i, v, vcpu->kvm)
+ kvm_make_request(KVM_REQ_UPDATE_HGATP, v);
+}