diff options
Diffstat (limited to 'arch/loongarch/kvm')
-rw-r--r-- | arch/loongarch/kvm/Kconfig | 6 | ||||
-rw-r--r-- | arch/loongarch/kvm/Makefile | 6 | ||||
-rw-r--r-- | arch/loongarch/kvm/exit.c | 326 | ||||
-rw-r--r-- | arch/loongarch/kvm/intc/eiointc.c | 1027 | ||||
-rw-r--r-- | arch/loongarch/kvm/intc/ipi.c | 479 | ||||
-rw-r--r-- | arch/loongarch/kvm/intc/pch_pic.c | 519 | ||||
-rw-r--r-- | arch/loongarch/kvm/irqfd.c | 89 | ||||
-rw-r--r-- | arch/loongarch/kvm/main.c | 53 | ||||
-rw-r--r-- | arch/loongarch/kvm/mmu.c | 126 | ||||
-rw-r--r-- | arch/loongarch/kvm/switch.S | 21 | ||||
-rw-r--r-- | arch/loongarch/kvm/timer.c | 57 | ||||
-rw-r--r-- | arch/loongarch/kvm/tlb.c | 5 | ||||
-rw-r--r-- | arch/loongarch/kvm/trace.h | 20 | ||||
-rw-r--r-- | arch/loongarch/kvm/vcpu.c | 643 | ||||
-rw-r--r-- | arch/loongarch/kvm/vm.c | 107 |
15 files changed, 3277 insertions, 207 deletions
diff --git a/arch/loongarch/kvm/Kconfig b/arch/loongarch/kvm/Kconfig index 61f7e33b1f95..97a811077ac3 100644 --- a/arch/loongarch/kvm/Kconfig +++ b/arch/loongarch/kvm/Kconfig @@ -20,8 +20,11 @@ if VIRTUALIZATION config KVM tristate "Kernel-based Virtual Machine (KVM) support" depends on AS_HAS_LVZ_EXTENSION - depends on HAVE_KVM select HAVE_KVM_DIRTY_RING_ACQ_REL + select HAVE_KVM_IRQ_ROUTING + select HAVE_KVM_IRQCHIP + select HAVE_KVM_MSI + select HAVE_KVM_READONLY_MEM select HAVE_KVM_VCPU_ASYNC_IOCTL select KVM_COMMON select KVM_GENERIC_DIRTYLOG_READ_PROTECT @@ -29,6 +32,7 @@ config KVM select KVM_GENERIC_MMU_NOTIFIER select KVM_MMIO select KVM_XFER_TO_GUEST_WORK + select SCHED_INFO help Support hosting virtualized guest machines using hardware virtualization extensions. You will need diff --git a/arch/loongarch/kvm/Makefile b/arch/loongarch/kvm/Makefile index 244467d7792a..3a01292f71cc 100644 --- a/arch/loongarch/kvm/Makefile +++ b/arch/loongarch/kvm/Makefile @@ -3,7 +3,7 @@ # Makefile for LoongArch KVM support # -ccflags-y += -I $(srctree)/$(src) +ccflags-y += -I $(src) include $(srctree)/virt/kvm/Makefile.kvm @@ -18,5 +18,9 @@ kvm-y += timer.o kvm-y += tlb.o kvm-y += vcpu.o kvm-y += vm.o +kvm-y += intc/ipi.o +kvm-y += intc/eiointc.o +kvm-y += intc/pch_pic.o +kvm-y += irqfd.o CFLAGS_exit.o += $(call cc-option,-Wno-override-init,) diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c index ed1d89d53e2e..ea321403644a 100644 --- a/arch/loongarch/kvm/exit.c +++ b/arch/loongarch/kvm/exit.c @@ -9,6 +9,7 @@ #include <linux/module.h> #include <linux/preempt.h> #include <linux/vmalloc.h> +#include <trace/events/kvm.h> #include <asm/fpu.h> #include <asm/inst.h> #include <asm/loongarch.h> @@ -20,6 +21,47 @@ #include <asm/kvm_vcpu.h> #include "trace.h" +static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst) +{ + int rd, rj; + unsigned int index, ret; + + if (inst.reg2_format.opcode != cpucfg_op) + return EMULATE_FAIL; + + rd = inst.reg2_format.rd; + 
rj = inst.reg2_format.rj; + ++vcpu->stat.cpucfg_exits; + index = vcpu->arch.gprs[rj]; + + /* + * By LoongArch Reference Manual 2.2.10.5 + * Return value is 0 for undefined CPUCFG index + * + * Disable preemption since hw gcsr is accessed + */ + preempt_disable(); + switch (index) { + case 0 ... (KVM_MAX_CPUCFG_REGS - 1): + vcpu->arch.gprs[rd] = vcpu->arch.cpucfg[index]; + break; + case CPUCFG_KVM_SIG: + /* CPUCFG emulation between 0x40000000 -- 0x400000ff */ + vcpu->arch.gprs[rd] = *(unsigned int *)KVM_SIGNATURE; + break; + case CPUCFG_KVM_FEATURE: + ret = vcpu->kvm->arch.pv_features & LOONGARCH_PV_FEAT_MASK; + vcpu->arch.gprs[rd] = ret; + break; + default: + vcpu->arch.gprs[rd] = 0; + break; + } + preempt_enable(); + + return EMULATE_DONE; +} + static unsigned long kvm_emu_read_csr(struct kvm_vcpu *vcpu, int csrid) { unsigned long val = 0; @@ -83,6 +125,14 @@ static int kvm_handle_csr(struct kvm_vcpu *vcpu, larch_inst inst) rj = inst.reg2csr_format.rj; csrid = inst.reg2csr_format.csr; + if (csrid >= LOONGARCH_CSR_PERFCTRL0 && csrid <= vcpu->arch.max_pmu_csrid) { + if (kvm_guest_has_pmu(&vcpu->arch)) { + vcpu->arch.pc -= 4; + kvm_make_request(KVM_REQ_PMU, vcpu); + return EMULATE_DONE; + } + } + /* Process CSR ops */ switch (rj) { case 0: /* process csrrd */ @@ -106,8 +156,8 @@ static int kvm_handle_csr(struct kvm_vcpu *vcpu, larch_inst inst) int kvm_emu_iocsr(larch_inst inst, struct kvm_run *run, struct kvm_vcpu *vcpu) { - int ret; - unsigned long val; + int idx, ret; + unsigned long *val; u32 addr, rd, rj, opcode; /* @@ -117,9 +167,9 @@ int kvm_emu_iocsr(larch_inst inst, struct kvm_run *run, struct kvm_vcpu *vcpu) rj = inst.reg2_format.rj; opcode = inst.reg2_format.opcode; addr = vcpu->arch.gprs[rj]; - ret = EMULATE_DO_IOCSR; run->iocsr_io.phys_addr = addr; run->iocsr_io.is_write = 0; + val = &vcpu->arch.gprs[rd]; /* LoongArch is Little endian */ switch (opcode) { @@ -152,16 +202,33 @@ int kvm_emu_iocsr(larch_inst inst, struct kvm_run *run, struct kvm_vcpu *vcpu) 
run->iocsr_io.is_write = 1; break; default: - ret = EMULATE_FAIL; - break; + return EMULATE_FAIL; } - if (ret == EMULATE_DO_IOCSR) { - if (run->iocsr_io.is_write) { - val = vcpu->arch.gprs[rd]; - memcpy(run->iocsr_io.data, &val, run->iocsr_io.len); + if (run->iocsr_io.is_write) { + idx = srcu_read_lock(&vcpu->kvm->srcu); + ret = kvm_io_bus_write(vcpu, KVM_IOCSR_BUS, addr, run->iocsr_io.len, val); + srcu_read_unlock(&vcpu->kvm->srcu, idx); + if (ret == 0) + ret = EMULATE_DONE; + else { + ret = EMULATE_DO_IOCSR; + /* Save data and let user space to write it */ + memcpy(run->iocsr_io.data, val, run->iocsr_io.len); } - vcpu->arch.io_gpr = rd; + trace_kvm_iocsr(KVM_TRACE_IOCSR_WRITE, run->iocsr_io.len, addr, val); + } else { + idx = srcu_read_lock(&vcpu->kvm->srcu); + ret = kvm_io_bus_read(vcpu, KVM_IOCSR_BUS, addr, run->iocsr_io.len, val); + srcu_read_unlock(&vcpu->kvm->srcu, idx); + if (ret == 0) + ret = EMULATE_DONE; + else { + ret = EMULATE_DO_IOCSR; + /* Save register id for iocsr read completion */ + vcpu->arch.io_gpr = rd; + } + trace_kvm_iocsr(KVM_TRACE_IOCSR_READ, run->iocsr_io.len, addr, NULL); } return ret; @@ -208,8 +275,6 @@ int kvm_emu_idle(struct kvm_vcpu *vcpu) static int kvm_trap_handle_gspr(struct kvm_vcpu *vcpu) { - int rd, rj; - unsigned int index; unsigned long curr_pc; larch_inst inst; enum emulation_result er = EMULATE_DONE; @@ -224,21 +289,7 @@ static int kvm_trap_handle_gspr(struct kvm_vcpu *vcpu) er = EMULATE_FAIL; switch (((inst.word >> 24) & 0xff)) { case 0x0: /* CPUCFG GSPR */ - if (inst.reg2_format.opcode == 0x1B) { - rd = inst.reg2_format.rd; - rj = inst.reg2_format.rj; - ++vcpu->stat.cpucfg_exits; - index = vcpu->arch.gprs[rj]; - er = EMULATE_DONE; - /* - * By LoongArch Reference Manual 2.2.10.5 - * return value is 0 for undefined cpucfg index - */ - if (index < KVM_MAX_CPUCFG_REGS) - vcpu->arch.gprs[rd] = vcpu->arch.cpucfg[index]; - else - vcpu->arch.gprs[rd] = 0; - } + er = kvm_emu_cpucfg(vcpu, inst); break; case 0x4: /* CSR{RD,WR,XCHG} 
GSPR */ er = kvm_handle_csr(vcpu, inst); @@ -315,7 +366,7 @@ static int kvm_handle_gspr(struct kvm_vcpu *vcpu) int kvm_emu_mmio_read(struct kvm_vcpu *vcpu, larch_inst inst) { - int ret; + int idx, ret; unsigned int op8, opcode, rd; struct kvm_run *run = vcpu->run; @@ -413,17 +464,35 @@ int kvm_emu_mmio_read(struct kvm_vcpu *vcpu, larch_inst inst) } if (ret == EMULATE_DO_MMIO) { + trace_kvm_mmio(KVM_TRACE_MMIO_READ, run->mmio.len, run->mmio.phys_addr, NULL); + + /* + * If mmio device such as PCH-PIC is emulated in KVM, + * it need not return to user space to handle the mmio + * exception. + */ + idx = srcu_read_lock(&vcpu->kvm->srcu); + ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, vcpu->arch.badv, + run->mmio.len, &vcpu->arch.gprs[rd]); + srcu_read_unlock(&vcpu->kvm->srcu, idx); + if (!ret) { + update_pc(&vcpu->arch); + vcpu->mmio_needed = 0; + return EMULATE_DONE; + } + /* Set for kvm_complete_mmio_read() use */ vcpu->arch.io_gpr = rd; run->mmio.is_write = 0; vcpu->mmio_is_write = 0; - } else { - kvm_err("Read not supported Inst=0x%08x @%lx BadVaddr:%#lx\n", - inst.word, vcpu->arch.pc, vcpu->arch.badv); - kvm_arch_vcpu_dump_regs(vcpu); - vcpu->mmio_needed = 0; + return EMULATE_DO_MMIO; } + kvm_err("Read not supported Inst=0x%08x @%lx BadVaddr:%#lx\n", + inst.word, vcpu->arch.pc, vcpu->arch.badv); + kvm_arch_vcpu_dump_regs(vcpu); + vcpu->mmio_needed = 0; + return ret; } @@ -463,12 +532,15 @@ int kvm_complete_mmio_read(struct kvm_vcpu *vcpu, struct kvm_run *run) break; } + trace_kvm_mmio(KVM_TRACE_MMIO_READ, run->mmio.len, + run->mmio.phys_addr, run->mmio.data); + return er; } int kvm_emu_mmio_write(struct kvm_vcpu *vcpu, larch_inst inst) { - int ret; + int idx, ret; unsigned int rd, op8, opcode; unsigned long curr_pc, rd_val = 0; struct kvm_run *run = vcpu->run; @@ -561,17 +633,31 @@ int kvm_emu_mmio_write(struct kvm_vcpu *vcpu, larch_inst inst) } if (ret == EMULATE_DO_MMIO) { + trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, run->mmio.len, run->mmio.phys_addr, data); + + /* + * 
If mmio device such as PCH-PIC is emulated in KVM, + * it need not return to user space to handle the mmio + * exception. + */ + idx = srcu_read_lock(&vcpu->kvm->srcu); + ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, vcpu->arch.badv, run->mmio.len, data); + srcu_read_unlock(&vcpu->kvm->srcu, idx); + if (!ret) + return EMULATE_DONE; + run->mmio.is_write = 1; vcpu->mmio_needed = 1; vcpu->mmio_is_write = 1; - } else { - vcpu->arch.pc = curr_pc; - kvm_err("Write not supported Inst=0x%08x @%lx BadVaddr:%#lx\n", - inst.word, vcpu->arch.pc, vcpu->arch.badv); - kvm_arch_vcpu_dump_regs(vcpu); - /* Rollback PC if emulation was unsuccessful */ + return EMULATE_DO_MMIO; } + vcpu->arch.pc = curr_pc; + kvm_err("Write not supported Inst=0x%08x @%lx BadVaddr:%#lx\n", + inst.word, vcpu->arch.pc, vcpu->arch.badv); + kvm_arch_vcpu_dump_regs(vcpu); + /* Rollback PC if emulation was unsuccessful */ + return ret; } @@ -583,6 +669,12 @@ static int kvm_handle_rdwr_fault(struct kvm_vcpu *vcpu, bool write) struct kvm_run *run = vcpu->run; unsigned long badv = vcpu->arch.badv; + /* Inject ADE exception if exceed max GPA size */ + if (unlikely(badv >= vcpu->kvm->arch.gpa_size)) { + kvm_queue_exception(vcpu, EXCCODE_ADE, EXSUBCODE_ADEM); + return RESUME_GUEST; + } + ret = kvm_handle_mm_fault(vcpu, badv, write); if (ret) { /* Treat as MMIO */ @@ -623,6 +715,14 @@ static int kvm_handle_write_fault(struct kvm_vcpu *vcpu) return kvm_handle_rdwr_fault(vcpu, true); } +int kvm_complete_user_service(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + update_pc(&vcpu->arch); + kvm_write_reg(vcpu, LOONGARCH_GPR_A0, run->hypercall.ret); + + return 0; +} + /** * kvm_handle_fpu_disabled() - Guest used fpu however it is disabled at host * @vcpu: Virtual CPU context. 
@@ -655,6 +755,31 @@ static int kvm_handle_fpu_disabled(struct kvm_vcpu *vcpu) return RESUME_GUEST; } +static long kvm_save_notify(struct kvm_vcpu *vcpu) +{ + unsigned long id, data; + + id = kvm_read_reg(vcpu, LOONGARCH_GPR_A1); + data = kvm_read_reg(vcpu, LOONGARCH_GPR_A2); + switch (id) { + case BIT(KVM_FEATURE_STEAL_TIME): + if (data & ~(KVM_STEAL_PHYS_MASK | KVM_STEAL_PHYS_VALID)) + return KVM_HCALL_INVALID_PARAMETER; + + vcpu->arch.st.guest_addr = data; + if (!(data & KVM_STEAL_PHYS_VALID)) + return 0; + + vcpu->arch.st.last_steal = current->sched_info.run_delay; + kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); + return 0; + default: + return KVM_HCALL_INVALID_CODE; + }; + + return KVM_HCALL_INVALID_CODE; +}; + /* * kvm_handle_lsx_disabled() - Guest used LSX while disabled in root. * @vcpu: Virtual CPU context. @@ -685,6 +810,125 @@ static int kvm_handle_lasx_disabled(struct kvm_vcpu *vcpu) return RESUME_GUEST; } +static int kvm_handle_lbt_disabled(struct kvm_vcpu *vcpu) +{ + if (kvm_own_lbt(vcpu)) + kvm_queue_exception(vcpu, EXCCODE_INE, 0); + + return RESUME_GUEST; +} + +static int kvm_send_pv_ipi(struct kvm_vcpu *vcpu) +{ + unsigned int min, cpu, i; + unsigned long ipi_bitmap; + struct kvm_vcpu *dest; + + min = kvm_read_reg(vcpu, LOONGARCH_GPR_A3); + for (i = 0; i < 2; i++, min += BITS_PER_LONG) { + ipi_bitmap = kvm_read_reg(vcpu, LOONGARCH_GPR_A1 + i); + if (!ipi_bitmap) + continue; + + cpu = find_first_bit((void *)&ipi_bitmap, BITS_PER_LONG); + while (cpu < BITS_PER_LONG) { + dest = kvm_get_vcpu_by_cpuid(vcpu->kvm, cpu + min); + cpu = find_next_bit((void *)&ipi_bitmap, BITS_PER_LONG, cpu + 1); + if (!dest) + continue; + + /* Send SWI0 to dest vcpu to emulate IPI interrupt */ + kvm_queue_irq(dest, INT_SWI0); + kvm_vcpu_kick(dest); + } + } + + return 0; +} + +/* + * Hypercall emulation always return to guest, Caller should check retval. 
+ */ +static void kvm_handle_service(struct kvm_vcpu *vcpu) +{ + long ret = KVM_HCALL_INVALID_CODE; + unsigned long func = kvm_read_reg(vcpu, LOONGARCH_GPR_A0); + + switch (func) { + case KVM_HCALL_FUNC_IPI: + if (kvm_guest_has_pv_feature(vcpu, KVM_FEATURE_IPI)) { + kvm_send_pv_ipi(vcpu); + ret = KVM_HCALL_SUCCESS; + } + break; + case KVM_HCALL_FUNC_NOTIFY: + if (kvm_guest_has_pv_feature(vcpu, KVM_FEATURE_STEAL_TIME)) + ret = kvm_save_notify(vcpu); + break; + default: + break; + } + + kvm_write_reg(vcpu, LOONGARCH_GPR_A0, ret); +} + +static int kvm_handle_hypercall(struct kvm_vcpu *vcpu) +{ + int ret; + larch_inst inst; + unsigned int code; + + inst.word = vcpu->arch.badi; + code = inst.reg0i15_format.immediate; + ret = RESUME_GUEST; + + switch (code) { + case KVM_HCALL_SERVICE: + vcpu->stat.hypercall_exits++; + kvm_handle_service(vcpu); + break; + case KVM_HCALL_USER_SERVICE: + if (!kvm_guest_has_pv_feature(vcpu, KVM_FEATURE_USER_HCALL)) { + kvm_write_reg(vcpu, LOONGARCH_GPR_A0, KVM_HCALL_INVALID_CODE); + break; + } + + vcpu->stat.hypercall_exits++; + vcpu->run->exit_reason = KVM_EXIT_HYPERCALL; + vcpu->run->hypercall.nr = KVM_HCALL_USER_SERVICE; + vcpu->run->hypercall.args[0] = kvm_read_reg(vcpu, LOONGARCH_GPR_A0); + vcpu->run->hypercall.args[1] = kvm_read_reg(vcpu, LOONGARCH_GPR_A1); + vcpu->run->hypercall.args[2] = kvm_read_reg(vcpu, LOONGARCH_GPR_A2); + vcpu->run->hypercall.args[3] = kvm_read_reg(vcpu, LOONGARCH_GPR_A3); + vcpu->run->hypercall.args[4] = kvm_read_reg(vcpu, LOONGARCH_GPR_A4); + vcpu->run->hypercall.args[5] = kvm_read_reg(vcpu, LOONGARCH_GPR_A5); + vcpu->run->hypercall.flags = 0; + /* + * Set invalid return value by default, let user-mode VMM modify it. 
+ */ + vcpu->run->hypercall.ret = KVM_HCALL_INVALID_CODE; + ret = RESUME_HOST; + break; + case KVM_HCALL_SWDBG: + /* KVM_HCALL_SWDBG only in effective when SW_BP is enabled */ + if (vcpu->guest_debug & KVM_GUESTDBG_SW_BP_MASK) { + vcpu->run->exit_reason = KVM_EXIT_DEBUG; + ret = RESUME_HOST; + break; + } + fallthrough; + default: + /* Treat it as noop intruction, only set return value */ + kvm_write_reg(vcpu, LOONGARCH_GPR_A0, KVM_HCALL_INVALID_CODE); + break; + } + + if (ret == RESUME_GUEST) + update_pc(&vcpu->arch); + + return ret; +} + /* * LoongArch KVM callback handling for unimplemented guest exiting */ @@ -715,7 +959,9 @@ static exit_handle_fn kvm_fault_tables[EXCCODE_INT_START] = { [EXCCODE_FPDIS] = kvm_handle_fpu_disabled, [EXCCODE_LSXDIS] = kvm_handle_lsx_disabled, [EXCCODE_LASXDIS] = kvm_handle_lasx_disabled, + [EXCCODE_BTDIS] = kvm_handle_lbt_disabled, [EXCCODE_GSPR] = kvm_handle_gspr, + [EXCCODE_HVC] = kvm_handle_hypercall, }; int kvm_handle_fault(struct kvm_vcpu *vcpu, int fault) diff --git a/arch/loongarch/kvm/intc/eiointc.c b/arch/loongarch/kvm/intc/eiointc.c new file mode 100644 index 000000000000..f39929d7bf8a --- /dev/null +++ b/arch/loongarch/kvm/intc/eiointc.c @@ -0,0 +1,1027 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Loongson Technology Corporation Limited + */ + +#include <asm/kvm_eiointc.h> +#include <asm/kvm_vcpu.h> +#include <linux/count_zeros.h> + +static void eiointc_set_sw_coreisr(struct loongarch_eiointc *s) +{ + int ipnum, cpu, irq_index, irq_mask, irq; + + for (irq = 0; irq < EIOINTC_IRQS; irq++) { + ipnum = s->ipmap.reg_u8[irq / 32]; + if (!(s->status & BIT(EIOINTC_ENABLE_INT_ENCODE))) { + ipnum = count_trailing_zeros(ipnum); + ipnum = (ipnum >= 0 && ipnum < 4) ? 
ipnum : 0;
+		}
+		irq_index = irq / 32;
+		irq_mask = BIT(irq & 0x1f);
+
+		cpu = s->coremap.reg_u8[irq];
+		if (!!(s->coreisr.reg_u32[cpu][irq_index] & irq_mask))
+			set_bit(irq, s->sw_coreisr[cpu][ipnum]);
+		else
+			clear_bit(irq, s->sw_coreisr[cpu][ipnum]);
+	}
+}
+
+/*
+ * Raise (level != 0) or lower (level == 0) @irq on the vCPU it is routed to
+ * through s->sw_coremap. The change is forwarded to that vCPU's
+ * INT_HWI0 + ipnum line only when no other irq is pending on the same line.
+ */
+static void eiointc_update_irq(struct loongarch_eiointc *s, int irq, int level)
+{
+	int ipnum, cpu, found, irq_index, irq_mask;
+	struct kvm_vcpu *vcpu;
+	struct kvm_interrupt vcpu_irq;
+
+	ipnum = s->ipmap.reg_u8[irq / 32];
+	if (!(s->status & BIT(EIOINTC_ENABLE_INT_ENCODE))) {
+		ipnum = count_trailing_zeros(ipnum);
+		ipnum = (ipnum >= 0 && ipnum < 4) ? ipnum : 0;
+	}
+
+	cpu = s->sw_coremap[irq];
+	vcpu = kvm_get_vcpu(s->kvm, cpu);
+	/*
+	 * The route is programmed by the guest through the coremap registers,
+	 * so it may not name an existing vCPU. Do not touch per-cpu state or
+	 * inject through a NULL vcpu in that case.
+	 */
+	if (unlikely(!vcpu))
+		return;
+	irq_index = irq / 32;
+	irq_mask = BIT(irq & 0x1f);
+
+	if (level) {
+		/* if not enable return false */
+		if (((s->enable.reg_u32[irq_index]) & irq_mask) == 0)
+			return;
+		s->coreisr.reg_u32[cpu][irq_index] |= irq_mask;
+		found = find_first_bit(s->sw_coreisr[cpu][ipnum], EIOINTC_IRQS);
+		set_bit(irq, s->sw_coreisr[cpu][ipnum]);
+	} else {
+		s->coreisr.reg_u32[cpu][irq_index] &= ~irq_mask;
+		clear_bit(irq, s->sw_coreisr[cpu][ipnum]);
+		found = find_first_bit(s->sw_coreisr[cpu][ipnum], EIOINTC_IRQS);
+	}
+
+	if (found < EIOINTC_IRQS)
+		return; /* other irq is handling, needn't update parent irq */
+
+	vcpu_irq.irq = level ? (INT_HWI0 + ipnum) : -(INT_HWI0 + ipnum);
+	kvm_vcpu_ioctl_interrupt(vcpu, &vcpu_irq);
+}
+
+/*
+ * Refresh the software route table for @len consecutive irqs starting at
+ * @irq from the @len coremap bytes at @pvalue (one byte per irq).
+ *
+ * Only @len bytes are read from @pvalue: callers pass the address of a
+ * u8/u16/u32/u64 local together with its size, so loading a full u64 would
+ * read past the smaller objects.
+ *
+ * When @notify is set, an irq that is currently asserted in s->isr is
+ * lowered on the old CPU and raised on the new one.
+ */
+static inline void eiointc_update_sw_coremap(struct loongarch_eiointc *s,
+					int irq, void *pvalue, u32 len, bool notify)
+{
+	int i, cpu;
+	const u8 *bytes = pvalue;
+
+	for (i = 0; i < len; i++) {
+		cpu = bytes[i];
+
+		if (!(s->status & BIT(EIOINTC_ENABLE_CPU_ENCODE))) {
+			/*
+			 * Bitmap encoding: pick the lowest set bit. An empty
+			 * bitmap makes ffs() return 0, i.e. cpu == -1, which
+			 * must be clamped to 0 as well or it would later be
+			 * used as a negative array index.
+			 */
+			cpu = ffs(cpu) - 1;
+			cpu = (cpu < 0 || cpu >= 4) ? 0 : cpu;
+		}
+
+		if (s->sw_coremap[irq + i] == cpu)
+			continue;
+
+		if (notify && test_bit(irq + i, (unsigned long *)s->isr.reg_u8)) {
+			/* lower irq at old cpu and raise irq at new cpu */
+			eiointc_update_irq(s, irq + i, 0);
+			s->sw_coremap[irq + i] = cpu;
+			eiointc_update_irq(s, irq + i, 1);
+		} else {
+			s->sw_coremap[irq + i] = cpu;
+		}
+	}
+}
+
+/*
+ * Record the new @level of @irq in the isr bitmap, then propagate it to the
+ * routed vCPU with s->lock held.
+ */
+void eiointc_set_irq(struct loongarch_eiointc *s, int irq, int level)
+{
+	unsigned long flags;
+	unsigned long *isr = (unsigned long *)s->isr.reg_u8;
+
+	level ? set_bit(irq, isr) : clear_bit(irq, isr);
+	spin_lock_irqsave(&s->lock, flags);
+	eiointc_update_irq(s, irq, level);
+	spin_unlock_irqrestore(&s->lock, flags);
+}
+
+/*
+ * For every bit set in both @mask and isr byte @index, propagate @level
+ * through eiointc_update_irq(). @index is a byte index into the isr
+ * bitmap, so the irq number is index * 8 + bit.
+ */
+static inline void eiointc_enable_irq(struct kvm_vcpu *vcpu,
+		struct loongarch_eiointc *s, int index, u8 mask, int level)
+{
+	u8 val;
+	int irq;
+
+	val = mask & s->isr.reg_u8[index];
+	irq = ffs(val);
+	while (irq != 0) {
+		/*
+		 * enable bit change from 0 to 1,
+		 * need to update irq by pending bits
+		 */
+		eiointc_update_irq(s, irq - 1 + index * 8, level);
+		val &= ~BIT(irq - 1);
+		irq = ffs(val);
+	}
+}
+
+/* Byte-wide read of the EIOINTC register at @addr (offset from EIOINTC_BASE). */
+static int loongarch_eiointc_readb(struct kvm_vcpu *vcpu, struct loongarch_eiointc *s,
+				gpa_t addr, int len, void *val)
+{
+	int index, ret = 0;
+	u8 data = 0;
+	gpa_t offset;
+
+	offset = addr - EIOINTC_BASE;
+	switch (offset) {
+	case EIOINTC_NODETYPE_START ... EIOINTC_NODETYPE_END:
+		index = offset - EIOINTC_NODETYPE_START;
+		data = s->nodetype.reg_u8[index];
+		break;
+	case EIOINTC_IPMAP_START ... EIOINTC_IPMAP_END:
+		index = offset - EIOINTC_IPMAP_START;
+		data = s->ipmap.reg_u8[index];
+		break;
+	case EIOINTC_ENABLE_START ... EIOINTC_ENABLE_END:
+		index = offset - EIOINTC_ENABLE_START;
+		data = s->enable.reg_u8[index];
+		break;
+	case EIOINTC_BOUNCE_START ... EIOINTC_BOUNCE_END:
+		index = offset - EIOINTC_BOUNCE_START;
+		data = s->bounce.reg_u8[index];
+		break;
+	case EIOINTC_COREISR_START ...
EIOINTC_COREISR_END: + index = offset - EIOINTC_COREISR_START; + data = s->coreisr.reg_u8[vcpu->vcpu_id][index]; + break; + case EIOINTC_COREMAP_START ... EIOINTC_COREMAP_END: + index = offset - EIOINTC_COREMAP_START; + data = s->coremap.reg_u8[index]; + break; + default: + ret = -EINVAL; + break; + } + *(u8 *)val = data; + + return ret; +} + +static int loongarch_eiointc_readw(struct kvm_vcpu *vcpu, struct loongarch_eiointc *s, + gpa_t addr, int len, void *val) +{ + int index, ret = 0; + u16 data = 0; + gpa_t offset; + + offset = addr - EIOINTC_BASE; + switch (offset) { + case EIOINTC_NODETYPE_START ... EIOINTC_NODETYPE_END: + index = (offset - EIOINTC_NODETYPE_START) >> 1; + data = s->nodetype.reg_u16[index]; + break; + case EIOINTC_IPMAP_START ... EIOINTC_IPMAP_END: + index = (offset - EIOINTC_IPMAP_START) >> 1; + data = s->ipmap.reg_u16[index]; + break; + case EIOINTC_ENABLE_START ... EIOINTC_ENABLE_END: + index = (offset - EIOINTC_ENABLE_START) >> 1; + data = s->enable.reg_u16[index]; + break; + case EIOINTC_BOUNCE_START ... EIOINTC_BOUNCE_END: + index = (offset - EIOINTC_BOUNCE_START) >> 1; + data = s->bounce.reg_u16[index]; + break; + case EIOINTC_COREISR_START ... EIOINTC_COREISR_END: + index = (offset - EIOINTC_COREISR_START) >> 1; + data = s->coreisr.reg_u16[vcpu->vcpu_id][index]; + break; + case EIOINTC_COREMAP_START ... EIOINTC_COREMAP_END: + index = (offset - EIOINTC_COREMAP_START) >> 1; + data = s->coremap.reg_u16[index]; + break; + default: + ret = -EINVAL; + break; + } + *(u16 *)val = data; + + return ret; +} + +static int loongarch_eiointc_readl(struct kvm_vcpu *vcpu, struct loongarch_eiointc *s, + gpa_t addr, int len, void *val) +{ + int index, ret = 0; + u32 data = 0; + gpa_t offset; + + offset = addr - EIOINTC_BASE; + switch (offset) { + case EIOINTC_NODETYPE_START ... EIOINTC_NODETYPE_END: + index = (offset - EIOINTC_NODETYPE_START) >> 2; + data = s->nodetype.reg_u32[index]; + break; + case EIOINTC_IPMAP_START ... 
EIOINTC_IPMAP_END: + index = (offset - EIOINTC_IPMAP_START) >> 2; + data = s->ipmap.reg_u32[index]; + break; + case EIOINTC_ENABLE_START ... EIOINTC_ENABLE_END: + index = (offset - EIOINTC_ENABLE_START) >> 2; + data = s->enable.reg_u32[index]; + break; + case EIOINTC_BOUNCE_START ... EIOINTC_BOUNCE_END: + index = (offset - EIOINTC_BOUNCE_START) >> 2; + data = s->bounce.reg_u32[index]; + break; + case EIOINTC_COREISR_START ... EIOINTC_COREISR_END: + index = (offset - EIOINTC_COREISR_START) >> 2; + data = s->coreisr.reg_u32[vcpu->vcpu_id][index]; + break; + case EIOINTC_COREMAP_START ... EIOINTC_COREMAP_END: + index = (offset - EIOINTC_COREMAP_START) >> 2; + data = s->coremap.reg_u32[index]; + break; + default: + ret = -EINVAL; + break; + } + *(u32 *)val = data; + + return ret; +} + +static int loongarch_eiointc_readq(struct kvm_vcpu *vcpu, struct loongarch_eiointc *s, + gpa_t addr, int len, void *val) +{ + int index, ret = 0; + u64 data = 0; + gpa_t offset; + + offset = addr - EIOINTC_BASE; + switch (offset) { + case EIOINTC_NODETYPE_START ... EIOINTC_NODETYPE_END: + index = (offset - EIOINTC_NODETYPE_START) >> 3; + data = s->nodetype.reg_u64[index]; + break; + case EIOINTC_IPMAP_START ... EIOINTC_IPMAP_END: + index = (offset - EIOINTC_IPMAP_START) >> 3; + data = s->ipmap.reg_u64; + break; + case EIOINTC_ENABLE_START ... EIOINTC_ENABLE_END: + index = (offset - EIOINTC_ENABLE_START) >> 3; + data = s->enable.reg_u64[index]; + break; + case EIOINTC_BOUNCE_START ... EIOINTC_BOUNCE_END: + index = (offset - EIOINTC_BOUNCE_START) >> 3; + data = s->bounce.reg_u64[index]; + break; + case EIOINTC_COREISR_START ... EIOINTC_COREISR_END: + index = (offset - EIOINTC_COREISR_START) >> 3; + data = s->coreisr.reg_u64[vcpu->vcpu_id][index]; + break; + case EIOINTC_COREMAP_START ... 
EIOINTC_COREMAP_END: + index = (offset - EIOINTC_COREMAP_START) >> 3; + data = s->coremap.reg_u64[index]; + break; + default: + ret = -EINVAL; + break; + } + *(u64 *)val = data; + + return ret; +} + +static int kvm_eiointc_read(struct kvm_vcpu *vcpu, + struct kvm_io_device *dev, + gpa_t addr, int len, void *val) +{ + int ret = -EINVAL; + unsigned long flags; + struct loongarch_eiointc *eiointc = vcpu->kvm->arch.eiointc; + + if (!eiointc) { + kvm_err("%s: eiointc irqchip not valid!\n", __func__); + return -EINVAL; + } + + vcpu->kvm->stat.eiointc_read_exits++; + spin_lock_irqsave(&eiointc->lock, flags); + switch (len) { + case 1: + ret = loongarch_eiointc_readb(vcpu, eiointc, addr, len, val); + break; + case 2: + ret = loongarch_eiointc_readw(vcpu, eiointc, addr, len, val); + break; + case 4: + ret = loongarch_eiointc_readl(vcpu, eiointc, addr, len, val); + break; + case 8: + ret = loongarch_eiointc_readq(vcpu, eiointc, addr, len, val); + break; + default: + WARN_ONCE(1, "%s: Abnormal address access: addr 0x%llx, size %d\n", + __func__, addr, len); + } + spin_unlock_irqrestore(&eiointc->lock, flags); + + return ret; +} + +static int loongarch_eiointc_writeb(struct kvm_vcpu *vcpu, + struct loongarch_eiointc *s, + gpa_t addr, int len, const void *val) +{ + int index, irq, bits, ret = 0; + u8 cpu; + u8 data, old_data; + u8 coreisr, old_coreisr; + gpa_t offset; + + data = *(u8 *)val; + offset = addr - EIOINTC_BASE; + + switch (offset) { + case EIOINTC_NODETYPE_START ... EIOINTC_NODETYPE_END: + index = (offset - EIOINTC_NODETYPE_START); + s->nodetype.reg_u8[index] = data; + break; + case EIOINTC_IPMAP_START ... EIOINTC_IPMAP_END: + /* + * ipmap cannot be set at runtime, can be set only at the beginning + * of irqchip driver, need not update upper irq level + */ + index = (offset - EIOINTC_IPMAP_START); + s->ipmap.reg_u8[index] = data; + break; + case EIOINTC_ENABLE_START ... 
EIOINTC_ENABLE_END: + index = (offset - EIOINTC_ENABLE_START); + old_data = s->enable.reg_u8[index]; + s->enable.reg_u8[index] = data; + /* + * 1: enable irq. + * update irq when isr is set. + */ + data = s->enable.reg_u8[index] & ~old_data & s->isr.reg_u8[index]; + eiointc_enable_irq(vcpu, s, index, data, 1); + /* + * 0: disable irq. + * update irq when isr is set. + */ + data = ~s->enable.reg_u8[index] & old_data & s->isr.reg_u8[index]; + eiointc_enable_irq(vcpu, s, index, data, 0); + break; + case EIOINTC_BOUNCE_START ... EIOINTC_BOUNCE_END: + /* do not emulate hw bounced irq routing */ + index = offset - EIOINTC_BOUNCE_START; + s->bounce.reg_u8[index] = data; + break; + case EIOINTC_COREISR_START ... EIOINTC_COREISR_END: + index = (offset - EIOINTC_COREISR_START); + /* use attrs to get current cpu index */ + cpu = vcpu->vcpu_id; + coreisr = data; + old_coreisr = s->coreisr.reg_u8[cpu][index]; + /* write 1 to clear interrupt */ + s->coreisr.reg_u8[cpu][index] = old_coreisr & ~coreisr; + coreisr &= old_coreisr; + bits = sizeof(data) * 8; + irq = find_first_bit((void *)&coreisr, bits); + while (irq < bits) { + eiointc_update_irq(s, irq + index * bits, 0); + bitmap_clear((void *)&coreisr, irq, 1); + irq = find_first_bit((void *)&coreisr, bits); + } + break; + case EIOINTC_COREMAP_START ... EIOINTC_COREMAP_END: + irq = offset - EIOINTC_COREMAP_START; + index = irq; + s->coremap.reg_u8[index] = data; + eiointc_update_sw_coremap(s, irq, (void *)&data, sizeof(data), true); + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +static int loongarch_eiointc_writew(struct kvm_vcpu *vcpu, + struct loongarch_eiointc *s, + gpa_t addr, int len, const void *val) +{ + int i, index, irq, bits, ret = 0; + u8 cpu; + u16 data, old_data; + u16 coreisr, old_coreisr; + gpa_t offset; + + data = *(u16 *)val; + offset = addr - EIOINTC_BASE; + + switch (offset) { + case EIOINTC_NODETYPE_START ... 
EIOINTC_NODETYPE_END:
+		index = (offset - EIOINTC_NODETYPE_START) >> 1;
+		s->nodetype.reg_u16[index] = data;
+		break;
+	case EIOINTC_IPMAP_START ... EIOINTC_IPMAP_END:
+		/*
+		 * ipmap cannot be set at runtime, can be set only at the beginning
+		 * of irqchip driver, need not update upper irq level
+		 */
+		index = (offset - EIOINTC_IPMAP_START) >> 1;
+		s->ipmap.reg_u16[index] = data;
+		break;
+	case EIOINTC_ENABLE_START ... EIOINTC_ENABLE_END:
+		/*
+		 * index counts 16-bit words and stays that way for the whole
+		 * case, so every reg_u16[] access uses the same word. The
+		 * previous enable value must come from the same 16-bit view
+		 * that is written below.
+		 */
+		index = (offset - EIOINTC_ENABLE_START) >> 1;
+		old_data = s->enable.reg_u16[index];
+		s->enable.reg_u16[index] = data;
+		/*
+		 * 1: enable irq.
+		 * update irq when isr is set.
+		 *
+		 * eiointc_enable_irq() takes a byte index, hence
+		 * (index << 1) + i for byte i of this word.
+		 */
+		data = s->enable.reg_u16[index] & ~old_data & s->isr.reg_u16[index];
+		for (i = 0; i < sizeof(data); i++) {
+			u8 mask = (data >> (i * 8)) & 0xff;
+			eiointc_enable_irq(vcpu, s, (index << 1) + i, mask, 1);
+		}
+		/*
+		 * 0: disable irq.
+		 * update irq when isr is set.
+		 */
+		data = ~s->enable.reg_u16[index] & old_data & s->isr.reg_u16[index];
+		for (i = 0; i < sizeof(data); i++) {
+			u8 mask = (data >> (i * 8)) & 0xff;
+			eiointc_enable_irq(vcpu, s, (index << 1) + i, mask, 0);
+		}
+		break;
+	case EIOINTC_BOUNCE_START ... EIOINTC_BOUNCE_END:
+		/* do not emulate hw bounced irq routing */
+		index = (offset - EIOINTC_BOUNCE_START) >> 1;
+		s->bounce.reg_u16[index] = data;
+		break;
+	case EIOINTC_COREISR_START ... EIOINTC_COREISR_END: {
+		/*
+		 * The bitmap helpers operate on unsigned long; scan a
+		 * long-sized copy instead of aliasing the 16-bit local.
+		 */
+		unsigned long pending;
+
+		index = (offset - EIOINTC_COREISR_START) >> 1;
+		/* use attrs to get current cpu index */
+		cpu = vcpu->vcpu_id;
+		coreisr = data;
+		old_coreisr = s->coreisr.reg_u16[cpu][index];
+		/* write 1 to clear interrupt */
+		s->coreisr.reg_u16[cpu][index] = old_coreisr & ~coreisr;
+		coreisr &= old_coreisr;
+		pending = coreisr;
+		bits = sizeof(data) * 8;
+		irq = find_first_bit(&pending, bits);
+		while (irq < bits) {
+			eiointc_update_irq(s, irq + index * bits, 0);
+			irq = find_next_bit(&pending, bits, irq + 1);
+		}
+		break;
+	}
+	case EIOINTC_COREMAP_START ...
EIOINTC_COREMAP_END:
+		irq = offset - EIOINTC_COREMAP_START;
+		index = irq >> 1;
+		s->coremap.reg_u16[index] = data;
+		eiointc_update_sw_coremap(s, irq, (void *)&data, sizeof(data), true);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+/*
+ * Handle a 32-bit guest write to the EIOINTC register window.
+ *
+ * @addr: guest physical address; its offset from EIOINTC_BASE selects the
+ *        register bank and the 32-bit word inside it.
+ * @val:  points to the 32-bit value to write.
+ *
+ * Returns 0, or -EINVAL when the offset lies outside every bank.
+ * kvm_eiointc_write() calls this with s->lock held.
+ */
+static int loongarch_eiointc_writel(struct kvm_vcpu *vcpu,
+				struct loongarch_eiointc *s,
+				gpa_t addr, int len, const void *val)
+{
+	int i, index, irq, bits, ret = 0;
+	u8 cpu;
+	u32 data, old_data;
+	u32 coreisr, old_coreisr;
+	gpa_t offset;
+
+	data = *(u32 *)val;
+	offset = addr - EIOINTC_BASE;
+
+	switch (offset) {
+	case EIOINTC_NODETYPE_START ... EIOINTC_NODETYPE_END:
+		index = (offset - EIOINTC_NODETYPE_START) >> 2;
+		s->nodetype.reg_u32[index] = data;
+		break;
+	case EIOINTC_IPMAP_START ... EIOINTC_IPMAP_END:
+		/*
+		 * ipmap cannot be set at runtime, can be set only at the beginning
+		 * of irqchip driver, need not update upper irq level
+		 */
+		index = (offset - EIOINTC_IPMAP_START) >> 2;
+		s->ipmap.reg_u32[index] = data;
+		break;
+	case EIOINTC_ENABLE_START ... EIOINTC_ENABLE_END:
+		/*
+		 * index counts 32-bit words and stays that way for the whole
+		 * case, so every reg_u32[] access uses the same word.
+		 */
+		index = (offset - EIOINTC_ENABLE_START) >> 2;
+		old_data = s->enable.reg_u32[index];
+		s->enable.reg_u32[index] = data;
+		/*
+		 * 1: enable irq.
+		 * update irq when isr is set.
+		 *
+		 * eiointc_enable_irq() takes a byte index, hence
+		 * (index << 2) + i for byte i of this word.
+		 */
+		data = s->enable.reg_u32[index] & ~old_data & s->isr.reg_u32[index];
+		for (i = 0; i < sizeof(data); i++) {
+			u8 mask = (data >> (i * 8)) & 0xff;
+			eiointc_enable_irq(vcpu, s, (index << 2) + i, mask, 1);
+		}
+		/*
+		 * 0: disable irq.
+		 * update irq when isr is set.
+		 */
+		data = ~s->enable.reg_u32[index] & old_data & s->isr.reg_u32[index];
+		for (i = 0; i < sizeof(data); i++) {
+			u8 mask = (data >> (i * 8)) & 0xff;
+			eiointc_enable_irq(vcpu, s, (index << 2) + i, mask, 0);
+		}
+		break;
+	case EIOINTC_BOUNCE_START ... EIOINTC_BOUNCE_END:
+		/* do not emulate hw bounced irq routing */
+		index = (offset - EIOINTC_BOUNCE_START) >> 2;
+		s->bounce.reg_u32[index] = data;
+		break;
+	case EIOINTC_COREISR_START ...
EIOINTC_COREISR_END: {
+		/*
+		 * The bitmap helpers operate on unsigned long; scan a
+		 * long-sized copy instead of aliasing the 32-bit local.
+		 */
+		unsigned long pending;
+
+		index = (offset - EIOINTC_COREISR_START) >> 2;
+		/* use attrs to get current cpu index */
+		cpu = vcpu->vcpu_id;
+		coreisr = data;
+		old_coreisr = s->coreisr.reg_u32[cpu][index];
+		/* write 1 to clear interrupt */
+		s->coreisr.reg_u32[cpu][index] = old_coreisr & ~coreisr;
+		coreisr &= old_coreisr;
+		pending = coreisr;
+		bits = sizeof(data) * 8;
+		irq = find_first_bit(&pending, bits);
+		while (irq < bits) {
+			eiointc_update_irq(s, irq + index * bits, 0);
+			irq = find_next_bit(&pending, bits, irq + 1);
+		}
+		break;
+	}
+	case EIOINTC_COREMAP_START ... EIOINTC_COREMAP_END:
+		irq = offset - EIOINTC_COREMAP_START;
+		index = irq >> 2;
+		s->coremap.reg_u32[index] = data;
+		eiointc_update_sw_coremap(s, irq, (void *)&data, sizeof(data), true);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+/*
+ * Handle a 64-bit guest write to the EIOINTC register window.
+ *
+ * @addr: guest physical address; its offset from EIOINTC_BASE selects the
+ *        register bank and the 64-bit word inside it.
+ * @val:  points to the 64-bit value to write.
+ *
+ * The return value starts at 0 and stays 0 for every bank handled in the
+ * visible part of the switch.
+ */
+static int loongarch_eiointc_writeq(struct kvm_vcpu *vcpu,
+				struct loongarch_eiointc *s,
+				gpa_t addr, int len, const void *val)
+{
+	int i, index, irq, bits, ret = 0;
+	u8 cpu;
+	u64 data, old_data;
+	u64 coreisr, old_coreisr;
+	gpa_t offset;
+
+	data = *(u64 *)val;
+	offset = addr - EIOINTC_BASE;
+
+	switch (offset) {
+	case EIOINTC_NODETYPE_START ... EIOINTC_NODETYPE_END:
+		index = (offset - EIOINTC_NODETYPE_START) >> 3;
+		s->nodetype.reg_u64[index] = data;
+		break;
+	case EIOINTC_IPMAP_START ... EIOINTC_IPMAP_END:
+		/*
+		 * ipmap cannot be set at runtime, can be set only at the beginning
+		 * of irqchip driver, need not update upper irq level
+		 */
+		index = (offset - EIOINTC_IPMAP_START) >> 3;
+		s->ipmap.reg_u64 = data;
+		break;
+	case EIOINTC_ENABLE_START ... EIOINTC_ENABLE_END:
+		/*
+		 * index counts 64-bit words and stays that way for the whole
+		 * case, so every reg_u64[] access uses the same word.
+		 */
+		index = (offset - EIOINTC_ENABLE_START) >> 3;
+		old_data = s->enable.reg_u64[index];
+		s->enable.reg_u64[index] = data;
+		/*
+		 * 1: enable irq.
+		 * update irq when isr is set.
+		 *
+		 * eiointc_enable_irq() takes a byte index, hence
+		 * (index << 3) + i for byte i of this word.
+		 */
+		data = s->enable.reg_u64[index] & ~old_data & s->isr.reg_u64[index];
+		for (i = 0; i < sizeof(data); i++) {
+			u8 mask = (data >> (i * 8)) & 0xff;
+			eiointc_enable_irq(vcpu, s, (index << 3) + i, mask, 1);
+		}
+		/*
+		 * 0: disable irq.
+		 * update irq when isr is set.
+		 */
+		data = ~s->enable.reg_u64[index] & old_data & s->isr.reg_u64[index];
+		for (i = 0; i < sizeof(data); i++) {
+			u8 mask = (data >> (i * 8)) & 0xff;
+			eiointc_enable_irq(vcpu, s, (index << 3) + i, mask, 0);
+		}
+		break;
+	case EIOINTC_BOUNCE_START ... EIOINTC_BOUNCE_END:
+		/* do not emulate hw bounced irq routing */
+		index = (offset - EIOINTC_BOUNCE_START) >> 3;
+		s->bounce.reg_u64[index] = data;
+		break;
+	case EIOINTC_COREISR_START ... EIOINTC_COREISR_END: {
+		/* Scan a long-sized copy of the bits that were just cleared. */
+		unsigned long pending;
+
+		index = (offset - EIOINTC_COREISR_START) >> 3;
+		/* use attrs to get current cpu index */
+		cpu = vcpu->vcpu_id;
+		coreisr = data;
+		old_coreisr = s->coreisr.reg_u64[cpu][index];
+		/* write 1 to clear interrupt */
+		s->coreisr.reg_u64[cpu][index] = old_coreisr & ~coreisr;
+		coreisr &= old_coreisr;
+		pending = coreisr;
+		bits = sizeof(data) * 8;
+		irq = find_first_bit(&pending, bits);
+		while (irq < bits) {
+			eiointc_update_irq(s, irq + index * bits, 0);
+			irq = find_next_bit(&pending, bits, irq + 1);
+		}
+		break;
+	}
+	case EIOINTC_COREMAP_START ...
EIOINTC_COREMAP_END: + irq = offset - EIOINTC_COREMAP_START; + index = irq >> 3; + s->coremap.reg_u64[index] = data; + eiointc_update_sw_coremap(s, irq, (void *)&data, sizeof(data), true); + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +static int kvm_eiointc_write(struct kvm_vcpu *vcpu, + struct kvm_io_device *dev, + gpa_t addr, int len, const void *val) +{ + int ret = -EINVAL; + unsigned long flags; + struct loongarch_eiointc *eiointc = vcpu->kvm->arch.eiointc; + + if (!eiointc) { + kvm_err("%s: eiointc irqchip not valid!\n", __func__); + return -EINVAL; + } + + vcpu->kvm->stat.eiointc_write_exits++; + spin_lock_irqsave(&eiointc->lock, flags); + switch (len) { + case 1: + ret = loongarch_eiointc_writeb(vcpu, eiointc, addr, len, val); + break; + case 2: + ret = loongarch_eiointc_writew(vcpu, eiointc, addr, len, val); + break; + case 4: + ret = loongarch_eiointc_writel(vcpu, eiointc, addr, len, val); + break; + case 8: + ret = loongarch_eiointc_writeq(vcpu, eiointc, addr, len, val); + break; + default: + WARN_ONCE(1, "%s: Abnormal address access: addr 0x%llx, size %d\n", + __func__, addr, len); + } + spin_unlock_irqrestore(&eiointc->lock, flags); + + return ret; +} + +static const struct kvm_io_device_ops kvm_eiointc_ops = { + .read = kvm_eiointc_read, + .write = kvm_eiointc_write, +}; + +static int kvm_eiointc_virt_read(struct kvm_vcpu *vcpu, + struct kvm_io_device *dev, + gpa_t addr, int len, void *val) +{ + unsigned long flags; + u32 *data = val; + struct loongarch_eiointc *eiointc = vcpu->kvm->arch.eiointc; + + if (!eiointc) { + kvm_err("%s: eiointc irqchip not valid!\n", __func__); + return -EINVAL; + } + + addr -= EIOINTC_VIRT_BASE; + spin_lock_irqsave(&eiointc->lock, flags); + switch (addr) { + case EIOINTC_VIRT_FEATURES: + *data = eiointc->features; + break; + case EIOINTC_VIRT_CONFIG: + *data = eiointc->status; + break; + default: + break; + } + spin_unlock_irqrestore(&eiointc->lock, flags); + + return 0; +} + +static int 
kvm_eiointc_virt_write(struct kvm_vcpu *vcpu, + struct kvm_io_device *dev, + gpa_t addr, int len, const void *val) +{ + int ret = 0; + unsigned long flags; + u32 value = *(u32 *)val; + struct loongarch_eiointc *eiointc = vcpu->kvm->arch.eiointc; + + if (!eiointc) { + kvm_err("%s: eiointc irqchip not valid!\n", __func__); + return -EINVAL; + } + + addr -= EIOINTC_VIRT_BASE; + spin_lock_irqsave(&eiointc->lock, flags); + switch (addr) { + case EIOINTC_VIRT_FEATURES: + ret = -EPERM; + break; + case EIOINTC_VIRT_CONFIG: + /* + * eiointc features can only be set at disabled status + */ + if ((eiointc->status & BIT(EIOINTC_ENABLE)) && value) { + ret = -EPERM; + break; + } + eiointc->status = value & eiointc->features; + break; + default: + break; + } + spin_unlock_irqrestore(&eiointc->lock, flags); + + return ret; +} + +static const struct kvm_io_device_ops kvm_eiointc_virt_ops = { + .read = kvm_eiointc_virt_read, + .write = kvm_eiointc_virt_write, +}; + +static int kvm_eiointc_ctrl_access(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + int ret = 0; + unsigned long flags; + unsigned long type = (unsigned long)attr->attr; + u32 i, start_irq; + void __user *data; + struct loongarch_eiointc *s = dev->kvm->arch.eiointc; + + data = (void __user *)attr->addr; + spin_lock_irqsave(&s->lock, flags); + switch (type) { + case KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_NUM_CPU: + if (copy_from_user(&s->num_cpu, data, 4)) + ret = -EFAULT; + break; + case KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_FEATURE: + if (copy_from_user(&s->features, data, 4)) + ret = -EFAULT; + if (!(s->features & BIT(EIOINTC_HAS_VIRT_EXTENSION))) + s->status |= BIT(EIOINTC_ENABLE); + break; + case KVM_DEV_LOONGARCH_EXTIOI_CTRL_LOAD_FINISHED: + eiointc_set_sw_coreisr(s); + for (i = 0; i < (EIOINTC_IRQS / 4); i++) { + start_irq = i * 4; + eiointc_update_sw_coremap(s, start_irq, + (void *)&s->coremap.reg_u32[i], sizeof(u32), false); + } + break; + default: + break; + } + spin_unlock_irqrestore(&s->lock, flags); + + 
return ret; +} + +static int kvm_eiointc_regs_access(struct kvm_device *dev, + struct kvm_device_attr *attr, + bool is_write) +{ + int addr, cpuid, offset, ret = 0; + unsigned long flags; + void *p = NULL; + void __user *data; + struct loongarch_eiointc *s; + + s = dev->kvm->arch.eiointc; + addr = attr->attr; + cpuid = addr >> 16; + addr &= 0xffff; + data = (void __user *)attr->addr; + switch (addr) { + case EIOINTC_NODETYPE_START ... EIOINTC_NODETYPE_END: + offset = (addr - EIOINTC_NODETYPE_START) / 4; + p = &s->nodetype.reg_u32[offset]; + break; + case EIOINTC_IPMAP_START ... EIOINTC_IPMAP_END: + offset = (addr - EIOINTC_IPMAP_START) / 4; + p = &s->ipmap.reg_u32[offset]; + break; + case EIOINTC_ENABLE_START ... EIOINTC_ENABLE_END: + offset = (addr - EIOINTC_ENABLE_START) / 4; + p = &s->enable.reg_u32[offset]; + break; + case EIOINTC_BOUNCE_START ... EIOINTC_BOUNCE_END: + offset = (addr - EIOINTC_BOUNCE_START) / 4; + p = &s->bounce.reg_u32[offset]; + break; + case EIOINTC_ISR_START ... EIOINTC_ISR_END: + offset = (addr - EIOINTC_ISR_START) / 4; + p = &s->isr.reg_u32[offset]; + break; + case EIOINTC_COREISR_START ... EIOINTC_COREISR_END: + offset = (addr - EIOINTC_COREISR_START) / 4; + p = &s->coreisr.reg_u32[cpuid][offset]; + break; + case EIOINTC_COREMAP_START ... 
EIOINTC_COREMAP_END: + offset = (addr - EIOINTC_COREMAP_START) / 4; + p = &s->coremap.reg_u32[offset]; + break; + default: + kvm_err("%s: unknown eiointc register, addr = %d\n", __func__, addr); + return -EINVAL; + } + + spin_lock_irqsave(&s->lock, flags); + if (is_write) { + if (copy_from_user(p, data, 4)) + ret = -EFAULT; + } else { + if (copy_to_user(data, p, 4)) + ret = -EFAULT; + } + spin_unlock_irqrestore(&s->lock, flags); + + return ret; +} + +static int kvm_eiointc_sw_status_access(struct kvm_device *dev, + struct kvm_device_attr *attr, + bool is_write) +{ + int addr, ret = 0; + unsigned long flags; + void *p = NULL; + void __user *data; + struct loongarch_eiointc *s; + + s = dev->kvm->arch.eiointc; + addr = attr->attr; + addr &= 0xffff; + + data = (void __user *)attr->addr; + switch (addr) { + case KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_NUM_CPU: + p = &s->num_cpu; + break; + case KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_FEATURE: + p = &s->features; + break; + case KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_STATE: + p = &s->status; + break; + default: + kvm_err("%s: unknown eiointc register, addr = %d\n", __func__, addr); + return -EINVAL; + } + spin_lock_irqsave(&s->lock, flags); + if (is_write) { + if (copy_from_user(p, data, 4)) + ret = -EFAULT; + } else { + if (copy_to_user(data, p, 4)) + ret = -EFAULT; + } + spin_unlock_irqrestore(&s->lock, flags); + + return ret; +} + +static int kvm_eiointc_get_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_DEV_LOONGARCH_EXTIOI_GRP_REGS: + return kvm_eiointc_regs_access(dev, attr, false); + case KVM_DEV_LOONGARCH_EXTIOI_GRP_SW_STATUS: + return kvm_eiointc_sw_status_access(dev, attr, false); + default: + return -EINVAL; + } +} + +static int kvm_eiointc_set_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_DEV_LOONGARCH_EXTIOI_GRP_CTRL: + return kvm_eiointc_ctrl_access(dev, attr); + case KVM_DEV_LOONGARCH_EXTIOI_GRP_REGS: + return 
kvm_eiointc_regs_access(dev, attr, true); + case KVM_DEV_LOONGARCH_EXTIOI_GRP_SW_STATUS: + return kvm_eiointc_sw_status_access(dev, attr, true); + default: + return -EINVAL; + } +} + +static int kvm_eiointc_create(struct kvm_device *dev, u32 type) +{ + int ret; + struct loongarch_eiointc *s; + struct kvm_io_device *device, *device1; + struct kvm *kvm = dev->kvm; + + /* eiointc has been created */ + if (kvm->arch.eiointc) + return -EINVAL; + + s = kzalloc(sizeof(struct loongarch_eiointc), GFP_KERNEL); + if (!s) + return -ENOMEM; + + spin_lock_init(&s->lock); + s->kvm = kvm; + + /* + * Initialize IOCSR device + */ + device = &s->device; + kvm_iodevice_init(device, &kvm_eiointc_ops); + mutex_lock(&kvm->slots_lock); + ret = kvm_io_bus_register_dev(kvm, KVM_IOCSR_BUS, + EIOINTC_BASE, EIOINTC_SIZE, device); + mutex_unlock(&kvm->slots_lock); + if (ret < 0) { + kfree(s); + return ret; + } + + device1 = &s->device_vext; + kvm_iodevice_init(device1, &kvm_eiointc_virt_ops); + ret = kvm_io_bus_register_dev(kvm, KVM_IOCSR_BUS, + EIOINTC_VIRT_BASE, EIOINTC_VIRT_SIZE, device1); + if (ret < 0) { + kvm_io_bus_unregister_dev(kvm, KVM_IOCSR_BUS, &s->device); + kfree(s); + return ret; + } + kvm->arch.eiointc = s; + + return 0; +} + +static void kvm_eiointc_destroy(struct kvm_device *dev) +{ + struct kvm *kvm; + struct loongarch_eiointc *eiointc; + + if (!dev || !dev->kvm || !dev->kvm->arch.eiointc) + return; + + kvm = dev->kvm; + eiointc = kvm->arch.eiointc; + kvm_io_bus_unregister_dev(kvm, KVM_IOCSR_BUS, &eiointc->device); + kvm_io_bus_unregister_dev(kvm, KVM_IOCSR_BUS, &eiointc->device_vext); + kfree(eiointc); +} + +static struct kvm_device_ops kvm_eiointc_dev_ops = { + .name = "kvm-loongarch-eiointc", + .create = kvm_eiointc_create, + .destroy = kvm_eiointc_destroy, + .set_attr = kvm_eiointc_set_attr, + .get_attr = kvm_eiointc_get_attr, +}; + +int kvm_loongarch_register_eiointc_device(void) +{ + return kvm_register_device_ops(&kvm_eiointc_dev_ops, KVM_DEV_TYPE_LOONGARCH_EIOINTC); 
+} diff --git a/arch/loongarch/kvm/intc/ipi.c b/arch/loongarch/kvm/intc/ipi.c new file mode 100644 index 000000000000..93f4acd44523 --- /dev/null +++ b/arch/loongarch/kvm/intc/ipi.c @@ -0,0 +1,479 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Loongson Technology Corporation Limited + */ + +#include <linux/kvm_host.h> +#include <asm/kvm_ipi.h> +#include <asm/kvm_vcpu.h> + +static void ipi_send(struct kvm *kvm, uint64_t data) +{ + int cpu, action; + uint32_t status; + struct kvm_vcpu *vcpu; + struct kvm_interrupt irq; + + cpu = ((data & 0xffffffff) >> 16) & 0x3ff; + vcpu = kvm_get_vcpu_by_cpuid(kvm, cpu); + if (unlikely(vcpu == NULL)) { + kvm_err("%s: invalid target cpu: %d\n", __func__, cpu); + return; + } + + action = BIT(data & 0x1f); + spin_lock(&vcpu->arch.ipi_state.lock); + status = vcpu->arch.ipi_state.status; + vcpu->arch.ipi_state.status |= action; + spin_unlock(&vcpu->arch.ipi_state.lock); + if (status == 0) { + irq.irq = LARCH_INT_IPI; + kvm_vcpu_ioctl_interrupt(vcpu, &irq); + } +} + +static void ipi_clear(struct kvm_vcpu *vcpu, uint64_t data) +{ + uint32_t status; + struct kvm_interrupt irq; + + spin_lock(&vcpu->arch.ipi_state.lock); + vcpu->arch.ipi_state.status &= ~data; + status = vcpu->arch.ipi_state.status; + spin_unlock(&vcpu->arch.ipi_state.lock); + if (status == 0) { + irq.irq = -LARCH_INT_IPI; + kvm_vcpu_ioctl_interrupt(vcpu, &irq); + } +} + +static uint64_t read_mailbox(struct kvm_vcpu *vcpu, int offset, int len) +{ + uint64_t data = 0; + + spin_lock(&vcpu->arch.ipi_state.lock); + data = *(ulong *)((void *)vcpu->arch.ipi_state.buf + (offset - 0x20)); + spin_unlock(&vcpu->arch.ipi_state.lock); + + switch (len) { + case 1: + return data & 0xff; + case 2: + return data & 0xffff; + case 4: + return data & 0xffffffff; + case 8: + return data; + default: + kvm_err("%s: unknown data len: %d\n", __func__, len); + return 0; + } +} + +static void write_mailbox(struct kvm_vcpu *vcpu, int offset, uint64_t data, int len) +{ + void *pbuf; 
+ + spin_lock(&vcpu->arch.ipi_state.lock); + pbuf = (void *)vcpu->arch.ipi_state.buf + (offset - 0x20); + + switch (len) { + case 1: + *(unsigned char *)pbuf = (unsigned char)data; + break; + case 2: + *(unsigned short *)pbuf = (unsigned short)data; + break; + case 4: + *(unsigned int *)pbuf = (unsigned int)data; + break; + case 8: + *(unsigned long *)pbuf = (unsigned long)data; + break; + default: + kvm_err("%s: unknown data len: %d\n", __func__, len); + } + spin_unlock(&vcpu->arch.ipi_state.lock); +} + +static int send_ipi_data(struct kvm_vcpu *vcpu, gpa_t addr, uint64_t data) +{ + int i, idx, ret; + uint32_t val = 0, mask = 0; + + /* + * Bit 27-30 is mask for byte writing. + * If the mask is 0, we need not to do anything. + */ + if ((data >> 27) & 0xf) { + /* Read the old val */ + idx = srcu_read_lock(&vcpu->kvm->srcu); + ret = kvm_io_bus_read(vcpu, KVM_IOCSR_BUS, addr, sizeof(val), &val); + srcu_read_unlock(&vcpu->kvm->srcu, idx); + if (unlikely(ret)) { + kvm_err("%s: : read date from addr %llx failed\n", __func__, addr); + return ret; + } + /* Construct the mask by scanning the bit 27-30 */ + for (i = 0; i < 4; i++) { + if (data & (BIT(27 + i))) + mask |= (0xff << (i * 8)); + } + /* Save the old part of val */ + val &= mask; + } + val |= ((uint32_t)(data >> 32) & ~mask); + idx = srcu_read_lock(&vcpu->kvm->srcu); + ret = kvm_io_bus_write(vcpu, KVM_IOCSR_BUS, addr, sizeof(val), &val); + srcu_read_unlock(&vcpu->kvm->srcu, idx); + if (unlikely(ret)) + kvm_err("%s: : write date to addr %llx failed\n", __func__, addr); + + return ret; +} + +static int mail_send(struct kvm *kvm, uint64_t data) +{ + int cpu, mailbox, offset; + struct kvm_vcpu *vcpu; + + cpu = ((data & 0xffffffff) >> 16) & 0x3ff; + vcpu = kvm_get_vcpu_by_cpuid(kvm, cpu); + if (unlikely(vcpu == NULL)) { + kvm_err("%s: invalid target cpu: %d\n", __func__, cpu); + return -EINVAL; + } + mailbox = ((data & 0xffffffff) >> 2) & 0x7; + offset = IOCSR_IPI_BASE + IOCSR_IPI_BUF_20 + mailbox * 4; + + return 
send_ipi_data(vcpu, offset, data); +} + +static int any_send(struct kvm *kvm, uint64_t data) +{ + int cpu, offset; + struct kvm_vcpu *vcpu; + + cpu = ((data & 0xffffffff) >> 16) & 0x3ff; + vcpu = kvm_get_vcpu_by_cpuid(kvm, cpu); + if (unlikely(vcpu == NULL)) { + kvm_err("%s: invalid target cpu: %d\n", __func__, cpu); + return -EINVAL; + } + offset = data & 0xffff; + + return send_ipi_data(vcpu, offset, data); +} + +static int loongarch_ipi_readl(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *val) +{ + int ret = 0; + uint32_t offset; + uint64_t res = 0; + + offset = (uint32_t)(addr & 0x1ff); + WARN_ON_ONCE(offset & (len - 1)); + + switch (offset) { + case IOCSR_IPI_STATUS: + spin_lock(&vcpu->arch.ipi_state.lock); + res = vcpu->arch.ipi_state.status; + spin_unlock(&vcpu->arch.ipi_state.lock); + break; + case IOCSR_IPI_EN: + spin_lock(&vcpu->arch.ipi_state.lock); + res = vcpu->arch.ipi_state.en; + spin_unlock(&vcpu->arch.ipi_state.lock); + break; + case IOCSR_IPI_SET: + res = 0; + break; + case IOCSR_IPI_CLEAR: + res = 0; + break; + case IOCSR_IPI_BUF_20 ... 
IOCSR_IPI_BUF_38 + 7: + if (offset + len > IOCSR_IPI_BUF_38 + 8) { + kvm_err("%s: invalid offset or len: offset = %d, len = %d\n", + __func__, offset, len); + ret = -EINVAL; + break; + } + res = read_mailbox(vcpu, offset, len); + break; + default: + kvm_err("%s: unknown addr: %llx\n", __func__, addr); + ret = -EINVAL; + break; + } + *(uint64_t *)val = res; + + return ret; +} + +static int loongarch_ipi_writel(struct kvm_vcpu *vcpu, gpa_t addr, int len, const void *val) +{ + int ret = 0; + uint64_t data; + uint32_t offset; + + data = *(uint64_t *)val; + + offset = (uint32_t)(addr & 0x1ff); + WARN_ON_ONCE(offset & (len - 1)); + + switch (offset) { + case IOCSR_IPI_STATUS: + ret = -EINVAL; + break; + case IOCSR_IPI_EN: + spin_lock(&vcpu->arch.ipi_state.lock); + vcpu->arch.ipi_state.en = data; + spin_unlock(&vcpu->arch.ipi_state.lock); + break; + case IOCSR_IPI_SET: + ret = -EINVAL; + break; + case IOCSR_IPI_CLEAR: + /* Just clear the status of the current vcpu */ + ipi_clear(vcpu, data); + break; + case IOCSR_IPI_BUF_20 ... 
IOCSR_IPI_BUF_38 + 7: + if (offset + len > IOCSR_IPI_BUF_38 + 8) { + kvm_err("%s: invalid offset or len: offset = %d, len = %d\n", + __func__, offset, len); + ret = -EINVAL; + break; + } + write_mailbox(vcpu, offset, data, len); + break; + case IOCSR_IPI_SEND: + ipi_send(vcpu->kvm, data); + break; + case IOCSR_MAIL_SEND: + ret = mail_send(vcpu->kvm, *(uint64_t *)val); + break; + case IOCSR_ANY_SEND: + ret = any_send(vcpu->kvm, *(uint64_t *)val); + break; + default: + kvm_err("%s: unknown addr: %llx\n", __func__, addr); + ret = -EINVAL; + break; + } + + return ret; +} + +static int kvm_ipi_read(struct kvm_vcpu *vcpu, + struct kvm_io_device *dev, + gpa_t addr, int len, void *val) +{ + int ret; + struct loongarch_ipi *ipi; + + ipi = vcpu->kvm->arch.ipi; + if (!ipi) { + kvm_err("%s: ipi irqchip not valid!\n", __func__); + return -EINVAL; + } + ipi->kvm->stat.ipi_read_exits++; + ret = loongarch_ipi_readl(vcpu, addr, len, val); + + return ret; +} + +static int kvm_ipi_write(struct kvm_vcpu *vcpu, + struct kvm_io_device *dev, + gpa_t addr, int len, const void *val) +{ + int ret; + struct loongarch_ipi *ipi; + + ipi = vcpu->kvm->arch.ipi; + if (!ipi) { + kvm_err("%s: ipi irqchip not valid!\n", __func__); + return -EINVAL; + } + ipi->kvm->stat.ipi_write_exits++; + ret = loongarch_ipi_writel(vcpu, addr, len, val); + + return ret; +} + +static const struct kvm_io_device_ops kvm_ipi_ops = { + .read = kvm_ipi_read, + .write = kvm_ipi_write, +}; + +static int kvm_ipi_regs_access(struct kvm_device *dev, + struct kvm_device_attr *attr, + bool is_write) +{ + int len = 4; + int cpu, addr; + uint64_t val; + void *p = NULL; + struct kvm_vcpu *vcpu; + + cpu = (attr->attr >> 16) & 0x3ff; + addr = attr->attr & 0xff; + + vcpu = kvm_get_vcpu(dev->kvm, cpu); + if (unlikely(vcpu == NULL)) { + kvm_err("%s: invalid target cpu: %d\n", __func__, cpu); + return -EINVAL; + } + + switch (addr) { + case IOCSR_IPI_STATUS: + p = &vcpu->arch.ipi_state.status; + break; + case IOCSR_IPI_EN: + p = 
&vcpu->arch.ipi_state.en; + break; + case IOCSR_IPI_SET: + p = &vcpu->arch.ipi_state.set; + break; + case IOCSR_IPI_CLEAR: + p = &vcpu->arch.ipi_state.clear; + break; + case IOCSR_IPI_BUF_20: + p = &vcpu->arch.ipi_state.buf[0]; + len = 8; + break; + case IOCSR_IPI_BUF_28: + p = &vcpu->arch.ipi_state.buf[1]; + len = 8; + break; + case IOCSR_IPI_BUF_30: + p = &vcpu->arch.ipi_state.buf[2]; + len = 8; + break; + case IOCSR_IPI_BUF_38: + p = &vcpu->arch.ipi_state.buf[3]; + len = 8; + break; + default: + kvm_err("%s: unknown ipi register, addr = %d\n", __func__, addr); + return -EINVAL; + } + + if (is_write) { + if (len == 4) { + if (get_user(val, (uint32_t __user *)attr->addr)) + return -EFAULT; + *(uint32_t *)p = (uint32_t)val; + } else if (len == 8) { + if (get_user(val, (uint64_t __user *)attr->addr)) + return -EFAULT; + *(uint64_t *)p = val; + } + } else { + if (len == 4) { + val = *(uint32_t *)p; + return put_user(val, (uint32_t __user *)attr->addr); + } else if (len == 8) { + val = *(uint64_t *)p; + return put_user(val, (uint64_t __user *)attr->addr); + } + } + + return 0; +} + +static int kvm_ipi_get_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_DEV_LOONGARCH_IPI_GRP_REGS: + return kvm_ipi_regs_access(dev, attr, false); + default: + kvm_err("%s: unknown group (%d)\n", __func__, attr->group); + return -EINVAL; + } +} + +static int kvm_ipi_set_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_DEV_LOONGARCH_IPI_GRP_REGS: + return kvm_ipi_regs_access(dev, attr, true); + default: + kvm_err("%s: unknown group (%d)\n", __func__, attr->group); + return -EINVAL; + } +} + +static int kvm_ipi_create(struct kvm_device *dev, u32 type) +{ + int ret; + struct kvm *kvm; + struct kvm_io_device *device; + struct loongarch_ipi *s; + + if (!dev) { + kvm_err("%s: kvm_device ptr is invalid!\n", __func__); + return -EINVAL; + } + + kvm = dev->kvm; + if (kvm->arch.ipi) { + 
kvm_err("%s: LoongArch IPI has already been created!\n", __func__); + return -EINVAL; + } + + s = kzalloc(sizeof(struct loongarch_ipi), GFP_KERNEL); + if (!s) + return -ENOMEM; + + spin_lock_init(&s->lock); + s->kvm = kvm; + + /* + * Initialize IOCSR device + */ + device = &s->device; + kvm_iodevice_init(device, &kvm_ipi_ops); + mutex_lock(&kvm->slots_lock); + ret = kvm_io_bus_register_dev(kvm, KVM_IOCSR_BUS, IOCSR_IPI_BASE, IOCSR_IPI_SIZE, device); + mutex_unlock(&kvm->slots_lock); + if (ret < 0) { + kvm_err("%s: Initialize IOCSR dev failed, ret = %d\n", __func__, ret); + goto err; + } + + kvm->arch.ipi = s; + return 0; + +err: + kfree(s); + return -EFAULT; +} + +static void kvm_ipi_destroy(struct kvm_device *dev) +{ + struct kvm *kvm; + struct loongarch_ipi *ipi; + + if (!dev || !dev->kvm || !dev->kvm->arch.ipi) + return; + + kvm = dev->kvm; + ipi = kvm->arch.ipi; + kvm_io_bus_unregister_dev(kvm, KVM_IOCSR_BUS, &ipi->device); + kfree(ipi); +} + +static struct kvm_device_ops kvm_ipi_dev_ops = { + .name = "kvm-loongarch-ipi", + .create = kvm_ipi_create, + .destroy = kvm_ipi_destroy, + .set_attr = kvm_ipi_set_attr, + .get_attr = kvm_ipi_get_attr, +}; + +int kvm_loongarch_register_ipi_device(void) +{ + return kvm_register_device_ops(&kvm_ipi_dev_ops, KVM_DEV_TYPE_LOONGARCH_IPI); +} diff --git a/arch/loongarch/kvm/intc/pch_pic.c b/arch/loongarch/kvm/intc/pch_pic.c new file mode 100644 index 000000000000..08fce845f668 --- /dev/null +++ b/arch/loongarch/kvm/intc/pch_pic.c @@ -0,0 +1,519 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Loongson Technology Corporation Limited + */ + +#include <asm/kvm_eiointc.h> +#include <asm/kvm_pch_pic.h> +#include <asm/kvm_vcpu.h> +#include <linux/count_zeros.h> + +/* update the isr according to irq level and route irq to eiointc */ +static void pch_pic_update_irq(struct loongarch_pch_pic *s, int irq, int level) +{ + u64 mask = BIT(irq); + + /* + * set isr and route irq to eiointc and + * the route table is in 
htmsi_vector[] + */ + if (level) { + if (mask & s->irr & ~s->mask) { + s->isr |= mask; + irq = s->htmsi_vector[irq]; + eiointc_set_irq(s->kvm->arch.eiointc, irq, level); + } + } else { + if (mask & s->isr & ~s->irr) { + s->isr &= ~mask; + irq = s->htmsi_vector[irq]; + eiointc_set_irq(s->kvm->arch.eiointc, irq, level); + } + } +} + +/* update batch irqs, the irq_mask is a bitmap of irqs */ +static void pch_pic_update_batch_irqs(struct loongarch_pch_pic *s, u64 irq_mask, int level) +{ + int irq, bits; + + /* find each irq by irqs bitmap and update each irq */ + bits = sizeof(irq_mask) * 8; + irq = find_first_bit((void *)&irq_mask, bits); + while (irq < bits) { + pch_pic_update_irq(s, irq, level); + bitmap_clear((void *)&irq_mask, irq, 1); + irq = find_first_bit((void *)&irq_mask, bits); + } +} + +/* called when a irq is triggered in pch pic */ +void pch_pic_set_irq(struct loongarch_pch_pic *s, int irq, int level) +{ + u64 mask = BIT(irq); + + spin_lock(&s->lock); + if (level) + s->irr |= mask; /* set irr */ + else { + /* + * In edge triggered mode, 0 does not mean to clear irq + * The irr register variable is cleared when cpu writes to the + * PCH_PIC_CLEAR_START address area + */ + if (s->edge & mask) { + spin_unlock(&s->lock); + return; + } + s->irr &= ~mask; + } + pch_pic_update_irq(s, irq, level); + spin_unlock(&s->lock); +} + +/* msi irq handler */ +void pch_msi_set_irq(struct kvm *kvm, int irq, int level) +{ + eiointc_set_irq(kvm->arch.eiointc, irq, level); +} + +/* + * pch pic register is 64-bit, but it is accessed by 32-bit, + * so we use high to get whether low or high 32 bits we want + * to read. + */ +static u32 pch_pic_read_reg(u64 *s, int high) +{ + u64 val = *s; + + /* read the high 32 bits when high is 1 */ + return high ? (u32)(val >> 32) : (u32)val; +} + +/* + * pch pic register is 64-bit, but it is accessed by 32-bit, + * so we use high to get whether low or high 32 bits we want + * to write. 
+ */ +static u32 pch_pic_write_reg(u64 *s, int high, u32 v) +{ + u64 val = *s, data = v; + + if (high) { + /* + * Clear val high 32 bits + * Write the high 32 bits when the high is 1 + */ + *s = (val << 32 >> 32) | (data << 32); + val >>= 32; + } else + /* + * Clear val low 32 bits + * Write the low 32 bits when the high is 0 + */ + *s = (val >> 32 << 32) | v; + + return (u32)val; +} + +static int loongarch_pch_pic_read(struct loongarch_pch_pic *s, gpa_t addr, int len, void *val) +{ + int offset, index, ret = 0; + u32 data = 0; + u64 int_id = 0; + + offset = addr - s->pch_pic_base; + + spin_lock(&s->lock); + switch (offset) { + case PCH_PIC_INT_ID_START ... PCH_PIC_INT_ID_END: + /* int id version */ + int_id |= (u64)PCH_PIC_INT_ID_VER << 32; + /* irq number */ + int_id |= (u64)31 << (32 + 16); + /* int id value */ + int_id |= PCH_PIC_INT_ID_VAL; + *(u64 *)val = int_id; + break; + case PCH_PIC_MASK_START ... PCH_PIC_MASK_END: + offset -= PCH_PIC_MASK_START; + index = offset >> 2; + /* read mask reg */ + data = pch_pic_read_reg(&s->mask, index); + *(u32 *)val = data; + break; + case PCH_PIC_HTMSI_EN_START ... PCH_PIC_HTMSI_EN_END: + offset -= PCH_PIC_HTMSI_EN_START; + index = offset >> 2; + /* read htmsi enable reg */ + data = pch_pic_read_reg(&s->htmsi_en, index); + *(u32 *)val = data; + break; + case PCH_PIC_EDGE_START ... PCH_PIC_EDGE_END: + offset -= PCH_PIC_EDGE_START; + index = offset >> 2; + /* read edge enable reg */ + data = pch_pic_read_reg(&s->edge, index); + *(u32 *)val = data; + break; + case PCH_PIC_AUTO_CTRL0_START ... PCH_PIC_AUTO_CTRL0_END: + case PCH_PIC_AUTO_CTRL1_START ... PCH_PIC_AUTO_CTRL1_END: + /* we only use default mode: fixed interrupt distribution mode */ + *(u32 *)val = 0; + break; + case PCH_PIC_ROUTE_ENTRY_START ... PCH_PIC_ROUTE_ENTRY_END: + /* only route to int0: eiointc */ + *(u8 *)val = 1; + break; + case PCH_PIC_HTMSI_VEC_START ... 
PCH_PIC_HTMSI_VEC_END: + offset -= PCH_PIC_HTMSI_VEC_START; + /* read htmsi vector */ + data = s->htmsi_vector[offset]; + *(u8 *)val = data; + break; + case PCH_PIC_POLARITY_START ... PCH_PIC_POLARITY_END: + /* we only use defalut value 0: high level triggered */ + *(u32 *)val = 0; + break; + default: + ret = -EINVAL; + } + spin_unlock(&s->lock); + + return ret; +} + +static int kvm_pch_pic_read(struct kvm_vcpu *vcpu, + struct kvm_io_device *dev, + gpa_t addr, int len, void *val) +{ + int ret; + struct loongarch_pch_pic *s = vcpu->kvm->arch.pch_pic; + + if (!s) { + kvm_err("%s: pch pic irqchip not valid!\n", __func__); + return -EINVAL; + } + + /* statistics of pch pic reading */ + vcpu->kvm->stat.pch_pic_read_exits++; + ret = loongarch_pch_pic_read(s, addr, len, val); + + return ret; +} + +static int loongarch_pch_pic_write(struct loongarch_pch_pic *s, gpa_t addr, + int len, const void *val) +{ + int ret; + u32 old, data, offset, index; + u64 irq; + + ret = 0; + data = *(u32 *)val; + offset = addr - s->pch_pic_base; + + spin_lock(&s->lock); + switch (offset) { + case PCH_PIC_MASK_START ... PCH_PIC_MASK_END: + offset -= PCH_PIC_MASK_START; + /* get whether high or low 32 bits we want to write */ + index = offset >> 2; + old = pch_pic_write_reg(&s->mask, index, data); + /* enable irq when mask value change to 0 */ + irq = (old & ~data) << (32 * index); + pch_pic_update_batch_irqs(s, irq, 1); + /* disable irq when mask value change to 1 */ + irq = (~old & data) << (32 * index); + pch_pic_update_batch_irqs(s, irq, 0); + break; + case PCH_PIC_HTMSI_EN_START ... PCH_PIC_HTMSI_EN_END: + offset -= PCH_PIC_HTMSI_EN_START; + index = offset >> 2; + pch_pic_write_reg(&s->htmsi_en, index, data); + break; + case PCH_PIC_EDGE_START ... PCH_PIC_EDGE_END: + offset -= PCH_PIC_EDGE_START; + index = offset >> 2; + /* 1: edge triggered, 0: level triggered */ + pch_pic_write_reg(&s->edge, index, data); + break; + case PCH_PIC_CLEAR_START ... 
PCH_PIC_CLEAR_END: + offset -= PCH_PIC_CLEAR_START; + index = offset >> 2; + /* write 1 to clear edge irq */ + old = pch_pic_read_reg(&s->irr, index); + /* + * get the irq bitmap which is edge triggered and + * already set and to be cleared + */ + irq = old & pch_pic_read_reg(&s->edge, index) & data; + /* write irr to the new state where irqs have been cleared */ + pch_pic_write_reg(&s->irr, index, old & ~irq); + /* update cleared irqs */ + pch_pic_update_batch_irqs(s, irq, 0); + break; + case PCH_PIC_AUTO_CTRL0_START ... PCH_PIC_AUTO_CTRL0_END: + offset -= PCH_PIC_AUTO_CTRL0_START; + index = offset >> 2; + /* we only use default mode: fixed interrupt distribution mode */ + pch_pic_write_reg(&s->auto_ctrl0, index, 0); + break; + case PCH_PIC_AUTO_CTRL1_START ... PCH_PIC_AUTO_CTRL1_END: + offset -= PCH_PIC_AUTO_CTRL1_START; + index = offset >> 2; + /* we only use default mode: fixed interrupt distribution mode */ + pch_pic_write_reg(&s->auto_ctrl1, index, 0); + break; + case PCH_PIC_ROUTE_ENTRY_START ... PCH_PIC_ROUTE_ENTRY_END: + offset -= PCH_PIC_ROUTE_ENTRY_START; + /* only route to int0: eiointc */ + s->route_entry[offset] = 1; + break; + case PCH_PIC_HTMSI_VEC_START ... PCH_PIC_HTMSI_VEC_END: + /* route table to eiointc */ + offset -= PCH_PIC_HTMSI_VEC_START; + s->htmsi_vector[offset] = (u8)data; + break; + case PCH_PIC_POLARITY_START ... 
PCH_PIC_POLARITY_END: + offset -= PCH_PIC_POLARITY_START; + index = offset >> 2; + /* we only use defalut value 0: high level triggered */ + pch_pic_write_reg(&s->polarity, index, 0); + break; + default: + ret = -EINVAL; + break; + } + spin_unlock(&s->lock); + + return ret; +} + +static int kvm_pch_pic_write(struct kvm_vcpu *vcpu, + struct kvm_io_device *dev, + gpa_t addr, int len, const void *val) +{ + int ret; + struct loongarch_pch_pic *s = vcpu->kvm->arch.pch_pic; + + if (!s) { + kvm_err("%s: pch pic irqchip not valid!\n", __func__); + return -EINVAL; + } + + /* statistics of pch pic writing */ + vcpu->kvm->stat.pch_pic_write_exits++; + ret = loongarch_pch_pic_write(s, addr, len, val); + + return ret; +} + +static const struct kvm_io_device_ops kvm_pch_pic_ops = { + .read = kvm_pch_pic_read, + .write = kvm_pch_pic_write, +}; + +static int kvm_pch_pic_init(struct kvm_device *dev, u64 addr) +{ + int ret; + struct kvm *kvm = dev->kvm; + struct kvm_io_device *device; + struct loongarch_pch_pic *s = dev->kvm->arch.pch_pic; + + s->pch_pic_base = addr; + device = &s->device; + /* init device by pch pic writing and reading ops */ + kvm_iodevice_init(device, &kvm_pch_pic_ops); + mutex_lock(&kvm->slots_lock); + /* register pch pic device */ + ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, addr, PCH_PIC_SIZE, device); + mutex_unlock(&kvm->slots_lock); + + return (ret < 0) ? 
-EFAULT : 0; +} + +/* used by user space to get or set pch pic registers */ +static int kvm_pch_pic_regs_access(struct kvm_device *dev, + struct kvm_device_attr *attr, + bool is_write) +{ + int addr, offset, len = 8, ret = 0; + void __user *data; + void *p = NULL; + struct loongarch_pch_pic *s; + + s = dev->kvm->arch.pch_pic; + addr = attr->attr; + data = (void __user *)attr->addr; + + /* get pointer to pch pic register by addr */ + switch (addr) { + case PCH_PIC_MASK_START: + p = &s->mask; + break; + case PCH_PIC_HTMSI_EN_START: + p = &s->htmsi_en; + break; + case PCH_PIC_EDGE_START: + p = &s->edge; + break; + case PCH_PIC_AUTO_CTRL0_START: + p = &s->auto_ctrl0; + break; + case PCH_PIC_AUTO_CTRL1_START: + p = &s->auto_ctrl1; + break; + case PCH_PIC_ROUTE_ENTRY_START ... PCH_PIC_ROUTE_ENTRY_END: + offset = addr - PCH_PIC_ROUTE_ENTRY_START; + p = &s->route_entry[offset]; + len = 1; + break; + case PCH_PIC_HTMSI_VEC_START ... PCH_PIC_HTMSI_VEC_END: + offset = addr - PCH_PIC_HTMSI_VEC_START; + p = &s->htmsi_vector[offset]; + len = 1; + break; + case PCH_PIC_INT_IRR_START: + p = &s->irr; + break; + case PCH_PIC_INT_ISR_START: + p = &s->isr; + break; + case PCH_PIC_POLARITY_START: + p = &s->polarity; + break; + default: + return -EINVAL; + } + + spin_lock(&s->lock); + /* write or read value according to is_write */ + if (is_write) { + if (copy_from_user(p, data, len)) + ret = -EFAULT; + } else { + if (copy_to_user(data, p, len)) + ret = -EFAULT; + } + spin_unlock(&s->lock); + + return ret; +} + +static int kvm_pch_pic_get_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_DEV_LOONGARCH_PCH_PIC_GRP_REGS: + return kvm_pch_pic_regs_access(dev, attr, false); + default: + return -EINVAL; + } +} + +static int kvm_pch_pic_set_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + u64 addr; + void __user *uaddr = (void __user *)(long)attr->addr; + + switch (attr->group) { + case KVM_DEV_LOONGARCH_PCH_PIC_GRP_CTRL: + 
switch (attr->attr) { + case KVM_DEV_LOONGARCH_PCH_PIC_CTRL_INIT: + if (copy_from_user(&addr, uaddr, sizeof(addr))) + return -EFAULT; + + if (!dev->kvm->arch.pch_pic) { + kvm_err("%s: please create pch_pic irqchip first!\n", __func__); + return -ENODEV; + } + + return kvm_pch_pic_init(dev, addr); + default: + kvm_err("%s: unknown group (%d) attr (%lld)\n", __func__, attr->group, + attr->attr); + return -EINVAL; + } + case KVM_DEV_LOONGARCH_PCH_PIC_GRP_REGS: + return kvm_pch_pic_regs_access(dev, attr, true); + default: + return -EINVAL; + } +} + +static int kvm_setup_default_irq_routing(struct kvm *kvm) +{ + int i, ret; + u32 nr = KVM_IRQCHIP_NUM_PINS; + struct kvm_irq_routing_entry *entries; + + entries = kcalloc(nr, sizeof(*entries), GFP_KERNEL); + if (!entries) + return -ENOMEM; + + for (i = 0; i < nr; i++) { + entries[i].gsi = i; + entries[i].type = KVM_IRQ_ROUTING_IRQCHIP; + entries[i].u.irqchip.irqchip = 0; + entries[i].u.irqchip.pin = i; + } + ret = kvm_set_irq_routing(kvm, entries, nr, 0); + kfree(entries); + + return ret; +} + +static int kvm_pch_pic_create(struct kvm_device *dev, u32 type) +{ + int ret; + struct kvm *kvm = dev->kvm; + struct loongarch_pch_pic *s; + + /* pch pic should not has been created */ + if (kvm->arch.pch_pic) + return -EINVAL; + + ret = kvm_setup_default_irq_routing(kvm); + if (ret) + return -ENOMEM; + + s = kzalloc(sizeof(struct loongarch_pch_pic), GFP_KERNEL); + if (!s) + return -ENOMEM; + + spin_lock_init(&s->lock); + s->kvm = kvm; + kvm->arch.pch_pic = s; + + return 0; +} + +static void kvm_pch_pic_destroy(struct kvm_device *dev) +{ + struct kvm *kvm; + struct loongarch_pch_pic *s; + + if (!dev || !dev->kvm || !dev->kvm->arch.pch_pic) + return; + + kvm = dev->kvm; + s = kvm->arch.pch_pic; + /* unregister pch pic device and free it's memory */ + kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &s->device); + kfree(s); +} + +static struct kvm_device_ops kvm_pch_pic_dev_ops = { + .name = "kvm-loongarch-pch-pic", + .create = 
kvm_pch_pic_create, + .destroy = kvm_pch_pic_destroy, + .set_attr = kvm_pch_pic_set_attr, + .get_attr = kvm_pch_pic_get_attr, +}; + +int kvm_loongarch_register_pch_pic_device(void) +{ + return kvm_register_device_ops(&kvm_pch_pic_dev_ops, KVM_DEV_TYPE_LOONGARCH_PCHPIC); +} diff --git a/arch/loongarch/kvm/irqfd.c b/arch/loongarch/kvm/irqfd.c new file mode 100644 index 000000000000..9a39627aecf0 --- /dev/null +++ b/arch/loongarch/kvm/irqfd.c @@ -0,0 +1,89 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Loongson Technology Corporation Limited + */ + +#include <linux/kvm_host.h> +#include <trace/events/kvm.h> +#include <asm/kvm_pch_pic.h> + +static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, int level, bool line_status) +{ + /* PCH-PIC pin (0 ~ 64) <---> GSI (0 ~ 64) */ + pch_pic_set_irq(kvm->arch.pch_pic, e->irqchip.pin, level); + + return 0; +} + +/* + * kvm_set_msi: inject the MSI corresponding to the + * MSI routing entry + * + * This is the entry point for irqfd MSI injection + * and userspace MSI injection. + */ +int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, int level, bool line_status) +{ + if (!level) + return -1; + + pch_msi_set_irq(kvm, e->msi.data, level); + + return 0; +} + +/* + * kvm_set_routing_entry: populate a kvm routing entry + * from a user routing entry + * + * @kvm: the VM this entry is applied to + * @e: kvm kernel routing entry handle + * @ue: user api routing entry handle + * return 0 on success, -EINVAL on errors. 
+ */ +int kvm_set_routing_entry(struct kvm *kvm, + struct kvm_kernel_irq_routing_entry *e, + const struct kvm_irq_routing_entry *ue) +{ + switch (ue->type) { + case KVM_IRQ_ROUTING_IRQCHIP: + e->set = kvm_set_pic_irq; + e->irqchip.irqchip = ue->u.irqchip.irqchip; + e->irqchip.pin = ue->u.irqchip.pin; + + if (e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS) + return -EINVAL; + + return 0; + case KVM_IRQ_ROUTING_MSI: + e->set = kvm_set_msi; + e->msi.address_lo = ue->u.msi.address_lo; + e->msi.address_hi = ue->u.msi.address_hi; + e->msi.data = ue->u.msi.data; + return 0; + default: + return -EINVAL; + } +} + +int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, int level, bool line_status) +{ + switch (e->type) { + case KVM_IRQ_ROUTING_IRQCHIP: + pch_pic_set_irq(kvm->arch.pch_pic, e->irqchip.pin, level); + return 0; + case KVM_IRQ_ROUTING_MSI: + pch_msi_set_irq(kvm, e->msi.data, level); + return 0; + default: + return -EWOULDBLOCK; + } +} + +bool kvm_arch_intc_initialized(struct kvm *kvm) +{ + return kvm_arch_irqchip_in_kernel(kvm); +} diff --git a/arch/loongarch/kvm/main.c b/arch/loongarch/kvm/main.c index 86a2f2d0cb27..b6864d6e5ec8 100644 --- a/arch/loongarch/kvm/main.c +++ b/arch/loongarch/kvm/main.c @@ -9,6 +9,8 @@ #include <asm/cacheflush.h> #include <asm/cpufeature.h> #include <asm/kvm_csr.h> +#include <asm/kvm_eiointc.h> +#include <asm/kvm_pch_pic.h> #include "trace.h" unsigned long vpid_mask; @@ -242,6 +244,25 @@ void kvm_check_vpid(struct kvm_vcpu *vcpu) kvm_update_vpid(vcpu, cpu); trace_kvm_vpid_change(vcpu, vcpu->arch.vpid); vcpu->cpu = cpu; + kvm_clear_request(KVM_REQ_TLB_FLUSH_GPA, vcpu); + + /* + * LLBCTL is a separated guest CSR register from host, a general + * exception ERET instruction clears the host LLBCTL register in + * host mode, and clears the guest LLBCTL register in guest mode. + * ERET in tlb refill exception does not clear LLBCTL register. 
+ * + * When secondary mmu mapping is changed, guest OS does not know + * even if the content is changed after mapping is changed. + * + * Here clear WCLLB of the guest LLBCTL register when mapping is + * changed. Otherwise, if mmu mapping is changed while guest is + * executing LL/SC pair, LL loads with the old address and set + * the LLBCTL flag, SC checks the LLBCTL flag and will store the + * new address successfully since LLBCTL_WCLLB is on, even if + * memory with new address is changed on other VCPUs. + */ + set_gcsr_llbctl(CSR_LLBCTL_WCLLB); } /* Restore GSTAT(0x50).vpid */ @@ -260,7 +281,7 @@ long kvm_arch_dev_ioctl(struct file *filp, return -ENOIOCTLCMD; } -int kvm_arch_hardware_enable(void) +int kvm_arch_enable_virtualization_cpu(void) { unsigned long env, gcfg = 0; @@ -282,9 +303,9 @@ int kvm_arch_hardware_enable(void) * TOE=0: Trap on Exception. * TIT=0: Trap on Timer. */ - if (env & CSR_GCFG_GCIP_ALL) + if (env & CSR_GCFG_GCIP_SECURE) gcfg |= CSR_GCFG_GCI_SECURE; - if (env & CSR_GCFG_MATC_ROOT) + if (env & CSR_GCFG_MATP_ROOT) gcfg |= CSR_GCFG_MATC_ROOT; write_csr_gcfg(gcfg); @@ -296,10 +317,17 @@ int kvm_arch_hardware_enable(void) kvm_debug("GCFG:%lx GSTAT:%lx GINTC:%lx GTLBC:%lx", read_csr_gcfg(), read_csr_gstat(), read_csr_gintc(), read_csr_gtlbc()); + /* + * HW Guest CSR registers are lost after CPU suspend and resume. + * Clear last_vcpu so that Guest CSR registers forced to reload + * from vCPU SW state. + */ + this_cpu_ptr(vmcs)->last_vcpu = NULL; + return 0; } -void kvm_arch_hardware_disable(void) +void kvm_arch_disable_virtualization_cpu(void) { write_csr_gcfg(0); write_csr_gstat(0); @@ -312,7 +340,7 @@ void kvm_arch_hardware_disable(void) static int kvm_loongarch_env_init(void) { - int cpu, order; + int cpu, order, ret; void *addr; struct kvm_context *context; @@ -367,7 +395,20 @@ static int kvm_loongarch_env_init(void) kvm_init_gcsr_flag(); - return 0; + /* Register LoongArch IPI interrupt controller interface. 
*/ + ret = kvm_loongarch_register_ipi_device(); + if (ret) + return ret; + + /* Register LoongArch EIOINTC interrupt controller interface. */ + ret = kvm_loongarch_register_eiointc_device(); + if (ret) + return ret; + + /* Register LoongArch PCH-PIC interrupt controller interface. */ + ret = kvm_loongarch_register_pch_pic_device(); + + return ret; } static void kvm_loongarch_env_exit(void) diff --git a/arch/loongarch/kvm/mmu.c b/arch/loongarch/kvm/mmu.c index 50a6acd7ffe4..4d203294767c 100644 --- a/arch/loongarch/kvm/mmu.c +++ b/arch/loongarch/kvm/mmu.c @@ -163,6 +163,7 @@ static kvm_pte_t *kvm_populate_gpa(struct kvm *kvm, child = kvm_mmu_memory_cache_alloc(cache); _kvm_pte_init(child, ctx.invalid_ptes[ctx.level - 1]); + smp_wmb(); /* Make pte visible before pmd */ kvm_set_pte(entry, __pa(child)); } else if (kvm_pte_huge(*entry)) { return entry; @@ -444,6 +445,17 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, enum kvm_mr_change change) { int needs_flush; + u32 old_flags = old ? old->flags : 0; + u32 new_flags = new ? new->flags : 0; + bool log_dirty_pages = new_flags & KVM_MEM_LOG_DIRTY_PAGES; + + /* Only track memslot flags changed */ + if (change != KVM_MR_FLAGS_ONLY) + return; + + /* Discard dirty page tracking on readonly memslot */ + if ((old_flags & new_flags) & KVM_MEM_READONLY) + return; /* * If dirty page logging is enabled, write protect all pages in the slot @@ -454,9 +466,14 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, * MOVE/DELETE: The old mappings will already have been cleaned up by * kvm_arch_flush_shadow_memslot() */ - if (change == KVM_MR_FLAGS_ONLY && - (!(old->flags & KVM_MEM_LOG_DIRTY_PAGES) && - new->flags & KVM_MEM_LOG_DIRTY_PAGES)) { + if (!(old_flags & KVM_MEM_LOG_DIRTY_PAGES) && log_dirty_pages) { + /* + * Initially-all-set does not require write protecting any page + * because they're all assumed to be dirty. 
+ */ + if (kvm_dirty_log_manual_protect_and_init_set(kvm)) + return; + spin_lock(&kvm->mmu_lock); /* Write protect GPA page table entries */ needs_flush = kvm_mkclean_gpa_pt(kvm, new->base_gfn, @@ -494,38 +511,6 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) range->end << PAGE_SHIFT, &ctx); } -bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) -{ - unsigned long prot_bits; - kvm_pte_t *ptep; - kvm_pfn_t pfn = pte_pfn(range->arg.pte); - gpa_t gpa = range->start << PAGE_SHIFT; - - ptep = kvm_populate_gpa(kvm, NULL, gpa, 0); - if (!ptep) - return false; - - /* Replacing an absent or old page doesn't need flushes */ - if (!kvm_pte_present(NULL, ptep) || !kvm_pte_young(*ptep)) { - kvm_set_pte(ptep, 0); - return false; - } - - /* Fill new pte if write protected or page migrated */ - prot_bits = _PAGE_PRESENT | __READABLE; - prot_bits |= _CACHE_MASK & pte_val(range->arg.pte); - - /* - * Set _PAGE_WRITE or _PAGE_DIRTY iff old and new pte both support - * _PAGE_WRITE for map_page_fast if next page write fault - * _PAGE_DIRTY since gpa has already recorded as dirty page - */ - prot_bits |= __WRITEABLE & *ptep & pte_val(range->arg.pte); - kvm_set_pte(ptep, kvm_pfn_pte(pfn, __pgprot(prot_bits))); - - return true; -} - bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) { kvm_ptw_ctx ctx; @@ -567,7 +552,6 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) static int kvm_map_page_fast(struct kvm_vcpu *vcpu, unsigned long gpa, bool write) { int ret = 0; - kvm_pfn_t pfn = 0; kvm_pte_t *ptep, changed, new; gfn_t gfn = gpa >> PAGE_SHIFT; struct kvm *kvm = vcpu->kvm; @@ -583,11 +567,7 @@ static int kvm_map_page_fast(struct kvm_vcpu *vcpu, unsigned long gpa, bool writ } /* Track access to pages marked old */ - new = *ptep; - if (!kvm_pte_young(new)) - new = kvm_pte_mkyoung(new); - /* call kvm_set_pfn_accessed() after unlock */ - + new = kvm_pte_mkyoung(*ptep); if (write && !kvm_pte_dirty(new)) { if 
(!kvm_pte_write(new)) { ret = -EFAULT; @@ -611,23 +591,14 @@ static int kvm_map_page_fast(struct kvm_vcpu *vcpu, unsigned long gpa, bool writ } changed = new ^ (*ptep); - if (changed) { + if (changed) kvm_set_pte(ptep, new); - pfn = kvm_pte_pfn(new); - } - spin_unlock(&kvm->mmu_lock); - /* - * Fixme: pfn may be freed after mmu_lock - * kvm_try_get_pfn(pfn)/kvm_release_pfn pair to prevent this? - */ - if (kvm_pte_young(changed)) - kvm_set_pfn_accessed(pfn); + spin_unlock(&kvm->mmu_lock); - if (kvm_pte_dirty(changed)) { + if (kvm_pte_dirty(changed)) mark_page_dirty(kvm, gfn); - kvm_set_pfn_dirty(pfn); - } + return ret; out: spin_unlock(&kvm->mmu_lock); @@ -723,23 +694,23 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn, /* * Read each entry once. As above, a non-leaf entry can be promoted to * a huge page _during_ this walk. Re-reading the entry could send the - * walk into the weeks, e.g. p*d_large() returns false (sees the old + * walk into the weeks, e.g. p*d_leaf() returns false (sees the old * value) and then p*d_offset() walks into the target huge page instead * of the old page table (sees the new value). 
*/ - pgd = READ_ONCE(*pgd_offset(kvm->mm, hva)); + pgd = pgdp_get(pgd_offset(kvm->mm, hva)); if (pgd_none(pgd)) goto out; - p4d = READ_ONCE(*p4d_offset(&pgd, hva)); + p4d = p4dp_get(p4d_offset(&pgd, hva)); if (p4d_none(p4d) || !p4d_present(p4d)) goto out; - pud = READ_ONCE(*pud_offset(&p4d, hva)); + pud = pudp_get(pud_offset(&p4d, hva)); if (pud_none(pud) || !pud_present(pud)) goto out; - pmd = READ_ONCE(*pmd_offset(&pud, hva)); + pmd = pmdp_get(pmd_offset(&pud, hva)); if (pmd_none(pmd) || !pmd_present(pmd)) goto out; @@ -769,6 +740,7 @@ static kvm_pte_t *kvm_split_huge(struct kvm_vcpu *vcpu, kvm_pte_t *ptep, gfn_t g val += PAGE_SIZE; } + smp_wmb(); /* Make pte visible before pmd */ /* The later kvm_flush_tlb_gpa() will flush hugepage tlb */ kvm_set_pte(ptep, __pa(child)); @@ -808,6 +780,7 @@ static int kvm_map_page(struct kvm_vcpu *vcpu, unsigned long gpa, bool write) struct kvm *kvm = vcpu->kvm; struct kvm_memory_slot *memslot; struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; + struct page *page; /* Try the fast path to handle old / clean pages */ srcu_idx = srcu_read_lock(&kvm->srcu); @@ -835,7 +808,7 @@ retry: mmu_seq = kvm->mmu_invalidate_seq; /* * Ensure the read of mmu_invalidate_seq isn't reordered with PTE reads in - * gfn_to_pfn_prot() (which calls get_user_pages()), so that we don't + * kvm_faultin_pfn() (which calls get_user_pages()), so that we don't * risk the page we get a reference to getting unmapped before we have a * chance to grab the mmu_lock without mmu_invalidate_retry() noticing. * @@ -847,7 +820,7 @@ retry: smp_rmb(); /* Slow path - ask KVM core whether we can access this GPA */ - pfn = gfn_to_pfn_prot(kvm, gfn, write, &writeable); + pfn = kvm_faultin_pfn(vcpu, gfn, write, &writeable, &page); if (is_error_noslot_pfn(pfn)) { err = -EFAULT; goto out; @@ -859,10 +832,10 @@ retry: /* * This can happen when mappings are changed asynchronously, but * also synchronously if a COW is triggered by - * gfn_to_pfn_prot(). 
+ * kvm_faultin_pfn(). */ spin_unlock(&kvm->mmu_lock); - kvm_release_pfn_clean(pfn); + kvm_release_page_unused(page); if (retry_no > 100) { retry_no = 0; schedule(); @@ -890,11 +863,21 @@ retry: /* Disable dirty logging on HugePages */ level = 0; - if (!fault_supports_huge_mapping(memslot, hva, write)) { - level = 0; - } else { + if (fault_supports_huge_mapping(memslot, hva, write)) { + /* Check page level about host mmu*/ level = host_pfn_mapping_level(kvm, gfn, memslot); if (level == 1) { + /* + * Check page level about secondary mmu + * Disable hugepage if it is normal page on + * secondary mmu already + */ + ptep = kvm_populate_gpa(kvm, NULL, gpa, 0); + if (ptep && !kvm_pte_huge(*ptep)) + level = 0; + } + + if (level == 1) { gfn = gfn & ~(PTRS_PER_PTE - 1); pfn = pfn & ~(PTRS_PER_PTE - 1); } @@ -917,15 +900,13 @@ retry: else ++kvm->stat.pages; kvm_set_pte(ptep, new_pte); + + kvm_release_faultin_page(kvm, page, false, writeable); spin_unlock(&kvm->mmu_lock); - if (prot_bits & _PAGE_DIRTY) { + if (prot_bits & _PAGE_DIRTY) mark_page_dirty_in_slot(kvm, memslot, gfn); - kvm_set_pfn_dirty(pfn); - } - kvm_set_pfn_accessed(pfn); - kvm_release_pfn_clean(pfn); out: srcu_read_unlock(&kvm->srcu, srcu_idx); return err; @@ -940,7 +921,8 @@ int kvm_handle_mm_fault(struct kvm_vcpu *vcpu, unsigned long gpa, bool write) return ret; /* Invalidate this entry in the TLB */ - kvm_flush_tlb_gpa(vcpu, gpa); + vcpu->arch.flush_gpa = gpa; + kvm_make_request(KVM_REQ_TLB_FLUSH_GPA, vcpu); return 0; } diff --git a/arch/loongarch/kvm/switch.S b/arch/loongarch/kvm/switch.S index ba976509bfe8..1be185e94807 100644 --- a/arch/loongarch/kvm/switch.S +++ b/arch/loongarch/kvm/switch.S @@ -8,7 +8,7 @@ #include <asm/asmmacro.h> #include <asm/loongarch.h> #include <asm/regdef.h> -#include <asm/stackframe.h> +#include <asm/unwind_hints.h> #define HGPR_OFFSET(x) (PT_R0 + 8*x) #define GGPR_OFFSET(x) (KVM_ARCH_GGPR + 8*x) @@ -85,7 +85,7 @@ * Guest CRMD comes from separate GCSR_CRMD register */ ori t0, 
zero, CSR_PRMD_PIE - csrxchg t0, t0, LOONGARCH_CSR_PRMD + csrwr t0, LOONGARCH_CSR_PRMD /* Set PVM bit to setup ertn to guest context */ ori t0, zero, CSR_GSTAT_PVM @@ -112,6 +112,7 @@ .text .cfi_sections .debug_frame SYM_CODE_START(kvm_exc_entry) + UNWIND_HINT_UNDEFINED csrwr a2, KVM_TEMP_KS csrrd a2, KVM_VCPU_KS addi.d a2, a2, KVM_VCPU_ARCH @@ -213,12 +214,6 @@ SYM_FUNC_START(kvm_enter_guest) /* Save host GPRs */ kvm_save_host_gpr a2 - /* Save host CRMD, PRMD to stack */ - csrrd a3, LOONGARCH_CSR_CRMD - st.d a3, a2, PT_CRMD - csrrd a3, LOONGARCH_CSR_PRMD - st.d a3, a2, PT_PRMD - addi.d a2, a1, KVM_VCPU_ARCH st.d sp, a2, KVM_ARCH_HSP st.d tp, a2, KVM_ARCH_HTP @@ -279,3 +274,13 @@ SYM_FUNC_END(kvm_restore_lasx) .section ".rodata" SYM_DATA(kvm_exception_size, .quad kvm_exc_entry_end - kvm_exc_entry) SYM_DATA(kvm_enter_guest_size, .quad kvm_enter_guest_end - kvm_enter_guest) + +#ifdef CONFIG_CPU_HAS_LBT +STACK_FRAME_NON_STANDARD kvm_restore_fpu +#ifdef CONFIG_CPU_HAS_LSX +STACK_FRAME_NON_STANDARD kvm_restore_lsx +#endif +#ifdef CONFIG_CPU_HAS_LASX +STACK_FRAME_NON_STANDARD kvm_restore_lasx +#endif +#endif diff --git a/arch/loongarch/kvm/timer.c b/arch/loongarch/kvm/timer.c index 111328f60872..32dc213374be 100644 --- a/arch/loongarch/kvm/timer.c +++ b/arch/loongarch/kvm/timer.c @@ -23,24 +23,6 @@ static inline u64 tick_to_ns(struct kvm_vcpu *vcpu, u64 tick) return div_u64(tick * MNSEC_PER_SEC, vcpu->arch.timer_mhz); } -/* - * Push timer forward on timeout. - * Handle an hrtimer event by push the hrtimer forward a period. 
- */ -static enum hrtimer_restart kvm_count_timeout(struct kvm_vcpu *vcpu) -{ - unsigned long cfg, period; - - /* Add periodic tick to current expire time */ - cfg = kvm_read_sw_gcsr(vcpu->arch.csr, LOONGARCH_CSR_TCFG); - if (cfg & CSR_TCFG_PERIOD) { - period = tick_to_ns(vcpu, cfg & CSR_TCFG_VAL); - hrtimer_add_expires_ns(&vcpu->arch.swtimer, period); - return HRTIMER_RESTART; - } else - return HRTIMER_NORESTART; -} - /* Low level hrtimer wake routine */ enum hrtimer_restart kvm_swtimer_wakeup(struct hrtimer *timer) { @@ -50,7 +32,7 @@ enum hrtimer_restart kvm_swtimer_wakeup(struct hrtimer *timer) kvm_queue_irq(vcpu, INT_TI); rcuwait_wake_up(&vcpu->wait); - return kvm_count_timeout(vcpu); + return HRTIMER_NORESTART; } /* @@ -93,7 +75,8 @@ void kvm_restore_timer(struct kvm_vcpu *vcpu) /* * Freeze the soft-timer and sync the guest stable timer with it. */ - hrtimer_cancel(&vcpu->arch.swtimer); + if (kvm_vcpu_is_blocking(vcpu)) + hrtimer_cancel(&vcpu->arch.swtimer); /* * From LoongArch Reference Manual Volume 1 Chapter 7.6.2 @@ -168,26 +151,21 @@ static void _kvm_save_timer(struct kvm_vcpu *vcpu) * Here judge one-shot timer fired by checking whether TVAL is larger * than TCFG */ - if (ticks < cfg) { + if (ticks < cfg) delta = tick_to_ns(vcpu, ticks); - expire = ktime_add_ns(ktime_get(), delta); - vcpu->arch.expire = expire; + else + delta = 0; + + expire = ktime_add_ns(ktime_get(), delta); + vcpu->arch.expire = expire; + if (kvm_vcpu_is_blocking(vcpu)) { /* - * HRTIMER_MODE_PINNED is suggested since vcpu may run in - * the same physical cpu in next time - */ - hrtimer_start(&vcpu->arch.swtimer, expire, HRTIMER_MODE_ABS_PINNED); - } else if (vcpu->stat.generic.blocking) { - /* - * Inject timer interrupt so that halt polling can dectect and exit. - * VCPU is scheduled out already and sleeps in rcuwait queue and - * will not poll pending events again. kvm_queue_irq() is not enough, - * hrtimer swtimer should be used here. 
+ * HRTIMER_MODE_PINNED_HARD is suggested since vcpu may run in + * the same physical cpu in next time, and the timer should run + * in hardirq context even in the PREEMPT_RT case. */ - expire = ktime_add_ns(ktime_get(), 10); - vcpu->arch.expire = expire; - hrtimer_start(&vcpu->arch.swtimer, expire, HRTIMER_MODE_ABS_PINNED); + hrtimer_start(&vcpu->arch.swtimer, expire, HRTIMER_MODE_ABS_PINNED_HARD); } } @@ -211,10 +189,3 @@ void kvm_save_timer(struct kvm_vcpu *vcpu) kvm_save_hw_gcsr(csr, LOONGARCH_CSR_ESTAT); preempt_enable(); } - -void kvm_reset_timer(struct kvm_vcpu *vcpu) -{ - write_gcsr_timercfg(0); - kvm_write_sw_gcsr(vcpu->arch.csr, LOONGARCH_CSR_TCFG, 0); - hrtimer_cancel(&vcpu->arch.swtimer); -} diff --git a/arch/loongarch/kvm/tlb.c b/arch/loongarch/kvm/tlb.c index 02535df6b51f..ebdbe9264e9c 100644 --- a/arch/loongarch/kvm/tlb.c +++ b/arch/loongarch/kvm/tlb.c @@ -23,10 +23,7 @@ void kvm_flush_tlb_all(void) void kvm_flush_tlb_gpa(struct kvm_vcpu *vcpu, unsigned long gpa) { - unsigned long flags; - - local_irq_save(flags); + lockdep_assert_irqs_disabled(); gpa &= (PAGE_MASK << 1); invtlb(INVTLB_GID_ADDR, read_csr_gstat() & CSR_GSTAT_GID, gpa); - local_irq_restore(flags); } diff --git a/arch/loongarch/kvm/trace.h b/arch/loongarch/kvm/trace.h index c2484ad4cffa..1783397b1bc8 100644 --- a/arch/loongarch/kvm/trace.h +++ b/arch/loongarch/kvm/trace.h @@ -19,14 +19,16 @@ DECLARE_EVENT_CLASS(kvm_transition, TP_PROTO(struct kvm_vcpu *vcpu), TP_ARGS(vcpu), TP_STRUCT__entry( + __field(unsigned int, vcpu_id) __field(unsigned long, pc) ), TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; __entry->pc = vcpu->arch.pc; ), - TP_printk("PC: 0x%08lx", __entry->pc) + TP_printk("vcpu %u PC: 0x%08lx", __entry->vcpu_id, __entry->pc) ); DEFINE_EVENT(kvm_transition, kvm_enter, @@ -54,19 +56,22 @@ DECLARE_EVENT_CLASS(kvm_exit, TP_PROTO(struct kvm_vcpu *vcpu, unsigned int reason), TP_ARGS(vcpu, reason), TP_STRUCT__entry( + __field(unsigned int, vcpu_id) __field(unsigned long, pc) 
__field(unsigned int, reason) ), TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; __entry->pc = vcpu->arch.pc; __entry->reason = reason; ), - TP_printk("[%s]PC: 0x%08lx", - __print_symbolic(__entry->reason, - kvm_trace_symbol_exit_types), - __entry->pc) + TP_printk("vcpu %u [%s] PC: 0x%08lx", + __entry->vcpu_id, + __print_symbolic(__entry->reason, + kvm_trace_symbol_exit_types), + __entry->pc) ); DEFINE_EVENT(kvm_exit, kvm_exit_idle, @@ -85,14 +90,17 @@ TRACE_EVENT(kvm_exit_gspr, TP_PROTO(struct kvm_vcpu *vcpu, unsigned int inst_word), TP_ARGS(vcpu, inst_word), TP_STRUCT__entry( + __field(unsigned int, vcpu_id) __field(unsigned int, inst_word) ), TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; __entry->inst_word = inst_word; ), - TP_printk("Inst word: 0x%08x", __entry->inst_word) + TP_printk("vcpu %u Inst word: 0x%08x", __entry->vcpu_id, + __entry->inst_word) ); #define KVM_TRACE_AUX_SAVE 0 diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index 36106922b5d7..9e1a9b4aa4c6 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -6,6 +6,7 @@ #include <linux/kvm_host.h> #include <linux/entry-kvm.h> #include <asm/fpu.h> +#include <asm/lbt.h> #include <asm/loongarch.h> #include <asm/setup.h> #include <asm/time.h> @@ -19,6 +20,7 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { STATS_DESC_COUNTER(VCPU, idle_exits), STATS_DESC_COUNTER(VCPU, cpucfg_exits), STATS_DESC_COUNTER(VCPU, signal_exits), + STATS_DESC_COUNTER(VCPU, hypercall_exits) }; const struct kvm_stats_header kvm_vcpu_stats_header = { @@ -30,6 +32,170 @@ const struct kvm_stats_header kvm_vcpu_stats_header = { sizeof(kvm_vcpu_stats_desc), }; +static inline void kvm_save_host_pmu(struct kvm_vcpu *vcpu) +{ + struct kvm_context *context; + + context = this_cpu_ptr(vcpu->kvm->arch.vmcs); + context->perf_cntr[0] = read_csr_perfcntr0(); + context->perf_cntr[1] = read_csr_perfcntr1(); + context->perf_cntr[2] = read_csr_perfcntr2(); + context->perf_cntr[3] = 
read_csr_perfcntr3(); + context->perf_ctrl[0] = write_csr_perfctrl0(0); + context->perf_ctrl[1] = write_csr_perfctrl1(0); + context->perf_ctrl[2] = write_csr_perfctrl2(0); + context->perf_ctrl[3] = write_csr_perfctrl3(0); +} + +static inline void kvm_restore_host_pmu(struct kvm_vcpu *vcpu) +{ + struct kvm_context *context; + + context = this_cpu_ptr(vcpu->kvm->arch.vmcs); + write_csr_perfcntr0(context->perf_cntr[0]); + write_csr_perfcntr1(context->perf_cntr[1]); + write_csr_perfcntr2(context->perf_cntr[2]); + write_csr_perfcntr3(context->perf_cntr[3]); + write_csr_perfctrl0(context->perf_ctrl[0]); + write_csr_perfctrl1(context->perf_ctrl[1]); + write_csr_perfctrl2(context->perf_ctrl[2]); + write_csr_perfctrl3(context->perf_ctrl[3]); +} + + +static inline void kvm_save_guest_pmu(struct kvm_vcpu *vcpu) +{ + struct loongarch_csrs *csr = vcpu->arch.csr; + + kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR0); + kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR1); + kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR2); + kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR3); + kvm_read_clear_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL0); + kvm_read_clear_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL1); + kvm_read_clear_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL2); + kvm_read_clear_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL3); +} + +static inline void kvm_restore_guest_pmu(struct kvm_vcpu *vcpu) +{ + struct loongarch_csrs *csr = vcpu->arch.csr; + + kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR0); + kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR1); + kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR2); + kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR3); + kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL0); + kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL1); + kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL2); + kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL3); +} + +static int kvm_own_pmu(struct kvm_vcpu *vcpu) +{ + unsigned long val; + + if (!kvm_guest_has_pmu(&vcpu->arch)) + return -EINVAL; + + kvm_save_host_pmu(vcpu); + 
+ /* Set PM0-PM(num) to guest */ + val = read_csr_gcfg() & ~CSR_GCFG_GPERF; + val |= (kvm_get_pmu_num(&vcpu->arch) + 1) << CSR_GCFG_GPERF_SHIFT; + write_csr_gcfg(val); + + kvm_restore_guest_pmu(vcpu); + + return 0; +} + +static void kvm_lose_pmu(struct kvm_vcpu *vcpu) +{ + unsigned long val; + struct loongarch_csrs *csr = vcpu->arch.csr; + + if (!(vcpu->arch.aux_inuse & KVM_LARCH_PMU)) + return; + + kvm_save_guest_pmu(vcpu); + + /* Disable pmu access from guest */ + write_csr_gcfg(read_csr_gcfg() & ~CSR_GCFG_GPERF); + + /* + * Clear KVM_LARCH_PMU if the guest is not using PMU CSRs when + * exiting the guest, so that the next time trap into the guest. + * We don't need to deal with PMU CSRs contexts. + */ + val = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL0); + val |= kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL1); + val |= kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL2); + val |= kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL3); + if (!(val & KVM_PMU_EVENT_ENABLED)) + vcpu->arch.aux_inuse &= ~KVM_LARCH_PMU; + + kvm_restore_host_pmu(vcpu); +} + +static void kvm_restore_pmu(struct kvm_vcpu *vcpu) +{ + if ((vcpu->arch.aux_inuse & KVM_LARCH_PMU)) + kvm_make_request(KVM_REQ_PMU, vcpu); +} + +static void kvm_check_pmu(struct kvm_vcpu *vcpu) +{ + if (kvm_check_request(KVM_REQ_PMU, vcpu)) { + kvm_own_pmu(vcpu); + vcpu->arch.aux_inuse |= KVM_LARCH_PMU; + } +} + +static void kvm_update_stolen_time(struct kvm_vcpu *vcpu) +{ + u32 version; + u64 steal; + gpa_t gpa; + struct kvm_memslots *slots; + struct kvm_steal_time __user *st; + struct gfn_to_hva_cache *ghc; + + ghc = &vcpu->arch.st.cache; + gpa = vcpu->arch.st.guest_addr; + if (!(gpa & KVM_STEAL_PHYS_VALID)) + return; + + gpa &= KVM_STEAL_PHYS_MASK; + slots = kvm_memslots(vcpu->kvm); + if (slots->generation != ghc->generation || gpa != ghc->gpa) { + if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gpa, sizeof(*st))) { + ghc->gpa = INVALID_GPA; + return; + } + } + + st = (struct kvm_steal_time __user *)ghc->hva; + 
unsafe_get_user(version, &st->version, out); + if (version & 1) + version += 1; /* first time write, random junk */ + + version += 1; + unsafe_put_user(version, &st->version, out); + smp_wmb(); + + unsafe_get_user(steal, &st->steal, out); + steal += current->sched_info.run_delay - vcpu->arch.st.last_steal; + vcpu->arch.st.last_steal = current->sched_info.run_delay; + unsafe_put_user(steal, &st->steal, out); + + smp_wmb(); + version += 1; + unsafe_put_user(version, &st->version, out); +out: + mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa)); +} + /* * kvm_check_requests - check and handle pending vCPU requests * @@ -47,9 +213,22 @@ static int kvm_check_requests(struct kvm_vcpu *vcpu) if (kvm_dirty_ring_check_request(vcpu)) return RESUME_HOST; + if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu)) + kvm_update_stolen_time(vcpu); + return RESUME_GUEST; } +static void kvm_late_check_requests(struct kvm_vcpu *vcpu) +{ + lockdep_assert_irqs_disabled(); + if (kvm_check_request(KVM_REQ_TLB_FLUSH_GPA, vcpu)) + if (vcpu->arch.flush_gpa != INVALID_GPA) { + kvm_flush_tlb_gpa(vcpu, vcpu->arch.flush_gpa); + vcpu->arch.flush_gpa = INVALID_GPA; + } +} + /* * Check and handle pending signal and vCPU requests etc * Run with irq enabled and preempt enabled @@ -61,7 +240,7 @@ static int kvm_check_requests(struct kvm_vcpu *vcpu) */ static int kvm_enter_guest_check(struct kvm_vcpu *vcpu) { - int ret; + int idx, ret; /* * Check conditions before entering the guest @@ -70,7 +249,9 @@ static int kvm_enter_guest_check(struct kvm_vcpu *vcpu) if (ret < 0) return ret; + idx = srcu_read_lock(&vcpu->kvm->srcu); ret = kvm_check_requests(vcpu); + srcu_read_unlock(&vcpu->kvm->srcu, idx); return ret; } @@ -100,6 +281,14 @@ static int kvm_pre_enter_guest(struct kvm_vcpu *vcpu) /* Make sure the vcpu mode has been written */ smp_store_mb(vcpu->mode, IN_GUEST_MODE); kvm_check_vpid(vcpu); + kvm_check_pmu(vcpu); + + /* + * Called after function kvm_check_vpid() + * Since it updates 
CSR.GSTAT used by kvm_flush_tlb_gpa(), + * and it may also clear KVM_REQ_TLB_FLUSH_GPA pending bit + */ + kvm_late_check_requests(vcpu); vcpu->arch.host_eentry = csr_read64(LOONGARCH_CSR_EENTRY); /* Clear KVM_LARCH_SWCSR_LATEST as CSR will change when enter guest */ vcpu->arch.aux_inuse &= ~KVM_LARCH_SWCSR_LATEST; @@ -122,7 +311,7 @@ static int kvm_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) { int ret = RESUME_GUEST; unsigned long estat = vcpu->arch.host_estat; - u32 intr = estat & 0x1fff; /* Ignore NMI */ + u32 intr = estat & CSR_ESTAT_IS; u32 ecode = (estat & CSR_ESTAT_EXC) >> CSR_ESTAT_EXC_SHIFT; vcpu->mode = OUTSIDE_GUEST_MODE; @@ -130,6 +319,8 @@ static int kvm_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) /* Set a default exit reason */ run->exit_reason = KVM_EXIT_UNKNOWN; + kvm_lose_pmu(vcpu); + guest_timing_exit_irqoff(); guest_state_exit_irqoff(); local_irq_enable(); @@ -247,7 +438,101 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) { - return -EINVAL; + if (dbg->control & ~KVM_GUESTDBG_VALID_MASK) + return -EINVAL; + + if (dbg->control & KVM_GUESTDBG_ENABLE) + vcpu->guest_debug = dbg->control; + else + vcpu->guest_debug = 0; + + return 0; +} + +static inline int kvm_set_cpuid(struct kvm_vcpu *vcpu, u64 val) +{ + int cpuid; + struct kvm_phyid_map *map; + struct loongarch_csrs *csr = vcpu->arch.csr; + + if (val >= KVM_MAX_PHYID) + return -EINVAL; + + map = vcpu->kvm->arch.phyid_map; + cpuid = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_CPUID); + + spin_lock(&vcpu->kvm->arch.phyid_map_lock); + if ((cpuid < KVM_MAX_PHYID) && map->phys_map[cpuid].enabled) { + /* Discard duplicated CPUID set operation */ + if (cpuid == val) { + spin_unlock(&vcpu->kvm->arch.phyid_map_lock); + return 0; + } + + /* + * CPUID is already set before + * Forbid changing to a different CPUID at runtime + */ + spin_unlock(&vcpu->kvm->arch.phyid_map_lock); + return 
-EINVAL; + } + + if (map->phys_map[val].enabled) { + /* Discard duplicated CPUID set operation */ + if (vcpu == map->phys_map[val].vcpu) { + spin_unlock(&vcpu->kvm->arch.phyid_map_lock); + return 0; + } + + /* + * New CPUID is already set with other vcpu + * Forbid sharing the same CPUID between different vcpus + */ + spin_unlock(&vcpu->kvm->arch.phyid_map_lock); + return -EINVAL; + } + + kvm_write_sw_gcsr(csr, LOONGARCH_CSR_CPUID, val); + map->phys_map[val].enabled = true; + map->phys_map[val].vcpu = vcpu; + spin_unlock(&vcpu->kvm->arch.phyid_map_lock); + + return 0; +} + +static inline void kvm_drop_cpuid(struct kvm_vcpu *vcpu) +{ + int cpuid; + struct kvm_phyid_map *map; + struct loongarch_csrs *csr = vcpu->arch.csr; + + map = vcpu->kvm->arch.phyid_map; + cpuid = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_CPUID); + + if (cpuid >= KVM_MAX_PHYID) + return; + + spin_lock(&vcpu->kvm->arch.phyid_map_lock); + if (map->phys_map[cpuid].enabled) { + map->phys_map[cpuid].vcpu = NULL; + map->phys_map[cpuid].enabled = false; + kvm_write_sw_gcsr(csr, LOONGARCH_CSR_CPUID, KVM_MAX_PHYID); + } + spin_unlock(&vcpu->kvm->arch.phyid_map_lock); +} + +struct kvm_vcpu *kvm_get_vcpu_by_cpuid(struct kvm *kvm, int cpuid) +{ + struct kvm_phyid_map *map; + + if (cpuid >= KVM_MAX_PHYID) + return NULL; + + map = kvm->arch.phyid_map; + if (!map->phys_map[cpuid].enabled) + return NULL; + + return map->phys_map[cpuid].vcpu; } static int _kvm_getcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 *val) @@ -259,6 +544,17 @@ static int _kvm_getcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 *val) return -EINVAL; if (id == LOONGARCH_CSR_ESTAT) { + preempt_disable(); + vcpu_load(vcpu); + /* + * Sync pending interrupts into ESTAT so that interrupt + * remains during VM migration stage + */ + kvm_deliver_intr(vcpu); + vcpu->arch.aux_inuse &= ~KVM_LARCH_SWCSR_LATEST; + vcpu_put(vcpu); + preempt_enable(); + /* ESTAT IP0~IP7 get from GINTC */ gintc = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_GINTC) & 0xff; *val = 
kvm_read_sw_gcsr(csr, LOONGARCH_CSR_ESTAT) | (gintc << 2); @@ -282,6 +578,9 @@ static int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 val) if (get_gcsr_flag(id) & INVALID_GCSR) return -EINVAL; + if (id == LOONGARCH_CSR_CPUID) + return kvm_set_cpuid(vcpu, val); + if (id == LOONGARCH_CSR_ESTAT) { /* ESTAT IP0~IP7 inject through GINTC */ gintc = (val >> 2) & 0xff; @@ -295,6 +594,22 @@ static int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 val) kvm_write_sw_gcsr(csr, id, val); + /* + * After modifying the PMU CSR register value of the vcpu. + * If the PMU CSRs are used, we need to set KVM_REQ_PMU. + */ + if (id >= LOONGARCH_CSR_PERFCTRL0 && id <= LOONGARCH_CSR_PERFCNTR3) { + unsigned long val; + + val = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL0) | + kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL1) | + kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL2) | + kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL3); + + if (val & KVM_PMU_EVENT_ENABLED) + kvm_make_request(KVM_REQ_PMU, vcpu); + } + return ret; } @@ -304,11 +619,18 @@ static int _kvm_get_cpucfg_mask(int id, u64 *v) return -EINVAL; switch (id) { - case 2: + case LOONGARCH_CPUCFG0: + *v = GENMASK(31, 0); + return 0; + case LOONGARCH_CPUCFG1: + /* CPUCFG1_MSGINT is not supported by KVM */ + *v = GENMASK(25, 0); + return 0; + case LOONGARCH_CPUCFG2: /* CPUCFG2 features unconditionally supported by KVM */ *v = CPUCFG2_FP | CPUCFG2_FPSP | CPUCFG2_FPDP | CPUCFG2_FPVERS | CPUCFG2_LLFTP | CPUCFG2_LLFTPREV | - CPUCFG2_LAM; + CPUCFG2_LSPW | CPUCFG2_LAM; /* * For the ISA extensions listed below, if one is supported * by the host, then it is also supported by KVM. 
@@ -317,15 +639,39 @@ static int _kvm_get_cpucfg_mask(int id, u64 *v) *v |= CPUCFG2_LSX; if (cpu_has_lasx) *v |= CPUCFG2_LASX; + if (cpu_has_lbt_x86) + *v |= CPUCFG2_X86BT; + if (cpu_has_lbt_arm) + *v |= CPUCFG2_ARMBT; + if (cpu_has_lbt_mips) + *v |= CPUCFG2_MIPSBT; return 0; + case LOONGARCH_CPUCFG3: + *v = GENMASK(16, 0); + return 0; + case LOONGARCH_CPUCFG4: + case LOONGARCH_CPUCFG5: + *v = GENMASK(31, 0); + return 0; + case LOONGARCH_CPUCFG6: + if (cpu_has_pmp) + *v = GENMASK(14, 0); + else + *v = 0; + return 0; + case LOONGARCH_CPUCFG16: + *v = GENMASK(16, 0); + return 0; + case LOONGARCH_CPUCFG17 ... LOONGARCH_CPUCFG20: + *v = GENMASK(30, 0); + return 0; default: /* - * No restrictions on other valid CPUCFG IDs' values, but - * CPUCFG data is limited to 32 bits as the LoongArch ISA - * manual says (Volume 1, Section 2.2.10.5 "CPUCFG"). + * CPUCFG bits should be zero if reserved by HW or not + * supported by KVM. */ - *v = U32_MAX; + *v = 0; return 0; } } @@ -344,7 +690,7 @@ static int kvm_check_cpucfg(int id, u64 val) return -EINVAL; switch (id) { - case 2: + case LOONGARCH_CPUCFG2: if (!(val & CPUCFG2_LLFTP)) /* Guests must have a constant timer */ return -EINVAL; @@ -358,6 +704,17 @@ static int kvm_check_cpucfg(int id, u64 val) /* LASX architecturally implies LSX and FP but val does not satisfy that */ return -EINVAL; return 0; + case LOONGARCH_CPUCFG6: + if (val & CPUCFG6_PMP) { + u32 host = read_cpucfg(LOONGARCH_CPUCFG6); + if ((val & CPUCFG6_PMBITS) != (host & CPUCFG6_PMBITS)) + return -EINVAL; + if ((val & CPUCFG6_PMNUM) > (host & CPUCFG6_PMNUM)) + return -EINVAL; + if ((val & CPUCFG6_UPM) && !(host & CPUCFG6_UPM)) + return -EINVAL; + } + return 0; default: /* * Values for the other CPUCFG IDs are not being further validated @@ -385,11 +742,42 @@ static int kvm_get_one_reg(struct kvm_vcpu *vcpu, else ret = -EINVAL; break; + case KVM_REG_LOONGARCH_LBT: + if (!kvm_guest_has_lbt(&vcpu->arch)) + return -ENXIO; + + switch (reg->id) { + case 
KVM_REG_LOONGARCH_LBT_SCR0: + *v = vcpu->arch.lbt.scr0; + break; + case KVM_REG_LOONGARCH_LBT_SCR1: + *v = vcpu->arch.lbt.scr1; + break; + case KVM_REG_LOONGARCH_LBT_SCR2: + *v = vcpu->arch.lbt.scr2; + break; + case KVM_REG_LOONGARCH_LBT_SCR3: + *v = vcpu->arch.lbt.scr3; + break; + case KVM_REG_LOONGARCH_LBT_EFLAGS: + *v = vcpu->arch.lbt.eflags; + break; + case KVM_REG_LOONGARCH_LBT_FTOP: + *v = vcpu->arch.fpu.ftop; + break; + default: + ret = -EINVAL; + break; + } + break; case KVM_REG_LOONGARCH_KVM: switch (reg->id) { case KVM_REG_LOONGARCH_COUNTER: *v = drdtime() + vcpu->kvm->arch.time_offset; break; + case KVM_REG_LOONGARCH_DEBUG_INST: + *v = INSN_HVCL | KVM_HCALL_SWDBG; + break; default: ret = -EINVAL; break; @@ -440,6 +828,37 @@ static int kvm_set_one_reg(struct kvm_vcpu *vcpu, if (ret) break; vcpu->arch.cpucfg[id] = (u32)v; + if (id == LOONGARCH_CPUCFG6) + vcpu->arch.max_pmu_csrid = + LOONGARCH_CSR_PERFCTRL0 + 2 * kvm_get_pmu_num(&vcpu->arch) + 1; + break; + case KVM_REG_LOONGARCH_LBT: + if (!kvm_guest_has_lbt(&vcpu->arch)) + return -ENXIO; + + switch (reg->id) { + case KVM_REG_LOONGARCH_LBT_SCR0: + vcpu->arch.lbt.scr0 = v; + break; + case KVM_REG_LOONGARCH_LBT_SCR1: + vcpu->arch.lbt.scr1 = v; + break; + case KVM_REG_LOONGARCH_LBT_SCR2: + vcpu->arch.lbt.scr2 = v; + break; + case KVM_REG_LOONGARCH_LBT_SCR3: + vcpu->arch.lbt.scr3 = v; + break; + case KVM_REG_LOONGARCH_LBT_EFLAGS: + vcpu->arch.lbt.eflags = v; + break; + case KVM_REG_LOONGARCH_LBT_FTOP: + vcpu->arch.fpu.ftop = v; + break; + default: + ret = -EINVAL; + break; + } break; case KVM_REG_LOONGARCH_KVM: switch (reg->id) { @@ -452,7 +871,7 @@ static int kvm_set_one_reg(struct kvm_vcpu *vcpu, vcpu->kvm->arch.time_offset = (signed long)(v - drdtime()); break; case KVM_REG_LOONGARCH_VCPU_RESET: - kvm_reset_timer(vcpu); + vcpu->arch.st.guest_addr = 0; memset(&vcpu->arch.irq_pending, 0, sizeof(vcpu->arch.irq_pending)); memset(&vcpu->arch.irq_clear, 0, sizeof(vcpu->arch.irq_clear)); break; @@ -533,7 +952,10 
@@ static int kvm_loongarch_cpucfg_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { switch (attr->attr) { - case 2: + case LOONGARCH_CPUCFG2: + case LOONGARCH_CPUCFG6: + return 0; + case CPUCFG_KVM_FEATURE: return 0; default: return -ENXIO; @@ -542,6 +964,16 @@ static int kvm_loongarch_cpucfg_has_attr(struct kvm_vcpu *vcpu, return -ENXIO; } +static int kvm_loongarch_pvtime_has_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + if (!kvm_guest_has_pv_feature(vcpu, KVM_FEATURE_STEAL_TIME) + || attr->attr != KVM_LOONGARCH_VCPU_PVTIME_GPA) + return -ENXIO; + + return 0; +} + static int kvm_loongarch_vcpu_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { @@ -551,6 +983,9 @@ static int kvm_loongarch_vcpu_has_attr(struct kvm_vcpu *vcpu, case KVM_LOONGARCH_VCPU_CPUCFG: ret = kvm_loongarch_cpucfg_has_attr(vcpu, attr); break; + case KVM_LOONGARCH_VCPU_PVTIME_CTRL: + ret = kvm_loongarch_pvtime_has_attr(vcpu, attr); + break; default: break; } @@ -558,22 +993,48 @@ static int kvm_loongarch_vcpu_has_attr(struct kvm_vcpu *vcpu, return ret; } -static int kvm_loongarch_get_cpucfg_attr(struct kvm_vcpu *vcpu, +static int kvm_loongarch_cpucfg_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { int ret = 0; uint64_t val; uint64_t __user *uaddr = (uint64_t __user *)attr->addr; - ret = _kvm_get_cpucfg_mask(attr->attr, &val); - if (ret) - return ret; + switch (attr->attr) { + case 0 ... 
(KVM_MAX_CPUCFG_REGS - 1): + ret = _kvm_get_cpucfg_mask(attr->attr, &val); + if (ret) + return ret; + break; + case CPUCFG_KVM_FEATURE: + val = vcpu->kvm->arch.pv_features & LOONGARCH_PV_FEAT_MASK; + break; + default: + return -ENXIO; + } put_user(val, uaddr); return ret; } +static int kvm_loongarch_pvtime_get_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + u64 gpa; + u64 __user *user = (u64 __user *)attr->addr; + + if (!kvm_guest_has_pv_feature(vcpu, KVM_FEATURE_STEAL_TIME) + || attr->attr != KVM_LOONGARCH_VCPU_PVTIME_GPA) + return -ENXIO; + + gpa = vcpu->arch.st.guest_addr; + if (put_user(gpa, user)) + return -EFAULT; + + return 0; +} + static int kvm_loongarch_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { @@ -581,7 +1042,10 @@ static int kvm_loongarch_vcpu_get_attr(struct kvm_vcpu *vcpu, switch (attr->group) { case KVM_LOONGARCH_VCPU_CPUCFG: - ret = kvm_loongarch_get_cpucfg_attr(vcpu, attr); + ret = kvm_loongarch_cpucfg_get_attr(vcpu, attr); + break; + case KVM_LOONGARCH_VCPU_PVTIME_CTRL: + ret = kvm_loongarch_pvtime_get_attr(vcpu, attr); break; default: break; @@ -593,7 +1057,65 @@ static int kvm_loongarch_vcpu_get_attr(struct kvm_vcpu *vcpu, static int kvm_loongarch_cpucfg_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { - return -ENXIO; + u64 val, valid; + u64 __user *user = (u64 __user *)attr->addr; + struct kvm *kvm = vcpu->kvm; + + switch (attr->attr) { + case CPUCFG_KVM_FEATURE: + if (get_user(val, user)) + return -EFAULT; + + valid = LOONGARCH_PV_FEAT_MASK; + if (val & ~valid) + return -EINVAL; + + /* All vCPUs need set the same PV features */ + if ((kvm->arch.pv_features & LOONGARCH_PV_FEAT_UPDATED) + && ((kvm->arch.pv_features & valid) != val)) + return -EINVAL; + kvm->arch.pv_features = val | LOONGARCH_PV_FEAT_UPDATED; + return 0; + default: + return -ENXIO; + } +} + +static int kvm_loongarch_pvtime_set_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + int idx, ret = 0; + u64 gpa, 
__user *user = (u64 __user *)attr->addr; + struct kvm *kvm = vcpu->kvm; + + if (!kvm_guest_has_pv_feature(vcpu, KVM_FEATURE_STEAL_TIME) + || attr->attr != KVM_LOONGARCH_VCPU_PVTIME_GPA) + return -ENXIO; + + if (get_user(gpa, user)) + return -EFAULT; + + if (gpa & ~(KVM_STEAL_PHYS_MASK | KVM_STEAL_PHYS_VALID)) + return -EINVAL; + + if (!(gpa & KVM_STEAL_PHYS_VALID)) { + vcpu->arch.st.guest_addr = gpa; + return 0; + } + + /* Check the address is in a valid memslot */ + idx = srcu_read_lock(&kvm->srcu); + if (kvm_is_error_hva(gfn_to_hva(kvm, gpa >> PAGE_SHIFT))) + ret = -EINVAL; + srcu_read_unlock(&kvm->srcu, idx); + + if (!ret) { + vcpu->arch.st.guest_addr = gpa; + vcpu->arch.st.last_steal = current->sched_info.run_delay; + kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); + } + + return ret; } static int kvm_loongarch_vcpu_set_attr(struct kvm_vcpu *vcpu, @@ -605,6 +1127,9 @@ static int kvm_loongarch_vcpu_set_attr(struct kvm_vcpu *vcpu, case KVM_LOONGARCH_VCPU_CPUCFG: ret = kvm_loongarch_cpucfg_set_attr(vcpu, attr); break; + case KVM_LOONGARCH_VCPU_PVTIME_CTRL: + ret = kvm_loongarch_pvtime_set_attr(vcpu, attr); + break; default: break; } @@ -709,12 +1234,66 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) return 0; } +#ifdef CONFIG_CPU_HAS_LBT +int kvm_own_lbt(struct kvm_vcpu *vcpu) +{ + if (!kvm_guest_has_lbt(&vcpu->arch)) + return -EINVAL; + + preempt_disable(); + set_csr_euen(CSR_EUEN_LBTEN); + _restore_lbt(&vcpu->arch.lbt); + vcpu->arch.aux_inuse |= KVM_LARCH_LBT; + preempt_enable(); + + return 0; +} + +static void kvm_lose_lbt(struct kvm_vcpu *vcpu) +{ + preempt_disable(); + if (vcpu->arch.aux_inuse & KVM_LARCH_LBT) { + _save_lbt(&vcpu->arch.lbt); + clear_csr_euen(CSR_EUEN_LBTEN); + vcpu->arch.aux_inuse &= ~KVM_LARCH_LBT; + } + preempt_enable(); +} + +static void kvm_check_fcsr(struct kvm_vcpu *vcpu, unsigned long fcsr) +{ + /* + * If TM is enabled, top register save/restore will + * cause lbt exception, here enable lbt in advance + */ 
+ if (fcsr & FPU_CSR_TM) + kvm_own_lbt(vcpu); +} + +static void kvm_check_fcsr_alive(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.aux_inuse & KVM_LARCH_FPU) { + if (vcpu->arch.aux_inuse & KVM_LARCH_LBT) + return; + kvm_check_fcsr(vcpu, read_fcsr(LOONGARCH_FCSR0)); + } +} +#else +static inline void kvm_lose_lbt(struct kvm_vcpu *vcpu) { } +static inline void kvm_check_fcsr(struct kvm_vcpu *vcpu, unsigned long fcsr) { } +static inline void kvm_check_fcsr_alive(struct kvm_vcpu *vcpu) { } +#endif + /* Enable FPU and restore context */ void kvm_own_fpu(struct kvm_vcpu *vcpu) { preempt_disable(); - /* Enable FPU */ + /* + * Enable FPU for guest + * Set FR and FRE according to guest context + */ + kvm_check_fcsr(vcpu, vcpu->arch.fpu.fcsr); set_csr_euen(CSR_EUEN_FPEN); kvm_restore_fpu(&vcpu->arch.fpu); @@ -734,6 +1313,7 @@ int kvm_own_lsx(struct kvm_vcpu *vcpu) preempt_disable(); /* Enable LSX for guest */ + kvm_check_fcsr(vcpu, vcpu->arch.fpu.fcsr); set_csr_euen(CSR_EUEN_LSXEN | CSR_EUEN_FPEN); switch (vcpu->arch.aux_inuse & KVM_LARCH_FPU) { case KVM_LARCH_FPU: @@ -768,6 +1348,7 @@ int kvm_own_lasx(struct kvm_vcpu *vcpu) preempt_disable(); + kvm_check_fcsr(vcpu, vcpu->arch.fpu.fcsr); set_csr_euen(CSR_EUEN_FPEN | CSR_EUEN_LSXEN | CSR_EUEN_LASXEN); switch (vcpu->arch.aux_inuse & (KVM_LARCH_FPU | KVM_LARCH_LSX)) { case KVM_LARCH_LSX: @@ -799,6 +1380,7 @@ void kvm_lose_fpu(struct kvm_vcpu *vcpu) { preempt_disable(); + kvm_check_fcsr_alive(vcpu); if (vcpu->arch.aux_inuse & KVM_LARCH_LASX) { kvm_save_lasx(&vcpu->arch.fpu); vcpu->arch.aux_inuse &= ~(KVM_LARCH_LSX | KVM_LARCH_FPU | KVM_LARCH_LASX); @@ -821,6 +1403,7 @@ void kvm_lose_fpu(struct kvm_vcpu *vcpu) /* Disable FPU */ clear_csr_euen(CSR_EUEN_FPEN); } + kvm_lose_lbt(vcpu); preempt_enable(); } @@ -874,8 +1457,9 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) struct loongarch_csrs *csr; vcpu->arch.vpid = 0; + vcpu->arch.flush_gpa = INVALID_GPA; - hrtimer_init(&vcpu->arch.swtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED); + 
hrtimer_init(&vcpu->arch.swtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD); vcpu->arch.swtimer.function = kvm_swtimer_wakeup; vcpu->arch.handle_exit = kvm_handle_exit; @@ -893,6 +1477,9 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) /* Init */ vcpu->arch.last_sched_cpu = -1; + /* Init ipi_state lock */ + spin_lock_init(&vcpu->arch.ipi_state.lock); + /* * Initialize guest register state to valid architectural reset state. */ @@ -905,6 +1492,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) /* Set cpuid */ kvm_write_sw_gcsr(csr, LOONGARCH_CSR_TMID, vcpu->vcpu_id); + kvm_write_sw_gcsr(csr, LOONGARCH_CSR_CPUID, KVM_MAX_PHYID); /* Start with no pending virtual guest interrupts */ csr->csrs[LOONGARCH_CSR_GINTC] = 0; @@ -923,6 +1511,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) hrtimer_cancel(&vcpu->arch.swtimer); kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache); + kvm_drop_cpuid(vcpu); kfree(vcpu->arch.csr); /* @@ -959,9 +1548,10 @@ static int _kvm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) /* Restore timer state regardless */ kvm_restore_timer(vcpu); + kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); - /* Control guest page CCA attribute */ - change_csr_gcfg(CSR_GCFG_MATC_MASK, CSR_GCFG_MATC_ROOT); + /* Restore hardware PMU CSRs */ + kvm_restore_pmu(vcpu); /* Don't bother restoring registers multiple times unless necessary */ if (vcpu->arch.aux_inuse & KVM_LARCH_HWCSR_USABLE) @@ -1139,12 +1729,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) vcpu->mmio_needed = 0; } - if (run->exit_reason == KVM_EXIT_LOONGARCH_IOCSR) { + switch (run->exit_reason) { + case KVM_EXIT_HYPERCALL: + kvm_complete_user_service(vcpu, run); + break; + case KVM_EXIT_LOONGARCH_IOCSR: if (!run->iocsr_io.is_write) kvm_complete_iocsr_read(vcpu, run); + break; } - if (run->immediate_exit) + if (!vcpu->wants_to_run) return r; /* Clear exit_reason */ diff --git a/arch/loongarch/kvm/vm.c b/arch/loongarch/kvm/vm.c index 0a37f6fa8f2d..edccfc8c9cd8 100644 --- 
a/arch/loongarch/kvm/vm.c +++ b/arch/loongarch/kvm/vm.c @@ -5,6 +5,9 @@ #include <linux/kvm_host.h> #include <asm/kvm_mmu.h> +#include <asm/kvm_vcpu.h> +#include <asm/kvm_eiointc.h> +#include <asm/kvm_pch_pic.h> const struct _kvm_stats_desc kvm_vm_stats_desc[] = { KVM_GENERIC_VM_STATS(), @@ -30,8 +33,26 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (!kvm->arch.pgd) return -ENOMEM; + kvm->arch.phyid_map = kvzalloc(sizeof(struct kvm_phyid_map), GFP_KERNEL_ACCOUNT); + if (!kvm->arch.phyid_map) { + free_page((unsigned long)kvm->arch.pgd); + kvm->arch.pgd = NULL; + return -ENOMEM; + } + spin_lock_init(&kvm->arch.phyid_map_lock); + kvm_init_vmcs(kvm); - kvm->arch.gpa_size = BIT(cpu_vabits - 1); + + /* Enable all PV features by default */ + kvm->arch.pv_features = BIT(KVM_FEATURE_IPI); + if (kvm_pvtime_supported()) + kvm->arch.pv_features |= BIT(KVM_FEATURE_STEAL_TIME); + + /* + * cpu_vabits means user address space only (a half of total). + * GPA size of VM is the same with the size of user address space. 
+ */ + kvm->arch.gpa_size = BIT(cpu_vabits); kvm->arch.root_level = CONFIG_PGTABLE_LEVELS - 1; kvm->arch.invalid_ptes[0] = 0; kvm->arch.invalid_ptes[1] = (unsigned long)invalid_pte_table; @@ -52,6 +73,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm_destroy_vcpus(kvm); free_page((unsigned long)kvm->arch.pgd); kvm->arch.pgd = NULL; + kvfree(kvm->arch.phyid_map); + kvm->arch.phyid_map = NULL; } int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) @@ -59,6 +82,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) int r; switch (ext) { + case KVM_CAP_IRQCHIP: case KVM_CAP_ONE_REG: case KVM_CAP_ENABLE_CAP: case KVM_CAP_READONLY_MEM: @@ -66,6 +90,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_IMMEDIATE_EXIT: case KVM_CAP_IOEVENTFD: case KVM_CAP_MP_STATE: + case KVM_CAP_SET_GUEST_DEBUG: r = 1; break; case KVM_CAP_NR_VCPUS: @@ -88,7 +113,85 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) return r; } +static int kvm_vm_feature_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) +{ + switch (attr->attr) { + case KVM_LOONGARCH_VM_FEAT_LSX: + if (cpu_has_lsx) + return 0; + return -ENXIO; + case KVM_LOONGARCH_VM_FEAT_LASX: + if (cpu_has_lasx) + return 0; + return -ENXIO; + case KVM_LOONGARCH_VM_FEAT_X86BT: + if (cpu_has_lbt_x86) + return 0; + return -ENXIO; + case KVM_LOONGARCH_VM_FEAT_ARMBT: + if (cpu_has_lbt_arm) + return 0; + return -ENXIO; + case KVM_LOONGARCH_VM_FEAT_MIPSBT: + if (cpu_has_lbt_mips) + return 0; + return -ENXIO; + case KVM_LOONGARCH_VM_FEAT_PMU: + if (cpu_has_pmp) + return 0; + return -ENXIO; + case KVM_LOONGARCH_VM_FEAT_PV_IPI: + return 0; + case KVM_LOONGARCH_VM_FEAT_PV_STEALTIME: + if (kvm_pvtime_supported()) + return 0; + return -ENXIO; + default: + return -ENXIO; + } +} + +static int kvm_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_LOONGARCH_VM_FEAT_CTRL: + return kvm_vm_feature_has_attr(kvm, attr); + default: + return -ENXIO; + } 
+} + int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { - return -ENOIOCTLCMD; + void __user *argp = (void __user *)arg; + struct kvm *kvm = filp->private_data; + struct kvm_device_attr attr; + + switch (ioctl) { + case KVM_CREATE_IRQCHIP: + return 0; + case KVM_HAS_DEVICE_ATTR: + if (copy_from_user(&attr, argp, sizeof(attr))) + return -EFAULT; + + return kvm_vm_has_attr(kvm, &attr); + default: + return -ENOIOCTLCMD; + } +} + +int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event, bool line_status) +{ + if (!kvm_arch_irqchip_in_kernel(kvm)) + return -ENXIO; + + irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, + irq_event->irq, irq_event->level, line_status); + + return 0; +} + +bool kvm_arch_irqchip_in_kernel(struct kvm *kvm) +{ + return (kvm->arch.ipi && kvm->arch.eiointc && kvm->arch.pch_pic); } |