author     Olof Johansson <olof@lixom.net>    2019-12-05 13:16:58 -0800
committer  Olof Johansson <olof@lixom.net>    2019-12-05 13:18:54 -0800
commit     942e6f8a8314e5550e254519dfba4ccd5170421d (patch)
tree       75ec655b440fbc1c454247af38b5596dd8c78de9 /virt
parent     336bab731be76a90291697e51d2aed0ad67d7cb5 (diff)
parent     b08baef02b26cf7c2123e4a24a2fa1fb7a593ffb (diff)
Merge mainline/master into arm/fixes
This brings in the mainline tree right after the armsoc contents were merged
this release cycle, so that we can re-run savedefconfig, etc.

Signed-off-by: Olof Johansson <olof@lixom.net>
Diffstat (limited to 'virt')
-rw-r--r--  virt/kvm/arm/arch_timer.c        8
-rw-r--r--  virt/kvm/arm/arm.c              49
-rw-r--r--  virt/kvm/arm/hypercalls.c       71
-rw-r--r--  virt/kvm/arm/mmio.c              9
-rw-r--r--  virt/kvm/arm/pmu.c              48
-rw-r--r--  virt/kvm/arm/psci.c             84
-rw-r--r--  virt/kvm/arm/pvtime.c          131
-rw-r--r--  virt/kvm/arm/vgic/vgic-init.c    1
-rw-r--r--  virt/kvm/arm/vgic/vgic-its.c     3
-rw-r--r--  virt/kvm/arm/vgic/vgic-v3.c     12
-rw-r--r--  virt/kvm/arm/vgic/vgic-v4.c     59
-rw-r--r--  virt/kvm/arm/vgic/vgic.c         4
-rw-r--r--  virt/kvm/arm/vgic/vgic.h         2
-rw-r--r--  virt/kvm/coalesced_mmio.c        8
-rw-r--r--  virt/kvm/kvm_main.c            272
15 files changed, 555 insertions, 206 deletions
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index e2bb5bd60227..f182b2380345 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -80,7 +80,7 @@ static inline bool userspace_irqchip(struct kvm *kvm)
static void soft_timer_start(struct hrtimer *hrt, u64 ns)
{
hrtimer_start(hrt, ktime_add_ns(ktime_get(), ns),
- HRTIMER_MODE_ABS);
+ HRTIMER_MODE_ABS_HARD);
}
static void soft_timer_cancel(struct hrtimer *hrt)
@@ -697,11 +697,11 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
update_vtimer_cntvoff(vcpu, kvm_phys_timer_read());
ptimer->cntvoff = 0;
- hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+ hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
timer->bg_timer.function = kvm_bg_timer_expire;
- hrtimer_init(&vtimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
- hrtimer_init(&ptimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+ hrtimer_init(&vtimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
+ hrtimer_init(&ptimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
vtimer->hrtimer.function = kvm_hrtimer_expire;
ptimer->hrtimer.function = kvm_hrtimer_expire;
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 86c6aa1cb58e..12e0280291ce 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -40,6 +40,10 @@
#include <asm/kvm_coproc.h>
#include <asm/sections.h>
+#include <kvm/arm_hypercalls.h>
+#include <kvm/arm_pmu.h>
+#include <kvm/arm_psci.h>
+
#ifdef REQUIRES_VIRT
__asm__(".arch_extension virt");
#endif
@@ -98,6 +102,26 @@ int kvm_arch_check_processor_compat(void)
return 0;
}
+int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
+ struct kvm_enable_cap *cap)
+{
+ int r;
+
+ if (cap->flags)
+ return -EINVAL;
+
+ switch (cap->cap) {
+ case KVM_CAP_ARM_NISV_TO_USER:
+ r = 0;
+ kvm->arch.return_nisv_io_abort_to_user = true;
+ break;
+ default:
+ r = -EINVAL;
+ break;
+ }
+
+ return r;
+}
/**
* kvm_arch_init_vm - initializes a VM data structure
@@ -197,6 +221,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_IMMEDIATE_EXIT:
case KVM_CAP_VCPU_EVENTS:
case KVM_CAP_ARM_IRQ_LINE_LAYOUT_2:
+ case KVM_CAP_ARM_NISV_TO_USER:
+ case KVM_CAP_ARM_INJECT_EXT_DABT:
r = 1;
break;
case KVM_CAP_ARM_SET_DEVICE_ADDR:
@@ -322,20 +348,24 @@ void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
/*
* If we're about to block (most likely because we've just hit a
* WFI), we need to sync back the state of the GIC CPU interface
- * so that we have the lastest PMR and group enables. This ensures
+ * so that we have the latest PMR and group enables. This ensures
* that kvm_arch_vcpu_runnable has up-to-date data to decide
* whether we have pending interrupts.
+ *
+ * For the same reason, we want to tell GICv4 that we need
+ * doorbells to be signalled, should an interrupt become pending.
*/
preempt_disable();
kvm_vgic_vmcr_sync(vcpu);
+ vgic_v4_put(vcpu, true);
preempt_enable();
-
- kvm_vgic_v4_enable_doorbell(vcpu);
}
void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
{
- kvm_vgic_v4_disable_doorbell(vcpu);
+ preempt_disable();
+ vgic_v4_load(vcpu);
+ preempt_enable();
}
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
@@ -351,6 +381,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
kvm_arm_reset_debug_ptr(vcpu);
+ kvm_arm_pvtime_vcpu_init(&vcpu->arch);
+
return kvm_vgic_vcpu_init(vcpu);
}
@@ -380,11 +412,13 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
kvm_vcpu_load_sysregs(vcpu);
kvm_arch_vcpu_load_fp(vcpu);
kvm_vcpu_pmu_restore_guest(vcpu);
+ if (kvm_arm_is_pvtime_enabled(&vcpu->arch))
+ kvm_make_request(KVM_REQ_RECORD_STEAL, vcpu);
if (single_task_running())
- vcpu_clear_wfe_traps(vcpu);
+ vcpu_clear_wfx_traps(vcpu);
else
- vcpu_set_wfe_traps(vcpu);
+ vcpu_set_wfx_traps(vcpu);
vcpu_ptrauth_setup_lazy(vcpu);
}
@@ -645,6 +679,9 @@ static void check_vcpu_requests(struct kvm_vcpu *vcpu)
* that a VCPU sees new virtual interrupts.
*/
kvm_check_request(KVM_REQ_IRQ_PENDING, vcpu);
+
+ if (kvm_check_request(KVM_REQ_RECORD_STEAL, vcpu))
+ kvm_update_stolen_time(vcpu);
}
}
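
[Editor's note] The kvm_vm_ioctl_enable_cap() handler added to arm.c above means a VMM can now opt a VM in to KVM_CAP_ARM_NISV_TO_USER. A minimal userspace sketch, not part of the patch: the vm_fd handle and error handling are assumptions, and the capability constant comes from the uapi headers updated elsewhere in this merge.

/* Illustrative VMM-side sketch: opting in to KVM_CAP_ARM_NISV_TO_USER. */
#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

static int enable_nisv_to_user(int vm_fd)
{
        struct kvm_enable_cap cap;

        memset(&cap, 0, sizeof(cap));
        cap.cap = KVM_CAP_ARM_NISV_TO_USER;     /* flags and args stay zero */

        /* 0 on success; -1 with errno set if the kernel lacks the capability. */
        return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}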
diff --git a/virt/kvm/arm/hypercalls.c b/virt/kvm/arm/hypercalls.c
new file mode 100644
index 000000000000..550dfa3e53cd
--- /dev/null
+++ b/virt/kvm/arm/hypercalls.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2019 Arm Ltd.
+
+#include <linux/arm-smccc.h>
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_emulate.h>
+
+#include <kvm/arm_hypercalls.h>
+#include <kvm/arm_psci.h>
+
+int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
+{
+ u32 func_id = smccc_get_function(vcpu);
+ long val = SMCCC_RET_NOT_SUPPORTED;
+ u32 feature;
+ gpa_t gpa;
+
+ switch (func_id) {
+ case ARM_SMCCC_VERSION_FUNC_ID:
+ val = ARM_SMCCC_VERSION_1_1;
+ break;
+ case ARM_SMCCC_ARCH_FEATURES_FUNC_ID:
+ feature = smccc_get_arg1(vcpu);
+ switch (feature) {
+ case ARM_SMCCC_ARCH_WORKAROUND_1:
+ switch (kvm_arm_harden_branch_predictor()) {
+ case KVM_BP_HARDEN_UNKNOWN:
+ break;
+ case KVM_BP_HARDEN_WA_NEEDED:
+ val = SMCCC_RET_SUCCESS;
+ break;
+ case KVM_BP_HARDEN_NOT_REQUIRED:
+ val = SMCCC_RET_NOT_REQUIRED;
+ break;
+ }
+ break;
+ case ARM_SMCCC_ARCH_WORKAROUND_2:
+ switch (kvm_arm_have_ssbd()) {
+ case KVM_SSBD_FORCE_DISABLE:
+ case KVM_SSBD_UNKNOWN:
+ break;
+ case KVM_SSBD_KERNEL:
+ val = SMCCC_RET_SUCCESS;
+ break;
+ case KVM_SSBD_FORCE_ENABLE:
+ case KVM_SSBD_MITIGATED:
+ val = SMCCC_RET_NOT_REQUIRED;
+ break;
+ }
+ break;
+ case ARM_SMCCC_HV_PV_TIME_FEATURES:
+ val = SMCCC_RET_SUCCESS;
+ break;
+ }
+ break;
+ case ARM_SMCCC_HV_PV_TIME_FEATURES:
+ val = kvm_hypercall_pv_features(vcpu);
+ break;
+ case ARM_SMCCC_HV_PV_TIME_ST:
+ gpa = kvm_init_stolen_time(vcpu);
+ if (gpa != GPA_INVALID)
+ val = gpa;
+ break;
+ default:
+ return kvm_psci_call(vcpu);
+ }
+
+ smccc_set_retval(vcpu, val, 0, 0, 0);
+ return 1;
+}
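
[Editor's note] For context, a guest discovers these new services by standard SMCCC probing before using them. A hypothetical guest-side check, assuming the HVC conduit that KVM guests use and the ARM_SMCCC_HV_PV_TIME_* constants added to <linux/arm-smccc.h> elsewhere in this merge; the real guest driver may pick its conduit differently.

/* Hypothetical guest-side probe for the paravirtualized time services. */
#include <linux/arm-smccc.h>
#include <linux/types.h>

static bool pv_time_supported(void)
{
        struct arm_smccc_res res;

        /* Ask the hypervisor whether the stolen-time service is implemented. */
        arm_smccc_1_1_hvc(ARM_SMCCC_HV_PV_TIME_FEATURES,
                          ARM_SMCCC_HV_PV_TIME_ST, &res);

        return res.a0 == SMCCC_RET_SUCCESS;
}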
diff --git a/virt/kvm/arm/mmio.c b/virt/kvm/arm/mmio.c
index 6af5c91337f2..70d3b449692c 100644
--- a/virt/kvm/arm/mmio.c
+++ b/virt/kvm/arm/mmio.c
@@ -167,7 +167,14 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
if (ret)
return ret;
} else {
- kvm_err("load/store instruction decoding not implemented\n");
+ if (vcpu->kvm->arch.return_nisv_io_abort_to_user) {
+ run->exit_reason = KVM_EXIT_ARM_NISV;
+ run->arm_nisv.esr_iss = kvm_vcpu_dabt_iss_nisv_sanitized(vcpu);
+ run->arm_nisv.fault_ipa = fault_ipa;
+ return 0;
+ }
+
+ kvm_pr_unimpl("Data abort outside memslots with no valid syndrome info\n");
return -ENOSYS;
}
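
[Editor's note] With the capability enabled, a data abort that KVM cannot decode is now reflected to userspace as KVM_EXIT_ARM_NISV instead of failing with -ENOSYS. A hedged sketch of how a VMM run loop might pick up the new exit; the structure and field names are those introduced by this series, and the logging-only handling is purely for illustration.

/* Illustrative VMM run-loop handling of the new exit reason. */
#include <linux/kvm.h>
#include <stdio.h>

static int handle_nisv_exit(struct kvm_run *run)
{
        if (run->exit_reason != KVM_EXIT_ARM_NISV)
                return -1;

        /*
         * No valid instruction syndrome: the VMM has to decode the faulting
         * instruction itself (or inject an external abort into the guest).
         * Here we only report what the kernel handed us.
         */
        fprintf(stderr, "NISV data abort: IPA 0x%llx, sanitized ISS 0x%llx\n",
                (unsigned long long)run->arm_nisv.fault_ipa,
                (unsigned long long)run->arm_nisv.esr_iss);
        return 0;
}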
diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
index 362a01886bab..8731dfeced8b 100644
--- a/virt/kvm/arm/pmu.c
+++ b/virt/kvm/arm/pmu.c
@@ -8,6 +8,7 @@
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/perf_event.h>
+#include <linux/perf/arm_pmu.h>
#include <linux/uaccess.h>
#include <asm/kvm_emulate.h>
#include <kvm/arm_pmu.h>
@@ -146,8 +147,7 @@ u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
if (kvm_pmu_pmc_is_chained(pmc) &&
kvm_pmu_idx_is_high_counter(select_idx))
counter = upper_32_bits(counter);
-
- else if (!kvm_pmu_idx_is_64bit(vcpu, select_idx))
+ else if (select_idx != ARMV8_PMU_CYCLE_IDX)
counter = lower_32_bits(counter);
return counter;
@@ -193,7 +193,7 @@ static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
*/
static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc)
{
- u64 counter, reg;
+ u64 counter, reg, val;
pmc = kvm_pmu_get_canonical_pmc(pmc);
if (!pmc->perf_event)
@@ -201,16 +201,19 @@ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc)
counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
- if (kvm_pmu_pmc_is_chained(pmc)) {
- reg = PMEVCNTR0_EL0 + pmc->idx;
- __vcpu_sys_reg(vcpu, reg) = lower_32_bits(counter);
- __vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter);
+ if (pmc->idx == ARMV8_PMU_CYCLE_IDX) {
+ reg = PMCCNTR_EL0;
+ val = counter;
} else {
- reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
- ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx;
- __vcpu_sys_reg(vcpu, reg) = lower_32_bits(counter);
+ reg = PMEVCNTR0_EL0 + pmc->idx;
+ val = lower_32_bits(counter);
}
+ __vcpu_sys_reg(vcpu, reg) = val;
+
+ if (kvm_pmu_pmc_is_chained(pmc))
+ __vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter);
+
kvm_pmu_release_perf_event(pmc);
}
@@ -440,8 +443,25 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
struct pt_regs *regs)
{
struct kvm_pmc *pmc = perf_event->overflow_handler_context;
+ struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu);
struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
int idx = pmc->idx;
+ u64 period;
+
+ cpu_pmu->pmu.stop(perf_event, PERF_EF_UPDATE);
+
+ /*
+ * Reset the sample period to the architectural limit,
+ * i.e. the point where the counter overflows.
+ */
+ period = -(local64_read(&perf_event->count));
+
+ if (!kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
+ period &= GENMASK(31, 0);
+
+ local64_set(&perf_event->hw.period_left, 0);
+ perf_event->attr.sample_period = period;
+ perf_event->hw.sample_period = period;
__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);
@@ -449,6 +469,8 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
kvm_vcpu_kick(vcpu);
}
+
+ cpu_pmu->pmu.start(perf_event, PERF_EF_RELOAD);
}
/**
@@ -567,12 +589,12 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
* high counter.
*/
attr.sample_period = (-counter) & GENMASK(63, 0);
+ if (kvm_pmu_counter_is_enabled(vcpu, pmc->idx + 1))
+ attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED;
+
event = perf_event_create_kernel_counter(&attr, -1, current,
kvm_pmu_perf_overflow,
pmc + 1);
-
- if (kvm_pmu_counter_is_enabled(vcpu, pmc->idx + 1))
- attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED;
} else {
/* The initial sample period (overflow count) of an event. */
if (kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c
index 87927f7e1ee7..17e2bdd4b76f 100644
--- a/virt/kvm/arm/psci.c
+++ b/virt/kvm/arm/psci.c
@@ -15,6 +15,7 @@
#include <asm/kvm_host.h>
#include <kvm/arm_psci.h>
+#include <kvm/arm_hypercalls.h>
/*
* This is an implementation of the Power State Coordination Interface
@@ -23,38 +24,6 @@
#define AFFINITY_MASK(level) ~((0x1UL << ((level) * MPIDR_LEVEL_BITS)) - 1)
-static u32 smccc_get_function(struct kvm_vcpu *vcpu)
-{
- return vcpu_get_reg(vcpu, 0);
-}
-
-static unsigned long smccc_get_arg1(struct kvm_vcpu *vcpu)
-{
- return vcpu_get_reg(vcpu, 1);
-}
-
-static unsigned long smccc_get_arg2(struct kvm_vcpu *vcpu)
-{
- return vcpu_get_reg(vcpu, 2);
-}
-
-static unsigned long smccc_get_arg3(struct kvm_vcpu *vcpu)
-{
- return vcpu_get_reg(vcpu, 3);
-}
-
-static void smccc_set_retval(struct kvm_vcpu *vcpu,
- unsigned long a0,
- unsigned long a1,
- unsigned long a2,
- unsigned long a3)
-{
- vcpu_set_reg(vcpu, 0, a0);
- vcpu_set_reg(vcpu, 1, a1);
- vcpu_set_reg(vcpu, 2, a2);
- vcpu_set_reg(vcpu, 3, a3);
-}
-
static unsigned long psci_affinity_mask(unsigned long affinity_level)
{
if (affinity_level <= 3)
@@ -373,7 +342,7 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
* Errors:
* -EINVAL: Unrecognized PSCI function
*/
-static int kvm_psci_call(struct kvm_vcpu *vcpu)
+int kvm_psci_call(struct kvm_vcpu *vcpu)
{
switch (kvm_psci_version(vcpu, vcpu->kvm)) {
case KVM_ARM_PSCI_1_0:
@@ -387,55 +356,6 @@ static int kvm_psci_call(struct kvm_vcpu *vcpu)
};
}
-int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
-{
- u32 func_id = smccc_get_function(vcpu);
- u32 val = SMCCC_RET_NOT_SUPPORTED;
- u32 feature;
-
- switch (func_id) {
- case ARM_SMCCC_VERSION_FUNC_ID:
- val = ARM_SMCCC_VERSION_1_1;
- break;
- case ARM_SMCCC_ARCH_FEATURES_FUNC_ID:
- feature = smccc_get_arg1(vcpu);
- switch(feature) {
- case ARM_SMCCC_ARCH_WORKAROUND_1:
- switch (kvm_arm_harden_branch_predictor()) {
- case KVM_BP_HARDEN_UNKNOWN:
- break;
- case KVM_BP_HARDEN_WA_NEEDED:
- val = SMCCC_RET_SUCCESS;
- break;
- case KVM_BP_HARDEN_NOT_REQUIRED:
- val = SMCCC_RET_NOT_REQUIRED;
- break;
- }
- break;
- case ARM_SMCCC_ARCH_WORKAROUND_2:
- switch (kvm_arm_have_ssbd()) {
- case KVM_SSBD_FORCE_DISABLE:
- case KVM_SSBD_UNKNOWN:
- break;
- case KVM_SSBD_KERNEL:
- val = SMCCC_RET_SUCCESS;
- break;
- case KVM_SSBD_FORCE_ENABLE:
- case KVM_SSBD_MITIGATED:
- val = SMCCC_RET_NOT_REQUIRED;
- break;
- }
- break;
- }
- break;
- default:
- return kvm_psci_call(vcpu);
- }
-
- smccc_set_retval(vcpu, val, 0, 0, 0);
- return 1;
-}
-
int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu)
{
return 3; /* PSCI version and two workaround registers */
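
[Editor's note] The smccc_get_* and smccc_set_retval helpers deleted above are not lost: they move into the new <kvm/arm_hypercalls.h> header (now included at the top of this file) so the PSCI code and the new hypercall dispatcher can share them. A condensed sketch of what that header is expected to provide, reconstructed from the functions removed here.

/* Condensed sketch of <kvm/arm_hypercalls.h>, based on the helpers removed above. */
#include <linux/kvm_host.h>
#include <asm/kvm_emulate.h>    /* vcpu_get_reg() / vcpu_set_reg() */

static inline u32 smccc_get_function(struct kvm_vcpu *vcpu)
{
        return vcpu_get_reg(vcpu, 0);
}

static inline unsigned long smccc_get_arg1(struct kvm_vcpu *vcpu)
{
        return vcpu_get_reg(vcpu, 1);   /* arg2/arg3 follow the same pattern with regs 2 and 3 */
}

static inline void smccc_set_retval(struct kvm_vcpu *vcpu, unsigned long a0,
                                    unsigned long a1, unsigned long a2,
                                    unsigned long a3)
{
        vcpu_set_reg(vcpu, 0, a0);
        vcpu_set_reg(vcpu, 1, a1);
        vcpu_set_reg(vcpu, 2, a2);
        vcpu_set_reg(vcpu, 3, a3);
}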
diff --git a/virt/kvm/arm/pvtime.c b/virt/kvm/arm/pvtime.c
new file mode 100644
index 000000000000..1e0f4c284888
--- /dev/null
+++ b/virt/kvm/arm/pvtime.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2019 Arm Ltd.
+
+#include <linux/arm-smccc.h>
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_mmu.h>
+#include <asm/pvclock-abi.h>
+
+#include <kvm/arm_hypercalls.h>
+
+void kvm_update_stolen_time(struct kvm_vcpu *vcpu)
+{
+ struct kvm *kvm = vcpu->kvm;
+ u64 steal;
+ __le64 steal_le;
+ u64 offset;
+ int idx;
+ u64 base = vcpu->arch.steal.base;
+
+ if (base == GPA_INVALID)
+ return;
+
+ /* Let's do the local bookkeeping */
+ steal = vcpu->arch.steal.steal;
+ steal += current->sched_info.run_delay - vcpu->arch.steal.last_steal;
+ vcpu->arch.steal.last_steal = current->sched_info.run_delay;
+ vcpu->arch.steal.steal = steal;
+
+ steal_le = cpu_to_le64(steal);
+ idx = srcu_read_lock(&kvm->srcu);
+ offset = offsetof(struct pvclock_vcpu_stolen_time, stolen_time);
+ kvm_put_guest(kvm, base + offset, steal_le, u64);
+ srcu_read_unlock(&kvm->srcu, idx);
+}
+
+long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu)
+{
+ u32 feature = smccc_get_arg1(vcpu);
+ long val = SMCCC_RET_NOT_SUPPORTED;
+
+ switch (feature) {
+ case ARM_SMCCC_HV_PV_TIME_FEATURES:
+ case ARM_SMCCC_HV_PV_TIME_ST:
+ val = SMCCC_RET_SUCCESS;
+ break;
+ }
+
+ return val;
+}
+
+gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu)
+{
+ struct pvclock_vcpu_stolen_time init_values = {};
+ struct kvm *kvm = vcpu->kvm;
+ u64 base = vcpu->arch.steal.base;
+ int idx;
+
+ if (base == GPA_INVALID)
+ return base;
+
+ /*
+ * Start counting stolen time from the time the guest requests
+ * the feature enabled.
+ */
+ vcpu->arch.steal.steal = 0;
+ vcpu->arch.steal.last_steal = current->sched_info.run_delay;
+
+ idx = srcu_read_lock(&kvm->srcu);
+ kvm_write_guest(kvm, base, &init_values, sizeof(init_values));
+ srcu_read_unlock(&kvm->srcu, idx);
+
+ return base;
+}
+
+int kvm_arm_pvtime_set_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ u64 __user *user = (u64 __user *)attr->addr;
+ struct kvm *kvm = vcpu->kvm;
+ u64 ipa;
+ int ret = 0;
+ int idx;
+
+ if (attr->attr != KVM_ARM_VCPU_PVTIME_IPA)
+ return -ENXIO;
+
+ if (get_user(ipa, user))
+ return -EFAULT;
+ if (!IS_ALIGNED(ipa, 64))
+ return -EINVAL;
+ if (vcpu->arch.steal.base != GPA_INVALID)
+ return -EEXIST;
+
+ /* Check the address is in a valid memslot */
+ idx = srcu_read_lock(&kvm->srcu);
+ if (kvm_is_error_hva(gfn_to_hva(kvm, ipa >> PAGE_SHIFT)))
+ ret = -EINVAL;
+ srcu_read_unlock(&kvm->srcu, idx);
+
+ if (!ret)
+ vcpu->arch.steal.base = ipa;
+
+ return ret;
+}
+
+int kvm_arm_pvtime_get_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ u64 __user *user = (u64 __user *)attr->addr;
+ u64 ipa;
+
+ if (attr->attr != KVM_ARM_VCPU_PVTIME_IPA)
+ return -ENXIO;
+
+ ipa = vcpu->arch.steal.base;
+
+ if (put_user(ipa, user))
+ return -EFAULT;
+ return 0;
+}
+
+int kvm_arm_pvtime_has_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ switch (attr->attr) {
+ case KVM_ARM_VCPU_PVTIME_IPA:
+ return 0;
+ }
+ return -ENXIO;
+}
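
[Editor's note] Userspace wires up the stolen-time region through the vCPU device-attribute interface implemented by kvm_arm_pvtime_set_attr() above. A rough sketch from the VMM side: pvtime_ipa is assumed to be 64-byte aligned and to fall inside an existing memslot, and the group/attr constants come from the arm64 uapi header added by this series.

/* Illustrative VMM-side sketch: registering a vCPU's stolen-time record. */
#include <linux/kvm.h>
#include <stdint.h>
#include <sys/ioctl.h>

static int set_pvtime_ipa(int vcpu_fd, uint64_t pvtime_ipa)
{
        struct kvm_device_attr attr = {
                .group = KVM_ARM_VCPU_PVTIME_CTRL,
                .attr  = KVM_ARM_VCPU_PVTIME_IPA,
                .addr  = (uint64_t)&pvtime_ipa, /* pointer to the IPA, not the IPA itself */
        };

        /* errno is EINVAL for an unaligned IPA, EEXIST if already configured. */
        return ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr);
}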
diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
index 6f50c429196d..b3c5de48064c 100644
--- a/virt/kvm/arm/vgic/vgic-init.c
+++ b/virt/kvm/arm/vgic/vgic-init.c
@@ -203,6 +203,7 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
raw_spin_lock_init(&vgic_cpu->ap_list_lock);
+ atomic_set(&vgic_cpu->vgic_v3.its_vpe.vlpi_count, 0);
/*
* Enable and configure all SGIs to be edge-triggered and
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 2be6b66b3856..98c7360d9fb7 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -360,7 +360,10 @@ static int update_affinity(struct vgic_irq *irq, struct kvm_vcpu *vcpu)
if (ret)
return ret;
+ if (map.vpe)
+ atomic_dec(&map.vpe->vlpi_count);
map.vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
+ atomic_inc(&map.vpe->vlpi_count);
ret = its_map_vlpi(irq->host_irq, &map);
}
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index 8d69f007dd0c..f45635a6f0ec 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -357,14 +357,14 @@ retry:
}
/**
- * vgic_its_save_pending_tables - Save the pending tables into guest RAM
+ * vgic_v3_save_pending_tables - Save the pending tables into guest RAM
* kvm lock and all vcpu lock must be held
*/
int vgic_v3_save_pending_tables(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
- int last_byte_offset = -1;
struct vgic_irq *irq;
+ gpa_t last_ptr = ~(gpa_t)0;
int ret;
u8 val;
@@ -384,11 +384,11 @@ int vgic_v3_save_pending_tables(struct kvm *kvm)
bit_nr = irq->intid % BITS_PER_BYTE;
ptr = pendbase + byte_offset;
- if (byte_offset != last_byte_offset) {
+ if (ptr != last_ptr) {
ret = kvm_read_guest_lock(kvm, ptr, &val, 1);
if (ret)
return ret;
- last_byte_offset = byte_offset;
+ last_ptr = ptr;
}
stored = val & (1U << bit_nr);
@@ -664,6 +664,8 @@ void vgic_v3_load(struct kvm_vcpu *vcpu)
if (has_vhe())
__vgic_v3_activate_traps(vcpu);
+
+ WARN_ON(vgic_v4_load(vcpu));
}
void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu)
@@ -676,6 +678,8 @@ void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu)
void vgic_v3_put(struct kvm_vcpu *vcpu)
{
+ WARN_ON(vgic_v4_put(vcpu, false));
+
vgic_v3_vmcr_sync(vcpu);
kvm_call_hyp(__vgic_v3_save_aprs, vcpu);
diff --git a/virt/kvm/arm/vgic/vgic-v4.c b/virt/kvm/arm/vgic/vgic-v4.c
index 477af6aebb97..46f875589c47 100644
--- a/virt/kvm/arm/vgic/vgic-v4.c
+++ b/virt/kvm/arm/vgic/vgic-v4.c
@@ -85,6 +85,10 @@ static irqreturn_t vgic_v4_doorbell_handler(int irq, void *info)
{
struct kvm_vcpu *vcpu = info;
+ /* We got the message, no need to fire again */
+ if (!irqd_irq_disabled(&irq_to_desc(irq)->irq_data))
+ disable_irq_nosync(irq);
+
vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last = true;
kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
kvm_vcpu_kick(vcpu);
@@ -192,20 +196,30 @@ void vgic_v4_teardown(struct kvm *kvm)
its_vm->vpes = NULL;
}
-int vgic_v4_sync_hwstate(struct kvm_vcpu *vcpu)
+int vgic_v4_put(struct kvm_vcpu *vcpu, bool need_db)
{
- if (!vgic_supports_direct_msis(vcpu->kvm))
+ struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
+ struct irq_desc *desc = irq_to_desc(vpe->irq);
+
+ if (!vgic_supports_direct_msis(vcpu->kvm) || !vpe->resident)
return 0;
- return its_schedule_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe, false);
+ /*
+ * If blocking, a doorbell is required. Undo the nested
+ * disable_irq() calls...
+ */
+ while (need_db && irqd_irq_disabled(&desc->irq_data))
+ enable_irq(vpe->irq);
+
+ return its_schedule_vpe(vpe, false);
}
-int vgic_v4_flush_hwstate(struct kvm_vcpu *vcpu)
+int vgic_v4_load(struct kvm_vcpu *vcpu)
{
- int irq = vcpu->arch.vgic_cpu.vgic_v3.its_vpe.irq;
+ struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
int err;
- if (!vgic_supports_direct_msis(vcpu->kvm))
+ if (!vgic_supports_direct_msis(vcpu->kvm) || vpe->resident)
return 0;
/*
@@ -214,11 +228,14 @@ int vgic_v4_flush_hwstate(struct kvm_vcpu *vcpu)
* doc in drivers/irqchip/irq-gic-v4.c to understand how this
* turns into a VMOVP command at the ITS level.
*/
- err = irq_set_affinity(irq, cpumask_of(smp_processor_id()));
+ err = irq_set_affinity(vpe->irq, cpumask_of(smp_processor_id()));
if (err)
return err;
- err = its_schedule_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe, true);
+ /* Disabled the doorbell, as we're about to enter the guest */
+ disable_irq_nosync(vpe->irq);
+
+ err = its_schedule_vpe(vpe, true);
if (err)
return err;
@@ -226,9 +243,7 @@ int vgic_v4_flush_hwstate(struct kvm_vcpu *vcpu)
* Now that the VPE is resident, let's get rid of a potential
* doorbell interrupt that would still be pending.
*/
- err = irq_set_irqchip_state(irq, IRQCHIP_STATE_PENDING, false);
-
- return err;
+ return irq_set_irqchip_state(vpe->irq, IRQCHIP_STATE_PENDING, false);
}
static struct vgic_its *vgic_get_its(struct kvm *kvm,
@@ -266,7 +281,7 @@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq,
mutex_lock(&its->its_lock);
- /* Perform then actual DevID/EventID -> LPI translation. */
+ /* Perform the actual DevID/EventID -> LPI translation. */
ret = vgic_its_resolve_lpi(kvm, its, irq_entry->msi.devid,
irq_entry->msi.data, &irq);
if (ret)
@@ -294,6 +309,7 @@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq,
irq->hw = true;
irq->host_irq = virq;
+ atomic_inc(&map.vpe->vlpi_count);
out:
mutex_unlock(&its->its_lock);
@@ -327,6 +343,7 @@ int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int virq,
WARN_ON(!(irq->hw && irq->host_irq == virq));
if (irq->hw) {
+ atomic_dec(&irq->target_vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count);
irq->hw = false;
ret = its_unmap_vlpi(virq);
}
@@ -335,21 +352,3 @@ out:
mutex_unlock(&its->its_lock);
return ret;
}
-
-void kvm_vgic_v4_enable_doorbell(struct kvm_vcpu *vcpu)
-{
- if (vgic_supports_direct_msis(vcpu->kvm)) {
- int irq = vcpu->arch.vgic_cpu.vgic_v3.its_vpe.irq;
- if (irq)
- enable_irq(irq);
- }
-}
-
-void kvm_vgic_v4_disable_doorbell(struct kvm_vcpu *vcpu)
-{
- if (vgic_supports_direct_msis(vcpu->kvm)) {
- int irq = vcpu->arch.vgic_cpu.vgic_v3.its_vpe.irq;
- if (irq)
- disable_irq(irq);
- }
-}
diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
index 45a870cb63f5..99b02ca730a8 100644
--- a/virt/kvm/arm/vgic/vgic.c
+++ b/virt/kvm/arm/vgic/vgic.c
@@ -857,8 +857,6 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
- WARN_ON(vgic_v4_sync_hwstate(vcpu));
-
/* An empty ap_list_head implies used_lrs == 0 */
if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head))
return;
@@ -882,8 +880,6 @@ static inline void vgic_restore_state(struct kvm_vcpu *vcpu)
/* Flush our emulation state into the GIC hardware before entering the guest. */
void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
{
- WARN_ON(vgic_v4_flush_hwstate(vcpu));
-
/*
* If there are no virtual interrupts active or pending for this
* VCPU, then there is no work to do and we can bail out without
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index 83066a81b16a..c7fefd6b1c80 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -316,7 +316,5 @@ void vgic_its_invalidate_cache(struct kvm *kvm);
bool vgic_supports_direct_msis(struct kvm *kvm);
int vgic_v4_init(struct kvm *kvm);
void vgic_v4_teardown(struct kvm *kvm);
-int vgic_v4_sync_hwstate(struct kvm_vcpu *vcpu);
-int vgic_v4_flush_hwstate(struct kvm_vcpu *vcpu);
#endif
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
index 8ffd07e2a160..00c747dbc82e 100644
--- a/virt/kvm/coalesced_mmio.c
+++ b/virt/kvm/coalesced_mmio.c
@@ -110,14 +110,11 @@ static const struct kvm_io_device_ops coalesced_mmio_ops = {
int kvm_coalesced_mmio_init(struct kvm *kvm)
{
struct page *page;
- int ret;
- ret = -ENOMEM;
page = alloc_page(GFP_KERNEL | __GFP_ZERO);
if (!page)
- goto out_err;
+ return -ENOMEM;
- ret = 0;
kvm->coalesced_mmio_ring = page_address(page);
/*
@@ -128,8 +125,7 @@ int kvm_coalesced_mmio_init(struct kvm *kvm)
spin_lock_init(&kvm->ring_lock);
INIT_LIST_HEAD(&kvm->coalesced_zones);
-out_err:
- return ret;
+ return 0;
}
void kvm_coalesced_mmio_free(struct kvm *kvm)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index fd68fbe0a75d..00268290dcbd 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -50,6 +50,7 @@
#include <linux/bsearch.h>
#include <linux/io.h>
#include <linux/lockdep.h>
+#include <linux/kthread.h>
#include <asm/processor.h>
#include <asm/ioctl.h>
@@ -121,9 +122,22 @@ static long kvm_vcpu_compat_ioctl(struct file *file, unsigned int ioctl,
unsigned long arg);
#define KVM_COMPAT(c) .compat_ioctl = (c)
#else
+/*
+ * For architectures that don't implement a compat infrastructure,
+ * adopt a double line of defense:
+ * - Prevent a compat task from opening /dev/kvm
+ * - If the open has been done by a 64bit task, and the KVM fd
+ * passed to a compat task, let the ioctls fail.
+ */
static long kvm_no_compat_ioctl(struct file *file, unsigned int ioctl,
unsigned long arg) { return -EINVAL; }
-#define KVM_COMPAT(c) .compat_ioctl = kvm_no_compat_ioctl
+
+static int kvm_no_compat_open(struct inode *inode, struct file *file)
+{
+ return is_compat_task() ? -ENODEV : 0;
+}
+#define KVM_COMPAT(c) .compat_ioctl = kvm_no_compat_ioctl, \
+ .open = kvm_no_compat_open
#endif
static int hardware_enable_all(void);
static void hardware_disable_all(void);
@@ -149,10 +163,30 @@ __weak int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
return 0;
}
+bool kvm_is_zone_device_pfn(kvm_pfn_t pfn)
+{
+ /*
+ * The metadata used by is_zone_device_page() to determine whether or
+ * not a page is ZONE_DEVICE is guaranteed to be valid if and only if
+ * the device has been pinned, e.g. by get_user_pages(). WARN if the
+ * page_count() is zero to help detect bad usage of this helper.
+ */
+ if (!pfn_valid(pfn) || WARN_ON_ONCE(!page_count(pfn_to_page(pfn))))
+ return false;
+
+ return is_zone_device_page(pfn_to_page(pfn));
+}
+
bool kvm_is_reserved_pfn(kvm_pfn_t pfn)
{
+ /*
+ * ZONE_DEVICE pages currently set PG_reserved, but from a refcounting
+ * perspective they are "normal" pages, albeit with slightly different
+ * usage rules.
+ */
if (pfn_valid(pfn))
- return PageReserved(pfn_to_page(pfn));
+ return PageReserved(pfn_to_page(pfn)) &&
+ !kvm_is_zone_device_pfn(pfn);
return true;
}
@@ -625,10 +659,28 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
return 0;
}
+/*
+ * Called after the VM is otherwise initialized, but just before adding it to
+ * the vm_list.
+ */
+int __weak kvm_arch_post_init_vm(struct kvm *kvm)
+{
+ return 0;
+}
+
+/*
+ * Called just after removing the VM from the vm_list, but before doing any
+ * other destruction.
+ */
+void __weak kvm_arch_pre_destroy_vm(struct kvm *kvm)
+{
+}
+
static struct kvm *kvm_create_vm(unsigned long type)
{
- int r, i;
struct kvm *kvm = kvm_arch_alloc_vm();
+ int r = -ENOMEM;
+ int i;
if (!kvm)
return ERR_PTR(-ENOMEM);
@@ -640,46 +692,51 @@ static struct kvm *kvm_create_vm(unsigned long type)
mutex_init(&kvm->lock);
mutex_init(&kvm->irq_lock);
mutex_init(&kvm->slots_lock);
- refcount_set(&kvm->users_count, 1);
INIT_LIST_HEAD(&kvm->devices);
- r = kvm_arch_init_vm(kvm, type);
- if (r)
- goto out_err_no_disable;
-
- r = hardware_enable_all();
- if (r)
- goto out_err_no_disable;
-
-#ifdef CONFIG_HAVE_KVM_IRQFD
- INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
-#endif
-
BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX);
- r = -ENOMEM;
+ if (init_srcu_struct(&kvm->srcu))
+ goto out_err_no_srcu;
+ if (init_srcu_struct(&kvm->irq_srcu))
+ goto out_err_no_irq_srcu;
+
+ refcount_set(&kvm->users_count, 1);
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
struct kvm_memslots *slots = kvm_alloc_memslots();
+
if (!slots)
- goto out_err_no_srcu;
+ goto out_err_no_arch_destroy_vm;
/* Generations must be different for each address space. */
slots->generation = i;
rcu_assign_pointer(kvm->memslots[i], slots);
}
- if (init_srcu_struct(&kvm->srcu))
- goto out_err_no_srcu;
- if (init_srcu_struct(&kvm->irq_srcu))
- goto out_err_no_irq_srcu;
for (i = 0; i < KVM_NR_BUSES; i++) {
rcu_assign_pointer(kvm->buses[i],
kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL_ACCOUNT));
if (!kvm->buses[i])
- goto out_err;
+ goto out_err_no_arch_destroy_vm;
}
+ r = kvm_arch_init_vm(kvm, type);
+ if (r)
+ goto out_err_no_arch_destroy_vm;
+
+ r = hardware_enable_all();
+ if (r)
+ goto out_err_no_disable;
+
+#ifdef CONFIG_HAVE_KVM_IRQFD
+ INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
+#endif
+
r = kvm_init_mmu_notifier(kvm);
if (r)
+ goto out_err_no_mmu_notifier;
+
+ r = kvm_arch_post_init_vm(kvm);
+ if (r)
goto out_err;
mutex_lock(&kvm_lock);
@@ -691,17 +748,24 @@ static struct kvm *kvm_create_vm(unsigned long type)
return kvm;
out_err:
- cleanup_srcu_struct(&kvm->irq_srcu);
-out_err_no_irq_srcu:
- cleanup_srcu_struct(&kvm->srcu);
-out_err_no_srcu:
+#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
+ if (kvm->mmu_notifier.ops)
+ mmu_notifier_unregister(&kvm->mmu_notifier, current->mm);
+#endif
+out_err_no_mmu_notifier:
hardware_disable_all();
out_err_no_disable:
- refcount_set(&kvm->users_count, 0);
+ kvm_arch_destroy_vm(kvm);
+out_err_no_arch_destroy_vm:
+ WARN_ON_ONCE(!refcount_dec_and_test(&kvm->users_count));
for (i = 0; i < KVM_NR_BUSES; i++)
kfree(kvm_get_bus(kvm, i));
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
kvm_free_memslots(kvm, __kvm_memslots(kvm, i));
+ cleanup_srcu_struct(&kvm->irq_srcu);
+out_err_no_irq_srcu:
+ cleanup_srcu_struct(&kvm->srcu);
+out_err_no_srcu:
kvm_arch_free_vm(kvm);
mmdrop(current->mm);
return ERR_PTR(r);
@@ -733,6 +797,8 @@ static void kvm_destroy_vm(struct kvm *kvm)
mutex_lock(&kvm_lock);
list_del(&kvm->vm_list);
mutex_unlock(&kvm_lock);
+ kvm_arch_pre_destroy_vm(kvm);
+
kvm_free_irq_routing(kvm);
for (i = 0; i < KVM_NR_BUSES; i++) {
struct kvm_io_bus *bus = kvm_get_bus(kvm, i);
@@ -772,6 +838,18 @@ void kvm_put_kvm(struct kvm *kvm)
}
EXPORT_SYMBOL_GPL(kvm_put_kvm);
+/*
+ * Used to put a reference that was taken on behalf of an object associated
+ * with a user-visible file descriptor, e.g. a vcpu or device, if installation
+ * of the new file descriptor fails and the reference cannot be transferred to
+ * its final owner. In such cases, the caller is still actively using @kvm and
+ * will fail miserably if the refcount unexpectedly hits zero.
+ */
+void kvm_put_kvm_no_destroy(struct kvm *kvm)
+{
+ WARN_ON(refcount_dec_and_test(&kvm->users_count));
+}
+EXPORT_SYMBOL_GPL(kvm_put_kvm_no_destroy);
static int kvm_vm_release(struct inode *inode, struct file *filp)
{
@@ -1853,7 +1931,7 @@ EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty);
void kvm_set_pfn_dirty(kvm_pfn_t pfn)
{
- if (!kvm_is_reserved_pfn(pfn)) {
+ if (!kvm_is_reserved_pfn(pfn) && !kvm_is_zone_device_pfn(pfn)) {
struct page *page = pfn_to_page(pfn);
SetPageDirty(page);
@@ -1863,7 +1941,7 @@ EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty);
void kvm_set_pfn_accessed(kvm_pfn_t pfn)
{
- if (!kvm_is_reserved_pfn(pfn))
+ if (!kvm_is_reserved_pfn(pfn) && !kvm_is_zone_device_pfn(pfn))
mark_page_accessed(pfn_to_page(pfn));
}
EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed);
@@ -2360,20 +2438,23 @@ out:
kvm_arch_vcpu_unblocking(vcpu);
block_ns = ktime_to_ns(cur) - ktime_to_ns(start);
- if (!vcpu_valid_wakeup(vcpu))
- shrink_halt_poll_ns(vcpu);
- else if (halt_poll_ns) {
- if (block_ns <= vcpu->halt_poll_ns)
- ;
- /* we had a long block, shrink polling */
- else if (vcpu->halt_poll_ns && block_ns > halt_poll_ns)
+ if (!kvm_arch_no_poll(vcpu)) {
+ if (!vcpu_valid_wakeup(vcpu)) {
shrink_halt_poll_ns(vcpu);
- /* we had a short halt and our poll time is too small */
- else if (vcpu->halt_poll_ns < halt_poll_ns &&
- block_ns < halt_poll_ns)
- grow_halt_poll_ns(vcpu);
- } else
- vcpu->halt_poll_ns = 0;
+ } else if (halt_poll_ns) {
+ if (block_ns <= vcpu->halt_poll_ns)
+ ;
+ /* we had a long block, shrink polling */
+ else if (vcpu->halt_poll_ns && block_ns > halt_poll_ns)
+ shrink_halt_poll_ns(vcpu);
+ /* we had a short halt and our poll time is too small */
+ else if (vcpu->halt_poll_ns < halt_poll_ns &&
+ block_ns < halt_poll_ns)
+ grow_halt_poll_ns(vcpu);
+ } else {
+ vcpu->halt_poll_ns = 0;
+ }
+ }
trace_kvm_vcpu_wakeup(block_ns, waited, vcpu_valid_wakeup(vcpu));
kvm_arch_vcpu_block_finish(vcpu);
@@ -2670,17 +2751,18 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
goto unlock_vcpu_destroy;
}
- BUG_ON(kvm->vcpus[atomic_read(&kvm->online_vcpus)]);
+ vcpu->vcpu_idx = atomic_read(&kvm->online_vcpus);
+ BUG_ON(kvm->vcpus[vcpu->vcpu_idx]);
/* Now it's all set up, let userspace reach it */
kvm_get_kvm(kvm);
r = create_vcpu_fd(vcpu);
if (r < 0) {
- kvm_put_kvm(kvm);
+ kvm_put_kvm_no_destroy(kvm);
goto unlock_vcpu_destroy;
}
- kvm->vcpus[atomic_read(&kvm->online_vcpus)] = vcpu;
+ kvm->vcpus[vcpu->vcpu_idx] = vcpu;
/*
* Pairs with smp_rmb() in kvm_get_vcpu. Write kvm->vcpus
@@ -3046,14 +3128,14 @@ struct kvm_device *kvm_device_from_filp(struct file *filp)
return filp->private_data;
}
-static struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = {
+static const struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = {
#ifdef CONFIG_KVM_MPIC
[KVM_DEV_TYPE_FSL_MPIC_20] = &kvm_mpic_ops,
[KVM_DEV_TYPE_FSL_MPIC_42] = &kvm_mpic_ops,
#endif
};
-int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type)
+int kvm_register_device_ops(const struct kvm_device_ops *ops, u32 type)
{
if (type >= ARRAY_SIZE(kvm_device_ops_table))
return -ENOSPC;
@@ -3074,7 +3156,7 @@ void kvm_unregister_device_ops(u32 type)
static int kvm_ioctl_create_device(struct kvm *kvm,
struct kvm_create_device *cd)
{
- struct kvm_device_ops *ops = NULL;
+ const struct kvm_device_ops *ops = NULL;
struct kvm_device *dev;
bool test = cd->flags & KVM_CREATE_DEVICE_TEST;
int type;
@@ -3114,7 +3196,7 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
kvm_get_kvm(kvm);
ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR | O_CLOEXEC);
if (ret < 0) {
- kvm_put_kvm(kvm);
+ kvm_put_kvm_no_destroy(kvm);
mutex_lock(&kvm->lock);
list_del(&dev->vm_node);
mutex_unlock(&kvm->lock);
@@ -4272,12 +4354,12 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
r = kvm_arch_hardware_setup();
if (r < 0)
- goto out_free_0a;
+ goto out_free_1;
for_each_online_cpu(cpu) {
smp_call_function_single(cpu, check_processor_compat, &r, 1);
if (r < 0)
- goto out_free_1;
+ goto out_free_2;
}
r = cpuhp_setup_state_nocalls(CPUHP_AP_KVM_STARTING, "kvm/cpu:starting",
@@ -4334,9 +4416,8 @@ out_free_3:
unregister_reboot_notifier(&kvm_reboot_notifier);
cpuhp_remove_state_nocalls(CPUHP_AP_KVM_STARTING);
out_free_2:
-out_free_1:
kvm_arch_hardware_unsetup();
-out_free_0a:
+out_free_1:
free_cpumask_var(cpus_hardware_enabled);
out_free_0:
kvm_irqfd_exit();
@@ -4364,3 +4445,86 @@ void kvm_exit(void)
kvm_vfio_ops_exit();
}
EXPORT_SYMBOL_GPL(kvm_exit);
+
+struct kvm_vm_worker_thread_context {
+ struct kvm *kvm;
+ struct task_struct *parent;
+ struct completion init_done;
+ kvm_vm_thread_fn_t thread_fn;
+ uintptr_t data;
+ int err;
+};
+
+static int kvm_vm_worker_thread(void *context)
+{
+ /*
+ * The init_context is allocated on the stack of the parent thread, so
+ * we have to locally copy anything that is needed beyond initialization
+ */
+ struct kvm_vm_worker_thread_context *init_context = context;
+ struct kvm *kvm = init_context->kvm;
+ kvm_vm_thread_fn_t thread_fn = init_context->thread_fn;
+ uintptr_t data = init_context->data;
+ int err;
+
+ err = kthread_park(current);
+ /* kthread_park(current) is never supposed to return an error */
+ WARN_ON(err != 0);
+ if (err)
+ goto init_complete;
+
+ err = cgroup_attach_task_all(init_context->parent, current);
+ if (err) {
+ kvm_err("%s: cgroup_attach_task_all failed with err %d\n",
+ __func__, err);
+ goto init_complete;
+ }
+
+ set_user_nice(current, task_nice(init_context->parent));
+
+init_complete:
+ init_context->err = err;
+ complete(&init_context->init_done);
+ init_context = NULL;
+
+ if (err)
+ return err;
+
+ /* Wait to be woken up by the spawner before proceeding. */
+ kthread_parkme();
+
+ if (!kthread_should_stop())
+ err = thread_fn(kvm, data);
+
+ return err;
+}
+
+int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn,
+ uintptr_t data, const char *name,
+ struct task_struct **thread_ptr)
+{
+ struct kvm_vm_worker_thread_context init_context = {};
+ struct task_struct *thread;
+
+ *thread_ptr = NULL;
+ init_context.kvm = kvm;
+ init_context.parent = current;
+ init_context.thread_fn = thread_fn;
+ init_context.data = data;
+ init_completion(&init_context.init_done);
+
+ thread = kthread_run(kvm_vm_worker_thread, &init_context,
+ "%s-%d", name, task_pid_nr(current));
+ if (IS_ERR(thread))
+ return PTR_ERR(thread);
+
+ /* kthread_run is never supposed to return NULL */
+ WARN_ON(thread == NULL);
+
+ wait_for_completion(&init_context.init_done);
+
+ if (!init_context.err)
+ *thread_ptr = thread;
+
+ return init_context.err;
+}
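
[Editor's note] kvm_vm_create_worker_thread() gives architecture code a generic way to spawn a per-VM housekeeping kthread that inherits the spawner's cgroups and nice level and parks itself until the caller is ready. A hedged sketch of how an architecture might consume it; recovery_fn and the "vm-worker" name are invented for illustration.

/* Hypothetical arch-side user of the new per-VM worker-thread helper. */
#include <linux/kthread.h>
#include <linux/kvm_host.h>

static int recovery_fn(struct kvm *kvm, uintptr_t data)
{
        /* Long-running per-VM housekeeping would live here until kthread_stop(). */
        return 0;
}

static int start_vm_worker(struct kvm *kvm)
{
        struct task_struct *thread;
        int err;

        err = kvm_vm_create_worker_thread(kvm, recovery_fn, 0, "vm-worker",
                                          &thread);
        if (err)
                return err;

        /* The thread parks itself after init; unpark it once we want it running. */
        kthread_unpark(thread);
        return 0;
}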