 Documentation/virtual/kvm/devices/vcpu.txt  |  41
 Documentation/virtual/kvm/vcpu-requests.rst | 307
 arch/arm/include/asm/kvm_host.h             |  28
 arch/arm/include/uapi/asm/kvm.h             |   8
 arch/arm/kvm/guest.c                        |  51
 arch/arm/kvm/handle_exit.c                  |   1
 arch/arm/kvm/hyp/switch.c                   |   2
 arch/arm/kvm/reset.c                        |  16
 arch/arm64/include/asm/kvm_host.h           |   6
 arch/arm64/include/uapi/asm/kvm.h           |   3
 arch/arm64/kvm/guest.c                      |   9
 arch/arm64/kvm/handle_exit.c                |   1
 arch/arm64/kvm/hyp/switch.c                 |   1
 arch/arm64/kvm/reset.c                      |  16
 arch/mips/kvm/trap_emul.c                   |   2
 arch/mips/kvm/vz.c                          |   2
 arch/powerpc/include/asm/kvm_host.h         |   4
 arch/powerpc/kvm/booke.c                    |   2
 arch/powerpc/kvm/powerpc.c                  |   5
 arch/s390/include/asm/kvm_host.h            |   6
 arch/s390/kvm/kvm-s390.c                    |   2
 arch/x86/include/asm/kvm_host.h             |  47
 arch/x86/kvm/x86.c                          |   4
 include/kvm/arm_arch_timer.h                |   8
 include/kvm/arm_pmu.h                       |   6
 include/kvm/arm_vgic.h                      |  13
 include/linux/kvm_host.h                    |  12
 virt/kvm/arm/arch_timer.c                   | 139
 virt/kvm/arm/arm.c                          |  78
 virt/kvm/arm/pmu.c                          | 117
 virt/kvm/arm/psci.c                         |   8
 virt/kvm/arm/vgic/vgic-irqfd.c              |   2
 virt/kvm/arm/vgic/vgic-mmio-v2.c            |  24
 virt/kvm/arm/vgic/vgic-mmio-v3.c            |  22
 virt/kvm/arm/vgic/vgic-mmio.c               |  68
 virt/kvm/arm/vgic/vgic-mmio.h               |  12
 virt/kvm/arm/vgic/vgic.c                    |  68
 37 files changed, 914 insertions(+), 227 deletions(-)
diff --git a/Documentation/virtual/kvm/devices/vcpu.txt b/Documentation/virtual/kvm/devices/vcpu.txt
index 02f50686c418..2b5dab16c4f2 100644
--- a/Documentation/virtual/kvm/devices/vcpu.txt
+++ b/Documentation/virtual/kvm/devices/vcpu.txt
@@ -16,7 +16,9 @@ Parameters: in kvm_device_attr.addr the address for PMU overflow interrupt is a
Returns: -EBUSY: The PMU overflow interrupt is already set
         -ENXIO: The overflow interrupt is not set when attempting to get it
-ENODEV: PMUv3 not supported
- -EINVAL: Invalid PMU overflow interrupt number supplied
+ -EINVAL: Invalid PMU overflow interrupt number supplied or
+ trying to set the IRQ number without using an in-kernel
+ irqchip.
A value describing the PMUv3 (Performance Monitor Unit v3) overflow interrupt
number for this vcpu. This interrupt could be a PPI or SPI, but the interrupt
@@ -25,11 +27,36 @@ all vcpus, while as an SPI it must be a separate number per vcpu.
1.2 ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_INIT
Parameters: no additional parameter in kvm_device_attr.addr
-Returns: -ENODEV: PMUv3 not supported
- -ENXIO: PMUv3 not properly configured as required prior to calling this
- attribute
+Returns: -ENODEV: PMUv3 not supported or GIC not initialized
+ -ENXIO: PMUv3 not properly configured or in-kernel irqchip not
+ configured as required prior to calling this attribute
-EBUSY: PMUv3 already initialized
-Request the initialization of the PMUv3. This must be done after creating the
-in-kernel irqchip. Creating a PMU with a userspace irqchip is currently not
-supported.
+Request the initialization of the PMUv3. If using the PMUv3 with an in-kernel
+virtual GIC implementation, this must be done after initializing the in-kernel
+irqchip.
+
+
+2. GROUP: KVM_ARM_VCPU_TIMER_CTRL
+Architectures: ARM, ARM64
+
+2.1. ATTRIBUTE: KVM_ARM_VCPU_TIMER_IRQ_VTIMER
+2.2. ATTRIBUTE: KVM_ARM_VCPU_TIMER_IRQ_PTIMER
+Parameters: in kvm_device_attr.addr the address for the timer interrupt is a
+ pointer to an int
+Returns: -EINVAL: Invalid timer interrupt number
+         -EBUSY: One or more VCPUs have already run
+
+A value describing the architected timer interrupt number when connected to an
+in-kernel virtual GIC. This must be a PPI (16 <= intid < 32). Setting the
+attribute overrides the default values (see below).
+
+KVM_ARM_VCPU_TIMER_IRQ_VTIMER: The EL1 virtual timer intid (default: 27)
+KVM_ARM_VCPU_TIMER_IRQ_PTIMER: The EL1 physical timer intid (default: 30)
+
+Setting the same PPI for different timers will prevent the VCPUs from running.
+Setting the interrupt number on a VCPU configures all VCPUs created at that
+time to use the number provided for a given timer, overwriting any previously
+configured values on other VCPUs. Userspace should configure the interrupt
+numbers on at least one VCPU after creating all VCPUs and before running any
+VCPUs.
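+
+As a minimal sketch (not part of the ABI description above), userspace
+might set the virtual timer interrupt on a VCPU file descriptor along
+these lines, where vcpu_fd is assumed to be an already-created VCPU fd:
+
+    int irq = 27;  /* must be a PPI: 16 <= intid < 32 */
+    struct kvm_device_attr attr = {
+        .group = KVM_ARM_VCPU_TIMER_CTRL,
+        .attr  = KVM_ARM_VCPU_TIMER_IRQ_VTIMER,
+        .addr  = (__u64)(unsigned long)&irq,  /* pointer to an int */
+    };
+
+    if (ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr))
+        perror("KVM_SET_DEVICE_ATTR");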
diff --git a/Documentation/virtual/kvm/vcpu-requests.rst b/Documentation/virtual/kvm/vcpu-requests.rst
new file mode 100644
index 000000000000..5feb3706a7ae
--- /dev/null
+++ b/Documentation/virtual/kvm/vcpu-requests.rst
@@ -0,0 +1,307 @@
+=================
+KVM VCPU Requests
+=================
+
+Overview
+========
+
+KVM supports an internal API enabling threads to request a VCPU thread to
+perform some activity. For example, a thread may request a VCPU to flush
+its TLB with a VCPU request. The API consists of the following functions::
+
+ /* Check if any requests are pending for VCPU @vcpu. */
+ bool kvm_request_pending(struct kvm_vcpu *vcpu);
+
+ /* Check if VCPU @vcpu has request @req pending. */
+ bool kvm_test_request(int req, struct kvm_vcpu *vcpu);
+
+ /* Clear request @req for VCPU @vcpu. */
+ void kvm_clear_request(int req, struct kvm_vcpu *vcpu);
+
+ /*
+ * Check if VCPU @vcpu has request @req pending. When the request is
+ * pending it will be cleared and a memory barrier, which pairs with
+ * another in kvm_make_request(), will be issued.
+ */
+ bool kvm_check_request(int req, struct kvm_vcpu *vcpu);
+
+ /*
+ * Make request @req of VCPU @vcpu. Issues a memory barrier, which pairs
+ * with another in kvm_check_request(), prior to setting the request.
+ */
+ void kvm_make_request(int req, struct kvm_vcpu *vcpu);
+
+ /* Make request @req of all VCPUs of the VM with struct kvm @kvm. */
+ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req);
+
+Typically a requester wants the VCPU to perform the activity as soon
+as possible after making the request. This means most requests
+(kvm_make_request() calls) are followed by a call to kvm_vcpu_kick(),
+and kvm_make_all_cpus_request() has the kicking of all VCPUs built
+into it.
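+
+A minimal sketch of the typical requester pattern, where REQ stands in
+for some request defined elsewhere::
+
+  kvm_make_request(REQ, vcpu);  /* pairs with kvm_check_request()'s barrier */
+  kvm_vcpu_kick(vcpu);          /* ensure the VCPU notices it promptly */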
+
+VCPU Kicks
+----------
+
+The goal of a VCPU kick is to bring a VCPU thread out of guest mode in
+order to perform some KVM maintenance. To do so, an IPI is sent, forcing
+a guest mode exit. However, a VCPU thread may not be in guest mode at the
+time of the kick. Therefore, depending on the mode and state of the VCPU
+thread, there are two other actions a kick may take. All three actions
+are listed below:
+
+1) Send an IPI. This forces a guest mode exit.
+2) Wake a sleeping VCPU. Sleeping VCPUs are VCPU threads outside guest
+ mode that wait on waitqueues. Waking them removes the threads from
+ the waitqueues, allowing the threads to run again. This behavior
+ may be suppressed, see KVM_REQUEST_NO_WAKEUP below.
+3) Do nothing. When the VCPU is not in guest mode and the VCPU thread is not
+ sleeping, then there is nothing to do.
+
+VCPU Mode
+---------
+
+VCPUs have a mode state, ``vcpu->mode``, that is used to track whether the
+VCPU thread is running in guest mode or not, as well as some specific
+outside guest mode states. The architecture may use ``vcpu->mode`` to
+ensure VCPU requests are seen by VCPUs (see "Ensuring Requests Are Seen"),
+as well as to avoid sending unnecessary IPIs (see "IPI Reduction"), and
+even to ensure IPI acknowledgements are waited upon (see "Waiting for
+Acknowledgements"). The following modes are defined:
+
+OUTSIDE_GUEST_MODE
+
+ The VCPU thread is outside guest mode.
+
+IN_GUEST_MODE
+
+ The VCPU thread is in guest mode.
+
+EXITING_GUEST_MODE
+
+ The VCPU thread is transitioning from IN_GUEST_MODE to
+ OUTSIDE_GUEST_MODE.
+
+READING_SHADOW_PAGE_TABLES
+
+ The VCPU thread is outside guest mode, but it wants the sender of
+ certain VCPU requests, namely KVM_REQ_TLB_FLUSH, to wait until the VCPU
+ thread is done reading the page tables.
+
+VCPU Request Internals
+======================
+
+VCPU requests are simply bit indices of the ``vcpu->requests`` bitmap.
+This means general bitops, like those documented in [atomic-ops]_, could
+also be used, e.g. ::
+
+ clear_bit(KVM_REQ_UNHALT & KVM_REQUEST_MASK, &vcpu->requests);
+
+However, VCPU request users should refrain from doing so, as it would
+break the abstraction. The first 8 bits are reserved for architecture
+independent requests, all additional bits are available for architecture
+dependent requests.
+
+Architecture Independent Requests
+---------------------------------
+
+KVM_REQ_TLB_FLUSH
+
+ KVM's common MMU notifier may need to flush all of a guest's TLB
+ entries, calling kvm_flush_remote_tlbs() to do so. Architectures that
+ choose to use the common kvm_flush_remote_tlbs() implementation will
+ need to handle this VCPU request.
+
+KVM_REQ_MMU_RELOAD
+
+ When shadow page tables are used and memory slots are removed it's
+ necessary to inform each VCPU to completely refresh the tables. This
+ request is used for that.
+
+KVM_REQ_PENDING_TIMER
+
+ This request may be made from a timer handler run on the host on behalf
+ of a VCPU. It informs the VCPU thread to inject a timer interrupt.
+
+KVM_REQ_UNHALT
+
+ This request may be made from the KVM common function kvm_vcpu_block(),
+ which is used to emulate an instruction that causes a CPU to halt until
+  one of an architecture-specific set of events and/or interrupts is
+ received (determined by checking kvm_arch_vcpu_runnable()). When that
+ event or interrupt arrives kvm_vcpu_block() makes the request. This is
+ in contrast to when kvm_vcpu_block() returns due to any other reason,
+ such as a pending signal, which does not indicate the VCPU's halt
+ emulation should stop, and therefore does not make the request.
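+
+  A hedged sketch of the consumer side (not verbatim kernel code): after
+  kvm_vcpu_block() returns, the halt-emulating caller checks whether the
+  halt actually ended::
+
+    kvm_vcpu_block(vcpu);
+    if (kvm_check_request(KVM_REQ_UNHALT, vcpu))
+            leave_halted_state(vcpu);  /* hypothetical helper */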
+
+KVM_REQUEST_MASK
+----------------
+
+VCPU requests should be masked by KVM_REQUEST_MASK before using them with
+bitops. This is because only the lower 8 bits are used to represent the
+request's number. The upper bits are used as flags. Currently only two
+flags are defined.
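+
+For example, this series defines x86's KVM_REQ_MCLOCK_INPROGRESS as
+KVM_ARCH_REQ_FLAGS(14, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP), i.e.
+request number 22 (14 + KVM_REQUEST_ARCH_BASE) with both flag bits set.
+Raw bitops must therefore mask off the flags first::
+
+  set_bit(KVM_REQ_MCLOCK_INPROGRESS & KVM_REQUEST_MASK, &vcpu->requests);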
+
+VCPU Request Flags
+------------------
+
+KVM_REQUEST_NO_WAKEUP
+
+ This flag is applied to requests that only need immediate attention
+ from VCPUs running in guest mode. That is, sleeping VCPUs do not need
+  to be awakened for these requests. Sleeping VCPUs will handle the
+  requests when they are awakened later for some other reason.
+
+KVM_REQUEST_WAIT
+
+ When requests with this flag are made with kvm_make_all_cpus_request(),
+ then the caller will wait for each VCPU to acknowledge its IPI before
+ proceeding. This flag only applies to VCPUs that would receive IPIs.
+ If, for example, the VCPU is sleeping, so no IPI is necessary, then
+ the requesting thread does not wait. This means that this flag may be
+ safely combined with KVM_REQUEST_NO_WAKEUP. See "Waiting for
+ Acknowledgements" for more information about requests with
+ KVM_REQUEST_WAIT.
+
+VCPU Requests with Associated State
+===================================
+
+Requesters that want the receiving VCPU to handle new state need to ensure
+the newly written state is observable to the receiving VCPU thread's CPU
+by the time it observes the request. This means a write memory barrier
+must be inserted after writing the new state and before setting the VCPU
+request bit. Additionally, on the receiving VCPU thread's side, a
+corresponding read barrier must be inserted after reading the request bit
+and before proceeding to read the new state associated with it. See
+scenario 3, Message and Flag, of [lwn-mb]_ and the kernel documentation
+[memory-barriers]_.
+
+The pair of functions, kvm_check_request() and kvm_make_request(), provide
+the memory barriers, allowing this requirement to be handled internally by
+the API.
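+
+A sketch of the pattern, where REQ and the new_state field are
+placeholders rather than real kernel names::
+
+  /* requesting thread */
+  vcpu->arch.new_state = ...;        /* write the state first */
+  kvm_make_request(REQ, vcpu);       /* barrier, then set the request bit */
+
+  /* receiving VCPU thread */
+  if (kvm_check_request(REQ, vcpu))  /* read bit, clear it, then barrier */
+          consume(vcpu->arch.new_state);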
+
+Ensuring Requests Are Seen
+==========================
+
+When making requests to VCPUs, we want to avoid the receiving VCPU
+executing in guest mode for an arbitrary long time without handling the
+request. We can be sure this won't happen as long as we ensure the VCPU
+thread checks kvm_request_pending() before entering guest mode and that a
+kick will send an IPI to force an exit from guest mode when necessary.
+Extra care must be taken to cover the period after the VCPU thread's last
+kvm_request_pending() check and before it has entered guest mode, as kick
+IPIs will only trigger guest mode exits for VCPU threads that are in guest
+mode or at least have already disabled interrupts in order to prepare to
+enter guest mode. This means that an optimized implementation (see "IPI
+Reduction") must be certain when it's safe to not send the IPI. One
+solution, which all architectures except s390 apply, is to:
+
+- set ``vcpu->mode`` to IN_GUEST_MODE between disabling the interrupts and
+ the last kvm_request_pending() check;
+- enable interrupts atomically when entering the guest.
+
+This solution also requires memory barriers to be placed carefully in both
+the requesting thread and the receiving VCPU. With the memory barriers we
+can exclude the possibility of a VCPU thread observing
+!kvm_request_pending() on its last check and then not receiving an IPI for
+the next request made of it, even if the request is made immediately after
+the check. This is done by way of the Dekker memory barrier pattern
+(scenario 10 of [lwn-mb]_). As the Dekker pattern requires two variables,
+this solution pairs ``vcpu->mode`` with ``vcpu->requests``. Substituting
+them into the pattern gives::
+
+ CPU1 CPU2
+ ================= =================
+ local_irq_disable();
+ WRITE_ONCE(vcpu->mode, IN_GUEST_MODE); kvm_make_request(REQ, vcpu);
+ smp_mb(); smp_mb();
+ if (kvm_request_pending(vcpu)) { if (READ_ONCE(vcpu->mode) ==
+ IN_GUEST_MODE) {
+ ...abort guest entry... ...send IPI...
+ } }
+
+As stated above, the IPI is only useful for VCPU threads in guest mode or
+that have already disabled interrupts. This is why this specific case of
+the Dekker pattern has been extended to disable interrupts before setting
+``vcpu->mode`` to IN_GUEST_MODE. WRITE_ONCE() and READ_ONCE() are used to
+pedantically implement the memory barrier pattern, guaranteeing the
+compiler doesn't interfere with ``vcpu->mode``'s carefully planned
+accesses.
+
+IPI Reduction
+-------------
+
+As only one IPI is needed to get a VCPU to check for any/all requests,
+IPIs for multiple requests may be coalesced. This is easily done by having
+the first IPI-sending kick also change the VCPU mode to something
+!IN_GUEST_MODE. The
+transitional state, EXITING_GUEST_MODE, is used for this purpose.
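+
+The mode change is done with a compare-and-exchange so that only the kick
+which actually transitions the mode sends an IPI; a minimal sketch of the
+idea behind kvm_vcpu_exiting_guest_mode()::
+
+  /* returns the old mode; only IN_GUEST_MODE warrants sending an IPI */
+  old = cmpxchg(&vcpu->mode, IN_GUEST_MODE, EXITING_GUEST_MODE);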
+
+Waiting for Acknowledgements
+----------------------------
+
+Some requests, those with the KVM_REQUEST_WAIT flag set, require IPIs to
+be sent, and the acknowledgements to be waited upon, even when the target
+VCPU threads are in modes other than IN_GUEST_MODE. For example, one case
+is when a target VCPU thread is in READING_SHADOW_PAGE_TABLES mode, which
+is set after disabling interrupts. To support these cases, the
+KVM_REQUEST_WAIT flag changes the condition for sending an IPI from
+checking that the VCPU is IN_GUEST_MODE to checking that it is not
+OUTSIDE_GUEST_MODE.
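+
+In other words, the sender's decision becomes (sketch, not verbatim
+kernel code)::
+
+  if (req & KVM_REQUEST_WAIT)
+          send_ipi = READ_ONCE(vcpu->mode) != OUTSIDE_GUEST_MODE;
+  else
+          send_ipi = READ_ONCE(vcpu->mode) == IN_GUEST_MODE;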
+
+Request-less VCPU Kicks
+-----------------------
+
+As the determination of whether or not to send an IPI depends on the
+two-variable Dekker memory barrier pattern, it's clear that
+request-less VCPU kicks are almost never correct. Without the assurance
+that a non-IPI-generating kick will still result in an action by the
+receiving VCPU, as the final kvm_request_pending() check does for
+request-accompanying kicks, the kick may not do anything useful at
+all. If, for instance, a request-less kick was made to a VCPU that was
+just about to set its mode to IN_GUEST_MODE, meaning no IPI is sent, then
+the VCPU thread may continue its entry without actually having done
+whatever it was the kick was meant to initiate.
+
+One exception is x86's posted interrupt mechanism. In this case, however,
+even the request-less VCPU kick is coupled with the same
+local_irq_disable() + smp_mb() pattern described above; the ON bit
+(Outstanding Notification) in the posted interrupt descriptor takes the
+role of ``vcpu->requests``. When sending a posted interrupt, PIR.ON is
+set before reading ``vcpu->mode``; dually, in the VCPU thread,
+vmx_sync_pir_to_irr() reads PIR after setting ``vcpu->mode`` to
+IN_GUEST_MODE.
+
+Additional Considerations
+=========================
+
+Sleeping VCPUs
+--------------
+
+VCPU threads may need to consider requests before and/or after calling
+functions that may put them to sleep, e.g. kvm_vcpu_block(). Whether they
+do or not, and, if they do, which requests need consideration, is
+architecture dependent. kvm_vcpu_block() calls kvm_arch_vcpu_runnable()
+to check if it should awaken. One reason to do so is to provide
+architectures a function where requests may be checked if necessary.
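+
+For example, this series updates powerpc's kvm_arch_vcpu_runnable() to
+also report a VCPU as runnable when requests are pending::
+
+  return !!(v->arch.pending_exceptions) || kvm_request_pending(v);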
+
+Clearing Requests
+-----------------
+
+Generally it only makes sense for the receiving VCPU thread to clear a
+request. However, in some circumstances the requesting thread and the
+receiving VCPU thread are executed serially, such as when they are the
+same thread, or when they are using some form of concurrency control to
+temporarily execute synchronously. In those cases it's possible to know
+that the request may be cleared immediately, rather than waiting for the
+receiving VCPU thread to handle the request in VCPU RUN. The only current
+examples of this are kvm_vcpu_block() calls made by VCPUs to block
+themselves. A possible side-effect of that call is to make the
+KVM_REQ_UNHALT request, which may then be cleared immediately when the
+VCPU returns from the call.
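+
+For example, the arm/arm64 WFI handlers updated by this series clear the
+request immediately after blocking, since the halt emulation is complete
+once kvm_vcpu_block() returns there::
+
+  kvm_vcpu_block(vcpu);
+  kvm_clear_request(KVM_REQ_UNHALT, vcpu);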
+
+References
+==========
+
+.. [atomic-ops] Documentation/core-api/atomic_ops.rst
+.. [memory-barriers] Documentation/memory-barriers.txt
+.. [lwn-mb] https://lwn.net/Articles/573436/
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index f0e66577ce05..127e2dd2e21c 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -44,7 +44,9 @@
#define KVM_MAX_VCPUS VGIC_V2_MAX_CPUS
#endif
-#define KVM_REQ_VCPU_EXIT (8 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_SLEEP \
+ KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1)
u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode);
int __attribute_const__ kvm_target_cpu(void);
@@ -233,8 +235,6 @@ struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
struct kvm_vcpu __percpu **kvm_get_running_vcpus(void);
void kvm_arm_halt_guest(struct kvm *kvm);
void kvm_arm_resume_guest(struct kvm *kvm);
-void kvm_arm_halt_vcpu(struct kvm_vcpu *vcpu);
-void kvm_arm_resume_vcpu(struct kvm_vcpu *vcpu);
int kvm_arm_copy_coproc_indices(struct kvm_vcpu *vcpu, u64 __user *uindices);
unsigned long kvm_arm_num_coproc_regs(struct kvm_vcpu *vcpu);
@@ -291,20 +291,12 @@ static inline void kvm_arm_init_debug(void) {}
static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {}
static inline void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) {}
static inline void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) {}
-static inline int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
- struct kvm_device_attr *attr)
-{
- return -ENXIO;
-}
-static inline int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
- struct kvm_device_attr *attr)
-{
- return -ENXIO;
-}
-static inline int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
- struct kvm_device_attr *attr)
-{
- return -ENXIO;
-}
+
+int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr);
+int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr);
+int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr);
#endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index 5e3c673fa3f4..5db2d4c6a55f 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -203,6 +203,14 @@ struct kvm_arch_memory_slot {
#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff
#define VGIC_LEVEL_INFO_LINE_LEVEL 0
+/* Device Control API on vcpu fd */
+#define KVM_ARM_VCPU_PMU_V3_CTRL 0
+#define KVM_ARM_VCPU_PMU_V3_IRQ 0
+#define KVM_ARM_VCPU_PMU_V3_INIT 1
+#define KVM_ARM_VCPU_TIMER_CTRL 1
+#define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0
+#define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1
+
#define KVM_DEV_ARM_VGIC_CTRL_INIT 0
#define KVM_DEV_ARM_ITS_SAVE_TABLES 1
#define KVM_DEV_ARM_ITS_RESTORE_TABLES 2
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index fa6182a40941..1e0784ebbfd6 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -301,3 +301,54 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
{
return -EINVAL;
}
+
+int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ int ret;
+
+ switch (attr->group) {
+ case KVM_ARM_VCPU_TIMER_CTRL:
+ ret = kvm_arm_timer_set_attr(vcpu, attr);
+ break;
+ default:
+ ret = -ENXIO;
+ break;
+ }
+
+ return ret;
+}
+
+int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ int ret;
+
+ switch (attr->group) {
+ case KVM_ARM_VCPU_TIMER_CTRL:
+ ret = kvm_arm_timer_get_attr(vcpu, attr);
+ break;
+ default:
+ ret = -ENXIO;
+ break;
+ }
+
+ return ret;
+}
+
+int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ int ret;
+
+ switch (attr->group) {
+ case KVM_ARM_VCPU_TIMER_CTRL:
+ ret = kvm_arm_timer_has_attr(vcpu, attr);
+ break;
+ default:
+ ret = -ENXIO;
+ break;
+ }
+
+ return ret;
+}
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
index f86a9aaef462..54442e375354 100644
--- a/arch/arm/kvm/handle_exit.c
+++ b/arch/arm/kvm/handle_exit.c
@@ -72,6 +72,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
trace_kvm_wfx(*vcpu_pc(vcpu), false);
vcpu->stat.wfi_exit_stat++;
kvm_vcpu_block(vcpu);
+ kvm_clear_request(KVM_REQ_UNHALT, vcpu);
}
kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
diff --git a/arch/arm/kvm/hyp/switch.c b/arch/arm/kvm/hyp/switch.c
index 624a510d31df..ebd2dd46adf7 100644
--- a/arch/arm/kvm/hyp/switch.c
+++ b/arch/arm/kvm/hyp/switch.c
@@ -237,8 +237,10 @@ void __hyp_text __noreturn __hyp_panic(int cause)
vcpu = (struct kvm_vcpu *)read_sysreg(HTPIDR);
host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
+ __timer_save_state(vcpu);
__deactivate_traps(vcpu);
__deactivate_vm(vcpu);
+ __banked_restore_state(host_ctxt);
__sysreg_restore_state(host_ctxt);
}
diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c
index 1da8b2d14550..5ed0c3ee33d6 100644
--- a/arch/arm/kvm/reset.c
+++ b/arch/arm/kvm/reset.c
@@ -37,16 +37,6 @@ static struct kvm_regs cortexa_regs_reset = {
.usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT,
};
-static const struct kvm_irq_level cortexa_ptimer_irq = {
- { .irq = 30 },
- .level = 1,
-};
-
-static const struct kvm_irq_level cortexa_vtimer_irq = {
- { .irq = 27 },
- .level = 1,
-};
-
/*******************************************************************************
* Exported reset function
@@ -62,16 +52,12 @@ static const struct kvm_irq_level cortexa_vtimer_irq = {
int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
{
struct kvm_regs *reset_regs;
- const struct kvm_irq_level *cpu_vtimer_irq;
- const struct kvm_irq_level *cpu_ptimer_irq;
switch (vcpu->arch.target) {
case KVM_ARM_TARGET_CORTEX_A7:
case KVM_ARM_TARGET_CORTEX_A15:
reset_regs = &cortexa_regs_reset;
vcpu->arch.midr = read_cpuid_id();
- cpu_vtimer_irq = &cortexa_vtimer_irq;
- cpu_ptimer_irq = &cortexa_ptimer_irq;
break;
default:
return -ENODEV;
@@ -84,5 +70,5 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
kvm_reset_coprocs(vcpu);
/* Reset arch_timer context */
- return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq, cpu_ptimer_irq);
+ return kvm_timer_vcpu_reset(vcpu);
}
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 5e19165c5fa8..0c4fd1f46e10 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -41,7 +41,9 @@
#define KVM_VCPU_MAX_FEATURES 4
-#define KVM_REQ_VCPU_EXIT (8 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_SLEEP \
+ KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1)
int __attribute_const__ kvm_target_cpu(void);
int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
@@ -333,8 +335,6 @@ struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
void kvm_arm_halt_guest(struct kvm *kvm);
void kvm_arm_resume_guest(struct kvm *kvm);
-void kvm_arm_halt_vcpu(struct kvm_vcpu *vcpu);
-void kvm_arm_resume_vcpu(struct kvm_vcpu *vcpu);
u64 __kvm_call_hyp(void *hypfn, ...);
#define kvm_call_hyp(f, ...) __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__)
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 70eea2ecc663..9f3ca24bbcc6 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -232,6 +232,9 @@ struct kvm_arch_memory_slot {
#define KVM_ARM_VCPU_PMU_V3_CTRL 0
#define KVM_ARM_VCPU_PMU_V3_IRQ 0
#define KVM_ARM_VCPU_PMU_V3_INIT 1
+#define KVM_ARM_VCPU_TIMER_CTRL 1
+#define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0
+#define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1
/* KVM_IRQ_LINE irq field index values */
#define KVM_ARM_IRQ_TYPE_SHIFT 24
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index b37446a8ffdb..5c7f657dd207 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -390,6 +390,9 @@ int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
case KVM_ARM_VCPU_PMU_V3_CTRL:
ret = kvm_arm_pmu_v3_set_attr(vcpu, attr);
break;
+ case KVM_ARM_VCPU_TIMER_CTRL:
+ ret = kvm_arm_timer_set_attr(vcpu, attr);
+ break;
default:
ret = -ENXIO;
break;
@@ -407,6 +410,9 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
case KVM_ARM_VCPU_PMU_V3_CTRL:
ret = kvm_arm_pmu_v3_get_attr(vcpu, attr);
break;
+ case KVM_ARM_VCPU_TIMER_CTRL:
+ ret = kvm_arm_timer_get_attr(vcpu, attr);
+ break;
default:
ret = -ENXIO;
break;
@@ -424,6 +430,9 @@ int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
case KVM_ARM_VCPU_PMU_V3_CTRL:
ret = kvm_arm_pmu_v3_has_attr(vcpu, attr);
break;
+ case KVM_ARM_VCPU_TIMER_CTRL:
+ ret = kvm_arm_timer_has_attr(vcpu, attr);
+ break;
default:
ret = -ENXIO;
break;
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index fa1b18e364fc..17d8a1677a0b 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -89,6 +89,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false);
vcpu->stat.wfi_exit_stat++;
kvm_vcpu_block(vcpu);
+ kvm_clear_request(KVM_REQ_UNHALT, vcpu);
}
kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index aede1658aeda..e5f089de6526 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -422,6 +422,7 @@ void __hyp_text __noreturn __hyp_panic(void)
vcpu = (struct kvm_vcpu *)read_sysreg(tpidr_el2);
host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
+ __timer_save_state(vcpu);
__deactivate_traps(vcpu);
__deactivate_vm(vcpu);
__sysreg_restore_host_state(host_ctxt);
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 561badf93de8..3256b9228e75 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -46,16 +46,6 @@ static const struct kvm_regs default_regs_reset32 = {
COMPAT_PSR_I_BIT | COMPAT_PSR_F_BIT),
};
-static const struct kvm_irq_level default_ptimer_irq = {
- .irq = 30,
- .level = 1,
-};
-
-static const struct kvm_irq_level default_vtimer_irq = {
- .irq = 27,
- .level = 1,
-};
-
static bool cpu_has_32bit_el1(void)
{
u64 pfr0;
@@ -108,8 +98,6 @@ int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
*/
int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
{
- const struct kvm_irq_level *cpu_vtimer_irq;
- const struct kvm_irq_level *cpu_ptimer_irq;
const struct kvm_regs *cpu_reset;
switch (vcpu->arch.target) {
@@ -122,8 +110,6 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
cpu_reset = &default_regs_reset;
}
- cpu_vtimer_irq = &default_vtimer_irq;
- cpu_ptimer_irq = &default_ptimer_irq;
break;
}
@@ -137,5 +123,5 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
kvm_pmu_vcpu_reset(vcpu);
/* Reset timer */
- return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq, cpu_ptimer_irq);
+ return kvm_timer_vcpu_reset(vcpu);
}
diff --git a/arch/mips/kvm/trap_emul.c b/arch/mips/kvm/trap_emul.c
index a563759fd142..6a0d7040d882 100644
--- a/arch/mips/kvm/trap_emul.c
+++ b/arch/mips/kvm/trap_emul.c
@@ -1094,7 +1094,7 @@ static void kvm_trap_emul_check_requests(struct kvm_vcpu *vcpu, int cpu,
struct mm_struct *mm;
int i;
- if (likely(!vcpu->requests))
+ if (likely(!kvm_request_pending(vcpu)))
return;
if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
diff --git a/arch/mips/kvm/vz.c b/arch/mips/kvm/vz.c
index 71d8856ade64..74805035edc8 100644
--- a/arch/mips/kvm/vz.c
+++ b/arch/mips/kvm/vz.c
@@ -2337,7 +2337,7 @@ static int kvm_vz_check_requests(struct kvm_vcpu *vcpu, int cpu)
int ret = 0;
int i;
- if (!vcpu->requests)
+ if (!kvm_request_pending(vcpu))
return 0;
if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 9c51ac4b8f36..50e0bc9723cc 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -52,8 +52,8 @@
#define KVM_IRQCHIP_NUM_PINS 256
/* PPC-specific vcpu->requests bit members */
-#define KVM_REQ_WATCHDOG 8
-#define KVM_REQ_EPR_EXIT 9
+#define KVM_REQ_WATCHDOG KVM_ARCH_REQ(0)
+#define KVM_REQ_EPR_EXIT KVM_ARCH_REQ(1)
#include <linux/mmu_notifier.h>
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 3eaac3809977..071b87ee682f 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -687,7 +687,7 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
kvmppc_core_check_exceptions(vcpu);
- if (vcpu->requests) {
+ if (kvm_request_pending(vcpu)) {
/* Exception delivery raised request; start over */
return 1;
}
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index f7cf2cd564ef..fd64f087737c 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -55,8 +55,7 @@ EXPORT_SYMBOL_GPL(kvmppc_pr_ops);
int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
{
- return !!(v->arch.pending_exceptions) ||
- v->requests;
+ return !!(v->arch.pending_exceptions) || kvm_request_pending(v);
}
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
@@ -108,7 +107,7 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
*/
smp_mb();
- if (vcpu->requests) {
+ if (kvm_request_pending(vcpu)) {
/* Make sure we process requests preemptable */
local_irq_enable();
trace_kvm_check_requests(vcpu);
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 426614a882a9..9c3bd94204ac 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -42,9 +42,9 @@
#define KVM_HALT_POLL_NS_DEFAULT 80000
/* s390-specific vcpu->requests bit members */
-#define KVM_REQ_ENABLE_IBS 8
-#define KVM_REQ_DISABLE_IBS 9
-#define KVM_REQ_ICPT_OPEREXC 10
+#define KVM_REQ_ENABLE_IBS KVM_ARCH_REQ(0)
+#define KVM_REQ_DISABLE_IBS KVM_ARCH_REQ(1)
+#define KVM_REQ_ICPT_OPEREXC KVM_ARCH_REQ(2)
#define SIGP_CTRL_C 0x80
#define SIGP_CTRL_SCN_MASK 0x3f
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 689ac48361c6..ad41e0fa3a21 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -2440,7 +2440,7 @@ static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
kvm_s390_vcpu_request_handled(vcpu);
- if (!vcpu->requests)
+ if (!kvm_request_pending(vcpu))
return 0;
/*
* We use MMU_RELOAD just to re-arm the ipte notifier for the
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 9c761fea0c98..563979976fab 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -48,28 +48,31 @@
#define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS
/* x86-specific vcpu->requests bit members */
-#define KVM_REQ_MIGRATE_TIMER 8
-#define KVM_REQ_REPORT_TPR_ACCESS 9
-#define KVM_REQ_TRIPLE_FAULT 10
-#define KVM_REQ_MMU_SYNC 11
-#define KVM_REQ_CLOCK_UPDATE 12
-#define KVM_REQ_EVENT 14
-#define KVM_REQ_APF_HALT 15
-#define KVM_REQ_STEAL_UPDATE 16
-#define KVM_REQ_NMI 17
-#define KVM_REQ_PMU 18
-#define KVM_REQ_PMI 19
-#define KVM_REQ_SMI 20
-#define KVM_REQ_MASTERCLOCK_UPDATE 21
-#define KVM_REQ_MCLOCK_INPROGRESS (22 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
-#define KVM_REQ_SCAN_IOAPIC (23 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
-#define KVM_REQ_GLOBAL_CLOCK_UPDATE 24
-#define KVM_REQ_APIC_PAGE_RELOAD (25 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
-#define KVM_REQ_HV_CRASH 26
-#define KVM_REQ_IOAPIC_EOI_EXIT 27
-#define KVM_REQ_HV_RESET 28
-#define KVM_REQ_HV_EXIT 29
-#define KVM_REQ_HV_STIMER 30
+#define KVM_REQ_MIGRATE_TIMER KVM_ARCH_REQ(0)
+#define KVM_REQ_REPORT_TPR_ACCESS KVM_ARCH_REQ(1)
+#define KVM_REQ_TRIPLE_FAULT KVM_ARCH_REQ(2)
+#define KVM_REQ_MMU_SYNC KVM_ARCH_REQ(3)
+#define KVM_REQ_CLOCK_UPDATE KVM_ARCH_REQ(4)
+#define KVM_REQ_EVENT KVM_ARCH_REQ(6)
+#define KVM_REQ_APF_HALT KVM_ARCH_REQ(7)
+#define KVM_REQ_STEAL_UPDATE KVM_ARCH_REQ(8)
+#define KVM_REQ_NMI KVM_ARCH_REQ(9)
+#define KVM_REQ_PMU KVM_ARCH_REQ(10)
+#define KVM_REQ_PMI KVM_ARCH_REQ(11)
+#define KVM_REQ_SMI KVM_ARCH_REQ(12)
+#define KVM_REQ_MASTERCLOCK_UPDATE KVM_ARCH_REQ(13)
+#define KVM_REQ_MCLOCK_INPROGRESS \
+ KVM_ARCH_REQ_FLAGS(14, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_SCAN_IOAPIC \
+ KVM_ARCH_REQ_FLAGS(15, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_GLOBAL_CLOCK_UPDATE KVM_ARCH_REQ(16)
+#define KVM_REQ_APIC_PAGE_RELOAD \
+ KVM_ARCH_REQ_FLAGS(17, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_HV_CRASH KVM_ARCH_REQ(18)
+#define KVM_REQ_IOAPIC_EOI_EXIT KVM_ARCH_REQ(19)
+#define KVM_REQ_HV_RESET KVM_ARCH_REQ(20)
+#define KVM_REQ_HV_EXIT KVM_ARCH_REQ(21)
+#define KVM_REQ_HV_STIMER KVM_ARCH_REQ(22)
#define CR0_RESERVED_BITS \
(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 464da936c53d..f81060518635 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6710,7 +6710,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
bool req_immediate_exit = false;
- if (vcpu->requests) {
+ if (kvm_request_pending(vcpu)) {
if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
kvm_mmu_unload(vcpu);
if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
@@ -6874,7 +6874,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_x86_ops->sync_pir_to_irr(vcpu);
}
- if (vcpu->mode == EXITING_GUEST_MODE || vcpu->requests
+ if (vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu)
|| need_resched() || signal_pending(current)) {
vcpu->mode = OUTSIDE_GUEST_MODE;
smp_wmb();
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index 295584f31a4e..f0053f884b4a 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -57,9 +57,7 @@ struct arch_timer_cpu {
int kvm_timer_hyp_init(void);
int kvm_timer_enable(struct kvm_vcpu *vcpu);
-int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
- const struct kvm_irq_level *virt_irq,
- const struct kvm_irq_level *phys_irq);
+int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu);
void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu);
void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu);
void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu);
@@ -70,6 +68,10 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu);
u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid);
int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value);
+int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr);
+int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr);
+int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr);
+
bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx);
void kvm_timer_schedule(struct kvm_vcpu *vcpu);
void kvm_timer_unschedule(struct kvm_vcpu *vcpu);
diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
index 1ab4633adf4f..f6e030617467 100644
--- a/include/kvm/arm_pmu.h
+++ b/include/kvm/arm_pmu.h
@@ -35,6 +35,7 @@ struct kvm_pmu {
int irq_num;
struct kvm_pmc pmc[ARMV8_PMU_MAX_COUNTERS];
bool ready;
+ bool created;
bool irq_level;
};
@@ -63,6 +64,7 @@ int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr);
int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr);
+int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu);
#else
struct kvm_pmu {
};
@@ -112,6 +114,10 @@ static inline int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu,
{
return -ENXIO;
}
+static inline int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
+{
+ return 0;
+}
#endif
#endif
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index ef718586321c..2d923a6b2175 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -38,6 +38,10 @@
#define VGIC_MIN_LPI 8192
#define KVM_IRQCHIP_NUM_PINS (1020 - 32)
+#define irq_is_ppi(irq) ((irq) >= VGIC_NR_SGIS && (irq) < VGIC_NR_PRIVATE_IRQS)
+#define irq_is_spi(irq) ((irq) >= VGIC_NR_PRIVATE_IRQS && \
+ (irq) <= VGIC_MAX_SPI)
+
enum vgic_type {
VGIC_V2, /* Good ol' GICv2 */
VGIC_V3, /* New fancy GICv3 */
@@ -119,6 +123,9 @@ struct vgic_irq {
u8 source; /* GICv2 SGIs only */
u8 priority;
enum vgic_irq_config config; /* Level or edge */
+
+ void *owner; /* Opaque pointer to reserve an interrupt
+ for in-kernel devices. */
};
struct vgic_register_region;
@@ -298,9 +305,7 @@ int kvm_vgic_hyp_init(void);
void kvm_vgic_init_cpu_hardware(void);
int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
- bool level);
-int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid, unsigned int intid,
- bool level);
+ bool level, void *owner);
int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, u32 virt_irq, u32 phys_irq);
int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq);
bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq);
@@ -341,4 +346,6 @@ int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi);
*/
int kvm_vgic_setup_default_irq_routing(struct kvm *kvm);
+int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner);
+
#endif /* __KVM_ARM_VGIC_H */
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 8c0664309815..0b50e7b35ed4 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -126,6 +126,13 @@ static inline bool is_error_page(struct page *page)
#define KVM_REQ_MMU_RELOAD (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_PENDING_TIMER 2
#define KVM_REQ_UNHALT 3
+#define KVM_REQUEST_ARCH_BASE 8
+
+#define KVM_ARCH_REQ_FLAGS(nr, flags) ({ \
+ BUILD_BUG_ON((unsigned)(nr) >= 32 - KVM_REQUEST_ARCH_BASE); \
+ (unsigned)(((nr) + KVM_REQUEST_ARCH_BASE) | (flags)); \
+})
+#define KVM_ARCH_REQ(nr) KVM_ARCH_REQ_FLAGS(nr, 0)
#define KVM_USERSPACE_IRQ_SOURCE_ID 0
#define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1
@@ -1098,6 +1105,11 @@ static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu)
set_bit(req & KVM_REQUEST_MASK, &vcpu->requests);
}
+static inline bool kvm_request_pending(struct kvm_vcpu *vcpu)
+{
+ return READ_ONCE(vcpu->requests);
+}
+
static inline bool kvm_test_request(int req, struct kvm_vcpu *vcpu)
{
return test_bit(req & KVM_REQUEST_MASK, &vcpu->requests);
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 5976609ef27c..8e89d63005c7 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -21,6 +21,7 @@
#include <linux/kvm_host.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
+#include <linux/uaccess.h>
#include <clocksource/arm_arch_timer.h>
#include <asm/arch_timer.h>
@@ -35,6 +36,16 @@ static struct timecounter *timecounter;
static unsigned int host_vtimer_irq;
static u32 host_vtimer_irq_flags;
+static const struct kvm_irq_level default_ptimer_irq = {
+ .irq = 30,
+ .level = 1,
+};
+
+static const struct kvm_irq_level default_vtimer_irq = {
+ .irq = 27,
+ .level = 1,
+};
+
void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
{
vcpu_vtimer(vcpu)->active_cleared_last = false;
@@ -95,7 +106,7 @@ static void kvm_timer_inject_irq_work(struct work_struct *work)
* If the vcpu is blocked we want to wake it up so that it will see
* the timer has expired when entering the guest.
*/
- kvm_vcpu_kick(vcpu);
+ kvm_vcpu_wake_up(vcpu);
}
static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx)
@@ -215,7 +226,8 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
if (likely(irqchip_in_kernel(vcpu->kvm))) {
ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
timer_ctx->irq.irq,
- timer_ctx->irq.level);
+ timer_ctx->irq.level,
+ timer_ctx);
WARN_ON(ret);
}
}
@@ -445,23 +457,12 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
kvm_timer_update_state(vcpu);
}
-int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
- const struct kvm_irq_level *virt_irq,
- const struct kvm_irq_level *phys_irq)
+int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
{
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
/*
- * The vcpu timer irq number cannot be determined in
- * kvm_timer_vcpu_init() because it is called much before
- * kvm_vcpu_set_target(). To handle this, we determine
- * vcpu timer irq number when the vcpu is reset.
- */
- vtimer->irq.irq = virt_irq->irq;
- ptimer->irq.irq = phys_irq->irq;
-
- /*
* The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8
* and to 0 for ARMv7. We provide an implementation that always
* resets the timer to be disabled and unmasked and is compliant with
@@ -496,6 +497,8 @@ static void update_vtimer_cntvoff(struct kvm_vcpu *vcpu, u64 cntvoff)
void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+ struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+ struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
/* Synchronize cntvoff across all vtimers of a VM. */
update_vtimer_cntvoff(vcpu, kvm_phys_timer_read());
@@ -504,6 +507,9 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
INIT_WORK(&timer->expired, kvm_timer_inject_irq_work);
hrtimer_init(&timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
timer->timer.function = kvm_timer_expire;
+
+ vtimer->irq.irq = default_vtimer_irq.irq;
+ ptimer->irq.irq = default_ptimer_irq.irq;
}
static void kvm_timer_init_interrupt(void *info)
@@ -613,6 +619,30 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
kvm_vgic_unmap_phys_irq(vcpu, vtimer->irq.irq);
}
+static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu)
+{
+ int vtimer_irq, ptimer_irq;
+ int i, ret;
+
+ vtimer_irq = vcpu_vtimer(vcpu)->irq.irq;
+ ret = kvm_vgic_set_owner(vcpu, vtimer_irq, vcpu_vtimer(vcpu));
+ if (ret)
+ return false;
+
+ ptimer_irq = vcpu_ptimer(vcpu)->irq.irq;
+ ret = kvm_vgic_set_owner(vcpu, ptimer_irq, vcpu_ptimer(vcpu));
+ if (ret)
+ return false;
+
+ kvm_for_each_vcpu(i, vcpu, vcpu->kvm) {
+ if (vcpu_vtimer(vcpu)->irq.irq != vtimer_irq ||
+ vcpu_ptimer(vcpu)->irq.irq != ptimer_irq)
+ return false;
+ }
+
+ return true;
+}
+
int kvm_timer_enable(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
@@ -632,6 +662,11 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
if (!vgic_initialized(vcpu->kvm))
return -ENODEV;
+ if (!timer_irqs_are_valid(vcpu)) {
+ kvm_debug("incorrectly configured timer irqs\n");
+ return -EINVAL;
+ }
+
/*
* Find the physical IRQ number corresponding to the host_vtimer_irq
*/
@@ -681,3 +716,79 @@ void kvm_timer_init_vhe(void)
val |= (CNTHCTL_EL1PCTEN << cnthctl_shift);
write_sysreg(val, cnthctl_el2);
}
+
+static void set_timer_irqs(struct kvm *kvm, int vtimer_irq, int ptimer_irq)
+{
+ struct kvm_vcpu *vcpu;
+ int i;
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ vcpu_vtimer(vcpu)->irq.irq = vtimer_irq;
+ vcpu_ptimer(vcpu)->irq.irq = ptimer_irq;
+ }
+}
+
+int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
+{
+ int __user *uaddr = (int __user *)(long)attr->addr;
+ struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+ struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
+ int irq;
+
+ if (!irqchip_in_kernel(vcpu->kvm))
+ return -EINVAL;
+
+ if (get_user(irq, uaddr))
+ return -EFAULT;
+
+ if (!(irq_is_ppi(irq)))
+ return -EINVAL;
+
+ if (vcpu->arch.timer_cpu.enabled)
+ return -EBUSY;
+
+ switch (attr->attr) {
+ case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
+ set_timer_irqs(vcpu->kvm, irq, ptimer->irq.irq);
+ break;
+ case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
+ set_timer_irqs(vcpu->kvm, vtimer->irq.irq, irq);
+ break;
+ default:
+ return -ENXIO;
+ }
+
+ return 0;
+}
+
+int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
+{
+ int __user *uaddr = (int __user *)(long)attr->addr;
+ struct arch_timer_context *timer;
+ int irq;
+
+ switch (attr->attr) {
+ case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
+ timer = vcpu_vtimer(vcpu);
+ break;
+ case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
+ timer = vcpu_ptimer(vcpu);
+ break;
+ default:
+ return -ENXIO;
+ }
+
+ irq = timer->irq.irq;
+ return put_user(irq, uaddr);
+}
+
+int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
+{
+ switch (attr->attr) {
+ case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
+ case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
+ return 0;
+ }
+
+ return -ENXIO;
+}
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 3417e184c8e1..a265acc53e39 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -368,6 +368,13 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
kvm_timer_vcpu_put(vcpu);
}
+static void vcpu_power_off(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.power_off = true;
+ kvm_make_request(KVM_REQ_SLEEP, vcpu);
+ kvm_vcpu_kick(vcpu);
+}
+
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
@@ -387,7 +394,7 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
vcpu->arch.power_off = false;
break;
case KVM_MP_STATE_STOPPED:
- vcpu->arch.power_off = true;
+ vcpu_power_off(vcpu);
break;
default:
return -EINVAL;
@@ -520,6 +527,10 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
}
ret = kvm_timer_enable(vcpu);
+ if (ret)
+ return ret;
+
+ ret = kvm_arm_pmu_v3_enable(vcpu);
return ret;
}
@@ -536,21 +547,7 @@ void kvm_arm_halt_guest(struct kvm *kvm)
kvm_for_each_vcpu(i, vcpu, kvm)
vcpu->arch.pause = true;
- kvm_make_all_cpus_request(kvm, KVM_REQ_VCPU_EXIT);
-}
-
-void kvm_arm_halt_vcpu(struct kvm_vcpu *vcpu)
-{
- vcpu->arch.pause = true;
- kvm_vcpu_kick(vcpu);
-}
-
-void kvm_arm_resume_vcpu(struct kvm_vcpu *vcpu)
-{
- struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
-
- vcpu->arch.pause = false;
- swake_up(wq);
+ kvm_make_all_cpus_request(kvm, KVM_REQ_SLEEP);
}
void kvm_arm_resume_guest(struct kvm *kvm)
@@ -558,16 +555,23 @@ void kvm_arm_resume_guest(struct kvm *kvm)
int i;
struct kvm_vcpu *vcpu;
- kvm_for_each_vcpu(i, vcpu, kvm)
- kvm_arm_resume_vcpu(vcpu);
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ vcpu->arch.pause = false;
+ swake_up(kvm_arch_vcpu_wq(vcpu));
+ }
}
-static void vcpu_sleep(struct kvm_vcpu *vcpu)
+static void vcpu_req_sleep(struct kvm_vcpu *vcpu)
{
struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
swait_event_interruptible(*wq, ((!vcpu->arch.power_off) &&
(!vcpu->arch.pause)));
+
+ if (vcpu->arch.power_off || vcpu->arch.pause) {
+ /* Awaken to handle a signal, request we sleep again later. */
+ kvm_make_request(KVM_REQ_SLEEP, vcpu);
+ }
}
static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
@@ -575,6 +579,20 @@ static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
return vcpu->arch.target >= 0;
}
+static void check_vcpu_requests(struct kvm_vcpu *vcpu)
+{
+ if (kvm_request_pending(vcpu)) {
+ if (kvm_check_request(KVM_REQ_SLEEP, vcpu))
+ vcpu_req_sleep(vcpu);
+
+ /*
+ * Clear IRQ_PENDING requests that were made to guarantee
+ * that a VCPU sees new virtual interrupts.
+ */
+ kvm_check_request(KVM_REQ_IRQ_PENDING, vcpu);
+ }
+}
+
/**
* kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code
* @vcpu: The VCPU pointer
@@ -620,8 +638,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
update_vttbr(vcpu->kvm);
- if (vcpu->arch.power_off || vcpu->arch.pause)
- vcpu_sleep(vcpu);
+ check_vcpu_requests(vcpu);
/*
* Preparing the interrupts to be injected also
@@ -650,8 +667,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
run->exit_reason = KVM_EXIT_INTR;
}
+ /*
+ * Ensure we set mode to IN_GUEST_MODE after we disable
+ * interrupts and before the final VCPU requests check.
+ * See the comment in kvm_vcpu_exiting_guest_mode() and
+ * Documentation/virtual/kvm/vcpu-requests.rst
+ */
+ smp_store_mb(vcpu->mode, IN_GUEST_MODE);
+
if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) ||
- vcpu->arch.power_off || vcpu->arch.pause) {
+ kvm_request_pending(vcpu)) {
+ vcpu->mode = OUTSIDE_GUEST_MODE;
local_irq_enable();
kvm_pmu_sync_hwstate(vcpu);
kvm_timer_sync_hwstate(vcpu);
@@ -667,7 +693,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
*/
trace_kvm_entry(*vcpu_pc(vcpu));
guest_enter_irqoff();
- vcpu->mode = IN_GUEST_MODE;
ret = kvm_call_hyp(__kvm_vcpu_run, vcpu);
@@ -756,6 +781,7 @@ static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level)
* trigger a world-switch round on the running physical CPU to set the
* virtual IRQ/FIQ fields in the HCR appropriately.
*/
+ kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
kvm_vcpu_kick(vcpu);
return 0;
@@ -806,7 +832,7 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
if (irq_num < VGIC_NR_SGIS || irq_num >= VGIC_NR_PRIVATE_IRQS)
return -EINVAL;
- return kvm_vgic_inject_irq(kvm, vcpu->vcpu_id, irq_num, level);
+ return kvm_vgic_inject_irq(kvm, vcpu->vcpu_id, irq_num, level, NULL);
case KVM_ARM_IRQ_TYPE_SPI:
if (!irqchip_in_kernel(kvm))
return -ENXIO;
@@ -814,7 +840,7 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
if (irq_num < VGIC_NR_PRIVATE_IRQS)
return -EINVAL;
- return kvm_vgic_inject_irq(kvm, 0, irq_num, level);
+ return kvm_vgic_inject_irq(kvm, 0, irq_num, level, NULL);
}
return -EINVAL;
@@ -884,7 +910,7 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
* Handle the "start in power-off" case.
*/
if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
- vcpu->arch.power_off = true;
+ vcpu_power_off(vcpu);
else
vcpu->arch.power_off = false;
diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
index 4b43e7f3b158..fc8a723ff387 100644
--- a/virt/kvm/arm/pmu.c
+++ b/virt/kvm/arm/pmu.c
@@ -203,6 +203,24 @@ static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
return reg;
}
+static void kvm_pmu_check_overflow(struct kvm_vcpu *vcpu)
+{
+ struct kvm_pmu *pmu = &vcpu->arch.pmu;
+ bool overflow = !!kvm_pmu_overflow_status(vcpu);
+
+ if (pmu->irq_level == overflow)
+ return;
+
+ pmu->irq_level = overflow;
+
+ if (likely(irqchip_in_kernel(vcpu->kvm))) {
+ int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
+ pmu->irq_num, overflow,
+ &vcpu->arch.pmu);
+ WARN_ON(ret);
+ }
+}
+
/**
* kvm_pmu_overflow_set - set PMU overflow interrupt
* @vcpu: The vcpu pointer
@@ -210,37 +228,18 @@ static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
*/
void kvm_pmu_overflow_set(struct kvm_vcpu *vcpu, u64 val)
{
- u64 reg;
-
if (val == 0)
return;
vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= val;
- reg = kvm_pmu_overflow_status(vcpu);
- if (reg != 0)
- kvm_vcpu_kick(vcpu);
+ kvm_pmu_check_overflow(vcpu);
}
static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
{
- struct kvm_pmu *pmu = &vcpu->arch.pmu;
- bool overflow;
-
if (!kvm_arm_pmu_v3_ready(vcpu))
return;
-
- overflow = !!kvm_pmu_overflow_status(vcpu);
- if (pmu->irq_level == overflow)
- return;
-
- pmu->irq_level = overflow;
-
- if (likely(irqchip_in_kernel(vcpu->kvm))) {
- int ret;
- ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
- pmu->irq_num, overflow);
- WARN_ON(ret);
- }
+ kvm_pmu_check_overflow(vcpu);
}
bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu)
@@ -451,34 +450,74 @@ bool kvm_arm_support_pmu_v3(void)
return (perf_num_counters() > 0);
}
-static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
+int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
{
- if (!kvm_arm_support_pmu_v3())
- return -ENODEV;
+ if (!vcpu->arch.pmu.created)
+ return 0;
/*
- * We currently require an in-kernel VGIC to use the PMU emulation,
- * because we do not support forwarding PMU overflow interrupts to
- * userspace yet.
+ * A valid interrupt configuration for the PMU is either to have a
+ * properly configured interrupt number and using an in-kernel
+ * irqchip, or to not have an in-kernel GIC and not set an IRQ.
*/
- if (!irqchip_in_kernel(vcpu->kvm) || !vgic_initialized(vcpu->kvm))
+ if (irqchip_in_kernel(vcpu->kvm)) {
+ int irq = vcpu->arch.pmu.irq_num;
+ if (!kvm_arm_pmu_irq_initialized(vcpu))
+ return -EINVAL;
+
+ /*
+ * If we are using an in-kernel vgic, at this point we know
+ * the vgic will be initialized, so we can check the PMU irq
+ * number against the dimensions of the vgic and make sure
+ * it's valid.
+ */
+ if (!irq_is_ppi(irq) && !vgic_valid_spi(vcpu->kvm, irq))
+ return -EINVAL;
+ } else if (kvm_arm_pmu_irq_initialized(vcpu)) {
+ return -EINVAL;
+ }
+
+ kvm_pmu_vcpu_reset(vcpu);
+ vcpu->arch.pmu.ready = true;
+
+ return 0;
+}
+
+static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
+{
+ if (!kvm_arm_support_pmu_v3())
return -ENODEV;
- if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features) ||
- !kvm_arm_pmu_irq_initialized(vcpu))
+ if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
return -ENXIO;
- if (kvm_arm_pmu_v3_ready(vcpu))
+ if (vcpu->arch.pmu.created)
return -EBUSY;
- kvm_pmu_vcpu_reset(vcpu);
- vcpu->arch.pmu.ready = true;
+ if (irqchip_in_kernel(vcpu->kvm)) {
+ int ret;
+
+ /*
+ * If using the PMU with an in-kernel virtual GIC
+ * implementation, we require the GIC to be already
+ * initialized when initializing the PMU.
+ */
+ if (!vgic_initialized(vcpu->kvm))
+ return -ENODEV;
+
+ if (!kvm_arm_pmu_irq_initialized(vcpu))
+ return -ENXIO;
+ ret = kvm_vgic_set_owner(vcpu, vcpu->arch.pmu.irq_num,
+ &vcpu->arch.pmu);
+ if (ret)
+ return ret;
+ }
+
+ vcpu->arch.pmu.created = true;
return 0;
}
-#define irq_is_ppi(irq) ((irq) >= VGIC_NR_SGIS && (irq) < VGIC_NR_PRIVATE_IRQS)
-
/*
* For one VM the interrupt type must be same for each vcpu.
* As a PPI, the interrupt number is the same for all vcpus,
@@ -512,6 +551,9 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
int __user *uaddr = (int __user *)(long)attr->addr;
int irq;
+ if (!irqchip_in_kernel(vcpu->kvm))
+ return -EINVAL;
+
if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
return -ENODEV;
@@ -519,7 +561,7 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
return -EFAULT;
/* The PMU overflow interrupt can be a PPI or a valid SPI. */
- if (!(irq_is_ppi(irq) || vgic_valid_spi(vcpu->kvm, irq)))
+ if (!(irq_is_ppi(irq) || irq_is_spi(irq)))
return -EINVAL;
if (!pmu_irq_is_valid(vcpu->kvm, irq))
@@ -546,6 +588,9 @@ int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
int __user *uaddr = (int __user *)(long)attr->addr;
int irq;
+ if (!irqchip_in_kernel(vcpu->kvm))
+ return -EINVAL;
+
if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
return -ENODEV;
diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c
index a08d7a93aebb..f1e363bab5e8 100644
--- a/virt/kvm/arm/psci.c
+++ b/virt/kvm/arm/psci.c
@@ -57,6 +57,7 @@ static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu)
* for KVM will preserve the register state.
*/
kvm_vcpu_block(vcpu);
+ kvm_clear_request(KVM_REQ_UNHALT, vcpu);
return PSCI_RET_SUCCESS;
}
@@ -64,6 +65,8 @@ static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu)
static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu)
{
vcpu->arch.power_off = true;
+ kvm_make_request(KVM_REQ_SLEEP, vcpu);
+ kvm_vcpu_kick(vcpu);
}
static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
@@ -178,10 +181,9 @@ static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type)
* after this call is handled and before the VCPUs have been
* re-initialized.
*/
- kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
+ kvm_for_each_vcpu(i, tmp, vcpu->kvm)
tmp->arch.power_off = true;
- kvm_vcpu_kick(tmp);
- }
+ kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP);
memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event));
vcpu->run->system_event.type = type;
diff --git a/virt/kvm/arm/vgic/vgic-irqfd.c b/virt/kvm/arm/vgic/vgic-irqfd.c
index f138ed2e9c63..b7baf581611a 100644
--- a/virt/kvm/arm/vgic/vgic-irqfd.c
+++ b/virt/kvm/arm/vgic/vgic-irqfd.c
@@ -34,7 +34,7 @@ static int vgic_irqfd_set_irq(struct kvm_kernel_irq_routing_entry *e,
if (!vgic_valid_spi(kvm, spi_id))
return -EINVAL;
- return kvm_vgic_inject_irq(kvm, 0, spi_id, level);
+ return kvm_vgic_inject_irq(kvm, 0, spi_id, level, NULL);
}
/**
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c
index 63e0bbdcddcc..37522e65eb53 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v2.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c
@@ -308,34 +308,36 @@ static const struct vgic_register_region vgic_v2_dist_registers[] = {
vgic_mmio_read_v2_misc, vgic_mmio_write_v2_misc, 12,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_IGROUP,
- vgic_mmio_read_rao, vgic_mmio_write_wi, 1,
+ vgic_mmio_read_rao, vgic_mmio_write_wi, NULL, NULL, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ENABLE_SET,
- vgic_mmio_read_enable, vgic_mmio_write_senable, 1,
+ vgic_mmio_read_enable, vgic_mmio_write_senable, NULL, NULL, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ENABLE_CLEAR,
- vgic_mmio_read_enable, vgic_mmio_write_cenable, 1,
+ vgic_mmio_read_enable, vgic_mmio_write_cenable, NULL, NULL, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_SET,
- vgic_mmio_read_pending, vgic_mmio_write_spending, 1,
+ vgic_mmio_read_pending, vgic_mmio_write_spending, NULL, NULL, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_CLEAR,
- vgic_mmio_read_pending, vgic_mmio_write_cpending, 1,
+ vgic_mmio_read_pending, vgic_mmio_write_cpending, NULL, NULL, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_SET,
- vgic_mmio_read_active, vgic_mmio_write_sactive, 1,
+ vgic_mmio_read_active, vgic_mmio_write_sactive,
+ NULL, vgic_mmio_uaccess_write_sactive, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_CLEAR,
- vgic_mmio_read_active, vgic_mmio_write_cactive, 1,
+ vgic_mmio_read_active, vgic_mmio_write_cactive,
+ NULL, vgic_mmio_uaccess_write_cactive, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PRI,
- vgic_mmio_read_priority, vgic_mmio_write_priority, 8,
- VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
+ vgic_mmio_read_priority, vgic_mmio_write_priority, NULL, NULL,
+ 8, VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_TARGET,
- vgic_mmio_read_target, vgic_mmio_write_target, 8,
+ vgic_mmio_read_target, vgic_mmio_write_target, NULL, NULL, 8,
VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_CONFIG,
- vgic_mmio_read_config, vgic_mmio_write_config, 2,
+ vgic_mmio_read_config, vgic_mmio_write_config, NULL, NULL, 2,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_LENGTH(GIC_DIST_SOFTINT,
vgic_mmio_read_raz, vgic_mmio_write_sgir, 4,
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
index 201d5e2e973d..714fa3933546 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
@@ -456,11 +456,13 @@ static const struct vgic_register_region vgic_v3_dist_registers[] = {
vgic_mmio_read_raz, vgic_mmio_write_wi, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISACTIVER,
- vgic_mmio_read_active, vgic_mmio_write_sactive, NULL, NULL, 1,
+ vgic_mmio_read_active, vgic_mmio_write_sactive,
+ NULL, vgic_mmio_uaccess_write_sactive, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICACTIVER,
- vgic_mmio_read_active, vgic_mmio_write_cactive, NULL, NULL, 1,
- VGIC_ACCESS_32bit),
+ vgic_mmio_read_active, vgic_mmio_write_cactive,
+ NULL, vgic_mmio_uaccess_write_cactive,
+ 1, VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IPRIORITYR,
vgic_mmio_read_priority, vgic_mmio_write_priority, NULL, NULL,
8, VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
@@ -526,12 +528,14 @@ static const struct vgic_register_region vgic_v3_sgibase_registers[] = {
vgic_mmio_read_pending, vgic_mmio_write_cpending,
vgic_mmio_read_raz, vgic_mmio_write_wi, 4,
VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_LENGTH(GICR_ISACTIVER0,
- vgic_mmio_read_active, vgic_mmio_write_sactive, 4,
- VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_LENGTH(GICR_ICACTIVER0,
- vgic_mmio_read_active, vgic_mmio_write_cactive, 4,
- VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ISACTIVER0,
+ vgic_mmio_read_active, vgic_mmio_write_sactive,
+ NULL, vgic_mmio_uaccess_write_sactive,
+ 4, VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ICACTIVER0,
+ vgic_mmio_read_active, vgic_mmio_write_cactive,
+ NULL, vgic_mmio_uaccess_write_cactive,
+ 4, VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_LENGTH(GICR_IPRIORITYR0,
vgic_mmio_read_priority, vgic_mmio_write_priority, 32,
VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c
index 1c17b2a2f105..c1e4bdd66131 100644
--- a/virt/kvm/arm/vgic/vgic-mmio.c
+++ b/virt/kvm/arm/vgic/vgic-mmio.c
@@ -231,56 +231,94 @@ static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
* be migrated while we don't hold the IRQ locks and we don't want to be
* chasing moving targets.
*
- * For private interrupts, we only have to make sure the single and only VCPU
- * that can potentially queue the IRQ is stopped.
+ * For private interrupts we don't have to do anything because userspace
+ * accesses to the VGIC state already require all VCPUs to be stopped, and
+ * only the VCPU itself can modify the active state of its private
+ * interrupts, which guarantees that the VCPU is not running.
*/
static void vgic_change_active_prepare(struct kvm_vcpu *vcpu, u32 intid)
{
- if (intid < VGIC_NR_PRIVATE_IRQS)
- kvm_arm_halt_vcpu(vcpu);
- else
+ if (intid >= VGIC_NR_PRIVATE_IRQS)
kvm_arm_halt_guest(vcpu->kvm);
}
/* See vgic_change_active_prepare */
static void vgic_change_active_finish(struct kvm_vcpu *vcpu, u32 intid)
{
- if (intid < VGIC_NR_PRIVATE_IRQS)
- kvm_arm_resume_vcpu(vcpu);
- else
+ if (intid >= VGIC_NR_PRIVATE_IRQS)
kvm_arm_resume_guest(vcpu->kvm);
}
-void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len,
- unsigned long val)
+static void __vgic_mmio_write_cactive(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val)
{
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
int i;
- vgic_change_active_prepare(vcpu, intid);
for_each_set_bit(i, &val, len * 8) {
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
vgic_mmio_change_active(vcpu, irq, false);
vgic_put_irq(vcpu->kvm, irq);
}
- vgic_change_active_finish(vcpu, intid);
}
-void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu,
+void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len,
unsigned long val)
{
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
- int i;
+ mutex_lock(&vcpu->kvm->lock);
vgic_change_active_prepare(vcpu, intid);
+
+ __vgic_mmio_write_cactive(vcpu, addr, len, val);
+
+ vgic_change_active_finish(vcpu, intid);
+ mutex_unlock(&vcpu->kvm->lock);
+}
+
+void vgic_mmio_uaccess_write_cactive(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val)
+{
+ __vgic_mmio_write_cactive(vcpu, addr, len, val);
+}
+
+static void __vgic_mmio_write_sactive(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val)
+{
+ u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
+ int i;
+
for_each_set_bit(i, &val, len * 8) {
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
vgic_mmio_change_active(vcpu, irq, true);
vgic_put_irq(vcpu->kvm, irq);
}
+}
+
+void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val)
+{
+ u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
+
+ mutex_lock(&vcpu->kvm->lock);
+ vgic_change_active_prepare(vcpu, intid);
+
+ __vgic_mmio_write_sactive(vcpu, addr, len, val);
+
vgic_change_active_finish(vcpu, intid);
+ mutex_unlock(&vcpu->kvm->lock);
+}
+
+void vgic_mmio_uaccess_write_sactive(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val)
+{
+ __vgic_mmio_write_sactive(vcpu, addr, len, val);
}
unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu,
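The net result is two entry points sharing one core helper: the guest MMIO handlers serialize on kvm->lock and bracket the change with vgic_change_active_prepare()/vgic_change_active_finish(), halting and resuming the whole guest for shared interrupts, while the new uaccess handlers call the core helper directly, relying on the invariant that userspace accesses to VGIC state are only performed with all VCPUs already stopped. That invariant is also why the uaccess variants take no locks of their own.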
diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h
index ea4171acdef3..5693f6df45ec 100644
--- a/virt/kvm/arm/vgic/vgic-mmio.h
+++ b/virt/kvm/arm/vgic/vgic-mmio.h
@@ -75,7 +75,7 @@ extern struct kvm_io_device_ops kvm_io_gic_ops;
* The _WITH_LENGTH version instantiates registers with a fixed length
* and is mutually exclusive with the _PER_IRQ version.
*/
-#define REGISTER_DESC_WITH_BITS_PER_IRQ(off, rd, wr, bpi, acc) \
+#define REGISTER_DESC_WITH_BITS_PER_IRQ(off, rd, wr, ur, uw, bpi, acc) \
{ \
.reg_offset = off, \
.bits_per_irq = bpi, \
@@ -83,6 +83,8 @@ extern struct kvm_io_device_ops kvm_io_gic_ops;
.access_flags = acc, \
.read = rd, \
.write = wr, \
+ .uaccess_read = ur, \
+ .uaccess_write = uw, \
}
#define REGISTER_DESC_WITH_LENGTH(off, rd, wr, length, acc) \
@@ -165,6 +167,14 @@ void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len,
unsigned long val);
+void vgic_mmio_uaccess_write_cactive(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val);
+
+void vgic_mmio_uaccess_write_sactive(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val);
+
unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len);
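The new uaccess_read/uaccess_write fields let a register provide distinct semantics for userspace (state save/restore) accesses; passing NULL in the descriptor macros means userspace accesses fall back to the ordinary guest handler. A sketch of the selection logic on the write side, under the assumption that the dispatcher looks roughly like the vgic MMIO userspace-access path (the function name and body here are illustrative):

/* Illustrative: prefer a dedicated userspace handler when present. */
static void vgic_uaccess_write(struct kvm_vcpu *vcpu,
			       const struct vgic_register_region *region,
			       gpa_t addr, unsigned int len,
			       unsigned long val)
{
	if (region->uaccess_write)
		region->uaccess_write(vcpu, addr, len, val);
	else
		region->write(vcpu, addr, len, val);
}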
diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
index 83b24d20ff8f..fed717e07938 100644
--- a/virt/kvm/arm/vgic/vgic.c
+++ b/virt/kvm/arm/vgic/vgic.c
@@ -35,11 +35,12 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = {
/*
* Locking order is always:
- * its->cmd_lock (mutex)
- * its->its_lock (mutex)
- * vgic_cpu->ap_list_lock
- * kvm->lpi_list_lock
- * vgic_irq->irq_lock
+ * kvm->lock (mutex)
+ * its->cmd_lock (mutex)
+ * its->its_lock (mutex)
+ * vgic_cpu->ap_list_lock
+ * kvm->lpi_list_lock
+ * vgic_irq->irq_lock
*
* If you need to take multiple locks, always take the upper lock first,
* then the lower ones, e.g. first take the its_lock, then the irq_lock.
@@ -234,10 +235,14 @@ static void vgic_sort_ap_list(struct kvm_vcpu *vcpu)
/*
* Only valid injection if changing level for level-triggered IRQs or for a
- * rising edge.
+ * rising edge, and in-kernel connected IRQ lines can only be controlled by
+ * their owner.
*/
-static bool vgic_validate_injection(struct vgic_irq *irq, bool level)
+static bool vgic_validate_injection(struct vgic_irq *irq, bool level, void *owner)
{
+ if (irq->owner != owner)
+ return false;
+
switch (irq->config) {
case VGIC_CONFIG_LEVEL:
return irq->line_level != level;
@@ -285,8 +290,10 @@ retry:
* won't see this one until it exits for some other
* reason.
*/
- if (vcpu)
+ if (vcpu) {
+ kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
kvm_vcpu_kick(vcpu);
+ }
return false;
}
@@ -332,6 +339,7 @@ retry:
spin_unlock(&irq->irq_lock);
spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock);
+ kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
kvm_vcpu_kick(vcpu);
return true;
@@ -346,13 +354,16 @@ retry:
* false: to ignore the call
* Level-sensitive true: raise the input signal
* false: lower the input signal
+ * @owner: An opaque pointer to the owner of the IRQ being raised, used to
+ *         verify that the caller is allowed to inject this IRQ. Userspace
+ *         injections will have owner == NULL.
*
* The VGIC is not concerned with devices being active-LOW or active-HIGH for
* level-sensitive interrupts. You can think of the level parameter as 1
* being HIGH and 0 being LOW and all devices being active-HIGH.
*/
int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
- bool level)
+ bool level, void *owner)
{
struct kvm_vcpu *vcpu;
struct vgic_irq *irq;
@@ -374,7 +385,7 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
spin_lock(&irq->irq_lock);
- if (!vgic_validate_injection(irq, level)) {
+ if (!vgic_validate_injection(irq, level, owner)) {
/* Nothing to see here, move along... */
spin_unlock(&irq->irq_lock);
vgic_put_irq(kvm, irq);
@@ -431,6 +442,39 @@ int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq)
}
/**
+ * kvm_vgic_set_owner - Set the owner of an interrupt for a VM
+ *
+ * @vcpu: Pointer to the VCPU (used for PPIs)
+ * @intid: The virtual INTID identifying the interrupt (PPI or SPI)
+ * @owner: Opaque pointer to the owner
+ *
+ * Returns 0 if intid is not already used by another in-kernel device and the
+ * owner is set, otherwise returns an error code.
+ */
+int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner)
+{
+ struct vgic_irq *irq;
+ int ret = 0;
+
+ if (!vgic_initialized(vcpu->kvm))
+ return -EAGAIN;
+
+ /* SGIs and LPIs cannot be wired up to any device */
+ if (!irq_is_ppi(intid) && !vgic_valid_spi(vcpu->kvm, intid))
+ return -EINVAL;
+
+ irq = vgic_get_irq(vcpu->kvm, vcpu, intid);
+ spin_lock(&irq->irq_lock);
+ if (irq->owner && irq->owner != owner)
+ ret = -EEXIST;
+ else
+ irq->owner = owner;
+ spin_unlock(&irq->irq_lock);
+
+ return ret;
+}
+
+/**
* vgic_prune_ap_list - Remove non-relevant interrupts from the list
*
* @vcpu: The VCPU pointer
@@ -721,8 +765,10 @@ void vgic_kick_vcpus(struct kvm *kvm)
* a good kick...
*/
kvm_for_each_vcpu(c, vcpu, kvm) {
- if (kvm_vgic_vcpu_pending_irq(vcpu))
+ if (kvm_vgic_vcpu_pending_irq(vcpu)) {
+ kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
kvm_vcpu_kick(vcpu);
+ }
}
}
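The owner cookie pairs with the new injection argument: an in-kernel device claims its line once at init time and then passes the same cookie on every injection, while userspace and irqfd injections pass NULL, so vgic_validate_injection() silently drops any injection from a non-owner. A fragment showing the pattern as the PMU code above uses it (assuming vcpu, irq_num, level and ret are in scope):

	/* Claim the line once, typically at device init time. */
	ret = kvm_vgic_set_owner(vcpu, irq_num, &vcpu->arch.pmu);
	if (ret)
		return ret;

	/* ...later, always inject with the same owner cookie. */
	kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, irq_num, level,
			    &vcpu->arch.pmu);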