summaryrefslogtreecommitdiff
path: root/arch/x86/kvm/xen.c
diff options
context:
space:
mode:
authorDavid Woodhouse <dwmw@amazon.co.uk>2021-12-10 16:36:24 +0000
committerPaolo Bonzini <pbonzini@redhat.com>2022-01-07 10:44:45 -0500
commit55749769fe608fa3f4a075e42e89d237c8e37637 (patch)
treec99579f4f18b42a971d486fe53aabf02756dd538 /arch/x86/kvm/xen.c
parent14243b387137a4afbe1df5d9dc15182d6657bb79 (diff)
KVM: x86: Fix wall clock writes in Xen shared_info not to mark page dirty
When dirty ring logging is enabled, any dirty logging without an active vCPU context will cause a kernel oops. But we've already declared that the shared_info page doesn't get dirty tracking anyway, since it would be kind of insane to mark it dirty every time we deliver an event channel interrupt. Userspace is supposed to just assume it's always dirty any time a vCPU can run or event channels are routed. So stop using the generic kvm_write_wall_clock() and just write directly through the gfn_to_pfn_cache that we already have set up. We can make kvm_write_wall_clock() static in x86.c again now, but let's not remove the 'sec_hi_ofs' argument even though it's not used yet. At some point we *will* want to use that for KVM guests too. Fixes: 629b5348841a ("KVM: x86/xen: update wallclock region") Reported-by: butt3rflyh4ck <butterflyhuangxx@gmail.com> Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> Message-Id: <20211210163625.2886-6-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Diffstat (limited to 'arch/x86/kvm/xen.c')
-rw-r--r--arch/x86/kvm/xen.c62
1 files changed, 48 insertions, 14 deletions
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index ceddabd1f5c6..0e3f7d6e9fd7 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -25,8 +25,11 @@ DEFINE_STATIC_KEY_DEFERRED_FALSE(kvm_xen_enabled, HZ);
static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn)
{
struct gfn_to_pfn_cache *gpc = &kvm->arch.xen.shinfo_cache;
+ struct pvclock_wall_clock *wc;
gpa_t gpa = gfn_to_gpa(gfn);
- int wc_ofs, sec_hi_ofs;
+ u32 *wc_sec_hi;
+ u32 wc_version;
+ u64 wall_nsec;
int ret = 0;
int idx = srcu_read_lock(&kvm->srcu);
@@ -35,32 +38,63 @@ static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn)
goto out;
}
- ret = kvm_gfn_to_pfn_cache_init(kvm, gpc, NULL, false, true, gpa,
- PAGE_SIZE, false);
- if (ret)
- goto out;
+ do {
+ ret = kvm_gfn_to_pfn_cache_init(kvm, gpc, NULL, false, true,
+ gpa, PAGE_SIZE, false);
+ if (ret)
+ goto out;
+
+ /*
+ * This code mirrors kvm_write_wall_clock() except that it writes
+ * directly through the pfn cache and doesn't mark the page dirty.
+ */
+ wall_nsec = ktime_get_real_ns() - get_kvmclock_ns(kvm);
+
+ /* It could be invalid again already, so we need to check */
+ read_lock_irq(&gpc->lock);
+
+ if (gpc->valid)
+ break;
+
+ read_unlock_irq(&gpc->lock);
+ } while (1);
/* Paranoia checks on the 32-bit struct layout */
BUILD_BUG_ON(offsetof(struct compat_shared_info, wc) != 0x900);
BUILD_BUG_ON(offsetof(struct compat_shared_info, arch.wc_sec_hi) != 0x924);
BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
- /* 32-bit location by default */
- wc_ofs = offsetof(struct compat_shared_info, wc);
- sec_hi_ofs = offsetof(struct compat_shared_info, arch.wc_sec_hi);
-
#ifdef CONFIG_X86_64
/* Paranoia checks on the 64-bit struct layout */
BUILD_BUG_ON(offsetof(struct shared_info, wc) != 0xc00);
BUILD_BUG_ON(offsetof(struct shared_info, wc_sec_hi) != 0xc0c);
- if (kvm->arch.xen.long_mode) {
- wc_ofs = offsetof(struct shared_info, wc);
- sec_hi_ofs = offsetof(struct shared_info, wc_sec_hi);
- }
+ if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) {
+ struct shared_info *shinfo = gpc->khva;
+
+ wc_sec_hi = &shinfo->wc_sec_hi;
+ wc = &shinfo->wc;
+ } else
#endif
+ {
+ struct compat_shared_info *shinfo = gpc->khva;
+
+ wc_sec_hi = &shinfo->arch.wc_sec_hi;
+ wc = &shinfo->wc;
+ }
+
+ /* Increment and ensure an odd value */
+ wc_version = wc->version = (wc->version + 1) | 1;
+ smp_wmb();
+
+ wc->nsec = do_div(wall_nsec, 1000000000);
+ wc->sec = (u32)wall_nsec;
+ *wc_sec_hi = wall_nsec >> 32;
+ smp_wmb();
+
+ wc->version = wc_version + 1;
+ read_unlock_irq(&gpc->lock);
- kvm_write_wall_clock(kvm, gpa + wc_ofs, sec_hi_ofs - wc_ofs);
kvm_make_all_cpus_request(kvm, KVM_REQ_MASTERCLOCK_UPDATE);
out: