From ddc12f2a12917c10b0deb0928f0560bffb7729ec Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Tue, 18 May 2021 10:34:10 -0700 Subject: KVM: mmu: Refactor memslot copy Factor out copying kvm_memslots from allocating the memory for new ones in preparation for adding a new lock to protect the arch-specific fields of the memslots. No functional change intended. Reviewed-by: David Hildenbrand Signed-off-by: Ben Gardon Message-Id: <20210518173414.450044-4-bgardon@google.com> Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) (limited to 'virt/kvm') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 6a6bc7af0e28..d65be9461493 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1307,6 +1307,18 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm, return old_memslots; } +static size_t kvm_memslots_size(int slots) +{ + return sizeof(struct kvm_memslots) + + (sizeof(struct kvm_memory_slot) * slots); +} + +static void kvm_copy_memslots(struct kvm_memslots *to, + struct kvm_memslots *from) +{ + memcpy(to, from, kvm_memslots_size(from->used_slots)); +} + /* * Note, at a minimum, the current number of used slots must be allocated, even * when deleting a memslot, as we need a complete duplicate of the memslots for @@ -1316,19 +1328,16 @@ static struct kvm_memslots *kvm_dup_memslots(struct kvm_memslots *old, enum kvm_mr_change change) { struct kvm_memslots *slots; - size_t old_size, new_size; - - old_size = sizeof(struct kvm_memslots) + - (sizeof(struct kvm_memory_slot) * old->used_slots); + size_t new_size; if (change == KVM_MR_CREATE) - new_size = old_size + sizeof(struct kvm_memory_slot); + new_size = kvm_memslots_size(old->used_slots + 1); else - new_size = old_size; + new_size = kvm_memslots_size(old->used_slots); slots = kvzalloc(new_size, GFP_KERNEL_ACCOUNT); if (likely(slots)) - memcpy(slots, old, old_size); + kvm_copy_memslots(slots, old); return slots; } -- cgit From b10a038e84d188e15819058b2978b2daa9853aeb Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Tue, 18 May 2021 10:34:11 -0700 Subject: KVM: mmu: Add slots_arch_lock for memslot arch fields Add a new lock to protect the arch-specific fields of memslots if they need to be modified in a kvm->srcu read critical section. A future commit will use this lock to lazily allocate memslot rmaps for x86. Signed-off-by: Ben Gardon Message-Id: <20210518173414.450044-5-bgardon@google.com> [Add Documentation/ hunk. - Paolo] Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 48 insertions(+), 6 deletions(-) (limited to 'virt/kvm') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index d65be9461493..fa7e7ebefc79 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -909,6 +909,7 @@ static struct kvm *kvm_create_vm(unsigned long type) mutex_init(&kvm->lock); mutex_init(&kvm->irq_lock); mutex_init(&kvm->slots_lock); + mutex_init(&kvm->slots_arch_lock); INIT_LIST_HEAD(&kvm->devices); BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX); @@ -1281,6 +1282,14 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm, slots->generation = gen | KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS; rcu_assign_pointer(kvm->memslots[as_id], slots); + + /* + * Acquired in kvm_set_memslot. Must be released before synchronize + * SRCU below in order to avoid deadlock with another thread + * acquiring the slots_arch_lock in an srcu critical section. + */ + mutex_unlock(&kvm->slots_arch_lock); + synchronize_srcu_expedited(&kvm->srcu); /* @@ -1352,9 +1361,27 @@ static int kvm_set_memslot(struct kvm *kvm, struct kvm_memslots *slots; int r; + /* + * Released in install_new_memslots. + * + * Must be held from before the current memslots are copied until + * after the new memslots are installed with rcu_assign_pointer, + * then released before the synchronize srcu in install_new_memslots. + * + * When modifying memslots outside of the slots_lock, must be held + * before reading the pointer to the current memslots until after all + * changes to those memslots are complete. + * + * These rules ensure that installing new memslots does not lose + * changes made to the previous memslots. + */ + mutex_lock(&kvm->slots_arch_lock); + slots = kvm_dup_memslots(__kvm_memslots(kvm, as_id), change); - if (!slots) + if (!slots) { + mutex_unlock(&kvm->slots_arch_lock); return -ENOMEM; + } if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) { /* @@ -1365,10 +1392,9 @@ static int kvm_set_memslot(struct kvm *kvm, slot->flags |= KVM_MEMSLOT_INVALID; /* - * We can re-use the old memslots, the only difference from the - * newly installed memslots is the invalid flag, which will get - * dropped by update_memslots anyway. We'll also revert to the - * old memslots if preparing the new memory region fails. + * We can re-use the memory from the old memslots. + * It will be overwritten with a copy of the new memslots + * after reacquiring the slots_arch_lock below. */ slots = install_new_memslots(kvm, as_id, slots); @@ -1380,6 +1406,17 @@ static int kvm_set_memslot(struct kvm *kvm, * - kvm_is_visible_gfn (mmu_check_root) */ kvm_arch_flush_shadow_memslot(kvm, slot); + + /* Released in install_new_memslots. */ + mutex_lock(&kvm->slots_arch_lock); + + /* + * The arch-specific fields of the memslots could have changed + * between releasing the slots_arch_lock in + * install_new_memslots and here, so get a fresh copy of the + * slots. + */ + kvm_copy_memslots(slots, __kvm_memslots(kvm, as_id)); } r = kvm_arch_prepare_memory_region(kvm, new, mem, change); @@ -1395,8 +1432,13 @@ static int kvm_set_memslot(struct kvm *kvm, return 0; out_slots: - if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) + if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) { + slot = id_to_memslot(slots, old->id); + slot->flags &= ~KVM_MEMSLOT_INVALID; slots = install_new_memslots(kvm, as_id, slots); + } else { + mutex_unlock(&kvm->slots_arch_lock); + } kvfree(slots); return r; } -- cgit From 2fdef3a2ae01dfd928c4b42c5a3b76546170a74c Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Sun, 6 Jun 2021 11:10:44 +0900 Subject: kvm: add PM-notifier Add KVM PM-notifier so that architectures can have arch-specific VM suspend/resume routines. Such architectures need to select CONFIG_HAVE_KVM_PM_NOTIFIER and implement kvm_arch_pm_notifier(). Signed-off-by: Sergey Senozhatsky Acked-by: Marc Zyngier Message-Id: <20210606021045.14159-1-senozhatsky@chromium.org> Signed-off-by: Paolo Bonzini --- virt/kvm/Kconfig | 3 +++ virt/kvm/kvm_main.c | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) (limited to 'virt/kvm') diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index 1c37ccd5d402..62b39149b8c8 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -63,3 +63,6 @@ config HAVE_KVM_NO_POLL config KVM_XFER_TO_GUEST_WORK bool + +config HAVE_KVM_PM_NOTIFIER + bool diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index fa7e7ebefc79..fc35ba0ea5d3 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include @@ -780,6 +781,38 @@ static int kvm_init_mmu_notifier(struct kvm *kvm) #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */ +#ifdef CONFIG_HAVE_KVM_PM_NOTIFIER +static int kvm_pm_notifier_call(struct notifier_block *bl, + unsigned long state, + void *unused) +{ + struct kvm *kvm = container_of(bl, struct kvm, pm_notifier); + + return kvm_arch_pm_notifier(kvm, state); +} + +static void kvm_init_pm_notifier(struct kvm *kvm) +{ + kvm->pm_notifier.notifier_call = kvm_pm_notifier_call; + /* Suspend KVM before we suspend ftrace, RCU, etc. */ + kvm->pm_notifier.priority = INT_MAX; + register_pm_notifier(&kvm->pm_notifier); +} + +static void kvm_destroy_pm_notifier(struct kvm *kvm) +{ + unregister_pm_notifier(&kvm->pm_notifier); +} +#else /* !CONFIG_HAVE_KVM_PM_NOTIFIER */ +static void kvm_init_pm_notifier(struct kvm *kvm) +{ +} + +static void kvm_destroy_pm_notifier(struct kvm *kvm) +{ +} +#endif /* CONFIG_HAVE_KVM_PM_NOTIFIER */ + static struct kvm_memslots *kvm_alloc_memslots(void) { int i; @@ -964,6 +997,7 @@ static struct kvm *kvm_create_vm(unsigned long type) mutex_unlock(&kvm_lock); preempt_notifier_inc(); + kvm_init_pm_notifier(kvm); return kvm; @@ -1011,6 +1045,7 @@ static void kvm_destroy_vm(struct kvm *kvm) int i; struct mm_struct *mm = kvm->mm; + kvm_destroy_pm_notifier(kvm); kvm_uevent_notify_change(KVM_EVENT_DESTROY_VM, kvm); kvm_destroy_vm_debugfs(kvm); kvm_arch_sync_events(kvm); -- cgit From e3cb6fa0e2bf4ffc6225a55851f0cf2b93b50f91 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 10 Jun 2021 12:30:32 -0400 Subject: KVM: switch per-VM stats to u64 Make them the same type as vCPU stats. There is no reason to limit the counters to unsigned long. Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'virt/kvm') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index fc35ba0ea5d3..ed4d1581d502 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -4833,14 +4833,14 @@ static int kvm_debugfs_release(struct inode *inode, struct file *file) static int kvm_get_stat_per_vm(struct kvm *kvm, size_t offset, u64 *val) { - *val = *(ulong *)((void *)kvm + offset); + *val = *(u64 *)((void *)kvm + offset); return 0; } static int kvm_clear_stat_per_vm(struct kvm *kvm, size_t offset) { - *(ulong *)((void *)kvm + offset) = 0; + *(u64 *)((void *)kvm + offset) = 0; return 0; } -- cgit From 0193cc908b5ae8aff2e2d2997ca5d4ae26ed24d4 Mon Sep 17 00:00:00 2001 From: Jing Zhang Date: Fri, 18 Jun 2021 22:27:03 +0000 Subject: KVM: stats: Separate generic stats from architecture specific ones Generic KVM stats are those collected in architecture independent code or those supported by all architectures; put all generic statistics in a separate structure. This ensures that they are defined the same way in the statistics API which is being added, removing duplication among different architectures in the declaration of the descriptors. No functional change intended. Reviewed-by: David Matlack Reviewed-by: Ricardo Koller Reviewed-by: Krish Sadhukhan Signed-off-by: Jing Zhang Message-Id: <20210618222709.1858088-2-jingzhangos@google.com> Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'virt/kvm') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index ed4d1581d502..cec986487b30 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -332,7 +332,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm) */ if (!kvm_arch_flush_remote_tlb(kvm) || kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH)) - ++kvm->stat.remote_tlb_flush; + ++kvm->stat.generic.remote_tlb_flush; cmpxchg(&kvm->tlbs_dirty, dirty_count, 0); } EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs); @@ -3029,9 +3029,9 @@ static inline void update_halt_poll_stats(struct kvm_vcpu *vcpu, u64 poll_ns, bool waited) { if (waited) - vcpu->stat.halt_poll_fail_ns += poll_ns; + vcpu->stat.generic.halt_poll_fail_ns += poll_ns; else - vcpu->stat.halt_poll_success_ns += poll_ns; + vcpu->stat.generic.halt_poll_success_ns += poll_ns; } /* @@ -3049,16 +3049,16 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) if (vcpu->halt_poll_ns && !kvm_arch_no_poll(vcpu)) { ktime_t stop = ktime_add_ns(ktime_get(), vcpu->halt_poll_ns); - ++vcpu->stat.halt_attempted_poll; + ++vcpu->stat.generic.halt_attempted_poll; do { /* * This sets KVM_REQ_UNHALT if an interrupt * arrives. */ if (kvm_vcpu_check_block(vcpu) < 0) { - ++vcpu->stat.halt_successful_poll; + ++vcpu->stat.generic.halt_successful_poll; if (!vcpu_valid_wakeup(vcpu)) - ++vcpu->stat.halt_poll_invalid; + ++vcpu->stat.generic.halt_poll_invalid; goto out; } poll_end = cur = ktime_get(); @@ -3115,7 +3115,7 @@ bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu) waitp = kvm_arch_vcpu_get_wait(vcpu); if (rcuwait_wake_up(waitp)) { WRITE_ONCE(vcpu->ready, true); - ++vcpu->stat.halt_wakeup; + ++vcpu->stat.generic.halt_wakeup; return true; } -- cgit From cb082bfab59a224a49ae803fed52cd03e8d6b5e0 Mon Sep 17 00:00:00 2001 From: Jing Zhang Date: Fri, 18 Jun 2021 22:27:04 +0000 Subject: KVM: stats: Add fd-based API to read binary stats data This commit defines the API for userspace and prepare the common functionalities to support per VM/VCPU binary stats data readings. The KVM stats now is only accessible by debugfs, which has some shortcomings this change series are supposed to fix: 1. The current debugfs stats solution in KVM could be disabled when kernel Lockdown mode is enabled, which is a potential rick for production. 2. The current debugfs stats solution in KVM is organized as "one stats per file", it is good for debugging, but not efficient for production. 3. The stats read/clear in current debugfs solution in KVM are protected by the global kvm_lock. Besides that, there are some other benefits with this change: 1. All KVM VM/VCPU stats can be read out in a bulk by one copy to userspace. 2. A schema is used to describe KVM statistics. From userspace's perspective, the KVM statistics are self-describing. 3. With the fd-based solution, a separate telemetry would be able to read KVM stats in a less privileged environment. 4. After the initial setup by reading in stats descriptors, a telemetry only needs to read the stats data itself, no more parsing or setup is needed. Reviewed-by: David Matlack Reviewed-by: Ricardo Koller Reviewed-by: Krish Sadhukhan Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba #arm64 Signed-off-by: Jing Zhang Message-Id: <20210618222709.1858088-3-jingzhangos@google.com> Signed-off-by: Paolo Bonzini --- virt/kvm/binary_stats.c | 146 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 146 insertions(+) create mode 100644 virt/kvm/binary_stats.c (limited to 'virt/kvm') diff --git a/virt/kvm/binary_stats.c b/virt/kvm/binary_stats.c new file mode 100644 index 000000000000..e609d428811a --- /dev/null +++ b/virt/kvm/binary_stats.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * KVM binary statistics interface implementation + * + * Copyright 2021 Google LLC + */ + +#include +#include +#include +#include + +/** + * kvm_stats_read() - Common function to read from the binary statistics + * file descriptor. + * + * @id: identification string of the stats + * @header: stats header for a vm or a vcpu + * @desc: start address of an array of stats descriptors for a vm or a vcpu + * @stats: start address of stats data block for a vm or a vcpu + * @size_stats: the size of stats data block pointed by @stats + * @user_buffer: start address of userspace buffer + * @size: requested read size from userspace + * @offset: the start position from which the content will be read for the + * corresponding vm or vcp file descriptor + * + * The file content of a vm/vcpu file descriptor is now defined as below: + * +-------------+ + * | Header | + * +-------------+ + * | id string | + * +-------------+ + * | Descriptors | + * +-------------+ + * | Stats Data | + * +-------------+ + * Although this function allows userspace to read any amount of data (as long + * as in the limit) from any position, the typical usage would follow below + * steps: + * 1. Read header from offset 0. Get the offset of descriptors and stats data + * and some other necessary information. This is a one-time work for the + * lifecycle of the corresponding vm/vcpu stats fd. + * 2. Read id string from its offset. This is a one-time work for the lifecycle + * of the corresponding vm/vcpu stats fd. + * 3. Read descriptors from its offset and discover all the stats by parsing + * descriptors. This is a one-time work for the lifecycle of the + * corresponding vm/vcpu stats fd. + * 4. Periodically read stats data from its offset using pread. + * + * Return: the number of bytes that has been successfully read + */ +ssize_t kvm_stats_read(char *id, const struct kvm_stats_header *header, + const struct _kvm_stats_desc *desc, + void *stats, size_t size_stats, + char __user *user_buffer, size_t size, loff_t *offset) +{ + ssize_t len; + ssize_t copylen; + ssize_t remain = size; + size_t size_desc; + size_t size_header; + void *src; + loff_t pos = *offset; + char __user *dest = user_buffer; + + size_header = sizeof(*header); + size_desc = header->num_desc * sizeof(*desc); + + len = KVM_STATS_NAME_SIZE + size_header + size_desc + size_stats - pos; + len = min(len, remain); + if (len <= 0) + return 0; + remain = len; + + /* + * Copy kvm stats header. + * The header is the first block of content userspace usually read out. + * The pos is 0 and the copylen and remain would be the size of header. + * The copy of the header would be skipped if offset is larger than the + * size of header. That usually happens when userspace reads stats + * descriptors and stats data. + */ + copylen = size_header - pos; + copylen = min(copylen, remain); + if (copylen > 0) { + src = (void *)header + pos; + if (copy_to_user(dest, src, copylen)) + return -EFAULT; + remain -= copylen; + pos += copylen; + dest += copylen; + } + + /* + * Copy kvm stats header id string. + * The id string is unique for every vm/vcpu, which is stored in kvm + * and kvm_vcpu structure. + * The id string is part of the stat header from the perspective of + * userspace, it is usually read out together with previous constant + * header part and could be skipped for later descriptors and stats + * data readings. + */ + copylen = header->id_offset + KVM_STATS_NAME_SIZE - pos; + copylen = min(copylen, remain); + if (copylen > 0) { + src = id + pos - header->id_offset; + if (copy_to_user(dest, src, copylen)) + return -EFAULT; + remain -= copylen; + pos += copylen; + dest += copylen; + } + + /* + * Copy kvm stats descriptors. + * The descriptors copy would be skipped in the typical case that + * userspace periodically read stats data, since the pos would be + * greater than the end address of descriptors + * (header->header.desc_offset + size_desc) causing copylen <= 0. + */ + copylen = header->desc_offset + size_desc - pos; + copylen = min(copylen, remain); + if (copylen > 0) { + src = (void *)desc + pos - header->desc_offset; + if (copy_to_user(dest, src, copylen)) + return -EFAULT; + remain -= copylen; + pos += copylen; + dest += copylen; + } + + /* Copy kvm stats values */ + copylen = header->data_offset + size_stats - pos; + copylen = min(copylen, remain); + if (copylen > 0) { + src = stats + pos - header->data_offset; + if (copy_to_user(dest, src, copylen)) + return -EFAULT; + remain -= copylen; + pos += copylen; + dest += copylen; + } + + *offset = pos; + return len; +} -- cgit From fcfe1baeddbf1c7c448b44c82586d0cbc8abc9f5 Mon Sep 17 00:00:00 2001 From: Jing Zhang Date: Fri, 18 Jun 2021 22:27:05 +0000 Subject: KVM: stats: Support binary stats retrieval for a VM Add a VM ioctl to get a statistics file descriptor by which a read functionality is provided for userspace to read out VM stats header, descriptors and data. Define VM statistics descriptors and header for all architectures. Reviewed-by: David Matlack Reviewed-by: Ricardo Koller Reviewed-by: Krish Sadhukhan Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba #arm64 Signed-off-by: Jing Zhang Message-Id: <20210618222709.1858088-4-jingzhangos@google.com> Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'virt/kvm') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index cec986487b30..33ec43a87d0f 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -4055,6 +4055,42 @@ static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm, } } +static ssize_t kvm_vm_stats_read(struct file *file, char __user *user_buffer, + size_t size, loff_t *offset) +{ + struct kvm *kvm = file->private_data; + + return kvm_stats_read(kvm->stats_id, &kvm_vm_stats_header, + &kvm_vm_stats_desc[0], &kvm->stat, + sizeof(kvm->stat), user_buffer, size, offset); +} + +static const struct file_operations kvm_vm_stats_fops = { + .read = kvm_vm_stats_read, + .llseek = noop_llseek, +}; + +static int kvm_vm_ioctl_get_stats_fd(struct kvm *kvm) +{ + int fd; + struct file *file; + + fd = get_unused_fd_flags(O_CLOEXEC); + if (fd < 0) + return fd; + + file = anon_inode_getfile("kvm-vm-stats", + &kvm_vm_stats_fops, kvm, O_RDONLY); + if (IS_ERR(file)) { + put_unused_fd(fd); + return PTR_ERR(file); + } + file->f_mode |= FMODE_PREAD; + fd_install(fd, file); + + return fd; +} + static long kvm_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -4237,6 +4273,9 @@ static long kvm_vm_ioctl(struct file *filp, case KVM_RESET_DIRTY_RINGS: r = kvm_vm_ioctl_reset_dirty_pages(kvm); break; + case KVM_GET_STATS_FD: + r = kvm_vm_ioctl_get_stats_fd(kvm); + break; default: r = kvm_arch_vm_ioctl(filp, ioctl, arg); } @@ -4316,6 +4355,9 @@ static int kvm_dev_ioctl_create_vm(unsigned long type) if (r < 0) goto put_kvm; + snprintf(kvm->stats_id, sizeof(kvm->stats_id), + "kvm-%d", task_pid_nr(current)); + file = anon_inode_getfile("kvm-vm", &kvm_vm_fops, kvm, O_RDWR); if (IS_ERR(file)) { put_unused_fd(r); -- cgit From ce55c049459cff0034cc1bcfdce3bf343a2d6317 Mon Sep 17 00:00:00 2001 From: Jing Zhang Date: Fri, 18 Jun 2021 22:27:06 +0000 Subject: KVM: stats: Support binary stats retrieval for a VCPU Add a VCPU ioctl to get a statistics file descriptor by which a read functionality is provided for userspace to read out VCPU stats header, descriptors and data. Define VCPU statistics descriptors and header for all architectures. Reviewed-by: David Matlack Reviewed-by: Ricardo Koller Reviewed-by: Krish Sadhukhan Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba #arm64 Signed-off-by: Jing Zhang Message-Id: <20210618222709.1858088-5-jingzhangos@google.com> Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) (limited to 'virt/kvm') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 33ec43a87d0f..c8d0028df4ac 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3448,6 +3448,10 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) vcpu->vcpu_idx = atomic_read(&kvm->online_vcpus); BUG_ON(kvm->vcpus[vcpu->vcpu_idx]); + /* Fill the stats id string for the vcpu */ + snprintf(vcpu->stats_id, sizeof(vcpu->stats_id), "kvm-%d/vcpu-%d", + task_pid_nr(current), id); + /* Now it's all set up, let userspace reach it */ kvm_get_kvm(kvm); r = create_vcpu_fd(vcpu); @@ -3497,6 +3501,44 @@ static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset) return 0; } +static ssize_t kvm_vcpu_stats_read(struct file *file, char __user *user_buffer, + size_t size, loff_t *offset) +{ + struct kvm_vcpu *vcpu = file->private_data; + + return kvm_stats_read(vcpu->stats_id, &kvm_vcpu_stats_header, + &kvm_vcpu_stats_desc[0], &vcpu->stat, + sizeof(vcpu->stat), user_buffer, size, offset); +} + +static const struct file_operations kvm_vcpu_stats_fops = { + .read = kvm_vcpu_stats_read, + .llseek = noop_llseek, +}; + +static int kvm_vcpu_ioctl_get_stats_fd(struct kvm_vcpu *vcpu) +{ + int fd; + struct file *file; + char name[15 + ITOA_MAX_LEN + 1]; + + snprintf(name, sizeof(name), "kvm-vcpu-stats:%d", vcpu->vcpu_id); + + fd = get_unused_fd_flags(O_CLOEXEC); + if (fd < 0) + return fd; + + file = anon_inode_getfile(name, &kvm_vcpu_stats_fops, vcpu, O_RDONLY); + if (IS_ERR(file)) { + put_unused_fd(fd); + return PTR_ERR(file); + } + file->f_mode |= FMODE_PREAD; + fd_install(fd, file); + + return fd; +} + static long kvm_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -3694,6 +3736,10 @@ out_free1: r = kvm_arch_vcpu_ioctl_set_fpu(vcpu, fpu); break; } + case KVM_GET_STATS_FD: { + r = kvm_vcpu_ioctl_get_stats_fd(vcpu); + break; + } default: r = kvm_arch_vcpu_ioctl(filp, ioctl, arg); } @@ -3952,6 +3998,8 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) #else return 0; #endif + case KVM_CAP_BINARY_STATS_FD: + return 1; default: break; } @@ -5254,7 +5302,8 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, kmem_cache_create_usercopy("kvm_vcpu", vcpu_size, vcpu_align, SLAB_ACCOUNT, offsetof(struct kvm_vcpu, arch), - sizeof_field(struct kvm_vcpu, arch), + offsetofend(struct kvm_vcpu, stats_id) + - offsetof(struct kvm_vcpu, arch), NULL); if (!kvm_vcpu_cache) { r = -ENOMEM; -- cgit From bc9e9e672df9f16f3825320c53ec01b3d44add28 Mon Sep 17 00:00:00 2001 From: Jing Zhang Date: Wed, 23 Jun 2021 17:28:46 -0400 Subject: KVM: debugfs: Reuse binary stats descriptors To remove code duplication, use the binary stats descriptors in the implementation of the debugfs interface for statistics. This unifies the definition of statistics for the binary and debugfs interfaces. Signed-off-by: Jing Zhang Message-Id: <20210618222709.1858088-8-jingzhangos@google.com> Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 104 ++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 76 insertions(+), 28 deletions(-) (limited to 'virt/kvm') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index c8d0028df4ac..3dcc2abbfc60 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -115,7 +115,6 @@ static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_running_vcpu); struct dentry *kvm_debugfs_dir; EXPORT_SYMBOL_GPL(kvm_debugfs_dir); -static int kvm_debugfs_num_entries; static const struct file_operations stat_fops_per_vm; static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, @@ -860,9 +859,24 @@ static void kvm_free_memslots(struct kvm *kvm, struct kvm_memslots *slots) kvfree(slots); } +static umode_t kvm_stats_debugfs_mode(const struct _kvm_stats_desc *pdesc) +{ + switch (pdesc->desc.flags & KVM_STATS_TYPE_MASK) { + case KVM_STATS_TYPE_INSTANT: + return 0444; + case KVM_STATS_TYPE_CUMULATIVE: + case KVM_STATS_TYPE_PEAK: + default: + return 0644; + } +} + + static void kvm_destroy_vm_debugfs(struct kvm *kvm) { int i; + int kvm_debugfs_num_entries = kvm_vm_stats_header.num_desc + + kvm_vcpu_stats_header.num_desc; if (!kvm->debugfs_dentry) return; @@ -880,7 +894,10 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd) { char dir_name[ITOA_MAX_LEN * 2]; struct kvm_stat_data *stat_data; - struct kvm_stats_debugfs_item *p; + const struct _kvm_stats_desc *pdesc; + int i; + int kvm_debugfs_num_entries = kvm_vm_stats_header.num_desc + + kvm_vcpu_stats_header.num_desc; if (!debugfs_initialized()) return 0; @@ -894,15 +911,32 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd) if (!kvm->debugfs_stat_data) return -ENOMEM; - for (p = debugfs_entries; p->name; p++) { + for (i = 0; i < kvm_vm_stats_header.num_desc; ++i) { + pdesc = &kvm_vm_stats_desc[i]; stat_data = kzalloc(sizeof(*stat_data), GFP_KERNEL_ACCOUNT); if (!stat_data) return -ENOMEM; stat_data->kvm = kvm; - stat_data->dbgfs_item = p; - kvm->debugfs_stat_data[p - debugfs_entries] = stat_data; - debugfs_create_file(p->name, KVM_DBGFS_GET_MODE(p), + stat_data->desc = pdesc; + stat_data->kind = KVM_STAT_VM; + kvm->debugfs_stat_data[i] = stat_data; + debugfs_create_file(pdesc->name, kvm_stats_debugfs_mode(pdesc), + kvm->debugfs_dentry, stat_data, + &stat_fops_per_vm); + } + + for (i = 0; i < kvm_vcpu_stats_header.num_desc; ++i) { + pdesc = &kvm_vcpu_stats_desc[i]; + stat_data = kzalloc(sizeof(*stat_data), GFP_KERNEL_ACCOUNT); + if (!stat_data) + return -ENOMEM; + + stat_data->kvm = kvm; + stat_data->desc = pdesc; + stat_data->kind = KVM_STAT_VCPU; + kvm->debugfs_stat_data[i] = stat_data; + debugfs_create_file(pdesc->name, kvm_stats_debugfs_mode(pdesc), kvm->debugfs_dentry, stat_data, &stat_fops_per_vm); } @@ -4900,7 +4934,7 @@ static int kvm_debugfs_open(struct inode *inode, struct file *file, return -ENOENT; if (simple_attr_open(inode, file, get, - KVM_DBGFS_GET_MODE(stat_data->dbgfs_item) & 0222 + kvm_stats_debugfs_mode(stat_data->desc) & 0222 ? set : NULL, fmt)) { kvm_put_kvm(stat_data->kvm); @@ -4923,14 +4957,14 @@ static int kvm_debugfs_release(struct inode *inode, struct file *file) static int kvm_get_stat_per_vm(struct kvm *kvm, size_t offset, u64 *val) { - *val = *(u64 *)((void *)kvm + offset); + *val = *(u64 *)((void *)(&kvm->stat) + offset); return 0; } static int kvm_clear_stat_per_vm(struct kvm *kvm, size_t offset) { - *(u64 *)((void *)kvm + offset) = 0; + *(u64 *)((void *)(&kvm->stat) + offset) = 0; return 0; } @@ -4943,7 +4977,7 @@ static int kvm_get_stat_per_vcpu(struct kvm *kvm, size_t offset, u64 *val) *val = 0; kvm_for_each_vcpu(i, vcpu, kvm) - *val += *(u64 *)((void *)vcpu + offset); + *val += *(u64 *)((void *)(&vcpu->stat) + offset); return 0; } @@ -4954,7 +4988,7 @@ static int kvm_clear_stat_per_vcpu(struct kvm *kvm, size_t offset) struct kvm_vcpu *vcpu; kvm_for_each_vcpu(i, vcpu, kvm) - *(u64 *)((void *)vcpu + offset) = 0; + *(u64 *)((void *)(&vcpu->stat) + offset) = 0; return 0; } @@ -4964,14 +4998,14 @@ static int kvm_stat_data_get(void *data, u64 *val) int r = -EFAULT; struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data; - switch (stat_data->dbgfs_item->kind) { + switch (stat_data->kind) { case KVM_STAT_VM: r = kvm_get_stat_per_vm(stat_data->kvm, - stat_data->dbgfs_item->offset, val); + stat_data->desc->desc.offset, val); break; case KVM_STAT_VCPU: r = kvm_get_stat_per_vcpu(stat_data->kvm, - stat_data->dbgfs_item->offset, val); + stat_data->desc->desc.offset, val); break; } @@ -4986,14 +5020,14 @@ static int kvm_stat_data_clear(void *data, u64 val) if (val) return -EINVAL; - switch (stat_data->dbgfs_item->kind) { + switch (stat_data->kind) { case KVM_STAT_VM: r = kvm_clear_stat_per_vm(stat_data->kvm, - stat_data->dbgfs_item->offset); + stat_data->desc->desc.offset); break; case KVM_STAT_VCPU: r = kvm_clear_stat_per_vcpu(stat_data->kvm, - stat_data->dbgfs_item->offset); + stat_data->desc->desc.offset); break; } @@ -5050,6 +5084,7 @@ static int vm_stat_clear(void *_offset, u64 val) } DEFINE_SIMPLE_ATTRIBUTE(vm_stat_fops, vm_stat_get, vm_stat_clear, "%llu\n"); +DEFINE_SIMPLE_ATTRIBUTE(vm_stat_readonly_fops, vm_stat_get, NULL, "%llu\n"); static int vcpu_stat_get(void *_offset, u64 *val) { @@ -5086,11 +5121,7 @@ static int vcpu_stat_clear(void *_offset, u64 val) DEFINE_SIMPLE_ATTRIBUTE(vcpu_stat_fops, vcpu_stat_get, vcpu_stat_clear, "%llu\n"); - -static const struct file_operations *stat_fops[] = { - [KVM_STAT_VCPU] = &vcpu_stat_fops, - [KVM_STAT_VM] = &vm_stat_fops, -}; +DEFINE_SIMPLE_ATTRIBUTE(vcpu_stat_readonly_fops, vcpu_stat_get, NULL, "%llu\n"); static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm) { @@ -5144,15 +5175,32 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm) static void kvm_init_debug(void) { - struct kvm_stats_debugfs_item *p; + const struct file_operations *fops; + const struct _kvm_stats_desc *pdesc; + int i; kvm_debugfs_dir = debugfs_create_dir("kvm", NULL); - kvm_debugfs_num_entries = 0; - for (p = debugfs_entries; p->name; ++p, kvm_debugfs_num_entries++) { - debugfs_create_file(p->name, KVM_DBGFS_GET_MODE(p), - kvm_debugfs_dir, (void *)(long)p->offset, - stat_fops[p->kind]); + for (i = 0; i < kvm_vm_stats_header.num_desc; ++i) { + pdesc = &kvm_vm_stats_desc[i]; + if (kvm_stats_debugfs_mode(pdesc) & 0222) + fops = &vm_stat_fops; + else + fops = &vm_stat_readonly_fops; + debugfs_create_file(pdesc->name, kvm_stats_debugfs_mode(pdesc), + kvm_debugfs_dir, + (void *)(long)pdesc->desc.offset, fops); + } + + for (i = 0; i < kvm_vcpu_stats_header.num_desc; ++i) { + pdesc = &kvm_vcpu_stats_desc[i]; + if (kvm_stats_debugfs_mode(pdesc) & 0222) + fops = &vcpu_stat_fops; + else + fops = &vcpu_stat_readonly_fops; + debugfs_create_file(pdesc->name, kvm_stats_debugfs_mode(pdesc), + kvm_debugfs_dir, + (void *)(long)pdesc->desc.offset, fops); } } -- cgit