summaryrefslogtreecommitdiff
path: root/virt/kvm/binary_stats.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-06-28 15:40:51 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-06-28 15:40:51 -0700
commit36824f198c621cebeb22966b5e244378fa341295 (patch)
treeee1e358a4ed0cd022ae12b4b7ba1fa3d0e5746d5 /virt/kvm/binary_stats.c
parent9840cfcb97fc8b6aa7b36cec3cc3fd763f14052e (diff)
parentb8917b4ae44d1b945f6fba3d8ee6777edb44633b (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Paolo Bonzini: "This covers all architectures (except MIPS) so I don't expect any other feature pull requests this merge window. ARM: - Add MTE support in guests, complete with tag save/restore interface - Reduce the impact of CMOs by moving them in the page-table code - Allow device block mappings at stage-2 - Reduce the footprint of the vmemmap in protected mode - Support the vGIC on dumb systems such as the Apple M1 - Add selftest infrastructure to support multiple configuration and apply that to PMU/non-PMU setups - Add selftests for the debug architecture - The usual crop of PMU fixes PPC: - Support for the H_RPT_INVALIDATE hypercall - Conversion of Book3S entry/exit to C - Bug fixes S390: - new HW facilities for guests - make inline assembly more robust with KASAN and co x86: - Allow userspace to handle emulation errors (unknown instructions) - Lazy allocation of the rmap (host physical -> guest physical address) - Support for virtualizing TSC scaling on VMX machines - Optimizations to avoid shattering huge pages at the beginning of live migration - Support for initializing the PDPTRs without loading them from memory - Many TLB flushing cleanups - Refuse to load if two-stage paging is available but NX is not (this has been a requirement in practice for over a year) - A large series that separates the MMU mode (WP/SMAP/SMEP etc.) from CR0/CR4/EFER, using the MMU mode everywhere once it is computed from the CPU registers - Use PM notifier to notify the guest about host suspend or hibernate - Support for passing arguments to Hyper-V hypercalls using XMM registers - Support for Hyper-V TLB flush hypercalls and enlightened MSR bitmap on AMD processors - Hide Hyper-V hypercalls that are not included in the guest CPUID - Fixes for live migration of virtual machines that use the Hyper-V "enlightened VMCS" optimization of nested virtualization - Bugfixes (not many) Generic: - Support for retrieving statistics without debugfs - Cleanups for the KVM selftests API" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (314 commits) KVM: x86: rename apic_access_page_done to apic_access_memslot_enabled kvm: x86: disable the narrow guest module parameter on unload selftests: kvm: Allows userspace to handle emulation errors. kvm: x86: Allow userspace to handle emulation errors KVM: x86/mmu: Let guest use GBPAGES if supported in hardware and TDP is on KVM: x86/mmu: Get CR4.SMEP from MMU, not vCPU, in shadow page fault KVM: x86/mmu: Get CR0.WP from MMU, not vCPU, in shadow page fault KVM: x86/mmu: Drop redundant rsvd bits reset for nested NPT KVM: x86/mmu: Optimize and clean up so called "last nonleaf level" logic KVM: x86: Enhance comments for MMU roles and nested transition trickiness KVM: x86/mmu: WARN on any reserved SPTE value when making a valid SPTE KVM: x86/mmu: Add helpers to do full reserved SPTE checks w/ generic MMU KVM: x86/mmu: Use MMU's role to determine PTTYPE KVM: x86/mmu: Collapse 32-bit PAE and 64-bit statements for helpers KVM: x86/mmu: Add a helper to calculate root from role_regs KVM: x86/mmu: Add helper to update paging metadata KVM: x86/mmu: Don't update nested guest's paging bitmasks if CR0.PG=0 KVM: x86/mmu: Consolidate reset_rsvds_bits_mask() calls KVM: x86/mmu: Use MMU role_regs to get LA57, and drop vCPU LA57 helper KVM: x86/mmu: Get nested MMU's root level from the MMU's role ...
Diffstat (limited to 'virt/kvm/binary_stats.c')
-rw-r--r--virt/kvm/binary_stats.c146
1 files changed, 146 insertions, 0 deletions
diff --git a/virt/kvm/binary_stats.c b/virt/kvm/binary_stats.c
new file mode 100644
index 000000000000..e609d428811a
--- /dev/null
+++ b/virt/kvm/binary_stats.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * KVM binary statistics interface implementation
+ *
+ * Copyright 2021 Google LLC
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+#include <linux/errno.h>
+#include <linux/uaccess.h>
+
+/**
+ * kvm_stats_read() - Common function to read from the binary statistics
+ * file descriptor.
+ *
+ * @id: identification string of the stats
+ * @header: stats header for a vm or a vcpu
+ * @desc: start address of an array of stats descriptors for a vm or a vcpu
+ * @stats: start address of stats data block for a vm or a vcpu
+ * @size_stats: the size of stats data block pointed by @stats
+ * @user_buffer: start address of userspace buffer
+ * @size: requested read size from userspace
+ * @offset: the start position from which the content will be read for the
+ * corresponding vm or vcp file descriptor
+ *
+ * The file content of a vm/vcpu file descriptor is now defined as below:
+ * +-------------+
+ * | Header |
+ * +-------------+
+ * | id string |
+ * +-------------+
+ * | Descriptors |
+ * +-------------+
+ * | Stats Data |
+ * +-------------+
+ * Although this function allows userspace to read any amount of data (as long
+ * as in the limit) from any position, the typical usage would follow below
+ * steps:
+ * 1. Read header from offset 0. Get the offset of descriptors and stats data
+ * and some other necessary information. This is a one-time work for the
+ * lifecycle of the corresponding vm/vcpu stats fd.
+ * 2. Read id string from its offset. This is a one-time work for the lifecycle
+ * of the corresponding vm/vcpu stats fd.
+ * 3. Read descriptors from its offset and discover all the stats by parsing
+ * descriptors. This is a one-time work for the lifecycle of the
+ * corresponding vm/vcpu stats fd.
+ * 4. Periodically read stats data from its offset using pread.
+ *
+ * Return: the number of bytes that has been successfully read
+ */
+ssize_t kvm_stats_read(char *id, const struct kvm_stats_header *header,
+ const struct _kvm_stats_desc *desc,
+ void *stats, size_t size_stats,
+ char __user *user_buffer, size_t size, loff_t *offset)
+{
+ ssize_t len;
+ ssize_t copylen;
+ ssize_t remain = size;
+ size_t size_desc;
+ size_t size_header;
+ void *src;
+ loff_t pos = *offset;
+ char __user *dest = user_buffer;
+
+ size_header = sizeof(*header);
+ size_desc = header->num_desc * sizeof(*desc);
+
+ len = KVM_STATS_NAME_SIZE + size_header + size_desc + size_stats - pos;
+ len = min(len, remain);
+ if (len <= 0)
+ return 0;
+ remain = len;
+
+ /*
+ * Copy kvm stats header.
+ * The header is the first block of content userspace usually read out.
+ * The pos is 0 and the copylen and remain would be the size of header.
+ * The copy of the header would be skipped if offset is larger than the
+ * size of header. That usually happens when userspace reads stats
+ * descriptors and stats data.
+ */
+ copylen = size_header - pos;
+ copylen = min(copylen, remain);
+ if (copylen > 0) {
+ src = (void *)header + pos;
+ if (copy_to_user(dest, src, copylen))
+ return -EFAULT;
+ remain -= copylen;
+ pos += copylen;
+ dest += copylen;
+ }
+
+ /*
+ * Copy kvm stats header id string.
+ * The id string is unique for every vm/vcpu, which is stored in kvm
+ * and kvm_vcpu structure.
+ * The id string is part of the stat header from the perspective of
+ * userspace, it is usually read out together with previous constant
+ * header part and could be skipped for later descriptors and stats
+ * data readings.
+ */
+ copylen = header->id_offset + KVM_STATS_NAME_SIZE - pos;
+ copylen = min(copylen, remain);
+ if (copylen > 0) {
+ src = id + pos - header->id_offset;
+ if (copy_to_user(dest, src, copylen))
+ return -EFAULT;
+ remain -= copylen;
+ pos += copylen;
+ dest += copylen;
+ }
+
+ /*
+ * Copy kvm stats descriptors.
+ * The descriptors copy would be skipped in the typical case that
+ * userspace periodically read stats data, since the pos would be
+ * greater than the end address of descriptors
+ * (header->header.desc_offset + size_desc) causing copylen <= 0.
+ */
+ copylen = header->desc_offset + size_desc - pos;
+ copylen = min(copylen, remain);
+ if (copylen > 0) {
+ src = (void *)desc + pos - header->desc_offset;
+ if (copy_to_user(dest, src, copylen))
+ return -EFAULT;
+ remain -= copylen;
+ pos += copylen;
+ dest += copylen;
+ }
+
+ /* Copy kvm stats values */
+ copylen = header->data_offset + size_stats - pos;
+ copylen = min(copylen, remain);
+ if (copylen > 0) {
+ src = stats + pos - header->data_offset;
+ if (copy_to_user(dest, src, copylen))
+ return -EFAULT;
+ remain -= copylen;
+ pos += copylen;
+ dest += copylen;
+ }
+
+ *offset = pos;
+ return len;
+}