summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorJing Zhang <jingzhangos@google.com>2021-06-18 22:27:04 +0000
committerPaolo Bonzini <pbonzini@redhat.com>2021-06-24 11:47:57 -0400
commitcb082bfab59a224a49ae803fed52cd03e8d6b5e0 (patch)
treea2c44bb780df8a32581373e05a054b640687cf3f /include
parent0193cc908b5ae8aff2e2d2997ca5d4ae26ed24d4 (diff)
KVM: stats: Add fd-based API to read binary stats data
This commit defines the API for userspace and prepare the common functionalities to support per VM/VCPU binary stats data readings. The KVM stats now is only accessible by debugfs, which has some shortcomings this change series are supposed to fix: 1. The current debugfs stats solution in KVM could be disabled when kernel Lockdown mode is enabled, which is a potential rick for production. 2. The current debugfs stats solution in KVM is organized as "one stats per file", it is good for debugging, but not efficient for production. 3. The stats read/clear in current debugfs solution in KVM are protected by the global kvm_lock. Besides that, there are some other benefits with this change: 1. All KVM VM/VCPU stats can be read out in a bulk by one copy to userspace. 2. A schema is used to describe KVM statistics. From userspace's perspective, the KVM statistics are self-describing. 3. With the fd-based solution, a separate telemetry would be able to read KVM stats in a less privileged environment. 4. After the initial setup by reading in stats descriptors, a telemetry only needs to read the stats data itself, no more parsing or setup is needed. Reviewed-by: David Matlack <dmatlack@google.com> Reviewed-by: Ricardo Koller <ricarkol@google.com> Reviewed-by: Krish Sadhukhan <krish.sadhukhan@oracle.com> Reviewed-by: Fuad Tabba <tabba@google.com> Tested-by: Fuad Tabba <tabba@google.com> #arm64 Signed-off-by: Jing Zhang <jingzhangos@google.com> Message-Id: <20210618222709.1858088-3-jingzhangos@google.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Diffstat (limited to 'include')
-rw-r--r--include/linux/kvm_host.h82
-rw-r--r--include/linux/kvm_types.h2
-rw-r--r--include/uapi/linux/kvm.h73
3 files changed, 155 insertions, 2 deletions
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 37cbb56ccd09..9ee7f350473b 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1272,16 +1272,94 @@ struct kvm_stats_debugfs_item {
int mode;
};
+struct _kvm_stats_desc {
+ struct kvm_stats_desc desc;
+ char name[KVM_STATS_NAME_SIZE];
+};
+
#define KVM_DBGFS_GET_MODE(dbgfs_item) \
((dbgfs_item)->mode ? (dbgfs_item)->mode : 0644)
-#define VM_STAT(n, x, ...) \
+#define VM_STAT(n, x, ...) \
{ n, offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__ }
-#define VCPU_STAT(n, x, ...) \
+#define VCPU_STAT(n, x, ...) \
{ n, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__ }
+#define STATS_DESC_COMMON(type, unit, base, exp) \
+ .flags = type | unit | base | \
+ BUILD_BUG_ON_ZERO(type & ~KVM_STATS_TYPE_MASK) | \
+ BUILD_BUG_ON_ZERO(unit & ~KVM_STATS_UNIT_MASK) | \
+ BUILD_BUG_ON_ZERO(base & ~KVM_STATS_BASE_MASK), \
+ .exponent = exp, \
+ .size = 1
+
+#define VM_GENERIC_STATS_DESC(stat, type, unit, base, exp) \
+ { \
+ { \
+ STATS_DESC_COMMON(type, unit, base, exp), \
+ .offset = offsetof(struct kvm_vm_stat, generic.stat) \
+ }, \
+ .name = #stat, \
+ }
+#define VCPU_GENERIC_STATS_DESC(stat, type, unit, base, exp) \
+ { \
+ { \
+ STATS_DESC_COMMON(type, unit, base, exp), \
+ .offset = offsetof(struct kvm_vcpu_stat, generic.stat) \
+ }, \
+ .name = #stat, \
+ }
+#define VM_STATS_DESC(stat, type, unit, base, exp) \
+ { \
+ { \
+ STATS_DESC_COMMON(type, unit, base, exp), \
+ .offset = offsetof(struct kvm_vm_stat, stat) \
+ }, \
+ .name = #stat, \
+ }
+#define VCPU_STATS_DESC(stat, type, unit, base, exp) \
+ { \
+ { \
+ STATS_DESC_COMMON(type, unit, base, exp), \
+ .offset = offsetof(struct kvm_vcpu_stat, stat) \
+ }, \
+ .name = #stat, \
+ }
+/* SCOPE: VM, VM_GENERIC, VCPU, VCPU_GENERIC */
+#define STATS_DESC(SCOPE, stat, type, unit, base, exp) \
+ SCOPE##_STATS_DESC(stat, type, unit, base, exp)
+
+#define STATS_DESC_CUMULATIVE(SCOPE, name, unit, base, exponent) \
+ STATS_DESC(SCOPE, name, KVM_STATS_TYPE_CUMULATIVE, unit, base, exponent)
+#define STATS_DESC_INSTANT(SCOPE, name, unit, base, exponent) \
+ STATS_DESC(SCOPE, name, KVM_STATS_TYPE_INSTANT, unit, base, exponent)
+#define STATS_DESC_PEAK(SCOPE, name, unit, base, exponent) \
+ STATS_DESC(SCOPE, name, KVM_STATS_TYPE_PEAK, unit, base, exponent)
+
+/* Cumulative counter, read/write */
+#define STATS_DESC_COUNTER(SCOPE, name) \
+ STATS_DESC_CUMULATIVE(SCOPE, name, KVM_STATS_UNIT_NONE, \
+ KVM_STATS_BASE_POW10, 0)
+/* Instantaneous counter, read only */
+#define STATS_DESC_ICOUNTER(SCOPE, name) \
+ STATS_DESC_INSTANT(SCOPE, name, KVM_STATS_UNIT_NONE, \
+ KVM_STATS_BASE_POW10, 0)
+/* Peak counter, read/write */
+#define STATS_DESC_PCOUNTER(SCOPE, name) \
+ STATS_DESC_PEAK(SCOPE, name, KVM_STATS_UNIT_NONE, \
+ KVM_STATS_BASE_POW10, 0)
+
+/* Cumulative time in nanosecond */
+#define STATS_DESC_TIME_NSEC(SCOPE, name) \
+ STATS_DESC_CUMULATIVE(SCOPE, name, KVM_STATS_UNIT_SECONDS, \
+ KVM_STATS_BASE_POW10, -9)
+
extern struct kvm_stats_debugfs_item debugfs_entries[];
extern struct dentry *kvm_debugfs_dir;
+ssize_t kvm_stats_read(char *id, const struct kvm_stats_header *header,
+ const struct _kvm_stats_desc *desc,
+ void *stats, size_t size_stats,
+ char __user *user_buffer, size_t size, loff_t *offset);
#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq)
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index 48db778291b7..ed6a985c5680 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -89,4 +89,6 @@ struct kvm_vcpu_stat_generic {
u64 halt_poll_fail_ns;
};
+#define KVM_STATS_NAME_SIZE 48
+
#endif /* __KVM_TYPES_H__ */
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 330835f1005b..f1ba602260f6 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1087,6 +1087,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_SREGS2 200
#define KVM_CAP_EXIT_HYPERCALL 201
#define KVM_CAP_PPC_RPT_INVALIDATE 202
+#define KVM_CAP_BINARY_STATS_FD 203
#ifdef KVM_CAP_IRQ_ROUTING
@@ -1906,4 +1907,76 @@ struct kvm_dirty_gfn {
#define KVM_BUS_LOCK_DETECTION_OFF (1 << 0)
#define KVM_BUS_LOCK_DETECTION_EXIT (1 << 1)
+/**
+ * struct kvm_stats_header - Header of per vm/vcpu binary statistics data.
+ * @flags: Some extra information for header, always 0 for now.
+ * @name_size: The size in bytes of the memory which contains statistics
+ * name string including trailing '\0'. The memory is allocated
+ * at the send of statistics descriptor.
+ * @num_desc: The number of statistics the vm or vcpu has.
+ * @id_offset: The offset of the vm/vcpu stats' id string in the file pointed
+ * by vm/vcpu stats fd.
+ * @desc_offset: The offset of the vm/vcpu stats' descriptor block in the file
+ * pointd by vm/vcpu stats fd.
+ * @data_offset: The offset of the vm/vcpu stats' data block in the file
+ * pointed by vm/vcpu stats fd.
+ *
+ * This is the header userspace needs to read from stats fd before any other
+ * readings. It is used by userspace to discover all the information about the
+ * vm/vcpu's binary statistics.
+ * Userspace reads this header from the start of the vm/vcpu's stats fd.
+ */
+struct kvm_stats_header {
+ __u32 flags;
+ __u32 name_size;
+ __u32 num_desc;
+ __u32 id_offset;
+ __u32 desc_offset;
+ __u32 data_offset;
+};
+
+#define KVM_STATS_TYPE_SHIFT 0
+#define KVM_STATS_TYPE_MASK (0xF << KVM_STATS_TYPE_SHIFT)
+#define KVM_STATS_TYPE_CUMULATIVE (0x0 << KVM_STATS_TYPE_SHIFT)
+#define KVM_STATS_TYPE_INSTANT (0x1 << KVM_STATS_TYPE_SHIFT)
+#define KVM_STATS_TYPE_PEAK (0x2 << KVM_STATS_TYPE_SHIFT)
+#define KVM_STATS_TYPE_MAX KVM_STATS_TYPE_PEAK
+
+#define KVM_STATS_UNIT_SHIFT 4
+#define KVM_STATS_UNIT_MASK (0xF << KVM_STATS_UNIT_SHIFT)
+#define KVM_STATS_UNIT_NONE (0x0 << KVM_STATS_UNIT_SHIFT)
+#define KVM_STATS_UNIT_BYTES (0x1 << KVM_STATS_UNIT_SHIFT)
+#define KVM_STATS_UNIT_SECONDS (0x2 << KVM_STATS_UNIT_SHIFT)
+#define KVM_STATS_UNIT_CYCLES (0x3 << KVM_STATS_UNIT_SHIFT)
+#define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_CYCLES
+
+#define KVM_STATS_BASE_SHIFT 8
+#define KVM_STATS_BASE_MASK (0xF << KVM_STATS_BASE_SHIFT)
+#define KVM_STATS_BASE_POW10 (0x0 << KVM_STATS_BASE_SHIFT)
+#define KVM_STATS_BASE_POW2 (0x1 << KVM_STATS_BASE_SHIFT)
+#define KVM_STATS_BASE_MAX KVM_STATS_BASE_POW2
+
+/**
+ * struct kvm_stats_desc - Descriptor of a KVM statistics.
+ * @flags: Annotations of the stats, like type, unit, etc.
+ * @exponent: Used together with @flags to determine the unit.
+ * @size: The number of data items for this stats.
+ * Every data item is of type __u64.
+ * @offset: The offset of the stats to the start of stat structure in
+ * struture kvm or kvm_vcpu.
+ * @unused: Unused field for future usage. Always 0 for now.
+ * @name: The name string for the stats. Its size is indicated by the
+ * &kvm_stats_header->name_size.
+ */
+struct kvm_stats_desc {
+ __u32 flags;
+ __s16 exponent;
+ __u16 size;
+ __u32 offset;
+ __u32 unused;
+ char name[];
+};
+
+#define KVM_GET_STATS_FD _IO(KVMIO, 0xce)
+
#endif /* __LINUX_KVM_H */