author     | Paolo Bonzini <pbonzini@redhat.com> | 2022-07-29 09:46:01 -0400
committer  | Paolo Bonzini <pbonzini@redhat.com> | 2022-08-01 03:21:00 -0400
commit     | 63f4b210414b65aa3103c54369cacbd0b1bdf02f (patch)
tree       | 2dc7b490d3a89306669c70256a41764ca52ab3b3 /include
parent     | 2e2e91158febfeb73b5d4f249440218304f34101 (diff)
parent     | 7edc3a68038ab151a8791ddb6217755a5e4a5809 (diff)
Merge remote-tracking branch 'kvm/next' into kvm-next-5.20
KVM/s390, KVM/x86 and common infrastructure changes for 5.20
x86:
* Permit guests to ignore single-bit ECC errors
* Fix races in gfn->pfn cache refresh; do not pin pages tracked by the cache
* Intel IPI virtualization
* Allow getting/setting pending triple fault with KVM_GET/SET_VCPU_EVENTS
* PEBS virtualization
* Simplify PMU emulation by just using PERF_TYPE_RAW events
* More accurate event reinjection on SVM (avoid retrying instructions)
* Allow getting/setting the state of the speaker port data bit
* Refuse to load the kvm-intel module if VM-Entry/VM-Exit controls are inconsistent
* "Notify" VM exit (detect microarchitectural hangs) for Intel
* Cleanups for MCE MSR emulation
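The new exit reason and its payload appear in the include/uapi/linux/kvm.h hunk further down (KVM_EXIT_NOTIFY, run->notify.flags, KVM_NOTIFY_CONTEXT_INVALID). A minimal userspace sketch of handling it, assuming the usual KVM run-loop boilerplate (vCPU fd, mmap'd kvm_run) already exists, that the kernel headers include this series, and that the capability was enabled with both KVM_X86_NOTIFY_VMEXIT_ENABLED and KVM_X86_NOTIFY_VMEXIT_USER:

#include <linux/kvm.h>
#include <stdio.h>
#include <stdlib.h>

/* Sketch: handle a single exit reason out of a larger run loop. */
static void handle_notify_exit(struct kvm_run *run)
{
	if (run->exit_reason != KVM_EXIT_NOTIFY)
		return;

	if (run->notify.flags & KVM_NOTIFY_CONTEXT_INVALID) {
		/* The vCPU state can no longer be trusted; give up. */
		fprintf(stderr, "notify exit with invalid context\n");
		exit(EXIT_FAILURE);
	}

	/* Otherwise treat the exit as informational and resume the vCPU. */
}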
s390:
* Add an interface to provide a hypervisor dump for secure guests (a userspace sketch follows this list)
* Improve selftests to use the TAP interface
* Enable interpretive execution of zPCI instructions (for PCI passthrough)
* First part of deferred teardown
* CPU Topology
* PV attestation
* Minor fixes
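The dump interface shows up in this diff as the KVM_PV_INFO/KVM_PV_DUMP subcommands and the kvm_s390_pv_info/kvm_s390_pv_dmp structures added to include/uapi/linux/kvm.h. As a rough illustration (not taken from the series' documentation), querying the required dump buffer sizes could look like the sketch below; the vm_fd, the pre-existing KVM_S390_PV_COMMAND ioctl, and the struct kvm_pv_cmd layout are assumed:

#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

/* Hypothetical helper: ask KVM how large the dump buffers must be. */
static int query_pv_dump_info(int vm_fd, struct kvm_s390_pv_info *info)
{
	struct kvm_pv_cmd cmd = {
		.cmd = KVM_PV_INFO,
		.data = (__u64)(unsigned long)info,
	};

	memset(info, 0, sizeof(*info));
	info->header.id = KVM_PV_INFO_DUMP;
	info->header.len_max = sizeof(info->header) + sizeof(info->dump);

	/* On success, info->dump.dump_cpu_buffer_len etc. are filled in. */
	return ioctl(vm_fd, KVM_S390_PV_COMMAND, &cmd);
}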
Generic:
* New selftests API using struct kvm_vcpu instead of a (vm, id) tuple (sketched below)
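For illustration, a trivial test under the reworked API looks roughly like the sketch below (helper names as in tools/testing/selftests/kvm after this series; treat the exact signatures as an assumption rather than a reference):

#include "kvm_util.h"

static void guest_code(void)
{
	GUEST_DONE();
}

int main(void)
{
	struct kvm_vcpu *vcpu;
	struct kvm_vm *vm;

	/* Create a VM with a single vCPU that runs guest_code. */
	vm = vm_create_with_one_vcpu(&vcpu, guest_code);

	/* The vCPU object, not a (vm, vcpu_id) tuple, is passed around. */
	vcpu_run(vcpu);

	kvm_vm_free(vm);
	return 0;
}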
x86:
* Use try_cmpxchg64() instead of cmpxchg64() (see the sketch after this list)
* Bugfixes
* Ignore benign host accesses to PMU MSRs when PMU is disabled
* Allow disabling KVM's "MONITOR/MWAIT are NOPs!" behavior
* x86/MMU: Allow NX huge pages to be disabled on a per-VM basis
* Port eager page splitting to shadow MMU as well
* Enable CMCI capability by default and handle injected UCNA errors
* Expose the PID of vCPU threads in debugfs
* x2AVIC support for AMD
* Clean up PIO emulation
* Fixes for LLDT/LTR emulation
* Don't require refcounted "struct page" to create huge SPTEs
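The try_cmpxchg64() conversion mentioned above follows the usual kernel pattern; the sketch below uses made-up variable names rather than actual KVM code, to show why the helper is preferred: it refreshes the expected value on failure, so the retry loop needs no explicit re-read.

#include <linux/atomic.h>
#include <linux/types.h>

/* Illustrative only: atomically set a flag bit in a shared 64-bit word. */
static void set_flag_bit(u64 *ptr, u64 bit)
{
	u64 old = READ_ONCE(*ptr);

	/*
	 * Old style with cmpxchg64(), which returns the value it found:
	 *	u64 cur;
	 *	while ((cur = cmpxchg64(ptr, old, old | bit)) != old)
	 *		old = cur;
	 *
	 * try_cmpxchg64() returns a bool and writes the current value back
	 * into "old" when the compare fails, so the loop body stays empty.
	 */
	while (!try_cmpxchg64(ptr, &old, old | bit))
		;
}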
x86 cleanups:
* Use separate namespaces for guest PTEs and shadow PTEs bitmasks
* PIO emulation
* Reorganize rmap API, mostly around rmap destruction
* Do not work around very old KVM bugs for an L0 that runs with nesting enabled
* New selftests API for CPUID (sketched below)
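As a rough sketch of the CPUID selftests rework (helper and macro names as I understand them from the selftests tree; treat them as assumptions), feature checks go through a central helper instead of open-coded CPUID parsing:

#include "kvm_util.h"
#include "processor.h"

int main(void)
{
	/*
	 * kvm_cpu_has() consults the KVM-supported CPUID once and caches it;
	 * the X86_FEATURE_* tokens encode function/index/register/bit.
	 */
	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE));

	return 0;
}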
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/kvm_host.h       |  20
-rw-r--r-- | include/linux/kvm_types.h      |   8
-rw-r--r-- | include/linux/sched/user.h     |   3
-rw-r--r-- | include/linux/vfio_pci_core.h  |  12
-rw-r--r-- | include/uapi/linux/kvm.h       | 100
-rw-r--r-- | include/uapi/linux/vfio_zdev.h |   7
6 files changed, 132 insertions, 18 deletions
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 90a45ef7203b..1c480b1821e1 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -907,11 +907,6 @@ static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id)
 	return NULL;
 }
 
-static inline int kvm_vcpu_get_idx(struct kvm_vcpu *vcpu)
-{
-	return vcpu->vcpu_idx;
-}
-
 void kvm_destroy_vcpus(struct kvm *kvm);
 
 void vcpu_load(struct kvm_vcpu *vcpu);
@@ -1139,7 +1134,6 @@ unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot, gfn_t gfn,
 				      bool *writable);
 void kvm_release_page_clean(struct page *page);
 void kvm_release_page_dirty(struct page *page);
-void kvm_set_page_accessed(struct page *page);
 
 kvm_pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
 kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
@@ -1231,7 +1225,6 @@ struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn
 kvm_pfn_t kvm_vcpu_gfn_to_pfn_atomic(struct kvm_vcpu *vcpu, gfn_t gfn);
 kvm_pfn_t kvm_vcpu_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn);
 int kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_host_map *map);
-struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn);
 void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty);
 unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn);
 unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable);
@@ -1358,6 +1351,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm);
 
 #ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE
 int kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min);
+int __kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int capacity, int min);
 int kvm_mmu_memory_cache_nr_free_objects(struct kvm_mmu_memory_cache *mc);
 void kvm_mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc);
 void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc);
@@ -1436,6 +1430,8 @@ int kvm_arch_pm_notifier(struct kvm *kvm, unsigned long state);
 
 #ifdef __KVM_HAVE_ARCH_VCPU_DEBUGFS
 void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry);
+#else
+static inline void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) {}
 #endif
 
 int kvm_arch_hardware_enable(void);
@@ -1572,8 +1568,8 @@ void kvm_arch_sync_events(struct kvm *kvm);
 
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
 
-bool kvm_is_reserved_pfn(kvm_pfn_t pfn);
-bool kvm_is_zone_device_pfn(kvm_pfn_t pfn);
+struct page *kvm_pfn_to_refcounted_page(kvm_pfn_t pfn);
+bool kvm_is_zone_device_page(struct page *page);
 
 struct kvm_irq_ack_notifier {
 	struct hlist_node link;
@@ -1719,12 +1715,6 @@ static inline hpa_t pfn_to_hpa(kvm_pfn_t pfn)
 	return (hpa_t)pfn << PAGE_SHIFT;
 }
 
-static inline struct page *kvm_vcpu_gpa_to_page(struct kvm_vcpu *vcpu,
-						gpa_t gpa)
-{
-	return kvm_vcpu_gfn_to_page(vcpu, gpa_to_gfn(gpa));
-}
-
 static inline bool kvm_is_error_gpa(struct kvm *kvm, gpa_t gpa)
 {
 	unsigned long hva = gfn_to_hva(kvm, gpa_to_gfn(gpa));
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index 1dcfba68076a..3ca3db020e0e 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -19,6 +19,7 @@ struct kvm_memslots;
 enum kvm_mr_change;
 
 #include <linux/bits.h>
+#include <linux/mutex.h>
 #include <linux/types.h>
 #include <linux/spinlock_types.h>
 
@@ -69,6 +70,7 @@ struct gfn_to_pfn_cache {
 	struct kvm_vcpu *vcpu;
 	struct list_head list;
 	rwlock_t lock;
+	struct mutex refresh_lock;
 	void *khva;
 	kvm_pfn_t pfn;
 	enum pfn_cache_usage usage;
@@ -83,13 +85,17 @@ struct gfn_to_pfn_cache {
  * MMU flows is problematic, as is triggering reclaim, I/O, etc... while
  * holding MMU locks. Note, these caches act more like prefetch buffers than
  * classical caches, i.e. objects are not returned to the cache on being freed.
+ *
+ * The @capacity field and @objects array are lazily initialized when the cache
+ * is topped up (__kvm_mmu_topup_memory_cache()).
  */
 struct kvm_mmu_memory_cache {
 	int nobjs;
 	gfp_t gfp_zero;
 	gfp_t gfp_custom;
 	struct kmem_cache *kmem_cache;
-	void *objects[KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE];
+	int capacity;
+	void **objects;
 };
 
 #endif
diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h
index 00ed419dd464..f054d0360a75 100644
--- a/include/linux/sched/user.h
+++ b/include/linux/sched/user.h
@@ -24,7 +24,8 @@ struct user_struct {
 	kuid_t uid;
 
 #if defined(CONFIG_PERF_EVENTS) || defined(CONFIG_BPF_SYSCALL) || \
-	defined(CONFIG_NET) || defined(CONFIG_IO_URING)
+	defined(CONFIG_NET) || defined(CONFIG_IO_URING) || \
+	defined(CONFIG_VFIO_PCI_ZDEV_KVM)
 	atomic_long_t locked_vm;
 #endif
 #ifdef CONFIG_WATCH_QUEUE
diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
index 23c176d4b073..d5d9e17f0156 100644
--- a/include/linux/vfio_pci_core.h
+++ b/include/linux/vfio_pci_core.h
@@ -206,15 +206,25 @@ static inline int vfio_pci_igd_init(struct vfio_pci_core_device *vdev)
 }
 #endif
 
-#ifdef CONFIG_S390
+#ifdef CONFIG_VFIO_PCI_ZDEV_KVM
 extern int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev,
 				       struct vfio_info_cap *caps);
+int vfio_pci_zdev_open_device(struct vfio_pci_core_device *vdev);
+void vfio_pci_zdev_close_device(struct vfio_pci_core_device *vdev);
 #else
 static inline int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev,
 					      struct vfio_info_cap *caps)
 {
 	return -ENODEV;
 }
+
+static inline int vfio_pci_zdev_open_device(struct vfio_pci_core_device *vdev)
+{
+	return 0;
+}
+
+static inline void vfio_pci_zdev_close_device(struct vfio_pci_core_device *vdev)
+{}
 #endif
 
 /* Will be exported for vfio pci drivers usage */
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 0c1f42a40fd3..c823a136f923 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -271,6 +271,7 @@ struct kvm_xen_exit {
 #define KVM_EXIT_XEN              34
 #define KVM_EXIT_RISCV_SBI        35
 #define KVM_EXIT_RISCV_CSR        36
+#define KVM_EXIT_NOTIFY           37
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 /* Emulate instruction failed. */
@@ -504,6 +505,11 @@ struct kvm_run {
 			unsigned long write_mask;
 			unsigned long ret_value;
 		} riscv_csr;
+		/* KVM_EXIT_NOTIFY */
+		struct {
+#define KVM_NOTIFY_CONTEXT_INVALID	(1 << 0)
+			__u32 flags;
+		} notify;
 		/* Fix the size of the union. */
 		char padding[256];
 	};
@@ -1165,6 +1171,12 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_VM_TSC_CONTROL 214
 #define KVM_CAP_SYSTEM_EVENT_DATA 215
 #define KVM_CAP_ARM_SYSTEM_SUSPEND 216
+#define KVM_CAP_S390_PROTECTED_DUMP 217
+#define KVM_CAP_X86_TRIPLE_FAULT_EVENT 218
+#define KVM_CAP_X86_NOTIFY_VMEXIT 219
+#define KVM_CAP_VM_DISABLE_NX_HUGE_PAGES 220
+#define KVM_CAP_S390_ZPCI_OP 221
+#define KVM_CAP_S390_CPU_TOPOLOGY 222
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1668,6 +1680,55 @@ struct kvm_s390_pv_unp {
 	__u64 tweak;
 };
 
+enum pv_cmd_dmp_id {
+	KVM_PV_DUMP_INIT,
+	KVM_PV_DUMP_CONFIG_STOR_STATE,
+	KVM_PV_DUMP_COMPLETE,
+	KVM_PV_DUMP_CPU,
+};
+
+struct kvm_s390_pv_dmp {
+	__u64 subcmd;
+	__u64 buff_addr;
+	__u64 buff_len;
+	__u64 gaddr;		/* For dump storage state */
+	__u64 reserved[4];
+};
+
+enum pv_cmd_info_id {
+	KVM_PV_INFO_VM,
+	KVM_PV_INFO_DUMP,
+};
+
+struct kvm_s390_pv_info_dump {
+	__u64 dump_cpu_buffer_len;
+	__u64 dump_config_mem_buffer_per_1m;
+	__u64 dump_config_finalize_len;
+};
+
+struct kvm_s390_pv_info_vm {
+	__u64 inst_calls_list[4];
+	__u64 max_cpus;
+	__u64 max_guests;
+	__u64 max_guest_addr;
+	__u64 feature_indication;
+};
+
+struct kvm_s390_pv_info_header {
+	__u32 id;
+	__u32 len_max;
+	__u32 len_written;
+	__u32 reserved;
+};
+
+struct kvm_s390_pv_info {
+	struct kvm_s390_pv_info_header header;
+	union {
+		struct kvm_s390_pv_info_dump dump;
+		struct kvm_s390_pv_info_vm vm;
+	};
+};
+
 enum pv_cmd_id {
 	KVM_PV_ENABLE,
 	KVM_PV_DISABLE,
@@ -1676,6 +1737,8 @@ enum pv_cmd_id {
 	KVM_PV_VERIFY,
 	KVM_PV_PREP_RESET,
 	KVM_PV_UNSHARE_ALL,
+	KVM_PV_INFO,
+	KVM_PV_DUMP,
 };
 
 struct kvm_pv_cmd {
@@ -2127,4 +2190,41 @@ struct kvm_stats_desc {
 /* Available with KVM_CAP_XSAVE2 */
 #define KVM_GET_XSAVE2		_IOR(KVMIO, 0xcf, struct kvm_xsave)
 
+/* Available with KVM_CAP_S390_PROTECTED_DUMP */
+#define KVM_S390_PV_CPU_COMMAND	_IOWR(KVMIO, 0xd0, struct kvm_pv_cmd)
+
+/* Available with KVM_CAP_X86_NOTIFY_VMEXIT */
+#define KVM_X86_NOTIFY_VMEXIT_ENABLED	(1ULL << 0)
+#define KVM_X86_NOTIFY_VMEXIT_USER	(1ULL << 1)
+
+/* Available with KVM_CAP_S390_ZPCI_OP */
+#define KVM_S390_ZPCI_OP	_IOW(KVMIO, 0xd1, struct kvm_s390_zpci_op)
+
+struct kvm_s390_zpci_op {
+	/* in */
+	__u32 fh;		/* target device */
+	__u8  op;		/* operation to perform */
+	__u8  pad[3];
+	union {
+		/* for KVM_S390_ZPCIOP_REG_AEN */
+		struct {
+			__u64 ibv;	/* Guest addr of interrupt bit vector */
+			__u64 sb;	/* Guest addr of summary bit */
+			__u32 flags;
+			__u32 noi;	/* Number of interrupts */
+			__u8 isc;	/* Guest interrupt subclass */
+			__u8 sbo;	/* Offset of guest summary bit vector */
+			__u16 pad;
+		} reg_aen;
+		__u64 reserved[8];
+	} u;
+};
+
+/* types for kvm_s390_zpci_op->op */
+#define KVM_S390_ZPCIOP_REG_AEN		0
+#define KVM_S390_ZPCIOP_DEREG_AEN	1
+
+/* flags for kvm_s390_zpci_op->u.reg_aen.flags */
+#define KVM_S390_ZPCIOP_REGAEN_HOST	(1 << 0)
+
 #endif /* __LINUX_KVM_H */
diff --git a/include/uapi/linux/vfio_zdev.h b/include/uapi/linux/vfio_zdev.h
index b4309397b6b2..77f2aff1f27e 100644
--- a/include/uapi/linux/vfio_zdev.h
+++ b/include/uapi/linux/vfio_zdev.h
@@ -29,6 +29,9 @@ struct vfio_device_info_cap_zpci_base {
 	__u16 fmb_length;	/* Measurement Block Length (in bytes) */
 	__u8 pft;		/* PCI Function Type */
 	__u8 gid;		/* PCI function group ID */
+	/* End of version 1 */
+	__u32 fh;		/* PCI function handle */
+	/* End of version 2 */
 };
 
 /**
@@ -47,6 +50,10 @@ struct vfio_device_info_cap_zpci_group {
 	__u16 noi;		/* Maximum number of MSIs */
 	__u16 maxstbl;		/* Maximum Store Block Length */
 	__u8 version;		/* Supported PCI Version */
+	/* End of version 1 */
+	__u8 reserved;
+	__u16 imaxstbl;		/* Maximum Interpreted Store Block Length */
+	/* End of version 2 */
 };
 
 /**
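For context on the kvm_mmu_memory_cache change above (the fixed-size objects[] array replaced by a capacity plus pointer pair): arch code is expected to size the cache at top-up time, along the lines of the sketch below. The function names are the ones declared in this diff; the surrounding helper and the numbers are illustrative only.

#include <linux/kvm_host.h>

/* Hypothetical arch-side helper, not taken from the series. */
static int topup_example_cache(struct kvm_mmu_memory_cache *cache)
{
	/*
	 * The first top-up lazily allocates the objects[] backing store with
	 * the requested capacity (512 and 40 are made-up numbers).
	 */
	int r = __kvm_mmu_topup_memory_cache(cache, 512, 40);

	if (r)
		return r;

	/*
	 * Later, typically with MMU locks held, objects are taken without
	 * sleeping via kvm_mmu_memory_cache_alloc(cache), and the cache is
	 * drained with kvm_mmu_free_memory_cache(cache) on teardown.
	 */
	return 0;
}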