56 files changed, 611 insertions, 313 deletions
diff --git a/Documentation/admin-guide/perf/fujitsu_uncore_pmu.rst b/Documentation/admin-guide/perf/fujitsu_uncore_pmu.rst index 46595b788d3a..2ec0249e37b6 100644 --- a/Documentation/admin-guide/perf/fujitsu_uncore_pmu.rst +++ b/Documentation/admin-guide/perf/fujitsu_uncore_pmu.rst @@ -15,15 +15,19 @@ The driver provides a description of its available events and configuration options in sysfs, see /sys/bus/event_sources/devices/mac_iod<iod>_mac<mac>_ch<ch>/ and /sys/bus/event_sources/devices/pci_iod<iod>_pci<pci>/. This driver exports: + - formats, used by perf user space and other tools to configure events - events, used by perf user space and other tools to create events - symbolically, e.g.: + symbolically, e.g.:: + perf stat -a -e mac_iod0_mac0_ch0/event=0x21/ ls perf stat -a -e pci_iod0_pci0/event=0x24/ ls + - cpumask, used by perf user space and other tools to know on which CPUs to open the events This driver supports the following events for MAC: + - cycles This event counts MAC cycles at MAC frequency. - read-count @@ -77,6 +81,7 @@ Examples for use with perf:: perf stat -e mac_iod0_mac0_ch0/ea-mac/ ls And, this driver supports the following events for PCI: + - pci-port0-cycles This event counts PCI cycles at PCI frequency in port0. - pci-port0-read-count diff --git a/Documentation/admin-guide/perf/hisi-pmu.rst b/Documentation/admin-guide/perf/hisi-pmu.rst index f3d9871906a2..d56b2d690709 100644 --- a/Documentation/admin-guide/perf/hisi-pmu.rst +++ b/Documentation/admin-guide/perf/hisi-pmu.rst @@ -66,6 +66,10 @@ specified as a bitmap:: This will only count the operations from core/thread 0 and 1 in this cluster. +User should not use tt_core_deprecated to specify the core/thread filtering. +This option is provided for backward compatiblility and only support 8bit +which may not cover all the core/thread sharing L3C. + 2. Tracetag allow the user to chose to count only read, write or atomic operations via the tt_req parameeter in perf. The default value counts all operations. tt_req is 3bits, 3'b100 represents read operations, 3'b101 diff --git a/MAINTAINERS b/MAINTAINERS index b45db73e55df..9b754d290f93 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10390,7 +10390,7 @@ L: linux-kernel@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git core/entry F: include/linux/entry-common.h -F: include/linux/entry-kvm.h +F: include/linux/entry-virt.h F: include/linux/irq-entry-common.h F: kernel/entry/ @@ -11604,7 +11604,6 @@ F: drivers/pci/controller/pci-hyperv-intf.c F: drivers/pci/controller/pci-hyperv.c F: drivers/scsi/storvsc_drv.c F: drivers/uio/uio_hv_generic.c -F: drivers/video/fbdev/hyperv_fb.c F: include/asm-generic/mshyperv.h F: include/clocksource/hyperv_timer.h F: include/hyperv/hvgdk.h @@ -11618,6 +11617,16 @@ F: include/uapi/linux/hyperv.h F: net/vmw_vsock/hyperv_transport.c F: tools/hv/ +HYPER-V FRAMEBUFFER DRIVER +M: "K. Y. 
Srinivasan" <kys@microsoft.com> +M: Haiyang Zhang <haiyangz@microsoft.com> +M: Wei Liu <wei.liu@kernel.org> +M: Dexuan Cui <decui@microsoft.com> +L: linux-hyperv@vger.kernel.org +S: Obsolete +T: git git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux.git +F: drivers/video/fbdev/hyperv_fb.c + HYPERBUS SUPPORT M: Vignesh Raghavendra <vigneshr@ti.com> R: Tudor Ambarus <tudor.ambarus@linaro.org> diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h index bfe3ce9df197..ba7cf7fec5e9 100644 --- a/arch/arm64/include/asm/ftrace.h +++ b/arch/arm64/include/asm/ftrace.h @@ -153,6 +153,7 @@ ftrace_partial_regs(const struct ftrace_regs *fregs, struct pt_regs *regs) regs->pc = afregs->pc; regs->regs[29] = afregs->fp; regs->regs[30] = afregs->lr; + regs->pstate = PSR_MODE_EL1h; return regs; } diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index ff6fd0bbd7d2..78a4dbf75e60 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -79,7 +79,6 @@ extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, extern void *fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot); extern void mark_linear_text_alias_ro(void); extern int split_kernel_leaf_mapping(unsigned long start, unsigned long end); -extern void init_idmap_kpti_bbml2_flag(void); extern void linear_map_maybe_split_to_ptes(void); /* @@ -107,5 +106,11 @@ static inline bool kaslr_requires_kpti(void) return true; } +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +void kpti_install_ng_mappings(void); +#else +static inline void kpti_install_ng_mappings(void) {} +#endif + #endif /* !__ASSEMBLY__ */ #endif diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 3917ad897801..5ed401ff79e3 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1941,104 +1941,6 @@ static bool has_pmuv3(const struct arm64_cpu_capabilities *entry, int scope) } #endif -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 -#define KPTI_NG_TEMP_VA (-(1UL << PMD_SHIFT)) - -extern -void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt, - phys_addr_t size, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(enum pgtable_type), int flags); - -static phys_addr_t __initdata kpti_ng_temp_alloc; - -static phys_addr_t __init kpti_ng_pgd_alloc(enum pgtable_type type) -{ - kpti_ng_temp_alloc -= PAGE_SIZE; - return kpti_ng_temp_alloc; -} - -static int __init __kpti_install_ng_mappings(void *__unused) -{ - typedef void (kpti_remap_fn)(int, int, phys_addr_t, unsigned long); - extern kpti_remap_fn idmap_kpti_install_ng_mappings; - kpti_remap_fn *remap_fn; - - int cpu = smp_processor_id(); - int levels = CONFIG_PGTABLE_LEVELS; - int order = order_base_2(levels); - u64 kpti_ng_temp_pgd_pa = 0; - pgd_t *kpti_ng_temp_pgd; - u64 alloc = 0; - - if (levels == 5 && !pgtable_l5_enabled()) - levels = 4; - else if (levels == 4 && !pgtable_l4_enabled()) - levels = 3; - - remap_fn = (void *)__pa_symbol(idmap_kpti_install_ng_mappings); - - if (!cpu) { - alloc = __get_free_pages(GFP_ATOMIC | __GFP_ZERO, order); - kpti_ng_temp_pgd = (pgd_t *)(alloc + (levels - 1) * PAGE_SIZE); - kpti_ng_temp_alloc = kpti_ng_temp_pgd_pa = __pa(kpti_ng_temp_pgd); - - // - // Create a minimal page table hierarchy that permits us to map - // the swapper page tables temporarily as we traverse them. 
- // - // The physical pages are laid out as follows: - // - // +--------+-/-------+-/------ +-/------ +-\\\--------+ - // : PTE[] : | PMD[] : | PUD[] : | P4D[] : ||| PGD[] : - // +--------+-\-------+-\------ +-\------ +-///--------+ - // ^ - // The first page is mapped into this hierarchy at a PMD_SHIFT - // aligned virtual address, so that we can manipulate the PTE - // level entries while the mapping is active. The first entry - // covers the PTE[] page itself, the remaining entries are free - // to be used as a ad-hoc fixmap. - // - create_kpti_ng_temp_pgd(kpti_ng_temp_pgd, __pa(alloc), - KPTI_NG_TEMP_VA, PAGE_SIZE, PAGE_KERNEL, - kpti_ng_pgd_alloc, 0); - } - - cpu_install_idmap(); - remap_fn(cpu, num_online_cpus(), kpti_ng_temp_pgd_pa, KPTI_NG_TEMP_VA); - cpu_uninstall_idmap(); - - if (!cpu) { - free_pages(alloc, order); - arm64_use_ng_mappings = true; - } - - return 0; -} - -static void __init kpti_install_ng_mappings(void) -{ - /* Check whether KPTI is going to be used */ - if (!arm64_kernel_unmapped_at_el0()) - return; - - /* - * We don't need to rewrite the page-tables if either we've done - * it already or we have KASLR enabled and therefore have not - * created any global mappings at all. - */ - if (arm64_use_ng_mappings) - return; - - init_idmap_kpti_bbml2_flag(); - stop_machine(__kpti_install_ng_mappings, NULL, cpu_online_mask); -} - -#else -static inline void kpti_install_ng_mappings(void) -{ -} -#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ - static void cpu_enable_kpti(struct arm64_cpu_capabilities const *cap) { if (__this_cpu_read(this_cpu_vector) == vectors) { @@ -2419,17 +2321,21 @@ static void bti_enable(const struct arm64_cpu_capabilities *__unused) #ifdef CONFIG_ARM64_MTE static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap) { + static bool cleared_zero_page = false; + sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_ATA | SCTLR_EL1_ATA0); mte_cpu_setup(); /* * Clear the tags in the zero page. This needs to be done via the - * linear map which has the Tagged attribute. + * linear map which has the Tagged attribute. Since this page is + * always mapped as pte_special(), set_pte_at() will not attempt to + * clear the tags or set PG_mte_tagged. 
*/ - if (try_page_mte_tagging(ZERO_PAGE(0))) { + if (!cleared_zero_page) { + cleared_zero_page = true; mte_clear_page_tags(lm_alias(empty_zero_page)); - set_page_mte_tagged(ZERO_PAGE(0)); } kasan_init_hw_tags_cpu(); diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index 54a52dc5c1ae..43f7a2f39403 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -478,7 +478,7 @@ static int __access_remote_tags(struct mm_struct *mm, unsigned long addr, if (folio_test_hugetlb(folio)) WARN_ON_ONCE(!folio_test_hugetlb_mte_tagged(folio)); else - WARN_ON_ONCE(!page_mte_tagged(page)); + WARN_ON_ONCE(!page_mte_tagged(page) && !is_zero_page(page)); /* limit access to the end of the page */ offset = offset_in_page(addr); diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 0c5d408afd95..8ab6104a4883 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -10,6 +10,7 @@ #define pr_fmt(fmt) "kprobes: " fmt +#include <linux/execmem.h> #include <linux/extable.h> #include <linux/kasan.h> #include <linux/kernel.h> @@ -41,6 +42,17 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); static void __kprobes post_kprobe_handler(struct kprobe *, struct kprobe_ctlblk *, struct pt_regs *); +void *alloc_insn_page(void) +{ + void *addr; + + addr = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE); + if (!addr) + return NULL; + set_memory_rox((unsigned long)addr, 1); + return addr; +} + static void __kprobes arch_prepare_ss_slot(struct kprobe *p) { kprobe_opcode_t *addr = p->ainsn.xol_insn; diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index bff62e75d681..4f803fd1c99a 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -25,7 +25,7 @@ menuconfig KVM select HAVE_KVM_CPU_RELAX_INTERCEPT select KVM_MMIO select KVM_GENERIC_DIRTYLOG_READ_PROTECT - select KVM_XFER_TO_GUEST_WORK + select VIRT_XFER_TO_GUEST_WORK select KVM_VFIO select HAVE_KVM_DIRTY_RING_ACQ_REL select NEED_KVM_DIRTY_RING_WITH_BITMAP diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index fa79744290f3..f21d1b7f20f8 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -6,7 +6,6 @@ #include <linux/bug.h> #include <linux/cpu_pm.h> -#include <linux/entry-kvm.h> #include <linux/errno.h> #include <linux/err.h> #include <linux/kvm_host.h> @@ -1183,7 +1182,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) /* * Check conditions before entering the guest */ - ret = xfer_to_guest_mode_handle_work(vcpu); + ret = kvm_xfer_to_guest_mode_handle_work(vcpu); if (!ret) ret = 1; diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index b3d8c3de4149..b8d37eb037fc 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -470,14 +470,6 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, mutex_unlock(&fixmap_lock); } -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 -extern __alias(__create_pgd_mapping_locked) -void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt, - phys_addr_t size, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(enum pgtable_type), - int flags); -#endif - #define INVALID_PHYS_ADDR (-1ULL) static phys_addr_t __pgd_pgtable_alloc(struct mm_struct *mm, gfp_t gfp, @@ -823,7 +815,7 @@ static bool linear_map_requires_bbml2 __initdata; u32 idmap_kpti_bbml2_flag; -void __init init_idmap_kpti_bbml2_flag(void) +static void __init init_idmap_kpti_bbml2_flag(void) { WRITE_ONCE(idmap_kpti_bbml2_flag, 1); /* Must be visible to other CPUs before stop_machine() is called. 
*/ @@ -1135,7 +1127,93 @@ static void __init declare_vma(struct vm_struct *vma, } #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 -static pgprot_t kernel_exec_prot(void) +#define KPTI_NG_TEMP_VA (-(1UL << PMD_SHIFT)) + +static phys_addr_t kpti_ng_temp_alloc __initdata; + +static phys_addr_t __init kpti_ng_pgd_alloc(enum pgtable_type type) +{ + kpti_ng_temp_alloc -= PAGE_SIZE; + return kpti_ng_temp_alloc; +} + +static int __init __kpti_install_ng_mappings(void *__unused) +{ + typedef void (kpti_remap_fn)(int, int, phys_addr_t, unsigned long); + extern kpti_remap_fn idmap_kpti_install_ng_mappings; + kpti_remap_fn *remap_fn; + + int cpu = smp_processor_id(); + int levels = CONFIG_PGTABLE_LEVELS; + int order = order_base_2(levels); + u64 kpti_ng_temp_pgd_pa = 0; + pgd_t *kpti_ng_temp_pgd; + u64 alloc = 0; + + if (levels == 5 && !pgtable_l5_enabled()) + levels = 4; + else if (levels == 4 && !pgtable_l4_enabled()) + levels = 3; + + remap_fn = (void *)__pa_symbol(idmap_kpti_install_ng_mappings); + + if (!cpu) { + alloc = __get_free_pages(GFP_ATOMIC | __GFP_ZERO, order); + kpti_ng_temp_pgd = (pgd_t *)(alloc + (levels - 1) * PAGE_SIZE); + kpti_ng_temp_alloc = kpti_ng_temp_pgd_pa = __pa(kpti_ng_temp_pgd); + + // + // Create a minimal page table hierarchy that permits us to map + // the swapper page tables temporarily as we traverse them. + // + // The physical pages are laid out as follows: + // + // +--------+-/-------+-/------ +-/------ +-\\\--------+ + // : PTE[] : | PMD[] : | PUD[] : | P4D[] : ||| PGD[] : + // +--------+-\-------+-\------ +-\------ +-///--------+ + // ^ + // The first page is mapped into this hierarchy at a PMD_SHIFT + // aligned virtual address, so that we can manipulate the PTE + // level entries while the mapping is active. The first entry + // covers the PTE[] page itself, the remaining entries are free + // to be used as a ad-hoc fixmap. + // + __create_pgd_mapping_locked(kpti_ng_temp_pgd, __pa(alloc), + KPTI_NG_TEMP_VA, PAGE_SIZE, PAGE_KERNEL, + kpti_ng_pgd_alloc, 0); + } + + cpu_install_idmap(); + remap_fn(cpu, num_online_cpus(), kpti_ng_temp_pgd_pa, KPTI_NG_TEMP_VA); + cpu_uninstall_idmap(); + + if (!cpu) { + free_pages(alloc, order); + arm64_use_ng_mappings = true; + } + + return 0; +} + +void __init kpti_install_ng_mappings(void) +{ + /* Check whether KPTI is going to be used */ + if (!arm64_kernel_unmapped_at_el0()) + return; + + /* + * We don't need to rewrite the page-tables if either we've done + * it already or we have KASLR enabled and therefore have not + * created any global mappings at all. + */ + if (arm64_use_ng_mappings) + return; + + init_idmap_kpti_bbml2_flag(); + stop_machine(__kpti_install_ng_mappings, NULL, cpu_online_mask); +} + +static pgprot_t __init kernel_exec_prot(void) { return rodata_enabled ? 
PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC; } diff --git a/arch/loongarch/kvm/Kconfig b/arch/loongarch/kvm/Kconfig index 40eea6da7c25..ae64bbdf83a7 100644 --- a/arch/loongarch/kvm/Kconfig +++ b/arch/loongarch/kvm/Kconfig @@ -31,7 +31,7 @@ config KVM select KVM_GENERIC_HARDWARE_ENABLING select KVM_GENERIC_MMU_NOTIFIER select KVM_MMIO - select KVM_XFER_TO_GUEST_WORK + select VIRT_XFER_TO_GUEST_WORK select SCHED_INFO select GUEST_PERF_EVENTS if PERF_EVENTS help diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index 9c802f7103c6..30e3b089a596 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -4,7 +4,6 @@ */ #include <linux/kvm_host.h> -#include <linux/entry-kvm.h> #include <asm/fpu.h> #include <asm/lbt.h> #include <asm/loongarch.h> @@ -251,7 +250,7 @@ static int kvm_enter_guest_check(struct kvm_vcpu *vcpu) /* * Check conditions before entering the guest */ - ret = xfer_to_guest_mode_handle_work(vcpu); + ret = kvm_xfer_to_guest_mode_handle_work(vcpu); if (ret < 0) return ret; diff --git a/arch/riscv/kvm/Kconfig b/arch/riscv/kvm/Kconfig index 5a62091b0809..c50328212917 100644 --- a/arch/riscv/kvm/Kconfig +++ b/arch/riscv/kvm/Kconfig @@ -30,7 +30,7 @@ config KVM select KVM_GENERIC_DIRTYLOG_READ_PROTECT select KVM_GENERIC_HARDWARE_ENABLING select KVM_MMIO - select KVM_XFER_TO_GUEST_WORK + select VIRT_XFER_TO_GUEST_WORK select KVM_GENERIC_MMU_NOTIFIER select SCHED_INFO select GUEST_PERF_EVENTS if PERF_EVENTS diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index 47bcf190ccc5..bccb919ca615 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -7,7 +7,6 @@ */ #include <linux/bitops.h> -#include <linux/entry-kvm.h> #include <linux/errno.h> #include <linux/err.h> #include <linux/kdebug.h> @@ -911,7 +910,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) run->exit_reason = KVM_EXIT_UNKNOWN; while (ret > 0) { /* Check conditions before entering the guest */ - ret = xfer_to_guest_mode_handle_work(vcpu); + ret = kvm_xfer_to_guest_mode_handle_work(vcpu); if (ret) continue; ret = 1; diff --git a/arch/x86/hyperv/irqdomain.c b/arch/x86/hyperv/irqdomain.c index 090f5ac9f492..c3ba12b1bc07 100644 --- a/arch/x86/hyperv/irqdomain.c +++ b/arch/x86/hyperv/irqdomain.c @@ -11,6 +11,7 @@ #include <linux/pci.h> #include <linux/irq.h> #include <linux/export.h> +#include <linux/irqchip/irq-msi-lib.h> #include <asm/mshyperv.h> static int hv_map_interrupt(union hv_device_id device_id, bool level, @@ -289,59 +290,99 @@ static void hv_teardown_msi_irq(struct pci_dev *dev, struct irq_data *irqd) (void)hv_unmap_msi_interrupt(dev, &old_entry); } -static void hv_msi_free_irq(struct irq_domain *domain, - struct msi_domain_info *info, unsigned int virq) -{ - struct irq_data *irqd = irq_get_irq_data(virq); - struct msi_desc *desc; - - if (!irqd) - return; - - desc = irq_data_get_msi_desc(irqd); - if (!desc || !desc->irq || WARN_ON_ONCE(!dev_is_pci(desc->dev))) - return; - - hv_teardown_msi_irq(to_pci_dev(desc->dev), irqd); -} - /* * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, * which implement the MSI or MSI-X Capability Structure. 
*/ static struct irq_chip hv_pci_msi_controller = { .name = "HV-PCI-MSI", - .irq_unmask = pci_msi_unmask_irq, - .irq_mask = pci_msi_mask_irq, .irq_ack = irq_chip_ack_parent, - .irq_retrigger = irq_chip_retrigger_hierarchy, .irq_compose_msi_msg = hv_irq_compose_msi_msg, - .irq_set_affinity = msi_domain_set_affinity, - .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MOVE_DEFERRED, + .irq_set_affinity = irq_chip_set_affinity_parent, }; -static struct msi_domain_ops pci_msi_domain_ops = { - .msi_free = hv_msi_free_irq, - .msi_prepare = pci_msi_prepare, +static bool hv_init_dev_msi_info(struct device *dev, struct irq_domain *domain, + struct irq_domain *real_parent, struct msi_domain_info *info) +{ + struct irq_chip *chip = info->chip; + + if (!msi_lib_init_dev_msi_info(dev, domain, real_parent, info)) + return false; + + chip->flags |= IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MOVE_DEFERRED; + + info->ops->msi_prepare = pci_msi_prepare; + + return true; +} + +#define HV_MSI_FLAGS_SUPPORTED (MSI_GENERIC_FLAGS_MASK | MSI_FLAG_PCI_MSIX) +#define HV_MSI_FLAGS_REQUIRED (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS) + +static struct msi_parent_ops hv_msi_parent_ops = { + .supported_flags = HV_MSI_FLAGS_SUPPORTED, + .required_flags = HV_MSI_FLAGS_REQUIRED, + .bus_select_token = DOMAIN_BUS_NEXUS, + .bus_select_mask = MATCH_PCI_MSI, + .chip_flags = MSI_CHIP_FLAG_SET_ACK, + .prefix = "HV-", + .init_dev_msi_info = hv_init_dev_msi_info, }; -static struct msi_domain_info hv_pci_msi_domain_info = { - .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | - MSI_FLAG_PCI_MSIX, - .ops = &pci_msi_domain_ops, - .chip = &hv_pci_msi_controller, - .handler = handle_edge_irq, - .handler_name = "edge", +static int hv_msi_domain_alloc(struct irq_domain *d, unsigned int virq, unsigned int nr_irqs, + void *arg) +{ + /* + * TODO: The allocation bits of hv_irq_compose_msi_msg(), i.e. everything except + * entry_to_msi_msg() should be in here. 
+ */ + + int ret; + + ret = irq_domain_alloc_irqs_parent(d, virq, nr_irqs, arg); + if (ret) + return ret; + + for (int i = 0; i < nr_irqs; ++i) { + irq_domain_set_info(d, virq + i, 0, &hv_pci_msi_controller, NULL, + handle_edge_irq, NULL, "edge"); + } + return 0; +} + +static void hv_msi_domain_free(struct irq_domain *d, unsigned int virq, unsigned int nr_irqs) +{ + for (int i = 0; i < nr_irqs; ++i) { + struct irq_data *irqd = irq_domain_get_irq_data(d, virq); + struct msi_desc *desc; + + desc = irq_data_get_msi_desc(irqd); + if (!desc || !desc->irq || WARN_ON_ONCE(!dev_is_pci(desc->dev))) + continue; + + hv_teardown_msi_irq(to_pci_dev(desc->dev), irqd); + } + irq_domain_free_irqs_top(d, virq, nr_irqs); +} + +static const struct irq_domain_ops hv_msi_domain_ops = { + .select = msi_lib_irq_domain_select, + .alloc = hv_msi_domain_alloc, + .free = hv_msi_domain_free, }; struct irq_domain * __init hv_create_pci_msi_domain(void) { struct irq_domain *d = NULL; - struct fwnode_handle *fn; - fn = irq_domain_alloc_named_fwnode("HV-PCI-MSI"); - if (fn) - d = pci_msi_create_irq_domain(fn, &hv_pci_msi_domain_info, x86_vector_domain); + struct irq_domain_info info = { + .fwnode = irq_domain_alloc_named_fwnode("HV-PCI-MSI"), + .ops = &hv_msi_domain_ops, + .parent = x86_vector_domain, + }; + + if (info.fwnode) + d = msi_create_parent_irq_domain(&info, &hv_msi_parent_ops); /* No point in going further if we can't get an irq domain */ BUG_ON(!d); diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index ade6c665c97e..a4615b889f3e 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -463,6 +463,195 @@ void hv_ivm_msr_read(u64 msr, u64 *value) } /* + * Keep track of the PFN regions which were shared with the host. The access + * must be revoked upon kexec/kdump (see hv_ivm_clear_host_access()). + */ +struct hv_enc_pfn_region { + struct list_head list; + u64 pfn; + int count; +}; + +static LIST_HEAD(hv_list_enc); +static DEFINE_RAW_SPINLOCK(hv_list_enc_lock); + +static int hv_list_enc_add(const u64 *pfn_list, int count) +{ + struct hv_enc_pfn_region *ent; + unsigned long flags; + u64 pfn; + int i; + + for (i = 0; i < count; i++) { + pfn = pfn_list[i]; + + raw_spin_lock_irqsave(&hv_list_enc_lock, flags); + /* Check if the PFN already exists in some region first */ + list_for_each_entry(ent, &hv_list_enc, list) { + if ((ent->pfn <= pfn) && (ent->pfn + ent->count - 1 >= pfn)) + /* Nothing to do - pfn is already in the list */ + goto unlock_done; + } + + /* + * Check if the PFN is adjacent to an existing region. Growing + * a region can make it adjacent to another one but merging is + * not (yet) implemented for simplicity. A PFN cannot be added + * to two regions to keep the logic in hv_list_enc_remove() + * correct. 
+ */ + list_for_each_entry(ent, &hv_list_enc, list) { + if (ent->pfn + ent->count == pfn) { + /* Grow existing region up */ + ent->count++; + goto unlock_done; + } else if (pfn + 1 == ent->pfn) { + /* Grow existing region down */ + ent->pfn--; + ent->count++; + goto unlock_done; + } + } + raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags); + + /* No adjacent region found -- create a new one */ + ent = kzalloc(sizeof(struct hv_enc_pfn_region), GFP_KERNEL); + if (!ent) + return -ENOMEM; + + ent->pfn = pfn; + ent->count = 1; + + raw_spin_lock_irqsave(&hv_list_enc_lock, flags); + list_add(&ent->list, &hv_list_enc); + +unlock_done: + raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags); + } + + return 0; +} + +static int hv_list_enc_remove(const u64 *pfn_list, int count) +{ + struct hv_enc_pfn_region *ent, *t; + struct hv_enc_pfn_region new_region; + unsigned long flags; + u64 pfn; + int i; + + for (i = 0; i < count; i++) { + pfn = pfn_list[i]; + + raw_spin_lock_irqsave(&hv_list_enc_lock, flags); + list_for_each_entry_safe(ent, t, &hv_list_enc, list) { + if (pfn == ent->pfn + ent->count - 1) { + /* Removing tail pfn */ + ent->count--; + if (!ent->count) { + list_del(&ent->list); + kfree(ent); + } + goto unlock_done; + } else if (pfn == ent->pfn) { + /* Removing head pfn */ + ent->count--; + ent->pfn++; + if (!ent->count) { + list_del(&ent->list); + kfree(ent); + } + goto unlock_done; + } else if (pfn > ent->pfn && pfn < ent->pfn + ent->count - 1) { + /* + * Removing a pfn in the middle. Cut off the tail + * of the existing region and create a template for + * the new one. + */ + new_region.pfn = pfn + 1; + new_region.count = ent->count - (pfn - ent->pfn + 1); + ent->count = pfn - ent->pfn; + goto unlock_split; + } + + } +unlock_done: + raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags); + continue; + +unlock_split: + raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags); + + ent = kzalloc(sizeof(struct hv_enc_pfn_region), GFP_KERNEL); + if (!ent) + return -ENOMEM; + + ent->pfn = new_region.pfn; + ent->count = new_region.count; + + raw_spin_lock_irqsave(&hv_list_enc_lock, flags); + list_add(&ent->list, &hv_list_enc); + raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags); + } + + return 0; +} + +/* Stop new private<->shared conversions */ +static void hv_vtom_kexec_begin(void) +{ + if (!IS_ENABLED(CONFIG_KEXEC_CORE)) + return; + + /* + * Crash kernel reaches here with interrupts disabled: can't wait for + * conversions to finish. + * + * If race happened, just report and proceed. 
+ */ + if (!set_memory_enc_stop_conversion()) + pr_warn("Failed to stop shared<->private conversions\n"); +} + +static void hv_vtom_kexec_finish(void) +{ + struct hv_gpa_range_for_visibility *input; + struct hv_enc_pfn_region *ent; + unsigned long flags; + u64 hv_status; + int cur, i; + + local_irq_save(flags); + input = *this_cpu_ptr(hyperv_pcpu_input_arg); + + if (unlikely(!input)) + goto out; + + list_for_each_entry(ent, &hv_list_enc, list) { + for (i = 0, cur = 0; i < ent->count; i++) { + input->gpa_page_list[cur] = ent->pfn + i; + cur++; + + if (cur == HV_MAX_MODIFY_GPA_REP_COUNT || i == ent->count - 1) { + input->partition_id = HV_PARTITION_ID_SELF; + input->host_visibility = VMBUS_PAGE_NOT_VISIBLE; + input->reserved0 = 0; + input->reserved1 = 0; + hv_status = hv_do_rep_hypercall( + HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY, + cur, 0, input, NULL); + WARN_ON_ONCE(!hv_result_success(hv_status)); + cur = 0; + } + } + + } + +out: + local_irq_restore(flags); +} + +/* * hv_mark_gpa_visibility - Set pages visible to host via hvcall. * * In Isolation VM, all guest memory is encrypted from host and guest @@ -475,6 +664,7 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[], struct hv_gpa_range_for_visibility *input; u64 hv_status; unsigned long flags; + int ret; /* no-op if partition isolation is not enabled */ if (!hv_is_isolation_supported()) @@ -486,6 +676,13 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[], return -EINVAL; } + if (visibility == VMBUS_PAGE_NOT_VISIBLE) + ret = hv_list_enc_remove(pfn, count); + else + ret = hv_list_enc_add(pfn, count); + if (ret) + return ret; + local_irq_save(flags); input = *this_cpu_ptr(hyperv_pcpu_input_arg); @@ -506,8 +703,18 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[], if (hv_result_success(hv_status)) return 0; + + if (visibility == VMBUS_PAGE_NOT_VISIBLE) + ret = hv_list_enc_add(pfn, count); else - return -EFAULT; + ret = hv_list_enc_remove(pfn, count); + /* + * There's no good way to recover from -ENOMEM here, the accounting is + * wrong either way. + */ + WARN_ON_ONCE(ret); + + return -EFAULT; } /* @@ -669,6 +876,8 @@ void __init hv_vtom_init(void) x86_platform.guest.enc_tlb_flush_required = hv_vtom_tlb_flush_required; x86_platform.guest.enc_status_change_prepare = hv_vtom_clear_present; x86_platform.guest.enc_status_change_finish = hv_vtom_set_host_visibility; + x86_platform.guest.enc_kexec_begin = hv_vtom_kexec_begin; + x86_platform.guest.enc_kexec_finish = hv_vtom_kexec_finish; /* Set WB as the default cache mode. */ guest_force_mtrr_state(NULL, 0, MTRR_TYPE_WRBACK); diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index c78f860419d6..25773af116bc 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -565,6 +565,11 @@ static void __init ms_hyperv_init_platform(void) machine_ops.crash_shutdown = hv_machine_crash_shutdown; #endif #endif + /* + * HV_ACCESS_TSC_INVARIANT is always zero for the root partition. Root + * partition doesn't need to write to synthetic MSR to enable invariant + * TSC feature. It sees what the hardware provides. + */ if (ms_hyperv.features & HV_ACCESS_TSC_INVARIANT) { /* * Writing to synthetic MSR 0x40000118 updates/changes the @@ -636,8 +641,12 @@ static void __init ms_hyperv_init_platform(void) * TSC should be marked as unstable only after Hyper-V * clocksource has been initialized. This ensures that the * stability of the sched_clock is not altered. 
+ * + * HV_ACCESS_TSC_INVARIANT is always zero for the root partition. No + * need to check for it. */ - if (!(ms_hyperv.features & HV_ACCESS_TSC_INVARIANT)) + if (!hv_root_partition() && + !(ms_hyperv.features & HV_ACCESS_TSC_INVARIANT)) mark_tsc_unstable("running on Hyper-V"); hardlockup_detector_disable(); diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 4e43923656d0..67d4f23bab66 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -40,7 +40,7 @@ config KVM_X86 select HAVE_KVM_MSI select HAVE_KVM_CPU_RELAX_INTERCEPT select HAVE_KVM_NO_POLL - select KVM_XFER_TO_GUEST_WORK + select VIRT_XFER_TO_GUEST_WORK select KVM_GENERIC_DIRTYLOG_READ_PROTECT select KVM_VFIO select HAVE_KVM_PM_NOTIFIER if PM diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 546272a5d34d..d7b258af63ea 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -28,7 +28,6 @@ #include <linux/slab.h> #include <linux/tboot.h> #include <linux/trace_events.h> -#include <linux/entry-kvm.h> #include <asm/apic.h> #include <asm/asm.h> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4b8138bd4857..42ecd093bb4c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -59,7 +59,6 @@ #include <linux/sched/stat.h> #include <linux/sched/isolation.h> #include <linux/mem_encrypt.h> -#include <linux/entry-kvm.h> #include <linux/suspend.h> #include <linux/smp.h> @@ -11635,7 +11634,7 @@ static int vcpu_run(struct kvm_vcpu *vcpu) if (__xfer_to_guest_mode_work_pending()) { kvm_vcpu_srcu_read_unlock(vcpu); - r = xfer_to_guest_mode_handle_work(vcpu); + r = kvm_xfer_to_guest_mode_handle_work(vcpu); kvm_vcpu_srcu_read_lock(vcpu); if (r) return r; diff --git a/drivers/Makefile b/drivers/Makefile index a104163b1353..8e1ffa4358d5 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -161,7 +161,7 @@ obj-$(CONFIG_SOUNDWIRE) += soundwire/ # Virtualization drivers obj-$(CONFIG_VIRT_DRIVERS) += virt/ -obj-$(subst m,y,$(CONFIG_HYPERV)) += hv/ +obj-$(CONFIG_HYPERV) += hv/ obj-$(CONFIG_PM_DEVFREQ) += devfreq/ obj-$(CONFIG_EXTCON) += extcon/ diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c index 2edc13ca184e..10356d4ec55c 100644 --- a/drivers/clocksource/hyperv_timer.c +++ b/drivers/clocksource/hyperv_timer.c @@ -549,14 +549,22 @@ static void __init hv_init_tsc_clocksource(void) union hv_reference_tsc_msr tsc_msr; /* + * When running as a guest partition: + * * If Hyper-V offers TSC_INVARIANT, then the virtualized TSC correctly * handles frequency and offset changes due to live migration, * pause/resume, and other VM management operations. So lower the * Hyper-V Reference TSC rating, causing the generic TSC to be used. * TSC_INVARIANT is not offered on ARM64, so the Hyper-V Reference * TSC will be preferred over the virtualized ARM64 arch counter. + * + * When running as the root partition: + * + * There is no HV_ACCESS_TSC_INVARIANT feature. Always lower the rating + * of the Hyper-V Reference TSC. 
*/ - if (ms_hyperv.features & HV_ACCESS_TSC_INVARIANT) { + if ((ms_hyperv.features & HV_ACCESS_TSC_INVARIANT) || + hv_root_partition()) { hyperv_cs_tsc.rating = 250; hyperv_cs_msr.rating = 245; } diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index fda170730468..7e6bc0b3a589 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -400,7 +400,7 @@ source "drivers/gpu/drm/tyr/Kconfig" config DRM_HYPERV tristate "DRM Support for Hyper-V synthetic video device" - depends on DRM && PCI && HYPERV + depends on DRM && PCI && HYPERV_VMBUS select DRM_CLIENT_SELECTION select DRM_KMS_HELPER select DRM_GEM_SHMEM_HELPER diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index 7ff85c7200e5..5341aa79f387 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -1171,7 +1171,7 @@ config GREENASIA_FF config HID_HYPERV_MOUSE tristate "Microsoft Hyper-V mouse driver" - depends on HYPERV + depends on HYPERV_VMBUS help Select this option to enable the Hyper-V mouse driver. diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig index 57623ca7f350..0b8c391a0342 100644 --- a/drivers/hv/Kconfig +++ b/drivers/hv/Kconfig @@ -3,13 +3,14 @@ menu "Microsoft Hyper-V guest support" config HYPERV - tristate "Microsoft Hyper-V client drivers" + bool "Microsoft Hyper-V core hypervisor support" depends on (X86 && X86_LOCAL_APIC && HYPERVISOR_GUEST) \ || (ARM64 && !CPU_BIG_ENDIAN) select PARAVIRT select X86_HV_CALLBACK_VECTOR if X86 select OF_EARLY_FLATTREE if OF select SYSFB if EFI && !HYPERV_VTL_MODE + select IRQ_MSI_LIB if X86 help Select this option to run Linux as a Hyper-V client operating system. @@ -44,18 +45,25 @@ config HYPERV_TIMER config HYPERV_UTILS tristate "Microsoft Hyper-V Utilities driver" - depends on HYPERV && CONNECTOR && NLS + depends on HYPERV_VMBUS && CONNECTOR && NLS depends on PTP_1588_CLOCK_OPTIONAL help Select this option to enable the Hyper-V Utilities. config HYPERV_BALLOON tristate "Microsoft Hyper-V Balloon driver" - depends on HYPERV + depends on HYPERV_VMBUS select PAGE_REPORTING help Select this option to enable Hyper-V Balloon driver. +config HYPERV_VMBUS + tristate "Microsoft Hyper-V VMBus driver" + depends on HYPERV + default HYPERV + help + Select this option to enable Hyper-V Vmbus driver. 
+ config MSHV_ROOT tristate "Microsoft Hyper-V root partition support" depends on HYPERV && (X86_64 || ARM64) @@ -66,6 +74,7 @@ config MSHV_ROOT # no particular order, making it impossible to reassemble larger pages depends on PAGE_SIZE_4KB select EVENTFD + select VIRT_XFER_TO_GUEST_WORK default n help Select this option to enable support for booting and running as root diff --git a/drivers/hv/Makefile b/drivers/hv/Makefile index 976189c725dc..1a1677bf4dac 100644 --- a/drivers/hv/Makefile +++ b/drivers/hv/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_HYPERV) += hv_vmbus.o +obj-$(CONFIG_HYPERV_VMBUS) += hv_vmbus.o obj-$(CONFIG_HYPERV_UTILS) += hv_utils.o obj-$(CONFIG_HYPERV_BALLOON) += hv_balloon.o obj-$(CONFIG_MSHV_ROOT) += mshv_root.o @@ -16,5 +16,5 @@ mshv_root-y := mshv_root_main.o mshv_synic.o mshv_eventfd.o mshv_irq.o \ mshv_root_hv_call.o mshv_portid_table.o # Code that must be built-in -obj-$(subst m,y,$(CONFIG_HYPERV)) += hv_common.o +obj-$(CONFIG_HYPERV) += hv_common.o obj-$(subst m,y,$(CONFIG_MSHV_ROOT)) += hv_proc.o mshv_common.o diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 7c7c66e0dc3f..162d6aeece7b 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -925,7 +925,7 @@ static int vmbus_close_internal(struct vmbus_channel *channel) /* Send a closing message */ - msg = &channel->close_msg.msg; + msg = &channel->close_msg; msg->header.msgtype = CHANNELMSG_CLOSECHANNEL; msg->child_relid = channel->offermsg.child_relid; diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c index 49898d10faff..e109a620c83f 100644 --- a/drivers/hv/hv_common.c +++ b/drivers/hv/hv_common.c @@ -257,7 +257,7 @@ static void hv_kmsg_dump_register(void) static inline bool hv_output_page_exists(void) { - return hv_root_partition() || IS_ENABLED(CONFIG_HYPERV_VTL_MODE); + return hv_parent_partition() || IS_ENABLED(CONFIG_HYPERV_VTL_MODE); } void __init hv_get_partition_id(void) @@ -377,7 +377,7 @@ int __init hv_common_init(void) BUG_ON(!hyperv_pcpu_output_arg); } - if (hv_root_partition()) { + if (hv_parent_partition()) { hv_synic_eventring_tail = alloc_percpu(u8 *); BUG_ON(!hv_synic_eventring_tail); } @@ -531,7 +531,7 @@ int hv_common_cpu_init(unsigned int cpu) if (msr_vp_index > hv_max_vp_index) hv_max_vp_index = msr_vp_index; - if (hv_root_partition()) { + if (hv_parent_partition()) { synic_eventring_tail = (u8 **)this_cpu_ptr(hv_synic_eventring_tail); *synic_eventring_tail = kcalloc(HV_SYNIC_SINT_COUNT, sizeof(u8), flags); @@ -558,7 +558,7 @@ int hv_common_cpu_die(unsigned int cpu) * originally allocated memory is reused in hv_common_cpu_init(). */ - if (hv_root_partition()) { + if (hv_parent_partition()) { synic_eventring_tail = this_cpu_ptr(hv_synic_eventring_tail); kfree(*synic_eventring_tail); *synic_eventring_tail = NULL; @@ -729,13 +729,17 @@ void hv_identify_partition_type(void) * the root partition setting if also a Confidential VM. 
*/ if ((ms_hyperv.priv_high & HV_CREATE_PARTITIONS) && - (ms_hyperv.priv_high & HV_CPU_MANAGEMENT) && !(ms_hyperv.priv_high & HV_ISOLATION)) { - pr_info("Hyper-V: running as root partition\n"); - if (IS_ENABLED(CONFIG_MSHV_ROOT)) - hv_curr_partition_type = HV_PARTITION_TYPE_ROOT; - else + + if (!IS_ENABLED(CONFIG_MSHV_ROOT)) { pr_crit("Hyper-V: CONFIG_MSHV_ROOT not enabled!\n"); + } else if (ms_hyperv.priv_high & HV_CPU_MANAGEMENT) { + pr_info("Hyper-V: running as root partition\n"); + hv_curr_partition_type = HV_PARTITION_TYPE_ROOT; + } else { + pr_info("Hyper-V: running as L1VH partition\n"); + hv_curr_partition_type = HV_PARTITION_TYPE_L1VH; + } } } diff --git a/drivers/hv/hv_utils_transport.c b/drivers/hv/hv_utils_transport.c index 832885198643..b3de35ff6334 100644 --- a/drivers/hv/hv_utils_transport.c +++ b/drivers/hv/hv_utils_transport.c @@ -129,8 +129,7 @@ static int hvt_op_open(struct inode *inode, struct file *file) * device gets released. */ hvt->mode = HVUTIL_TRANSPORT_CHARDEV; - } - else if (hvt->mode == HVUTIL_TRANSPORT_NETLINK) { + } else if (hvt->mode == HVUTIL_TRANSPORT_NETLINK) { /* * We're switching from netlink communication to using char * device. Issue the reset first. @@ -195,7 +194,7 @@ static void hvt_cn_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp) } spin_unlock(&hvt_list_lock); if (!hvt_found) { - pr_warn("hvt_cn_callback: spurious message received!\n"); + pr_warn("%s: spurious message received!\n", __func__); return; } @@ -210,7 +209,7 @@ static void hvt_cn_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp) if (hvt->mode == HVUTIL_TRANSPORT_NETLINK) hvt_found->on_msg(msg->data, msg->len); else - pr_warn("hvt_cn_callback: unexpected netlink message!\n"); + pr_warn("%s: unexpected netlink message!\n", __func__); mutex_unlock(&hvt->lock); } @@ -260,8 +259,9 @@ int hvutil_transport_send(struct hvutil_transport *hvt, void *msg, int len, hvt->outmsg_len = len; hvt->on_read = on_read_cb; wake_up_interruptible(&hvt->outmsg_q); - } else + } else { ret = -ENOMEM; + } out_unlock: mutex_unlock(&hvt->lock); return ret; diff --git a/drivers/hv/mshv.h b/drivers/hv/mshv.h index 0340a67acd0a..d4813df92b9c 100644 --- a/drivers/hv/mshv.h +++ b/drivers/hv/mshv.h @@ -25,6 +25,4 @@ int hv_call_set_vp_registers(u32 vp_index, u64 partition_id, u16 count, int hv_call_get_partition_property(u64 partition_id, u64 property_code, u64 *property_value); -int mshv_do_pre_guest_mode_work(ulong th_flags); - #endif /* _MSHV_H */ diff --git a/drivers/hv/mshv_common.c b/drivers/hv/mshv_common.c index 6f227a8a5af7..aa2be51979fd 100644 --- a/drivers/hv/mshv_common.c +++ b/drivers/hv/mshv_common.c @@ -138,25 +138,3 @@ int hv_call_get_partition_property(u64 partition_id, return 0; } EXPORT_SYMBOL_GPL(hv_call_get_partition_property); - -/* - * Handle any pre-processing before going into the guest mode on this cpu, most - * notably call schedule(). Must be invoked with both preemption and - * interrupts enabled. - * - * Returns: 0 on success, -errno on error. 
- */ -int mshv_do_pre_guest_mode_work(ulong th_flags) -{ - if (th_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) - return -EINTR; - - if (th_flags & _TIF_NEED_RESCHED) - schedule(); - - if (th_flags & _TIF_NOTIFY_RESUME) - resume_user_mode_work(NULL); - - return 0; -} -EXPORT_SYMBOL_GPL(mshv_do_pre_guest_mode_work); diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c index 72df774e410a..e3b2bd417c46 100644 --- a/drivers/hv/mshv_root_main.c +++ b/drivers/hv/mshv_root_main.c @@ -8,6 +8,7 @@ * Authors: Microsoft Linux virtualization team */ +#include <linux/entry-virt.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/fs.h> @@ -37,12 +38,6 @@ MODULE_AUTHOR("Microsoft"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Microsoft Hyper-V root partition VMM interface /dev/mshv"); -/* TODO move this to mshyperv.h when needed outside driver */ -static inline bool hv_parent_partition(void) -{ - return hv_root_partition(); -} - /* TODO move this to another file when debugfs code is added */ enum hv_stats_vp_counters { /* HV_THREAD_COUNTER */ #if defined(CONFIG_X86) @@ -487,28 +482,6 @@ mshv_vp_wait_for_hv_kick(struct mshv_vp *vp) return 0; } -static int mshv_pre_guest_mode_work(struct mshv_vp *vp) -{ - const ulong work_flags = _TIF_NOTIFY_SIGNAL | _TIF_SIGPENDING | - _TIF_NEED_RESCHED | _TIF_NOTIFY_RESUME; - ulong th_flags; - - th_flags = read_thread_flags(); - while (th_flags & work_flags) { - int ret; - - /* nb: following will call schedule */ - ret = mshv_do_pre_guest_mode_work(th_flags); - - if (ret) - return ret; - - th_flags = read_thread_flags(); - } - - return 0; -} - /* Must be called with interrupts enabled */ static long mshv_run_vp_with_root_scheduler(struct mshv_vp *vp) { @@ -529,9 +502,11 @@ static long mshv_run_vp_with_root_scheduler(struct mshv_vp *vp) u32 flags = 0; struct hv_output_dispatch_vp output; - ret = mshv_pre_guest_mode_work(vp); - if (ret) - break; + if (__xfer_to_guest_mode_work_pending()) { + ret = xfer_to_guest_mode_handle_work(); + if (ret) + break; + } if (vp->run.flags.intercept_suspend) flags |= HV_DISPATCH_VP_FLAG_CLEAR_INTERCEPT_SUSPEND; @@ -2074,9 +2049,13 @@ static int __init hv_retrieve_scheduler_type(enum hv_scheduler_type *out) /* Retrieve and stash the supported scheduler type */ static int __init mshv_retrieve_scheduler_type(struct device *dev) { - int ret; + int ret = 0; + + if (hv_l1vh_partition()) + hv_scheduler_type = HV_SCHEDULER_TYPE_CORE_SMT; + else + ret = hv_retrieve_scheduler_type(&hv_scheduler_type); - ret = hv_retrieve_scheduler_type(&hv_scheduler_type); if (ret) return ret; @@ -2203,9 +2182,6 @@ static int __init mshv_root_partition_init(struct device *dev) { int err; - if (mshv_retrieve_scheduler_type(dev)) - return -ENODEV; - err = root_scheduler_init(dev); if (err) return err; @@ -2227,7 +2203,7 @@ static int __init mshv_parent_partition_init(void) struct device *dev; union hv_hypervisor_version_info version_info; - if (!hv_root_partition() || is_kdump_kernel()) + if (!hv_parent_partition() || is_kdump_kernel()) return -ENODEV; if (hv_get_hypervisor_version(&version_info)) @@ -2264,7 +2240,12 @@ static int __init mshv_parent_partition_init(void) mshv_cpuhp_online = ret; - ret = mshv_root_partition_init(dev); + ret = mshv_retrieve_scheduler_type(dev); + if (ret) + goto remove_cpu_state; + + if (hv_root_partition()) + ret = mshv_root_partition_init(dev); if (ret) goto remove_cpu_state; diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 2ed5a1e89d69..69591dc7bad2 100644 --- 
a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -322,7 +322,7 @@ static ssize_t out_read_index_show(struct device *dev, &outbound); if (ret < 0) return ret; - return sysfs_emit(buf, "%d\n", outbound.current_read_index); + return sysfs_emit(buf, "%u\n", outbound.current_read_index); } static DEVICE_ATTR_RO(out_read_index); @@ -341,7 +341,7 @@ static ssize_t out_write_index_show(struct device *dev, &outbound); if (ret < 0) return ret; - return sysfs_emit(buf, "%d\n", outbound.current_write_index); + return sysfs_emit(buf, "%u\n", outbound.current_write_index); } static DEVICE_ATTR_RO(out_write_index); @@ -1742,7 +1742,7 @@ static ssize_t target_cpu_store(struct vmbus_channel *channel, u32 target_cpu; ssize_t ret; - if (sscanf(buf, "%uu", &target_cpu) != 1) + if (sscanf(buf, "%u", &target_cpu) != 1) return -EIO; cpus_read_lock(); @@ -1947,7 +1947,7 @@ static const struct kobj_type vmbus_chan_ktype = { * is running. * For example, HV_NIC device is used either by uio_hv_generic or hv_netvsc at any given point of * time, and "ring" sysfs is needed only when uio_hv_generic is bound to that device. To avoid - * exposing the ring buffer by default, this function is reponsible to enable visibility of + * exposing the ring buffer by default, this function is responsible to enable visibility of * ring for userspace to use. * Note: Race conditions can happen with userspace and it is not encouraged to create new * use-cases for this. This was added to maintain backward compatibility, while solving @@ -2110,7 +2110,7 @@ int vmbus_device_register(struct hv_device *child_device_obj) ret = vmbus_add_channel_kobj(child_device_obj, child_device_obj->channel); if (ret) { - pr_err("Unable to register primary channeln"); + pr_err("Unable to register primary channel\n"); goto err_kset_unregister; } hv_debug_add_dev_dir(child_device_obj); diff --git a/drivers/input/serio/Kconfig b/drivers/input/serio/Kconfig index 17edc1597446..c7ef347a4dff 100644 --- a/drivers/input/serio/Kconfig +++ b/drivers/input/serio/Kconfig @@ -276,8 +276,8 @@ config SERIO_OLPC_APSP config HYPERV_KEYBOARD tristate "Microsoft Synthetic Keyboard driver" - depends on HYPERV - default HYPERV + depends on HYPERV_VMBUS + default HYPERV_VMBUS help Select this option to enable the Hyper-V Keyboard driver. 
diff --git a/drivers/net/hyperv/Kconfig b/drivers/net/hyperv/Kconfig index c8cbd85adcf9..982964c1a9fb 100644 --- a/drivers/net/hyperv/Kconfig +++ b/drivers/net/hyperv/Kconfig @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only config HYPERV_NET tristate "Microsoft Hyper-V virtual network driver" - depends on HYPERV + depends on HYPERV_VMBUS select UCS2_STRING select NLS help diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index 9a249c65aedc..7065a8e5f9b1 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -221,7 +221,7 @@ config PCI_LABEL config PCI_HYPERV tristate "Hyper-V PCI Frontend" - depends on ((X86 && X86_64) || ARM64) && HYPERV && PCI_MSI && SYSFS + depends on ((X86 && X86_64) || ARM64) && HYPERV_VMBUS && PCI_MSI && SYSFS select PCI_HYPERV_INTERFACE select IRQ_MSI_LIB help diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index bbd81a43047d..f963e4f9e552 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -57,6 +57,11 @@ #define L3C_V2_NR_EVENTS 0xFF HISI_PMU_EVENT_ATTR_EXTRACTOR(ext, config, 17, 16); +/* + * Remain the config1:0-7 for backward compatibility if some existing users + * hardcode the config1:0-7 directly without parsing the sysfs attribute. + */ +HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_core_deprecated, config1, 7, 0); HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_req, config1, 10, 8); HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_cfg, config1, 15, 11); HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_skt, config1, 16, 16); @@ -95,6 +100,21 @@ static bool support_ext(struct hisi_l3c_pmu *pmu) return l3c_pmu_ext->support_ext; } +/* + * tt_core was extended to cover all the CPUs sharing the L3 and was moved from + * config1:0-7 to config2:0-*. Try it first and fallback to tt_core_deprecated + * if user's still using the deprecated one. 
+ */ +static u32 hisi_l3c_pmu_get_tt_core(struct perf_event *event) +{ + u32 core = hisi_get_tt_core(event); + + if (core) + return core; + + return hisi_get_tt_core_deprecated(event); +} + static int hisi_l3c_pmu_get_event_idx(struct perf_event *event) { struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu); @@ -259,7 +279,7 @@ static void hisi_l3c_pmu_clear_ds(struct perf_event *event) static void hisi_l3c_pmu_config_core_tracetag(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; - u32 core = hisi_get_tt_core(event); + u32 core = hisi_l3c_pmu_get_tt_core(event); if (core) { u32 val; @@ -280,7 +300,7 @@ static void hisi_l3c_pmu_config_core_tracetag(struct perf_event *event) static void hisi_l3c_pmu_clear_core_tracetag(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; - u32 core = hisi_get_tt_core(event); + u32 core = hisi_l3c_pmu_get_tt_core(event); if (core) { u32 val; @@ -300,7 +320,7 @@ static void hisi_l3c_pmu_clear_core_tracetag(struct perf_event *event) static bool hisi_l3c_pmu_have_filter(struct perf_event *event) { - return hisi_get_tt_req(event) || hisi_get_tt_core(event) || + return hisi_get_tt_req(event) || hisi_l3c_pmu_get_tt_core(event) || hisi_get_datasrc_cfg(event) || hisi_get_datasrc_skt(event); } @@ -331,6 +351,9 @@ static int hisi_l3c_pmu_check_filter(struct perf_event *event) if (ext < 0 || ext > hisi_l3c_pmu->ext_num) return -EINVAL; + if (hisi_get_tt_core(event) && hisi_get_tt_core_deprecated(event)) + return -EINVAL; + return 0; } @@ -602,10 +625,11 @@ static const struct attribute_group hisi_l3c_pmu_v1_format_group = { static struct attribute *hisi_l3c_pmu_v2_format_attr[] = { HISI_PMU_FORMAT_ATTR(event, "config:0-7"), - HISI_PMU_FORMAT_ATTR(tt_core, "config2:0-15"), + HISI_PMU_FORMAT_ATTR(tt_core_deprecated, "config1:0-7"), HISI_PMU_FORMAT_ATTR(tt_req, "config1:8-10"), HISI_PMU_FORMAT_ATTR(datasrc_cfg, "config1:11-15"), HISI_PMU_FORMAT_ATTR(datasrc_skt, "config1:16"), + HISI_PMU_FORMAT_ATTR(tt_core, "config2:0-15"), NULL }; @@ -617,6 +641,7 @@ static const struct attribute_group hisi_l3c_pmu_v2_format_group = { static struct attribute *hisi_l3c_pmu_v3_format_attr[] = { HISI_PMU_FORMAT_ATTR(event, "config:0-7"), HISI_PMU_FORMAT_ATTR(ext, "config:16-17"), + HISI_PMU_FORMAT_ATTR(tt_core_deprecated, "config1:0-7"), HISI_PMU_FORMAT_ATTR(tt_req, "config1:8-10"), HISI_PMU_FORMAT_ATTR(tt_core, "config2:0-15"), NULL diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 5522310bab8d..19d0884479a2 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -589,7 +589,7 @@ config XEN_SCSI_FRONTEND config HYPERV_STORAGE tristate "Microsoft Hyper-V virtual storage driver" - depends on SCSI && HYPERV + depends on SCSI && HYPERV_VMBUS depends on m || SCSI_FC_ATTRS != m default HYPERV help diff --git a/drivers/uio/Kconfig b/drivers/uio/Kconfig index b060dcd7c635..6f86a61231e6 100644 --- a/drivers/uio/Kconfig +++ b/drivers/uio/Kconfig @@ -140,7 +140,7 @@ config UIO_MF624 config UIO_HV_GENERIC tristate "Generic driver for Hyper-V VMBus" - depends on HYPERV + depends on HYPERV_VMBUS help Generic driver that you can bind, dynamically, to any Hyper-V VMBus device. It is useful to provide direct access diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig index c21484d15f0c..c3b9954df804 100644 --- a/drivers/video/fbdev/Kconfig +++ b/drivers/video/fbdev/Kconfig @@ -1773,13 +1773,16 @@ config FB_BROADSHEET a bridge adapter. 
config FB_HYPERV - tristate "Microsoft Hyper-V Synthetic Video support" - depends on FB && HYPERV + tristate "Microsoft Hyper-V Synthetic Video support (DEPRECATED)" + depends on FB && HYPERV_VMBUS select DMA_CMA if HAVE_DMA_CONTIGUOUS && CMA select FB_IOMEM_HELPERS_DEFERRED help This framebuffer driver supports Microsoft Hyper-V Synthetic Video. + This driver is deprecated, please use the Hyper-V DRM driver at + drivers/gpu/drm/hyperv (CONFIG_DRM_HYPERV) instead. + config FB_SIMPLE tristate "Simple framebuffer support" depends on FB diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c index 75338ffc703f..c99e2ea4b3de 100644 --- a/drivers/video/fbdev/hyperv_fb.c +++ b/drivers/video/fbdev/hyperv_fb.c @@ -1357,6 +1357,8 @@ static int __init hvfb_drv_init(void) { int ret; + pr_warn("Deprecated: use Hyper-V DRM driver instead\n"); + if (fb_modesetting_disabled("hyper_fb")) return -ENODEV; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ebbf55f8864b..0aa7e5d1b05f 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3397,7 +3397,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device if (fs_info->sectorsize > PAGE_SIZE) btrfs_warn(fs_info, - "support for block size %u with page size %zu is experimental, some features may be missing", + "support for block size %u with page size %lu is experimental, some features may be missing", fs_info->sectorsize, PAGE_SIZE); /* * Handle the space caching options appropriately now that we have the diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index d062ac521051..230d9326b685 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c @@ -23,7 +23,11 @@ static int btrfs_encode_fh(struct inode *inode, u32 *fh, int *max_len, int type; if (parent && (len < BTRFS_FID_SIZE_CONNECTABLE)) { - *max_len = BTRFS_FID_SIZE_CONNECTABLE; + if (btrfs_root_id(BTRFS_I(inode)->root) != + btrfs_root_id(BTRFS_I(parent)->root)) + *max_len = BTRFS_FID_SIZE_CONNECTABLE_ROOT; + else + *max_len = BTRFS_FID_SIZE_CONNECTABLE; return FILEID_INVALID; } else if (len < BTRFS_FID_SIZE_NON_CONNECTABLE) { *max_len = BTRFS_FID_SIZE_NON_CONNECTABLE; @@ -45,6 +49,8 @@ static int btrfs_encode_fh(struct inode *inode, u32 *fh, int *max_len, parent_root_id = btrfs_root_id(BTRFS_I(parent)->root); if (parent_root_id != fid->root_objectid) { + if (*max_len < BTRFS_FID_SIZE_CONNECTABLE_ROOT) + return FILEID_INVALID; fid->parent_root_objectid = parent_root_id; len = BTRFS_FID_SIZE_CONNECTABLE_ROOT; type = FILEID_BTRFS_WITH_PARENT_ROOT; diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index a729b77983fa..64ba6bc807d9 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -31,6 +31,7 @@ enum hv_partition_type { HV_PARTITION_TYPE_GUEST, HV_PARTITION_TYPE_ROOT, + HV_PARTITION_TYPE_L1VH, }; struct ms_hyperv_info { @@ -162,6 +163,7 @@ static inline u64 hv_generate_guest_id(u64 kernel_version) return guest_id; } +#if IS_ENABLED(CONFIG_HYPERV_VMBUS) /* Free the message slot and signal end-of-message if required */ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type) { @@ -197,6 +199,10 @@ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type) } } +extern int vmbus_interrupt; +extern int vmbus_irq; +#endif /* CONFIG_HYPERV_VMBUS */ + int hv_get_hypervisor_version(union hv_hypervisor_version_info *info); void hv_setup_vmbus_handler(void (*handler)(void)); @@ -210,9 +216,6 @@ void hv_setup_crash_handler(void (*handler)(struct pt_regs 
*regs)); void hv_remove_crash_handler(void); void hv_setup_mshv_handler(void (*handler)(void)); -extern int vmbus_interrupt; -extern int vmbus_irq; - #if IS_ENABLED(CONFIG_HYPERV) /* * Hypervisor's notion of virtual processor ID is different from @@ -354,12 +357,22 @@ static inline bool hv_root_partition(void) { return hv_curr_partition_type == HV_PARTITION_TYPE_ROOT; } +static inline bool hv_l1vh_partition(void) +{ + return hv_curr_partition_type == HV_PARTITION_TYPE_L1VH; +} +static inline bool hv_parent_partition(void) +{ + return hv_root_partition() || hv_l1vh_partition(); +} int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages); int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id); int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags); #else /* CONFIG_MSHV_ROOT */ static inline bool hv_root_partition(void) { return false; } +static inline bool hv_l1vh_partition(void) { return false; } +static inline bool hv_parent_partition(void) { return false; } static inline int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages) { return -EOPNOTSUPP; diff --git a/include/hyperv/hvgdk_mini.h b/include/hyperv/hvgdk_mini.h index 1be7f6a02304..77abddfc750e 100644 --- a/include/hyperv/hvgdk_mini.h +++ b/include/hyperv/hvgdk_mini.h @@ -597,8 +597,6 @@ struct ms_hyperv_tsc_page { /* HV_REFERENCE_TSC_PAGE */ #define HV_SYNIC_SINT_AUTO_EOI (1ULL << 17) #define HV_SYNIC_SINT_VECTOR_MASK (0xFF) -# - /* Hyper-V defined statically assigned SINTs */ #define HV_SYNIC_INTERCEPTION_SINT_INDEX 0x00000000 #define HV_SYNIC_IOMMU_FAULT_SINT_INDEX 0x00000001 diff --git a/include/hyperv/hvhdk_mini.h b/include/hyperv/hvhdk_mini.h index 42e7876455b5..858f6a3925b3 100644 --- a/include/hyperv/hvhdk_mini.h +++ b/include/hyperv/hvhdk_mini.h @@ -301,6 +301,7 @@ struct hv_input_map_device_interrupt { /* HV_OUTPUT_MAP_DEVICE_INTERRUPT */ struct hv_output_map_device_interrupt { struct hv_interrupt_entry interrupt_entry; + u64 ext_status_deprecated[5]; } __packed; /* HV_INPUT_UNMAP_DEVICE_INTERRUPT */ diff --git a/include/linux/entry-kvm.h b/include/linux/entry-virt.h index 16149f6625e4..42c89e3e5ca7 100644 --- a/include/linux/entry-kvm.h +++ b/include/linux/entry-virt.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __LINUX_ENTRYKVM_H -#define __LINUX_ENTRYKVM_H +#ifndef __LINUX_ENTRYVIRT_H +#define __LINUX_ENTRYVIRT_H #include <linux/static_call_types.h> #include <linux/resume_user_mode.h> @@ -10,7 +10,7 @@ #include <linux/tick.h> /* Transfer to guest mode work */ -#ifdef CONFIG_KVM_XFER_TO_GUEST_WORK +#ifdef CONFIG_VIRT_XFER_TO_GUEST_WORK #ifndef ARCH_XFER_TO_GUEST_MODE_WORK # define ARCH_XFER_TO_GUEST_MODE_WORK (0) @@ -21,8 +21,6 @@ _TIF_NOTIFY_SIGNAL | _TIF_NOTIFY_RESUME | \ ARCH_XFER_TO_GUEST_MODE_WORK) -struct kvm_vcpu; - /** * arch_xfer_to_guest_mode_handle_work - Architecture specific xfer to guest * mode work handling function. @@ -32,12 +30,10 @@ struct kvm_vcpu; * Invoked from xfer_to_guest_mode_handle_work(). Defaults to NOOP. Can be * replaced by architecture specific code. 
*/ -static inline int arch_xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu, - unsigned long ti_work); +static inline int arch_xfer_to_guest_mode_handle_work(unsigned long ti_work); #ifndef arch_xfer_to_guest_mode_work -static inline int arch_xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu, - unsigned long ti_work) +static inline int arch_xfer_to_guest_mode_handle_work(unsigned long ti_work) { return 0; } @@ -46,11 +42,10 @@ static inline int arch_xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu, /** * xfer_to_guest_mode_handle_work - Check and handle pending work which needs * to be handled before going to guest mode - * @vcpu: Pointer to current's VCPU data * * Returns: 0 or an error code */ -int xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu); +int xfer_to_guest_mode_handle_work(void); /** * xfer_to_guest_mode_prepare - Perform last minute preparation work that @@ -95,6 +90,6 @@ static inline bool xfer_to_guest_mode_work_pending(void) lockdep_assert_irqs_disabled(); return __xfer_to_guest_mode_work_pending(); } -#endif /* CONFIG_KVM_XFER_TO_GUEST_WORK */ +#endif /* CONFIG_VIRT_XFER_TO_GUEST_WORK */ #endif diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index a59c5c3e95fb..59826c89171c 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -707,11 +707,6 @@ struct vmbus_channel_msginfo { unsigned char msg[]; }; -struct vmbus_close_msg { - struct vmbus_channel_msginfo info; - struct vmbus_channel_close_channel msg; -}; - enum vmbus_device_type { HV_IDE = 0, HV_SCSI, @@ -800,7 +795,7 @@ struct vmbus_channel { struct hv_ring_buffer_info outbound; /* send to parent */ struct hv_ring_buffer_info inbound; /* receive from parent */ - struct vmbus_close_msg close_msg; + struct vmbus_channel_close_channel close_msg; /* Statistics */ u64 interrupts; /* Host to Guest interrupts */ diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 19b8c4bebb9c..fa36e70df088 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2,7 +2,7 @@ #ifndef __KVM_HOST_H #define __KVM_HOST_H - +#include <linux/entry-virt.h> #include <linux/types.h> #include <linux/hardirq.h> #include <linux/list.h> @@ -2450,13 +2450,24 @@ static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) } #endif /* CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE */ -#ifdef CONFIG_KVM_XFER_TO_GUEST_WORK +#ifdef CONFIG_VIRT_XFER_TO_GUEST_WORK static inline void kvm_handle_signal_exit(struct kvm_vcpu *vcpu) { vcpu->run->exit_reason = KVM_EXIT_INTR; vcpu->stat.signal_exits++; } -#endif /* CONFIG_KVM_XFER_TO_GUEST_WORK */ + +static inline int kvm_xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu) +{ + int r = xfer_to_guest_mode_handle_work(); + + if (r) { + WARN_ON_ONCE(r != -EINTR); + kvm_handle_signal_exit(vcpu); + } + return r; +} +#endif /* CONFIG_VIRT_XFER_TO_GUEST_WORK */ /* * If more than one page is being (un)accounted, @virt must be the address of diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 621b200d9b87..c5b30054cd01 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -129,7 +129,7 @@ static inline void rcu_sysrq_start(void) { } static inline void rcu_sysrq_end(void) { } #endif /* #else #ifdef CONFIG_RCU_STALL_COMMON */ -#if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_KVM_XFER_TO_GUEST_WORK)) +#if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_VIRT_XFER_TO_GUEST_WORK)) void rcu_irq_work_resched(void); #else static __always_inline void 
rcu_irq_work_resched(void) { } diff --git a/kernel/entry/Makefile b/kernel/entry/Makefile index 77fcd83dd663..2333d70802e4 100644 --- a/kernel/entry/Makefile +++ b/kernel/entry/Makefile @@ -14,4 +14,4 @@ CFLAGS_common.o += -fno-stack-protector obj-$(CONFIG_GENERIC_IRQ_ENTRY) += common.o obj-$(CONFIG_GENERIC_SYSCALL) += syscall-common.o syscall_user_dispatch.o -obj-$(CONFIG_KVM_XFER_TO_GUEST_WORK) += kvm.o +obj-$(CONFIG_VIRT_XFER_TO_GUEST_WORK) += virt.o diff --git a/kernel/entry/kvm.c b/kernel/entry/virt.c index 8485f63863af..c52f99249763 100644 --- a/kernel/entry/kvm.c +++ b/kernel/entry/virt.c @@ -1,17 +1,14 @@ // SPDX-License-Identifier: GPL-2.0 -#include <linux/entry-kvm.h> -#include <linux/kvm_host.h> +#include <linux/entry-virt.h> -static int xfer_to_guest_mode_work(struct kvm_vcpu *vcpu, unsigned long ti_work) +static int xfer_to_guest_mode_work(unsigned long ti_work) { do { int ret; - if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) { - kvm_handle_signal_exit(vcpu); + if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) return -EINTR; - } if (ti_work & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)) schedule(); @@ -19,7 +16,7 @@ static int xfer_to_guest_mode_work(struct kvm_vcpu *vcpu, unsigned long ti_work) if (ti_work & _TIF_NOTIFY_RESUME) resume_user_mode_work(NULL); - ret = arch_xfer_to_guest_mode_handle_work(vcpu, ti_work); + ret = arch_xfer_to_guest_mode_handle_work(ti_work); if (ret) return ret; @@ -28,7 +25,7 @@ static int xfer_to_guest_mode_work(struct kvm_vcpu *vcpu, unsigned long ti_work) return 0; } -int xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu) +int xfer_to_guest_mode_handle_work(void) { unsigned long ti_work; @@ -44,6 +41,6 @@ int xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu) if (!(ti_work & XFER_TO_GUEST_MODE_WORK)) return 0; - return xfer_to_guest_mode_work(vcpu, ti_work); + return xfer_to_guest_mode_work(ti_work); } EXPORT_SYMBOL_GPL(xfer_to_guest_mode_handle_work); diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 31690ffa452a..8293bae1dec1 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -573,7 +573,7 @@ void rcutorture_format_gp_seqs(unsigned long long seqs, char *cp, size_t len) } EXPORT_SYMBOL_GPL(rcutorture_format_gp_seqs); -#if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_KVM_XFER_TO_GUEST_WORK)) +#if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_VIRT_XFER_TO_GUEST_WORK)) /* * An empty function that will trigger a reschedule on * IRQ tail once IRQs get re-enabled on userspace/guest resume. 
@@ -602,7 +602,7 @@ noinstr void rcu_irq_work_resched(void) if (IS_ENABLED(CONFIG_GENERIC_ENTRY) && !(current->flags & PF_VCPU)) return; - if (IS_ENABLED(CONFIG_KVM_XFER_TO_GUEST_WORK) && (current->flags & PF_VCPU)) + if (IS_ENABLED(CONFIG_VIRT_XFER_TO_GUEST_WORK) && (current->flags & PF_VCPU)) return; instrumentation_begin(); @@ -611,7 +611,7 @@ noinstr void rcu_irq_work_resched(void) } instrumentation_end(); } -#endif /* #if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_KVM_XFER_TO_GUEST_WORK)) */ +#endif /* #if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_VIRT_XFER_TO_GUEST_WORK)) */ #ifdef CONFIG_PROVE_RCU /** diff --git a/net/vmw_vsock/Kconfig b/net/vmw_vsock/Kconfig index 56356d2980c8..8e803c4828c4 100644 --- a/net/vmw_vsock/Kconfig +++ b/net/vmw_vsock/Kconfig @@ -72,7 +72,7 @@ config VIRTIO_VSOCKETS_COMMON config HYPERV_VSOCKETS tristate "Hyper-V transport for Virtual Sockets" - depends on VSOCKETS && HYPERV + depends on VSOCKETS && HYPERV_VMBUS help This module implements a Hyper-V transport for Virtual Sockets. diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index 1b7d5be0b6c4..0227e13cd8dd 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -87,7 +87,7 @@ config HAVE_KVM_VCPU_RUN_PID_CHANGE config HAVE_KVM_NO_POLL bool -config KVM_XFER_TO_GUEST_WORK +config VIRT_XFER_TO_GUEST_WORK bool config HAVE_KVM_PM_NOTIFIER |
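For reference, a minimal sketch of how an architecture's vcpu run loop is expected to use the renamed entry helpers after this series: the vcpu pointer is no longer passed into the generic xfer_to_guest_mode_handle_work(), and the KVM-specific signal-exit handling moves into the kvm_xfer_to_guest_mode_handle_work() wrapper added to include/linux/kvm_host.h above. The loop shape and the example_* names below are illustrative assumptions, not code from this series:

/*
 * Illustrative sketch only: a simplified vcpu run loop built on the
 * renamed VIRT_XFER_TO_GUEST_WORK entry helpers.  example_vcpu_run()
 * and example_enter_guest() are hypothetical stand-ins for an
 * architecture's real vcpu_run()/guest-entry path.
 */
#include <linux/entry-virt.h>	/* __xfer_to_guest_mode_work_pending() */
#include <linux/kvm_host.h>	/* kvm_xfer_to_guest_mode_handle_work() */

/* Hypothetical arch hook: returns > 0 to keep looping, <= 0 to exit. */
static int example_enter_guest(struct kvm_vcpu *vcpu)
{
	return 0;	/* stub: exit to userspace immediately in this sketch */
}

static int example_vcpu_run(struct kvm_vcpu *vcpu)
{
	int r = 1;

	while (r > 0) {
		/*
		 * Handle pending TIF work (signals, reschedule,
		 * notify-resume) before entering the guest.  No vcpu
		 * argument is passed to the generic entry code any more;
		 * on -EINTR the wrapper has already set
		 * vcpu->run->exit_reason via kvm_handle_signal_exit().
		 */
		if (__xfer_to_guest_mode_work_pending()) {
			r = kvm_xfer_to_guest_mode_handle_work(vcpu);
			if (r)
				break;
		}

		r = example_enter_guest(vcpu);
	}

	return r;
}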