diff options
author | Jason Gunthorpe <jgg@nvidia.com> | 2020-12-01 20:40:50 -0400 |
---|---|---|
committer | Jason Gunthorpe <jgg@nvidia.com> | 2020-12-01 20:40:50 -0400 |
commit | 2b0a999ba003ea9b48910d75c318ae63246bc6f3 (patch) | |
tree | 71dc50d336f2a7aac158e36aed1c6b99db821f2d /arch/x86 | |
parent | f93c39bc95472dae3b5de71da5c005f47ece3148 (diff) | |
parent | b65054597872ce3aefbc6a666385eabdf9e288da (diff) |
Merge tag 'v5.10-rc6' into rdma.git for-next
For dependencies in following patches
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/include/asm/kvm_host.h | 1 | ||||
-rw-r--r-- | arch/x86/include/asm/mwait.h | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/bugs.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mce/core.c | 6 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/resctrl/rdtgroup.c | 65 | ||||
-rw-r--r-- | arch/x86/kernel/process.c | 12 | ||||
-rw-r--r-- | arch/x86/kernel/tboot.c | 5 | ||||
-rw-r--r-- | arch/x86/kvm/irq.c | 85 | ||||
-rw-r--r-- | arch/x86/kvm/lapic.c | 2 | ||||
-rw-r--r-- | arch/x86/kvm/mmu/mmu.c | 2 | ||||
-rw-r--r-- | arch/x86/kvm/svm/sev.c | 2 | ||||
-rw-r--r-- | arch/x86/kvm/svm/svm.c | 4 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 18 |
13 files changed, 91 insertions, 117 deletions
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 324ddd7fd0aa..7e5f33a0d0e2 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1656,6 +1656,7 @@ int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v); int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); +int kvm_cpu_has_extint(struct kvm_vcpu *v); int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); int kvm_cpu_get_interrupt(struct kvm_vcpu *v); void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event); diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index e039a933aca3..29dd27b5a339 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -88,8 +88,6 @@ static inline void __mwaitx(unsigned long eax, unsigned long ebx, static inline void __sti_mwait(unsigned long eax, unsigned long ecx) { - trace_hardirqs_on(); - mds_idle_clear_cpu_buffers(); /* "mwait %eax, %ecx;" */ asm volatile("sti; .byte 0x0f, 0x01, 0xc9;" diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 581fb7223ad0..d41b70fe4918 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -739,11 +739,13 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) if (boot_cpu_has(X86_FEATURE_IBPB)) { setup_force_cpu_cap(X86_FEATURE_USE_IBPB); + spectre_v2_user_ibpb = mode; switch (cmd) { case SPECTRE_V2_USER_CMD_FORCE: case SPECTRE_V2_USER_CMD_PRCTL_IBPB: case SPECTRE_V2_USER_CMD_SECCOMP_IBPB: static_branch_enable(&switch_mm_always_ibpb); + spectre_v2_user_ibpb = SPECTRE_V2_USER_STRICT; break; case SPECTRE_V2_USER_CMD_PRCTL: case SPECTRE_V2_USER_CMD_AUTO: @@ -757,8 +759,6 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) pr_info("mitigation: Enabling %s Indirect Branch Prediction Barrier\n", static_key_enabled(&switch_mm_always_ibpb) ? "always-on" : "conditional"); - - spectre_v2_user_ibpb = mode; } /* diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 4102b866e7c0..32b7099e3511 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1384,8 +1384,10 @@ noinstr void do_machine_check(struct pt_regs *regs) * When there's any problem use only local no_way_out state. */ if (!lmce) { - if (mce_end(order) < 0) - no_way_out = worst >= MCE_PANIC_SEVERITY; + if (mce_end(order) < 0) { + if (!no_way_out) + no_way_out = worst >= MCE_PANIC_SEVERITY; + } } else { /* * If there was a fatal machine check we should have diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index af323e2e3100..6f4ca4bea625 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -507,6 +507,24 @@ unlock: return ret ?: nbytes; } +/** + * rdtgroup_remove - the helper to remove resource group safely + * @rdtgrp: resource group to remove + * + * On resource group creation via a mkdir, an extra kernfs_node reference is + * taken to ensure that the rdtgroup structure remains accessible for the + * rdtgroup_kn_unlock() calls where it is removed. + * + * Drop the extra reference here, then free the rdtgroup structure. + * + * Return: void + */ +static void rdtgroup_remove(struct rdtgroup *rdtgrp) +{ + kernfs_put(rdtgrp->kn); + kfree(rdtgrp); +} + struct task_move_callback { struct callback_head work; struct rdtgroup *rdtgrp; @@ -529,7 +547,7 @@ static void move_myself(struct callback_head *head) (rdtgrp->flags & RDT_DELETED)) { current->closid = 0; current->rmid = 0; - kfree(rdtgrp); + rdtgroup_remove(rdtgrp); } if (unlikely(current->flags & PF_EXITING)) @@ -1769,7 +1787,6 @@ static int rdtgroup_mkdir_info_resdir(struct rdt_resource *r, char *name, if (IS_ERR(kn_subdir)) return PTR_ERR(kn_subdir); - kernfs_get(kn_subdir); ret = rdtgroup_kn_set_ugid(kn_subdir); if (ret) return ret; @@ -1792,7 +1809,6 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn) kn_info = kernfs_create_dir(parent_kn, "info", parent_kn->mode, NULL); if (IS_ERR(kn_info)) return PTR_ERR(kn_info); - kernfs_get(kn_info); ret = rdtgroup_add_files(kn_info, RF_TOP_INFO); if (ret) @@ -1813,12 +1829,6 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn) goto out_destroy; } - /* - * This extra ref will be put in kernfs_remove() and guarantees - * that @rdtgrp->kn is always accessible. - */ - kernfs_get(kn_info); - ret = rdtgroup_kn_set_ugid(kn_info); if (ret) goto out_destroy; @@ -1847,12 +1857,6 @@ mongroup_create_dir(struct kernfs_node *parent_kn, struct rdtgroup *prgrp, if (dest_kn) *dest_kn = kn; - /* - * This extra ref will be put in kernfs_remove() and guarantees - * that @rdtgrp->kn is always accessible. - */ - kernfs_get(kn); - ret = rdtgroup_kn_set_ugid(kn); if (ret) goto out_destroy; @@ -2079,8 +2083,7 @@ void rdtgroup_kn_unlock(struct kernfs_node *kn) rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) rdtgroup_pseudo_lock_remove(rdtgrp); kernfs_unbreak_active_protection(kn); - kernfs_put(rdtgrp->kn); - kfree(rdtgrp); + rdtgroup_remove(rdtgrp); } else { kernfs_unbreak_active_protection(kn); } @@ -2139,13 +2142,11 @@ static int rdt_get_tree(struct fs_context *fc) &kn_mongrp); if (ret < 0) goto out_info; - kernfs_get(kn_mongrp); ret = mkdir_mondata_all(rdtgroup_default.kn, &rdtgroup_default, &kn_mondata); if (ret < 0) goto out_mongrp; - kernfs_get(kn_mondata); rdtgroup_default.mon.mon_data_kn = kn_mondata; } @@ -2357,7 +2358,7 @@ static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp) if (atomic_read(&sentry->waitcount) != 0) sentry->flags = RDT_DELETED; else - kfree(sentry); + rdtgroup_remove(sentry); } } @@ -2399,7 +2400,7 @@ static void rmdir_all_sub(void) if (atomic_read(&rdtgrp->waitcount) != 0) rdtgrp->flags = RDT_DELETED; else - kfree(rdtgrp); + rdtgroup_remove(rdtgrp); } /* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */ update_closid_rmid(cpu_online_mask, &rdtgroup_default); @@ -2499,11 +2500,6 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn, if (IS_ERR(kn)) return PTR_ERR(kn); - /* - * This extra ref will be put in kernfs_remove() and guarantees - * that kn is always accessible. - */ - kernfs_get(kn); ret = rdtgroup_kn_set_ugid(kn); if (ret) goto out_destroy; @@ -2838,8 +2834,8 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, /* * kernfs_remove() will drop the reference count on "kn" which * will free it. But we still need it to stick around for the - * rdtgroup_kn_unlock(kn} call below. Take one extra reference - * here, which will be dropped inside rdtgroup_kn_unlock(). + * rdtgroup_kn_unlock(kn) call. Take one extra reference here, + * which will be dropped by kernfs_put() in rdtgroup_remove(). */ kernfs_get(kn); @@ -2880,6 +2876,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, out_idfree: free_rmid(rdtgrp->mon.rmid); out_destroy: + kernfs_put(rdtgrp->kn); kernfs_remove(rdtgrp->kn); out_free_rgrp: kfree(rdtgrp); @@ -2892,7 +2889,7 @@ static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp) { kernfs_remove(rgrp->kn); free_rmid(rgrp->mon.rmid); - kfree(rgrp); + rdtgroup_remove(rgrp); } /* @@ -3049,11 +3046,6 @@ static int rdtgroup_rmdir_mon(struct kernfs_node *kn, struct rdtgroup *rdtgrp, WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list)); list_del(&rdtgrp->mon.crdtgrp_list); - /* - * one extra hold on this, will drop when we kfree(rdtgrp) - * in rdtgroup_kn_unlock() - */ - kernfs_get(kn); kernfs_remove(rdtgrp->kn); return 0; @@ -3065,11 +3057,6 @@ static int rdtgroup_ctrl_remove(struct kernfs_node *kn, rdtgrp->flags = RDT_DELETED; list_del(&rdtgrp->rdtgroup_list); - /* - * one extra hold on this, will drop when we kfree(rdtgrp) - * in rdtgroup_kn_unlock() - */ - kernfs_get(kn); kernfs_remove(rdtgrp->kn); return 0; } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index ba4593a913fa..145a7ac0c19a 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -685,7 +685,7 @@ void arch_cpu_idle(void) */ void __cpuidle default_idle(void) { - safe_halt(); + raw_safe_halt(); } #if defined(CONFIG_APM_MODULE) || defined(CONFIG_HALTPOLL_CPUIDLE_MODULE) EXPORT_SYMBOL(default_idle); @@ -736,6 +736,8 @@ void stop_this_cpu(void *dummy) /* * AMD Erratum 400 aware idle routine. We handle it the same way as C3 power * states (local apic timer and TSC stop). + * + * XXX this function is completely buggered vs RCU and tracing. */ static void amd_e400_idle(void) { @@ -757,9 +759,9 @@ static void amd_e400_idle(void) * The switch back from broadcast mode needs to be called with * interrupts disabled. */ - local_irq_disable(); + raw_local_irq_disable(); tick_broadcast_exit(); - local_irq_enable(); + raw_local_irq_enable(); } /* @@ -801,9 +803,9 @@ static __cpuidle void mwait_idle(void) if (!need_resched()) __sti_mwait(0, 0); else - local_irq_enable(); + raw_local_irq_enable(); } else { - local_irq_enable(); + raw_local_irq_enable(); } __current_clr_polling(); } diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index 420be871d9d4..ae64f98ec2ab 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -514,13 +514,10 @@ int tboot_force_iommu(void) if (!tboot_enabled()) return 0; - if (no_iommu || swiotlb || dmar_disabled) + if (no_iommu || dmar_disabled) pr_warn("Forcing Intel-IOMMU to enabled\n"); dmar_disabled = 0; -#ifdef CONFIG_SWIOTLB - swiotlb = 0; -#endif no_iommu = 0; return 1; diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index 99d118ffc67d..814698e5b152 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -40,29 +40,10 @@ static int pending_userspace_extint(struct kvm_vcpu *v) * check if there is pending interrupt from * non-APIC source without intack. */ -static int kvm_cpu_has_extint(struct kvm_vcpu *v) -{ - u8 accept = kvm_apic_accept_pic_intr(v); - - if (accept) { - if (irqchip_split(v->kvm)) - return pending_userspace_extint(v); - else - return v->kvm->arch.vpic->output; - } else - return 0; -} - -/* - * check if there is injectable interrupt: - * when virtual interrupt delivery enabled, - * interrupt from apic will handled by hardware, - * we don't need to check it here. - */ -int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v) +int kvm_cpu_has_extint(struct kvm_vcpu *v) { /* - * FIXME: interrupt.injected represents an interrupt that it's + * FIXME: interrupt.injected represents an interrupt whose * side-effects have already been applied (e.g. bit from IRR * already moved to ISR). Therefore, it is incorrect to rely * on interrupt.injected to know if there is a pending @@ -75,6 +56,23 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v) if (!lapic_in_kernel(v)) return v->arch.interrupt.injected; + if (!kvm_apic_accept_pic_intr(v)) + return 0; + + if (irqchip_split(v->kvm)) + return pending_userspace_extint(v); + else + return v->kvm->arch.vpic->output; +} + +/* + * check if there is injectable interrupt: + * when virtual interrupt delivery enabled, + * interrupt from apic will handled by hardware, + * we don't need to check it here. + */ +int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v) +{ if (kvm_cpu_has_extint(v)) return 1; @@ -91,20 +89,6 @@ EXPORT_SYMBOL_GPL(kvm_cpu_has_injectable_intr); */ int kvm_cpu_has_interrupt(struct kvm_vcpu *v) { - /* - * FIXME: interrupt.injected represents an interrupt that it's - * side-effects have already been applied (e.g. bit from IRR - * already moved to ISR). Therefore, it is incorrect to rely - * on interrupt.injected to know if there is a pending - * interrupt in the user-mode LAPIC. - * This leads to nVMX/nSVM not be able to distinguish - * if it should exit from L2 to L1 on EXTERNAL_INTERRUPT on - * pending interrupt or should re-inject an injected - * interrupt. - */ - if (!lapic_in_kernel(v)) - return v->arch.interrupt.injected; - if (kvm_cpu_has_extint(v)) return 1; @@ -118,16 +102,21 @@ EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt); */ static int kvm_cpu_get_extint(struct kvm_vcpu *v) { - if (kvm_cpu_has_extint(v)) { - if (irqchip_split(v->kvm)) { - int vector = v->arch.pending_external_vector; - - v->arch.pending_external_vector = -1; - return vector; - } else - return kvm_pic_read_irq(v->kvm); /* PIC */ - } else + if (!kvm_cpu_has_extint(v)) { + WARN_ON(!lapic_in_kernel(v)); return -1; + } + + if (!lapic_in_kernel(v)) + return v->arch.interrupt.nr; + + if (irqchip_split(v->kvm)) { + int vector = v->arch.pending_external_vector; + + v->arch.pending_external_vector = -1; + return vector; + } else + return kvm_pic_read_irq(v->kvm); /* PIC */ } /* @@ -135,13 +124,7 @@ static int kvm_cpu_get_extint(struct kvm_vcpu *v) */ int kvm_cpu_get_interrupt(struct kvm_vcpu *v) { - int vector; - - if (!lapic_in_kernel(v)) - return v->arch.interrupt.nr; - - vector = kvm_cpu_get_extint(v); - + int vector = kvm_cpu_get_extint(v); if (vector != -1) return vector; /* PIC */ diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 105e7859d1f2..86c33d53c90a 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2465,7 +2465,7 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu) struct kvm_lapic *apic = vcpu->arch.apic; u32 ppr; - if (!kvm_apic_hw_enabled(apic)) + if (!kvm_apic_present(vcpu)) return -1; __apic_update_ppr(apic, &ppr); diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 5bb1939b65d8..7a6ae9e90bd7 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3517,7 +3517,7 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) { u64 sptes[PT64_ROOT_MAX_LEVEL]; struct rsvd_bits_validate *rsvd_check; - int root = vcpu->arch.mmu->root_level; + int root = vcpu->arch.mmu->shadow_root_level; int leaf; int level; bool reserved = false; diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index c0b14106258a..566f4d18185b 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -642,8 +642,8 @@ static int __sev_dbg_decrypt(struct kvm *kvm, unsigned long src_paddr, * Its safe to read more than we are asked, caller should ensure that * destination has enough space. */ - src_paddr = round_down(src_paddr, 16); offset = src_paddr & 15; + src_paddr = round_down(src_paddr, 16); sz = round_up(sz + offset, 16); return __sev_issue_dbg_cmd(kvm, src_paddr, dst_paddr, sz, err, false); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 1e81cfebd491..79b3a564f1c9 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1309,8 +1309,10 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) svm->avic_is_running = true; svm->msrpm = svm_vcpu_alloc_msrpm(); - if (!svm->msrpm) + if (!svm->msrpm) { + err = -ENOMEM; goto error_free_vmcb_page; + } svm_vcpu_init_msrpm(vcpu, svm->msrpm); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 078a39d489fe..e545a8a613b1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4051,21 +4051,23 @@ static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu, static int kvm_cpu_accept_dm_intr(struct kvm_vcpu *vcpu) { + /* + * We can accept userspace's request for interrupt injection + * as long as we have a place to store the interrupt number. + * The actual injection will happen when the CPU is able to + * deliver the interrupt. + */ + if (kvm_cpu_has_extint(vcpu)) + return false; + + /* Acknowledging ExtINT does not happen if LINT0 is masked. */ return (!lapic_in_kernel(vcpu) || kvm_apic_accept_pic_intr(vcpu)); } -/* - * if userspace requested an interrupt window, check that the - * interrupt window is open. - * - * No need to exit to userspace if we already have an interrupt queued. - */ static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu) { return kvm_arch_interrupt_allowed(vcpu) && - !kvm_cpu_has_interrupt(vcpu) && - !kvm_event_needs_reinjection(vcpu) && kvm_cpu_accept_dm_intr(vcpu); } |