diff options
author | Ingo Molnar <mingo@kernel.org> | 2020-07-28 13:18:01 +0200 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2020-07-28 13:18:01 +0200 |
commit | e89d4ca1df630378bde3e36c42001b755b0044fe (patch) | |
tree | d953ccecb8365acab65d0075e674f7e9a9bd9880 /arch/x86/kernel | |
parent | 2ac5413e5edca6910d2ae157187a889e94be2b62 (diff) | |
parent | 92ed301919932f777713b9172e525674157e983d (diff) |
Merge tag 'v5.8-rc7' into perf/core, to pick up fixes
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/apic/io_apic.c | 10 | ||||
-rw-r--r-- | arch/x86/kernel/apic/msi.c | 18 | ||||
-rw-r--r-- | arch/x86/kernel/apic/vector.c | 23 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/intel.c | 11 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mce/core.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/dumpstack.c | 27 | ||||
-rw-r--r-- | arch/x86/kernel/fpu/core.c | 6 | ||||
-rw-r--r-- | arch/x86/kernel/fpu/xstate.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/ldt.c | 26 | ||||
-rw-r--r-- | arch/x86/kernel/paravirt.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/process.c | 18 | ||||
-rw-r--r-- | arch/x86/kernel/stacktrace.c | 5 | ||||
-rw-r--r-- | arch/x86/kernel/traps.c | 16 | ||||
-rw-r--r-- | arch/x86/kernel/unwind_orc.c | 8 | ||||
-rw-r--r-- | arch/x86/kernel/vmlinux.lds.S | 1 |
15 files changed, 110 insertions, 68 deletions
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index ce61e3e7d399..81ffcfbfaef2 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2316,12 +2316,12 @@ static int mp_irqdomain_create(int ioapic) ip->irqdomain = irq_domain_create_linear(fn, hwirqs, cfg->ops, (void *)(long)ioapic); - /* Release fw handle if it was allocated above */ - if (!cfg->dev) - irq_domain_free_fwnode(fn); - - if (!ip->irqdomain) + if (!ip->irqdomain) { + /* Release fw handle if it was allocated above */ + if (!cfg->dev) + irq_domain_free_fwnode(fn); return -ENOMEM; + } ip->irqdomain->parent = parent; diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index 5cbaca58af95..c2b2911feeef 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -263,12 +263,13 @@ void __init arch_init_msi_domain(struct irq_domain *parent) msi_default_domain = pci_msi_create_irq_domain(fn, &pci_msi_domain_info, parent); - irq_domain_free_fwnode(fn); } - if (!msi_default_domain) + if (!msi_default_domain) { + irq_domain_free_fwnode(fn); pr_warn("failed to initialize irqdomain for MSI/MSI-x.\n"); - else + } else { msi_default_domain->flags |= IRQ_DOMAIN_MSI_NOMASK_QUIRK; + } } #ifdef CONFIG_IRQ_REMAP @@ -301,7 +302,8 @@ struct irq_domain *arch_create_remap_msi_irq_domain(struct irq_domain *parent, if (!fn) return NULL; d = pci_msi_create_irq_domain(fn, &pci_msi_ir_domain_info, parent); - irq_domain_free_fwnode(fn); + if (!d) + irq_domain_free_fwnode(fn); return d; } #endif @@ -364,7 +366,8 @@ static struct irq_domain *dmar_get_irq_domain(void) if (fn) { dmar_domain = msi_create_irq_domain(fn, &dmar_msi_domain_info, x86_vector_domain); - irq_domain_free_fwnode(fn); + if (!dmar_domain) + irq_domain_free_fwnode(fn); } out: mutex_unlock(&dmar_lock); @@ -489,7 +492,10 @@ struct irq_domain *hpet_create_irq_domain(int hpet_id) } d = msi_create_irq_domain(fn, domain_info, parent); - irq_domain_free_fwnode(fn); + if (!d) { + irq_domain_free_fwnode(fn); + kfree(domain_info); + } return d; } diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index c48be6e1f676..7649da2478d8 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -446,12 +446,10 @@ static int x86_vector_activate(struct irq_domain *dom, struct irq_data *irqd, trace_vector_activate(irqd->irq, apicd->is_managed, apicd->can_reserve, reserve); - /* Nothing to do for fixed assigned vectors */ - if (!apicd->can_reserve && !apicd->is_managed) - return 0; - raw_spin_lock_irqsave(&vector_lock, flags); - if (reserve || irqd_is_managed_and_shutdown(irqd)) + if (!apicd->can_reserve && !apicd->is_managed) + assign_irq_vector_any_locked(irqd); + else if (reserve || irqd_is_managed_and_shutdown(irqd)) vector_assign_managed_shutdown(irqd); else if (apicd->is_managed) ret = activate_managed(irqd); @@ -709,7 +707,6 @@ int __init arch_early_irq_init(void) x86_vector_domain = irq_domain_create_tree(fn, &x86_vector_domain_ops, NULL); BUG_ON(x86_vector_domain == NULL); - irq_domain_free_fwnode(fn); irq_set_default_host(x86_vector_domain); arch_init_msi_domain(x86_vector_domain); @@ -775,20 +772,10 @@ void lapic_offline(void) static int apic_set_affinity(struct irq_data *irqd, const struct cpumask *dest, bool force) { - struct apic_chip_data *apicd = apic_chip_data(irqd); int err; - /* - * Core code can call here for inactive interrupts. For inactive - * interrupts which use managed or reservation mode there is no - * point in going through the vector assignment right now as the - * activation will assign a vector which fits the destination - * cpumask. Let the core code store the destination mask and be - * done with it. - */ - if (!irqd_is_activated(irqd) && - (apicd->is_managed || apicd->can_reserve)) - return IRQ_SET_MASK_OK; + if (WARN_ON_ONCE(!irqd_is_activated(irqd))) + return -EIO; raw_spin_lock(&vector_lock); cpumask_and(vector_searchmask, dest, cpu_online_mask); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index c25a67a34bd3..0ab48f1cdf84 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -50,6 +50,13 @@ static enum split_lock_detect_state sld_state __ro_after_init = sld_off; static u64 msr_test_ctrl_cache __ro_after_init; /* + * With a name like MSR_TEST_CTL it should go without saying, but don't touch + * MSR_TEST_CTL unless the CPU is one of the whitelisted models. Writing it + * on CPUs that do not support SLD can cause fireworks, even when writing '0'. + */ +static bool cpu_model_supports_sld __ro_after_init; + +/* * Processors which have self-snooping capability can handle conflicting * memory type across CPUs by snooping its own cache. However, there exists * CPU models in which having conflicting memory types still leads to @@ -1071,7 +1078,8 @@ static void sld_update_msr(bool on) static void split_lock_init(void) { - split_lock_verify_msr(sld_state != sld_off); + if (cpu_model_supports_sld) + split_lock_verify_msr(sld_state != sld_off); } static void split_lock_warn(unsigned long ip) @@ -1177,5 +1185,6 @@ void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c) return; } + cpu_model_supports_sld = true; split_lock_setup(); } diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index fbe89a92ff36..14e4b4d17ee5 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1901,6 +1901,8 @@ void (*machine_check_vector)(struct pt_regs *) = unexpected_machine_check; static __always_inline void exc_machine_check_kernel(struct pt_regs *regs) { + WARN_ON_ONCE(user_mode(regs)); + /* * Only required when from kernel mode. See * mce_check_crashing_cpu() for details. @@ -1954,7 +1956,7 @@ DEFINE_IDTENTRY_MCE_USER(exc_machine_check) } #else /* 32bit unified entry point */ -DEFINE_IDTENTRY_MCE(exc_machine_check) +DEFINE_IDTENTRY_RAW(exc_machine_check) { unsigned long dr7; diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index b037cfa7c0c5..7401cc12c3cc 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -71,6 +71,22 @@ static void printk_stack_address(unsigned long address, int reliable, printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address); } +static int copy_code(struct pt_regs *regs, u8 *buf, unsigned long src, + unsigned int nbytes) +{ + if (!user_mode(regs)) + return copy_from_kernel_nofault(buf, (u8 *)src, nbytes); + + /* + * Make sure userspace isn't trying to trick us into dumping kernel + * memory by pointing the userspace instruction pointer at it. + */ + if (__chk_range_not_ok(src, nbytes, TASK_SIZE_MAX)) + return -EINVAL; + + return copy_from_user_nmi(buf, (void __user *)src, nbytes); +} + /* * There are a couple of reasons for the 2/3rd prologue, courtesy of Linus: * @@ -97,17 +113,8 @@ void show_opcodes(struct pt_regs *regs, const char *loglvl) #define OPCODE_BUFSIZE (PROLOGUE_SIZE + 1 + EPILOGUE_SIZE) u8 opcodes[OPCODE_BUFSIZE]; unsigned long prologue = regs->ip - PROLOGUE_SIZE; - bool bad_ip; - - /* - * Make sure userspace isn't trying to trick us into dumping kernel - * memory by pointing the userspace instruction pointer at it. - */ - bad_ip = user_mode(regs) && - __chk_range_not_ok(prologue, OPCODE_BUFSIZE, TASK_SIZE_MAX); - if (bad_ip || copy_from_kernel_nofault(opcodes, (u8 *)prologue, - OPCODE_BUFSIZE)) { + if (copy_code(regs, opcodes, prologue, sizeof(opcodes))) { printk("%sCode: Bad RIP value.\n", loglvl); } else { printk("%sCode: %" __stringify(PROLOGUE_SIZE) "ph <%02x> %" diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 1bb7532f5f34..eb86a2b831b1 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -140,6 +140,12 @@ void kernel_fpu_begin(void) copy_fpregs_to_fpstate(¤t->thread.fpu); } __cpu_invalidate_fpregs_state(); + + if (boot_cpu_has(X86_FEATURE_XMM)) + ldmxcsr(MXCSR_DEFAULT); + + if (boot_cpu_has(X86_FEATURE_FPU)) + asm volatile ("fninit"); } EXPORT_SYMBOL_GPL(kernel_fpu_begin); diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 10cf8789c378..be2a68a09d19 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1079,7 +1079,7 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int of copy_part(offsetof(struct fxregs_state, st_space), 128, &xsave->i387.st_space, &kbuf, &offset_start, &count); if (header.xfeatures & XFEATURE_MASK_SSE) - copy_part(xstate_offsets[XFEATURE_MASK_SSE], 256, + copy_part(xstate_offsets[XFEATURE_SSE], 256, &xsave->i387.xmm_space, &kbuf, &offset_start, &count); /* * Fill xsave->i387.sw_reserved value for ptrace frame: diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 8748321c4486..b8aee71840ae 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -29,6 +29,8 @@ #include <asm/mmu_context.h> #include <asm/pgtable_areas.h> +#include <xen/xen.h> + /* This is a multiple of PAGE_SIZE. */ #define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE) @@ -543,6 +545,28 @@ static int read_default_ldt(void __user *ptr, unsigned long bytecount) return bytecount; } +static bool allow_16bit_segments(void) +{ + if (!IS_ENABLED(CONFIG_X86_16BIT)) + return false; + +#ifdef CONFIG_XEN_PV + /* + * Xen PV does not implement ESPFIX64, which means that 16-bit + * segments will not work correctly. Until either Xen PV implements + * ESPFIX64 and can signal this fact to the guest or unless someone + * provides compelling evidence that allowing broken 16-bit segments + * is worthwhile, disallow 16-bit segments under Xen PV. + */ + if (xen_pv_domain()) { + pr_info_once("Warning: 16-bit segments do not work correctly in a Xen PV guest\n"); + return false; + } +#endif + + return true; +} + static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) { struct mm_struct *mm = current->mm; @@ -574,7 +598,7 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) /* The user wants to clear the entry. */ memset(&ldt, 0, sizeof(ldt)); } else { - if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) { + if (!ldt_info.seg_32bit && !allow_16bit_segments()) { error = -EINVAL; goto out; } diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 674a7d66d960..de2138ba38e5 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -324,7 +324,8 @@ struct paravirt_patch_template pv_ops = { .cpu.swapgs = native_swapgs, #ifdef CONFIG_X86_IOPL_IOPERM - .cpu.update_io_bitmap = native_tss_update_io_bitmap, + .cpu.invalidate_io_bitmap = native_tss_invalidate_io_bitmap, + .cpu.update_io_bitmap = native_tss_update_io_bitmap, #endif .cpu.start_context_switch = paravirt_nop, diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index f362ce0d5ac0..fe67dbd76e51 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -322,20 +322,6 @@ void arch_setup_new_exec(void) } #ifdef CONFIG_X86_IOPL_IOPERM -static inline void tss_invalidate_io_bitmap(struct tss_struct *tss) -{ - /* - * Invalidate the I/O bitmap by moving io_bitmap_base outside the - * TSS limit so any subsequent I/O access from user space will - * trigger a #GP. - * - * This is correct even when VMEXIT rewrites the TSS limit - * to 0x67 as the only requirement is that the base points - * outside the limit. - */ - tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_INVALID; -} - static inline void switch_to_bitmap(unsigned long tifp) { /* @@ -346,7 +332,7 @@ static inline void switch_to_bitmap(unsigned long tifp) * user mode. */ if (tifp & _TIF_IO_BITMAP) - tss_invalidate_io_bitmap(this_cpu_ptr(&cpu_tss_rw)); + tss_invalidate_io_bitmap(); } static void tss_copy_io_bitmap(struct tss_struct *tss, struct io_bitmap *iobm) @@ -380,7 +366,7 @@ void native_tss_update_io_bitmap(void) u16 *base = &tss->x86_tss.io_bitmap_base; if (!test_thread_flag(TIF_IO_BITMAP)) { - tss_invalidate_io_bitmap(tss); + native_tss_invalidate_io_bitmap(); return; } diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index 6ad43fc44556..2fd698e28e4d 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -58,7 +58,6 @@ int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, * or a page fault), which can make frame pointers * unreliable. */ - if (IS_ENABLED(CONFIG_FRAME_POINTER)) return -EINVAL; } @@ -81,10 +80,6 @@ int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, if (unwind_error(&state)) return -EINVAL; - /* Success path for non-user tasks, i.e. kthreads and idle tasks */ - if (!(task->flags & (PF_KTHREAD | PF_IDLE))) - return -EINVAL; - return 0; } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index f58679e487f6..b7cb3e0716f7 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -303,6 +303,8 @@ DEFINE_IDTENTRY_ERRORCODE(exc_alignment_check) do_trap(X86_TRAP_AC, SIGBUS, "alignment check", regs, error_code, BUS_ADRALN, NULL); + + local_irq_disable(); } #ifdef CONFIG_VMAP_STACK @@ -870,6 +872,12 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs, trace_hardirqs_off_finish(); /* + * If something gets miswired and we end up here for a user mode + * #DB, we will malfunction. + */ + WARN_ON_ONCE(user_mode(regs)); + + /* * Catch SYSENTER with TF set and clear DR_STEP. If this hit a * watchpoint at the same time then that will still be handled. */ @@ -887,6 +895,12 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs, static __always_inline void exc_debug_user(struct pt_regs *regs, unsigned long dr6) { + /* + * If something gets miswired and we end up here for a kernel mode + * #DB, we will malfunction. + */ + WARN_ON_ONCE(!user_mode(regs)); + idtentry_enter_user(regs); instrumentation_begin(); @@ -917,7 +931,7 @@ DEFINE_IDTENTRY_DEBUG_USER(exc_debug) } #else /* 32 bit does not have separate entry points. */ -DEFINE_IDTENTRY_DEBUG(exc_debug) +DEFINE_IDTENTRY_RAW(exc_debug) { unsigned long dr6, dr7; diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 7f969b2d240f..ec88bbe08a32 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -440,8 +440,11 @@ bool unwind_next_frame(struct unwind_state *state) /* * Find the orc_entry associated with the text address. * - * Decrement call return addresses by one so they work for sibling - * calls and calls to noreturn functions. + * For a call frame (as opposed to a signal frame), state->ip points to + * the instruction after the call. That instruction's stack layout + * could be different from the call instruction's layout, for example + * if the call was to a noreturn function. So get the ORC data for the + * call instruction itself. */ orc = orc_find(state->signal ? state->ip : state->ip - 1); if (!orc) { @@ -662,6 +665,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, state->sp = task->thread.sp; state->bp = READ_ONCE_NOCHECK(frame->bp); state->ip = READ_ONCE_NOCHECK(frame->ret_addr); + state->signal = (void *)state->ip == ret_from_fork; } if (get_stack_info((unsigned long *)state->sp, state->task, diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 3bfc8dd8a43d..9a03e5b23135 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -358,6 +358,7 @@ SECTIONS .bss : AT(ADDR(.bss) - LOAD_OFFSET) { __bss_start = .; *(.bss..page_aligned) + . = ALIGN(PAGE_SIZE); *(BSS_MAIN) BSS_DECRYPTED . = ALIGN(PAGE_SIZE); |