diff options
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/Makefile | 2 | ||||
-rw-r--r-- | arch/x86/kernel/asm-offsets_32.c | 9 | ||||
-rw-r--r-- | arch/x86/kernel/asm-offsets_64.c | 36 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mce/core.c | 12 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mce/internal.h | 3 | ||||
-rw-r--r-- | arch/x86/kernel/irq.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/ldt.c | 1 | ||||
-rw-r--r-- | arch/x86/kernel/process.c | 1 | ||||
-rw-r--r-- | arch/x86/kernel/process_32.c | 1 | ||||
-rw-r--r-- | arch/x86/kernel/process_64.c | 1 | ||||
-rw-r--r-- | arch/x86/kernel/signal.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/sys_ia32.c | 255 | ||||
-rw-r--r-- | arch/x86/kernel/sys_x86_64.c | 1 | ||||
-rw-r--r-- | arch/x86/kernel/traps.c | 41 |
14 files changed, 306 insertions, 64 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 8be5926cce51..bb5abfef0256 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -52,6 +52,8 @@ obj-y += setup.o x86_init.o i8259.o irqinit.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o obj-$(CONFIG_IRQ_WORK) += irq_work.o obj-y += probe_roms.o +obj-$(CONFIG_X86_32) += sys_ia32.o +obj-$(CONFIG_IA32_EMULATION) += sys_ia32.o obj-$(CONFIG_X86_64) += sys_x86_64.o obj-$(CONFIG_X86_ESPFIX64) += espfix_64.o obj-$(CONFIG_SYSFS) += ksysfs.o diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index 2b4256ebe86e..6e043f295a60 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -7,11 +7,6 @@ #include <asm/ucontext.h> -#define __SYSCALL_I386(nr, sym, qual) [nr] = 1, -static char syscalls[] = { -#include <asm/syscalls_32.h> -}; - /* workaround for a warning with -Wmissing-prototypes */ void foo(void); @@ -64,9 +59,5 @@ void foo(void) #endif BLANK(); - DEFINE(__NR_syscall_max, sizeof(syscalls) - 1); - DEFINE(NR_syscalls, sizeof(syscalls)); - - BLANK(); DEFINE(EFI_svam, offsetof(efi_runtime_services_t, set_virtual_address_map)); } diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 24d2fde30d00..c2a47016f243 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -5,30 +5,6 @@ #include <asm/ia32.h> -#define __SYSCALL_64(nr, sym, qual) [nr] = 1, -#define __SYSCALL_X32(nr, sym, qual) -static char syscalls_64[] = { -#include <asm/syscalls_64.h> -}; -#undef __SYSCALL_64 -#undef __SYSCALL_X32 - -#ifdef CONFIG_X86_X32_ABI -#define __SYSCALL_64(nr, sym, qual) -#define __SYSCALL_X32(nr, sym, qual) [nr] = 1, -static char syscalls_x32[] = { -#include <asm/syscalls_64.h> -}; -#undef __SYSCALL_64 -#undef __SYSCALL_X32 -#endif - -#define __SYSCALL_I386(nr, sym, qual) [nr] = 1, -static char syscalls_ia32[] = { -#include <asm/syscalls_32.h> -}; -#undef __SYSCALL_I386 - #if defined(CONFIG_KVM_GUEST) && defined(CONFIG_PARAVIRT_SPINLOCKS) #include <asm/kvm_para.h> #endif @@ -90,17 +66,5 @@ int main(void) DEFINE(stack_canary_offset, offsetof(struct fixed_percpu_data, stack_canary)); BLANK(); #endif - - DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1); - DEFINE(NR_syscalls, sizeof(syscalls_64)); - -#ifdef CONFIG_X86_X32_ABI - DEFINE(__NR_syscall_x32_max, sizeof(syscalls_x32) - 1); - DEFINE(X32_NR_syscalls, sizeof(syscalls_x32)); -#endif - - DEFINE(__NR_syscall_compat_max, sizeof(syscalls_ia32) - 1); - DEFINE(IA32_NR_syscalls, sizeof(syscalls_ia32)); - return 0; } diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index dd06fce537fc..54165f3569e8 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1215,8 +1215,14 @@ static void __mc_scan_banks(struct mce *m, struct mce *final, * On Intel systems this is entered on all CPUs in parallel through * MCE broadcast. However some CPUs might be broken beyond repair, * so be always careful when synchronizing with others. + * + * Tracing and kprobes are disabled: if we interrupted a kernel context + * with IF=1, we need to minimize stack usage. There are also recursion + * issues: if the machine check was due to a failure of the memory + * backing the user stack, tracing that reads the user stack will cause + * potentially infinite recursion. */ -void do_machine_check(struct pt_regs *regs, long error_code) +void notrace do_machine_check(struct pt_regs *regs, long error_code) { DECLARE_BITMAP(valid_banks, MAX_NR_BANKS); DECLARE_BITMAP(toclear, MAX_NR_BANKS); @@ -1362,6 +1368,7 @@ out_ist: ist_exit(regs); } EXPORT_SYMBOL_GPL(do_machine_check); +NOKPROBE_SYMBOL(do_machine_check); #ifndef CONFIG_MEMORY_FAILURE int memory_failure(unsigned long pfn, int flags) @@ -1896,10 +1903,11 @@ static void unexpected_machine_check(struct pt_regs *regs, long error_code) void (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check; -dotraplinkage void do_mce(struct pt_regs *regs, long error_code) +dotraplinkage notrace void do_mce(struct pt_regs *regs, long error_code) { machine_check_vector(regs, error_code); } +NOKPROBE_SYMBOL(do_mce); /* * Called for each booted CPU to set up machine checks. diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h index 97db18441d2c..3b008172ad73 100644 --- a/arch/x86/kernel/cpu/mce/internal.h +++ b/arch/x86/kernel/cpu/mce/internal.h @@ -8,6 +8,9 @@ #include <linux/device.h> #include <asm/mce.h> +/* Pointer to the installed machine check handler for this CPU setup. */ +extern void (*machine_check_vector)(struct pt_regs *, long error_code); + enum severity_level { MCE_NO_SEVERITY, MCE_DEFERRED_SEVERITY, diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 21efee32e2b1..c7965ff429c5 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -230,7 +230,7 @@ u64 arch_irq_stat(void) * SMP cross-CPU interrupts have their own specific * handlers). */ -__visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs) +__visible void __irq_entry do_IRQ(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); struct irq_desc * desc; @@ -263,7 +263,6 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs) exiting_irq(); set_irq_regs(old_regs); - return 1; } #ifdef CONFIG_X86_LOCAL_APIC diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index c57e1ca70fd1..84c3ba32f211 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -27,7 +27,6 @@ #include <asm/tlb.h> #include <asm/desc.h> #include <asm/mmu_context.h> -#include <asm/syscalls.h> #include <asm/pgtable_areas.h> /* This is a multiple of PAGE_SIZE. */ diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 3053c85e0e42..8a59dae23868 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -28,7 +28,6 @@ #include <linux/hw_breakpoint.h> #include <asm/cpu.h> #include <asm/apic.h> -#include <asm/syscalls.h> #include <linux/uaccess.h> #include <asm/mwait.h> #include <asm/fpu/internal.h> diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 5052ced43373..954b013cc585 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -49,7 +49,6 @@ #include <asm/tlbflush.h> #include <asm/cpu.h> -#include <asm/syscalls.h> #include <asm/debugreg.h> #include <asm/switch_to.h> #include <asm/vm86.h> diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index ffd497804dbc..5ef9d8f25b0e 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -48,7 +48,6 @@ #include <asm/desc.h> #include <asm/proto.h> #include <asm/ia32.h> -#include <asm/syscalls.h> #include <asm/debugreg.h> #include <asm/switch_to.h> #include <asm/xen/hypervisor.h> diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 8a29573851a3..0364f8c3bee3 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -42,8 +42,6 @@ #endif /* CONFIG_X86_64 */ #include <asm/syscall.h> -#include <asm/syscalls.h> - #include <asm/sigframe.h> #include <asm/signal.h> @@ -859,7 +857,7 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where) } #ifdef CONFIG_X86_X32_ABI -asmlinkage long sys32_x32_rt_sigreturn(void) +COMPAT_SYSCALL_DEFINE0(x32_rt_sigreturn) { struct pt_regs *regs = current_pt_regs(); struct rt_sigframe_x32 __user *frame; diff --git a/arch/x86/kernel/sys_ia32.c b/arch/x86/kernel/sys_ia32.c new file mode 100644 index 000000000000..ab03fede1422 --- /dev/null +++ b/arch/x86/kernel/sys_ia32.c @@ -0,0 +1,255 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * sys_ia32.c: Conversion between 32bit and 64bit native syscalls. Based on + * sys_sparc32 + * + * Copyright (C) 2000 VA Linux Co + * Copyright (C) 2000 Don Dugger <n0ano@valinux.com> + * Copyright (C) 1999 Arun Sharma <arun.sharma@intel.com> + * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) + * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) + * Copyright (C) 2000 Hewlett-Packard Co. + * Copyright (C) 2000 David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 2000,2001,2002 Andi Kleen, SuSE Labs (x86-64 port) + * + * These routines maintain argument size conversion between 32bit and 64bit + * environment. In 2.5 most of this should be moved to a generic directory. + * + * This file assumes that there is a hole at the end of user address space. + * + * Some of the functions are LE specific currently. These are + * hopefully all marked. This should be fixed. + */ + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/fs.h> +#include <linux/file.h> +#include <linux/signal.h> +#include <linux/syscalls.h> +#include <linux/times.h> +#include <linux/utsname.h> +#include <linux/mm.h> +#include <linux/uio.h> +#include <linux/poll.h> +#include <linux/personality.h> +#include <linux/stat.h> +#include <linux/rwsem.h> +#include <linux/compat.h> +#include <linux/vfs.h> +#include <linux/ptrace.h> +#include <linux/highuid.h> +#include <linux/sysctl.h> +#include <linux/slab.h> +#include <linux/sched/task.h> +#include <asm/mman.h> +#include <asm/types.h> +#include <linux/uaccess.h> +#include <linux/atomic.h> +#include <asm/vgtod.h> +#include <asm/ia32.h> + +#define AA(__x) ((unsigned long)(__x)) + +SYSCALL_DEFINE3(ia32_truncate64, const char __user *, filename, + unsigned long, offset_low, unsigned long, offset_high) +{ + return ksys_truncate(filename, + ((loff_t) offset_high << 32) | offset_low); +} + +SYSCALL_DEFINE3(ia32_ftruncate64, unsigned int, fd, + unsigned long, offset_low, unsigned long, offset_high) +{ + return ksys_ftruncate(fd, ((loff_t) offset_high << 32) | offset_low); +} + +/* warning: next two assume little endian */ +SYSCALL_DEFINE5(ia32_pread64, unsigned int, fd, char __user *, ubuf, + u32, count, u32, poslo, u32, poshi) +{ + return ksys_pread64(fd, ubuf, count, + ((loff_t)AA(poshi) << 32) | AA(poslo)); +} + +SYSCALL_DEFINE5(ia32_pwrite64, unsigned int, fd, const char __user *, ubuf, + u32, count, u32, poslo, u32, poshi) +{ + return ksys_pwrite64(fd, ubuf, count, + ((loff_t)AA(poshi) << 32) | AA(poslo)); +} + + +/* + * Some system calls that need sign extended arguments. This could be + * done by a generic wrapper. + */ +SYSCALL_DEFINE6(ia32_fadvise64_64, int, fd, __u32, offset_low, + __u32, offset_high, __u32, len_low, __u32, len_high, + int, advice) +{ + return ksys_fadvise64_64(fd, + (((u64)offset_high)<<32) | offset_low, + (((u64)len_high)<<32) | len_low, + advice); +} + +SYSCALL_DEFINE4(ia32_readahead, int, fd, unsigned int, off_lo, + unsigned int, off_hi, size_t, count) +{ + return ksys_readahead(fd, ((u64)off_hi << 32) | off_lo, count); +} + +SYSCALL_DEFINE6(ia32_sync_file_range, int, fd, unsigned int, off_low, + unsigned int, off_hi, unsigned int, n_low, + unsigned int, n_hi, int, flags) +{ + return ksys_sync_file_range(fd, + ((u64)off_hi << 32) | off_low, + ((u64)n_hi << 32) | n_low, flags); +} + +SYSCALL_DEFINE5(ia32_fadvise64, int, fd, unsigned int, offset_lo, + unsigned int, offset_hi, size_t, len, int, advice) +{ + return ksys_fadvise64_64(fd, ((u64)offset_hi << 32) | offset_lo, + len, advice); +} + +SYSCALL_DEFINE6(ia32_fallocate, int, fd, int, mode, + unsigned int, offset_lo, unsigned int, offset_hi, + unsigned int, len_lo, unsigned int, len_hi) +{ + return ksys_fallocate(fd, mode, ((u64)offset_hi << 32) | offset_lo, + ((u64)len_hi << 32) | len_lo); +} + +#ifdef CONFIG_IA32_EMULATION +/* + * Another set for IA32/LFS -- x86_64 struct stat is different due to + * support for 64bit inode numbers. + */ +static int cp_stat64(struct stat64 __user *ubuf, struct kstat *stat) +{ + typeof(ubuf->st_uid) uid = 0; + typeof(ubuf->st_gid) gid = 0; + SET_UID(uid, from_kuid_munged(current_user_ns(), stat->uid)); + SET_GID(gid, from_kgid_munged(current_user_ns(), stat->gid)); + if (!access_ok(ubuf, sizeof(struct stat64)) || + __put_user(huge_encode_dev(stat->dev), &ubuf->st_dev) || + __put_user(stat->ino, &ubuf->__st_ino) || + __put_user(stat->ino, &ubuf->st_ino) || + __put_user(stat->mode, &ubuf->st_mode) || + __put_user(stat->nlink, &ubuf->st_nlink) || + __put_user(uid, &ubuf->st_uid) || + __put_user(gid, &ubuf->st_gid) || + __put_user(huge_encode_dev(stat->rdev), &ubuf->st_rdev) || + __put_user(stat->size, &ubuf->st_size) || + __put_user(stat->atime.tv_sec, &ubuf->st_atime) || + __put_user(stat->atime.tv_nsec, &ubuf->st_atime_nsec) || + __put_user(stat->mtime.tv_sec, &ubuf->st_mtime) || + __put_user(stat->mtime.tv_nsec, &ubuf->st_mtime_nsec) || + __put_user(stat->ctime.tv_sec, &ubuf->st_ctime) || + __put_user(stat->ctime.tv_nsec, &ubuf->st_ctime_nsec) || + __put_user(stat->blksize, &ubuf->st_blksize) || + __put_user(stat->blocks, &ubuf->st_blocks)) + return -EFAULT; + return 0; +} + +COMPAT_SYSCALL_DEFINE2(ia32_stat64, const char __user *, filename, + struct stat64 __user *, statbuf) +{ + struct kstat stat; + int ret = vfs_stat(filename, &stat); + + if (!ret) + ret = cp_stat64(statbuf, &stat); + return ret; +} + +COMPAT_SYSCALL_DEFINE2(ia32_lstat64, const char __user *, filename, + struct stat64 __user *, statbuf) +{ + struct kstat stat; + int ret = vfs_lstat(filename, &stat); + if (!ret) + ret = cp_stat64(statbuf, &stat); + return ret; +} + +COMPAT_SYSCALL_DEFINE2(ia32_fstat64, unsigned int, fd, + struct stat64 __user *, statbuf) +{ + struct kstat stat; + int ret = vfs_fstat(fd, &stat); + if (!ret) + ret = cp_stat64(statbuf, &stat); + return ret; +} + +COMPAT_SYSCALL_DEFINE4(ia32_fstatat64, unsigned int, dfd, + const char __user *, filename, + struct stat64 __user *, statbuf, int, flag) +{ + struct kstat stat; + int error; + + error = vfs_fstatat(dfd, filename, &stat, flag); + if (error) + return error; + return cp_stat64(statbuf, &stat); +} + +/* + * Linux/i386 didn't use to be able to handle more than + * 4 system call parameters, so these system calls used a memory + * block for parameter passing.. + */ + +struct mmap_arg_struct32 { + unsigned int addr; + unsigned int len; + unsigned int prot; + unsigned int flags; + unsigned int fd; + unsigned int offset; +}; + +COMPAT_SYSCALL_DEFINE1(ia32_mmap, struct mmap_arg_struct32 __user *, arg) +{ + struct mmap_arg_struct32 a; + + if (copy_from_user(&a, arg, sizeof(a))) + return -EFAULT; + + if (a.offset & ~PAGE_MASK) + return -EINVAL; + + return ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, + a.offset>>PAGE_SHIFT); +} + +/* + * The 32-bit clone ABI is CONFIG_CLONE_BACKWARDS + */ +COMPAT_SYSCALL_DEFINE5(ia32_clone, unsigned long, clone_flags, + unsigned long, newsp, int __user *, parent_tidptr, + unsigned long, tls_val, int __user *, child_tidptr) +{ + struct kernel_clone_args args = { + .flags = (clone_flags & ~CSIGNAL), + .pidfd = parent_tidptr, + .child_tid = child_tidptr, + .parent_tid = parent_tidptr, + .exit_signal = (clone_flags & CSIGNAL), + .stack = newsp, + .tls = tls_val, + }; + + if (!legacy_clone_args_valid(&args)) + return -EINVAL; + + return _do_fork(&args); +} +#endif /* CONFIG_IA32_EMULATION */ diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index ca3c11a17b5a..504fa5425bce 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -21,7 +21,6 @@ #include <asm/elf.h> #include <asm/ia32.h> -#include <asm/syscalls.h> /* * Align a virtual address to avoid aliasing in the I$ on AMD F15h. diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 6ef00eb6fbb9..c0bc9df8634d 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -572,14 +572,20 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) return; /* - * Use ist_enter despite the fact that we don't use an IST stack. - * We can be called from a kprobe in non-CONTEXT_KERNEL kernel - * mode or even during context tracking state changes. + * Unlike any other non-IST entry, we can be called from a kprobe in + * non-CONTEXT_KERNEL kernel mode or even during context tracking + * state changes. Make sure that we wake up RCU even if we're coming + * from kernel code. * - * This means that we can't schedule. That's okay. + * This means that we can't schedule even if we came from a + * preemptible kernel context. That's okay. */ - ist_enter(regs); + if (!user_mode(regs)) { + rcu_nmi_enter(); + preempt_disable(); + } RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); + #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, SIGTRAP) == NOTIFY_STOP) @@ -600,7 +606,10 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) cond_local_irq_disable(regs); exit: - ist_exit(regs); + if (!user_mode(regs)) { + preempt_enable_no_resched(); + rcu_nmi_exit(); + } } NOKPROBE_SYMBOL(do_int3); @@ -862,7 +871,25 @@ do_simd_coprocessor_error(struct pt_regs *regs, long error_code) dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) { - cond_local_irq_enable(regs); + /* + * This addresses a Pentium Pro Erratum: + * + * PROBLEM: If the APIC subsystem is configured in mixed mode with + * Virtual Wire mode implemented through the local APIC, an + * interrupt vector of 0Fh (Intel reserved encoding) may be + * generated by the local APIC (Int 15). This vector may be + * generated upon receipt of a spurious interrupt (an interrupt + * which is removed before the system receives the INTA sequence) + * instead of the programmed 8259 spurious interrupt vector. + * + * IMPLICATION: The spurious interrupt vector programmed in the + * 8259 is normally handled by an operating system's spurious + * interrupt handler. However, a vector of 0Fh is unknown to some + * operating systems, which would crash if this erratum occurred. + * + * In theory this could be limited to 32bit, but the handler is not + * hurting and who knows which other CPUs suffer from this. + */ } dotraplinkage void |