From 7c3b00e06d731a28fc3d17ed02ba250642b15b81 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 20 Jan 2016 14:59:55 -0800 Subject: ptrace: make wait_on_bit(JOBCTL_TRAPPING_BIT) in ptrace_attach() killable ptrace_attach() can hang waiting for STOPPED -> TRACED transition if the tracee gets frozen in between, change wait_on_bit() to use TASK_KILLABLE. This doesn't really solve the problem(s) and we probably need to fix the freezer. In particular, note that this means that pm freezer will fail if it races attach-to-stopped-task. And otoh perhaps we can just remove JOBCTL_TRAPPING_BIT altogether, it is not clear if we really need to hide this transition from debugger, WNOHANG after PTRACE_ATTACH can fail anyway if it races with SIGCONT. Signed-off-by: Oleg Nesterov Reported-by: Andrey Ryabinin Cc: Roland McGrath Acked-by: Tejun Heo Cc: Pedro Alves Cc: Jan Kratochvil Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/ptrace.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/ptrace.c b/kernel/ptrace.c index b760bae64cf1..aa94aee9d4c9 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -364,8 +364,14 @@ unlock_creds: mutex_unlock(&task->signal->cred_guard_mutex); out: if (!retval) { - wait_on_bit(&task->jobctl, JOBCTL_TRAPPING_BIT, - TASK_UNINTERRUPTIBLE); + /* + * We do not bother to change retval or clear JOBCTL_TRAPPING + * if wait_on_bit() was interrupted by SIGKILL. The tracer will + * not return to user-mode, it will exit and clear this bit in + * __ptrace_unlink() if it wasn't already cleared by the tracee; + * and until then nobody can ptrace this task. 
+ */ + wait_on_bit(&task->jobctl, JOBCTL_TRAPPING_BIT, TASK_KILLABLE); proc_ptrace_connector(task, PTRACE_ATTACH); } -- cgit From 570ac9337b5c13dbf46ca6758c376e2e13e8956f Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 20 Jan 2016 14:59:58 -0800 Subject: ptrace: task_stopped_code(ptrace => true) can't see TASK_STOPPED task task_stopped_code()->task_is_stopped_or_traced() doesn't look right, the traced task must never be TASK_STOPPED. We can not add WARN_ON(task_is_stopped(p)), but this is only because do_wait() can race with PTRACE_ATTACH from another thread. [akpm@linux-foundation.org: teeny cleanup] Signed-off-by: Oleg Nesterov Cc: Andrey Ryabinin Cc: Roland McGrath Acked-by: Tejun Heo Cc: Pedro Alves Cc: Jan Kratochvil Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/exit.c b/kernel/exit.c index 07110c6020a0..b0eea830303c 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1120,8 +1120,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) static int *task_stopped_code(struct task_struct *p, bool ptrace) { if (ptrace) { - if (task_is_stopped_or_traced(p) && - !(p->jobctl & JOBCTL_LISTENING)) + if (task_is_traced(p) && !(p->jobctl & JOBCTL_LISTENING)) return &p->exit_code; } else { if (p->signal->flags & SIGNAL_STOP_STOPPED) -- cgit From caaee6234d05a58c5b4d05e7bf766131b810a657 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Wed, 20 Jan 2016 15:00:04 -0800 Subject: ptrace: use fsuid, fsgid, effective creds for fs access checks By checking the effective credentials instead of the real UID / permitted capabilities, ensure that the calling process actually intended to use its credentials. To ensure that all ptrace checks use the correct caller credentials (e.g. in case out-of-tree code or newly added code omits the PTRACE_MODE_*CREDS flag), use two new flags and require one of them to be set. 
The problem was that when a privileged task had temporarily dropped its privileges, e.g. by calling setreuid(0, user_uid), with the intent to perform following syscalls with the credentials of a user, it still passed ptrace access checks that the user would not be able to pass. While an attacker should not be able to convince the privileged task to perform a ptrace() syscall, this is a problem because the ptrace access check is reused for things in procfs. In particular, the following somewhat interesting procfs entries only rely on ptrace access checks: /proc/$pid/stat - uses the check for determining whether pointers should be visible, useful for bypassing ASLR /proc/$pid/maps - also useful for bypassing ASLR /proc/$pid/cwd - useful for gaining access to restricted directories that contain files with lax permissions, e.g. in this scenario: lrwxrwxrwx root root /proc/13020/cwd -> /root/foobar drwx------ root root /root drwxr-xr-x root root /root/foobar -rw-r--r-- root root /root/foobar/secret Therefore, on a system where a root-owned mode 6755 binary changes its effective credentials as described and then dumps a user-specified file, this could be used by an attacker to reveal the memory layout of root's processes or reveal the contents of files he is not allowed to access (through /proc/$pid/cwd). [akpm@linux-foundation.org: fix warning] Signed-off-by: Jann Horn Acked-by: Kees Cook Cc: Casey Schaufler Cc: Oleg Nesterov Cc: Ingo Molnar Cc: James Morris Cc: "Serge E. Hallyn" Cc: Andy Shevchenko Cc: Andy Lutomirski Cc: Al Viro Cc: "Eric W. 
Biederman" Cc: Willy Tarreau Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/events/core.c | 2 +- kernel/futex.c | 2 +- kernel/futex_compat.c | 2 +- kernel/kcmp.c | 4 ++-- kernel/ptrace.c | 39 +++++++++++++++++++++++++++++++-------- 5 files changed, 36 insertions(+), 13 deletions(-) (limited to 'kernel') diff --git a/kernel/events/core.c b/kernel/events/core.c index bf8244190d0f..c0957416b32e 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3376,7 +3376,7 @@ find_lively_task_by_vpid(pid_t vpid) /* Reuse ptrace permission checks for now. */ err = -EACCES; - if (!ptrace_may_access(task, PTRACE_MODE_READ)) + if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) goto errout; return task; diff --git a/kernel/futex.c b/kernel/futex.c index c6f514573b28..0773f2b23b10 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2884,7 +2884,7 @@ SYSCALL_DEFINE3(get_robust_list, int, pid, } ret = -EPERM; - if (!ptrace_may_access(p, PTRACE_MODE_READ)) + if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS)) goto err_unlock; head = p->robust_list; diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c index 55c8c9349cfe..4ae3232e7a28 100644 --- a/kernel/futex_compat.c +++ b/kernel/futex_compat.c @@ -155,7 +155,7 @@ COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid, } ret = -EPERM; - if (!ptrace_may_access(p, PTRACE_MODE_READ)) + if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS)) goto err_unlock; head = p->compat_robust_list; diff --git a/kernel/kcmp.c b/kernel/kcmp.c index 0aa69ea1d8fd..3a47fa998fe0 100644 --- a/kernel/kcmp.c +++ b/kernel/kcmp.c @@ -122,8 +122,8 @@ SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t, pid2, int, type, &task2->signal->cred_guard_mutex); if (ret) goto err; - if (!ptrace_may_access(task1, PTRACE_MODE_READ) || - !ptrace_may_access(task2, PTRACE_MODE_READ)) { + if (!ptrace_may_access(task1, PTRACE_MODE_READ_REALCREDS) || + !ptrace_may_access(task2, PTRACE_MODE_READ_REALCREDS)) { ret = -EPERM; goto err_unlock; } diff 
--git a/kernel/ptrace.c b/kernel/ptrace.c index aa94aee9d4c9..2341efe7fe02 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -219,6 +219,14 @@ static int ptrace_has_cap(struct user_namespace *ns, unsigned int mode) static int __ptrace_may_access(struct task_struct *task, unsigned int mode) { const struct cred *cred = current_cred(), *tcred; + int dumpable = 0; + kuid_t caller_uid; + kgid_t caller_gid; + + if (!(mode & PTRACE_MODE_FSCREDS) == !(mode & PTRACE_MODE_REALCREDS)) { + WARN(1, "denying ptrace access check without PTRACE_MODE_*CREDS\n"); + return -EPERM; + } /* May we inspect the given task? * This check is used both for attaching with ptrace @@ -228,18 +236,33 @@ static int __ptrace_may_access(struct task_struct *task, unsigned int mode) * because setting up the necessary parent/child relationship * or halting the specified task is impossible. */ - int dumpable = 0; + /* Don't let security modules deny introspection */ if (same_thread_group(task, current)) return 0; rcu_read_lock(); + if (mode & PTRACE_MODE_FSCREDS) { + caller_uid = cred->fsuid; + caller_gid = cred->fsgid; + } else { + /* + * Using the euid would make more sense here, but something + * in userland might rely on the old behavior, and this + * shouldn't be a security problem since + * PTRACE_MODE_REALCREDS implies that the caller explicitly + * used a syscall that requests access to another process + * (and not a filesystem syscall to procfs). 
+ */ + caller_uid = cred->uid; + caller_gid = cred->gid; + } tcred = __task_cred(task); - if (uid_eq(cred->uid, tcred->euid) && - uid_eq(cred->uid, tcred->suid) && - uid_eq(cred->uid, tcred->uid) && - gid_eq(cred->gid, tcred->egid) && - gid_eq(cred->gid, tcred->sgid) && - gid_eq(cred->gid, tcred->gid)) + if (uid_eq(caller_uid, tcred->euid) && + uid_eq(caller_uid, tcred->suid) && + uid_eq(caller_uid, tcred->uid) && + gid_eq(caller_gid, tcred->egid) && + gid_eq(caller_gid, tcred->sgid) && + gid_eq(caller_gid, tcred->gid)) goto ok; if (ptrace_has_cap(tcred->user_ns, mode)) goto ok; @@ -306,7 +329,7 @@ static int ptrace_attach(struct task_struct *task, long request, goto out; task_lock(task); - retval = __ptrace_may_access(task, PTRACE_MODE_ATTACH); + retval = __ptrace_may_access(task, PTRACE_MODE_ATTACH_REALCREDS); task_unlock(task); if (retval) goto unlock_creds; -- cgit From c428fbdbf3e9515bfe686881ffdba862dbd8cb6f Mon Sep 17 00:00:00 2001 From: Dmitry Safonov <0x7f454c46@gmail.com> Date: Wed, 20 Jan 2016 15:00:10 -0800 Subject: exit: remove unneeded declaration of exit_mm() Signed-off-by: Dmitry Safonov <0x7f454c46@gmail.com> Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel') diff --git a/kernel/exit.c b/kernel/exit.c index b0eea830303c..10e088237fed 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -59,8 +59,6 @@ #include #include -static void exit_mm(struct task_struct *tsk); - static void __unhash_process(struct task_struct *p, bool group_dead) { nr_threads--; -- cgit From c4c54dd1caf1393c529e7ea1f18b4342c796a49c Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 20 Jan 2016 15:00:16 -0800 Subject: kernel/cpu.c: change type of cpu_possible_bits and friends Change cpu_possible_bits and friends (online, present, active) from being bitmaps that happen to have the right size to actually being struct cpumasks. Also rename them to __cpu_xyz_mask. 
This is mostly a small cleanup in preparation for exporting them and, eventually, eliminating the extra indirection through the cpu_xyz_mask variables. Signed-off-by: Rasmus Villemoes Acked-by: Rusty Russell Cc: Greg Kroah-Hartman Cc: Michael Ellerman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cpu.c | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) (limited to 'kernel') diff --git a/kernel/cpu.c b/kernel/cpu.c index 85ff5e26e23b..6a96b713cea7 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -759,71 +759,71 @@ const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL; EXPORT_SYMBOL(cpu_all_bits); #ifdef CONFIG_INIT_ALL_POSSIBLE -static DECLARE_BITMAP(cpu_possible_bits, CONFIG_NR_CPUS) __read_mostly - = CPU_BITS_ALL; +static struct cpumask __cpu_possible_mask __read_mostly + = {CPU_BITS_ALL}; #else -static DECLARE_BITMAP(cpu_possible_bits, CONFIG_NR_CPUS) __read_mostly; +static struct cpumask __cpu_possible_mask __read_mostly; #endif -const struct cpumask *const cpu_possible_mask = to_cpumask(cpu_possible_bits); +const struct cpumask *const cpu_possible_mask = &__cpu_possible_mask; EXPORT_SYMBOL(cpu_possible_mask); -static DECLARE_BITMAP(cpu_online_bits, CONFIG_NR_CPUS) __read_mostly; -const struct cpumask *const cpu_online_mask = to_cpumask(cpu_online_bits); +static struct cpumask __cpu_online_mask __read_mostly; +const struct cpumask *const cpu_online_mask = &__cpu_online_mask; EXPORT_SYMBOL(cpu_online_mask); -static DECLARE_BITMAP(cpu_present_bits, CONFIG_NR_CPUS) __read_mostly; -const struct cpumask *const cpu_present_mask = to_cpumask(cpu_present_bits); +static struct cpumask __cpu_present_mask __read_mostly; +const struct cpumask *const cpu_present_mask = &__cpu_present_mask; EXPORT_SYMBOL(cpu_present_mask); -static DECLARE_BITMAP(cpu_active_bits, CONFIG_NR_CPUS) __read_mostly; -const struct cpumask *const cpu_active_mask = to_cpumask(cpu_active_bits); +static struct cpumask 
__cpu_active_mask __read_mostly; +const struct cpumask *const cpu_active_mask = &__cpu_active_mask; EXPORT_SYMBOL(cpu_active_mask); void set_cpu_possible(unsigned int cpu, bool possible) { if (possible) - cpumask_set_cpu(cpu, to_cpumask(cpu_possible_bits)); + cpumask_set_cpu(cpu, &__cpu_possible_mask); else - cpumask_clear_cpu(cpu, to_cpumask(cpu_possible_bits)); + cpumask_clear_cpu(cpu, &__cpu_possible_mask); } void set_cpu_present(unsigned int cpu, bool present) { if (present) - cpumask_set_cpu(cpu, to_cpumask(cpu_present_bits)); + cpumask_set_cpu(cpu, &__cpu_present_mask); else - cpumask_clear_cpu(cpu, to_cpumask(cpu_present_bits)); + cpumask_clear_cpu(cpu, &__cpu_present_mask); } void set_cpu_online(unsigned int cpu, bool online) { if (online) { - cpumask_set_cpu(cpu, to_cpumask(cpu_online_bits)); - cpumask_set_cpu(cpu, to_cpumask(cpu_active_bits)); + cpumask_set_cpu(cpu, &__cpu_online_mask); + cpumask_set_cpu(cpu, &__cpu_active_mask); } else { - cpumask_clear_cpu(cpu, to_cpumask(cpu_online_bits)); + cpumask_clear_cpu(cpu, &__cpu_online_mask); } } void set_cpu_active(unsigned int cpu, bool active) { if (active) - cpumask_set_cpu(cpu, to_cpumask(cpu_active_bits)); + cpumask_set_cpu(cpu, &__cpu_active_mask); else - cpumask_clear_cpu(cpu, to_cpumask(cpu_active_bits)); + cpumask_clear_cpu(cpu, &__cpu_active_mask); } void init_cpu_present(const struct cpumask *src) { - cpumask_copy(to_cpumask(cpu_present_bits), src); + cpumask_copy(&__cpu_present_mask, src); } void init_cpu_possible(const struct cpumask *src) { - cpumask_copy(to_cpumask(cpu_possible_bits), src); + cpumask_copy(&__cpu_possible_mask, src); } void init_cpu_online(const struct cpumask *src) { - cpumask_copy(to_cpumask(cpu_online_bits), src); + cpumask_copy(&__cpu_online_mask, src); } -- cgit From 4b804c85dc37db6c108832b28cd54673ff7ee037 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 20 Jan 2016 15:00:19 -0800 Subject: kernel/cpu.c: export __cpu_*_mask Exporting the cpumasks 
__cpu_possible_mask and friends will allow us to remove the extra indirection through the cpu_*_mask variables. It will also allow the set_cpu_* functions to become static inlines, which will give a .text reduction. Signed-off-by: Rasmus Villemoes Acked-by: Rusty Russell Cc: Greg Kroah-Hartman Cc: Michael Ellerman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cpu.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/cpu.c b/kernel/cpu.c index 6a96b713cea7..35d1d45be8e9 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -759,23 +759,27 @@ const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL; EXPORT_SYMBOL(cpu_all_bits); #ifdef CONFIG_INIT_ALL_POSSIBLE -static struct cpumask __cpu_possible_mask __read_mostly +struct cpumask __cpu_possible_mask __read_mostly = {CPU_BITS_ALL}; #else -static struct cpumask __cpu_possible_mask __read_mostly; +struct cpumask __cpu_possible_mask __read_mostly; #endif +EXPORT_SYMBOL(__cpu_possible_mask); const struct cpumask *const cpu_possible_mask = &__cpu_possible_mask; EXPORT_SYMBOL(cpu_possible_mask); -static struct cpumask __cpu_online_mask __read_mostly; +struct cpumask __cpu_online_mask __read_mostly; +EXPORT_SYMBOL(__cpu_online_mask); const struct cpumask *const cpu_online_mask = &__cpu_online_mask; EXPORT_SYMBOL(cpu_online_mask); -static struct cpumask __cpu_present_mask __read_mostly; +struct cpumask __cpu_present_mask __read_mostly; +EXPORT_SYMBOL(__cpu_present_mask); const struct cpumask *const cpu_present_mask = &__cpu_present_mask; EXPORT_SYMBOL(cpu_present_mask); -static struct cpumask __cpu_active_mask __read_mostly; +struct cpumask __cpu_active_mask __read_mostly; +EXPORT_SYMBOL(__cpu_active_mask); const struct cpumask *const cpu_active_mask = &__cpu_active_mask; EXPORT_SYMBOL(cpu_active_mask); -- cgit From 5aec01b834fd6f8ca49d1aeede665b950d0c148e Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 20 Jan 2016 15:00:25 -0800 
Subject: kernel/cpu.c: eliminate cpu_*_mask Replace the variables cpu_possible_mask, cpu_online_mask, cpu_present_mask and cpu_active_mask with macros expanding to expressions of the same type and value, eliminating some indirection. Signed-off-by: Rasmus Villemoes Acked-by: Rusty Russell Cc: Greg Kroah-Hartman Cc: Michael Ellerman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cpu.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'kernel') diff --git a/kernel/cpu.c b/kernel/cpu.c index 35d1d45be8e9..8734fc74fcbc 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -765,23 +765,15 @@ struct cpumask __cpu_possible_mask __read_mostly struct cpumask __cpu_possible_mask __read_mostly; #endif EXPORT_SYMBOL(__cpu_possible_mask); -const struct cpumask *const cpu_possible_mask = &__cpu_possible_mask; -EXPORT_SYMBOL(cpu_possible_mask); struct cpumask __cpu_online_mask __read_mostly; EXPORT_SYMBOL(__cpu_online_mask); -const struct cpumask *const cpu_online_mask = &__cpu_online_mask; -EXPORT_SYMBOL(cpu_online_mask); struct cpumask __cpu_present_mask __read_mostly; EXPORT_SYMBOL(__cpu_present_mask); -const struct cpumask *const cpu_present_mask = &__cpu_present_mask; -EXPORT_SYMBOL(cpu_present_mask); struct cpumask __cpu_active_mask __read_mostly; EXPORT_SYMBOL(__cpu_active_mask); -const struct cpumask *const cpu_active_mask = &__cpu_active_mask; -EXPORT_SYMBOL(cpu_active_mask); void set_cpu_possible(unsigned int cpu, bool possible) { -- cgit From 9425676a363c0976e3d43dda792dc4711a651d1d Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 20 Jan 2016 15:00:28 -0800 Subject: kernel/cpu.c: make set_cpu_* static inlines Almost all callers of the set_cpu_* functions pass an explicit true or false. Making them static inline thus replaces the function calls with a simple set_bit/clear_bit, saving some .text. 
Signed-off-by: Rasmus Villemoes Acked-by: Rusty Russell Cc: Greg Kroah-Hartman Cc: Michael Ellerman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cpu.c | 34 ---------------------------------- 1 file changed, 34 deletions(-) (limited to 'kernel') diff --git a/kernel/cpu.c b/kernel/cpu.c index 8734fc74fcbc..5b9d39633ce9 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -775,40 +775,6 @@ EXPORT_SYMBOL(__cpu_present_mask); struct cpumask __cpu_active_mask __read_mostly; EXPORT_SYMBOL(__cpu_active_mask); -void set_cpu_possible(unsigned int cpu, bool possible) -{ - if (possible) - cpumask_set_cpu(cpu, &__cpu_possible_mask); - else - cpumask_clear_cpu(cpu, &__cpu_possible_mask); -} - -void set_cpu_present(unsigned int cpu, bool present) -{ - if (present) - cpumask_set_cpu(cpu, &__cpu_present_mask); - else - cpumask_clear_cpu(cpu, &__cpu_present_mask); -} - -void set_cpu_online(unsigned int cpu, bool online) -{ - if (online) { - cpumask_set_cpu(cpu, &__cpu_online_mask); - cpumask_set_cpu(cpu, &__cpu_active_mask); - } else { - cpumask_clear_cpu(cpu, &__cpu_online_mask); - } -} - -void set_cpu_active(unsigned int cpu, bool active) -{ - if (active) - cpumask_set_cpu(cpu, &__cpu_active_mask); - else - cpumask_clear_cpu(cpu, &__cpu_active_mask); -} - void init_cpu_present(const struct cpumask *src) { cpumask_copy(&__cpu_present_mask, src); -- cgit From cdf4b3fa03bab157d2d70d4de65bb7ae319b084f Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Wed, 20 Jan 2016 15:00:31 -0800 Subject: kexec: set KEXEC_TYPE_CRASH before sanity_check_segment_list() sanity_check_segment_list() checks KEXEC_TYPE_CRASH flag to ensure all the segments of the loaded crash kernel are within the kernel crash resource limits, so set the flag beforehand. 
Signed-off-by: Xunlei Pang Acked-by: Dave Young Cc: Eric Biederman Cc: Vivek Goyal Acked-by: Baoquan He Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kexec.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/kexec.c b/kernel/kexec.c index d873b64fbddc..ee70aef5cd81 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -63,16 +63,16 @@ static int kimage_alloc_init(struct kimage **rimage, unsigned long entry, if (ret) goto out_free_image; - ret = sanity_check_segment_list(image); - if (ret) - goto out_free_image; - - /* Enable the special crash kernel control page allocation policy. */ if (kexec_on_panic) { + /* Enable special crash kernel control page alloc policy. */ image->control_page = crashk_res.start; image->type = KEXEC_TYPE_CRASH; } + ret = sanity_check_segment_list(image); + if (ret) + goto out_free_image; + /* * Find a location for the control code buffer, and add it * the vector of segments so that it's pages will also be -- cgit From 2b24692b9235cb82b6f735b7a4c4137211ddf005 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Wed, 20 Jan 2016 15:00:34 -0800 Subject: kernel/kexec_core.c: use list_for_each_entry_safe in kimage_free_page_list Use list_for_each_entry_safe() instead of list_for_each_safe() to simplify the code. 
Signed-off-by: Geliang Tang Cc: Dave Young Cc: Vivek Goyal Acked-by: Baoquan He Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kexec_core.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index c823f3001e12..8dc659144869 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -310,12 +310,9 @@ static void kimage_free_pages(struct page *page) void kimage_free_page_list(struct list_head *list) { - struct list_head *pos, *next; + struct page *page, *next; - list_for_each_safe(pos, next, list) { - struct page *page; - - page = list_entry(pos, struct page, lru); + list_for_each_entry_safe(page, next, list, lru) { list_del(&page->lru); kimage_free_pages(page); } -- cgit From 978e30c9b46161c792ecdad0091fd017b21b8ca5 Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Wed, 20 Jan 2016 15:00:36 -0800 Subject: kexec: move some memembers and definitions within the scope of CONFIG_KEXEC_FILE Move the stuff currently only used by the kexec file code within CONFIG_KEXEC_FILE (and CONFIG_KEXEC_VERIFY_SIG). Also move internal "struct kexec_sha_region" and "struct kexec_buf" into "kexec_internal.h". Signed-off-by: Xunlei Pang Cc: "Eric W. 
Biederman" Cc: Dave Young Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kexec_file.c | 2 ++ kernel/kexec_internal.h | 21 +++++++++++++++++++++ 2 files changed, 23 insertions(+) (limited to 'kernel') diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index b70ada0028d2..007b791f676d 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -109,11 +109,13 @@ int __weak arch_kimage_file_post_load_cleanup(struct kimage *image) return -EINVAL; } +#ifdef CONFIG_KEXEC_VERIFY_SIG int __weak arch_kexec_kernel_verify_sig(struct kimage *image, void *buf, unsigned long buf_len) { return -EKEYREJECTED; } +#endif /* Apply relocations of type RELA */ int __weak diff --git a/kernel/kexec_internal.h b/kernel/kexec_internal.h index e4392a698ad4..0a52315d9c62 100644 --- a/kernel/kexec_internal.h +++ b/kernel/kexec_internal.h @@ -15,6 +15,27 @@ int kimage_is_destination_range(struct kimage *image, extern struct mutex kexec_mutex; #ifdef CONFIG_KEXEC_FILE +struct kexec_sha_region { + unsigned long start; + unsigned long len; +}; + +/* + * Keeps track of buffer parameters as provided by caller for requesting + * memory placement of buffer. + */ +struct kexec_buf { + struct kimage *image; + char *buffer; + unsigned long bufsz; + unsigned long mem; + unsigned long memsz; + unsigned long buf_align; + unsigned long buf_min; + unsigned long buf_max; + bool top_down; /* allocate from top of memory hole */ +}; + void kimage_file_post_load_cleanup(struct kimage *image); #else /* CONFIG_KEXEC_FILE */ static inline void kimage_file_post_load_cleanup(struct kimage *image) { } -- cgit From 41662f5cc55335807d39404371cfcbb1909304c4 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 20 Jan 2016 15:00:45 -0800 Subject: sysctl: enable strict writes SYSCTL_WRITES_WARN was added in commit f4aacea2f5d1 ("sysctl: allow for strict write position handling"), and released in v3.16 in August of 2014. 
Since then I can find only 1 instance of non-zero offset writing[1], and it was fixed immediately in CRIU[2]. As such, it appears safe to flip this to the strict state now. [1] https://www.google.com/search?q="when%20file%20position%20was%20not%200" [2] http://lists.openvz.org/pipermail/criu/2015-April/019819.html Signed-off-by: Kees Cook Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sysctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index c810f8afdb7f..91420362e0b3 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -173,7 +173,7 @@ extern int no_unaligned_warning; #define SYSCTL_WRITES_WARN 0 #define SYSCTL_WRITES_STRICT 1 -static int sysctl_writes_strict = SYSCTL_WRITES_WARN; +static int sysctl_writes_strict = SYSCTL_WRITES_STRICT; static int proc_do_cad_pid(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); -- cgit From 5c9cf8af2e77388f1da81c39237fb4f20c2f85d5 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Wed, 20 Jan 2016 15:00:48 -0800 Subject: kernel: printk: specify alignment for struct printk_log On architectures that have support for efficient unaligned access struct printk_log has 4-byte alignment. Specify alignment attribute in type declaration. The whole point of this patch is to fix a deadlock which happens when UBSAN detects an unaligned access in printk(): UBSAN then recursively calls printk() with logbuf_lock held by the top printk() call. Signed-off-by: Andrey Ryabinin Cc: Peter Zijlstra Cc: Sasha Levin Cc: Randy Dunlap Cc: Rasmus Villemoes Cc: Jonathan Corbet Cc: Michal Marek Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. 
Peter Anvin" Cc: Yury Gribov Cc: Dmitry Vyukov Cc: Konstantin Khlebnikov Cc: Kostya Serebryany Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/printk/printk.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index e79439134978..c963ba534a78 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -233,7 +233,11 @@ struct printk_log { u8 facility; /* syslog facility */ u8 flags:5; /* internal record flags */ u8 level:3; /* syslog level */ -}; +} +#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS +__packed __aligned(4) +#endif +; /* * The logbuf_lock protects kmsg buffer, indices, counters. This can be taken @@ -274,11 +278,7 @@ static u32 clear_idx; #define LOG_FACILITY(v) ((v) >> 3 & 0xff) /* record buffer */ -#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) -#define LOG_ALIGN 4 -#else #define LOG_ALIGN __alignof__(struct printk_log) -#endif #define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN); static char *log_buf = __log_buf; -- cgit From ddf1d398e517e660207e2c807f76a90df543a217 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Wed, 20 Jan 2016 15:01:02 -0800 Subject: prctl: take mmap sem for writing to protect against others An unprivileged user can trigger an oops on a kernel with CONFIG_CHECKPOINT_RESTORE. proc_pid_cmdline_read takes mmap_sem for reading and obtains args + env start/end values. These get sanity checked as follows: BUG_ON(arg_start > arg_end); BUG_ON(env_start > env_end); These can be changed by prctl_set_mm. Turns out it also takes the semaphore for reading, effectively rendering it useless. This results in: kernel BUG at fs/proc/base.c:240! 
invalid opcode: 0000 [#1] SMP Modules linked in: virtio_net CPU: 0 PID: 925 Comm: a.out Not tainted 4.4.0-rc8-next-20160105dupa+ #71 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 task: ffff880077a68000 ti: ffff8800784d0000 task.ti: ffff8800784d0000 RIP: proc_pid_cmdline_read+0x520/0x530 RSP: 0018:ffff8800784d3db8 EFLAGS: 00010206 RAX: ffff880077c5b6b0 RBX: ffff8800784d3f18 RCX: 0000000000000000 RDX: 0000000000000002 RSI: 00007f78e8857000 RDI: 0000000000000246 RBP: ffff8800784d3e40 R08: 0000000000000008 R09: 0000000000000001 R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000050 R13: 00007f78e8857800 R14: ffff88006fcef000 R15: ffff880077c5b600 FS: 00007f78e884a740(0000) GS:ffff88007b200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00007f78e8361770 CR3: 00000000790a5000 CR4: 00000000000006f0 Call Trace: __vfs_read+0x37/0x100 vfs_read+0x82/0x130 SyS_read+0x58/0xd0 entry_SYSCALL_64_fastpath+0x12/0x76 Code: 4c 8b 7d a8 eb e9 48 8b 9d 78 ff ff ff 4c 8b 7d 90 48 8b 03 48 39 45 a8 0f 87 f0 fe ff ff e9 d1 fe ff ff 4c 8b 7d 90 eb c6 0f 0b <0f> 0b 0f 0b 66 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 RIP proc_pid_cmdline_read+0x520/0x530 ---[ end trace 97882617ae9c6818 ]--- Turns out there are instances where the code just reads the aforementioned values without locking whatsoever - namely environ_read and get_cmdline. Interestingly these functions look quite resilient against bogus values, but I don't believe this should be relied upon. The first patch gets rid of the oops bug by grabbing mmap_sem for writing. The second patch is optional and puts locking around the aforementioned consumers for safety. Consumers of other fields don't seem to benefit from similar treatment and are left untouched. This patch (of 2): The code was taking the semaphore for reading, which does not protect against readers nor concurrent modifications. The problem could cause a sanity check to fail in procfs's cmdline reader, resulting in an OOPS. 
Note that some functions perform an unlocked read of various mm fields, but they seem to be fine despite possible modification. Signed-off-by: Mateusz Guzik Acked-by: Cyrill Gorcunov Cc: Alexey Dobriyan Cc: Jarod Wilson Cc: Jan Stancek Cc: Al Viro Cc: Anshuman Khandual Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sys.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'kernel') diff --git a/kernel/sys.c b/kernel/sys.c index 6af9212ab5aa..78947de6f969 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1853,11 +1853,13 @@ static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data user_auxv[AT_VECTOR_SIZE - 1] = AT_NULL; } - if (prctl_map.exe_fd != (u32)-1) + if (prctl_map.exe_fd != (u32)-1) { error = prctl_set_mm_exe_file(mm, prctl_map.exe_fd); - down_read(&mm->mmap_sem); - if (error) - goto out; + if (error) + return error; + } + + down_write(&mm->mmap_sem); /* * We don't validate if these members are pointing to @@ -1894,10 +1896,8 @@ static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data if (prctl_map.auxv_size) memcpy(mm->saved_auxv, user_auxv, sizeof(user_auxv)); - error = 0; -out: - up_read(&mm->mmap_sem); - return error; + up_write(&mm->mmap_sem); + return 0; } #endif /* CONFIG_CHECKPOINT_RESTORE */ @@ -1963,7 +1963,7 @@ static int prctl_set_mm(int opt, unsigned long addr, error = -EINVAL; - down_read(&mm->mmap_sem); + down_write(&mm->mmap_sem); vma = find_vma(mm, addr); prctl_map.start_code = mm->start_code; @@ -2056,7 +2056,7 @@ static int prctl_set_mm(int opt, unsigned long addr, error = 0; out: - up_read(&mm->mmap_sem); + up_write(&mm->mmap_sem); return error; } -- cgit