From 98f368e9e2630a3ce3e80fb10fb2e02038cf9578 Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Thu, 2 Jun 2016 23:43:21 -0500 Subject: kernel: Add noaudit variant of ns_capable() When checking the current cred for a capability in a specific user namespace, it isn't always desirable to have the LSMs audit the check. This patch adds a noaudit variant of ns_capable() for when those situations arise. The common logic between ns_capable() and the new ns_capable_noaudit() is moved into a single, shared function to keep duplicated code to a minimum and ease maintainability. Signed-off-by: Tyler Hicks Acked-by: Serge E. Hallyn Signed-off-by: James Morris --- kernel/capability.c | 46 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 10 deletions(-) (limited to 'kernel') diff --git a/kernel/capability.c b/kernel/capability.c index 45432b54d5c6..00411c82dac5 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -361,6 +361,24 @@ bool has_capability_noaudit(struct task_struct *t, int cap) return has_ns_capability_noaudit(t, &init_user_ns, cap); } +static bool ns_capable_common(struct user_namespace *ns, int cap, bool audit) +{ + int capable; + + if (unlikely(!cap_valid(cap))) { + pr_crit("capable() called with invalid cap=%u\n", cap); + BUG(); + } + + capable = audit ? security_capable(current_cred(), ns, cap) : + security_capable_noaudit(current_cred(), ns, cap); + if (capable == 0) { + current->flags |= PF_SUPERPRIV; + return true; + } + return false; +} + /** * ns_capable - Determine if the current task has a superior capability in effect * @ns: The usernamespace we want the capability in @@ -374,19 +392,27 @@ bool has_capability_noaudit(struct task_struct *t, int cap) */ bool ns_capable(struct user_namespace *ns, int cap) { - if (unlikely(!cap_valid(cap))) { - pr_crit("capable() called with invalid cap=%u\n", cap); - BUG(); - } - - if (security_capable(current_cred(), ns, cap) == 0) { - current->flags |= PF_SUPERPRIV; - return true; - } - return false; + return ns_capable_common(ns, cap, true); } EXPORT_SYMBOL(ns_capable); +/** + * ns_capable_noaudit - Determine if the current task has a superior capability + * (unaudited) in effect + * @ns: The usernamespace we want the capability in + * @cap: The capability to be tested for + * + * Return true if the current task has the given superior capability currently + * available for use, false if not. + * + * This sets PF_SUPERPRIV on the task if the capability is available on the + * assumption that it's about to be used. + */ +bool ns_capable_noaudit(struct user_namespace *ns, int cap) +{ + return ns_capable_common(ns, cap, false); +} +EXPORT_SYMBOL(ns_capable_noaudit); /** * capable - Determine if the current task has a superior capability in effect -- cgit From 2f275de5d1ed7269913ef9b4c64a13952c0a38e8 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 27 May 2016 12:57:02 -0700 Subject: seccomp: Add a seccomp_data parameter secure_computing() Currently, if arch code wants to supply seccomp_data directly to seccomp (which is generally much faster than having seccomp do it using the syscall_get_xyz() API), it has to use the two-phase seccomp hooks. Add it to the easy hooks, too. Cc: linux-arch@vger.kernel.org Signed-off-by: Andy Lutomirski Signed-off-by: Kees Cook --- kernel/seccomp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 7002796f14a4..06816290a212 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -554,9 +554,9 @@ void secure_computing_strict(int this_syscall) BUG(); } #else -int __secure_computing(void) +int __secure_computing(const struct seccomp_data *sd) { - u32 phase1_result = seccomp_phase1(NULL); + u32 phase1_result = seccomp_phase1(sd); if (likely(phase1_result == SECCOMP_PHASE1_OK)) return 0; -- cgit From 8112c4f140fa03f9ee68aad2cc79afa7df5418d3 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 1 Jun 2016 16:02:17 -0700 Subject: seccomp: remove 2-phase API Since nothing is using the 2-phase API, and it adds more complexity than benefit, remove it. Signed-off-by: Kees Cook Cc: Andy Lutomirski --- kernel/seccomp.c | 129 ++++++++++++++++++------------------------------------- 1 file changed, 41 insertions(+), 88 deletions(-) (limited to 'kernel') diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 06816290a212..14a37d71b612 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -173,7 +173,7 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen) * * Returns valid seccomp BPF response codes. */ -static u32 seccomp_run_filters(struct seccomp_data *sd) +static u32 seccomp_run_filters(const struct seccomp_data *sd) { struct seccomp_data sd_local; u32 ret = SECCOMP_RET_ALLOW; @@ -554,20 +554,9 @@ void secure_computing_strict(int this_syscall) BUG(); } #else -int __secure_computing(const struct seccomp_data *sd) -{ - u32 phase1_result = seccomp_phase1(sd); - - if (likely(phase1_result == SECCOMP_PHASE1_OK)) - return 0; - else if (likely(phase1_result == SECCOMP_PHASE1_SKIP)) - return -1; - else - return seccomp_phase2(phase1_result); -} #ifdef CONFIG_SECCOMP_FILTER -static u32 __seccomp_phase1_filter(int this_syscall, struct seccomp_data *sd) +static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd) { u32 filter_ret, action; int data; @@ -599,10 +588,33 @@ static u32 __seccomp_phase1_filter(int this_syscall, struct seccomp_data *sd) goto skip; case SECCOMP_RET_TRACE: - return filter_ret; /* Save the rest for phase 2. */ + /* ENOSYS these calls if there is no tracer attached. */ + if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) { + syscall_set_return_value(current, + task_pt_regs(current), + -ENOSYS, 0); + goto skip; + } + + /* Allow the BPF to provide the event message */ + ptrace_event(PTRACE_EVENT_SECCOMP, data); + /* + * The delivery of a fatal signal during event + * notification may silently skip tracer notification. + * Terminating the task now avoids executing a system + * call that may not be intended. + */ + if (fatal_signal_pending(current)) + do_exit(SIGSYS); + /* Check if the tracer forced the syscall to be skipped. */ + this_syscall = syscall_get_nr(current, task_pt_regs(current)); + if (this_syscall < 0) + goto skip; + + return 0; case SECCOMP_RET_ALLOW: - return SECCOMP_PHASE1_OK; + return 0; case SECCOMP_RET_KILL: default: @@ -614,96 +626,37 @@ static u32 __seccomp_phase1_filter(int this_syscall, struct seccomp_data *sd) skip: audit_seccomp(this_syscall, 0, action); - return SECCOMP_PHASE1_SKIP; + return -1; +} +#else +static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd) +{ + BUG(); } #endif -/** - * seccomp_phase1() - run fast path seccomp checks on the current syscall - * @arg sd: The seccomp_data or NULL - * - * This only reads pt_regs via the syscall_xyz helpers. The only change - * it will make to pt_regs is via syscall_set_return_value, and it will - * only do that if it returns SECCOMP_PHASE1_SKIP. - * - * If sd is provided, it will not read pt_regs at all. - * - * It may also call do_exit or force a signal; these actions must be - * safe. - * - * If it returns SECCOMP_PHASE1_OK, the syscall passes checks and should - * be processed normally. - * - * If it returns SECCOMP_PHASE1_SKIP, then the syscall should not be - * invoked. In this case, seccomp_phase1 will have set the return value - * using syscall_set_return_value. - * - * If it returns anything else, then the return value should be passed - * to seccomp_phase2 from a context in which ptrace hooks are safe. - */ -u32 seccomp_phase1(struct seccomp_data *sd) +int __secure_computing(const struct seccomp_data *sd) { int mode = current->seccomp.mode; - int this_syscall = sd ? sd->nr : - syscall_get_nr(current, task_pt_regs(current)); + int this_syscall; if (config_enabled(CONFIG_CHECKPOINT_RESTORE) && unlikely(current->ptrace & PT_SUSPEND_SECCOMP)) - return SECCOMP_PHASE1_OK; + return 0; + + this_syscall = sd ? sd->nr : + syscall_get_nr(current, task_pt_regs(current)); switch (mode) { case SECCOMP_MODE_STRICT: __secure_computing_strict(this_syscall); /* may call do_exit */ - return SECCOMP_PHASE1_OK; -#ifdef CONFIG_SECCOMP_FILTER + return 0; case SECCOMP_MODE_FILTER: - return __seccomp_phase1_filter(this_syscall, sd); -#endif + return __seccomp_filter(this_syscall, sd); default: BUG(); } } - -/** - * seccomp_phase2() - finish slow path seccomp work for the current syscall - * @phase1_result: The return value from seccomp_phase1() - * - * This must be called from a context in which ptrace hooks can be used. - * - * Returns 0 if the syscall should be processed or -1 to skip the syscall. - */ -int seccomp_phase2(u32 phase1_result) -{ - struct pt_regs *regs = task_pt_regs(current); - u32 action = phase1_result & SECCOMP_RET_ACTION; - int data = phase1_result & SECCOMP_RET_DATA; - - BUG_ON(action != SECCOMP_RET_TRACE); - - audit_seccomp(syscall_get_nr(current, regs), 0, action); - - /* Skip these calls if there is no tracer. */ - if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) { - syscall_set_return_value(current, regs, - -ENOSYS, 0); - return -1; - } - - /* Allow the BPF to provide the event message */ - ptrace_event(PTRACE_EVENT_SECCOMP, data); - /* - * The delivery of a fatal signal during event - * notification may silently skip tracer notification. - * Terminating the task now avoids executing a system - * call that may not be intended. - */ - if (fatal_signal_pending(current)) - do_exit(SIGSYS); - if (syscall_get_nr(current, regs) < 0) - return -1; /* Explicit request to skip. */ - - return 0; -} #endif /* CONFIG_HAVE_ARCH_SECCOMP_FILTER */ long prctl_get_seccomp(void) -- cgit From ce6526e8afa4b6ad0ab134a4cc50c9c863319637 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 1 Jun 2016 19:29:15 -0700 Subject: seccomp: recheck the syscall after RET_TRACE When RET_TRACE triggers, a tracer may change a syscall into something that should be filtered by seccomp. This re-runs seccomp after a trace event to make sure things continue to pass. Signed-off-by: Kees Cook Cc: Andy Lutomirski --- kernel/seccomp.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 14a37d71b612..54d15eb2b701 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -556,7 +556,8 @@ void secure_computing_strict(int this_syscall) #else #ifdef CONFIG_SECCOMP_FILTER -static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd) +static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, + const bool recheck_after_trace) { u32 filter_ret, action; int data; @@ -588,6 +589,10 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd) goto skip; case SECCOMP_RET_TRACE: + /* We've been put in this state by the ptracer already. */ + if (recheck_after_trace) + return 0; + /* ENOSYS these calls if there is no tracer attached. */ if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) { syscall_set_return_value(current, @@ -611,6 +616,15 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd) if (this_syscall < 0) goto skip; + /* + * Recheck the syscall, since it may have changed. This + * intentionally uses a NULL struct seccomp_data to force + * a reload of all registers. This does not goto skip since + * a skip would have already been reported. + */ + if (__seccomp_filter(this_syscall, NULL, true)) + return -1; + return 0; case SECCOMP_RET_ALLOW: @@ -629,7 +643,8 @@ skip: return -1; } #else -static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd) +static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, + const bool recheck_after_trace) { BUG(); } @@ -652,7 +667,7 @@ int __secure_computing(const struct seccomp_data *sd) __secure_computing_strict(this_syscall); /* may call do_exit */ return 0; case SECCOMP_MODE_FILTER: - return __seccomp_filter(this_syscall, sd); + return __seccomp_filter(this_syscall, sd, false); default: BUG(); } -- cgit