summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2017-09-23 10:16:53 -0700
committerDavid S. Miller <davem@davemloft.net>2017-09-23 10:16:53 -0700
commit1f8d31d189cc6ce1e4b972959fda41e790bb92b8 (patch)
treefd3cca12a29319f073773ac55f7d41cc58e2c73f /kernel
parent3fb5ec06578e4c85d3486b6a73cbeb07960a51ce (diff)
parentcd4175b11685b11c40e31a03e05084cc212b0649 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/devmap.c6
-rw-r--r--kernel/bpf/syscall.c6
-rw-r--r--kernel/bpf/verifier.c7
-rw-r--r--kernel/events/core.c3
-rw-r--r--kernel/irq/chip.c2
-rw-r--r--kernel/seccomp.c321
-rw-r--r--kernel/trace/trace.c19
-rw-r--r--kernel/trace/trace.h2
-rw-r--r--kernel/trace/trace_mmiotrace.c1
9 files changed, 346 insertions, 21 deletions
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 959c9a07f318..e093d9a2c4dd 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -75,8 +75,8 @@ static u64 dev_map_bitmap_size(const union bpf_attr *attr)
static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
{
struct bpf_dtab *dtab;
+ int err = -EINVAL;
u64 cost;
- int err;
/* check sanity of attributes */
if (attr->max_entries == 0 || attr->key_size != 4 ||
@@ -108,6 +108,8 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
if (err)
goto free_dtab;
+ err = -ENOMEM;
+
/* A per cpu bitfield with a bit per possible net device */
dtab->flush_needed = __alloc_percpu(dev_map_bitmap_size(attr),
__alignof__(unsigned long));
@@ -128,7 +130,7 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
free_dtab:
free_percpu(dtab->flush_needed);
kfree(dtab);
- return ERR_PTR(-ENOMEM);
+ return ERR_PTR(err);
}
static void dev_map_free(struct bpf_map *map)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index cb17e1cd1d43..25d074920a00 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -186,15 +186,17 @@ static int bpf_map_alloc_id(struct bpf_map *map)
static void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock)
{
+ unsigned long flags;
+
if (do_idr_lock)
- spin_lock_bh(&map_idr_lock);
+ spin_lock_irqsave(&map_idr_lock, flags);
else
__acquire(&map_idr_lock);
idr_remove(&map_idr, map->id);
if (do_idr_lock)
- spin_unlock_bh(&map_idr_lock);
+ spin_unlock_irqrestore(&map_idr_lock, flags);
else
__release(&map_idr_lock);
}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 799b2451ef2d..b914fbe1383e 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4205,7 +4205,12 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
}
if (insn->imm == BPF_FUNC_redirect_map) {
- u64 addr = (unsigned long)prog;
+ /* Note, we cannot use prog directly as imm as subsequent
+ * rewrites would still change the prog pointer. The only
+ * stable address we can use is aux, which also works with
+ * prog clones during blinding.
+ */
+ u64 addr = (unsigned long)prog->aux;
struct bpf_insn r4_ld[] = {
BPF_LD_IMM64(BPF_REG_4, addr),
*insn,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 3e691b75b2db..6bc21e202ae4 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -8171,6 +8171,7 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
}
}
event->tp_event->prog = prog;
+ event->tp_event->bpf_prog_owner = event;
return 0;
}
@@ -8185,7 +8186,7 @@ static void perf_event_free_bpf_prog(struct perf_event *event)
return;
prog = event->tp_event->prog;
- if (prog) {
+ if (prog && event->tp_event->bpf_prog_owner == event) {
event->tp_event->prog = NULL;
bpf_prog_put(prog);
}
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index f51b7b6d2451..6fc89fd93824 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -202,7 +202,7 @@ __irq_startup_managed(struct irq_desc *desc, struct cpumask *aff, bool force)
irqd_clr_managed_shutdown(d);
- if (cpumask_any_and(aff, cpu_online_mask) > nr_cpu_ids) {
+ if (cpumask_any_and(aff, cpu_online_mask) >= nr_cpu_ids) {
/*
* Catch code which fiddles with enable_irq() on a managed
* and potentially shutdown IRQ. Chained interrupt
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 98b59b5db90b..c24579dfa7a1 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -17,11 +17,13 @@
#include <linux/audit.h>
#include <linux/compat.h>
#include <linux/coredump.h>
+#include <linux/kmemleak.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
#include <linux/seccomp.h>
#include <linux/slab.h>
#include <linux/syscalls.h>
+#include <linux/sysctl.h>
#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER
#include <asm/syscall.h>
@@ -42,6 +44,7 @@
* get/put helpers should be used when accessing an instance
* outside of a lifetime-guarded section. In general, this
* is only needed for handling filters shared across tasks.
+ * @log: true if all actions except for SECCOMP_RET_ALLOW should be logged
* @prev: points to a previously installed, or inherited, filter
* @prog: the BPF program to evaluate
*
@@ -57,6 +60,7 @@
*/
struct seccomp_filter {
refcount_t usage;
+ bool log;
struct seccomp_filter *prev;
struct bpf_prog *prog;
};
@@ -171,10 +175,15 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
/**
* seccomp_run_filters - evaluates all seccomp filters against @sd
* @sd: optional seccomp data to be passed to filters
+ * @match: stores struct seccomp_filter that resulted in the return value,
+ * unless filter returned SECCOMP_RET_ALLOW, in which case it will
+ * be unchanged.
*
* Returns valid seccomp BPF response codes.
*/
-static u32 seccomp_run_filters(const struct seccomp_data *sd)
+#define ACTION_ONLY(ret) ((s32)((ret) & (SECCOMP_RET_ACTION_FULL)))
+static u32 seccomp_run_filters(const struct seccomp_data *sd,
+ struct seccomp_filter **match)
{
struct seccomp_data sd_local;
u32 ret = SECCOMP_RET_ALLOW;
@@ -184,7 +193,7 @@ static u32 seccomp_run_filters(const struct seccomp_data *sd)
/* Ensure unexpected behavior doesn't result in failing open. */
if (unlikely(WARN_ON(f == NULL)))
- return SECCOMP_RET_KILL;
+ return SECCOMP_RET_KILL_PROCESS;
if (!sd) {
populate_seccomp_data(&sd_local);
@@ -198,8 +207,10 @@ static u32 seccomp_run_filters(const struct seccomp_data *sd)
for (; f; f = f->prev) {
u32 cur_ret = BPF_PROG_RUN(f->prog, sd);
- if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION))
+ if (ACTION_ONLY(cur_ret) < ACTION_ONLY(ret)) {
ret = cur_ret;
+ *match = f;
+ }
}
return ret;
}
@@ -444,6 +455,10 @@ static long seccomp_attach_filter(unsigned int flags,
return ret;
}
+ /* Set log flag, if present. */
+ if (flags & SECCOMP_FILTER_FLAG_LOG)
+ filter->log = true;
+
/*
* If there is an existing filter, make it the prev and don't drop its
* task reference.
@@ -514,6 +529,65 @@ static void seccomp_send_sigsys(int syscall, int reason)
}
#endif /* CONFIG_SECCOMP_FILTER */
+/* For use with seccomp_actions_logged */
+#define SECCOMP_LOG_KILL_PROCESS (1 << 0)
+#define SECCOMP_LOG_KILL_THREAD (1 << 1)
+#define SECCOMP_LOG_TRAP (1 << 2)
+#define SECCOMP_LOG_ERRNO (1 << 3)
+#define SECCOMP_LOG_TRACE (1 << 4)
+#define SECCOMP_LOG_LOG (1 << 5)
+#define SECCOMP_LOG_ALLOW (1 << 6)
+
+static u32 seccomp_actions_logged = SECCOMP_LOG_KILL_PROCESS |
+ SECCOMP_LOG_KILL_THREAD |
+ SECCOMP_LOG_TRAP |
+ SECCOMP_LOG_ERRNO |
+ SECCOMP_LOG_TRACE |
+ SECCOMP_LOG_LOG;
+
+static inline void seccomp_log(unsigned long syscall, long signr, u32 action,
+ bool requested)
+{
+ bool log = false;
+
+ switch (action) {
+ case SECCOMP_RET_ALLOW:
+ break;
+ case SECCOMP_RET_TRAP:
+ log = requested && seccomp_actions_logged & SECCOMP_LOG_TRAP;
+ break;
+ case SECCOMP_RET_ERRNO:
+ log = requested && seccomp_actions_logged & SECCOMP_LOG_ERRNO;
+ break;
+ case SECCOMP_RET_TRACE:
+ log = requested && seccomp_actions_logged & SECCOMP_LOG_TRACE;
+ break;
+ case SECCOMP_RET_LOG:
+ log = seccomp_actions_logged & SECCOMP_LOG_LOG;
+ break;
+ case SECCOMP_RET_KILL_THREAD:
+ log = seccomp_actions_logged & SECCOMP_LOG_KILL_THREAD;
+ break;
+ case SECCOMP_RET_KILL_PROCESS:
+ default:
+ log = seccomp_actions_logged & SECCOMP_LOG_KILL_PROCESS;
+ }
+
+ /*
+ * Force an audit message to be emitted when the action is RET_KILL_*,
+ * RET_LOG, or the FILTER_FLAG_LOG bit was set and the action is
+ * allowed to be logged by the admin.
+ */
+ if (log)
+ return __audit_seccomp(syscall, signr, action);
+
+ /*
+ * Let the audit subsystem decide if the action should be audited based
+ * on whether the current task itself is being audited.
+ */
+ return audit_seccomp(syscall, signr, action);
+}
+
/*
* Secure computing mode 1 allows only read/write/exit/sigreturn.
* To be fully secure this must be combined with rlimit
@@ -539,7 +613,7 @@ static void __secure_computing_strict(int this_syscall)
#ifdef SECCOMP_DEBUG
dump_stack();
#endif
- audit_seccomp(this_syscall, SIGKILL, SECCOMP_RET_KILL);
+ seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL_THREAD, true);
do_exit(SIGKILL);
}
@@ -566,6 +640,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
const bool recheck_after_trace)
{
u32 filter_ret, action;
+ struct seccomp_filter *match = NULL;
int data;
/*
@@ -574,9 +649,9 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
*/
rmb();
- filter_ret = seccomp_run_filters(sd);
+ filter_ret = seccomp_run_filters(sd, &match);
data = filter_ret & SECCOMP_RET_DATA;
- action = filter_ret & SECCOMP_RET_ACTION;
+ action = filter_ret & SECCOMP_RET_ACTION_FULL;
switch (action) {
case SECCOMP_RET_ERRNO:
@@ -637,14 +712,25 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
return 0;
+ case SECCOMP_RET_LOG:
+ seccomp_log(this_syscall, 0, action, true);
+ return 0;
+
case SECCOMP_RET_ALLOW:
+ /*
+ * Note that the "match" filter will always be NULL for
+ * this action since SECCOMP_RET_ALLOW is the starting
+ * state in seccomp_run_filters().
+ */
return 0;
- case SECCOMP_RET_KILL:
+ case SECCOMP_RET_KILL_THREAD:
+ case SECCOMP_RET_KILL_PROCESS:
default:
- audit_seccomp(this_syscall, SIGSYS, action);
+ seccomp_log(this_syscall, SIGSYS, action, true);
/* Dump core only if this is the last remaining thread. */
- if (get_nr_threads(current) == 1) {
+ if (action == SECCOMP_RET_KILL_PROCESS ||
+ get_nr_threads(current) == 1) {
siginfo_t info;
/* Show the original registers in the dump. */
@@ -653,13 +739,16 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
seccomp_init_siginfo(&info, this_syscall, data);
do_coredump(&info);
}
- do_exit(SIGSYS);
+ if (action == SECCOMP_RET_KILL_PROCESS)
+ do_group_exit(SIGSYS);
+ else
+ do_exit(SIGSYS);
}
unreachable();
skip:
- audit_seccomp(this_syscall, 0, action);
+ seccomp_log(this_syscall, 0, action, match ? match->log : false);
return -1;
}
#else
@@ -794,6 +883,29 @@ static inline long seccomp_set_mode_filter(unsigned int flags,
}
#endif
+static long seccomp_get_action_avail(const char __user *uaction)
+{
+ u32 action;
+
+ if (copy_from_user(&action, uaction, sizeof(action)))
+ return -EFAULT;
+
+ switch (action) {
+ case SECCOMP_RET_KILL_PROCESS:
+ case SECCOMP_RET_KILL_THREAD:
+ case SECCOMP_RET_TRAP:
+ case SECCOMP_RET_ERRNO:
+ case SECCOMP_RET_TRACE:
+ case SECCOMP_RET_LOG:
+ case SECCOMP_RET_ALLOW:
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
/* Common entry point for both prctl and syscall. */
static long do_seccomp(unsigned int op, unsigned int flags,
const char __user *uargs)
@@ -805,6 +917,11 @@ static long do_seccomp(unsigned int op, unsigned int flags,
return seccomp_set_mode_strict();
case SECCOMP_SET_MODE_FILTER:
return seccomp_set_mode_filter(flags, uargs);
+ case SECCOMP_GET_ACTION_AVAIL:
+ if (flags != 0)
+ return -EINVAL;
+
+ return seccomp_get_action_avail(uargs);
default:
return -EINVAL;
}
@@ -922,3 +1039,185 @@ out:
return ret;
}
#endif
+
+#ifdef CONFIG_SYSCTL
+
+/* Human readable action names for friendly sysctl interaction */
+#define SECCOMP_RET_KILL_PROCESS_NAME "kill_process"
+#define SECCOMP_RET_KILL_THREAD_NAME "kill_thread"
+#define SECCOMP_RET_TRAP_NAME "trap"
+#define SECCOMP_RET_ERRNO_NAME "errno"
+#define SECCOMP_RET_TRACE_NAME "trace"
+#define SECCOMP_RET_LOG_NAME "log"
+#define SECCOMP_RET_ALLOW_NAME "allow"
+
+static const char seccomp_actions_avail[] =
+ SECCOMP_RET_KILL_PROCESS_NAME " "
+ SECCOMP_RET_KILL_THREAD_NAME " "
+ SECCOMP_RET_TRAP_NAME " "
+ SECCOMP_RET_ERRNO_NAME " "
+ SECCOMP_RET_TRACE_NAME " "
+ SECCOMP_RET_LOG_NAME " "
+ SECCOMP_RET_ALLOW_NAME;
+
+struct seccomp_log_name {
+ u32 log;
+ const char *name;
+};
+
+static const struct seccomp_log_name seccomp_log_names[] = {
+ { SECCOMP_LOG_KILL_PROCESS, SECCOMP_RET_KILL_PROCESS_NAME },
+ { SECCOMP_LOG_KILL_THREAD, SECCOMP_RET_KILL_THREAD_NAME },
+ { SECCOMP_LOG_TRAP, SECCOMP_RET_TRAP_NAME },
+ { SECCOMP_LOG_ERRNO, SECCOMP_RET_ERRNO_NAME },
+ { SECCOMP_LOG_TRACE, SECCOMP_RET_TRACE_NAME },
+ { SECCOMP_LOG_LOG, SECCOMP_RET_LOG_NAME },
+ { SECCOMP_LOG_ALLOW, SECCOMP_RET_ALLOW_NAME },
+ { }
+};
+
+static bool seccomp_names_from_actions_logged(char *names, size_t size,
+ u32 actions_logged)
+{
+ const struct seccomp_log_name *cur;
+ bool append_space = false;
+
+ for (cur = seccomp_log_names; cur->name && size; cur++) {
+ ssize_t ret;
+
+ if (!(actions_logged & cur->log))
+ continue;
+
+ if (append_space) {
+ ret = strscpy(names, " ", size);
+ if (ret < 0)
+ return false;
+
+ names += ret;
+ size -= ret;
+ } else
+ append_space = true;
+
+ ret = strscpy(names, cur->name, size);
+ if (ret < 0)
+ return false;
+
+ names += ret;
+ size -= ret;
+ }
+
+ return true;
+}
+
+static bool seccomp_action_logged_from_name(u32 *action_logged,
+ const char *name)
+{
+ const struct seccomp_log_name *cur;
+
+ for (cur = seccomp_log_names; cur->name; cur++) {
+ if (!strcmp(cur->name, name)) {
+ *action_logged = cur->log;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static bool seccomp_actions_logged_from_names(u32 *actions_logged, char *names)
+{
+ char *name;
+
+ *actions_logged = 0;
+ while ((name = strsep(&names, " ")) && *name) {
+ u32 action_logged = 0;
+
+ if (!seccomp_action_logged_from_name(&action_logged, name))
+ return false;
+
+ *actions_logged |= action_logged;
+ }
+
+ return true;
+}
+
+static int seccomp_actions_logged_handler(struct ctl_table *ro_table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ char names[sizeof(seccomp_actions_avail)];
+ struct ctl_table table;
+ int ret;
+
+ if (write && !capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ memset(names, 0, sizeof(names));
+
+ if (!write) {
+ if (!seccomp_names_from_actions_logged(names, sizeof(names),
+ seccomp_actions_logged))
+ return -EINVAL;
+ }
+
+ table = *ro_table;
+ table.data = names;
+ table.maxlen = sizeof(names);
+ ret = proc_dostring(&table, write, buffer, lenp, ppos);
+ if (ret)
+ return ret;
+
+ if (write) {
+ u32 actions_logged;
+
+ if (!seccomp_actions_logged_from_names(&actions_logged,
+ table.data))
+ return -EINVAL;
+
+ if (actions_logged & SECCOMP_LOG_ALLOW)
+ return -EINVAL;
+
+ seccomp_actions_logged = actions_logged;
+ }
+
+ return 0;
+}
+
+static struct ctl_path seccomp_sysctl_path[] = {
+ { .procname = "kernel", },
+ { .procname = "seccomp", },
+ { }
+};
+
+static struct ctl_table seccomp_sysctl_table[] = {
+ {
+ .procname = "actions_avail",
+ .data = (void *) &seccomp_actions_avail,
+ .maxlen = sizeof(seccomp_actions_avail),
+ .mode = 0444,
+ .proc_handler = proc_dostring,
+ },
+ {
+ .procname = "actions_logged",
+ .mode = 0644,
+ .proc_handler = seccomp_actions_logged_handler,
+ },
+ { }
+};
+
+static int __init seccomp_sysctl_init(void)
+{
+ struct ctl_table_header *hdr;
+
+ hdr = register_sysctl_paths(seccomp_sysctl_path, seccomp_sysctl_table);
+ if (!hdr)
+ pr_warn("seccomp: sysctl registration failed\n");
+ else
+ kmemleak_not_leak(hdr);
+
+ return 0;
+}
+
+device_initcall(seccomp_sysctl_init)
+
+#endif /* CONFIG_SYSCTL */
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 5360b7aec57a..752e5daf0896 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4020,11 +4020,17 @@ static int tracing_open(struct inode *inode, struct file *file)
/* If this file was open for write, then erase contents */
if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
int cpu = tracing_get_cpu(inode);
+ struct trace_buffer *trace_buf = &tr->trace_buffer;
+
+#ifdef CONFIG_TRACER_MAX_TRACE
+ if (tr->current_trace->print_max)
+ trace_buf = &tr->max_buffer;
+#endif
if (cpu == RING_BUFFER_ALL_CPUS)
- tracing_reset_online_cpus(&tr->trace_buffer);
+ tracing_reset_online_cpus(trace_buf);
else
- tracing_reset(&tr->trace_buffer, cpu);
+ tracing_reset(trace_buf, cpu);
}
if (file->f_mode & FMODE_READ) {
@@ -5358,6 +5364,13 @@ static int tracing_set_tracer(struct trace_array *tr, const char *buf)
if (t == tr->current_trace)
goto out;
+ /* Some tracers won't work on kernel command line */
+ if (system_state < SYSTEM_RUNNING && t->noboot) {
+ pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
+ t->name);
+ goto out;
+ }
+
/* Some tracers are only allowed for the top level buffer */
if (!trace_ok_for_array(t, tr)) {
ret = -EINVAL;
@@ -5667,7 +5680,7 @@ static int tracing_wait_pipe(struct file *filp)
*
* iter->pos will be 0 if we haven't read anything.
*/
- if (!tracing_is_on() && iter->pos)
+ if (!tracer_tracing_is_on(iter->tr) && iter->pos)
break;
mutex_unlock(&iter->mutex);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index fb5d54d0d1b3..652c682707cd 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -444,6 +444,8 @@ struct tracer {
#ifdef CONFIG_TRACER_MAX_TRACE
bool use_max_tr;
#endif
+ /* True if tracer cannot be enabled in kernel param */
+ bool noboot;
};
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index cd7480d0a201..dca78fc48439 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -282,6 +282,7 @@ static struct tracer mmio_tracer __read_mostly =
.close = mmio_close,
.read = mmio_read,
.print_line = mmio_print_line,
+ .noboot = true,
};
__init static int init_mmio_trace(void)