summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/cgroup.c5
-rw-r--r--kernel/bpf/core.c1
-rw-r--r--kernel/bpf/devmap.c9
-rw-r--r--kernel/bpf/inode.c5
-rw-r--r--kernel/bpf/lpm_trie.c14
-rw-r--r--kernel/bpf/syscall.c8
-rw-r--r--kernel/bpf/verifier.c12
-rw-r--r--kernel/cgroup/pids.c5
-rw-r--r--kernel/cgroup/rdma.c5
-rw-r--r--kernel/compat.c5
-rw-r--r--kernel/cpu.c6
-rw-r--r--kernel/crash_core.c4
-rw-r--r--kernel/events/core.c23
-rw-r--r--kernel/fork.c64
-rw-r--r--kernel/kexec.c4
-rw-r--r--kernel/kexec_core.c4
-rw-r--r--kernel/kexec_file.c4
-rw-r--r--kernel/power/poweroff.c3
-rw-r--r--kernel/power/suspend.c3
-rw-r--r--kernel/sched/debug.c5
-rw-r--r--kernel/signal.c5
-rw-r--r--kernel/sysctl.c44
-rw-r--r--kernel/trace/bpf_trace.c100
23 files changed, 202 insertions, 136 deletions
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index fcde0f7b2585..92a7d0cf8d13 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Functions to manage eBPF programs attached to cgroups
*
* Copyright (c) 2016 Daniel Mack
- *
- * This file is subject to the terms and conditions of version 2 of the GNU
- * General Public License. See the file COPYING in the main directory of the
- * Linux distribution for more details.
*/
#include <linux/kernel.h>
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 7c473f208a10..080e2bb644cc 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2097,7 +2097,6 @@ int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to,
DEFINE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
EXPORT_SYMBOL(bpf_stats_enabled_key);
-int sysctl_bpf_stats_enabled __read_mostly;
/* All definitions of tracepoints related to BPF. */
#define CREATE_TRACE_POINTS
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 15dbc15c5b0c..cd8297b3bdb9 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -178,6 +178,7 @@ static void dev_map_free(struct bpf_map *map)
if (!dev)
continue;
+ free_percpu(dev->bulkq);
dev_put(dev->dev);
kfree(dev);
}
@@ -273,6 +274,7 @@ void __dev_map_flush(struct bpf_map *map)
unsigned long *bitmap = this_cpu_ptr(dtab->flush_needed);
u32 bit;
+ rcu_read_lock();
for_each_set_bit(bit, bitmap, map->max_entries) {
struct bpf_dtab_netdev *dev = READ_ONCE(dtab->netdev_map[bit]);
struct xdp_bulk_queue *bq;
@@ -283,11 +285,12 @@ void __dev_map_flush(struct bpf_map *map)
if (unlikely(!dev))
continue;
- __clear_bit(bit, bitmap);
-
bq = this_cpu_ptr(dev->bulkq);
bq_xmit_all(dev, bq, XDP_XMIT_FLUSH, true);
+
+ __clear_bit(bit, bitmap);
}
+ rcu_read_unlock();
}
/* rcu_read_lock (from syscall and BPF contexts) ensures that if a delete and/or
@@ -380,6 +383,7 @@ static void dev_map_flush_old(struct bpf_dtab_netdev *dev)
int cpu;
+ rcu_read_lock();
for_each_online_cpu(cpu) {
bitmap = per_cpu_ptr(dev->dtab->flush_needed, cpu);
__clear_bit(dev->bit, bitmap);
@@ -387,6 +391,7 @@ static void dev_map_flush_old(struct bpf_dtab_netdev *dev)
bq = per_cpu_ptr(dev->bulkq, cpu);
bq_xmit_all(dev, bq, XDP_XMIT_FLUSH, false);
}
+ rcu_read_unlock();
}
}
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index 84a80b02db99..cc0d0cf114e3 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Minimal file system backend for holding eBPF maps and programs,
* used by bpf(2) object pinning.
@@ -5,10 +6,6 @@
* Authors:
*
* Daniel Borkmann <daniel@iogearbox.net>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation.
*/
#include <linux/init.h>
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index e61630c2e50b..57b59cca4db7 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Longest prefix match list implementation
*
* Copyright (c) 2016,2017 Daniel Mack
* Copyright (c) 2016 David Herrmann
- *
- * This file is subject to the terms and conditions of version 2 of the GNU
- * General Public License. See the file COPYING in the main directory of the
- * Linux distribution for more details.
*/
#include <linux/bpf.h>
@@ -716,9 +713,14 @@ find_leftmost:
* have exact two children, so this function will never return NULL.
*/
for (node = search_root; node;) {
- if (!(node->flags & LPM_TREE_NODE_FLAG_IM))
+ if (node->flags & LPM_TREE_NODE_FLAG_IM) {
+ node = rcu_dereference(node->child[0]);
+ } else {
next_node = node;
- node = rcu_dereference(node->child[0]);
+ node = rcu_dereference(node->child[0]);
+ if (!node)
+ node = rcu_dereference(next_node->child[1]);
+ }
}
do_copy:
next_key->prefixlen = next_node->prefixlen;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index ef63d26622f2..42d17f730780 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1573,6 +1573,8 @@ bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type,
case BPF_CGROUP_INET6_CONNECT:
case BPF_CGROUP_UDP4_SENDMSG:
case BPF_CGROUP_UDP6_SENDMSG:
+ case BPF_CGROUP_UDP4_RECVMSG:
+ case BPF_CGROUP_UDP6_RECVMSG:
return 0;
default:
return -EINVAL;
@@ -1867,6 +1869,8 @@ static int bpf_prog_attach(const union bpf_attr *attr)
case BPF_CGROUP_INET6_CONNECT:
case BPF_CGROUP_UDP4_SENDMSG:
case BPF_CGROUP_UDP6_SENDMSG:
+ case BPF_CGROUP_UDP4_RECVMSG:
+ case BPF_CGROUP_UDP6_RECVMSG:
ptype = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
break;
case BPF_CGROUP_SOCK_OPS:
@@ -1952,6 +1956,8 @@ static int bpf_prog_detach(const union bpf_attr *attr)
case BPF_CGROUP_INET6_CONNECT:
case BPF_CGROUP_UDP4_SENDMSG:
case BPF_CGROUP_UDP6_SENDMSG:
+ case BPF_CGROUP_UDP4_RECVMSG:
+ case BPF_CGROUP_UDP6_RECVMSG:
ptype = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
break;
case BPF_CGROUP_SOCK_OPS:
@@ -2003,6 +2009,8 @@ static int bpf_prog_query(const union bpf_attr *attr,
case BPF_CGROUP_INET6_CONNECT:
case BPF_CGROUP_UDP4_SENDMSG:
case BPF_CGROUP_UDP6_SENDMSG:
+ case BPF_CGROUP_UDP4_RECVMSG:
+ case BPF_CGROUP_UDP6_RECVMSG:
case BPF_CGROUP_SOCK_OPS:
case BPF_CGROUP_DEVICE:
case BPF_CGROUP_SYSCTL:
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index d15cc4fafa89..a5c369e60343 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5353,9 +5353,12 @@ static int check_return_code(struct bpf_verifier_env *env)
struct tnum range = tnum_range(0, 1);
switch (env->prog->type) {
+ case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
+ if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
+ env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG)
+ range = tnum_range(1, 1);
case BPF_PROG_TYPE_CGROUP_SKB:
case BPF_PROG_TYPE_CGROUP_SOCK:
- case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
case BPF_PROG_TYPE_SOCK_OPS:
case BPF_PROG_TYPE_CGROUP_DEVICE:
case BPF_PROG_TYPE_CGROUP_SYSCTL:
@@ -5372,16 +5375,17 @@ static int check_return_code(struct bpf_verifier_env *env)
}
if (!tnum_in(range, reg->var_off)) {
+ char tn_buf[48];
+
verbose(env, "At program exit the register R0 ");
if (!tnum_is_unknown(reg->var_off)) {
- char tn_buf[48];
-
tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
verbose(env, "has value %s", tn_buf);
} else {
verbose(env, "has unknown scalar value");
}
- verbose(env, " should have been 0 or 1\n");
+ tnum_strn(tn_buf, sizeof(tn_buf), range);
+ verbose(env, " should have been in %s\n", tn_buf);
return -EINVAL;
}
return 0;
diff --git a/kernel/cgroup/pids.c b/kernel/cgroup/pids.c
index c9960baaa14f..8e513a573fe9 100644
--- a/kernel/cgroup/pids.c
+++ b/kernel/cgroup/pids.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Process number limiting controller for cgroups.
*
@@ -25,10 +26,6 @@
* a superset of parent/child/pids.current.
*
* Copyright (C) 2015 Aleksa Sarai <cyphar@cyphar.com>
- *
- * This file is subject to the terms and conditions of version 2 of the GNU
- * General Public License. See the file COPYING in the main directory of the
- * Linux distribution for more details.
*/
#include <linux/kernel.h>
diff --git a/kernel/cgroup/rdma.c b/kernel/cgroup/rdma.c
index 1d75ae7f1cb7..ae042c347c64 100644
--- a/kernel/cgroup/rdma.c
+++ b/kernel/cgroup/rdma.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* RDMA resource limiting controller for cgroups.
*
@@ -5,10 +6,6 @@
* additional RDMA resources after a certain limit is reached.
*
* Copyright (C) 2016 Parav Pandit <pandit.parav@gmail.com>
- *
- * This file is subject to the terms and conditions of version 2 of the GNU
- * General Public License. See the file COPYING in the main directory of the
- * Linux distribution for more details.
*/
#include <linux/bitops.h>
diff --git a/kernel/compat.c b/kernel/compat.c
index b5f7063c0db6..a2bc1d6ceb57 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* linux/kernel/compat.c
*
@@ -5,10 +6,6 @@
* on 64 bit kernels.
*
* Copyright (C) 2002-2003 Stephen Rothwell, IBM Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/linkage.h>
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 077fde6fb953..ef1c565edc5d 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1964,6 +1964,9 @@ static ssize_t write_cpuhp_fail(struct device *dev,
if (ret)
return ret;
+ if (fail < CPUHP_OFFLINE || fail > CPUHP_ONLINE)
+ return -EINVAL;
+
/*
* Cannot fail STARTING/DYING callbacks.
*/
@@ -2339,6 +2342,9 @@ static int __init mitigations_parse_cmdline(char *arg)
cpu_mitigations = CPU_MITIGATIONS_AUTO;
else if (!strcmp(arg, "auto,nosmt"))
cpu_mitigations = CPU_MITIGATIONS_AUTO_NOSMT;
+ else
+ pr_crit("Unsupported mitigations=%s, system may still be vulnerable\n",
+ arg);
return 0;
}
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index 093c9f917ed0..9f1557b98468 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -1,9 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* crash.c - kernel crash support code.
* Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com>
- *
- * This source code is licensed under the GNU General Public License,
- * Version 2. See the file COPYING for more details.
*/
#include <linux/crash_core.h>
diff --git a/kernel/events/core.c b/kernel/events/core.c
index abbd4b3b96c2..f85929ce13be 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5005,6 +5005,9 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
if (perf_event_check_period(event, value))
return -EINVAL;
+ if (!event->attr.freq && (value & (1ULL << 63)))
+ return -EINVAL;
+
event_function_call(event, __perf_event_period, &value);
return 0;
@@ -5923,7 +5926,7 @@ static void perf_sample_regs_user(struct perf_regs *regs_user,
if (user_mode(regs)) {
regs_user->abi = perf_reg_abi(current);
regs_user->regs = regs;
- } else if (current->mm) {
+ } else if (!(current->flags & PF_KTHREAD)) {
perf_get_regs_user(regs_user, regs, regs_user_copy);
} else {
regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE;
@@ -10033,6 +10036,12 @@ void perf_pmu_unregister(struct pmu *pmu)
}
EXPORT_SYMBOL_GPL(perf_pmu_unregister);
+static inline bool has_extended_regs(struct perf_event *event)
+{
+ return (event->attr.sample_regs_user & PERF_REG_EXTENDED_MASK) ||
+ (event->attr.sample_regs_intr & PERF_REG_EXTENDED_MASK);
+}
+
static int perf_try_init_event(struct pmu *pmu, struct perf_event *event)
{
struct perf_event_context *ctx = NULL;
@@ -10064,12 +10073,16 @@ static int perf_try_init_event(struct pmu *pmu, struct perf_event *event)
perf_event_ctx_unlock(event->group_leader, ctx);
if (!ret) {
+ if (!(pmu->capabilities & PERF_PMU_CAP_EXTENDED_REGS) &&
+ has_extended_regs(event))
+ ret = -EOPNOTSUPP;
+
if (pmu->capabilities & PERF_PMU_CAP_NO_EXCLUDE &&
- event_has_any_exclude_flag(event)) {
- if (event->destroy)
- event->destroy(event);
+ event_has_any_exclude_flag(event))
ret = -EINVAL;
- }
+
+ if (ret && event->destroy)
+ event->destroy(event);
}
if (ret)
diff --git a/kernel/fork.c b/kernel/fork.c
index 75675b9bf6df..61667909ce83 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -248,7 +248,11 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
struct page *page = alloc_pages_node(node, THREADINFO_GFP,
THREAD_SIZE_ORDER);
- return page ? page_address(page) : NULL;
+ if (likely(page)) {
+ tsk->stack = page_address(page);
+ return tsk->stack;
+ }
+ return NULL;
#endif
}
@@ -1712,31 +1716,6 @@ const struct file_operations pidfd_fops = {
#endif
};
-/**
- * pidfd_create() - Create a new pid file descriptor.
- *
- * @pid: struct pid that the pidfd will reference
- *
- * This creates a new pid file descriptor with the O_CLOEXEC flag set.
- *
- * Note, that this function can only be called after the fd table has
- * been unshared to avoid leaking the pidfd to the new process.
- *
- * Return: On success, a cloexec pidfd is returned.
- * On error, a negative errno number will be returned.
- */
-static int pidfd_create(struct pid *pid)
-{
- int fd;
-
- fd = anon_inode_getfd("[pidfd]", &pidfd_fops, get_pid(pid),
- O_RDWR | O_CLOEXEC);
- if (fd < 0)
- put_pid(pid);
-
- return fd;
-}
-
static void __delayed_free_task(struct rcu_head *rhp)
{
struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
@@ -1774,6 +1753,7 @@ static __latent_entropy struct task_struct *copy_process(
int pidfd = -1, retval;
struct task_struct *p;
struct multiprocess_signals delayed;
+ struct file *pidfile = NULL;
/*
* Don't allow sharing the root directory with processes in a different
@@ -1822,8 +1802,6 @@ static __latent_entropy struct task_struct *copy_process(
}
if (clone_flags & CLONE_PIDFD) {
- int reserved;
-
/*
* - CLONE_PARENT_SETTID is useless for pidfds and also
* parent_tidptr is used to return pidfds.
@@ -1834,16 +1812,6 @@ static __latent_entropy struct task_struct *copy_process(
if (clone_flags &
(CLONE_DETACHED | CLONE_PARENT_SETTID | CLONE_THREAD))
return ERR_PTR(-EINVAL);
-
- /*
- * Verify that parent_tidptr is sane so we can potentially
- * reuse it later.
- */
- if (get_user(reserved, parent_tidptr))
- return ERR_PTR(-EFAULT);
-
- if (reserved != 0)
- return ERR_PTR(-EINVAL);
}
/*
@@ -2058,11 +2026,20 @@ static __latent_entropy struct task_struct *copy_process(
* if the fd table isn't shared).
*/
if (clone_flags & CLONE_PIDFD) {
- retval = pidfd_create(pid);
+ retval = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
if (retval < 0)
goto bad_fork_free_pid;
pidfd = retval;
+
+ pidfile = anon_inode_getfile("[pidfd]", &pidfd_fops, pid,
+ O_RDWR | O_CLOEXEC);
+ if (IS_ERR(pidfile)) {
+ put_unused_fd(pidfd);
+ goto bad_fork_free_pid;
+ }
+ get_pid(pid); /* held by pidfile now */
+
retval = put_user(pidfd, parent_tidptr);
if (retval)
goto bad_fork_put_pidfd;
@@ -2180,6 +2157,9 @@ static __latent_entropy struct task_struct *copy_process(
goto bad_fork_cancel_cgroup;
}
+ /* past the last point of failure */
+ if (pidfile)
+ fd_install(pidfd, pidfile);
init_task_pid_links(p);
if (likely(p->pid)) {
@@ -2246,8 +2226,10 @@ bad_fork_cancel_cgroup:
bad_fork_cgroup_threadgroup_change_end:
cgroup_threadgroup_change_end(current);
bad_fork_put_pidfd:
- if (clone_flags & CLONE_PIDFD)
- ksys_close(pidfd);
+ if (clone_flags & CLONE_PIDFD) {
+ fput(pidfile);
+ put_unused_fd(pidfd);
+ }
bad_fork_free_pid:
if (pid != &init_struct_pid)
free_pid(pid);
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 68559808fdfa..1b018f1a6e0d 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1,9 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* kexec.c - kexec_load system call
* Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com>
- *
- * This source code is licensed under the GNU General Public License,
- * Version 2. See the file COPYING for more details.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index fd5c95ff9251..d5870723b8ad 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -1,9 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* kexec.c - kexec system call core code.
* Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com>
- *
- * This source code is licensed under the GNU General Public License,
- * Version 2. See the file COPYING for more details.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 072b6ee55e3f..ef7b951a8087 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -1,12 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* kexec: kexec_file_load system call
*
* Copyright (C) 2014 Red Hat Inc.
* Authors:
* Vivek Goyal <vgoyal@redhat.com>
- *
- * This source code is licensed under the GNU General Public License,
- * Version 2. See the file COPYING for more details.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/kernel/power/poweroff.c b/kernel/power/poweroff.c
index 7ef6866b521d..6d475281c730 100644
--- a/kernel/power/poweroff.c
+++ b/kernel/power/poweroff.c
@@ -1,7 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* poweroff.c - sysrq handler to gracefully power down machine.
- *
- * This file is released under the GPL v2
*/
#include <linux/kernel.h>
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 9505101ed2bc..096211299c07 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -493,6 +493,9 @@ int suspend_devices_and_enter(suspend_state_t state)
pm_suspend_target_state = state;
+ if (state == PM_SUSPEND_TO_IDLE)
+ pm_set_suspend_no_platform();
+
error = platform_suspend_begin(state);
if (error)
goto Close;
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 678bfb9bd87f..14c6a8716ba1 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* kernel/sched/debug.c
*
* Print the CFS rbtree and other debugging details
*
* Copyright(C) 2007, Red Hat, Inc., Ingo Molnar
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include "sched.h"
diff --git a/kernel/signal.c b/kernel/signal.c
index d622eac9d169..edf8915ddd54 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2912,7 +2912,8 @@ EXPORT_SYMBOL(set_compat_user_sigmask);
* This is useful for syscalls such as ppoll, pselect, io_pgetevents and
* epoll_pwait where a new sigmask is passed in from userland for the syscalls.
*/
-void restore_user_sigmask(const void __user *usigmask, sigset_t *sigsaved)
+void restore_user_sigmask(const void __user *usigmask, sigset_t *sigsaved,
+ bool interrupted)
{
if (!usigmask)
@@ -2922,7 +2923,7 @@ void restore_user_sigmask(const void __user *usigmask, sigset_t *sigsaved)
* Restoring sigmask here can lead to delivering signals that the above
* syscalls are intended to block because of the sigmask passed in.
*/
- if (signal_pending(current)) {
+ if (interrupted) {
current->saved_sigmask = *sigsaved;
set_restore_sigmask();
return;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 7d1008be6173..1beca96fb625 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -230,11 +230,6 @@ static int proc_dostring_coredump(struct ctl_table *table, int write,
#endif
static int proc_dopipe_max_size(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
-#ifdef CONFIG_BPF_SYSCALL
-static int proc_dointvec_minmax_bpf_stats(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp,
- loff_t *ppos);
-#endif
#ifdef CONFIG_MAGIC_SYSRQ
/* Note: sysrq code uses its own private copy */
@@ -1253,12 +1248,10 @@ static struct ctl_table kern_table[] = {
},
{
.procname = "bpf_stats_enabled",
- .data = &sysctl_bpf_stats_enabled,
- .maxlen = sizeof(sysctl_bpf_stats_enabled),
+ .data = &bpf_stats_enabled_key.key,
+ .maxlen = sizeof(bpf_stats_enabled_key),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax_bpf_stats,
- .extra1 = &zero,
- .extra2 = &one,
+ .proc_handler = proc_do_static_key,
},
#endif
#if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU)
@@ -3374,26 +3367,35 @@ int proc_do_large_bitmap(struct ctl_table *table, int write,
#endif /* CONFIG_PROC_SYSCTL */
-#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_SYSCTL)
-static int proc_dointvec_minmax_bpf_stats(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp,
- loff_t *ppos)
+#if defined(CONFIG_SYSCTL)
+int proc_do_static_key(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
{
- int ret, bpf_stats = *(int *)table->data;
- struct ctl_table tmp = *table;
+ struct static_key *key = (struct static_key *)table->data;
+ static DEFINE_MUTEX(static_key_mutex);
+ int val, ret;
+ struct ctl_table tmp = {
+ .data = &val,
+ .maxlen = sizeof(val),
+ .mode = table->mode,
+ .extra1 = &zero,
+ .extra2 = &one,
+ };
if (write && !capable(CAP_SYS_ADMIN))
return -EPERM;
- tmp.data = &bpf_stats;
+ mutex_lock(&static_key_mutex);
+ val = static_key_enabled(key);
ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
if (write && !ret) {
- *(int *)table->data = bpf_stats;
- if (bpf_stats)
- static_branch_enable(&bpf_stats_enabled_key);
+ if (val)
+ static_key_enable(key);
else
- static_branch_disable(&bpf_stats_enabled_key);
+ static_key_disable(key);
}
+ mutex_unlock(&static_key_mutex);
return ret;
}
#endif
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index f92d6ad5e080..1c9a4745e596 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -410,8 +410,6 @@ static const struct bpf_func_proto bpf_perf_event_read_value_proto = {
.arg4_type = ARG_CONST_SIZE,
};
-static DEFINE_PER_CPU(struct perf_sample_data, bpf_trace_sd);
-
static __always_inline u64
__bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
u64 flags, struct perf_sample_data *sd)
@@ -442,24 +440,50 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
return perf_event_output(event, sd, regs);
}
+/*
+ * Support executing tracepoints in normal, irq, and nmi context that each call
+ * bpf_perf_event_output
+ */
+struct bpf_trace_sample_data {
+ struct perf_sample_data sds[3];
+};
+
+static DEFINE_PER_CPU(struct bpf_trace_sample_data, bpf_trace_sds);
+static DEFINE_PER_CPU(int, bpf_trace_nest_level);
BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
u64, flags, void *, data, u64, size)
{
- struct perf_sample_data *sd = this_cpu_ptr(&bpf_trace_sd);
+ struct bpf_trace_sample_data *sds = this_cpu_ptr(&bpf_trace_sds);
+ int nest_level = this_cpu_inc_return(bpf_trace_nest_level);
struct perf_raw_record raw = {
.frag = {
.size = size,
.data = data,
},
};
+ struct perf_sample_data *sd;
+ int err;
- if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
- return -EINVAL;
+ if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(sds->sds))) {
+ err = -EBUSY;
+ goto out;
+ }
+
+ sd = &sds->sds[nest_level - 1];
+
+ if (unlikely(flags & ~(BPF_F_INDEX_MASK))) {
+ err = -EINVAL;
+ goto out;
+ }
perf_sample_data_init(sd, 0, 0);
sd->raw = &raw;
- return __bpf_perf_event_output(regs, map, flags, sd);
+ err = __bpf_perf_event_output(regs, map, flags, sd);
+
+out:
+ this_cpu_dec(bpf_trace_nest_level);
+ return err;
}
static const struct bpf_func_proto bpf_perf_event_output_proto = {
@@ -822,16 +846,48 @@ pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
/*
* bpf_raw_tp_regs are separate from bpf_pt_regs used from skb/xdp
* to avoid potential recursive reuse issue when/if tracepoints are added
- * inside bpf_*_event_output, bpf_get_stackid and/or bpf_get_stack
+ * inside bpf_*_event_output, bpf_get_stackid and/or bpf_get_stack.
+ *
+ * Since raw tracepoints run despite bpf_prog_active, support concurrent usage
+ * in normal, irq, and nmi context.
*/
-static DEFINE_PER_CPU(struct pt_regs, bpf_raw_tp_regs);
+struct bpf_raw_tp_regs {
+ struct pt_regs regs[3];
+};
+static DEFINE_PER_CPU(struct bpf_raw_tp_regs, bpf_raw_tp_regs);
+static DEFINE_PER_CPU(int, bpf_raw_tp_nest_level);
+static struct pt_regs *get_bpf_raw_tp_regs(void)
+{
+ struct bpf_raw_tp_regs *tp_regs = this_cpu_ptr(&bpf_raw_tp_regs);
+ int nest_level = this_cpu_inc_return(bpf_raw_tp_nest_level);
+
+ if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(tp_regs->regs))) {
+ this_cpu_dec(bpf_raw_tp_nest_level);
+ return ERR_PTR(-EBUSY);
+ }
+
+ return &tp_regs->regs[nest_level - 1];
+}
+
+static void put_bpf_raw_tp_regs(void)
+{
+ this_cpu_dec(bpf_raw_tp_nest_level);
+}
+
BPF_CALL_5(bpf_perf_event_output_raw_tp, struct bpf_raw_tracepoint_args *, args,
struct bpf_map *, map, u64, flags, void *, data, u64, size)
{
- struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs);
+ struct pt_regs *regs = get_bpf_raw_tp_regs();
+ int ret;
+
+ if (IS_ERR(regs))
+ return PTR_ERR(regs);
perf_fetch_caller_regs(regs);
- return ____bpf_perf_event_output(regs, map, flags, data, size);
+ ret = ____bpf_perf_event_output(regs, map, flags, data, size);
+
+ put_bpf_raw_tp_regs();
+ return ret;
}
static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = {
@@ -848,12 +904,18 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = {
BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args,
struct bpf_map *, map, u64, flags)
{
- struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs);
+ struct pt_regs *regs = get_bpf_raw_tp_regs();
+ int ret;
+
+ if (IS_ERR(regs))
+ return PTR_ERR(regs);
perf_fetch_caller_regs(regs);
/* similar to bpf_perf_event_output_tp, but pt_regs fetched differently */
- return bpf_get_stackid((unsigned long) regs, (unsigned long) map,
- flags, 0, 0);
+ ret = bpf_get_stackid((unsigned long) regs, (unsigned long) map,
+ flags, 0, 0);
+ put_bpf_raw_tp_regs();
+ return ret;
}
static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = {
@@ -868,11 +930,17 @@ static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = {
BPF_CALL_4(bpf_get_stack_raw_tp, struct bpf_raw_tracepoint_args *, args,
void *, buf, u32, size, u64, flags)
{
- struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs);
+ struct pt_regs *regs = get_bpf_raw_tp_regs();
+ int ret;
+
+ if (IS_ERR(regs))
+ return PTR_ERR(regs);
perf_fetch_caller_regs(regs);
- return bpf_get_stack((unsigned long) regs, (unsigned long) buf,
- (unsigned long) size, flags, 0);
+ ret = bpf_get_stack((unsigned long) regs, (unsigned long) buf,
+ (unsigned long) size, flags, 0);
+ put_bpf_raw_tp_regs();
+ return ret;
}
static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = {