summaryrefslogtreecommitdiff
path: root/kernel/bpf/syscall.c
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2023-06-24 14:52:28 -0700
committerJakub Kicinski <kuba@kernel.org>2023-06-24 14:52:28 -0700
commita685d0df75b0357bf0720cafa30c27634063be0a (patch)
tree3396c4440796cd2caf9b500efa52c77b5cebaafb /kernel/bpf/syscall.c
parentd1d29a42f7acde2fe618cf66b6bfbe047dc51b6b (diff)
parentfbc5669de62a452fb3a26a4560668637d5c9e7b5 (diff)
Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says: ==================== pull-request: bpf-next 2023-06-23 We've added 49 non-merge commits during the last 24 day(s) which contain a total of 70 files changed, 1935 insertions(+), 442 deletions(-). The main changes are: 1) Extend bpf_fib_lookup helper to allow passing the route table ID, from Louis DeLosSantos. 2) Fix regsafe() in verifier to call check_ids() for scalar registers, from Eduard Zingerman. 3) Extend the set of cpumask kfuncs with bpf_cpumask_first_and() and a rework of bpf_cpumask_any*() kfuncs. Additionally, add selftests, from David Vernet. 4) Fix socket lookup BPF helpers for tc/XDP to respect VRF bindings, from Gilad Sever. 5) Change bpf_link_put() to use workqueue unconditionally to fix it under PREEMPT_RT, from Sebastian Andrzej Siewior. 6) Follow-ups to address issues in the bpf_refcount shared ownership implementation, from Dave Marchevsky. 7) A few general refactorings to BPF map and program creation permissions checks which were part of the BPF token series, from Andrii Nakryiko. 8) Various fixes for benchmark framework and add a new benchmark for BPF memory allocator to BPF selftests, from Hou Tao. 9) Documentation improvements around iterators and trusted pointers, from Anton Protopopov. 10) Small cleanup in verifier to improve allocated object check, from Daniel T. Lee. 11) Improve performance of bpf_xdp_pointer() by avoiding access to shared_info when XDP packet does not have frags, from Jesper Dangaard Brouer. 12) Silence a harmless syzbot-reported warning in btf_type_id_size(), from Yonghong Song. 13) Remove duplicate bpfilter_umh_cleanup in favor of umd_cleanup_helper, from Jarkko Sakkinen. 14) Fix BPF selftests build for resolve_btfids under custom HOSTCFLAGS, from Viktor Malik. * tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (49 commits) bpf, docs: Document existing macros instead of deprecated bpf, docs: BPF Iterator Document selftests/bpf: Fix compilation failure for prog vrf_socket_lookup selftests/bpf: Add vrf_socket_lookup tests bpf: Fix bpf socket lookup from tc/xdp to respect socket VRF bindings bpf: Call __bpf_sk_lookup()/__bpf_skc_lookup() directly via TC hookpoint bpf: Factor out socket lookup functions for the TC hookpoint. selftests/bpf: Set the default value of consumer_cnt as 0 selftests/bpf: Ensure that next_cpu() returns a valid CPU number selftests/bpf: Output the correct error code for pthread APIs selftests/bpf: Use producer_cnt to allocate local counter array xsk: Remove unused inline function xsk_buff_discard() bpf: Keep BPF_PROG_LOAD permission checks clear of validations bpf: Centralize permissions checks for all BPF map types bpf: Inline map creation logic in map_create() function bpf: Move unprivileged checks into map_create() and bpf_prog_load() bpf: Remove in_atomic() from bpf_link_put(). selftests/bpf: Verify that check_ids() is used for scalars in regsafe() bpf: Verify scalar ids mapping in regsafe() using check_ids() selftests/bpf: Check if mark_chain_precision() follows scalar ids ... ==================== Link: https://lore.kernel.org/r/20230623211256.8409-1-daniel@iogearbox.net Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'kernel/bpf/syscall.c')
-rw-r--r--kernel/bpf/syscall.c184
1 files changed, 113 insertions, 71 deletions
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 4497b193dd20..a2aef900519c 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -109,37 +109,6 @@ const struct bpf_map_ops bpf_map_offload_ops = {
.map_mem_usage = bpf_map_offload_map_mem_usage,
};
-static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
-{
- const struct bpf_map_ops *ops;
- u32 type = attr->map_type;
- struct bpf_map *map;
- int err;
-
- if (type >= ARRAY_SIZE(bpf_map_types))
- return ERR_PTR(-EINVAL);
- type = array_index_nospec(type, ARRAY_SIZE(bpf_map_types));
- ops = bpf_map_types[type];
- if (!ops)
- return ERR_PTR(-EINVAL);
-
- if (ops->map_alloc_check) {
- err = ops->map_alloc_check(attr);
- if (err)
- return ERR_PTR(err);
- }
- if (attr->map_ifindex)
- ops = &bpf_map_offload_ops;
- if (!ops->map_mem_usage)
- return ERR_PTR(-EINVAL);
- map = ops->map_alloc(attr);
- if (IS_ERR(map))
- return map;
- map->ops = ops;
- map->map_type = type;
- return map;
-}
-
static void bpf_map_write_active_inc(struct bpf_map *map)
{
atomic64_inc(&map->writecnt);
@@ -1127,7 +1096,9 @@ free_map_tab:
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
+ const struct bpf_map_ops *ops;
int numa_node = bpf_map_attr_numa_node(attr);
+ u32 map_type = attr->map_type;
struct bpf_map *map;
int f_flags;
int err;
@@ -1158,9 +1129,85 @@ static int map_create(union bpf_attr *attr)
return -EINVAL;
/* find map type and init map: hashtable vs rbtree vs bloom vs ... */
- map = find_and_alloc_map(attr);
+ map_type = attr->map_type;
+ if (map_type >= ARRAY_SIZE(bpf_map_types))
+ return -EINVAL;
+ map_type = array_index_nospec(map_type, ARRAY_SIZE(bpf_map_types));
+ ops = bpf_map_types[map_type];
+ if (!ops)
+ return -EINVAL;
+
+ if (ops->map_alloc_check) {
+ err = ops->map_alloc_check(attr);
+ if (err)
+ return err;
+ }
+ if (attr->map_ifindex)
+ ops = &bpf_map_offload_ops;
+ if (!ops->map_mem_usage)
+ return -EINVAL;
+
+ /* Intent here is for unprivileged_bpf_disabled to block BPF map
+ * creation for unprivileged users; other actions depend
+ * on fd availability and access to bpffs, so are dependent on
+ * object creation success. Even with unprivileged BPF disabled,
+ * capability checks are still carried out.
+ */
+ if (sysctl_unprivileged_bpf_disabled && !bpf_capable())
+ return -EPERM;
+
+ /* check privileged map type permissions */
+ switch (map_type) {
+ case BPF_MAP_TYPE_ARRAY:
+ case BPF_MAP_TYPE_PERCPU_ARRAY:
+ case BPF_MAP_TYPE_PROG_ARRAY:
+ case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
+ case BPF_MAP_TYPE_CGROUP_ARRAY:
+ case BPF_MAP_TYPE_ARRAY_OF_MAPS:
+ case BPF_MAP_TYPE_HASH:
+ case BPF_MAP_TYPE_PERCPU_HASH:
+ case BPF_MAP_TYPE_HASH_OF_MAPS:
+ case BPF_MAP_TYPE_RINGBUF:
+ case BPF_MAP_TYPE_USER_RINGBUF:
+ case BPF_MAP_TYPE_CGROUP_STORAGE:
+ case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
+ /* unprivileged */
+ break;
+ case BPF_MAP_TYPE_SK_STORAGE:
+ case BPF_MAP_TYPE_INODE_STORAGE:
+ case BPF_MAP_TYPE_TASK_STORAGE:
+ case BPF_MAP_TYPE_CGRP_STORAGE:
+ case BPF_MAP_TYPE_BLOOM_FILTER:
+ case BPF_MAP_TYPE_LPM_TRIE:
+ case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
+ case BPF_MAP_TYPE_STACK_TRACE:
+ case BPF_MAP_TYPE_QUEUE:
+ case BPF_MAP_TYPE_STACK:
+ case BPF_MAP_TYPE_LRU_HASH:
+ case BPF_MAP_TYPE_LRU_PERCPU_HASH:
+ case BPF_MAP_TYPE_STRUCT_OPS:
+ case BPF_MAP_TYPE_CPUMAP:
+ if (!bpf_capable())
+ return -EPERM;
+ break;
+ case BPF_MAP_TYPE_SOCKMAP:
+ case BPF_MAP_TYPE_SOCKHASH:
+ case BPF_MAP_TYPE_DEVMAP:
+ case BPF_MAP_TYPE_DEVMAP_HASH:
+ case BPF_MAP_TYPE_XSKMAP:
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+ break;
+ default:
+ WARN(1, "unsupported map type %d", map_type);
+ return -EPERM;
+ }
+
+ map = ops->map_alloc(attr);
if (IS_ERR(map))
return PTR_ERR(map);
+ map->ops = ops;
+ map->map_type = map_type;
err = bpf_obj_name_cpy(map->name, attr->map_name,
sizeof(attr->map_name));
@@ -2507,7 +2554,6 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
struct btf *attach_btf = NULL;
int err;
char license[128];
- bool is_gpl;
if (CHECK_ATTR(BPF_PROG_LOAD))
return -EINVAL;
@@ -2526,15 +2572,15 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
!bpf_capable())
return -EPERM;
- /* copy eBPF program license from user space */
- if (strncpy_from_bpfptr(license,
- make_bpfptr(attr->license, uattr.is_kernel),
- sizeof(license) - 1) < 0)
- return -EFAULT;
- license[sizeof(license) - 1] = 0;
-
- /* eBPF programs must be GPL compatible to use GPL-ed functions */
- is_gpl = license_is_gpl_compatible(license);
+ /* Intent here is for unprivileged_bpf_disabled to block BPF program
+ * creation for unprivileged users; other actions depend
+ * on fd availability and access to bpffs, so are dependent on
+ * object creation success. Even with unprivileged BPF disabled,
+ * capability checks are still carried out for these
+ * and other operations.
+ */
+ if (sysctl_unprivileged_bpf_disabled && !bpf_capable())
+ return -EPERM;
if (attr->insn_cnt == 0 ||
attr->insn_cnt > (bpf_capable() ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS))
@@ -2618,12 +2664,20 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
make_bpfptr(attr->insns, uattr.is_kernel),
bpf_prog_insn_size(prog)) != 0)
goto free_prog_sec;
+ /* copy eBPF program license from user space */
+ if (strncpy_from_bpfptr(license,
+ make_bpfptr(attr->license, uattr.is_kernel),
+ sizeof(license) - 1) < 0)
+ goto free_prog_sec;
+ license[sizeof(license) - 1] = 0;
+
+ /* eBPF programs must be GPL compatible to use GPL-ed functions */
+ prog->gpl_compatible = license_is_gpl_compatible(license) ? 1 : 0;
prog->orig_prog = NULL;
prog->jited = 0;
atomic64_set(&prog->aux->refcnt, 1);
- prog->gpl_compatible = is_gpl ? 1 : 0;
if (bpf_prog_is_dev_bound(prog->aux)) {
err = bpf_prog_dev_bound_init(prog, attr);
@@ -2797,28 +2851,31 @@ static void bpf_link_put_deferred(struct work_struct *work)
bpf_link_free(link);
}
-/* bpf_link_put can be called from atomic context, but ensures that resources
- * are freed from process context
+/* bpf_link_put might be called from atomic context. It needs to be called
+ * from sleepable context in order to acquire sleeping locks during the process.
*/
void bpf_link_put(struct bpf_link *link)
{
if (!atomic64_dec_and_test(&link->refcnt))
return;
- if (in_atomic()) {
- INIT_WORK(&link->work, bpf_link_put_deferred);
- schedule_work(&link->work);
- } else {
- bpf_link_free(link);
- }
+ INIT_WORK(&link->work, bpf_link_put_deferred);
+ schedule_work(&link->work);
}
EXPORT_SYMBOL(bpf_link_put);
+static void bpf_link_put_direct(struct bpf_link *link)
+{
+ if (!atomic64_dec_and_test(&link->refcnt))
+ return;
+ bpf_link_free(link);
+}
+
static int bpf_link_release(struct inode *inode, struct file *filp)
{
struct bpf_link *link = filp->private_data;
- bpf_link_put(link);
+ bpf_link_put_direct(link);
return 0;
}
@@ -4801,7 +4858,7 @@ out_put_progs:
if (ret)
bpf_prog_put(new_prog);
out_put_link:
- bpf_link_put(link);
+ bpf_link_put_direct(link);
return ret;
}
@@ -4824,7 +4881,7 @@ static int link_detach(union bpf_attr *attr)
else
ret = -EOPNOTSUPP;
- bpf_link_put(link);
+ bpf_link_put_direct(link);
return ret;
}
@@ -4894,7 +4951,7 @@ static int bpf_link_get_fd_by_id(const union bpf_attr *attr)
fd = bpf_link_new_fd(link);
if (fd < 0)
- bpf_link_put(link);
+ bpf_link_put_direct(link);
return fd;
}
@@ -4971,7 +5028,7 @@ static int bpf_iter_create(union bpf_attr *attr)
return PTR_ERR(link);
err = bpf_iter_new_fd(link);
- bpf_link_put(link);
+ bpf_link_put_direct(link);
return err;
}
@@ -5041,23 +5098,8 @@ out_prog_put:
static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size)
{
union bpf_attr attr;
- bool capable;
int err;
- capable = bpf_capable() || !sysctl_unprivileged_bpf_disabled;
-
- /* Intent here is for unprivileged_bpf_disabled to block key object
- * creation commands for unprivileged users; other actions depend
- * of fd availability and access to bpffs, so are dependent on
- * object creation success. Capabilities are later verified for
- * operations such as load and map create, so even with unprivileged
- * BPF disabled, capability checks are still carried out for these
- * and other operations.
- */
- if (!capable &&
- (cmd == BPF_MAP_CREATE || cmd == BPF_PROG_LOAD))
- return -EPERM;
-
err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size);
if (err)
return err;