From fedf99200ab086c42a572fca1d7266b06cdc3e3f Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Tue, 2 May 2023 11:14:18 -0700 Subject: bpf: Print a warning only if writing to unprivileged_bpf_disabled. Only print the warning message if you are writing to "/proc/sys/kernel/unprivileged_bpf_disabled". The kernel may print an annoying warning when you read "/proc/sys/kernel/unprivileged_bpf_disabled" saying WARNING: Unprivileged eBPF is enabled with eIBRS on, data leaks possible via Spectre v2 BHB attacks! However, this message is only meaningful when the feature is disabled or enabled. Signed-off-by: Kui-Feng Lee Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20230502181418.308479-1-kuifeng@meta.com --- kernel/bpf/syscall.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/bpf/syscall.c') diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 14f39c1e573e..909c112ef537 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -5380,7 +5380,8 @@ static int bpf_unpriv_handler(struct ctl_table *table, int write, *(int *)table->data = unpriv_enable; } - unpriv_ebpf_notify(unpriv_enable); + if (write) + unpriv_ebpf_notify(unpriv_enable); return ret; } -- cgit From e859e429511afb21d3f784bd0ccdf500d40b73ef Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Wed, 17 May 2023 10:31:25 +0000 Subject: bpf: Show target_{obj,btf}_id in tracing link fdinfo The target_btf_id can help us understand which kernel function is linked by a tracing prog. The target_btf_id and target_obj_id have already been exposed to userspace, so we just need to show them. The result as follows, $ cat /proc/10673/fdinfo/10 pos: 0 flags: 02000000 mnt_id: 15 ino: 2094 link_type: tracing link_id: 2 prog_tag: a04f5eef06a7f555 prog_id: 13 attach_type: 24 target_obj_id: 1 target_btf_id: 13964 Signed-off-by: Yafang Shao Acked-by: Song Liu Link: https://lore.kernel.org/r/20230517103126.68372-2-laoar.shao@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/syscall.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'kernel/bpf/syscall.c') diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 909c112ef537..b2621089904b 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2968,10 +2968,17 @@ static void bpf_tracing_link_show_fdinfo(const struct bpf_link *link, { struct bpf_tracing_link *tr_link = container_of(link, struct bpf_tracing_link, link.link); + u32 target_btf_id, target_obj_id; + bpf_trampoline_unpack_key(tr_link->trampoline->key, + &target_obj_id, &target_btf_id); seq_printf(seq, - "attach_type:\t%d\n", - tr_link->attach_type); + "attach_type:\t%d\n" + "target_obj_id:\t%u\n" + "target_btf_id:\t%u\n", + tr_link->attach_type, + target_obj_id, + target_btf_id); } static int bpf_tracing_link_fill_link_info(const struct bpf_link *link, -- cgit From cb8edce28073a906401c9e421eca7c99f3396da1 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 15 May 2023 16:48:06 -0700 Subject: bpf: Support O_PATH FDs in BPF_OBJ_PIN and BPF_OBJ_GET commands Current UAPI of BPF_OBJ_PIN and BPF_OBJ_GET commands of bpf() syscall forces users to specify pinning location as a string-based absolute or relative (to current working directory) path. This has various implications related to security (e.g., symlink-based attacks), forces BPF FS to be exposed in the file system, which can cause races with other applications. One of the feedbacks we got from folks working with containers heavily was that inability to use purely FD-based location specification was an unfortunate limitation and hindrance for BPF_OBJ_PIN and BPF_OBJ_GET commands. This patch closes this oversight, adding path_fd field to BPF_OBJ_PIN and BPF_OBJ_GET UAPI, following conventions established by *at() syscalls for dirfd + pathname combinations. This now allows interesting possibilities like working with detached BPF FS mount (e.g., to perform multiple pinnings without running a risk of someone interfering with them), and generally making pinning/getting more secure and not prone to any races and/or security attacks. This is demonstrated by a selftest added in subsequent patch that takes advantage of new mount APIs (fsopen, fsconfig, fsmount) to demonstrate creating detached BPF FS mount, pinning, and then getting BPF map out of it, all while never exposing this private instance of BPF FS to outside worlds. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Reviewed-by: Christian Brauner Link: https://lore.kernel.org/bpf/20230523170013.728457-4-andrii@kernel.org --- kernel/bpf/syscall.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) (limited to 'kernel/bpf/syscall.c') diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index b2621089904b..c7f6807215e6 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2697,23 +2697,38 @@ free_prog: return err; } -#define BPF_OBJ_LAST_FIELD file_flags +#define BPF_OBJ_LAST_FIELD path_fd static int bpf_obj_pin(const union bpf_attr *attr) { - if (CHECK_ATTR(BPF_OBJ) || attr->file_flags != 0) + int path_fd; + + if (CHECK_ATTR(BPF_OBJ) || attr->file_flags & ~BPF_F_PATH_FD) + return -EINVAL; + + /* path_fd has to be accompanied by BPF_F_PATH_FD flag */ + if (!(attr->file_flags & BPF_F_PATH_FD) && attr->path_fd) return -EINVAL; - return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname)); + path_fd = attr->file_flags & BPF_F_PATH_FD ? attr->path_fd : AT_FDCWD; + return bpf_obj_pin_user(attr->bpf_fd, path_fd, + u64_to_user_ptr(attr->pathname)); } static int bpf_obj_get(const union bpf_attr *attr) { + int path_fd; + if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0 || - attr->file_flags & ~BPF_OBJ_FLAG_MASK) + attr->file_flags & ~(BPF_OBJ_FLAG_MASK | BPF_F_PATH_FD)) + return -EINVAL; + + /* path_fd has to be accompanied by BPF_F_PATH_FD flag */ + if (!(attr->file_flags & BPF_F_PATH_FD) && attr->path_fd) return -EINVAL; - return bpf_obj_get_user(u64_to_user_ptr(attr->pathname), + path_fd = attr->file_flags & BPF_F_PATH_FD ? attr->path_fd : AT_FDCWD; + return bpf_obj_get_user(path_fd, u64_to_user_ptr(attr->pathname), attr->file_flags); } -- cgit From c4c84f6fb2c4dc4c0f5fd927b3c3d3fd28b7030e Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 24 May 2023 15:54:19 -0700 Subject: bpf: drop unnecessary bpf_capable() check in BPF_MAP_FREEZE command Seems like that extra bpf_capable() check in BPF_MAP_FREEZE handler was unintentionally left when we switched to a model that all BPF map operations should be allowed regardless of CAP_BPF (or any other capabilities), as long as process got BPF map FD somehow. This patch replaces bpf_capable() check in BPF_MAP_FREEZE handler with writeable access check, given conceptually freezing the map is modifying it: map becomes unmodifiable for subsequent updates. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230524225421.1587859-2-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/syscall.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'kernel/bpf/syscall.c') diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index c7f6807215e6..c9a201e4c457 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1931,6 +1931,11 @@ static int map_freeze(const union bpf_attr *attr) return -ENOTSUPP; } + if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { + err = -EPERM; + goto err_put; + } + mutex_lock(&map->freeze_mutex); if (bpf_map_write_active(map)) { err = -EBUSY; @@ -1940,10 +1945,6 @@ static int map_freeze(const union bpf_attr *attr) err = -EBUSY; goto err_put; } - if (!bpf_capable()) { - err = -EPERM; - goto err_put; - } WRITE_ONCE(map->frozen, true); err_put: -- cgit From 4266f41feaeee2521749ce2cfb52eafd4e2947c5 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 26 May 2023 12:13:56 +0200 Subject: bpf: Fix bad unlock balance on freeze_mutex Commit c4c84f6fb2c4 ("bpf: drop unnecessary bpf_capable() check in BPF_MAP_FREEZE command") moved the permissions check outside of the freeze_mutex in the map_freeze() handler. The error paths still jumps to the err_put which tries to unlock the freeze_mutex even though it was not locked in the first place. Fix it. Fixes: c4c84f6fb2c4 ("bpf: drop unnecessary bpf_capable() check in BPF_MAP_FREEZE command") Reported-by: syzbot+8982e75c2878b9ffeac5@syzkaller.appspotmail.com Signed-off-by: Daniel Borkmann --- kernel/bpf/syscall.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/bpf/syscall.c') diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index c9a201e4c457..92a57efc77de 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1932,8 +1932,8 @@ static int map_freeze(const union bpf_attr *attr) } if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { - err = -EPERM; - goto err_put; + fdput(f); + return -EPERM; } mutex_lock(&map->freeze_mutex); -- cgit From 132328e8e85174ea788faf8f627c33258c88fbad Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 5 Jun 2023 15:14:45 +0200 Subject: bpf: netfilter: Add BPF_NETFILTER bpf_attach_type Andrii Nakryiko writes: And we currently don't have an attach type for NETLINK BPF link. Thankfully it's not too late to add it. I see that link_create() in kernel/bpf/syscall.c just bypasses attach_type check. We shouldn't have done that. Instead we need to add BPF_NETLINK attach type to enum bpf_attach_type. And wire all that properly throughout the kernel and libbpf itself. This adds BPF_NETFILTER and uses it. This breaks uabi but this wasn't in any non-rc release yet, so it should be fine. v2: check link_attack prog type in link_create too Fixes: 84601d6ee68a ("bpf: add bpf_link support for BPF_NETFILTER programs") Suggested-by: Andrii Nakryiko Signed-off-by: Florian Westphal Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/CAEf4BzZ69YgrQW7DHCJUT_X+GqMq_ZQQPBwopaJJVGFD5=d5Vg@mail.gmail.com/ Link: https://lore.kernel.org/bpf/20230605131445.32016-1-fw@strlen.de --- kernel/bpf/syscall.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'kernel/bpf/syscall.c') diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 14f39c1e573e..0c21d0d8efe4 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2433,6 +2433,10 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type, default: return -EINVAL; } + case BPF_PROG_TYPE_NETFILTER: + if (expected_attach_type == BPF_NETFILTER) + return 0; + return -EINVAL; case BPF_PROG_TYPE_SYSCALL: case BPF_PROG_TYPE_EXT: if (expected_attach_type) @@ -4590,7 +4594,12 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr) switch (prog->type) { case BPF_PROG_TYPE_EXT: + break; case BPF_PROG_TYPE_NETFILTER: + if (attr->link_create.attach_type != BPF_NETFILTER) { + ret = -EINVAL; + goto out; + } break; case BPF_PROG_TYPE_PERF_EVENT: case BPF_PROG_TYPE_TRACEPOINT: -- cgit From ab5d47bd41b1db82c295b0e751e2b822b43a4b5a Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 14 Jun 2023 10:34:30 +0200 Subject: bpf: Remove in_atomic() from bpf_link_put(). bpf_free_inode() is invoked as a RCU callback. Usually RCU callbacks are invoked within softirq context. By setting rcutree.use_softirq=0 boot option the RCU callbacks will be invoked in a per-CPU kthread with bottom halves disabled which implies a RCU read section. On PREEMPT_RT the context remains fully preemptible. The RCU read section however does not allow schedule() invocation. The latter happens in mutex_lock() performed by bpf_trampoline_unlink_prog() originated from bpf_link_put(). It was pointed out that the bpf_link_put() invocation should not be delayed if originated from close(). It was also pointed out that other invocations from within a syscall should also avoid the workqueue. Everyone else should use workqueue by default to remain safe in the future (while auditing the code, every caller was preemptible except for the RCU case). Let bpf_link_put() use the worker unconditionally. Add bpf_link_put_direct() which will directly free the resources and is used by close() and from within __sys_bpf(). Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20230614083430.oENawF8f@linutronix.de --- kernel/bpf/syscall.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) (limited to 'kernel/bpf/syscall.c') diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 92a57efc77de..12955415d376 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2793,28 +2793,31 @@ static void bpf_link_put_deferred(struct work_struct *work) bpf_link_free(link); } -/* bpf_link_put can be called from atomic context, but ensures that resources - * are freed from process context +/* bpf_link_put might be called from atomic context. It needs to be called + * from sleepable context in order to acquire sleeping locks during the process. */ void bpf_link_put(struct bpf_link *link) { if (!atomic64_dec_and_test(&link->refcnt)) return; - if (in_atomic()) { - INIT_WORK(&link->work, bpf_link_put_deferred); - schedule_work(&link->work); - } else { - bpf_link_free(link); - } + INIT_WORK(&link->work, bpf_link_put_deferred); + schedule_work(&link->work); } EXPORT_SYMBOL(bpf_link_put); +static void bpf_link_put_direct(struct bpf_link *link) +{ + if (!atomic64_dec_and_test(&link->refcnt)) + return; + bpf_link_free(link); +} + static int bpf_link_release(struct inode *inode, struct file *filp) { struct bpf_link *link = filp->private_data; - bpf_link_put(link); + bpf_link_put_direct(link); return 0; } @@ -4787,7 +4790,7 @@ out_put_progs: if (ret) bpf_prog_put(new_prog); out_put_link: - bpf_link_put(link); + bpf_link_put_direct(link); return ret; } @@ -4810,7 +4813,7 @@ static int link_detach(union bpf_attr *attr) else ret = -EOPNOTSUPP; - bpf_link_put(link); + bpf_link_put_direct(link); return ret; } @@ -4880,7 +4883,7 @@ static int bpf_link_get_fd_by_id(const union bpf_attr *attr) fd = bpf_link_new_fd(link); if (fd < 0) - bpf_link_put(link); + bpf_link_put_direct(link); return fd; } @@ -4957,7 +4960,7 @@ static int bpf_iter_create(union bpf_attr *attr) return PTR_ERR(link); err = bpf_iter_new_fd(link); - bpf_link_put(link); + bpf_link_put_direct(link); return err; } -- cgit From 1d28635abcf1914425d6516e641978011984c58a Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 13 Jun 2023 15:35:30 -0700 Subject: bpf: Move unprivileged checks into map_create() and bpf_prog_load() Make each bpf() syscall command a bit more self-contained, making it easier to further enhance it. We move sysctl_unprivileged_bpf_disabled handling down to map_create() and bpf_prog_load(), two special commands in this regard. Also swap the order of checks, calling bpf_capable() only if sysctl_unprivileged_bpf_disabled is true, avoiding unnecessary audit messages. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/bpf/20230613223533.3689589-2-andrii@kernel.org --- kernel/bpf/syscall.c | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) (limited to 'kernel/bpf/syscall.c') diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 12955415d376..7c41a623f405 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1157,6 +1157,15 @@ static int map_create(union bpf_attr *attr) !node_online(numa_node))) return -EINVAL; + /* Intent here is for unprivileged_bpf_disabled to block BPF map + * creation for unprivileged users; other actions depend + * on fd availability and access to bpffs, so are dependent on + * object creation success. Even with unprivileged BPF disabled, + * capability checks are still carried out. + */ + if (sysctl_unprivileged_bpf_disabled && !bpf_capable()) + return -EPERM; + /* find map type and init map: hashtable vs rbtree vs bloom vs ... */ map = find_and_alloc_map(attr); if (IS_ERR(map)) @@ -2532,6 +2541,16 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) /* eBPF programs must be GPL compatible to use GPL-ed functions */ is_gpl = license_is_gpl_compatible(license); + /* Intent here is for unprivileged_bpf_disabled to block BPF program + * creation for unprivileged users; other actions depend + * on fd availability and access to bpffs, so are dependent on + * object creation success. Even with unprivileged BPF disabled, + * capability checks are still carried out for these + * and other operations. + */ + if (sysctl_unprivileged_bpf_disabled && !bpf_capable()) + return -EPERM; + if (attr->insn_cnt == 0 || attr->insn_cnt > (bpf_capable() ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS)) return -E2BIG; @@ -5030,23 +5049,8 @@ out_prog_put: static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size) { union bpf_attr attr; - bool capable; int err; - capable = bpf_capable() || !sysctl_unprivileged_bpf_disabled; - - /* Intent here is for unprivileged_bpf_disabled to block key object - * creation commands for unprivileged users; other actions depend - * of fd availability and access to bpffs, so are dependent on - * object creation success. Capabilities are later verified for - * operations such as load and map create, so even with unprivileged - * BPF disabled, capability checks are still carried out for these - * and other operations. - */ - if (!capable && - (cmd == BPF_MAP_CREATE || cmd == BPF_PROG_LOAD)) - return -EPERM; - err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size); if (err) return err; -- cgit From 22db41226b679768df8f0a4ff5de8e58f625f45b Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 13 Jun 2023 15:35:31 -0700 Subject: bpf: Inline map creation logic in map_create() function Currently find_and_alloc_map() performs two separate functions: some argument sanity checking and partial map creation workflow hanling. Neither of those functions are self-sufficient and are augmented by further checks and initialization logic in the caller (map_create() function). So unify all the sanity checks, permission checks, and creation and initialization logic in one linear piece of code in map_create() instead. This also make it easier to further enhance permission checks and keep them located in one place. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/bpf/20230613223533.3689589-3-andrii@kernel.org --- kernel/bpf/syscall.c | 57 ++++++++++++++++++++++------------------------------ 1 file changed, 24 insertions(+), 33 deletions(-) (limited to 'kernel/bpf/syscall.c') diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 7c41a623f405..6ef302709ab0 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -109,37 +109,6 @@ const struct bpf_map_ops bpf_map_offload_ops = { .map_mem_usage = bpf_map_offload_map_mem_usage, }; -static struct bpf_map *find_and_alloc_map(union bpf_attr *attr) -{ - const struct bpf_map_ops *ops; - u32 type = attr->map_type; - struct bpf_map *map; - int err; - - if (type >= ARRAY_SIZE(bpf_map_types)) - return ERR_PTR(-EINVAL); - type = array_index_nospec(type, ARRAY_SIZE(bpf_map_types)); - ops = bpf_map_types[type]; - if (!ops) - return ERR_PTR(-EINVAL); - - if (ops->map_alloc_check) { - err = ops->map_alloc_check(attr); - if (err) - return ERR_PTR(err); - } - if (attr->map_ifindex) - ops = &bpf_map_offload_ops; - if (!ops->map_mem_usage) - return ERR_PTR(-EINVAL); - map = ops->map_alloc(attr); - if (IS_ERR(map)) - return map; - map->ops = ops; - map->map_type = type; - return map; -} - static void bpf_map_write_active_inc(struct bpf_map *map) { atomic64_inc(&map->writecnt); @@ -1127,7 +1096,9 @@ free_map_tab: /* called via syscall */ static int map_create(union bpf_attr *attr) { + const struct bpf_map_ops *ops; int numa_node = bpf_map_attr_numa_node(attr); + u32 map_type = attr->map_type; struct bpf_map *map; int f_flags; int err; @@ -1157,6 +1128,25 @@ static int map_create(union bpf_attr *attr) !node_online(numa_node))) return -EINVAL; + /* find map type and init map: hashtable vs rbtree vs bloom vs ... */ + map_type = attr->map_type; + if (map_type >= ARRAY_SIZE(bpf_map_types)) + return -EINVAL; + map_type = array_index_nospec(map_type, ARRAY_SIZE(bpf_map_types)); + ops = bpf_map_types[map_type]; + if (!ops) + return -EINVAL; + + if (ops->map_alloc_check) { + err = ops->map_alloc_check(attr); + if (err) + return err; + } + if (attr->map_ifindex) + ops = &bpf_map_offload_ops; + if (!ops->map_mem_usage) + return -EINVAL; + /* Intent here is for unprivileged_bpf_disabled to block BPF map * creation for unprivileged users; other actions depend * on fd availability and access to bpffs, so are dependent on @@ -1166,10 +1156,11 @@ static int map_create(union bpf_attr *attr) if (sysctl_unprivileged_bpf_disabled && !bpf_capable()) return -EPERM; - /* find map type and init map: hashtable vs rbtree vs bloom vs ... */ - map = find_and_alloc_map(attr); + map = ops->map_alloc(attr); if (IS_ERR(map)) return PTR_ERR(map); + map->ops = ops; + map->map_type = map_type; err = bpf_obj_name_cpy(map->name, attr->map_name, sizeof(attr->map_name)); -- cgit From 6c3eba1c5e283fd2bb1c076dbfcb47f569c3bfde Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 13 Jun 2023 15:35:32 -0700 Subject: bpf: Centralize permissions checks for all BPF map types This allows to do more centralized decisions later on, and generally makes it very explicit which maps are privileged and which are not (e.g., LRU_HASH and LRU_PERCPU_HASH, which are privileged HASH variants, as opposed to unprivileged HASH and HASH_PERCPU; now this is explicit and easy to verify). Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/bpf/20230613223533.3689589-4-andrii@kernel.org --- kernel/bpf/syscall.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) (limited to 'kernel/bpf/syscall.c') diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 6ef302709ab0..658d1154f221 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1156,6 +1156,53 @@ static int map_create(union bpf_attr *attr) if (sysctl_unprivileged_bpf_disabled && !bpf_capable()) return -EPERM; + /* check privileged map type permissions */ + switch (map_type) { + case BPF_MAP_TYPE_ARRAY: + case BPF_MAP_TYPE_PERCPU_ARRAY: + case BPF_MAP_TYPE_PROG_ARRAY: + case BPF_MAP_TYPE_PERF_EVENT_ARRAY: + case BPF_MAP_TYPE_CGROUP_ARRAY: + case BPF_MAP_TYPE_ARRAY_OF_MAPS: + case BPF_MAP_TYPE_HASH: + case BPF_MAP_TYPE_PERCPU_HASH: + case BPF_MAP_TYPE_HASH_OF_MAPS: + case BPF_MAP_TYPE_RINGBUF: + case BPF_MAP_TYPE_USER_RINGBUF: + case BPF_MAP_TYPE_CGROUP_STORAGE: + case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: + /* unprivileged */ + break; + case BPF_MAP_TYPE_SK_STORAGE: + case BPF_MAP_TYPE_INODE_STORAGE: + case BPF_MAP_TYPE_TASK_STORAGE: + case BPF_MAP_TYPE_CGRP_STORAGE: + case BPF_MAP_TYPE_BLOOM_FILTER: + case BPF_MAP_TYPE_LPM_TRIE: + case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY: + case BPF_MAP_TYPE_STACK_TRACE: + case BPF_MAP_TYPE_QUEUE: + case BPF_MAP_TYPE_STACK: + case BPF_MAP_TYPE_LRU_HASH: + case BPF_MAP_TYPE_LRU_PERCPU_HASH: + case BPF_MAP_TYPE_STRUCT_OPS: + case BPF_MAP_TYPE_CPUMAP: + if (!bpf_capable()) + return -EPERM; + break; + case BPF_MAP_TYPE_SOCKMAP: + case BPF_MAP_TYPE_SOCKHASH: + case BPF_MAP_TYPE_DEVMAP: + case BPF_MAP_TYPE_DEVMAP_HASH: + case BPF_MAP_TYPE_XSKMAP: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + break; + default: + WARN(1, "unsupported map type %d", map_type); + return -EPERM; + } + map = ops->map_alloc(attr); if (IS_ERR(map)) return PTR_ERR(map); -- cgit From 7f6719f7a8662a40afed367a685516f9f34e7bc2 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 13 Jun 2023 15:35:33 -0700 Subject: bpf: Keep BPF_PROG_LOAD permission checks clear of validations Move out flags validation and license checks out of the permission checks. They were intermingled, which makes subsequent changes harder. Clean this up: perform straightforward flag validation upfront, and fetch and check license later, right where we use it. Also consolidate capabilities check in one block, right after basic attribute sanity checks. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/bpf/20230613223533.3689589-5-andrii@kernel.org --- kernel/bpf/syscall.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) (limited to 'kernel/bpf/syscall.c') diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 658d1154f221..a75c54b6f8a3 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2550,7 +2550,6 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) struct btf *attach_btf = NULL; int err; char license[128]; - bool is_gpl; if (CHECK_ATTR(BPF_PROG_LOAD)) return -EINVAL; @@ -2569,16 +2568,6 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) !bpf_capable()) return -EPERM; - /* copy eBPF program license from user space */ - if (strncpy_from_bpfptr(license, - make_bpfptr(attr->license, uattr.is_kernel), - sizeof(license) - 1) < 0) - return -EFAULT; - license[sizeof(license) - 1] = 0; - - /* eBPF programs must be GPL compatible to use GPL-ed functions */ - is_gpl = license_is_gpl_compatible(license); - /* Intent here is for unprivileged_bpf_disabled to block BPF program * creation for unprivileged users; other actions depend * on fd availability and access to bpffs, so are dependent on @@ -2671,12 +2660,20 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) make_bpfptr(attr->insns, uattr.is_kernel), bpf_prog_insn_size(prog)) != 0) goto free_prog_sec; + /* copy eBPF program license from user space */ + if (strncpy_from_bpfptr(license, + make_bpfptr(attr->license, uattr.is_kernel), + sizeof(license) - 1) < 0) + goto free_prog_sec; + license[sizeof(license) - 1] = 0; + + /* eBPF programs must be GPL compatible to use GPL-ed functions */ + prog->gpl_compatible = license_is_gpl_compatible(license) ? 1 : 0; prog->orig_prog = NULL; prog->jited = 0; atomic64_set(&prog->aux->refcnt, 1); - prog->gpl_compatible = is_gpl ? 1 : 0; if (bpf_prog_is_dev_bound(prog->aux)) { err = bpf_prog_dev_bound_init(prog, attr); -- cgit From db8eae6bc5c702d8e3ab2d0c6bb5976c131576eb Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 18 Jun 2023 15:14:14 +0200 Subject: bpf: Force kprobe multi expected_attach_type for kprobe_multi link We currently allow to create perf link for program with expected_attach_type == BPF_TRACE_KPROBE_MULTI. This will cause crash when we call helpers like get_attach_cookie or get_func_ip in such program, because it will call the kprobe_multi's version (current->bpf_ctx context setup) of those helpers while it expects perf_link's current->bpf_ctx context setup. Making sure that we use BPF_TRACE_KPROBE_MULTI expected_attach_type only for programs attaching through kprobe_multi link. Fixes: ca74823c6e16 ("bpf: Add cookie support to programs attached with kprobe multi link") Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20230618131414.75649-1-jolsa@kernel.org --- kernel/bpf/syscall.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'kernel/bpf/syscall.c') diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 0c21d0d8efe4..f1c8733f76b8 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3440,6 +3440,11 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog, return prog->enforce_expected_attach_type && prog->expected_attach_type != attach_type ? -EINVAL : 0; + case BPF_PROG_TYPE_KPROBE: + if (prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI && + attach_type != BPF_TRACE_KPROBE_MULTI) + return -EINVAL; + return 0; default: return 0; } -- cgit