From 332ea1f697be148bd5e66475d82b5ecc5084da65 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 22 Feb 2023 15:29:12 -1000 Subject: bpf: Add bpf_cgroup_from_id() kfunc cgroup ID is an userspace-visible 64bit value uniquely identifying a given cgroup. As the IDs are used widely, it's useful to be able to look up the matching cgroups. Add bpf_cgroup_from_id(). v2: Separate out selftest into its own patch as suggested by Alexei. Signed-off-by: Tejun Heo Link: https://lore.kernel.org/r/Y/bBaG96t0/gQl9/@slm.duckdns.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/helpers.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'kernel/bpf/helpers.c') diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 5b278a38ae58..a784be6f8bac 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2101,6 +2101,23 @@ __bpf_kfunc struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level) cgroup_get(ancestor); return ancestor; } + +/** + * bpf_cgroup_from_id - Find a cgroup from its ID. A cgroup returned by this + * kfunc which is not subsequently stored in a map, must be released by calling + * bpf_cgroup_release(). + * @cgrp: The cgroup for which we're performing a lookup. + * @level: The level of ancestor to look up. + */ +__bpf_kfunc struct cgroup *bpf_cgroup_from_id(u64 cgid) +{ + struct cgroup *cgrp; + + cgrp = cgroup_get_from_id(cgid); + if (IS_ERR(cgrp)) + return NULL; + return cgrp; +} #endif /* CONFIG_CGROUPS */ /** @@ -2167,6 +2184,7 @@ BTF_ID_FLAGS(func, bpf_cgroup_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_cgroup_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_cgroup_release, KF_RELEASE) BTF_ID_FLAGS(func, bpf_cgroup_ancestor, KF_ACQUIRE | KF_TRUSTED_ARGS | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_cgroup_from_id, KF_ACQUIRE | KF_RET_NULL) #endif BTF_ID_FLAGS(func, bpf_task_from_pid, KF_ACQUIRE | KF_RET_NULL) BTF_SET8_END(generic_btf_ids) -- cgit From 30a2d8328d8ac1bb0a6bf73f4f4cf03f4f5977cc Mon Sep 17 00:00:00 2001 From: David Vernet Date: Tue, 28 Feb 2023 09:28:45 -0600 Subject: bpf: Fix bpf_cgroup_from_id() doxygen header In commit 332ea1f697be ("bpf: Add bpf_cgroup_from_id() kfunc"), a new bpf_cgroup_from_id() kfunc was added which allows a BPF program to lookup and acquire a reference to a cgroup from a cgroup id. The commit's doxygen comment seems to have copy-pasted fields, which causes BPF kfunc helper documentation to fail to render: /helpers.c:2114: warning: Excess function parameter 'cgrp'... /helpers.c:2114: warning: Excess function parameter 'level'... /helpers.c:2114: warning: Excess function parameter 'level'... This patch fixes the doxygen header. Fixes: 332ea1f697be ("bpf: Add bpf_cgroup_from_id() kfunc") Signed-off-by: David Vernet Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20230228152845.294695-1-void@manifault.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/helpers.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'kernel/bpf/helpers.c') diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index a784be6f8bac..abdcc52f90a6 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2106,8 +2106,7 @@ __bpf_kfunc struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level) * bpf_cgroup_from_id - Find a cgroup from its ID. A cgroup returned by this * kfunc which is not subsequently stored in a map, must be released by calling * bpf_cgroup_release(). - * @cgrp: The cgroup for which we're performing a lookup. - * @level: The level of ancestor to look up. + * @cgid: cgroup id. */ __bpf_kfunc struct cgroup *bpf_cgroup_from_id(u64 cgid) { -- cgit From b5964b968ac64c2ec2debee7518499113b27c34e Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Wed, 1 Mar 2023 07:49:50 -0800 Subject: bpf: Add skb dynptrs Add skb dynptrs, which are dynptrs whose underlying pointer points to a skb. The dynptr acts on skb data. skb dynptrs have two main benefits. One is that they allow operations on sizes that are not statically known at compile-time (eg variable-sized accesses). Another is that parsing the packet data through dynptrs (instead of through direct access of skb->data and skb->data_end) can be more ergonomic and less brittle (eg does not need manual if checking for being within bounds of data_end). For bpf prog types that don't support writes on skb data, the dynptr is read-only (bpf_dynptr_write() will return an error) For reads and writes through the bpf_dynptr_read() and bpf_dynptr_write() interfaces, reading and writing from/to data in the head as well as from/to non-linear paged buffers is supported. Data slices through the bpf_dynptr_data API are not supported; instead bpf_dynptr_slice() and bpf_dynptr_slice_rdwr() (added in subsequent commit) should be used. For examples of how skb dynptrs can be used, please see the attached selftests. Signed-off-by: Joanne Koong Link: https://lore.kernel.org/r/20230301154953.641654-8-joannelkoong@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/helpers.c | 76 ++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 62 insertions(+), 14 deletions(-) (limited to 'kernel/bpf/helpers.c') diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index abdcc52f90a6..e8e2414d1587 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1420,11 +1420,21 @@ static bool bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr) return ptr->size & DYNPTR_RDONLY_BIT; } +void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr) +{ + ptr->size |= DYNPTR_RDONLY_BIT; +} + static void bpf_dynptr_set_type(struct bpf_dynptr_kern *ptr, enum bpf_dynptr_type type) { ptr->size |= type << DYNPTR_TYPE_SHIFT; } +static enum bpf_dynptr_type bpf_dynptr_get_type(const struct bpf_dynptr_kern *ptr) +{ + return (ptr->size & ~(DYNPTR_RDONLY_BIT)) >> DYNPTR_TYPE_SHIFT; +} + u32 bpf_dynptr_get_size(const struct bpf_dynptr_kern *ptr) { return ptr->size & DYNPTR_SIZE_MASK; @@ -1497,6 +1507,7 @@ static const struct bpf_func_proto bpf_dynptr_from_mem_proto = { BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, const struct bpf_dynptr_kern *, src, u32, offset, u64, flags) { + enum bpf_dynptr_type type; int err; if (!src->data || flags) @@ -1506,13 +1517,23 @@ BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, const struct bpf_dynptr_kern if (err) return err; - /* Source and destination may possibly overlap, hence use memmove to - * copy the data. E.g. bpf_dynptr_from_mem may create two dynptr - * pointing to overlapping PTR_TO_MAP_VALUE regions. - */ - memmove(dst, src->data + src->offset + offset, len); + type = bpf_dynptr_get_type(src); - return 0; + switch (type) { + case BPF_DYNPTR_TYPE_LOCAL: + case BPF_DYNPTR_TYPE_RINGBUF: + /* Source and destination may possibly overlap, hence use memmove to + * copy the data. E.g. bpf_dynptr_from_mem may create two dynptr + * pointing to overlapping PTR_TO_MAP_VALUE regions. + */ + memmove(dst, src->data + src->offset + offset, len); + return 0; + case BPF_DYNPTR_TYPE_SKB: + return __bpf_skb_load_bytes(src->data, src->offset + offset, dst, len); + default: + WARN_ONCE(true, "bpf_dynptr_read: unknown dynptr type %d\n", type); + return -EFAULT; + } } static const struct bpf_func_proto bpf_dynptr_read_proto = { @@ -1529,22 +1550,36 @@ static const struct bpf_func_proto bpf_dynptr_read_proto = { BPF_CALL_5(bpf_dynptr_write, const struct bpf_dynptr_kern *, dst, u32, offset, void *, src, u32, len, u64, flags) { + enum bpf_dynptr_type type; int err; - if (!dst->data || flags || bpf_dynptr_is_rdonly(dst)) + if (!dst->data || bpf_dynptr_is_rdonly(dst)) return -EINVAL; err = bpf_dynptr_check_off_len(dst, offset, len); if (err) return err; - /* Source and destination may possibly overlap, hence use memmove to - * copy the data. E.g. bpf_dynptr_from_mem may create two dynptr - * pointing to overlapping PTR_TO_MAP_VALUE regions. - */ - memmove(dst->data + dst->offset + offset, src, len); + type = bpf_dynptr_get_type(dst); - return 0; + switch (type) { + case BPF_DYNPTR_TYPE_LOCAL: + case BPF_DYNPTR_TYPE_RINGBUF: + if (flags) + return -EINVAL; + /* Source and destination may possibly overlap, hence use memmove to + * copy the data. E.g. bpf_dynptr_from_mem may create two dynptr + * pointing to overlapping PTR_TO_MAP_VALUE regions. + */ + memmove(dst->data + dst->offset + offset, src, len); + return 0; + case BPF_DYNPTR_TYPE_SKB: + return __bpf_skb_store_bytes(dst->data, dst->offset + offset, src, len, + flags); + default: + WARN_ONCE(true, "bpf_dynptr_write: unknown dynptr type %d\n", type); + return -EFAULT; + } } static const struct bpf_func_proto bpf_dynptr_write_proto = { @@ -1560,6 +1595,7 @@ static const struct bpf_func_proto bpf_dynptr_write_proto = { BPF_CALL_3(bpf_dynptr_data, const struct bpf_dynptr_kern *, ptr, u32, offset, u32, len) { + enum bpf_dynptr_type type; int err; if (!ptr->data) @@ -1572,7 +1608,19 @@ BPF_CALL_3(bpf_dynptr_data, const struct bpf_dynptr_kern *, ptr, u32, offset, u3 if (bpf_dynptr_is_rdonly(ptr)) return 0; - return (unsigned long)(ptr->data + ptr->offset + offset); + type = bpf_dynptr_get_type(ptr); + + switch (type) { + case BPF_DYNPTR_TYPE_LOCAL: + case BPF_DYNPTR_TYPE_RINGBUF: + return (unsigned long)(ptr->data + ptr->offset + offset); + case BPF_DYNPTR_TYPE_SKB: + /* skb dynptrs should use bpf_dynptr_slice / bpf_dynptr_slice_rdwr */ + return 0; + default: + WARN_ONCE(true, "bpf_dynptr_data: unknown dynptr type %d\n", type); + return 0; + } } static const struct bpf_func_proto bpf_dynptr_data_proto = { -- cgit From 05421aecd4ed65da0dc17b0c3c13779ef334e9e5 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Wed, 1 Mar 2023 07:49:51 -0800 Subject: bpf: Add xdp dynptrs Add xdp dynptrs, which are dynptrs whose underlying pointer points to a xdp_buff. The dynptr acts on xdp data. xdp dynptrs have two main benefits. One is that they allow operations on sizes that are not statically known at compile-time (eg variable-sized accesses). Another is that parsing the packet data through dynptrs (instead of through direct access of xdp->data and xdp->data_end) can be more ergonomic and less brittle (eg does not need manual if checking for being within bounds of data_end). For reads and writes on the dynptr, this includes reading/writing from/to and across fragments. Data slices through the bpf_dynptr_data API are not supported; instead bpf_dynptr_slice() and bpf_dynptr_slice_rdwr() should be used. For examples of how xdp dynptrs can be used, please see the attached selftests. Signed-off-by: Joanne Koong Link: https://lore.kernel.org/r/20230301154953.641654-9-joannelkoong@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/helpers.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'kernel/bpf/helpers.c') diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index e8e2414d1587..114a875a05b1 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1530,6 +1530,8 @@ BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, const struct bpf_dynptr_kern return 0; case BPF_DYNPTR_TYPE_SKB: return __bpf_skb_load_bytes(src->data, src->offset + offset, dst, len); + case BPF_DYNPTR_TYPE_XDP: + return __bpf_xdp_load_bytes(src->data, src->offset + offset, dst, len); default: WARN_ONCE(true, "bpf_dynptr_read: unknown dynptr type %d\n", type); return -EFAULT; @@ -1576,6 +1578,10 @@ BPF_CALL_5(bpf_dynptr_write, const struct bpf_dynptr_kern *, dst, u32, offset, v case BPF_DYNPTR_TYPE_SKB: return __bpf_skb_store_bytes(dst->data, dst->offset + offset, src, len, flags); + case BPF_DYNPTR_TYPE_XDP: + if (flags) + return -EINVAL; + return __bpf_xdp_store_bytes(dst->data, dst->offset + offset, src, len); default: WARN_ONCE(true, "bpf_dynptr_write: unknown dynptr type %d\n", type); return -EFAULT; @@ -1615,7 +1621,8 @@ BPF_CALL_3(bpf_dynptr_data, const struct bpf_dynptr_kern *, ptr, u32, offset, u3 case BPF_DYNPTR_TYPE_RINGBUF: return (unsigned long)(ptr->data + ptr->offset + offset); case BPF_DYNPTR_TYPE_SKB: - /* skb dynptrs should use bpf_dynptr_slice / bpf_dynptr_slice_rdwr */ + case BPF_DYNPTR_TYPE_XDP: + /* skb and xdp dynptrs should use bpf_dynptr_slice / bpf_dynptr_slice_rdwr */ return 0; default: WARN_ONCE(true, "bpf_dynptr_data: unknown dynptr type %d\n", type); -- cgit From 66e3a13e7c2c44d0c9dd6bb244680ca7529a8845 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Wed, 1 Mar 2023 07:49:52 -0800 Subject: bpf: Add bpf_dynptr_slice and bpf_dynptr_slice_rdwr Two new kfuncs are added, bpf_dynptr_slice and bpf_dynptr_slice_rdwr. The user must pass in a buffer to store the contents of the data slice if a direct pointer to the data cannot be obtained. For skb and xdp type dynptrs, these two APIs are the only way to obtain a data slice. However, for other types of dynptrs, there is no difference between bpf_dynptr_slice(_rdwr) and bpf_dynptr_data. For skb type dynptrs, the data is copied into the user provided buffer if any of the data is not in the linear portion of the skb. For xdp type dynptrs, the data is copied into the user provided buffer if the data is between xdp frags. If the skb is cloned and a call to bpf_dynptr_data_rdwr is made, then the skb will be uncloned (see bpf_unclone_prologue()). Please note that any bpf_dynptr_write() automatically invalidates any prior data slices of the skb dynptr. This is because the skb may be cloned or may need to pull its paged buffer into the head. As such, any bpf_dynptr_write() will automatically have its prior data slices invalidated, even if the write is to data in the skb head of an uncloned skb. Please note as well that any other helper calls that change the underlying packet buffer (eg bpf_skb_pull_data()) invalidates any data slices of the skb dynptr as well, for the same reasons. Signed-off-by: Joanne Koong Link: https://lore.kernel.org/r/20230301154953.641654-10-joannelkoong@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/helpers.c | 138 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 138 insertions(+) (limited to 'kernel/bpf/helpers.c') diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 114a875a05b1..648b29e78b84 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2193,6 +2193,142 @@ __bpf_kfunc struct task_struct *bpf_task_from_pid(s32 pid) return p; } +/** + * bpf_dynptr_slice - Obtain a read-only pointer to the dynptr data. + * + * For non-skb and non-xdp type dynptrs, there is no difference between + * bpf_dynptr_slice and bpf_dynptr_data. + * + * If the intention is to write to the data slice, please use + * bpf_dynptr_slice_rdwr. + * + * The user must check that the returned pointer is not null before using it. + * + * Please note that in the case of skb and xdp dynptrs, bpf_dynptr_slice + * does not change the underlying packet data pointers, so a call to + * bpf_dynptr_slice will not invalidate any ctx->data/data_end pointers in + * the bpf program. + * + * @ptr: The dynptr whose data slice to retrieve + * @offset: Offset into the dynptr + * @buffer: User-provided buffer to copy contents into + * @buffer__szk: Size (in bytes) of the buffer. This is the length of the + * requested slice. This must be a constant. + * + * @returns: NULL if the call failed (eg invalid dynptr), pointer to a read-only + * data slice (can be either direct pointer to the data or a pointer to the user + * provided buffer, with its contents containing the data, if unable to obtain + * direct pointer) + */ +__bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset, + void *buffer, u32 buffer__szk) +{ + enum bpf_dynptr_type type; + u32 len = buffer__szk; + int err; + + if (!ptr->data) + return 0; + + err = bpf_dynptr_check_off_len(ptr, offset, len); + if (err) + return 0; + + type = bpf_dynptr_get_type(ptr); + + switch (type) { + case BPF_DYNPTR_TYPE_LOCAL: + case BPF_DYNPTR_TYPE_RINGBUF: + return ptr->data + ptr->offset + offset; + case BPF_DYNPTR_TYPE_SKB: + return skb_header_pointer(ptr->data, ptr->offset + offset, len, buffer); + case BPF_DYNPTR_TYPE_XDP: + { + void *xdp_ptr = bpf_xdp_pointer(ptr->data, ptr->offset + offset, len); + if (xdp_ptr) + return xdp_ptr; + + bpf_xdp_copy_buf(ptr->data, ptr->offset + offset, buffer, len, false); + return buffer; + } + default: + WARN_ONCE(true, "unknown dynptr type %d\n", type); + return 0; + } +} + +/** + * bpf_dynptr_slice_rdwr - Obtain a writable pointer to the dynptr data. + * + * For non-skb and non-xdp type dynptrs, there is no difference between + * bpf_dynptr_slice and bpf_dynptr_data. + * + * The returned pointer is writable and may point to either directly the dynptr + * data at the requested offset or to the buffer if unable to obtain a direct + * data pointer to (example: the requested slice is to the paged area of an skb + * packet). In the case where the returned pointer is to the buffer, the user + * is responsible for persisting writes through calling bpf_dynptr_write(). This + * usually looks something like this pattern: + * + * struct eth_hdr *eth = bpf_dynptr_slice_rdwr(&dynptr, 0, buffer, sizeof(buffer)); + * if (!eth) + * return TC_ACT_SHOT; + * + * // mutate eth header // + * + * if (eth == buffer) + * bpf_dynptr_write(&ptr, 0, buffer, sizeof(buffer), 0); + * + * Please note that, as in the example above, the user must check that the + * returned pointer is not null before using it. + * + * Please also note that in the case of skb and xdp dynptrs, bpf_dynptr_slice_rdwr + * does not change the underlying packet data pointers, so a call to + * bpf_dynptr_slice_rdwr will not invalidate any ctx->data/data_end pointers in + * the bpf program. + * + * @ptr: The dynptr whose data slice to retrieve + * @offset: Offset into the dynptr + * @buffer: User-provided buffer to copy contents into + * @buffer__szk: Size (in bytes) of the buffer. This is the length of the + * requested slice. This must be a constant. + * + * @returns: NULL if the call failed (eg invalid dynptr), pointer to a + * data slice (can be either direct pointer to the data or a pointer to the user + * provided buffer, with its contents containing the data, if unable to obtain + * direct pointer) + */ +__bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr_kern *ptr, u32 offset, + void *buffer, u32 buffer__szk) +{ + if (!ptr->data || bpf_dynptr_is_rdonly(ptr)) + return 0; + + /* bpf_dynptr_slice_rdwr is the same logic as bpf_dynptr_slice. + * + * For skb-type dynptrs, it is safe to write into the returned pointer + * if the bpf program allows skb data writes. There are two possiblities + * that may occur when calling bpf_dynptr_slice_rdwr: + * + * 1) The requested slice is in the head of the skb. In this case, the + * returned pointer is directly to skb data, and if the skb is cloned, the + * verifier will have uncloned it (see bpf_unclone_prologue()) already. + * The pointer can be directly written into. + * + * 2) Some portion of the requested slice is in the paged buffer area. + * In this case, the requested data will be copied out into the buffer + * and the returned pointer will be a pointer to the buffer. The skb + * will not be pulled. To persist the write, the user will need to call + * bpf_dynptr_write(), which will pull the skb and commit the write. + * + * Similarly for xdp programs, if the requested slice is not across xdp + * fragments, then a direct pointer will be returned, otherwise the data + * will be copied out into the buffer and the user will need to call + * bpf_dynptr_write() to commit changes. + */ + return bpf_dynptr_slice(ptr, offset, buffer, buffer__szk); +} + __bpf_kfunc void *bpf_cast_to_kern_ctx(void *obj) { return obj; @@ -2262,6 +2398,8 @@ BTF_ID_FLAGS(func, bpf_cast_to_kern_ctx) BTF_ID_FLAGS(func, bpf_rdonly_cast) BTF_ID_FLAGS(func, bpf_rcu_read_lock) BTF_ID_FLAGS(func, bpf_rcu_read_unlock) +BTF_ID_FLAGS(func, bpf_dynptr_slice, KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_dynptr_slice_rdwr, KF_RET_NULL) BTF_SET8_END(common_btf_ids) static const struct btf_kfunc_id_set common_kfunc_set = { -- cgit From 7ce60b110eece1d7b3d5c322fd11f6d41a29d17b Mon Sep 17 00:00:00 2001 From: David Vernet Date: Wed, 1 Mar 2023 13:49:09 -0600 Subject: bpf: Fix doxygen comments for dynptr slice kfuncs In commit 66e3a13e7c2c ("bpf: Add bpf_dynptr_slice and bpf_dynptr_slice_rdwr"), the bpf_dynptr_slice() and bpf_dynptr_slice_rdwr() kfuncs were added to BPF. These kfuncs included doxygen headers, but unfortunately those headers are not properly formatted according to [0], and causes the following warnings during the docs build: ./kernel/bpf/helpers.c:2225: warning: \ Excess function parameter 'returns' description in 'bpf_dynptr_slice' ./kernel/bpf/helpers.c:2303: warning: \ Excess function parameter 'returns' description in 'bpf_dynptr_slice_rdwr' ... This patch fixes those doxygen comments. [0]: https://docs.kernel.org/doc-guide/kernel-doc.html#function-documentation Fixes: 66e3a13e7c2c ("bpf: Add bpf_dynptr_slice and bpf_dynptr_slice_rdwr") Signed-off-by: David Vernet Link: https://lore.kernel.org/r/20230301194910.602738-1-void@manifault.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/helpers.c | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) (limited to 'kernel/bpf/helpers.c') diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 648b29e78b84..58431a92bb65 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2194,7 +2194,12 @@ __bpf_kfunc struct task_struct *bpf_task_from_pid(s32 pid) } /** - * bpf_dynptr_slice - Obtain a read-only pointer to the dynptr data. + * bpf_dynptr_slice() - Obtain a read-only pointer to the dynptr data. + * @ptr: The dynptr whose data slice to retrieve + * @offset: Offset into the dynptr + * @buffer: User-provided buffer to copy contents into + * @buffer__szk: Size (in bytes) of the buffer. This is the length of the + * requested slice. This must be a constant. * * For non-skb and non-xdp type dynptrs, there is no difference between * bpf_dynptr_slice and bpf_dynptr_data. @@ -2209,13 +2214,7 @@ __bpf_kfunc struct task_struct *bpf_task_from_pid(s32 pid) * bpf_dynptr_slice will not invalidate any ctx->data/data_end pointers in * the bpf program. * - * @ptr: The dynptr whose data slice to retrieve - * @offset: Offset into the dynptr - * @buffer: User-provided buffer to copy contents into - * @buffer__szk: Size (in bytes) of the buffer. This is the length of the - * requested slice. This must be a constant. - * - * @returns: NULL if the call failed (eg invalid dynptr), pointer to a read-only + * Return: NULL if the call failed (eg invalid dynptr), pointer to a read-only * data slice (can be either direct pointer to the data or a pointer to the user * provided buffer, with its contents containing the data, if unable to obtain * direct pointer) @@ -2258,7 +2257,12 @@ __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset } /** - * bpf_dynptr_slice_rdwr - Obtain a writable pointer to the dynptr data. + * bpf_dynptr_slice_rdwr() - Obtain a writable pointer to the dynptr data. + * @ptr: The dynptr whose data slice to retrieve + * @offset: Offset into the dynptr + * @buffer: User-provided buffer to copy contents into + * @buffer__szk: Size (in bytes) of the buffer. This is the length of the + * requested slice. This must be a constant. * * For non-skb and non-xdp type dynptrs, there is no difference between * bpf_dynptr_slice and bpf_dynptr_data. @@ -2287,13 +2291,7 @@ __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset * bpf_dynptr_slice_rdwr will not invalidate any ctx->data/data_end pointers in * the bpf program. * - * @ptr: The dynptr whose data slice to retrieve - * @offset: Offset into the dynptr - * @buffer: User-provided buffer to copy contents into - * @buffer__szk: Size (in bytes) of the buffer. This is the length of the - * requested slice. This must be a constant. - * - * @returns: NULL if the call failed (eg invalid dynptr), pointer to a + * Return: NULL if the call failed (eg invalid dynptr), pointer to a * data slice (can be either direct pointer to the data or a pointer to the user * provided buffer, with its contents containing the data, if unable to obtain * direct pointer) -- cgit From c45eac537bd8b4977d335c123212140bc5257670 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Wed, 1 Mar 2023 21:30:14 -0800 Subject: bpf: Fix bpf_dynptr_slice{_rdwr} to return NULL instead of 0 Change bpf_dynptr_slice and bpf_dynptr_slice_rdwr to return NULL instead of 0, in accordance with the codebase guidelines. Fixes: 66e3a13e7c2c ("bpf: Add bpf_dynptr_slice and bpf_dynptr_slice_rdwr") Reported-by: kernel test robot Signed-off-by: Joanne Koong Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20230302053014.1726219-1-joannelkoong@gmail.com --- kernel/bpf/helpers.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel/bpf/helpers.c') diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 58431a92bb65..de9ef8476e29 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2227,11 +2227,11 @@ __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset int err; if (!ptr->data) - return 0; + return NULL; err = bpf_dynptr_check_off_len(ptr, offset, len); if (err) - return 0; + return NULL; type = bpf_dynptr_get_type(ptr); @@ -2252,7 +2252,7 @@ __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset } default: WARN_ONCE(true, "unknown dynptr type %d\n", type); - return 0; + return NULL; } } @@ -2300,7 +2300,7 @@ __bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr_kern *ptr, u32 o void *buffer, u32 buffer__szk) { if (!ptr->data || bpf_dynptr_is_rdonly(ptr)) - return 0; + return NULL; /* bpf_dynptr_slice_rdwr is the same logic as bpf_dynptr_slice. * -- cgit From c501bf55c88b834adefda870c7c092ec9052a437 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 2 Mar 2023 09:42:59 -1000 Subject: bpf: Make bpf_get_current_[ancestor_]cgroup_id() available for all program types These helpers are safe to call from any context and there's no reason to restrict access to them. Remove them from bpf_trace and filter lists and add to bpf_base_func_proto() under perfmon_capable(). v2: After consulting with Andrii, relocated in bpf_base_func_proto() so that they require bpf_capable() but not perfomon_capable() as it doesn't read from or affect others on the system. Signed-off-by: Tejun Heo Cc: Andrii Nakryiko Link: https://lore.kernel.org/r/ZAD8QyoszMZiTzBY@slm.duckdns.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/helpers.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel/bpf/helpers.c') diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index de9ef8476e29..6fc0d6c44e4c 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1748,6 +1748,10 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_cgrp_storage_get_proto; case BPF_FUNC_cgrp_storage_delete: return &bpf_cgrp_storage_delete_proto; + case BPF_FUNC_get_current_cgroup_id: + return &bpf_get_current_cgroup_id_proto; + case BPF_FUNC_get_current_ancestor_cgroup_id: + return &bpf_get_current_ancestor_cgroup_id_proto; #endif default: break; -- cgit From f71f8530494bb5ab43d3369ef0ce8373eb1ee077 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Thu, 2 Mar 2023 13:46:13 +0200 Subject: bpf: Add support for absolute value BPF timers Add a new flag BPF_F_TIMER_ABS that can be passed to bpf_timer_start() to start an absolute value timer instead of the default relative value. This makes the timer expire at an exact point in time, instead of a time with latencies induced by both the BPF and timer subsystems. Suggested-by: Artem Bityutskiy Signed-off-by: Tero Kristo Link: https://lore.kernel.org/r/20230302114614.2985072-2-tero.kristo@linux.intel.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/helpers.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'kernel/bpf/helpers.c') diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 6fc0d6c44e4c..12f12e879bcf 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1264,10 +1264,11 @@ BPF_CALL_3(bpf_timer_start, struct bpf_timer_kern *, timer, u64, nsecs, u64, fla { struct bpf_hrtimer *t; int ret = 0; + enum hrtimer_mode mode; if (in_nmi()) return -EOPNOTSUPP; - if (flags) + if (flags > BPF_F_TIMER_ABS) return -EINVAL; __bpf_spin_lock_irqsave(&timer->lock); t = timer->timer; @@ -1275,7 +1276,13 @@ BPF_CALL_3(bpf_timer_start, struct bpf_timer_kern *, timer, u64, nsecs, u64, fla ret = -EINVAL; goto out; } - hrtimer_start(&t->timer, ns_to_ktime(nsecs), HRTIMER_MODE_REL_SOFT); + + if (flags & BPF_F_TIMER_ABS) + mode = HRTIMER_MODE_ABS_SOFT; + else + mode = HRTIMER_MODE_REL_SOFT; + + hrtimer_start(&t->timer, ns_to_ktime(nsecs), mode); out: __bpf_spin_unlock_irqrestore(&timer->lock); return ret; -- cgit From 20c09d92faeefb8536f705d3a4629e0dc314c8a1 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 2 Mar 2023 20:14:43 -0800 Subject: bpf: Introduce kptr_rcu. The life time of certain kernel structures like 'struct cgroup' is protected by RCU. Hence it's safe to dereference them directly from __kptr tagged pointers in bpf maps. The resulting pointer is MEM_RCU and can be passed to kfuncs that expect KF_RCU. Derefrence of other kptr-s returns PTR_UNTRUSTED. For example: struct map_value { struct cgroup __kptr *cgrp; }; SEC("tp_btf/cgroup_mkdir") int BPF_PROG(test_cgrp_get_ancestors, struct cgroup *cgrp_arg, const char *path) { struct cgroup *cg, *cg2; cg = bpf_cgroup_acquire(cgrp_arg); // cg is PTR_TRUSTED and ref_obj_id > 0 bpf_kptr_xchg(&v->cgrp, cg); cg2 = v->cgrp; // This is new feature introduced by this patch. // cg2 is PTR_MAYBE_NULL | MEM_RCU. // When cg2 != NULL, it's a valid cgroup, but its percpu_ref could be zero if (cg2) bpf_cgroup_ancestor(cg2, level); // safe to do. } Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Tejun Heo Acked-by: David Vernet Link: https://lore.kernel.org/bpf/20230303041446.3630-4-alexei.starovoitov@gmail.com --- kernel/bpf/helpers.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'kernel/bpf/helpers.c') diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 12f12e879bcf..637ac4e92e75 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2163,8 +2163,10 @@ __bpf_kfunc struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level) if (level > cgrp->level || level < 0) return NULL; + /* cgrp's refcnt could be 0 here, but ancestors can still be accessed */ ancestor = cgrp->ancestors[level]; - cgroup_get(ancestor); + if (!cgroup_tryget(ancestor)) + return NULL; return ancestor; } @@ -2382,7 +2384,7 @@ BTF_ID_FLAGS(func, bpf_rbtree_first, KF_RET_NULL) BTF_ID_FLAGS(func, bpf_cgroup_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_cgroup_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_cgroup_release, KF_RELEASE) -BTF_ID_FLAGS(func, bpf_cgroup_ancestor, KF_ACQUIRE | KF_TRUSTED_ARGS | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_cgroup_ancestor, KF_ACQUIRE | KF_RCU | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_cgroup_from_id, KF_ACQUIRE | KF_RET_NULL) #endif BTF_ID_FLAGS(func, bpf_task_from_pid, KF_ACQUIRE | KF_RET_NULL) -- cgit From 6018e1f407cccf39b804d1f75ad4de7be4e6cc45 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 8 Mar 2023 10:41:17 -0800 Subject: bpf: implement numbers iterator Implement the first open-coded iterator type over a range of integers. It's public API consists of: - bpf_iter_num_new() constructor, which accepts [start, end) range (that is, start is inclusive, end is exclusive). - bpf_iter_num_next() which will keep returning read-only pointer to int until the range is exhausted, at which point NULL will be returned. If bpf_iter_num_next() is kept calling after this, NULL will be persistently returned. - bpf_iter_num_destroy() destructor, which needs to be called at some point to clean up iterator state. BPF verifier enforces that iterator destructor is called at some point before BPF program exits. Note that `start = end = X` is a valid combination to setup an empty iterator. bpf_iter_num_new() will return 0 (success) for any such combination. If bpf_iter_num_new() detects invalid combination of input arguments, it returns error, resets iterator state to, effectively, empty iterator, so any subsequent call to bpf_iter_num_next() will keep returning NULL. BPF verifier has no knowledge that returned integers are in the [start, end) value range, as both `start` and `end` are not statically known and enforced: they are runtime values. While the implementation is pretty trivial, some care needs to be taken to avoid overflows and underflows. Subsequent selftests will validate correctness of [start, end) semantics, especially around extremes (INT_MIN and INT_MAX). Similarly to bpf_loop(), we enforce that no more than BPF_MAX_LOOPS can be specified. bpf_iter_num_{new,next,destroy}() is a logical evolution from bounded BPF loops and bpf_loop() helper and is the basis for implementing ergonomic BPF loops with no statically known or verified bounds. Subsequent patches implement bpf_for() macro, demonstrating how this can be wrapped into something that works and feels like a normal for() loop in C language. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230308184121.1165081-5-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/helpers.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel/bpf/helpers.c') diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 637ac4e92e75..f9b7eeedce08 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2411,6 +2411,9 @@ BTF_ID_FLAGS(func, bpf_rcu_read_lock) BTF_ID_FLAGS(func, bpf_rcu_read_unlock) BTF_ID_FLAGS(func, bpf_dynptr_slice, KF_RET_NULL) BTF_ID_FLAGS(func, bpf_dynptr_slice_rdwr, KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_iter_num_new, KF_ITER_NEW) +BTF_ID_FLAGS(func, bpf_iter_num_next, KF_ITER_NEXT | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_iter_num_destroy, KF_ITER_DESTROY) BTF_SET8_END(common_btf_ids) static const struct btf_kfunc_id_set common_kfunc_set = { -- cgit From c8e18754091479fac3f5b6c053c6bc4be0b7fb11 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Fri, 10 Mar 2023 15:07:41 -0800 Subject: bpf: Support __kptr to local kptrs If a PTR_TO_BTF_ID type comes from program BTF - not vmlinux or module BTF - it must have been allocated by bpf_obj_new and therefore must be free'd with bpf_obj_drop. Such a PTR_TO_BTF_ID is considered a "local kptr" and is tagged with MEM_ALLOC type tag by bpf_obj_new. This patch adds support for treating __kptr-tagged pointers to "local kptrs" as having an implicit bpf_obj_drop destructor for referenced kptr acquire / release semantics. Consider the following example: struct node_data { long key; long data; struct bpf_rb_node node; }; struct map_value { struct node_data __kptr *node; }; struct { __uint(type, BPF_MAP_TYPE_ARRAY); __type(key, int); __type(value, struct map_value); __uint(max_entries, 1); } some_nodes SEC(".maps"); If struct node_data had a matching definition in kernel BTF, the verifier would expect a destructor for the type to be registered. Since struct node_data does not match any type in kernel BTF, the verifier knows that there is no kfunc that provides a PTR_TO_BTF_ID to this type, and that such a PTR_TO_BTF_ID can only come from bpf_obj_new. So instead of searching for a registered dtor, a bpf_obj_drop dtor can be assumed. This allows the runtime to properly destruct such kptrs in bpf_obj_free_fields, which enables maps to clean up map_vals w/ such kptrs when going away. Implementation notes: * "kernel_btf" variable is renamed to "kptr_btf" in btf_parse_kptr. Before this patch, the variable would only ever point to vmlinux or module BTFs, but now it can point to some program BTF for local kptr type. It's later used to populate the (btf, btf_id) pair in kptr btf field. * It's necessary to btf_get the program BTF when populating btf_field for local kptr. btf_record_free later does a btf_put. * Behavior for non-local referenced kptrs is not modified, as bpf_find_btf_id helper only searches vmlinux and module BTFs for matching BTF type. If such a type is found, btf_field_kptr's btf will pass btf_is_kernel check, and the associated release function is some one-argument dtor. If btf_is_kernel check fails, associated release function is two-arg bpf_obj_drop_impl. Before this patch only btf_field_kptr's w/ kernel or module BTFs were created. Signed-off-by: Dave Marchevsky Link: https://lore.kernel.org/r/20230310230743.2320707-2-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/helpers.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'kernel/bpf/helpers.c') diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index f9b7eeedce08..77d64b6951b9 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1896,14 +1896,19 @@ __bpf_kfunc void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign) return p; } +void __bpf_obj_drop_impl(void *p, const struct btf_record *rec) +{ + if (rec) + bpf_obj_free_fields(rec, p); + bpf_mem_free(&bpf_global_ma, p); +} + __bpf_kfunc void bpf_obj_drop_impl(void *p__alloc, void *meta__ign) { struct btf_struct_meta *meta = meta__ign; void *p = p__alloc; - if (meta) - bpf_obj_free_fields(meta->record, p); - bpf_mem_free(&bpf_global_ma, p); + __bpf_obj_drop_impl(p, meta ? meta->record : NULL); } static void __bpf_list_add(struct bpf_list_node *node, struct bpf_list_head *head, bool tail) -- cgit From c9267aa8b794c2188d49c7d7bd2990e98b2d6b84 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 13 Mar 2023 16:58:43 -0700 Subject: bpf: Fix bpf_strncmp proto. bpf_strncmp() doesn't write into its first argument. Make sure that the verifier knows about it. Signed-off-by: Alexei Starovoitov Acked-by: David Vernet Link: https://lore.kernel.org/r/20230313235845.61029-2-alexei.starovoitov@gmail.com Signed-off-by: Martin KaFai Lau --- kernel/bpf/helpers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/bpf/helpers.c') diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 77d64b6951b9..f753676ef652 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -571,7 +571,7 @@ static const struct bpf_func_proto bpf_strncmp_proto = { .func = bpf_strncmp, .gpl_only = false, .ret_type = RET_INTEGER, - .arg1_type = ARG_PTR_TO_MEM, + .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg2_type = ARG_CONST_SIZE, .arg3_type = ARG_PTR_TO_CONST_STR, }; -- cgit From fb2211a57c110b4ced3cb7f8570bd7246acf2d04 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Sat, 25 Mar 2023 16:31:45 -0500 Subject: bpf: Remove now-unnecessary NULL checks for KF_RELEASE kfuncs Now that we're not invoking kfunc destructors when the kptr in a map was NULL, we no longer require NULL checks in many of our KF_RELEASE kfuncs. This patch removes those NULL checks. Signed-off-by: David Vernet Link: https://lore.kernel.org/r/20230325213144.486885-3-void@manifault.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/helpers.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'kernel/bpf/helpers.c') diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index f753676ef652..8980f6859443 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2089,9 +2089,6 @@ __bpf_kfunc struct task_struct *bpf_task_kptr_get(struct task_struct **pp) */ __bpf_kfunc void bpf_task_release(struct task_struct *p) { - if (!p) - return; - put_task_struct(p); } @@ -2148,9 +2145,6 @@ __bpf_kfunc struct cgroup *bpf_cgroup_kptr_get(struct cgroup **cgrpp) */ __bpf_kfunc void bpf_cgroup_release(struct cgroup *cgrp) { - if (!cgrp) - return; - cgroup_put(cgrp); } -- cgit From d02c48fa113953aba0b330ec6c35f50c7d1d7986 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Fri, 31 Mar 2023 14:57:31 -0500 Subject: bpf: Make struct task_struct an RCU-safe type struct task_struct objects are a bit interesting in terms of how their lifetime is protected by refcounts. task structs have two refcount fields: 1. refcount_t usage: Protects the memory backing the task struct. When this refcount drops to 0, the task is immediately freed, without waiting for an RCU grace period to elapse. This is the field that most callers in the kernel currently use to ensure that a task remains valid while it's being referenced, and is what's currently tracked with bpf_task_acquire() and bpf_task_release(). 2. refcount_t rcu_users: A refcount field which, when it drops to 0, schedules an RCU callback that drops a reference held on the 'usage' field above (which is acquired when the task is first created). This field therefore provides a form of RCU protection on the task by ensuring that at least one 'usage' refcount will be held until an RCU grace period has elapsed. The qualifier "a form of" is important here, as a task can remain valid after task->rcu_users has dropped to 0 and the subsequent RCU gp has elapsed. In terms of BPF, we want to use task->rcu_users to protect tasks that function as referenced kptrs, and to allow tasks stored as referenced kptrs in maps to be accessed with RCU protection. Let's first determine whether we can safely use task->rcu_users to protect tasks stored in maps. All of the bpf_task* kfuncs can only be called from tracepoint, struct_ops, or BPF_PROG_TYPE_SCHED_CLS, program types. For tracepoint and struct_ops programs, the struct task_struct passed to a program handler will always be trusted, so it will always be safe to call bpf_task_acquire() with any task passed to a program. Note, however, that we must update bpf_task_acquire() to be KF_RET_NULL, as it is possible that the task has exited by the time the program is invoked, even if the pointer is still currently valid because the main kernel holds a task->usage refcount. For BPF_PROG_TYPE_SCHED_CLS, tasks should never be passed as an argument to the any program handlers, so it should not be relevant. The second question is whether it's safe to use RCU to access a task that was acquired with bpf_task_acquire(), and stored in a map. Because bpf_task_acquire() now uses task->rcu_users, it follows that if the task is present in the map, that it must have had at least one task->rcu_users refcount by the time the current RCU cs was started. Therefore, it's safe to access that task until the end of the current RCU cs. With all that said, this patch makes struct task_struct is an RCU-protected object. In doing so, we also change bpf_task_acquire() to be KF_ACQUIRE | KF_RCU | KF_RET_NULL, and adjust any selftests as necessary. A subsequent patch will remove bpf_task_kptr_get(), and bpf_task_acquire_not_zero() respectively. Signed-off-by: David Vernet Link: https://lore.kernel.org/r/20230331195733.699708-2-void@manifault.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/helpers.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'kernel/bpf/helpers.c') diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 8980f6859443..e71a4a54ce99 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -2013,7 +2014,9 @@ __bpf_kfunc struct bpf_rb_node *bpf_rbtree_first(struct bpf_rb_root *root) */ __bpf_kfunc struct task_struct *bpf_task_acquire(struct task_struct *p) { - return get_task_struct(p); + if (refcount_inc_not_zero(&p->rcu_users)) + return p; + return NULL; } /** @@ -2089,7 +2092,7 @@ __bpf_kfunc struct task_struct *bpf_task_kptr_get(struct task_struct **pp) */ __bpf_kfunc void bpf_task_release(struct task_struct *p) { - put_task_struct(p); + put_task_struct_rcu_user(p); } #ifdef CONFIG_CGROUPS @@ -2199,7 +2202,7 @@ __bpf_kfunc struct task_struct *bpf_task_from_pid(s32 pid) rcu_read_lock(); p = find_task_by_pid_ns(pid, &init_pid_ns); if (p) - bpf_task_acquire(p); + p = bpf_task_acquire(p); rcu_read_unlock(); return p; @@ -2371,7 +2374,7 @@ BTF_ID_FLAGS(func, bpf_list_push_front) BTF_ID_FLAGS(func, bpf_list_push_back) BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_list_pop_back, KF_ACQUIRE | KF_RET_NULL) -BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_task_acquire_not_zero, KF_ACQUIRE | KF_RCU | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_task_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_task_release, KF_RELEASE) -- cgit From f85671c6ef46d490a90dac719e0c0e0adbacfd9b Mon Sep 17 00:00:00 2001 From: David Vernet Date: Fri, 31 Mar 2023 14:57:32 -0500 Subject: bpf: Remove now-defunct task kfuncs In commit 22df776a9a86 ("tasks: Extract rcu_users out of union"), the 'refcount_t rcu_users' field was extracted out of a union with the 'struct rcu_head rcu' field. This allows us to safely perform a refcount_inc_not_zero() on task->rcu_users when acquiring a reference on a task struct. A prior patch leveraged this by making struct task_struct an RCU-protected object in the verifier, and by bpf_task_acquire() to use the task->rcu_users field for synchronization. Now that we can use RCU to protect tasks, we no longer need bpf_task_kptr_get(), or bpf_task_acquire_not_zero(). bpf_task_kptr_get() is truly completely unnecessary, as we can just use RCU to get the object. bpf_task_acquire_not_zero() is now equivalent to bpf_task_acquire(). In addition to these changes, this patch also updates the associated selftests to no longer use these kfuncs. Signed-off-by: David Vernet Link: https://lore.kernel.org/r/20230331195733.699708-3-void@manifault.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/helpers.c | 69 ---------------------------------------------------- 1 file changed, 69 deletions(-) (limited to 'kernel/bpf/helpers.c') diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index e71a4a54ce99..6be16db9f188 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2019,73 +2019,6 @@ __bpf_kfunc struct task_struct *bpf_task_acquire(struct task_struct *p) return NULL; } -/** - * bpf_task_acquire_not_zero - Acquire a reference to a rcu task object. A task - * acquired by this kfunc which is not stored in a map as a kptr, must be - * released by calling bpf_task_release(). - * @p: The task on which a reference is being acquired. - */ -__bpf_kfunc struct task_struct *bpf_task_acquire_not_zero(struct task_struct *p) -{ - /* For the time being this function returns NULL, as it's not currently - * possible to safely acquire a reference to a task with RCU protection - * using get_task_struct() and put_task_struct(). This is due to the - * slightly odd mechanics of p->rcu_users, and how task RCU protection - * works. - * - * A struct task_struct is refcounted by two different refcount_t - * fields: - * - * 1. p->usage: The "true" refcount field which tracks a task's - * lifetime. The task is freed as soon as this - * refcount drops to 0. - * - * 2. p->rcu_users: An "RCU users" refcount field which is statically - * initialized to 2, and is co-located in a union with - * a struct rcu_head field (p->rcu). p->rcu_users - * essentially encapsulates a single p->usage - * refcount, and when p->rcu_users goes to 0, an RCU - * callback is scheduled on the struct rcu_head which - * decrements the p->usage refcount. - * - * There are two important implications to this task refcounting logic - * described above. The first is that - * refcount_inc_not_zero(&p->rcu_users) cannot be used anywhere, as - * after the refcount goes to 0, the RCU callback being scheduled will - * cause the memory backing the refcount to again be nonzero due to the - * fields sharing a union. The other is that we can't rely on RCU to - * guarantee that a task is valid in a BPF program. This is because a - * task could have already transitioned to being in the TASK_DEAD - * state, had its rcu_users refcount go to 0, and its rcu callback - * invoked in which it drops its single p->usage reference. At this - * point the task will be freed as soon as the last p->usage reference - * goes to 0, without waiting for another RCU gp to elapse. The only - * way that a BPF program can guarantee that a task is valid is in this - * scenario is to hold a p->usage refcount itself. - * - * Until we're able to resolve this issue, either by pulling - * p->rcu_users and p->rcu out of the union, or by getting rid of - * p->usage and just using p->rcu_users for refcounting, we'll just - * return NULL here. - */ - return NULL; -} - -/** - * bpf_task_kptr_get - Acquire a reference on a struct task_struct kptr. A task - * kptr acquired by this kfunc which is not subsequently stored in a map, must - * be released by calling bpf_task_release(). - * @pp: A pointer to a task kptr on which a reference is being acquired. - */ -__bpf_kfunc struct task_struct *bpf_task_kptr_get(struct task_struct **pp) -{ - /* We must return NULL here until we have clarity on how to properly - * leverage RCU for ensuring a task's lifetime. See the comment above - * in bpf_task_acquire_not_zero() for more details. - */ - return NULL; -} - /** * bpf_task_release - Release the reference acquired on a task. * @p: The task on which a reference is being released. @@ -2375,8 +2308,6 @@ BTF_ID_FLAGS(func, bpf_list_push_back) BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_list_pop_back, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL) -BTF_ID_FLAGS(func, bpf_task_acquire_not_zero, KF_ACQUIRE | KF_RCU | KF_RET_NULL) -BTF_ID_FLAGS(func, bpf_task_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_task_release, KF_RELEASE) BTF_ID_FLAGS(func, bpf_rbtree_remove, KF_ACQUIRE) BTF_ID_FLAGS(func, bpf_rbtree_add) -- cgit From f3f21349779776135349a8e6f114a1485b2476b7 Mon Sep 17 00:00:00 2001 From: Barret Rhoden Date: Thu, 6 Apr 2023 20:18:08 -0400 Subject: bpf: ensure all memory is initialized in bpf_get_current_comm BPF helpers that take an ARG_PTR_TO_UNINIT_MEM must ensure that all of the memory is set, including beyond the end of the string. Signed-off-by: Barret Rhoden Link: https://lore.kernel.org/r/20230407001808.1622968-1-brho@google.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/helpers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/bpf/helpers.c') diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 6be16db9f188..b6a5cda5bb59 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -258,7 +258,7 @@ BPF_CALL_2(bpf_get_current_comm, char *, buf, u32, size) goto err_clear; /* Verifier guarantees that size > 0 */ - strscpy(buf, task->comm, size); + strscpy_pad(buf, task->comm, size); return 0; err_clear: memset(buf, 0, size); -- cgit From 1d71283987c729dceccce834a864c27301ba155e Mon Sep 17 00:00:00 2001 From: David Vernet Date: Mon, 10 Apr 2023 23:16:31 -0500 Subject: bpf: Make bpf_cgroup_acquire() KF_RCU | KF_RET_NULL struct cgroup is already an RCU-safe type in the verifier. We can therefore update bpf_cgroup_acquire() to be KF_RCU | KF_RET_NULL, and subsequently remove bpf_cgroup_kptr_get(). This patch does the first of these by updating bpf_cgroup_acquire() to be KF_RCU | KF_RET_NULL, and also updates selftests accordingly. Signed-off-by: David Vernet Link: https://lore.kernel.org/r/20230411041633.179404-1-void@manifault.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/helpers.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'kernel/bpf/helpers.c') diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index b6a5cda5bb59..71f0604bdc97 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2037,8 +2037,7 @@ __bpf_kfunc void bpf_task_release(struct task_struct *p) */ __bpf_kfunc struct cgroup *bpf_cgroup_acquire(struct cgroup *cgrp) { - cgroup_get(cgrp); - return cgrp; + return cgroup_tryget(cgrp) ? cgrp : NULL; } /** @@ -2314,7 +2313,7 @@ BTF_ID_FLAGS(func, bpf_rbtree_add) BTF_ID_FLAGS(func, bpf_rbtree_first, KF_RET_NULL) #ifdef CONFIG_CGROUPS -BTF_ID_FLAGS(func, bpf_cgroup_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_cgroup_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_cgroup_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_cgroup_release, KF_RELEASE) BTF_ID_FLAGS(func, bpf_cgroup_ancestor, KF_ACQUIRE | KF_RCU | KF_RET_NULL) -- cgit From 6499fe6edc4fd5b91aed4d5cd84bd113e1c58d5f Mon Sep 17 00:00:00 2001 From: David Vernet Date: Mon, 10 Apr 2023 23:16:32 -0500 Subject: bpf: Remove bpf_cgroup_kptr_get() kfunc Now that bpf_cgroup_acquire() is KF_RCU | KF_RET_NULL, bpf_cgroup_kptr_get() is redundant. Let's remove it, and update selftests to instead use bpf_cgroup_acquire() where appropriate. The next patch will update the BPF documentation to not mention bpf_cgroup_kptr_get(). Signed-off-by: David Vernet Link: https://lore.kernel.org/r/20230411041633.179404-2-void@manifault.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/helpers.c | 32 -------------------------------- 1 file changed, 32 deletions(-) (limited to 'kernel/bpf/helpers.c') diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 71f0604bdc97..f04e60a4847f 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2040,37 +2040,6 @@ __bpf_kfunc struct cgroup *bpf_cgroup_acquire(struct cgroup *cgrp) return cgroup_tryget(cgrp) ? cgrp : NULL; } -/** - * bpf_cgroup_kptr_get - Acquire a reference on a struct cgroup kptr. A cgroup - * kptr acquired by this kfunc which is not subsequently stored in a map, must - * be released by calling bpf_cgroup_release(). - * @cgrpp: A pointer to a cgroup kptr on which a reference is being acquired. - */ -__bpf_kfunc struct cgroup *bpf_cgroup_kptr_get(struct cgroup **cgrpp) -{ - struct cgroup *cgrp; - - rcu_read_lock(); - /* Another context could remove the cgroup from the map and release it - * at any time, including after we've done the lookup above. This is - * safe because we're in an RCU read region, so the cgroup is - * guaranteed to remain valid until at least the rcu_read_unlock() - * below. - */ - cgrp = READ_ONCE(*cgrpp); - - if (cgrp && !cgroup_tryget(cgrp)) - /* If the cgroup had been removed from the map and freed as - * described above, cgroup_tryget() will return false. The - * cgroup will be freed at some point after the current RCU gp - * has ended, so just return NULL to the user. - */ - cgrp = NULL; - rcu_read_unlock(); - - return cgrp; -} - /** * bpf_cgroup_release - Release the reference acquired on a cgroup. * If this kfunc is invoked in an RCU read region, the cgroup is guaranteed to @@ -2314,7 +2283,6 @@ BTF_ID_FLAGS(func, bpf_rbtree_first, KF_RET_NULL) #ifdef CONFIG_CGROUPS BTF_ID_FLAGS(func, bpf_cgroup_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL) -BTF_ID_FLAGS(func, bpf_cgroup_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_cgroup_release, KF_RELEASE) BTF_ID_FLAGS(func, bpf_cgroup_ancestor, KF_ACQUIRE | KF_RCU | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_cgroup_from_id, KF_ACQUIRE | KF_RET_NULL) -- cgit From cd2a8079014aced27da9b2e669784f31680f1351 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Sat, 15 Apr 2023 13:18:03 -0700 Subject: bpf: Remove btf_field_offs, use btf_record's fields instead The btf_field_offs struct contains (offset, size) for btf_record fields, sorted by offset. btf_field_offs is always used in conjunction with btf_record, which has btf_field 'fields' array with (offset, type), the latter of which btf_field_offs' size is derived from via btf_field_type_size. This patch adds a size field to struct btf_field and sorts btf_record's fields by offset, making it possible to get rid of btf_field_offs. Less data duplication and less code complexity results. Since btf_field_offs' lifetime closely followed the btf_record used to populate it, most complexity wins are from removal of initialization code like: if (btf_record_successfully_initialized) { foffs = btf_parse_field_offs(rec); if (IS_ERR_OR_NULL(foffs)) // free the btf_record and return err } Other changes in this patch are pretty mechanical: * foffs->field_off[i] -> rec->fields[i].offset * foffs->field_sz[i] -> rec->fields[i].size * Sort rec->fields in btf_parse_fields before returning * It's possible that this is necessary independently of other changes in this patch. btf_record_find in syscall.c expects btf_record's fields to be sorted by offset, yet there's no explicit sorting of them before this patch, record's fields are populated in the order they're read from BTF struct definition. BTF docs don't say anything about the sortedness of struct fields. * All functions taking struct btf_field_offs * input now instead take struct btf_record *. All callsites of these functions already have access to the correct btf_record. Signed-off-by: Dave Marchevsky Link: https://lore.kernel.org/r/20230415201811.343116-2-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/helpers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/bpf/helpers.c') diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index f04e60a4847f..e989b6460147 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1893,7 +1893,7 @@ __bpf_kfunc void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign) if (!p) return NULL; if (meta) - bpf_obj_init(meta->field_offs, p); + bpf_obj_init(meta->record, p); return p; } -- cgit From 1512217c47f0e8ea076dd0e67262e5a668a78f01 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Sat, 15 Apr 2023 13:18:05 -0700 Subject: bpf: Support refcounted local kptrs in existing semantics A local kptr is considered 'refcounted' when it is of a type that has a bpf_refcount field. When such a kptr is created, its refcount should be initialized to 1; when destroyed, the object should be free'd only if a refcount decr results in 0 refcount. Existing logic always frees the underlying memory when destroying a local kptr, and 0-initializes all btf_record fields. This patch adds checks for "is local kptr refcounted?" and new logic for that case in the appropriate places. This patch focuses on changing existing semantics and thus conspicuously does _not_ provide a way for BPF programs in increment refcount. That follows later in the series. __bpf_obj_drop_impl is modified to do the right thing when it sees a refcounted type. Container types for graph nodes (list, tree, stashed in map) are migrated to use __bpf_obj_drop_impl as a destructor for their nodes instead of each having custom destruction code in their _free paths. Now that "drop" isn't a synonym for "free" when the type is refcounted it makes sense to centralize this logic. Signed-off-by: Dave Marchevsky Link: https://lore.kernel.org/r/20230415201811.343116-4-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/helpers.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'kernel/bpf/helpers.c') diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index e989b6460147..e2dbd9644e5c 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1798,6 +1798,8 @@ bpf_base_func_proto(enum bpf_func_id func_id) } } +void __bpf_obj_drop_impl(void *p, const struct btf_record *rec); + void bpf_list_head_free(const struct btf_field *field, void *list_head, struct bpf_spin_lock *spin_lock) { @@ -1828,13 +1830,8 @@ unlock: /* The contained type can also have resources, including a * bpf_list_head which needs to be freed. */ - bpf_obj_free_fields(field->graph_root.value_rec, obj); - /* bpf_mem_free requires migrate_disable(), since we can be - * called from map free path as well apart from BPF program (as - * part of map ops doing bpf_obj_free_fields). - */ migrate_disable(); - bpf_mem_free(&bpf_global_ma, obj); + __bpf_obj_drop_impl(obj, field->graph_root.value_rec); migrate_enable(); } } @@ -1871,10 +1868,9 @@ void bpf_rb_root_free(const struct btf_field *field, void *rb_root, obj = pos; obj -= field->graph_root.node_offset; - bpf_obj_free_fields(field->graph_root.value_rec, obj); migrate_disable(); - bpf_mem_free(&bpf_global_ma, obj); + __bpf_obj_drop_impl(obj, field->graph_root.value_rec); migrate_enable(); } } @@ -1897,8 +1893,17 @@ __bpf_kfunc void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign) return p; } +/* Must be called under migrate_disable(), as required by bpf_mem_free */ void __bpf_obj_drop_impl(void *p, const struct btf_record *rec) { + if (rec && rec->refcount_off >= 0 && + !refcount_dec_and_test((refcount_t *)(p + rec->refcount_off))) { + /* Object is refcounted and refcount_dec didn't result in 0 + * refcount. Return without freeing the object + */ + return; + } + if (rec) bpf_obj_free_fields(rec, p); bpf_mem_free(&bpf_global_ma, p); -- cgit From 7c50b1cb76aca4540aa917db5f2a302acddcadff Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Sat, 15 Apr 2023 13:18:06 -0700 Subject: bpf: Add bpf_refcount_acquire kfunc Currently, BPF programs can interact with the lifetime of refcounted local kptrs in the following ways: bpf_obj_new - Initialize refcount to 1 as part of new object creation bpf_obj_drop - Decrement refcount and free object if it's 0 collection add - Pass ownership to the collection. No change to refcount but collection is responsible for bpf_obj_dropping it In order to be able to add a refcounted local kptr to multiple collections we need to be able to increment the refcount and acquire a new owning reference. This patch adds a kfunc, bpf_refcount_acquire, implementing such an operation. bpf_refcount_acquire takes a refcounted local kptr and returns a new owning reference to the same underlying memory as the input. The input can be either owning or non-owning. To reinforce why this is safe, consider the following code snippets: struct node *n = bpf_obj_new(typeof(*n)); // A struct node *m = bpf_refcount_acquire(n); // B In the above snippet, n will be alive with refcount=1 after (A), and since nothing changes that state before (B), it's obviously safe. If n is instead added to some rbtree, we can still safely refcount_acquire it: struct node *n = bpf_obj_new(typeof(*n)); struct node *m; bpf_spin_lock(&glock); bpf_rbtree_add(&groot, &n->node, less); // A m = bpf_refcount_acquire(n); // B bpf_spin_unlock(&glock); In the above snippet, after (A) n is a non-owning reference, and after (B) m is an owning reference pointing to the same memory as n. Although n has no ownership of that memory's lifetime, it's guaranteed to be alive until the end of the critical section, and n would be clobbered if we were past the end of the critical section, so it's safe to bump refcount. Implementation details: * From verifier's perspective, bpf_refcount_acquire handling is similar to bpf_obj_new and bpf_obj_drop. Like the former, it returns a new owning reference matching input type, although like the latter, type can be inferred from concrete kptr input. Verifier changes in {check,fixup}_kfunc_call and check_kfunc_args are largely copied from aforementioned functions' verifier changes. * An exception to the above is the new KF_ARG_PTR_TO_REFCOUNTED_KPTR arg, indicated by new "__refcounted_kptr" kfunc arg suffix. This is necessary in order to handle both owning and non-owning input without adding special-casing to "__alloc" arg handling. Also a convenient place to confirm that input type has bpf_refcount field. * The implemented kfunc is actually bpf_refcount_acquire_impl, with 'hidden' second arg that the verifier sets to the type's struct_meta in fixup_kfunc_call. Signed-off-by: Dave Marchevsky Link: https://lore.kernel.org/r/20230415201811.343116-5-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/helpers.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'kernel/bpf/helpers.c') diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index e2dbd9644e5c..57ff8a60222c 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1917,6 +1917,20 @@ __bpf_kfunc void bpf_obj_drop_impl(void *p__alloc, void *meta__ign) __bpf_obj_drop_impl(p, meta ? meta->record : NULL); } +__bpf_kfunc void *bpf_refcount_acquire_impl(void *p__refcounted_kptr, void *meta__ign) +{ + struct btf_struct_meta *meta = meta__ign; + struct bpf_refcount *ref; + + /* Could just cast directly to refcount_t *, but need some code using + * bpf_refcount type so that it is emitted in vmlinux BTF + */ + ref = (struct bpf_refcount *)p__refcounted_kptr + meta->record->refcount_off; + + refcount_inc((refcount_t *)ref); + return (void *)p__refcounted_kptr; +} + static void __bpf_list_add(struct bpf_list_node *node, struct bpf_list_head *head, bool tail) { struct list_head *n = (void *)node, *h = (void *)head; @@ -2276,6 +2290,7 @@ BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE) #endif BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_obj_drop_impl, KF_RELEASE) +BTF_ID_FLAGS(func, bpf_refcount_acquire_impl, KF_ACQUIRE) BTF_ID_FLAGS(func, bpf_list_push_front) BTF_ID_FLAGS(func, bpf_list_push_back) BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL) -- cgit From d2dcc67df910dd85253a701b6a5b747f955d28f5 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Sat, 15 Apr 2023 13:18:07 -0700 Subject: bpf: Migrate bpf_rbtree_add and bpf_list_push_{front,back} to possibly fail Consider this code snippet: struct node { long key; bpf_list_node l; bpf_rb_node r; bpf_refcount ref; } int some_bpf_prog(void *ctx) { struct node *n = bpf_obj_new(/*...*/), *m; bpf_spin_lock(&glock); bpf_rbtree_add(&some_tree, &n->r, /* ... */); m = bpf_refcount_acquire(n); bpf_rbtree_add(&other_tree, &m->r, /* ... */); bpf_spin_unlock(&glock); /* ... */ } After bpf_refcount_acquire, n and m point to the same underlying memory, and that node's bpf_rb_node field is being used by the some_tree insert, so overwriting it as a result of the second insert is an error. In order to properly support refcounted nodes, the rbtree and list insert functions must be allowed to fail. This patch adds such support. The kfuncs bpf_rbtree_add, bpf_list_push_{front,back} are modified to return an int indicating success/failure, with 0 -> success, nonzero -> failure. bpf_obj_drop on failure ======================= Currently the only reason an insert can fail is the example above: the bpf_{list,rb}_node is already in use. When such a failure occurs, the insert kfuncs will bpf_obj_drop the input node. This allows the insert operations to logically fail without changing their verifier owning ref behavior, namely the unconditional release_reference of the input owning ref. With insert that always succeeds, ownership of the node is always passed to the collection, since the node always ends up in the collection. With a possibly-failed insert w/ bpf_obj_drop, ownership of the node is always passed either to the collection (success), or to bpf_obj_drop (failure). Regardless, it's correct to continue unconditionally releasing the input owning ref, as something is always taking ownership from the calling program on insert. Keeping owning ref behavior unchanged results in a nice default UX for insert functions that can fail. If the program's reaction to a failed insert is "fine, just get rid of this owning ref for me and let me go on with my business", then there's no reason to check for failure since that's default behavior. e.g.: long important_failures = 0; int some_bpf_prog(void *ctx) { struct node *n, *m, *o; /* all bpf_obj_new'd */ bpf_spin_lock(&glock); bpf_rbtree_add(&some_tree, &n->node, /* ... */); bpf_rbtree_add(&some_tree, &m->node, /* ... */); if (bpf_rbtree_add(&some_tree, &o->node, /* ... */)) { important_failures++; } bpf_spin_unlock(&glock); } If we instead chose to pass ownership back to the program on failed insert - by returning NULL on success or an owning ref on failure - programs would always have to do something with the returned ref on failure. The most likely action is probably "I'll just get rid of this owning ref and go about my business", which ideally would look like: if (n = bpf_rbtree_add(&some_tree, &n->node, /* ... */)) bpf_obj_drop(n); But bpf_obj_drop isn't allowed in a critical section and inserts must occur within one, so in reality error handling would become a hard-to-parse mess. For refcounted nodes, we can replicate the "pass ownership back to program on failure" logic with this patch's semantics, albeit in an ugly way: struct node *n = bpf_obj_new(/* ... */), *m; bpf_spin_lock(&glock); m = bpf_refcount_acquire(n); if (bpf_rbtree_add(&some_tree, &n->node, /* ... */)) { /* Do something with m */ } bpf_spin_unlock(&glock); bpf_obj_drop(m); bpf_refcount_acquire is used to simulate "return owning ref on failure". This should be an uncommon occurrence, though. Addition of two verifier-fixup'd args to collection inserts =========================================================== The actual bpf_obj_drop kfunc is bpf_obj_drop_impl(void *, struct btf_struct_meta *), with bpf_obj_drop macro populating the second arg with 0 and the verifier later filling in the arg during insn fixup. Because bpf_rbtree_add and bpf_list_push_{front,back} now might do bpf_obj_drop, these kfuncs need a btf_struct_meta parameter that can be passed to bpf_obj_drop_impl. Similarly, because the 'node' param to those insert functions is the bpf_{list,rb}_node within the node type, and bpf_obj_drop expects a pointer to the beginning of the node, the insert functions need to be able to find the beginning of the node struct. A second verifier-populated param is necessary: the offset of {list,rb}_node within the node type. These two new params allow the insert kfuncs to correctly call __bpf_obj_drop_impl: beginning_of_node = bpf_rb_node_ptr - offset if (already_inserted) __bpf_obj_drop_impl(beginning_of_node, btf_struct_meta->record); Similarly to other kfuncs with "hidden" verifier-populated params, the insert functions are renamed with _impl prefix and a macro is provided for common usage. For example, bpf_rbtree_add kfunc is now bpf_rbtree_add_impl and bpf_rbtree_add is now a macro which sets "hidden" args to 0. Due to the two new args BPF progs will need to be recompiled to work with the new _impl kfuncs. This patch also rewrites the "hidden argument" explanation to more directly say why the BPF program writer doesn't need to populate the arguments with anything meaningful. How does this new logic affect non-owning references? ===================================================== Currently, non-owning refs are valid until the end of the critical section in which they're created. We can make this guarantee because, if a non-owning ref exists, the referent was added to some collection. The collection will drop() its nodes when it goes away, but it can't go away while our program is accessing it, so that's not a problem. If the referent is removed from the collection in the same CS that it was added in, it can't be bpf_obj_drop'd until after CS end. Those are the only two ways to free the referent's memory and neither can happen until after the non-owning ref's lifetime ends. On first glance, having these collection insert functions potentially bpf_obj_drop their input seems like it breaks the "can't be bpf_obj_drop'd until after CS end" line of reasoning. But we care about the memory not being _freed_ until end of CS end, and a previous patch in the series modified bpf_obj_drop such that it doesn't free refcounted nodes until refcount == 0. So the statement can be more accurately rewritten as "can't be free'd until after CS end". We can prove that this rewritten statement holds for any non-owning reference produced by collection insert functions: * If the input to the insert function is _not_ refcounted * We have an owning reference to the input, and can conclude it isn't in any collection * Inserting a node in a collection turns owning refs into non-owning, and since our input type isn't refcounted, there's no way to obtain additional owning refs to the same underlying memory * Because our node isn't in any collection, the insert operation cannot fail, so bpf_obj_drop will not execute * If bpf_obj_drop is guaranteed not to execute, there's no risk of memory being free'd * Otherwise, the input to the insert function is refcounted * If the insert operation fails due to the node's list_head or rb_root already being in some collection, there was some previous successful insert which passed refcount to the collection * We have an owning reference to the input, it must have been acquired via bpf_refcount_acquire, which bumped the refcount * refcount must be >= 2 since there's a valid owning reference and the node is already in a collection * Insert triggering bpf_obj_drop will decr refcount to >= 1, never resulting in a free So although we may do bpf_obj_drop during the critical section, this will never result in memory being free'd, and no changes to non-owning ref logic are needed in this patch. Signed-off-by: Dave Marchevsky Link: https://lore.kernel.org/r/20230415201811.343116-6-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/helpers.c | 65 ++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 48 insertions(+), 17 deletions(-) (limited to 'kernel/bpf/helpers.c') diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 57ff8a60222c..5067f8d46872 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1931,7 +1931,8 @@ __bpf_kfunc void *bpf_refcount_acquire_impl(void *p__refcounted_kptr, void *meta return (void *)p__refcounted_kptr; } -static void __bpf_list_add(struct bpf_list_node *node, struct bpf_list_head *head, bool tail) +static int __bpf_list_add(struct bpf_list_node *node, struct bpf_list_head *head, + bool tail, struct btf_record *rec, u64 off) { struct list_head *n = (void *)node, *h = (void *)head; @@ -1939,17 +1940,35 @@ static void __bpf_list_add(struct bpf_list_node *node, struct bpf_list_head *hea INIT_LIST_HEAD(h); if (unlikely(!n->next)) INIT_LIST_HEAD(n); + if (!list_empty(n)) { + /* Only called from BPF prog, no need to migrate_disable */ + __bpf_obj_drop_impl(n - off, rec); + return -EINVAL; + } + tail ? list_add_tail(n, h) : list_add(n, h); + + return 0; } -__bpf_kfunc void bpf_list_push_front(struct bpf_list_head *head, struct bpf_list_node *node) +__bpf_kfunc int bpf_list_push_front_impl(struct bpf_list_head *head, + struct bpf_list_node *node, + void *meta__ign, u64 off) { - return __bpf_list_add(node, head, false); + struct btf_struct_meta *meta = meta__ign; + + return __bpf_list_add(node, head, false, + meta ? meta->record : NULL, off); } -__bpf_kfunc void bpf_list_push_back(struct bpf_list_head *head, struct bpf_list_node *node) +__bpf_kfunc int bpf_list_push_back_impl(struct bpf_list_head *head, + struct bpf_list_node *node, + void *meta__ign, u64 off) { - return __bpf_list_add(node, head, true); + struct btf_struct_meta *meta = meta__ign; + + return __bpf_list_add(node, head, true, + meta ? meta->record : NULL, off); } static struct bpf_list_node *__bpf_list_del(struct bpf_list_head *head, bool tail) @@ -1989,14 +2008,23 @@ __bpf_kfunc struct bpf_rb_node *bpf_rbtree_remove(struct bpf_rb_root *root, /* Need to copy rbtree_add_cached's logic here because our 'less' is a BPF * program */ -static void __bpf_rbtree_add(struct bpf_rb_root *root, struct bpf_rb_node *node, - void *less) +static int __bpf_rbtree_add(struct bpf_rb_root *root, struct bpf_rb_node *node, + void *less, struct btf_record *rec, u64 off) { struct rb_node **link = &((struct rb_root_cached *)root)->rb_root.rb_node; + struct rb_node *parent = NULL, *n = (struct rb_node *)node; bpf_callback_t cb = (bpf_callback_t)less; - struct rb_node *parent = NULL; bool leftmost = true; + if (!n->__rb_parent_color) + RB_CLEAR_NODE(n); + + if (!RB_EMPTY_NODE(n)) { + /* Only called from BPF prog, no need to migrate_disable */ + __bpf_obj_drop_impl(n - off, rec); + return -EINVAL; + } + while (*link) { parent = *link; if (cb((uintptr_t)node, (uintptr_t)parent, 0, 0, 0)) { @@ -2007,15 +2035,18 @@ static void __bpf_rbtree_add(struct bpf_rb_root *root, struct bpf_rb_node *node, } } - rb_link_node((struct rb_node *)node, parent, link); - rb_insert_color_cached((struct rb_node *)node, - (struct rb_root_cached *)root, leftmost); + rb_link_node(n, parent, link); + rb_insert_color_cached(n, (struct rb_root_cached *)root, leftmost); + return 0; } -__bpf_kfunc void bpf_rbtree_add(struct bpf_rb_root *root, struct bpf_rb_node *node, - bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b)) +__bpf_kfunc int bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node *node, + bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b), + void *meta__ign, u64 off) { - __bpf_rbtree_add(root, node, (void *)less); + struct btf_struct_meta *meta = meta__ign; + + return __bpf_rbtree_add(root, node, (void *)less, meta ? meta->record : NULL, off); } __bpf_kfunc struct bpf_rb_node *bpf_rbtree_first(struct bpf_rb_root *root) @@ -2291,14 +2322,14 @@ BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE) BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_obj_drop_impl, KF_RELEASE) BTF_ID_FLAGS(func, bpf_refcount_acquire_impl, KF_ACQUIRE) -BTF_ID_FLAGS(func, bpf_list_push_front) -BTF_ID_FLAGS(func, bpf_list_push_back) +BTF_ID_FLAGS(func, bpf_list_push_front_impl) +BTF_ID_FLAGS(func, bpf_list_push_back_impl) BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_list_pop_back, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_task_release, KF_RELEASE) BTF_ID_FLAGS(func, bpf_rbtree_remove, KF_ACQUIRE) -BTF_ID_FLAGS(func, bpf_rbtree_add) +BTF_ID_FLAGS(func, bpf_rbtree_add_impl) BTF_ID_FLAGS(func, bpf_rbtree_first, KF_RET_NULL) #ifdef CONFIG_CGROUPS -- cgit From 404ad75a36fb1a1008e9fe803aa7d0212df9e240 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Sat, 15 Apr 2023 13:18:09 -0700 Subject: bpf: Migrate bpf_rbtree_remove to possibly fail This patch modifies bpf_rbtree_remove to account for possible failure due to the input rb_node already not being in any collection. The function can now return NULL, and does when the aforementioned scenario occurs. As before, on successful removal an owning reference to the removed node is returned. Adding KF_RET_NULL to bpf_rbtree_remove's kfunc flags - now KF_RET_NULL | KF_ACQUIRE - provides the desired verifier semantics: * retval must be checked for NULL before use * if NULL, retval's ref_obj_id is released * retval is a "maybe acquired" owning ref, not a non-owning ref, so it will live past end of critical section (bpf_spin_unlock), and thus can be checked for NULL after the end of the CS BPF programs must add checks ============================ This does change bpf_rbtree_remove's verifier behavior. BPF program writers will need to add NULL checks to their programs, but the resulting UX looks natural: bpf_spin_lock(&glock); n = bpf_rbtree_first(&ghead); if (!n) { /* ... */} res = bpf_rbtree_remove(&ghead, &n->node); bpf_spin_unlock(&glock); if (!res) /* Newly-added check after this patch */ return 1; n = container_of(res, /* ... */); /* Do something else with n */ bpf_obj_drop(n); return 0; The "if (!res)" check above is the only addition necessary for the above program to pass verification after this patch. bpf_rbtree_remove no longer clobbers non-owning refs ==================================================== An issue arises when bpf_rbtree_remove fails, though. Consider this example: struct node_data { long key; struct bpf_list_node l; struct bpf_rb_node r; struct bpf_refcount ref; }; long failed_sum; void bpf_prog() { struct node_data *n = bpf_obj_new(/* ... */); struct bpf_rb_node *res; n->key = 10; bpf_spin_lock(&glock); bpf_list_push_back(&some_list, &n->l); /* n is now a non-owning ref */ res = bpf_rbtree_remove(&some_tree, &n->r, /* ... */); if (!res) failed_sum += n->key; /* not possible */ bpf_spin_unlock(&glock); /* if (res) { do something useful and drop } ... */ } The bpf_rbtree_remove in this example will always fail. Similarly to bpf_spin_unlock, bpf_rbtree_remove is a non-owning reference invalidation point. The verifier clobbers all non-owning refs after a bpf_rbtree_remove call, so the "failed_sum += n->key" line will fail verification, and in fact there's no good way to get information about the node which failed to add after the invalidation. This patch removes non-owning reference invalidation from bpf_rbtree_remove to allow the above usecase to pass verification. The logic for why this is now possible is as follows: Before this series, bpf_rbtree_add couldn't fail and thus assumed that its input, a non-owning reference, was in the tree. But it's easy to construct an example where two non-owning references pointing to the same underlying memory are acquired and passed to rbtree_remove one after another (see rbtree_api_release_aliasing in selftests/bpf/progs/rbtree_fail.c). So it was necessary to clobber non-owning refs to prevent this case and, more generally, to enforce "non-owning ref is definitely in some collection" invariant. This series removes that invariant and the failure / runtime checking added in this patch provide a clean way to deal with the aliasing issue - just fail to remove. Because the aliasing issue prevented by clobbering non-owning refs is no longer an issue, this patch removes the invalidate_non_owning_refs call from verifier handling of bpf_rbtree_remove. Note that bpf_spin_unlock - the other caller of invalidate_non_owning_refs - clobbers non-owning refs for a different reason, so its clobbering behavior remains unchanged. No BPF program changes are necessary for programs to remain valid as a result of this clobbering change. A valid program before this patch passed verification with its non-owning refs having shorter (or equal) lifetimes due to more aggressive clobbering. Also, update existing tests to check bpf_rbtree_remove retval for NULL where necessary, and move rbtree_api_release_aliasing from progs/rbtree_fail.c to progs/rbtree.c since it's now expected to pass verification. Signed-off-by: Dave Marchevsky Link: https://lore.kernel.org/r/20230415201811.343116-8-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/helpers.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'kernel/bpf/helpers.c') diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 5067f8d46872..1835df333287 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2000,6 +2000,12 @@ __bpf_kfunc struct bpf_rb_node *bpf_rbtree_remove(struct bpf_rb_root *root, struct rb_root_cached *r = (struct rb_root_cached *)root; struct rb_node *n = (struct rb_node *)node; + if (!n->__rb_parent_color) + RB_CLEAR_NODE(n); + + if (RB_EMPTY_NODE(n)) + return NULL; + rb_erase_cached(n, r); RB_CLEAR_NODE(n); return (struct bpf_rb_node *)n; @@ -2328,7 +2334,7 @@ BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_list_pop_back, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_task_release, KF_RELEASE) -BTF_ID_FLAGS(func, bpf_rbtree_remove, KF_ACQUIRE) +BTF_ID_FLAGS(func, bpf_rbtree_remove, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_rbtree_add_impl) BTF_ID_FLAGS(func, bpf_rbtree_first, KF_RET_NULL) -- cgit From 3e81740a90626024a9d9c6f9bfa3d36204dafefb Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Sat, 15 Apr 2023 13:18:10 -0700 Subject: bpf: Centralize btf_field-specific initialization logic All btf_fields in an object are 0-initialized by memset in bpf_obj_init. This might not be a valid initial state for some field types, in which case kfuncs that use the type will properly initialize their input if it's been 0-initialized. Some BPF graph collection types and kfuncs do this: bpf_list_{head,node} and bpf_rb_node. An earlier patch in this series added the bpf_refcount field, for which the 0 state indicates that the refcounted object should be free'd. bpf_obj_init treats this field specially, setting refcount to 1 instead of relying on scattered "refcount is 0? Must have just been initialized, let's set to 1" logic in kfuncs. This patch extends this treatment to list and rbtree field types, allowing most scattered initialization logic in kfuncs to be removed. Note that bpf_{list_head,rb_root} may be inside a BPF map, in which case they'll be 0-initialized without passing through the newly-added logic, so scattered initialization logic must remain for these collection root types. Signed-off-by: Dave Marchevsky Link: https://lore.kernel.org/r/20230415201811.343116-9-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/helpers.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'kernel/bpf/helpers.c') diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 1835df333287..00e5fb0682ac 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1936,10 +1936,11 @@ static int __bpf_list_add(struct bpf_list_node *node, struct bpf_list_head *head { struct list_head *n = (void *)node, *h = (void *)head; + /* If list_head was 0-initialized by map, bpf_obj_init_field wasn't + * called on its fields, so init here + */ if (unlikely(!h->next)) INIT_LIST_HEAD(h); - if (unlikely(!n->next)) - INIT_LIST_HEAD(n); if (!list_empty(n)) { /* Only called from BPF prog, no need to migrate_disable */ __bpf_obj_drop_impl(n - off, rec); @@ -1975,6 +1976,9 @@ static struct bpf_list_node *__bpf_list_del(struct bpf_list_head *head, bool tai { struct list_head *n, *h = (void *)head; + /* If list_head was 0-initialized by map, bpf_obj_init_field wasn't + * called on its fields, so init here + */ if (unlikely(!h->next)) INIT_LIST_HEAD(h); if (list_empty(h)) @@ -2000,9 +2004,6 @@ __bpf_kfunc struct bpf_rb_node *bpf_rbtree_remove(struct bpf_rb_root *root, struct rb_root_cached *r = (struct rb_root_cached *)root; struct rb_node *n = (struct rb_node *)node; - if (!n->__rb_parent_color) - RB_CLEAR_NODE(n); - if (RB_EMPTY_NODE(n)) return NULL; @@ -2022,9 +2023,6 @@ static int __bpf_rbtree_add(struct bpf_rb_root *root, struct bpf_rb_node *node, bpf_callback_t cb = (bpf_callback_t)less; bool leftmost = true; - if (!n->__rb_parent_color) - RB_CLEAR_NODE(n); - if (!RB_EMPTY_NODE(n)) { /* Only called from BPF prog, no need to migrate_disable */ __bpf_obj_drop_impl(n - off, rec); -- cgit From 4ab07209d5cc8cb6d2a5324c07b3efc3b2fde494 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Fri, 21 Apr 2023 00:44:31 -0700 Subject: bpf: Fix bpf_refcount_acquire's refcount_t address calculation When calculating the address of the refcount_t struct within a local kptr, bpf_refcount_acquire_impl should add refcount_off bytes to the address of the local kptr. Due to some missing parens, the function is incorrectly adding sizeof(refcount_t) * refcount_off bytes. This patch fixes the calculation. Due to the incorrect calculation, bpf_refcount_acquire_impl was trying to refcount_inc some memory well past the end of local kptrs, resulting in kasan and refcount complaints, as reported in [0]. In that thread, Florian and Eduard discovered that bpf selftests written in the new style - with __success and an expected __retval, specifically - were not actually being run. As a result, selftests added in bpf_refcount series weren't really exercising this behavior, and thus didn't unearth the bug. With this fixed behavior it's safe to revert commit 7c4b96c00043 ("selftests/bpf: disable program test run for progs/refcounted_kptr.c"), this patch does so. [0] https://lore.kernel.org/bpf/ZEEp+j22imoN6rn9@strlen.de/ Fixes: 7c50b1cb76ac ("bpf: Add bpf_refcount_acquire kfunc") Reported-by: Florian Westphal Reported-by: Eduard Zingerman Signed-off-by: Dave Marchevsky Signed-off-by: Daniel Borkmann Tested-by: Eduard Zingerman Link: https://lore.kernel.org/bpf/20230421074431.3548349-1-davemarchevsky@fb.com --- kernel/bpf/helpers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/bpf/helpers.c') diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 00e5fb0682ac..8d368fa353f9 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1925,7 +1925,7 @@ __bpf_kfunc void *bpf_refcount_acquire_impl(void *p__refcounted_kptr, void *meta /* Could just cast directly to refcount_t *, but need some code using * bpf_refcount type so that it is emitted in vmlinux BTF */ - ref = (struct bpf_refcount *)p__refcounted_kptr + meta->record->refcount_off; + ref = (struct bpf_refcount *)(p__refcounted_kptr + meta->record->refcount_off); refcount_inc((refcount_t *)ref); return (void *)p__refcounted_kptr; -- cgit