summaryrefslogtreecommitdiff
path: root/include/linux/bpf-cgroup.h
diff options
context:
space:
mode:
authorAndrii Nakryiko <andrii@kernel.org>2021-07-12 16:06:15 -0700
committerDaniel Borkmann <daniel@iogearbox.net>2021-07-16 21:15:28 +0200
commitc7603cfa04e7c3a435b31d065f7cbdc829428f6e (patch)
tree1d934a9a65bc67003fdf2721beb1eeced14614aa /include/linux/bpf-cgroup.h
parentd4861fc6be581561d6964700110a4dede54da6a6 (diff)
bpf: Add ambient BPF runtime context stored in current
b910eaaaa4b8 ("bpf: Fix NULL pointer dereference in bpf_get_local_storage() helper") fixed the problem with cgroup-local storage use in BPF by pre-allocating per-CPU array of 8 cgroup storage pointers to accommodate possible BPF program preemptions and nested executions. While this seems to work good in practice, it introduces new and unnecessary failure mode in which not all BPF programs might be executed if we fail to find an unused slot for cgroup storage, however unlikely it is. It might also not be so unlikely when/if we allow sleepable cgroup BPF programs in the future. Further, the way that cgroup storage is implemented as ambiently-available property during entire BPF program execution is a convenient way to pass extra information to BPF program and helpers without requiring user code to pass around extra arguments explicitly. So it would be good to have a generic solution that can allow implementing this without arbitrary restrictions. Ideally, such solution would work for both preemptable and sleepable BPF programs in exactly the same way. This patch introduces such solution, bpf_run_ctx. It adds one pointer field (bpf_ctx) to task_struct. This field is maintained by BPF_PROG_RUN family of macros in such a way that it always stays valid throughout BPF program execution. BPF program preemption is handled by remembering previous current->bpf_ctx value locally while executing nested BPF program and restoring old value after nested BPF program finishes. This is handled by two helper functions, bpf_set_run_ctx() and bpf_reset_run_ctx(), which are supposed to be used before and after BPF program runs, respectively. Restoring old value of the pointer handles preemption, while bpf_run_ctx pointer being a property of current task_struct naturally solves this problem for sleepable BPF programs by "following" BPF program execution as it is scheduled in and out of CPU. It would even allow CPU migration of BPF programs, even though it's not currently allowed by BPF infra. This patch cleans up cgroup local storage handling as a first application. The design itself is generic, though, with bpf_run_ctx being an empty struct that is supposed to be embedded into a specific struct for a given BPF program type (bpf_cg_run_ctx in this case). Follow up patches are planned that will expand this mechanism for other uses within tracing BPF programs. To verify that this change doesn't revert the fix to the original cgroup storage issue, I ran the same repro as in the original report ([0]) and didn't get any problems. Replacing bpf_reset_run_ctx(old_run_ctx) with bpf_reset_run_ctx(NULL) triggers the issue pretty quickly (so repro does work). [0] https://lore.kernel.org/bpf/YEEvBUiJl2pJkxTd@krava/ Fixes: b910eaaaa4b8 ("bpf: Fix NULL pointer dereference in bpf_get_local_storage() helper") Signed-off-by: Andrii Nakryiko <andrii@kernel.org> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Acked-by: Yonghong Song <yhs@fb.com> Link: https://lore.kernel.org/bpf/20210712230615.3525979-1-andrii@kernel.org
Diffstat (limited to 'include/linux/bpf-cgroup.h')
-rw-r--r--include/linux/bpf-cgroup.h54
1 files changed, 0 insertions, 54 deletions
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 8b77d08d4b47..a74cd1c3bd87 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -27,19 +27,6 @@ struct task_struct;
extern struct static_key_false cgroup_bpf_enabled_key[MAX_BPF_ATTACH_TYPE];
#define cgroup_bpf_enabled(type) static_branch_unlikely(&cgroup_bpf_enabled_key[type])
-#define BPF_CGROUP_STORAGE_NEST_MAX 8
-
-struct bpf_cgroup_storage_info {
- struct task_struct *task;
- struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE];
-};
-
-/* For each cpu, permit maximum BPF_CGROUP_STORAGE_NEST_MAX number of tasks
- * to use bpf cgroup storage simultaneously.
- */
-DECLARE_PER_CPU(struct bpf_cgroup_storage_info,
- bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]);
-
#define for_each_cgroup_storage_type(stype) \
for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++)
@@ -172,44 +159,6 @@ static inline enum bpf_cgroup_storage_type cgroup_storage_type(
return BPF_CGROUP_STORAGE_SHARED;
}
-static inline int bpf_cgroup_storage_set(struct bpf_cgroup_storage
- *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
-{
- enum bpf_cgroup_storage_type stype;
- int i, err = 0;
-
- preempt_disable();
- for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
- if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != NULL))
- continue;
-
- this_cpu_write(bpf_cgroup_storage_info[i].task, current);
- for_each_cgroup_storage_type(stype)
- this_cpu_write(bpf_cgroup_storage_info[i].storage[stype],
- storage[stype]);
- goto out;
- }
- err = -EBUSY;
- WARN_ON_ONCE(1);
-
-out:
- preempt_enable();
- return err;
-}
-
-static inline void bpf_cgroup_storage_unset(void)
-{
- int i;
-
- for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
- if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current))
- continue;
-
- this_cpu_write(bpf_cgroup_storage_info[i].task, NULL);
- return;
- }
-}
-
struct bpf_cgroup_storage *
cgroup_storage_lookup(struct bpf_cgroup_storage_map *map,
void *key, bool locked);
@@ -487,9 +436,6 @@ static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
return -EINVAL;
}
-static inline int bpf_cgroup_storage_set(
- struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) { return 0; }
-static inline void bpf_cgroup_storage_unset(void) {}
static inline int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux,
struct bpf_map *map) { return 0; }
static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(