author     Alexei Starovoitov <ast@kernel.org>    2025-11-21 18:35:00 -0800
committer  Alexei Starovoitov <ast@kernel.org>    2025-11-21 18:35:00 -0800
commit     c42732087341e3c1ae34b25cc318609cacd866ac
tree       826d07187b4c1d515421716f825be8cccca570e2
parent     8f7cf305a15eec663d5084e417f4773f1ef24e23
parent     cf49ec5705a6fb635ac9c2626f9ac7a39344b1f5
Merge branch 'bpf-nested-rcu-critical-sections'
Puranjay Mohan says:
====================
bpf: Nested rcu critical sections
v1: https://lore.kernel.org/bpf/20250916113622.19540-1-puranjay@kernel.org/
Changes in v1->v2:
- Move the addition of new tests to a separate patch (Alexei)
- Avoid incrementing active_rcu_locks at two places (Eduard)
Support nested rcu critical sections by turning the boolean flag
active_rcu_lock into a counter, active_rcu_locks, and using it to
manage rcu critical section state. bpf_rcu_read_lock() increments this
counter and bpf_rcu_read_unlock() decrements it; the MEM_RCU ->
PTR_UNTRUSTED transition happens only when active_rcu_locks drops to 0.
====================
Link: https://patch.msgid.link/20251117200411.25563-1-puranjay@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
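
To make the new semantics concrete, here is a minimal sketch (not part of this
series) of a sleepable BPF program that becomes legal once nesting is counted.
The program name, attach point, and printed field are illustrative; the kfunc
declarations follow the style of the selftests below.

  #include "vmlinux.h"
  #include <bpf/bpf_helpers.h>

  /* RCU kfuncs provided by the kernel, declared as in the selftests. */
  void bpf_rcu_read_lock(void) __ksym;
  void bpf_rcu_read_unlock(void) __ksym;

  char _license[] SEC("license") = "GPL";

  /* Attach point is illustrative; the selftests use SYS_PREFIX instead
   * of a hard-coded x86-64 syscall symbol.
   */
  SEC("fentry.s/__x64_sys_getpgid")
  int nested_rcu_ok(void *ctx)
  {
  	struct task_struct *task = bpf_get_current_task_btf();
  	struct task_struct *parent;

  	bpf_rcu_read_lock();		/* active_rcu_locks: 0 -> 1 */
  	bpf_rcu_read_lock();		/* 1 -> 2; rejected as "nested" before this series */
  	parent = task->real_parent;	/* MEM_RCU | PTR_MAYBE_NULL */
  	bpf_rcu_read_unlock();		/* 2 -> 1: parent is still MEM_RCU */
  	if (parent)
  		bpf_printk("parent pid %d", parent->pid);
  	bpf_rcu_read_unlock();		/* 1 -> 0: MEM_RCU regs become PTR_UNTRUSTED */
  	return 0;
  }

Unbalanced lock/unlock pairs are still rejected, which is what the two new
unbalanced selftests below exercise.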
 include/linux/bpf_verifier.h                           |  2
 kernel/bpf/verifier.c                                  | 47
 tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c |  4
 tools/testing/selftests/bpf/progs/rcu_read_lock.c      | 40
 4 files changed, 66 insertions(+), 27 deletions(-)
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 8d0b60fa5f2b..130bcbd66f60 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -416,7 +416,7 @@ struct bpf_verifier_state {
 	u32 active_irq_id;
 	u32 active_lock_id;
 	void *active_lock_ptr;
-	bool active_rcu_lock;
+	u32 active_rcu_locks;
 	bool speculative;
 	bool in_sleepable;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 0828718a8ba7..2e170be647bd 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1437,7 +1437,7 @@ static int copy_reference_state(struct bpf_verifier_state *dst, const struct bpf_verifier_state *src)
 	dst->acquired_refs = src->acquired_refs;
 	dst->active_locks = src->active_locks;
 	dst->active_preempt_locks = src->active_preempt_locks;
-	dst->active_rcu_lock = src->active_rcu_lock;
+	dst->active_rcu_locks = src->active_rcu_locks;
 	dst->active_irq_id = src->active_irq_id;
 	dst->active_lock_id = src->active_lock_id;
 	dst->active_lock_ptr = src->active_lock_ptr;
@@ -5889,7 +5889,7 @@ static bool in_sleepable(struct bpf_verifier_env *env)
  */
 static bool in_rcu_cs(struct bpf_verifier_env *env)
 {
-	return env->cur_state->active_rcu_lock ||
+	return env->cur_state->active_rcu_locks ||
 	       env->cur_state->active_locks ||
 	       !in_sleepable(env);
 }
@@ -10744,7 +10744,7 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 	}
 
 	if (env->subprog_info[subprog].might_sleep &&
-	    (env->cur_state->active_rcu_lock || env->cur_state->active_preempt_locks ||
+	    (env->cur_state->active_rcu_locks || env->cur_state->active_preempt_locks ||
 	     env->cur_state->active_irq_id || !in_sleepable(env))) {
 		verbose(env, "global functions that may sleep are not allowed in non-sleepable context,\n"
 			     "i.e., in a RCU/IRQ/preempt-disabled section, or in\n"
@@ -11327,7 +11327,7 @@ static int check_resource_leak(struct bpf_verifier_env *env, bool exception_exit
 		return -EINVAL;
 	}
 
-	if (check_lock && env->cur_state->active_rcu_lock) {
+	if (check_lock && env->cur_state->active_rcu_locks) {
 		verbose(env, "%s cannot be used inside bpf_rcu_read_lock-ed region\n", prefix);
 		return -EINVAL;
 	}
@@ -11465,7 +11465,7 @@ static int get_helper_proto(struct bpf_verifier_env *env, int func_id,
 /* Check if we're in a sleepable context. */
 static inline bool in_sleepable_context(struct bpf_verifier_env *env)
 {
-	return !env->cur_state->active_rcu_lock &&
+	return !env->cur_state->active_rcu_locks &&
 	       !env->cur_state->active_preempt_locks &&
 	       !env->cur_state->active_irq_id &&
 	       in_sleepable(env);
@@ -11531,7 +11531,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 			return err;
 	}
 
-	if (env->cur_state->active_rcu_lock) {
+	if (env->cur_state->active_rcu_locks) {
 		if (fn->might_sleep) {
 			verbose(env, "sleepable helper %s#%d in rcu_read_lock region\n",
 				func_id_name(func_id), func_id);
@@ -14038,36 +14038,33 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 	preempt_disable = is_kfunc_bpf_preempt_disable(&meta);
 	preempt_enable = is_kfunc_bpf_preempt_enable(&meta);
 
-	if (env->cur_state->active_rcu_lock) {
+	if (rcu_lock) {
+		env->cur_state->active_rcu_locks++;
+	} else if (rcu_unlock) {
 		struct bpf_func_state *state;
 		struct bpf_reg_state *reg;
 		u32 clear_mask = (1 << STACK_SPILL) | (1 << STACK_ITER);
 
-		if (in_rbtree_lock_required_cb(env) && (rcu_lock || rcu_unlock)) {
-			verbose(env, "Calling bpf_rcu_read_{lock,unlock} in unnecessary rbtree callback\n");
-			return -EACCES;
-		}
-
-		if (rcu_lock) {
-			verbose(env, "nested rcu read lock (kernel function %s)\n", func_name);
+		if (env->cur_state->active_rcu_locks == 0) {
+			verbose(env, "unmatched rcu read unlock (kernel function %s)\n", func_name);
 			return -EINVAL;
-		} else if (rcu_unlock) {
+		}
+		if (--env->cur_state->active_rcu_locks == 0) {
 			bpf_for_each_reg_in_vstate_mask(env->cur_state, state, reg, clear_mask, ({
 				if (reg->type & MEM_RCU) {
 					reg->type &= ~(MEM_RCU | PTR_MAYBE_NULL);
 					reg->type |= PTR_UNTRUSTED;
 				}
 			}));
-			env->cur_state->active_rcu_lock = false;
-		} else if (sleepable) {
-			verbose(env, "kernel func %s is sleepable within rcu_read_lock region\n", func_name);
-			return -EACCES;
 		}
-	} else if (rcu_lock) {
-		env->cur_state->active_rcu_lock = true;
-	} else if (rcu_unlock) {
-		verbose(env, "unmatched rcu read unlock (kernel function %s)\n", func_name);
-		return -EINVAL;
+	} else if (sleepable && env->cur_state->active_rcu_locks) {
+		verbose(env, "kernel func %s is sleepable within rcu_read_lock region\n", func_name);
+		return -EACCES;
+	}
+
+	if (in_rbtree_lock_required_cb(env) && (rcu_lock || rcu_unlock)) {
+		verbose(env, "Calling bpf_rcu_read_{lock,unlock} in unnecessary rbtree callback\n");
+		return -EACCES;
 	}
 
 	if (env->cur_state->active_preempt_locks) {
@@ -19387,7 +19384,7 @@ static bool refsafe(struct bpf_verifier_state *old, struct bpf_verifier_state *cur,
 	if (old->active_preempt_locks != cur->active_preempt_locks)
 		return false;
 
-	if (old->active_rcu_lock != cur->active_rcu_lock)
+	if (old->active_rcu_locks != cur->active_rcu_locks)
 		return false;
 
 	if (!check_ids(old->active_irq_id, cur->active_irq_id, idmap))
diff --git a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c
index c9f855e5da24..246eb259c08a 100644
--- a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c
@@ -28,6 +28,7 @@ static void test_success(void)
 	bpf_program__set_autoload(skel->progs.two_regions, true);
 	bpf_program__set_autoload(skel->progs.non_sleepable_1, true);
 	bpf_program__set_autoload(skel->progs.non_sleepable_2, true);
+	bpf_program__set_autoload(skel->progs.nested_rcu_region, true);
 	bpf_program__set_autoload(skel->progs.task_trusted_non_rcuptr, true);
 	bpf_program__set_autoload(skel->progs.rcu_read_lock_subprog, true);
 	bpf_program__set_autoload(skel->progs.rcu_read_lock_global_subprog, true);
@@ -78,7 +79,8 @@ static const char * const inproper_region_tests[] = {
 	"non_sleepable_rcu_mismatch",
 	"inproper_sleepable_helper",
 	"inproper_sleepable_kfunc",
-	"nested_rcu_region",
+	"nested_rcu_region_unbalanced_1",
+	"nested_rcu_region_unbalanced_2",
 	"rcu_read_lock_global_subprog_lock",
 	"rcu_read_lock_global_subprog_unlock",
 	"rcu_read_lock_sleepable_helper_global_subprog",
diff --git a/tools/testing/selftests/bpf/progs/rcu_read_lock.c b/tools/testing/selftests/bpf/progs/rcu_read_lock.c
index 3a868a199349..d70c28824bbe 100644
--- a/tools/testing/selftests/bpf/progs/rcu_read_lock.c
+++ b/tools/testing/selftests/bpf/progs/rcu_read_lock.c
@@ -278,6 +278,46 @@ out:
 	return 0;
 }
 
+SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep")
+int nested_rcu_region_unbalanced_1(void *ctx)
+{
+	struct task_struct *task, *real_parent;
+
+	/* nested rcu read lock regions */
+	task = bpf_get_current_task_btf();
+	bpf_rcu_read_lock();
+	bpf_rcu_read_lock();
+	real_parent = task->real_parent;
+	if (!real_parent)
+		goto out;
+	(void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
+out:
+	bpf_rcu_read_unlock();
+	bpf_rcu_read_unlock();
+	bpf_rcu_read_unlock();
+	return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep")
+int nested_rcu_region_unbalanced_2(void *ctx)
+{
+	struct task_struct *task, *real_parent;
+
+	/* nested rcu read lock regions */
+	task = bpf_get_current_task_btf();
+	bpf_rcu_read_lock();
+	bpf_rcu_read_lock();
+	bpf_rcu_read_lock();
+	real_parent = task->real_parent;
+	if (!real_parent)
+		goto out;
+	(void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
+out:
+	bpf_rcu_read_unlock();
+	bpf_rcu_read_unlock();
+	return 0;
+}
+
 SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
 int task_trusted_non_rcuptr(void *ctx)
 {
