author | David Vernet <void@manifault.com> | 2024-07-31 00:14:36 -0500 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2024-07-31 07:45:28 -1000 |
commit | 298dec19bdeb6e33ac220502504d969272b50cf6 (patch) | |
tree | ae6cc05c39b7f8af51c28947a2f6532850b3fcff /kernel/sched/ext.c | |
parent | c8faf11cd192214e231626c3ee973a35d8fc33f2 (diff) | |
scx: Allow calling sleepable kfuncs from BPF_PROG_TYPE_SYSCALL
We currently only allow calling sleepable scx kfuncs (i.e.
scx_bpf_create_dsq()) from BPF_PROG_TYPE_STRUCT_OPS progs. The idea here
was that we'd never have to call scx_bpf_create_dsq() outside of a
sched_ext struct_ops callback, but that might not actually be true. For
example, a scheduler could do something like the following (see the sketch
after this list):
1. Open and load (not yet attach) a scheduler skel
2. Synchronously call into a BPF_PROG_TYPE_SYSCALL prog from user space.
For example, to initialize an LLC domain, or some other global,
read-only state.
3. Attach the skel, which actually enables the scheduler
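A minimal user-space sketch of that flow, assuming a libbpf-generated
skeleton; the skeleton name (scx_sched), the setup_domains syscall prog, and
the sched_ops struct_ops map are illustrative placeholders, not part of this
patch:

```c
#include <errno.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include "scx_sched.bpf.skel.h"		/* hypothetical generated skeleton */

static int load_init_attach(void)
{
	LIBBPF_OPTS(bpf_test_run_opts, tropts);
	struct scx_sched *skel;
	struct bpf_link *link;
	int err;

	/* 1. Open and load the scheduler skel; nothing is attached yet. */
	skel = scx_sched__open_and_load();
	if (!skel)
		return -errno;

	/*
	 * 2. Synchronously run a BPF_PROG_TYPE_SYSCALL prog from user space.
	 *    With this change it may call sleepable scx kfuncs such as
	 *    scx_bpf_create_dsq() to set up global, read-only state.
	 */
	err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.setup_domains),
				     &tropts);
	if (!err && tropts.retval)
		err = -EINVAL;
	if (err)
		goto out_destroy;

	/* 3. Attach the struct_ops map, which actually enables the scheduler. */
	link = bpf_map__attach_struct_ops(skel->maps.sched_ops);
	if (!link) {
		err = -errno;
		goto out_destroy;
	}

	/* Keep link/skel alive for the scheduler's lifetime; teardown elided. */
	return 0;

out_destroy:
	scx_sched__destroy(skel);
	return err;
}
```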
The advantage of doing this is that it avoids having to write pretty ugly
boilerplate, such as initializing a read-only, statically sized array of
u64s which the kernel consumes exactly once at init time just to create
struct bpf_cpumask objects that are actually queried at runtime.
Doing the above is already possible given that we can invoke core BPF
kfuncs, such as bpf_cpumask_create(), from BPF_PROG_TYPE_SYSCALL progs. We
already allow many scx kfuncs to be called from BPF_PROG_TYPE_SYSCALL progs
(e.g. scx_bpf_kick_cpu()). Let's allow the sleepable kfuncs as well.
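For illustration, a hedged sketch of what such a BPF_PROG_TYPE_SYSCALL prog
might look like on the BPF side, assuming the private() macro and kfunc
declarations from scx's common.bpf.h; the prog name, DSQ id, and llc_cpumask
variable are made up for this example:

```c
/* Hypothetical "setup_domains" syscall prog in the scheduler's .bpf.c. */
#include <scx/common.bpf.h>

#define LLC_DSQ 0	/* illustrative DSQ id */

private(LLC) struct bpf_cpumask __kptr *llc_cpumask;

SEC("syscall")
int setup_domains(void *ctx)
{
	struct bpf_cpumask *mask;
	s32 err;

	/* Sleepable scx kfunc, now callable from a BPF_PROG_TYPE_SYSCALL prog. */
	err = scx_bpf_create_dsq(LLC_DSQ, -1 /* NUMA_NO_NODE */);
	if (err)
		return err;

	/* Core BPF kfuncs like bpf_cpumask_create() were already callable here. */
	mask = bpf_cpumask_create();
	if (!mask)
		return -1;	/* allocation failed */

	/* Stash the cpumask for struct_ops callbacks to query at runtime. */
	mask = bpf_kptr_xchg(&llc_cpumask, mask);
	if (mask)
		bpf_cpumask_release(mask);

	return 0;
}
```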
Signed-off-by: David Vernet <void@manifault.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'kernel/sched/ext.c')
-rw-r--r-- | kernel/sched/ext.c | 27 |
1 file changed, 11 insertions, 16 deletions
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index da9cac6b6cc2..4a07deb30c44 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -1029,16 +1029,12 @@ static __always_inline bool scx_kf_allowed(u32 mask)
 		return false;
 	}
 
-	if (unlikely((mask & SCX_KF_SLEEPABLE) && in_interrupt())) {
-		scx_ops_error("sleepable kfunc called from non-sleepable context");
-		return false;
-	}
-
 	/*
 	 * Enforce nesting boundaries. e.g. A kfunc which can be called from
 	 * DISPATCH must not be called if we're running DEQUEUE which is nested
-	 * inside ops.dispatch(). We don't need to check the SCX_KF_SLEEPABLE
-	 * boundary thanks to the above in_interrupt() check.
+	 * inside ops.dispatch(). We don't need to check boundaries for any
+	 * blocking kfuncs as the verifier ensures they're only called from
+	 * sleepable progs.
 	 */
 	if (unlikely(highest_bit(mask) == SCX_KF_CPU_RELEASE &&
 		     (current->scx.kf_mask & higher_bits(SCX_KF_CPU_RELEASE)))) {
@@ -3224,9 +3220,9 @@ static void handle_hotplug(struct rq *rq, bool online)
 	atomic_long_inc(&scx_hotplug_seq);
 
 	if (online && SCX_HAS_OP(cpu_online))
-		SCX_CALL_OP(SCX_KF_SLEEPABLE, cpu_online, cpu);
+		SCX_CALL_OP(SCX_KF_UNLOCKED, cpu_online, cpu);
 	else if (!online && SCX_HAS_OP(cpu_offline))
-		SCX_CALL_OP(SCX_KF_SLEEPABLE, cpu_offline, cpu);
+		SCX_CALL_OP(SCX_KF_UNLOCKED, cpu_offline, cpu);
 	else
 		scx_ops_exit(SCX_ECODE_ACT_RESTART | SCX_ECODE_RSN_HOTPLUG,
 			     "cpu %d going %s, exiting scheduler", cpu,
@@ -3390,7 +3386,7 @@ static int scx_ops_init_task(struct task_struct *p, struct task_group *tg, bool
 		.fork = fork,
 	};
 
-	ret = SCX_CALL_OP_RET(SCX_KF_SLEEPABLE, init_task, p, &args);
+	ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, init_task, p, &args);
 	if (unlikely(ret)) {
 		ret = ops_sanitize_err("init_task", ret);
 		return ret;
@@ -4648,7 +4644,7 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
 	cpus_read_lock();
 
 	if (scx_ops.init) {
-		ret = SCX_CALL_OP_RET(SCX_KF_SLEEPABLE, init);
+		ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, init);
 		if (ret) {
 			ret = ops_sanitize_err("init", ret);
 			goto err_disable_unlock_cpus;
@@ -5424,14 +5420,11 @@ __bpf_kfunc_start_defs();
  * @dsq_id: DSQ to create
  * @node: NUMA node to allocate from
  *
- * Create a custom DSQ identified by @dsq_id. Can be called from ops.init() and
- * ops.init_task().
+ * Create a custom DSQ identified by @dsq_id. Can be called from any sleepable
+ * scx callback, and any BPF_PROG_TYPE_SYSCALL prog.
  */
 __bpf_kfunc s32 scx_bpf_create_dsq(u64 dsq_id, s32 node)
 {
-	if (!scx_kf_allowed(SCX_KF_SLEEPABLE))
-		return -EINVAL;
-
 	if (unlikely(node >= (int)nr_node_ids ||
 		     (node < 0 && node != NUMA_NO_NODE)))
 		return -EINVAL;
@@ -6490,6 +6483,8 @@ static int __init scx_init(void)
 	 */
 	if ((ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
 					     &scx_kfunc_set_sleepable)) ||
+	    (ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL,
+					     &scx_kfunc_set_sleepable)) ||
 	    (ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
 					     &scx_kfunc_set_select_cpu)) ||
 	    (ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,