diff options
| author | Tejun Heo <tj@kernel.org> | 2024-09-04 10:24:59 -1000 |
|---|---|---|
| committer | Tejun Heo <tj@kernel.org> | 2024-09-04 10:24:59 -1000 |
| commit | 8195136669661fdfe54e9a8923c33b31c92fc1da (patch) | |
| tree | 73d02c215b3fd6c79728611ebfe33da658ab7b98 /tools | |
| parent | e179e80c5d4fef458c3cbc3ad4ea17c6d42c0446 (diff) | |
sched_ext: Add cgroup support
Add sched_ext_ops operations to init/exit cgroups, and track task migrations
and config changes. A BPF scheduler may implement none or only a
subset of cgroup features. The implemented features can be indicated using
%SCX_OPS_HAS_CGROUP_* flags. If cgroup configuration makes use of features
that are not implemented, a warning is triggered.
While a BPF scheduler is being enabled and disabled, relevant cgroup
operations are locked out using scx_cgroup_rwsem. This avoids situations
like task prep taking place while the task is being moved across cgroups,
making things easier for BPF schedulers.
v7: - cgroup interface file visibility toggling is dropped in favor of just
warning messages. Dynamically changing interface visibility caused more
confusion than it helped.
v6: - Updated to reflect the removal of SCX_KF_SLEEPABLE.
- Updated to use CONFIG_GROUP_SCHED_WEIGHT and fixes for
!CONFIG_FAIR_GROUP_SCHED && CONFIG_EXT_GROUP_SCHED.
v5: - Flipped the locking order between scx_cgroup_rwsem and
cpus_read_lock() to avoid locking order conflict w/ cpuset. Better
documentation around locking.
- sched_move_task() takes an early exit if the source and destination
are identical. This triggered the warning in scx_cgroup_can_attach()
as it left p->scx.cgrp_moving_from uncleared. Updated the cgroup
migration path so that ops.cgroup_prep_move() is skipped for identity
migrations so that its invocations always match ops.cgroup_move()
one-to-one.
v4: - Example schedulers moved into their own patches.
- Fix build failure when !CONFIG_CGROUP_SCHED, reported by Andrea Righi.
v3: - Make scx_example_pair switch all tasks by default.
- Convert to BPF inline iterators.
- scx_bpf_task_cgroup() is added to determine the current cgroup from
CPU controller's POV. This allows BPF schedulers to accurately track
CPU cgroup membership.
- scx_example_flatcg added. This demonstrates flattened hierarchy
implementation of CPU cgroup control and shows significant performance
improvement when cgroups which are nested multiple levels are under
competition.
v2: - Build fixes for different CONFIG combinations.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: David Vernet <dvernet@meta.com>
Acked-by: Josh Don <joshdon@google.com>
Acked-by: Hao Luo <haoluo@google.com>
Acked-by: Barret Rhoden <brho@google.com>
Reported-by: kernel test robot <lkp@intel.com>
Cc: Andrea Righi <andrea.righi@canonical.com>
Diffstat (limited to 'tools')
| -rw-r--r-- | tools/sched_ext/include/scx/common.bpf.h | 1 | ||||
| -rw-r--r-- | tools/testing/selftests/sched_ext/maximal.bpf.c | 32 |
2 files changed, 33 insertions, 0 deletions
diff --git a/tools/sched_ext/include/scx/common.bpf.h b/tools/sched_ext/include/scx/common.bpf.h index 20280df62857..457462b19966 100644 --- a/tools/sched_ext/include/scx/common.bpf.h +++ b/tools/sched_ext/include/scx/common.bpf.h @@ -61,6 +61,7 @@ s32 scx_bpf_pick_any_cpu(const cpumask_t *cpus_allowed, u64 flags) __ksym; bool scx_bpf_task_running(const struct task_struct *p) __ksym; s32 scx_bpf_task_cpu(const struct task_struct *p) __ksym; struct rq *scx_bpf_cpu_rq(s32 cpu) __ksym; +struct cgroup *scx_bpf_task_cgroup(struct task_struct *p) __ksym; static inline __attribute__((format(printf, 1, 2))) void ___scx_bpf_bstr_format_checker(const char *fmt, ...) {} diff --git a/tools/testing/selftests/sched_ext/maximal.bpf.c b/tools/testing/selftests/sched_ext/maximal.bpf.c index 44612fdaf399..00bfa9cb95d3 100644 --- a/tools/testing/selftests/sched_ext/maximal.bpf.c +++ b/tools/testing/selftests/sched_ext/maximal.bpf.c @@ -95,6 +95,32 @@ void BPF_STRUCT_OPS(maximal_exit_task, struct task_struct *p, void BPF_STRUCT_OPS(maximal_disable, struct task_struct *p) {} +s32 BPF_STRUCT_OPS(maximal_cgroup_init, struct cgroup *cgrp, + struct scx_cgroup_init_args *args) +{ + return 0; +} + +void BPF_STRUCT_OPS(maximal_cgroup_exit, struct cgroup *cgrp) +{} + +s32 BPF_STRUCT_OPS(maximal_cgroup_prep_move, struct task_struct *p, + struct cgroup *from, struct cgroup *to) +{ + return 0; +} + +void BPF_STRUCT_OPS(maximal_cgroup_move, struct task_struct *p, + struct cgroup *from, struct cgroup *to) +{} + +void BPF_STRUCT_OPS(maximal_cgroup_cancel_move, struct task_struct *p, + struct cgroup *from, struct cgroup *to) +{} + +void BPF_STRUCT_OPS(maximal_cgroup_set_weight, struct cgroup *cgrp, u32 weight) +{} + s32 BPF_STRUCT_OPS_SLEEPABLE(maximal_init) { return 0; @@ -126,6 +152,12 @@ struct sched_ext_ops maximal_ops = { .enable = maximal_enable, .exit_task = maximal_exit_task, .disable = maximal_disable, + .cgroup_init = maximal_cgroup_init, + .cgroup_exit = maximal_cgroup_exit, + 
.cgroup_prep_move = maximal_cgroup_prep_move, + .cgroup_move = maximal_cgroup_move, + .cgroup_cancel_move = maximal_cgroup_cancel_move, + .cgroup_set_weight = maximal_cgroup_set_weight, .init = maximal_init, .exit = maximal_exit, .name = "maximal", |
