summaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
author Chengming Zhou <zhouchengming@bytedance.com> 2022-09-07 17:03:32 +0800
committer Peter Zijlstra <peterz@infradead.org> 2022-09-09 11:08:33 +0200
commit 34f26a15611afb03c33df6819359d36f5b382589 (patch)
tree 38765bb0112e0ba6c8f173d8a6a129bf0b9f92f0 /include/linux
parent dc86aba751e2867244411adda1562f6664747019 (diff)
sched/psi: Per-cgroup PSI accounting disable/re-enable interface
PSI accounts stalls for each cgroup separately and aggregates them at each level of the hierarchy. This may cause non-negligible overhead for some workloads that run deep in the hierarchy.

Commit 3958e2d0c34e ("cgroup: make per-cgroup pressure stall tracking configurable") lets PSI skip per-cgroup stall accounting and account only system-wide, avoiding this per-level overhead.

But for our use case, we also want PSI stats accounted for leaf cgroups, so that userspace can make adjustments on those cgroups in addition to system-wide adjustments.

So this patch introduces a per-cgroup PSI accounting disable/re-enable interface, "cgroup.pressure", a read-write single-value file whose allowed values are "0" and "1". The default is "1", so per-cgroup PSI stats are enabled by default.

Implementation details:

It should be relatively straightforward to disable and re-enable state aggregation, time tracking, and averaging on a per-cgroup level, if we can live with losing history from while it was disabled. I.e. the avgs will restart from 0, and total= will have gaps.

But stopping and restarting groupc->tasks[] updates is hard or complex, and is not implemented in this patch. So we always update groupc->tasks[] and the PSI_ONCPU bit in psi_group_change(), even when the cgroup's PSI stats are disabled.

Suggested-by: Johannes Weiner <hannes@cmpxchg.org>
Suggested-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Link: https://lkml.kernel.org/r/20220907090332.2078-1-zhouchengming@bytedance.com
Diffstat (limited to 'include/linux')
-rw-r--r-- include/linux/cgroup-defs.h 3
-rw-r--r-- include/linux/psi.h 2
-rw-r--r-- include/linux/psi_types.h 3
3 files changed, 8 insertions, 0 deletions
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 4bcf56b3491c..7df76b318245 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -428,6 +428,9 @@ struct cgroup {
struct cgroup_file procs_file; /* handle for "cgroup.procs" */
struct cgroup_file events_file; /* handle for "cgroup.events" */
+ /* handles for "{cpu,memory,io,irq}.pressure" */
+ struct cgroup_file psi_files[NR_PSI_RESOURCES];
+
/*
* The bitmask of subsystems enabled on the child cgroups.
* ->subtree_control is the one configured through
diff --git a/include/linux/psi.h b/include/linux/psi.h
index 362a74ca1d3b..b029a847def1 100644
--- a/include/linux/psi.h
+++ b/include/linux/psi.h
@@ -39,6 +39,7 @@ static inline struct psi_group *cgroup_psi(struct cgroup *cgrp)
int psi_cgroup_alloc(struct cgroup *cgrp);
void psi_cgroup_free(struct cgroup *cgrp);
void cgroup_move_task(struct task_struct *p, struct css_set *to);
+void psi_cgroup_restart(struct psi_group *group);
#endif
#else /* CONFIG_PSI */
@@ -60,6 +61,7 @@ static inline void cgroup_move_task(struct task_struct *p, struct css_set *to)
{
rcu_assign_pointer(p->cgroups, to);
}
+static inline void psi_cgroup_restart(struct psi_group *group) {}
#endif
#endif /* CONFIG_PSI */
diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h
index a0b746258c68..6e4372735068 100644
--- a/include/linux/psi_types.h
+++ b/include/linux/psi_types.h
@@ -152,6 +152,7 @@ struct psi_trigger {
struct psi_group {
struct psi_group *parent;
+ bool enabled;
/* Protects data used by the aggregator */
struct mutex avgs_lock;
@@ -194,6 +195,8 @@ struct psi_group {
#else /* CONFIG_PSI */
+#define NR_PSI_RESOURCES 0
+
struct psi_group { };
#endif /* CONFIG_PSI */