summaryrefslogtreecommitdiff
path: root/include/linux/memcontrol.h
diff options
context:
space:
mode:
authorKaiyang Zhao <kaiyang2@cs.cmu.edu>2024-08-14 17:42:27 +0000
committerAndrew Morton <akpm@linux-foundation.org>2024-09-03 21:15:36 -0700
commitf77f0c7514789577125c1b2df145703161736359 (patch)
tree1d2bbd2f91088bf89ac2dd3280ec2dc1f687ce62 /include/linux/memcontrol.h
parent78788c3ede90727ffb7b17287468a08b4e78ee3d (diff)
mm,memcg: provide per-cgroup counters for NUMA balancing operations
The ability to observe the demotion and promotion decisions made by the kernel on a per-cgroup basis is important for monitoring and tuning containerized workloads on machines equipped with tiered memory. Different containers in the system may experience drastically different memory tiering actions that cannot be distinguished from the global counters alone. For example, a container running a workload that has much hotter memory accesses will likely see more promotions and fewer demotions, potentially depriving a colocated container of top tier memory to such an extent that its performance degrades unacceptably. For another example, some containers may exhibit longer periods between data reuse, causing many more numa_hint_faults than numa_pages_migrated. In this case, tuning hot_threshold_ms may be appropriate, but the signal can easily be lost if only global counters are available. In the long term, we hope to introduce per-cgroup control of promotion and demotion actions to implement memory placement policies in tiering. This patch set adds seven counters to memory.stat in a cgroup: numa_pages_migrated, numa_pte_updates, numa_hint_faults, pgdemote_kswapd, pgdemote_khugepaged, pgdemote_direct and pgpromote_success. pgdemote_* and pgpromote_success are also available in memory.numa_stat. count_memcg_events_mm() is added to count multiple event occurrences at once, and get_mem_cgroup_from_folio() is added because we need to get a reference to the memcg of a folio before it's migrated to track numa_pages_migrated. The accounting of PGDEMOTE_* is moved to shrink_inactive_list() before being changed to per-cgroup.
[kaiyang2@cs.cmu.edu: add documentation of the memcg counters in cgroup-v2.rst] Link: https://lkml.kernel.org/r/20240814235122.252309-1-kaiyang2@cs.cmu.edu Link: https://lkml.kernel.org/r/20240814174227.30639-1-kaiyang2@cs.cmu.edu Signed-off-by: Kaiyang Zhao <kaiyang2@cs.cmu.edu> Cc: David Rientjes <rientjes@google.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Michal Hocko <mhocko@kernel.org> Cc: Muchun Song <muchun.song@linux.dev> Cc: Roman Gushchin <roman.gushchin@linux.dev> Cc: Shakeel Butt <shakeel.butt@linux.dev> Cc: Wei Xu <weixugc@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'include/linux/memcontrol.h')
-rw-r--r--include/linux/memcontrol.h24
1 files changed, 21 insertions, 3 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index ed170399179a..fe05fdb92779 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -784,6 +784,8 @@ struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm);
struct mem_cgroup *get_mem_cgroup_from_current(void);
+struct mem_cgroup *get_mem_cgroup_from_folio(struct folio *folio);
+
struct lruvec *folio_lruvec_lock(struct folio *folio);
struct lruvec *folio_lruvec_lock_irq(struct folio *folio);
struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio,
@@ -1028,8 +1030,8 @@ static inline void count_memcg_folio_events(struct folio *folio,
count_memcg_events(memcg, idx, nr);
}
-static inline void count_memcg_event_mm(struct mm_struct *mm,
- enum vm_event_item idx)
+static inline void count_memcg_events_mm(struct mm_struct *mm,
+ enum vm_event_item idx, unsigned long count)
{
struct mem_cgroup *memcg;
@@ -1039,10 +1041,16 @@ static inline void count_memcg_event_mm(struct mm_struct *mm,
rcu_read_lock();
memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
if (likely(memcg))
- count_memcg_events(memcg, idx, 1);
+ count_memcg_events(memcg, idx, count);
rcu_read_unlock();
}
+static inline void count_memcg_event_mm(struct mm_struct *mm,
+ enum vm_event_item idx)
+{
+ count_memcg_events_mm(mm, idx, 1);
+}
+
static inline void memcg_memory_event(struct mem_cgroup *memcg,
enum memcg_memory_event event)
{
@@ -1262,6 +1270,11 @@ static inline struct mem_cgroup *get_mem_cgroup_from_current(void)
return NULL;
}
+static inline struct mem_cgroup *get_mem_cgroup_from_folio(struct folio *folio)
+{
+ return NULL;
+}
+
static inline
struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css)
{
@@ -1484,6 +1497,11 @@ static inline void count_memcg_folio_events(struct folio *folio,
{
}
+static inline void count_memcg_events_mm(struct mm_struct *mm,
+ enum vm_event_item idx, unsigned long count)
+{
+}
+
static inline
void count_memcg_event_mm(struct mm_struct *mm, enum vm_event_item idx)
{