summaryrefslogtreecommitdiff
path: root/mm/memcontrol.c
diff options
context:
space:
mode:
authorDan Schatzberg <schatzberg.dan@gmail.com>2021-06-28 19:38:18 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-06-29 10:53:50 -0700
commit04f94e3fbe1afcb815d7c7ace78c6779772aa837 (patch)
tree600199a4aa9e881e2885332700d72225dd6faad4 /mm/memcontrol.c
parent87579e9b7d8dc36e7cfc40c03f1ae5634e16e2c5 (diff)
mm: charge active memcg when no mm is set
set_active_memcg() worked for kernel allocations but was silently ignored for user pages. This patch establishes a precedence order for who gets charged: 1. If there is a memcg associated with the page already, that memcg is charged. This happens during swapin. 2. If an explicit mm is passed, mm->memcg is charged. This happens during page faults, which can be triggered in remote VMs (eg gup). 3. Otherwise consult the current process context. If there is an active_memcg, use that. Otherwise, current->mm->memcg. Previously, if a NULL mm was passed to mem_cgroup_charge (case 3) it would always charge the root cgroup. Now it looks up the active_memcg first (falling back to charging the root cgroup if not set). Link: https://lkml.kernel.org/r/20210610173944.1203706-3-schatzberg.dan@gmail.com Signed-off-by: Dan Schatzberg <schatzberg.dan@gmail.com> Acked-by: Johannes Weiner <hannes@cmpxchg.org> Acked-by: Tejun Heo <tj@kernel.org> Acked-by: Chris Down <chris@chrisdown.name> Acked-by: Jens Axboe <axboe@kernel.dk> Reviewed-by: Shakeel Butt <shakeelb@google.com> Reviewed-by: Michal Koutný <mkoutny@suse.com> Cc: Michal Hocko <mhocko@suse.com> Cc: Ming Lei <ming.lei@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--mm/memcontrol.c41
1 files changed, 27 insertions, 14 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f7a552eb3e9d..8f3244e59b30 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -897,13 +897,24 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
}
EXPORT_SYMBOL(mem_cgroup_from_task);
+static __always_inline struct mem_cgroup *active_memcg(void)
+{
+ if (in_interrupt())
+ return this_cpu_read(int_active_memcg);
+ else
+ return current->active_memcg;
+}
+
/**
* get_mem_cgroup_from_mm: Obtain a reference on given mm_struct's memcg.
* @mm: mm from which memcg should be extracted. It can be NULL.
*
- * Obtain a reference on mm->memcg and returns it if successful. Otherwise
- * root_mem_cgroup is returned. However if mem_cgroup is disabled, NULL is
- * returned.
+ * Obtain a reference on mm->memcg and returns it if successful. If mm
+ * is NULL, then the memcg is chosen as follows:
+ * 1) The active memcg, if set.
+ * 2) current->mm->memcg, if available
+ * 3) root memcg
+ * If mem_cgroup is disabled, NULL is returned.
*/
struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
{
@@ -921,8 +932,17 @@ struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
* counting is disabled on the root level in the
* cgroup core. See CSS_NO_REF.
*/
- if (unlikely(!mm))
- return root_mem_cgroup;
+ if (unlikely(!mm)) {
+ memcg = active_memcg();
+ if (unlikely(memcg)) {
+ /* remote memcg must hold a ref */
+ css_get(&memcg->css);
+ return memcg;
+ }
+ mm = current->mm;
+ if (unlikely(!mm))
+ return root_mem_cgroup;
+ }
rcu_read_lock();
do {
@@ -935,14 +955,6 @@ struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
}
EXPORT_SYMBOL(get_mem_cgroup_from_mm);
-static __always_inline struct mem_cgroup *active_memcg(void)
-{
- if (in_interrupt())
- return this_cpu_read(int_active_memcg);
- else
- return current->active_memcg;
-}
-
static __always_inline bool memcg_kmem_bypass(void)
{
/* Allow remote memcg charging from any context. */
@@ -6711,7 +6723,8 @@ out:
* @gfp_mask: reclaim mode
*
* Try to charge @page to the memcg that @mm belongs to, reclaiming
- * pages according to @gfp_mask if necessary.
+ * pages according to @gfp_mask if necessary. if @mm is NULL, try to
+ * charge to the active memcg.
*
* Do not use this for pages allocated for swapin.
*