summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/linux/memcontrol.h6
-rw-r--r--include/linux/sched.h3
-rw-r--r--include/linux/tracehook.h3
-rw-r--r--mm/memcontrol.c47
4 files changed, 51 insertions, 8 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 56174c7199ee..77bf42966200 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -401,6 +401,8 @@ static inline int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec)
return inactive * inactive_ratio < active;
}
+void mem_cgroup_handle_over_high(void);
+
void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
struct task_struct *p);
@@ -620,6 +622,10 @@ static inline void mem_cgroup_end_page_stat(struct mem_cgroup *memcg)
{
}
+static inline void mem_cgroup_handle_over_high(void)
+{
+}
+
static inline void mem_cgroup_oom_enable(void)
{
}
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 17bf8b845aa0..055f2ee3b0f0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1809,6 +1809,9 @@ struct task_struct {
struct mem_cgroup *memcg_in_oom;
gfp_t memcg_oom_gfp_mask;
int memcg_oom_order;
+
+ /* number of pages to reclaim on returning to userland */
+ unsigned int memcg_nr_pages_over_high;
#endif
#ifdef CONFIG_UPROBES
struct uprobe_task *utask;
diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index 84d497297c5f..26c152122a42 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -50,6 +50,7 @@
#include <linux/ptrace.h>
#include <linux/security.h>
#include <linux/task_work.h>
+#include <linux/memcontrol.h>
struct linux_binprm;
/*
@@ -188,6 +189,8 @@ static inline void tracehook_notify_resume(struct pt_regs *regs)
smp_mb__after_atomic();
if (unlikely(current->task_works))
task_work_run();
+
+ mem_cgroup_handle_over_high();
}
#endif /* <linux/tracehook.h> */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 47bd7f13f526..327dcda3ebf6 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -62,6 +62,7 @@
#include <linux/oom.h>
#include <linux/lockdep.h>
#include <linux/file.h>
+#include <linux/tracehook.h>
#include "internal.h"
#include <net/sock.h>
#include <net/ip.h>
@@ -1972,6 +1973,31 @@ static int memcg_cpu_hotplug_callback(struct notifier_block *nb,
return NOTIFY_OK;
}
+/*
+ * Scheduled by try_charge() to be executed from the userland return path
+ * and reclaims memory over the high limit.
+ */
+void mem_cgroup_handle_over_high(void)
+{
+ unsigned int nr_pages = current->memcg_nr_pages_over_high;
+ struct mem_cgroup *memcg, *pos;
+
+ if (likely(!nr_pages))
+ return;
+
+ pos = memcg = get_mem_cgroup_from_mm(current->mm);
+
+ do {
+ if (page_counter_read(&pos->memory) <= pos->high)
+ continue;
+ mem_cgroup_events(pos, MEMCG_HIGH, 1);
+ try_to_free_mem_cgroup_pages(pos, nr_pages, GFP_KERNEL, true);
+ } while ((pos = parent_mem_cgroup(pos)));
+
+ css_put(&memcg->css);
+ current->memcg_nr_pages_over_high = 0;
+}
+
static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
unsigned int nr_pages)
{
@@ -2080,17 +2106,22 @@ done_restock:
css_get_many(&memcg->css, batch);
if (batch > nr_pages)
refill_stock(memcg, batch - nr_pages);
- if (!(gfp_mask & __GFP_WAIT))
- goto done;
+
/*
- * If the hierarchy is above the normal consumption range,
- * make the charging task trim their excess contribution.
+ * If the hierarchy is above the normal consumption range, schedule
+ * reclaim on returning to userland. We can perform reclaim here
+ * if __GFP_WAIT but let's always punt for simplicity and so that
+ * GFP_KERNEL can consistently be used during reclaim. @memcg is
+ * not recorded as it most likely matches current's and won't
+ * change in the meantime. As high limit is checked again before
+ * reclaim, the cost of mismatch is negligible.
*/
do {
- if (page_counter_read(&memcg->memory) <= memcg->high)
- continue;
- mem_cgroup_events(memcg, MEMCG_HIGH, 1);
- try_to_free_mem_cgroup_pages(memcg, nr_pages, gfp_mask, true);
+ if (page_counter_read(&memcg->memory) > memcg->high) {
+ current->memcg_nr_pages_over_high += nr_pages;
+ set_notify_resume(current);
+ break;
+ }
} while ((memcg = parent_mem_cgroup(memcg)));
done:
return ret;