summaryrefslogtreecommitdiff
path: root/mm/memcontrol.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--mm/memcontrol.c260
1 files changed, 96 insertions, 164 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f3a84c64f35c..c265212bec8c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -132,15 +132,11 @@ static const char * const mem_cgroup_lru_names[] = {
* their hierarchy representation
*/
-struct mem_cgroup_tree_per_zone {
+struct mem_cgroup_tree_per_node {
struct rb_root rb_root;
spinlock_t lock;
};
-struct mem_cgroup_tree_per_node {
- struct mem_cgroup_tree_per_zone rb_tree_per_zone[MAX_NR_ZONES];
-};
-
struct mem_cgroup_tree {
struct mem_cgroup_tree_per_node *rb_tree_per_node[MAX_NUMNODES];
};
@@ -323,15 +319,6 @@ EXPORT_SYMBOL(memcg_kmem_enabled_key);
#endif /* !CONFIG_SLOB */
-static struct mem_cgroup_per_zone *
-mem_cgroup_zone_zoneinfo(struct mem_cgroup *memcg, struct zone *zone)
-{
- int nid = zone_to_nid(zone);
- int zid = zone_idx(zone);
-
- return &memcg->nodeinfo[nid]->zoneinfo[zid];
-}
-
/**
* mem_cgroup_css_from_page - css of the memcg associated with a page
* @page: page of interest
@@ -383,37 +370,35 @@ ino_t page_cgroup_ino(struct page *page)
return ino;
}
-static struct mem_cgroup_per_zone *
-mem_cgroup_page_zoneinfo(struct mem_cgroup *memcg, struct page *page)
+static struct mem_cgroup_per_node *
+mem_cgroup_page_nodeinfo(struct mem_cgroup *memcg, struct page *page)
{
int nid = page_to_nid(page);
- int zid = page_zonenum(page);
- return &memcg->nodeinfo[nid]->zoneinfo[zid];
+ return memcg->nodeinfo[nid];
}
-static struct mem_cgroup_tree_per_zone *
-soft_limit_tree_node_zone(int nid, int zid)
+static struct mem_cgroup_tree_per_node *
+soft_limit_tree_node(int nid)
{
- return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid];
+ return soft_limit_tree.rb_tree_per_node[nid];
}
-static struct mem_cgroup_tree_per_zone *
+static struct mem_cgroup_tree_per_node *
soft_limit_tree_from_page(struct page *page)
{
int nid = page_to_nid(page);
- int zid = page_zonenum(page);
- return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid];
+ return soft_limit_tree.rb_tree_per_node[nid];
}
-static void __mem_cgroup_insert_exceeded(struct mem_cgroup_per_zone *mz,
- struct mem_cgroup_tree_per_zone *mctz,
+static void __mem_cgroup_insert_exceeded(struct mem_cgroup_per_node *mz,
+ struct mem_cgroup_tree_per_node *mctz,
unsigned long new_usage_in_excess)
{
struct rb_node **p = &mctz->rb_root.rb_node;
struct rb_node *parent = NULL;
- struct mem_cgroup_per_zone *mz_node;
+ struct mem_cgroup_per_node *mz_node;
if (mz->on_tree)
return;
@@ -423,7 +408,7 @@ static void __mem_cgroup_insert_exceeded(struct mem_cgroup_per_zone *mz,
return;
while (*p) {
parent = *p;
- mz_node = rb_entry(parent, struct mem_cgroup_per_zone,
+ mz_node = rb_entry(parent, struct mem_cgroup_per_node,
tree_node);
if (mz->usage_in_excess < mz_node->usage_in_excess)
p = &(*p)->rb_left;
@@ -439,8 +424,8 @@ static void __mem_cgroup_insert_exceeded(struct mem_cgroup_per_zone *mz,
mz->on_tree = true;
}
-static void __mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz,
- struct mem_cgroup_tree_per_zone *mctz)
+static void __mem_cgroup_remove_exceeded(struct mem_cgroup_per_node *mz,
+ struct mem_cgroup_tree_per_node *mctz)
{
if (!mz->on_tree)
return;
@@ -448,8 +433,8 @@ static void __mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz,
mz->on_tree = false;
}
-static void mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz,
- struct mem_cgroup_tree_per_zone *mctz)
+static void mem_cgroup_remove_exceeded(struct mem_cgroup_per_node *mz,
+ struct mem_cgroup_tree_per_node *mctz)
{
unsigned long flags;
@@ -473,8 +458,8 @@ static unsigned long soft_limit_excess(struct mem_cgroup *memcg)
static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
{
unsigned long excess;
- struct mem_cgroup_per_zone *mz;
- struct mem_cgroup_tree_per_zone *mctz;
+ struct mem_cgroup_per_node *mz;
+ struct mem_cgroup_tree_per_node *mctz;
mctz = soft_limit_tree_from_page(page);
/*
@@ -482,7 +467,7 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
* because their event counter is not touched.
*/
for (; memcg; memcg = parent_mem_cgroup(memcg)) {
- mz = mem_cgroup_page_zoneinfo(memcg, page);
+ mz = mem_cgroup_page_nodeinfo(memcg, page);
excess = soft_limit_excess(memcg);
/*
* We have to update the tree if mz is on RB-tree or
@@ -507,24 +492,22 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
static void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg)
{
- struct mem_cgroup_tree_per_zone *mctz;
- struct mem_cgroup_per_zone *mz;
- int nid, zid;
+ struct mem_cgroup_tree_per_node *mctz;
+ struct mem_cgroup_per_node *mz;
+ int nid;
for_each_node(nid) {
- for (zid = 0; zid < MAX_NR_ZONES; zid++) {
- mz = &memcg->nodeinfo[nid]->zoneinfo[zid];
- mctz = soft_limit_tree_node_zone(nid, zid);
- mem_cgroup_remove_exceeded(mz, mctz);
- }
+ mz = mem_cgroup_nodeinfo(memcg, nid);
+ mctz = soft_limit_tree_node(nid);
+ mem_cgroup_remove_exceeded(mz, mctz);
}
}
-static struct mem_cgroup_per_zone *
-__mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
+static struct mem_cgroup_per_node *
+__mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_node *mctz)
{
struct rb_node *rightmost = NULL;
- struct mem_cgroup_per_zone *mz;
+ struct mem_cgroup_per_node *mz;
retry:
mz = NULL;
@@ -532,7 +515,7 @@ retry:
if (!rightmost)
goto done; /* Nothing to reclaim from */
- mz = rb_entry(rightmost, struct mem_cgroup_per_zone, tree_node);
+ mz = rb_entry(rightmost, struct mem_cgroup_per_node, tree_node);
/*
* Remove the node now but someone else can add it back,
* we will to add it back at the end of reclaim to its correct
@@ -546,10 +529,10 @@ done:
return mz;
}
-static struct mem_cgroup_per_zone *
-mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
+static struct mem_cgroup_per_node *
+mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_node *mctz)
{
- struct mem_cgroup_per_zone *mz;
+ struct mem_cgroup_per_node *mz;
spin_lock_irq(&mctz->lock);
mz = __mem_cgroup_largest_soft_limit_node(mctz);
@@ -643,20 +626,16 @@ unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
int nid, unsigned int lru_mask)
{
unsigned long nr = 0;
- int zid;
+ struct mem_cgroup_per_node *mz;
+ enum lru_list lru;
VM_BUG_ON((unsigned)nid >= nr_node_ids);
- for (zid = 0; zid < MAX_NR_ZONES; zid++) {
- struct mem_cgroup_per_zone *mz;
- enum lru_list lru;
-
- for_each_lru(lru) {
- if (!(BIT(lru) & lru_mask))
- continue;
- mz = &memcg->nodeinfo[nid]->zoneinfo[zid];
- nr += mz->lru_size[lru];
- }
+ for_each_lru(lru) {
+ if (!(BIT(lru) & lru_mask))
+ continue;
+ mz = mem_cgroup_nodeinfo(memcg, nid);
+ nr += mz->lru_size[lru];
}
return nr;
}
@@ -809,9 +788,9 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
rcu_read_lock();
if (reclaim) {
- struct mem_cgroup_per_zone *mz;
+ struct mem_cgroup_per_node *mz;
- mz = mem_cgroup_zone_zoneinfo(root, reclaim->zone);
+ mz = mem_cgroup_nodeinfo(root, reclaim->pgdat->node_id);
iter = &mz->iter[reclaim->priority];
if (prev && reclaim->generation != iter->generation)
@@ -910,19 +889,17 @@ static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg)
{
struct mem_cgroup *memcg = dead_memcg;
struct mem_cgroup_reclaim_iter *iter;
- struct mem_cgroup_per_zone *mz;
- int nid, zid;
+ struct mem_cgroup_per_node *mz;
+ int nid;
int i;
while ((memcg = parent_mem_cgroup(memcg))) {
for_each_node(nid) {
- for (zid = 0; zid < MAX_NR_ZONES; zid++) {
- mz = &memcg->nodeinfo[nid]->zoneinfo[zid];
- for (i = 0; i <= DEF_PRIORITY; i++) {
- iter = &mz->iter[i];
- cmpxchg(&iter->position,
- dead_memcg, NULL);
- }
+ mz = mem_cgroup_nodeinfo(memcg, nid);
+ for (i = 0; i <= DEF_PRIORITY; i++) {
+ iter = &mz->iter[i];
+ cmpxchg(&iter->position,
+ dead_memcg, NULL);
}
}
}
@@ -944,39 +921,6 @@ static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg)
iter = mem_cgroup_iter(NULL, iter, NULL))
/**
- * mem_cgroup_zone_lruvec - get the lru list vector for a zone and memcg
- * @zone: zone of the wanted lruvec
- * @memcg: memcg of the wanted lruvec
- *
- * Returns the lru list vector holding pages for the given @zone and
- * @mem. This can be the global zone lruvec, if the memory controller
- * is disabled.
- */
-struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone,
- struct mem_cgroup *memcg)
-{
- struct mem_cgroup_per_zone *mz;
- struct lruvec *lruvec;
-
- if (mem_cgroup_disabled()) {
- lruvec = &zone->lruvec;
- goto out;
- }
-
- mz = mem_cgroup_zone_zoneinfo(memcg, zone);
- lruvec = &mz->lruvec;
-out:
- /*
- * Since a node can be onlined after the mem_cgroup was created,
- * we have to be prepared to initialize lruvec->zone here;
- * and if offlined then reonlined, we need to reinitialize it.
- */
- if (unlikely(lruvec->zone != zone))
- lruvec->zone = zone;
- return lruvec;
-}
-
-/**
* mem_cgroup_page_lruvec - return lruvec for isolating/putting an LRU page
* @page: the page
* @zone: zone of the page
@@ -985,14 +929,14 @@ out:
* and putback protocol: the LRU lock must be held, and the page must
* either be PageLRU() or the caller must have isolated/allocated it.
*/
-struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct zone *zone)
+struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct pglist_data *pgdat)
{
- struct mem_cgroup_per_zone *mz;
+ struct mem_cgroup_per_node *mz;
struct mem_cgroup *memcg;
struct lruvec *lruvec;
if (mem_cgroup_disabled()) {
- lruvec = &zone->lruvec;
+ lruvec = &pgdat->lruvec;
goto out;
}
@@ -1004,7 +948,7 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct zone *zone)
if (!memcg)
memcg = root_mem_cgroup;
- mz = mem_cgroup_page_zoneinfo(memcg, page);
+ mz = mem_cgroup_page_nodeinfo(memcg, page);
lruvec = &mz->lruvec;
out:
/*
@@ -1012,8 +956,8 @@ out:
* we have to be prepared to initialize lruvec->zone here;
* and if offlined then reonlined, we need to reinitialize it.
*/
- if (unlikely(lruvec->zone != zone))
- lruvec->zone = zone;
+ if (unlikely(lruvec->pgdat != pgdat))
+ lruvec->pgdat = pgdat;
return lruvec;
}
@@ -1030,17 +974,15 @@ out:
void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
int nr_pages)
{
- struct mem_cgroup_per_zone *mz;
+ struct mem_cgroup_per_node *mz;
unsigned long *lru_size;
long size;
bool empty;
- __update_lru_size(lruvec, lru, nr_pages);
-
if (mem_cgroup_disabled())
return;
- mz = container_of(lruvec, struct mem_cgroup_per_zone, lruvec);
+ mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
lru_size = mz->lru_size + lru;
empty = list_empty(lruvec->lists + lru);
@@ -1276,9 +1218,9 @@ static bool mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
* select it. The goal is to allow it to allocate so that it may
* quickly exit and free its memory.
*/
- if (fatal_signal_pending(current) || task_will_free_mem(current)) {
+ if (task_will_free_mem(current)) {
mark_oom_victim(current);
- try_oom_reaper(current);
+ wake_oom_reaper(current);
goto unlock;
}
@@ -1433,7 +1375,7 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
#endif
static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
- struct zone *zone,
+ pg_data_t *pgdat,
gfp_t gfp_mask,
unsigned long *total_scanned)
{
@@ -1443,7 +1385,7 @@ static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
unsigned long excess;
unsigned long nr_scanned;
struct mem_cgroup_reclaim_cookie reclaim = {
- .zone = zone,
+ .pgdat = pgdat,
.priority = 0,
};
@@ -1473,8 +1415,8 @@ static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
}
continue;
}
- total += mem_cgroup_shrink_node_zone(victim, gfp_mask, false,
- zone, &nr_scanned);
+ total += mem_cgroup_shrink_node(victim, gfp_mask, false,
+ pgdat, &nr_scanned);
*total_scanned += nr_scanned;
if (!soft_limit_excess(root_memcg))
break;
@@ -2107,11 +2049,11 @@ static void lock_page_lru(struct page *page, int *isolated)
{
struct zone *zone = page_zone(page);
- spin_lock_irq(&zone->lru_lock);
+ spin_lock_irq(zone_lru_lock(zone));
if (PageLRU(page)) {
struct lruvec *lruvec;
- lruvec = mem_cgroup_page_lruvec(page, zone);
+ lruvec = mem_cgroup_page_lruvec(page, zone->zone_pgdat);
ClearPageLRU(page);
del_page_from_lru_list(page, lruvec, page_lru(page));
*isolated = 1;
@@ -2126,12 +2068,12 @@ static void unlock_page_lru(struct page *page, int isolated)
if (isolated) {
struct lruvec *lruvec;
- lruvec = mem_cgroup_page_lruvec(page, zone);
+ lruvec = mem_cgroup_page_lruvec(page, zone->zone_pgdat);
VM_BUG_ON_PAGE(PageLRU(page), page);
SetPageLRU(page);
add_page_to_lru_list(page, lruvec, page_lru(page));
}
- spin_unlock_irq(&zone->lru_lock);
+ spin_unlock_irq(zone_lru_lock(zone));
}
static void commit_charge(struct page *page, struct mem_cgroup *memcg,
@@ -2431,7 +2373,7 @@ void memcg_kmem_uncharge(struct page *page, int order)
/*
* Because tail pages are not marked as "used", set it. We're under
- * zone->lru_lock and migration entries setup in all page mappings.
+ * zone_lru_lock and migration entries setup in all page mappings.
*/
void mem_cgroup_split_huge_fixup(struct page *head)
{
@@ -2601,22 +2543,22 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
return ret;
}
-unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
+unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
gfp_t gfp_mask,
unsigned long *total_scanned)
{
unsigned long nr_reclaimed = 0;
- struct mem_cgroup_per_zone *mz, *next_mz = NULL;
+ struct mem_cgroup_per_node *mz, *next_mz = NULL;
unsigned long reclaimed;
int loop = 0;
- struct mem_cgroup_tree_per_zone *mctz;
+ struct mem_cgroup_tree_per_node *mctz;
unsigned long excess;
unsigned long nr_scanned;
if (order > 0)
return 0;
- mctz = soft_limit_tree_node_zone(zone_to_nid(zone), zone_idx(zone));
+ mctz = soft_limit_tree_node(pgdat->node_id);
/*
* This loop can run a while, specially if mem_cgroup's continuously
* keep exceeding their soft limit and putting the system under
@@ -2631,7 +2573,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
break;
nr_scanned = 0;
- reclaimed = mem_cgroup_soft_reclaim(mz->memcg, zone,
+ reclaimed = mem_cgroup_soft_reclaim(mz->memcg, pgdat,
gfp_mask, &nr_scanned);
nr_reclaimed += reclaimed;
*total_scanned += nr_scanned;
@@ -3252,22 +3194,21 @@ static int memcg_stat_show(struct seq_file *m, void *v)
#ifdef CONFIG_DEBUG_VM
{
- int nid, zid;
- struct mem_cgroup_per_zone *mz;
+ pg_data_t *pgdat;
+ struct mem_cgroup_per_node *mz;
struct zone_reclaim_stat *rstat;
unsigned long recent_rotated[2] = {0, 0};
unsigned long recent_scanned[2] = {0, 0};
- for_each_online_node(nid)
- for (zid = 0; zid < MAX_NR_ZONES; zid++) {
- mz = &memcg->nodeinfo[nid]->zoneinfo[zid];
- rstat = &mz->lruvec.reclaim_stat;
+ for_each_online_pgdat(pgdat) {
+ mz = mem_cgroup_nodeinfo(memcg, pgdat->node_id);
+ rstat = &mz->lruvec.reclaim_stat;
- recent_rotated[0] += rstat->recent_rotated[0];
- recent_rotated[1] += rstat->recent_rotated[1];
- recent_scanned[0] += rstat->recent_scanned[0];
- recent_scanned[1] += rstat->recent_scanned[1];
- }
+ recent_rotated[0] += rstat->recent_rotated[0];
+ recent_rotated[1] += rstat->recent_rotated[1];
+ recent_scanned[0] += rstat->recent_scanned[0];
+ recent_scanned[1] += rstat->recent_scanned[1];
+ }
seq_printf(m, "recent_rotated_anon %lu\n", recent_rotated[0]);
seq_printf(m, "recent_rotated_file %lu\n", recent_rotated[1]);
seq_printf(m, "recent_scanned_anon %lu\n", recent_scanned[0]);
@@ -4147,11 +4088,10 @@ struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
return idr_find(&mem_cgroup_idr, id);
}
-static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
+static int alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
{
struct mem_cgroup_per_node *pn;
- struct mem_cgroup_per_zone *mz;
- int zone, tmp = node;
+ int tmp = node;
/*
* This routine is called against possible nodes.
* But it's BUG to call kmalloc() against offline node.
@@ -4166,18 +4106,16 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
if (!pn)
return 1;
- for (zone = 0; zone < MAX_NR_ZONES; zone++) {
- mz = &pn->zoneinfo[zone];
- lruvec_init(&mz->lruvec);
- mz->usage_in_excess = 0;
- mz->on_tree = false;
- mz->memcg = memcg;
- }
+ lruvec_init(&pn->lruvec);
+ pn->usage_in_excess = 0;
+ pn->on_tree = false;
+ pn->memcg = memcg;
+
memcg->nodeinfo[node] = pn;
return 0;
}
-static void free_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
+static void free_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
{
kfree(memcg->nodeinfo[node]);
}
@@ -4188,7 +4126,7 @@ static void mem_cgroup_free(struct mem_cgroup *memcg)
memcg_wb_domain_exit(memcg);
for_each_node(node)
- free_mem_cgroup_per_zone_info(memcg, node);
+ free_mem_cgroup_per_node_info(memcg, node);
free_percpu(memcg->stat);
kfree(memcg);
}
@@ -4217,7 +4155,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
goto fail;
for_each_node(node)
- if (alloc_mem_cgroup_per_zone_info(memcg, node))
+ if (alloc_mem_cgroup_per_node_info(memcg, node))
goto fail;
if (memcg_wb_domain_init(memcg, GFP_KERNEL))
@@ -5233,7 +5171,7 @@ static int memory_stat_show(struct seq_file *m, void *v)
seq_printf(m, "file %llu\n",
(u64)stat[MEM_CGROUP_STAT_CACHE] * PAGE_SIZE);
seq_printf(m, "kernel_stack %llu\n",
- (u64)stat[MEMCG_KERNEL_STACK] * PAGE_SIZE);
+ (u64)stat[MEMCG_KERNEL_STACK_KB] * 1024);
seq_printf(m, "slab %llu\n",
(u64)(stat[MEMCG_SLAB_RECLAIMABLE] +
stat[MEMCG_SLAB_UNRECLAIMABLE]) * PAGE_SIZE);
@@ -5820,18 +5758,12 @@ static int __init mem_cgroup_init(void)
for_each_node(node) {
struct mem_cgroup_tree_per_node *rtpn;
- int zone;
rtpn = kzalloc_node(sizeof(*rtpn), GFP_KERNEL,
node_online(node) ? node : NUMA_NO_NODE);
- for (zone = 0; zone < MAX_NR_ZONES; zone++) {
- struct mem_cgroup_tree_per_zone *rtpz;
-
- rtpz = &rtpn->rb_tree_per_zone[zone];
- rtpz->rb_root = RB_ROOT;
- spin_lock_init(&rtpz->lock);
- }
+ rtpn->rb_root = RB_ROOT;
+ spin_lock_init(&rtpn->lock);
soft_limit_tree.rb_tree_per_node[node] = rtpn;
}