summaryrefslogtreecommitdiff
path: root/include/linux/memcontrol.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/memcontrol.h')
-rw-r--r--include/linux/memcontrol.h344
1 files changed, 190 insertions, 154 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 8ef51c58f470..bfe5c486f4ad 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -76,10 +76,27 @@ enum mem_cgroup_events_target {
};
struct memcg_vmstats_percpu {
- long stat[MEMCG_NR_STAT];
- unsigned long events[NR_VM_EVENT_ITEMS];
- unsigned long nr_page_events;
- unsigned long targets[MEM_CGROUP_NTARGETS];
+ /* Local (CPU and cgroup) page state & events */
+ long state[MEMCG_NR_STAT];
+ unsigned long events[NR_VM_EVENT_ITEMS];
+
+ /* Delta calculation for lockless upward propagation */
+ long state_prev[MEMCG_NR_STAT];
+ unsigned long events_prev[NR_VM_EVENT_ITEMS];
+
+ /* Cgroup1: threshold notifications & softlimit tree updates */
+ unsigned long nr_page_events;
+ unsigned long targets[MEM_CGROUP_NTARGETS];
+};
+
+struct memcg_vmstats {
+ /* Aggregated (CPU and subtree) page state & events */
+ long state[MEMCG_NR_STAT];
+ unsigned long events[NR_VM_EVENT_ITEMS];
+
+ /* Pending child counts during tree propagation */
+ long state_pending[MEMCG_NR_STAT];
+ unsigned long events_pending[NR_VM_EVENT_ITEMS];
};
struct mem_cgroup_reclaim_iter {
@@ -97,12 +114,13 @@ struct batched_lruvec_stat {
};
/*
- * Bitmap of shrinker::id corresponding to memcg-aware shrinkers,
- * which have elements charged to this memcg.
+ * Bitmap and deferred work of shrinker::id corresponding to memcg-aware
+ * shrinkers, which have elements charged to this memcg.
*/
-struct memcg_shrinker_map {
+struct shrinker_info {
struct rcu_head rcu;
- unsigned long map[];
+ atomic_long_t *nr_deferred;
+ unsigned long *map;
};
/*
@@ -128,7 +146,7 @@ struct mem_cgroup_per_node {
struct mem_cgroup_reclaim_iter iter;
- struct memcg_shrinker_map __rcu *shrinker_map;
+ struct shrinker_info __rcu *shrinker_info;
struct rb_node tree_node; /* RB tree node */
unsigned long usage_in_excess;/* Set to the value by which */
@@ -174,7 +192,7 @@ enum memcg_kmem_state {
struct memcg_padding {
char x[0];
} ____cacheline_internodealigned_in_smp;
-#define MEMCG_PADDING(name) struct memcg_padding name;
+#define MEMCG_PADDING(name) struct memcg_padding name
#else
#define MEMCG_PADDING(name)
#endif
@@ -287,8 +305,8 @@ struct mem_cgroup {
MEMCG_PADDING(_pad1_);
- atomic_long_t vmstats[MEMCG_NR_STAT];
- atomic_long_t vmevents[NR_VM_EVENT_ITEMS];
+ /* memory.stat */
+ struct memcg_vmstats vmstats;
/* memory.events */
atomic_long_t memory_events[MEMCG_NR_MEMORY_EVENTS];
@@ -315,10 +333,6 @@ struct mem_cgroup {
atomic_t moving_account;
struct task_struct *move_lock_task;
- /* Legacy local VM stats and events */
- struct memcg_vmstats_percpu __percpu *vmstats_local;
-
- /* Subtree VM stats and events (batched updates) */
struct memcg_vmstats_percpu __percpu *vmstats_percpu;
#ifdef CONFIG_CGROUP_WRITEBACK
@@ -335,8 +349,7 @@ struct mem_cgroup {
struct deferred_split deferred_split_queue;
#endif
- struct mem_cgroup_per_node *nodeinfo[0];
- /* WARNING: nodeinfo must be the last member here */
+ struct mem_cgroup_per_node *nodeinfo[];
};
/*
@@ -358,6 +371,62 @@ enum page_memcg_data_flags {
#define MEMCG_DATA_FLAGS_MASK (__NR_MEMCG_DATA_FLAGS - 1)
+static inline bool PageMemcgKmem(struct page *page);
+
+/*
+ * After the initialization objcg->memcg is always pointing at
+ * a valid memcg, but can be atomically swapped to the parent memcg.
+ *
+ * The caller must ensure that the returned memcg won't be released:
+ * e.g. acquire the rcu_read_lock or css_set_lock.
+ */
+static inline struct mem_cgroup *obj_cgroup_memcg(struct obj_cgroup *objcg)
+{
+ return READ_ONCE(objcg->memcg);
+}
+
+/*
+ * __page_memcg - get the memory cgroup associated with a non-kmem page
+ * @page: a pointer to the page struct
+ *
+ * Returns a pointer to the memory cgroup associated with the page,
+ * or NULL. This function assumes that the page is known to have a
+ * proper memory cgroup pointer. It's not safe to call this function
+ * against some type of pages, e.g. slab pages or ex-slab pages or
+ * kmem pages.
+ */
+static inline struct mem_cgroup *__page_memcg(struct page *page)
+{
+ unsigned long memcg_data = page->memcg_data;
+
+ VM_BUG_ON_PAGE(PageSlab(page), page);
+ VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_OBJCGS, page);
+ VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, page);
+
+ return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
+}
+
+/*
+ * __page_objcg - get the object cgroup associated with a kmem page
+ * @page: a pointer to the page struct
+ *
+ * Returns a pointer to the object cgroup associated with the page,
+ * or NULL. This function assumes that the page is known to have a
+ * proper object cgroup pointer. It's not safe to call this function
+ * against some type of pages, e.g. slab pages or ex-slab pages or
+ * LRU pages.
+ */
+static inline struct obj_cgroup *__page_objcg(struct page *page)
+{
+ unsigned long memcg_data = page->memcg_data;
+
+ VM_BUG_ON_PAGE(PageSlab(page), page);
+ VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_OBJCGS, page);
+ VM_BUG_ON_PAGE(!(memcg_data & MEMCG_DATA_KMEM), page);
+
+ return (struct obj_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
+}
+
/*
* page_memcg - get the memory cgroup associated with a page
* @page: a pointer to the page struct
@@ -367,20 +436,23 @@ enum page_memcg_data_flags {
* proper memory cgroup pointer. It's not safe to call this function
* against some type of pages, e.g. slab pages or ex-slab pages.
*
- * Any of the following ensures page and memcg binding stability:
+ * For a non-kmem page any of the following ensures page and memcg binding
+ * stability:
+ *
* - the page lock
* - LRU isolation
* - lock_page_memcg()
* - exclusive reference
+ *
+ * For a kmem page a caller should hold an rcu read lock to protect memcg
+ * associated with a kmem page from being released.
*/
static inline struct mem_cgroup *page_memcg(struct page *page)
{
- unsigned long memcg_data = page->memcg_data;
-
- VM_BUG_ON_PAGE(PageSlab(page), page);
- VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_OBJCGS, page);
-
- return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
+ if (PageMemcgKmem(page))
+ return obj_cgroup_memcg(__page_objcg(page));
+ else
+ return __page_memcg(page);
}
/*
@@ -394,11 +466,19 @@ static inline struct mem_cgroup *page_memcg(struct page *page)
*/
static inline struct mem_cgroup *page_memcg_rcu(struct page *page)
{
+ unsigned long memcg_data = READ_ONCE(page->memcg_data);
+
VM_BUG_ON_PAGE(PageSlab(page), page);
WARN_ON_ONCE(!rcu_read_lock_held());
- return (struct mem_cgroup *)(READ_ONCE(page->memcg_data) &
- ~MEMCG_DATA_FLAGS_MASK);
+ if (memcg_data & MEMCG_DATA_KMEM) {
+ struct obj_cgroup *objcg;
+
+ objcg = (void *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
+ return obj_cgroup_memcg(objcg);
+ }
+
+ return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
}
/*
@@ -406,15 +486,21 @@ static inline struct mem_cgroup *page_memcg_rcu(struct page *page)
* @page: a pointer to the page struct
*
* Returns a pointer to the memory cgroup associated with the page,
- * or NULL. This function unlike page_memcg() can take any page
+ * or NULL. This function unlike page_memcg() can take any page
* as an argument. It has to be used in cases when it's not known if a page
- * has an associated memory cgroup pointer or an object cgroups vector.
+ * has an associated memory cgroup pointer or an object cgroups vector or
+ * an object cgroup.
+ *
+ * For a non-kmem page any of the following ensures page and memcg binding
+ * stability:
*
- * Any of the following ensures page and memcg binding stability:
* - the page lock
* - LRU isolation
* - lock_page_memcg()
* - exclusive reference
+ *
+ * For a kmem page a caller should hold an rcu read lock to protect memcg
+ * associated with a kmem page from being released.
*/
static inline struct mem_cgroup *page_memcg_check(struct page *page)
{
@@ -427,9 +513,17 @@ static inline struct mem_cgroup *page_memcg_check(struct page *page)
if (memcg_data & MEMCG_DATA_OBJCGS)
return NULL;
+ if (memcg_data & MEMCG_DATA_KMEM) {
+ struct obj_cgroup *objcg;
+
+ objcg = (void *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
+ return obj_cgroup_memcg(objcg);
+ }
+
return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
}
+#ifdef CONFIG_MEMCG_KMEM
/*
* PageMemcgKmem - check if the page has MemcgKmem flag set
* @page: a pointer to the page struct
@@ -444,7 +538,6 @@ static inline bool PageMemcgKmem(struct page *page)
return page->memcg_data & MEMCG_DATA_KMEM;
}
-#ifdef CONFIG_MEMCG_KMEM
/*
* page_objcgs - get the object cgroups vector associated with a page
* @page: a pointer to the page struct
@@ -486,6 +579,11 @@ static inline struct obj_cgroup **page_objcgs_check(struct page *page)
}
#else
+static inline bool PageMemcgKmem(struct page *page)
+{
+ return false;
+}
+
static inline struct obj_cgroup **page_objcgs(struct page *page)
{
return NULL;
@@ -596,18 +694,15 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *memcg)
}
int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask);
+int mem_cgroup_swapin_charge_page(struct page *page, struct mm_struct *mm,
+ gfp_t gfp, swp_entry_t entry);
+void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry);
void mem_cgroup_uncharge(struct page *page);
void mem_cgroup_uncharge_list(struct list_head *page_list);
void mem_cgroup_migrate(struct page *oldpage, struct page *newpage);
-static struct mem_cgroup_per_node *
-mem_cgroup_nodeinfo(struct mem_cgroup *memcg, int nid)
-{
- return memcg->nodeinfo[nid];
-}
-
/**
* mem_cgroup_lruvec - get the lru list vector for a memcg & node
* @memcg: memcg of the wanted lruvec
@@ -631,7 +726,7 @@ static inline struct lruvec *mem_cgroup_lruvec(struct mem_cgroup *memcg,
if (!memcg)
memcg = root_mem_cgroup;
- mz = mem_cgroup_nodeinfo(memcg, pgdat->node_id);
+ mz = memcg->nodeinfo[pgdat->node_id];
lruvec = &mz->lruvec;
out:
/*
@@ -647,35 +742,18 @@ out:
/**
* mem_cgroup_page_lruvec - return lruvec for isolating/putting an LRU page
* @page: the page
- * @pgdat: pgdat of the page
*
* This function relies on page->mem_cgroup being stable.
*/
-static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page,
- struct pglist_data *pgdat)
+static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page)
{
+ pg_data_t *pgdat = page_pgdat(page);
struct mem_cgroup *memcg = page_memcg(page);
VM_WARN_ON_ONCE_PAGE(!memcg && !mem_cgroup_disabled(), page);
return mem_cgroup_lruvec(memcg, pgdat);
}
-static inline bool lruvec_holds_page_lru_lock(struct page *page,
- struct lruvec *lruvec)
-{
- pg_data_t *pgdat = page_pgdat(page);
- const struct mem_cgroup *memcg;
- struct mem_cgroup_per_node *mz;
-
- if (mem_cgroup_disabled())
- return lruvec == &pgdat->__lruvec;
-
- mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
- memcg = page_memcg(page) ? : root_mem_cgroup;
-
- return lruvec->pgdat == pgdat && mz->memcg == memcg;
-}
-
struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm);
@@ -708,21 +786,15 @@ static inline void obj_cgroup_get(struct obj_cgroup *objcg)
percpu_ref_get(&objcg->refcnt);
}
-static inline void obj_cgroup_put(struct obj_cgroup *objcg)
+static inline void obj_cgroup_get_many(struct obj_cgroup *objcg,
+ unsigned long nr)
{
- percpu_ref_put(&objcg->refcnt);
+ percpu_ref_get_many(&objcg->refcnt, nr);
}
-/*
- * After the initialization objcg->memcg is always pointing at
- * a valid memcg, but can be atomically swapped to the parent memcg.
- *
- * The caller must ensure that the returned memcg won't be released:
- * e.g. acquire the rcu_read_lock or css_set_lock.
- */
-static inline struct mem_cgroup *obj_cgroup_memcg(struct obj_cgroup *objcg)
+static inline void obj_cgroup_put(struct obj_cgroup *objcg)
{
- return READ_ONCE(objcg->memcg);
+ percpu_ref_put(&objcg->refcnt);
}
static inline void mem_cgroup_put(struct mem_cgroup *memcg)
@@ -867,43 +939,9 @@ void mem_cgroup_print_oom_group(struct mem_cgroup *memcg);
extern bool cgroup_memory_noswap;
#endif
-struct mem_cgroup *lock_page_memcg(struct page *page);
-void __unlock_page_memcg(struct mem_cgroup *memcg);
+void lock_page_memcg(struct page *page);
void unlock_page_memcg(struct page *page);
-/*
- * idx can be of type enum memcg_stat_item or node_stat_item.
- * Keep in sync with memcg_exact_page_state().
- */
-static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
-{
- long x = atomic_long_read(&memcg->vmstats[idx]);
-#ifdef CONFIG_SMP
- if (x < 0)
- x = 0;
-#endif
- return x;
-}
-
-/*
- * idx can be of type enum memcg_stat_item or node_stat_item.
- * Keep in sync with memcg_exact_page_state().
- */
-static inline unsigned long memcg_page_state_local(struct mem_cgroup *memcg,
- int idx)
-{
- long x = 0;
- int cpu;
-
- for_each_possible_cpu(cpu)
- x += per_cpu(memcg->vmstats_local->stat[idx], cpu);
-#ifdef CONFIG_SMP
- if (x < 0)
- x = 0;
-#endif
- return x;
-}
-
void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val);
/* idx can be of type enum memcg_stat_item or node_stat_item */
@@ -979,10 +1017,6 @@ static inline void mod_memcg_lruvec_state(struct lruvec *lruvec,
local_irq_restore(flags);
}
-unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
- gfp_t gfp_mask,
- unsigned long *total_scanned);
-
void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx,
unsigned long count);
@@ -1063,13 +1097,15 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm,
void split_page_memcg(struct page *head, unsigned int nr);
+unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
+ gfp_t gfp_mask,
+ unsigned long *total_scanned);
+
#else /* CONFIG_MEMCG */
#define MEM_CGROUP_ID_SHIFT 0
#define MEM_CGROUP_ID_MAX 0
-struct mem_cgroup;
-
static inline struct mem_cgroup *page_memcg(struct page *page)
{
return NULL;
@@ -1139,6 +1175,16 @@ static inline int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
return 0;
}
+static inline int mem_cgroup_swapin_charge_page(struct page *page,
+ struct mm_struct *mm, gfp_t gfp, swp_entry_t entry)
+{
+ return 0;
+}
+
+static inline void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry)
+{
+}
+
static inline void mem_cgroup_uncharge(struct page *page)
{
}
@@ -1157,18 +1203,15 @@ static inline struct lruvec *mem_cgroup_lruvec(struct mem_cgroup *memcg,
return &pgdat->__lruvec;
}
-static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page,
- struct pglist_data *pgdat)
+static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page)
{
+ pg_data_t *pgdat = page_pgdat(page);
+
return &pgdat->__lruvec;
}
-static inline bool lruvec_holds_page_lru_lock(struct page *page,
- struct lruvec *lruvec)
+static inline void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page)
{
- pg_data_t *pgdat = page_pgdat(page);
-
- return lruvec == &pgdat->__lruvec;
}
static inline struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
@@ -1187,6 +1230,12 @@ static inline struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
return NULL;
}
+static inline
+struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css)
+{
+ return NULL;
+}
+
static inline void mem_cgroup_put(struct mem_cgroup *memcg)
{
}
@@ -1289,12 +1338,7 @@ mem_cgroup_print_oom_meminfo(struct mem_cgroup *memcg)
{
}
-static inline struct mem_cgroup *lock_page_memcg(struct page *page)
-{
- return NULL;
-}
-
-static inline void __unlock_page_memcg(struct mem_cgroup *memcg)
+static inline void lock_page_memcg(struct page *page)
{
}
@@ -1334,17 +1378,6 @@ static inline void mem_cgroup_print_oom_group(struct mem_cgroup *memcg)
{
}
-static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
-{
- return 0;
-}
-
-static inline unsigned long memcg_page_state_local(struct mem_cgroup *memcg,
- int idx)
-{
- return 0;
-}
-
static inline void __mod_memcg_state(struct mem_cgroup *memcg,
int idx,
int nr)
@@ -1390,18 +1423,6 @@ static inline void mod_lruvec_kmem_state(void *p, enum node_stat_item idx,
mod_node_page_state(page_pgdat(page), idx, val);
}
-static inline
-unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
- gfp_t gfp_mask,
- unsigned long *total_scanned)
-{
- return 0;
-}
-
-static inline void split_page_memcg(struct page *head, unsigned int nr)
-{
-}
-
static inline void count_memcg_events(struct mem_cgroup *memcg,
enum vm_event_item idx,
unsigned long count)
@@ -1424,9 +1445,17 @@ void count_memcg_event_mm(struct mm_struct *mm, enum vm_event_item idx)
{
}
-static inline void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page)
+static inline void split_page_memcg(struct page *head, unsigned int nr)
{
}
+
+static inline
+unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
+ gfp_t gfp_mask,
+ unsigned long *total_scanned)
+{
+ return 0;
+}
#endif /* CONFIG_MEMCG */
static inline void __inc_lruvec_kmem_state(void *p, enum node_stat_item idx)
@@ -1468,12 +1497,19 @@ static inline void unlock_page_lruvec_irqrestore(struct lruvec *lruvec,
spin_unlock_irqrestore(&lruvec->lru_lock, flags);
}
+/* Test requires a stable page->memcg binding, see page_memcg() */
+static inline bool page_matches_lruvec(struct page *page, struct lruvec *lruvec)
+{
+ return lruvec_pgdat(lruvec) == page_pgdat(page) &&
+ lruvec_memcg(lruvec) == page_memcg(page);
+}
+
/* Don't lock again iff page's lruvec locked */
static inline struct lruvec *relock_page_lruvec_irq(struct page *page,
struct lruvec *locked_lruvec)
{
if (locked_lruvec) {
- if (lruvec_holds_page_lru_lock(page, locked_lruvec))
+ if (page_matches_lruvec(page, locked_lruvec))
return locked_lruvec;
unlock_page_lruvec_irq(locked_lruvec);
@@ -1487,7 +1523,7 @@ static inline struct lruvec *relock_page_lruvec_irqsave(struct page *page,
struct lruvec *locked_lruvec, unsigned long *flags)
{
if (locked_lruvec) {
- if (lruvec_holds_page_lru_lock(page, locked_lruvec))
+ if (page_matches_lruvec(page, locked_lruvec))
return locked_lruvec;
unlock_page_lruvec_irqrestore(locked_lruvec, *flags);
@@ -1563,10 +1599,10 @@ static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
return false;
}
-extern int memcg_expand_shrinker_maps(int new_id);
-
-extern void memcg_set_shrinker_bit(struct mem_cgroup *memcg,
- int nid, int shrinker_id);
+int alloc_shrinker_info(struct mem_cgroup *memcg);
+void free_shrinker_info(struct mem_cgroup *memcg);
+void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id);
+void reparent_shrinker_deferred(struct mem_cgroup *memcg);
#else
#define mem_cgroup_sockets_enabled 0
static inline void mem_cgroup_sk_alloc(struct sock *sk) { };
@@ -1576,8 +1612,8 @@ static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
return false;
}
-static inline void memcg_set_shrinker_bit(struct mem_cgroup *memcg,
- int nid, int shrinker_id)
+static inline void set_shrinker_bit(struct mem_cgroup *memcg,
+ int nid, int shrinker_id)
{
}
#endif