diff options
Diffstat (limited to 'include/linux/memcontrol.h')
-rw-r--r-- | include/linux/memcontrol.h | 344 |
1 files changed, 190 insertions, 154 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 8ef51c58f470..bfe5c486f4ad 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -76,10 +76,27 @@ enum mem_cgroup_events_target { }; struct memcg_vmstats_percpu { - long stat[MEMCG_NR_STAT]; - unsigned long events[NR_VM_EVENT_ITEMS]; - unsigned long nr_page_events; - unsigned long targets[MEM_CGROUP_NTARGETS]; + /* Local (CPU and cgroup) page state & events */ + long state[MEMCG_NR_STAT]; + unsigned long events[NR_VM_EVENT_ITEMS]; + + /* Delta calculation for lockless upward propagation */ + long state_prev[MEMCG_NR_STAT]; + unsigned long events_prev[NR_VM_EVENT_ITEMS]; + + /* Cgroup1: threshold notifications & softlimit tree updates */ + unsigned long nr_page_events; + unsigned long targets[MEM_CGROUP_NTARGETS]; +}; + +struct memcg_vmstats { + /* Aggregated (CPU and subtree) page state & events */ + long state[MEMCG_NR_STAT]; + unsigned long events[NR_VM_EVENT_ITEMS]; + + /* Pending child counts during tree propagation */ + long state_pending[MEMCG_NR_STAT]; + unsigned long events_pending[NR_VM_EVENT_ITEMS]; }; struct mem_cgroup_reclaim_iter { @@ -97,12 +114,13 @@ struct batched_lruvec_stat { }; /* - * Bitmap of shrinker::id corresponding to memcg-aware shrinkers, - * which have elements charged to this memcg. + * Bitmap and deferred work of shrinker::id corresponding to memcg-aware + * shrinkers, which have elements charged to this memcg. */ -struct memcg_shrinker_map { +struct shrinker_info { struct rcu_head rcu; - unsigned long map[]; + atomic_long_t *nr_deferred; + unsigned long *map; }; /* @@ -128,7 +146,7 @@ struct mem_cgroup_per_node { struct mem_cgroup_reclaim_iter iter; - struct memcg_shrinker_map __rcu *shrinker_map; + struct shrinker_info __rcu *shrinker_info; struct rb_node tree_node; /* RB tree node */ unsigned long usage_in_excess;/* Set to the value by which */ @@ -174,7 +192,7 @@ enum memcg_kmem_state { struct memcg_padding { char x[0]; } ____cacheline_internodealigned_in_smp; -#define MEMCG_PADDING(name) struct memcg_padding name; +#define MEMCG_PADDING(name) struct memcg_padding name #else #define MEMCG_PADDING(name) #endif @@ -287,8 +305,8 @@ struct mem_cgroup { MEMCG_PADDING(_pad1_); - atomic_long_t vmstats[MEMCG_NR_STAT]; - atomic_long_t vmevents[NR_VM_EVENT_ITEMS]; + /* memory.stat */ + struct memcg_vmstats vmstats; /* memory.events */ atomic_long_t memory_events[MEMCG_NR_MEMORY_EVENTS]; @@ -315,10 +333,6 @@ struct mem_cgroup { atomic_t moving_account; struct task_struct *move_lock_task; - /* Legacy local VM stats and events */ - struct memcg_vmstats_percpu __percpu *vmstats_local; - - /* Subtree VM stats and events (batched updates) */ struct memcg_vmstats_percpu __percpu *vmstats_percpu; #ifdef CONFIG_CGROUP_WRITEBACK @@ -335,8 +349,7 @@ struct mem_cgroup { struct deferred_split deferred_split_queue; #endif - struct mem_cgroup_per_node *nodeinfo[0]; - /* WARNING: nodeinfo must be the last member here */ + struct mem_cgroup_per_node *nodeinfo[]; }; /* @@ -358,6 +371,62 @@ enum page_memcg_data_flags { #define MEMCG_DATA_FLAGS_MASK (__NR_MEMCG_DATA_FLAGS - 1) +static inline bool PageMemcgKmem(struct page *page); + +/* + * After the initialization objcg->memcg is always pointing at + * a valid memcg, but can be atomically swapped to the parent memcg. + * + * The caller must ensure that the returned memcg won't be released: + * e.g. acquire the rcu_read_lock or css_set_lock. + */ +static inline struct mem_cgroup *obj_cgroup_memcg(struct obj_cgroup *objcg) +{ + return READ_ONCE(objcg->memcg); +} + +/* + * __page_memcg - get the memory cgroup associated with a non-kmem page + * @page: a pointer to the page struct + * + * Returns a pointer to the memory cgroup associated with the page, + * or NULL. This function assumes that the page is known to have a + * proper memory cgroup pointer. It's not safe to call this function + * against some type of pages, e.g. slab pages or ex-slab pages or + * kmem pages. + */ +static inline struct mem_cgroup *__page_memcg(struct page *page) +{ + unsigned long memcg_data = page->memcg_data; + + VM_BUG_ON_PAGE(PageSlab(page), page); + VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_OBJCGS, page); + VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, page); + + return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); +} + +/* + * __page_objcg - get the object cgroup associated with a kmem page + * @page: a pointer to the page struct + * + * Returns a pointer to the object cgroup associated with the page, + * or NULL. This function assumes that the page is known to have a + * proper object cgroup pointer. It's not safe to call this function + * against some type of pages, e.g. slab pages or ex-slab pages or + * LRU pages. + */ +static inline struct obj_cgroup *__page_objcg(struct page *page) +{ + unsigned long memcg_data = page->memcg_data; + + VM_BUG_ON_PAGE(PageSlab(page), page); + VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_OBJCGS, page); + VM_BUG_ON_PAGE(!(memcg_data & MEMCG_DATA_KMEM), page); + + return (struct obj_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); +} + /* * page_memcg - get the memory cgroup associated with a page * @page: a pointer to the page struct @@ -367,20 +436,23 @@ enum page_memcg_data_flags { * proper memory cgroup pointer. It's not safe to call this function * against some type of pages, e.g. slab pages or ex-slab pages. * - * Any of the following ensures page and memcg binding stability: + * For a non-kmem page any of the following ensures page and memcg binding + * stability: + * * - the page lock * - LRU isolation * - lock_page_memcg() * - exclusive reference + * + * For a kmem page a caller should hold an rcu read lock to protect memcg + * associated with a kmem page from being released. */ static inline struct mem_cgroup *page_memcg(struct page *page) { - unsigned long memcg_data = page->memcg_data; - - VM_BUG_ON_PAGE(PageSlab(page), page); - VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_OBJCGS, page); - - return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); + if (PageMemcgKmem(page)) + return obj_cgroup_memcg(__page_objcg(page)); + else + return __page_memcg(page); } /* @@ -394,11 +466,19 @@ static inline struct mem_cgroup *page_memcg(struct page *page) */ static inline struct mem_cgroup *page_memcg_rcu(struct page *page) { + unsigned long memcg_data = READ_ONCE(page->memcg_data); + VM_BUG_ON_PAGE(PageSlab(page), page); WARN_ON_ONCE(!rcu_read_lock_held()); - return (struct mem_cgroup *)(READ_ONCE(page->memcg_data) & - ~MEMCG_DATA_FLAGS_MASK); + if (memcg_data & MEMCG_DATA_KMEM) { + struct obj_cgroup *objcg; + + objcg = (void *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); + return obj_cgroup_memcg(objcg); + } + + return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); } /* @@ -406,15 +486,21 @@ static inline struct mem_cgroup *page_memcg_rcu(struct page *page) * @page: a pointer to the page struct * * Returns a pointer to the memory cgroup associated with the page, - * or NULL. This function unlike page_memcg() can take any page + * or NULL. This function unlike page_memcg() can take any page * as an argument. It has to be used in cases when it's not known if a page - * has an associated memory cgroup pointer or an object cgroups vector. + * has an associated memory cgroup pointer or an object cgroups vector or + * an object cgroup. + * + * For a non-kmem page any of the following ensures page and memcg binding + * stability: * - * Any of the following ensures page and memcg binding stability: * - the page lock * - LRU isolation * - lock_page_memcg() * - exclusive reference + * + * For a kmem page a caller should hold an rcu read lock to protect memcg + * associated with a kmem page from being released. */ static inline struct mem_cgroup *page_memcg_check(struct page *page) { @@ -427,9 +513,17 @@ static inline struct mem_cgroup *page_memcg_check(struct page *page) if (memcg_data & MEMCG_DATA_OBJCGS) return NULL; + if (memcg_data & MEMCG_DATA_KMEM) { + struct obj_cgroup *objcg; + + objcg = (void *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); + return obj_cgroup_memcg(objcg); + } + return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); } +#ifdef CONFIG_MEMCG_KMEM /* * PageMemcgKmem - check if the page has MemcgKmem flag set * @page: a pointer to the page struct @@ -444,7 +538,6 @@ static inline bool PageMemcgKmem(struct page *page) return page->memcg_data & MEMCG_DATA_KMEM; } -#ifdef CONFIG_MEMCG_KMEM /* * page_objcgs - get the object cgroups vector associated with a page * @page: a pointer to the page struct @@ -486,6 +579,11 @@ static inline struct obj_cgroup **page_objcgs_check(struct page *page) } #else +static inline bool PageMemcgKmem(struct page *page) +{ + return false; +} + static inline struct obj_cgroup **page_objcgs(struct page *page) { return NULL; @@ -596,18 +694,15 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *memcg) } int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); +int mem_cgroup_swapin_charge_page(struct page *page, struct mm_struct *mm, + gfp_t gfp, swp_entry_t entry); +void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry); void mem_cgroup_uncharge(struct page *page); void mem_cgroup_uncharge_list(struct list_head *page_list); void mem_cgroup_migrate(struct page *oldpage, struct page *newpage); -static struct mem_cgroup_per_node * -mem_cgroup_nodeinfo(struct mem_cgroup *memcg, int nid) -{ - return memcg->nodeinfo[nid]; -} - /** * mem_cgroup_lruvec - get the lru list vector for a memcg & node * @memcg: memcg of the wanted lruvec @@ -631,7 +726,7 @@ static inline struct lruvec *mem_cgroup_lruvec(struct mem_cgroup *memcg, if (!memcg) memcg = root_mem_cgroup; - mz = mem_cgroup_nodeinfo(memcg, pgdat->node_id); + mz = memcg->nodeinfo[pgdat->node_id]; lruvec = &mz->lruvec; out: /* @@ -647,35 +742,18 @@ out: /** * mem_cgroup_page_lruvec - return lruvec for isolating/putting an LRU page * @page: the page - * @pgdat: pgdat of the page * * This function relies on page->mem_cgroup being stable. */ -static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page, - struct pglist_data *pgdat) +static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page) { + pg_data_t *pgdat = page_pgdat(page); struct mem_cgroup *memcg = page_memcg(page); VM_WARN_ON_ONCE_PAGE(!memcg && !mem_cgroup_disabled(), page); return mem_cgroup_lruvec(memcg, pgdat); } -static inline bool lruvec_holds_page_lru_lock(struct page *page, - struct lruvec *lruvec) -{ - pg_data_t *pgdat = page_pgdat(page); - const struct mem_cgroup *memcg; - struct mem_cgroup_per_node *mz; - - if (mem_cgroup_disabled()) - return lruvec == &pgdat->__lruvec; - - mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec); - memcg = page_memcg(page) ? : root_mem_cgroup; - - return lruvec->pgdat == pgdat && mz->memcg == memcg; -} - struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm); @@ -708,21 +786,15 @@ static inline void obj_cgroup_get(struct obj_cgroup *objcg) percpu_ref_get(&objcg->refcnt); } -static inline void obj_cgroup_put(struct obj_cgroup *objcg) +static inline void obj_cgroup_get_many(struct obj_cgroup *objcg, + unsigned long nr) { - percpu_ref_put(&objcg->refcnt); + percpu_ref_get_many(&objcg->refcnt, nr); } -/* - * After the initialization objcg->memcg is always pointing at - * a valid memcg, but can be atomically swapped to the parent memcg. - * - * The caller must ensure that the returned memcg won't be released: - * e.g. acquire the rcu_read_lock or css_set_lock. - */ -static inline struct mem_cgroup *obj_cgroup_memcg(struct obj_cgroup *objcg) +static inline void obj_cgroup_put(struct obj_cgroup *objcg) { - return READ_ONCE(objcg->memcg); + percpu_ref_put(&objcg->refcnt); } static inline void mem_cgroup_put(struct mem_cgroup *memcg) @@ -867,43 +939,9 @@ void mem_cgroup_print_oom_group(struct mem_cgroup *memcg); extern bool cgroup_memory_noswap; #endif -struct mem_cgroup *lock_page_memcg(struct page *page); -void __unlock_page_memcg(struct mem_cgroup *memcg); +void lock_page_memcg(struct page *page); void unlock_page_memcg(struct page *page); -/* - * idx can be of type enum memcg_stat_item or node_stat_item. - * Keep in sync with memcg_exact_page_state(). - */ -static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx) -{ - long x = atomic_long_read(&memcg->vmstats[idx]); -#ifdef CONFIG_SMP - if (x < 0) - x = 0; -#endif - return x; -} - -/* - * idx can be of type enum memcg_stat_item or node_stat_item. - * Keep in sync with memcg_exact_page_state(). - */ -static inline unsigned long memcg_page_state_local(struct mem_cgroup *memcg, - int idx) -{ - long x = 0; - int cpu; - - for_each_possible_cpu(cpu) - x += per_cpu(memcg->vmstats_local->stat[idx], cpu); -#ifdef CONFIG_SMP - if (x < 0) - x = 0; -#endif - return x; -} - void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val); /* idx can be of type enum memcg_stat_item or node_stat_item */ @@ -979,10 +1017,6 @@ static inline void mod_memcg_lruvec_state(struct lruvec *lruvec, local_irq_restore(flags); } -unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, - gfp_t gfp_mask, - unsigned long *total_scanned); - void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx, unsigned long count); @@ -1063,13 +1097,15 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm, void split_page_memcg(struct page *head, unsigned int nr); +unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, + gfp_t gfp_mask, + unsigned long *total_scanned); + #else /* CONFIG_MEMCG */ #define MEM_CGROUP_ID_SHIFT 0 #define MEM_CGROUP_ID_MAX 0 -struct mem_cgroup; - static inline struct mem_cgroup *page_memcg(struct page *page) { return NULL; @@ -1139,6 +1175,16 @@ static inline int mem_cgroup_charge(struct page *page, struct mm_struct *mm, return 0; } +static inline int mem_cgroup_swapin_charge_page(struct page *page, + struct mm_struct *mm, gfp_t gfp, swp_entry_t entry) +{ + return 0; +} + +static inline void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry) +{ +} + static inline void mem_cgroup_uncharge(struct page *page) { } @@ -1157,18 +1203,15 @@ static inline struct lruvec *mem_cgroup_lruvec(struct mem_cgroup *memcg, return &pgdat->__lruvec; } -static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page, - struct pglist_data *pgdat) +static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page) { + pg_data_t *pgdat = page_pgdat(page); + return &pgdat->__lruvec; } -static inline bool lruvec_holds_page_lru_lock(struct page *page, - struct lruvec *lruvec) +static inline void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page) { - pg_data_t *pgdat = page_pgdat(page); - - return lruvec == &pgdat->__lruvec; } static inline struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg) @@ -1187,6 +1230,12 @@ static inline struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm) return NULL; } +static inline +struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css) +{ + return NULL; +} + static inline void mem_cgroup_put(struct mem_cgroup *memcg) { } @@ -1289,12 +1338,7 @@ mem_cgroup_print_oom_meminfo(struct mem_cgroup *memcg) { } -static inline struct mem_cgroup *lock_page_memcg(struct page *page) -{ - return NULL; -} - -static inline void __unlock_page_memcg(struct mem_cgroup *memcg) +static inline void lock_page_memcg(struct page *page) { } @@ -1334,17 +1378,6 @@ static inline void mem_cgroup_print_oom_group(struct mem_cgroup *memcg) { } -static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx) -{ - return 0; -} - -static inline unsigned long memcg_page_state_local(struct mem_cgroup *memcg, - int idx) -{ - return 0; -} - static inline void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int nr) @@ -1390,18 +1423,6 @@ static inline void mod_lruvec_kmem_state(void *p, enum node_stat_item idx, mod_node_page_state(page_pgdat(page), idx, val); } -static inline -unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, - gfp_t gfp_mask, - unsigned long *total_scanned) -{ - return 0; -} - -static inline void split_page_memcg(struct page *head, unsigned int nr) -{ -} - static inline void count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx, unsigned long count) @@ -1424,9 +1445,17 @@ void count_memcg_event_mm(struct mm_struct *mm, enum vm_event_item idx) { } -static inline void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page) +static inline void split_page_memcg(struct page *head, unsigned int nr) { } + +static inline +unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, + gfp_t gfp_mask, + unsigned long *total_scanned) +{ + return 0; +} #endif /* CONFIG_MEMCG */ static inline void __inc_lruvec_kmem_state(void *p, enum node_stat_item idx) @@ -1468,12 +1497,19 @@ static inline void unlock_page_lruvec_irqrestore(struct lruvec *lruvec, spin_unlock_irqrestore(&lruvec->lru_lock, flags); } +/* Test requires a stable page->memcg binding, see page_memcg() */ +static inline bool page_matches_lruvec(struct page *page, struct lruvec *lruvec) +{ + return lruvec_pgdat(lruvec) == page_pgdat(page) && + lruvec_memcg(lruvec) == page_memcg(page); +} + /* Don't lock again iff page's lruvec locked */ static inline struct lruvec *relock_page_lruvec_irq(struct page *page, struct lruvec *locked_lruvec) { if (locked_lruvec) { - if (lruvec_holds_page_lru_lock(page, locked_lruvec)) + if (page_matches_lruvec(page, locked_lruvec)) return locked_lruvec; unlock_page_lruvec_irq(locked_lruvec); @@ -1487,7 +1523,7 @@ static inline struct lruvec *relock_page_lruvec_irqsave(struct page *page, struct lruvec *locked_lruvec, unsigned long *flags) { if (locked_lruvec) { - if (lruvec_holds_page_lru_lock(page, locked_lruvec)) + if (page_matches_lruvec(page, locked_lruvec)) return locked_lruvec; unlock_page_lruvec_irqrestore(locked_lruvec, *flags); @@ -1563,10 +1599,10 @@ static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) return false; } -extern int memcg_expand_shrinker_maps(int new_id); - -extern void memcg_set_shrinker_bit(struct mem_cgroup *memcg, - int nid, int shrinker_id); +int alloc_shrinker_info(struct mem_cgroup *memcg); +void free_shrinker_info(struct mem_cgroup *memcg); +void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id); +void reparent_shrinker_deferred(struct mem_cgroup *memcg); #else #define mem_cgroup_sockets_enabled 0 static inline void mem_cgroup_sk_alloc(struct sock *sk) { }; @@ -1576,8 +1612,8 @@ static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) return false; } -static inline void memcg_set_shrinker_bit(struct mem_cgroup *memcg, - int nid, int shrinker_id) +static inline void set_shrinker_bit(struct mem_cgroup *memcg, + int nid, int shrinker_id) { } #endif |