Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--  mm/memcontrol.c  138
1 file changed, 65 insertions(+), 73 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e8ca4bdcb03c..b29b850cf399 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -197,7 +197,7 @@ static struct move_charge_struct {
};
/*
- * Maximum loops in mem_cgroup_hierarchical_reclaim(), used for soft
+ * Maximum loops in mem_cgroup_soft_reclaim(), used for soft
* limit reclaim to prevent infinite loops, if they ever occur.
*/
#define MEM_CGROUP_MAX_RECLAIM_LOOPS 100
@@ -742,6 +742,10 @@ struct memcg_vmstats {
long state[MEMCG_NR_STAT];
unsigned long events[NR_MEMCG_EVENTS];
+ /* Non-hierarchical (CPU aggregated) page state & events */
+ long state_local[MEMCG_NR_STAT];
+ unsigned long events_local[NR_MEMCG_EVENTS];
+
/* Pending child counts during tree propagation */
long state_pending[MEMCG_NR_STAT];
unsigned long events_pending[NR_MEMCG_EVENTS];
@@ -775,11 +779,8 @@ void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val)
/* idx can be of type enum memcg_stat_item or node_stat_item. */
static unsigned long memcg_page_state_local(struct mem_cgroup *memcg, int idx)
{
- long x = 0;
- int cpu;
+ long x = READ_ONCE(memcg->vmstats->state_local[idx]);
- for_each_possible_cpu(cpu)
- x += per_cpu(memcg->vmstats_percpu->state[idx], cpu);
#ifdef CONFIG_SMP
if (x < 0)
x = 0;
@@ -926,16 +927,12 @@ static unsigned long memcg_events(struct mem_cgroup *memcg, int event)
static unsigned long memcg_events_local(struct mem_cgroup *memcg, int event)
{
- long x = 0;
- int cpu;
int index = memcg_events_index(event);
if (index < 0)
return 0;
- for_each_possible_cpu(cpu)
- x += per_cpu(memcg->vmstats_percpu->events[index], cpu);
- return x;
+ return READ_ONCE(memcg->vmstats->events_local[index]);
}
static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
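With the state_local[] and events_local[] fields added above, the non-hierarchical readers collapse from a for_each_possible_cpu() sum into a single load of a cached total that the rstat flush keeps up to date. A minimal userspace sketch of that contrast (illustrative only, not kernel code; all names here are invented for the example):

/*
 * Old reader: O(nr_cpus) work on every call.
 * New reader: one load of a total maintained at flush time.
 */
#include <stdio.h>

#define NR_CPUS 4

static long pcpu_state[NR_CPUS];	/* per-CPU counters, like vmstats_percpu */
static long state_local;		/* cached sum, like vmstats->state_local[] */

static long read_old(void)
{
	long x = 0;

	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		x += pcpu_state[cpu];
	return x;
}

static long read_new(void)
{
	return state_local;		/* equivalent of the new READ_ONCE() path */
}

int main(void)
{
	pcpu_state[1] = 7;
	state_local = 7;		/* pretend a flush already ran */
	printf("old=%ld new=%ld\n", read_old(), read_new());
	return 0;
}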
@@ -1629,7 +1626,6 @@ static void memory_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
WARN_ON_ONCE(seq_buf_has_overflowed(s));
}
-#define K(x) ((x) << (PAGE_SHIFT-10))
/**
* mem_cgroup_print_oom_context: Print OOM information relevant to
* memory controller.
@@ -3036,21 +3032,21 @@ __always_inline struct obj_cgroup *get_obj_cgroup_from_current(void)
return objcg;
}
-struct obj_cgroup *get_obj_cgroup_from_page(struct page *page)
+struct obj_cgroup *get_obj_cgroup_from_folio(struct folio *folio)
{
struct obj_cgroup *objcg;
if (!memcg_kmem_online())
return NULL;
- if (PageMemcgKmem(page)) {
- objcg = __folio_objcg(page_folio(page));
+ if (folio_memcg_kmem(folio)) {
+ objcg = __folio_objcg(folio);
obj_cgroup_get(objcg);
} else {
struct mem_cgroup *memcg;
rcu_read_lock();
- memcg = __folio_memcg(page_folio(page));
+ memcg = __folio_memcg(folio);
if (memcg)
objcg = __get_obj_cgroup_from_memcg(memcg);
else
@@ -3871,10 +3867,6 @@ static ssize_t mem_cgroup_write(struct kernfs_open_file *of,
case _MEMSWAP:
ret = mem_cgroup_resize_max(memcg, nr_pages, true);
break;
- case _KMEM:
- /* kmem.limit_in_bytes is deprecated. */
- ret = -EOPNOTSUPP;
- break;
case _TCP:
ret = memcg_update_tcp_max(memcg, nr_pages);
break;
@@ -5086,12 +5078,6 @@ static struct cftype mem_cgroup_legacy_files[] = {
},
#endif
{
- .name = "kmem.limit_in_bytes",
- .private = MEMFILE_PRIVATE(_KMEM, RES_LIMIT),
- .write = mem_cgroup_write,
- .read_u64 = mem_cgroup_read_u64,
- },
- {
.name = "kmem.usage_in_bytes",
.private = MEMFILE_PRIVATE(_KMEM, RES_USAGE),
.read_u64 = mem_cgroup_read_u64,
@@ -5165,6 +5151,7 @@ static struct cftype mem_cgroup_legacy_files[] = {
* those references are manageable from userspace.
*/
+#define MEM_CGROUP_ID_MAX ((1UL << MEM_CGROUP_ID_SHIFT) - 1)
static DEFINE_IDR(mem_cgroup_idr);
static void mem_cgroup_id_remove(struct mem_cgroup *memcg)
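The define gives the largest ID the IDR will hand out for a memcg. A quick standalone check of the arithmetic (assuming MEM_CGROUP_ID_SHIFT is 16, as in current kernels; see include/linux/memcontrol.h):

#include <stdio.h>

#define MEM_CGROUP_ID_SHIFT 16	/* assumed value for the example */
#define MEM_CGROUP_ID_MAX ((1UL << MEM_CGROUP_ID_SHIFT) - 1)

int main(void)
{
	printf("MEM_CGROUP_ID_MAX = %lu\n", MEM_CGROUP_ID_MAX);	/* 65535 */
	return 0;
}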
@@ -5526,7 +5513,7 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
struct mem_cgroup *parent = parent_mem_cgroup(memcg);
struct memcg_vmstats_percpu *statc;
- long delta, v;
+ long delta, delta_cpu, v;
int i, nid;
statc = per_cpu_ptr(memcg->vmstats_percpu, cpu);
@@ -5542,19 +5529,23 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
memcg->vmstats->state_pending[i] = 0;
/* Add CPU changes on this level since the last flush */
+ delta_cpu = 0;
v = READ_ONCE(statc->state[i]);
if (v != statc->state_prev[i]) {
- delta += v - statc->state_prev[i];
+ delta_cpu = v - statc->state_prev[i];
+ delta += delta_cpu;
statc->state_prev[i] = v;
}
- if (!delta)
- continue;
-
/* Aggregate counts on this level and propagate upwards */
- memcg->vmstats->state[i] += delta;
- if (parent)
- parent->vmstats->state_pending[i] += delta;
+ if (delta_cpu)
+ memcg->vmstats->state_local[i] += delta_cpu;
+
+ if (delta) {
+ memcg->vmstats->state[i] += delta;
+ if (parent)
+ parent->vmstats->state_pending[i] += delta;
+ }
}
for (i = 0; i < NR_MEMCG_EVENTS; i++) {
@@ -5562,18 +5553,22 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
if (delta)
memcg->vmstats->events_pending[i] = 0;
+ delta_cpu = 0;
v = READ_ONCE(statc->events[i]);
if (v != statc->events_prev[i]) {
- delta += v - statc->events_prev[i];
+ delta_cpu = v - statc->events_prev[i];
+ delta += delta_cpu;
statc->events_prev[i] = v;
}
- if (!delta)
- continue;
+ if (delta_cpu)
+ memcg->vmstats->events_local[i] += delta_cpu;
- memcg->vmstats->events[i] += delta;
- if (parent)
- parent->vmstats->events_pending[i] += delta;
+ if (delta) {
+ memcg->vmstats->events[i] += delta;
+ if (parent)
+ parent->vmstats->events_pending[i] += delta;
+ }
}
for_each_node_state(nid, N_MEMORY) {
@@ -5591,18 +5586,22 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
if (delta)
pn->lruvec_stats.state_pending[i] = 0;
+ delta_cpu = 0;
v = READ_ONCE(lstatc->state[i]);
if (v != lstatc->state_prev[i]) {
- delta += v - lstatc->state_prev[i];
+ delta_cpu = v - lstatc->state_prev[i];
+ delta += delta_cpu;
lstatc->state_prev[i] = v;
}
- if (!delta)
- continue;
+ if (delta_cpu)
+ pn->lruvec_stats.state_local[i] += delta_cpu;
- pn->lruvec_stats.state[i] += delta;
- if (ppn)
- ppn->lruvec_stats.state_pending[i] += delta;
+ if (delta) {
+ pn->lruvec_stats.state[i] += delta;
+ if (ppn)
+ ppn->lruvec_stats.state_pending[i] += delta;
+ }
}
}
}
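Each of the three flush loops above now distinguishes delta_cpu (this CPU's change since the last flush, which feeds only the new *_local counters) from delta (which also folds in pending contributions from children and propagates to this level's hierarchical total and to the parent's pending count). A userspace sketch of one such flush step, with made-up names and a single stat index for brevity:

#include <stdio.h>

struct node {
	long state;		/* hierarchical total, like vmstats->state[] */
	long state_local;	/* this level only, like vmstats->state_local[] */
	long state_pending;	/* contributions queued by children */
	struct node *parent;
};

static void flush_one(struct node *n, long cpu_value, long *cpu_prev)
{
	long delta = n->state_pending;
	long delta_cpu = 0;

	if (delta)
		n->state_pending = 0;

	if (cpu_value != *cpu_prev) {
		delta_cpu = cpu_value - *cpu_prev;
		delta += delta_cpu;
		*cpu_prev = cpu_value;
	}

	if (delta_cpu)
		n->state_local += delta_cpu;

	if (delta) {
		n->state += delta;
		if (n->parent)
			n->parent->state_pending += delta;
	}
}

int main(void)
{
	struct node root = { 0 }, child = { .parent = &root };
	long prev = 0;

	child.state_pending = 3;	/* queued earlier by a grandchild */
	flush_one(&child, 5, &prev);	/* this CPU's counter moved 0 -> 5 */

	printf("child state=%ld local=%ld, root pending=%ld\n",
	       child.state, child.state_local, root.state_pending);
	/* prints: child state=8 local=5, root pending=8 */
	return 0;
}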
@@ -5648,7 +5647,7 @@ static struct page *mc_handle_present_pte(struct vm_area_struct *vma,
{
struct page *page = vm_normal_page(vma, addr, ptent);
- if (!page || !page_mapped(page))
+ if (!page)
return NULL;
if (PageAnon(page)) {
if (!(mc.flags & MOVE_ANON))
@@ -5657,8 +5656,7 @@ static struct page *mc_handle_present_pte(struct vm_area_struct *vma,
if (!(mc.flags & MOVE_FILE))
return NULL;
}
- if (!get_page_unless_zero(page))
- return NULL;
+ get_page(page);
return page;
}
@@ -5766,7 +5764,7 @@ static int mem_cgroup_move_account(struct page *page,
if (folio_mapped(folio)) {
__mod_lruvec_state(from_vec, NR_ANON_MAPPED, -nr_pages);
__mod_lruvec_state(to_vec, NR_ANON_MAPPED, nr_pages);
- if (folio_test_transhuge(folio)) {
+ if (folio_test_pmd_mappable(folio)) {
__mod_lruvec_state(from_vec, NR_ANON_THPS,
-nr_pages);
__mod_lruvec_state(to_vec, NR_ANON_THPS,
@@ -5852,25 +5850,20 @@ out:
* @ptent: the pte to be checked
* @target: the pointer where the target page or swap entry will be stored (can be NULL)
*
- * Returns
- * 0(MC_TARGET_NONE): if the pte is not a target for move charge.
- * 1(MC_TARGET_PAGE): if the page corresponding to this pte is a target for
- * move charge. if @target is not NULL, the page is stored in target->page
- * with extra refcnt got(Callers should handle it).
- * 2(MC_TARGET_SWAP): if the swap entry corresponding to this pte is a
- * target for charge migration. if @target is not NULL, the entry is stored
- * in target->ent.
- * 3(MC_TARGET_DEVICE): like MC_TARGET_PAGE but page is device memory and
- * thus not on the lru.
- * For now we such page is charge like a regular page would be as for all
- * intent and purposes it is just special memory taking the place of a
- * regular page.
- *
- * See Documentations/vm/hmm.txt and include/linux/hmm.h
- *
- * Called with pte lock held.
+ * Context: Called with pte lock held.
+ * Return:
+ * * MC_TARGET_NONE - If the pte is not a target for move charge.
+ * * MC_TARGET_PAGE - If the page corresponding to this pte is a target for
+ * move charge. If @target is not NULL, the page is stored in target->page
+ * with extra refcnt taken (Caller should release it).
+ * * MC_TARGET_SWAP - If the swap entry corresponding to this pte is a
+ * target for charge migration. If @target is not NULL, the entry is
+ * stored in target->ent.
+ * * MC_TARGET_DEVICE - Like MC_TARGET_PAGE but page is device memory and
+ * thus not on the lru. For now such page is charged like a regular page
+ * would be as it is just special memory taking the place of a regular page.
+ *   See Documentation/mm/hmm.rst and include/linux/hmm.h
*/
-
static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
unsigned long addr, pte_t ptent, union mc_target *target)
{
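For context, a userspace sketch of how a caller dispatches on the return values documented above; the enum mirrors the kernel's mc_target_type constants, while handle() and the printf bodies are invented for the example:

#include <stdio.h>

enum mc_target_type {
	MC_TARGET_NONE = 0,
	MC_TARGET_PAGE,
	MC_TARGET_SWAP,
	MC_TARGET_DEVICE,
};

static void handle(enum mc_target_type t)
{
	switch (t) {
	case MC_TARGET_PAGE:
	case MC_TARGET_DEVICE:
		/* target->page came back with an extra reference to release */
		printf("move the page charge, then drop the extra reference\n");
		break;
	case MC_TARGET_SWAP:
		/* target->ent holds the swap entry to migrate */
		printf("migrate the swap charge\n");
		break;
	case MC_TARGET_NONE:
		printf("nothing to do for this pte\n");
		break;
	}
}

int main(void)
{
	handle(MC_TARGET_PAGE);
	handle(MC_TARGET_NONE);
	return 0;
}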
@@ -6024,6 +6017,7 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
static const struct mm_walk_ops precharge_walk_ops = {
.pmd_entry = mem_cgroup_count_precharge_pte_range,
+ .walk_lock = PGWALK_RDLOCK,
};
static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm)
@@ -6303,6 +6297,7 @@ put: /* get_mctgt_type() gets & locks the page */
static const struct mm_walk_ops charge_walk_ops = {
.pmd_entry = mem_cgroup_move_charge_pte_range,
+ .walk_lock = PGWALK_RDLOCK,
};
static void mem_cgroup_move_charge(void)
@@ -6696,8 +6691,8 @@ static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
lru_add_drain_all();
reclaimed = try_to_free_mem_cgroup_pages(memcg,
- nr_to_reclaim - nr_reclaimed,
- GFP_KERNEL, reclaim_options);
+ min(nr_to_reclaim - nr_reclaimed, SWAP_CLUSTER_MAX),
+ GFP_KERNEL, reclaim_options);
if (!reclaimed && !nr_retries--)
return -EAGAIN;
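Each pass now requests at most SWAP_CLUSTER_MAX pages instead of the whole remaining target, so the loop accounts progress in small batches. A standalone sketch of that loop shape (illustrative only; do_reclaim() is a stand-in for try_to_free_mem_cgroup_pages() and simply grants what was asked):

#include <stdio.h>

#define SWAP_CLUSTER_MAX 32UL

static unsigned long min_ul(unsigned long a, unsigned long b)
{
	return a < b ? a : b;
}

static unsigned long do_reclaim(unsigned long nr_pages)
{
	return nr_pages;	/* pretend reclaim always meets the request */
}

int main(void)
{
	unsigned long nr_to_reclaim = 100, nr_reclaimed = 0;
	int passes = 0;

	while (nr_reclaimed < nr_to_reclaim) {
		unsigned long batch = min_ul(nr_to_reclaim - nr_reclaimed,
					     SWAP_CLUSTER_MAX);

		nr_reclaimed += do_reclaim(batch);
		passes++;
	}
	printf("reclaimed %lu pages in %d passes\n", nr_reclaimed, passes);
	return 0;
}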
@@ -7535,9 +7530,6 @@ void __mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages)
struct mem_cgroup *memcg;
unsigned short id;
- if (mem_cgroup_disabled())
- return;
-
id = swap_cgroup_record(entry, 0, nr_pages);
rcu_read_lock();
memcg = mem_cgroup_from_id(id);
@@ -7787,7 +7779,7 @@ bool obj_cgroup_may_zswap(struct obj_cgroup *objcg)
* @objcg: the object cgroup
* @size: size of compressed object
*
- * This forces the charge after obj_cgroup_may_swap() allowed
+ * This forces the charge after obj_cgroup_may_zswap() allowed
* compression and storage in zswap for this cgroup to go ahead.
*/
void obj_cgroup_charge_zswap(struct obj_cgroup *objcg, size_t size)