From 23e9f0138963ceef2a252d887534923a0502b2da Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Fri, 3 Nov 2023 11:14:50 +0800 Subject: mm/vmstat: move pgdemote_* to per-node stats Demotion will migrate pages across nodes. Previously, only the global demotion statistics were accounted for. Changed them to per-node statistics, making it easier to observe where demotion occurs on each node. This will help to identify which nodes are under pressure. This patch also make pgdemote_* behind CONFIG_NUMA_BALANCING, since demotion is not available for !CONFIG_NUMA_BALANCING With this patch, here is a sample where node0 node1 are DRAM, node3 is PMEM: Global stats: $ grep demote /proc/vmstat pgdemote_kswapd 254288 pgdemote_direct 113497 pgdemote_khugepaged 0 Per-node stats: $ grep demote /sys/devices/system/node/node0/vmstat # demotion source pgdemote_kswapd 68454 pgdemote_direct 83431 pgdemote_khugepaged 0 $ grep demote /sys/devices/system/node/node1/vmstat # demotion source pgdemote_kswapd 185834 pgdemote_direct 30066 pgdemote_khugepaged 0 $ grep demote /sys/devices/system/node/node3/vmstat # demotion target pgdemote_kswapd 0 pgdemote_direct 0 pgdemote_khugepaged 0 Link: https://lkml.kernel.org/r/20231103031450.1456523-1-lizhijian@fujitsu.com Signed-off-by: Li Zhijian Acked-by: "Huang, Ying" Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Signed-off-by: Andrew Morton --- mm/vmstat.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'mm/vmstat.c') diff --git a/mm/vmstat.c b/mm/vmstat.c index 359460deb377..afa5a38fcc9c 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1248,6 +1248,9 @@ const char * const vmstat_text[] = { #ifdef CONFIG_NUMA_BALANCING "pgpromote_success", "pgpromote_candidate", + "pgdemote_kswapd", + "pgdemote_direct", + "pgdemote_khugepaged", #endif /* enum writeback_stat_item counters */ @@ -1279,9 +1282,6 @@ const char * const vmstat_text[] = { "pgsteal_kswapd", "pgsteal_direct", "pgsteal_khugepaged", - "pgdemote_kswapd", - "pgdemote_direct", - "pgdemote_khugepaged", "pgscan_kswapd", "pgscan_direct", "pgscan_khugepaged", -- cgit From 7108cc3f765cafd48a6a35f8add140beaecfa75b Mon Sep 17 00:00:00 2001 From: Domenico Cerasuolo Date: Thu, 30 Nov 2023 11:40:21 -0800 Subject: mm: memcg: add per-memcg zswap writeback stat Since zswap now writes back pages from memcg-specific LRUs, we now need a new stat to show writebacks count for each memcg. [nphamcs@gmail.com: rename ZSWP_WB to ZSWPWB] Link: https://lkml.kernel.org/r/20231205193307.2432803-1-nphamcs@gmail.com Link: https://lkml.kernel.org/r/20231130194023.4102148-5-nphamcs@gmail.com Suggested-by: Nhat Pham Signed-off-by: Domenico Cerasuolo Signed-off-by: Nhat Pham Tested-by: Bagas Sanjaya Reviewed-by: Yosry Ahmed Cc: Chris Li Cc: Dan Streetman Cc: Johannes Weiner Cc: Michal Hocko Cc: Muchun Song Cc: Roman Gushchin Cc: Seth Jennings Cc: Shakeel Butt Cc: Shuah Khan Cc: Vitaly Wool Signed-off-by: Andrew Morton --- mm/vmstat.c | 1 + 1 file changed, 1 insertion(+) (limited to 'mm/vmstat.c') diff --git a/mm/vmstat.c b/mm/vmstat.c index afa5a38fcc9c..cfd8d8256f8e 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1401,6 +1401,7 @@ const char * const vmstat_text[] = { #ifdef CONFIG_ZSWAP "zswpin", "zswpout", + "zswpwb", #endif #ifdef CONFIG_X86 "direct_map_level2_splits", -- cgit From b805ab3c6935d14654ccc28f16ffce7a13c2c528 Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Fri, 29 Dec 2023 10:26:51 +0800 Subject: mm/vmstat: move pgdemote_* out of CONFIG_NUMA_BALANCING Demotion can work well without CONFIG_NUMA_BALANCING. But the commit 23e9f0138963 ("mm/vmstat: move pgdemote_* to per-node stats") wrongly hid it behind CONFIG_NUMA_BALANCING. Fix it by moving them out of CONFIG_NUMA_BALANCING. Link: https://lkml.kernel.org/r/20231229022651.3229174-1-lizhijian@fujitsu.com Fixes: 23e9f0138963 ("mm/vmstat: move pgdemote_* to per-node stats") Signed-off-by: Li Zhijian Cc: "Huang, Ying" Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Signed-off-by: Andrew Morton --- mm/vmstat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/vmstat.c') diff --git a/mm/vmstat.c b/mm/vmstat.c index cfd8d8256f8e..1437ca2f28c5 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1248,10 +1248,10 @@ const char * const vmstat_text[] = { #ifdef CONFIG_NUMA_BALANCING "pgpromote_success", "pgpromote_candidate", +#endif "pgdemote_kswapd", "pgdemote_direct", "pgdemote_khugepaged", -#endif /* enum writeback_stat_item counters */ "nr_dirty_threshold", -- cgit From fd37721803c6e73619108f76ad2e12a9aa5fafaf Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 28 Dec 2023 17:47:03 +0300 Subject: mm, treewide: introduce NR_PAGE_ORDERS NR_PAGE_ORDERS defines the number of page orders supported by the page allocator, ranging from 0 to MAX_ORDER, MAX_ORDER + 1 in total. NR_PAGE_ORDERS assists in defining arrays of page orders and allows for more natural iteration over them. [kirill.shutemov@linux.intel.com: fixup for kerneldoc warning] Link: https://lkml.kernel.org/r/20240101111512.7empzyifq7kxtzk3@box Link: https://lkml.kernel.org/r/20231228144704.14033-1-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Reviewed-by: Zi Yan Cc: Linus Torvalds Signed-off-by: Andrew Morton --- mm/vmstat.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'mm/vmstat.c') diff --git a/mm/vmstat.c b/mm/vmstat.c index 1437ca2f28c5..03ead31c46a0 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1059,7 +1059,7 @@ static void fill_contig_page_info(struct zone *zone, info->free_blocks_total = 0; info->free_blocks_suitable = 0; - for (order = 0; order <= MAX_ORDER; order++) { + for (order = 0; order < NR_PAGE_ORDERS; order++) { unsigned long blocks; /* @@ -1476,7 +1476,7 @@ static void frag_show_print(struct seq_file *m, pg_data_t *pgdat, int order; seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name); - for (order = 0; order <= MAX_ORDER; ++order) + for (order = 0; order < NR_PAGE_ORDERS; ++order) /* * Access to nr_free is lockless as nr_free is used only for * printing purposes. Use data_race to avoid KCSAN warning. @@ -1505,7 +1505,7 @@ static void pagetypeinfo_showfree_print(struct seq_file *m, pgdat->node_id, zone->name, migratetype_names[mtype]); - for (order = 0; order <= MAX_ORDER; ++order) { + for (order = 0; order < NR_PAGE_ORDERS; ++order) { unsigned long freecount = 0; struct free_area *area; struct list_head *curr; @@ -1545,7 +1545,7 @@ static void pagetypeinfo_showfree(struct seq_file *m, void *arg) /* Print header */ seq_printf(m, "%-43s ", "Free pages count per migrate type at order"); - for (order = 0; order <= MAX_ORDER; ++order) + for (order = 0; order < NR_PAGE_ORDERS; ++order) seq_printf(m, "%6d ", order); seq_putc(m, '\n'); @@ -2181,7 +2181,7 @@ static void unusable_show_print(struct seq_file *m, seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name); - for (order = 0; order <= MAX_ORDER; ++order) { + for (order = 0; order < NR_PAGE_ORDERS; ++order) { fill_contig_page_info(zone, order, &info); index = unusable_free_index(order, &info); seq_printf(m, "%d.%03d ", index / 1000, index % 1000); @@ -2233,7 +2233,7 @@ static void extfrag_show_print(struct seq_file *m, seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name); - for (order = 0; order <= MAX_ORDER; ++order) { + for (order = 0; order < NR_PAGE_ORDERS; ++order) { fill_contig_page_info(zone, order, &info); index = __fragmentation_index(order, &info); seq_printf(m, "%2d.%03d ", index / 1000, index % 1000); -- cgit From 5e0a760b44417f7cadd79de2204d6247109558a0 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 28 Dec 2023 17:47:04 +0300 Subject: mm, treewide: rename MAX_ORDER to MAX_PAGE_ORDER commit 23baf831a32c ("mm, treewide: redefine MAX_ORDER sanely") has changed the definition of MAX_ORDER to be inclusive. This has caused issues with code that was not yet upstream and depended on the previous definition. To draw attention to the altered meaning of the define, rename MAX_ORDER to MAX_PAGE_ORDER. Link: https://lkml.kernel.org/r/20231228144704.14033-2-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Cc: Linus Torvalds Signed-off-by: Andrew Morton --- mm/vmstat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/vmstat.c') diff --git a/mm/vmstat.c b/mm/vmstat.c index 03ead31c46a0..db79935e4a54 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1092,7 +1092,7 @@ static int __fragmentation_index(unsigned int order, struct contig_page_info *in { unsigned long requested = 1UL << order; - if (WARN_ON_ONCE(order > MAX_ORDER)) + if (WARN_ON_ONCE(order > MAX_PAGE_ORDER)) return 0; if (!info->free_blocks_total) -- cgit