 include/linux/mmzone.h | 18 +++++++++++++++---
 mm/vmscan.c            | 19 ++++++++++++-------
 2 files changed, 27 insertions(+), 10 deletions(-)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 3e822335f214..d863698a84e0 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -293,9 +293,21 @@ static inline bool is_active_lru(enum lru_list lru)
#define ANON_AND_FILE 2

enum lruvec_flags {
-        LRUVEC_CONGESTED,               /* lruvec has many dirty pages
-                                         * backed by a congested BDI
-                                         */
+        /*
+         * An lruvec has many dirty pages backed by a congested BDI:
+         * 1. LRUVEC_CGROUP_CONGESTED is set by cgroup-level reclaim.
+         *    It can be cleared by cgroup reclaim or kswapd.
+         * 2. LRUVEC_NODE_CONGESTED is set by kswapd node-level reclaim.
+         *    It can only be cleared by kswapd.
+         *
+         * Essentially, kswapd can unthrottle an lruvec throttled by cgroup
+         * reclaim, but not vice versa. This only applies to the root cgroup.
+         * The goal is to prevent cgroup reclaim on the root cgroup (e.g.
+         * memory.reclaim) from unthrottling an unbalanced node (that was
+         * throttled by kswapd).
+         */
+        LRUVEC_CGROUP_CONGESTED,
+        LRUVEC_NODE_CONGESTED,
};

#endif /* !__GENERATING_BOUNDS_H */
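
To make the ownership rules in the new comment concrete, here is a minimal user-space sketch of the state model it describes. This is not kernel code: the flags live in a plain unsigned long, ordinary bit operations stand in for the kernel's atomic set_bit()/clear_bit(), and the helper names (cgroup_reclaim_set(), kswapd_clear(), ...) are illustrative, not kernel APIs.

/*
 * User-space sketch of who may set and clear each congestion bit.
 * Plain bit operations stand in for the kernel's atomic bitops.
 */
#include <stdbool.h>
#include <stdio.h>

#define LRUVEC_CGROUP_CONGESTED (1UL << 0)      /* set by cgroup-level reclaim */
#define LRUVEC_NODE_CONGESTED   (1UL << 1)      /* set by kswapd node-level reclaim */

/* Cgroup reclaim may only touch its own bit. */
static void cgroup_reclaim_set(unsigned long *flags)
{
        *flags |= LRUVEC_CGROUP_CONGESTED;
}

static void cgroup_reclaim_clear(unsigned long *flags)
{
        *flags &= ~LRUVEC_CGROUP_CONGESTED;
}

/* kswapd sets the node bit and may clear both bits. */
static void kswapd_set(unsigned long *flags)
{
        *flags |= LRUVEC_NODE_CONGESTED;
}

static void kswapd_clear(unsigned long *flags)
{
        *flags &= ~(LRUVEC_CGROUP_CONGESTED | LRUVEC_NODE_CONGESTED);
}

/* Direct reclaim is throttled while either bit remains set. */
static bool congested(unsigned long flags)
{
        return flags & (LRUVEC_CGROUP_CONGESTED | LRUVEC_NODE_CONGESTED);
}

int main(void)
{
        unsigned long flags = 0;

        /* kswapd can unthrottle an lruvec that cgroup reclaim throttled... */
        cgroup_reclaim_set(&flags);
        kswapd_clear(&flags);
        printf("after kswapd clear:  congested=%d\n", congested(flags));

        /* ...but cgroup reclaim cannot unthrottle a node throttled by kswapd. */
        kswapd_set(&flags);
        cgroup_reclaim_clear(&flags);
        printf("after cgroup clear:  congested=%d\n", congested(flags));
        return 0;
}

It prints congested=0 after kswapd clears a cgroup-throttled lruvec and congested=1 after cgroup reclaim tries to clear a kswapd-throttled one, which is the asymmetry the comment calls out.
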
diff --git a/mm/vmscan.c b/mm/vmscan.c
index b7068be8a034..1080209a568b 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -6578,10 +6578,13 @@ again:
         * Legacy memcg will stall in page writeback so avoid forcibly
         * stalling in reclaim_throttle().
         */
-        if ((current_is_kswapd() ||
-             (cgroup_reclaim(sc) && writeback_throttling_sane(sc))) &&
-            sc->nr.dirty && sc->nr.dirty == sc->nr.congested)
-                set_bit(LRUVEC_CONGESTED, &target_lruvec->flags);
+        if (sc->nr.dirty && sc->nr.dirty == sc->nr.congested) {
+                if (cgroup_reclaim(sc) && writeback_throttling_sane(sc))
+                        set_bit(LRUVEC_CGROUP_CONGESTED, &target_lruvec->flags);
+
+                if (current_is_kswapd())
+                        set_bit(LRUVEC_NODE_CONGESTED, &target_lruvec->flags);
+        }

        /*
         * Stall direct reclaim for IO completions if the lruvec is
@@ -6591,7 +6594,8 @@ again:
         */
        if (!current_is_kswapd() && current_may_throttle() &&
            !sc->hibernation_mode &&
-           test_bit(LRUVEC_CONGESTED, &target_lruvec->flags))
+           (test_bit(LRUVEC_CGROUP_CONGESTED, &target_lruvec->flags) ||
+            test_bit(LRUVEC_NODE_CONGESTED, &target_lruvec->flags)))
                reclaim_throttle(pgdat, VMSCAN_THROTTLE_CONGESTED);

        if (should_continue_reclaim(pgdat, nr_node_reclaimed, sc))
@@ -6848,7 +6852,7 @@ retry:

                        lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup,
                                                   zone->zone_pgdat);
-                       clear_bit(LRUVEC_CONGESTED, &lruvec->flags);
+                       clear_bit(LRUVEC_CGROUP_CONGESTED, &lruvec->flags);
                }
        }

@@ -7237,7 +7241,8 @@ static void clear_pgdat_congested(pg_data_t *pgdat)
{
        struct lruvec *lruvec = mem_cgroup_lruvec(NULL, pgdat);

-       clear_bit(LRUVEC_CONGESTED, &lruvec->flags);
+       clear_bit(LRUVEC_NODE_CONGESTED, &lruvec->flags);
+       clear_bit(LRUVEC_CGROUP_CONGESTED, &lruvec->flags);
        clear_bit(PGDAT_DIRTY, &pgdat->flags);
        clear_bit(PGDAT_WRITEBACK, &pgdat->flags);
}
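
As a rough cross-check of the mm/vmscan.c hunks above, the sketch below reduces the new tagging and throttling decisions to two small user-space helpers. The struct reclaim_ctx fields and helper names are simplified stand-ins rather than the kernel's API (writeback_throttling_sane() and the hibernation check are omitted); the point is only that each reclaimer now sets its own bit and that direct reclaim throttles while either bit is set.

/*
 * Simplified user-space approximation of the tagging/throttling logic in
 * shrink_node() after this patch. Fields and helpers are illustrative only.
 */
#include <stdbool.h>
#include <stdio.h>

#define LRUVEC_CGROUP_CONGESTED (1UL << 0)
#define LRUVEC_NODE_CONGESTED   (1UL << 1)

struct reclaim_ctx {
        bool cgroup_reclaim;            /* reclaim on behalf of a cgroup */
        bool is_kswapd;                 /* reclaim done by kswapd */
        unsigned long nr_dirty;
        unsigned long nr_congested;
};

/* Mirror of the new tagging block: each reclaimer sets only its own bit. */
static void tag_congested(const struct reclaim_ctx *ctx, unsigned long *flags)
{
        if (!ctx->nr_dirty || ctx->nr_dirty != ctx->nr_congested)
                return;

        if (ctx->cgroup_reclaim)
                *flags |= LRUVEC_CGROUP_CONGESTED;
        if (ctx->is_kswapd)
                *flags |= LRUVEC_NODE_CONGESTED;
}

/* Mirror of the new throttle test: direct reclaim stalls if either bit is set. */
static bool should_throttle(const struct reclaim_ctx *ctx, unsigned long flags)
{
        return !ctx->is_kswapd &&
               (flags & (LRUVEC_CGROUP_CONGESTED | LRUVEC_NODE_CONGESTED));
}

int main(void)
{
        unsigned long flags = 0;
        struct reclaim_ctx kswapd = {
                .is_kswapd = true, .nr_dirty = 32, .nr_congested = 32,
        };
        struct reclaim_ctx direct = { .nr_dirty = 0, .nr_congested = 0 };

        tag_congested(&kswapd, &flags);         /* kswapd tags the node */
        flags &= ~LRUVEC_CGROUP_CONGESTED;      /* root proactive reclaim clears only its bit */

        printf("direct reclaim throttles: %d\n", should_throttle(&direct, flags));
        return 0;
}

Here kswapd tags the node, root-level proactive reclaim then clears only LRUVEC_CGROUP_CONGESTED, and the final line prints 1: direct reclaim is still throttled, i.e. cgroup reclaim on the root cgroup can no longer unthrottle an unbalanced node.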