summaryrefslogtreecommitdiff
path: root/mm/vmscan.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--mm/vmscan.c96
1 files changed, 72 insertions, 24 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 1ecc648b6191..e411385b304a 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -200,6 +200,29 @@ static bool sane_reclaim(struct scan_control *sc)
#endif
return false;
}
+
+static void set_memcg_congestion(pg_data_t *pgdat,
+ struct mem_cgroup *memcg,
+ bool congested)
+{
+ struct mem_cgroup_per_node *mn;
+
+ if (!memcg)
+ return;
+
+ mn = mem_cgroup_nodeinfo(memcg, pgdat->node_id);
+ WRITE_ONCE(mn->congested, congested);
+}
+
+static bool memcg_congested(pg_data_t *pgdat,
+ struct mem_cgroup *memcg)
+{
+ struct mem_cgroup_per_node *mn;
+
+ mn = mem_cgroup_nodeinfo(memcg, pgdat->node_id);
+ return READ_ONCE(mn->congested);
+
+}
#else
static bool global_reclaim(struct scan_control *sc)
{
@@ -210,6 +233,18 @@ static bool sane_reclaim(struct scan_control *sc)
{
return true;
}
+
+static inline void set_memcg_congestion(struct pglist_data *pgdat,
+ struct mem_cgroup *memcg, bool congested)
+{
+}
+
+static inline bool memcg_congested(struct pglist_data *pgdat,
+ struct mem_cgroup *memcg)
+{
+ return false;
+
+}
#endif
/*
@@ -2474,6 +2509,12 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat,
return true;
}
+static bool pgdat_memcg_congested(pg_data_t *pgdat, struct mem_cgroup *memcg)
+{
+ return test_bit(PGDAT_CONGESTED, &pgdat->flags) ||
+ (memcg && memcg_congested(pgdat, memcg));
+}
+
static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
{
struct reclaim_state *reclaim_state = current->reclaim_state;
@@ -2556,29 +2597,27 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
if (sc->nr_reclaimed - nr_reclaimed)
reclaimable = true;
- /*
- * If reclaim is isolating dirty pages under writeback, it
- * implies that the long-lived page allocation rate is exceeding
- * the page laundering rate. Either the global limits are not
- * being effective at throttling processes due to the page
- * distribution throughout zones or there is heavy usage of a
- * slow backing device. The only option is to throttle from
- * reclaim context which is not ideal as there is no guarantee
- * the dirtying process is throttled in the same way
- * balance_dirty_pages() manages.
- *
- * Once a node is flagged PGDAT_WRITEBACK, kswapd will count the
- * number of pages under pages flagged for immediate reclaim and
- * stall if any are encountered in the nr_immediate check below.
- */
- if (sc->nr.writeback && sc->nr.writeback == sc->nr.taken)
- set_bit(PGDAT_WRITEBACK, &pgdat->flags);
+ if (current_is_kswapd()) {
+ /*
+ * If reclaim is isolating dirty pages under writeback,
+ * it implies that the long-lived page allocation rate
+ * is exceeding the page laundering rate. Either the
+ * global limits are not being effective at throttling
+ * processes due to the page distribution throughout
+ * zones or there is heavy usage of a slow backing
+ * device. The only option is to throttle from reclaim
+ * context which is not ideal as there is no guarantee
+ * the dirtying process is throttled in the same way
+ * balance_dirty_pages() manages.
+ *
+ * Once a node is flagged PGDAT_WRITEBACK, kswapd will
+ * count the number of pages under pages flagged for
+ * immediate reclaim and stall if any are encountered
+ * in the nr_immediate check below.
+ */
+ if (sc->nr.writeback && sc->nr.writeback == sc->nr.taken)
+ set_bit(PGDAT_WRITEBACK, &pgdat->flags);
- /*
- * Legacy memcg will stall in page writeback so avoid forcibly
- * stalling here.
- */
- if (sane_reclaim(sc)) {
/*
* Tag a node as congested if all the dirty pages
* scanned were backed by a congested BDI and
@@ -2602,14 +2641,22 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
}
/*
+ * Legacy memcg will stall in page writeback so avoid forcibly
+ * stalling in wait_iff_congested().
+ */
+ if (!global_reclaim(sc) && sane_reclaim(sc) &&
+ sc->nr.dirty && sc->nr.dirty == sc->nr.congested)
+ set_memcg_congestion(pgdat, root, true);
+
+ /*
* Stall direct reclaim for IO completions if underlying BDIs
* and node is congested. Allow kswapd to continue until it
* starts encountering unqueued dirty pages or cycling through
* the LRU too quickly.
*/
if (!sc->hibernation_mode && !current_is_kswapd() &&
- current_may_throttle())
- wait_iff_congested(pgdat, BLK_RW_ASYNC, HZ/10);
+ current_may_throttle() && pgdat_memcg_congested(pgdat, root))
+ wait_iff_congested(BLK_RW_ASYNC, HZ/10);
} while (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed,
sc->nr_scanned - nr_scanned, sc));
@@ -2826,6 +2873,7 @@ retry:
continue;
last_pgdat = zone->zone_pgdat;
snapshot_refaults(sc->target_mem_cgroup, zone->zone_pgdat);
+ set_memcg_congestion(last_pgdat, sc->target_mem_cgroup, false);
}
delayacct_freepages_end();