summary | refs | log | tree | commit | diff
path: root/mm/vmscan.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r-- mm/vmscan.c | 94
1 file changed, 74 insertions, 20 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 7a30150b4dee..e8cb983a8c84 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2006,6 +2006,8 @@ static void shrink_active_list(unsigned long nr_to_scan,
* Both inactive lists should also be large enough that each inactive
* page has a chance to be referenced again before it is reclaimed.
*
+ * If that fails and refaulting is observed, the inactive list grows.
+ *
* The inactive_ratio is the target ratio of ACTIVE to INACTIVE pages
* on this LRU, maintained by the pageout code. A zone->inactive_ratio
* of 3 means 3:1 or 25% of the pages are kept on the inactive list.
@@ -2022,12 +2024,15 @@ static void shrink_active_list(unsigned long nr_to_scan,
* 10TB 320 32GB
*/
static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
- struct scan_control *sc, bool trace)
+ struct mem_cgroup *memcg,
+ struct scan_control *sc, bool actual_reclaim)
{
- unsigned long inactive_ratio;
- unsigned long inactive, active;
- enum lru_list inactive_lru = file * LRU_FILE;
enum lru_list active_lru = file * LRU_FILE + LRU_ACTIVE;
+ struct pglist_data *pgdat = lruvec_pgdat(lruvec);
+ enum lru_list inactive_lru = file * LRU_FILE;
+ unsigned long inactive, active;
+ unsigned long inactive_ratio;
+ unsigned long refaults;
unsigned long gb;
/*
@@ -2040,27 +2045,43 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
inactive = lruvec_lru_size(lruvec, inactive_lru, sc->reclaim_idx);
active = lruvec_lru_size(lruvec, active_lru, sc->reclaim_idx);
- gb = (inactive + active) >> (30 - PAGE_SHIFT);
- if (gb)
- inactive_ratio = int_sqrt(10 * gb);
+ if (memcg)
+ refaults = mem_cgroup_read_stat(memcg,
+ MEMCG_WORKINGSET_ACTIVATE);
else
- inactive_ratio = 1;
+ refaults = node_page_state(pgdat, WORKINGSET_ACTIVATE);
+
+ /*
+ * When refaults are being observed, it means a new workingset
+ * is being established. Disable active list protection to get
+ * rid of the stale workingset quickly.
+ */
+ if (file && actual_reclaim && lruvec->refaults != refaults) {
+ inactive_ratio = 0;
+ } else {
+ gb = (inactive + active) >> (30 - PAGE_SHIFT);
+ if (gb)
+ inactive_ratio = int_sqrt(10 * gb);
+ else
+ inactive_ratio = 1;
+ }
- if (trace)
- trace_mm_vmscan_inactive_list_is_low(lruvec_pgdat(lruvec)->node_id,
- sc->reclaim_idx,
- lruvec_lru_size(lruvec, inactive_lru, MAX_NR_ZONES), inactive,
- lruvec_lru_size(lruvec, active_lru, MAX_NR_ZONES), active,
- inactive_ratio, file);
+ if (actual_reclaim)
+ trace_mm_vmscan_inactive_list_is_low(pgdat->node_id, sc->reclaim_idx,
+ lruvec_lru_size(lruvec, inactive_lru, MAX_NR_ZONES), inactive,
+ lruvec_lru_size(lruvec, active_lru, MAX_NR_ZONES), active,
+ inactive_ratio, file);
return inactive * inactive_ratio < active;
}
static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
- struct lruvec *lruvec, struct scan_control *sc)
+ struct lruvec *lruvec, struct mem_cgroup *memcg,
+ struct scan_control *sc)
{
if (is_active_lru(lru)) {
- if (inactive_list_is_low(lruvec, is_file_lru(lru), sc, true))
+ if (inactive_list_is_low(lruvec, is_file_lru(lru),
+ memcg, sc, true))
shrink_active_list(nr_to_scan, lruvec, sc, lru);
return 0;
}
@@ -2169,7 +2190,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
* lruvec even if it has plenty of old anonymous pages unless the
* system is under heavy pressure.
*/
- if (!inactive_list_is_low(lruvec, true, sc, false) &&
+ if (!inactive_list_is_low(lruvec, true, memcg, sc, false) &&
lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, sc->reclaim_idx) >> sc->priority) {
scan_balance = SCAN_FILE;
goto out;
@@ -2320,7 +2341,7 @@ static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memc
nr[lru] -= nr_to_scan;
nr_reclaimed += shrink_list(lru, nr_to_scan,
- lruvec, sc);
+ lruvec, memcg, sc);
}
}
@@ -2387,7 +2408,7 @@ static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memc
* Even if we did not try to evict anon pages at all, we want to
* rebalance the anon lru active/inactive ratio.
*/
- if (inactive_list_is_low(lruvec, false, sc, true))
+ if (inactive_list_is_low(lruvec, false, memcg, sc, true))
shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
sc, LRU_ACTIVE_ANON);
}
@@ -2703,6 +2724,26 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
sc->gfp_mask = orig_mask;
}
+static void snapshot_refaults(struct mem_cgroup *root_memcg, pg_data_t *pgdat)
+{
+ struct mem_cgroup *memcg;
+
+ memcg = mem_cgroup_iter(root_memcg, NULL, NULL);
+ do {
+ unsigned long refaults;
+ struct lruvec *lruvec;
+
+ if (memcg)
+ refaults = mem_cgroup_read_stat(memcg,
+ MEMCG_WORKINGSET_ACTIVATE);
+ else
+ refaults = node_page_state(pgdat, WORKINGSET_ACTIVATE);
+
+ lruvec = mem_cgroup_lruvec(pgdat, memcg);
+ lruvec->refaults = refaults;
+ } while ((memcg = mem_cgroup_iter(root_memcg, memcg, NULL)));
+}
+
/*
* This is the main entry point to direct page reclaim.
*
@@ -2723,6 +2764,9 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
struct scan_control *sc)
{
int initial_priority = sc->priority;
+ pg_data_t *last_pgdat;
+ struct zoneref *z;
+ struct zone *zone;
retry:
delayacct_freepages_start();
@@ -2749,6 +2793,15 @@ retry:
sc->may_writepage = 1;
} while (--sc->priority >= 0);
+ last_pgdat = NULL;
+ for_each_zone_zonelist_nodemask(zone, z, zonelist, sc->reclaim_idx,
+ sc->nodemask) {
+ if (zone->zone_pgdat == last_pgdat)
+ continue;
+ last_pgdat = zone->zone_pgdat;
+ snapshot_refaults(sc->target_mem_cgroup, zone->zone_pgdat);
+ }
+
delayacct_freepages_end();
if (sc->nr_reclaimed)
@@ -3033,7 +3086,7 @@ static void age_active_anon(struct pglist_data *pgdat,
do {
struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, memcg);
- if (inactive_list_is_low(lruvec, false, sc, true))
+ if (inactive_list_is_low(lruvec, false, memcg, sc, true))
shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
sc, LRU_ACTIVE_ANON);
@@ -3280,6 +3333,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
pgdat->kswapd_failures++;
out:
+ snapshot_refaults(NULL, pgdat);
/*
* Return the order kswapd stopped reclaiming at as
* prepare_kswapd_sleep() takes it into account. If another caller