diff options
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r-- | mm/vmscan.c | 133 |
1 files changed, 124 insertions, 9 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c index 24ab1f7394ab..bd8971a29204 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -88,6 +88,9 @@ struct scan_control { /* Can pages be swapped as part of reclaim? */ unsigned int may_swap:1; + /* e.g. boosted watermark reclaim leaves slabs alone */ + unsigned int may_shrinkslab:1; + /* * Cgroups are not reclaimed below their configured memory.low, * unless we threaten to OOM. If any cgroups are skipped due to @@ -2756,8 +2759,10 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc) shrink_node_memcg(pgdat, memcg, sc, &lru_pages); node_lru_pages += lru_pages; - shrink_slab(sc->gfp_mask, pgdat->node_id, + if (sc->may_shrinkslab) { + shrink_slab(sc->gfp_mask, pgdat->node_id, memcg, sc->priority); + } /* Record the group's reclaim efficiency */ vmpressure(sc->gfp_mask, memcg, false, @@ -3239,6 +3244,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, .may_writepage = !laptop_mode, .may_unmap = 1, .may_swap = 1, + .may_shrinkslab = 1, }; /* @@ -3283,6 +3289,7 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg, .may_unmap = 1, .reclaim_idx = MAX_NR_ZONES - 1, .may_swap = !noswap, + .may_shrinkslab = 1, }; unsigned long lru_pages; @@ -3329,6 +3336,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, .may_writepage = !laptop_mode, .may_unmap = 1, .may_swap = may_swap, + .may_shrinkslab = 1, }; /* @@ -3379,6 +3387,30 @@ static void age_active_anon(struct pglist_data *pgdat, } while (memcg); } +static bool pgdat_watermark_boosted(pg_data_t *pgdat, int classzone_idx) +{ + int i; + struct zone *zone; + + /* + * Check for watermark boosts top-down as the higher zones + * are more likely to be boosted. Both watermarks and boosts + * should not be checked at the time time as reclaim would + * start prematurely when there is no boosting and a lower + * zone is balanced. + */ + for (i = classzone_idx; i >= 0; i--) { + zone = pgdat->node_zones + i; + if (!managed_zone(zone)) + continue; + + if (zone->watermark_boost) + return true; + } + + return false; +} + /* * Returns true if there is an eligible zone balanced for the request order * and classzone_idx @@ -3389,6 +3421,10 @@ static bool pgdat_balanced(pg_data_t *pgdat, int order, int classzone_idx) unsigned long mark = -1; struct zone *zone; + /* + * Check watermarks bottom-up as lower zones are more likely to + * meet watermarks. + */ for (i = 0; i <= classzone_idx; i++) { zone = pgdat->node_zones + i; @@ -3517,14 +3553,14 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx) unsigned long nr_soft_reclaimed; unsigned long nr_soft_scanned; unsigned long pflags; + unsigned long nr_boost_reclaim; + unsigned long zone_boosts[MAX_NR_ZONES] = { 0, }; + bool boosted; struct zone *zone; struct scan_control sc = { .gfp_mask = GFP_KERNEL, .order = order, - .priority = DEF_PRIORITY, - .may_writepage = !laptop_mode, .may_unmap = 1, - .may_swap = 1, }; psi_memstall_enter(&pflags); @@ -3532,9 +3568,28 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx) count_vm_event(PAGEOUTRUN); + /* + * Account for the reclaim boost. Note that the zone boost is left in + * place so that parallel allocations that are near the watermark will + * stall or direct reclaim until kswapd is finished. + */ + nr_boost_reclaim = 0; + for (i = 0; i <= classzone_idx; i++) { + zone = pgdat->node_zones + i; + if (!managed_zone(zone)) + continue; + + nr_boost_reclaim += zone->watermark_boost; + zone_boosts[i] = zone->watermark_boost; + } + boosted = nr_boost_reclaim; + +restart: + sc.priority = DEF_PRIORITY; do { unsigned long nr_reclaimed = sc.nr_reclaimed; bool raise_priority = true; + bool balanced; bool ret; sc.reclaim_idx = classzone_idx; @@ -3561,13 +3616,40 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx) } /* - * Only reclaim if there are no eligible zones. Note that - * sc.reclaim_idx is not used as buffer_heads_over_limit may - * have adjusted it. + * If the pgdat is imbalanced then ignore boosting and preserve + * the watermarks for a later time and restart. Note that the + * zone watermarks will be still reset at the end of balancing + * on the grounds that the normal reclaim should be enough to + * re-evaluate if boosting is required when kswapd next wakes. + */ + balanced = pgdat_balanced(pgdat, sc.order, classzone_idx); + if (!balanced && nr_boost_reclaim) { + nr_boost_reclaim = 0; + goto restart; + } + + /* + * If boosting is not active then only reclaim if there are no + * eligible zones. Note that sc.reclaim_idx is not used as + * buffer_heads_over_limit may have adjusted it. */ - if (pgdat_balanced(pgdat, sc.order, classzone_idx)) + if (!nr_boost_reclaim && balanced) goto out; + /* Limit the priority of boosting to avoid reclaim writeback */ + if (nr_boost_reclaim && sc.priority == DEF_PRIORITY - 2) + raise_priority = false; + + /* + * Do not writeback or swap pages for boosted reclaim. The + * intent is to relieve pressure not issue sub-optimal IO + * from reclaim context. If no pages are reclaimed, the + * reclaim will be aborted. + */ + sc.may_writepage = !laptop_mode && !nr_boost_reclaim; + sc.may_swap = !nr_boost_reclaim; + sc.may_shrinkslab = !nr_boost_reclaim; + /* * Do some background aging of the anon list, to give * pages a chance to be referenced before reclaiming. All @@ -3619,6 +3701,16 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx) * progress in reclaiming pages */ nr_reclaimed = sc.nr_reclaimed - nr_reclaimed; + nr_boost_reclaim -= min(nr_boost_reclaim, nr_reclaimed); + + /* + * If reclaim made no progress for a boost, stop reclaim as + * IO cannot be queued and it could be an infinite loop in + * extreme circumstances. + */ + if (nr_boost_reclaim && !nr_reclaimed) + break; + if (raise_priority || !nr_reclaimed) sc.priority--; } while (sc.priority >= 1); @@ -3627,6 +3719,28 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx) pgdat->kswapd_failures++; out: + /* If reclaim was boosted, account for the reclaim done in this pass */ + if (boosted) { + unsigned long flags; + + for (i = 0; i <= classzone_idx; i++) { + if (!zone_boosts[i]) + continue; + + /* Increments are under the zone lock */ + zone = pgdat->node_zones + i; + spin_lock_irqsave(&zone->lock, flags); + zone->watermark_boost -= min(zone->watermark_boost, zone_boosts[i]); + spin_unlock_irqrestore(&zone->lock, flags); + } + + /* + * As there is now likely space, wakeup kcompact to defragment + * pageblocks. + */ + wakeup_kcompactd(pgdat, pageblock_order, classzone_idx); + } + snapshot_refaults(NULL, pgdat); __fs_reclaim_release(); psi_memstall_leave(&pflags); @@ -3855,7 +3969,8 @@ void wakeup_kswapd(struct zone *zone, gfp_t gfp_flags, int order, /* Hopeless node, leave it to direct reclaim if possible */ if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES || - pgdat_balanced(pgdat, order, classzone_idx)) { + (pgdat_balanced(pgdat, order, classzone_idx) && + !pgdat_watermark_boosted(pgdat, classzone_idx))) { /* * There may be plenty of free memory available, but it's too * fragmented for high-order allocations. Wake up kcompactd |