summaryrefslogtreecommitdiff
path: root/mm/vmscan.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--mm/vmscan.c78
1 files changed, 37 insertions, 41 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 7bb23ff229b6..70aa739c6b68 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -87,6 +87,7 @@ struct scan_control {
/* The highest zone to isolate pages for reclaim from */
enum zone_type reclaim_idx;
+ /* Writepage batching in laptop mode; RECLAIM_WRITE */
unsigned int may_writepage:1;
/* Can mapped pages be reclaimed? */
@@ -1055,6 +1056,15 @@ static unsigned long shrink_page_list(struct list_head *page_list,
* throttling so we could easily OOM just because too many
* pages are in writeback and there is nothing else to
* reclaim. Wait for the writeback to complete.
+ *
+ * In cases 1) and 2) we activate the pages to get them out of
+ * the way while we continue scanning for clean pages on the
+ * inactive list and refilling from the active list. The
+ * observation here is that waiting for disk writes is more
+ * expensive than potentially causing reloads down the line.
+ * Since they're marked for immediate reclaim, they won't put
+ * memory pressure on the cache working set any longer than it
+ * takes to write them to disk.
*/
if (PageWriteback(page)) {
/* Case 1 above */
@@ -1062,7 +1072,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
PageReclaim(page) &&
test_bit(PGDAT_WRITEBACK, &pgdat->flags)) {
nr_immediate++;
- goto keep_locked;
+ goto activate_locked;
/* Case 2 above */
} else if (sane_reclaim(sc) ||
@@ -1080,7 +1090,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
*/
SetPageReclaim(page);
nr_writeback++;
- goto keep_locked;
+ goto activate_locked;
/* Case 3 above */
} else {
@@ -1152,13 +1162,18 @@ static unsigned long shrink_page_list(struct list_head *page_list,
if (PageDirty(page)) {
/*
- * Only kswapd can writeback filesystem pages to
- * avoid risk of stack overflow but only writeback
- * if many dirty pages have been encountered.
+ * Only kswapd can writeback filesystem pages
+ * to avoid risk of stack overflow. But avoid
+ * injecting inefficient single-page IO into
+ * flusher writeback as much as possible: only
+ * write pages when we've encountered many
+ * dirty pages, and when we've already scanned
+ * the rest of the LRU for clean pages and see
+ * the same dirty pages again (PageReclaim).
*/
if (page_is_file_cache(page) &&
- (!current_is_kswapd() ||
- !test_bit(PGDAT_DIRTY, &pgdat->flags))) {
+ (!current_is_kswapd() || !PageReclaim(page) ||
+ !test_bit(PGDAT_DIRTY, &pgdat->flags))) {
/*
* Immediately reclaim when written back.
* Similar in principal to deactivate_page()
@@ -1168,7 +1183,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
inc_node_page_state(page, NR_VMSCAN_IMMEDIATE);
SetPageReclaim(page);
- goto keep_locked;
+ goto activate_locked;
}
if (references == PAGEREF_RECLAIM_CLEAN)
@@ -1373,13 +1388,10 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode)
* wants to isolate pages it will be able to operate on without
* blocking - clean pages for the most part.
*
- * ISOLATE_CLEAN means that only clean pages should be isolated. This
- * is used by reclaim when it is cannot write to backing storage
- *
* ISOLATE_ASYNC_MIGRATE is used to indicate that it only wants to pages
* that it is possible to migrate without blocking
*/
- if (mode & (ISOLATE_CLEAN|ISOLATE_ASYNC_MIGRATE)) {
+ if (mode & ISOLATE_ASYNC_MIGRATE) {
/* All the caller can do on PageWriteback is block */
if (PageWriteback(page))
return ret;
@@ -1387,10 +1399,6 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode)
if (PageDirty(page)) {
struct address_space *mapping;
- /* ISOLATE_CLEAN means only clean pages */
- if (mode & ISOLATE_CLEAN)
- return ret;
-
/*
* Only pages without mappings or that have a
* ->migratepage callback are possible to migrate
@@ -1731,8 +1739,6 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
if (!sc->may_unmap)
isolate_mode |= ISOLATE_UNMAPPED;
- if (!sc->may_writepage)
- isolate_mode |= ISOLATE_CLEAN;
spin_lock_irq(&pgdat->lru_lock);
@@ -1806,12 +1812,20 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
/*
* If dirty pages are scanned that are not queued for IO, it
- * implies that flushers are not keeping up. In this case, flag
- * the pgdat PGDAT_DIRTY and kswapd will start writing pages from
- * reclaim context.
+ * implies that flushers are not doing their job. This can
+ * happen when memory pressure pushes dirty pages to the end of
+ * the LRU before the dirty limits are breached and the dirty
+ * data has expired. It can also happen when the proportion of
+ * dirty pages grows not through writes but through memory
+ * pressure reclaiming all the clean cache. And in some cases,
+ * the flushers simply cannot keep up with the allocation
+ * rate. Nudge the flusher threads in case they are asleep, but
+ * also allow kswapd to start writing pages during reclaim.
*/
- if (stat.nr_unqueued_dirty == nr_taken)
+ if (stat.nr_unqueued_dirty == nr_taken) {
+ wakeup_flusher_threads(0, WB_REASON_VMSCAN);
set_bit(PGDAT_DIRTY, &pgdat->flags);
+ }
/*
* If kswapd scans pages marked marked for immediate
@@ -1929,8 +1943,6 @@ static void shrink_active_list(unsigned long nr_to_scan,
if (!sc->may_unmap)
isolate_mode |= ISOLATE_UNMAPPED;
- if (!sc->may_writepage)
- isolate_mode |= ISOLATE_CLEAN;
spin_lock_irq(&pgdat->lru_lock);
@@ -2759,8 +2771,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
struct scan_control *sc)
{
int initial_priority = sc->priority;
- unsigned long total_scanned = 0;
- unsigned long writeback_threshold;
retry:
delayacct_freepages_start();
@@ -2773,7 +2783,6 @@ retry:
sc->nr_scanned = 0;
shrink_zones(zonelist, sc);
- total_scanned += sc->nr_scanned;
if (sc->nr_reclaimed >= sc->nr_to_reclaim)
break;
@@ -2786,20 +2795,6 @@ retry:
*/
if (sc->priority < DEF_PRIORITY - 2)
sc->may_writepage = 1;
-
- /*
- * Try to write back as many pages as we just scanned. This
- * tends to cause slow streaming writers to write data to the
- * disk smoothly, at the dirtying rate, which is nice. But
- * that's undesirable in laptop mode, where we *want* lumpy
- * writeout. So in laptop mode, write out the whole world.
- */
- writeback_threshold = sc->nr_to_reclaim + sc->nr_to_reclaim / 2;
- if (total_scanned > writeback_threshold) {
- wakeup_flusher_threads(laptop_mode ? 0 : total_scanned,
- WB_REASON_TRY_TO_FREE_PAGES);
- sc->may_writepage = 1;
- }
} while (--sc->priority >= 0);
delayacct_freepages_end();
@@ -3101,6 +3096,7 @@ static bool zone_balanced(struct zone *zone, int order, int classzone_idx)
*/
clear_bit(PGDAT_CONGESTED, &zone->zone_pgdat->flags);
clear_bit(PGDAT_DIRTY, &zone->zone_pgdat->flags);
+ clear_bit(PGDAT_WRITEBACK, &zone->zone_pgdat->flags);
return true;
}