Diffstat (limited to 'mm/compaction.c')
-rw-r--r--  mm/compaction.c  334
1 file changed, 200 insertions(+), 134 deletions(-)
diff --git a/mm/compaction.c b/mm/compaction.c
index c8bcdea15f5f..dbc9f86b1934 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -229,6 +229,33 @@ static void reset_cached_positions(struct zone *zone)
pageblock_start_pfn(zone_end_pfn(zone) - 1);
}
+#ifdef CONFIG_SPARSEMEM
+/*
+ * If the PFN falls into an offline section, return the start PFN of the
+ * next online section. If the PFN falls into an online section or if
+ * there is no next online section, return 0.
+ */
+static unsigned long skip_offline_sections(unsigned long start_pfn)
+{
+ unsigned long start_nr = pfn_to_section_nr(start_pfn);
+
+ if (online_section_nr(start_nr))
+ return 0;
+
+ while (++start_nr <= __highest_present_section_nr) {
+ if (online_section_nr(start_nr))
+ return section_nr_to_pfn(start_nr);
+ }
+
+ return 0;
+}
+#else
+static unsigned long skip_offline_sections(unsigned long start_pfn)
+{
+ return 0;
+}
+#endif
+
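The new helper is consumed in the isolate_migratepages() hunk further down. A minimal caller sketch of the intended pattern, with hypothetical names that are not part of the patch: advance the migration scan cursor past an offline section, clamped to the scan limit.

/* Hypothetical sketch: advance a scan cursor past an offline section. */
static unsigned long advance_scan_pfn(unsigned long pfn, unsigned long limit_pfn)
{
	unsigned long next_pfn = skip_offline_sections(pfn);

	/* 0 means pfn is already in an online section (or none follow). */
	if (next_pfn)
		return min(next_pfn, limit_pfn);
	return pfn;
}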
/*
* Compound pages of >= pageblock_order should consistently be skipped until
* released. It is always pointless to compact pages of such order (if they are
@@ -392,18 +419,14 @@ void reset_isolation_suitable(pg_data_t *pgdat)
* Sets the pageblock skip bit if it was clear. Note that this is a hint as
* locks are not required for read/writers. Returns true if it was already set.
*/
-static bool test_and_set_skip(struct compact_control *cc, struct page *page,
- unsigned long pfn)
+static bool test_and_set_skip(struct compact_control *cc, struct page *page)
{
bool skip;
- /* Do no update if skip hint is being ignored */
+ /* Do not update if skip hint is being ignored */
if (cc->ignore_skip_hint)
return false;
- if (!pageblock_aligned(pfn))
- return false;
-
skip = get_pageblock_skip(page);
if (!skip && !cc->no_set_skip_hint)
set_pageblock_skip(page);
@@ -440,9 +463,6 @@ static void update_pageblock_skip(struct compact_control *cc,
if (cc->no_set_skip_hint)
return;
- if (!page)
- return;
-
set_pageblock_skip(page);
/* Update where async and sync compaction should restart */
@@ -470,8 +490,7 @@ static void update_cached_migrate(struct compact_control *cc, unsigned long pfn)
{
}
-static bool test_and_set_skip(struct compact_control *cc, struct page *page,
- unsigned long pfn)
+static bool test_and_set_skip(struct compact_control *cc, struct page *page)
{
return false;
}
@@ -745,8 +764,9 @@ isolate_freepages_range(struct compact_control *cc,
}
/* Similar to reclaim, but different enough that they don't share logic */
-static bool too_many_isolated(pg_data_t *pgdat)
+static bool too_many_isolated(struct compact_control *cc)
{
+ pg_data_t *pgdat = cc->zone->zone_pgdat;
bool too_many;
unsigned long active, inactive, isolated;
@@ -758,6 +778,17 @@ static bool too_many_isolated(pg_data_t *pgdat)
isolated = node_page_state(pgdat, NR_ISOLATED_FILE) +
node_page_state(pgdat, NR_ISOLATED_ANON);
+ /*
+ * Allow GFP_NOFS to isolate past the limit set for regular
+ * compaction runs. This prevents an ABBA deadlock when other
+ * compactors have already isolated to the limit, but are
+ * blocked on filesystem locks held by the GFP_NOFS thread.
+ */
+ if (cc->gfp_mask & __GFP_FS) {
+ inactive >>= 3;
+ active >>= 3;
+ }
+
too_many = isolated > (inactive + active) / 2;
if (!too_many)
wake_throttle_isolated(pgdat);
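With the shift applied, a __GFP_FS compactor sees an effective isolation ceiling of (inactive + active) / 16, while a GFP_NOFS compactor keeps the original (inactive + active) / 2, leaving it headroom even when regular compactors have saturated their lower limit. A standalone sketch of the arithmetic, with illustrative numbers only:

/* Illustrative only: effective isolation ceilings for the two cases. */
static unsigned long isolation_ceiling(unsigned long active,
				       unsigned long inactive, bool gfp_fs)
{
	if (gfp_fs) {		/* regular compactor: __GFP_FS set */
		inactive >>= 3;
		active >>= 3;
	}
	return (inactive + active) / 2;
}

/*
 * Example: active = inactive = 800 pages. A __GFP_FS caller is throttled
 * once more than 100 pages are isolated; a GFP_NOFS caller only at 800.
 */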
@@ -791,6 +822,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
struct lruvec *lruvec;
unsigned long flags = 0;
struct lruvec *locked = NULL;
+ struct folio *folio = NULL;
struct page *page = NULL, *valid_page = NULL;
struct address_space *mapping;
unsigned long start_pfn = low_pfn;
@@ -806,7 +838,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
* list by either parallel reclaimers or compaction. If there are,
* delay for some time until fewer pages are isolated
*/
- while (unlikely(too_many_isolated(pgdat))) {
+ while (unlikely(too_many_isolated(cc))) {
/* stop isolation if there are still pages not migrated */
if (cc->nr_migratepages)
return -EAGAIN;
@@ -887,7 +919,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
if (!valid_page && pageblock_aligned(low_pfn)) {
if (!isolation_suitable(cc, page)) {
low_pfn = end_pfn;
- page = NULL;
+ folio = NULL;
goto isolate_abort;
}
valid_page = page;
@@ -919,7 +951,8 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
* Hugepage was successfully isolated and placed
* on the cc->migratepages list.
*/
- low_pfn += compound_nr(page) - 1;
+ folio = page_folio(page);
+ low_pfn += folio_nr_pages(folio) - 1;
goto isolate_success_no_list;
}
@@ -987,8 +1020,10 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
locked = NULL;
}
- if (isolate_movable_page(page, mode))
+ if (isolate_movable_page(page, mode)) {
+ folio = page_folio(page);
goto isolate_success;
+ }
}
goto isolate_fail;
@@ -999,7 +1034,8 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
* sure the page is not being freed elsewhere -- the
* page release code relies on it.
*/
- if (unlikely(!get_page_unless_zero(page)))
+ folio = folio_get_nontail_page(page);
+ if (unlikely(!folio))
goto isolate_fail;
/*
@@ -1007,8 +1043,8 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
* so avoid taking lru_lock and isolating it unnecessarily in an
* admittedly racy check.
*/
- mapping = page_mapping(page);
- if (!mapping && (page_count(page) - 1) > total_mapcount(page))
+ mapping = folio_mapping(folio);
+ if (!mapping && (folio_ref_count(folio) - 1) > folio_mapcount(folio))
goto isolate_fail_put;
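The page reference is now taken through folio_get_nontail_page(), which returns NULL for tail pages and for pages whose refcount has already dropped to zero, and the racy "extra references" heuristic is expressed with folio accessors. A self-contained sketch of that heuristic (hypothetical helper name; the logic mirrors the lines above):

/*
 * Hypothetical helper: after taking one reference ourselves, a folio
 * with no mapping but more references than mappings is likely pinned,
 * so isolating it for migration would be wasted work.
 */
static bool folio_maybe_pinned_for_compaction(struct folio *folio)
{
	return !folio_mapping(folio) &&
	       (folio_ref_count(folio) - 1) > folio_mapcount(folio);
}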
/*
@@ -1019,11 +1055,11 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
goto isolate_fail_put;
/* Only take pages on LRU: a check now makes later tests safe */
- if (!PageLRU(page))
+ if (!folio_test_lru(folio))
goto isolate_fail_put;
/* Compaction might skip unevictable pages but CMA takes them */
- if (!(mode & ISOLATE_UNEVICTABLE) && PageUnevictable(page))
+ if (!(mode & ISOLATE_UNEVICTABLE) && folio_test_unevictable(folio))
goto isolate_fail_put;
/*
@@ -1032,10 +1068,10 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
* it will be able to migrate without blocking - clean pages
* for the most part. PageWriteback would require blocking.
*/
- if ((mode & ISOLATE_ASYNC_MIGRATE) && PageWriteback(page))
+ if ((mode & ISOLATE_ASYNC_MIGRATE) && folio_test_writeback(folio))
goto isolate_fail_put;
- if ((mode & ISOLATE_ASYNC_MIGRATE) && PageDirty(page)) {
+ if ((mode & ISOLATE_ASYNC_MIGRATE) && folio_test_dirty(folio)) {
bool migrate_dirty;
/*
@@ -1047,22 +1083,22 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
* the page lock until after the page is removed
* from the page cache.
*/
- if (!trylock_page(page))
+ if (!folio_trylock(folio))
goto isolate_fail_put;
- mapping = page_mapping(page);
+ mapping = folio_mapping(folio);
migrate_dirty = !mapping ||
mapping->a_ops->migrate_folio;
- unlock_page(page);
+ folio_unlock(folio);
if (!migrate_dirty)
goto isolate_fail_put;
}
- /* Try isolate the page */
- if (!TestClearPageLRU(page))
+ /* Try isolate the folio */
+ if (!folio_test_clear_lru(folio))
goto isolate_fail_put;
- lruvec = folio_lruvec(page_folio(page));
+ lruvec = folio_lruvec(folio);
/* If we already hold the lock, we can skip some rechecking */
if (lruvec != locked) {
@@ -1072,44 +1108,49 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
compact_lock_irqsave(&lruvec->lru_lock, &flags, cc);
locked = lruvec;
- lruvec_memcg_debug(lruvec, page_folio(page));
+ lruvec_memcg_debug(lruvec, folio);
- /* Try get exclusive access under lock */
- if (!skip_updated) {
+ /*
+ * Try get exclusive access under lock. If marked for
+ * skip, the scan is aborted unless the current context
+ * is a rescan to reach the end of the pageblock.
+ */
+ if (!skip_updated && valid_page) {
skip_updated = true;
- if (test_and_set_skip(cc, page, low_pfn))
+ if (test_and_set_skip(cc, valid_page) &&
+ !cc->finish_pageblock) {
goto isolate_abort;
+ }
}
/*
- * Page become compound since the non-locked check,
- * and it's on LRU. It can only be a THP so the order
- * is safe to read and it's 0 for tail pages.
+ * folio become large since the non-locked check,
+ * and it's on LRU.
*/
- if (unlikely(PageCompound(page) && !cc->alloc_contig)) {
- low_pfn += compound_nr(page) - 1;
- nr_scanned += compound_nr(page) - 1;
- SetPageLRU(page);
+ if (unlikely(folio_test_large(folio) && !cc->alloc_contig)) {
+ low_pfn += folio_nr_pages(folio) - 1;
+ nr_scanned += folio_nr_pages(folio) - 1;
+ folio_set_lru(folio);
goto isolate_fail_put;
}
}
- /* The whole page is taken off the LRU; skip the tail pages. */
- if (PageCompound(page))
- low_pfn += compound_nr(page) - 1;
+ /* The folio is taken off the LRU */
+ if (folio_test_large(folio))
+ low_pfn += folio_nr_pages(folio) - 1;
/* Successfully isolated */
- del_page_from_lru_list(page, lruvec);
- mod_node_page_state(page_pgdat(page),
- NR_ISOLATED_ANON + page_is_file_lru(page),
- thp_nr_pages(page));
+ lruvec_del_folio(lruvec, folio);
+ node_stat_mod_folio(folio,
+ NR_ISOLATED_ANON + folio_is_file_lru(folio),
+ folio_nr_pages(folio));
isolate_success:
- list_add(&page->lru, &cc->migratepages);
+ list_add(&folio->lru, &cc->migratepages);
isolate_success_no_list:
- cc->nr_migratepages += compound_nr(page);
- nr_isolated += compound_nr(page);
- nr_scanned += compound_nr(page) - 1;
+ cc->nr_migratepages += folio_nr_pages(folio);
+ nr_isolated += folio_nr_pages(folio);
+ nr_scanned += folio_nr_pages(folio) - 1;
/*
* Avoid isolating too much unless this block is being
@@ -1131,7 +1172,7 @@ isolate_fail_put:
unlock_page_lruvec_irqrestore(locked, flags);
locked = NULL;
}
- put_page(page);
+ folio_put(folio);
isolate_fail:
if (!skip_on_failure && ret != -ENOMEM)
@@ -1172,14 +1213,14 @@ isolate_fail:
if (unlikely(low_pfn > end_pfn))
low_pfn = end_pfn;
- page = NULL;
+ folio = NULL;
isolate_abort:
if (locked)
unlock_page_lruvec_irqrestore(locked, flags);
- if (page) {
- SetPageLRU(page);
- put_page(page);
+ if (folio) {
+ folio_set_lru(folio);
+ folio_put(folio);
}
/*
@@ -1191,7 +1232,7 @@ isolate_abort:
* rescanned twice in a row.
*/
if (low_pfn == end_pfn && (!nr_isolated || cc->finish_pageblock)) {
- if (valid_page && !skip_updated)
+ if (!cc->no_set_skip_hint && valid_page && !skip_updated)
set_pageblock_skip(valid_page);
update_cached_migrate(cc, low_pfn);
}
@@ -1379,7 +1420,7 @@ fast_isolate_around(struct compact_control *cc, unsigned long pfn)
isolate_freepages_block(cc, &start_pfn, end_pfn, &cc->freepages, 1, false);
/* Skip this pageblock in the future as it's full or nearly full */
- if (cc->nr_freepages < cc->nr_migratepages)
+ if (start_pfn == end_pfn)
set_pageblock_skip(page);
return;
@@ -1403,11 +1444,10 @@ static int next_search_order(struct compact_control *cc, int order)
return order;
}
-static unsigned long
-fast_isolate_freepages(struct compact_control *cc)
+static void fast_isolate_freepages(struct compact_control *cc)
{
unsigned int limit = max(1U, freelist_scan_limit(cc) >> 1);
- unsigned int nr_scanned = 0;
+ unsigned int nr_scanned = 0, total_isolated = 0;
unsigned long low_pfn, min_pfn, highest = 0;
unsigned long nr_isolated = 0;
unsigned long distance;
@@ -1417,7 +1457,7 @@ fast_isolate_freepages(struct compact_control *cc)
/* Full compaction passes in a negative order */
if (cc->order <= 0)
- return cc->free_pfn;
+ return;
/*
* If starting the scan, use a deeper search and use the highest
@@ -1506,6 +1546,7 @@ fast_isolate_freepages(struct compact_control *cc)
set_page_private(page, order);
nr_isolated = 1 << order;
nr_scanned += nr_isolated - 1;
+ total_isolated += nr_isolated;
cc->nr_freepages += nr_isolated;
list_add_tail(&page->lru, &cc->freepages);
count_compact_events(COMPACTISOLATED, nr_isolated);
@@ -1518,6 +1559,10 @@ fast_isolate_freepages(struct compact_control *cc)
spin_unlock_irqrestore(&cc->zone->lock, flags);
+ /* Skip fast search if enough freepages isolated */
+ if (cc->nr_freepages >= cc->nr_migratepages)
+ break;
+
/*
* Smaller scan on next order so the total scan is related
* to freelist_scan_limit.
@@ -1526,6 +1571,9 @@ fast_isolate_freepages(struct compact_control *cc)
limit = max(1U, limit >> 1);
}
+ trace_mm_compaction_fast_isolate_freepages(min_pfn, cc->free_pfn,
+ nr_scanned, total_isolated);
+
if (!page) {
cc->fast_search_fail++;
if (scan_start) {
@@ -1556,11 +1604,10 @@ fast_isolate_freepages(struct compact_control *cc)
cc->total_free_scanned += nr_scanned;
if (!page)
- return cc->free_pfn;
+ return;
low_pfn = page_to_pfn(page);
fast_isolate_around(cc, low_pfn);
- return low_pfn;
}
/*
@@ -1684,11 +1731,10 @@ splitmap:
* This is a migrate-callback that "allocates" freepages by taking pages
* from the isolated freelists in the block we are migrating to.
*/
-static struct page *compaction_alloc(struct page *migratepage,
- unsigned long data)
+static struct folio *compaction_alloc(struct folio *src, unsigned long data)
{
struct compact_control *cc = (struct compact_control *)data;
- struct page *freepage;
+ struct folio *dst;
if (list_empty(&cc->freepages)) {
isolate_freepages(cc);
@@ -1697,11 +1743,11 @@ static struct page *compaction_alloc(struct page *migratepage,
return NULL;
}
- freepage = list_entry(cc->freepages.next, struct page, lru);
- list_del(&freepage->lru);
+ dst = list_entry(cc->freepages.next, struct folio, lru);
+ list_del(&dst->lru);
cc->nr_freepages--;
- return freepage;
+ return dst;
}
/*
@@ -1709,11 +1755,11 @@ static struct page *compaction_alloc(struct page *migratepage,
* freelist. All pages on the freelist are from the same zone, so there is no
* special handling needed for NUMA.
*/
-static void compaction_free(struct page *page, unsigned long data)
+static void compaction_free(struct folio *dst, unsigned long data)
{
struct compact_control *cc = (struct compact_control *)data;
- list_add(&page->lru, &cc->freepages);
+ list_add(&dst->lru, &cc->freepages);
cc->nr_freepages++;
}
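These two callbacks are handed to migrate_pages() from compact_zone(), so with this change both sides of that interface speak struct folio. A hedged sketch of the call site, which is not shown in this diff and whose argument list is assumed from the same kernel series:

/* Sketch: how compact_zone() is expected to wire up the callbacks. */
err = migrate_pages(&cc->migratepages, compaction_alloc,
		    compaction_free, (unsigned long)cc, cc->mode,
		    MR_COMPACTION, &nr_succeeded);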
@@ -1736,6 +1782,7 @@ static int sysctl_compact_unevictable_allowed __read_mostly = CONFIG_COMPACT_UNE
*/
static unsigned int __read_mostly sysctl_compaction_proactiveness = 20;
static int sysctl_extfrag_threshold = 500;
+static int __read_mostly sysctl_compact_memory;
static inline void
update_fast_start_pfn(struct compact_control *cc, unsigned long pfn)
@@ -1864,7 +1911,6 @@ static unsigned long fast_find_migrateblock(struct compact_control *cc)
pfn = cc->zone->zone_start_pfn;
cc->fast_search_fail = 0;
found_block = true;
- set_pageblock_skip(freepage);
break;
}
}
@@ -1940,8 +1986,14 @@ static isolate_migrate_t isolate_migratepages(struct compact_control *cc)
page = pageblock_pfn_to_page(block_start_pfn,
block_end_pfn, cc->zone);
- if (!page)
+ if (!page) {
+ unsigned long next_pfn;
+
+ next_pfn = skip_offline_sections(block_start_pfn);
+ if (next_pfn)
+ block_end_pfn = min(next_pfn, cc->free_pfn);
continue;
+ }
/*
* If isolation recently failed, do not retry. Only check the
@@ -2193,25 +2245,11 @@ static enum compact_result compact_finished(struct compact_control *cc)
return ret;
}
-static enum compact_result __compaction_suitable(struct zone *zone, int order,
- unsigned int alloc_flags,
- int highest_zoneidx,
- unsigned long wmark_target)
+static bool __compaction_suitable(struct zone *zone, int order,
+ int highest_zoneidx,
+ unsigned long wmark_target)
{
unsigned long watermark;
-
- if (is_via_compact_memory(order))
- return COMPACT_CONTINUE;
-
- watermark = wmark_pages(zone, alloc_flags & ALLOC_WMARK_MASK);
- /*
- * If watermarks for high-order allocation are already met, there
- * should be no need for compaction at all.
- */
- if (zone_watermark_ok(zone, order, watermark, highest_zoneidx,
- alloc_flags))
- return COMPACT_SUCCESS;
-
/*
* Watermarks for order-0 must be met for compaction to be able to
* isolate free pages for migration targets. This means that the
@@ -2229,29 +2267,20 @@ static enum compact_result __compaction_suitable(struct zone *zone, int order,
watermark = (order > PAGE_ALLOC_COSTLY_ORDER) ?
low_wmark_pages(zone) : min_wmark_pages(zone);
watermark += compact_gap(order);
- if (!__zone_watermark_ok(zone, 0, watermark, highest_zoneidx,
- ALLOC_CMA, wmark_target))
- return COMPACT_SKIPPED;
-
- return COMPACT_CONTINUE;
+ return __zone_watermark_ok(zone, 0, watermark, highest_zoneidx,
+ ALLOC_CMA, wmark_target);
}
/*
* compaction_suitable: Is this suitable to run compaction on this zone now?
- * Returns
- * COMPACT_SKIPPED - If there are too few free pages for compaction
- * COMPACT_SUCCESS - If the allocation would succeed without compaction
- * COMPACT_CONTINUE - If compaction should run now
*/
-enum compact_result compaction_suitable(struct zone *zone, int order,
- unsigned int alloc_flags,
- int highest_zoneidx)
+bool compaction_suitable(struct zone *zone, int order, int highest_zoneidx)
{
- enum compact_result ret;
- int fragindex;
+ enum compact_result compact_result;
+ bool suitable;
- ret = __compaction_suitable(zone, order, alloc_flags, highest_zoneidx,
- zone_page_state(zone, NR_FREE_PAGES));
+ suitable = __compaction_suitable(zone, order, highest_zoneidx,
+ zone_page_state(zone, NR_FREE_PAGES));
/*
* fragmentation index determines if allocation failures are due to
* low memory or external fragmentation
@@ -2268,17 +2297,24 @@ enum compact_result compaction_suitable(struct zone *zone, int order,
* excessive compaction for costly orders, but it should not be at the
* expense of system stability.
*/
- if (ret == COMPACT_CONTINUE && (order > PAGE_ALLOC_COSTLY_ORDER)) {
- fragindex = fragmentation_index(zone, order);
- if (fragindex >= 0 && fragindex <= sysctl_extfrag_threshold)
- ret = COMPACT_NOT_SUITABLE_ZONE;
+ if (suitable) {
+ compact_result = COMPACT_CONTINUE;
+ if (order > PAGE_ALLOC_COSTLY_ORDER) {
+ int fragindex = fragmentation_index(zone, order);
+
+ if (fragindex >= 0 &&
+ fragindex <= sysctl_extfrag_threshold) {
+ suitable = false;
+ compact_result = COMPACT_NOT_SUITABLE_ZONE;
+ }
+ }
+ } else {
+ compact_result = COMPACT_SKIPPED;
}
- trace_mm_compaction_suitable(zone, order, ret);
- if (ret == COMPACT_NOT_SUITABLE_ZONE)
- ret = COMPACT_SKIPPED;
+ trace_mm_compaction_suitable(zone, order, compact_result);
- return ret;
+ return suitable;
}
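compaction_suitable() no longer folds in the "allocation would already succeed" case; callers perform that watermark check themselves and treat a false return as COMPACT_SKIPPED. The compact_zone() and kcompactd hunks below follow this pattern; a condensed caller sketch using the names from the patch:

/* Sketch of the new caller contract. */
watermark = wmark_pages(zone, alloc_flags & ALLOC_WMARK_MASK);
if (zone_watermark_ok(zone, order, watermark, highest_zoneidx, alloc_flags))
	return COMPACT_SUCCESS;		/* allocation can already succeed */

if (!compaction_suitable(zone, order, highest_zoneidx))
	return COMPACT_SKIPPED;		/* too few free pages to compact */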
bool compaction_zonelist_suitable(struct alloc_context *ac, int order,
@@ -2294,7 +2330,6 @@ bool compaction_zonelist_suitable(struct alloc_context *ac, int order,
for_each_zone_zonelist_nodemask(zone, z, ac->zonelist,
ac->highest_zoneidx, ac->nodemask) {
unsigned long available;
- enum compact_result compact_result;
/*
* Do not consider all the reclaimable memory because we do not
@@ -2304,9 +2339,8 @@ bool compaction_zonelist_suitable(struct alloc_context *ac, int order,
*/
available = zone_reclaimable_pages(zone) / order;
available += zone_page_state_snapshot(zone, NR_FREE_PAGES);
- compact_result = __compaction_suitable(zone, order, alloc_flags,
- ac->highest_zoneidx, available);
- if (compact_result == COMPACT_CONTINUE)
+ if (__compaction_suitable(zone, order, ac->highest_zoneidx,
+ available))
return true;
}
@@ -2336,11 +2370,22 @@ compact_zone(struct compact_control *cc, struct capture_control *capc)
INIT_LIST_HEAD(&cc->migratepages);
cc->migratetype = gfp_migratetype(cc->gfp_mask);
- ret = compaction_suitable(cc->zone, cc->order, cc->alloc_flags,
- cc->highest_zoneidx);
- /* Compaction is likely to fail */
- if (ret == COMPACT_SUCCESS || ret == COMPACT_SKIPPED)
- return ret;
+
+ if (!is_via_compact_memory(cc->order)) {
+ unsigned long watermark;
+
+ /* Allocation can already succeed, nothing to do */
+ watermark = wmark_pages(cc->zone,
+ cc->alloc_flags & ALLOC_WMARK_MASK);
+ if (zone_watermark_ok(cc->zone, cc->order, watermark,
+ cc->highest_zoneidx, cc->alloc_flags))
+ return COMPACT_SUCCESS;
+
+ /* Compaction is likely to fail */
+ if (!compaction_suitable(cc->zone, cc->order,
+ cc->highest_zoneidx))
+ return COMPACT_SKIPPED;
+ }
/*
* Clear pageblock skip if there were failures recently and compaction
@@ -2456,7 +2501,8 @@ rescan:
}
/*
* If an ASYNC or SYNC_LIGHT fails to migrate a page
- * within the current order-aligned block, scan the
+ * within the current order-aligned block and
+ * fast_find_migrateblock may be used then scan the
* remainder of the pageblock. This will mark the
* pageblock "skip" to avoid rescanning in the near
* future. This will isolate more pages than necessary
@@ -2464,8 +2510,9 @@ rescan:
* fast_find_migrateblock revisiting blocks that were
* recently partially scanned.
*/
- if (cc->direct_compaction && !cc->finish_pageblock &&
- (cc->mode < MIGRATE_SYNC)) {
+ if (!pageblock_aligned(cc->migrate_pfn) &&
+ !cc->ignore_skip_hint && !cc->finish_pageblock &&
+ (cc->mode < MIGRATE_SYNC)) {
cc->finish_pageblock = true;
/*
@@ -2780,6 +2827,15 @@ static int compaction_proactiveness_sysctl_handler(struct ctl_table *table, int
static int sysctl_compaction_handler(struct ctl_table *table, int write,
void *buffer, size_t *length, loff_t *ppos)
{
+ int ret;
+
+ ret = proc_dointvec(table, write, buffer, length, ppos);
+ if (ret)
+ return ret;
+
+ if (sysctl_compact_memory != 1)
+ return -EINVAL;
+
if (write)
compact_nodes();
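With this handler change, only a write of "1" to /proc/sys/vm/compact_memory triggers compaction; any other parsed value is rejected with -EINVAL. An illustrative userspace sketch (not part of the patch) of triggering global compaction:

/* Illustrative userspace sketch: trigger global memory compaction. */
#include <fcntl.h>
#include <unistd.h>

static int trigger_compaction(void)
{
	int fd = open("/proc/sys/vm/compact_memory", O_WRONLY);
	ssize_t ret;

	if (fd < 0)
		return -1;
	ret = write(fd, "1", 1);	/* only "1" is accepted now */
	close(fd);
	return ret == 1 ? 0 : -1;
}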
@@ -2833,8 +2889,14 @@ static bool kcompactd_node_suitable(pg_data_t *pgdat)
if (!populated_zone(zone))
continue;
- if (compaction_suitable(zone, pgdat->kcompactd_max_order, 0,
- highest_zoneidx) == COMPACT_CONTINUE)
+ /* Allocation can already succeed, check other zones */
+ if (zone_watermark_ok(zone, pgdat->kcompactd_max_order,
+ min_wmark_pages(zone),
+ highest_zoneidx, 0))
+ continue;
+
+ if (compaction_suitable(zone, pgdat->kcompactd_max_order,
+ highest_zoneidx))
return true;
}
@@ -2871,8 +2933,12 @@ static void kcompactd_do_work(pg_data_t *pgdat)
if (compaction_deferred(zone, cc.order))
continue;
- if (compaction_suitable(zone, cc.order, 0, zoneid) !=
- COMPACT_CONTINUE)
+ /* Allocation can already succeed, nothing to do */
+ if (zone_watermark_ok(zone, cc.order,
+ min_wmark_pages(zone), zoneid, 0))
+ continue;
+
+ if (!compaction_suitable(zone, cc.order, zoneid))
continue;
if (kthread_should_stop())
@@ -3021,7 +3087,7 @@ static int kcompactd(void *p)
* This kcompactd start function will be called by init and node-hot-add.
* On node-hot-add, kcompactd will moved to proper cpus if cpus are hot-added.
*/
-void kcompactd_run(int nid)
+void __meminit kcompactd_run(int nid)
{
pg_data_t *pgdat = NODE_DATA(nid);
@@ -3039,7 +3105,7 @@ void kcompactd_run(int nid)
* Called by memory hotplug when all memory in a node is offlined. Caller must
* be holding mem_hotplug_begin/done().
*/
-void kcompactd_stop(int nid)
+void __meminit kcompactd_stop(int nid)
{
struct task_struct *kcompactd = NODE_DATA(nid)->kcompactd;
@@ -3095,7 +3161,7 @@ static int proc_dointvec_minmax_warn_RT_change(struct ctl_table *table,
static struct ctl_table vm_compaction[] = {
{
.procname = "compact_memory",
- .data = NULL,
+ .data = &sysctl_compact_memory,
.maxlen = sizeof(int),
.mode = 0200,
.proc_handler = sysctl_compaction_handler,