Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--  mm/vmscan.c  132
1 file changed, 45 insertions(+), 87 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index c05eb9efec07..b06868fc4926 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -919,7 +919,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
* exceptional entries and shadow exceptional entries in the
* same address_space.
*/
- if (reclaimed && page_is_file_cache(page) &&
+ if (reclaimed && page_is_file_lru(page) &&
!mapping_exiting(mapping) && !dax_mapping(mapping))
shadow = workingset_eviction(page, target_memcg);
__delete_from_page_cache(page, shadow);
@@ -1043,7 +1043,7 @@ static void page_check_dirty_writeback(struct page *page,
* Anonymous pages are not handled by flushers and must be written
* from reclaim context. Do not stall reclaim based on them
*/
- if (!page_is_file_cache(page) ||
+ if (!page_is_file_lru(page) ||
(PageAnon(page) && !PageSwapBacked(page))) {
*dirty = false;
*writeback = false;
@@ -1084,9 +1084,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
while (!list_empty(page_list)) {
struct address_space *mapping;
struct page *page;
- int may_enter_fs;
enum page_references references = PAGEREF_RECLAIM;
- bool dirty, writeback;
+ bool dirty, writeback, may_enter_fs;
unsigned int nr_pages;
cond_resched();
@@ -1267,7 +1266,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
goto activate_locked_split;
}
- may_enter_fs = 1;
+ may_enter_fs = true;
/* Adding to swap updated mapping */
mapping = page_mapping(page);
@@ -1316,7 +1315,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
* the rest of the LRU for clean pages and see
* the same dirty pages again (PageReclaim).
*/
- if (page_is_file_cache(page) &&
+ if (page_is_file_lru(page) &&
(!current_is_kswapd() || !PageReclaim(page) ||
!test_bit(PGDAT_DIRTY, &pgdat->flags))) {
/*
@@ -1460,7 +1459,7 @@ activate_locked:
try_to_free_swap(page);
VM_BUG_ON_PAGE(PageActive(page), page);
if (!PageMlocked(page)) {
- int type = page_is_file_cache(page);
+ int type = page_is_file_lru(page);
SetPageActive(page);
stat->nr_activate[type] += nr_pages;
count_memcg_page_event(page, PGACTIVATE);
@@ -1498,7 +1497,7 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
LIST_HEAD(clean_pages);
list_for_each_entry_safe(page, next, page_list, lru) {
- if (page_is_file_cache(page) && !PageDirty(page) &&
+ if (page_is_file_lru(page) && !PageDirty(page) &&
!__PageMovable(page) && !PageUnevictable(page)) {
ClearPageActive(page);
list_move(&page->lru, &clean_pages);
@@ -2054,7 +2053,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
* IO, plus JVM can create lots of anon VM_EXEC pages,
* so we ignore them here.
*/
- if ((vm_flags & VM_EXEC) && page_is_file_cache(page)) {
+ if ((vm_flags & VM_EXEC) && page_is_file_lru(page)) {
list_add(&page->lru, &l_active);
continue;
}
@@ -2096,7 +2095,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
unsigned long reclaim_pages(struct list_head *page_list)
{
- int nid = -1;
+ int nid = NUMA_NO_NODE;
unsigned long nr_reclaimed = 0;
LIST_HEAD(node_page_list);
struct reclaim_stat dummy_stat;
@@ -2111,7 +2110,7 @@ unsigned long reclaim_pages(struct list_head *page_list)
while (!list_empty(page_list)) {
page = lru_to_page(page_list);
- if (nid == -1) {
+ if (nid == NUMA_NO_NODE) {
nid = page_to_nid(page);
INIT_LIST_HEAD(&node_page_list);
}
@@ -2132,7 +2131,7 @@ unsigned long reclaim_pages(struct list_head *page_list)
putback_lru_page(page);
}
- nid = -1;
+ nid = NUMA_NO_NODE;
}
if (!list_empty(&node_page_list)) {
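
The reclaim_pages() hunks above use NUMA_NO_NODE as the "no batch open" sentinel
while grouping consecutive pages from the same node and flushing the group when
the node changes. A minimal userspace sketch of that batch-and-flush pattern
(the array, constant, and helper here are local to the example, not kernel API):

#include <stddef.h>
#include <stdio.h>

#define NO_NODE	-1	/* stands in for NUMA_NO_NODE in this sketch */

static void flush_batch(int nid, int count)
{
	if (count)
		printf("reclaim %d pages from node %d\n", count, nid);
}

int main(void)
{
	int page_nid[] = { 0, 0, 1, 1, 1, 0 };	/* node of each queued page */
	int nid = NO_NODE;
	int batch = 0;

	for (size_t i = 0; i < sizeof(page_nid) / sizeof(page_nid[0]); i++) {
		if (nid == NO_NODE)
			nid = page_nid[i];

		if (page_nid[i] == nid) {
			batch++;		/* same node: keep batching */
			continue;
		}

		flush_batch(nid, batch);	/* node changed: flush batch */
		nid = page_nid[i];
		batch = 1;
	}

	flush_batch(nid, batch);		/* flush the trailing batch */
	return 0;
}
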
@@ -2415,19 +2414,20 @@ out:
/*
* Scan types proportional to swappiness and
* their relative recent reclaim efficiency.
- * Make sure we don't miss the last page
- * because of a round-off error.
+ * Make sure we don't miss the last page on
+ * the offlined memory cgroups because of a
+ * round-off error.
*/
- scan = DIV64_U64_ROUND_UP(scan * fraction[file],
+ scan = mem_cgroup_online(memcg) ?
+ div64_u64(scan * fraction[file], denominator) :
+ DIV64_U64_ROUND_UP(scan * fraction[file],
denominator);
break;
case SCAN_FILE:
case SCAN_ANON:
/* Scan one type exclusively */
- if ((scan_balance == SCAN_FILE) != file) {
- lruvec_size = 0;
+ if ((scan_balance == SCAN_FILE) != file)
scan = 0;
- }
break;
default:
/* Look ma, no brain */
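
The hunk above keeps DIV64_U64_ROUND_UP() only for offlined memcgs, so their
last remaining page is not lost to round-off, while online memcgs now round
down. A minimal userspace sketch of the two rounding modes (the helper names
are local to this example, not kernel API):

#include <inttypes.h>
#include <stdio.h>

/* Round-down division, as div64_u64() behaves for online memcgs. */
static uint64_t div_round_down(uint64_t x, uint64_t d)
{
	return x / d;
}

/* Round-up division, as DIV64_U64_ROUND_UP() behaves for offlined memcgs. */
static uint64_t div_round_up(uint64_t x, uint64_t d)
{
	return (x + d - 1) / d;
}

int main(void)
{
	uint64_t scan = 5, fraction = 1, denominator = 3;

	/* online memcg: 5 * 1 / 3 rounds down to 1, leaving a remainder */
	printf("online:  %" PRIu64 "\n",
	       div_round_down(scan * fraction, denominator));
	/* offlined memcg: rounds up to 2, so the last page is still scanned */
	printf("offline: %" PRIu64 "\n",
	       div_round_up(scan * fraction, denominator));
	return 0;
}
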
@@ -3093,7 +3093,6 @@ retry:
if (sc->memcg_low_skipped) {
sc->priority = initial_priority;
sc->force_deactivate = 0;
- sc->skipped_deactivate = 0;
sc->memcg_low_reclaim = 1;
sc->memcg_low_skipped = 0;
goto retry;
@@ -3133,8 +3132,9 @@ static bool allow_direct_reclaim(pg_data_t *pgdat)
/* kswapd must be awake if processes are being throttled */
if (!wmark_ok && waitqueue_active(&pgdat->kswapd_wait)) {
- pgdat->kswapd_classzone_idx = min(pgdat->kswapd_classzone_idx,
- (enum zone_type)ZONE_NORMAL);
+ if (READ_ONCE(pgdat->kswapd_classzone_idx) > ZONE_NORMAL)
+ WRITE_ONCE(pgdat->kswapd_classzone_idx, ZONE_NORMAL);
+
wake_up_interruptible(&pgdat->kswapd_wait);
}
@@ -3766,9 +3766,9 @@ out:
static enum zone_type kswapd_classzone_idx(pg_data_t *pgdat,
enum zone_type prev_classzone_idx)
{
- if (pgdat->kswapd_classzone_idx == MAX_NR_ZONES)
- return prev_classzone_idx;
- return pgdat->kswapd_classzone_idx;
+ enum zone_type curr_idx = READ_ONCE(pgdat->kswapd_classzone_idx);
+
+ return curr_idx == MAX_NR_ZONES ? prev_classzone_idx : curr_idx;
}
static void kswapd_try_to_sleep(pg_data_t *pgdat, int alloc_order, int reclaim_order,
@@ -3812,8 +3812,11 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int alloc_order, int reclaim_o
* the previous request that slept prematurely.
*/
if (remaining) {
- pgdat->kswapd_classzone_idx = kswapd_classzone_idx(pgdat, classzone_idx);
- pgdat->kswapd_order = max(pgdat->kswapd_order, reclaim_order);
+ WRITE_ONCE(pgdat->kswapd_classzone_idx,
+ kswapd_classzone_idx(pgdat, classzone_idx));
+
+ if (READ_ONCE(pgdat->kswapd_order) < reclaim_order)
+ WRITE_ONCE(pgdat->kswapd_order, reclaim_order);
}
finish_wait(&pgdat->kswapd_wait, &wait);
@@ -3890,12 +3893,12 @@ static int kswapd(void *p)
tsk->flags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
set_freezable();
- pgdat->kswapd_order = 0;
- pgdat->kswapd_classzone_idx = MAX_NR_ZONES;
+ WRITE_ONCE(pgdat->kswapd_order, 0);
+ WRITE_ONCE(pgdat->kswapd_classzone_idx, MAX_NR_ZONES);
for ( ; ; ) {
bool ret;
- alloc_order = reclaim_order = pgdat->kswapd_order;
+ alloc_order = reclaim_order = READ_ONCE(pgdat->kswapd_order);
classzone_idx = kswapd_classzone_idx(pgdat, classzone_idx);
kswapd_try_sleep:
@@ -3903,10 +3906,10 @@ kswapd_try_sleep:
classzone_idx);
/* Read the new order and classzone_idx */
- alloc_order = reclaim_order = pgdat->kswapd_order;
+ alloc_order = reclaim_order = READ_ONCE(pgdat->kswapd_order);
classzone_idx = kswapd_classzone_idx(pgdat, classzone_idx);
- pgdat->kswapd_order = 0;
- pgdat->kswapd_classzone_idx = MAX_NR_ZONES;
+ WRITE_ONCE(pgdat->kswapd_order, 0);
+ WRITE_ONCE(pgdat->kswapd_classzone_idx, MAX_NR_ZONES);
ret = try_to_freeze();
if (kthread_should_stop())
@@ -3950,20 +3953,23 @@ void wakeup_kswapd(struct zone *zone, gfp_t gfp_flags, int order,
enum zone_type classzone_idx)
{
pg_data_t *pgdat;
+ enum zone_type curr_idx;
if (!managed_zone(zone))
return;
if (!cpuset_zone_allowed(zone, gfp_flags))
return;
+
pgdat = zone->zone_pgdat;
+ curr_idx = READ_ONCE(pgdat->kswapd_classzone_idx);
+
+ if (curr_idx == MAX_NR_ZONES || curr_idx < classzone_idx)
+ WRITE_ONCE(pgdat->kswapd_classzone_idx, classzone_idx);
+
+ if (READ_ONCE(pgdat->kswapd_order) < order)
+ WRITE_ONCE(pgdat->kswapd_order, order);
- if (pgdat->kswapd_classzone_idx == MAX_NR_ZONES)
- pgdat->kswapd_classzone_idx = classzone_idx;
- else
- pgdat->kswapd_classzone_idx = max(pgdat->kswapd_classzone_idx,
- classzone_idx);
- pgdat->kswapd_order = max(pgdat->kswapd_order, order);
if (!waitqueue_active(&pgdat->kswapd_wait))
return;
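
The wakeup path above replaces the plain max()/assignment updates with a
read-once, compare, conditionally-write-once sequence, so the unlocked accesses
shared with kswapd are done as single, non-torn loads and stores. A minimal
userspace sketch of that publish/consume pattern, modeling READ_ONCE() and
WRITE_ONCE() with C11 relaxed atomics (names are local to this example):

#include <stdatomic.h>
#include <stdio.h>

/* Stand-in for pgdat->kswapd_order, updated without a lock. */
static _Atomic unsigned int pending_order;

/* Waker side: raise the pending order, never lower it. */
static void publish_order(unsigned int order)
{
	if (atomic_load_explicit(&pending_order, memory_order_relaxed) < order)
		atomic_store_explicit(&pending_order, order,
				      memory_order_relaxed);
}

/* kswapd side: read the pending order once, then reset it. */
static unsigned int consume_order(void)
{
	unsigned int order;

	order = atomic_load_explicit(&pending_order, memory_order_relaxed);
	atomic_store_explicit(&pending_order, 0, memory_order_relaxed);
	return order;
}

int main(void)
{
	publish_order(3);
	publish_order(1);			/* lower request is ignored */
	printf("%u\n", consume_order());	/* prints 3 */
	return 0;
}
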
@@ -4027,27 +4033,6 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
}
#endif /* CONFIG_HIBERNATION */
-/* It's optimal to keep kswapds on the same CPUs as their memory, but
- not required for correctness. So if the last cpu in a node goes
- away, we get changed to run anywhere: as the first one comes back,
- restore their cpu bindings. */
-static int kswapd_cpu_online(unsigned int cpu)
-{
- int nid;
-
- for_each_node_state(nid, N_MEMORY) {
- pg_data_t *pgdat = NODE_DATA(nid);
- const struct cpumask *mask;
-
- mask = cpumask_of_node(pgdat->node_id);
-
- if (cpumask_any_and(cpu_online_mask, mask) < nr_cpu_ids)
- /* One of our CPUs online: restore mask */
- set_cpus_allowed_ptr(pgdat->kswapd, mask);
- }
- return 0;
-}
-
/*
* This kswapd start function will be called by init and node-hot-add.
* On node-hot-add, kswapd will moved to proper cpus if cpus are hot-added.
@@ -4087,15 +4072,11 @@ void kswapd_stop(int nid)
static int __init kswapd_init(void)
{
- int nid, ret;
+ int nid;
swap_setup();
for_each_node_state(nid, N_MEMORY)
kswapd_run(nid);
- ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
- "mm/vmscan:online", kswapd_cpu_online,
- NULL);
- WARN_ON(ret < 0);
return 0;
}
@@ -4274,29 +4255,6 @@ int node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order)
}
#endif
-/*
- * page_evictable - test whether a page is evictable
- * @page: the page to test
- *
- * Test whether page is evictable--i.e., should be placed on active/inactive
- * lists vs unevictable list.
- *
- * Reasons page might not be evictable:
- * (1) page's mapping marked unevictable
- * (2) page is part of an mlocked VMA
- *
- */
-int page_evictable(struct page *page)
-{
- int ret;
-
- /* Prevent address_space of inode and swap cache from being freed */
- rcu_read_lock();
- ret = !mapping_unevictable(page_mapping(page)) && !PageMlocked(page);
- rcu_read_unlock();
- return ret;
-}
-
/**
* check_move_unevictable_pages - check pages for evictability and move to
* appropriate zone lru list