summaryrefslogtreecommitdiff
path: root/mm/compaction.c
diff options
context:
space:
mode:
authorAlex Shi <alex.shi@linux.alibaba.com>2020-12-15 12:34:29 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2020-12-15 14:48:04 -0800
commit6168d0da2b479ce25a4647de194045de1bdd1f1d (patch)
treed3a8a1d484d5ca32097347ca9b314483b6186fbd /mm/compaction.c
parentfc574c23558c63799dd99a9bb1d62e33708abaf5 (diff)
mm/lru: replace pgdat lru_lock with lruvec lock
This patch moves per node lru_lock into lruvec, thus bring a lru_lock for each of memcg per node. So on a large machine, each of memcg don't have to suffer from per node pgdat->lru_lock competition. They could go fast with their self lru_lock. After move memcg charge before lru inserting, page isolation could serialize page's memcg, then per memcg lruvec lock is stable and could replace per node lru lock. In isolate_migratepages_block(), compact_unlock_should_abort and lock_page_lruvec_irqsave are open coded to work with compact_control. Also add a debug func in locking which may give some clues if there are sth out of hands. Daniel Jordan's testing show 62% improvement on modified readtwice case on his 2P * 10 core * 2 HT broadwell box. https://lore.kernel.org/lkml/20200915165807.kpp7uhiw7l3loofu@ca-dmjordan1.us.oracle.com/ Hugh Dickins helped on the patch polish, thanks! [alex.shi@linux.alibaba.com: fix comment typo] Link: https://lkml.kernel.org/r/5b085715-292a-4b43-50b3-d73dc90d1de5@linux.alibaba.com [alex.shi@linux.alibaba.com: use page_memcg()] Link: https://lkml.kernel.org/r/5a4c2b72-7ee8-2478-fc0e-85eb83aafec4@linux.alibaba.com Link: https://lkml.kernel.org/r/1604566549-62481-18-git-send-email-alex.shi@linux.alibaba.com Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com> Acked-by: Hugh Dickins <hughd@google.com> Acked-by: Johannes Weiner <hannes@cmpxchg.org> Cc: Rong Chen <rong.a.chen@intel.com> Cc: Michal Hocko <mhocko@kernel.org> Cc: Vladimir Davydov <vdavydov.dev@gmail.com> Cc: Yang Shi <yang.shi@linux.alibaba.com> Cc: Matthew Wilcox <willy@infradead.org> Cc: Konstantin Khlebnikov <khlebnikov@yandex-team.ru> Cc: Daniel Jordan <daniel.m.jordan@oracle.com> Cc: Alexander Duyck <alexander.duyck@gmail.com> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: Andrey Ryabinin <aryabinin@virtuozzo.com> Cc: "Huang, Ying" <ying.huang@intel.com> Cc: Jann Horn <jannh@google.com> Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com> Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Cc: Kirill A. Shutemov <kirill@shutemov.name> Cc: Mel Gorman <mgorman@techsingularity.net> Cc: Michal Hocko <mhocko@suse.com> Cc: Mika Penttilä <mika.penttila@nextfour.com> Cc: Minchan Kim <minchan@kernel.org> Cc: Shakeel Butt <shakeelb@google.com> Cc: Tejun Heo <tj@kernel.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: Wei Yang <richard.weiyang@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/compaction.c')
-rw-r--r--mm/compaction.c56
1 files changed, 36 insertions, 20 deletions
diff --git a/mm/compaction.c b/mm/compaction.c
index 50938e66083c..e5acb9714436 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -804,7 +804,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
unsigned long nr_scanned = 0, nr_isolated = 0;
struct lruvec *lruvec;
unsigned long flags = 0;
- bool locked = false;
+ struct lruvec *locked = NULL;
struct page *page = NULL, *valid_page = NULL;
unsigned long start_pfn = low_pfn;
bool skip_on_failure = false;
@@ -868,11 +868,20 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
* contention, to give chance to IRQs. Abort completely if
* a fatal signal is pending.
*/
- if (!(low_pfn % SWAP_CLUSTER_MAX)
- && compact_unlock_should_abort(&pgdat->lru_lock,
- flags, &locked, cc)) {
- low_pfn = 0;
- goto fatal_pending;
+ if (!(low_pfn % SWAP_CLUSTER_MAX)) {
+ if (locked) {
+ unlock_page_lruvec_irqrestore(locked, flags);
+ locked = NULL;
+ }
+
+ if (fatal_signal_pending(current)) {
+ cc->contended = true;
+
+ low_pfn = 0;
+ goto fatal_pending;
+ }
+
+ cond_resched();
}
if (!pfn_valid_within(low_pfn))
@@ -944,9 +953,8 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
if (unlikely(__PageMovable(page)) &&
!PageIsolated(page)) {
if (locked) {
- spin_unlock_irqrestore(&pgdat->lru_lock,
- flags);
- locked = false;
+ unlock_page_lruvec_irqrestore(locked, flags);
+ locked = NULL;
}
if (!isolate_movable_page(page, isolate_mode))
@@ -987,10 +995,19 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
if (!TestClearPageLRU(page))
goto isolate_fail_put;
+ rcu_read_lock();
+ lruvec = mem_cgroup_page_lruvec(page, pgdat);
+
/* If we already hold the lock, we can skip some rechecking */
- if (!locked) {
- locked = compact_lock_irqsave(&pgdat->lru_lock,
- &flags, cc);
+ if (lruvec != locked) {
+ if (locked)
+ unlock_page_lruvec_irqrestore(locked, flags);
+
+ compact_lock_irqsave(&lruvec->lru_lock, &flags, cc);
+ locked = lruvec;
+ rcu_read_unlock();
+
+ lruvec_memcg_debug(lruvec, page);
/* Try get exclusive access under lock */
if (!skip_updated) {
@@ -1009,9 +1026,8 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
SetPageLRU(page);
goto isolate_fail_put;
}
- }
-
- lruvec = mem_cgroup_page_lruvec(page, pgdat);
+ } else
+ rcu_read_unlock();
/* The whole page is taken off the LRU; skip the tail pages. */
if (PageCompound(page))
@@ -1045,8 +1061,8 @@ isolate_success:
isolate_fail_put:
/* Avoid potential deadlock in freeing page under lru_lock */
if (locked) {
- spin_unlock_irqrestore(&pgdat->lru_lock, flags);
- locked = false;
+ unlock_page_lruvec_irqrestore(locked, flags);
+ locked = NULL;
}
put_page(page);
@@ -1061,8 +1077,8 @@ isolate_fail:
*/
if (nr_isolated) {
if (locked) {
- spin_unlock_irqrestore(&pgdat->lru_lock, flags);
- locked = false;
+ unlock_page_lruvec_irqrestore(locked, flags);
+ locked = NULL;
}
putback_movable_pages(&cc->migratepages);
cc->nr_migratepages = 0;
@@ -1090,7 +1106,7 @@ isolate_fail:
isolate_abort:
if (locked)
- spin_unlock_irqrestore(&pgdat->lru_lock, flags);
+ unlock_page_lruvec_irqrestore(locked, flags);
if (page) {
SetPageLRU(page);
put_page(page);