summaryrefslogtreecommitdiff
path: root/mm/page_alloc.c
diff options
context:
space:
mode:
authorNaoya Horiguchi <naoya.horiguchi@nec.com>2022-01-14 14:09:09 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2022-01-15 16:30:31 +0200
commitbf181c582588f8f7406d52f2ee228539b465f173 (patch)
tree4e134888baa6df7f2754621670e028a5690a6a4c /mm/page_alloc.c
parentc9fdc4d5487a16bd1f003fc8b66e91f88efb50e6 (diff)
mm/hwpoison: fix unpoison_memory()
After recent soft-offline rework, error pages can be taken off from buddy allocator, but the existing unpoison_memory() does not properly undo the operation. Moreover, due to the recent change on __get_hwpoison_page(), get_page_unless_zero() is hardly called for hwpoisoned pages. So __get_hwpoison_page() highly likely returns -EBUSY (meaning to fail to grab page refcount) and unpoison just clears PG_hwpoison without releasing a refcount. That does not lead to a critical issue like kernel panic, but unpoisoned pages never get back to buddy (leaked permanently), which is not good. To (partially) fix this, we need to identify "taken off" pages from other types of hwpoisoned pages. We can't use refcount or page flags for this purpose, so a pseudo flag is defined by hacking ->private field. Someone might think that put_page() is enough to cancel taken-off pages, but the normal free path contains some operations not suitable for the current purpose, and can fire VM_BUG_ON(). Note that unpoison_memory() is now supposed to be cancel hwpoison events injected only by madvise() or /sys/devices/system/memory/{hard,soft}_offline_page, not by MCE injection, so please don't try to use unpoison when testing with MCE injection. [lkp@intel.com: report build failure for ARCH=i386] Link: https://lkml.kernel.org/r/20211115084006.3728254-4-naoya.horiguchi@linux.dev Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com> Reviewed-by: Yang Shi <shy828301@gmail.com> Cc: David Hildenbrand <david@redhat.com> Cc: Oscar Salvador <osalvador@suse.de> Cc: Michal Hocko <mhocko@suse.com> Cc: Ding Hui <dinghui@sangfor.com.cn> Cc: Tony Luck <tony.luck@intel.com> Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com> Cc: Miaohe Lin <linmiaohe@huawei.com> Cc: Peter Xu <peterx@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--mm/page_alloc.c27
1 files changed, 27 insertions, 0 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 99fc65c532f0..d4205e5e41d1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -19,6 +19,7 @@
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/swap.h>
+#include <linux/swapops.h>
#include <linux/interrupt.h>
#include <linux/pagemap.h>
#include <linux/jiffies.h>
@@ -9508,6 +9509,7 @@ bool take_page_off_buddy(struct page *page)
del_page_from_free_list(page_head, zone, page_order);
break_down_buddy_pages(zone, page_head, page, 0,
page_order, migratetype);
+ SetPageHWPoisonTakenOff(page);
if (!is_migrate_isolate(migratetype))
__mod_zone_freepage_state(zone, -1, migratetype);
ret = true;
@@ -9519,6 +9521,31 @@ bool take_page_off_buddy(struct page *page)
spin_unlock_irqrestore(&zone->lock, flags);
return ret;
}
+
+/*
+ * Cancel takeoff done by take_page_off_buddy().
+ */
+bool put_page_back_buddy(struct page *page)
+{
+ struct zone *zone = page_zone(page);
+ unsigned long pfn = page_to_pfn(page);
+ unsigned long flags;
+ int migratetype = get_pfnblock_migratetype(page, pfn);
+ bool ret = false;
+
+ spin_lock_irqsave(&zone->lock, flags);
+ if (put_page_testzero(page)) {
+ ClearPageHWPoisonTakenOff(page);
+ __free_one_page(page, pfn, zone, 0, migratetype, FPI_NONE);
+ if (TestClearPageHWPoison(page)) {
+ num_poisoned_pages_dec();
+ ret = true;
+ }
+ }
+ spin_unlock_irqrestore(&zone->lock, flags);
+
+ return ret;
+}
#endif
#ifdef CONFIG_ZONE_DMA