summaryrefslogtreecommitdiff
path: root/mm/memory-failure.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-11-06 14:08:17 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-11-06 14:08:17 -0700
commit512b7931ad0561ffe14265f9ff554a3c081b476b (patch)
treea94450d08468e094d2d92a495de4650faab09c1f /mm/memory-failure.c
parentfe91c4725aeed35023ba4f7a1e1adfebb6878c23 (diff)
parent658f9ae761b5965893727dd4edcdad56e5a439bb (diff)
Merge branch 'akpm' (patches from Andrew)
Merge misc updates from Andrew Morton: "257 patches. Subsystems affected by this patch series: scripts, ocfs2, vfs, and mm (slab-generic, slab, slub, kconfig, dax, kasan, debug, pagecache, gup, swap, memcg, pagemap, mprotect, mremap, iomap, tracing, vmalloc, pagealloc, memory-failure, hugetlb, userfaultfd, vmscan, tools, memblock, oom-kill, hugetlbfs, migration, thp, readahead, nommu, ksm, vmstat, madvise, memory-hotplug, rmap, zsmalloc, highmem, zram, cleanups, kfence, and damon)" * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (257 commits) mm/damon: remove return value from before_terminate callback mm/damon: fix a few spelling mistakes in comments and a pr_debug message mm/damon: simplify stop mechanism Docs/admin-guide/mm/pagemap: wordsmith page flags descriptions Docs/admin-guide/mm/damon/start: simplify the content Docs/admin-guide/mm/damon/start: fix a wrong link Docs/admin-guide/mm/damon/start: fix wrong example commands mm/damon/dbgfs: add adaptive_targets list check before enable monitor_on mm/damon: remove unnecessary variable initialization Documentation/admin-guide/mm/damon: add a document for DAMON_RECLAIM mm/damon: introduce DAMON-based Reclamation (DAMON_RECLAIM) selftests/damon: support watermarks mm/damon/dbgfs: support watermarks mm/damon/schemes: activate schemes based on a watermarks mechanism tools/selftests/damon: update for regions prioritization of schemes mm/damon/dbgfs: support prioritization weights mm/damon/vaddr,paddr: support pageout prioritization mm/damon/schemes: prioritize regions within the quotas mm/damon/selftests: support schemes quotas mm/damon/dbgfs: support quotas of schemes ...
Diffstat (limited to 'mm/memory-failure.c')
-rw-r--r--mm/memory-failure.c111
1 files changed, 76 insertions, 35 deletions
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index ff51edd6e992..f64ebb6226cb 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -39,6 +39,7 @@
#include <linux/kernel-page-flags.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
+#include <linux/dax.h>
#include <linux/ksm.h>
#include <linux/rmap.h>
#include <linux/export.h>
@@ -57,6 +58,7 @@
#include <linux/ratelimit.h>
#include <linux/page-isolation.h>
#include <linux/pagewalk.h>
+#include <linux/shmem_fs.h>
#include "internal.h"
#include "ras/ras_event.h"
@@ -673,7 +675,7 @@ static int hwpoison_hugetlb_range(pte_t *ptep, unsigned long hmask,
#define hwpoison_hugetlb_range NULL
#endif
-static struct mm_walk_ops hwp_walk_ops = {
+static const struct mm_walk_ops hwp_walk_ops = {
.pmd_entry = hwpoison_pte_range,
.hugetlb_entry = hwpoison_hugetlb_range,
};
@@ -806,12 +808,44 @@ static int truncate_error_page(struct page *p, unsigned long pfn,
return ret;
}
+struct page_state {
+ unsigned long mask;
+ unsigned long res;
+ enum mf_action_page_type type;
+
+ /* Callback ->action() has to unlock the relevant page inside it. */
+ int (*action)(struct page_state *ps, struct page *p);
+};
+
+/*
+ * Return true if page is still referenced by others, otherwise return
+ * false.
+ *
+ * The extra_pins is true when one extra refcount is expected.
+ */
+static bool has_extra_refcount(struct page_state *ps, struct page *p,
+ bool extra_pins)
+{
+ int count = page_count(p) - 1;
+
+ if (extra_pins)
+ count -= 1;
+
+ if (count > 0) {
+ pr_err("Memory failure: %#lx: %s still referenced by %d users\n",
+ page_to_pfn(p), action_page_types[ps->type], count);
+ return true;
+ }
+
+ return false;
+}
+
/*
* Error hit kernel page.
* Do nothing, try to be lucky and not touch this instead. For a few cases we
* could be more sophisticated.
*/
-static int me_kernel(struct page *p, unsigned long pfn)
+static int me_kernel(struct page_state *ps, struct page *p)
{
unlock_page(p);
return MF_IGNORED;
@@ -820,9 +854,9 @@ static int me_kernel(struct page *p, unsigned long pfn)
/*
* Page in unknown state. Do nothing.
*/
-static int me_unknown(struct page *p, unsigned long pfn)
+static int me_unknown(struct page_state *ps, struct page *p)
{
- pr_err("Memory failure: %#lx: Unknown page state\n", pfn);
+ pr_err("Memory failure: %#lx: Unknown page state\n", page_to_pfn(p));
unlock_page(p);
return MF_FAILED;
}
@@ -830,10 +864,11 @@ static int me_unknown(struct page *p, unsigned long pfn)
/*
* Clean (or cleaned) page cache page.
*/
-static int me_pagecache_clean(struct page *p, unsigned long pfn)
+static int me_pagecache_clean(struct page_state *ps, struct page *p)
{
int ret;
struct address_space *mapping;
+ bool extra_pins;
delete_from_lru_cache(p);
@@ -863,13 +898,23 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn)
}
/*
+ * The shmem page is kept in page cache instead of truncating
+ * so is expected to have an extra refcount after error-handling.
+ */
+ extra_pins = shmem_mapping(mapping);
+
+ /*
* Truncation is a bit tricky. Enable it per file system for now.
*
* Open: to take i_rwsem or not for this? Right now we don't.
*/
- ret = truncate_error_page(p, pfn, mapping);
+ ret = truncate_error_page(p, page_to_pfn(p), mapping);
+ if (has_extra_refcount(ps, p, extra_pins))
+ ret = MF_FAILED;
+
out:
unlock_page(p);
+
return ret;
}
@@ -878,7 +923,7 @@ out:
* Issues: when the error hit a hole page the error is not properly
* propagated.
*/
-static int me_pagecache_dirty(struct page *p, unsigned long pfn)
+static int me_pagecache_dirty(struct page_state *ps, struct page *p)
{
struct address_space *mapping = page_mapping(p);
@@ -922,7 +967,7 @@ static int me_pagecache_dirty(struct page *p, unsigned long pfn)
mapping_set_error(mapping, -EIO);
}
- return me_pagecache_clean(p, pfn);
+ return me_pagecache_clean(ps, p);
}
/*
@@ -944,9 +989,10 @@ static int me_pagecache_dirty(struct page *p, unsigned long pfn)
* Clean swap cache pages can be directly isolated. A later page fault will
* bring in the known good data from disk.
*/
-static int me_swapcache_dirty(struct page *p, unsigned long pfn)
+static int me_swapcache_dirty(struct page_state *ps, struct page *p)
{
int ret;
+ bool extra_pins = false;
ClearPageDirty(p);
/* Trigger EIO in shmem: */
@@ -954,10 +1000,17 @@ static int me_swapcache_dirty(struct page *p, unsigned long pfn)
ret = delete_from_lru_cache(p) ? MF_FAILED : MF_DELAYED;
unlock_page(p);
+
+ if (ret == MF_DELAYED)
+ extra_pins = true;
+
+ if (has_extra_refcount(ps, p, extra_pins))
+ ret = MF_FAILED;
+
return ret;
}
-static int me_swapcache_clean(struct page *p, unsigned long pfn)
+static int me_swapcache_clean(struct page_state *ps, struct page *p)
{
int ret;
@@ -965,6 +1018,10 @@ static int me_swapcache_clean(struct page *p, unsigned long pfn)
ret = delete_from_lru_cache(p) ? MF_FAILED : MF_RECOVERED;
unlock_page(p);
+
+ if (has_extra_refcount(ps, p, false))
+ ret = MF_FAILED;
+
return ret;
}
@@ -974,7 +1031,7 @@ static int me_swapcache_clean(struct page *p, unsigned long pfn)
* - Error on hugepage is contained in hugepage unit (not in raw page unit.)
* To narrow down kill region to one page, we need to break up pmd.
*/
-static int me_huge_page(struct page *p, unsigned long pfn)
+static int me_huge_page(struct page_state *ps, struct page *p)
{
int res;
struct page *hpage = compound_head(p);
@@ -985,7 +1042,7 @@ static int me_huge_page(struct page *p, unsigned long pfn)
mapping = page_mapping(hpage);
if (mapping) {
- res = truncate_error_page(hpage, pfn, mapping);
+ res = truncate_error_page(hpage, page_to_pfn(p), mapping);
unlock_page(hpage);
} else {
res = MF_FAILED;
@@ -1003,6 +1060,9 @@ static int me_huge_page(struct page *p, unsigned long pfn)
}
}
+ if (has_extra_refcount(ps, p, false))
+ res = MF_FAILED;
+
return res;
}
@@ -1028,14 +1088,7 @@ static int me_huge_page(struct page *p, unsigned long pfn)
#define slab (1UL << PG_slab)
#define reserved (1UL << PG_reserved)
-static struct page_state {
- unsigned long mask;
- unsigned long res;
- enum mf_action_page_type type;
-
- /* Callback ->action() has to unlock the relevant page inside it. */
- int (*action)(struct page *p, unsigned long pfn);
-} error_states[] = {
+static struct page_state error_states[] = {
{ reserved, reserved, MF_MSG_KERNEL, me_kernel },
/*
* free pages are specially detected outside this table:
@@ -1095,19 +1148,10 @@ static int page_action(struct page_state *ps, struct page *p,
unsigned long pfn)
{
int result;
- int count;
/* page p should be unlocked after returning from ps->action(). */
- result = ps->action(p, pfn);
+ result = ps->action(ps, p);
- count = page_count(p) - 1;
- if (ps->action == me_swapcache_dirty && result == MF_DELAYED)
- count--;
- if (count > 0) {
- pr_err("Memory failure: %#lx: %s still referenced by %d users\n",
- pfn, action_page_types[ps->type], count);
- result = MF_FAILED;
- }
action_result(pfn, ps->type, result);
/* Could do more checks here if page looks ok */
@@ -1400,14 +1444,11 @@ static int identify_page_state(unsigned long pfn, struct page *p,
static int try_to_split_thp_page(struct page *page, const char *msg)
{
lock_page(page);
- if (!PageAnon(page) || unlikely(split_huge_page(page))) {
+ if (unlikely(split_huge_page(page))) {
unsigned long pfn = page_to_pfn(page);
unlock_page(page);
- if (!PageAnon(page))
- pr_info("%s: %#lx: non anonymous thp\n", msg, pfn);
- else
- pr_info("%s: %#lx: thp split failed\n", msg, pfn);
+ pr_info("%s: %#lx: thp split failed\n", msg, pfn);
put_page(page);
return -EBUSY;
}