summaryrefslogtreecommitdiff
path: root/mm/memory-failure.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-06-28 10:28:11 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2023-06-28 10:28:11 -0700
commit6e17c6de3ddf3073741d9c91a796ee696914d8a0 (patch)
tree2c425707f78642625dbe2c824c7fded2021e3dc7 /mm/memory-failure.c
parent6aeadf7896bff4ca230702daba8788455e6b866e (diff)
parentacc72d59c7509540c27c49625cb4b5a8db1f1a84 (diff)
Merge tag 'mm-stable-2023-06-24-19-15' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull mm updates from Andrew Morton: - Yosry Ahmed brought back some cgroup v1 stats in OOM logs - Yosry has also eliminated cgroup's atomic rstat flushing - Nhat Pham adds the new cachestat() syscall. It provides userspace with the ability to query pagecache status - a similar concept to mincore() but more powerful and with improved usability - Mel Gorman provides more optimizations for compaction, reducing the prevalence of page rescanning - Lorenzo Stoakes has done some maintanance work on the get_user_pages() interface - Liam Howlett continues with cleanups and maintenance work to the maple tree code. Peng Zhang also does some work on maple tree - Johannes Weiner has done some cleanup work on the compaction code - David Hildenbrand has contributed additional selftests for get_user_pages() - Thomas Gleixner has contributed some maintenance and optimization work for the vmalloc code - Baolin Wang has provided some compaction cleanups, - SeongJae Park continues maintenance work on the DAMON code - Huang Ying has done some maintenance on the swap code's usage of device refcounting - Christoph Hellwig has some cleanups for the filemap/directio code - Ryan Roberts provides two patch series which yield some rationalization of the kernel's access to pte entries - use the provided APIs rather than open-coding accesses - Lorenzo Stoakes has some fixes to the interaction between pagecache and directio access to file mappings - John Hubbard has a series of fixes to the MM selftesting code - ZhangPeng continues the folio conversion campaign - Hugh Dickins has been working on the pagetable handling code, mainly with a view to reducing the load on the mmap_lock - Catalin Marinas has reduced the arm64 kmalloc() minimum alignment from 128 to 8 - Domenico Cerasuolo has improved the zswap reclaim mechanism by reorganizing the LRU management - Matthew Wilcox provides some fixups to make gfs2 work better with the buffer_head code - Vishal Moola also has done some folio conversion work - Matthew Wilcox has removed the remnants of the pagevec code - their functionality is migrated over to struct folio_batch * tag 'mm-stable-2023-06-24-19-15' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (380 commits) mm/hugetlb: remove hugetlb_set_page_subpool() mm: nommu: correct the range of mmap_sem_read_lock in task_mem() hugetlb: revert use of page_cache_next_miss() Revert "page cache: fix page_cache_next/prev_miss off by one" mm/vmscan: fix root proactive reclaim unthrottling unbalanced node mm: memcg: rename and document global_reclaim() mm: kill [add|del]_page_to_lru_list() mm: compaction: convert to use a folio in isolate_migratepages_block() mm: zswap: fix double invalidate with exclusive loads mm: remove unnecessary pagevec includes mm: remove references to pagevec mm: rename invalidate_mapping_pagevec to mapping_try_invalidate mm: remove struct pagevec net: convert sunrpc from pagevec to folio_batch i915: convert i915_gpu_error to use a folio_batch pagevec: rename fbatch_count() mm: remove check_move_unevictable_pages() drm: convert drm_gem_put_pages() to use a folio_batch i915: convert shmem_sg_free_table() to use a folio_batch scatterlist: add sg_set_folio() ...
Diffstat (limited to 'mm/memory-failure.c')
-rw-r--r--mm/memory-failure.c45
1 files changed, 21 insertions, 24 deletions
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 5b663eca1f29..e245191e6b04 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -6,16 +6,16 @@
* High level machine check handler. Handles pages reported by the
* hardware as being corrupted usually due to a multi-bit ECC memory or cache
* failure.
- *
+ *
* In addition there is a "soft offline" entry point that allows stop using
* not-yet-corrupted-by-suspicious pages without killing anything.
*
* Handles page cache pages in various states. The tricky part
- * here is that we can access any page asynchronously in respect to
- * other VM users, because memory failures could happen anytime and
- * anywhere. This could violate some of their assumptions. This is why
- * this code has to be extremely careful. Generally it tries to use
- * normal locking rules, as in get the standard locks, even if that means
+ * here is that we can access any page asynchronously in respect to
+ * other VM users, because memory failures could happen anytime and
+ * anywhere. This could violate some of their assumptions. This is why
+ * this code has to be extremely careful. Generally it tries to use
+ * normal locking rules, as in get the standard locks, even if that means
* the error handling takes potentially a long time.
*
* It can be very tempting to add handling for obscure cases here.
@@ -25,12 +25,12 @@
* https://git.kernel.org/cgit/utils/cpu/mce/mce-test.git/
* - The case actually shows up as a frequent (top 10) page state in
* tools/mm/page-types when running a real workload.
- *
+ *
* There are several operations here with exponential complexity because
- * of unsuitable VM data structures. For example the operation to map back
- * from RMAP chains to processes has to walk the complete process list and
+ * of unsuitable VM data structures. For example the operation to map back
+ * from RMAP chains to processes has to walk the complete process list and
* has non linear complexity with the number. But since memory corruptions
- * are rare we hope to get away with this. This avoids impacting the core
+ * are rare we hope to get away with this. This avoids impacting the core
* VM.
*/
@@ -123,7 +123,6 @@ const struct attribute_group memory_failure_attr_group = {
.attrs = memory_failure_attr,
};
-#ifdef CONFIG_SYSCTL
static struct ctl_table memory_failure_table[] = {
{
.procname = "memory_failure_early_kill",
@@ -146,14 +145,6 @@ static struct ctl_table memory_failure_table[] = {
{ }
};
-static int __init memory_failure_sysctl_init(void)
-{
- register_sysctl_init("vm", memory_failure_table);
- return 0;
-}
-late_initcall(memory_failure_sysctl_init);
-#endif /* CONFIG_SYSCTL */
-
/*
* Return values:
* 1: the page is dissolved (if needed) and taken off from buddy,
@@ -395,6 +386,7 @@ static unsigned long dev_pagemap_mapping_shift(struct vm_area_struct *vma,
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
+ pte_t ptent;
VM_BUG_ON_VMA(address == -EFAULT, vma);
pgd = pgd_offset(vma->vm_mm, address);
@@ -414,7 +406,10 @@ static unsigned long dev_pagemap_mapping_shift(struct vm_area_struct *vma,
if (pmd_devmap(*pmd))
return PMD_SHIFT;
pte = pte_offset_map(pmd, address);
- if (pte_present(*pte) && pte_devmap(*pte))
+ if (!pte)
+ return 0;
+ ptent = ptep_get(pte);
+ if (pte_present(ptent) && pte_devmap(ptent))
ret = PAGE_SHIFT;
pte_unmap(pte);
return ret;
@@ -800,13 +795,13 @@ static int hwpoison_pte_range(pmd_t *pmdp, unsigned long addr,
goto out;
}
- if (pmd_trans_unstable(pmdp))
- goto out;
-
mapped_pte = ptep = pte_offset_map_lock(walk->vma->vm_mm, pmdp,
addr, &ptl);
+ if (!ptep)
+ goto out;
+
for (; addr != end; ptep++, addr += PAGE_SIZE) {
- ret = check_hwpoisoned_entry(*ptep, addr, PAGE_SHIFT,
+ ret = check_hwpoisoned_entry(ptep_get(ptep), addr, PAGE_SHIFT,
hwp->pfn, &hwp->tk);
if (ret == 1)
break;
@@ -2441,6 +2436,8 @@ static int __init memory_failure_init(void)
INIT_WORK(&mf_cpu->work, memory_failure_work_func);
}
+ register_sysctl_init("vm", memory_failure_table);
+
return 0;
}
core_initcall(memory_failure_init);