summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/linux/mm_inline.h19
-rw-r--r--include/linux/swapops.h15
-rw-r--r--mm/hugetlb.c32
-rw-r--r--mm/madvise.c2
-rw-r--r--mm/memory.c15
-rw-r--r--mm/mprotect.c4
-rw-r--r--mm/shmem.c4
-rw-r--r--mm/swapfile.c2
8 files changed, 65 insertions, 28 deletions
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 21d6c72bcc71..a86c84600787 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -524,6 +524,25 @@ static inline bool mm_tlb_flush_nested(struct mm_struct *mm)
}
/*
+ * Computes the pte marker to copy from the given source entry into dst_vma.
+ * If no marker should be copied, returns 0.
+ * The caller should insert a new pte created with make_pte_marker().
+ */
+static inline pte_marker copy_pte_marker(
+ swp_entry_t entry, struct vm_area_struct *dst_vma)
+{
+ pte_marker srcm = pte_marker_get(entry);
+ /* Always copy error entries. */
+ pte_marker dstm = srcm & PTE_MARKER_POISONED;
+
+ /* Only copy PTE markers if UFFD register matches. */
+ if ((srcm & PTE_MARKER_UFFD_WP) && userfaultfd_wp(dst_vma))
+ dstm |= PTE_MARKER_UFFD_WP;
+
+ return dstm;
+}
+
+/*
* If this pte is wr-protected by uffd-wp in any form, arm the special pte to
* replace a none pte. NOTE! This should only be called when *pte is already
* cleared so we will never accidentally replace something valuable. Meanwhile
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 4c932cb45e0b..bff1e8d97de0 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -393,7 +393,12 @@ static inline bool is_migration_entry_dirty(swp_entry_t entry)
typedef unsigned long pte_marker;
#define PTE_MARKER_UFFD_WP BIT(0)
-#define PTE_MARKER_SWAPIN_ERROR BIT(1)
+/*
+ * "Poisoned" here is meant in the very general sense of "future accesses are
+ * invalid", instead of referring very specifically to hardware memory errors.
+ * This marker is meant to represent any of various different causes of this.
+ */
+#define PTE_MARKER_POISONED BIT(1)
#define PTE_MARKER_MASK (BIT(2) - 1)
static inline swp_entry_t make_pte_marker_entry(pte_marker marker)
@@ -421,15 +426,15 @@ static inline pte_t make_pte_marker(pte_marker marker)
return swp_entry_to_pte(make_pte_marker_entry(marker));
}
-static inline swp_entry_t make_swapin_error_entry(void)
+static inline swp_entry_t make_poisoned_swp_entry(void)
{
- return make_pte_marker_entry(PTE_MARKER_SWAPIN_ERROR);
+ return make_pte_marker_entry(PTE_MARKER_POISONED);
}
-static inline int is_swapin_error_entry(swp_entry_t entry)
+static inline int is_poisoned_swp_entry(swp_entry_t entry)
{
return is_pte_marker_entry(entry) &&
- (pte_marker_get(entry) & PTE_MARKER_SWAPIN_ERROR);
+ (pte_marker_get(entry) & PTE_MARKER_POISONED);
}
/*
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index e3839eee4657..ffee2978dfed 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -34,6 +34,7 @@
#include <linux/nospec.h>
#include <linux/delayacct.h>
#include <linux/memory.h>
+#include <linux/mm_inline.h>
#include <asm/page.h>
#include <asm/pgalloc.h>
@@ -5101,15 +5102,12 @@ again:
entry = huge_pte_clear_uffd_wp(entry);
set_huge_pte_at(dst, addr, dst_pte, entry);
} else if (unlikely(is_pte_marker(entry))) {
- /* No swap on hugetlb */
- WARN_ON_ONCE(
- is_swapin_error_entry(pte_to_swp_entry(entry)));
- /*
- * We copy the pte marker only if the dst vma has
- * uffd-wp enabled.
- */
- if (userfaultfd_wp(dst_vma))
- set_huge_pte_at(dst, addr, dst_pte, entry);
+ pte_marker marker = copy_pte_marker(
+ pte_to_swp_entry(entry), dst_vma);
+
+ if (marker)
+ set_huge_pte_at(dst, addr, dst_pte,
+ make_pte_marker(marker));
} else {
entry = huge_ptep_get(src_pte);
pte_folio = page_folio(pte_page(entry));
@@ -6089,14 +6087,26 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
}
entry = huge_ptep_get(ptep);
- /* PTE markers should be handled the same way as none pte */
- if (huge_pte_none_mostly(entry))
+ if (huge_pte_none_mostly(entry)) {
+ if (is_pte_marker(entry)) {
+ pte_marker marker =
+ pte_marker_get(pte_to_swp_entry(entry));
+
+ if (marker & PTE_MARKER_POISONED) {
+ ret = VM_FAULT_HWPOISON_LARGE;
+ goto out_mutex;
+ }
+ }
+
/*
+ * Other PTE markers should be handled the same way as none PTE.
+ *
* hugetlb_no_page will drop vma lock and hugetlb fault
* mutex internally, which make us return immediately.
*/
return hugetlb_no_page(mm, vma, mapping, idx, address, ptep,
entry, flags);
+ }
ret = 0;
diff --git a/mm/madvise.c b/mm/madvise.c
index 05f97038eac3..da65f8bd9ac3 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -664,7 +664,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
free_swap_and_cache(entry);
pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
} else if (is_hwpoison_entry(entry) ||
- is_swapin_error_entry(entry)) {
+ is_poisoned_swp_entry(entry)) {
pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
}
continue;
diff --git a/mm/memory.c b/mm/memory.c
index ff19719da032..36b164ee9ffb 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -860,8 +860,11 @@ copy_nonpresent_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
return -EBUSY;
return -ENOENT;
} else if (is_pte_marker_entry(entry)) {
- if (is_swapin_error_entry(entry) || userfaultfd_wp(dst_vma))
- set_pte_at(dst_mm, addr, dst_pte, pte);
+ pte_marker marker = copy_pte_marker(entry, dst_vma);
+
+ if (marker)
+ set_pte_at(dst_mm, addr, dst_pte,
+ make_pte_marker(marker));
return 0;
}
if (!userfaultfd_wp(dst_vma))
@@ -1502,7 +1505,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
!zap_drop_file_uffd_wp(details))
continue;
} else if (is_hwpoison_entry(entry) ||
- is_swapin_error_entry(entry)) {
+ is_poisoned_swp_entry(entry)) {
if (!should_zap_cows(details))
continue;
} else {
@@ -3651,7 +3654,7 @@ static vm_fault_t pte_marker_clear(struct vm_fault *vmf)
* none pte. Otherwise it means the pte could have changed, so retry.
*
* This should also cover the case where e.g. the pte changed
- * quickly from a PTE_MARKER_UFFD_WP into PTE_MARKER_SWAPIN_ERROR.
+ * quickly from a PTE_MARKER_UFFD_WP into PTE_MARKER_POISONED.
* So is_pte_marker() check is not enough to safely drop the pte.
*/
if (pte_same(vmf->orig_pte, ptep_get(vmf->pte)))
@@ -3697,8 +3700,8 @@ static vm_fault_t handle_pte_marker(struct vm_fault *vmf)
return VM_FAULT_SIGBUS;
/* Higher priority than uffd-wp when data corrupted */
- if (marker & PTE_MARKER_SWAPIN_ERROR)
- return VM_FAULT_SIGBUS;
+ if (marker & PTE_MARKER_POISONED)
+ return VM_FAULT_HWPOISON;
if (pte_marker_entry_uffd_wp(entry))
return pte_marker_handle_uffd_wp(vmf);
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 6f658d483704..5c3112d92466 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -230,10 +230,10 @@ static long change_pte_range(struct mmu_gather *tlb,
newpte = pte_swp_mkuffd_wp(newpte);
} else if (is_pte_marker_entry(entry)) {
/*
- * Ignore swapin errors unconditionally,
+ * Ignore error swap entries unconditionally,
* because any access should sigbus anyway.
*/
- if (is_swapin_error_entry(entry))
+ if (is_poisoned_swp_entry(entry))
continue;
/*
* If this is uffd-wp pte marker and we'd like
diff --git a/mm/shmem.c b/mm/shmem.c
index 8dfd72bdc86a..235f2b2fd202 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1707,7 +1707,7 @@ static void shmem_set_folio_swapin_error(struct inode *inode, pgoff_t index,
swp_entry_t swapin_error;
void *old;
- swapin_error = make_swapin_error_entry();
+ swapin_error = make_poisoned_swp_entry();
old = xa_cmpxchg_irq(&mapping->i_pages, index,
swp_to_radix_entry(swap),
swp_to_radix_entry(swapin_error), 0);
@@ -1752,7 +1752,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
swap = radix_to_swp_entry(*foliop);
*foliop = NULL;
- if (is_swapin_error_entry(swap))
+ if (is_poisoned_swp_entry(swap))
return -EIO;
si = get_swap_device(swap);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index d996c335fc3c..346e22b8ae97 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1771,7 +1771,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
swp_entry = make_hwpoison_entry(swapcache);
page = swapcache;
} else {
- swp_entry = make_swapin_error_entry();
+ swp_entry = make_poisoned_swp_entry();
}
new_pte = swp_entry_to_pte(swp_entry);
ret = 0;