summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-02-23 17:09:35 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2023-02-23 17:09:35 -0800
commit3822a7c40997dc86b1458766a3f146d62393f084 (patch)
tree4473720ecbfaabeedfe58484425be77d0f89f736 /arch
parente4bc15889506723d7b93c053ad4a75cd58248d74 (diff)
parentf9366f4c2a29d14f5992b195e268240c2deb116e (diff)
Merge tag 'mm-stable-2023-02-20-13-37' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull MM updates from Andrew Morton: - Daniel Verkamp has contributed a memfd series ("mm/memfd: add F_SEAL_EXEC") which permits the setting of the memfd execute bit at memfd creation time, with the option of sealing the state of the X bit. - Peter Xu adds a patch series ("mm/hugetlb: Make huge_pte_offset() thread-safe for pmd unshare") which addresses a rare race condition related to PMD unsharing. - Several folioification patch serieses from Matthew Wilcox, Vishal Moola, Sidhartha Kumar and Lorenzo Stoakes - Johannes Weiner has a series ("mm: push down lock_page_memcg()") which does perform some memcg maintenance and cleanup work. - SeongJae Park has added DAMOS filtering to DAMON, with the series "mm/damon/core: implement damos filter". These filters provide users with finer-grained control over DAMOS's actions. SeongJae has also done some DAMON cleanup work. - Kairui Song adds a series ("Clean up and fixes for swap"). - Vernon Yang contributed the series "Clean up and refinement for maple tree". - Yu Zhao has contributed the "mm: multi-gen LRU: memcg LRU" series. It adds to MGLRU an LRU of memcgs, to improve the scalability of global reclaim. - David Hildenbrand has added some userfaultfd cleanup work in the series "mm: uffd-wp + change_protection() cleanups". - Christoph Hellwig has removed the generic_writepages() library function in the series "remove generic_writepages". - Baolin Wang has performed some maintenance on the compaction code in his series "Some small improvements for compaction". - Sidhartha Kumar is doing some maintenance work on struct page in his series "Get rid of tail page fields". - David Hildenbrand contributed some cleanup, bugfixing and generalization of pte management and of pte debugging in his series "mm: support __HAVE_ARCH_PTE_SWP_EXCLUSIVE on all architectures with swap PTEs". - Mel Gorman and Neil Brown have removed the __GFP_ATOMIC allocation flag in the series "Discard __GFP_ATOMIC". - Sergey Senozhatsky has improved zsmalloc's memory utilization with his series "zsmalloc: make zspage chain size configurable". - Joey Gouly has added prctl() support for prohibiting the creation of writeable+executable mappings. The previous BPF-based approach had shortcomings. See "mm: In-kernel support for memory-deny-write-execute (MDWE)". - Waiman Long did some kmemleak cleanup and bugfixing in the series "mm/kmemleak: Simplify kmemleak_cond_resched() & fix UAF". - T.J. Alumbaugh has contributed some MGLRU cleanup work in his series "mm: multi-gen LRU: improve". - Jiaqi Yan has provided some enhancements to our memory error statistics reporting, mainly by presenting the statistics on a per-node basis. See the series "Introduce per NUMA node memory error statistics". - Mel Gorman has a second and hopefully final shot at fixing a CPU-hog regression in compaction via his series "Fix excessive CPU usage during compaction". - Christoph Hellwig does some vmalloc maintenance work in the series "cleanup vfree and vunmap". - Christoph Hellwig has removed block_device_operations.rw_page() in ths series "remove ->rw_page". - We get some maple_tree improvements and cleanups in Liam Howlett's series "VMA tree type safety and remove __vma_adjust()". - Suren Baghdasaryan has done some work on the maintainability of our vm_flags handling in the series "introduce vm_flags modifier functions". - Some pagemap cleanup and generalization work in Mike Rapoport's series "mm, arch: add generic implementation of pfn_valid() for FLATMEM" and "fixups for generic implementation of pfn_valid()" - Baoquan He has done some work to make /proc/vmallocinfo and /proc/kcore better represent the real state of things in his series "mm/vmalloc.c: allow vread() to read out vm_map_ram areas". - Jason Gunthorpe rationalized the GUP system's interface to the rest of the kernel in the series "Simplify the external interface for GUP". - SeongJae Park wishes to migrate people from DAMON's debugfs interface over to its sysfs interface. To support this, we'll temporarily be printing warnings when people use the debugfs interface. See the series "mm/damon: deprecate DAMON debugfs interface". - Andrey Konovalov provided the accurately named "lib/stackdepot: fixes and clean-ups" series. - Huang Ying has provided a dramatic reduction in migration's TLB flush IPI rates with the series "migrate_pages(): batch TLB flushing". - Arnd Bergmann has some objtool fixups in "objtool warning fixes". * tag 'mm-stable-2023-02-20-13-37' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (505 commits) include/linux/migrate.h: remove unneeded externs mm/memory_hotplug: cleanup return value handing in do_migrate_range() mm/uffd: fix comment in handling pte markers mm: change to return bool for isolate_movable_page() mm: hugetlb: change to return bool for isolate_hugetlb() mm: change to return bool for isolate_lru_page() mm: change to return bool for folio_isolate_lru() objtool: add UACCESS exceptions for __tsan_volatile_read/write kmsan: disable ftrace in kmsan core code kasan: mark addr_has_metadata __always_inline mm: memcontrol: rename memcg_kmem_enabled() sh: initialize max_mapnr m68k/nommu: add missing definition of ARCH_PFN_OFFSET mm: percpu: fix incorrect size in pcpu_obj_full_size() maple_tree: reduce stack usage with gcc-9 and earlier mm: page_alloc: call panic() when memoryless node allocation fails mm: multi-gen LRU: avoid futile retries migrate_pages: move THP/hugetlb migration support check to simplify code migrate_pages: batch flushing TLB migrate_pages: share more code between _unmap and _move ...
Diffstat (limited to 'arch')
-rw-r--r--arch/alpha/include/asm/page.h9
-rw-r--r--arch/alpha/include/asm/pgtable.h40
-rw-r--r--arch/arc/include/asm/page.h1
-rw-r--r--arch/arc/include/asm/pgtable-bits-arcv2.h26
-rw-r--r--arch/arm/include/asm/memory.h2
-rw-r--r--arch/arm/include/asm/page.h2
-rw-r--r--arch/arm/include/asm/pgtable-2level.h3
-rw-r--r--arch/arm/include/asm/pgtable-3level.h3
-rw-r--r--arch/arm/include/asm/pgtable.h34
-rw-r--r--arch/arm/kernel/process.c2
-rw-r--r--arch/arm64/include/asm/page.h4
-rw-r--r--arch/arm64/include/asm/pgtable.h1
-rw-r--r--arch/arm64/kernel/vdso.c6
-rw-r--r--arch/arm64/mm/fault.c4
-rw-r--r--arch/csky/abiv1/inc/abi/pgtable-bits.h13
-rw-r--r--arch/csky/abiv2/inc/abi/pgtable-bits.h19
-rw-r--r--arch/csky/include/asm/page.h1
-rw-r--r--arch/csky/include/asm/pgtable.h17
-rw-r--r--arch/hexagon/include/asm/page.h1
-rw-r--r--arch/hexagon/include/asm/pgtable.h36
-rw-r--r--arch/ia64/include/asm/page.h18
-rw-r--r--arch/ia64/include/asm/pgtable.h31
-rw-r--r--arch/ia64/mm/init.c8
-rw-r--r--arch/loongarch/include/asm/page.h13
-rw-r--r--arch/loongarch/include/asm/pgtable-bits.h4
-rw-r--r--arch/loongarch/include/asm/pgtable.h38
-rw-r--r--arch/loongarch/include/asm/tlb.h2
-rw-r--r--arch/m68k/include/asm/mcf_pgtable.h35
-rw-r--r--arch/m68k/include/asm/motorola_pgtable.h39
-rw-r--r--arch/m68k/include/asm/page.h6
-rw-r--r--arch/m68k/include/asm/page_mm.h1
-rw-r--r--arch/m68k/include/asm/page_no.h11
-rw-r--r--arch/m68k/include/asm/pgtable_no.h6
-rw-r--r--arch/m68k/include/asm/sun3_pgtable.h38
-rw-r--r--arch/microblaze/include/asm/page.h1
-rw-r--r--arch/microblaze/include/asm/pgtable.h44
-rw-r--r--arch/mips/include/asm/page.h28
-rw-r--r--arch/mips/include/asm/pgtable-32.h88
-rw-r--r--arch/mips/include/asm/pgtable-64.h23
-rw-r--r--arch/mips/include/asm/pgtable.h35
-rw-r--r--arch/nios2/include/asm/page.h9
-rw-r--r--arch/nios2/include/asm/pgtable-bits.h3
-rw-r--r--arch/nios2/include/asm/pgtable.h37
-rw-r--r--arch/openrisc/include/asm/page.h2
-rw-r--r--arch/openrisc/include/asm/pgtable.h40
-rw-r--r--arch/parisc/include/asm/page.h4
-rw-r--r--arch/parisc/include/asm/pgtable.h40
-rw-r--r--arch/powerpc/include/asm/book3s/32/pgtable.h37
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgtable.h1
-rw-r--r--arch/powerpc/include/asm/nohash/32/pgtable.h22
-rw-r--r--arch/powerpc/include/asm/nohash/32/pte-40x.h6
-rw-r--r--arch/powerpc/include/asm/nohash/32/pte-44x.h18
-rw-r--r--arch/powerpc/include/asm/nohash/32/pte-85xx.h4
-rw-r--r--arch/powerpc/include/asm/nohash/64/pgtable.h24
-rw-r--r--arch/powerpc/include/asm/nohash/pgtable.h15
-rw-r--r--arch/powerpc/include/asm/nohash/pte-e500.h1
-rw-r--r--arch/powerpc/include/asm/page.h9
-rw-r--r--arch/powerpc/kernel/pci_64.c2
-rw-r--r--arch/powerpc/kernel/vdso.c4
-rw-r--r--arch/powerpc/kvm/book3s_hv_uvmem.c6
-rw-r--r--arch/powerpc/kvm/book3s_xive_native.c2
-rw-r--r--arch/powerpc/mm/book3s64/subpage_prot.c2
-rw-r--r--arch/powerpc/platforms/book3s/vas-api.c4
-rw-r--r--arch/powerpc/platforms/cell/spufs/file.c14
-rw-r--r--arch/powerpc/platforms/pseries/vas.c3
-rw-r--r--arch/riscv/include/asm/page.h5
-rw-r--r--arch/riscv/include/asm/pgtable-bits.h3
-rw-r--r--arch/riscv/include/asm/pgtable.h28
-rw-r--r--arch/riscv/kernel/vdso.c6
-rw-r--r--arch/s390/include/asm/page.h5
-rw-r--r--arch/s390/include/asm/pgtable.h1
-rw-r--r--arch/s390/kernel/vdso.c4
-rw-r--r--arch/s390/mm/gmap.c11
-rw-r--r--arch/sh/include/asm/page.h3
-rw-r--r--arch/sh/include/asm/pgtable_32.h53
-rw-r--r--arch/sh/kernel/cpu/sh4/sq.c2
-rw-r--r--arch/sh/mm/init.c1
-rw-r--r--arch/sparc/include/asm/page_32.h1
-rw-r--r--arch/sparc/include/asm/pgtable_32.h26
-rw-r--r--arch/sparc/include/asm/pgtable_64.h37
-rw-r--r--arch/sparc/include/asm/pgtsrmmu.h14
-rw-r--r--arch/um/include/asm/page.h1
-rw-r--r--arch/um/include/asm/pgtable.h36
-rw-r--r--arch/x86/entry/vdso/vma.c4
-rw-r--r--arch/x86/entry/vsyscall/vsyscall_64.c2
-rw-r--r--arch/x86/include/asm/page.h5
-rw-r--r--arch/x86/include/asm/page_32.h4
-rw-r--r--arch/x86/include/asm/page_64.h4
-rw-r--r--arch/x86/include/asm/pgtable-2level.h26
-rw-r--r--arch/x86/include/asm/pgtable-3level.h26
-rw-r--r--arch/x86/include/asm/pgtable.h27
-rw-r--r--arch/x86/kernel/cpu/sgx/driver.c2
-rw-r--r--arch/x86/kernel/cpu/sgx/virt.c2
-rw-r--r--arch/x86/mm/fault.c23
-rw-r--r--arch/x86/mm/pat/memtype.c14
-rw-r--r--arch/x86/um/mem_32.c2
-rw-r--r--arch/xtensa/include/asm/page.h4
-rw-r--r--arch/xtensa/include/asm/pgtable.h31
98 files changed, 1014 insertions, 401 deletions
diff --git a/arch/alpha/include/asm/page.h b/arch/alpha/include/asm/page.h
index 8f3f5eecba28..4db1ebc0ed99 100644
--- a/arch/alpha/include/asm/page.h
+++ b/arch/alpha/include/asm/page.h
@@ -17,9 +17,8 @@
extern void clear_page(void *page);
#define clear_user_page(page, vaddr, pg) clear_page(page)
-#define alloc_zeroed_user_highpage_movable(vma, vaddr) \
- alloc_page_vma(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, vma, vaddr)
-#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE
+#define vma_alloc_zeroed_movable_folio(vma, vaddr) \
+ vma_alloc_folio(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, 0, vma, vaddr, false)
extern void copy_page(void * _to, void * _from);
#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
@@ -87,10 +86,6 @@ typedef struct page *pgtable_t;
#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
#define virt_addr_valid(kaddr) pfn_valid((__pa(kaddr) >> PAGE_SHIFT))
-#ifdef CONFIG_FLATMEM
-#define pfn_valid(pfn) ((pfn) < max_mapnr)
-#endif /* CONFIG_FLATMEM */
-
#include <asm-generic/memory_model.h>
#include <asm-generic/getorder.h>
diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h
index 9e45f6735d5d..ba43cb841d19 100644
--- a/arch/alpha/include/asm/pgtable.h
+++ b/arch/alpha/include/asm/pgtable.h
@@ -74,6 +74,9 @@ struct vm_area_struct;
#define _PAGE_DIRTY 0x20000
#define _PAGE_ACCESSED 0x40000
+/* We borrow bit 39 to store the exclusive marker in swap PTEs. */
+#define _PAGE_SWP_EXCLUSIVE 0x8000000000UL
+
/*
* NOTE! The "accessed" bit isn't necessarily exact: it can be kept exactly
* by software (use the KRE/URE/KWE/UWE bits appropriately), but I'll fake it.
@@ -301,18 +304,47 @@ extern inline void update_mmu_cache(struct vm_area_struct * vma,
}
/*
- * Non-present pages: high 24 bits are offset, next 8 bits type,
- * low 32 bits zero.
+ * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that
+ * are !pte_none() && !pte_present().
+ *
+ * Format of swap PTEs:
+ *
+ * 6 6 6 6 5 5 5 5 5 5 5 5 5 5 4 4 4 4 4 4 4 4 4 4 3 3 3 3 3 3 3 3
+ * 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2
+ * <------------------- offset ------------------> E <--- type -->
+ *
+ * 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
+ * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * <--------------------------- zeroes -------------------------->
+ *
+ * E is the exclusive marker that is not stored in swap entries.
*/
extern inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
-{ pte_t pte; pte_val(pte) = (type << 32) | (offset << 40); return pte; }
+{ pte_t pte; pte_val(pte) = ((type & 0x7f) << 32) | (offset << 40); return pte; }
-#define __swp_type(x) (((x).val >> 32) & 0xff)
+#define __swp_type(x) (((x).val >> 32) & 0x7f)
#define __swp_offset(x) ((x).val >> 40)
#define __swp_entry(type, off) ((swp_entry_t) { pte_val(mk_swap_pte((type), (off))) })
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
+static inline int pte_swp_exclusive(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
+}
+
+static inline pte_t pte_swp_mkexclusive(pte_t pte)
+{
+ pte_val(pte) |= _PAGE_SWP_EXCLUSIVE;
+ return pte;
+}
+
+static inline pte_t pte_swp_clear_exclusive(pte_t pte)
+{
+ pte_val(pte) &= ~_PAGE_SWP_EXCLUSIVE;
+ return pte;
+}
+
#define pte_ERROR(e) \
printk("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e))
#define pmd_ERROR(e) \
diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h
index 9a62e1d87967..e43fe27ec54d 100644
--- a/arch/arc/include/asm/page.h
+++ b/arch/arc/include/asm/page.h
@@ -109,7 +109,6 @@ extern int pfn_valid(unsigned long pfn);
#else /* CONFIG_HIGHMEM */
#define ARCH_PFN_OFFSET virt_to_pfn(CONFIG_LINUX_RAM_BASE)
-#define pfn_valid(pfn) (((pfn) - ARCH_PFN_OFFSET) < max_mapnr)
#endif /* CONFIG_HIGHMEM */
diff --git a/arch/arc/include/asm/pgtable-bits-arcv2.h b/arch/arc/include/asm/pgtable-bits-arcv2.h
index 515e82db519f..6e9f8ca6d6a1 100644
--- a/arch/arc/include/asm/pgtable-bits-arcv2.h
+++ b/arch/arc/include/asm/pgtable-bits-arcv2.h
@@ -26,6 +26,9 @@
#define _PAGE_GLOBAL (1 << 8) /* ASID agnostic (H) */
#define _PAGE_PRESENT (1 << 9) /* PTE/TLB Valid (H) */
+/* We borrow bit 5 to store the exclusive marker in swap PTEs. */
+#define _PAGE_SWP_EXCLUSIVE _PAGE_DIRTY
+
#ifdef CONFIG_ARC_MMU_V4
#define _PAGE_HW_SZ (1 << 10) /* Normal/super (H) */
#else
@@ -106,9 +109,18 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
pte_t *ptep);
-/* Encode swap {type,off} tuple into PTE
- * We reserve 13 bits for 5-bit @type, keeping bits 12-5 zero, ensuring that
- * PAGE_PRESENT is zero in a PTE holding swap "identifier"
+/*
+ * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that
+ * are !pte_none() && !pte_present().
+ *
+ * Format of swap PTEs:
+ *
+ * 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
+ * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * <-------------- offset -------------> <--- zero --> E < type ->
+ *
+ * E is the exclusive marker that is not stored in swap entries.
+ * The zero'ed bits include _PAGE_PRESENT.
*/
#define __swp_entry(type, off) ((swp_entry_t) \
{ ((type) & 0x1f) | ((off) << 13) })
@@ -120,6 +132,14 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
+static inline int pte_swp_exclusive(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
+}
+
+PTE_BIT_FUNC(swp_mkexclusive, |= (_PAGE_SWP_EXCLUSIVE));
+PTE_BIT_FUNC(swp_clear_exclusive, &= ~(_PAGE_SWP_EXCLUSIVE));
+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#include <asm/hugepage.h>
#endif
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index d8eef4bd8c71..62e9df024445 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -386,6 +386,4 @@ static inline unsigned long __virt_to_idmap(unsigned long x)
#endif
-#include <asm-generic/memory_model.h>
-
#endif
diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h
index 5fcc8a600e36..74bb5947b387 100644
--- a/arch/arm/include/asm/page.h
+++ b/arch/arm/include/asm/page.h
@@ -158,6 +158,7 @@ typedef struct page *pgtable_t;
#ifdef CONFIG_HAVE_ARCH_PFN_VALID
extern int pfn_valid(unsigned long);
+#define pfn_valid pfn_valid
#endif
#include <asm/memory.h>
@@ -167,5 +168,6 @@ extern int pfn_valid(unsigned long);
#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC
#include <asm-generic/getorder.h>
+#include <asm-generic/memory_model.h>
#endif
diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h
index 92abd4cd8ca2..ce543cd9380c 100644
--- a/arch/arm/include/asm/pgtable-2level.h
+++ b/arch/arm/include/asm/pgtable-2level.h
@@ -126,6 +126,9 @@
#define L_PTE_SHARED (_AT(pteval_t, 1) << 10) /* shared(v6), coherent(xsc3) */
#define L_PTE_NONE (_AT(pteval_t, 1) << 11)
+/* We borrow bit 7 to store the exclusive marker in swap PTEs. */
+#define L_PTE_SWP_EXCLUSIVE L_PTE_RDONLY
+
/*
* These are the memory types, defined to be compatible with
* pre-ARMv6 CPUs cacheable and bufferable bits: n/a,n/a,C,B
diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index eabe72ff7381..106049791500 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -76,6 +76,9 @@
#define L_PTE_NONE (_AT(pteval_t, 1) << 57) /* PROT_NONE */
#define L_PTE_RDONLY (_AT(pteval_t, 1) << 58) /* READ ONLY */
+/* We borrow bit 7 to store the exclusive marker in swap PTEs. */
+#define L_PTE_SWP_EXCLUSIVE (_AT(pteval_t, 1) << 7)
+
#define L_PMD_SECT_VALID (_AT(pmdval_t, 1) << 0)
#define L_PMD_SECT_DIRTY (_AT(pmdval_t, 1) << 55)
#define L_PMD_SECT_NONE (_AT(pmdval_t, 1) << 57)
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index f049072b2e85..a58ccbb406ad 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -271,27 +271,47 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
}
/*
- * Encode and decode a swap entry. Swap entries are stored in the Linux
- * page tables as follows:
+ * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that
+ * are !pte_none() && !pte_present().
+ *
+ * Format of swap PTEs:
*
* 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
* 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- * <--------------- offset ------------------------> < type -> 0 0
+ * <------------------- offset ------------------> E < type -> 0 0
+ *
+ * E is the exclusive marker that is not stored in swap entries.
*
- * This gives us up to 31 swap files and 128GB per swap file. Note that
+ * This gives us up to 31 swap files and 64GB per swap file. Note that
* the offset field is always non-zero.
*/
#define __SWP_TYPE_SHIFT 2
#define __SWP_TYPE_BITS 5
#define __SWP_TYPE_MASK ((1 << __SWP_TYPE_BITS) - 1)
-#define __SWP_OFFSET_SHIFT (__SWP_TYPE_BITS + __SWP_TYPE_SHIFT)
+#define __SWP_OFFSET_SHIFT (__SWP_TYPE_BITS + __SWP_TYPE_SHIFT + 1)
#define __swp_type(x) (((x).val >> __SWP_TYPE_SHIFT) & __SWP_TYPE_MASK)
#define __swp_offset(x) ((x).val >> __SWP_OFFSET_SHIFT)
-#define __swp_entry(type,offset) ((swp_entry_t) { ((type) << __SWP_TYPE_SHIFT) | ((offset) << __SWP_OFFSET_SHIFT) })
+#define __swp_entry(type, offset) ((swp_entry_t) { (((type) & __SWP_TYPE_MASK) << __SWP_TYPE_SHIFT) | \
+ ((offset) << __SWP_OFFSET_SHIFT) })
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
-#define __swp_entry_to_pte(swp) __pte((swp).val | PTE_TYPE_FAULT)
+#define __swp_entry_to_pte(swp) __pte((swp).val)
+
+static inline int pte_swp_exclusive(pte_t pte)
+{
+ return pte_isset(pte, L_PTE_SWP_EXCLUSIVE);
+}
+
+static inline pte_t pte_swp_mkexclusive(pte_t pte)
+{
+ return set_pte_bit(pte, __pgprot(L_PTE_SWP_EXCLUSIVE));
+}
+
+static inline pte_t pte_swp_clear_exclusive(pte_t pte)
+{
+ return clear_pte_bit(pte, __pgprot(L_PTE_SWP_EXCLUSIVE));
+}
/*
* It is an error for the kernel to have more swap files than we can
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index c81e7be2b4ea..0e8ff85890ad 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -315,7 +315,7 @@ static int __init gate_vma_init(void)
gate_vma.vm_page_prot = PAGE_READONLY_EXEC;
gate_vma.vm_start = 0xffff0000;
gate_vma.vm_end = 0xffff0000 + PAGE_SIZE;
- gate_vma.vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC;
+ vm_flags_init(&gate_vma, VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC);
return 0;
}
arch_initcall(gate_vma_init);
diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index 993a27ea6f54..2312e6ee595f 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -29,9 +29,9 @@ void copy_user_highpage(struct page *to, struct page *from,
void copy_highpage(struct page *to, struct page *from);
#define __HAVE_ARCH_COPY_HIGHPAGE
-struct page *alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma,
+struct folio *vma_alloc_zeroed_movable_folio(struct vm_area_struct *vma,
unsigned long vaddr);
-#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE
+#define vma_alloc_zeroed_movable_folio vma_alloc_zeroed_movable_folio
void tag_clear_highpage(struct page *to);
#define __HAVE_ARCH_TAG_CLEAR_HIGHPAGE
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 27455bfd64bc..b6ba466e2e8a 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -421,7 +421,6 @@ static inline pgprot_t mk_pmd_sect_prot(pgprot_t prot)
return __pgprot((pgprot_val(prot) & ~PMD_TABLE_BIT) | PMD_TYPE_SECT);
}
-#define __HAVE_ARCH_PTE_SWP_EXCLUSIVE
static inline pte_t pte_swp_mkexclusive(pte_t pte)
{
return set_pte_bit(pte, __pgprot(PTE_SWP_EXCLUSIVE));
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index e59a32aa0c49..0119dc91abb5 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -138,13 +138,11 @@ int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
mmap_read_lock(mm);
for_each_vma(vmi, vma) {
- unsigned long size = vma->vm_end - vma->vm_start;
-
if (vma_is_special_mapping(vma, vdso_info[VDSO_ABI_AA64].dm))
- zap_page_range(vma, vma->vm_start, size);
+ zap_vma_pages(vma);
#ifdef CONFIG_COMPAT_VDSO
if (vma_is_special_mapping(vma, vdso_info[VDSO_ABI_AA32].dm))
- zap_page_range(vma, vma->vm_start, size);
+ zap_vma_pages(vma);
#endif
}
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 596f46dabe4e..f4cb0f85ccf4 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -925,7 +925,7 @@ NOKPROBE_SYMBOL(do_debug_exception);
/*
* Used during anonymous page fault handling.
*/
-struct page *alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma,
+struct folio *vma_alloc_zeroed_movable_folio(struct vm_area_struct *vma,
unsigned long vaddr)
{
gfp_t flags = GFP_HIGHUSER_MOVABLE | __GFP_ZERO;
@@ -938,7 +938,7 @@ struct page *alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma,
if (vma->vm_flags & VM_MTE)
flags |= __GFP_ZEROTAGS;
- return alloc_page_vma(flags, vma, vaddr);
+ return vma_alloc_folio(flags, 0, vma, vaddr, false);
}
void tag_clear_highpage(struct page *page)
diff --git a/arch/csky/abiv1/inc/abi/pgtable-bits.h b/arch/csky/abiv1/inc/abi/pgtable-bits.h
index 752c8b3f9194..ae7a2f76dd42 100644
--- a/arch/csky/abiv1/inc/abi/pgtable-bits.h
+++ b/arch/csky/abiv1/inc/abi/pgtable-bits.h
@@ -10,6 +10,9 @@
#define _PAGE_ACCESSED (1<<3)
#define _PAGE_MODIFIED (1<<4)
+/* We borrow bit 9 to store the exclusive marker in swap PTEs. */
+#define _PAGE_SWP_EXCLUSIVE (1<<9)
+
/* implemented in hardware */
#define _PAGE_GLOBAL (1<<6)
#define _PAGE_VALID (1<<7)
@@ -26,7 +29,8 @@
#define _PAGE_PROT_NONE _PAGE_READ
/*
- * Encode and decode a swap entry
+ * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that
+ * are !pte_none() && !pte_present().
*
* Format of swap PTE:
* bit 0: _PAGE_PRESENT (zero)
@@ -35,15 +39,16 @@
* bit 6: _PAGE_GLOBAL (zero)
* bit 7: _PAGE_VALID (zero)
* bit 8: swap type[4]
- * bit 9 - 31: swap offset
+ * bit 9: exclusive marker
+ * bit 10 - 31: swap offset
*/
#define __swp_type(x) ((((x).val >> 2) & 0xf) | \
(((x).val >> 4) & 0x10))
-#define __swp_offset(x) ((x).val >> 9)
+#define __swp_offset(x) ((x).val >> 10)
#define __swp_entry(type, offset) ((swp_entry_t) { \
((type & 0xf) << 2) | \
((type & 0x10) << 4) | \
- ((offset) << 9)})
+ ((offset) << 10)})
#define HAVE_ARCH_UNMAPPED_AREA
diff --git a/arch/csky/abiv2/inc/abi/pgtable-bits.h b/arch/csky/abiv2/inc/abi/pgtable-bits.h
index 7e7f389f546f..526152bd2156 100644
--- a/arch/csky/abiv2/inc/abi/pgtable-bits.h
+++ b/arch/csky/abiv2/inc/abi/pgtable-bits.h
@@ -10,6 +10,9 @@
#define _PAGE_PRESENT (1<<10)
#define _PAGE_MODIFIED (1<<11)
+/* We borrow bit 7 to store the exclusive marker in swap PTEs. */
+#define _PAGE_SWP_EXCLUSIVE (1<<7)
+
/* implemented in hardware */
#define _PAGE_GLOBAL (1<<0)
#define _PAGE_VALID (1<<1)
@@ -26,23 +29,25 @@
#define _PAGE_PROT_NONE _PAGE_WRITE
/*
- * Encode and decode a swap entry
+ * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that
+ * are !pte_none() && !pte_present().
*
* Format of swap PTE:
* bit 0: _PAGE_GLOBAL (zero)
* bit 1: _PAGE_VALID (zero)
* bit 2 - 6: swap type
- * bit 7 - 8: swap offset[0 - 1]
+ * bit 7: exclusive marker
+ * bit 8: swap offset[0]
* bit 9: _PAGE_WRITE (zero)
* bit 10: _PAGE_PRESENT (zero)
- * bit 11 - 31: swap offset[2 - 22]
+ * bit 11 - 31: swap offset[1 - 21]
*/
#define __swp_type(x) (((x).val >> 2) & 0x1f)
-#define __swp_offset(x) ((((x).val >> 7) & 0x3) | \
- (((x).val >> 9) & 0x7ffffc))
+#define __swp_offset(x) ((((x).val >> 8) & 0x1) | \
+ (((x).val >> 10) & 0x3ffffe))
#define __swp_entry(type, offset) ((swp_entry_t) { \
((type & 0x1f) << 2) | \
- ((offset & 0x3) << 7) | \
- ((offset & 0x7ffffc) << 9)})
+ ((offset & 0x1) << 8) | \
+ ((offset & 0x3ffffe) << 10)})
#endif /* __ASM_CSKY_PGTABLE_BITS_H */
diff --git a/arch/csky/include/asm/page.h b/arch/csky/include/asm/page.h
index ed7451478b1b..b23e3006a9e0 100644
--- a/arch/csky/include/asm/page.h
+++ b/arch/csky/include/asm/page.h
@@ -39,7 +39,6 @@
#define virt_addr_valid(kaddr) ((void *)(kaddr) >= (void *)PAGE_OFFSET && \
(void *)(kaddr) < high_memory)
-#define pfn_valid(pfn) ((pfn) >= ARCH_PFN_OFFSET && ((pfn) - ARCH_PFN_OFFSET) < max_mapnr)
extern void *memset(void *dest, int c, size_t l);
extern void *memcpy(void *to, const void *from, size_t l);
diff --git a/arch/csky/include/asm/pgtable.h b/arch/csky/include/asm/pgtable.h
index 77bc6caff2d2..d4042495febc 100644
--- a/arch/csky/include/asm/pgtable.h
+++ b/arch/csky/include/asm/pgtable.h
@@ -200,6 +200,23 @@ static inline pte_t pte_mkyoung(pte_t pte)
return pte;
}
+static inline int pte_swp_exclusive(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
+}
+
+static inline pte_t pte_swp_mkexclusive(pte_t pte)
+{
+ pte_val(pte) |= _PAGE_SWP_EXCLUSIVE;
+ return pte;
+}
+
+static inline pte_t pte_swp_clear_exclusive(pte_t pte)
+{
+ pte_val(pte) &= ~_PAGE_SWP_EXCLUSIVE;
+ return pte;
+}
+
#define __HAVE_PHYS_MEM_ACCESS_PROT
struct file;
extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
diff --git a/arch/hexagon/include/asm/page.h b/arch/hexagon/include/asm/page.h
index d7d4f9fca327..9c03b9965f07 100644
--- a/arch/hexagon/include/asm/page.h
+++ b/arch/hexagon/include/asm/page.h
@@ -95,7 +95,6 @@ struct page;
/* Default vm area behavior is non-executable. */
#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_NON_EXEC
-#define pfn_valid(pfn) ((pfn) < max_mapnr)
#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
/* Need to not use a define for linesize; may move this to another file. */
diff --git a/arch/hexagon/include/asm/pgtable.h b/arch/hexagon/include/asm/pgtable.h
index f7048c18b6f9..59393613d086 100644
--- a/arch/hexagon/include/asm/pgtable.h
+++ b/arch/hexagon/include/asm/pgtable.h
@@ -61,6 +61,9 @@ extern unsigned long empty_zero_page;
* So we'll put up with a bit of inefficiency for now...
*/
+/* We borrow bit 6 to store the exclusive marker in swap PTEs. */
+#define _PAGE_SWP_EXCLUSIVE (1<<6)
+
/*
* Top "FOURTH" level (pgd), which for the Hexagon VM is really
* only the second from the bottom, pgd and pud both being collapsed.
@@ -359,9 +362,12 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
#define ZERO_PAGE(vaddr) (virt_to_page(&empty_zero_page))
/*
+ * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that
+ * are !pte_none() && !pte_present().
+ *
* Swap/file PTE definitions. If _PAGE_PRESENT is zero, the rest of the PTE is
* interpreted as swap information. The remaining free bits are interpreted as
- * swap type/offset tuple. Rather than have the TLB fill handler test
+ * listed below. Rather than have the TLB fill handler test
* _PAGE_PRESENT, we're going to reserve the permissions bits and set them to
* all zeros for swap entries, which speeds up the miss handler at the cost of
* 3 bits of offset. That trade-off can be revisited if necessary, but Hexagon
@@ -371,9 +377,10 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
* Format of swap PTE:
* bit 0: Present (zero)
* bits 1-5: swap type (arch independent layer uses 5 bits max)
- * bits 6-9: bits 3:0 of offset
+ * bit 6: exclusive marker
+ * bits 7-9: bits 2:0 of offset
* bits 10-12: effectively _PAGE_PROTNONE (all zero)
- * bits 13-31: bits 22:4 of swap offset
+ * bits 13-31: bits 21:3 of swap offset
*
* The split offset makes some of the following macros a little gnarly,
* but there's plenty of precedent for this sort of thing.
@@ -383,11 +390,28 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
#define __swp_type(swp_pte) (((swp_pte).val >> 1) & 0x1f)
#define __swp_offset(swp_pte) \
- ((((swp_pte).val >> 6) & 0xf) | (((swp_pte).val >> 9) & 0x7ffff0))
+ ((((swp_pte).val >> 7) & 0x7) | (((swp_pte).val >> 10) & 0x3ffff8))
#define __swp_entry(type, offset) \
((swp_entry_t) { \
- ((type << 1) | \
- ((offset & 0x7ffff0) << 9) | ((offset & 0xf) << 6)) })
+ (((type & 0x1f) << 1) | \
+ ((offset & 0x3ffff8) << 10) | ((offset & 0x7) << 7)) })
+
+static inline int pte_swp_exclusive(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
+}
+
+static inline pte_t pte_swp_mkexclusive(pte_t pte)
+{
+ pte_val(pte) |= _PAGE_SWP_EXCLUSIVE;
+ return pte;
+}
+
+static inline pte_t pte_swp_clear_exclusive(pte_t pte)
+{
+ pte_val(pte) &= ~_PAGE_SWP_EXCLUSIVE;
+ return pte;
+}
#endif
diff --git a/arch/ia64/include/asm/page.h b/arch/ia64/include/asm/page.h
index 1b990466d540..310b09c3342d 100644
--- a/arch/ia64/include/asm/page.h
+++ b/arch/ia64/include/asm/page.h
@@ -82,25 +82,19 @@ do { \
} while (0)
-#define alloc_zeroed_user_highpage_movable(vma, vaddr) \
+#define vma_alloc_zeroed_movable_folio(vma, vaddr) \
({ \
- struct page *page = alloc_page_vma( \
- GFP_HIGHUSER_MOVABLE | __GFP_ZERO, vma, vaddr); \
- if (page) \
- flush_dcache_page(page); \
- page; \
+ struct folio *folio = vma_alloc_folio( \
+ GFP_HIGHUSER_MOVABLE | __GFP_ZERO, 0, vma, vaddr, false); \
+ if (folio) \
+ flush_dcache_folio(folio); \
+ folio; \
})
-#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE
-
#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
#include <asm-generic/memory_model.h>
-#ifdef CONFIG_FLATMEM
-# define pfn_valid(pfn) ((pfn) < max_mapnr)
-#endif
-
#define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT)
#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT)
diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h
index 01517a5e6778..21c97e31a28a 100644
--- a/arch/ia64/include/asm/pgtable.h
+++ b/arch/ia64/include/asm/pgtable.h
@@ -58,6 +58,9 @@
#define _PAGE_ED (__IA64_UL(1) << 52) /* exception deferral */
#define _PAGE_PROTNONE (__IA64_UL(1) << 63)
+/* We borrow bit 7 to store the exclusive marker in swap PTEs. */
+#define _PAGE_SWP_EXCLUSIVE (1 << 7)
+
#define _PFN_MASK _PAGE_PPN_MASK
/* Mask of bits which may be changed by pte_modify(); the odd bits are there for _PAGE_PROTNONE */
#define _PAGE_CHG_MASK (_PAGE_P | _PAGE_PROTNONE | _PAGE_PL_MASK | _PAGE_AR_MASK | _PAGE_ED)
@@ -399,6 +402,9 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
extern void paging_init (void);
/*
+ * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that
+ * are !pte_none() && !pte_present().
+ *
* Note: The macros below rely on the fact that MAX_SWAPFILES_SHIFT <= number of
* bits in the swap-type field of the swap pte. It would be nice to
* enforce that, but we can't easily include <linux/swap.h> here.
@@ -406,16 +412,35 @@ extern void paging_init (void);
*
* Format of swap pte:
* bit 0 : present bit (must be zero)
- * bits 1- 7: swap-type
+ * bits 1- 6: swap type
+ * bit 7 : exclusive marker
* bits 8-62: swap offset
* bit 63 : _PAGE_PROTNONE bit
*/
-#define __swp_type(entry) (((entry).val >> 1) & 0x7f)
+#define __swp_type(entry) (((entry).val >> 1) & 0x3f)
#define __swp_offset(entry) (((entry).val << 1) >> 9)
-#define __swp_entry(type,offset) ((swp_entry_t) { ((type) << 1) | ((long) (offset) << 8) })
+#define __swp_entry(type, offset) ((swp_entry_t) { ((type & 0x3f) << 1) | \
+ ((long) (offset) << 8) })
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
+static inline int pte_swp_exclusive(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
+}
+
+static inline pte_t pte_swp_mkexclusive(pte_t pte)
+{
+ pte_val(pte) |= _PAGE_SWP_EXCLUSIVE;
+ return pte;
+}
+
+static inline pte_t pte_swp_clear_exclusive(pte_t pte)
+{
+ pte_val(pte) &= ~_PAGE_SWP_EXCLUSIVE;
+ return pte;
+}
+
/*
* ZERO_PAGE is a global shared page that is always zero: used
* for zero-mapped memory areas etc..
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index fc4e4217e87f..7f5353e28516 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -109,7 +109,7 @@ ia64_init_addr_space (void)
vma_set_anonymous(vma);
vma->vm_start = current->thread.rbs_bot & PAGE_MASK;
vma->vm_end = vma->vm_start + PAGE_SIZE;
- vma->vm_flags = VM_DATA_DEFAULT_FLAGS|VM_GROWSUP|VM_ACCOUNT;
+ vm_flags_init(vma, VM_DATA_DEFAULT_FLAGS|VM_GROWSUP|VM_ACCOUNT);
vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
mmap_write_lock(current->mm);
if (insert_vm_struct(current->mm, vma)) {
@@ -127,8 +127,8 @@ ia64_init_addr_space (void)
vma_set_anonymous(vma);
vma->vm_end = PAGE_SIZE;
vma->vm_page_prot = __pgprot(pgprot_val(PAGE_READONLY) | _PAGE_MA_NAT);
- vma->vm_flags = VM_READ | VM_MAYREAD | VM_IO |
- VM_DONTEXPAND | VM_DONTDUMP;
+ vm_flags_init(vma, VM_READ | VM_MAYREAD | VM_IO |
+ VM_DONTEXPAND | VM_DONTDUMP);
mmap_write_lock(current->mm);
if (insert_vm_struct(current->mm, vma)) {
mmap_write_unlock(current->mm);
@@ -272,7 +272,7 @@ static int __init gate_vma_init(void)
vma_init(&gate_vma, NULL);
gate_vma.vm_start = FIXADDR_USER_START;
gate_vma.vm_end = FIXADDR_USER_END;
- gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
+ vm_flags_init(&gate_vma, VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC);
gate_vma.vm_page_prot = __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RX);
return 0;
diff --git a/arch/loongarch/include/asm/page.h b/arch/loongarch/include/asm/page.h
index 53f284a96182..fb5338b352e6 100644
--- a/arch/loongarch/include/asm/page.h
+++ b/arch/loongarch/include/asm/page.h
@@ -82,19 +82,6 @@ typedef struct { unsigned long pgprot; } pgprot_t;
#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT)
-#ifdef CONFIG_FLATMEM
-
-static inline int pfn_valid(unsigned long pfn)
-{
- /* avoid <linux/mm.h> include hell */
- extern unsigned long max_mapnr;
- unsigned long pfn_offset = ARCH_PFN_OFFSET;
-
- return pfn >= pfn_offset && pfn < max_mapnr;
-}
-
-#endif
-
#define virt_to_pfn(kaddr) PFN_DOWN(PHYSADDR(kaddr))
#define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr))
diff --git a/arch/loongarch/include/asm/pgtable-bits.h b/arch/loongarch/include/asm/pgtable-bits.h
index 3d1e0a69975a..8b98d22a145b 100644
--- a/arch/loongarch/include/asm/pgtable-bits.h
+++ b/arch/loongarch/include/asm/pgtable-bits.h
@@ -20,6 +20,7 @@
#define _PAGE_SPECIAL_SHIFT 11
#define _PAGE_HGLOBAL_SHIFT 12 /* HGlobal is a PMD bit */
#define _PAGE_PFN_SHIFT 12
+#define _PAGE_SWP_EXCLUSIVE_SHIFT 23
#define _PAGE_PFN_END_SHIFT 48
#define _PAGE_NO_READ_SHIFT 61
#define _PAGE_NO_EXEC_SHIFT 62
@@ -33,6 +34,9 @@
#define _PAGE_PROTNONE (_ULCAST_(1) << _PAGE_PROTNONE_SHIFT)
#define _PAGE_SPECIAL (_ULCAST_(1) << _PAGE_SPECIAL_SHIFT)
+/* We borrow bit 23 to store the exclusive marker in swap PTEs. */
+#define _PAGE_SWP_EXCLUSIVE (_ULCAST_(1) << _PAGE_SWP_EXCLUSIVE_SHIFT)
+
/* Used by TLB hardware (placed in EntryLo*) */
#define _PAGE_VALID (_ULCAST_(1) << _PAGE_VALID_SHIFT)
#define _PAGE_DIRTY (_ULCAST_(1) << _PAGE_DIRTY_SHIFT)
diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h
index 7a34e900d8c1..d28fb9dbec59 100644
--- a/arch/loongarch/include/asm/pgtable.h
+++ b/arch/loongarch/include/asm/pgtable.h
@@ -249,13 +249,26 @@ extern void pud_init(void *addr);
extern void pmd_init(void *addr);
/*
- * Non-present pages: high 40 bits are offset, next 8 bits type,
- * low 16 bits zero.
+ * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that
+ * are !pte_none() && !pte_present().
+ *
+ * Format of swap PTEs:
+ *
+ * 6 6 6 6 5 5 5 5 5 5 5 5 5 5 4 4 4 4 4 4 4 4 4 4 3 3 3 3 3 3 3 3
+ * 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2
+ * <--------------------------- offset ---------------------------
+ *
+ * 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
+ * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * --------------> E <--- type ---> <---------- zeroes ---------->
+ *
+ * E is the exclusive marker that is not stored in swap entries.
+ * The zero'ed bits include _PAGE_PRESENT and _PAGE_PROTNONE.
*/
static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
-{ pte_t pte; pte_val(pte) = (type << 16) | (offset << 24); return pte; }
+{ pte_t pte; pte_val(pte) = ((type & 0x7f) << 16) | (offset << 24); return pte; }
-#define __swp_type(x) (((x).val >> 16) & 0xff)
+#define __swp_type(x) (((x).val >> 16) & 0x7f)
#define __swp_offset(x) ((x).val >> 24)
#define __swp_entry(type, offset) ((swp_entry_t) { pte_val(mk_swap_pte((type), (offset))) })
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
@@ -263,6 +276,23 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
#define __pmd_to_swp_entry(pmd) ((swp_entry_t) { pmd_val(pmd) })
#define __swp_entry_to_pmd(x) ((pmd_t) { (x).val | _PAGE_HUGE })
+static inline int pte_swp_exclusive(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
+}
+
+static inline pte_t pte_swp_mkexclusive(pte_t pte)
+{
+ pte_val(pte) |= _PAGE_SWP_EXCLUSIVE;
+ return pte;
+}
+
+static inline pte_t pte_swp_clear_exclusive(pte_t pte)
+{
+ pte_val(pte) &= ~_PAGE_SWP_EXCLUSIVE;
+ return pte;
+}
+
extern void paging_init(void);
#define pte_none(pte) (!(pte_val(pte) & ~_PAGE_GLOBAL))
diff --git a/arch/loongarch/include/asm/tlb.h b/arch/loongarch/include/asm/tlb.h
index dd24f5898f65..f5e4deb97402 100644
--- a/arch/loongarch/include/asm/tlb.h
+++ b/arch/loongarch/include/asm/tlb.h
@@ -149,7 +149,7 @@ static inline void tlb_flush(struct mmu_gather *tlb)
struct vm_area_struct vma;
vma.vm_mm = tlb->mm;
- vma.vm_flags = 0;
+ vm_flags_init(&vma, 0);
if (tlb->fullmm) {
flush_tlb_mm(tlb->mm);
return;
diff --git a/arch/m68k/include/asm/mcf_pgtable.h b/arch/m68k/include/asm/mcf_pgtable.h
index b619b22823f8..13741c1245e1 100644
--- a/arch/m68k/include/asm/mcf_pgtable.h
+++ b/arch/m68k/include/asm/mcf_pgtable.h
@@ -46,6 +46,9 @@
#define _CACHEMASK040 (~0x060)
#define _PAGE_GLOBAL040 0x400 /* 68040 global bit, used for kva descs */
+/* We borrow bit 24 to store the exclusive marker in swap PTEs. */
+#define _PAGE_SWP_EXCLUSIVE CF_PAGE_NOCACHE
+
/*
* Externally used page protection values.
*/
@@ -254,15 +257,41 @@ static inline pte_t pte_mkcache(pte_t pte)
extern pgd_t kernel_pg_dir[PTRS_PER_PGD];
/*
- * Encode and de-code a swap entry (must be !pte_none(e) && !pte_present(e))
+ * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that
+ * are !pte_none() && !pte_present().
+ *
+ * Format of swap PTEs:
+ *
+ * 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
+ * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * <------------------ offset -------------> 0 0 0 E <-- type --->
+ *
+ * E is the exclusive marker that is not stored in swap entries.
*/
-#define __swp_type(x) ((x).val & 0xFF)
+#define __swp_type(x) ((x).val & 0x7f)
#define __swp_offset(x) ((x).val >> 11)
-#define __swp_entry(typ, off) ((swp_entry_t) { (typ) | \
+#define __swp_entry(typ, off) ((swp_entry_t) { ((typ) & 0x7f) | \
(off << 11) })
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) (__pte((x).val))
+static inline int pte_swp_exclusive(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
+}
+
+static inline pte_t pte_swp_mkexclusive(pte_t pte)
+{
+ pte_val(pte) |= _PAGE_SWP_EXCLUSIVE;
+ return pte;
+}
+
+static inline pte_t pte_swp_clear_exclusive(pte_t pte)
+{
+ pte_val(pte) &= ~_PAGE_SWP_EXCLUSIVE;
+ return pte;
+}
+
#define pmd_pfn(pmd) (pmd_val(pmd) >> PAGE_SHIFT)
#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
diff --git a/arch/m68k/include/asm/motorola_pgtable.h b/arch/m68k/include/asm/motorola_pgtable.h
index 7ac3d64c6b33..ec0dc19ab834 100644
--- a/arch/m68k/include/asm/motorola_pgtable.h
+++ b/arch/m68k/include/asm/motorola_pgtable.h
@@ -41,6 +41,9 @@
#define _PAGE_PROTNONE 0x004
+/* We borrow bit 11 to store the exclusive marker in swap PTEs. */
+#define _PAGE_SWP_EXCLUSIVE 0x800
+
#ifndef __ASSEMBLY__
/* This is the cache mode to be used for pages containing page descriptors for
@@ -124,7 +127,7 @@ static inline void pud_set(pud_t *pudp, pmd_t *pmdp)
* expects pmd_page() to exists, only to then DCE it all. Provide a dummy to
* make the compiler happy.
*/
-#define pmd_page(pmd) NULL
+#define pmd_page(pmd) ((struct page *)NULL)
#define pud_none(pud) (!pud_val(pud))
@@ -169,12 +172,40 @@ static inline pte_t pte_mkcache(pte_t pte)
#define swapper_pg_dir kernel_pg_dir
extern pgd_t kernel_pg_dir[128];
-/* Encode and de-code a swap entry (must be !pte_none(e) && !pte_present(e)) */
-#define __swp_type(x) (((x).val >> 4) & 0xff)
+/*
+ * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that
+ * are !pte_none() && !pte_present().
+ *
+ * Format of swap PTEs:
+ *
+ * 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
+ * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * <----------------- offset ------------> E <-- type ---> 0 0 0 0
+ *
+ * E is the exclusive marker that is not stored in swap entries.
+ */
+#define __swp_type(x) (((x).val >> 4) & 0x7f)
#define __swp_offset(x) ((x).val >> 12)
-#define __swp_entry(type, offset) ((swp_entry_t) { ((type) << 4) | ((offset) << 12) })
+#define __swp_entry(type, offset) ((swp_entry_t) { (((type) & 0x7f) << 4) | ((offset) << 12) })
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
+static inline int pte_swp_exclusive(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
+}
+
+static inline pte_t pte_swp_mkexclusive(pte_t pte)
+{
+ pte_val(pte) |= _PAGE_SWP_EXCLUSIVE;
+ return pte;
+}
+
+static inline pte_t pte_swp_clear_exclusive(pte_t pte)
+{
+ pte_val(pte) &= ~_PAGE_SWP_EXCLUSIVE;
+ return pte;
+}
+
#endif /* !__ASSEMBLY__ */
#endif /* _MOTOROLA_PGTABLE_H */
diff --git a/arch/m68k/include/asm/page.h b/arch/m68k/include/asm/page.h
index 2f1c54e4725d..a5993ad83ed8 100644
--- a/arch/m68k/include/asm/page.h
+++ b/arch/m68k/include/asm/page.h
@@ -62,11 +62,7 @@ extern unsigned long _ramend;
#include <asm/page_no.h>
#endif
-#ifndef CONFIG_MMU
-#define __phys_to_pfn(paddr) ((unsigned long)((paddr) >> PAGE_SHIFT))
-#define __pfn_to_phys(pfn) PFN_PHYS(pfn)
-#endif
-
#include <asm-generic/getorder.h>
+#include <asm-generic/memory_model.h>
#endif /* _M68K_PAGE_H */
diff --git a/arch/m68k/include/asm/page_mm.h b/arch/m68k/include/asm/page_mm.h
index a5b459bcb7d8..3903db2e8da7 100644
--- a/arch/m68k/include/asm/page_mm.h
+++ b/arch/m68k/include/asm/page_mm.h
@@ -134,7 +134,6 @@ extern int m68k_virt_to_node_shift;
})
#define ARCH_PFN_OFFSET (m68k_memory[0].addr >> PAGE_SHIFT)
-#include <asm-generic/memory_model.h>
#define virt_addr_valid(kaddr) ((unsigned long)(kaddr) >= PAGE_OFFSET && (unsigned long)(kaddr) < (unsigned long)high_memory)
#define pfn_valid(pfn) virt_addr_valid(pfn_to_virt(pfn))
diff --git a/arch/m68k/include/asm/page_no.h b/arch/m68k/include/asm/page_no.h
index c9d0d84158a4..060e4c0e7605 100644
--- a/arch/m68k/include/asm/page_no.h
+++ b/arch/m68k/include/asm/page_no.h
@@ -13,9 +13,8 @@ extern unsigned long memory_end;
#define clear_user_page(page, vaddr, pg) clear_page(page)
#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
-#define alloc_zeroed_user_highpage_movable(vma, vaddr) \
- alloc_page_vma(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, vma, vaddr)
-#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE
+#define vma_alloc_zeroed_movable_folio(vma, vaddr) \
+ vma_alloc_folio(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, 0, vma, vaddr, false)
#define __pa(vaddr) ((unsigned long)(vaddr))
#define __va(paddr) ((void *)((unsigned long)(paddr)))
@@ -26,13 +25,11 @@ extern unsigned long memory_end;
#define virt_to_page(addr) (mem_map + (((unsigned long)(addr)-PAGE_OFFSET) >> PAGE_SHIFT))
#define page_to_virt(page) __va(((((page) - mem_map) << PAGE_SHIFT) + PAGE_OFFSET))
-#define pfn_to_page(pfn) virt_to_page(pfn_to_virt(pfn))
-#define page_to_pfn(page) virt_to_pfn(page_to_virt(page))
-#define pfn_valid(pfn) ((pfn) < max_mapnr)
-
#define virt_addr_valid(kaddr) (((unsigned long)(kaddr) >= PAGE_OFFSET) && \
((unsigned long)(kaddr) < memory_end))
+#define ARCH_PFN_OFFSET PHYS_PFN(PAGE_OFFSET_RAW)
+
#endif /* __ASSEMBLY__ */
#endif /* _M68K_PAGE_NO_H */
diff --git a/arch/m68k/include/asm/pgtable_no.h b/arch/m68k/include/asm/pgtable_no.h
index fed58da3a6b6..fc044df52b96 100644
--- a/arch/m68k/include/asm/pgtable_no.h
+++ b/arch/m68k/include/asm/pgtable_no.h
@@ -31,12 +31,6 @@
extern void paging_init(void);
#define swapper_pg_dir ((pgd_t *) 0)
-#define __swp_type(x) (0)
-#define __swp_offset(x) (0)
-#define __swp_entry(typ,off) ((swp_entry_t) { ((typ) | ((off) << 7)) })
-#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
-#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-
/*
* ZERO_PAGE is a global shared page that is always zero: used
* for zero-mapped memory areas etc..
diff --git a/arch/m68k/include/asm/sun3_pgtable.h b/arch/m68k/include/asm/sun3_pgtable.h
index 90d57e537eb1..e582b0484a55 100644
--- a/arch/m68k/include/asm/sun3_pgtable.h
+++ b/arch/m68k/include/asm/sun3_pgtable.h
@@ -71,6 +71,9 @@
#define SUN3_PMD_MASK (0x0000003F)
#define SUN3_PMD_MAGIC (0x0000002B)
+/* We borrow bit 6 to store the exclusive marker in swap PTEs. */
+#define _PAGE_SWP_EXCLUSIVE 0x040
+
#ifndef __ASSEMBLY__
/*
@@ -152,12 +155,41 @@ static inline pte_t pte_mkcache(pte_t pte) { return pte; }
extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
extern pgd_t kernel_pg_dir[PTRS_PER_PGD];
-/* Macros to (de)construct the fake PTEs representing swap pages. */
-#define __swp_type(x) ((x).val & 0x7F)
+/*
+ * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that
+ * are !pte_none() && !pte_present().
+ *
+ * Format of swap PTEs:
+ *
+ * 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
+ * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * 0 <--------------------- offset ----------------> E <- type -->
+ *
+ * E is the exclusive marker that is not stored in swap entries.
+ */
+#define __swp_type(x) ((x).val & 0x3f)
#define __swp_offset(x) (((x).val) >> 7)
-#define __swp_entry(type,offset) ((swp_entry_t) { ((type) | ((offset) << 7)) })
+#define __swp_entry(type, offset) ((swp_entry_t) { (((type) & 0x3f) | \
+ (((offset) << 7) & ~SUN3_PAGE_VALID)) })
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
+static inline int pte_swp_exclusive(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
+}
+
+static inline pte_t pte_swp_mkexclusive(pte_t pte)
+{
+ pte_val(pte) |= _PAGE_SWP_EXCLUSIVE;
+ return pte;
+}
+
+static inline pte_t pte_swp_clear_exclusive(pte_t pte)
+{
+ pte_val(pte) &= ~_PAGE_SWP_EXCLUSIVE;
+ return pte;
+}
+
#endif /* !__ASSEMBLY__ */
#endif /* !_SUN3_PGTABLE_H */
diff --git a/arch/microblaze/include/asm/page.h b/arch/microblaze/include/asm/page.h
index 4b8b2fa78fc5..7b9861bcd458 100644
--- a/arch/microblaze/include/asm/page.h
+++ b/arch/microblaze/include/asm/page.h
@@ -112,7 +112,6 @@ extern int page_is_ram(unsigned long pfn);
# define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT)
# define ARCH_PFN_OFFSET (memory_start >> PAGE_SHIFT)
-# define pfn_valid(pfn) ((pfn) >= ARCH_PFN_OFFSET && (pfn) < (max_mapnr + ARCH_PFN_OFFSET))
# endif /* __ASSEMBLY__ */
#define virt_addr_valid(vaddr) (pfn_valid(virt_to_pfn(vaddr)))
diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h
index 42f5988e998b..d1b8272abcd9 100644
--- a/arch/microblaze/include/asm/pgtable.h
+++ b/arch/microblaze/include/asm/pgtable.h
@@ -131,10 +131,10 @@ extern pte_t *va_to_pte(unsigned long address);
* of the 16 available. Bit 24-26 of the TLB are cleared in the TLB
* miss handler. Bit 27 is PAGE_USER, thus selecting the correct
* zone.
- * - PRESENT *must* be in the bottom two bits because swap cache
- * entries use the top 30 bits. Because 4xx doesn't support SMP
- * anyway, M is irrelevant so we borrow it for PAGE_PRESENT. Bit 30
- * is cleared in the TLB miss handler before the TLB entry is loaded.
+ * - PRESENT *must* be in the bottom two bits because swap PTEs use the top
+ * 30 bits. Because 4xx doesn't support SMP anyway, M is irrelevant so we
+ * borrow it for PAGE_PRESENT. Bit 30 is cleared in the TLB miss handler
+ * before the TLB entry is loaded.
* - All other bits of the PTE are loaded into TLBLO without
* * modification, leaving us only the bits 20, 21, 24, 25, 26, 30 for
* software PTE bits. We actually use bits 21, 24, 25, and
@@ -155,6 +155,9 @@ extern pte_t *va_to_pte(unsigned long address);
#define _PAGE_ACCESSED 0x400 /* software: R: page referenced */
#define _PMD_PRESENT PAGE_MASK
+/* We borrow bit 24 to store the exclusive marker in swap PTEs. */
+#define _PAGE_SWP_EXCLUSIVE _PAGE_DIRTY
+
/*
* Some bits are unused...
*/
@@ -393,18 +396,39 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
/*
- * Encode and decode a swap entry.
- * Note that the bits we use in a PTE for representing a swap entry
- * must not include the _PAGE_PRESENT bit, or the _PAGE_HASHPTE bit
- * (if used). -- paulus
+ * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that
+ * are !pte_none() && !pte_present().
+ *
+ * 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 3 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * <------------------ offset -------------------> E < type -> 0 0
+ *
+ * E is the exclusive marker that is not stored in swap entries.
*/
-#define __swp_type(entry) ((entry).val & 0x3f)
+#define __swp_type(entry) ((entry).val & 0x1f)
#define __swp_offset(entry) ((entry).val >> 6)
#define __swp_entry(type, offset) \
- ((swp_entry_t) { (type) | ((offset) << 6) })
+ ((swp_entry_t) { ((type) & 0x1f) | ((offset) << 6) })
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 2 })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val << 2 })
+static inline int pte_swp_exclusive(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
+}
+
+static inline pte_t pte_swp_mkexclusive(pte_t pte)
+{
+ pte_val(pte) |= _PAGE_SWP_EXCLUSIVE;
+ return pte;
+}
+
+static inline pte_t pte_swp_clear_exclusive(pte_t pte)
+{
+ pte_val(pte) &= ~_PAGE_SWP_EXCLUSIVE;
+ return pte;
+}
+
extern unsigned long iopa(unsigned long addr);
/* Values for nocacheflag and cmode */
diff --git a/arch/mips/include/asm/page.h b/arch/mips/include/asm/page.h
index 96bc798c1ec1..5978a8dfb917 100644
--- a/arch/mips/include/asm/page.h
+++ b/arch/mips/include/asm/page.h
@@ -224,34 +224,6 @@ extern phys_addr_t __phys_addr_symbol(unsigned long x);
#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT)
-#ifdef CONFIG_FLATMEM
-
-static inline int pfn_valid(unsigned long pfn)
-{
- /* avoid <linux/mm.h> include hell */
- extern unsigned long max_mapnr;
- unsigned long pfn_offset = ARCH_PFN_OFFSET;
-
- return pfn >= pfn_offset && pfn < max_mapnr;
-}
-
-#elif defined(CONFIG_SPARSEMEM)
-
-/* pfn_valid is defined in linux/mmzone.h */
-
-#elif defined(CONFIG_NUMA)
-
-#define pfn_valid(pfn) \
-({ \
- unsigned long __pfn = (pfn); \
- int __n = pfn_to_nid(__pfn); \
- ((__n >= 0) ? (__pfn < NODE_DATA(__n)->node_start_pfn + \
- NODE_DATA(__n)->node_spanned_pages) \
- : 0); \
-})
-
-#endif
-
#define virt_to_pfn(kaddr) PFN_DOWN(virt_to_phys((void *)(kaddr)))
#define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr))
diff --git a/arch/mips/include/asm/pgtable-32.h b/arch/mips/include/asm/pgtable-32.h
index b40a0e69fccc..ba0016709a1a 100644
--- a/arch/mips/include/asm/pgtable-32.h
+++ b/arch/mips/include/asm/pgtable-32.h
@@ -191,49 +191,113 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot)
#define pte_page(x) pfn_to_page(pte_pfn(x))
+/*
+ * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that
+ * are !pte_none() && !pte_present().
+ */
#if defined(CONFIG_CPU_R3K_TLB)
-/* Swap entries must have VALID bit cleared. */
+/*
+ * Format of swap PTEs:
+ *
+ * 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
+ * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * <----------- offset ------------> < type -> V G E 0 0 0 0 0 0 P
+ *
+ * E is the exclusive marker that is not stored in swap entries.
+ * _PAGE_PRESENT (P), _PAGE_VALID (V) and_PAGE_GLOBAL (G) have to remain
+ * unused.
+ */
#define __swp_type(x) (((x).val >> 10) & 0x1f)
#define __swp_offset(x) ((x).val >> 15)
-#define __swp_entry(type,offset) ((swp_entry_t) { ((type) << 10) | ((offset) << 15) })
+#define __swp_entry(type, offset) ((swp_entry_t) { (((type) & 0x1f) << 10) | ((offset) << 15) })
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
+/* We borrow bit 7 to store the exclusive marker in swap PTEs. */
+#define _PAGE_SWP_EXCLUSIVE (1 << 7)
+
#else
#if defined(CONFIG_XPA)
-/* Swap entries must have VALID and GLOBAL bits cleared. */
+/*
+ * Format of swap PTEs:
+ *
+ * 6 6 6 6 5 5 5 5 5 5 5 5 5 5 4 4 4 4 4 4 4 4 4 4 3 3 3 3 3 3 3 3
+ * 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2
+ * 0 0 0 0 0 0 E P <------------------ zeroes ------------------->
+ *
+ * 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
+ * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * <----------------- offset ------------------> < type -> V G 0 0
+ *
+ * E is the exclusive marker that is not stored in swap entries.
+ * _PAGE_PRESENT (P), _PAGE_VALID (V) and_PAGE_GLOBAL (G) have to remain
+ * unused.
+ */
#define __swp_type(x) (((x).val >> 4) & 0x1f)
#define __swp_offset(x) ((x).val >> 9)
-#define __swp_entry(type,offset) ((swp_entry_t) { ((type) << 4) | ((offset) << 9) })
+#define __swp_entry(type, offset) ((swp_entry_t) { (((type) & 0x1f) << 4) | ((offset) << 9) })
#define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_high })
#define __swp_entry_to_pte(x) ((pte_t) { 0, (x).val })
+/*
+ * We borrow bit 57 (bit 25 in the low PTE) to store the exclusive marker in
+ * swap PTEs.
+ */
+#define _PAGE_SWP_EXCLUSIVE (1 << 25)
+
#elif defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
-/* Swap entries must have VALID and GLOBAL bits cleared. */
+/*
+ * Format of swap PTEs:
+ *
+ * 6 6 6 6 5 5 5 5 5 5 5 5 5 5 4 4 4 4 4 4 4 4 4 4 3 3 3 3 3 3 3 3
+ * 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2
+ * <------------------ zeroes -------------------> E P 0 0 0 0 0 0
+ *
+ * 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
+ * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * <------------------- offset --------------------> < type -> V G
+ *
+ * E is the exclusive marker that is not stored in swap entries.
+ * _PAGE_PRESENT (P), _PAGE_VALID (V) and_PAGE_GLOBAL (G) have to remain
+ * unused.
+ */
#define __swp_type(x) (((x).val >> 2) & 0x1f)
#define __swp_offset(x) ((x).val >> 7)
-#define __swp_entry(type, offset) ((swp_entry_t) { ((type) << 2) | ((offset) << 7) })
+#define __swp_entry(type, offset) ((swp_entry_t) { (((type) & 0x1f) << 2) | ((offset) << 7) })
#define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_high })
#define __swp_entry_to_pte(x) ((pte_t) { 0, (x).val })
+/*
+ * We borrow bit 39 (bit 7 in the low PTE) to store the exclusive marker in swap
+ * PTEs.
+ */
+#define _PAGE_SWP_EXCLUSIVE (1 << 7)
+
#else
/*
- * Constraints:
- * _PAGE_PRESENT at bit 0
- * _PAGE_MODIFIED at bit 4
- * _PAGE_GLOBAL at bit 6
- * _PAGE_VALID at bit 7
+ * Format of swap PTEs:
+ *
+ * 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
+ * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * <------------- offset --------------> < type -> 0 0 0 0 0 0 E P
+ *
+ * E is the exclusive marker that is not stored in swap entries.
+ * _PAGE_PRESENT (P), _PAGE_VALID (V) and_PAGE_GLOBAL (G) have to remain
+ * unused. The location of V and G varies.
*/
#define __swp_type(x) (((x).val >> 8) & 0x1f)
#define __swp_offset(x) ((x).val >> 13)
-#define __swp_entry(type,offset) ((swp_entry_t) { ((type) << 8) | ((offset) << 13) })
+#define __swp_entry(type, offset) ((swp_entry_t) { ((type) << 8) | ((offset) << 13) })
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
+/* We borrow bit 1 to store the exclusive marker in swap PTEs. */
+#define _PAGE_SWP_EXCLUSIVE (1 << 1)
+
#endif /* defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32) */
#endif /* defined(CONFIG_CPU_R3K_TLB) */
diff --git a/arch/mips/include/asm/pgtable-64.h b/arch/mips/include/asm/pgtable-64.h
index c6310192b654..98e24e3e7f2b 100644
--- a/arch/mips/include/asm/pgtable-64.h
+++ b/arch/mips/include/asm/pgtable-64.h
@@ -320,16 +320,31 @@ extern void pud_init(void *addr);
extern void pmd_init(void *addr);
/*
- * Non-present pages: high 40 bits are offset, next 8 bits type,
- * low 16 bits zero.
+ * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that
+ * are !pte_none() && !pte_present().
+ *
+ * Format of swap PTEs:
+ *
+ * 6 6 6 6 5 5 5 5 5 5 5 5 5 5 4 4 4 4 4 4 4 4 4 4 3 3 3 3 3 3 3 3
+ * 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2
+ * <--------------------------- offset ---------------------------
+ *
+ * 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
+ * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * --------------> E <-- type ---> <---------- zeroes ----------->
+ *
+ * E is the exclusive marker that is not stored in swap entries.
*/
static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
-{ pte_t pte; pte_val(pte) = (type << 16) | (offset << 24); return pte; }
+{ pte_t pte; pte_val(pte) = ((type & 0x7f) << 16) | (offset << 24); return pte; }
-#define __swp_type(x) (((x).val >> 16) & 0xff)
+#define __swp_type(x) (((x).val >> 16) & 0x7f)
#define __swp_offset(x) ((x).val >> 24)
#define __swp_entry(type, offset) ((swp_entry_t) { pte_val(mk_swap_pte((type), (offset))) })
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
+/* We borrow bit 23 to store the exclusive marker in swap PTEs. */
+#define _PAGE_SWP_EXCLUSIVE (1 << 23)
+
#endif /* _ASM_PGTABLE_64_H */
diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index a68c0b01d8cd..791389bf3c12 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -528,6 +528,41 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
}
#endif
+#if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
+static inline int pte_swp_exclusive(pte_t pte)
+{
+ return pte.pte_low & _PAGE_SWP_EXCLUSIVE;
+}
+
+static inline pte_t pte_swp_mkexclusive(pte_t pte)
+{
+ pte.pte_low |= _PAGE_SWP_EXCLUSIVE;
+ return pte;
+}
+
+static inline pte_t pte_swp_clear_exclusive(pte_t pte)
+{
+ pte.pte_low &= ~_PAGE_SWP_EXCLUSIVE;
+ return pte;
+}
+#else
+static inline int pte_swp_exclusive(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
+}
+
+static inline pte_t pte_swp_mkexclusive(pte_t pte)
+{
+ pte_val(pte) |= _PAGE_SWP_EXCLUSIVE;
+ return pte;
+}
+
+static inline pte_t pte_swp_clear_exclusive(pte_t pte)
+{
+ pte_val(pte) &= ~_PAGE_SWP_EXCLUSIVE;
+ return pte;
+}
+#endif
extern void __update_tlb(struct vm_area_struct *vma, unsigned long address,
pte_t pte);
diff --git a/arch/nios2/include/asm/page.h b/arch/nios2/include/asm/page.h
index 6a989819a7c1..0ae7d9ce369b 100644
--- a/arch/nios2/include/asm/page.h
+++ b/arch/nios2/include/asm/page.h
@@ -86,15 +86,6 @@ extern struct page *mem_map;
# define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT)
-static inline bool pfn_valid(unsigned long pfn)
-{
- /* avoid <linux/mm.h> include hell */
- extern unsigned long max_mapnr;
- unsigned long pfn_offset = ARCH_PFN_OFFSET;
-
- return pfn >= pfn_offset && pfn < max_mapnr;
-}
-
# define virt_to_page(vaddr) pfn_to_page(PFN_DOWN(virt_to_phys(vaddr)))
# define virt_addr_valid(vaddr) pfn_valid(PFN_DOWN(virt_to_phys(vaddr)))
diff --git a/arch/nios2/include/asm/pgtable-bits.h b/arch/nios2/include/asm/pgtable-bits.h
index bfddff383e89..724f9b08b1d1 100644
--- a/arch/nios2/include/asm/pgtable-bits.h
+++ b/arch/nios2/include/asm/pgtable-bits.h
@@ -31,4 +31,7 @@
#define _PAGE_ACCESSED (1<<26) /* page referenced */
#define _PAGE_DIRTY (1<<27) /* dirty page */
+/* We borrow bit 31 to store the exclusive marker in swap PTEs. */
+#define _PAGE_SWP_EXCLUSIVE (1<<31)
+
#endif /* _ASM_NIOS2_PGTABLE_BITS_H */
diff --git a/arch/nios2/include/asm/pgtable.h b/arch/nios2/include/asm/pgtable.h
index ab793bc517f5..0f5c2564e9f5 100644
--- a/arch/nios2/include/asm/pgtable.h
+++ b/arch/nios2/include/asm/pgtable.h
@@ -232,23 +232,44 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
__FILE__, __LINE__, pgd_val(e))
/*
- * Encode and decode a swap entry (must be !pte_none(pte) && !pte_present(pte):
+ * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that
+ * are !pte_none() && !pte_present().
*
- * 31 30 29 28 27 26 25 24 23 22 21 20 19 18 ... 1 0
- * 0 0 0 0 type. 0 0 0 0 0 0 offset.........
+ * Format of swap PTEs:
*
- * This gives us up to 2**2 = 4 swap files and 2**20 * 4K = 4G per swap file.
+ * 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
+ * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * E < type -> 0 0 0 0 0 0 <-------------- offset --------------->
*
- * Note that the offset field is always non-zero, thus !pte_none(pte) is always
- * true.
+ * E is the exclusive marker that is not stored in swap entries.
+ *
+ * Note that the offset field is always non-zero if the swap type is 0, thus
+ * !pte_none() is always true.
*/
-#define __swp_type(swp) (((swp).val >> 26) & 0x3)
+#define __swp_type(swp) (((swp).val >> 26) & 0x1f)
#define __swp_offset(swp) ((swp).val & 0xfffff)
-#define __swp_entry(type, off) ((swp_entry_t) { (((type) & 0x3) << 26) \
+#define __swp_entry(type, off) ((swp_entry_t) { (((type) & 0x1f) << 26) \
| ((off) & 0xfffff) })
#define __swp_entry_to_pte(swp) ((pte_t) { (swp).val })
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
+static inline int pte_swp_exclusive(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
+}
+
+static inline pte_t pte_swp_mkexclusive(pte_t pte)
+{
+ pte_val(pte) |= _PAGE_SWP_EXCLUSIVE;
+ return pte;
+}
+
+static inline pte_t pte_swp_clear_exclusive(pte_t pte)
+{
+ pte_val(pte) &= ~_PAGE_SWP_EXCLUSIVE;
+ return pte;
+}
+
extern void __init paging_init(void);
extern void __init mmu_init(void);
diff --git a/arch/openrisc/include/asm/page.h b/arch/openrisc/include/asm/page.h
index aab6e64d6db4..52b0d7e76446 100644
--- a/arch/openrisc/include/asm/page.h
+++ b/arch/openrisc/include/asm/page.h
@@ -80,8 +80,6 @@ typedef struct page *pgtable_t;
#define page_to_phys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT)
-#define pfn_valid(pfn) ((pfn) < max_mapnr)
-
#define virt_addr_valid(kaddr) (pfn_valid(virt_to_pfn(kaddr)))
#endif /* __ASSEMBLY__ */
diff --git a/arch/openrisc/include/asm/pgtable.h b/arch/openrisc/include/asm/pgtable.h
index 6477c17b3062..3eb9b9555d0d 100644
--- a/arch/openrisc/include/asm/pgtable.h
+++ b/arch/openrisc/include/asm/pgtable.h
@@ -154,6 +154,9 @@ extern void paging_init(void);
#define _KERNPG_TABLE \
(_PAGE_BASE | _PAGE_SRE | _PAGE_SWE | _PAGE_ACCESSED | _PAGE_DIRTY)
+/* We borrow bit 11 to store the exclusive marker in swap PTEs. */
+#define _PAGE_SWP_EXCLUSIVE _PAGE_U_SHARED
+
#define PAGE_NONE __pgprot(_PAGE_ALL)
#define PAGE_READONLY __pgprot(_PAGE_ALL | _PAGE_URE | _PAGE_SRE)
#define PAGE_READONLY_X __pgprot(_PAGE_ALL | _PAGE_URE | _PAGE_SRE | _PAGE_EXEC)
@@ -385,16 +388,43 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
/* __PHX__ FIXME, SWAP, this probably doesn't work */
-/* Encode and de-code a swap entry (must be !pte_none(e) && !pte_present(e)) */
-/* Since the PAGE_PRESENT bit is bit 4, we can use the bits above */
-
-#define __swp_type(x) (((x).val >> 5) & 0x7f)
+/*
+ * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that
+ * are !pte_none() && !pte_present().
+ *
+ * Format of swap PTEs:
+ *
+ * 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
+ * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * <-------------- offset ---------------> E <- type --> 0 0 0 0 0
+ *
+ * E is the exclusive marker that is not stored in swap entries.
+ * The zero'ed bits include _PAGE_PRESENT.
+ */
+#define __swp_type(x) (((x).val >> 5) & 0x3f)
#define __swp_offset(x) ((x).val >> 12)
#define __swp_entry(type, offset) \
- ((swp_entry_t) { ((type) << 5) | ((offset) << 12) })
+ ((swp_entry_t) { (((type) & 0x3f) << 5) | ((offset) << 12) })
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
+static inline int pte_swp_exclusive(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
+}
+
+static inline pte_t pte_swp_mkexclusive(pte_t pte)
+{
+ pte_val(pte) |= _PAGE_SWP_EXCLUSIVE;
+ return pte;
+}
+
+static inline pte_t pte_swp_clear_exclusive(pte_t pte)
+{
+ pte_val(pte) &= ~_PAGE_SWP_EXCLUSIVE;
+ return pte;
+}
+
typedef pte_t *pte_addr_t;
#endif /* __ASSEMBLY__ */
diff --git a/arch/parisc/include/asm/page.h b/arch/parisc/include/asm/page.h
index 6faaaa3ebe9b..667e703c0e8f 100644
--- a/arch/parisc/include/asm/page.h
+++ b/arch/parisc/include/asm/page.h
@@ -155,10 +155,6 @@ extern int npmem_ranges;
#define __pa(x) ((unsigned long)(x)-PAGE_OFFSET)
#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET))
-#ifndef CONFIG_SPARSEMEM
-#define pfn_valid(pfn) ((pfn) < max_mapnr)
-#endif
-
#ifdef CONFIG_HUGETLB_PAGE
#define HPAGE_SHIFT PMD_SHIFT /* fixed for transparent huge pages */
#define HPAGE_SIZE ((1UL) << HPAGE_SHIFT)
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index ea357430aafe..e2950f5db7c9 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -218,6 +218,9 @@ extern void __update_cache(pte_t pte);
#define _PAGE_KERNEL_RWX (_PAGE_KERNEL_EXEC | _PAGE_WRITE)
#define _PAGE_KERNEL (_PAGE_KERNEL_RO | _PAGE_WRITE)
+/* We borrow bit 23 to store the exclusive marker in swap PTEs. */
+#define _PAGE_SWP_EXCLUSIVE _PAGE_ACCESSED
+
/* The pgd/pmd contains a ptr (in phys addr space); since all pgds/pmds
* are page-aligned, we don't care about the PAGE_OFFSET bits, except
* for a few meta-information bits, so we shift the address to be
@@ -394,17 +397,48 @@ extern void paging_init (void);
#define update_mmu_cache(vms,addr,ptep) __update_cache(*ptep)
-/* Encode and de-code a swap entry */
-
+/*
+ * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that
+ * are !pte_none() && !pte_present().
+ *
+ * Format of swap PTEs (32bit):
+ *
+ * 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 3 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * <---------------- offset -----------------> P E <ofs> < type ->
+ *
+ * E is the exclusive marker that is not stored in swap entries.
+ * _PAGE_PRESENT (P) must be 0.
+ *
+ * For the 64bit version, the offset is extended by 32bit.
+ */
#define __swp_type(x) ((x).val & 0x1f)
#define __swp_offset(x) ( (((x).val >> 6) & 0x7) | \
(((x).val >> 8) & ~0x7) )
-#define __swp_entry(type, offset) ((swp_entry_t) { (type) | \
+#define __swp_entry(type, offset) ((swp_entry_t) { \
+ ((type) & 0x1f) | \
((offset & 0x7) << 6) | \
((offset & ~0x7) << 8) })
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
+static inline int pte_swp_exclusive(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
+}
+
+static inline pte_t pte_swp_mkexclusive(pte_t pte)
+{
+ pte_val(pte) |= _PAGE_SWP_EXCLUSIVE;
+ return pte;
+}
+
+static inline pte_t pte_swp_clear_exclusive(pte_t pte)
+{
+ pte_val(pte) &= ~_PAGE_SWP_EXCLUSIVE;
+ return pte;
+}
+
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
{
pte_t pte;
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 75823f39e042..7bf1fe7297c6 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -42,6 +42,9 @@
#define _PMD_PRESENT_MASK (PAGE_MASK)
#define _PMD_BAD (~PAGE_MASK)
+/* We borrow the _PAGE_USER bit to store the exclusive marker in swap PTEs. */
+#define _PAGE_SWP_EXCLUSIVE _PAGE_USER
+
/* And here we include common definitions */
#define _PAGE_KERNEL_RO 0
@@ -363,17 +366,41 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
#define pmd_page(pmd) pfn_to_page(pmd_pfn(pmd))
/*
- * Encode and decode a swap entry.
- * Note that the bits we use in a PTE for representing a swap entry
- * must not include the _PAGE_PRESENT bit or the _PAGE_HASHPTE bit (if used).
- * -- paulus
+ * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that
+ * are !pte_none() && !pte_present().
+ *
+ * Format of swap PTEs (32bit PTEs):
+ *
+ * 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 3 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * <----------------- offset --------------------> < type -> E H P
+ *
+ * E is the exclusive marker that is not stored in swap entries.
+ * _PAGE_PRESENT (P) and __PAGE_HASHPTE (H) must be 0.
+ *
+ * For 64bit PTEs, the offset is extended by 32bit.
*/
#define __swp_type(entry) ((entry).val & 0x1f)
#define __swp_offset(entry) ((entry).val >> 5)
-#define __swp_entry(type, offset) ((swp_entry_t) { (type) | ((offset) << 5) })
+#define __swp_entry(type, offset) ((swp_entry_t) { ((type) & 0x1f) | ((offset) << 5) })
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 3 })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 })
+static inline int pte_swp_exclusive(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
+}
+
+static inline pte_t pte_swp_mkexclusive(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_SWP_EXCLUSIVE);
+}
+
+static inline pte_t pte_swp_clear_exclusive(pte_t pte)
+{
+ return __pte(pte_val(pte) & ~_PAGE_SWP_EXCLUSIVE);
+}
+
/* Generic accessors to PTE bits */
static inline int pte_write(pte_t pte) { return !!(pte_val(pte) & _PAGE_RW);}
static inline int pte_read(pte_t pte) { return 1; }
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index cb4c67bf45d7..4acc9690f599 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -717,7 +717,6 @@ static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
}
#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
-#define __HAVE_ARCH_PTE_SWP_EXCLUSIVE
static inline pte_t pte_swp_mkexclusive(pte_t pte)
{
return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_SWP_EXCLUSIVE));
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
index 70edad44dff6..fec56d965f00 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -360,18 +360,30 @@ static inline int pte_young(pte_t pte)
#endif
#define pmd_page(pmd) pfn_to_page(pmd_pfn(pmd))
+
/*
- * Encode and decode a swap entry.
- * Note that the bits we use in a PTE for representing a swap entry
- * must not include the _PAGE_PRESENT bit.
- * -- paulus
+ * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that
+ * are !pte_none() && !pte_present().
+ *
+ * Format of swap PTEs (32bit PTEs):
+ *
+ * 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 3 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * <------------------ offset -------------------> < type -> E 0 0
+ *
+ * E is the exclusive marker that is not stored in swap entries.
+ *
+ * For 64bit PTEs, the offset is extended by 32bit.
*/
#define __swp_type(entry) ((entry).val & 0x1f)
#define __swp_offset(entry) ((entry).val >> 5)
-#define __swp_entry(type, offset) ((swp_entry_t) { (type) | ((offset) << 5) })
+#define __swp_entry(type, offset) ((swp_entry_t) { ((type) & 0x1f) | ((offset) << 5) })
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 3 })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 })
+/* We borrow LSB 2 to store the exclusive marker in swap PTEs. */
+#define _PAGE_SWP_EXCLUSIVE 0x000004
+
#endif /* !__ASSEMBLY__ */
#endif /* __ASM_POWERPC_NOHASH_32_PGTABLE_H */
diff --git a/arch/powerpc/include/asm/nohash/32/pte-40x.h b/arch/powerpc/include/asm/nohash/32/pte-40x.h
index 2d3153cfc0d7..6fe46e754556 100644
--- a/arch/powerpc/include/asm/nohash/32/pte-40x.h
+++ b/arch/powerpc/include/asm/nohash/32/pte-40x.h
@@ -27,9 +27,9 @@
* of the 16 available. Bit 24-26 of the TLB are cleared in the TLB
* miss handler. Bit 27 is PAGE_USER, thus selecting the correct
* zone.
- * - PRESENT *must* be in the bottom two bits because swap cache
- * entries use the top 30 bits. Because 40x doesn't support SMP
- * anyway, M is irrelevant so we borrow it for PAGE_PRESENT. Bit 30
+ * - PRESENT *must* be in the bottom two bits because swap PTEs
+ * use the top 30 bits. Because 40x doesn't support SMP anyway, M is
+ * irrelevant so we borrow it for PAGE_PRESENT. Bit 30
* is cleared in the TLB miss handler before the TLB entry is loaded.
* - All other bits of the PTE are loaded into TLBLO without
* modification, leaving us only the bits 20, 21, 24, 25, 26, 30 for
diff --git a/arch/powerpc/include/asm/nohash/32/pte-44x.h b/arch/powerpc/include/asm/nohash/32/pte-44x.h
index 78bc304f750e..b7ed13cee137 100644
--- a/arch/powerpc/include/asm/nohash/32/pte-44x.h
+++ b/arch/powerpc/include/asm/nohash/32/pte-44x.h
@@ -56,20 +56,10 @@
* above bits. Note that the bit values are CPU specific, not architecture
* specific.
*
- * The kernel PTE entry holds an arch-dependent swp_entry structure under
- * certain situations. In other words, in such situations some portion of
- * the PTE bits are used as a swp_entry. In the PPC implementation, the
- * 3-24th LSB are shared with swp_entry, however the 0-2nd three LSB still
- * hold protection values. That means the three protection bits are
- * reserved for both PTE and SWAP entry at the most significant three
- * LSBs.
- *
- * There are three protection bits available for SWAP entry:
- * _PAGE_PRESENT
- * _PAGE_HASHPTE (if HW has)
- *
- * So those three bits have to be inside of 0-2nd LSB of PTE.
- *
+ * The kernel PTE entry can be an ordinary PTE mapping a page or a special swap
+ * PTE. In case of a swap PTE, LSB 2-24 are used to store information regarding
+ * the swap entry. However LSB 0-1 still hold protection values, for example,
+ * to distinguish swap PTEs from ordinary PTEs, and must be used with care.
*/
#define _PAGE_PRESENT 0x00000001 /* S: PTE valid */
diff --git a/arch/powerpc/include/asm/nohash/32/pte-85xx.h b/arch/powerpc/include/asm/nohash/32/pte-85xx.h
index 93fb8e11a3f1..16451df5ddb0 100644
--- a/arch/powerpc/include/asm/nohash/32/pte-85xx.h
+++ b/arch/powerpc/include/asm/nohash/32/pte-85xx.h
@@ -11,8 +11,8 @@
32 33 34 35 36 ... 50 51 52 53 54 55 56 57 58 59 60 61 62 63
RPN...................... 0 0 U0 U1 U2 U3 UX SX UW SW UR SR
- - PRESENT *must* be in the bottom three bits because swap cache
- entries use the top 29 bits.
+ - PRESENT *must* be in the bottom two bits because swap PTEs use
+ the top 30 bits.
*/
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index 879e9a6e5a87..287e25864ffa 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -276,22 +276,40 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
#define pgd_ERROR(e) \
pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
-/* Encode and de-code a swap entry */
+/*
+ * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that
+ * are !pte_none() && !pte_present().
+ *
+ * Format of swap PTEs:
+ *
+ * 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 3 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * <-------------------------- offset ----------------------------
+ *
+ * 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 6 6 6 6
+ * 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3
+ * --------------> <----------- zero ------------> E < type -> 0 0
+ *
+ * E is the exclusive marker that is not stored in swap entries.
+ */
#define MAX_SWAPFILES_CHECK() do { \
BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS); \
} while (0)
#define SWP_TYPE_BITS 5
-#define __swp_type(x) (((x).val >> _PAGE_BIT_SWAP_TYPE) \
+#define __swp_type(x) (((x).val >> 2) \
& ((1UL << SWP_TYPE_BITS) - 1))
#define __swp_offset(x) ((x).val >> PTE_RPN_SHIFT)
#define __swp_entry(type, offset) ((swp_entry_t) { \
- ((type) << _PAGE_BIT_SWAP_TYPE) \
+ (((type) & 0x1f) << 2) \
| ((offset) << PTE_RPN_SHIFT) })
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) })
#define __swp_entry_to_pte(x) __pte((x).val)
+/* We borrow MSB 56 (LSB 7) to store the exclusive marker in swap PTEs. */
+#define _PAGE_SWP_EXCLUSIVE 0x80
+
int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot);
void unmap_kernel_page(unsigned long va);
extern int __meminit vmemmap_create_mapping(unsigned long start,
diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h
index 69c3a050a3d8..a6caaaab6f92 100644
--- a/arch/powerpc/include/asm/nohash/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/pgtable.h
@@ -151,6 +151,21 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
}
+static inline int pte_swp_exclusive(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
+}
+
+static inline pte_t pte_swp_mkexclusive(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_SWP_EXCLUSIVE);
+}
+
+static inline pte_t pte_swp_clear_exclusive(pte_t pte)
+{
+ return __pte(pte_val(pte) & ~_PAGE_SWP_EXCLUSIVE);
+}
+
/* Insert a PTE, top-level function is out of line. It uses an inline
* low level function in the respective pgtable-* files
*/
diff --git a/arch/powerpc/include/asm/nohash/pte-e500.h b/arch/powerpc/include/asm/nohash/pte-e500.h
index 0934e8965e4e..d8924cbd61e4 100644
--- a/arch/powerpc/include/asm/nohash/pte-e500.h
+++ b/arch/powerpc/include/asm/nohash/pte-e500.h
@@ -12,7 +12,6 @@
/* Architected bits */
#define _PAGE_PRESENT 0x000001 /* software: pte contains a translation */
#define _PAGE_SW1 0x000002
-#define _PAGE_BIT_SWAP_TYPE 2
#define _PAGE_BAP_SR 0x000004
#define _PAGE_BAP_UR 0x000008
#define _PAGE_BAP_SW 0x000010
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index edf1dd1b0ca9..f2b6bf5687d0 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -117,15 +117,6 @@ extern long long virt_phys_offset;
#ifdef CONFIG_FLATMEM
#define ARCH_PFN_OFFSET ((unsigned long)(MEMORY_START >> PAGE_SHIFT))
-#ifndef __ASSEMBLY__
-extern unsigned long max_mapnr;
-static inline bool pfn_valid(unsigned long pfn)
-{
- unsigned long min_pfn = ARCH_PFN_OFFSET;
-
- return pfn >= min_pfn && pfn < max_mapnr;
-}
-#endif
#endif
#define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT)
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index 0c7cfb9fab04..fd42059ae2a5 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -132,7 +132,7 @@ void __iomem *ioremap_phb(phys_addr_t paddr, unsigned long size)
* address decoding but I'd rather not deal with those outside of the
* reserved 64K legacy region.
*/
- area = __get_vm_area_caller(size, 0, PHB_IO_BASE, PHB_IO_END,
+ area = __get_vm_area_caller(size, VM_IOREMAP, PHB_IO_BASE, PHB_IO_END,
__builtin_return_address(0));
if (!area)
return NULL;
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index 507f8228f983..7a2ff9010f17 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -120,10 +120,8 @@ int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
mmap_read_lock(mm);
for_each_vma(vmi, vma) {
- unsigned long size = vma->vm_end - vma->vm_start;
-
if (vma_is_special_mapping(vma, &vvar_spec))
- zap_page_range(vma, vma->vm_start, size);
+ zap_vma_pages(vma);
}
mmap_read_unlock(mm);
diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
index 1d67baa5557a..709ebd578394 100644
--- a/arch/powerpc/kvm/book3s_hv_uvmem.c
+++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
@@ -393,6 +393,7 @@ static int kvmppc_memslot_page_merge(struct kvm *kvm,
{
unsigned long gfn = memslot->base_gfn;
unsigned long end, start = gfn_to_hva(kvm, gfn);
+ unsigned long vm_flags;
int ret = 0;
struct vm_area_struct *vma;
int merge_flag = (merge) ? MADV_MERGEABLE : MADV_UNMERGEABLE;
@@ -409,12 +410,15 @@ static int kvmppc_memslot_page_merge(struct kvm *kvm,
ret = H_STATE;
break;
}
+ /* Copy vm_flags to avoid partial modifications in ksm_madvise */
+ vm_flags = vma->vm_flags;
ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
- merge_flag, &vma->vm_flags);
+ merge_flag, &vm_flags);
if (ret) {
ret = H_STATE;
break;
}
+ vm_flags_reset(vma, vm_flags);
start = vma->vm_end;
} while (end > vma->vm_end);
diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
index 4f566bea5e10..712ab91ced39 100644
--- a/arch/powerpc/kvm/book3s_xive_native.c
+++ b/arch/powerpc/kvm/book3s_xive_native.c
@@ -324,7 +324,7 @@ static int kvmppc_xive_native_mmap(struct kvm_device *dev,
return -EINVAL;
}
- vma->vm_flags |= VM_IO | VM_PFNMAP;
+ vm_flags_set(vma, VM_IO | VM_PFNMAP);
vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot);
/*
diff --git a/arch/powerpc/mm/book3s64/subpage_prot.c b/arch/powerpc/mm/book3s64/subpage_prot.c
index d73b3b4176e8..b75a9fb99599 100644
--- a/arch/powerpc/mm/book3s64/subpage_prot.c
+++ b/arch/powerpc/mm/book3s64/subpage_prot.c
@@ -156,7 +156,7 @@ static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr,
* VM_NOHUGEPAGE and split them.
*/
for_each_vma_range(vmi, vma, addr + len) {
- vma->vm_flags |= VM_NOHUGEPAGE;
+ vm_flags_set(vma, VM_NOHUGEPAGE);
walk_page_vma(vma, &subpage_walk_ops, NULL);
}
}
diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c
index eb5bed333750..36c21648d19a 100644
--- a/arch/powerpc/platforms/book3s/vas-api.c
+++ b/arch/powerpc/platforms/book3s/vas-api.c
@@ -414,7 +414,7 @@ static vm_fault_t vas_mmap_fault(struct vm_fault *vmf)
/*
* When the LPAR lost credits due to core removal or during
* migration, invalidate the existing mapping for the current
- * paste addresses and set windows in-active (zap_page_range in
+ * paste addresses and set windows in-active (zap_vma_pages in
* reconfig_close_windows()).
* New mapping will be done later after migration or new credits
* available. So continue to receive faults if the user space
@@ -525,7 +525,7 @@ static int coproc_mmap(struct file *fp, struct vm_area_struct *vma)
pfn = paste_addr >> PAGE_SHIFT;
/* flags, page_prot from cxl_mmap(), except we want cachable */
- vma->vm_flags |= VM_IO | VM_PFNMAP;
+ vm_flags_set(vma, VM_IO | VM_PFNMAP);
vma->vm_page_prot = pgprot_cached(vma->vm_page_prot);
prot = __pgprot(pgprot_val(vma->vm_page_prot) | _PAGE_DIRTY);
diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c
index 62d90a5e23d1..02a8158c469d 100644
--- a/arch/powerpc/platforms/cell/spufs/file.c
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -291,7 +291,7 @@ static int spufs_mem_mmap(struct file *file, struct vm_area_struct *vma)
if (!(vma->vm_flags & VM_SHARED))
return -EINVAL;
- vma->vm_flags |= VM_IO | VM_PFNMAP;
+ vm_flags_set(vma, VM_IO | VM_PFNMAP);
vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot);
vma->vm_ops = &spufs_mem_mmap_vmops;
@@ -381,7 +381,7 @@ static int spufs_cntl_mmap(struct file *file, struct vm_area_struct *vma)
if (!(vma->vm_flags & VM_SHARED))
return -EINVAL;
- vma->vm_flags |= VM_IO | VM_PFNMAP;
+ vm_flags_set(vma, VM_IO | VM_PFNMAP);
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
vma->vm_ops = &spufs_cntl_mmap_vmops;
@@ -1043,7 +1043,7 @@ static int spufs_signal1_mmap(struct file *file, struct vm_area_struct *vma)
if (!(vma->vm_flags & VM_SHARED))
return -EINVAL;
- vma->vm_flags |= VM_IO | VM_PFNMAP;
+ vm_flags_set(vma, VM_IO | VM_PFNMAP);
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
vma->vm_ops = &spufs_signal1_mmap_vmops;
@@ -1179,7 +1179,7 @@ static int spufs_signal2_mmap(struct file *file, struct vm_area_struct *vma)
if (!(vma->vm_flags & VM_SHARED))
return -EINVAL;
- vma->vm_flags |= VM_IO | VM_PFNMAP;
+ vm_flags_set(vma, VM_IO | VM_PFNMAP);
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
vma->vm_ops = &spufs_signal2_mmap_vmops;
@@ -1302,7 +1302,7 @@ static int spufs_mss_mmap(struct file *file, struct vm_area_struct *vma)
if (!(vma->vm_flags & VM_SHARED))
return -EINVAL;
- vma->vm_flags |= VM_IO | VM_PFNMAP;
+ vm_flags_set(vma, VM_IO | VM_PFNMAP);
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
vma->vm_ops = &spufs_mss_mmap_vmops;
@@ -1364,7 +1364,7 @@ static int spufs_psmap_mmap(struct file *file, struct vm_area_struct *vma)
if (!(vma->vm_flags & VM_SHARED))
return -EINVAL;
- vma->vm_flags |= VM_IO | VM_PFNMAP;
+ vm_flags_set(vma, VM_IO | VM_PFNMAP);
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
vma->vm_ops = &spufs_psmap_mmap_vmops;
@@ -1424,7 +1424,7 @@ static int spufs_mfc_mmap(struct file *file, struct vm_area_struct *vma)
if (!(vma->vm_flags & VM_SHARED))
return -EINVAL;
- vma->vm_flags |= VM_IO | VM_PFNMAP;
+ vm_flags_set(vma, VM_IO | VM_PFNMAP);
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
vma->vm_ops = &spufs_mfc_mmap_vmops;
diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c
index 4ad6e510d405..559112312810 100644
--- a/arch/powerpc/platforms/pseries/vas.c
+++ b/arch/powerpc/platforms/pseries/vas.c
@@ -760,8 +760,7 @@ static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds,
* is done before the original mmap() and after the ioctl.
*/
if (vma)
- zap_page_range(vma, vma->vm_start,
- vma->vm_end - vma->vm_start);
+ zap_vma_pages(vma);
mmap_write_unlock(task_ref->mm);
mutex_unlock(&task_ref->mmap_mutex);
diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
index 9f432c1b5289..7fed7c431928 100644
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -171,11 +171,6 @@ extern phys_addr_t __phys_addr_symbol(unsigned long x);
#define sym_to_pfn(x) __phys_to_pfn(__pa_symbol(x))
-#ifdef CONFIG_FLATMEM
-#define pfn_valid(pfn) \
- (((pfn) >= ARCH_PFN_OFFSET) && (((pfn) - ARCH_PFN_OFFSET) < max_mapnr))
-#endif
-
#endif /* __ASSEMBLY__ */
#define virt_addr_valid(vaddr) ({ \
diff --git a/arch/riscv/include/asm/pgtable-bits.h b/arch/riscv/include/asm/pgtable-bits.h
index b9e13a8fe2b7..f896708e8331 100644
--- a/arch/riscv/include/asm/pgtable-bits.h
+++ b/arch/riscv/include/asm/pgtable-bits.h
@@ -27,6 +27,9 @@
*/
#define _PAGE_PROT_NONE _PAGE_GLOBAL
+/* Used for swap PTEs only. */
+#define _PAGE_SWP_EXCLUSIVE _PAGE_ACCESSED
+
#define _PAGE_PFN_SHIFT 10
/*
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 3e01f4f3ab08..d8d8de0ded99 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -728,16 +728,18 @@ extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
/*
- * Encode and decode a swap entry
+ * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that
+ * are !pte_none() && !pte_present().
*
* Format of swap PTE:
* bit 0: _PAGE_PRESENT (zero)
* bit 1 to 3: _PAGE_LEAF (zero)
* bit 5: _PAGE_PROT_NONE (zero)
- * bits 6 to 10: swap type
- * bits 10 to XLEN-1: swap offset
+ * bit 6: exclusive marker
+ * bits 7 to 11: swap type
+ * bits 11 to XLEN-1: swap offset
*/
-#define __SWP_TYPE_SHIFT 6
+#define __SWP_TYPE_SHIFT 7
#define __SWP_TYPE_BITS 5
#define __SWP_TYPE_MASK ((1UL << __SWP_TYPE_BITS) - 1)
#define __SWP_OFFSET_SHIFT (__SWP_TYPE_BITS + __SWP_TYPE_SHIFT)
@@ -748,11 +750,27 @@ extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
#define __swp_type(x) (((x).val >> __SWP_TYPE_SHIFT) & __SWP_TYPE_MASK)
#define __swp_offset(x) ((x).val >> __SWP_OFFSET_SHIFT)
#define __swp_entry(type, offset) ((swp_entry_t) \
- { ((type) << __SWP_TYPE_SHIFT) | ((offset) << __SWP_OFFSET_SHIFT) })
+ { (((type) & __SWP_TYPE_MASK) << __SWP_TYPE_SHIFT) | \
+ ((offset) << __SWP_OFFSET_SHIFT) })
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
+static inline int pte_swp_exclusive(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
+}
+
+static inline pte_t pte_swp_mkexclusive(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_SWP_EXCLUSIVE);
+}
+
+static inline pte_t pte_swp_clear_exclusive(pte_t pte)
+{
+ return __pte(pte_val(pte) & ~_PAGE_SWP_EXCLUSIVE);
+}
+
#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
#define __pmd_to_swp_entry(pmd) ((swp_entry_t) { pmd_val(pmd) })
#define __swp_entry_to_pmd(swp) __pmd((swp).val)
diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c
index e410275918ac..5c30212d8d1c 100644
--- a/arch/riscv/kernel/vdso.c
+++ b/arch/riscv/kernel/vdso.c
@@ -124,13 +124,11 @@ int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
mmap_read_lock(mm);
for_each_vma(vmi, vma) {
- unsigned long size = vma->vm_end - vma->vm_start;
-
if (vma_is_special_mapping(vma, vdso_info.dm))
- zap_page_range(vma, vma->vm_start, size);
+ zap_vma_pages(vma);
#ifdef CONFIG_COMPAT
if (vma_is_special_mapping(vma, compat_vdso_info.dm))
- zap_page_range(vma, vma->vm_start, size);
+ zap_vma_pages(vma);
#endif
}
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 61dea67bb9c7..8a2a3b5d1e29 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -73,9 +73,8 @@ static inline void copy_page(void *to, void *from)
#define clear_user_page(page, vaddr, pg) clear_page(page)
#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
-#define alloc_zeroed_user_highpage_movable(vma, vaddr) \
- alloc_page_vma(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, vma, vaddr)
-#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE
+#define vma_alloc_zeroed_movable_folio(vma, vaddr) \
+ vma_alloc_folio(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, 0, vma, vaddr, false)
/*
* These are used to make use of C type-checking..
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index b87ca864d27d..2c70b4d1263d 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -827,7 +827,6 @@ static inline int pmd_protnone(pmd_t pmd)
}
#endif
-#define __HAVE_ARCH_PTE_SWP_EXCLUSIVE
static inline int pte_swp_exclusive(pte_t pte)
{
return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index ff7bf4432229..bbaefd84f15e 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -59,11 +59,9 @@ int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
mmap_read_lock(mm);
for_each_vma(vmi, vma) {
- unsigned long size = vma->vm_end - vma->vm_start;
-
if (!vma_is_special_mapping(vma, &vvar_mapping))
continue;
- zap_page_range(vma, vma->vm_start, size);
+ zap_vma_pages(vma);
break;
}
mmap_read_unlock(mm);
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 74e1d873dce0..5a716bdcba05 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -722,7 +722,7 @@ void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
if (is_vm_hugetlb_page(vma))
continue;
size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
- zap_page_range(vma, vmaddr, size);
+ zap_page_range_single(vma, vmaddr, size, NULL);
}
mmap_read_unlock(gmap->mm);
}
@@ -2522,8 +2522,7 @@ static inline void thp_split_mm(struct mm_struct *mm)
VMA_ITERATOR(vmi, mm, 0);
for_each_vma(vmi, vma) {
- vma->vm_flags &= ~VM_HUGEPAGE;
- vma->vm_flags |= VM_NOHUGEPAGE;
+ vm_flags_mod(vma, VM_NOHUGEPAGE, VM_HUGEPAGE);
walk_page_vma(vma, &thp_split_walk_ops, NULL);
}
mm->def_flags |= VM_NOHUGEPAGE;
@@ -2588,14 +2587,18 @@ int gmap_mark_unmergeable(void)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
+ unsigned long vm_flags;
int ret;
VMA_ITERATOR(vmi, mm, 0);
for_each_vma(vmi, vma) {
+ /* Copy vm_flags to avoid partial modifications in ksm_madvise */
+ vm_flags = vma->vm_flags;
ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
- MADV_UNMERGEABLE, &vma->vm_flags);
+ MADV_UNMERGEABLE, &vm_flags);
if (ret)
return ret;
+ vm_flags_reset(vma, vm_flags);
}
mm->def_flags &= ~VM_MERGEABLE;
return 0;
diff --git a/arch/sh/include/asm/page.h b/arch/sh/include/asm/page.h
index eca5daa43b93..09ac6c7faee0 100644
--- a/arch/sh/include/asm/page.h
+++ b/arch/sh/include/asm/page.h
@@ -169,9 +169,6 @@ typedef struct page *pgtable_t;
#define PFN_START (__MEMORY_START >> PAGE_SHIFT)
#define ARCH_PFN_OFFSET (PFN_START)
#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
-#ifdef CONFIG_FLATMEM
-#define pfn_valid(pfn) ((pfn) >= min_low_pfn && (pfn) < max_low_pfn)
-#endif
#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
#include <asm-generic/memory_model.h>
diff --git a/arch/sh/include/asm/pgtable_32.h b/arch/sh/include/asm/pgtable_32.h
index d0240decacca..21952b094650 100644
--- a/arch/sh/include/asm/pgtable_32.h
+++ b/arch/sh/include/asm/pgtable_32.h
@@ -423,40 +423,69 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
#endif
/*
- * Encode and de-code a swap entry
+ * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that
+ * are !pte_none() && !pte_present().
*
* Constraints:
* _PAGE_PRESENT at bit 8
* _PAGE_PROTNONE at bit 9
*
- * For the normal case, we encode the swap type into bits 0:7 and the
- * swap offset into bits 10:30. For the 64-bit PTE case, we keep the
- * preserved bits in the low 32-bits and use the upper 32 as the swap
- * offset (along with a 5-bit type), following the same approach as x86
- * PAE. This keeps the logic quite simple.
+ * For the normal case, we encode the swap type and offset into the swap PTE
+ * such that bits 8 and 9 stay zero. For the 64-bit PTE case, we use the
+ * upper 32 for the swap offset and swap type, following the same approach as
+ * x86 PAE. This keeps the logic quite simple.
*
* As is evident by the Alpha code, if we ever get a 64-bit unsigned
* long (swp_entry_t) to match up with the 64-bit PTEs, this all becomes
* much cleaner..
- *
- * NOTE: We should set ZEROs at the position of _PAGE_PRESENT
- * and _PAGE_PROTNONE bits
*/
+
#ifdef CONFIG_X2TLB
+/*
+ * Format of swap PTEs:
+ *
+ * 6 6 6 6 5 5 5 5 5 5 5 5 5 5 4 4 4 4 4 4 4 4 4 4 3 3 3 3 3 3 3 3
+ * 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2
+ * <--------------------- offset ----------------------> < type ->
+ *
+ * 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
+ * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * <------------------- zeroes --------------------> E 0 0 0 0 0 0
+ */
#define __swp_type(x) ((x).val & 0x1f)
#define __swp_offset(x) ((x).val >> 5)
-#define __swp_entry(type, offset) ((swp_entry_t){ (type) | (offset) << 5})
+#define __swp_entry(type, offset) ((swp_entry_t){ ((type) & 0x1f) | (offset) << 5})
#define __pte_to_swp_entry(pte) ((swp_entry_t){ (pte).pte_high })
#define __swp_entry_to_pte(x) ((pte_t){ 0, (x).val })
#else
-#define __swp_type(x) ((x).val & 0xff)
+/*
+ * Format of swap PTEs:
+ *
+ * 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
+ * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * <--------------- offset ----------------> 0 0 0 0 E < type -> 0
+ *
+ * E is the exclusive marker that is not stored in swap entries.
+ */
+#define __swp_type(x) ((x).val & 0x1f)
#define __swp_offset(x) ((x).val >> 10)
-#define __swp_entry(type, offset) ((swp_entry_t){(type) | (offset) <<10})
+#define __swp_entry(type, offset) ((swp_entry_t){((type) & 0x1f) | (offset) << 10})
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 1 })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val << 1 })
#endif
+/* In both cases, we borrow bit 6 to store the exclusive marker in swap PTEs. */
+#define _PAGE_SWP_EXCLUSIVE _PAGE_USER
+
+static inline int pte_swp_exclusive(pte_t pte)
+{
+ return pte.pte_low & _PAGE_SWP_EXCLUSIVE;
+}
+
+PTE_BIT_FUNC(low, swp_mkexclusive, |= _PAGE_SWP_EXCLUSIVE);
+PTE_BIT_FUNC(low, swp_clear_exclusive, &= ~_PAGE_SWP_EXCLUSIVE);
+
#endif /* __ASSEMBLY__ */
#endif /* __ASM_SH_PGTABLE_32_H */
diff --git a/arch/sh/kernel/cpu/sh4/sq.c b/arch/sh/kernel/cpu/sh4/sq.c
index a76b94e41e91..27f2e3da5aa2 100644
--- a/arch/sh/kernel/cpu/sh4/sq.c
+++ b/arch/sh/kernel/cpu/sh4/sq.c
@@ -103,7 +103,7 @@ static int __sq_remap(struct sq_mapping *map, pgprot_t prot)
#if defined(CONFIG_MMU)
struct vm_struct *vma;
- vma = __get_vm_area_caller(map->size, VM_ALLOC, map->sq_addr,
+ vma = __get_vm_area_caller(map->size, VM_IOREMAP, map->sq_addr,
SQ_ADDRMAX, __builtin_return_address(0));
if (!vma)
return -ENOMEM;
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index 506784702430..bf1b54055316 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -301,6 +301,7 @@ void __init paging_init(void)
*/
max_low_pfn = max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
min_low_pfn = __MEMORY_START >> PAGE_SHIFT;
+ set_max_mapnr(max_low_pfn - min_low_pfn);
nodes_clear(node_online_map);
diff --git a/arch/sparc/include/asm/page_32.h b/arch/sparc/include/asm/page_32.h
index fff8861df107..6be6f683f98f 100644
--- a/arch/sparc/include/asm/page_32.h
+++ b/arch/sparc/include/asm/page_32.h
@@ -130,7 +130,6 @@ extern unsigned long pfn_base;
#define ARCH_PFN_OFFSET (pfn_base)
#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
-#define pfn_valid(pfn) (((pfn) >= (pfn_base)) && (((pfn)-(pfn_base)) < max_mapnr))
#define virt_addr_valid(kaddr) ((((unsigned long)(kaddr)-PAGE_OFFSET)>>PAGE_SHIFT) < max_mapnr)
#include <asm-generic/memory_model.h>
diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h
index 5acc05b572e6..d4330e3c57a6 100644
--- a/arch/sparc/include/asm/pgtable_32.h
+++ b/arch/sparc/include/asm/pgtable_32.h
@@ -323,7 +323,16 @@ void srmmu_mapiorange(unsigned int bus, unsigned long xpa,
unsigned long xva, unsigned int len);
void srmmu_unmapiorange(unsigned long virt_addr, unsigned int len);
-/* Encode and de-code a swap entry */
+/*
+ * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that
+ * are !pte_none() && !pte_present().
+ *
+ * Format of swap PTEs:
+ *
+ * 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
+ * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * <-------------- offset ---------------> < type -> E 0 0 0 0 0 0
+ */
static inline unsigned long __swp_type(swp_entry_t entry)
{
return (entry.val >> SRMMU_SWP_TYPE_SHIFT) & SRMMU_SWP_TYPE_MASK;
@@ -344,6 +353,21 @@ static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset)
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
+static inline int pte_swp_exclusive(pte_t pte)
+{
+ return pte_val(pte) & SRMMU_SWP_EXCLUSIVE;
+}
+
+static inline pte_t pte_swp_mkexclusive(pte_t pte)
+{
+ return __pte(pte_val(pte) | SRMMU_SWP_EXCLUSIVE);
+}
+
+static inline pte_t pte_swp_clear_exclusive(pte_t pte)
+{
+ return __pte(pte_val(pte) & ~SRMMU_SWP_EXCLUSIVE);
+}
+
static inline unsigned long
__get_phys (unsigned long addr)
{
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index 3bc9736bddb1..2dc8d4641734 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -187,6 +187,9 @@ bool kern_addr_valid(unsigned long addr);
#define _PAGE_SZHUGE_4U _PAGE_SZ4MB_4U
#define _PAGE_SZHUGE_4V _PAGE_SZ4MB_4V
+/* We borrow bit 20 to store the exclusive marker in swap PTEs. */
+#define _PAGE_SWP_EXCLUSIVE _AC(0x0000000000100000, UL)
+
#ifndef __ASSEMBLY__
pte_t mk_pte_io(unsigned long, pgprot_t, int, unsigned long);
@@ -961,18 +964,46 @@ void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
#endif
-/* Encode and de-code a swap entry */
-#define __swp_type(entry) (((entry).val >> PAGE_SHIFT) & 0xffUL)
+/*
+ * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that
+ * are !pte_none() && !pte_present().
+ *
+ * Format of swap PTEs:
+ *
+ * 6 6 6 6 5 5 5 5 5 5 5 5 5 5 4 4 4 4 4 4 4 4 4 4 3 3 3 3 3 3 3 3
+ * 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2
+ * <--------------------------- offset ---------------------------
+ *
+ * 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
+ * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * --------------------> E <-- type ---> <------- zeroes -------->
+ */
+#define __swp_type(entry) (((entry).val >> PAGE_SHIFT) & 0x7fUL)
#define __swp_offset(entry) ((entry).val >> (PAGE_SHIFT + 8UL))
#define __swp_entry(type, offset) \
( (swp_entry_t) \
{ \
- (((long)(type) << PAGE_SHIFT) | \
+ ((((long)(type) & 0x7fUL) << PAGE_SHIFT) | \
((long)(offset) << (PAGE_SHIFT + 8UL))) \
} )
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
+static inline int pte_swp_exclusive(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
+}
+
+static inline pte_t pte_swp_mkexclusive(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_SWP_EXCLUSIVE);
+}
+
+static inline pte_t pte_swp_clear_exclusive(pte_t pte)
+{
+ return __pte(pte_val(pte) & ~_PAGE_SWP_EXCLUSIVE);
+}
+
int page_in_phys_avail(unsigned long paddr);
/*
diff --git a/arch/sparc/include/asm/pgtsrmmu.h b/arch/sparc/include/asm/pgtsrmmu.h
index 6067925972d9..18e68d43f036 100644
--- a/arch/sparc/include/asm/pgtsrmmu.h
+++ b/arch/sparc/include/asm/pgtsrmmu.h
@@ -53,21 +53,13 @@
#define SRMMU_CHG_MASK (0xffffff00 | SRMMU_REF | SRMMU_DIRTY)
-/* SRMMU swap entry encoding
- *
- * We use 5 bits for the type and 19 for the offset. This gives us
- * 32 swapfiles of 4GB each. Encoding looks like:
- *
- * oooooooooooooooooootttttRRRRRRRR
- * fedcba9876543210fedcba9876543210
- *
- * The bottom 7 bits are reserved for protection and status bits, especially
- * PRESENT.
- */
+/* SRMMU swap entry encoding */
#define SRMMU_SWP_TYPE_MASK 0x1f
#define SRMMU_SWP_TYPE_SHIFT 7
#define SRMMU_SWP_OFF_MASK 0xfffff
#define SRMMU_SWP_OFF_SHIFT (SRMMU_SWP_TYPE_SHIFT + 5)
+/* We borrow bit 6 to store the exclusive marker in swap PTEs. */
+#define SRMMU_SWP_EXCLUSIVE SRMMU_DIRTY
/* Some day I will implement true fine grained access bits for
* user pages because the SRMMU gives us the capabilities to
diff --git a/arch/um/include/asm/page.h b/arch/um/include/asm/page.h
index cdbd9653aa14..84866127d074 100644
--- a/arch/um/include/asm/page.h
+++ b/arch/um/include/asm/page.h
@@ -108,7 +108,6 @@ extern unsigned long uml_physmem;
#define phys_to_pfn(p) ((p) >> PAGE_SHIFT)
#define pfn_to_phys(pfn) PFN_PHYS(pfn)
-#define pfn_valid(pfn) ((pfn) < max_mapnr)
#define virt_addr_valid(v) pfn_valid(phys_to_pfn(__pa(v)))
#include <asm-generic/memory_model.h>
diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
index 4e3052f2671a..a70d1618eb35 100644
--- a/arch/um/include/asm/pgtable.h
+++ b/arch/um/include/asm/pgtable.h
@@ -21,6 +21,9 @@
#define _PAGE_PROTNONE 0x010 /* if the user mapped it with PROT_NONE;
pte_present gives true */
+/* We borrow bit 10 to store the exclusive marker in swap PTEs. */
+#define _PAGE_SWP_EXCLUSIVE 0x400
+
#ifdef CONFIG_3_LEVEL_PGTABLES
#include <asm/pgtable-3level.h>
#else
@@ -288,16 +291,45 @@ extern pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr);
#define update_mmu_cache(vma,address,ptep) do {} while (0)
-/* Encode and de-code a swap entry */
+/*
+ * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that
+ * are !pte_none() && !pte_present().
+ *
+ * Format of swap PTEs:
+ *
+ * 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
+ * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * <--------------- offset ----------------> E < type -> 0 0 0 1 0
+ *
+ * E is the exclusive marker that is not stored in swap entries.
+ * _PAGE_NEWPAGE (bit 1) is always set to 1 in set_pte().
+ */
#define __swp_type(x) (((x).val >> 5) & 0x1f)
#define __swp_offset(x) ((x).val >> 11)
#define __swp_entry(type, offset) \
- ((swp_entry_t) { ((type) << 5) | ((offset) << 11) })
+ ((swp_entry_t) { (((type) & 0x1f) << 5) | ((offset) << 11) })
#define __pte_to_swp_entry(pte) \
((swp_entry_t) { pte_val(pte_mkuptodate(pte)) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
+static inline int pte_swp_exclusive(pte_t pte)
+{
+ return pte_get_bits(pte, _PAGE_SWP_EXCLUSIVE);
+}
+
+static inline pte_t pte_swp_mkexclusive(pte_t pte)
+{
+ pte_set_bits(pte, _PAGE_SWP_EXCLUSIVE);
+ return pte;
+}
+
+static inline pte_t pte_swp_clear_exclusive(pte_t pte)
+{
+ pte_clear_bits(pte, _PAGE_SWP_EXCLUSIVE);
+ return pte;
+}
+
/* Clear a kernel PTE and flush it from the TLB */
#define kpte_clear_flush(ptep, vaddr) \
do { \
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 2738eb28cb2e..11a5c68d1218 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -116,10 +116,8 @@ int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
mmap_read_lock(mm);
for_each_vma(vmi, vma) {
- unsigned long size = vma->vm_end - vma->vm_start;
-
if (vma_is_special_mapping(vma, &vvar_mapping))
- zap_page_range(vma, vma->vm_start, size);
+ zap_vma_pages(vma);
}
mmap_read_unlock(mm);
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index 4af81df133ee..d234ca797e4a 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -391,7 +391,7 @@ void __init map_vsyscall(void)
}
if (vsyscall_mode == XONLY)
- gate_vma.vm_flags = VM_EXEC;
+ vm_flags_init(&gate_vma, VM_EXEC);
BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
(unsigned long)VSYSCALL_ADDR);
diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index 9cc82f305f4b..d18e5c332cb9 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -34,9 +34,8 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr,
copy_page(to, from);
}
-#define alloc_zeroed_user_highpage_movable(vma, vaddr) \
- alloc_page_vma(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, vma, vaddr)
-#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE
+#define vma_alloc_zeroed_movable_folio(vma, vaddr) \
+ vma_alloc_folio(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, 0, vma, vaddr, false)
#ifndef __pa
#define __pa(x) __phys_addr((unsigned long)(x))
diff --git a/arch/x86/include/asm/page_32.h b/arch/x86/include/asm/page_32.h
index df42f8aa99e4..580d71aca65a 100644
--- a/arch/x86/include/asm/page_32.h
+++ b/arch/x86/include/asm/page_32.h
@@ -15,10 +15,6 @@ extern unsigned long __phys_addr(unsigned long);
#define __phys_addr_symbol(x) __phys_addr(x)
#define __phys_reloc_hide(x) RELOC_HIDE((x), 0)
-#ifdef CONFIG_FLATMEM
-#define pfn_valid(pfn) ((pfn) < max_mapnr)
-#endif /* CONFIG_FLATMEM */
-
#include <linux/string.h>
static inline void clear_page(void *page)
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index 198e03e59ca1..cc6b8e087192 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -39,10 +39,6 @@ extern unsigned long __phys_addr_symbol(unsigned long);
#define __phys_reloc_hide(x) (x)
-#ifdef CONFIG_FLATMEM
-#define pfn_valid(pfn) ((pfn) < max_pfn)
-#endif
-
void clear_page_orig(void *page);
void clear_page_rep(void *page);
void clear_page_erms(void *page);
diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h
index 60d0f9015317..e9482a11ac52 100644
--- a/arch/x86/include/asm/pgtable-2level.h
+++ b/arch/x86/include/asm/pgtable-2level.h
@@ -80,21 +80,37 @@ static inline unsigned long pte_bitop(unsigned long value, unsigned int rightshi
return ((value >> rightshift) & mask) << leftshift;
}
-/* Encode and de-code a swap entry */
+/*
+ * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that
+ * are !pte_none() && !pte_present().
+ *
+ * Format of swap PTEs:
+ *
+ * 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
+ * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * <----------------- offset ------------------> 0 E <- type --> 0
+ *
+ * E is the exclusive marker that is not stored in swap entries.
+ */
#define SWP_TYPE_BITS 5
+#define _SWP_TYPE_MASK ((1U << SWP_TYPE_BITS) - 1)
+#define _SWP_TYPE_SHIFT (_PAGE_BIT_PRESENT + 1)
#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1)
-#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)
+#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > 5)
-#define __swp_type(x) (((x).val >> (_PAGE_BIT_PRESENT + 1)) \
- & ((1U << SWP_TYPE_BITS) - 1))
+#define __swp_type(x) (((x).val >> _SWP_TYPE_SHIFT) \
+ & _SWP_TYPE_MASK)
#define __swp_offset(x) ((x).val >> SWP_OFFSET_SHIFT)
#define __swp_entry(type, offset) ((swp_entry_t) { \
- ((type) << (_PAGE_BIT_PRESENT + 1)) \
+ (((type) & _SWP_TYPE_MASK) << _SWP_TYPE_SHIFT) \
| ((offset) << SWP_OFFSET_SHIFT) })
#define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low })
#define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val })
+/* We borrow bit 7 to store the exclusive marker in swap PTEs. */
+#define _PAGE_SWP_EXCLUSIVE _PAGE_PSE
+
/* No inverted PFNs on 2 level page tables */
static inline u64 protnone_mask(u64 val)
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index 967b135fa2c0..9e7c0b719c3c 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -145,8 +145,24 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
}
#endif
-/* Encode and de-code a swap entry */
+/*
+ * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that
+ * are !pte_none() && !pte_present().
+ *
+ * Format of swap PTEs:
+ *
+ * 6 6 6 6 5 5 5 5 5 5 5 5 5 5 4 4 4 4 4 4 4 4 4 4 3 3 3 3 3 3 3 3
+ * 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2
+ * < type -> <---------------------- offset ----------------------
+ *
+ * 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
+ * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * --------------------------------------------> 0 E 0 0 0 0 0 0 0
+ *
+ * E is the exclusive marker that is not stored in swap entries.
+ */
#define SWP_TYPE_BITS 5
+#define _SWP_TYPE_MASK ((1U << SWP_TYPE_BITS) - 1)
#define SWP_OFFSET_FIRST_BIT (_PAGE_BIT_PROTNONE + 1)
@@ -154,9 +170,10 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
#define SWP_OFFSET_SHIFT (SWP_OFFSET_FIRST_BIT + SWP_TYPE_BITS)
#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)
-#define __swp_type(x) (((x).val) & ((1UL << SWP_TYPE_BITS) - 1))
+#define __swp_type(x) (((x).val) & _SWP_TYPE_MASK)
#define __swp_offset(x) ((x).val >> SWP_TYPE_BITS)
-#define __swp_entry(type, offset) ((swp_entry_t){(type) | (offset) << SWP_TYPE_BITS})
+#define __swp_entry(type, offset) ((swp_entry_t){((type) & _SWP_TYPE_MASK) \
+ | (offset) << SWP_TYPE_BITS})
/*
* Normally, __swp_entry() converts from arch-independent swp_entry_t to
@@ -184,6 +201,9 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
#define __pte_to_swp_entry(pte) (__swp_entry(__pteval_swp_type(pte), \
__pteval_swp_offset(pte)))
+/* We borrow bit 7 to store the exclusive marker in swap PTEs. */
+#define _PAGE_SWP_EXCLUSIVE _PAGE_PSE
+
#include <asm/pgtable-invert.h>
#endif /* _ASM_X86_PGTABLE_3LEVEL_H */
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 0564edd24ffb..7425f32e5293 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -289,6 +289,11 @@ static inline pte_t pte_clear_flags(pte_t pte, pteval_t clear)
return native_make_pte(v & ~clear);
}
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+ return pte_clear_flags(pte, _PAGE_RW);
+}
+
#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
static inline int pte_uffd_wp(pte_t pte)
{
@@ -313,7 +318,7 @@ static inline int pte_uffd_wp(pte_t pte)
static inline pte_t pte_mkuffd_wp(pte_t pte)
{
- return pte_set_flags(pte, _PAGE_UFFD_WP);
+ return pte_wrprotect(pte_set_flags(pte, _PAGE_UFFD_WP));
}
static inline pte_t pte_clear_uffd_wp(pte_t pte)
@@ -332,11 +337,6 @@ static inline pte_t pte_mkold(pte_t pte)
return pte_clear_flags(pte, _PAGE_ACCESSED);
}
-static inline pte_t pte_wrprotect(pte_t pte)
-{
- return pte_clear_flags(pte, _PAGE_RW);
-}
-
static inline pte_t pte_mkexec(pte_t pte)
{
return pte_clear_flags(pte, _PAGE_NX);
@@ -401,6 +401,11 @@ static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear)
return native_make_pmd(v & ~clear);
}
+static inline pmd_t pmd_wrprotect(pmd_t pmd)
+{
+ return pmd_clear_flags(pmd, _PAGE_RW);
+}
+
#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
static inline int pmd_uffd_wp(pmd_t pmd)
{
@@ -409,7 +414,7 @@ static inline int pmd_uffd_wp(pmd_t pmd)
static inline pmd_t pmd_mkuffd_wp(pmd_t pmd)
{
- return pmd_set_flags(pmd, _PAGE_UFFD_WP);
+ return pmd_wrprotect(pmd_set_flags(pmd, _PAGE_UFFD_WP));
}
static inline pmd_t pmd_clear_uffd_wp(pmd_t pmd)
@@ -428,11 +433,6 @@ static inline pmd_t pmd_mkclean(pmd_t pmd)
return pmd_clear_flags(pmd, _PAGE_DIRTY);
}
-static inline pmd_t pmd_wrprotect(pmd_t pmd)
-{
- return pmd_clear_flags(pmd, _PAGE_RW);
-}
-
static inline pmd_t pmd_mkdirty(pmd_t pmd)
{
return pmd_set_flags(pmd, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
@@ -1299,8 +1299,6 @@ static inline void update_mmu_cache_pud(struct vm_area_struct *vma,
unsigned long addr, pud_t *pud)
{
}
-#ifdef _PAGE_SWP_EXCLUSIVE
-#define __HAVE_ARCH_PTE_SWP_EXCLUSIVE
static inline pte_t pte_swp_mkexclusive(pte_t pte)
{
return pte_set_flags(pte, _PAGE_SWP_EXCLUSIVE);
@@ -1315,7 +1313,6 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte)
{
return pte_clear_flags(pte, _PAGE_SWP_EXCLUSIVE);
}
-#endif /* _PAGE_SWP_EXCLUSIVE */
#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
diff --git a/arch/x86/kernel/cpu/sgx/driver.c b/arch/x86/kernel/cpu/sgx/driver.c
index aa9b8b868867..262f5fb18d74 100644
--- a/arch/x86/kernel/cpu/sgx/driver.c
+++ b/arch/x86/kernel/cpu/sgx/driver.c
@@ -95,7 +95,7 @@ static int sgx_mmap(struct file *file, struct vm_area_struct *vma)
return ret;
vma->vm_ops = &sgx_vm_ops;
- vma->vm_flags |= VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_IO;
+ vm_flags_set(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_IO);
vma->vm_private_data = encl;
return 0;
diff --git a/arch/x86/kernel/cpu/sgx/virt.c b/arch/x86/kernel/cpu/sgx/virt.c
index 6a77a14eee38..c3e37eaec8ec 100644
--- a/arch/x86/kernel/cpu/sgx/virt.c
+++ b/arch/x86/kernel/cpu/sgx/virt.c
@@ -105,7 +105,7 @@ static int sgx_vepc_mmap(struct file *file, struct vm_area_struct *vma)
vma->vm_ops = &sgx_vepc_vm_ops;
/* Don't copy VMA in fork() */
- vma->vm_flags |= VM_PFNMAP | VM_IO | VM_DONTDUMP | VM_DONTCOPY;
+ vm_flags_set(vma, VM_PFNMAP | VM_IO | VM_DONTDUMP | VM_DONTCOPY);
vma->vm_private_data = vepc;
return 0;
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 7b0d4ab894c8..a498ae1fbe66 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -260,7 +260,7 @@ static noinline int vmalloc_fault(unsigned long address)
}
NOKPROBE_SYMBOL(vmalloc_fault);
-static void __arch_sync_kernel_mappings(unsigned long start, unsigned long end)
+void arch_sync_kernel_mappings(unsigned long start, unsigned long end)
{
unsigned long addr;
@@ -284,27 +284,6 @@ static void __arch_sync_kernel_mappings(unsigned long start, unsigned long end)
}
}
-void arch_sync_kernel_mappings(unsigned long start, unsigned long end)
-{
- __arch_sync_kernel_mappings(start, end);
-#ifdef CONFIG_KMSAN
- /*
- * KMSAN maintains two additional metadata page mappings for the
- * [VMALLOC_START, VMALLOC_END) range. These mappings start at
- * KMSAN_VMALLOC_SHADOW_START and KMSAN_VMALLOC_ORIGIN_START and
- * have to be synced together with the vmalloc memory mapping.
- */
- if (start >= VMALLOC_START && end < VMALLOC_END) {
- __arch_sync_kernel_mappings(
- start - VMALLOC_START + KMSAN_VMALLOC_SHADOW_START,
- end - VMALLOC_START + KMSAN_VMALLOC_SHADOW_START);
- __arch_sync_kernel_mappings(
- start - VMALLOC_START + KMSAN_VMALLOC_ORIGIN_START,
- end - VMALLOC_START + KMSAN_VMALLOC_ORIGIN_START);
- }
-#endif
-}
-
static bool low_pfn(unsigned long pfn)
{
return pfn < max_low_pfn;
diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c
index 004b37f026d1..46a00aa858b6 100644
--- a/arch/x86/mm/pat/memtype.c
+++ b/arch/x86/mm/pat/memtype.c
@@ -999,7 +999,7 @@ int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
ret = reserve_pfn_range(paddr, size, prot, 0);
if (ret == 0 && vma)
- vma->vm_flags |= VM_PAT;
+ vm_flags_set(vma, VM_PAT);
return ret;
}
@@ -1045,7 +1045,7 @@ void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, pfn_t pfn)
* can be for the entire vma (in which case pfn, size are zero).
*/
void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
- unsigned long size)
+ unsigned long size, bool mm_wr_locked)
{
resource_size_t paddr;
unsigned long prot;
@@ -1064,8 +1064,12 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
size = vma->vm_end - vma->vm_start;
}
free_pfn_range(paddr, size);
- if (vma)
- vma->vm_flags &= ~VM_PAT;
+ if (vma) {
+ if (mm_wr_locked)
+ vm_flags_clear(vma, VM_PAT);
+ else
+ __vm_flags_mod(vma, 0, VM_PAT);
+ }
}
/*
@@ -1075,7 +1079,7 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
*/
void untrack_pfn_moved(struct vm_area_struct *vma)
{
- vma->vm_flags &= ~VM_PAT;
+ vm_flags_clear(vma, VM_PAT);
}
pgprot_t pgprot_writecombine(pgprot_t prot)
diff --git a/arch/x86/um/mem_32.c b/arch/x86/um/mem_32.c
index cafd01f730da..29b2203bc82c 100644
--- a/arch/x86/um/mem_32.c
+++ b/arch/x86/um/mem_32.c
@@ -16,7 +16,7 @@ static int __init gate_vma_init(void)
vma_init(&gate_vma, NULL);
gate_vma.vm_start = FIXADDR_USER_START;
gate_vma.vm_end = FIXADDR_USER_END;
- gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
+ vm_flags_init(&gate_vma, VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC);
gate_vma.vm_page_prot = PAGE_READONLY;
return 0;
diff --git a/arch/xtensa/include/asm/page.h b/arch/xtensa/include/asm/page.h
index 493eb7083b1a..a77d04972eb9 100644
--- a/arch/xtensa/include/asm/page.h
+++ b/arch/xtensa/include/asm/page.h
@@ -11,6 +11,8 @@
#ifndef _XTENSA_PAGE_H
#define _XTENSA_PAGE_H
+#include <linux/const.h>
+
#include <asm/processor.h>
#include <asm/types.h>
#include <asm/cache.h>
@@ -189,8 +191,6 @@ static inline unsigned long ___pa(unsigned long va)
#endif
#define __va(x) \
((void *)((unsigned long) (x) - PHYS_OFFSET + PAGE_OFFSET))
-#define pfn_valid(pfn) \
- ((pfn) >= ARCH_PFN_OFFSET && ((pfn) - ARCH_PFN_OFFSET) < max_mapnr)
#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
#define page_to_virt(page) __va(page_to_pfn(page) << PAGE_SHIFT)
diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h
index 5b5484d707b2..fc7a14884c6c 100644
--- a/arch/xtensa/include/asm/pgtable.h
+++ b/arch/xtensa/include/asm/pgtable.h
@@ -96,7 +96,7 @@
* +- - - - - - - - - - - - - - - - - - - - -+
* (PAGE_NONE)| PPN | 0 | 00 | ADW | 01 | 11 | 11 |
* +-----------------------------------------+
- * swap | index | type | 01 | 11 | 00 |
+ * swap | index | type | 01 | 11 | e0 |
* +-----------------------------------------+
*
* For T1050 hardware and earlier the layout differs for present and (PAGE_NONE)
@@ -112,6 +112,7 @@
* RI ring (0=privileged, 1=user, 2 and 3 are unused)
* CA cache attribute: 00 bypass, 01 writeback, 10 writethrough
* (11 is invalid and used to mark pages that are not present)
+ * e exclusive marker in swap PTEs
* w page is writable (hw)
* x page is executable (hw)
* index swap offset / PAGE_SIZE (bit 11-31: 21 bits -> 8 GB)
@@ -158,6 +159,9 @@
#define _PAGE_DIRTY (1<<7) /* software: page dirty */
#define _PAGE_ACCESSED (1<<8) /* software: page accessed (read) */
+/* We borrow bit 1 to store the exclusive marker in swap PTEs. */
+#define _PAGE_SWP_EXCLUSIVE (1<<1)
+
#ifdef CONFIG_MMU
#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
@@ -343,19 +347,36 @@ ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
}
/*
- * Encode and decode a swap and file entry.
+ * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that
+ * are !pte_none() && !pte_present().
*/
-#define SWP_TYPE_BITS 5
-#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)
+#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > 5)
#define __swp_type(entry) (((entry).val >> 6) & 0x1f)
#define __swp_offset(entry) ((entry).val >> 11)
#define __swp_entry(type,offs) \
- ((swp_entry_t){((type) << 6) | ((offs) << 11) | \
+ ((swp_entry_t){(((type) & 0x1f) << 6) | ((offs) << 11) | \
_PAGE_CA_INVALID | _PAGE_USER})
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
+static inline int pte_swp_exclusive(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
+}
+
+static inline pte_t pte_swp_mkexclusive(pte_t pte)
+{
+ pte_val(pte) |= _PAGE_SWP_EXCLUSIVE;
+ return pte;
+}
+
+static inline pte_t pte_swp_clear_exclusive(pte_t pte)
+{
+ pte_val(pte) &= ~_PAGE_SWP_EXCLUSIVE;
+ return pte;
+}
+
#endif /* !defined (__ASSEMBLY__) */