summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/alpha/kernel/osf_sys.c2
-rw-r--r--arch/arc/mm/mmap.c3
-rw-r--r--arch/arm/mm/fault-armv.c6
-rw-r--r--arch/arm/mm/mmap.c7
-rw-r--r--arch/arm64/Kconfig4
-rw-r--r--arch/arm64/include/asm/Kbuild1
-rw-r--r--arch/arm64/include/asm/mmzone.h13
-rw-r--r--arch/arm64/include/asm/pgtable.h30
-rw-r--r--arch/arm64/include/asm/topology.h1
-rw-r--r--arch/arm64/kvm/nested.c1
-rw-r--r--arch/csky/abiv1/mmap.c3
-rw-r--r--arch/csky/kernel/vdso.c28
-rw-r--r--arch/hexagon/kernel/vdso.c14
-rw-r--r--arch/loongarch/configs/loongson3_defconfig1
-rw-r--r--arch/loongarch/include/asm/Kbuild1
-rw-r--r--arch/loongarch/include/asm/mmzone.h16
-rw-r--r--arch/loongarch/include/asm/topology.h1
-rw-r--r--arch/loongarch/kernel/numa.c21
-rw-r--r--arch/loongarch/mm/mmap.c5
-rw-r--r--arch/mips/Kconfig5
-rw-r--r--arch/mips/include/asm/mach-ip27/mmzone.h1
-rw-r--r--arch/mips/include/asm/mach-loongson64/mmzone.h4
-rw-r--r--arch/mips/loongson64/numa.c28
-rw-r--r--arch/mips/mm/mmap.c5
-rw-r--r--arch/mips/sgi-ip27/ip27-memory.c12
-rw-r--r--arch/mips/sgi-ip27/ip27-smp.c2
-rw-r--r--arch/nios2/mm/init.c12
-rw-r--r--arch/parisc/kernel/sys_parisc.c5
-rw-r--r--arch/parisc/mm/hugetlbpage.c2
-rw-r--r--arch/powerpc/configs/ppc64_defconfig1
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgtable.h3
-rw-r--r--arch/powerpc/include/asm/mmu_context.h9
-rw-r--r--arch/powerpc/include/asm/mmzone.h6
-rw-r--r--arch/powerpc/include/asm/pgtable.h1
-rw-r--r--arch/powerpc/kernel/vdso.c35
-rw-r--r--arch/powerpc/mm/book3s64/pgtable.c20
-rw-r--r--arch/powerpc/mm/book3s64/slice.c10
-rw-r--r--arch/powerpc/mm/numa.c26
-rw-r--r--arch/powerpc/mm/pgtable-frag.c6
-rw-r--r--arch/powerpc/mm/pgtable.c6
-rw-r--r--arch/powerpc/platforms/pseries/papr-vpd.c5
-rw-r--r--arch/riscv/include/asm/Kbuild1
-rw-r--r--arch/riscv/include/asm/mmzone.h13
-rw-r--r--arch/riscv/include/asm/topology.h4
-rw-r--r--arch/s390/include/asm/Kbuild1
-rw-r--r--arch/s390/include/asm/mmzone.h17
-rw-r--r--arch/s390/include/asm/page.h2
-rw-r--r--arch/s390/include/asm/pgtable.h1
-rw-r--r--arch/s390/kernel/numa.c3
-rw-r--r--arch/s390/kernel/uv.c23
-rw-r--r--arch/s390/mm/fault.c16
-rw-r--r--arch/s390/mm/mmap.c4
-rw-r--r--arch/s390/pci/pci_mmio.c22
-rw-r--r--arch/sh/include/asm/mmzone.h3
-rw-r--r--arch/sh/kernel/vsyscall/vsyscall.c14
-rw-r--r--arch/sh/mm/init.c7
-rw-r--r--arch/sh/mm/mmap.c5
-rw-r--r--arch/sh/mm/numa.c3
-rw-r--r--arch/sparc/include/asm/mmzone.h4
-rw-r--r--arch/sparc/include/asm/pgtable_64.h1
-rw-r--r--arch/sparc/kernel/sys_sparc_32.c2
-rw-r--r--arch/sparc/kernel/sys_sparc_64.c4
-rw-r--r--arch/sparc/mm/init_64.c11
-rw-r--r--arch/x86/Kconfig15
-rw-r--r--arch/x86/boot/compressed/misc.c2
-rw-r--r--arch/x86/boot/compressed/misc.h2
-rw-r--r--arch/x86/include/asm/Kbuild1
-rw-r--r--arch/x86/include/asm/mmu_context.h5
-rw-r--r--arch/x86/include/asm/mmzone.h6
-rw-r--r--arch/x86/include/asm/mmzone_32.h17
-rw-r--r--arch/x86/include/asm/mmzone_64.h18
-rw-r--r--arch/x86/include/asm/numa.h26
-rw-r--r--arch/x86/include/asm/pgtable.h150
-rw-r--r--arch/x86/include/asm/pgtable_64.h1
-rw-r--r--arch/x86/include/asm/sparsemem.h9
-rw-r--r--arch/x86/kernel/sys_x86_64.c21
-rw-r--r--arch/x86/mm/Makefile1
-rw-r--r--arch/x86/mm/amdtopology.c1
-rw-r--r--arch/x86/mm/numa.c622
-rw-r--r--arch/x86/mm/numa_emulation.c585
-rw-r--r--arch/x86/mm/numa_internal.h24
-rw-r--r--arch/x86/mm/pat/memtype.c25
-rw-r--r--arch/x86/mm/pgtable.c18
-rw-r--r--arch/x86/um/vdso/vma.c12
-rw-r--r--arch/x86/xen/mmu_pv.c7
-rw-r--r--arch/xtensa/kernel/syscall.c3
86 files changed, 440 insertions, 1654 deletions
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index e5f881bc8288..8886ab539273 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -1229,7 +1229,7 @@ arch_get_unmapped_area_1(unsigned long addr, unsigned long len,
unsigned long
arch_get_unmapped_area(struct file *filp, unsigned long addr,
unsigned long len, unsigned long pgoff,
- unsigned long flags)
+ unsigned long flags, vm_flags_t vm_flags)
{
unsigned long limit;
diff --git a/arch/arc/mm/mmap.c b/arch/arc/mm/mmap.c
index 69a915297155..2185afe8d59f 100644
--- a/arch/arc/mm/mmap.c
+++ b/arch/arc/mm/mmap.c
@@ -23,7 +23,8 @@
*/
unsigned long
arch_get_unmapped_area(struct file *filp, unsigned long addr,
- unsigned long len, unsigned long pgoff, unsigned long flags)
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags, vm_flags_t vm_flags)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c
index 2286c2ea60ec..831793cd6ff9 100644
--- a/arch/arm/mm/fault-armv.c
+++ b/arch/arm/mm/fault-armv.c
@@ -61,7 +61,7 @@ static int do_adjust_pte(struct vm_area_struct *vma, unsigned long address,
return ret;
}
-#if USE_SPLIT_PTE_PTLOCKS
+#if defined(CONFIG_SPLIT_PTE_PTLOCKS)
/*
* If we are using split PTE locks, then we need to take the page
* lock here. Otherwise we are using shared mm->page_table_lock
@@ -80,10 +80,10 @@ static inline void do_pte_unlock(spinlock_t *ptl)
{
spin_unlock(ptl);
}
-#else /* !USE_SPLIT_PTE_PTLOCKS */
+#else /* !defined(CONFIG_SPLIT_PTE_PTLOCKS) */
static inline void do_pte_lock(spinlock_t *ptl) {}
static inline void do_pte_unlock(spinlock_t *ptl) {}
-#endif /* USE_SPLIT_PTE_PTLOCKS */
+#endif /* defined(CONFIG_SPLIT_PTE_PTLOCKS) */
static int adjust_pte(struct vm_area_struct *vma, unsigned long address,
unsigned long pfn)
diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c
index d65d0e6ed10a..3dbb383c26d5 100644
--- a/arch/arm/mm/mmap.c
+++ b/arch/arm/mm/mmap.c
@@ -28,7 +28,8 @@
*/
unsigned long
arch_get_unmapped_area(struct file *filp, unsigned long addr,
- unsigned long len, unsigned long pgoff, unsigned long flags)
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags, vm_flags_t vm_flags)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
@@ -78,8 +79,8 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
unsigned long
arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
- const unsigned long len, const unsigned long pgoff,
- const unsigned long flags)
+ const unsigned long len, const unsigned long pgoff,
+ const unsigned long flags, vm_flags_t vm_flags)
{
struct vm_area_struct *vma;
struct mm_struct *mm = current->mm;
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index e430ff33c80d..49f054dcd4de 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -101,6 +101,7 @@ config ARM64
select ARCH_SUPPORTS_NUMA_BALANCING
select ARCH_SUPPORTS_PAGE_TABLE_CHECK
select ARCH_SUPPORTS_PER_VMA_LOCK
+ select ARCH_SUPPORTS_HUGE_PFNMAP if TRANSPARENT_HUGEPAGE
select ARCH_SUPPORTS_RT
select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT
@@ -2104,7 +2105,8 @@ config ARM64_MTE
depends on ARM64_PAN
select ARCH_HAS_SUBPAGE_FAULTS
select ARCH_USES_HIGH_VMA_FLAGS
- select ARCH_USES_PG_ARCH_X
+ select ARCH_USES_PG_ARCH_2
+ select ARCH_USES_PG_ARCH_3
help
Memory Tagging (part of the ARMv8.5 Extensions) provides
architectural support for run-time, always-on detection of
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index 7d7d97ad3cd5..4e350df9a02d 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -9,6 +9,7 @@ syscall-y += unistd_compat_32.h
generic-y += early_ioremap.h
generic-y += mcs_spinlock.h
+generic-y += mmzone.h
generic-y += qrwlock.h
generic-y += qspinlock.h
generic-y += parport.h
diff --git a/arch/arm64/include/asm/mmzone.h b/arch/arm64/include/asm/mmzone.h
deleted file mode 100644
index fa17e01d9ab2..000000000000
--- a/arch/arm64/include/asm/mmzone.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __ASM_MMZONE_H
-#define __ASM_MMZONE_H
-
-#ifdef CONFIG_NUMA
-
-#include <asm/numa.h>
-
-extern struct pglist_data *node_data[];
-#define NODE_DATA(nid) (node_data[(nid)])
-
-#endif /* CONFIG_NUMA */
-#endif /* __ASM_MMZONE_H */
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 96c2b0b07c4c..c329ea061dc9 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -407,6 +407,7 @@ static inline void __sync_cache_and_tags(pte_t pte, unsigned int nr_pages)
/*
* Select all bits except the pfn
*/
+#define pte_pgprot pte_pgprot
static inline pgprot_t pte_pgprot(pte_t pte)
{
unsigned long pfn = pte_pfn(pte);
@@ -600,6 +601,14 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd)
return pte_pmd(set_pte_bit(pmd_pte(pmd), __pgprot(PTE_DEVMAP)));
}
+#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP
+#define pmd_special(pte) (!!((pmd_val(pte) & PTE_SPECIAL)))
+static inline pmd_t pmd_mkspecial(pmd_t pmd)
+{
+ return set_pmd_bit(pmd, __pgprot(PTE_SPECIAL));
+}
+#endif
+
#define __pmd_to_phys(pmd) __pte_to_phys(pmd_pte(pmd))
#define __phys_to_pmd_val(phys) __phys_to_pte_val(phys)
#define pmd_pfn(pmd) ((__pmd_to_phys(pmd) & PMD_MASK) >> PAGE_SHIFT)
@@ -617,6 +626,27 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd)
#define pud_pfn(pud) ((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT)
#define pfn_pud(pfn,prot) __pud(__phys_to_pud_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))
+#ifdef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP
+#define pud_special(pte) pte_special(pud_pte(pud))
+#define pud_mkspecial(pte) pte_pud(pte_mkspecial(pud_pte(pud)))
+#endif
+
+#define pmd_pgprot pmd_pgprot
+static inline pgprot_t pmd_pgprot(pmd_t pmd)
+{
+ unsigned long pfn = pmd_pfn(pmd);
+
+ return __pgprot(pmd_val(pfn_pmd(pfn, __pgprot(0))) ^ pmd_val(pmd));
+}
+
+#define pud_pgprot pud_pgprot
+static inline pgprot_t pud_pgprot(pud_t pud)
+{
+ unsigned long pfn = pud_pfn(pud);
+
+ return __pgprot(pud_val(pfn_pud(pfn, __pgprot(0))) ^ pud_val(pud));
+}
+
static inline void __set_pte_at(struct mm_struct *mm,
unsigned long __always_unused addr,
pte_t *ptep, pte_t pte, unsigned int nr)
diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
index 0f6ef432fb84..5fc3af9f8f29 100644
--- a/arch/arm64/include/asm/topology.h
+++ b/arch/arm64/include/asm/topology.h
@@ -5,6 +5,7 @@
#include <linux/cpumask.h>
#ifdef CONFIG_NUMA
+#include <asm/numa.h>
struct pci_bus;
int pcibus_to_node(struct pci_bus *bus);
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index 638b6205526f..f9e30dd34c7a 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -62,7 +62,6 @@ int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu)
*/
num_mmus = atomic_read(&kvm->online_vcpus) * S2_MMU_PER_VCPU;
tmp = kvrealloc(kvm->arch.nested_mmus,
- size_mul(sizeof(*kvm->arch.nested_mmus), kvm->arch.nested_mmus_size),
size_mul(sizeof(*kvm->arch.nested_mmus), num_mmus),
GFP_KERNEL_ACCOUNT | __GFP_ZERO);
if (!tmp)
diff --git a/arch/csky/abiv1/mmap.c b/arch/csky/abiv1/mmap.c
index 7f826331d409..1047865e82a9 100644
--- a/arch/csky/abiv1/mmap.c
+++ b/arch/csky/abiv1/mmap.c
@@ -23,7 +23,8 @@
*/
unsigned long
arch_get_unmapped_area(struct file *filp, unsigned long addr,
- unsigned long len, unsigned long pgoff, unsigned long flags)
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags, vm_flags_t vm_flags)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
diff --git a/arch/csky/kernel/vdso.c b/arch/csky/kernel/vdso.c
index 2ca886e4a458..5c9ef63c29f1 100644
--- a/arch/csky/kernel/vdso.c
+++ b/arch/csky/kernel/vdso.c
@@ -45,9 +45,16 @@ arch_initcall(vdso_init);
int arch_setup_additional_pages(struct linux_binprm *bprm,
int uses_interp)
{
+ struct vm_area_struct *vma;
struct mm_struct *mm = current->mm;
unsigned long vdso_base, vdso_len;
int ret;
+ static struct vm_special_mapping vdso_mapping = {
+ .name = "[vdso]",
+ };
+ static struct vm_special_mapping vvar_mapping = {
+ .name = "[vvar]",
+ };
vdso_len = (vdso_pages + 1) << PAGE_SHIFT;
@@ -65,22 +72,29 @@ int arch_setup_additional_pages(struct linux_binprm *bprm,
*/
mm->context.vdso = (void *)vdso_base;
- ret =
- install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT,
+ vdso_mapping.pages = vdso_pagelist;
+ vma =
+ _install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT,
(VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC),
- vdso_pagelist);
+ &vdso_mapping);
- if (unlikely(ret)) {
+ if (IS_ERR(vma)) {
+ ret = PTR_ERR(vma);
mm->context.vdso = NULL;
goto end;
}
vdso_base += (vdso_pages << PAGE_SHIFT);
- ret = install_special_mapping(mm, vdso_base, PAGE_SIZE,
- (VM_READ | VM_MAYREAD), &vdso_pagelist[vdso_pages]);
+ vvar_mapping.pages = &vdso_pagelist[vdso_pages];
+ vma = _install_special_mapping(mm, vdso_base, PAGE_SIZE,
+ (VM_READ | VM_MAYREAD), &vvar_mapping);
- if (unlikely(ret))
+ if (IS_ERR(vma)) {
+ ret = PTR_ERR(vma);
mm->context.vdso = NULL;
+ goto end;
+ }
+ ret = 0;
end:
mmap_write_unlock(mm);
return ret;
diff --git a/arch/hexagon/kernel/vdso.c b/arch/hexagon/kernel/vdso.c
index 2e4872d62124..6fd27ff1df73 100644
--- a/arch/hexagon/kernel/vdso.c
+++ b/arch/hexagon/kernel/vdso.c
@@ -51,7 +51,11 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
int ret;
unsigned long vdso_base;
+ struct vm_area_struct *vma;
struct mm_struct *mm = current->mm;
+ static struct vm_special_mapping vdso_mapping = {
+ name = "[vdso]",
+ };
if (mmap_write_lock_killable(mm))
return -EINTR;
@@ -66,16 +70,18 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
}
/* MAYWRITE to allow gdb to COW and set breakpoints. */
- ret = install_special_mapping(mm, vdso_base, PAGE_SIZE,
+ vdso_mapping.pages = &vdso_page;
+ vma = _install_special_mapping(mm, vdso_base, PAGE_SIZE,
VM_READ|VM_EXEC|
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
- &vdso_page);
+ &vdso_mapping);
- if (ret)
+ ret = PTR_ERR(vma);
+ if (IS_ERR(vma))
goto up_fail;
mm->context.vdso = (void *)vdso_base;
-
+ ret = 0;
up_fail:
mmap_write_unlock(mm);
return ret;
diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig
index b4252c357c8e..75b366407a60 100644
--- a/arch/loongarch/configs/loongson3_defconfig
+++ b/arch/loongarch/configs/loongson3_defconfig
@@ -96,7 +96,6 @@ CONFIG_ZPOOL=y
CONFIG_ZSWAP=y
CONFIG_ZSWAP_COMPRESSOR_DEFAULT_ZSTD=y
CONFIG_ZBUD=y
-CONFIG_Z3FOLD=y
CONFIG_ZSMALLOC=m
# CONFIG_COMPAT_BRK is not set
CONFIG_MEMORY_HOTPLUG=y
diff --git a/arch/loongarch/include/asm/Kbuild b/arch/loongarch/include/asm/Kbuild
index 4635b755b2b4..5b5a6c90e6e2 100644
--- a/arch/loongarch/include/asm/Kbuild
+++ b/arch/loongarch/include/asm/Kbuild
@@ -8,5 +8,6 @@ generic-y += early_ioremap.h
generic-y += qrwlock.h
generic-y += user.h
generic-y += ioctl.h
+generic-y += mmzone.h
generic-y += statfs.h
generic-y += param.h
diff --git a/arch/loongarch/include/asm/mmzone.h b/arch/loongarch/include/asm/mmzone.h
deleted file mode 100644
index 2b9a90727e19..000000000000
--- a/arch/loongarch/include/asm/mmzone.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Author: Huacai Chen (chenhuacai@loongson.cn)
- * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
- */
-#ifndef _ASM_MMZONE_H_
-#define _ASM_MMZONE_H_
-
-#include <asm/page.h>
-#include <asm/numa.h>
-
-extern struct pglist_data *node_data[];
-
-#define NODE_DATA(nid) (node_data[(nid)])
-
-#endif /* _ASM_MMZONE_H_ */
diff --git a/arch/loongarch/include/asm/topology.h b/arch/loongarch/include/asm/topology.h
index 66128dec0bf6..50273c9187d0 100644
--- a/arch/loongarch/include/asm/topology.h
+++ b/arch/loongarch/include/asm/topology.h
@@ -8,6 +8,7 @@
#include <linux/smp.h>
#ifdef CONFIG_NUMA
+#include <asm/numa.h>
extern cpumask_t cpus_on_node[];
diff --git a/arch/loongarch/kernel/numa.c b/arch/loongarch/kernel/numa.c
index 8fe21f868f72..84fe7f854820 100644
--- a/arch/loongarch/kernel/numa.c
+++ b/arch/loongarch/kernel/numa.c
@@ -27,10 +27,7 @@
#include <asm/time.h>
int numa_off;
-struct pglist_data *node_data[MAX_NUMNODES];
unsigned char node_distances[MAX_NUMNODES][MAX_NUMNODES];
-
-EXPORT_SYMBOL(node_data);
EXPORT_SYMBOL(node_distances);
static struct numa_meminfo numa_meminfo;
@@ -190,24 +187,6 @@ int __init numa_add_memblk(int nid, u64 start, u64 end)
return numa_add_memblk_to(nid, start, end, &numa_meminfo);
}
-static void __init alloc_node_data(int nid)
-{
- void *nd;
- unsigned long nd_pa;
- size_t nd_sz = roundup(sizeof(pg_data_t), PAGE_SIZE);
-
- nd_pa = memblock_phys_alloc_try_nid(nd_sz, SMP_CACHE_BYTES, nid);
- if (!nd_pa) {
- pr_err("Cannot find %zu Byte for node_data (initial node: %d)\n", nd_sz, nid);
- return;
- }
-
- nd = __va(nd_pa);
-
- node_data[nid] = nd;
- memset(nd, 0, sizeof(pg_data_t));
-}
-
static void __init node_mem_init(unsigned int node)
{
unsigned long start_pfn, end_pfn;
diff --git a/arch/loongarch/mm/mmap.c b/arch/loongarch/mm/mmap.c
index 889030985135..914e82ff3f65 100644
--- a/arch/loongarch/mm/mmap.c
+++ b/arch/loongarch/mm/mmap.c
@@ -89,7 +89,8 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp,
}
unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr0,
- unsigned long len, unsigned long pgoff, unsigned long flags)
+ unsigned long len, unsigned long pgoff, unsigned long flags,
+ vm_flags_t vm_flags)
{
return arch_get_unmapped_area_common(filp,
addr0, len, pgoff, flags, UP);
@@ -101,7 +102,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr0,
*/
unsigned long arch_get_unmapped_area_topdown(struct file *filp,
unsigned long addr0, unsigned long len, unsigned long pgoff,
- unsigned long flags)
+ unsigned long flags, vm_flags_t vm_flags)
{
return arch_get_unmapped_area_common(filp,
addr0, len, pgoff, flags, DOWN);
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 023ad33a7e94..397edf05dd72 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -502,7 +502,6 @@ config MACH_LOONGSON64
select USE_OF
select BUILTIN_DTB
select PCI_HOST_GENERIC
- select HAVE_ARCH_NODEDATA_EXTENSION if NUMA
help
This enables the support of Loongson-2/3 family of machines.
@@ -735,7 +734,6 @@ config SGI_IP27
select WAR_R10000_LLSC
select MIPS_L1_CACHE_SHIFT_7
select NUMA
- select HAVE_ARCH_NODEDATA_EXTENSION
help
This are the SGI Origin 200, Origin 2000 and Onyx 2 Graphics
workstations. To compile a Linux kernel that runs on these, say Y
@@ -2613,9 +2611,6 @@ config NUMA
config SYS_SUPPORTS_NUMA
bool
-config HAVE_ARCH_NODEDATA_EXTENSION
- bool
-
config RELOCATABLE
bool "Relocatable kernel"
depends on SYS_SUPPORTS_RELOCATABLE
diff --git a/arch/mips/include/asm/mach-ip27/mmzone.h b/arch/mips/include/asm/mach-ip27/mmzone.h
index 08c36e50a860..56959eb9cb26 100644
--- a/arch/mips/include/asm/mach-ip27/mmzone.h
+++ b/arch/mips/include/asm/mach-ip27/mmzone.h
@@ -22,7 +22,6 @@ struct node_data {
extern struct node_data *__node_data[];
-#define NODE_DATA(n) (&__node_data[(n)]->pglist)
#define hub_data(n) (&__node_data[(n)]->hub)
#endif /* _ASM_MACH_MMZONE_H */
diff --git a/arch/mips/include/asm/mach-loongson64/mmzone.h b/arch/mips/include/asm/mach-loongson64/mmzone.h
index a3d65d37b8b5..8fb70fd3c9c4 100644
--- a/arch/mips/include/asm/mach-loongson64/mmzone.h
+++ b/arch/mips/include/asm/mach-loongson64/mmzone.h
@@ -14,10 +14,6 @@
#define pa_to_nid(addr) (((addr) & 0xf00000000000) >> NODE_ADDRSPACE_SHIFT)
#define nid_to_addrbase(nid) ((unsigned long)(nid) << NODE_ADDRSPACE_SHIFT)
-extern struct pglist_data *__node_data[];
-
-#define NODE_DATA(n) (__node_data[n])
-
extern void __init prom_init_numa_memory(void);
#endif /* _ASM_MACH_MMZONE_H */
diff --git a/arch/mips/loongson64/numa.c b/arch/mips/loongson64/numa.c
index 68dafd6d3e25..8388400d052f 100644
--- a/arch/mips/loongson64/numa.c
+++ b/arch/mips/loongson64/numa.c
@@ -29,8 +29,6 @@
unsigned char __node_distances[MAX_NUMNODES][MAX_NUMNODES];
EXPORT_SYMBOL(__node_distances);
-struct pglist_data *__node_data[MAX_NUMNODES];
-EXPORT_SYMBOL(__node_data);
cpumask_t __node_cpumask[MAX_NUMNODES];
EXPORT_SYMBOL(__node_cpumask);
@@ -83,12 +81,8 @@ static void __init init_topology_matrix(void)
static void __init node_mem_init(unsigned int node)
{
- struct pglist_data *nd;
unsigned long node_addrspace_offset;
unsigned long start_pfn, end_pfn;
- unsigned long nd_pa;
- int tnid;
- const size_t nd_size = roundup(sizeof(pg_data_t), SMP_CACHE_BYTES);
node_addrspace_offset = nid_to_addrbase(node);
pr_info("Node%d's addrspace_offset is 0x%lx\n",
@@ -98,16 +92,8 @@ static void __init node_mem_init(unsigned int node)
pr_info("Node%d: start_pfn=0x%lx, end_pfn=0x%lx\n",
node, start_pfn, end_pfn);
- nd_pa = memblock_phys_alloc_try_nid(nd_size, SMP_CACHE_BYTES, node);
- if (!nd_pa)
- panic("Cannot allocate %zu bytes for node %d data\n",
- nd_size, node);
- nd = __va(nd_pa);
- memset(nd, 0, sizeof(struct pglist_data));
- tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT);
- if (tnid != node)
- pr_info("NODE_DATA(%d) on node %d\n", node, tnid);
- __node_data[node] = nd;
+ alloc_node_data(node);
+
NODE_DATA(node)->node_start_pfn = start_pfn;
NODE_DATA(node)->node_spanned_pages = end_pfn - start_pfn;
@@ -198,13 +184,3 @@ void __init prom_init_numa_memory(void)
pr_info("CP0_PageGrain: CP0 5.1 (0x%x)\n", read_c0_pagegrain());
prom_meminit();
}
-
-pg_data_t * __init arch_alloc_nodedata(int nid)
-{
- return memblock_alloc(sizeof(pg_data_t), SMP_CACHE_BYTES);
-}
-
-void arch_refresh_nodedata(int nid, pg_data_t *pgdat)
-{
- __node_data[nid] = pgdat;
-}
diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c
index 7e11d7b58761..5d2a1225785b 100644
--- a/arch/mips/mm/mmap.c
+++ b/arch/mips/mm/mmap.c
@@ -98,7 +98,8 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp,
}
unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr0,
- unsigned long len, unsigned long pgoff, unsigned long flags)
+ unsigned long len, unsigned long pgoff, unsigned long flags,
+ vm_flags_t vm_flags)
{
return arch_get_unmapped_area_common(filp,
addr0, len, pgoff, flags, UP);
@@ -110,7 +111,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr0,
*/
unsigned long arch_get_unmapped_area_topdown(struct file *filp,
unsigned long addr0, unsigned long len, unsigned long pgoff,
- unsigned long flags)
+ unsigned long flags, vm_flags_t vm_flags)
{
return arch_get_unmapped_area_common(filp,
addr0, len, pgoff, flags, DOWN);
diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c
index b8ca94cfb4fe..1963313f55d8 100644
--- a/arch/mips/sgi-ip27/ip27-memory.c
+++ b/arch/mips/sgi-ip27/ip27-memory.c
@@ -35,7 +35,6 @@
#define PFN_NASIDSHFT (NASID_SHFT - PAGE_SHIFT)
struct node_data *__node_data[MAX_NUMNODES];
-
EXPORT_SYMBOL(__node_data);
static u64 gen_region_mask(void)
@@ -361,6 +360,7 @@ static void __init node_mem_init(nasid_t node)
*/
__node_data[node] = __va(slot_freepfn << PAGE_SHIFT);
memset(__node_data[node], 0, PAGE_SIZE);
+ node_data[node] = &__node_data[node]->pglist;
NODE_DATA(node)->node_start_pfn = start_pfn;
NODE_DATA(node)->node_spanned_pages = end_pfn - start_pfn;
@@ -423,13 +423,3 @@ void __init mem_init(void)
memblock_free_all();
setup_zero_pages(); /* This comes from node 0 */
}
-
-pg_data_t * __init arch_alloc_nodedata(int nid)
-{
- return memblock_alloc(sizeof(pg_data_t), SMP_CACHE_BYTES);
-}
-
-void arch_refresh_nodedata(int nid, pg_data_t *pgdat)
-{
- __node_data[nid] = (struct node_data *)pgdat;
-}
diff --git a/arch/mips/sgi-ip27/ip27-smp.c b/arch/mips/sgi-ip27/ip27-smp.c
index 5d2652a1d35a..62733e049570 100644
--- a/arch/mips/sgi-ip27/ip27-smp.c
+++ b/arch/mips/sgi-ip27/ip27-smp.c
@@ -70,11 +70,13 @@ void cpu_node_probe(void)
gda_t *gdap = GDA;
nodes_clear(node_online_map);
+ nodes_clear(node_possible_map);
for (i = 0; i < MAX_NUMNODES; i++) {
nasid_t nasid = gdap->g_nasidtable[i];
if (nasid == INVALID_NASID)
break;
node_set_online(nasid);
+ node_set(nasid, node_possible_map);
highest = node_scan_cpus(nasid, highest);
}
diff --git a/arch/nios2/mm/init.c b/arch/nios2/mm/init.c
index 3459df28afee..a2278485de19 100644
--- a/arch/nios2/mm/init.c
+++ b/arch/nios2/mm/init.c
@@ -82,6 +82,10 @@ void __init mmu_init(void)
pgd_t swapper_pg_dir[PTRS_PER_PGD] __aligned(PAGE_SIZE);
pte_t invalid_pte_table[PTRS_PER_PTE] __aligned(PAGE_SIZE);
static struct page *kuser_page[1];
+static struct vm_special_mapping vdso_mapping = {
+ .name = "[vdso]",
+ .pages = kuser_page,
+};
static int alloc_kuser_page(void)
{
@@ -106,18 +110,18 @@ arch_initcall(alloc_kuser_page);
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
struct mm_struct *mm = current->mm;
- int ret;
+ struct vm_area_struct *vma;
mmap_write_lock(mm);
/* Map kuser helpers to user space address */
- ret = install_special_mapping(mm, KUSER_BASE, KUSER_SIZE,
+ vma = _install_special_mapping(mm, KUSER_BASE, KUSER_SIZE,
VM_READ | VM_EXEC | VM_MAYREAD |
- VM_MAYEXEC, kuser_page);
+ VM_MAYEXEC, &vdso_mapping);
mmap_write_unlock(mm);
- return ret;
+ return IS_ERR(vma) ? PTR_ERR(vma) : 0;
}
const char *arch_vma_name(struct vm_area_struct *vma)
diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c
index f7722451276e..f852fe274abe 100644
--- a/arch/parisc/kernel/sys_parisc.c
+++ b/arch/parisc/kernel/sys_parisc.c
@@ -167,7 +167,8 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp,
}
unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
- unsigned long len, unsigned long pgoff, unsigned long flags)
+ unsigned long len, unsigned long pgoff, unsigned long flags,
+ vm_flags_t vm_flags)
{
return arch_get_unmapped_area_common(filp,
addr, len, pgoff, flags, UP);
@@ -175,7 +176,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
unsigned long arch_get_unmapped_area_topdown(struct file *filp,
unsigned long addr, unsigned long len, unsigned long pgoff,
- unsigned long flags)
+ unsigned long flags, vm_flags_t vm_flags)
{
return arch_get_unmapped_area_common(filp,
addr, len, pgoff, flags, DOWN);
diff --git a/arch/parisc/mm/hugetlbpage.c b/arch/parisc/mm/hugetlbpage.c
index 0356199bd9e7..aa664f7ddb63 100644
--- a/arch/parisc/mm/hugetlbpage.c
+++ b/arch/parisc/mm/hugetlbpage.c
@@ -40,7 +40,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
addr = ALIGN(addr, huge_page_size(h));
/* we need to make sure the colouring is OK */
- return arch_get_unmapped_area(file, addr, len, pgoff, flags);
+ return arch_get_unmapped_area(file, addr, len, pgoff, flags, 0);
}
diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig
index 6001d580c0dd..a5e3e7f97f4d 100644
--- a/arch/powerpc/configs/ppc64_defconfig
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -81,7 +81,6 @@ CONFIG_MODULE_SIG_SHA512=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_BINFMT_MISC=m
CONFIG_ZSWAP=y
-CONFIG_Z3FOLD=y
CONFIG_ZSMALLOC=y
# CONFIG_SLAB_MERGE_DEFAULT is not set
CONFIG_SLAB_FREELIST_RANDOM=y
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index f8ba72573cae..6d98e6f08d4d 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -1098,6 +1098,7 @@ extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot);
extern pud_t pfn_pud(unsigned long pfn, pgprot_t pgprot);
extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot);
extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot);
+extern pud_t pud_modify(pud_t pud, pgprot_t newprot);
extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t pmd);
extern void set_pud_at(struct mm_struct *mm, unsigned long addr,
@@ -1358,6 +1359,8 @@ static inline pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm,
#define __HAVE_ARCH_PMDP_INVALIDATE
extern pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmdp);
+extern pud_t pudp_invalidate(struct vm_area_struct *vma, unsigned long address,
+ pud_t *pudp);
#define pmd_move_must_withdraw pmd_move_must_withdraw
struct spinlock;
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 99707456c2cd..a157ab513347 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -257,15 +257,6 @@ static inline void enter_lazy_tlb(struct mm_struct *mm,
extern void arch_exit_mmap(struct mm_struct *mm);
-static inline void arch_unmap(struct mm_struct *mm,
- unsigned long start, unsigned long end)
-{
- unsigned long vdso_base = (unsigned long)mm->context.vdso;
-
- if (start <= vdso_base && vdso_base < end)
- mm->context.vdso = NULL;
-}
-
#ifdef CONFIG_PPC_MEM_KEYS
bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write,
bool execute, bool foreign);
diff --git a/arch/powerpc/include/asm/mmzone.h b/arch/powerpc/include/asm/mmzone.h
index da827d2d0866..d99863cd6cde 100644
--- a/arch/powerpc/include/asm/mmzone.h
+++ b/arch/powerpc/include/asm/mmzone.h
@@ -20,12 +20,6 @@
#ifdef CONFIG_NUMA
-extern struct pglist_data *node_data[];
-/*
- * Return a pointer to the node data for node n.
- */
-#define NODE_DATA(nid) (node_data[nid])
-
/*
* Following are specific to this numa platform.
*/
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index 264a6c09517a..2f72ad885332 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -65,6 +65,7 @@ static inline unsigned long pte_pfn(pte_t pte)
/*
* Select all bits except the pfn
*/
+#define pte_pgprot pte_pgprot
static inline pgprot_t pte_pgprot(pte_t pte)
{
unsigned long pte_flags;
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index 7a2ff9010f17..ee4b9d676cff 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -81,6 +81,21 @@ static int vdso64_mremap(const struct vm_special_mapping *sm, struct vm_area_str
return vdso_mremap(sm, new_vma, &vdso64_end - &vdso64_start);
}
+static void vdso_close(const struct vm_special_mapping *sm, struct vm_area_struct *vma)
+{
+ struct mm_struct *mm = vma->vm_mm;
+
+ /*
+ * close() is called for munmap() but also for mremap(). In the mremap()
+ * case the vdso pointer has already been updated by the mremap() hook
+ * above, so it must not be set to NULL here.
+ */
+ if (vma->vm_start != (unsigned long)mm->context.vdso)
+ return;
+
+ mm->context.vdso = NULL;
+}
+
static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
struct vm_area_struct *vma, struct vm_fault *vmf);
@@ -92,11 +107,13 @@ static struct vm_special_mapping vvar_spec __ro_after_init = {
static struct vm_special_mapping vdso32_spec __ro_after_init = {
.name = "[vdso]",
.mremap = vdso32_mremap,
+ .close = vdso_close,
};
static struct vm_special_mapping vdso64_spec __ro_after_init = {
.name = "[vdso]",
.mremap = vdso64_mremap,
+ .close = vdso_close,
};
#ifdef CONFIG_TIME_NS
@@ -197,13 +214,6 @@ static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_int
/* Add required alignment. */
vdso_base = ALIGN(vdso_base, VDSO_ALIGNMENT);
- /*
- * Put vDSO base into mm struct. We need to do this before calling
- * install_special_mapping or the perf counter mmap tracking code
- * will fail to recognise it as a vDSO.
- */
- mm->context.vdso = (void __user *)vdso_base + vvar_size;
-
vma = _install_special_mapping(mm, vdso_base, vvar_size,
VM_READ | VM_MAYREAD | VM_IO |
VM_DONTDUMP | VM_PFNMAP, &vvar_spec);
@@ -223,10 +233,15 @@ static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_int
vma = _install_special_mapping(mm, vdso_base + vvar_size, vdso_size,
VM_READ | VM_EXEC | VM_MAYREAD |
VM_MAYWRITE | VM_MAYEXEC, vdso_spec);
- if (IS_ERR(vma))
+ if (IS_ERR(vma)) {
do_munmap(mm, vdso_base, vvar_size, NULL);
+ return PTR_ERR(vma);
+ }
+
+ // Now that the mappings are in place, set the mm VDSO pointer
+ mm->context.vdso = (void __user *)vdso_base + vvar_size;
- return PTR_ERR_OR_ZERO(vma);
+ return 0;
}
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
@@ -240,8 +255,6 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
return -EINTR;
rc = __arch_setup_additional_pages(bprm, uses_interp);
- if (rc)
- mm->context.vdso = NULL;
mmap_write_unlock(mm);
return rc;
diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
index f4d8d3c40e5c..5a4a75369043 100644
--- a/arch/powerpc/mm/book3s64/pgtable.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -176,6 +176,17 @@ pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
return __pmd(old_pmd);
}
+pud_t pudp_invalidate(struct vm_area_struct *vma, unsigned long address,
+ pud_t *pudp)
+{
+ unsigned long old_pud;
+
+ VM_WARN_ON_ONCE(!pud_present(*pudp));
+ old_pud = pud_hugepage_update(vma->vm_mm, address, pudp, _PAGE_PRESENT, _PAGE_INVALID);
+ flush_pud_tlb_range(vma, address, address + HPAGE_PUD_SIZE);
+ return __pud(old_pud);
+}
+
pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma,
unsigned long addr, pmd_t *pmdp, int full)
{
@@ -259,6 +270,15 @@ pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
pmdv &= _HPAGE_CHG_MASK;
return pmd_set_protbits(__pmd(pmdv), newprot);
}
+
+pud_t pud_modify(pud_t pud, pgprot_t newprot)
+{
+ unsigned long pudv;
+
+ pudv = pud_val(pud);
+ pudv &= _HPAGE_CHG_MASK;
+ return pud_set_protbits(__pud(pudv), newprot);
+}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
/* For use by kexec, called with MMU off */
diff --git a/arch/powerpc/mm/book3s64/slice.c b/arch/powerpc/mm/book3s64/slice.c
index ef3ce37f1bb3..87307d0fc3b8 100644
--- a/arch/powerpc/mm/book3s64/slice.c
+++ b/arch/powerpc/mm/book3s64/slice.c
@@ -637,10 +637,11 @@ unsigned long arch_get_unmapped_area(struct file *filp,
unsigned long addr,
unsigned long len,
unsigned long pgoff,
- unsigned long flags)
+ unsigned long flags,
+ vm_flags_t vm_flags)
{
if (radix_enabled())
- return generic_get_unmapped_area(filp, addr, len, pgoff, flags);
+ return generic_get_unmapped_area(filp, addr, len, pgoff, flags, vm_flags);
return slice_get_unmapped_area(addr, len, flags,
mm_ctx_user_psize(&current->mm->context), 0);
@@ -650,10 +651,11 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp,
const unsigned long addr0,
const unsigned long len,
const unsigned long pgoff,
- const unsigned long flags)
+ const unsigned long flags,
+ vm_flags_t vm_flags)
{
if (radix_enabled())
- return generic_get_unmapped_area_topdown(filp, addr0, len, pgoff, flags);
+ return generic_get_unmapped_area_topdown(filp, addr0, len, pgoff, flags, vm_flags);
return slice_get_unmapped_area(addr0, len, flags,
mm_ctx_user_psize(&current->mm->context), 1);
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index aa89899f0c1a..3c1da08304d0 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -43,11 +43,9 @@ static char *cmdline __initdata;
int numa_cpu_lookup_table[NR_CPUS];
cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
-struct pglist_data *node_data[MAX_NUMNODES];
EXPORT_SYMBOL(numa_cpu_lookup_table);
EXPORT_SYMBOL(node_to_cpumask_map);
-EXPORT_SYMBOL(node_data);
static int primary_domain_index;
static int n_mem_addr_cells, n_mem_size_cells;
@@ -1095,27 +1093,9 @@ void __init dump_numa_cpu_topology(void)
static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn)
{
u64 spanned_pages = end_pfn - start_pfn;
- const size_t nd_size = roundup(sizeof(pg_data_t), SMP_CACHE_BYTES);
- u64 nd_pa;
- void *nd;
- int tnid;
-
- nd_pa = memblock_phys_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
- if (!nd_pa)
- panic("Cannot allocate %zu bytes for node %d data\n",
- nd_size, nid);
-
- nd = __va(nd_pa);
-
- /* report and initialize */
- pr_info(" NODE_DATA [mem %#010Lx-%#010Lx]\n",
- nd_pa, nd_pa + nd_size - 1);
- tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT);
- if (tnid != nid)
- pr_info(" NODE_DATA(%d) on node %d\n", nid, tnid);
-
- node_data[nid] = nd;
- memset(NODE_DATA(nid), 0, sizeof(pg_data_t));
+
+ alloc_node_data(nid);
+
NODE_DATA(nid)->node_id = nid;
NODE_DATA(nid)->node_start_pfn = start_pfn;
NODE_DATA(nid)->node_spanned_pages = spanned_pages;
diff --git a/arch/powerpc/mm/pgtable-frag.c b/arch/powerpc/mm/pgtable-frag.c
index 8c31802f97e8..e89f64a0f24a 100644
--- a/arch/powerpc/mm/pgtable-frag.c
+++ b/arch/powerpc/mm/pgtable-frag.c
@@ -136,10 +136,10 @@ void pte_fragment_free(unsigned long *table, int kernel)
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable)
{
- struct page *page;
+ struct folio *folio;
- page = virt_to_page(pgtable);
- SetPageActive(page);
+ folio = virt_to_folio(pgtable);
+ folio_set_active(folio);
pte_fragment_free((unsigned long *)pgtable, 0);
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index ab0656115424..7316396e452d 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -297,6 +297,12 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma,
}
#if defined(CONFIG_PPC_8xx)
+
+#if defined(CONFIG_SPLIT_PTE_PTLOCKS) || defined(CONFIG_SPLIT_PMD_PTLOCKS)
+/* We need the same lock to protect the PMD table and the two PTE tables. */
+#error "8M hugetlb folios are incompatible with split page table locks"
+#endif
+
static void __set_huge_pte_at(pmd_t *pmd, pte_t *ptep, pte_basic_t val)
{
pte_basic_t *entry = (pte_basic_t *)ptep;
diff --git a/arch/powerpc/platforms/pseries/papr-vpd.c b/arch/powerpc/platforms/pseries/papr-vpd.c
index c29e85db5f35..1574176e3ffc 100644
--- a/arch/powerpc/platforms/pseries/papr-vpd.c
+++ b/arch/powerpc/platforms/pseries/papr-vpd.c
@@ -156,10 +156,7 @@ static int vpd_blob_extend(struct vpd_blob *blob, const char *data, size_t len)
const char *old_ptr = blob->data;
char *new_ptr;
- new_ptr = old_ptr ?
- kvrealloc(old_ptr, old_len, new_len, GFP_KERNEL_ACCOUNT) :
- kvmalloc(len, GFP_KERNEL_ACCOUNT);
-
+ new_ptr = kvrealloc(old_ptr, new_len, GFP_KERNEL_ACCOUNT);
if (!new_ptr)
return -ENOMEM;
diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
index 5c589770f2a8..1461af12da6e 100644
--- a/arch/riscv/include/asm/Kbuild
+++ b/arch/riscv/include/asm/Kbuild
@@ -5,6 +5,7 @@ syscall-y += syscall_table_64.h
generic-y += early_ioremap.h
generic-y += flat.h
generic-y += kvm_para.h
+generic-y += mmzone.h
generic-y += parport.h
generic-y += spinlock.h
generic-y += spinlock_types.h
diff --git a/arch/riscv/include/asm/mmzone.h b/arch/riscv/include/asm/mmzone.h
deleted file mode 100644
index fa17e01d9ab2..000000000000
--- a/arch/riscv/include/asm/mmzone.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __ASM_MMZONE_H
-#define __ASM_MMZONE_H
-
-#ifdef CONFIG_NUMA
-
-#include <asm/numa.h>
-
-extern struct pglist_data *node_data[];
-#define NODE_DATA(nid) (node_data[(nid)])
-
-#endif /* CONFIG_NUMA */
-#endif /* __ASM_MMZONE_H */
diff --git a/arch/riscv/include/asm/topology.h b/arch/riscv/include/asm/topology.h
index 61183688bdd5..fe1a8bf6902d 100644
--- a/arch/riscv/include/asm/topology.h
+++ b/arch/riscv/include/asm/topology.h
@@ -4,6 +4,10 @@
#include <linux/arch_topology.h>
+#ifdef CONFIG_NUMA
+#include <asm/numa.h>
+#endif
+
/* Replace task scheduler's default frequency-invariant accounting */
#define arch_scale_freq_tick topology_scale_freq_tick
#define arch_set_freq_scale topology_set_freq_scale
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index 4b904110d27c..297bf7157968 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -7,3 +7,4 @@ generated-y += unistd_nr.h
generic-y += asm-offsets.h
generic-y += kvm_types.h
generic-y += mcs_spinlock.h
+generic-y += mmzone.h
diff --git a/arch/s390/include/asm/mmzone.h b/arch/s390/include/asm/mmzone.h
deleted file mode 100644
index 73e3e7c6976c..000000000000
--- a/arch/s390/include/asm/mmzone.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * NUMA support for s390
- *
- * Copyright IBM Corp. 2015
- */
-
-#ifndef _ASM_S390_MMZONE_H
-#define _ASM_S390_MMZONE_H
-
-#ifdef CONFIG_NUMA
-
-extern struct pglist_data *node_data[];
-#define NODE_DATA(nid) (node_data[nid])
-
-#endif /* CONFIG_NUMA */
-#endif /* _ASM_S390_MMZONE_H */
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 16e4caa931f1..73e1e03317b4 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -176,8 +176,6 @@ static inline int devmem_is_allowed(unsigned long pfn)
int arch_make_folio_accessible(struct folio *folio);
#define HAVE_ARCH_MAKE_FOLIO_ACCESSIBLE
-int arch_make_page_accessible(struct page *page);
-#define HAVE_ARCH_MAKE_PAGE_ACCESSIBLE
struct vm_layout {
unsigned long kaslr_offset;
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 3fa280d0672a..0ffbaf741955 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -955,6 +955,7 @@ static inline int pte_unused(pte_t pte)
* young/old accounting is not supported, i.e _PAGE_PROTECT and _PAGE_INVALID
* must not be set.
*/
+#define pte_pgprot pte_pgprot
static inline pgprot_t pte_pgprot(pte_t pte)
{
unsigned long pte_flags = pte_val(pte) & _PAGE_CHG_MASK;
diff --git a/arch/s390/kernel/numa.c b/arch/s390/kernel/numa.c
index 23ab9f02f278..ddc1448ea2e1 100644
--- a/arch/s390/kernel/numa.c
+++ b/arch/s390/kernel/numa.c
@@ -14,9 +14,6 @@
#include <linux/node.h>
#include <asm/numa.h>
-struct pglist_data *node_data[MAX_NUMNODES];
-EXPORT_SYMBOL(node_data);
-
void __init numa_setup(void)
{
int nid;
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
index 36db065c7cf7..9646f773208a 100644
--- a/arch/s390/kernel/uv.c
+++ b/arch/s390/kernel/uv.c
@@ -14,6 +14,7 @@
#include <linux/memblock.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
+#include <linux/pagewalk.h>
#include <asm/facility.h>
#include <asm/sections.h>
#include <asm/uv.h>
@@ -462,9 +463,9 @@ EXPORT_SYMBOL_GPL(gmap_convert_to_secure);
int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr)
{
struct vm_area_struct *vma;
+ struct folio_walk fw;
unsigned long uaddr;
struct folio *folio;
- struct page *page;
int rc;
rc = -EFAULT;
@@ -483,11 +484,15 @@ int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr)
goto out;
rc = 0;
- /* we take an extra reference here */
- page = follow_page(vma, uaddr, FOLL_WRITE | FOLL_GET);
- if (IS_ERR_OR_NULL(page))
+ folio = folio_walk_start(&fw, vma, uaddr, 0);
+ if (!folio)
goto out;
- folio = page_folio(page);
+ /*
+ * See gmap_make_secure(): large folios cannot be secure. Small
+ * folio implies FW_LEVEL_PTE.
+ */
+ if (folio_test_large(folio) || !pte_write(fw.pte))
+ goto out_walk_end;
rc = uv_destroy_folio(folio);
/*
* Fault handlers can race; it is possible that two CPUs will fault
@@ -500,7 +505,8 @@ int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr)
*/
if (rc)
rc = uv_convert_from_secure_folio(folio);
- folio_put(folio);
+out_walk_end:
+ folio_walk_end(&fw, vma);
out:
mmap_read_unlock(gmap->mm);
return rc;
@@ -548,11 +554,6 @@ int arch_make_folio_accessible(struct folio *folio)
}
EXPORT_SYMBOL_GPL(arch_make_folio_accessible);
-int arch_make_page_accessible(struct page *page)
-{
- return arch_make_folio_accessible(page_folio(page));
-}
-EXPORT_SYMBOL_GPL(arch_make_page_accessible);
static ssize_t uv_query_facilities(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 8e149ef5e89b..ad8b0d6b77ea 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -34,6 +34,7 @@
#include <linux/uaccess.h>
#include <linux/hugetlb.h>
#include <linux/kfence.h>
+#include <linux/pagewalk.h>
#include <asm/asm-extable.h>
#include <asm/asm-offsets.h>
#include <asm/ptrace.h>
@@ -492,9 +493,9 @@ void do_secure_storage_access(struct pt_regs *regs)
union teid teid = { .val = regs->int_parm_long };
unsigned long addr = get_fault_address(regs);
struct vm_area_struct *vma;
+ struct folio_walk fw;
struct mm_struct *mm;
struct folio *folio;
- struct page *page;
struct gmap *gmap;
int rc;
@@ -536,15 +537,18 @@ void do_secure_storage_access(struct pt_regs *regs)
vma = find_vma(mm, addr);
if (!vma)
return handle_fault_error(regs, SEGV_MAPERR);
- page = follow_page(vma, addr, FOLL_WRITE | FOLL_GET);
- if (IS_ERR_OR_NULL(page)) {
+ folio = folio_walk_start(&fw, vma, addr, 0);
+ if (!folio) {
mmap_read_unlock(mm);
break;
}
- folio = page_folio(page);
- if (arch_make_folio_accessible(folio))
- send_sig(SIGSEGV, current, 0);
+ /* arch_make_folio_accessible() needs a raised refcount. */
+ folio_get(folio);
+ rc = arch_make_folio_accessible(folio);
folio_put(folio);
+ folio_walk_end(&fw, vma);
+ if (rc)
+ send_sig(SIGSEGV, current, 0);
mmap_read_unlock(mm);
break;
case KERNEL_FAULT:
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index 206756946589..96efa061ce01 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -82,7 +82,7 @@ static int get_align_mask(struct file *filp, unsigned long flags)
unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
unsigned long len, unsigned long pgoff,
- unsigned long flags)
+ unsigned long flags, vm_flags_t vm_flags)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
@@ -117,7 +117,7 @@ check_asce_limit:
unsigned long arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
unsigned long len, unsigned long pgoff,
- unsigned long flags)
+ unsigned long flags, vm_flags_t vm_flags)
{
struct vm_area_struct *vma;
struct mm_struct *mm = current->mm;
diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c
index 5398729bfe1b..de5c0b389a3e 100644
--- a/arch/s390/pci/pci_mmio.c
+++ b/arch/s390/pci/pci_mmio.c
@@ -118,12 +118,11 @@ static inline int __memcpy_toio_inuser(void __iomem *dst,
SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr,
const void __user *, user_buffer, size_t, length)
{
+ struct follow_pfnmap_args args = { };
u8 local_buf[64];
void __iomem *io_addr;
void *buf;
struct vm_area_struct *vma;
- pte_t *ptep;
- spinlock_t *ptl;
long ret;
if (!zpci_is_enabled())
@@ -169,11 +168,13 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr,
if (!(vma->vm_flags & VM_WRITE))
goto out_unlock_mmap;
- ret = follow_pte(vma, mmio_addr, &ptep, &ptl);
+ args.address = mmio_addr;
+ args.vma = vma;
+ ret = follow_pfnmap_start(&args);
if (ret)
goto out_unlock_mmap;
- io_addr = (void __iomem *)((pte_pfn(*ptep) << PAGE_SHIFT) |
+ io_addr = (void __iomem *)((args.pfn << PAGE_SHIFT) |
(mmio_addr & ~PAGE_MASK));
if ((unsigned long) io_addr < ZPCI_IOMAP_ADDR_BASE)
@@ -181,7 +182,7 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr,
ret = zpci_memcpy_toio(io_addr, buf, length);
out_unlock_pt:
- pte_unmap_unlock(ptep, ptl);
+ follow_pfnmap_end(&args);
out_unlock_mmap:
mmap_read_unlock(current->mm);
out_free:
@@ -260,12 +261,11 @@ static inline int __memcpy_fromio_inuser(void __user *dst,
SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr,
void __user *, user_buffer, size_t, length)
{
+ struct follow_pfnmap_args args = { };
u8 local_buf[64];
void __iomem *io_addr;
void *buf;
struct vm_area_struct *vma;
- pte_t *ptep;
- spinlock_t *ptl;
long ret;
if (!zpci_is_enabled())
@@ -308,11 +308,13 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr,
if (!(vma->vm_flags & VM_WRITE))
goto out_unlock_mmap;
- ret = follow_pte(vma, mmio_addr, &ptep, &ptl);
+ args.vma = vma;
+ args.address = mmio_addr;
+ ret = follow_pfnmap_start(&args);
if (ret)
goto out_unlock_mmap;
- io_addr = (void __iomem *)((pte_pfn(*ptep) << PAGE_SHIFT) |
+ io_addr = (void __iomem *)((args.pfn << PAGE_SHIFT) |
(mmio_addr & ~PAGE_MASK));
if ((unsigned long) io_addr < ZPCI_IOMAP_ADDR_BASE) {
@@ -322,7 +324,7 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr,
ret = zpci_memcpy_fromio(buf, io_addr, length);
out_unlock_pt:
- pte_unmap_unlock(ptep, ptl);
+ follow_pfnmap_end(&args);
out_unlock_mmap:
mmap_read_unlock(current->mm);
diff --git a/arch/sh/include/asm/mmzone.h b/arch/sh/include/asm/mmzone.h
index 7b8dead2723d..63f88b465e39 100644
--- a/arch/sh/include/asm/mmzone.h
+++ b/arch/sh/include/asm/mmzone.h
@@ -5,9 +5,6 @@
#ifdef CONFIG_NUMA
#include <linux/numa.h>
-extern struct pglist_data *node_data[];
-#define NODE_DATA(nid) (node_data[nid])
-
static inline int pfn_to_nid(unsigned long pfn)
{
int nid;
diff --git a/arch/sh/kernel/vsyscall/vsyscall.c b/arch/sh/kernel/vsyscall/vsyscall.c
index 1bd85a6949c4..add35c51e017 100644
--- a/arch/sh/kernel/vsyscall/vsyscall.c
+++ b/arch/sh/kernel/vsyscall/vsyscall.c
@@ -36,6 +36,10 @@ __setup("vdso=", vdso_setup);
*/
extern const char vsyscall_trapa_start, vsyscall_trapa_end;
static struct page *syscall_pages[1];
+static struct vm_special_mapping vdso_mapping = {
+ .name = "[vdso]",
+ .pages = syscall_pages,
+};
int __init vsyscall_init(void)
{
@@ -58,6 +62,7 @@ int __init vsyscall_init(void)
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
unsigned long addr;
int ret;
@@ -70,14 +75,17 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
goto up_fail;
}
- ret = install_special_mapping(mm, addr, PAGE_SIZE,
+ vdso_mapping.pages = syscall_pages;
+ vma = _install_special_mapping(mm, addr, PAGE_SIZE,
VM_READ | VM_EXEC |
VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC,
- syscall_pages);
- if (unlikely(ret))
+ &vdso_mapping);
+ ret = PTR_ERR(vma);
+ if (IS_ERR(vma))
goto up_fail;
current->mm->context.vdso = (void *)addr;
+ ret = 0;
up_fail:
mmap_write_unlock(mm);
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index d1fe90b2f5ff..2a88b0c9e70f 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -212,12 +212,7 @@ void __init allocate_pgdat(unsigned int nid)
get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
#ifdef CONFIG_NUMA
- NODE_DATA(nid) = memblock_alloc_try_nid(
- sizeof(struct pglist_data),
- SMP_CACHE_BYTES, MEMBLOCK_LOW_LIMIT,
- MEMBLOCK_ALLOC_ACCESSIBLE, nid);
- if (!NODE_DATA(nid))
- panic("Can't allocate pgdat for node %d\n", nid);
+ alloc_node_data(nid);
#endif
NODE_DATA(nid)->node_start_pfn = start_pfn;
diff --git a/arch/sh/mm/mmap.c b/arch/sh/mm/mmap.c
index bee329d4149a..c442734d9b0c 100644
--- a/arch/sh/mm/mmap.c
+++ b/arch/sh/mm/mmap.c
@@ -52,7 +52,8 @@ static inline unsigned long COLOUR_ALIGN(unsigned long addr,
}
unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
- unsigned long len, unsigned long pgoff, unsigned long flags)
+ unsigned long len, unsigned long pgoff, unsigned long flags,
+ vm_flags_t vm_flags)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
@@ -99,7 +100,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
unsigned long
arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
const unsigned long len, const unsigned long pgoff,
- const unsigned long flags)
+ const unsigned long flags, vm_flags_t vm_flags)
{
struct vm_area_struct *vma;
struct mm_struct *mm = current->mm;
diff --git a/arch/sh/mm/numa.c b/arch/sh/mm/numa.c
index 50f0dc1744d0..9bc212b5e762 100644
--- a/arch/sh/mm/numa.c
+++ b/arch/sh/mm/numa.c
@@ -14,9 +14,6 @@
#include <linux/pfn.h>
#include <asm/sections.h>
-struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
-EXPORT_SYMBOL_GPL(node_data);
-
/*
* On SH machines the conventional approach is to stash system RAM
* in node 0, and other memory blocks in to node 1 and up, ordered by
diff --git a/arch/sparc/include/asm/mmzone.h b/arch/sparc/include/asm/mmzone.h
index a236d8aa893a..74eb2c71d077 100644
--- a/arch/sparc/include/asm/mmzone.h
+++ b/arch/sparc/include/asm/mmzone.h
@@ -6,10 +6,6 @@
#include <linux/cpumask.h>
-extern struct pglist_data *node_data[];
-
-#define NODE_DATA(nid) (node_data[nid])
-
extern int numa_cpu_lookup_table[];
extern cpumask_t numa_cpumask_lookup_table[];
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index 3fe429d73a65..2b7f358762c1 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -783,6 +783,7 @@ static inline pmd_t pmd_mkwrite_novma(pmd_t pmd)
return __pmd(pte_val(pte));
}
+#define pmd_pgprot pmd_pgprot
static inline pgprot_t pmd_pgprot(pmd_t entry)
{
unsigned long val = pmd_val(entry);
diff --git a/arch/sparc/kernel/sys_sparc_32.c b/arch/sparc/kernel/sys_sparc_32.c
index 08a19727795c..80822f922e76 100644
--- a/arch/sparc/kernel/sys_sparc_32.c
+++ b/arch/sparc/kernel/sys_sparc_32.c
@@ -39,7 +39,7 @@ SYSCALL_DEFINE0(getpagesize)
return PAGE_SIZE; /* Possibly older binaries want 8192 on sun4's? */
}
-unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags)
+unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags, vm_flags_t vm_flags)
{
struct vm_unmapped_area_info info = {};
diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index d9c3b34ca744..acade309dc2f 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -87,7 +87,7 @@ static inline unsigned long COLOR_ALIGN(unsigned long addr,
return base + off;
}
-unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags)
+unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags, vm_flags_t vm_flags)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct * vma;
@@ -146,7 +146,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi
unsigned long
arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
const unsigned long len, const unsigned long pgoff,
- const unsigned long flags)
+ const unsigned long flags, vm_flags_t vm_flags)
{
struct vm_area_struct *vma;
struct mm_struct *mm = current->mm;
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 53d7cb5bbffe..21f8cbbd0581 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -1075,14 +1075,9 @@ static void __init allocate_node_data(int nid)
{
struct pglist_data *p;
unsigned long start_pfn, end_pfn;
-#ifdef CONFIG_NUMA
- NODE_DATA(nid) = memblock_alloc_node(sizeof(struct pglist_data),
- SMP_CACHE_BYTES, nid);
- if (!NODE_DATA(nid)) {
- prom_printf("Cannot allocate pglist_data for nid[%d]\n", nid);
- prom_halt();
- }
+#ifdef CONFIG_NUMA
+ alloc_node_data(nid);
NODE_DATA(nid)->node_id = nid;
#endif
@@ -1115,11 +1110,9 @@ static void init_node_masks_nonnuma(void)
}
#ifdef CONFIG_NUMA
-struct pglist_data *node_data[MAX_NUMNODES];
EXPORT_SYMBOL(numa_cpu_lookup_table);
EXPORT_SYMBOL(numa_cpumask_lookup_table);
-EXPORT_SYMBOL(node_data);
static int scan_pio_for_cfg_handle(struct mdesc_handle *md, u64 pio,
u32 cfg_handle)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 273b17a5cb81..2852fcd82cbd 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -28,6 +28,7 @@ config X86_64
select ARCH_HAS_GIGANTIC_PAGE
select ARCH_SUPPORTS_INT128 if CC_HAS_INT128
select ARCH_SUPPORTS_PER_VMA_LOCK
+ select ARCH_SUPPORTS_HUGE_PFNMAP if TRANSPARENT_HUGEPAGE
select HAVE_ARCH_SOFT_DIRTY
select MODULES_USE_ELF_RELA
select NEED_DMA_MAP_STATE
@@ -299,6 +300,7 @@ config X86
select NEED_PER_CPU_EMBED_FIRST_CHUNK
select NEED_PER_CPU_PAGE_FIRST_CHUNK
select NEED_SG_DMA_LENGTH
+ select NUMA_MEMBLKS if NUMA
select PCI_DOMAINS if PCI
select PCI_LOCKLESS_CONFIG if PCI
select PERF_EVENTS
@@ -1601,14 +1603,6 @@ config X86_64_ACPI_NUMA
help
Enable ACPI SRAT based node topology detection.
-config NUMA_EMU
- bool "NUMA emulation"
- depends on NUMA
- help
- Enable NUMA emulation. A flat machine will be split
- into virtual nodes when booted with "numa=fake=N", where N is the
- number of nodes. This is only useful for debugging.
-
config NODES_SHIFT
int "Maximum NUMA Nodes (as a power of 2)" if !MAXSMP
range 1 10
@@ -1808,6 +1802,7 @@ config X86_PAT
def_bool y
prompt "x86 PAT support" if EXPERT
depends on MTRR
+ select ARCH_USES_PG_ARCH_2
help
Use PAT attributes to setup page level cache control.
@@ -1819,10 +1814,6 @@ config X86_PAT
If unsure, say Y.
-config ARCH_USES_PG_UNCACHED
- def_bool y
- depends on X86_PAT
-
config X86_UMIP
def_bool y
prompt "User Mode Instruction Prevention" if EXPERT
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 944454306ef4..04a35b2c26e9 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -511,7 +511,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output)
if (init_unaccepted_memory()) {
debug_putstr("Accepting memory... ");
- accept_memory(__pa(output), __pa(output) + needed_size);
+ accept_memory(__pa(output), needed_size);
}
entry_offset = decompress_kernel(output, virt_addr, error);
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index b353a7be380c..dd8d1a85f671 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -256,6 +256,6 @@ static inline bool init_unaccepted_memory(void) { return false; }
/* Defined in EFI stub */
extern struct efi_unaccepted_memory *unaccepted_table;
-void accept_memory(phys_addr_t start, phys_addr_t end);
+void accept_memory(phys_addr_t start, unsigned long size);
#endif /* BOOT_COMPRESSED_MISC_H */
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index a192bdea69e2..6c23d1661b17 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -11,3 +11,4 @@ generated-y += xen-hypercalls.h
generic-y += early_ioremap.h
generic-y += mcs_spinlock.h
+generic-y += mmzone.h
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 19091ebb8633..2886cb668d7f 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -238,11 +238,6 @@ static inline bool is_64bit_mm(struct mm_struct *mm)
}
#endif
-static inline void arch_unmap(struct mm_struct *mm, unsigned long start,
- unsigned long end)
-{
-}
-
/*
* We only want to enforce protection keys on the current process
* because we effectively have no access to PKRU for other
diff --git a/arch/x86/include/asm/mmzone.h b/arch/x86/include/asm/mmzone.h
deleted file mode 100644
index c41b41edd691..000000000000
--- a/arch/x86/include/asm/mmzone.h
+++ /dev/null
@@ -1,6 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifdef CONFIG_X86_32
-# include <asm/mmzone_32.h>
-#else
-# include <asm/mmzone_64.h>
-#endif
diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h
deleted file mode 100644
index 2d4515e8b7df..000000000000
--- a/arch/x86/include/asm/mmzone_32.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Written by Pat Gaughen (gone@us.ibm.com) Mar 2002
- *
- */
-
-#ifndef _ASM_X86_MMZONE_32_H
-#define _ASM_X86_MMZONE_32_H
-
-#include <asm/smp.h>
-
-#ifdef CONFIG_NUMA
-extern struct pglist_data *node_data[];
-#define NODE_DATA(nid) (node_data[nid])
-#endif /* CONFIG_NUMA */
-
-#endif /* _ASM_X86_MMZONE_32_H */
diff --git a/arch/x86/include/asm/mmzone_64.h b/arch/x86/include/asm/mmzone_64.h
deleted file mode 100644
index 0c585046f744..000000000000
--- a/arch/x86/include/asm/mmzone_64.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* K8 NUMA support */
-/* Copyright 2002,2003 by Andi Kleen, SuSE Labs */
-/* 2.5 Version loosely based on the NUMAQ Code by Pat Gaughen. */
-#ifndef _ASM_X86_MMZONE_64_H
-#define _ASM_X86_MMZONE_64_H
-
-#ifdef CONFIG_NUMA
-
-#include <linux/mmdebug.h>
-#include <asm/smp.h>
-
-extern struct pglist_data *node_data[];
-
-#define NODE_DATA(nid) (node_data[nid])
-
-#endif
-#endif /* _ASM_X86_MMZONE_64_H */
diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index ef2844d69173..5469d7a7c40f 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -10,8 +10,6 @@
#ifdef CONFIG_NUMA
-#define NR_NODE_MEMBLKS (MAX_NUMNODES*2)
-
extern int numa_off;
/*
@@ -25,9 +23,6 @@ extern int numa_off;
extern s16 __apicid_to_node[MAX_LOCAL_APIC];
extern nodemask_t numa_nodes_parsed __initdata;
-extern int __init numa_add_memblk(int nodeid, u64 start, u64 end);
-extern void __init numa_set_distance(int from, int to, int distance);
-
static inline void set_apicid_to_node(int apicid, s16 node)
{
__apicid_to_node[apicid] = node;
@@ -54,31 +49,20 @@ static inline int numa_cpu_node(int cpu)
extern void numa_set_node(int cpu, int node);
extern void numa_clear_node(int cpu);
extern void __init init_cpu_to_node(void);
-extern void numa_add_cpu(int cpu);
-extern void numa_remove_cpu(int cpu);
+extern void numa_add_cpu(unsigned int cpu);
+extern void numa_remove_cpu(unsigned int cpu);
extern void init_gi_nodes(void);
#else /* CONFIG_NUMA */
static inline void numa_set_node(int cpu, int node) { }
static inline void numa_clear_node(int cpu) { }
static inline void init_cpu_to_node(void) { }
-static inline void numa_add_cpu(int cpu) { }
-static inline void numa_remove_cpu(int cpu) { }
+static inline void numa_add_cpu(unsigned int cpu) { }
+static inline void numa_remove_cpu(unsigned int cpu) { }
static inline void init_gi_nodes(void) { }
#endif /* CONFIG_NUMA */
#ifdef CONFIG_DEBUG_PER_CPU_MAPS
-void debug_cpumask_set_cpu(int cpu, int node, bool enable);
+void debug_cpumask_set_cpu(unsigned int cpu, int node, bool enable);
#endif
-#ifdef CONFIG_NUMA_EMU
-#define FAKE_NODE_MIN_SIZE ((u64)32 << 20)
-#define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL))
-int numa_emu_cmdline(char *str);
-#else /* CONFIG_NUMA_EMU */
-static inline int numa_emu_cmdline(char *str)
-{
- return -EINVAL;
-}
-#endif /* CONFIG_NUMA_EMU */
-
#endif /* _ASM_X86_NUMA_H */
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index e39311a89bf4..4c2d080d26b4 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -120,6 +120,34 @@ extern pmdval_t early_pmd_flags;
#define arch_end_context_switch(prev) do {} while(0)
#endif /* CONFIG_PARAVIRT_XXL */
+static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set)
+{
+ pmdval_t v = native_pmd_val(pmd);
+
+ return native_make_pmd(v | set);
+}
+
+static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear)
+{
+ pmdval_t v = native_pmd_val(pmd);
+
+ return native_make_pmd(v & ~clear);
+}
+
+static inline pud_t pud_set_flags(pud_t pud, pudval_t set)
+{
+ pudval_t v = native_pud_val(pud);
+
+ return native_make_pud(v | set);
+}
+
+static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear)
+{
+ pudval_t v = native_pud_val(pud);
+
+ return native_make_pud(v & ~clear);
+}
+
/*
* The following only work if pte_present() is true.
* Undefined behaviour if not..
@@ -174,6 +202,13 @@ static inline int pud_young(pud_t pud)
return pud_flags(pud) & _PAGE_ACCESSED;
}
+static inline bool pud_shstk(pud_t pud)
+{
+ return cpu_feature_enabled(X86_FEATURE_SHSTK) &&
+ (pud_flags(pud) & (_PAGE_RW | _PAGE_DIRTY | _PAGE_PSE)) ==
+ (_PAGE_DIRTY | _PAGE_PSE);
+}
+
static inline int pte_write(pte_t pte)
{
/*
@@ -310,6 +345,30 @@ static inline int pud_devmap(pud_t pud)
}
#endif
+#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP
+static inline bool pmd_special(pmd_t pmd)
+{
+ return pmd_flags(pmd) & _PAGE_SPECIAL;
+}
+
+static inline pmd_t pmd_mkspecial(pmd_t pmd)
+{
+ return pmd_set_flags(pmd, _PAGE_SPECIAL);
+}
+#endif /* CONFIG_ARCH_SUPPORTS_PMD_PFNMAP */
+
+#ifdef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP
+static inline bool pud_special(pud_t pud)
+{
+ return pud_flags(pud) & _PAGE_SPECIAL;
+}
+
+static inline pud_t pud_mkspecial(pud_t pud)
+{
+ return pud_set_flags(pud, _PAGE_SPECIAL);
+}
+#endif /* CONFIG_ARCH_SUPPORTS_PUD_PFNMAP */
+
static inline int pgd_devmap(pgd_t pgd)
{
return 0;
@@ -480,20 +539,6 @@ static inline pte_t pte_mkdevmap(pte_t pte)
return pte_set_flags(pte, _PAGE_SPECIAL|_PAGE_DEVMAP);
}
-static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set)
-{
- pmdval_t v = native_pmd_val(pmd);
-
- return native_make_pmd(v | set);
-}
-
-static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear)
-{
- pmdval_t v = native_pmd_val(pmd);
-
- return native_make_pmd(v & ~clear);
-}
-
/* See comments above mksaveddirty_shift() */
static inline pmd_t pmd_mksaveddirty(pmd_t pmd)
{
@@ -588,20 +633,6 @@ static inline pmd_t pmd_mkwrite_novma(pmd_t pmd)
pmd_t pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);
#define pmd_mkwrite pmd_mkwrite
-static inline pud_t pud_set_flags(pud_t pud, pudval_t set)
-{
- pudval_t v = native_pud_val(pud);
-
- return native_make_pud(v | set);
-}
-
-static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear)
-{
- pudval_t v = native_pud_val(pud);
-
- return native_make_pud(v & ~clear);
-}
-
/* See comments above mksaveddirty_shift() */
static inline pud_t pud_mksaveddirty(pud_t pud)
{
@@ -780,6 +811,12 @@ static inline pmd_t pmd_mkinvalid(pmd_t pmd)
__pgprot(pmd_flags(pmd) & ~(_PAGE_PRESENT|_PAGE_PROTNONE)));
}
+static inline pud_t pud_mkinvalid(pud_t pud)
+{
+ return pfn_pud(pud_pfn(pud),
+ __pgprot(pud_flags(pud) & ~(_PAGE_PRESENT|_PAGE_PROTNONE)));
+}
+
static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask);
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
@@ -827,14 +864,8 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
pmd_result = __pmd(val);
/*
- * To avoid creating Write=0,Dirty=1 PMDs, pte_modify() needs to avoid:
- * 1. Marking Write=0 PMDs Dirty=1
- * 2. Marking Dirty=1 PMDs Write=0
- *
- * The first case cannot happen because the _PAGE_CHG_MASK will filter
- * out any Dirty bit passed in newprot. Handle the second case by
- * going through the mksaveddirty exercise. Only do this if the old
- * value was Write=1 to avoid doing this on Shadow Stack PTEs.
+ * Avoid creating shadow stack PMD by accident. See comment in
+ * pte_modify().
*/
if (oldval & _PAGE_RW)
pmd_result = pmd_mksaveddirty(pmd_result);
@@ -844,6 +875,29 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
return pmd_result;
}
+static inline pud_t pud_modify(pud_t pud, pgprot_t newprot)
+{
+ pudval_t val = pud_val(pud), oldval = val;
+ pud_t pud_result;
+
+ val &= _HPAGE_CHG_MASK;
+ val |= check_pgprot(newprot) & ~_HPAGE_CHG_MASK;
+ val = flip_protnone_guard(oldval, val, PHYSICAL_PUD_PAGE_MASK);
+
+ pud_result = __pud(val);
+
+ /*
+ * Avoid creating shadow stack PUD by accident. See comment in
+ * pte_modify().
+ */
+ if (oldval & _PAGE_RW)
+ pud_result = pud_mksaveddirty(pud_result);
+ else
+ pud_result = pud_clear_saveddirty(pud_result);
+
+ return pud_result;
+}
+
/*
* mprotect needs to preserve PAT and encryption bits when updating
* vm_page_prot
@@ -1078,8 +1132,7 @@ static inline pmd_t *pud_pgtable(pud_t pud)
#define pud_leaf pud_leaf
static inline bool pud_leaf(pud_t pud)
{
- return (pud_val(pud) & (_PAGE_PSE | _PAGE_PRESENT)) ==
- (_PAGE_PSE | _PAGE_PRESENT);
+ return pud_val(pud) & _PAGE_PSE;
}
static inline int pud_bad(pud_t pud)
@@ -1383,10 +1436,28 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
}
#endif
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+static inline pud_t pudp_establish(struct vm_area_struct *vma,
+ unsigned long address, pud_t *pudp, pud_t pud)
+{
+ page_table_check_pud_set(vma->vm_mm, pudp, pud);
+ if (IS_ENABLED(CONFIG_SMP)) {
+ return xchg(pudp, pud);
+ } else {
+ pud_t old = *pudp;
+ WRITE_ONCE(*pudp, pud);
+ return old;
+ }
+}
+#endif
+
#define __HAVE_ARCH_PMDP_INVALIDATE_AD
extern pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp);
+pud_t pudp_invalidate(struct vm_area_struct *vma, unsigned long address,
+ pud_t *pudp);
+
/*
* Page table pages are page-aligned. The lower half of the top
* level is used for userspace and the top half for the kernel.
@@ -1668,6 +1739,9 @@ void arch_check_zapped_pte(struct vm_area_struct *vma, pte_t pte);
#define arch_check_zapped_pmd arch_check_zapped_pmd
void arch_check_zapped_pmd(struct vm_area_struct *vma, pmd_t pmd);
+#define arch_check_zapped_pud arch_check_zapped_pud
+void arch_check_zapped_pud(struct vm_area_struct *vma, pud_t pud);
+
#ifdef CONFIG_XEN_PV
#define arch_has_hw_nonleaf_pmd_young arch_has_hw_nonleaf_pmd_young
static inline bool arch_has_hw_nonleaf_pmd_young(void)
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 3c4407271d08..7e9db77231ac 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -245,7 +245,6 @@ extern void cleanup_highmap(void);
#define HAVE_ARCH_UNMAPPED_AREA
#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
-#define HAVE_ARCH_UNMAPPED_AREA_VMFLAGS
#define PAGE_AGP PAGE_KERNEL_NOCACHE
#define HAVE_PAGE_AGP 1
diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h
index 64df897c0ee3..3918c7a434f5 100644
--- a/arch/x86/include/asm/sparsemem.h
+++ b/arch/x86/include/asm/sparsemem.h
@@ -31,13 +31,4 @@
#endif /* CONFIG_SPARSEMEM */
-#ifndef __ASSEMBLY__
-#ifdef CONFIG_NUMA_KEEP_MEMINFO
-extern int phys_to_target_node(phys_addr_t start);
-#define phys_to_target_node phys_to_target_node
-extern int memory_add_physaddr_to_nid(u64 start);
-#define memory_add_physaddr_to_nid memory_add_physaddr_to_nid
-#endif
-#endif /* __ASSEMBLY__ */
-
#endif /* _ASM_X86_SPARSEMEM_H */
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 01d7cd85ef97..87f8c9a71c49 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -121,7 +121,7 @@ static inline unsigned long stack_guard_placement(vm_flags_t vm_flags)
}
unsigned long
-arch_get_unmapped_area_vmflags(struct file *filp, unsigned long addr, unsigned long len,
+arch_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len,
unsigned long pgoff, unsigned long flags, vm_flags_t vm_flags)
{
struct mm_struct *mm = current->mm;
@@ -158,7 +158,7 @@ arch_get_unmapped_area_vmflags(struct file *filp, unsigned long addr, unsigned l
}
unsigned long
-arch_get_unmapped_area_topdown_vmflags(struct file *filp, unsigned long addr0,
+arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr0,
unsigned long len, unsigned long pgoff,
unsigned long flags, vm_flags_t vm_flags)
{
@@ -228,20 +228,5 @@ bottomup:
* can happen with large stack limits and large mmap()
* allocations.
*/
- return arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
-}
-
-unsigned long
-arch_get_unmapped_area(struct file *filp, unsigned long addr,
- unsigned long len, unsigned long pgoff, unsigned long flags)
-{
- return arch_get_unmapped_area_vmflags(filp, addr, len, pgoff, flags, 0);
-}
-
-unsigned long
-arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr,
- const unsigned long len, const unsigned long pgoff,
- const unsigned long flags)
-{
- return arch_get_unmapped_area_topdown_vmflags(filp, addr, len, pgoff, flags, 0);
+ return arch_get_unmapped_area(filp, addr0, len, pgoff, flags, 0);
}
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 8d3a00e5c528..690fbf48e853 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -57,7 +57,6 @@ obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o
obj-$(CONFIG_NUMA) += numa.o numa_$(BITS).o
obj-$(CONFIG_AMD_NUMA) += amdtopology.o
obj-$(CONFIG_ACPI_NUMA) += srat.o
-obj-$(CONFIG_NUMA_EMU) += numa_emulation.o
obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o
obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o
diff --git a/arch/x86/mm/amdtopology.c b/arch/x86/mm/amdtopology.c
index 9332b36a1091..628833afee37 100644
--- a/arch/x86/mm/amdtopology.c
+++ b/arch/x86/mm/amdtopology.c
@@ -12,6 +12,7 @@
#include <linux/string.h>
#include <linux/nodemask.h>
#include <linux/memblock.h>
+#include <linux/numa_memblks.h>
#include <asm/io.h>
#include <linux/pci_ids.h>
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 6ce10e3c6228..64e5cdb2460a 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -13,6 +13,7 @@
#include <linux/sched.h>
#include <linux/topology.h>
#include <linux/sort.h>
+#include <linux/numa_memblks.h>
#include <asm/e820/api.h>
#include <asm/proto.h>
@@ -22,16 +23,6 @@
#include "numa_internal.h"
int numa_off;
-nodemask_t numa_nodes_parsed __initdata;
-
-struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
-EXPORT_SYMBOL(node_data);
-
-static struct numa_meminfo numa_meminfo __initdata_or_meminfo;
-static struct numa_meminfo numa_reserved_meminfo __initdata_or_meminfo;
-
-static int numa_distance_cnt;
-static u8 *numa_distance;
static __init int numa_setup(char *opt)
{
@@ -124,456 +115,27 @@ void __init setup_node_to_cpumask_map(void)
pr_debug("Node to cpumask map for %u nodes\n", nr_node_ids);
}
-static int __init numa_add_memblk_to(int nid, u64 start, u64 end,
- struct numa_meminfo *mi)
-{
- /* ignore zero length blks */
- if (start == end)
- return 0;
-
- /* whine about and ignore invalid blks */
- if (start > end || nid < 0 || nid >= MAX_NUMNODES) {
- pr_warn("Warning: invalid memblk node %d [mem %#010Lx-%#010Lx]\n",
- nid, start, end - 1);
- return 0;
- }
-
- if (mi->nr_blks >= NR_NODE_MEMBLKS) {
- pr_err("too many memblk ranges\n");
- return -EINVAL;
- }
-
- mi->blk[mi->nr_blks].start = start;
- mi->blk[mi->nr_blks].end = end;
- mi->blk[mi->nr_blks].nid = nid;
- mi->nr_blks++;
- return 0;
-}
-
-/**
- * numa_remove_memblk_from - Remove one numa_memblk from a numa_meminfo
- * @idx: Index of memblk to remove
- * @mi: numa_meminfo to remove memblk from
- *
- * Remove @idx'th numa_memblk from @mi by shifting @mi->blk[] and
- * decrementing @mi->nr_blks.
- */
-void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi)
-{
- mi->nr_blks--;
- memmove(&mi->blk[idx], &mi->blk[idx + 1],
- (mi->nr_blks - idx) * sizeof(mi->blk[0]));
-}
-
-/**
- * numa_move_tail_memblk - Move a numa_memblk from one numa_meminfo to another
- * @dst: numa_meminfo to append block to
- * @idx: Index of memblk to remove
- * @src: numa_meminfo to remove memblk from
- */
-static void __init numa_move_tail_memblk(struct numa_meminfo *dst, int idx,
- struct numa_meminfo *src)
-{
- dst->blk[dst->nr_blks++] = src->blk[idx];
- numa_remove_memblk_from(idx, src);
-}
-
-/**
- * numa_add_memblk - Add one numa_memblk to numa_meminfo
- * @nid: NUMA node ID of the new memblk
- * @start: Start address of the new memblk
- * @end: End address of the new memblk
- *
- * Add a new memblk to the default numa_meminfo.
- *
- * RETURNS:
- * 0 on success, -errno on failure.
- */
-int __init numa_add_memblk(int nid, u64 start, u64 end)
-{
- return numa_add_memblk_to(nid, start, end, &numa_meminfo);
-}
-
-/* Allocate NODE_DATA for a node on the local memory */
-static void __init alloc_node_data(int nid)
+static int __init numa_register_nodes(void)
{
- const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
- u64 nd_pa;
- void *nd;
- int tnid;
-
- /*
- * Allocate node data. Try node-local memory and then any node.
- * Never allocate in DMA zone.
- */
- nd_pa = memblock_phys_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
- if (!nd_pa) {
- pr_err("Cannot find %zu bytes in any node (initial node: %d)\n",
- nd_size, nid);
- return;
- }
- nd = __va(nd_pa);
-
- /* report and initialize */
- printk(KERN_INFO "NODE_DATA(%d) allocated [mem %#010Lx-%#010Lx]\n", nid,
- nd_pa, nd_pa + nd_size - 1);
- tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT);
- if (tnid != nid)
- printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nid, tnid);
-
- node_data[nid] = nd;
- memset(NODE_DATA(nid), 0, sizeof(pg_data_t));
-
- node_set_online(nid);
-}
-
-/**
- * numa_cleanup_meminfo - Cleanup a numa_meminfo
- * @mi: numa_meminfo to clean up
- *
- * Sanitize @mi by merging and removing unnecessary memblks. Also check for
- * conflicts and clear unused memblks.
- *
- * RETURNS:
- * 0 on success, -errno on failure.
- */
-int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
-{
- const u64 low = 0;
- const u64 high = PFN_PHYS(max_pfn);
- int i, j, k;
-
- /* first, trim all entries */
- for (i = 0; i < mi->nr_blks; i++) {
- struct numa_memblk *bi = &mi->blk[i];
-
- /* move / save reserved memory ranges */
- if (!memblock_overlaps_region(&memblock.memory,
- bi->start, bi->end - bi->start)) {
- numa_move_tail_memblk(&numa_reserved_meminfo, i--, mi);
- continue;
- }
-
- /* make sure all non-reserved blocks are inside the limits */
- bi->start = max(bi->start, low);
-
- /* preserve info for non-RAM areas above 'max_pfn': */
- if (bi->end > high) {
- numa_add_memblk_to(bi->nid, high, bi->end,
- &numa_reserved_meminfo);
- bi->end = high;
- }
-
- /* and there's no empty block */
- if (bi->start >= bi->end)
- numa_remove_memblk_from(i--, mi);
- }
-
- /* merge neighboring / overlapping entries */
- for (i = 0; i < mi->nr_blks; i++) {
- struct numa_memblk *bi = &mi->blk[i];
-
- for (j = i + 1; j < mi->nr_blks; j++) {
- struct numa_memblk *bj = &mi->blk[j];
- u64 start, end;
-
- /*
- * See whether there are overlapping blocks. Whine
- * about but allow overlaps of the same nid. They
- * will be merged below.
- */
- if (bi->end > bj->start && bi->start < bj->end) {
- if (bi->nid != bj->nid) {
- pr_err("node %d [mem %#010Lx-%#010Lx] overlaps with node %d [mem %#010Lx-%#010Lx]\n",
- bi->nid, bi->start, bi->end - 1,
- bj->nid, bj->start, bj->end - 1);
- return -EINVAL;
- }
- pr_warn("Warning: node %d [mem %#010Lx-%#010Lx] overlaps with itself [mem %#010Lx-%#010Lx]\n",
- bi->nid, bi->start, bi->end - 1,
- bj->start, bj->end - 1);
- }
-
- /*
- * Join together blocks on the same node, holes
- * between which don't overlap with memory on other
- * nodes.
- */
- if (bi->nid != bj->nid)
- continue;
- start = min(bi->start, bj->start);
- end = max(bi->end, bj->end);
- for (k = 0; k < mi->nr_blks; k++) {
- struct numa_memblk *bk = &mi->blk[k];
-
- if (bi->nid == bk->nid)
- continue;
- if (start < bk->end && end > bk->start)
- break;
- }
- if (k < mi->nr_blks)
- continue;
- printk(KERN_INFO "NUMA: Node %d [mem %#010Lx-%#010Lx] + [mem %#010Lx-%#010Lx] -> [mem %#010Lx-%#010Lx]\n",
- bi->nid, bi->start, bi->end - 1, bj->start,
- bj->end - 1, start, end - 1);
- bi->start = start;
- bi->end = end;
- numa_remove_memblk_from(j--, mi);
- }
- }
-
- /* clear unused ones */
- for (i = mi->nr_blks; i < ARRAY_SIZE(mi->blk); i++) {
- mi->blk[i].start = mi->blk[i].end = 0;
- mi->blk[i].nid = NUMA_NO_NODE;
- }
-
- return 0;
-}
-
-/*
- * Set nodes, which have memory in @mi, in *@nodemask.
- */
-static void __init numa_nodemask_from_meminfo(nodemask_t *nodemask,
- const struct numa_meminfo *mi)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(mi->blk); i++)
- if (mi->blk[i].start != mi->blk[i].end &&
- mi->blk[i].nid != NUMA_NO_NODE)
- node_set(mi->blk[i].nid, *nodemask);
-}
-
-/**
- * numa_reset_distance - Reset NUMA distance table
- *
- * The current table is freed. The next numa_set_distance() call will
- * create a new one.
- */
-void __init numa_reset_distance(void)
-{
- size_t size = numa_distance_cnt * numa_distance_cnt * sizeof(numa_distance[0]);
-
- /* numa_distance could be 1LU marking allocation failure, test cnt */
- if (numa_distance_cnt)
- memblock_free(numa_distance, size);
- numa_distance_cnt = 0;
- numa_distance = NULL; /* enable table creation */
-}
-
-static int __init numa_alloc_distance(void)
-{
- nodemask_t nodes_parsed;
- size_t size;
- int i, j, cnt = 0;
- u64 phys;
-
- /* size the new table and allocate it */
- nodes_parsed = numa_nodes_parsed;
- numa_nodemask_from_meminfo(&nodes_parsed, &numa_meminfo);
-
- for_each_node_mask(i, nodes_parsed)
- cnt = i;
- cnt++;
- size = cnt * cnt * sizeof(numa_distance[0]);
-
- phys = memblock_phys_alloc_range(size, PAGE_SIZE, 0,
- PFN_PHYS(max_pfn_mapped));
- if (!phys) {
- pr_warn("Warning: can't allocate distance table!\n");
- /* don't retry until explicitly reset */
- numa_distance = (void *)1LU;
- return -ENOMEM;
- }
-
- numa_distance = __va(phys);
- numa_distance_cnt = cnt;
-
- /* fill with the default distances */
- for (i = 0; i < cnt; i++)
- for (j = 0; j < cnt; j++)
- numa_distance[i * cnt + j] = i == j ?
- LOCAL_DISTANCE : REMOTE_DISTANCE;
- printk(KERN_DEBUG "NUMA: Initialized distance table, cnt=%d\n", cnt);
-
- return 0;
-}
-
-/**
- * numa_set_distance - Set NUMA distance from one NUMA to another
- * @from: the 'from' node to set distance
- * @to: the 'to' node to set distance
- * @distance: NUMA distance
- *
- * Set the distance from node @from to @to to @distance. If distance table
- * doesn't exist, one which is large enough to accommodate all the currently
- * known nodes will be created.
- *
- * If such table cannot be allocated, a warning is printed and further
- * calls are ignored until the distance table is reset with
- * numa_reset_distance().
- *
- * If @from or @to is higher than the highest known node or lower than zero
- * at the time of table creation or @distance doesn't make sense, the call
- * is ignored.
- * This is to allow simplification of specific NUMA config implementations.
- */
-void __init numa_set_distance(int from, int to, int distance)
-{
- if (!numa_distance && numa_alloc_distance() < 0)
- return;
-
- if (from >= numa_distance_cnt || to >= numa_distance_cnt ||
- from < 0 || to < 0) {
- pr_warn_once("Warning: node ids are out of bound, from=%d to=%d distance=%d\n",
- from, to, distance);
- return;
- }
-
- if ((u8)distance != distance ||
- (from == to && distance != LOCAL_DISTANCE)) {
- pr_warn_once("Warning: invalid distance parameter, from=%d to=%d distance=%d\n",
- from, to, distance);
- return;
- }
-
- numa_distance[from * numa_distance_cnt + to] = distance;
-}
-
-int __node_distance(int from, int to)
-{
- if (from >= numa_distance_cnt || to >= numa_distance_cnt)
- return from == to ? LOCAL_DISTANCE : REMOTE_DISTANCE;
- return numa_distance[from * numa_distance_cnt + to];
-}
-EXPORT_SYMBOL(__node_distance);
-
-/*
- * Mark all currently memblock-reserved physical memory (which covers the
- * kernel's own memory ranges) as hot-unswappable.
- */
-static void __init numa_clear_kernel_node_hotplug(void)
-{
- nodemask_t reserved_nodemask = NODE_MASK_NONE;
- struct memblock_region *mb_region;
- int i;
-
- /*
- * We have to do some preprocessing of memblock regions, to
- * make them suitable for reservation.
- *
- * At this time, all memory regions reserved by memblock are
- * used by the kernel, but those regions are not split up
- * along node boundaries yet, and don't necessarily have their
- * node ID set yet either.
- *
- * So iterate over all memory known to the x86 architecture,
- * and use those ranges to set the nid in memblock.reserved.
- * This will split up the memblock regions along node
- * boundaries and will set the node IDs as well.
- */
- for (i = 0; i < numa_meminfo.nr_blks; i++) {
- struct numa_memblk *mb = numa_meminfo.blk + i;
- int ret;
-
- ret = memblock_set_node(mb->start, mb->end - mb->start, &memblock.reserved, mb->nid);
- WARN_ON_ONCE(ret);
- }
-
- /*
- * Now go over all reserved memblock regions, to construct a
- * node mask of all kernel reserved memory areas.
- *
- * [ Note, when booting with mem=nn[kMG] or in a kdump kernel,
- * numa_meminfo might not include all memblock.reserved
- * memory ranges, because quirks such as trim_snb_memory()
- * reserve specific pages for Sandy Bridge graphics. ]
- */
- for_each_reserved_mem_region(mb_region) {
- int nid = memblock_get_region_node(mb_region);
-
- if (nid != NUMA_NO_NODE)
- node_set(nid, reserved_nodemask);
- }
-
- /*
- * Finally, clear the MEMBLOCK_HOTPLUG flag for all memory
- * belonging to the reserved node mask.
- *
- * Note that this will include memory regions that reside
- * on nodes that contain kernel memory - entire nodes
- * become hot-unpluggable:
- */
- for (i = 0; i < numa_meminfo.nr_blks; i++) {
- struct numa_memblk *mb = numa_meminfo.blk + i;
-
- if (!node_isset(mb->nid, reserved_nodemask))
- continue;
-
- memblock_clear_hotplug(mb->start, mb->end - mb->start);
- }
-}
-
-static int __init numa_register_memblks(struct numa_meminfo *mi)
-{
- int i, nid;
-
- /* Account for nodes with cpus and no memory */
- node_possible_map = numa_nodes_parsed;
- numa_nodemask_from_meminfo(&node_possible_map, mi);
- if (WARN_ON(nodes_empty(node_possible_map)))
- return -EINVAL;
-
- for (i = 0; i < mi->nr_blks; i++) {
- struct numa_memblk *mb = &mi->blk[i];
- memblock_set_node(mb->start, mb->end - mb->start,
- &memblock.memory, mb->nid);
- }
-
- /*
- * At very early time, the kernel have to use some memory such as
- * loading the kernel image. We cannot prevent this anyway. So any
- * node the kernel resides in should be un-hotpluggable.
- *
- * And when we come here, alloc node data won't fail.
- */
- numa_clear_kernel_node_hotplug();
-
- /*
- * If sections array is gonna be used for pfn -> nid mapping, check
- * whether its granularity is fine enough.
- */
- if (IS_ENABLED(NODE_NOT_IN_PAGE_FLAGS)) {
- unsigned long pfn_align = node_map_pfn_alignment();
-
- if (pfn_align && pfn_align < PAGES_PER_SECTION) {
- pr_warn("Node alignment %LuMB < min %LuMB, rejecting NUMA config\n",
- PFN_PHYS(pfn_align) >> 20,
- PFN_PHYS(PAGES_PER_SECTION) >> 20);
- return -EINVAL;
- }
- }
+ int nid;
if (!memblock_validate_numa_coverage(SZ_1M))
return -EINVAL;
/* Finally register nodes. */
for_each_node_mask(nid, node_possible_map) {
- u64 start = PFN_PHYS(max_pfn);
- u64 end = 0;
-
- for (i = 0; i < mi->nr_blks; i++) {
- if (nid != mi->blk[i].nid)
- continue;
- start = min(mi->blk[i].start, start);
- end = max(mi->blk[i].end, end);
- }
+ unsigned long start_pfn, end_pfn;
- if (start >= end)
+ /*
+ * Note, get_pfn_range_for_nid() depends on
+ * memblock_set_node() having already happened
+ */
+ get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
+ if (start_pfn >= end_pfn)
continue;
alloc_node_data(nid);
+ node_set_online(nid);
}
/* Dump memblock with node info and return. */
@@ -609,39 +171,11 @@ static int __init numa_init(int (*init_func)(void))
for (i = 0; i < MAX_LOCAL_APIC; i++)
set_apicid_to_node(i, NUMA_NO_NODE);
- nodes_clear(numa_nodes_parsed);
- nodes_clear(node_possible_map);
- nodes_clear(node_online_map);
- memset(&numa_meminfo, 0, sizeof(numa_meminfo));
- WARN_ON(memblock_set_node(0, ULLONG_MAX, &memblock.memory,
- NUMA_NO_NODE));
- WARN_ON(memblock_set_node(0, ULLONG_MAX, &memblock.reserved,
- NUMA_NO_NODE));
- /* In case that parsing SRAT failed. */
- WARN_ON(memblock_clear_hotplug(0, ULLONG_MAX));
- numa_reset_distance();
-
- ret = init_func();
- if (ret < 0)
- return ret;
-
- /*
- * We reset memblock back to the top-down direction
- * here because if we configured ACPI_NUMA, we have
- * parsed SRAT in init_func(). It is ok to have the
- * reset here even if we did't configure ACPI_NUMA
- * or acpi numa init fails and fallbacks to dummy
- * numa init.
- */
- memblock_set_bottom_up(false);
-
- ret = numa_cleanup_meminfo(&numa_meminfo);
+ ret = numa_memblks_init(init_func, /* memblock_force_top_down */ true);
if (ret < 0)
return ret;
- numa_emulation(&numa_meminfo, numa_distance_cnt);
-
- ret = numa_register_memblks(&numa_meminfo);
+ ret = numa_register_nodes();
if (ret < 0)
return ret;
@@ -782,12 +316,12 @@ void __init init_cpu_to_node(void)
#ifndef CONFIG_DEBUG_PER_CPU_MAPS
# ifndef CONFIG_NUMA_EMU
-void numa_add_cpu(int cpu)
+void numa_add_cpu(unsigned int cpu)
{
cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
}
-void numa_remove_cpu(int cpu)
+void numa_remove_cpu(unsigned int cpu)
{
cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
}
@@ -825,7 +359,7 @@ int early_cpu_to_node(int cpu)
return per_cpu(x86_cpu_to_node_map, cpu);
}
-void debug_cpumask_set_cpu(int cpu, int node, bool enable)
+void debug_cpumask_set_cpu(unsigned int cpu, int node, bool enable)
{
struct cpumask *mask;
@@ -857,12 +391,12 @@ static void numa_set_cpumask(int cpu, bool enable)
debug_cpumask_set_cpu(cpu, early_cpu_to_node(cpu), enable);
}
-void numa_add_cpu(int cpu)
+void numa_add_cpu(unsigned int cpu)
{
numa_set_cpumask(cpu, true);
}
-void numa_remove_cpu(int cpu)
+void numa_remove_cpu(unsigned int cpu)
{
numa_set_cpumask(cpu, false);
}
@@ -893,113 +427,29 @@ EXPORT_SYMBOL(cpumask_of_node);
#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */
-#ifdef CONFIG_NUMA_KEEP_MEMINFO
-static int meminfo_to_nid(struct numa_meminfo *mi, u64 start)
+#ifdef CONFIG_NUMA_EMU
+void __init numa_emu_update_cpu_to_node(int *emu_nid_to_phys,
+ unsigned int nr_emu_nids)
{
- int i;
-
- for (i = 0; i < mi->nr_blks; i++)
- if (mi->blk[i].start <= start && mi->blk[i].end > start)
- return mi->blk[i].nid;
- return NUMA_NO_NODE;
-}
-
-int phys_to_target_node(phys_addr_t start)
-{
- int nid = meminfo_to_nid(&numa_meminfo, start);
+ int i, j;
/*
- * Prefer online nodes, but if reserved memory might be
- * hot-added continue the search with reserved ranges.
+ * Transform __apicid_to_node table to use emulated nids by
+ * reverse-mapping phys_nid. The maps should always exist but fall
+ * back to zero just in case.
*/
- if (nid != NUMA_NO_NODE)
- return nid;
-
- return meminfo_to_nid(&numa_reserved_meminfo, start);
-}
-EXPORT_SYMBOL_GPL(phys_to_target_node);
-
-int memory_add_physaddr_to_nid(u64 start)
-{
- int nid = meminfo_to_nid(&numa_meminfo, start);
-
- if (nid == NUMA_NO_NODE)
- nid = numa_meminfo.blk[0].nid;
- return nid;
-}
-EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
-
-#endif
-
-static int __init cmp_memblk(const void *a, const void *b)
-{
- const struct numa_memblk *ma = *(const struct numa_memblk **)a;
- const struct numa_memblk *mb = *(const struct numa_memblk **)b;
-
- return (ma->start > mb->start) - (ma->start < mb->start);
+ for (i = 0; i < ARRAY_SIZE(__apicid_to_node); i++) {
+ if (__apicid_to_node[i] == NUMA_NO_NODE)
+ continue;
+ for (j = 0; j < nr_emu_nids; j++)
+ if (__apicid_to_node[i] == emu_nid_to_phys[j])
+ break;
+ __apicid_to_node[i] = j < nr_emu_nids ? j : 0;
+ }
}
-static struct numa_memblk *numa_memblk_list[NR_NODE_MEMBLKS] __initdata;
-
-/**
- * numa_fill_memblks - Fill gaps in numa_meminfo memblks
- * @start: address to begin fill
- * @end: address to end fill
- *
- * Find and extend numa_meminfo memblks to cover the physical
- * address range @start-@end
- *
- * RETURNS:
- * 0 : Success
- * NUMA_NO_MEMBLK : No memblks exist in address range @start-@end
- */
-
-int __init numa_fill_memblks(u64 start, u64 end)
+u64 __init numa_emu_dma_end(void)
{
- struct numa_memblk **blk = &numa_memblk_list[0];
- struct numa_meminfo *mi = &numa_meminfo;
- int count = 0;
- u64 prev_end;
-
- /*
- * Create a list of pointers to numa_meminfo memblks that
- * overlap start, end. The list is used to make in-place
- * changes that fill out the numa_meminfo memblks.
- */
- for (int i = 0; i < mi->nr_blks; i++) {
- struct numa_memblk *bi = &mi->blk[i];
-
- if (memblock_addrs_overlap(start, end - start, bi->start,
- bi->end - bi->start)) {
- blk[count] = &mi->blk[i];
- count++;
- }
- }
- if (!count)
- return NUMA_NO_MEMBLK;
-
- /* Sort the list of pointers in memblk->start order */
- sort(&blk[0], count, sizeof(blk[0]), cmp_memblk, NULL);
-
- /* Make sure the first/last memblks include start/end */
- blk[0]->start = min(blk[0]->start, start);
- blk[count - 1]->end = max(blk[count - 1]->end, end);
-
- /*
- * Fill any gaps by tracking the previous memblks
- * end address and backfilling to it if needed.
- */
- prev_end = blk[0]->end;
- for (int i = 1; i < count; i++) {
- struct numa_memblk *curr = blk[i];
-
- if (prev_end >= curr->start) {
- if (prev_end < curr->end)
- prev_end = curr->end;
- } else {
- curr->start = prev_end;
- prev_end = curr->end;
- }
- }
- return 0;
+ return PFN_PHYS(MAX_DMA32_PFN);
}
+#endif /* CONFIG_NUMA_EMU */
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
deleted file mode 100644
index 9a9305367fdd..000000000000
--- a/arch/x86/mm/numa_emulation.c
+++ /dev/null
@@ -1,585 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * NUMA emulation
- */
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/topology.h>
-#include <linux/memblock.h>
-#include <asm/dma.h>
-
-#include "numa_internal.h"
-
-static int emu_nid_to_phys[MAX_NUMNODES];
-static char *emu_cmdline __initdata;
-
-int __init numa_emu_cmdline(char *str)
-{
- emu_cmdline = str;
- return 0;
-}
-
-static int __init emu_find_memblk_by_nid(int nid, const struct numa_meminfo *mi)
-{
- int i;
-
- for (i = 0; i < mi->nr_blks; i++)
- if (mi->blk[i].nid == nid)
- return i;
- return -ENOENT;
-}
-
-static u64 __init mem_hole_size(u64 start, u64 end)
-{
- unsigned long start_pfn = PFN_UP(start);
- unsigned long end_pfn = PFN_DOWN(end);
-
- if (start_pfn < end_pfn)
- return PFN_PHYS(absent_pages_in_range(start_pfn, end_pfn));
- return 0;
-}
-
-/*
- * Sets up nid to range from @start to @end. The return value is -errno if
- * something went wrong, 0 otherwise.
- */
-static int __init emu_setup_memblk(struct numa_meminfo *ei,
- struct numa_meminfo *pi,
- int nid, int phys_blk, u64 size)
-{
- struct numa_memblk *eb = &ei->blk[ei->nr_blks];
- struct numa_memblk *pb = &pi->blk[phys_blk];
-
- if (ei->nr_blks >= NR_NODE_MEMBLKS) {
- pr_err("NUMA: Too many emulated memblks, failing emulation\n");
- return -EINVAL;
- }
-
- ei->nr_blks++;
- eb->start = pb->start;
- eb->end = pb->start + size;
- eb->nid = nid;
-
- if (emu_nid_to_phys[nid] == NUMA_NO_NODE)
- emu_nid_to_phys[nid] = pb->nid;
-
- pb->start += size;
- if (pb->start >= pb->end) {
- WARN_ON_ONCE(pb->start > pb->end);
- numa_remove_memblk_from(phys_blk, pi);
- }
-
- printk(KERN_INFO "Faking node %d at [mem %#018Lx-%#018Lx] (%LuMB)\n",
- nid, eb->start, eb->end - 1, (eb->end - eb->start) >> 20);
- return 0;
-}
-
-/*
- * Sets up nr_nodes fake nodes interleaved over physical nodes ranging from addr
- * to max_addr.
- *
- * Returns zero on success or negative on error.
- */
-static int __init split_nodes_interleave(struct numa_meminfo *ei,
- struct numa_meminfo *pi,
- u64 addr, u64 max_addr, int nr_nodes)
-{
- nodemask_t physnode_mask = numa_nodes_parsed;
- u64 size;
- int big;
- int nid = 0;
- int i, ret;
-
- if (nr_nodes <= 0)
- return -1;
- if (nr_nodes > MAX_NUMNODES) {
- pr_info("numa=fake=%d too large, reducing to %d\n",
- nr_nodes, MAX_NUMNODES);
- nr_nodes = MAX_NUMNODES;
- }
-
- /*
- * Calculate target node size. x86_32 freaks on __udivdi3() so do
- * the division in ulong number of pages and convert back.
- */
- size = max_addr - addr - mem_hole_size(addr, max_addr);
- size = PFN_PHYS((unsigned long)(size >> PAGE_SHIFT) / nr_nodes);
-
- /*
- * Calculate the number of big nodes that can be allocated as a result
- * of consolidating the remainder.
- */
- big = ((size & ~FAKE_NODE_MIN_HASH_MASK) * nr_nodes) /
- FAKE_NODE_MIN_SIZE;
-
- size &= FAKE_NODE_MIN_HASH_MASK;
- if (!size) {
- pr_err("Not enough memory for each node. "
- "NUMA emulation disabled.\n");
- return -1;
- }
-
- /*
- * Continue to fill physical nodes with fake nodes until there is no
- * memory left on any of them.
- */
- while (!nodes_empty(physnode_mask)) {
- for_each_node_mask(i, physnode_mask) {
- u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN);
- u64 start, limit, end;
- int phys_blk;
-
- phys_blk = emu_find_memblk_by_nid(i, pi);
- if (phys_blk < 0) {
- node_clear(i, physnode_mask);
- continue;
- }
- start = pi->blk[phys_blk].start;
- limit = pi->blk[phys_blk].end;
- end = start + size;
-
- if (nid < big)
- end += FAKE_NODE_MIN_SIZE;
-
- /*
- * Continue to add memory to this fake node if its
- * non-reserved memory is less than the per-node size.
- */
- while (end - start - mem_hole_size(start, end) < size) {
- end += FAKE_NODE_MIN_SIZE;
- if (end > limit) {
- end = limit;
- break;
- }
- }
-
- /*
- * If there won't be at least FAKE_NODE_MIN_SIZE of
- * non-reserved memory in ZONE_DMA32 for the next node,
- * this one must extend to the boundary.
- */
- if (end < dma32_end && dma32_end - end -
- mem_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
- end = dma32_end;
-
- /*
- * If there won't be enough non-reserved memory for the
- * next node, this one must extend to the end of the
- * physical node.
- */
- if (limit - end - mem_hole_size(end, limit) < size)
- end = limit;
-
- ret = emu_setup_memblk(ei, pi, nid++ % nr_nodes,
- phys_blk,
- min(end, limit) - start);
- if (ret < 0)
- return ret;
- }
- }
- return 0;
-}
-
-/*
- * Returns the end address of a node so that there is at least `size' amount of
- * non-reserved memory or `max_addr' is reached.
- */
-static u64 __init find_end_of_node(u64 start, u64 max_addr, u64 size)
-{
- u64 end = start + size;
-
- while (end - start - mem_hole_size(start, end) < size) {
- end += FAKE_NODE_MIN_SIZE;
- if (end > max_addr) {
- end = max_addr;
- break;
- }
- }
- return end;
-}
-
-static u64 uniform_size(u64 max_addr, u64 base, u64 hole, int nr_nodes)
-{
- unsigned long max_pfn = PHYS_PFN(max_addr);
- unsigned long base_pfn = PHYS_PFN(base);
- unsigned long hole_pfns = PHYS_PFN(hole);
-
- return PFN_PHYS((max_pfn - base_pfn - hole_pfns) / nr_nodes);
-}
-
-/*
- * Sets up fake nodes of `size' interleaved over physical nodes ranging from
- * `addr' to `max_addr'.
- *
- * Returns zero on success or negative on error.
- */
-static int __init split_nodes_size_interleave_uniform(struct numa_meminfo *ei,
- struct numa_meminfo *pi,
- u64 addr, u64 max_addr, u64 size,
- int nr_nodes, struct numa_memblk *pblk,
- int nid)
-{
- nodemask_t physnode_mask = numa_nodes_parsed;
- int i, ret, uniform = 0;
- u64 min_size;
-
- if ((!size && !nr_nodes) || (nr_nodes && !pblk))
- return -1;
-
- /*
- * In the 'uniform' case split the passed in physical node by
- * nr_nodes, in the non-uniform case, ignore the passed in
- * physical block and try to create nodes of at least size
- * @size.
- *
- * In the uniform case, split the nodes strictly by physical
- * capacity, i.e. ignore holes. In the non-uniform case account
- * for holes and treat @size as a minimum floor.
- */
- if (!nr_nodes)
- nr_nodes = MAX_NUMNODES;
- else {
- nodes_clear(physnode_mask);
- node_set(pblk->nid, physnode_mask);
- uniform = 1;
- }
-
- if (uniform) {
- min_size = uniform_size(max_addr, addr, 0, nr_nodes);
- size = min_size;
- } else {
- /*
- * The limit on emulated nodes is MAX_NUMNODES, so the
- * size per node is increased accordingly if the
- * requested size is too small. This creates a uniform
- * distribution of node sizes across the entire machine
- * (but not necessarily over physical nodes).
- */
- min_size = uniform_size(max_addr, addr,
- mem_hole_size(addr, max_addr), nr_nodes);
- }
- min_size = ALIGN(max(min_size, FAKE_NODE_MIN_SIZE), FAKE_NODE_MIN_SIZE);
- if (size < min_size) {
- pr_err("Fake node size %LuMB too small, increasing to %LuMB\n",
- size >> 20, min_size >> 20);
- size = min_size;
- }
- size = ALIGN_DOWN(size, FAKE_NODE_MIN_SIZE);
-
- /*
- * Fill physical nodes with fake nodes of size until there is no memory
- * left on any of them.
- */
- while (!nodes_empty(physnode_mask)) {
- for_each_node_mask(i, physnode_mask) {
- u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN);
- u64 start, limit, end;
- int phys_blk;
-
- phys_blk = emu_find_memblk_by_nid(i, pi);
- if (phys_blk < 0) {
- node_clear(i, physnode_mask);
- continue;
- }
-
- start = pi->blk[phys_blk].start;
- limit = pi->blk[phys_blk].end;
-
- if (uniform)
- end = start + size;
- else
- end = find_end_of_node(start, limit, size);
- /*
- * If there won't be at least FAKE_NODE_MIN_SIZE of
- * non-reserved memory in ZONE_DMA32 for the next node,
- * this one must extend to the boundary.
- */
- if (end < dma32_end && dma32_end - end -
- mem_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
- end = dma32_end;
-
- /*
- * If there won't be enough non-reserved memory for the
- * next node, this one must extend to the end of the
- * physical node.
- */
- if ((limit - end - mem_hole_size(end, limit) < size)
- && !uniform)
- end = limit;
-
- ret = emu_setup_memblk(ei, pi, nid++ % MAX_NUMNODES,
- phys_blk,
- min(end, limit) - start);
- if (ret < 0)
- return ret;
- }
- }
- return nid;
-}
-
-static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
- struct numa_meminfo *pi,
- u64 addr, u64 max_addr, u64 size)
-{
- return split_nodes_size_interleave_uniform(ei, pi, addr, max_addr, size,
- 0, NULL, 0);
-}
-
-static int __init setup_emu2phys_nid(int *dfl_phys_nid)
-{
- int i, max_emu_nid = 0;
-
- *dfl_phys_nid = NUMA_NO_NODE;
- for (i = 0; i < ARRAY_SIZE(emu_nid_to_phys); i++) {
- if (emu_nid_to_phys[i] != NUMA_NO_NODE) {
- max_emu_nid = i;
- if (*dfl_phys_nid == NUMA_NO_NODE)
- *dfl_phys_nid = emu_nid_to_phys[i];
- }
- }
-
- return max_emu_nid;
-}
-
-/**
- * numa_emulation - Emulate NUMA nodes
- * @numa_meminfo: NUMA configuration to massage
- * @numa_dist_cnt: The size of the physical NUMA distance table
- *
- * Emulate NUMA nodes according to the numa=fake kernel parameter.
- * @numa_meminfo contains the physical memory configuration and is modified
- * to reflect the emulated configuration on success. @numa_dist_cnt is
- * used to determine the size of the physical distance table.
- *
- * On success, the following modifications are made.
- *
- * - @numa_meminfo is updated to reflect the emulated nodes.
- *
- * - __apicid_to_node[] is updated such that APIC IDs are mapped to the
- * emulated nodes.
- *
- * - NUMA distance table is rebuilt to represent distances between emulated
- * nodes. The distances are determined considering how emulated nodes
- * are mapped to physical nodes and match the actual distances.
- *
- * - emu_nid_to_phys[] reflects how emulated nodes are mapped to physical
- * nodes. This is used by numa_add_cpu() and numa_remove_cpu().
- *
- * If emulation is not enabled or fails, emu_nid_to_phys[] is filled with
- * identity mapping and no other modification is made.
- */
-void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
-{
- static struct numa_meminfo ei __initdata;
- static struct numa_meminfo pi __initdata;
- const u64 max_addr = PFN_PHYS(max_pfn);
- u8 *phys_dist = NULL;
- size_t phys_size = numa_dist_cnt * numa_dist_cnt * sizeof(phys_dist[0]);
- int max_emu_nid, dfl_phys_nid;
- int i, j, ret;
-
- if (!emu_cmdline)
- goto no_emu;
-
- memset(&ei, 0, sizeof(ei));
- pi = *numa_meminfo;
-
- for (i = 0; i < MAX_NUMNODES; i++)
- emu_nid_to_phys[i] = NUMA_NO_NODE;
-
- /*
- * If the numa=fake command-line contains a 'M' or 'G', it represents
- * the fixed node size. Otherwise, if it is just a single number N,
- * split the system RAM into N fake nodes.
- */
- if (strchr(emu_cmdline, 'U')) {
- nodemask_t physnode_mask = numa_nodes_parsed;
- unsigned long n;
- int nid = 0;
-
- n = simple_strtoul(emu_cmdline, &emu_cmdline, 0);
- ret = -1;
- for_each_node_mask(i, physnode_mask) {
- /*
- * The reason we pass in blk[0] is due to
- * numa_remove_memblk_from() called by
- * emu_setup_memblk() will delete entry 0
- * and then move everything else up in the pi.blk
- * array. Therefore we should always be looking
- * at blk[0].
- */
- ret = split_nodes_size_interleave_uniform(&ei, &pi,
- pi.blk[0].start, pi.blk[0].end, 0,
- n, &pi.blk[0], nid);
- if (ret < 0)
- break;
- if (ret < n) {
- pr_info("%s: phys: %d only got %d of %ld nodes, failing\n",
- __func__, i, ret, n);
- ret = -1;
- break;
- }
- nid = ret;
- }
- } else if (strchr(emu_cmdline, 'M') || strchr(emu_cmdline, 'G')) {
- u64 size;
-
- size = memparse(emu_cmdline, &emu_cmdline);
- ret = split_nodes_size_interleave(&ei, &pi, 0, max_addr, size);
- } else {
- unsigned long n;
-
- n = simple_strtoul(emu_cmdline, &emu_cmdline, 0);
- ret = split_nodes_interleave(&ei, &pi, 0, max_addr, n);
- }
- if (*emu_cmdline == ':')
- emu_cmdline++;
-
- if (ret < 0)
- goto no_emu;
-
- if (numa_cleanup_meminfo(&ei) < 0) {
- pr_warn("NUMA: Warning: constructed meminfo invalid, disabling emulation\n");
- goto no_emu;
- }
-
- /* copy the physical distance table */
- if (numa_dist_cnt) {
- u64 phys;
-
- phys = memblock_phys_alloc_range(phys_size, PAGE_SIZE, 0,
- PFN_PHYS(max_pfn_mapped));
- if (!phys) {
- pr_warn("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n");
- goto no_emu;
- }
- phys_dist = __va(phys);
-
- for (i = 0; i < numa_dist_cnt; i++)
- for (j = 0; j < numa_dist_cnt; j++)
- phys_dist[i * numa_dist_cnt + j] =
- node_distance(i, j);
- }
-
- /*
- * Determine the max emulated nid and the default phys nid to use
- * for unmapped nodes.
- */
- max_emu_nid = setup_emu2phys_nid(&dfl_phys_nid);
-
- /* commit */
- *numa_meminfo = ei;
-
- /* Make sure numa_nodes_parsed only contains emulated nodes */
- nodes_clear(numa_nodes_parsed);
- for (i = 0; i < ARRAY_SIZE(ei.blk); i++)
- if (ei.blk[i].start != ei.blk[i].end &&
- ei.blk[i].nid != NUMA_NO_NODE)
- node_set(ei.blk[i].nid, numa_nodes_parsed);
-
- /*
- * Transform __apicid_to_node table to use emulated nids by
- * reverse-mapping phys_nid. The maps should always exist but fall
- * back to zero just in case.
- */
- for (i = 0; i < ARRAY_SIZE(__apicid_to_node); i++) {
- if (__apicid_to_node[i] == NUMA_NO_NODE)
- continue;
- for (j = 0; j < ARRAY_SIZE(emu_nid_to_phys); j++)
- if (__apicid_to_node[i] == emu_nid_to_phys[j])
- break;
- __apicid_to_node[i] = j < ARRAY_SIZE(emu_nid_to_phys) ? j : 0;
- }
-
- /* make sure all emulated nodes are mapped to a physical node */
- for (i = 0; i < ARRAY_SIZE(emu_nid_to_phys); i++)
- if (emu_nid_to_phys[i] == NUMA_NO_NODE)
- emu_nid_to_phys[i] = dfl_phys_nid;
-
- /* transform distance table */
- numa_reset_distance();
- for (i = 0; i < max_emu_nid + 1; i++) {
- for (j = 0; j < max_emu_nid + 1; j++) {
- int physi = emu_nid_to_phys[i];
- int physj = emu_nid_to_phys[j];
- int dist;
-
- if (get_option(&emu_cmdline, &dist) == 2)
- ;
- else if (physi >= numa_dist_cnt || physj >= numa_dist_cnt)
- dist = physi == physj ?
- LOCAL_DISTANCE : REMOTE_DISTANCE;
- else
- dist = phys_dist[physi * numa_dist_cnt + physj];
-
- numa_set_distance(i, j, dist);
- }
- }
-
- /* free the copied physical distance table */
- memblock_free(phys_dist, phys_size);
- return;
-
-no_emu:
- /* No emulation. Build identity emu_nid_to_phys[] for numa_add_cpu() */
- for (i = 0; i < ARRAY_SIZE(emu_nid_to_phys); i++)
- emu_nid_to_phys[i] = i;
-}
-
-#ifndef CONFIG_DEBUG_PER_CPU_MAPS
-void numa_add_cpu(int cpu)
-{
- int physnid, nid;
-
- nid = early_cpu_to_node(cpu);
- BUG_ON(nid == NUMA_NO_NODE || !node_online(nid));
-
- physnid = emu_nid_to_phys[nid];
-
- /*
- * Map the cpu to each emulated node that is allocated on the physical
- * node of the cpu's apic id.
- */
- for_each_online_node(nid)
- if (emu_nid_to_phys[nid] == physnid)
- cpumask_set_cpu(cpu, node_to_cpumask_map[nid]);
-}
-
-void numa_remove_cpu(int cpu)
-{
- int i;
-
- for_each_online_node(i)
- cpumask_clear_cpu(cpu, node_to_cpumask_map[i]);
-}
-#else /* !CONFIG_DEBUG_PER_CPU_MAPS */
-static void numa_set_cpumask(int cpu, bool enable)
-{
- int nid, physnid;
-
- nid = early_cpu_to_node(cpu);
- if (nid == NUMA_NO_NODE) {
- /* early_cpu_to_node() already emits a warning and trace */
- return;
- }
-
- physnid = emu_nid_to_phys[nid];
-
- for_each_online_node(nid) {
- if (emu_nid_to_phys[nid] != physnid)
- continue;
-
- debug_cpumask_set_cpu(cpu, nid, enable);
- }
-}
-
-void numa_add_cpu(int cpu)
-{
- numa_set_cpumask(cpu, true);
-}
-
-void numa_remove_cpu(int cpu)
-{
- numa_set_cpumask(cpu, false);
-}
-#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */
diff --git a/arch/x86/mm/numa_internal.h b/arch/x86/mm/numa_internal.h
index 86860f279662..11e1ff370c10 100644
--- a/arch/x86/mm/numa_internal.h
+++ b/arch/x86/mm/numa_internal.h
@@ -5,30 +5,6 @@
#include <linux/types.h>
#include <asm/numa.h>
-struct numa_memblk {
- u64 start;
- u64 end;
- int nid;
-};
-
-struct numa_meminfo {
- int nr_blks;
- struct numa_memblk blk[NR_NODE_MEMBLKS];
-};
-
-void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi);
-int __init numa_cleanup_meminfo(struct numa_meminfo *mi);
-void __init numa_reset_distance(void);
-
void __init x86_numa_init(void);
-#ifdef CONFIG_NUMA_EMU
-void __init numa_emulation(struct numa_meminfo *numa_meminfo,
- int numa_dist_cnt);
-#else
-static inline void numa_emulation(struct numa_meminfo *numa_meminfo,
- int numa_dist_cnt)
-{ }
-#endif
-
#endif /* __X86_MM_NUMA_INTERNAL_H */
diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c
index bdc2a240c2aa..f73b5ce270b3 100644
--- a/arch/x86/mm/pat/memtype.c
+++ b/arch/x86/mm/pat/memtype.c
@@ -104,7 +104,7 @@ __setup("debugpat", pat_debug_setup);
#ifdef CONFIG_X86_PAT
/*
- * X86 PAT uses page flags arch_1 and uncached together to keep track of
+ * X86 PAT uses page flags arch_1 and arch_2 together to keep track of
* memory type of pages that have backing page struct.
*
* X86 PAT supports 4 different memory types:
@@ -118,9 +118,9 @@ __setup("debugpat", pat_debug_setup);
#define _PGMT_WB 0
#define _PGMT_WC (1UL << PG_arch_1)
-#define _PGMT_UC_MINUS (1UL << PG_uncached)
-#define _PGMT_WT (1UL << PG_uncached | 1UL << PG_arch_1)
-#define _PGMT_MASK (1UL << PG_uncached | 1UL << PG_arch_1)
+#define _PGMT_UC_MINUS (1UL << PG_arch_2)
+#define _PGMT_WT (1UL << PG_arch_2 | 1UL << PG_arch_1)
+#define _PGMT_MASK (1UL << PG_arch_2 | 1UL << PG_arch_1)
#define _PGMT_CLEAR_MASK (~_PGMT_MASK)
static inline enum page_cache_mode get_page_memtype(struct page *pg)
@@ -951,23 +951,20 @@ static void free_pfn_range(u64 paddr, unsigned long size)
static int follow_phys(struct vm_area_struct *vma, unsigned long *prot,
resource_size_t *phys)
{
- pte_t *ptep, pte;
- spinlock_t *ptl;
+ struct follow_pfnmap_args args = { .vma = vma, .address = vma->vm_start };
- if (follow_pte(vma, vma->vm_start, &ptep, &ptl))
+ if (follow_pfnmap_start(&args))
return -EINVAL;
- pte = ptep_get(ptep);
-
/* Never return PFNs of anon folios in COW mappings. */
- if (vm_normal_folio(vma, vma->vm_start, pte)) {
- pte_unmap_unlock(ptep, ptl);
+ if (!args.special) {
+ follow_pfnmap_end(&args);
return -EINVAL;
}
- *prot = pgprot_val(pte_pgprot(pte));
- *phys = (resource_size_t)pte_pfn(pte) << PAGE_SHIFT;
- pte_unmap_unlock(ptep, ptl);
+ *prot = pgprot_val(args.pgprot);
+ *phys = (resource_size_t)args.pfn << PAGE_SHIFT;
+ follow_pfnmap_end(&args);
return 0;
}
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index f5931499c2d6..5745a354a241 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -641,6 +641,18 @@ pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma, unsigned long address,
}
#endif
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
+ defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
+pud_t pudp_invalidate(struct vm_area_struct *vma, unsigned long address,
+ pud_t *pudp)
+{
+ VM_WARN_ON_ONCE(!pud_present(*pudp));
+ pud_t old = pudp_establish(vma, address, pudp, pud_mkinvalid(*pudp));
+ flush_pud_tlb_range(vma, address, address + HPAGE_PUD_SIZE);
+ return old;
+}
+#endif
+
/**
* reserve_top_address - reserves a hole in the top of kernel address space
* @reserve - size of hole to reserve
@@ -926,3 +938,9 @@ void arch_check_zapped_pmd(struct vm_area_struct *vma, pmd_t pmd)
VM_WARN_ON_ONCE(!(vma->vm_flags & VM_SHADOW_STACK) &&
pmd_shstk(pmd));
}
+
+void arch_check_zapped_pud(struct vm_area_struct *vma, pud_t pud)
+{
+ /* See note in arch_check_zapped_pte() */
+ VM_WARN_ON_ONCE(!(vma->vm_flags & VM_SHADOW_STACK) && pud_shstk(pud));
+}
diff --git a/arch/x86/um/vdso/vma.c b/arch/x86/um/vdso/vma.c
index 76d9f6ce7a3d..f238f7b33cdd 100644
--- a/arch/x86/um/vdso/vma.c
+++ b/arch/x86/um/vdso/vma.c
@@ -52,8 +52,11 @@ subsys_initcall(init_vdso);
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
- int err;
+ struct vm_area_struct *vma;
struct mm_struct *mm = current->mm;
+ static struct vm_special_mapping vdso_mapping = {
+ .name = "[vdso]",
+ };
if (!vdso_enabled)
return 0;
@@ -61,12 +64,13 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
if (mmap_write_lock_killable(mm))
return -EINTR;
- err = install_special_mapping(mm, um_vdso_addr, PAGE_SIZE,
+ vdso_mapping.pages = vdsop;
+ vma = _install_special_mapping(mm, um_vdso_addr, PAGE_SIZE,
VM_READ|VM_EXEC|
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
- vdsop);
+ &vdso_mapping);
mmap_write_unlock(mm);
- return err;
+ return IS_ERR(vma) ? PTR_ERR(vma) : 0;
}
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 839e6613753d..55a4996d0c04 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -665,7 +665,7 @@ static spinlock_t *xen_pte_lock(struct page *page, struct mm_struct *mm)
{
spinlock_t *ptl = NULL;
-#if USE_SPLIT_PTE_PTLOCKS
+#if defined(CONFIG_SPLIT_PTE_PTLOCKS)
ptl = ptlock_ptr(page_ptdesc(page));
spin_lock_nest_lock(ptl, &mm->page_table_lock);
#endif
@@ -1553,7 +1553,8 @@ static inline void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn,
__set_pfn_prot(pfn, PAGE_KERNEL_RO);
- if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS && !pinned)
+ if (level == PT_PTE && IS_ENABLED(CONFIG_SPLIT_PTE_PTLOCKS) &&
+ !pinned)
__pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
xen_mc_issue(XEN_LAZY_MMU);
@@ -1581,7 +1582,7 @@ static inline void xen_release_ptpage(unsigned long pfn, unsigned level)
if (pinned) {
xen_mc_batch();
- if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS)
+ if (level == PT_PTE && IS_ENABLED(CONFIG_SPLIT_PTE_PTLOCKS))
__pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
__set_pfn_prot(pfn, PAGE_KERNEL);
diff --git a/arch/xtensa/kernel/syscall.c b/arch/xtensa/kernel/syscall.c
index b3c2450d6f23..dc54f854c2f5 100644
--- a/arch/xtensa/kernel/syscall.c
+++ b/arch/xtensa/kernel/syscall.c
@@ -55,7 +55,8 @@ asmlinkage long xtensa_fadvise64_64(int fd, int advice,
#ifdef CONFIG_MMU
unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
- unsigned long len, unsigned long pgoff, unsigned long flags)
+ unsigned long len, unsigned long pgoff, unsigned long flags,
+ vm_flags_t vm_flags)
{
struct vm_area_struct *vmm;
struct vma_iterator vmi;