summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-03-14 12:23:34 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-03-14 12:23:34 -0700
commit50eb842fe517b2765b7748c3016082b484a6dbb8 (patch)
tree043787bcebc48b58698469d5699f9e034b92a391
parent88fe49249c99de14e543c632a46248d85411ab9e (diff)
parent2766f1821600cc7562bae2128ad0b163f744c5d9 (diff)
Merge branch 'akpm' (patches from Andrew)
Merge misc fixes from Andrew Morton: "28 patches. Subsystems affected by this series: mm (memblock, pagealloc, hugetlb, highmem, kfence, oom-kill, madvise, kasan, userfaultfd, memcg, and zram), core-kernel, kconfig, fork, binfmt, MAINTAINERS, kbuild, and ia64" * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (28 commits) zram: fix broken page writeback zram: fix return value on writeback_store mm/memcg: set memcg when splitting page mm/memcg: rename mem_cgroup_split_huge_fixup to split_page_memcg and add nr_pages argument ia64: fix ptrace(PTRACE_SYSCALL_INFO_EXIT) sign ia64: fix ia64_syscall_get_set_arguments() for break-based syscalls mm/userfaultfd: fix memory corruption due to writeprotect kasan: fix KASAN_STACK dependency for HW_TAGS kasan, mm: fix crash with HW_TAGS and DEBUG_PAGEALLOC mm/madvise: replace ptrace attach requirement for process_madvise include/linux/sched/mm.h: use rcu_dereference in in_vfork() kfence: fix reports if constant function prefixes exist kfence, slab: fix cache_alloc_debugcheck_after() for bulk allocations kfence: fix printk format for ptrdiff_t linux/compiler-clang.h: define HAVE_BUILTIN_BSWAP* MAINTAINERS: exclude uapi directories in API/ABI section binfmt_misc: fix possible deadlock in bm_register_write mm/highmem.c: fix zero_user_segments() with start > end hugetlb: do early cow when page pinned on src mm mm: use is_cow_mapping() across tree where proper ...
-rw-r--r--MAINTAINERS4
-rw-r--r--arch/ia64/include/asm/syscall.h2
-rw-r--r--arch/ia64/kernel/ptrace.c24
-rw-r--r--drivers/block/zram/zram_drv.c17
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c4
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c2
-rw-r--r--fs/binfmt_misc.c29
-rw-r--r--fs/proc/task_mmu.c2
-rw-r--r--include/linux/compiler-clang.h6
-rw-r--r--include/linux/memblock.h4
-rw-r--r--include/linux/memcontrol.h6
-rw-r--r--include/linux/mm.h21
-rw-r--r--include/linux/mm_types.h1
-rw-r--r--include/linux/sched/mm.h3
-rw-r--r--include/linux/stop_machine.h11
-rw-r--r--init/Kconfig3
-rw-r--r--kernel/fork.c8
-rw-r--r--lib/Kconfig.kasan1
-rw-r--r--mm/highmem.c17
-rw-r--r--mm/huge_memory.c10
-rw-r--r--mm/hugetlb.c123
-rw-r--r--mm/internal.h5
-rw-r--r--mm/kfence/report.c30
-rw-r--r--mm/madvise.c13
-rw-r--r--mm/memcontrol.c15
-rw-r--r--mm/memory.c16
-rw-r--r--mm/page_alloc.c167
-rw-r--r--mm/slab.c2
28 files changed, 332 insertions, 214 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 4ba84a132d29..aa84121c5611 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -261,8 +261,8 @@ ABI/API
L: linux-api@vger.kernel.org
F: include/linux/syscalls.h
F: kernel/sys_ni.c
-F: include/uapi/
-F: arch/*/include/uapi/
+X: include/uapi/
+X: arch/*/include/uapi/
ABIT UGURU 1,2 HARDWARE MONITOR DRIVER
M: Hans de Goede <hdegoede@redhat.com>
diff --git a/arch/ia64/include/asm/syscall.h b/arch/ia64/include/asm/syscall.h
index 6c6f16e409a8..0d23c0049301 100644
--- a/arch/ia64/include/asm/syscall.h
+++ b/arch/ia64/include/asm/syscall.h
@@ -32,7 +32,7 @@ static inline void syscall_rollback(struct task_struct *task,
static inline long syscall_get_error(struct task_struct *task,
struct pt_regs *regs)
{
- return regs->r10 == -1 ? regs->r8:0;
+ return regs->r10 == -1 ? -regs->r8:0;
}
static inline long syscall_get_return_value(struct task_struct *task,
diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c
index c3490ee2daa5..e14f5653393a 100644
--- a/arch/ia64/kernel/ptrace.c
+++ b/arch/ia64/kernel/ptrace.c
@@ -2013,27 +2013,39 @@ static void syscall_get_set_args_cb(struct unw_frame_info *info, void *data)
{
struct syscall_get_set_args *args = data;
struct pt_regs *pt = args->regs;
- unsigned long *krbs, cfm, ndirty;
+ unsigned long *krbs, cfm, ndirty, nlocals, nouts;
int i, count;
if (unw_unwind_to_user(info) < 0)
return;
+ /*
+ * We get here via a few paths:
+ * - break instruction: cfm is shared with caller.
+ * syscall args are in out= regs, locals are non-empty.
+ * - epsinstruction: cfm is set by br.call
+ * locals don't exist.
+ *
+ * For both cases argguments are reachable in cfm.sof - cfm.sol.
+ * CFM: [ ... | sor: 17..14 | sol : 13..7 | sof : 6..0 ]
+ */
cfm = pt->cr_ifs;
+ nlocals = (cfm >> 7) & 0x7f; /* aka sol */
+ nouts = (cfm & 0x7f) - nlocals; /* aka sof - sol */
krbs = (unsigned long *)info->task + IA64_RBS_OFFSET/8;
ndirty = ia64_rse_num_regs(krbs, krbs + (pt->loadrs >> 19));
count = 0;
if (in_syscall(pt))
- count = min_t(int, args->n, cfm & 0x7f);
+ count = min_t(int, args->n, nouts);
+ /* Iterate over outs. */
for (i = 0; i < count; i++) {
+ int j = ndirty + nlocals + i + args->i;
if (args->rw)
- *ia64_rse_skip_regs(krbs, ndirty + i + args->i) =
- args->args[i];
+ *ia64_rse_skip_regs(krbs, j) = args->args[i];
else
- args->args[i] = *ia64_rse_skip_regs(krbs,
- ndirty + i + args->i);
+ args->args[i] = *ia64_rse_skip_regs(krbs, j);
}
if (!args->rw) {
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index a711a2e2a794..cf8deecc39ef 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -627,7 +627,7 @@ static ssize_t writeback_store(struct device *dev,
struct bio_vec bio_vec;
struct page *page;
ssize_t ret = len;
- int mode;
+ int mode, err;
unsigned long blk_idx = 0;
if (sysfs_streq(buf, "idle"))
@@ -638,8 +638,8 @@ static ssize_t writeback_store(struct device *dev,
if (strncmp(buf, PAGE_WB_SIG, sizeof(PAGE_WB_SIG) - 1))
return -EINVAL;
- ret = kstrtol(buf + sizeof(PAGE_WB_SIG) - 1, 10, &index);
- if (ret || index >= nr_pages)
+ if (kstrtol(buf + sizeof(PAGE_WB_SIG) - 1, 10, &index) ||
+ index >= nr_pages)
return -EINVAL;
nr_pages = 1;
@@ -663,7 +663,7 @@ static ssize_t writeback_store(struct device *dev,
goto release_init_lock;
}
- while (nr_pages--) {
+ for (; nr_pages != 0; index++, nr_pages--) {
struct bio_vec bvec;
bvec.bv_page = page;
@@ -728,12 +728,17 @@ static ssize_t writeback_store(struct device *dev,
* XXX: A single page IO would be inefficient for write
* but it would be not bad as starter.
*/
- ret = submit_bio_wait(&bio);
- if (ret) {
+ err = submit_bio_wait(&bio);
+ if (err) {
zram_slot_lock(zram, index);
zram_clear_flag(zram, index, ZRAM_UNDER_WB);
zram_clear_flag(zram, index, ZRAM_IDLE);
zram_slot_unlock(zram, index);
+ /*
+ * Return last IO error unless every IO were
+ * not suceeded.
+ */
+ ret = err;
continue;
}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
index 0a900afc66ff..45c9c6a7f1d6 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
@@ -500,8 +500,6 @@ vm_fault_t vmw_bo_vm_huge_fault(struct vm_fault *vmf,
vm_fault_t ret;
pgoff_t fault_page_size;
bool write = vmf->flags & FAULT_FLAG_WRITE;
- bool is_cow_mapping =
- (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
switch (pe_size) {
case PE_SIZE_PMD:
@@ -518,7 +516,7 @@ vm_fault_t vmw_bo_vm_huge_fault(struct vm_fault *vmf,
}
/* Always do write dirty-tracking and COW on PTE level. */
- if (write && (READ_ONCE(vbo->dirty) || is_cow_mapping))
+ if (write && (READ_ONCE(vbo->dirty) || is_cow_mapping(vma->vm_flags)))
return VM_FAULT_FALLBACK;
ret = ttm_bo_vm_reserve(bo, vmf);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
index 3c03b1746661..cb9975889e2f 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
@@ -49,7 +49,7 @@ int vmw_mmap(struct file *filp, struct vm_area_struct *vma)
vma->vm_ops = &vmw_vm_ops;
/* Use VM_PFNMAP rather than VM_MIXEDMAP if not a COW mapping */
- if ((vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) != VM_MAYWRITE)
+ if (!is_cow_mapping(vma->vm_flags))
vma->vm_flags = (vma->vm_flags & ~VM_MIXEDMAP) | VM_PFNMAP;
return 0;
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index c457334de43f..e1eae7ea823a 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -649,12 +649,24 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
struct super_block *sb = file_inode(file)->i_sb;
struct dentry *root = sb->s_root, *dentry;
int err = 0;
+ struct file *f = NULL;
e = create_entry(buffer, count);
if (IS_ERR(e))
return PTR_ERR(e);
+ if (e->flags & MISC_FMT_OPEN_FILE) {
+ f = open_exec(e->interpreter);
+ if (IS_ERR(f)) {
+ pr_notice("register: failed to install interpreter file %s\n",
+ e->interpreter);
+ kfree(e);
+ return PTR_ERR(f);
+ }
+ e->interp_file = f;
+ }
+
inode_lock(d_inode(root));
dentry = lookup_one_len(e->name, root, strlen(e->name));
err = PTR_ERR(dentry);
@@ -678,21 +690,6 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
goto out2;
}
- if (e->flags & MISC_FMT_OPEN_FILE) {
- struct file *f;
-
- f = open_exec(e->interpreter);
- if (IS_ERR(f)) {
- err = PTR_ERR(f);
- pr_notice("register: failed to install interpreter file %s\n", e->interpreter);
- simple_release_fs(&bm_mnt, &entry_count);
- iput(inode);
- inode = NULL;
- goto out2;
- }
- e->interp_file = f;
- }
-
e->dentry = dget(dentry);
inode->i_private = e;
inode->i_fop = &bm_entry_operations;
@@ -709,6 +706,8 @@ out:
inode_unlock(d_inode(root));
if (err) {
+ if (f)
+ filp_close(f, NULL);
kfree(e);
return err;
}
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 3cec6fbef725..e862cab69583 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1036,8 +1036,6 @@ struct clear_refs_private {
#ifdef CONFIG_MEM_SOFT_DIRTY
-#define is_cow_mapping(flags) (((flags) & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE)
-
static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr, pte_t pte)
{
struct page *page;
diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index 04c0a5a717f7..d217c382b02d 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -31,6 +31,12 @@
#define __no_sanitize_thread
#endif
+#if defined(CONFIG_ARCH_USE_BUILTIN_BSWAP)
+#define __HAVE_BUILTIN_BSWAP32__
+#define __HAVE_BUILTIN_BSWAP64__
+#define __HAVE_BUILTIN_BSWAP16__
+#endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP */
+
#if __has_feature(undefined_behavior_sanitizer)
/* GCC does not have __SANITIZE_UNDEFINED__ */
#define __no_sanitize_undefined \
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index c88bc24e31aa..d13e3cd938b4 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -460,7 +460,7 @@ static inline void memblock_free_late(phys_addr_t base, phys_addr_t size)
/*
* Set the allocation direction to bottom-up or top-down.
*/
-static inline void memblock_set_bottom_up(bool enable)
+static inline __init void memblock_set_bottom_up(bool enable)
{
memblock.bottom_up = enable;
}
@@ -470,7 +470,7 @@ static inline void memblock_set_bottom_up(bool enable)
* if this is true, that said, memblock will allocate memory
* in bottom-up direction.
*/
-static inline bool memblock_bottom_up(void)
+static inline __init bool memblock_bottom_up(void)
{
return memblock.bottom_up;
}
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index e6dc793d587d..0c04d39a7967 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1061,9 +1061,7 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm,
rcu_read_unlock();
}
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-void mem_cgroup_split_huge_fixup(struct page *head);
-#endif
+void split_page_memcg(struct page *head, unsigned int nr);
#else /* CONFIG_MEMCG */
@@ -1400,7 +1398,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
return 0;
}
-static inline void mem_cgroup_split_huge_fixup(struct page *head)
+static inline void split_page_memcg(struct page *head, unsigned int nr)
{
}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 77e64e3eac80..64a71bf20536 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1300,6 +1300,27 @@ static inline bool page_maybe_dma_pinned(struct page *page)
GUP_PIN_COUNTING_BIAS;
}
+static inline bool is_cow_mapping(vm_flags_t flags)
+{
+ return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
+}
+
+/*
+ * This should most likely only be called during fork() to see whether we
+ * should break the cow immediately for a page on the src mm.
+ */
+static inline bool page_needs_cow_for_dma(struct vm_area_struct *vma,
+ struct page *page)
+{
+ if (!is_cow_mapping(vma->vm_flags))
+ return false;
+
+ if (!atomic_read(&vma->vm_mm->has_pinned))
+ return false;
+
+ return page_maybe_dma_pinned(page);
+}
+
#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
#define SECTION_IN_PAGE_FLAGS
#endif
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 0974ad501a47..6613b26a8894 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -23,6 +23,7 @@
#endif
#define AT_VECTOR_SIZE (2*(AT_VECTOR_SIZE_ARCH + AT_VECTOR_SIZE_BASE + 1))
+#define INIT_PASID 0
struct address_space;
struct mem_cgroup;
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 1ae08b8462a4..90b2a0bce11c 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -140,7 +140,8 @@ static inline bool in_vfork(struct task_struct *tsk)
* another oom-unkillable task does this it should blame itself.
*/
rcu_read_lock();
- ret = tsk->vfork_done && tsk->real_parent->mm == tsk->mm;
+ ret = tsk->vfork_done &&
+ rcu_dereference(tsk->real_parent)->mm == tsk->mm;
rcu_read_unlock();
return ret;
diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
index 30577c3aecf8..46fb3ebdd16e 100644
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -128,7 +128,7 @@ int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
const struct cpumask *cpus);
#else /* CONFIG_SMP || CONFIG_HOTPLUG_CPU */
-static inline int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
+static __always_inline int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
const struct cpumask *cpus)
{
unsigned long flags;
@@ -139,14 +139,15 @@ static inline int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
return ret;
}
-static inline int stop_machine(cpu_stop_fn_t fn, void *data,
- const struct cpumask *cpus)
+static __always_inline int
+stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
{
return stop_machine_cpuslocked(fn, data, cpus);
}
-static inline int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
- const struct cpumask *cpus)
+static __always_inline int
+stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
+ const struct cpumask *cpus)
{
return stop_machine(fn, data, cpus);
}
diff --git a/init/Kconfig b/init/Kconfig
index 30c849094c28..5f5c776ef192 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -119,8 +119,7 @@ config INIT_ENV_ARG_LIMIT
config COMPILE_TEST
bool "Compile also drivers which will not load"
- depends on !UML && !S390
- default n
+ depends on HAS_IOMEM
help
Some drivers can be compiled on a different platform than they are
intended to be run on. Despite they cannot be loaded there (or even
diff --git a/kernel/fork.c b/kernel/fork.c
index 72e444cd0ffe..0acc8ed1076b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -994,6 +994,13 @@ static void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
#endif
}
+static void mm_init_pasid(struct mm_struct *mm)
+{
+#ifdef CONFIG_IOMMU_SUPPORT
+ mm->pasid = INIT_PASID;
+#endif
+}
+
static void mm_init_uprobes_state(struct mm_struct *mm)
{
#ifdef CONFIG_UPROBES
@@ -1024,6 +1031,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
mm_init_cpumask(mm);
mm_init_aio(mm);
mm_init_owner(mm, p);
+ mm_init_pasid(mm);
RCU_INIT_POINTER(mm->exe_file, NULL);
mmu_notifier_subscriptions_init(mm);
init_tlb_flush_pending(mm);
diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan
index 624ae1df7984..fba9909e31b7 100644
--- a/lib/Kconfig.kasan
+++ b/lib/Kconfig.kasan
@@ -156,6 +156,7 @@ config KASAN_STACK_ENABLE
config KASAN_STACK
int
+ depends on KASAN_GENERIC || KASAN_SW_TAGS
default 1 if KASAN_STACK_ENABLE || CC_IS_GCC
default 0
diff --git a/mm/highmem.c b/mm/highmem.c
index 874b732b120c..86f2b9495f9c 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -368,20 +368,24 @@ void zero_user_segments(struct page *page, unsigned start1, unsigned end1,
BUG_ON(end1 > page_size(page) || end2 > page_size(page));
+ if (start1 >= end1)
+ start1 = end1 = 0;
+ if (start2 >= end2)
+ start2 = end2 = 0;
+
for (i = 0; i < compound_nr(page); i++) {
void *kaddr = NULL;
- if (start1 < PAGE_SIZE || start2 < PAGE_SIZE)
- kaddr = kmap_atomic(page + i);
-
if (start1 >= PAGE_SIZE) {
start1 -= PAGE_SIZE;
end1 -= PAGE_SIZE;
} else {
unsigned this_end = min_t(unsigned, end1, PAGE_SIZE);
- if (end1 > start1)
+ if (end1 > start1) {
+ kaddr = kmap_atomic(page + i);
memset(kaddr + start1, 0, this_end - start1);
+ }
end1 -= this_end;
start1 = 0;
}
@@ -392,8 +396,11 @@ void zero_user_segments(struct page *page, unsigned start1, unsigned end1,
} else {
unsigned this_end = min_t(unsigned, end2, PAGE_SIZE);
- if (end2 > start2)
+ if (end2 > start2) {
+ if (!kaddr)
+ kaddr = kmap_atomic(page + i);
memset(kaddr + start2, 0, this_end - start2);
+ }
end2 -= this_end;
start2 = 0;
}
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 395c75111d33..ae907a9c2050 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1100,9 +1100,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
* best effort that the pinned pages won't be replaced by another
* random page during the coming copy-on-write.
*/
- if (unlikely(is_cow_mapping(vma->vm_flags) &&
- atomic_read(&src_mm->has_pinned) &&
- page_maybe_dma_pinned(src_page))) {
+ if (unlikely(page_needs_cow_for_dma(vma, src_page))) {
pte_free(dst_mm, pgtable);
spin_unlock(src_ptl);
spin_unlock(dst_ptl);
@@ -1214,9 +1212,7 @@ int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm,
}
/* Please refer to comments in copy_huge_pmd() */
- if (unlikely(is_cow_mapping(vma->vm_flags) &&
- atomic_read(&src_mm->has_pinned) &&
- page_maybe_dma_pinned(pud_page(pud)))) {
+ if (unlikely(page_needs_cow_for_dma(vma, pud_page(pud)))) {
spin_unlock(src_ptl);
spin_unlock(dst_ptl);
__split_huge_pud(vma, src_pud, addr);
@@ -2471,7 +2467,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
int i;
/* complete memcg works before add pages to LRU */
- mem_cgroup_split_huge_fixup(head);
+ split_page_memcg(head, nr);
if (PageAnon(head) && PageSwapCache(head)) {
swp_entry_t entry = { .val = page_private(head) };
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 8fb42c6dd74b..5b1ab1f427c5 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -331,6 +331,24 @@ static void coalesce_file_region(struct resv_map *resv, struct file_region *rg)
}
}
+static inline long
+hugetlb_resv_map_add(struct resv_map *map, struct file_region *rg, long from,
+ long to, struct hstate *h, struct hugetlb_cgroup *cg,
+ long *regions_needed)
+{
+ struct file_region *nrg;
+
+ if (!regions_needed) {
+ nrg = get_file_region_entry_from_cache(map, from, to);
+ record_hugetlb_cgroup_uncharge_info(cg, h, map, nrg);
+ list_add(&nrg->link, rg->link.prev);
+ coalesce_file_region(map, nrg);
+ } else
+ *regions_needed += 1;
+
+ return to - from;
+}
+
/*
* Must be called with resv->lock held.
*
@@ -346,7 +364,7 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t,
long add = 0;
struct list_head *head = &resv->regions;
long last_accounted_offset = f;
- struct file_region *rg = NULL, *trg = NULL, *nrg = NULL;
+ struct file_region *rg = NULL, *trg = NULL;
if (regions_needed)
*regions_needed = 0;
@@ -369,24 +387,17 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t,
/* When we find a region that starts beyond our range, we've
* finished.
*/
- if (rg->from > t)
+ if (rg->from >= t)
break;
/* Add an entry for last_accounted_offset -> rg->from, and
* update last_accounted_offset.
*/
- if (rg->from > last_accounted_offset) {
- add += rg->from - last_accounted_offset;
- if (!regions_needed) {
- nrg = get_file_region_entry_from_cache(
- resv, last_accounted_offset, rg->from);
- record_hugetlb_cgroup_uncharge_info(h_cg, h,
- resv, nrg);
- list_add(&nrg->link, rg->link.prev);
- coalesce_file_region(resv, nrg);
- } else
- *regions_needed += 1;
- }
+ if (rg->from > last_accounted_offset)
+ add += hugetlb_resv_map_add(resv, rg,
+ last_accounted_offset,
+ rg->from, h, h_cg,
+ regions_needed);
last_accounted_offset = rg->to;
}
@@ -394,17 +405,9 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t,
/* Handle the case where our range extends beyond
* last_accounted_offset.
*/
- if (last_accounted_offset < t) {
- add += t - last_accounted_offset;
- if (!regions_needed) {
- nrg = get_file_region_entry_from_cache(
- resv, last_accounted_offset, t);
- record_hugetlb_cgroup_uncharge_info(h_cg, h, resv, nrg);
- list_add(&nrg->link, rg->link.prev);
- coalesce_file_region(resv, nrg);
- } else
- *regions_needed += 1;
- }
+ if (last_accounted_offset < t)
+ add += hugetlb_resv_map_add(resv, rg, last_accounted_offset,
+ t, h, h_cg, regions_needed);
VM_BUG_ON(add < 0);
return add;
@@ -3725,21 +3728,32 @@ static bool is_hugetlb_entry_hwpoisoned(pte_t pte)
return false;
}
+static void
+hugetlb_install_page(struct vm_area_struct *vma, pte_t *ptep, unsigned long addr,
+ struct page *new_page)
+{
+ __SetPageUptodate(new_page);
+ set_huge_pte_at(vma->vm_mm, addr, ptep, make_huge_pte(vma, new_page, 1));
+ hugepage_add_new_anon_rmap(new_page, vma, addr);
+ hugetlb_count_add(pages_per_huge_page(hstate_vma(vma)), vma->vm_mm);
+ ClearHPageRestoreReserve(new_page);
+ SetHPageMigratable(new_page);
+}
+
int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
struct vm_area_struct *vma)
{
pte_t *src_pte, *dst_pte, entry, dst_entry;
struct page *ptepage;
unsigned long addr;
- int cow;
+ bool cow = is_cow_mapping(vma->vm_flags);
struct hstate *h = hstate_vma(vma);
unsigned long sz = huge_page_size(h);
+ unsigned long npages = pages_per_huge_page(h);
struct address_space *mapping = vma->vm_file->f_mapping;
struct mmu_notifier_range range;
int ret = 0;
- cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
-
if (cow) {
mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, src,
vma->vm_start,
@@ -3784,6 +3798,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
entry = huge_ptep_get(src_pte);
dst_entry = huge_ptep_get(dst_pte);
+again:
if (huge_pte_none(entry) || !huge_pte_none(dst_entry)) {
/*
* Skip if src entry none. Also, skip in the
@@ -3807,6 +3822,52 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
}
set_huge_swap_pte_at(dst, addr, dst_pte, entry, sz);
} else {
+ entry = huge_ptep_get(src_pte);
+ ptepage = pte_page(entry);
+ get_page(ptepage);
+
+ /*
+ * This is a rare case where we see pinned hugetlb
+ * pages while they're prone to COW. We need to do the
+ * COW earlier during fork.
+ *
+ * When pre-allocating the page or copying data, we
+ * need to be without the pgtable locks since we could
+ * sleep during the process.
+ */
+ if (unlikely(page_needs_cow_for_dma(vma, ptepage))) {
+ pte_t src_pte_old = entry;
+ struct page *new;
+
+ spin_unlock(src_ptl);
+ spin_unlock(dst_ptl);
+ /* Do not use reserve as it's private owned */
+ new = alloc_huge_page(vma, addr, 1);
+ if (IS_ERR(new)) {
+ put_page(ptepage);
+ ret = PTR_ERR(new);
+ break;
+ }
+ copy_user_huge_page(new, ptepage, addr, vma,
+ npages);
+ put_page(ptepage);
+
+ /* Install the new huge page if src pte stable */
+ dst_ptl = huge_pte_lock(h, dst, dst_pte);
+ src_ptl = huge_pte_lockptr(h, src, src_pte);
+ spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
+ entry = huge_ptep_get(src_pte);
+ if (!pte_same(src_pte_old, entry)) {
+ put_page(new);
+ /* dst_entry won't change as in child */
+ goto again;
+ }
+ hugetlb_install_page(vma, dst_pte, addr, new);
+ spin_unlock(src_ptl);
+ spin_unlock(dst_ptl);
+ continue;
+ }
+
if (cow) {
/*
* No need to notify as we are downgrading page
@@ -3817,12 +3878,10 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
*/
huge_ptep_set_wrprotect(src, addr, src_pte);
}
- entry = huge_ptep_get(src_pte);
- ptepage = pte_page(entry);
- get_page(ptepage);
+
page_dup_rmap(ptepage, true);
set_huge_pte_at(dst, addr, dst_pte, entry);
- hugetlb_count_add(pages_per_huge_page(h), dst);
+ hugetlb_count_add(npages, dst);
}
spin_unlock(src_ptl);
spin_unlock(dst_ptl);
diff --git a/mm/internal.h b/mm/internal.h
index 9902648f2206..1432feec62df 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -296,11 +296,6 @@ static inline unsigned int buddy_order(struct page *page)
*/
#define buddy_order_unsafe(page) READ_ONCE(page_private(page))
-static inline bool is_cow_mapping(vm_flags_t flags)
-{
- return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
-}
-
/*
* These three helpers classifies VMAs for virtual memory accounting.
*/
diff --git a/mm/kfence/report.c b/mm/kfence/report.c
index ab83d5a59bb1..e3f71451ad9e 100644
--- a/mm/kfence/report.c
+++ b/mm/kfence/report.c
@@ -20,6 +20,11 @@
#include "kfence.h"
+/* May be overridden by <asm/kfence.h>. */
+#ifndef ARCH_FUNC_PREFIX
+#define ARCH_FUNC_PREFIX ""
+#endif
+
extern bool no_hash_pointers;
/* Helper function to either print to a seq_file or to console. */
@@ -67,8 +72,9 @@ static int get_stack_skipnr(const unsigned long stack_entries[], int num_entries
for (skipnr = 0; skipnr < num_entries; skipnr++) {
int len = scnprintf(buf, sizeof(buf), "%ps", (void *)stack_entries[skipnr]);
- if (str_has_prefix(buf, "kfence_") || str_has_prefix(buf, "__kfence_") ||
- !strncmp(buf, "__slab_free", len)) {
+ if (str_has_prefix(buf, ARCH_FUNC_PREFIX "kfence_") ||
+ str_has_prefix(buf, ARCH_FUNC_PREFIX "__kfence_") ||
+ !strncmp(buf, ARCH_FUNC_PREFIX "__slab_free", len)) {
/*
* In case of tail calls from any of the below
* to any of the above.
@@ -77,10 +83,10 @@ static int get_stack_skipnr(const unsigned long stack_entries[], int num_entries
}
/* Also the *_bulk() variants by only checking prefixes. */
- if (str_has_prefix(buf, "kfree") ||
- str_has_prefix(buf, "kmem_cache_free") ||
- str_has_prefix(buf, "__kmalloc") ||
- str_has_prefix(buf, "kmem_cache_alloc"))
+ if (str_has_prefix(buf, ARCH_FUNC_PREFIX "kfree") ||
+ str_has_prefix(buf, ARCH_FUNC_PREFIX "kmem_cache_free") ||
+ str_has_prefix(buf, ARCH_FUNC_PREFIX "__kmalloc") ||
+ str_has_prefix(buf, ARCH_FUNC_PREFIX "kmem_cache_alloc"))
goto found;
}
if (fallback < num_entries)
@@ -116,12 +122,12 @@ void kfence_print_object(struct seq_file *seq, const struct kfence_metadata *met
lockdep_assert_held(&meta->lock);
if (meta->state == KFENCE_OBJECT_UNUSED) {
- seq_con_printf(seq, "kfence-#%zd unused\n", meta - kfence_metadata);
+ seq_con_printf(seq, "kfence-#%td unused\n", meta - kfence_metadata);
return;
}
seq_con_printf(seq,
- "kfence-#%zd [0x%p-0x%p"
+ "kfence-#%td [0x%p-0x%p"
", size=%d, cache=%s] allocated by task %d:\n",
meta - kfence_metadata, (void *)start, (void *)(start + size - 1), size,
(cache && cache->name) ? cache->name : "<destroyed>", meta->alloc_track.pid);
@@ -204,7 +210,7 @@ void kfence_report_error(unsigned long address, bool is_write, struct pt_regs *r
pr_err("BUG: KFENCE: out-of-bounds %s in %pS\n\n", get_access_type(is_write),
(void *)stack_entries[skipnr]);
- pr_err("Out-of-bounds %s at 0x%p (%luB %s of kfence-#%zd):\n",
+ pr_err("Out-of-bounds %s at 0x%p (%luB %s of kfence-#%td):\n",
get_access_type(is_write), (void *)address,
left_of_object ? meta->addr - address : address - meta->addr,
left_of_object ? "left" : "right", object_index);
@@ -213,14 +219,14 @@ void kfence_report_error(unsigned long address, bool is_write, struct pt_regs *r
case KFENCE_ERROR_UAF:
pr_err("BUG: KFENCE: use-after-free %s in %pS\n\n", get_access_type(is_write),
(void *)stack_entries[skipnr]);
- pr_err("Use-after-free %s at 0x%p (in kfence-#%zd):\n",
+ pr_err("Use-after-free %s at 0x%p (in kfence-#%td):\n",
get_access_type(is_write), (void *)address, object_index);
break;
case KFENCE_ERROR_CORRUPTION:
pr_err("BUG: KFENCE: memory corruption in %pS\n\n", (void *)stack_entries[skipnr]);
pr_err("Corrupted memory at 0x%p ", (void *)address);
print_diff_canary(address, 16, meta);
- pr_cont(" (in kfence-#%zd):\n", object_index);
+ pr_cont(" (in kfence-#%td):\n", object_index);
break;
case KFENCE_ERROR_INVALID:
pr_err("BUG: KFENCE: invalid %s in %pS\n\n", get_access_type(is_write),
@@ -230,7 +236,7 @@ void kfence_report_error(unsigned long address, bool is_write, struct pt_regs *r
break;
case KFENCE_ERROR_INVALID_FREE:
pr_err("BUG: KFENCE: invalid free in %pS\n\n", (void *)stack_entries[skipnr]);
- pr_err("Invalid free of 0x%p (in kfence-#%zd):\n", (void *)address,
+ pr_err("Invalid free of 0x%p (in kfence-#%td):\n", (void *)address,
object_index);
break;
}
diff --git a/mm/madvise.c b/mm/madvise.c
index df692d2e35d4..01fef79ac761 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -1198,12 +1198,22 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec,
goto release_task;
}
- mm = mm_access(task, PTRACE_MODE_ATTACH_FSCREDS);
+ /* Require PTRACE_MODE_READ to avoid leaking ASLR metadata. */
+ mm = mm_access(task, PTRACE_MODE_READ_FSCREDS);
if (IS_ERR_OR_NULL(mm)) {
ret = IS_ERR(mm) ? PTR_ERR(mm) : -ESRCH;
goto release_task;
}
+ /*
+ * Require CAP_SYS_NICE for influencing process performance. Note that
+ * only non-destructive hints are currently supported.
+ */
+ if (!capable(CAP_SYS_NICE)) {
+ ret = -EPERM;
+ goto release_mm;
+ }
+
total_len = iov_iter_count(&iter);
while (iov_iter_count(&iter)) {
@@ -1218,6 +1228,7 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec,
if (ret == 0)
ret = total_len - iov_iter_count(&iter);
+release_mm:
mmput(mm);
release_task:
put_task_struct(task);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 845eec01ef9d..e064ac0d850a 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3287,24 +3287,21 @@ void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size)
#endif /* CONFIG_MEMCG_KMEM */
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
- * Because page_memcg(head) is not set on compound tails, set it now.
+ * Because page_memcg(head) is not set on tails, set it now.
*/
-void mem_cgroup_split_huge_fixup(struct page *head)
+void split_page_memcg(struct page *head, unsigned int nr)
{
struct mem_cgroup *memcg = page_memcg(head);
int i;
- if (mem_cgroup_disabled())
+ if (mem_cgroup_disabled() || !memcg)
return;
- for (i = 1; i < HPAGE_PMD_NR; i++) {
- css_get(&memcg->css);
- head[i].memcg_data = (unsigned long)memcg;
- }
+ for (i = 1; i < nr; i++)
+ head[i].memcg_data = head->memcg_data;
+ css_get_many(&memcg->css, nr - 1);
}
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#ifdef CONFIG_MEMCG_SWAP
/**
diff --git a/mm/memory.c b/mm/memory.c
index c8e357627318..5efa07fb6cdc 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -809,12 +809,8 @@ copy_present_page(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma
pte_t *dst_pte, pte_t *src_pte, unsigned long addr, int *rss,
struct page **prealloc, pte_t pte, struct page *page)
{
- struct mm_struct *src_mm = src_vma->vm_mm;
struct page *new_page;
- if (!is_cow_mapping(src_vma->vm_flags))
- return 1;
-
/*
* What we want to do is to check whether this page may
* have been pinned by the parent process. If so,
@@ -828,9 +824,7 @@ copy_present_page(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma
* the page count. That might give false positives for
* for pinning, but it will work correctly.
*/
- if (likely(!atomic_read(&src_mm->has_pinned)))
- return 1;
- if (likely(!page_maybe_dma_pinned(page)))
+ if (likely(!page_needs_cow_for_dma(src_vma, page)))
return 1;
new_page = *prealloc;
@@ -3103,6 +3097,14 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
return handle_userfault(vmf, VM_UFFD_WP);
}
+ /*
+ * Userfaultfd write-protect can defer flushes. Ensure the TLB
+ * is flushed in this case before copying.
+ */
+ if (unlikely(userfaultfd_wp(vmf->vma) &&
+ mm_tlb_flush_pending(vmf->vma->vm_mm)))
+ flush_tlb_page(vmf->vma, vmf->address);
+
vmf->page = vm_normal_page(vma, vmf->address, vmf->orig_pte);
if (!vmf->page) {
/*
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3e4b29ee2b1e..cfc72873961d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1282,6 +1282,12 @@ static __always_inline bool free_pages_prepare(struct page *page,
kernel_poison_pages(page, 1 << order);
/*
+ * With hardware tag-based KASAN, memory tags must be set before the
+ * page becomes unavailable via debug_pagealloc or arch_free_page.
+ */
+ kasan_free_nondeferred_pages(page, order);
+
+ /*
* arch_free_page() can make the page's contents inaccessible. s390
* does this. So nothing which can access the page's contents should
* happen after this.
@@ -1290,8 +1296,6 @@ static __always_inline bool free_pages_prepare(struct page *page,
debug_pagealloc_unmap_pages(page, 1 << order);
- kasan_free_nondeferred_pages(page, order);
-
return true;
}
@@ -3310,6 +3314,7 @@ void split_page(struct page *page, unsigned int order)
for (i = 1; i < (1 << order); i++)
set_page_refcounted(page + i);
split_page_owner(page, 1 << order);
+ split_page_memcg(page, 1 << order);
}
EXPORT_SYMBOL_GPL(split_page);
@@ -6259,12 +6264,65 @@ static void __meminit zone_init_free_lists(struct zone *zone)
}
}
+#if !defined(CONFIG_FLAT_NODE_MEM_MAP)
+/*
+ * Only struct pages that correspond to ranges defined by memblock.memory
+ * are zeroed and initialized by going through __init_single_page() during
+ * memmap_init_zone().
+ *
+ * But, there could be struct pages that correspond to holes in
+ * memblock.memory. This can happen because of the following reasons:
+ * - physical memory bank size is not necessarily the exact multiple of the
+ * arbitrary section size
+ * - early reserved memory may not be listed in memblock.memory
+ * - memory layouts defined with memmap= kernel parameter may not align
+ * nicely with memmap sections
+ *
+ * Explicitly initialize those struct pages so that:
+ * - PG_Reserved is set
+ * - zone and node links point to zone and node that span the page if the
+ * hole is in the middle of a zone
+ * - zone and node links point to adjacent zone/node if the hole falls on
+ * the zone boundary; the pages in such holes will be prepended to the
+ * zone/node above the hole except for the trailing pages in the last
+ * section that will be appended to the zone/node below.
+ */
+static u64 __meminit init_unavailable_range(unsigned long spfn,
+ unsigned long epfn,
+ int zone, int node)
+{
+ unsigned long pfn;
+ u64 pgcnt = 0;
+
+ for (pfn = spfn; pfn < epfn; pfn++) {
+ if (!pfn_valid(ALIGN_DOWN(pfn, pageblock_nr_pages))) {
+ pfn = ALIGN_DOWN(pfn, pageblock_nr_pages)
+ + pageblock_nr_pages - 1;
+ continue;
+ }
+ __init_single_page(pfn_to_page(pfn), pfn, zone, node);
+ __SetPageReserved(pfn_to_page(pfn));
+ pgcnt++;
+ }
+
+ return pgcnt;
+}
+#else
+static inline u64 init_unavailable_range(unsigned long spfn, unsigned long epfn,
+ int zone, int node)
+{
+ return 0;
+}
+#endif
+
void __meminit __weak memmap_init_zone(struct zone *zone)
{
unsigned long zone_start_pfn = zone->zone_start_pfn;
unsigned long zone_end_pfn = zone_start_pfn + zone->spanned_pages;
int i, nid = zone_to_nid(zone), zone_id = zone_idx(zone);
+ static unsigned long hole_pfn;
unsigned long start_pfn, end_pfn;
+ u64 pgcnt = 0;
for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
start_pfn = clamp(start_pfn, zone_start_pfn, zone_end_pfn);
@@ -6274,7 +6332,29 @@ void __meminit __weak memmap_init_zone(struct zone *zone)
memmap_init_range(end_pfn - start_pfn, nid,
zone_id, start_pfn, zone_end_pfn,
MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
+
+ if (hole_pfn < start_pfn)
+ pgcnt += init_unavailable_range(hole_pfn, start_pfn,
+ zone_id, nid);
+ hole_pfn = end_pfn;
}
+
+#ifdef CONFIG_SPARSEMEM
+ /*
+ * Initialize the hole in the range [zone_end_pfn, section_end].
+ * If zone boundary falls in the middle of a section, this hole
+ * will be re-initialized during the call to this function for the
+ * higher zone.
+ */
+ end_pfn = round_up(zone_end_pfn, PAGES_PER_SECTION);
+ if (hole_pfn < end_pfn)
+ pgcnt += init_unavailable_range(hole_pfn, end_pfn,
+ zone_id, nid);
+#endif
+
+ if (pgcnt)
+ pr_info(" %s zone: %llu pages in unavailable ranges\n",
+ zone->name, pgcnt);
}
static int zone_batchsize(struct zone *zone)
@@ -7071,88 +7151,6 @@ void __init free_area_init_memoryless_node(int nid)
free_area_init_node(nid);
}
-#if !defined(CONFIG_FLAT_NODE_MEM_MAP)
-/*
- * Initialize all valid struct pages in the range [spfn, epfn) and mark them
- * PageReserved(). Return the number of struct pages that were initialized.
- */
-static u64 __init init_unavailable_range(unsigned long spfn, unsigned long epfn)
-{
- unsigned long pfn;
- u64 pgcnt = 0;
-
- for (pfn = spfn; pfn < epfn; pfn++) {
- if (!pfn_valid(ALIGN_DOWN(pfn, pageblock_nr_pages))) {
- pfn = ALIGN_DOWN(pfn, pageblock_nr_pages)
- + pageblock_nr_pages - 1;
- continue;
- }
- /*
- * Use a fake node/zone (0) for now. Some of these pages
- * (in memblock.reserved but not in memblock.memory) will
- * get re-initialized via reserve_bootmem_region() later.
- */
- __init_single_page(pfn_to_page(pfn), pfn, 0, 0);
- __SetPageReserved(pfn_to_page(pfn));
- pgcnt++;
- }
-
- return pgcnt;
-}
-
-/*
- * Only struct pages that are backed by physical memory are zeroed and
- * initialized by going through __init_single_page(). But, there are some
- * struct pages which are reserved in memblock allocator and their fields
- * may be accessed (for example page_to_pfn() on some configuration accesses
- * flags). We must explicitly initialize those struct pages.
- *
- * This function also addresses a similar issue where struct pages are left
- * uninitialized because the physical address range is not covered by
- * memblock.memory or memblock.reserved. That could happen when memblock
- * layout is manually configured via memmap=, or when the highest physical
- * address (max_pfn) does not end on a section boundary.
- */
-static void __init init_unavailable_mem(void)
-{
- phys_addr_t start, end;
- u64 i, pgcnt;
- phys_addr_t next = 0;
-
- /*
- * Loop through unavailable ranges not covered by memblock.memory.
- */
- pgcnt = 0;
- for_each_mem_range(i, &start, &end) {
- if (next < start)
- pgcnt += init_unavailable_range(PFN_DOWN(next),
- PFN_UP(start));
- next = end;
- }
-
- /*
- * Early sections always have a fully populated memmap for the whole
- * section - see pfn_valid(). If the last section has holes at the
- * end and that section is marked "online", the memmap will be
- * considered initialized. Make sure that memmap has a well defined
- * state.
- */
- pgcnt += init_unavailable_range(PFN_DOWN(next),
- round_up(max_pfn, PAGES_PER_SECTION));
-
- /*
- * Struct pages that do not have backing memory. This could be because
- * firmware is using some of this memory, or for some other reasons.
- */
- if (pgcnt)
- pr_info("Zeroed struct page in unavailable ranges: %lld pages", pgcnt);
-}
-#else
-static inline void __init init_unavailable_mem(void)
-{
-}
-#endif /* !CONFIG_FLAT_NODE_MEM_MAP */
-
#if MAX_NUMNODES > 1
/*
* Figure out the number of possible node ids.
@@ -7576,7 +7574,6 @@ void __init free_area_init(unsigned long *max_zone_pfn)
/* Initialise every node */
mminit_verify_pageflags_layout();
setup_nr_node_ids();
- init_unavailable_mem();
for_each_online_node(nid) {
pg_data_t *pgdat = NODE_DATA(nid);
free_area_init_node(nid);
diff --git a/mm/slab.c b/mm/slab.c
index 51fd424e0d6d..ae651bf540b7 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2992,7 +2992,7 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
gfp_t flags, void *objp, unsigned long caller)
{
WARN_ON_ONCE(cachep->ctor && (flags & __GFP_ZERO));
- if (!objp)
+ if (!objp || is_kfence_address(objp))
return objp;
if (cachep->flags & SLAB_POISON) {
check_poison_obj(cachep, objp);