Diffstat (limited to 'mm/util.c')
-rw-r--r--  mm/util.c  355
1 file changed, 162 insertions, 193 deletions
diff --git a/mm/util.c b/mm/util.c
index 669397235787..448117da071f 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -12,6 +12,7 @@
 #include <linux/security.h>
 #include <linux/swap.h>
 #include <linux/swapops.h>
+#include <linux/sysctl.h>
 #include <linux/mman.h>
 #include <linux/hugetlb.h>
 #include <linux/vmalloc.h>
@@ -23,9 +24,12 @@
 #include <linux/processor.h>
 #include <linux/sizes.h>
 #include <linux/compat.h>
+#include <linux/fsnotify.h>
 
 #include <linux/uaccess.h>
 
+#include <kunit/visibility.h>
+
 #include "internal.h"
 #include "swap.h"
 
@@ -43,27 +47,41 @@ void kfree_const(const void *x)
 EXPORT_SYMBOL(kfree_const);
 
 /**
- * kstrdup - allocate space for and copy an existing string
- * @s: the string to duplicate
+ * __kmemdup_nul - Create a NUL-terminated string from @s, which might be unterminated.
+ * @s: The data to copy
+ * @len: The size of the data, not including the NUL terminator
  * @gfp: the GFP mask used in the kmalloc() call when allocating memory
  *
- * Return: newly allocated copy of @s or %NULL in case of error
+ * Return: newly allocated copy of @s with NUL-termination or %NULL in
+ * case of error
  */
-noinline
-char *kstrdup(const char *s, gfp_t gfp)
+static __always_inline char *__kmemdup_nul(const char *s, size_t len, gfp_t gfp)
 {
-	size_t len;
 	char *buf;
 
-	if (!s)
+	/* '+1' for the NUL terminator */
+	buf = kmalloc_track_caller(len + 1, gfp);
+	if (!buf)
 		return NULL;
 
-	len = strlen(s) + 1;
-	buf = kmalloc_track_caller(len, gfp);
-	if (buf)
-		memcpy(buf, s, len);
+	memcpy(buf, s, len);
+	/* Ensure the buf is always NUL-terminated, regardless of @s. */
+	buf[len] = '\0';
 	return buf;
 }
+
+/**
+ * kstrdup - allocate space for and copy an existing string
+ * @s: the string to duplicate
+ * @gfp: the GFP mask used in the kmalloc() call when allocating memory
+ *
+ * Return: newly allocated copy of @s or %NULL in case of error
+ */
+noinline
+char *kstrdup(const char *s, gfp_t gfp)
+{
+	return s ? __kmemdup_nul(s, strlen(s), gfp) : NULL;
+}
 EXPORT_SYMBOL(kstrdup);
 
 /**
@@ -98,19 +116,7 @@ EXPORT_SYMBOL(kstrdup_const);
  */
 char *kstrndup(const char *s, size_t max, gfp_t gfp)
 {
-	size_t len;
-	char *buf;
-
-	if (!s)
-		return NULL;
-
-	len = strnlen(s, max);
-	buf = kmalloc_track_caller(len+1, gfp);
-	if (buf) {
-		memcpy(buf, s, len);
-		buf[len] = '\0';
-	}
-	return buf;
+	return s ? __kmemdup_nul(s, strnlen(s, max), gfp) : NULL;
 }
 EXPORT_SYMBOL(kstrndup);
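
The hunks above fold kstrdup() and kstrndup() (and, further down, kmemdup_nul()) into one shared __kmemdup_nul() helper, and with it give all three the same guarantee: the copy is NUL-terminated even when the source buffer is not. A minimal usage sketch of the caller-visible behavior (the fixed-size field here is hypothetical, not from the patch):

    #include <linux/slab.h>
    #include <linux/string.h>

    /* Hypothetical: duplicate a fixed-size, possibly unterminated field. */
    static char *copy_label(const char raw[8])
    {
            /*
             * strnlen() stops at 8 bytes even without a NUL; kstrndup()
             * hands that length to __kmemdup_nul(), which allocates
             * len + 1 bytes and always terminates the copy.
             */
            return kstrndup(raw, 8, GFP_KERNEL);
    }
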
@@ -124,29 +130,29 @@ EXPORT_SYMBOL(kstrndup);
  * Return: newly allocated copy of @src or %NULL in case of error,
  * result is physically contiguous. Use kfree() to free.
  */
-void *kmemdup(const void *src, size_t len, gfp_t gfp)
+void *kmemdup_noprof(const void *src, size_t len, gfp_t gfp)
 {
 	void *p;
 
-	p = kmalloc_track_caller(len, gfp);
+	p = kmalloc_node_track_caller_noprof(len, gfp, NUMA_NO_NODE, _RET_IP_);
 	if (p)
 		memcpy(p, src, len);
 	return p;
 }
-EXPORT_SYMBOL(kmemdup);
+EXPORT_SYMBOL(kmemdup_noprof);
 
 /**
  * kmemdup_array - duplicate a given array.
  *
  * @src: array to duplicate.
- * @element_size: size of each element of array.
  * @count: number of elements to duplicate from array.
+ * @element_size: size of each element of array.
  * @gfp: GFP mask to use.
  *
  * Return: duplicated array of @src or %NULL in case of error,
 * result is physically contiguous. Use kfree() to free.
 */
-void *kmemdup_array(const void *src, size_t element_size, size_t count, gfp_t gfp)
+void *kmemdup_array(const void *src, size_t count, size_t element_size, gfp_t gfp)
 {
 	return kmemdup(src, size_mul(element_size, count), gfp);
 }
@@ -184,19 +190,19 @@ EXPORT_SYMBOL(kvmemdup);
  */
 char *kmemdup_nul(const char *s, size_t len, gfp_t gfp)
 {
-	char *buf;
+	return s ? __kmemdup_nul(s, len, gfp) : NULL;
+}
+EXPORT_SYMBOL(kmemdup_nul);
 
-	if (!s)
-		return NULL;
+static kmem_buckets *user_buckets __ro_after_init;
 
-	buf = kmalloc_track_caller(len + 1, gfp);
-	if (buf) {
-		memcpy(buf, s, len);
-		buf[len] = '\0';
-	}
-	return buf;
+static int __init init_user_buckets(void)
+{
+	user_buckets = kmem_buckets_create("memdup_user", 0, 0, INT_MAX, NULL);
+
+	return 0;
 }
-EXPORT_SYMBOL(kmemdup_nul);
+subsys_initcall(init_user_buckets);
 
 /**
  * memdup_user - duplicate memory region from user space
@@ -211,7 +217,7 @@ void *memdup_user(const void __user *src, size_t len)
 {
 	void *p;
 
-	p = kmalloc_track_caller(len, GFP_USER | __GFP_NOWARN);
+	p = kmem_buckets_alloc_track_caller(user_buckets, len, GFP_USER | __GFP_NOWARN);
 	if (!p)
 		return ERR_PTR(-ENOMEM);
 
@@ -237,7 +243,7 @@ void *vmemdup_user(const void __user *src, size_t len)
 {
 	void *p;
 
-	p = kvmalloc(len, GFP_USER);
+	p = kmem_buckets_valloc(user_buckets, len, GFP_USER);
 	if (!p)
 		return ERR_PTR(-ENOMEM);
 
@@ -293,12 +299,7 @@ void *memdup_user_nul(const void __user *src, size_t len)
 {
 	char *p;
 
-	/*
-	 * Always use GFP_KERNEL, since copy_from_user() can sleep and
-	 * cause pagefault, which makes it pointless to use GFP_NOFS
-	 * or GFP_ATOMIC.
-	 */
-	p = kmalloc_track_caller(len + 1, GFP_KERNEL);
+	p = kmem_buckets_alloc_track_caller(user_buckets, len + 1, GFP_USER | __GFP_NOWARN);
 	if (!p)
 		return ERR_PTR(-ENOMEM);
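
memdup_user(), vmemdup_user() and memdup_user_nul() now allocate from a dedicated kmem_buckets set instead of the shared kmalloc caches, so objects whose size is chosen by userspace no longer share slabs with unrelated kernel objects; that separation is a heap-spraying mitigation. Note that memdup_user_nul() also switches from GFP_KERNEL to GFP_USER | __GFP_NOWARN, matching memdup_user(). The same pattern is available to other subsystems; a hedged sketch (the cache name and helper are invented for illustration):

    #include <linux/init.h>
    #include <linux/slab.h>

    static kmem_buckets *my_buckets __ro_after_init;

    static int __init my_buckets_init(void)
    {
            /* Per-size-class caches, named "my_subsys" in /proc/slabinfo. */
            my_buckets = kmem_buckets_create("my_subsys", 0, 0, INT_MAX, NULL);
            return 0;
    }
    subsys_initcall(my_buckets_init);

    /* Allocate a buffer whose size came from userspace. */
    static void *my_user_sized_alloc(size_t user_len)
    {
            return kmem_buckets_alloc(my_buckets, user_len, GFP_KERNEL);
    }
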
@@ -451,7 +452,7 @@ static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
 	if (gap + pad > gap)
 		gap += pad;
 
-	if (gap < MIN_GAP)
+	if (gap < MIN_GAP && MIN_GAP < MAX_GAP)
 		gap = MIN_GAP;
 	else if (gap > MAX_GAP)
 		gap = MAX_GAP;
@@ -469,19 +470,22 @@ void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
 
 	if (mmap_is_legacy(rlim_stack)) {
 		mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
-		mm->get_unmapped_area = arch_get_unmapped_area;
+		clear_bit(MMF_TOPDOWN, &mm->flags);
 	} else {
 		mm->mmap_base = mmap_base(random_factor, rlim_stack);
-		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
+		set_bit(MMF_TOPDOWN, &mm->flags);
 	}
 }
 #elif defined(CONFIG_MMU) && !defined(HAVE_ARCH_PICK_MMAP_LAYOUT)
 void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
 {
 	mm->mmap_base = TASK_UNMAPPED_BASE;
-	mm->get_unmapped_area = arch_get_unmapped_area;
+	clear_bit(MMF_TOPDOWN, &mm->flags);
 }
 #endif
+#ifdef CONFIG_MMU
+EXPORT_SYMBOL_IF_KUNIT(arch_pick_mmap_layout);
+#endif
 
 /**
  * __account_locked_vm - account locked pages to an mm's locked_vm
@@ -567,6 +571,8 @@ unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
 	LIST_HEAD(uf);
 
 	ret = security_mmap_file(file, prot, flag);
+	if (!ret)
+		ret = fsnotify_mmap_perm(file, prot, pgoff >> PAGE_SHIFT, len);
 	if (!ret) {
 		if (mmap_write_lock_killable(mm))
 			return -EINTR;
@@ -580,6 +586,23 @@ unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
 	return ret;
 }
 
+/*
+ * Perform a userland memory mapping into the current process address space. See
+ * the comment for do_mmap() for more details on this operation in general.
+ *
+ * This differs from do_mmap() in that:
+ *
+ * a. An offset parameter is provided rather than pgoff, which is both checked
+ *    for overflow and page alignment.
+ * b. mmap locking is performed on the caller's behalf.
+ * c. Userfaultfd unmap events and memory population are handled.
+ *
+ * This means that this function performs essentially the same work as if
+ * userland were invoking mmap (2).
+ *
+ * Returns either an error, or the address at which the requested mapping has
+ * been performed.
+ */
 unsigned long vm_mmap(struct file *file, unsigned long addr,
 	unsigned long len, unsigned long prot,
 	unsigned long flag, unsigned long offset)
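
Two behavioral notes on the chunk above: mmap_base() now only clamps the gap up to MIN_GAP when MIN_GAP is actually below MAX_GAP (guarding configurations where a huge stack rlimit pushes MIN_GAP past MAX_GAP), and the mmap layout choice becomes the MMF_TOPDOWN mm flag instead of a per-mm function pointer. Code that used to call mm->get_unmapped_area() is expected to test the bit and dispatch, roughly as follows (the helper name mm_get_unmapped_area() is how this series does it elsewhere, quoted from memory, so treat the exact signature as an assumption):

    unsigned long mm_get_unmapped_area(struct mm_struct *mm, struct file *file,
                                       unsigned long addr, unsigned long len,
                                       unsigned long pgoff, unsigned long flags)
    {
            /* One flag bit replaces the old mm->get_unmapped_area pointer. */
            if (test_bit(MMF_TOPDOWN, &mm->flags))
                    return arch_get_unmapped_area_topdown(file, addr, len,
                                                          pgoff, flags);
            return arch_get_unmapped_area(file, addr, len, pgoff, flags);
    }
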
@@ -594,152 +617,31 @@ unsigned long vm_mmap(struct file *file, unsigned long addr,
 EXPORT_SYMBOL(vm_mmap);
 
 /**
- * kvmalloc_node - attempt to allocate physically contiguous memory, but upon
- * failure, fall back to non-contiguous (vmalloc) allocation.
- * @size: size of the request.
- * @flags: gfp mask for the allocation - must be compatible (superset) with GFP_KERNEL.
- * @node: numa node to allocate from
- *
- * Uses kmalloc to get the memory but if the allocation fails then falls back
- * to the vmalloc allocator. Use kvfree for freeing the memory.
- *
- * GFP_NOWAIT and GFP_ATOMIC are not supported, neither is the __GFP_NORETRY modifier.
- * __GFP_RETRY_MAYFAIL is supported, and it should be used only if kmalloc is
- * preferable to the vmalloc fallback, due to visible performance drawbacks.
- *
- * Return: pointer to the allocated memory of %NULL in case of failure
- */
-void *kvmalloc_node(size_t size, gfp_t flags, int node)
-{
-	gfp_t kmalloc_flags = flags;
-	void *ret;
-
-	/*
-	 * We want to attempt a large physically contiguous block first because
-	 * it is less likely to fragment multiple larger blocks and therefore
-	 * contribute to a long term fragmentation less than vmalloc fallback.
-	 * However make sure that larger requests are not too disruptive - no
-	 * OOM killer and no allocation failure warnings as we have a fallback.
-	 */
-	if (size > PAGE_SIZE) {
-		kmalloc_flags |= __GFP_NOWARN;
-
-		if (!(kmalloc_flags & __GFP_RETRY_MAYFAIL))
-			kmalloc_flags |= __GFP_NORETRY;
-
-		/* nofail semantic is implemented by the vmalloc fallback */
-		kmalloc_flags &= ~__GFP_NOFAIL;
-	}
-
-	ret = kmalloc_node(size, kmalloc_flags, node);
-
-	/*
-	 * It doesn't really make sense to fallback to vmalloc for sub page
-	 * requests
-	 */
-	if (ret || size <= PAGE_SIZE)
-		return ret;
-
-	/* non-sleeping allocations are not supported by vmalloc */
-	if (!gfpflags_allow_blocking(flags))
-		return NULL;
-
-	/* Don't even allow crazy sizes */
-	if (unlikely(size > INT_MAX)) {
-		WARN_ON_ONCE(!(flags & __GFP_NOWARN));
-		return NULL;
-	}
-
-	/*
-	 * kvmalloc() can always use VM_ALLOW_HUGE_VMAP,
-	 * since the callers already cannot assume anything
-	 * about the resulting pointer, and cannot play
-	 * protection games.
-	 */
-	return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
-			flags, PAGE_KERNEL, VM_ALLOW_HUGE_VMAP,
-			node, __builtin_return_address(0));
-}
-EXPORT_SYMBOL(kvmalloc_node);
-
-/**
- * kvfree() - Free memory.
- * @addr: Pointer to allocated memory.
- *
- * kvfree frees memory allocated by any of vmalloc(), kmalloc() or kvmalloc().
- * It is slightly more efficient to use kfree() or vfree() if you are certain
- * that you know which one to use.
- *
- * Context: Either preemptible task context or not-NMI interrupt.
- */
-void kvfree(const void *addr)
-{
-	if (is_vmalloc_addr(addr))
-		vfree(addr);
-	else
-		kfree(addr);
-}
-EXPORT_SYMBOL(kvfree);
-
-/**
- * kvfree_sensitive - Free a data object containing sensitive information.
- * @addr: address of the data object to be freed.
- * @len: length of the data object.
- *
- * Use the special memzero_explicit() function to clear the content of a
- * kvmalloc'ed object containing sensitive data to make sure that the
- * compiler won't optimize out the data clearing.
- */
-void kvfree_sensitive(const void *addr, size_t len)
-{
-	if (likely(!ZERO_OR_NULL_PTR(addr))) {
-		memzero_explicit((void *)addr, len);
-		kvfree(addr);
-	}
-}
-EXPORT_SYMBOL(kvfree_sensitive);
-
-void *kvrealloc(const void *p, size_t oldsize, size_t newsize, gfp_t flags)
-{
-	void *newp;
-
-	if (oldsize >= newsize)
-		return (void *)p;
-	newp = kvmalloc(newsize, flags);
-	if (!newp)
-		return NULL;
-	memcpy(newp, p, oldsize);
-	kvfree(p);
-	return newp;
-}
-EXPORT_SYMBOL(kvrealloc);
-
-/**
+/**
  * __vmalloc_array - allocate memory for a virtually contiguous array.
  * @n: number of elements.
  * @size: element size.
  * @flags: the type of memory to allocate (see kmalloc).
  */
-void *__vmalloc_array(size_t n, size_t size, gfp_t flags)
+void *__vmalloc_array_noprof(size_t n, size_t size, gfp_t flags)
 {
 	size_t bytes;
 
 	if (unlikely(check_mul_overflow(n, size, &bytes)))
 		return NULL;
-	return __vmalloc(bytes, flags);
+	return __vmalloc_noprof(bytes, flags);
 }
-EXPORT_SYMBOL(__vmalloc_array);
+EXPORT_SYMBOL(__vmalloc_array_noprof);
 
 /**
  * vmalloc_array - allocate memory for a virtually contiguous array.
  * @n: number of elements.
  * @size: element size.
  */
-void *vmalloc_array(size_t n, size_t size)
+void *vmalloc_array_noprof(size_t n, size_t size)
 {
-	return __vmalloc_array(n, size, GFP_KERNEL);
+	return __vmalloc_array_noprof(n, size, GFP_KERNEL);
 }
-EXPORT_SYMBOL(vmalloc_array);
+EXPORT_SYMBOL(vmalloc_array_noprof);
 
 /**
  * __vcalloc - allocate and zero memory for a virtually contiguous array.
@@ -747,24 +649,24 @@ EXPORT_SYMBOL(vmalloc_array);
  * @n: number of elements.
  * @size: element size.
 * @flags: the type of memory to allocate (see kmalloc).
 */
-void *__vcalloc(size_t n, size_t size, gfp_t flags)
+void *__vcalloc_noprof(size_t n, size_t size, gfp_t flags)
 {
-	return __vmalloc_array(n, size, flags | __GFP_ZERO);
+	return __vmalloc_array_noprof(n, size, flags | __GFP_ZERO);
 }
-EXPORT_SYMBOL(__vcalloc);
+EXPORT_SYMBOL(__vcalloc_noprof);
 
 /**
  * vcalloc - allocate and zero memory for a virtually contiguous array.
  * @n: number of elements.
  * @size: element size.
 */
-void *vcalloc(size_t n, size_t size)
+void *vcalloc_noprof(size_t n, size_t size)
 {
-	return __vmalloc_array(n, size, GFP_KERNEL | __GFP_ZERO);
+	return __vmalloc_array_noprof(n, size, GFP_KERNEL | __GFP_ZERO);
 }
-EXPORT_SYMBOL(vcalloc);
+EXPORT_SYMBOL(vcalloc_noprof);
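
The _noprof renames above come from memory allocation profiling: the exported function is the uninstrumented implementation, and the familiar name is reintroduced as a macro that tags each allocation with its call site. Simplified from the alloc_hooks pattern in include/linux/alloc_tag.h and include/linux/vmalloc.h (details vary by kernel version):

    /* The public name wraps the _noprof implementation with a tag. */
    #define alloc_hooks(_do_alloc)                              \
    ({                                                          \
            DEFINE_ALLOC_TAG(_alloc_tag);                       \
            alloc_hooks_tag(&_alloc_tag, _do_alloc);            \
    })

    #define vmalloc_array(...)  alloc_hooks(vmalloc_array_noprof(__VA_ARGS__))
    #define vcalloc(...)        alloc_hooks(vcalloc_noprof(__VA_ARGS__))

This is also why kvmalloc_node(), kvfree(), kvfree_sensitive() and kvrealloc() disappear from this file wholesale: they were relocated into the slab allocator code (mm/slub.c in kernels of this vintage) rather than removed from the kernel.
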
 
-struct anon_vma *folio_anon_vma(struct folio *folio)
+struct anon_vma *folio_anon_vma(const struct folio *folio)
 {
 	unsigned long mapping = (unsigned long)folio->mapping;
 
@@ -828,15 +730,34 @@ void folio_copy(struct folio *dst, struct folio *src)
 }
 EXPORT_SYMBOL(folio_copy);
 
+int folio_mc_copy(struct folio *dst, struct folio *src)
+{
+	long nr = folio_nr_pages(src);
+	long i = 0;
+
+	for (;;) {
+		if (copy_mc_highpage(folio_page(dst, i), folio_page(src, i)))
+			return -EHWPOISON;
+		if (++i == nr)
+			break;
+		cond_resched();
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(folio_mc_copy);
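
folio_mc_copy() is a machine-check-aware variant of folio_copy(): copy_mc_highpage() reports when a source page is hardware-poisoned, and the whole copy is abandoned with -EHWPOISON instead of letting it consume the poison. A hedged caller sketch (the function and warning are illustrative; the in-tree user at the time of this diff is the folio migration path):

    #include <linux/mm.h>
    #include <linux/printk.h>

    /* Hypothetical: duplicate a folio, tolerating poisoned source pages. */
    static int replicate_folio(struct folio *dst, struct folio *src)
    {
            int ret = folio_mc_copy(dst, src);

            if (ret == -EHWPOISON)
                    pr_warn_ratelimited("folio copy hit a poisoned page\n");
            return ret;
    }
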
 
 int sysctl_overcommit_memory __read_mostly = OVERCOMMIT_GUESS;
-int sysctl_overcommit_ratio __read_mostly = 50;
-unsigned long sysctl_overcommit_kbytes __read_mostly;
+static int sysctl_overcommit_ratio __read_mostly = 50;
+static unsigned long sysctl_overcommit_kbytes __read_mostly;
 int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
 unsigned long sysctl_user_reserve_kbytes __read_mostly = 1UL << 17; /* 128MB */
 unsigned long sysctl_admin_reserve_kbytes __read_mostly = 1UL << 13; /* 8MB */
 
-int overcommit_ratio_handler(struct ctl_table *table, int write, void *buffer,
-		size_t *lenp, loff_t *ppos)
+#ifdef CONFIG_SYSCTL
+
+static int overcommit_ratio_handler(const struct ctl_table *table, int write,
+		void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int ret;
 
@@ -851,8 +772,8 @@ static void sync_overcommit_as(struct work_struct *dummy)
 	percpu_counter_sync(&vm_committed_as);
 }
 
-int overcommit_policy_handler(struct ctl_table *table, int write, void *buffer,
-		size_t *lenp, loff_t *ppos)
+static int overcommit_policy_handler(const struct ctl_table *table, int write,
+		void *buffer, size_t *lenp, loff_t *ppos)
 {
 	struct ctl_table t;
 	int new_policy = -1;
@@ -887,8 +808,8 @@ int overcommit_policy_handler(struct ctl_table *table, int write, void *buffer,
 	return ret;
 }
 
-int overcommit_kbytes_handler(struct ctl_table *table, int write, void *buffer,
-		size_t *lenp, loff_t *ppos)
+static int overcommit_kbytes_handler(const struct ctl_table *table, int write,
+		void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int ret;
 
@@ -898,6 +819,54 @@ int overcommit_kbytes_handler(struct ctl_table *table, int write, void *buffer,
 	return ret;
 }
 
+static const struct ctl_table util_sysctl_table[] = {
+	{
+		.procname	= "overcommit_memory",
+		.data		= &sysctl_overcommit_memory,
+		.maxlen		= sizeof(sysctl_overcommit_memory),
+		.mode		= 0644,
+		.proc_handler	= overcommit_policy_handler,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_TWO,
+	},
+	{
+		.procname	= "overcommit_ratio",
+		.data		= &sysctl_overcommit_ratio,
+		.maxlen		= sizeof(sysctl_overcommit_ratio),
+		.mode		= 0644,
+		.proc_handler	= overcommit_ratio_handler,
+	},
+	{
+		.procname	= "overcommit_kbytes",
+		.data		= &sysctl_overcommit_kbytes,
+		.maxlen		= sizeof(sysctl_overcommit_kbytes),
+		.mode		= 0644,
+		.proc_handler	= overcommit_kbytes_handler,
+	},
+	{
+		.procname	= "user_reserve_kbytes",
+		.data		= &sysctl_user_reserve_kbytes,
+		.maxlen		= sizeof(sysctl_user_reserve_kbytes),
+		.mode		= 0644,
+		.proc_handler	= proc_doulongvec_minmax,
+	},
+	{
+		.procname	= "admin_reserve_kbytes",
+		.data		= &sysctl_admin_reserve_kbytes,
+		.maxlen		= sizeof(sysctl_admin_reserve_kbytes),
+		.mode		= 0644,
+		.proc_handler	= proc_doulongvec_minmax,
+	},
+};
+
+static int __init init_vm_util_sysctls(void)
+{
+	register_sysctl_init("vm", util_sysctl_table);
+	return 0;
+}
+subsys_initcall(init_vm_util_sysctls);
+#endif /* CONFIG_SYSCTL */
+
 /*
  * Committed memory limit enforced when OVERCOMMIT_NEVER policy is used
  */
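
The final chunk moves the vm.overcommit_* and reserve knobs out of the shared kernel/sysctl.c table: the handlers become static, take a const struct ctl_table, and the table is registered from this file at boot, all compiled out without CONFIG_SYSCTL. The registration idiom generalizes to any subsystem; a minimal hedged sketch for a hypothetical knob (all names invented):

    #include <linux/init.h>
    #include <linux/sysctl.h>

    static int example_knob;

    static const struct ctl_table example_table[] = {
            {
                    .procname	= "example_knob", /* /proc/sys/vm/example_knob */
                    .data		= &example_knob,
                    .maxlen		= sizeof(example_knob),
                    .mode		= 0644,
                    .proc_handler	= proc_dointvec,
            },
    };

    static int __init example_sysctl_init(void)
    {
            /* register_sysctl_init() is meant for tables that live forever. */
            register_sysctl_init("vm", example_table);
            return 0;
    }
    subsys_initcall(example_sysctl_init);
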