Diffstat (limited to 'mm/nommu.c')
-rw-r--r--  mm/nommu.c  847
1 file changed, 401 insertions, 446 deletions
diff --git a/mm/nommu.c b/mm/nommu.c
index 749276beb109..c3a23b082adb 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1,10 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* linux/mm/nommu.c
*
* Replacement code for mm functions to support CPUs that don't
* have any form of memory management unit (thus no virtual memory).
*
- * See Documentation/nommu-mmap.txt
+ * See Documentation/admin-guide/mm/nommu-mmap.rst
*
* Copyright (c) 2004-2008 David Howells <dhowells@redhat.com>
* Copyright (c) 2000-2003 David McCullough <davidm@snapgear.com>
@@ -18,7 +19,6 @@
#include <linux/export.h>
#include <linux/mm.h>
#include <linux/sched/mm.h>
-#include <linux/vmacache.h>
#include <linux/mman.h>
#include <linux/swap.h>
#include <linux/file.h>
@@ -26,7 +26,6 @@
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
-#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/compiler.h>
#include <linux/mount.h>
@@ -37,23 +36,17 @@
#include <linux/printk.h>
#include <linux/uaccess.h>
+#include <linux/uio.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include "internal.h"
-void *high_memory;
-EXPORT_SYMBOL(high_memory);
-struct page *mem_map;
-unsigned long max_mapnr;
-EXPORT_SYMBOL(max_mapnr);
unsigned long highest_memmap_pfn;
-int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS;
int heap_stack_gap = 0;
atomic_long_t mmap_pages_allocated;
-EXPORT_SYMBOL(mem_map);
/* list of mapped, potentially shareable regions */
static struct kmem_cache *vm_region_jar;
@@ -71,7 +64,7 @@ const struct vm_operations_struct generic_file_vm_ops = {
*/
unsigned int kobjsize(const void *objp)
{
- struct page *page;
+ struct folio *folio;
/*
* If the object we have should not have ksize performed on it,
@@ -80,22 +73,22 @@ unsigned int kobjsize(const void *objp)
if (!objp || !virt_addr_valid(objp))
return 0;
- page = virt_to_head_page(objp);
+ folio = virt_to_folio(objp);
/*
* If the allocator sets PageSlab, we know the pointer came from
* kmalloc().
*/
- if (PageSlab(page))
+ if (folio_test_slab(folio))
return ksize(objp);
/*
- * If it's not a compound page, see if we have a matching VMA
+ * If it's not a large folio, see if we have a matching VMA
* region. This test is intentionally done in reverse order,
* so if there's no VMA, we still fall through and hand back
- * PAGE_SIZE for 0-order pages.
+ * PAGE_SIZE for 0-order folios.
*/
- if (!PageCompound(page)) {
+ if (!folio_test_large(folio)) {
struct vm_area_struct *vma;
vma = find_vma(current->mm, (unsigned long)objp);
@@ -107,159 +100,68 @@ unsigned int kobjsize(const void *objp)
* The ksize() function is only guaranteed to work for pointers
* returned by kmalloc(). So handle arbitrary pointers here.
*/
- return PAGE_SIZE << compound_order(page);
+ return folio_size(folio);
}
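
For illustration: with the folio conversion, kobjsize() still serves the same three classes of pointer as before. A hypothetical caller (a sketch, not part of this patch) exercises the slab path:

	void *p = kmalloc(100, GFP_KERNEL);

	if (p)
		pr_info("usable size: %u\n", kobjsize(p));	/* slab case: ksize() */
	kfree(p);
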
-static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
- unsigned long start, unsigned long nr_pages,
- unsigned int foll_flags, struct page **pages,
- struct vm_area_struct **vmas, int *nonblocking)
-{
- struct vm_area_struct *vma;
- unsigned long vm_flags;
- int i;
-
- /* calculate required read or write permissions.
- * If FOLL_FORCE is set, we only require the "MAY" flags.
- */
- vm_flags = (foll_flags & FOLL_WRITE) ?
- (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
- vm_flags &= (foll_flags & FOLL_FORCE) ?
- (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
-
- for (i = 0; i < nr_pages; i++) {
- vma = find_vma(mm, start);
- if (!vma)
- goto finish_or_fault;
-
- /* protect what we can, including chardevs */
- if ((vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
- !(vm_flags & vma->vm_flags))
- goto finish_or_fault;
-
- if (pages) {
- pages[i] = virt_to_page(start);
- if (pages[i])
- get_page(pages[i]);
- }
- if (vmas)
- vmas[i] = vma;
- start = (start + PAGE_SIZE) & PAGE_MASK;
- }
-
- return i;
-
-finish_or_fault:
- return i ? : -EFAULT;
-}
-
-/*
- * get a list of pages in an address range belonging to the specified process
- * and indicate the VMA that covers each page
- * - this is potentially dodgy as we may end incrementing the page count of a
- * slab page or a secondary page from a compound page
- * - don't permit access to VMAs that don't support it, such as I/O mappings
- */
-long get_user_pages(unsigned long start, unsigned long nr_pages,
- unsigned int gup_flags, struct page **pages,
- struct vm_area_struct **vmas)
-{
- return __get_user_pages(current, current->mm, start, nr_pages,
- gup_flags, pages, vmas, NULL);
-}
-EXPORT_SYMBOL(get_user_pages);
-
-long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
- unsigned int gup_flags, struct page **pages,
- int *locked)
-{
- return get_user_pages(start, nr_pages, gup_flags, pages, NULL);
-}
-EXPORT_SYMBOL(get_user_pages_locked);
-
-static long __get_user_pages_unlocked(struct task_struct *tsk,
- struct mm_struct *mm, unsigned long start,
- unsigned long nr_pages, struct page **pages,
- unsigned int gup_flags)
-{
- long ret;
- down_read(&mm->mmap_sem);
- ret = __get_user_pages(tsk, mm, start, nr_pages, gup_flags, pages,
- NULL, NULL);
- up_read(&mm->mmap_sem);
- return ret;
-}
-
-long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
- struct page **pages, unsigned int gup_flags)
-{
- return __get_user_pages_unlocked(current, current->mm, start, nr_pages,
- pages, gup_flags);
-}
-EXPORT_SYMBOL(get_user_pages_unlocked);
-
-/**
- * follow_pfn - look up PFN at a user virtual address
- * @vma: memory mapping
- * @address: user virtual address
- * @pfn: location to store found PFN
- *
- * Only IO mappings and raw PFN mappings are allowed.
- *
- * Returns zero and the pfn at @pfn on success, -ve otherwise.
- */
-int follow_pfn(struct vm_area_struct *vma, unsigned long address,
- unsigned long *pfn)
-{
- if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
- return -EINVAL;
-
- *pfn = address >> PAGE_SHIFT;
- return 0;
-}
-EXPORT_SYMBOL(follow_pfn);
-
-LIST_HEAD(vmap_area_list);
-
void vfree(const void *addr)
{
kfree(addr);
}
EXPORT_SYMBOL(vfree);
-void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
+void *__vmalloc_noprof(unsigned long size, gfp_t gfp_mask)
{
/*
* You can't specify __GFP_HIGHMEM with kmalloc() since kmalloc()
* returns only a logical address.
*/
- return kmalloc(size, (gfp_mask | __GFP_COMP) & ~__GFP_HIGHMEM);
+ return kmalloc_noprof(size, (gfp_mask | __GFP_COMP) & ~__GFP_HIGHMEM);
+}
+EXPORT_SYMBOL(__vmalloc_noprof);
+
+void *vrealloc_node_align_noprof(const void *p, size_t size, unsigned long align,
+ gfp_t flags, int node)
+{
+ return krealloc_noprof(p, size, (flags | __GFP_COMP) & ~__GFP_HIGHMEM);
+}
+
+void *__vmalloc_node_range_noprof(unsigned long size, unsigned long align,
+ unsigned long start, unsigned long end, gfp_t gfp_mask,
+ pgprot_t prot, unsigned long vm_flags, int node,
+ const void *caller)
+{
+ return __vmalloc_noprof(size, gfp_mask);
}
-EXPORT_SYMBOL(__vmalloc);
-void *__vmalloc_node_flags(unsigned long size, int node, gfp_t flags)
+void *__vmalloc_node_noprof(unsigned long size, unsigned long align, gfp_t gfp_mask,
+ int node, const void *caller)
{
- return __vmalloc(size, flags, PAGE_KERNEL);
+ return __vmalloc_noprof(size, gfp_mask);
}
-void *vmalloc_user(unsigned long size)
+static void *__vmalloc_user_flags(unsigned long size, gfp_t flags)
{
void *ret;
- ret = __vmalloc(size, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL);
+ ret = __vmalloc(size, flags);
if (ret) {
struct vm_area_struct *vma;
- down_write(&current->mm->mmap_sem);
+ mmap_write_lock(current->mm);
vma = find_vma(current->mm, (unsigned long)ret);
if (vma)
- vma->vm_flags |= VM_USERMAP;
- up_write(&current->mm->mmap_sem);
+ vm_flags_set(vma, VM_USERMAP);
+ mmap_write_unlock(current->mm);
}
return ret;
}
-EXPORT_SYMBOL(vmalloc_user);
+
+void *vmalloc_user_noprof(unsigned long size)
+{
+ return __vmalloc_user_flags(size, GFP_KERNEL | __GFP_ZERO);
+}
+EXPORT_SYMBOL(vmalloc_user_noprof);
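
The VM_USERMAP flag set here is what later authorizes remap_vmalloc_range() on the buffer. A hedged sketch of a driver pairing the two (hypothetical driver code; drv_buf is assumed to have been allocated with vmalloc_user() at probe time):

	static void *drv_buf;	/* assumed: vmalloc_user() buffer */

	static int drv_buf_mmap(struct file *file, struct vm_area_struct *vma)
	{
		/* allowed because vmalloc_user() tagged the backing VMA VM_USERMAP */
		return remap_vmalloc_range(vma, drv_buf, vma->vm_pgoff);
	}
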
struct page *vmalloc_to_page(const void *addr)
{
@@ -273,24 +175,13 @@ unsigned long vmalloc_to_pfn(const void *addr)
}
EXPORT_SYMBOL(vmalloc_to_pfn);
-long vread(char *buf, char *addr, unsigned long count)
-{
- /* Don't allow overflow */
- if ((unsigned long) buf + count < count)
- count = -(unsigned long) buf;
-
- memcpy(buf, addr, count);
- return count;
-}
-
-long vwrite(char *buf, char *addr, unsigned long count)
+long vread_iter(struct iov_iter *iter, const char *addr, size_t count)
{
/* Don't allow overflow */
if ((unsigned long) addr + count < count)
count = -(unsigned long) addr;
- memcpy(addr, buf, count);
- return count;
+ return copy_to_iter(addr, count, iter);
}
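
vread_iter() supersedes vread() (and vwrite() is gone entirely): the /proc/kcore-style consumer now supplies an iov_iter rather than a raw buffer. A sketch of the calling pattern, assuming dst/len describe a kernel buffer:

	struct iov_iter iter;
	struct kvec kv = { .iov_base = dst, .iov_len = len };
	long copied;

	iov_iter_kvec(&iter, ITER_DEST, &kv, 1, len);
	copied = vread_iter(&iter, src, len);
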
/*
@@ -304,11 +195,29 @@ long vwrite(char *buf, char *addr, unsigned long count)
* For tight control over page level allocator and protection flags
* use __vmalloc() instead.
*/
-void *vmalloc(unsigned long size)
+void *vmalloc_noprof(unsigned long size)
+{
+ return __vmalloc_noprof(size, GFP_KERNEL);
+}
+EXPORT_SYMBOL(vmalloc_noprof);
+
+/*
+ * vmalloc_huge_node - allocate virtually contiguous memory, on a node
+ *
+ * @size: allocation size
+ * @gfp_mask: flags for the page level allocator
+ * @node: node to use for allocation or NUMA_NO_NODE
+ *
+ * Allocate enough pages to cover @size from the page level
+ * allocator and map them into contiguous kernel virtual space.
+ *
+ * Due to NOMMU implications the node argument and HUGE page attribute are
+ * ignored.
+ */
+void *vmalloc_huge_node_noprof(unsigned long size, gfp_t gfp_mask, int node)
{
- return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL);
+ return __vmalloc_noprof(size, gfp_mask);
}
-EXPORT_SYMBOL(vmalloc);
/*
* vzalloc - allocate virtually contiguous memory with zero fill
@@ -322,12 +231,11 @@ EXPORT_SYMBOL(vmalloc);
* For tight control over page level allocator and protection flags
* use __vmalloc() instead.
*/
-void *vzalloc(unsigned long size)
+void *vzalloc_noprof(unsigned long size)
{
- return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
- PAGE_KERNEL);
+ return __vmalloc_noprof(size, GFP_KERNEL | __GFP_ZERO);
}
-EXPORT_SYMBOL(vzalloc);
+EXPORT_SYMBOL(vzalloc_noprof);
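
The _noprof renames plug these helpers into memory allocation profiling: callers keep using vmalloc()/vzalloc(), which become macros that record the call site before dispatching. Roughly, following the wrapper shape in the vmalloc headers (abbreviated sketch):

	#define vmalloc(...)	alloc_hooks(vmalloc_noprof(__VA_ARGS__))
	#define vzalloc(...)	alloc_hooks(vzalloc_noprof(__VA_ARGS__))
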
/**
* vmalloc_node - allocate memory on a specific node
@@ -340,11 +248,11 @@ EXPORT_SYMBOL(vzalloc);
* For tight control over page level allocator and protection flags
* use __vmalloc() instead.
*/
-void *vmalloc_node(unsigned long size, int node)
+void *vmalloc_node_noprof(unsigned long size, int node)
{
- return vmalloc(size);
+ return vmalloc_noprof(size);
}
-EXPORT_SYMBOL(vmalloc_node);
+EXPORT_SYMBOL(vmalloc_node_noprof);
/**
* vzalloc_node - allocate memory on a specific node with zero fill
@@ -358,28 +266,11 @@ EXPORT_SYMBOL(vmalloc_node);
* For tight control over page level allocator and protection flags
* use __vmalloc() instead.
*/
-void *vzalloc_node(unsigned long size, int node)
-{
- return vzalloc(size);
-}
-EXPORT_SYMBOL(vzalloc_node);
-
-/**
- * vmalloc_exec - allocate virtually contiguous, executable memory
- * @size: allocation size
- *
- * Kernel-internal function to allocate enough pages to cover @size
- * the page level allocator and map them into contiguous and
- * executable kernel virtual space.
- *
- * For tight control over page level allocator and protection flags
- * use __vmalloc() instead.
- */
-
-void *vmalloc_exec(unsigned long size)
+void *vzalloc_node_noprof(unsigned long size, int node)
{
- return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC);
+ return vzalloc_noprof(size);
}
+EXPORT_SYMBOL(vzalloc_node_noprof);
/**
* vmalloc_32 - allocate virtually contiguous memory (32bit addressable)
@@ -388,11 +279,11 @@ void *vmalloc_exec(unsigned long size)
* Allocate enough 32bit PA addressable pages to cover @size from the
* page level allocator and map them into contiguous kernel virtual space.
*/
-void *vmalloc_32(unsigned long size)
+void *vmalloc_32_noprof(unsigned long size)
{
- return __vmalloc(size, GFP_KERNEL, PAGE_KERNEL);
+ return __vmalloc_noprof(size, GFP_KERNEL);
}
-EXPORT_SYMBOL(vmalloc_32);
+EXPORT_SYMBOL(vmalloc_32_noprof);
/**
* vmalloc_32_user - allocate zeroed virtually contiguous 32bit memory
@@ -404,15 +295,15 @@ EXPORT_SYMBOL(vmalloc_32);
* VM_USERMAP is set on the corresponding VMA so that subsequent calls to
* remap_vmalloc_range() are permissible.
*/
-void *vmalloc_32_user(unsigned long size)
+void *vmalloc_32_user_noprof(unsigned long size)
{
/*
* We'll have to sort out the ZONE_DMA bits for 64-bit,
* but for now this can simply use vmalloc_user() directly.
*/
- return vmalloc_user(size);
+ return vmalloc_user_noprof(size);
}
-EXPORT_SYMBOL(vmalloc_32_user);
+EXPORT_SYMBOL(vmalloc_32_user_noprof);
void *vmap(struct page **pages, unsigned int count, unsigned long flags, pgprot_t prot)
{
@@ -427,7 +318,7 @@ void vunmap(const void *addr)
}
EXPORT_SYMBOL(vunmap);
-void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot)
+void *vm_map_ram(struct page **pages, unsigned int count, int node)
{
BUG();
return NULL;
@@ -445,21 +336,6 @@ void vm_unmap_aliases(void)
}
EXPORT_SYMBOL_GPL(vm_unmap_aliases);
-/*
- * Implement a stub for vmalloc_sync_all() if the architecture chose not to
- * have one.
- */
-void __weak vmalloc_sync_all(void)
-{
-}
-
-struct vm_struct *alloc_vm_area(size_t size, pte_t **ptes)
-{
- BUG();
- return NULL;
-}
-EXPORT_SYMBOL_GPL(alloc_vm_area);
-
void free_vm_area(struct vm_struct *area)
{
BUG();
@@ -473,6 +349,27 @@ int vm_insert_page(struct vm_area_struct *vma, unsigned long addr,
}
EXPORT_SYMBOL(vm_insert_page);
+int vm_insert_pages(struct vm_area_struct *vma, unsigned long addr,
+ struct page **pages, unsigned long *num)
+{
+ return -EINVAL;
+}
+EXPORT_SYMBOL(vm_insert_pages);
+
+int vm_map_pages(struct vm_area_struct *vma, struct page **pages,
+ unsigned long num)
+{
+ return -EINVAL;
+}
+EXPORT_SYMBOL(vm_map_pages);
+
+int vm_map_pages_zero(struct vm_area_struct *vma, struct page **pages,
+ unsigned long num)
+{
+ return -EINVAL;
+}
+EXPORT_SYMBOL(vm_map_pages_zero);
+
/*
* sys_brk() for the most part doesn't need the global kernel
* lock, except when an application is doing something nasty
@@ -501,12 +398,26 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
/*
* Ok, looks good - let it rip.
*/
- flush_icache_range(mm->brk, brk);
+ flush_icache_user_range(mm->brk, brk);
return mm->brk = brk;
}
+static int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS;
+
+static const struct ctl_table nommu_table[] = {
+ {
+ .procname = "nr_trim_pages",
+ .data = &sysctl_nr_trim_pages,
+ .maxlen = sizeof(sysctl_nr_trim_pages),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ },
+};
+
/*
- * initialise the percpu counter for VM and region record slabs
+ * initialise the percpu counter for VM and region record slabs, initialise VMA
+ * state.
*/
void __init mmap_init(void)
{
@@ -515,6 +426,8 @@ void __init mmap_init(void)
ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL);
VM_BUG_ON(ret);
vm_region_jar = KMEM_CACHE(vm_region, SLAB_PANIC|SLAB_ACCOUNT);
+ register_sysctl_init("vm", nommu_table);
+ vma_state_init();
}
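
With the table registered here, sysctl_nr_trim_pages becomes file-static yet stays reachable at its usual procfs path. A quick userspace check (plain C, illustrative):

	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/proc/sys/vm/nr_trim_pages", "r");
		int val;

		if (f && fscanf(f, "%d", &val) == 1)
			printf("vm.nr_trim_pages = %d\n", val);
		if (f)
			fclose(f);
		return 0;
	}
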
/*
@@ -601,7 +514,7 @@ static void delete_nommu_region(struct vm_region *region)
static void free_page_series(unsigned long from, unsigned long to)
{
for (; from < to; from += PAGE_SIZE) {
- struct page *page = virt_to_page(from);
+ struct page *page = virt_to_page((void *)from);
atomic_long_dec(&mmap_pages_allocated);
put_page(page);
@@ -646,26 +559,13 @@ static void put_nommu_region(struct vm_region *region)
__put_nommu_region(region);
}
-/*
- * add a VMA into a process's mm_struct in the appropriate place in the list
- * and tree and add to the address space's page tree also if not an anonymous
- * page
- * - should be called with mm->mmap_sem held writelocked
- */
-static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma)
+static void setup_vma_to_mm(struct vm_area_struct *vma, struct mm_struct *mm)
{
- struct vm_area_struct *pvma, *prev;
- struct address_space *mapping;
- struct rb_node **p, *parent, *rb_prev;
-
- BUG_ON(!vma->vm_region);
-
- mm->map_count++;
vma->vm_mm = mm;
/* add the VMA to the mapping */
if (vma->vm_file) {
- mapping = vma->vm_file->f_mapping;
+ struct address_space *mapping = vma->vm_file->f_mapping;
i_mmap_lock_write(mapping);
flush_dcache_mmap_lock(mapping);
@@ -673,67 +573,14 @@ static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma)
flush_dcache_mmap_unlock(mapping);
i_mmap_unlock_write(mapping);
}
-
- /* add the VMA to the tree */
- parent = rb_prev = NULL;
- p = &mm->mm_rb.rb_node;
- while (*p) {
- parent = *p;
- pvma = rb_entry(parent, struct vm_area_struct, vm_rb);
-
- /* sort by: start addr, end addr, VMA struct addr in that order
- * (the latter is necessary as we may get identical VMAs) */
- if (vma->vm_start < pvma->vm_start)
- p = &(*p)->rb_left;
- else if (vma->vm_start > pvma->vm_start) {
- rb_prev = parent;
- p = &(*p)->rb_right;
- } else if (vma->vm_end < pvma->vm_end)
- p = &(*p)->rb_left;
- else if (vma->vm_end > pvma->vm_end) {
- rb_prev = parent;
- p = &(*p)->rb_right;
- } else if (vma < pvma)
- p = &(*p)->rb_left;
- else if (vma > pvma) {
- rb_prev = parent;
- p = &(*p)->rb_right;
- } else
- BUG();
- }
-
- rb_link_node(&vma->vm_rb, parent, p);
- rb_insert_color(&vma->vm_rb, &mm->mm_rb);
-
- /* add VMA to the VMA list also */
- prev = NULL;
- if (rb_prev)
- prev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
-
- __vma_link_list(mm, vma, prev, parent);
}
-/*
- * delete a VMA from its owning mm_struct and address space
- */
-static void delete_vma_from_mm(struct vm_area_struct *vma)
-{
- int i;
- struct address_space *mapping;
- struct mm_struct *mm = vma->vm_mm;
- struct task_struct *curr = current;
-
- mm->map_count--;
- for (i = 0; i < VMACACHE_SIZE; i++) {
- /* if the vma is cached, invalidate the entire cache */
- if (curr->vmacache.vmas[i] == vma) {
- vmacache_invalidate(mm);
- break;
- }
- }
-
+static void cleanup_vma_from_mm(struct vm_area_struct *vma)
+{
+ vma->vm_mm->map_count--;
/* remove the VMA from the mapping */
if (vma->vm_file) {
+ struct address_space *mapping;
mapping = vma->vm_file->f_mapping;
i_mmap_lock_write(mapping);
@@ -742,81 +589,80 @@ static void delete_vma_from_mm(struct vm_area_struct *vma)
flush_dcache_mmap_unlock(mapping);
i_mmap_unlock_write(mapping);
}
+}
- /* remove from the MM's tree and list */
- rb_erase(&vma->vm_rb, &mm->mm_rb);
+/*
+ * delete a VMA from its owning mm_struct and address space
+ */
+static int delete_vma_from_mm(struct vm_area_struct *vma)
+{
+ VMA_ITERATOR(vmi, vma->vm_mm, vma->vm_start);
- if (vma->vm_prev)
- vma->vm_prev->vm_next = vma->vm_next;
- else
- mm->mmap = vma->vm_next;
+ vma_iter_config(&vmi, vma->vm_start, vma->vm_end);
+ if (vma_iter_prealloc(&vmi, NULL)) {
+ pr_warn("Allocation of vma tree for process %d failed\n",
+ current->pid);
+ return -ENOMEM;
+ }
+ cleanup_vma_from_mm(vma);
- if (vma->vm_next)
- vma->vm_next->vm_prev = vma->vm_prev;
+ /* remove from the MM's tree and list */
+ vma_iter_clear(&vmi);
+ return 0;
}
-
/*
* destroy a VMA record
*/
static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma)
{
- if (vma->vm_ops && vma->vm_ops->close)
- vma->vm_ops->close(vma);
+ vma_close(vma);
if (vma->vm_file)
fput(vma->vm_file);
put_nommu_region(vma->vm_region);
vm_area_free(vma);
}
-/*
- * look up the first VMA in which addr resides, NULL if none
- * - should be called with mm->mmap_sem at least held readlocked
- */
-struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
+struct vm_area_struct *find_vma_intersection(struct mm_struct *mm,
+ unsigned long start_addr,
+ unsigned long end_addr)
{
- struct vm_area_struct *vma;
+ unsigned long index = start_addr;
- /* check the cache first */
- vma = vmacache_find(mm, addr);
- if (likely(vma))
- return vma;
-
- /* trawl the list (there may be multiple mappings in which addr
- * resides) */
- for (vma = mm->mmap; vma; vma = vma->vm_next) {
- if (vma->vm_start > addr)
- return NULL;
- if (vma->vm_end > addr) {
- vmacache_update(addr, vma);
- return vma;
- }
- }
-
- return NULL;
+ mmap_assert_locked(mm);
+ return mt_find(&mm->mm_mt, &index, end_addr - 1);
}
-EXPORT_SYMBOL(find_vma);
+EXPORT_SYMBOL(find_vma_intersection);
/*
- * find a VMA
- * - we don't extend stack VMAs under NOMMU conditions
+ * look up the first VMA in which addr resides, NULL if none
+ * - should be called with mm->mmap_lock at least held readlocked
*/
-struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr)
+struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
{
- return find_vma(mm, addr);
+ VMA_ITERATOR(vmi, mm, addr);
+
+ return vma_iter_load(&vmi);
}
+EXPORT_SYMBOL(find_vma);
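
With the rbtree, linked list, and per-task VMA cache gone, every lookup goes through the maple tree. The iterator pattern relied on throughout the rest of this patch looks like this (a sketch; assumes mmap_lock is held):

	VMA_ITERATOR(vmi, mm, addr);
	struct vm_area_struct *vma;

	for_each_vma(vmi, vma) {
		/* visits VMAs at or after addr, in ascending address order */
	}
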
/*
* expand a stack to a given address
* - not supported under NOMMU conditions
*/
-int expand_stack(struct vm_area_struct *vma, unsigned long address)
+int expand_stack_locked(struct vm_area_struct *vma, unsigned long addr)
{
return -ENOMEM;
}
+struct vm_area_struct *expand_stack(struct mm_struct *mm, unsigned long addr)
+{
+ mmap_read_unlock(mm);
+ return NULL;
+}
+
/*
* look up the first VMA that exactly matches addr
- * - should be called with mm->mmap_sem at least held readlocked
+ * - should be called with mm->mmap_lock at least held readlocked
*/
static struct vm_area_struct *find_vma_exact(struct mm_struct *mm,
unsigned long addr,
@@ -824,26 +670,17 @@ static struct vm_area_struct *find_vma_exact(struct mm_struct *mm,
{
struct vm_area_struct *vma;
unsigned long end = addr + len;
+ VMA_ITERATOR(vmi, mm, addr);
- /* check the cache first */
- vma = vmacache_find_exact(mm, addr, end);
- if (vma)
- return vma;
-
- /* trawl the list (there may be multiple mappings in which addr
- * resides) */
- for (vma = mm->mmap; vma; vma = vma->vm_next) {
- if (vma->vm_start < addr)
- continue;
- if (vma->vm_start > addr)
- return NULL;
- if (vma->vm_end == end) {
- vmacache_update(addr, vma);
- return vma;
- }
- }
+ vma = vma_iter_load(&vmi);
+ if (!vma)
+ return NULL;
+ if (vma->vm_start != addr)
+ return NULL;
+ if (vma->vm_end != end)
+ return NULL;
- return NULL;
+ return vma;
}
/*
@@ -883,7 +720,7 @@ static int validate_mmap_request(struct file *file,
if (file) {
/* files must support mmap */
- if (!file->f_op->mmap)
+ if (!can_mmap_file(file))
return -ENODEV;
/* work out if what we've got could possibly be shared
@@ -934,9 +771,6 @@ static int validate_mmap_request(struct file *file,
(file->f_mode & FMODE_WRITE))
return -EACCES;
- if (locks_verify_locked(file))
- return -EAGAIN;
-
if (!(capabilities & NOMMU_MAP_DIRECT))
return -ENODEV;
@@ -1011,37 +845,44 @@ static int validate_mmap_request(struct file *file,
* we've determined that we can make the mapping, now translate what we
* now know into VMA flags
*/
-static unsigned long determine_vm_flags(struct file *file,
- unsigned long prot,
- unsigned long flags,
- unsigned long capabilities)
+static vm_flags_t determine_vm_flags(struct file *file,
+ unsigned long prot,
+ unsigned long flags,
+ unsigned long capabilities)
{
- unsigned long vm_flags;
+ vm_flags_t vm_flags;
- vm_flags = calc_vm_prot_bits(prot, 0) | calc_vm_flag_bits(flags);
- /* vm_flags |= mm->def_flags; */
+ vm_flags = calc_vm_prot_bits(prot, 0) | calc_vm_flag_bits(file, flags);
- if (!(capabilities & NOMMU_MAP_DIRECT)) {
- /* attempt to share read-only copies of mapped file chunks */
+ if (!file) {
+ /*
+ * MAP_ANONYMOUS. MAP_SHARED is mapped to MAP_PRIVATE, because
+ * there is no fork().
+ */
vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
- if (file && !(prot & PROT_WRITE))
- vm_flags |= VM_MAYSHARE;
+ } else if (flags & MAP_PRIVATE) {
+ /* MAP_PRIVATE file mapping */
+ if (capabilities & NOMMU_MAP_DIRECT)
+ vm_flags |= (capabilities & NOMMU_VMFLAGS);
+ else
+ vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
+
+ if (!(prot & PROT_WRITE) && !current->ptrace)
+ /*
+ * R/O private file mapping which cannot be used to
+ * modify memory, especially also not via active ptrace
+ * (e.g., set breakpoints) or later by upgrading
+ * permissions (no mprotect()). We can try overlaying
+ * the file mapping, which will work e.g., on chardevs,
+ * ramfs/tmpfs/shmfs and romfs/cramfs.
+ */
+ vm_flags |= VM_MAYOVERLAY;
} else {
- /* overlay a shareable mapping on the backing device or inode
- * if possible - used for chardevs, ramfs/tmpfs/shmfs and
- * romfs/cramfs */
- vm_flags |= VM_MAYSHARE | (capabilities & NOMMU_VMFLAGS);
- if (flags & MAP_SHARED)
- vm_flags |= VM_SHARED;
+ /* MAP_SHARED file mapping: NOMMU_MAP_DIRECT is set. */
+ vm_flags |= VM_SHARED | VM_MAYSHARE |
+ (capabilities & NOMMU_VMFLAGS);
}
- /* refuse to let anyone share private mappings with this process if
- * it's being traced - otherwise breakpoints set in it may interfere
- * with another untraced process
- */
- if ((flags & MAP_PRIVATE) && current->ptrace)
- vm_flags &= ~VM_MAYSHARE;
-
return vm_flags;
}
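
The rewrite makes the three cases explicit: anonymous mappings are always effectively private (there is no fork() on nommu), MAP_PRIVATE file mappings may be overlaid directly on the file when read-only and untraced, and MAP_SHARED requires NOMMU_MAP_DIRECT. The overlay-eligible case, seen from userspace (illustrative program):

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/mman.h>

	int main(void)
	{
		int fd = open("/etc/hostname", O_RDONLY);
		/* R/O MAP_PRIVATE: on nommu this may map the file's pages directly */
		void *p = mmap(NULL, 4096, PROT_READ, MAP_PRIVATE, fd, 0);

		if (p != MAP_FAILED)
			printf("mapped at %p\n", p);
		return 0;
	}
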
@@ -1053,7 +894,7 @@ static int do_mmap_shared_file(struct vm_area_struct *vma)
{
int ret;
- ret = call_mmap(vma->vm_file, vma);
+ ret = mmap_file(vma->vm_file, vma);
if (ret == 0) {
vma->vm_region->vm_top = vma->vm_region->vm_end;
return 0;
@@ -1079,15 +920,18 @@ static int do_mmap_private(struct vm_area_struct *vma,
void *base;
int ret, order;
- /* invoke the file's mapping function so that it can keep track of
- * shared mappings on devices or memory
- * - VM_MAYSHARE will be set if it may attempt to share
+ /*
+ * Invoke the file's mapping function so that it can keep track of
+ * shared mappings on devices or memory. VM_MAYOVERLAY will be set if
+ * it may attempt to share, which will make is_nommu_shared_mapping()
+ * happy.
*/
if (capabilities & NOMMU_MAP_DIRECT) {
- ret = call_mmap(vma->vm_file, vma);
+ ret = mmap_file(vma->vm_file, vma);
+ /* shouldn't return success if we're not sharing */
+ if (WARN_ON_ONCE(!is_nommu_shared_mapping(vma->vm_flags)))
+ ret = -ENOSYS;
if (ret == 0) {
- /* shouldn't return success if we're not sharing */
- BUG_ON(!(vma->vm_flags & VM_MAYSHARE));
vma->vm_region->vm_top = vma->vm_region->vm_end;
return 0;
}
@@ -1118,7 +962,8 @@ static int do_mmap_private(struct vm_area_struct *vma,
atomic_long_add(total, &mmap_pages_allocated);
- region->vm_flags = vma->vm_flags |= VM_MAPPED_COPY;
+ vm_flags_set(vma, VM_MAPPED_COPY);
+ region->vm_flags = vma->vm_flags;
region->vm_start = (unsigned long) base;
region->vm_end = region->vm_start + len;
region->vm_top = region->vm_start + (total << PAGE_SHIFT);
@@ -1157,7 +1002,7 @@ error_free:
enomem:
pr_err("Allocation of length %lu from process %d (%s) failed\n",
len, current->pid, current->comm);
- show_free_areas(0, NULL);
+ show_mem();
return -ENOMEM;
}
@@ -1179,6 +1024,7 @@ unsigned long do_mmap(struct file *file,
struct rb_node *rb;
unsigned long capabilities, result;
int ret;
+ VMA_ITERATOR(vmi, current->mm, 0);
*populate = 0;
@@ -1197,6 +1043,7 @@ unsigned long do_mmap(struct file *file,
* now know into VMA flags */
vm_flags |= determine_vm_flags(file, prot, flags, capabilities);
+
/* we're going to need to record the mapping */
region = kmem_cache_zalloc(vm_region_jar, GFP_KERNEL);
if (!region)
@@ -1210,7 +1057,7 @@ unsigned long do_mmap(struct file *file,
region->vm_flags = vm_flags;
region->vm_pgoff = pgoff;
- vma->vm_flags = vm_flags;
+ vm_flags_init(vma, vm_flags);
vma->vm_pgoff = pgoff;
if (file) {
@@ -1228,7 +1075,7 @@ unsigned long do_mmap(struct file *file,
* these cases, sharing is handled in the driver or filesystem rather
* than here
*/
- if (vm_flags & VM_MAYSHARE) {
+ if (is_nommu_shared_mapping(vm_flags)) {
struct vm_region *pregion;
unsigned long pglen, rpglen, pgend, rpgend, start;
@@ -1238,7 +1085,7 @@ unsigned long do_mmap(struct file *file,
for (rb = rb_first(&nommu_region_tree); rb; rb = rb_next(rb)) {
pregion = rb_entry(rb, struct vm_region, vm_rb);
- if (!(pregion->vm_flags & VM_MAYSHARE))
+ if (!is_nommu_shared_mapping(pregion->vm_flags))
continue;
/* search for overlapping mappings on the same file */
@@ -1274,7 +1121,7 @@ unsigned long do_mmap(struct file *file,
vma->vm_end = start + len;
if (pregion->vm_flags & VM_MAPPED_COPY)
- vma->vm_flags |= VM_MAPPED_COPY;
+ vm_flags_set(vma, VM_MAPPED_COPY);
else {
ret = do_mmap_shared_file(vma);
if (ret < 0) {
@@ -1334,7 +1181,9 @@ unsigned long do_mmap(struct file *file,
add_nommu_region(region);
/* clear anonymous mappings that don't ask for uninitialized data */
- if (!vma->vm_file && !(flags & MAP_UNINITIALIZED))
+ if (!vma->vm_file &&
+ (!IS_ENABLED(CONFIG_MMAP_ALLOW_UNINITIALIZED) ||
+ !(flags & MAP_UNINITIALIZED)))
memset((void *)region->vm_start, 0,
region->vm_end - region->vm_start);
@@ -1344,12 +1193,20 @@ unsigned long do_mmap(struct file *file,
current->mm->total_vm += len >> PAGE_SHIFT;
share:
- add_vma_to_mm(current->mm, vma);
+ BUG_ON(!vma->vm_region);
+ vma_iter_config(&vmi, vma->vm_start, vma->vm_end);
+ if (vma_iter_prealloc(&vmi, vma))
+ goto error_just_free;
+
+ setup_vma_to_mm(vma, current->mm);
+ current->mm->map_count++;
+ /* add the VMA to the tree */
+ vma_iter_store_new(&vmi, vma);
/* we flush the region from the icache only when the first executable
* mapping of it is made */
if (vma->vm_flags & VM_EXEC && !region->vm_icache_flushed) {
- flush_icache_range(region->vm_start, region->vm_end);
+ flush_icache_user_range(region->vm_start, region->vm_end);
region->vm_icache_flushed = true;
}
@@ -1360,6 +1217,7 @@ share:
error_just_free:
up_write(&nommu_region_sem);
error:
+ vma_iter_free(&vmi);
if (region->vm_file)
fput(region->vm_file);
kmem_cache_free(vm_region_jar, region);
@@ -1378,13 +1236,13 @@ error_getting_vma:
kmem_cache_free(vm_region_jar, region);
pr_warn("Allocation of vma for %lu byte allocation from process %d failed\n",
len, current->pid);
- show_free_areas(0, NULL);
+ show_mem();
return -ENOMEM;
error_getting_region:
pr_warn("Allocation of vm region for %lu byte allocation from process %d failed\n",
len, current->pid);
- show_free_areas(0, NULL);
+ show_mem();
return -ENOMEM;
}
@@ -1402,8 +1260,6 @@ unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
goto out;
}
- flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
-
retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff);
if (file)
@@ -1447,18 +1303,20 @@ SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
* split a vma into two pieces at address 'addr'; a new vma is allocated either
* for the first part or the tail.
*/
-int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long addr, int new_below)
+static int split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
+ unsigned long addr, int new_below)
{
struct vm_area_struct *new;
struct vm_region *region;
unsigned long npages;
+ struct mm_struct *mm;
/* we're only permitted to split anonymous regions (these should have
* only a single usage on the region) */
if (vma->vm_file)
return -ENOMEM;
+ mm = vma->vm_mm;
if (mm->map_count >= sysctl_max_map_count)
return -ENOMEM;
@@ -1467,10 +1325,8 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
return -ENOMEM;
new = vm_area_dup(vma);
- if (!new) {
- kmem_cache_free(vm_region_jar, region);
- return -ENOMEM;
- }
+ if (!new)
+ goto err_vma_dup;
/* most fields are the same, copy all, and then fixup */
*region = *vma->vm_region;
@@ -1485,10 +1341,16 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
region->vm_pgoff = new->vm_pgoff += npages;
}
+ vma_iter_config(vmi, new->vm_start, new->vm_end);
+ if (vma_iter_prealloc(vmi, vma)) {
+ pr_warn("Allocation of vma tree for process %d failed\n",
+ current->pid);
+ goto err_vmi_preallocate;
+ }
+
if (new->vm_ops && new->vm_ops->open)
new->vm_ops->open(new);
- delete_vma_from_mm(vma);
down_write(&nommu_region_sem);
delete_nommu_region(vma->vm_region);
if (new_below) {
@@ -1501,16 +1363,25 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
add_nommu_region(vma->vm_region);
add_nommu_region(new->vm_region);
up_write(&nommu_region_sem);
- add_vma_to_mm(mm, vma);
- add_vma_to_mm(mm, new);
+
+ setup_vma_to_mm(vma, mm);
+ setup_vma_to_mm(new, mm);
+ vma_iter_store_new(vmi, new);
+ mm->map_count++;
return 0;
+
+err_vmi_preallocate:
+ vm_area_free(new);
+err_vma_dup:
+ kmem_cache_free(vm_region_jar, region);
+ return -ENOMEM;
}
/*
* shrink a VMA by removing the specified chunk from either the beginning or
* the end
*/
-static int shrink_vma(struct mm_struct *mm,
+static int vmi_shrink_vma(struct vma_iterator *vmi,
struct vm_area_struct *vma,
unsigned long from, unsigned long to)
{
@@ -1518,12 +1389,15 @@ static int shrink_vma(struct mm_struct *mm,
/* adjust the VMA's pointers, which may reposition it in the MM's tree
* and list */
- delete_vma_from_mm(vma);
- if (from > vma->vm_start)
+ if (from > vma->vm_start) {
+ if (vma_iter_clear_gfp(vmi, from, vma->vm_end, GFP_KERNEL))
+ return -ENOMEM;
vma->vm_end = from;
- else
+ } else {
+ if (vma_iter_clear_gfp(vmi, vma->vm_start, to, GFP_KERNEL))
+ return -ENOMEM;
vma->vm_start = to;
- add_vma_to_mm(mm, vma);
+ }
/* cut the backing region down to size */
region = vma->vm_region;
@@ -1551,9 +1425,10 @@ static int shrink_vma(struct mm_struct *mm,
*/
int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, struct list_head *uf)
{
+ VMA_ITERATOR(vmi, mm, start);
struct vm_area_struct *vma;
unsigned long end;
- int ret;
+ int ret = 0;
len = PAGE_ALIGN(len);
if (len == 0)
@@ -1562,7 +1437,7 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, struct list
end = start + len;
/* find the first potentially overlapping VMA */
- vma = find_vma(mm, start);
+ vma = vma_find(&vmi, end);
if (!vma) {
static int limit;
if (limit < 5) {
@@ -1581,7 +1456,7 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, struct list
return -EINVAL;
if (end == vma->vm_end)
goto erase_whole_vma;
- vma = vma->vm_next;
+ vma = vma_find(&vmi, end);
} while (vma);
return -EINVAL;
} else {
@@ -1595,28 +1470,29 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, struct list
if (end != vma->vm_end && offset_in_page(end))
return -EINVAL;
if (start != vma->vm_start && end != vma->vm_end) {
- ret = split_vma(mm, vma, start, 1);
+ ret = split_vma(&vmi, vma, start, 1);
if (ret < 0)
return ret;
}
- return shrink_vma(mm, vma, start, end);
+ return vmi_shrink_vma(&vmi, vma, start, end);
}
erase_whole_vma:
- delete_vma_from_mm(vma);
- delete_vma(mm, vma);
- return 0;
+ if (delete_vma_from_mm(vma))
+ ret = -ENOMEM;
+ else
+ delete_vma(mm, vma);
+ return ret;
}
-EXPORT_SYMBOL(do_munmap);
int vm_munmap(unsigned long addr, size_t len)
{
struct mm_struct *mm = current->mm;
int ret;
- down_write(&mm->mmap_sem);
+ mmap_write_lock(mm);
ret = do_munmap(mm, addr, len, NULL);
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
return ret;
}
EXPORT_SYMBOL(vm_munmap);
@@ -1631,6 +1507,7 @@ SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
*/
void exit_mmap(struct mm_struct *mm)
{
+ VMA_ITERATOR(vmi, mm, 0);
struct vm_area_struct *vma;
if (!mm)
@@ -1638,17 +1515,18 @@ void exit_mmap(struct mm_struct *mm)
mm->total_vm = 0;
- while ((vma = mm->mmap)) {
- mm->mmap = vma->vm_next;
- delete_vma_from_mm(vma);
+ /*
+ * Lock the mm to avoid the assert complaining, even though this is the
+ * only user of the mm
+ */
+ mmap_write_lock(mm);
+ for_each_vma(vmi, vma) {
+ cleanup_vma_from_mm(vma);
delete_vma(mm, vma);
cond_resched();
}
-}
-
-int vm_brk(unsigned long addr, unsigned long len)
-{
- return -ENOMEM;
+ __mt_destroy(&mm->mm_mt);
+ mmap_write_unlock(mm);
}
/*
@@ -1686,7 +1564,7 @@ static unsigned long do_mremap(unsigned long addr,
if (vma->vm_end != vma->vm_start + old_len)
return (unsigned long) -EFAULT;
- if (vma->vm_flags & VM_MAYSHARE)
+ if (is_nommu_shared_mapping(vma->vm_flags))
return (unsigned long) -EPERM;
if (new_len > vma->vm_region->vm_end - vma->vm_region->vm_start)
@@ -1703,25 +1581,19 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
{
unsigned long ret;
- down_write(&current->mm->mmap_sem);
+ mmap_write_lock(current->mm);
ret = do_mremap(addr, old_len, new_len, flags, new_addr);
- up_write(&current->mm->mmap_sem);
+ mmap_write_unlock(current->mm);
return ret;
}
-struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
- unsigned int foll_flags)
-{
- return NULL;
-}
-
int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
unsigned long pfn, unsigned long size, pgprot_t prot)
{
if (addr != (pfn << PAGE_SHIFT))
return -EINVAL;
- vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
+ vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP);
return 0;
}
EXPORT_SYMBOL(remap_pfn_range);
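
On nommu, remap_pfn_range() only verifies that the requested address already equals the physical range and tags the VMA; a driver's .mmap still calls it exactly as on MMU systems (hypothetical driver; phys_base is an assumed device address):

	static int drv_phys_mmap(struct file *file, struct vm_area_struct *vma)
	{
		return remap_pfn_range(vma, vma->vm_start, phys_base >> PAGE_SHIFT,
				       vma->vm_end - vma->vm_start,
				       vma->vm_page_prot);
	}
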
@@ -1751,12 +1623,6 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
}
EXPORT_SYMBOL(remap_vmalloc_range);
-unsigned long arch_get_unmapped_area(struct file *file, unsigned long addr,
- unsigned long len, unsigned long pgoff, unsigned long flags)
-{
- return -ENOMEM;
-}
-
vm_fault_t filemap_fault(struct vm_fault *vmf)
{
BUG();
@@ -1764,20 +1630,22 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
}
EXPORT_SYMBOL(filemap_fault);
-void filemap_map_pages(struct vm_fault *vmf,
+vm_fault_t filemap_map_pages(struct vm_fault *vmf,
pgoff_t start_pgoff, pgoff_t end_pgoff)
{
BUG();
+ return 0;
}
EXPORT_SYMBOL(filemap_map_pages);
-int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
- unsigned long addr, void *buf, int len, unsigned int gup_flags)
+static int __access_remote_vm(struct mm_struct *mm, unsigned long addr,
+ void *buf, int len, unsigned int gup_flags)
{
struct vm_area_struct *vma;
int write = gup_flags & FOLL_WRITE;
- down_read(&mm->mmap_sem);
+ if (mmap_read_lock_killable(mm))
+ return 0;
/* the access must start within one of the target process's mappings */
vma = find_vma(mm, addr);
@@ -1799,7 +1667,7 @@ int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
len = 0;
}
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
return len;
}
@@ -1817,7 +1685,7 @@ int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
int access_remote_vm(struct mm_struct *mm, unsigned long addr,
void *buf, int len, unsigned int gup_flags)
{
- return __access_remote_vm(NULL, mm, addr, buf, len, gup_flags);
+ return __access_remote_vm(mm, addr, buf, len, gup_flags);
}
/*
@@ -1836,13 +1704,92 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in
if (!mm)
return 0;
- len = __access_remote_vm(tsk, mm, addr, buf, len, gup_flags);
+ len = __access_remote_vm(mm, addr, buf, len, gup_flags);
mmput(mm);
return len;
}
EXPORT_SYMBOL_GPL(access_process_vm);
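
The tsk parameter drops out of __access_remote_vm() and the lock acquisition becomes killable, but the exported wrappers keep their MMU signatures, so ptrace-style consumers are unchanged (usage sketch, assuming a task reference is held):

	char buf[16];
	int n;

	n = access_process_vm(task, addr, buf, sizeof(buf), FOLL_FORCE);
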
+#ifdef CONFIG_BPF_SYSCALL
+/*
+ * Copy a string from another process's address space as given in mm.
+ * If there is any error return -EFAULT.
+ */
+static int __copy_remote_vm_str(struct mm_struct *mm, unsigned long addr,
+ void *buf, int len)
+{
+ unsigned long addr_end;
+ struct vm_area_struct *vma;
+ int ret = -EFAULT;
+
+ *(char *)buf = '\0';
+
+ if (mmap_read_lock_killable(mm))
+ return ret;
+
+ /* the access must start within one of the target process's mappings */
+ vma = find_vma(mm, addr);
+ if (!vma)
+ goto out;
+
+ if (check_add_overflow(addr, len, &addr_end))
+ goto out;
+
+ /* don't overrun this mapping */
+ if (addr_end > vma->vm_end)
+ len = vma->vm_end - addr;
+
+ /* only read mappings where it is permitted */
+ if (vma->vm_flags & VM_MAYREAD) {
+ ret = strscpy(buf, (char *)addr, len);
+ if (ret < 0)
+ ret = len - 1;
+ }
+
+out:
+ mmap_read_unlock(mm);
+ return ret;
+}
+
+/**
+ * copy_remote_vm_str - copy a string from another process's address space.
+ * @tsk: the task of the target address space
+ * @addr: start address to read from
+ * @buf: destination buffer
+ * @len: number of bytes to copy
+ * @gup_flags: flags modifying lookup behaviour (unused)
+ *
+ * The caller must hold a reference on @tsk.
+ *
+ * Return: number of bytes copied from @addr (source) to @buf (destination);
+ * not including the trailing NUL. Always guaranteed to leave NUL-terminated
+ * buffer. On any error, return -EFAULT.
+ */
+int copy_remote_vm_str(struct task_struct *tsk, unsigned long addr,
+ void *buf, int len, unsigned int gup_flags)
+{
+ struct mm_struct *mm;
+ int ret;
+
+ if (unlikely(len == 0))
+ return 0;
+
+ mm = get_task_mm(tsk);
+ if (!mm) {
+ *(char *)buf = '\0';
+ return -EFAULT;
+ }
+
+ ret = __copy_remote_vm_str(mm, addr, buf, len);
+
+ mmput(mm);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(copy_remote_vm_str);
+#endif /* CONFIG_BPF_SYSCALL */
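
copy_remote_vm_str() exists only under CONFIG_BPF_SYSCALL, serving BPF helpers that read strings out of another task. A hedged usage sketch (illustrative caller; task and user_addr are assumptions):

	char name[64];
	int n;

	n = copy_remote_vm_str(task, user_addr, name, sizeof(name), 0);
	if (n >= 0)
		pr_info("read %d bytes: \"%s\"\n", n, name);	/* NUL-terminated */
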
+
/**
* nommu_shrink_inode_mappings - Shrink the shared mappings on an inode
* @inode: The inode to check
@@ -1850,8 +1797,8 @@ EXPORT_SYMBOL_GPL(access_process_vm);
* @newsize: The proposed filesize of the inode
*
* Check the shared mappings on an inode on behalf of a shrinking truncate to
- * make sure that that any outstanding VMAs aren't broken and then shrink the
- * vm_regions that extend that beyond so that do_mmap_pgoff() doesn't
+ * make sure that any outstanding VMAs aren't broken and then shrink the
+ * vm_regions that extend beyond so that do_mmap() doesn't
* automatically grant mappings that are too large.
*/
int nommu_shrink_inode_mappings(struct inode *inode, size_t size,
@@ -1919,7 +1866,7 @@ static int __meminit init_user_reserve(void)
{
unsigned long free_kbytes;
- free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);
+ free_kbytes = K(global_zone_page_state(NR_FREE_PAGES));
sysctl_user_reserve_kbytes = min(free_kbytes / 32, 1UL << 17);
return 0;
@@ -1940,9 +1887,17 @@ static int __meminit init_admin_reserve(void)
{
unsigned long free_kbytes;
- free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);
+ free_kbytes = K(global_zone_page_state(NR_FREE_PAGES));
sysctl_admin_reserve_kbytes = min(free_kbytes / 32, 1UL << 13);
return 0;
}
subsys_initcall(init_admin_reserve);
+
+int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
+{
+ mmap_write_lock(oldmm);
+ dup_mm_exe_file(mm, oldmm);
+ mmap_write_unlock(oldmm);
+ return 0;
+}