summaryrefslogtreecommitdiff
path: root/mm/nommu.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/nommu.c')
-rw-r--r--mm/nommu.c527
1 files changed, 286 insertions, 241 deletions
diff --git a/mm/nommu.c b/mm/nommu.c
index 5b83938ecb67..c3a23b082adb 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -36,23 +36,17 @@
#include <linux/printk.h>
#include <linux/uaccess.h>
+#include <linux/uio.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include "internal.h"
-void *high_memory;
-EXPORT_SYMBOL(high_memory);
-struct page *mem_map;
-unsigned long max_mapnr;
-EXPORT_SYMBOL(max_mapnr);
unsigned long highest_memmap_pfn;
-int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS;
int heap_stack_gap = 0;
atomic_long_t mmap_pages_allocated;
-EXPORT_SYMBOL(mem_map);
/* list of mapped, potentially shareable regions */
static struct kmem_cache *vm_region_jar;
@@ -70,7 +64,7 @@ const struct vm_operations_struct generic_file_vm_ops = {
*/
unsigned int kobjsize(const void *objp)
{
- struct page *page;
+ struct folio *folio;
/*
* If the object we have should not have ksize performed on it,
@@ -79,22 +73,22 @@ unsigned int kobjsize(const void *objp)
if (!objp || !virt_addr_valid(objp))
return 0;
- page = virt_to_head_page(objp);
+ folio = virt_to_folio(objp);
/*
* If the allocator sets PageSlab, we know the pointer came from
* kmalloc().
*/
- if (PageSlab(page))
+ if (folio_test_slab(folio))
return ksize(objp);
/*
- * If it's not a compound page, see if we have a matching VMA
+ * If it's not a large folio, see if we have a matching VMA
* region. This test is intentionally done in reverse order,
* so if there's no VMA, we still fall through and hand back
- * PAGE_SIZE for 0-order pages.
+ * PAGE_SIZE for 0-order folios.
*/
- if (!PageCompound(page)) {
+ if (!folio_test_large(folio)) {
struct vm_area_struct *vma;
vma = find_vma(current->mm, (unsigned long)objp);
@@ -106,60 +100,43 @@ unsigned int kobjsize(const void *objp)
* The ksize() function is only guaranteed to work for pointers
* returned by kmalloc(). So handle arbitrary pointers here.
*/
- return page_size(page);
+ return folio_size(folio);
}
-/**
- * follow_pfn - look up PFN at a user virtual address
- * @vma: memory mapping
- * @address: user virtual address
- * @pfn: location to store found PFN
- *
- * Only IO mappings and raw PFN mappings are allowed.
- *
- * Returns zero and the pfn at @pfn on success, -ve otherwise.
- */
-int follow_pfn(struct vm_area_struct *vma, unsigned long address,
- unsigned long *pfn)
-{
- if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
- return -EINVAL;
-
- *pfn = address >> PAGE_SHIFT;
- return 0;
-}
-EXPORT_SYMBOL(follow_pfn);
-
-LIST_HEAD(vmap_area_list);
-
void vfree(const void *addr)
{
kfree(addr);
}
EXPORT_SYMBOL(vfree);
-void *__vmalloc(unsigned long size, gfp_t gfp_mask)
+void *__vmalloc_noprof(unsigned long size, gfp_t gfp_mask)
{
/*
* You can't specify __GFP_HIGHMEM with kmalloc() since kmalloc()
* returns only a logical address.
*/
- return kmalloc(size, (gfp_mask | __GFP_COMP) & ~__GFP_HIGHMEM);
+ return kmalloc_noprof(size, (gfp_mask | __GFP_COMP) & ~__GFP_HIGHMEM);
}
-EXPORT_SYMBOL(__vmalloc);
+EXPORT_SYMBOL(__vmalloc_noprof);
-void *__vmalloc_node_range(unsigned long size, unsigned long align,
+void *vrealloc_node_align_noprof(const void *p, size_t size, unsigned long align,
+ gfp_t flags, int node)
+{
+ return krealloc_noprof(p, size, (flags | __GFP_COMP) & ~__GFP_HIGHMEM);
+}
+
+void *__vmalloc_node_range_noprof(unsigned long size, unsigned long align,
unsigned long start, unsigned long end, gfp_t gfp_mask,
pgprot_t prot, unsigned long vm_flags, int node,
const void *caller)
{
- return __vmalloc(size, gfp_mask);
+ return __vmalloc_noprof(size, gfp_mask);
}
-void *__vmalloc_node(unsigned long size, unsigned long align, gfp_t gfp_mask,
+void *__vmalloc_node_noprof(unsigned long size, unsigned long align, gfp_t gfp_mask,
int node, const void *caller)
{
- return __vmalloc(size, gfp_mask);
+ return __vmalloc_noprof(size, gfp_mask);
}
static void *__vmalloc_user_flags(unsigned long size, gfp_t flags)
@@ -173,18 +150,18 @@ static void *__vmalloc_user_flags(unsigned long size, gfp_t flags)
mmap_write_lock(current->mm);
vma = find_vma(current->mm, (unsigned long)ret);
if (vma)
- vma->vm_flags |= VM_USERMAP;
+ vm_flags_set(vma, VM_USERMAP);
mmap_write_unlock(current->mm);
}
return ret;
}
-void *vmalloc_user(unsigned long size)
+void *vmalloc_user_noprof(unsigned long size)
{
return __vmalloc_user_flags(size, GFP_KERNEL | __GFP_ZERO);
}
-EXPORT_SYMBOL(vmalloc_user);
+EXPORT_SYMBOL(vmalloc_user_noprof);
struct page *vmalloc_to_page(const void *addr)
{
@@ -198,14 +175,13 @@ unsigned long vmalloc_to_pfn(const void *addr)
}
EXPORT_SYMBOL(vmalloc_to_pfn);
-long vread(char *buf, char *addr, unsigned long count)
+long vread_iter(struct iov_iter *iter, const char *addr, size_t count)
{
/* Don't allow overflow */
- if ((unsigned long) buf + count < count)
- count = -(unsigned long) buf;
+ if ((unsigned long) addr + count < count)
+ count = -(unsigned long) addr;
- memcpy(buf, addr, count);
- return count;
+ return copy_to_iter(addr, count, iter);
}
/*
@@ -219,13 +195,29 @@ long vread(char *buf, char *addr, unsigned long count)
* For tight control over page level allocator and protection flags
* use __vmalloc() instead.
*/
-void *vmalloc(unsigned long size)
+void *vmalloc_noprof(unsigned long size)
{
- return __vmalloc(size, GFP_KERNEL);
+ return __vmalloc_noprof(size, GFP_KERNEL);
}
-EXPORT_SYMBOL(vmalloc);
+EXPORT_SYMBOL(vmalloc_noprof);
-void *vmalloc_huge(unsigned long size, gfp_t gfp_mask) __weak __alias(__vmalloc);
+/*
+ * vmalloc_huge_node - allocate virtually contiguous memory, on a node
+ *
+ * @size: allocation size
+ * @gfp_mask: flags for the page level allocator
+ * @node: node to use for allocation or NUMA_NO_NODE
+ *
+ * Allocate enough pages to cover @size from the page level
+ * allocator and map them into contiguous kernel virtual space.
+ *
+ * Due to NOMMU implications the node argument and HUGE page attribute is
+ * ignored.
+ */
+void *vmalloc_huge_node_noprof(unsigned long size, gfp_t gfp_mask, int node)
+{
+ return __vmalloc_noprof(size, gfp_mask);
+}
/*
* vzalloc - allocate virtually contiguous memory with zero fill
@@ -239,11 +231,11 @@ void *vmalloc_huge(unsigned long size, gfp_t gfp_mask) __weak __alias(__vmalloc)
* For tight control over page level allocator and protection flags
* use __vmalloc() instead.
*/
-void *vzalloc(unsigned long size)
+void *vzalloc_noprof(unsigned long size)
{
- return __vmalloc(size, GFP_KERNEL | __GFP_ZERO);
+ return __vmalloc_noprof(size, GFP_KERNEL | __GFP_ZERO);
}
-EXPORT_SYMBOL(vzalloc);
+EXPORT_SYMBOL(vzalloc_noprof);
/**
* vmalloc_node - allocate memory on a specific node
@@ -256,11 +248,11 @@ EXPORT_SYMBOL(vzalloc);
* For tight control over page level allocator and protection flags
* use __vmalloc() instead.
*/
-void *vmalloc_node(unsigned long size, int node)
+void *vmalloc_node_noprof(unsigned long size, int node)
{
- return vmalloc(size);
+ return vmalloc_noprof(size);
}
-EXPORT_SYMBOL(vmalloc_node);
+EXPORT_SYMBOL(vmalloc_node_noprof);
/**
* vzalloc_node - allocate memory on a specific node with zero fill
@@ -274,11 +266,11 @@ EXPORT_SYMBOL(vmalloc_node);
* For tight control over page level allocator and protection flags
* use __vmalloc() instead.
*/
-void *vzalloc_node(unsigned long size, int node)
+void *vzalloc_node_noprof(unsigned long size, int node)
{
- return vzalloc(size);
+ return vzalloc_noprof(size);
}
-EXPORT_SYMBOL(vzalloc_node);
+EXPORT_SYMBOL(vzalloc_node_noprof);
/**
* vmalloc_32 - allocate virtually contiguous memory (32bit addressable)
@@ -287,11 +279,11 @@ EXPORT_SYMBOL(vzalloc_node);
* Allocate enough 32bit PA addressable pages to cover @size from the
* page level allocator and map them into contiguous kernel virtual space.
*/
-void *vmalloc_32(unsigned long size)
+void *vmalloc_32_noprof(unsigned long size)
{
- return __vmalloc(size, GFP_KERNEL);
+ return __vmalloc_noprof(size, GFP_KERNEL);
}
-EXPORT_SYMBOL(vmalloc_32);
+EXPORT_SYMBOL(vmalloc_32_noprof);
/**
* vmalloc_32_user - allocate zeroed virtually contiguous 32bit memory
@@ -303,15 +295,15 @@ EXPORT_SYMBOL(vmalloc_32);
* VM_USERMAP is set on the corresponding VMA so that subsequent calls to
* remap_vmalloc_range() are permissible.
*/
-void *vmalloc_32_user(unsigned long size)
+void *vmalloc_32_user_noprof(unsigned long size)
{
/*
* We'll have to sort out the ZONE_DMA bits for 64-bit,
* but for now this can simply use vmalloc_user() directly.
*/
- return vmalloc_user(size);
+ return vmalloc_user_noprof(size);
}
-EXPORT_SYMBOL(vmalloc_32_user);
+EXPORT_SYMBOL(vmalloc_32_user_noprof);
void *vmap(struct page **pages, unsigned int count, unsigned long flags, pgprot_t prot)
{
@@ -357,6 +349,13 @@ int vm_insert_page(struct vm_area_struct *vma, unsigned long addr,
}
EXPORT_SYMBOL(vm_insert_page);
+int vm_insert_pages(struct vm_area_struct *vma, unsigned long addr,
+ struct page **pages, unsigned long *num)
+{
+ return -EINVAL;
+}
+EXPORT_SYMBOL(vm_insert_pages);
+
int vm_map_pages(struct vm_area_struct *vma, struct page **pages,
unsigned long num)
{
@@ -403,8 +402,22 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
return mm->brk = brk;
}
+static int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS;
+
+static const struct ctl_table nommu_table[] = {
+ {
+ .procname = "nr_trim_pages",
+ .data = &sysctl_nr_trim_pages,
+ .maxlen = sizeof(sysctl_nr_trim_pages),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ },
+};
+
/*
- * initialise the percpu counter for VM and region record slabs
+ * initialise the percpu counter for VM and region record slabs, initialise VMA
+ * state.
*/
void __init mmap_init(void)
{
@@ -413,6 +426,8 @@ void __init mmap_init(void)
ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL);
VM_BUG_ON(ret);
vm_region_jar = KMEM_CACHE(vm_region, SLAB_PANIC|SLAB_ACCOUNT);
+ register_sysctl_init("vm", nommu_table);
+ vma_state_init();
}
/*
@@ -544,19 +559,6 @@ static void put_nommu_region(struct vm_region *region)
__put_nommu_region(region);
}
-void vma_mas_store(struct vm_area_struct *vma, struct ma_state *mas)
-{
- mas_set_range(mas, vma->vm_start, vma->vm_end - 1);
- mas_store_prealloc(mas, vma);
-}
-
-void vma_mas_remove(struct vm_area_struct *vma, struct ma_state *mas)
-{
- mas->index = vma->vm_start;
- mas->last = vma->vm_end - 1;
- mas_store_prealloc(mas, NULL);
-}
-
static void setup_vma_to_mm(struct vm_area_struct *vma, struct mm_struct *mm)
{
vma->vm_mm = mm;
@@ -573,44 +575,6 @@ static void setup_vma_to_mm(struct vm_area_struct *vma, struct mm_struct *mm)
}
}
-/*
- * mas_add_vma_to_mm() - Maple state variant of add_mas_to_mm().
- * @mas: The maple state with preallocations.
- * @mm: The mm_struct
- * @vma: The vma to add
- *
- */
-static void mas_add_vma_to_mm(struct ma_state *mas, struct mm_struct *mm,
- struct vm_area_struct *vma)
-{
- BUG_ON(!vma->vm_region);
-
- setup_vma_to_mm(vma, mm);
- mm->map_count++;
-
- /* add the VMA to the tree */
- vma_mas_store(vma, mas);
-}
-
-/*
- * add a VMA into a process's mm_struct in the appropriate place in the list
- * and tree and add to the address space's page tree also if not an anonymous
- * page
- * - should be called with mm->mmap_lock held writelocked
- */
-static int add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma)
-{
- MA_STATE(mas, &mm->mm_mt, vma->vm_start, vma->vm_end);
-
- if (mas_preallocate(&mas, vma, GFP_KERNEL)) {
- pr_warn("Allocation of vma tree for process %d failed\n",
- current->pid);
- return -ENOMEM;
- }
- mas_add_vma_to_mm(&mas, mm, vma);
- return 0;
-}
-
static void cleanup_vma_from_mm(struct vm_area_struct *vma)
{
vma->vm_mm->map_count--;
@@ -626,14 +590,16 @@ static void cleanup_vma_from_mm(struct vm_area_struct *vma)
i_mmap_unlock_write(mapping);
}
}
+
/*
* delete a VMA from its owning mm_struct and address space
*/
static int delete_vma_from_mm(struct vm_area_struct *vma)
{
- MA_STATE(mas, &vma->vm_mm->mm_mt, 0, 0);
+ VMA_ITERATOR(vmi, vma->vm_mm, vma->vm_start);
- if (mas_preallocate(&mas, vma, GFP_KERNEL)) {
+ vma_iter_config(&vmi, vma->vm_start, vma->vm_end);
+ if (vma_iter_prealloc(&vmi, NULL)) {
pr_warn("Allocation of vma tree for process %d failed\n",
current->pid);
return -ENOMEM;
@@ -641,17 +607,15 @@ static int delete_vma_from_mm(struct vm_area_struct *vma)
cleanup_vma_from_mm(vma);
/* remove from the MM's tree and list */
- vma_mas_remove(vma, &mas);
+ vma_iter_clear(&vmi);
return 0;
}
-
/*
* destroy a VMA record
*/
static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma)
{
- if (vma->vm_ops && vma->vm_ops->close)
- vma->vm_ops->close(vma);
+ vma_close(vma);
if (vma->vm_file)
fput(vma->vm_file);
put_nommu_region(vma->vm_region);
@@ -675,30 +639,27 @@ EXPORT_SYMBOL(find_vma_intersection);
*/
struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
{
- MA_STATE(mas, &mm->mm_mt, addr, addr);
+ VMA_ITERATOR(vmi, mm, addr);
- return mas_walk(&mas);
+ return vma_iter_load(&vmi);
}
EXPORT_SYMBOL(find_vma);
/*
- * find a VMA
- * - we don't extend stack VMAs under NOMMU conditions
- */
-struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr)
-{
- return find_vma(mm, addr);
-}
-
-/*
* expand a stack to a given address
* - not supported under NOMMU conditions
*/
-int expand_stack(struct vm_area_struct *vma, unsigned long address)
+int expand_stack_locked(struct vm_area_struct *vma, unsigned long addr)
{
return -ENOMEM;
}
+struct vm_area_struct *expand_stack(struct mm_struct *mm, unsigned long addr)
+{
+ mmap_read_unlock(mm);
+ return NULL;
+}
+
/*
* look up the first VMA exactly that exactly matches addr
* - should be called with mm->mmap_lock at least held readlocked
@@ -709,9 +670,9 @@ static struct vm_area_struct *find_vma_exact(struct mm_struct *mm,
{
struct vm_area_struct *vma;
unsigned long end = addr + len;
- MA_STATE(mas, &mm->mm_mt, addr, addr);
+ VMA_ITERATOR(vmi, mm, addr);
- vma = mas_walk(&mas);
+ vma = vma_iter_load(&vmi);
if (!vma)
return NULL;
if (vma->vm_start != addr)
@@ -759,7 +720,7 @@ static int validate_mmap_request(struct file *file,
if (file) {
/* files must support mmap */
- if (!file->f_op->mmap)
+ if (!can_mmap_file(file))
return -ENODEV;
/* work out if what we've got could possibly be shared
@@ -884,37 +845,44 @@ static int validate_mmap_request(struct file *file,
* we've determined that we can make the mapping, now translate what we
* now know into VMA flags
*/
-static unsigned long determine_vm_flags(struct file *file,
- unsigned long prot,
- unsigned long flags,
- unsigned long capabilities)
+static vm_flags_t determine_vm_flags(struct file *file,
+ unsigned long prot,
+ unsigned long flags,
+ unsigned long capabilities)
{
- unsigned long vm_flags;
+ vm_flags_t vm_flags;
- vm_flags = calc_vm_prot_bits(prot, 0) | calc_vm_flag_bits(flags);
- /* vm_flags |= mm->def_flags; */
+ vm_flags = calc_vm_prot_bits(prot, 0) | calc_vm_flag_bits(file, flags);
- if (!(capabilities & NOMMU_MAP_DIRECT)) {
- /* attempt to share read-only copies of mapped file chunks */
+ if (!file) {
+ /*
+ * MAP_ANONYMOUS. MAP_SHARED is mapped to MAP_PRIVATE, because
+ * there is no fork().
+ */
vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
- if (file && !(prot & PROT_WRITE))
- vm_flags |= VM_MAYSHARE;
+ } else if (flags & MAP_PRIVATE) {
+ /* MAP_PRIVATE file mapping */
+ if (capabilities & NOMMU_MAP_DIRECT)
+ vm_flags |= (capabilities & NOMMU_VMFLAGS);
+ else
+ vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
+
+ if (!(prot & PROT_WRITE) && !current->ptrace)
+ /*
+ * R/O private file mapping which cannot be used to
+ * modify memory, especially also not via active ptrace
+ * (e.g., set breakpoints) or later by upgrading
+ * permissions (no mprotect()). We can try overlaying
+ * the file mapping, which will work e.g., on chardevs,
+ * ramfs/tmpfs/shmfs and romfs/cramf.
+ */
+ vm_flags |= VM_MAYOVERLAY;
} else {
- /* overlay a shareable mapping on the backing device or inode
- * if possible - used for chardevs, ramfs/tmpfs/shmfs and
- * romfs/cramfs */
- vm_flags |= VM_MAYSHARE | (capabilities & NOMMU_VMFLAGS);
- if (flags & MAP_SHARED)
- vm_flags |= VM_SHARED;
+ /* MAP_SHARED file mapping: NOMMU_MAP_DIRECT is set. */
+ vm_flags |= VM_SHARED | VM_MAYSHARE |
+ (capabilities & NOMMU_VMFLAGS);
}
- /* refuse to let anyone share private mappings with this process if
- * it's being traced - otherwise breakpoints set in it may interfere
- * with another untraced process
- */
- if ((flags & MAP_PRIVATE) && current->ptrace)
- vm_flags &= ~VM_MAYSHARE;
-
return vm_flags;
}
@@ -926,7 +894,7 @@ static int do_mmap_shared_file(struct vm_area_struct *vma)
{
int ret;
- ret = call_mmap(vma->vm_file, vma);
+ ret = mmap_file(vma->vm_file, vma);
if (ret == 0) {
vma->vm_region->vm_top = vma->vm_region->vm_end;
return 0;
@@ -952,15 +920,18 @@ static int do_mmap_private(struct vm_area_struct *vma,
void *base;
int ret, order;
- /* invoke the file's mapping function so that it can keep track of
- * shared mappings on devices or memory
- * - VM_MAYSHARE will be set if it may attempt to share
+ /*
+ * Invoke the file's mapping function so that it can keep track of
+ * shared mappings on devices or memory. VM_MAYOVERLAY will be set if
+ * it may attempt to share, which will make is_nommu_shared_mapping()
+ * happy.
*/
if (capabilities & NOMMU_MAP_DIRECT) {
- ret = call_mmap(vma->vm_file, vma);
+ ret = mmap_file(vma->vm_file, vma);
+ /* shouldn't return success if we're not sharing */
+ if (WARN_ON_ONCE(!is_nommu_shared_mapping(vma->vm_flags)))
+ ret = -ENOSYS;
if (ret == 0) {
- /* shouldn't return success if we're not sharing */
- BUG_ON(!(vma->vm_flags & VM_MAYSHARE));
vma->vm_region->vm_top = vma->vm_region->vm_end;
return 0;
}
@@ -991,7 +962,8 @@ static int do_mmap_private(struct vm_area_struct *vma,
atomic_long_add(total, &mmap_pages_allocated);
- region->vm_flags = vma->vm_flags |= VM_MAPPED_COPY;
+ vm_flags_set(vma, VM_MAPPED_COPY);
+ region->vm_flags = vma->vm_flags;
region->vm_start = (unsigned long) base;
region->vm_end = region->vm_start + len;
region->vm_top = region->vm_start + (total << PAGE_SHIFT);
@@ -1030,7 +1002,7 @@ error_free:
enomem:
pr_err("Allocation of length %lu from process %d (%s) failed\n",
len, current->pid, current->comm);
- show_free_areas(0, NULL);
+ show_mem();
return -ENOMEM;
}
@@ -1042,6 +1014,7 @@ unsigned long do_mmap(struct file *file,
unsigned long len,
unsigned long prot,
unsigned long flags,
+ vm_flags_t vm_flags,
unsigned long pgoff,
unsigned long *populate,
struct list_head *uf)
@@ -1049,10 +1022,9 @@ unsigned long do_mmap(struct file *file,
struct vm_area_struct *vma;
struct vm_region *region;
struct rb_node *rb;
- vm_flags_t vm_flags;
unsigned long capabilities, result;
int ret;
- MA_STATE(mas, &current->mm->mm_mt, 0, 0);
+ VMA_ITERATOR(vmi, current->mm, 0);
*populate = 0;
@@ -1069,7 +1041,7 @@ unsigned long do_mmap(struct file *file,
/* we've determined that we can make the mapping, now translate what we
* now know into VMA flags */
- vm_flags = determine_vm_flags(file, prot, flags, capabilities);
+ vm_flags |= determine_vm_flags(file, prot, flags, capabilities);
/* we're going to need to record the mapping */
@@ -1081,14 +1053,11 @@ unsigned long do_mmap(struct file *file,
if (!vma)
goto error_getting_vma;
- if (mas_preallocate(&mas, vma, GFP_KERNEL))
- goto error_maple_preallocate;
-
region->vm_usage = 1;
region->vm_flags = vm_flags;
region->vm_pgoff = pgoff;
- vma->vm_flags = vm_flags;
+ vm_flags_init(vma, vm_flags);
vma->vm_pgoff = pgoff;
if (file) {
@@ -1106,7 +1075,7 @@ unsigned long do_mmap(struct file *file,
* these cases, sharing is handled in the driver or filesystem rather
* than here
*/
- if (vm_flags & VM_MAYSHARE) {
+ if (is_nommu_shared_mapping(vm_flags)) {
struct vm_region *pregion;
unsigned long pglen, rpglen, pgend, rpgend, start;
@@ -1116,7 +1085,7 @@ unsigned long do_mmap(struct file *file,
for (rb = rb_first(&nommu_region_tree); rb; rb = rb_next(rb)) {
pregion = rb_entry(rb, struct vm_region, vm_rb);
- if (!(pregion->vm_flags & VM_MAYSHARE))
+ if (!is_nommu_shared_mapping(pregion->vm_flags))
continue;
/* search for overlapping mappings on the same file */
@@ -1152,7 +1121,7 @@ unsigned long do_mmap(struct file *file,
vma->vm_end = start + len;
if (pregion->vm_flags & VM_MAPPED_COPY)
- vma->vm_flags |= VM_MAPPED_COPY;
+ vm_flags_set(vma, VM_MAPPED_COPY);
else {
ret = do_mmap_shared_file(vma);
if (ret < 0) {
@@ -1224,7 +1193,15 @@ unsigned long do_mmap(struct file *file,
current->mm->total_vm += len >> PAGE_SHIFT;
share:
- mas_add_vma_to_mm(&mas, current->mm, vma);
+ BUG_ON(!vma->vm_region);
+ vma_iter_config(&vmi, vma->vm_start, vma->vm_end);
+ if (vma_iter_prealloc(&vmi, vma))
+ goto error_just_free;
+
+ setup_vma_to_mm(vma, current->mm);
+ current->mm->map_count++;
+ /* add the VMA to the tree */
+ vma_iter_store_new(&vmi, vma);
/* we flush the region from the icache only when the first executable
* mapping of it is made */
@@ -1240,7 +1217,7 @@ share:
error_just_free:
up_write(&nommu_region_sem);
error:
- mas_destroy(&mas);
+ vma_iter_free(&vmi);
if (region->vm_file)
fput(region->vm_file);
kmem_cache_free(vm_region_jar, region);
@@ -1259,22 +1236,14 @@ error_getting_vma:
kmem_cache_free(vm_region_jar, region);
pr_warn("Allocation of vma for %lu byte allocation from process %d failed\n",
len, current->pid);
- show_free_areas(0, NULL);
+ show_mem();
return -ENOMEM;
error_getting_region:
pr_warn("Allocation of vm region for %lu byte allocation from process %d failed\n",
len, current->pid);
- show_free_areas(0, NULL);
- return -ENOMEM;
-
-error_maple_preallocate:
- kmem_cache_free(vm_region_jar, region);
- vm_area_free(vma);
- pr_warn("Allocation of vma tree for process %d failed\n", current->pid);
- show_free_areas(0, NULL);
+ show_mem();
return -ENOMEM;
-
}
unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
@@ -1334,13 +1303,13 @@ SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
* split a vma into two pieces at address 'addr', a new vma is allocated either
* for the first part or the tail.
*/
-int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long addr, int new_below)
+static int split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
+ unsigned long addr, int new_below)
{
struct vm_area_struct *new;
struct vm_region *region;
unsigned long npages;
- MA_STATE(mas, &mm->mm_mt, vma->vm_start, vma->vm_end);
+ struct mm_struct *mm;
/* we're only permitted to split anonymous regions (these should have
* only a single usage on the region) */
@@ -1359,12 +1328,6 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
if (!new)
goto err_vma_dup;
- if (mas_preallocate(&mas, vma, GFP_KERNEL)) {
- pr_warn("Allocation of vma tree for process %d failed\n",
- current->pid);
- goto err_mas_preallocate;
- }
-
/* most fields are the same, copy all, and then fixup */
*region = *vma->vm_region;
new->vm_region = region;
@@ -1378,6 +1341,13 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
region->vm_pgoff = new->vm_pgoff += npages;
}
+ vma_iter_config(vmi, new->vm_start, new->vm_end);
+ if (vma_iter_prealloc(vmi, vma)) {
+ pr_warn("Allocation of vma tree for process %d failed\n",
+ current->pid);
+ goto err_vmi_preallocate;
+ }
+
if (new->vm_ops && new->vm_ops->open)
new->vm_ops->open(new);
@@ -1396,13 +1366,11 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
setup_vma_to_mm(vma, mm);
setup_vma_to_mm(new, mm);
- mas_set_range(&mas, vma->vm_start, vma->vm_end - 1);
- mas_store(&mas, vma);
- vma_mas_store(new, &mas);
+ vma_iter_store_new(vmi, new);
mm->map_count++;
return 0;
-err_mas_preallocate:
+err_vmi_preallocate:
vm_area_free(new);
err_vma_dup:
kmem_cache_free(vm_region_jar, region);
@@ -1413,7 +1381,7 @@ err_vma_dup:
* shrink a VMA by removing the specified chunk from either the beginning or
* the end
*/
-static int shrink_vma(struct mm_struct *mm,
+static int vmi_shrink_vma(struct vma_iterator *vmi,
struct vm_area_struct *vma,
unsigned long from, unsigned long to)
{
@@ -1421,14 +1389,15 @@ static int shrink_vma(struct mm_struct *mm,
/* adjust the VMA's pointers, which may reposition it in the MM's tree
* and list */
- if (delete_vma_from_mm(vma))
- return -ENOMEM;
- if (from > vma->vm_start)
+ if (from > vma->vm_start) {
+ if (vma_iter_clear_gfp(vmi, from, vma->vm_end, GFP_KERNEL))
+ return -ENOMEM;
vma->vm_end = from;
- else
+ } else {
+ if (vma_iter_clear_gfp(vmi, vma->vm_start, to, GFP_KERNEL))
+ return -ENOMEM;
vma->vm_start = to;
- if (add_vma_to_mm(mm, vma))
- return -ENOMEM;
+ }
/* cut the backing region down to size */
region = vma->vm_region;
@@ -1456,7 +1425,7 @@ static int shrink_vma(struct mm_struct *mm,
*/
int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, struct list_head *uf)
{
- MA_STATE(mas, &mm->mm_mt, start, start);
+ VMA_ITERATOR(vmi, mm, start);
struct vm_area_struct *vma;
unsigned long end;
int ret = 0;
@@ -1468,7 +1437,7 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, struct list
end = start + len;
/* find the first potentially overlapping VMA */
- vma = mas_find(&mas, end - 1);
+ vma = vma_find(&vmi, end);
if (!vma) {
static int limit;
if (limit < 5) {
@@ -1487,7 +1456,7 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, struct list
return -EINVAL;
if (end == vma->vm_end)
goto erase_whole_vma;
- vma = mas_next(&mas, end - 1);
+ vma = vma_find(&vmi, end);
} while (vma);
return -EINVAL;
} else {
@@ -1501,11 +1470,11 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, struct list
if (end != vma->vm_end && offset_in_page(end))
return -EINVAL;
if (start != vma->vm_start && end != vma->vm_end) {
- ret = split_vma(mm, vma, start, 1);
+ ret = split_vma(&vmi, vma, start, 1);
if (ret < 0)
return ret;
}
- return shrink_vma(mm, vma, start, end);
+ return vmi_shrink_vma(&vmi, vma, start, end);
}
erase_whole_vma:
@@ -1560,11 +1529,6 @@ void exit_mmap(struct mm_struct *mm)
mmap_write_unlock(mm);
}
-int vm_brk(unsigned long addr, unsigned long len)
-{
- return -ENOMEM;
-}
-
/*
* expand (or shrink) an existing mapping, potentially moving it at the same
* time (controlled by the MREMAP_MAYMOVE flag and available VM space)
@@ -1600,7 +1564,7 @@ static unsigned long do_mremap(unsigned long addr,
if (vma->vm_end != vma->vm_start + old_len)
return (unsigned long) -EFAULT;
- if (vma->vm_flags & VM_MAYSHARE)
+ if (is_nommu_shared_mapping(vma->vm_flags))
return (unsigned long) -EPERM;
if (new_len > vma->vm_region->vm_end - vma->vm_region->vm_start)
@@ -1623,19 +1587,13 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
return ret;
}
-struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
- unsigned int foll_flags)
-{
- return NULL;
-}
-
int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
unsigned long pfn, unsigned long size, pgprot_t prot)
{
if (addr != (pfn << PAGE_SHIFT))
return -EINVAL;
- vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
+ vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP);
return 0;
}
EXPORT_SYMBOL(remap_pfn_range);
@@ -1680,8 +1638,8 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
}
EXPORT_SYMBOL(filemap_map_pages);
-int __access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf,
- int len, unsigned int gup_flags)
+static int __access_remote_vm(struct mm_struct *mm, unsigned long addr,
+ void *buf, int len, unsigned int gup_flags)
{
struct vm_area_struct *vma;
int write = gup_flags & FOLL_WRITE;
@@ -1753,6 +1711,85 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in
}
EXPORT_SYMBOL_GPL(access_process_vm);
+#ifdef CONFIG_BPF_SYSCALL
+/*
+ * Copy a string from another process's address space as given in mm.
+ * If there is any error return -EFAULT.
+ */
+static int __copy_remote_vm_str(struct mm_struct *mm, unsigned long addr,
+ void *buf, int len)
+{
+ unsigned long addr_end;
+ struct vm_area_struct *vma;
+ int ret = -EFAULT;
+
+ *(char *)buf = '\0';
+
+ if (mmap_read_lock_killable(mm))
+ return ret;
+
+ /* the access must start within one of the target process's mappings */
+ vma = find_vma(mm, addr);
+ if (!vma)
+ goto out;
+
+ if (check_add_overflow(addr, len, &addr_end))
+ goto out;
+
+ /* don't overrun this mapping */
+ if (addr_end > vma->vm_end)
+ len = vma->vm_end - addr;
+
+ /* only read mappings where it is permitted */
+ if (vma->vm_flags & VM_MAYREAD) {
+ ret = strscpy(buf, (char *)addr, len);
+ if (ret < 0)
+ ret = len - 1;
+ }
+
+out:
+ mmap_read_unlock(mm);
+ return ret;
+}
+
+/**
+ * copy_remote_vm_str - copy a string from another process's address space.
+ * @tsk: the task of the target address space
+ * @addr: start address to read from
+ * @buf: destination buffer
+ * @len: number of bytes to copy
+ * @gup_flags: flags modifying lookup behaviour (unused)
+ *
+ * The caller must hold a reference on @mm.
+ *
+ * Return: number of bytes copied from @addr (source) to @buf (destination);
+ * not including the trailing NUL. Always guaranteed to leave NUL-terminated
+ * buffer. On any error, return -EFAULT.
+ */
+int copy_remote_vm_str(struct task_struct *tsk, unsigned long addr,
+ void *buf, int len, unsigned int gup_flags)
+{
+ struct mm_struct *mm;
+ int ret;
+
+ if (unlikely(len == 0))
+ return 0;
+
+ mm = get_task_mm(tsk);
+ if (!mm) {
+ *(char *)buf = '\0';
+ return -EFAULT;
+ }
+
+ ret = __copy_remote_vm_str(mm, addr, buf, len);
+
+ mmput(mm);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(copy_remote_vm_str);
+#endif /* CONFIG_BPF_SYSCALL */
+
/**
* nommu_shrink_inode_mappings - Shrink the shared mappings on an inode
* @inode: The inode to check
@@ -1829,7 +1866,7 @@ static int __meminit init_user_reserve(void)
{
unsigned long free_kbytes;
- free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);
+ free_kbytes = K(global_zone_page_state(NR_FREE_PAGES));
sysctl_user_reserve_kbytes = min(free_kbytes / 32, 1UL << 17);
return 0;
@@ -1850,9 +1887,17 @@ static int __meminit init_admin_reserve(void)
{
unsigned long free_kbytes;
- free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);
+ free_kbytes = K(global_zone_page_state(NR_FREE_PAGES));
sysctl_admin_reserve_kbytes = min(free_kbytes / 32, 1UL << 13);
return 0;
}
subsys_initcall(init_admin_reserve);
+
+int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
+{
+ mmap_write_lock(oldmm);
+ dup_mm_exe_file(mm, oldmm);
+ mmap_write_unlock(oldmm);
+ return 0;
+}