diff options
Diffstat (limited to 'mm/nommu.c')
| -rw-r--r-- | mm/nommu.c | 527 |
1 files changed, 286 insertions, 241 deletions
diff --git a/mm/nommu.c b/mm/nommu.c index 5b83938ecb67..c3a23b082adb 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -36,23 +36,17 @@ #include <linux/printk.h> #include <linux/uaccess.h> +#include <linux/uio.h> #include <asm/tlb.h> #include <asm/tlbflush.h> #include <asm/mmu_context.h> #include "internal.h" -void *high_memory; -EXPORT_SYMBOL(high_memory); -struct page *mem_map; -unsigned long max_mapnr; -EXPORT_SYMBOL(max_mapnr); unsigned long highest_memmap_pfn; -int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS; int heap_stack_gap = 0; atomic_long_t mmap_pages_allocated; -EXPORT_SYMBOL(mem_map); /* list of mapped, potentially shareable regions */ static struct kmem_cache *vm_region_jar; @@ -70,7 +64,7 @@ const struct vm_operations_struct generic_file_vm_ops = { */ unsigned int kobjsize(const void *objp) { - struct page *page; + struct folio *folio; /* * If the object we have should not have ksize performed on it, @@ -79,22 +73,22 @@ unsigned int kobjsize(const void *objp) if (!objp || !virt_addr_valid(objp)) return 0; - page = virt_to_head_page(objp); + folio = virt_to_folio(objp); /* * If the allocator sets PageSlab, we know the pointer came from * kmalloc(). */ - if (PageSlab(page)) + if (folio_test_slab(folio)) return ksize(objp); /* - * If it's not a compound page, see if we have a matching VMA + * If it's not a large folio, see if we have a matching VMA * region. This test is intentionally done in reverse order, * so if there's no VMA, we still fall through and hand back - * PAGE_SIZE for 0-order pages. + * PAGE_SIZE for 0-order folios. */ - if (!PageCompound(page)) { + if (!folio_test_large(folio)) { struct vm_area_struct *vma; vma = find_vma(current->mm, (unsigned long)objp); @@ -106,60 +100,43 @@ unsigned int kobjsize(const void *objp) * The ksize() function is only guaranteed to work for pointers * returned by kmalloc(). So handle arbitrary pointers here. */ - return page_size(page); + return folio_size(folio); } -/** - * follow_pfn - look up PFN at a user virtual address - * @vma: memory mapping - * @address: user virtual address - * @pfn: location to store found PFN - * - * Only IO mappings and raw PFN mappings are allowed. - * - * Returns zero and the pfn at @pfn on success, -ve otherwise. - */ -int follow_pfn(struct vm_area_struct *vma, unsigned long address, - unsigned long *pfn) -{ - if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) - return -EINVAL; - - *pfn = address >> PAGE_SHIFT; - return 0; -} -EXPORT_SYMBOL(follow_pfn); - -LIST_HEAD(vmap_area_list); - void vfree(const void *addr) { kfree(addr); } EXPORT_SYMBOL(vfree); -void *__vmalloc(unsigned long size, gfp_t gfp_mask) +void *__vmalloc_noprof(unsigned long size, gfp_t gfp_mask) { /* * You can't specify __GFP_HIGHMEM with kmalloc() since kmalloc() * returns only a logical address. */ - return kmalloc(size, (gfp_mask | __GFP_COMP) & ~__GFP_HIGHMEM); + return kmalloc_noprof(size, (gfp_mask | __GFP_COMP) & ~__GFP_HIGHMEM); } -EXPORT_SYMBOL(__vmalloc); +EXPORT_SYMBOL(__vmalloc_noprof); -void *__vmalloc_node_range(unsigned long size, unsigned long align, +void *vrealloc_node_align_noprof(const void *p, size_t size, unsigned long align, + gfp_t flags, int node) +{ + return krealloc_noprof(p, size, (flags | __GFP_COMP) & ~__GFP_HIGHMEM); +} + +void *__vmalloc_node_range_noprof(unsigned long size, unsigned long align, unsigned long start, unsigned long end, gfp_t gfp_mask, pgprot_t prot, unsigned long vm_flags, int node, const void *caller) { - return __vmalloc(size, gfp_mask); + return __vmalloc_noprof(size, gfp_mask); } -void *__vmalloc_node(unsigned long size, unsigned long align, gfp_t gfp_mask, +void *__vmalloc_node_noprof(unsigned long size, unsigned long align, gfp_t gfp_mask, int node, const void *caller) { - return __vmalloc(size, gfp_mask); + return __vmalloc_noprof(size, gfp_mask); } static void *__vmalloc_user_flags(unsigned long size, gfp_t flags) @@ -173,18 +150,18 @@ static void *__vmalloc_user_flags(unsigned long size, gfp_t flags) mmap_write_lock(current->mm); vma = find_vma(current->mm, (unsigned long)ret); if (vma) - vma->vm_flags |= VM_USERMAP; + vm_flags_set(vma, VM_USERMAP); mmap_write_unlock(current->mm); } return ret; } -void *vmalloc_user(unsigned long size) +void *vmalloc_user_noprof(unsigned long size) { return __vmalloc_user_flags(size, GFP_KERNEL | __GFP_ZERO); } -EXPORT_SYMBOL(vmalloc_user); +EXPORT_SYMBOL(vmalloc_user_noprof); struct page *vmalloc_to_page(const void *addr) { @@ -198,14 +175,13 @@ unsigned long vmalloc_to_pfn(const void *addr) } EXPORT_SYMBOL(vmalloc_to_pfn); -long vread(char *buf, char *addr, unsigned long count) +long vread_iter(struct iov_iter *iter, const char *addr, size_t count) { /* Don't allow overflow */ - if ((unsigned long) buf + count < count) - count = -(unsigned long) buf; + if ((unsigned long) addr + count < count) + count = -(unsigned long) addr; - memcpy(buf, addr, count); - return count; + return copy_to_iter(addr, count, iter); } /* @@ -219,13 +195,29 @@ long vread(char *buf, char *addr, unsigned long count) * For tight control over page level allocator and protection flags * use __vmalloc() instead. */ -void *vmalloc(unsigned long size) +void *vmalloc_noprof(unsigned long size) { - return __vmalloc(size, GFP_KERNEL); + return __vmalloc_noprof(size, GFP_KERNEL); } -EXPORT_SYMBOL(vmalloc); +EXPORT_SYMBOL(vmalloc_noprof); -void *vmalloc_huge(unsigned long size, gfp_t gfp_mask) __weak __alias(__vmalloc); +/* + * vmalloc_huge_node - allocate virtually contiguous memory, on a node + * + * @size: allocation size + * @gfp_mask: flags for the page level allocator + * @node: node to use for allocation or NUMA_NO_NODE + * + * Allocate enough pages to cover @size from the page level + * allocator and map them into contiguous kernel virtual space. + * + * Due to NOMMU implications the node argument and HUGE page attribute is + * ignored. + */ +void *vmalloc_huge_node_noprof(unsigned long size, gfp_t gfp_mask, int node) +{ + return __vmalloc_noprof(size, gfp_mask); +} /* * vzalloc - allocate virtually contiguous memory with zero fill @@ -239,11 +231,11 @@ void *vmalloc_huge(unsigned long size, gfp_t gfp_mask) __weak __alias(__vmalloc) * For tight control over page level allocator and protection flags * use __vmalloc() instead. */ -void *vzalloc(unsigned long size) +void *vzalloc_noprof(unsigned long size) { - return __vmalloc(size, GFP_KERNEL | __GFP_ZERO); + return __vmalloc_noprof(size, GFP_KERNEL | __GFP_ZERO); } -EXPORT_SYMBOL(vzalloc); +EXPORT_SYMBOL(vzalloc_noprof); /** * vmalloc_node - allocate memory on a specific node @@ -256,11 +248,11 @@ EXPORT_SYMBOL(vzalloc); * For tight control over page level allocator and protection flags * use __vmalloc() instead. */ -void *vmalloc_node(unsigned long size, int node) +void *vmalloc_node_noprof(unsigned long size, int node) { - return vmalloc(size); + return vmalloc_noprof(size); } -EXPORT_SYMBOL(vmalloc_node); +EXPORT_SYMBOL(vmalloc_node_noprof); /** * vzalloc_node - allocate memory on a specific node with zero fill @@ -274,11 +266,11 @@ EXPORT_SYMBOL(vmalloc_node); * For tight control over page level allocator and protection flags * use __vmalloc() instead. */ -void *vzalloc_node(unsigned long size, int node) +void *vzalloc_node_noprof(unsigned long size, int node) { - return vzalloc(size); + return vzalloc_noprof(size); } -EXPORT_SYMBOL(vzalloc_node); +EXPORT_SYMBOL(vzalloc_node_noprof); /** * vmalloc_32 - allocate virtually contiguous memory (32bit addressable) @@ -287,11 +279,11 @@ EXPORT_SYMBOL(vzalloc_node); * Allocate enough 32bit PA addressable pages to cover @size from the * page level allocator and map them into contiguous kernel virtual space. */ -void *vmalloc_32(unsigned long size) +void *vmalloc_32_noprof(unsigned long size) { - return __vmalloc(size, GFP_KERNEL); + return __vmalloc_noprof(size, GFP_KERNEL); } -EXPORT_SYMBOL(vmalloc_32); +EXPORT_SYMBOL(vmalloc_32_noprof); /** * vmalloc_32_user - allocate zeroed virtually contiguous 32bit memory @@ -303,15 +295,15 @@ EXPORT_SYMBOL(vmalloc_32); * VM_USERMAP is set on the corresponding VMA so that subsequent calls to * remap_vmalloc_range() are permissible. */ -void *vmalloc_32_user(unsigned long size) +void *vmalloc_32_user_noprof(unsigned long size) { /* * We'll have to sort out the ZONE_DMA bits for 64-bit, * but for now this can simply use vmalloc_user() directly. */ - return vmalloc_user(size); + return vmalloc_user_noprof(size); } -EXPORT_SYMBOL(vmalloc_32_user); +EXPORT_SYMBOL(vmalloc_32_user_noprof); void *vmap(struct page **pages, unsigned int count, unsigned long flags, pgprot_t prot) { @@ -357,6 +349,13 @@ int vm_insert_page(struct vm_area_struct *vma, unsigned long addr, } EXPORT_SYMBOL(vm_insert_page); +int vm_insert_pages(struct vm_area_struct *vma, unsigned long addr, + struct page **pages, unsigned long *num) +{ + return -EINVAL; +} +EXPORT_SYMBOL(vm_insert_pages); + int vm_map_pages(struct vm_area_struct *vma, struct page **pages, unsigned long num) { @@ -403,8 +402,22 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) return mm->brk = brk; } +static int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS; + +static const struct ctl_table nommu_table[] = { + { + .procname = "nr_trim_pages", + .data = &sysctl_nr_trim_pages, + .maxlen = sizeof(sysctl_nr_trim_pages), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + }, +}; + /* - * initialise the percpu counter for VM and region record slabs + * initialise the percpu counter for VM and region record slabs, initialise VMA + * state. */ void __init mmap_init(void) { @@ -413,6 +426,8 @@ void __init mmap_init(void) ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL); VM_BUG_ON(ret); vm_region_jar = KMEM_CACHE(vm_region, SLAB_PANIC|SLAB_ACCOUNT); + register_sysctl_init("vm", nommu_table); + vma_state_init(); } /* @@ -544,19 +559,6 @@ static void put_nommu_region(struct vm_region *region) __put_nommu_region(region); } -void vma_mas_store(struct vm_area_struct *vma, struct ma_state *mas) -{ - mas_set_range(mas, vma->vm_start, vma->vm_end - 1); - mas_store_prealloc(mas, vma); -} - -void vma_mas_remove(struct vm_area_struct *vma, struct ma_state *mas) -{ - mas->index = vma->vm_start; - mas->last = vma->vm_end - 1; - mas_store_prealloc(mas, NULL); -} - static void setup_vma_to_mm(struct vm_area_struct *vma, struct mm_struct *mm) { vma->vm_mm = mm; @@ -573,44 +575,6 @@ static void setup_vma_to_mm(struct vm_area_struct *vma, struct mm_struct *mm) } } -/* - * mas_add_vma_to_mm() - Maple state variant of add_mas_to_mm(). - * @mas: The maple state with preallocations. - * @mm: The mm_struct - * @vma: The vma to add - * - */ -static void mas_add_vma_to_mm(struct ma_state *mas, struct mm_struct *mm, - struct vm_area_struct *vma) -{ - BUG_ON(!vma->vm_region); - - setup_vma_to_mm(vma, mm); - mm->map_count++; - - /* add the VMA to the tree */ - vma_mas_store(vma, mas); -} - -/* - * add a VMA into a process's mm_struct in the appropriate place in the list - * and tree and add to the address space's page tree also if not an anonymous - * page - * - should be called with mm->mmap_lock held writelocked - */ -static int add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma) -{ - MA_STATE(mas, &mm->mm_mt, vma->vm_start, vma->vm_end); - - if (mas_preallocate(&mas, vma, GFP_KERNEL)) { - pr_warn("Allocation of vma tree for process %d failed\n", - current->pid); - return -ENOMEM; - } - mas_add_vma_to_mm(&mas, mm, vma); - return 0; -} - static void cleanup_vma_from_mm(struct vm_area_struct *vma) { vma->vm_mm->map_count--; @@ -626,14 +590,16 @@ static void cleanup_vma_from_mm(struct vm_area_struct *vma) i_mmap_unlock_write(mapping); } } + /* * delete a VMA from its owning mm_struct and address space */ static int delete_vma_from_mm(struct vm_area_struct *vma) { - MA_STATE(mas, &vma->vm_mm->mm_mt, 0, 0); + VMA_ITERATOR(vmi, vma->vm_mm, vma->vm_start); - if (mas_preallocate(&mas, vma, GFP_KERNEL)) { + vma_iter_config(&vmi, vma->vm_start, vma->vm_end); + if (vma_iter_prealloc(&vmi, NULL)) { pr_warn("Allocation of vma tree for process %d failed\n", current->pid); return -ENOMEM; @@ -641,17 +607,15 @@ static int delete_vma_from_mm(struct vm_area_struct *vma) cleanup_vma_from_mm(vma); /* remove from the MM's tree and list */ - vma_mas_remove(vma, &mas); + vma_iter_clear(&vmi); return 0; } - /* * destroy a VMA record */ static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma) { - if (vma->vm_ops && vma->vm_ops->close) - vma->vm_ops->close(vma); + vma_close(vma); if (vma->vm_file) fput(vma->vm_file); put_nommu_region(vma->vm_region); @@ -675,30 +639,27 @@ EXPORT_SYMBOL(find_vma_intersection); */ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) { - MA_STATE(mas, &mm->mm_mt, addr, addr); + VMA_ITERATOR(vmi, mm, addr); - return mas_walk(&mas); + return vma_iter_load(&vmi); } EXPORT_SYMBOL(find_vma); /* - * find a VMA - * - we don't extend stack VMAs under NOMMU conditions - */ -struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr) -{ - return find_vma(mm, addr); -} - -/* * expand a stack to a given address * - not supported under NOMMU conditions */ -int expand_stack(struct vm_area_struct *vma, unsigned long address) +int expand_stack_locked(struct vm_area_struct *vma, unsigned long addr) { return -ENOMEM; } +struct vm_area_struct *expand_stack(struct mm_struct *mm, unsigned long addr) +{ + mmap_read_unlock(mm); + return NULL; +} + /* * look up the first VMA exactly that exactly matches addr * - should be called with mm->mmap_lock at least held readlocked @@ -709,9 +670,9 @@ static struct vm_area_struct *find_vma_exact(struct mm_struct *mm, { struct vm_area_struct *vma; unsigned long end = addr + len; - MA_STATE(mas, &mm->mm_mt, addr, addr); + VMA_ITERATOR(vmi, mm, addr); - vma = mas_walk(&mas); + vma = vma_iter_load(&vmi); if (!vma) return NULL; if (vma->vm_start != addr) @@ -759,7 +720,7 @@ static int validate_mmap_request(struct file *file, if (file) { /* files must support mmap */ - if (!file->f_op->mmap) + if (!can_mmap_file(file)) return -ENODEV; /* work out if what we've got could possibly be shared @@ -884,37 +845,44 @@ static int validate_mmap_request(struct file *file, * we've determined that we can make the mapping, now translate what we * now know into VMA flags */ -static unsigned long determine_vm_flags(struct file *file, - unsigned long prot, - unsigned long flags, - unsigned long capabilities) +static vm_flags_t determine_vm_flags(struct file *file, + unsigned long prot, + unsigned long flags, + unsigned long capabilities) { - unsigned long vm_flags; + vm_flags_t vm_flags; - vm_flags = calc_vm_prot_bits(prot, 0) | calc_vm_flag_bits(flags); - /* vm_flags |= mm->def_flags; */ + vm_flags = calc_vm_prot_bits(prot, 0) | calc_vm_flag_bits(file, flags); - if (!(capabilities & NOMMU_MAP_DIRECT)) { - /* attempt to share read-only copies of mapped file chunks */ + if (!file) { + /* + * MAP_ANONYMOUS. MAP_SHARED is mapped to MAP_PRIVATE, because + * there is no fork(). + */ vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; - if (file && !(prot & PROT_WRITE)) - vm_flags |= VM_MAYSHARE; + } else if (flags & MAP_PRIVATE) { + /* MAP_PRIVATE file mapping */ + if (capabilities & NOMMU_MAP_DIRECT) + vm_flags |= (capabilities & NOMMU_VMFLAGS); + else + vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; + + if (!(prot & PROT_WRITE) && !current->ptrace) + /* + * R/O private file mapping which cannot be used to + * modify memory, especially also not via active ptrace + * (e.g., set breakpoints) or later by upgrading + * permissions (no mprotect()). We can try overlaying + * the file mapping, which will work e.g., on chardevs, + * ramfs/tmpfs/shmfs and romfs/cramf. + */ + vm_flags |= VM_MAYOVERLAY; } else { - /* overlay a shareable mapping on the backing device or inode - * if possible - used for chardevs, ramfs/tmpfs/shmfs and - * romfs/cramfs */ - vm_flags |= VM_MAYSHARE | (capabilities & NOMMU_VMFLAGS); - if (flags & MAP_SHARED) - vm_flags |= VM_SHARED; + /* MAP_SHARED file mapping: NOMMU_MAP_DIRECT is set. */ + vm_flags |= VM_SHARED | VM_MAYSHARE | + (capabilities & NOMMU_VMFLAGS); } - /* refuse to let anyone share private mappings with this process if - * it's being traced - otherwise breakpoints set in it may interfere - * with another untraced process - */ - if ((flags & MAP_PRIVATE) && current->ptrace) - vm_flags &= ~VM_MAYSHARE; - return vm_flags; } @@ -926,7 +894,7 @@ static int do_mmap_shared_file(struct vm_area_struct *vma) { int ret; - ret = call_mmap(vma->vm_file, vma); + ret = mmap_file(vma->vm_file, vma); if (ret == 0) { vma->vm_region->vm_top = vma->vm_region->vm_end; return 0; @@ -952,15 +920,18 @@ static int do_mmap_private(struct vm_area_struct *vma, void *base; int ret, order; - /* invoke the file's mapping function so that it can keep track of - * shared mappings on devices or memory - * - VM_MAYSHARE will be set if it may attempt to share + /* + * Invoke the file's mapping function so that it can keep track of + * shared mappings on devices or memory. VM_MAYOVERLAY will be set if + * it may attempt to share, which will make is_nommu_shared_mapping() + * happy. */ if (capabilities & NOMMU_MAP_DIRECT) { - ret = call_mmap(vma->vm_file, vma); + ret = mmap_file(vma->vm_file, vma); + /* shouldn't return success if we're not sharing */ + if (WARN_ON_ONCE(!is_nommu_shared_mapping(vma->vm_flags))) + ret = -ENOSYS; if (ret == 0) { - /* shouldn't return success if we're not sharing */ - BUG_ON(!(vma->vm_flags & VM_MAYSHARE)); vma->vm_region->vm_top = vma->vm_region->vm_end; return 0; } @@ -991,7 +962,8 @@ static int do_mmap_private(struct vm_area_struct *vma, atomic_long_add(total, &mmap_pages_allocated); - region->vm_flags = vma->vm_flags |= VM_MAPPED_COPY; + vm_flags_set(vma, VM_MAPPED_COPY); + region->vm_flags = vma->vm_flags; region->vm_start = (unsigned long) base; region->vm_end = region->vm_start + len; region->vm_top = region->vm_start + (total << PAGE_SHIFT); @@ -1030,7 +1002,7 @@ error_free: enomem: pr_err("Allocation of length %lu from process %d (%s) failed\n", len, current->pid, current->comm); - show_free_areas(0, NULL); + show_mem(); return -ENOMEM; } @@ -1042,6 +1014,7 @@ unsigned long do_mmap(struct file *file, unsigned long len, unsigned long prot, unsigned long flags, + vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate, struct list_head *uf) @@ -1049,10 +1022,9 @@ unsigned long do_mmap(struct file *file, struct vm_area_struct *vma; struct vm_region *region; struct rb_node *rb; - vm_flags_t vm_flags; unsigned long capabilities, result; int ret; - MA_STATE(mas, ¤t->mm->mm_mt, 0, 0); + VMA_ITERATOR(vmi, current->mm, 0); *populate = 0; @@ -1069,7 +1041,7 @@ unsigned long do_mmap(struct file *file, /* we've determined that we can make the mapping, now translate what we * now know into VMA flags */ - vm_flags = determine_vm_flags(file, prot, flags, capabilities); + vm_flags |= determine_vm_flags(file, prot, flags, capabilities); /* we're going to need to record the mapping */ @@ -1081,14 +1053,11 @@ unsigned long do_mmap(struct file *file, if (!vma) goto error_getting_vma; - if (mas_preallocate(&mas, vma, GFP_KERNEL)) - goto error_maple_preallocate; - region->vm_usage = 1; region->vm_flags = vm_flags; region->vm_pgoff = pgoff; - vma->vm_flags = vm_flags; + vm_flags_init(vma, vm_flags); vma->vm_pgoff = pgoff; if (file) { @@ -1106,7 +1075,7 @@ unsigned long do_mmap(struct file *file, * these cases, sharing is handled in the driver or filesystem rather * than here */ - if (vm_flags & VM_MAYSHARE) { + if (is_nommu_shared_mapping(vm_flags)) { struct vm_region *pregion; unsigned long pglen, rpglen, pgend, rpgend, start; @@ -1116,7 +1085,7 @@ unsigned long do_mmap(struct file *file, for (rb = rb_first(&nommu_region_tree); rb; rb = rb_next(rb)) { pregion = rb_entry(rb, struct vm_region, vm_rb); - if (!(pregion->vm_flags & VM_MAYSHARE)) + if (!is_nommu_shared_mapping(pregion->vm_flags)) continue; /* search for overlapping mappings on the same file */ @@ -1152,7 +1121,7 @@ unsigned long do_mmap(struct file *file, vma->vm_end = start + len; if (pregion->vm_flags & VM_MAPPED_COPY) - vma->vm_flags |= VM_MAPPED_COPY; + vm_flags_set(vma, VM_MAPPED_COPY); else { ret = do_mmap_shared_file(vma); if (ret < 0) { @@ -1224,7 +1193,15 @@ unsigned long do_mmap(struct file *file, current->mm->total_vm += len >> PAGE_SHIFT; share: - mas_add_vma_to_mm(&mas, current->mm, vma); + BUG_ON(!vma->vm_region); + vma_iter_config(&vmi, vma->vm_start, vma->vm_end); + if (vma_iter_prealloc(&vmi, vma)) + goto error_just_free; + + setup_vma_to_mm(vma, current->mm); + current->mm->map_count++; + /* add the VMA to the tree */ + vma_iter_store_new(&vmi, vma); /* we flush the region from the icache only when the first executable * mapping of it is made */ @@ -1240,7 +1217,7 @@ share: error_just_free: up_write(&nommu_region_sem); error: - mas_destroy(&mas); + vma_iter_free(&vmi); if (region->vm_file) fput(region->vm_file); kmem_cache_free(vm_region_jar, region); @@ -1259,22 +1236,14 @@ error_getting_vma: kmem_cache_free(vm_region_jar, region); pr_warn("Allocation of vma for %lu byte allocation from process %d failed\n", len, current->pid); - show_free_areas(0, NULL); + show_mem(); return -ENOMEM; error_getting_region: pr_warn("Allocation of vm region for %lu byte allocation from process %d failed\n", len, current->pid); - show_free_areas(0, NULL); - return -ENOMEM; - -error_maple_preallocate: - kmem_cache_free(vm_region_jar, region); - vm_area_free(vma); - pr_warn("Allocation of vma tree for process %d failed\n", current->pid); - show_free_areas(0, NULL); + show_mem(); return -ENOMEM; - } unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len, @@ -1334,13 +1303,13 @@ SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg) * split a vma into two pieces at address 'addr', a new vma is allocated either * for the first part or the tail. */ -int split_vma(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long addr, int new_below) +static int split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma, + unsigned long addr, int new_below) { struct vm_area_struct *new; struct vm_region *region; unsigned long npages; - MA_STATE(mas, &mm->mm_mt, vma->vm_start, vma->vm_end); + struct mm_struct *mm; /* we're only permitted to split anonymous regions (these should have * only a single usage on the region) */ @@ -1359,12 +1328,6 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma, if (!new) goto err_vma_dup; - if (mas_preallocate(&mas, vma, GFP_KERNEL)) { - pr_warn("Allocation of vma tree for process %d failed\n", - current->pid); - goto err_mas_preallocate; - } - /* most fields are the same, copy all, and then fixup */ *region = *vma->vm_region; new->vm_region = region; @@ -1378,6 +1341,13 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma, region->vm_pgoff = new->vm_pgoff += npages; } + vma_iter_config(vmi, new->vm_start, new->vm_end); + if (vma_iter_prealloc(vmi, vma)) { + pr_warn("Allocation of vma tree for process %d failed\n", + current->pid); + goto err_vmi_preallocate; + } + if (new->vm_ops && new->vm_ops->open) new->vm_ops->open(new); @@ -1396,13 +1366,11 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma, setup_vma_to_mm(vma, mm); setup_vma_to_mm(new, mm); - mas_set_range(&mas, vma->vm_start, vma->vm_end - 1); - mas_store(&mas, vma); - vma_mas_store(new, &mas); + vma_iter_store_new(vmi, new); mm->map_count++; return 0; -err_mas_preallocate: +err_vmi_preallocate: vm_area_free(new); err_vma_dup: kmem_cache_free(vm_region_jar, region); @@ -1413,7 +1381,7 @@ err_vma_dup: * shrink a VMA by removing the specified chunk from either the beginning or * the end */ -static int shrink_vma(struct mm_struct *mm, +static int vmi_shrink_vma(struct vma_iterator *vmi, struct vm_area_struct *vma, unsigned long from, unsigned long to) { @@ -1421,14 +1389,15 @@ static int shrink_vma(struct mm_struct *mm, /* adjust the VMA's pointers, which may reposition it in the MM's tree * and list */ - if (delete_vma_from_mm(vma)) - return -ENOMEM; - if (from > vma->vm_start) + if (from > vma->vm_start) { + if (vma_iter_clear_gfp(vmi, from, vma->vm_end, GFP_KERNEL)) + return -ENOMEM; vma->vm_end = from; - else + } else { + if (vma_iter_clear_gfp(vmi, vma->vm_start, to, GFP_KERNEL)) + return -ENOMEM; vma->vm_start = to; - if (add_vma_to_mm(mm, vma)) - return -ENOMEM; + } /* cut the backing region down to size */ region = vma->vm_region; @@ -1456,7 +1425,7 @@ static int shrink_vma(struct mm_struct *mm, */ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, struct list_head *uf) { - MA_STATE(mas, &mm->mm_mt, start, start); + VMA_ITERATOR(vmi, mm, start); struct vm_area_struct *vma; unsigned long end; int ret = 0; @@ -1468,7 +1437,7 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, struct list end = start + len; /* find the first potentially overlapping VMA */ - vma = mas_find(&mas, end - 1); + vma = vma_find(&vmi, end); if (!vma) { static int limit; if (limit < 5) { @@ -1487,7 +1456,7 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, struct list return -EINVAL; if (end == vma->vm_end) goto erase_whole_vma; - vma = mas_next(&mas, end - 1); + vma = vma_find(&vmi, end); } while (vma); return -EINVAL; } else { @@ -1501,11 +1470,11 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, struct list if (end != vma->vm_end && offset_in_page(end)) return -EINVAL; if (start != vma->vm_start && end != vma->vm_end) { - ret = split_vma(mm, vma, start, 1); + ret = split_vma(&vmi, vma, start, 1); if (ret < 0) return ret; } - return shrink_vma(mm, vma, start, end); + return vmi_shrink_vma(&vmi, vma, start, end); } erase_whole_vma: @@ -1560,11 +1529,6 @@ void exit_mmap(struct mm_struct *mm) mmap_write_unlock(mm); } -int vm_brk(unsigned long addr, unsigned long len) -{ - return -ENOMEM; -} - /* * expand (or shrink) an existing mapping, potentially moving it at the same * time (controlled by the MREMAP_MAYMOVE flag and available VM space) @@ -1600,7 +1564,7 @@ static unsigned long do_mremap(unsigned long addr, if (vma->vm_end != vma->vm_start + old_len) return (unsigned long) -EFAULT; - if (vma->vm_flags & VM_MAYSHARE) + if (is_nommu_shared_mapping(vma->vm_flags)) return (unsigned long) -EPERM; if (new_len > vma->vm_region->vm_end - vma->vm_region->vm_start) @@ -1623,19 +1587,13 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, return ret; } -struct page *follow_page(struct vm_area_struct *vma, unsigned long address, - unsigned int foll_flags) -{ - return NULL; -} - int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn, unsigned long size, pgprot_t prot) { if (addr != (pfn << PAGE_SHIFT)) return -EINVAL; - vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; + vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP); return 0; } EXPORT_SYMBOL(remap_pfn_range); @@ -1680,8 +1638,8 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf, } EXPORT_SYMBOL(filemap_map_pages); -int __access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf, - int len, unsigned int gup_flags) +static int __access_remote_vm(struct mm_struct *mm, unsigned long addr, + void *buf, int len, unsigned int gup_flags) { struct vm_area_struct *vma; int write = gup_flags & FOLL_WRITE; @@ -1753,6 +1711,85 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in } EXPORT_SYMBOL_GPL(access_process_vm); +#ifdef CONFIG_BPF_SYSCALL +/* + * Copy a string from another process's address space as given in mm. + * If there is any error return -EFAULT. + */ +static int __copy_remote_vm_str(struct mm_struct *mm, unsigned long addr, + void *buf, int len) +{ + unsigned long addr_end; + struct vm_area_struct *vma; + int ret = -EFAULT; + + *(char *)buf = '\0'; + + if (mmap_read_lock_killable(mm)) + return ret; + + /* the access must start within one of the target process's mappings */ + vma = find_vma(mm, addr); + if (!vma) + goto out; + + if (check_add_overflow(addr, len, &addr_end)) + goto out; + + /* don't overrun this mapping */ + if (addr_end > vma->vm_end) + len = vma->vm_end - addr; + + /* only read mappings where it is permitted */ + if (vma->vm_flags & VM_MAYREAD) { + ret = strscpy(buf, (char *)addr, len); + if (ret < 0) + ret = len - 1; + } + +out: + mmap_read_unlock(mm); + return ret; +} + +/** + * copy_remote_vm_str - copy a string from another process's address space. + * @tsk: the task of the target address space + * @addr: start address to read from + * @buf: destination buffer + * @len: number of bytes to copy + * @gup_flags: flags modifying lookup behaviour (unused) + * + * The caller must hold a reference on @mm. + * + * Return: number of bytes copied from @addr (source) to @buf (destination); + * not including the trailing NUL. Always guaranteed to leave NUL-terminated + * buffer. On any error, return -EFAULT. + */ +int copy_remote_vm_str(struct task_struct *tsk, unsigned long addr, + void *buf, int len, unsigned int gup_flags) +{ + struct mm_struct *mm; + int ret; + + if (unlikely(len == 0)) + return 0; + + mm = get_task_mm(tsk); + if (!mm) { + *(char *)buf = '\0'; + return -EFAULT; + } + + ret = __copy_remote_vm_str(mm, addr, buf, len); + + mmput(mm); + + return ret; +} +EXPORT_SYMBOL_GPL(copy_remote_vm_str); +#endif /* CONFIG_BPF_SYSCALL */ + /** * nommu_shrink_inode_mappings - Shrink the shared mappings on an inode * @inode: The inode to check @@ -1829,7 +1866,7 @@ static int __meminit init_user_reserve(void) { unsigned long free_kbytes; - free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10); + free_kbytes = K(global_zone_page_state(NR_FREE_PAGES)); sysctl_user_reserve_kbytes = min(free_kbytes / 32, 1UL << 17); return 0; @@ -1850,9 +1887,17 @@ static int __meminit init_admin_reserve(void) { unsigned long free_kbytes; - free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10); + free_kbytes = K(global_zone_page_state(NR_FREE_PAGES)); sysctl_admin_reserve_kbytes = min(free_kbytes / 32, 1UL << 13); return 0; } subsys_initcall(init_admin_reserve); + +int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) +{ + mmap_write_lock(oldmm); + dup_mm_exe_file(mm, oldmm); + mmap_write_unlock(oldmm); + return 0; +} |
