Diffstat (limited to 'mm')
-rw-r--r--  mm/Kconfig       |  2
-rw-r--r--  mm/cleancache.c  |  2
-rw-r--r--  mm/filemap.c     | 76
-rw-r--r--  mm/gup.c         | 10
-rw-r--r--  mm/khugepaged.c  |  1
-rw-r--r--  mm/memcontrol.c  | 10
-rw-r--r--  mm/mempool.c     |  2
-rw-r--r--  mm/mmap.c        | 19
-rw-r--r--  mm/page_io.c     |  4
-rw-r--r--  mm/rmap.c        | 16
-rw-r--r--  mm/shmem.c       |  8
-rw-r--r--  mm/slub.c        | 40
-rw-r--r--  mm/vmalloc.c     | 15
-rw-r--r--  mm/vmscan.c      |  2
14 files changed, 137 insertions, 70 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index beb7a455915d..398b46064544 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -137,7 +137,7 @@ config HAVE_MEMBLOCK_NODE_MAP
 config HAVE_MEMBLOCK_PHYS_MAP
         bool
 
-config HAVE_GENERIC_RCU_GUP
+config HAVE_GENERIC_GUP
         bool
 
 config ARCH_DISCARD_MEMBLOCK
diff --git a/mm/cleancache.c b/mm/cleancache.c
index ba5d8f3e6d68..f7b9fdc79d97 100644
--- a/mm/cleancache.c
+++ b/mm/cleancache.c
@@ -130,7 +130,7 @@ void __cleancache_init_shared_fs(struct super_block *sb)
         int pool_id = CLEANCACHE_NO_BACKEND_SHARED;
 
         if (cleancache_ops) {
-                pool_id = cleancache_ops->init_shared_fs(sb->s_uuid, PAGE_SIZE);
+                pool_id = cleancache_ops->init_shared_fs(&sb->s_uuid, PAGE_SIZE);
                 if (pool_id < 0)
                         pool_id = CLEANCACHE_NO_POOL;
         }
diff --git a/mm/filemap.c b/mm/filemap.c
index 6f1be573a5e6..aea58e983a73 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -376,6 +376,38 @@ int filemap_flush(struct address_space *mapping)
 }
 EXPORT_SYMBOL(filemap_flush);
 
+/**
+ * filemap_range_has_page - check if a page exists in range.
+ * @mapping:    address space within which to check
+ * @start_byte: offset in bytes where the range starts
+ * @end_byte:   offset in bytes where the range ends (inclusive)
+ *
+ * Find at least one page in the range supplied, usually used to check if
+ * direct writing in this range will trigger a writeback.
+ */
+bool filemap_range_has_page(struct address_space *mapping,
+                            loff_t start_byte, loff_t end_byte)
+{
+        pgoff_t index = start_byte >> PAGE_SHIFT;
+        pgoff_t end = end_byte >> PAGE_SHIFT;
+        struct pagevec pvec;
+        bool ret;
+
+        if (end_byte < start_byte)
+                return false;
+
+        if (mapping->nrpages == 0)
+                return false;
+
+        pagevec_init(&pvec, 0);
+        if (!pagevec_lookup(&pvec, mapping, index, 1))
+                return false;
+        ret = (pvec.pages[0]->index <= end);
+        pagevec_release(&pvec);
+        return ret;
+}
+EXPORT_SYMBOL(filemap_range_has_page);
+
 static int __filemap_fdatawait_range(struct address_space *mapping,
                                      loff_t start_byte, loff_t end_byte)
 {
@@ -768,10 +800,10 @@ struct wait_page_key {
 struct wait_page_queue {
         struct page *page;
         int bit_nr;
-        wait_queue_t wait;
+        wait_queue_entry_t wait;
 };
 
-static int wake_page_function(wait_queue_t *wait, unsigned mode, int sync, void *arg)
+static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *arg)
 {
         struct wait_page_key *key = arg;
         struct wait_page_queue *wait_page
@@ -834,7 +866,7 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
                 struct page *page, int bit_nr, int state, bool lock)
 {
         struct wait_page_queue wait_page;
-        wait_queue_t *wait = &wait_page.wait;
+        wait_queue_entry_t *wait = &wait_page.wait;
         int ret = 0;
 
         init_wait(wait);
@@ -845,9 +877,9 @@
         for (;;) {
                 spin_lock_irq(&q->lock);
 
-                if (likely(list_empty(&wait->task_list))) {
+                if (likely(list_empty(&wait->entry))) {
                         if (lock)
-                                __add_wait_queue_tail_exclusive(q, wait);
+                                __add_wait_queue_entry_tail_exclusive(q, wait);
                         else
                                 __add_wait_queue(q, wait);
                         SetPageWaiters(page);
@@ -907,7 +939,7 @@ int wait_on_page_bit_killable(struct page *page, int bit_nr)
  *
  * Add an arbitrary @waiter to the wait queue for the nominated @page.
  */
-void add_page_wait_queue(struct page *page, wait_queue_t *waiter)
+void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter)
 {
         wait_queue_head_t *q = page_waitqueue(page);
         unsigned long flags;
@@ -2038,10 +2070,17 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
                 loff_t size;
 
                 size = i_size_read(inode);
-                retval = filemap_write_and_wait_range(mapping, iocb->ki_pos,
-                                        iocb->ki_pos + count - 1);
-                if (retval < 0)
-                        goto out;
+                if (iocb->ki_flags & IOCB_NOWAIT) {
+                        if (filemap_range_has_page(mapping, iocb->ki_pos,
+                                                   iocb->ki_pos + count - 1))
+                                return -EAGAIN;
+                } else {
+                        retval = filemap_write_and_wait_range(mapping,
+                                                iocb->ki_pos,
+                                                iocb->ki_pos + count - 1);
+                        if (retval < 0)
+                                goto out;
+                }
 
                 file_accessed(file);
 
@@ -2642,6 +2681,9 @@ inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
 
         pos = iocb->ki_pos;
 
+        if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
+                return -EINVAL;
+
         if (limit != RLIM_INFINITY) {
                 if (iocb->ki_pos >= limit) {
                         send_sig(SIGXFSZ, current, 0);
@@ -2710,9 +2752,17 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
         write_len = iov_iter_count(from);
         end = (pos + write_len - 1) >> PAGE_SHIFT;
 
-        written = filemap_write_and_wait_range(mapping, pos, pos + write_len - 1);
-        if (written)
-                goto out;
+        if (iocb->ki_flags & IOCB_NOWAIT) {
+                /* If there are pages to writeback, return */
+                if (filemap_range_has_page(inode->i_mapping, pos,
+                                           pos + iov_iter_count(from)))
+                        return -EAGAIN;
+        } else {
+                written = filemap_write_and_wait_range(mapping, pos,
+                                                        pos + write_len - 1);
+                if (written)
+                        goto out;
+        }
 
         /*
          * After a write we want buffered reads to be sure to go to disk to get
diff --git a/mm/gup.c b/mm/gup.c
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1146,7 +1146,7 @@ struct page *get_dump_page(unsigned long addr)
 #endif /* CONFIG_ELF_CORE */
 
 /*
- * Generic RCU Fast GUP
+ * Generic Fast GUP
  *
  * get_user_pages_fast attempts to pin user pages by walking the page
  * tables directly and avoids taking locks. Thus the walker needs to be
@@ -1167,8 +1167,8 @@ struct page *get_dump_page(unsigned long addr)
  * Before activating this code, please be aware that the following assumptions
  * are currently made:
  *
- * *) HAVE_RCU_TABLE_FREE is enabled, and tlb_remove_table is used to free
- *      pages containing page tables.
+ * *) Either HAVE_RCU_TABLE_FREE is enabled, and tlb_remove_table() is used to
+ *      free pages containing page tables or TLB flushing requires IPI broadcast.
  *
  * *) ptes can be read atomically by the architecture.
  *
@@ -1178,7 +1178,7 @@ struct page *get_dump_page(unsigned long addr)
  *
  * This code is based heavily on the PowerPC implementation by Nick Piggin.
  */
-#ifdef CONFIG_HAVE_GENERIC_RCU_GUP
+#ifdef CONFIG_HAVE_GENERIC_GUP
 
 #ifndef gup_get_pte
 /*
@@ -1668,4 +1668,4 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 
         return ret;
 }
-#endif /* CONFIG_HAVE_GENERIC_RCU_GUP */
+#endif /* CONFIG_HAVE_GENERIC_GUP */
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 945fd1ca49b5..df4ebdb2b10a 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -652,7 +652,6 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
                         spin_unlock(ptl);
                         free_page_and_swap_cache(src_page);
                 }
-                cond_resched();
         }
 }
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 94172089f52f..d75b38b66ef6 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -170,7 +170,7 @@ struct mem_cgroup_event {
          */
         poll_table pt;
         wait_queue_head_t *wqh;
-        wait_queue_t wait;
+        wait_queue_entry_t wait;
         struct work_struct remove;
 };
 
@@ -1479,10 +1479,10 @@ static DECLARE_WAIT_QUEUE_HEAD(memcg_oom_waitq);
 
 struct oom_wait_info {
         struct mem_cgroup *memcg;
-        wait_queue_t wait;
+        wait_queue_entry_t wait;
 };
 
-static int memcg_oom_wake_function(wait_queue_t *wait,
+static int memcg_oom_wake_function(wait_queue_entry_t *wait,
         unsigned mode, int sync, void *arg)
 {
         struct mem_cgroup *wake_memcg = (struct mem_cgroup *)arg;
@@ -1570,7 +1570,7 @@ bool mem_cgroup_oom_synchronize(bool handle)
         owait.wait.flags = 0;
         owait.wait.func = memcg_oom_wake_function;
         owait.wait.private = current;
-        INIT_LIST_HEAD(&owait.wait.task_list);
+        INIT_LIST_HEAD(&owait.wait.entry);
 
         prepare_to_wait(&memcg_oom_waitq, &owait.wait, TASK_KILLABLE);
         mem_cgroup_mark_under_oom(memcg);
@@ -3725,7 +3725,7 @@ static void memcg_event_remove(struct work_struct *work)
  *
  * Called with wqh->lock held and interrupts disabled.
  */
-static int memcg_event_wake(wait_queue_t *wait, unsigned mode,
+static int memcg_event_wake(wait_queue_entry_t *wait, unsigned mode,
         int sync, void *key)
 {
         struct mem_cgroup_event *event =
diff --git a/mm/mempool.c b/mm/mempool.c
index 47a659dedd44..1c0294858527 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -312,7 +312,7 @@ void *mempool_alloc(mempool_t *pool, gfp_t gfp_mask)
 {
         void *element;
         unsigned long flags;
-        wait_queue_t wait;
+        wait_queue_entry_t wait;
         gfp_t gfp_temp;
 
         VM_WARN_ON_ONCE(gfp_mask & __GFP_ZERO);
diff --git a/mm/mmap.c b/mm/mmap.c
index 8e07976d5e47..a5e3dcd75e79 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1817,7 +1817,8 @@ check_current:
                 /* Check if current node has a suitable gap */
                 if (gap_start > high_limit)
                         return -ENOMEM;
-                if (gap_end >= low_limit && gap_end - gap_start >= length)
+                if (gap_end >= low_limit &&
+                    gap_end > gap_start && gap_end - gap_start >= length)
                         goto found;
 
                 /* Visit right subtree if it looks promising */
@@ -1920,7 +1921,8 @@ check_current:
                 gap_end = vm_start_gap(vma);
                 if (gap_end < low_limit)
                         return -ENOMEM;
-                if (gap_start <= high_limit && gap_end - gap_start >= length)
+                if (gap_start <= high_limit &&
+                    gap_end > gap_start && gap_end - gap_start >= length)
                         goto found;
 
                 /* Visit left subtree if it looks promising */
@@ -2228,16 +2230,19 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
         if (!(vma->vm_flags & VM_GROWSUP))
                 return -EFAULT;
 
-        /* Guard against wrapping around to address 0. */
+        /* Guard against exceeding limits of the address space. */
         address &= PAGE_MASK;
-        address += PAGE_SIZE;
-        if (!address)
+        if (address >= TASK_SIZE)
                 return -ENOMEM;
+        address += PAGE_SIZE;
 
         /* Enforce stack_guard_gap */
         gap_addr = address + stack_guard_gap;
-        if (gap_addr < address)
-                return -ENOMEM;
+
+        /* Guard against overflow */
+        if (gap_addr < address || gap_addr > TASK_SIZE)
+                gap_addr = TASK_SIZE;
+
         next = vma->vm_next;
         if (next && next->vm_start < gap_addr) {
                 if (!(next->vm_flags & VM_GROWSUP))
diff --git a/mm/page_io.c b/mm/page_io.c
index 23f6d0d3470f..2da71e627812 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -45,7 +45,7 @@ void end_swap_bio_write(struct bio *bio)
 {
         struct page *page = bio->bi_io_vec[0].bv_page;
 
-        if (bio->bi_error) {
+        if (bio->bi_status) {
                 SetPageError(page);
                 /*
                  * We failed to write the page out to swap-space.
@@ -118,7 +118,7 @@ static void end_swap_bio_read(struct bio *bio)
 {
         struct page *page = bio->bi_io_vec[0].bv_page;
 
-        if (bio->bi_error) {
+        if (bio->bi_status) {
                 SetPageError(page);
                 ClearPageUptodate(page);
                 pr_alert("Read-error on swap-device (%u:%u:%llu)\n",
diff --git a/mm/rmap.c b/mm/rmap.c
index d405f0e0ee96..130c238fe384 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -579,25 +579,13 @@ void page_unlock_anon_vma_read(struct anon_vma *anon_vma)
 void try_to_unmap_flush(void)
 {
         struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
-        int cpu;
 
         if (!tlb_ubc->flush_required)
                 return;
 
-        cpu = get_cpu();
-
-        if (cpumask_test_cpu(cpu, &tlb_ubc->cpumask)) {
-                count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
-                local_flush_tlb();
-                trace_tlb_flush(TLB_LOCAL_SHOOTDOWN, TLB_FLUSH_ALL);
-        }
-
-        if (cpumask_any_but(&tlb_ubc->cpumask, cpu) < nr_cpu_ids)
-                flush_tlb_others(&tlb_ubc->cpumask, NULL, 0, TLB_FLUSH_ALL);
-        cpumask_clear(&tlb_ubc->cpumask);
+        arch_tlbbatch_flush(&tlb_ubc->arch);
         tlb_ubc->flush_required = false;
         tlb_ubc->writable = false;
-        put_cpu();
 }
 
 /* Flush iff there are potentially writable TLB entries that can race with IO */
@@ -613,7 +601,7 @@ static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
 {
         struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
 
-        cpumask_or(&tlb_ubc->cpumask, &tlb_ubc->cpumask, mm_cpumask(mm));
+        arch_tlbbatch_add_mm(&tlb_ubc->arch, mm);
         tlb_ubc->flush_required = true;
 
         /*
diff --git a/mm/shmem.c b/mm/shmem.c
index e67d6ba4e98e..9100c4952698 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -75,6 +75,7 @@ static struct vfsmount *shm_mnt;
 #include <uapi/linux/memfd.h>
 #include <linux/userfaultfd_k.h>
 #include <linux/rmap.h>
+#include <linux/uuid.h>
 
 #include <linux/uaccess.h>
 #include <asm/pgtable.h>
@@ -1902,10 +1903,10 @@ unlock:
  * entry unconditionally - even if something else had already woken the
  * target.
  */
-static int synchronous_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
+static int synchronous_wake_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
 {
         int ret = default_wake_function(wait, mode, sync, key);
-        list_del_init(&wait->task_list);
+        list_del_init(&wait->entry);
         return ret;
 }
 
@@ -2840,7 +2841,7 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
                         spin_lock(&inode->i_lock);
                         inode->i_private = NULL;
                         wake_up_all(&shmem_falloc_waitq);
-                        WARN_ON_ONCE(!list_empty(&shmem_falloc_waitq.task_list));
+                        WARN_ON_ONCE(!list_empty(&shmem_falloc_waitq.head));
                         spin_unlock(&inode->i_lock);
                         error = 0;
                         goto out;
@@ -3761,6 +3762,7 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent)
 #ifdef CONFIG_TMPFS_POSIX_ACL
         sb->s_flags |= MS_POSIXACL;
 #endif
+        uuid_gen(&sb->s_uuid);
 
         inode = shmem_get_inode(sb, NULL, S_IFDIR | sbinfo->mode, 0, VM_NORESERVE);
         if (!inode)
diff --git a/mm/slub.c b/mm/slub.c
index 7449593fca72..8addc535bcdc 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -5625,6 +5625,28 @@ static char *create_unique_id(struct kmem_cache *s)
         return name;
 }
 
+static void sysfs_slab_remove_workfn(struct work_struct *work)
+{
+        struct kmem_cache *s =
+                container_of(work, struct kmem_cache, kobj_remove_work);
+
+        if (!s->kobj.state_in_sysfs)
+                /*
+                 * For a memcg cache, this may be called during
+                 * deactivation and again on shutdown. Remove only once.
+                 * A cache is never shut down before deactivation is
+                 * complete, so no need to worry about synchronization.
+                 */
+                return;
+
+#ifdef CONFIG_MEMCG
+        kset_unregister(s->memcg_kset);
+#endif
+        kobject_uevent(&s->kobj, KOBJ_REMOVE);
+        kobject_del(&s->kobj);
+        kobject_put(&s->kobj);
+}
+
 static int sysfs_slab_add(struct kmem_cache *s)
 {
         int err;
@@ -5632,6 +5654,8 @@ static int sysfs_slab_add(struct kmem_cache *s)
         struct kset *kset = cache_kset(s);
         int unmergeable = slab_unmergeable(s);
 
+        INIT_WORK(&s->kobj_remove_work, sysfs_slab_remove_workfn);
+
         if (!kset) {
                 kobject_init(&s->kobj, &slab_ktype);
                 return 0;
         }
@@ -5695,20 +5719,8 @@ static void sysfs_slab_remove(struct kmem_cache *s)
                  */
                 return;
 
-        if (!s->kobj.state_in_sysfs)
-                /*
-                 * For a memcg cache, this may be called during
-                 * deactivation and again on shutdown. Remove only once.
-                 * A cache is never shut down before deactivation is
-                 * complete, so no need to worry about synchronization.
-                 */
-                return;
-
-#ifdef CONFIG_MEMCG
-        kset_unregister(s->memcg_kset);
-#endif
-        kobject_uevent(&s->kobj, KOBJ_REMOVE);
-        kobject_del(&s->kobj);
+        kobject_get(&s->kobj);
+        schedule_work(&s->kobj_remove_work);
 }
 
 void sysfs_slab_release(struct kmem_cache *s)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 34a1c3e46ed7..ecc97f74ab18 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -287,10 +287,21 @@ struct page *vmalloc_to_page(const void *vmalloc_addr)
         if (p4d_none(*p4d))
                 return NULL;
         pud = pud_offset(p4d, addr);
-        if (pud_none(*pud))
+
+        /*
+         * Don't dereference bad PUD or PMD (below) entries. This will also
+         * identify huge mappings, which we may encounter on architectures
+         * that define CONFIG_HAVE_ARCH_HUGE_VMAP=y. Such regions will be
+         * identified as vmalloc addresses by is_vmalloc_addr(), but are
+         * not [unambiguously] associated with a struct page, so there is
+         * no correct value to return for them.
+         */
+        WARN_ON_ONCE(pud_bad(*pud));
+        if (pud_none(*pud) || pud_bad(*pud))
                 return NULL;
         pmd = pmd_offset(pud, addr);
-        if (pmd_none(*pmd))
+        WARN_ON_ONCE(pmd_bad(*pmd));
+        if (pmd_none(*pmd) || pmd_bad(*pmd))
                 return NULL;
 
         ptep = pte_offset_map(pmd, addr);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 8ad39bbc79e6..c3c1c6ac62da 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3652,7 +3652,7 @@ int kswapd_run(int nid)
         pgdat->kswapd = kthread_run(kswapd, pgdat, "kswapd%d", nid);
         if (IS_ERR(pgdat->kswapd)) {
                 /* failure at boot is fatal */
-                BUG_ON(system_state == SYSTEM_BOOTING);
+                BUG_ON(system_state < SYSTEM_RUNNING);
                 pr_err("Failed to start kswapd on node %d\n", nid);
                 ret = PTR_ERR(pgdat->kswapd);
                 pgdat->kswapd = NULL;
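
Note on the expand_upwards() hunk above: with the new clamping, gap_addr no longer causes a hard -ENOMEM when address + stack_guard_gap overflows or crosses TASK_SIZE, so a VM_GROWSUP stack may now grow right up to the address-space limit. The stand-alone user-space sketch below models only that arithmetic; check_growsup(), the PAGE_SIZE/TASK_SIZE values, the 256-page guard gap and the single next-VMA-start check are illustrative stand-ins, not the kernel's definitions (the real code also consults the next VMA's VM_GROWSUP flag, locking and accounting).

#include <stdio.h>

/* Illustrative stand-ins; real values are architecture-specific. */
#define PAGE_SIZE   4096UL
#define PAGE_MASK   (~(PAGE_SIZE - 1))
#define TASK_SIZE   0x0000800000000000UL    /* example 47-bit user address space */

static unsigned long stack_guard_gap = 256UL << 12;   /* 256 pages, mirroring the mm/mmap.c default */

/* Mirrors the guard-gap arithmetic from the expand_upwards() hunk above (simplified). */
static int check_growsup(unsigned long address, unsigned long next_vm_start)
{
	unsigned long gap_addr;

	address &= PAGE_MASK;
	if (address >= TASK_SIZE)
		return -1;               /* -ENOMEM in the kernel */
	address += PAGE_SIZE;

	gap_addr = address + stack_guard_gap;
	if (gap_addr < address || gap_addr > TASK_SIZE)
		gap_addr = TASK_SIZE;    /* clamp near the top instead of failing */

	if (next_vm_start && next_vm_start < gap_addr)
		return -1;               /* next mapping sits inside the guard gap */
	return 0;
}

int main(void)
{
	/* Growing to the last page below TASK_SIZE succeeds when nothing follows. */
	printf("%d\n", check_growsup(TASK_SIZE - PAGE_SIZE, 0));        /* prints 0  */
	/* A following mapping inside the guard gap blocks the expansion. */
	printf("%d\n", check_growsup(0x400000, 0x400000 + PAGE_SIZE));  /* prints -1 */
	return 0;
}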