From b5d24fda9c3dce51fcb4eee459550a458eaaf1e2 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 24 Feb 2017 14:55:45 -0800 Subject: mm, devm_memremap_pages: hold device_hotplug lock over mem_hotplug_{begin, done} The mem_hotplug_{begin,done} lock coordinates with {get,put}_online_mems() to hold off "readers" of the current state of memory from new hotplug actions. mem_hotplug_begin() expects exclusive access, via the device_hotplug lock, to set mem_hotplug.active_writer. Calling mem_hotplug_begin() without locking device_hotplug can lead to corrupting mem_hotplug.refcount and missed wakeups / soft lockups. [dan.j.williams@intel.com: v2] Link: http://lkml.kernel.org/r/148728203365.38457.17804568297887708345.stgit@dwillia2-desk3.amr.corp.intel.com Link: http://lkml.kernel.org/r/148693885680.16345.17802627926777862337.stgit@dwillia2-desk3.amr.corp.intel.com Fixes: f931ab479dd2 ("mm: fix devm_memremap_pages crash, use mem_hotplug_{begin, done}") Signed-off-by: Dan Williams Reported-by: Ben Hutchings Cc: Michal Hocko Cc: Toshi Kani Cc: Vlastimil Babka Cc: Logan Gunthorpe Cc: Masayoshi Mizuma Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/memremap.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'kernel') diff --git a/kernel/memremap.c b/kernel/memremap.c index 9ecedc28b928..06123234f118 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -246,9 +246,13 @@ static void devm_memremap_pages_release(struct device *dev, void *data) /* pages are dead and unused, undo the arch mapping */ align_start = res->start & ~(SECTION_SIZE - 1); align_size = ALIGN(resource_size(res), SECTION_SIZE); + + lock_device_hotplug(); mem_hotplug_begin(); arch_remove_memory(align_start, align_size); mem_hotplug_done(); + unlock_device_hotplug(); + untrack_pfn(NULL, PHYS_PFN(align_start), align_size); pgmap_radix_release(res); dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc, @@ -360,9 +364,11 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, if (error) goto err_pfn_remap; + lock_device_hotplug(); mem_hotplug_begin(); error = arch_add_memory(nid, align_start, align_size, true); mem_hotplug_done(); + unlock_device_hotplug(); if (error) goto err_add_memory; -- cgit From 11bac80004499ea59f361ef2a5516c84b6eab675 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 24 Feb 2017 14:56:41 -0800 Subject: mm, fs: reduce fault, page_mkwrite, and pfn_mkwrite to take only vmf ->fault(), ->page_mkwrite(), and ->pfn_mkwrite() calls do not need to take a vma and vmf parameter when the vma already resides in vmf. Remove the vma parameter to simplify things. [arnd@arndb.de: fix ARM build] Link: http://lkml.kernel.org/r/20170125223558.1451224-1-arnd@arndb.de Link: http://lkml.kernel.org/r/148521301778.19116.10840599906674778980.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Dave Jiang Signed-off-by: Arnd Bergmann Reviewed-by: Ross Zwisler Cc: Theodore Ts'o Cc: Darrick J. Wong Cc: Matthew Wilcox Cc: Dave Hansen Cc: Christoph Hellwig Cc: Jan Kara Cc: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/events/core.c | 6 +++--- kernel/relay.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/events/core.c b/kernel/events/core.c index 77a932b54a64..b2eb3542e829 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4925,9 +4925,9 @@ unlock: rcu_read_unlock(); } -static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int perf_mmap_fault(struct vm_fault *vmf) { - struct perf_event *event = vma->vm_file->private_data; + struct perf_event *event = vmf->vma->vm_file->private_data; struct ring_buffer *rb; int ret = VM_FAULT_SIGBUS; @@ -4950,7 +4950,7 @@ static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) goto unlock; get_page(vmf->page); - vmf->page->mapping = vma->vm_file->f_mapping; + vmf->page->mapping = vmf->vma->vm_file->f_mapping; vmf->page->index = vmf->pgoff; ret = 0; diff --git a/kernel/relay.c b/kernel/relay.c index 8f18d314a96a..8f8dc91db680 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -39,10 +39,10 @@ static void relay_file_mmap_close(struct vm_area_struct *vma) /* * fault() vm_op implementation for relay file mapping. */ -static int relay_buf_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int relay_buf_fault(struct vm_fault *vmf) { struct page *page; - struct rchan_buf *buf = vma->vm_private_data; + struct rchan_buf *buf = vmf->vma->vm_private_data; pgoff_t pgoff = vmf->pgoff; if (!buf) -- cgit From c8394812e56fbc334d815226268cea69b447d461 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 24 Feb 2017 14:57:42 -0800 Subject: uprobes: split THPs before trying to replace them Patch series "Fix few rmap-related THP bugs", v3. The patchset fixes handing PTE-mapped THPs in page_referenced() and page_idle_clear_pte_refs(). To achieve that I've intrdocued new helper -- page_vma_mapped_walk() -- which replaces all page_check_address{,_transhuge}() and covers all THP cases. Patchset overview: - First patch fixes one uprobe bug (unrelated to the rest of the patchset, just spotted it at the same time); - Patches 2-5 fix handling PTE-mapped THPs in page_referenced(), page_idle_clear_pte_refs() and rmap core; - Patches 6-12 convert all page_check_address{,_transhuge}() users (plus remove_migration_pte()) to page_vma_mapped_walk() and drop unused helpers. I think the fixes are not critical enough for stable@ as they don't lead to crashes or hangs, only suboptimal behaviour. This patch (of 12): For THPs page_check_address() always fails. It leads to endless loop in uprobe_write_opcode(). Testcase with huge-tmpfs (uprobes cannot probe anonymous memory). mount -t debugfs none /sys/kernel/debug mount -t tmpfs -o huge=always none /mnt gcc -Wall -O2 -o /mnt/test -x c - < /sys/kernel/debug/tracing/uprobe_events echo 1 > /sys/kernel/debug/tracing/events/uprobes/enable /mnt/test Let's split THPs before trying to replace. Link: http://lkml.kernel.org/r/20170129173858.45174-2-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Acked-by: Rik van Riel Acked-by: Johannes Weiner Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Andrea Arcangeli Cc: Hugh Dickins Cc: Hillf Danton Cc: Srikar Dronamraju Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/events/uprobes.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index d416f3baf392..1e65c79e52a6 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -300,8 +300,8 @@ int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr, retry: /* Read the page with vaddr into memory */ - ret = get_user_pages_remote(NULL, mm, vaddr, 1, FOLL_FORCE, &old_page, - &vma, NULL); + ret = get_user_pages_remote(NULL, mm, vaddr, 1, + FOLL_FORCE | FOLL_SPLIT, &old_page, &vma, NULL); if (ret <= 0) return ret; -- cgit From 14fa2daa15887f9246cfedc345e83e8d24cb9058 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 24 Feb 2017 14:58:07 -0800 Subject: mm, uprobes: convert __replace_page() to use page_vma_mapped_walk() For consistency, it worth converting all page_check_address() to page_vma_mapped_walk(), so we could drop the former. Link: http://lkml.kernel.org/r/20170129173858.45174-10-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Reviewed-by: Srikar Dronamraju Cc: Andrea Arcangeli Cc: Hillf Danton Cc: Hugh Dickins Cc: Johannes Weiner Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/events/uprobes.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 1e65c79e52a6..18c6b23edd3c 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -153,14 +153,19 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr, struct page *old_page, struct page *new_page) { struct mm_struct *mm = vma->vm_mm; - spinlock_t *ptl; - pte_t *ptep; + struct page_vma_mapped_walk pvmw = { + .page = old_page, + .vma = vma, + .address = addr, + }; int err; /* For mmu_notifiers */ const unsigned long mmun_start = addr; const unsigned long mmun_end = addr + PAGE_SIZE; struct mem_cgroup *memcg; + VM_BUG_ON_PAGE(PageTransHuge(old_page), old_page); + err = mem_cgroup_try_charge(new_page, vma->vm_mm, GFP_KERNEL, &memcg, false); if (err) @@ -171,11 +176,11 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr, mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); err = -EAGAIN; - ptep = page_check_address(old_page, mm, addr, &ptl, 0); - if (!ptep) { + if (!page_vma_mapped_walk(&pvmw)) { mem_cgroup_cancel_charge(new_page, memcg, false); goto unlock; } + VM_BUG_ON_PAGE(addr != pvmw.address, old_page); get_page(new_page); page_add_new_anon_rmap(new_page, vma, addr, false); @@ -187,14 +192,15 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr, inc_mm_counter(mm, MM_ANONPAGES); } - flush_cache_page(vma, addr, pte_pfn(*ptep)); - ptep_clear_flush_notify(vma, addr, ptep); - set_pte_at_notify(mm, addr, ptep, mk_pte(new_page, vma->vm_page_prot)); + flush_cache_page(vma, addr, pte_pfn(*pvmw.pte)); + ptep_clear_flush_notify(vma, addr, pvmw.pte); + set_pte_at_notify(mm, addr, pvmw.pte, + mk_pte(new_page, vma->vm_page_prot)); page_remove_rmap(old_page, false); if (!page_mapped(old_page)) try_to_free_swap(old_page); - pte_unmap_unlock(ptep, ptl); + page_vma_mapped_walk_done(&pvmw); if (vma->vm_flags & VM_LOCKED) munlock_vma_page(old_page); -- cgit From ca49ca7114553587736fe78319e22f073b631380 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Fri, 24 Feb 2017 14:58:25 -0800 Subject: userfaultfd: non-cooperative: add event for exit() notification Allow userfaultfd monitor track termination of the processes that have memory backed by the uffd. [rppt@linux.vnet.ibm.com: add comment] Link: http://lkml.kernel.org/r/20170202135448.GB19804@rapoport-lnxLink: http://lkml.kernel.org/r/1485542673-24387-4-git-send-email-rppt@linux.vnet.ibm.com Signed-off-by: Mike Rapoport Acked-by: Hillf Danton Cc: Andrea Arcangeli Cc: "Dr. David Alan Gilbert" Cc: Mike Kravetz Cc: Pavel Emelyanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel') diff --git a/kernel/exit.c b/kernel/exit.c index 9960accbf2ab..90b09ca35c84 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -547,6 +548,7 @@ static void exit_mm(void) enter_lazy_tlb(mm, current); task_unlock(current); mm_update_next_owner(mm); + userfaultfd_exit(mm); mmput(mm); if (test_thread_flag(TIF_MEMDIE)) exit_oom_victim(); -- cgit From 3e6daded1f51b79ff851b6c1e4b192f47ea3d063 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 24 Feb 2017 15:00:44 -0800 Subject: kernel/notifier.c: simplify expression NOTIFY_STOP_MASK (0x8000) has only one bit set and there is no need to compare output of "ret & NOTIFY_STOP_MASK" to NOTIFY_STOP_MASK. We just need to make sure the output is non-zero, that's it. Link: http://lkml.kernel.org/r/88ee58264a2bfab1c97ffc8ac753e25f55f57c10.1483593065.git.viresh.kumar@linaro.org Signed-off-by: Viresh Kumar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/notifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/notifier.c b/kernel/notifier.c index fd2c9acbcc19..6196af8a8223 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c @@ -95,7 +95,7 @@ static int notifier_call_chain(struct notifier_block **nl, if (nr_calls) (*nr_calls)++; - if ((ret & NOTIFY_STOP_MASK) == NOTIFY_STOP_MASK) + if (ret & NOTIFY_STOP_MASK) break; nb = next_nb; nr_to_call--; -- cgit From 738bc38d49e017fe7acb3596712518e22c225816 Mon Sep 17 00:00:00 2001 From: Bhumika Goyal Date: Fri, 24 Feb 2017 15:00:46 -0800 Subject: kernel/ksysfs.c: add __ro_after_init to bin_attribute structure The object notes_attr of type bin_attribute is not modified after getting initailized by ksysfs_init. Apart from initialization in ksysfs_init it is also passed as an argument to the function sysfs_create_bin_file but this argument is of type const. Therefore, add __ro_after_init to its declaration. Link: http://lkml.kernel.org/r/1486839969-16891-1-git-send-email-bhumirks@gmail.com Signed-off-by: Bhumika Goyal Acked-by: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/ksysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index ee1bc1bb8feb..0999679d6f26 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c @@ -195,7 +195,7 @@ static ssize_t notes_read(struct file *filp, struct kobject *kobj, return count; } -static struct bin_attribute notes_attr = { +static struct bin_attribute notes_attr __ro_after_init = { .attr = { .name = "notes", .mode = S_IRUGO, -- cgit