From d795a90e2ba024dbf2f22107ae89c210b98b08b8 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Sat, 28 Mar 2020 19:17:15 -0700 Subject: mm/swapfile.c: move inode_lock out of claim_swapfile claim_swapfile() currently keeps the inode locked when it is successful, or the file is already swapfile (with -EBUSY). And, on the other error cases, it does not lock the inode. This inconsistency of the lock state and return value is quite confusing and actually causing a bad unlock balance as below in the "bad_swap" section of __do_sys_swapon(). This commit fixes this issue by moving the inode_lock() and IS_SWAPFILE check out of claim_swapfile(). The inode is unlocked in "bad_swap_unlock_inode" section, so that the inode is ensured to be unlocked at "bad_swap". Thus, error handling codes after the locking now jumps to "bad_swap_unlock_inode" instead of "bad_swap". ===================================== WARNING: bad unlock balance detected! 5.5.0-rc7+ #176 Not tainted ------------------------------------- swapon/4294 is trying to release lock (&sb->s_type->i_mutex_key) at: __do_sys_swapon+0x94b/0x3550 but there are no more locks to release! other info that might help us debug this: no locks held by swapon/4294. stack backtrace: CPU: 5 PID: 4294 Comm: swapon Not tainted 5.5.0-rc7-BTRFS-ZNS+ #176 Hardware name: ASUS All Series/H87-PRO, BIOS 2102 07/29/2014 Call Trace: dump_stack+0xa1/0xea print_unlock_imbalance_bug.cold+0x114/0x123 lock_release+0x562/0xed0 up_write+0x2d/0x490 __do_sys_swapon+0x94b/0x3550 __x64_sys_swapon+0x54/0x80 do_syscall_64+0xa4/0x4b0 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x7f15da0a0dc7 Fixes: 1638045c3677 ("mm: set S_SWAPFILE on blockdev swap devices") Signed-off-by: Naohiro Aota Signed-off-by: Andrew Morton Tested-by: Qais Youef Reviewed-by: Andrew Morton Reviewed-by: Darrick J. Wong Cc: Christoph Hellwig Cc: Link: http://lkml.kernel.org/r/20200206090132.154869-1-naohiro.aota@wdc.com Signed-off-by: Linus Torvalds --- mm/swapfile.c | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/mm/swapfile.c b/mm/swapfile.c index b2a2e45c9a36..be33e6176cd9 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2899,10 +2899,6 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode) p->bdev = inode->i_sb->s_bdev; } - inode_lock(inode); - if (IS_SWAPFILE(inode)) - return -EBUSY; - return 0; } @@ -3157,36 +3153,41 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) mapping = swap_file->f_mapping; inode = mapping->host; - /* will take i_rwsem; */ error = claim_swapfile(p, inode); if (unlikely(error)) goto bad_swap; + inode_lock(inode); + if (IS_SWAPFILE(inode)) { + error = -EBUSY; + goto bad_swap_unlock_inode; + } + /* * Read the swap header. */ if (!mapping->a_ops->readpage) { error = -EINVAL; - goto bad_swap; + goto bad_swap_unlock_inode; } page = read_mapping_page(mapping, 0, swap_file); if (IS_ERR(page)) { error = PTR_ERR(page); - goto bad_swap; + goto bad_swap_unlock_inode; } swap_header = kmap(page); maxpages = read_swap_header(p, swap_header, inode); if (unlikely(!maxpages)) { error = -EINVAL; - goto bad_swap; + goto bad_swap_unlock_inode; } /* OK, set up the swap map and apply the bad block list */ swap_map = vzalloc(maxpages); if (!swap_map) { error = -ENOMEM; - goto bad_swap; + goto bad_swap_unlock_inode; } if (bdi_cap_stable_pages_required(inode_to_bdi(inode))) @@ -3211,7 +3212,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) GFP_KERNEL); if (!cluster_info) { error = -ENOMEM; - goto bad_swap; + goto bad_swap_unlock_inode; } for (ci = 0; ci < nr_cluster; ci++) @@ -3220,7 +3221,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) p->percpu_cluster = alloc_percpu(struct percpu_cluster); if (!p->percpu_cluster) { error = -ENOMEM; - goto bad_swap; + goto bad_swap_unlock_inode; } for_each_possible_cpu(cpu) { struct percpu_cluster *cluster; @@ -3234,13 +3235,13 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) error = swap_cgroup_swapon(p->type, maxpages); if (error) - goto bad_swap; + goto bad_swap_unlock_inode; nr_extents = setup_swap_map_and_extents(p, swap_header, swap_map, cluster_info, maxpages, &span); if (unlikely(nr_extents < 0)) { error = nr_extents; - goto bad_swap; + goto bad_swap_unlock_inode; } /* frontswap enabled? set up bit-per-page map for frontswap */ if (IS_ENABLED(CONFIG_FRONTSWAP)) @@ -3280,7 +3281,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) error = init_swap_address_space(p->type, maxpages); if (error) - goto bad_swap; + goto bad_swap_unlock_inode; /* * Flush any pending IO and dirty mappings before we start using this @@ -3290,7 +3291,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) error = inode_drain_writes(inode); if (error) { inode->i_flags &= ~S_SWAPFILE; - goto bad_swap; + goto bad_swap_unlock_inode; } mutex_lock(&swapon_mutex); @@ -3315,6 +3316,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) error = 0; goto out; +bad_swap_unlock_inode: + inode_unlock(inode); bad_swap: free_percpu(p->percpu_cluster); p->percpu_cluster = NULL; @@ -3322,6 +3325,7 @@ bad_swap: set_blocksize(p->bdev, p->old_block_size); blkdev_put(p->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); } + inode = NULL; destroy_swap_extents(p); swap_cgroup_swapoff(p->type); spin_lock(&swap_lock); @@ -3333,13 +3337,8 @@ bad_swap: kvfree(frontswap_map); if (inced_nr_rotate_swap) atomic_dec(&nr_rotate_swap); - if (swap_file) { - if (inode) { - inode_unlock(inode); - inode = NULL; - } + if (swap_file) filp_close(swap_file, NULL); - } out: if (page && !IS_ERR(page)) { kunmap(page); -- cgit From 53cdc1cb29e87ce5a61de5bb393eb08925d14ede Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Sat, 28 Mar 2020 19:17:19 -0700 Subject: drivers/base/memory.c: indicate all memory blocks as removable We see multiple issues with the implementation/interface to compute whether a memory block can be offlined (exposed via /sys/devices/system/memory/memoryX/removable) and would like to simplify it (remove the implementation). 1. It runs basically lockless. While this might be good for performance, we see possible races with memory offlining that will require at least some sort of locking to fix. 2. Nowadays, more false positives are possible. No arch-specific checks are performed that validate if memory offlining will not be denied right away (and such check will require locking). For example, arm64 won't allow to offline any memory block that was added during boot - which will imply a very high error rate. Other archs have other constraints. 3. The interface is inherently racy. E.g., if a memory block is detected to be removable (and was not a false positive at that time), there is still no guarantee that offlining will actually succeed. So any caller already has to deal with false positives. 4. It is unclear which performance benefit this interface actually provides. The introducing commit 5c755e9fd813 ("memory-hotplug: add sysfs removable attribute for hotplug memory remove") mentioned "A user-level agent must be able to identify which sections of memory are likely to be removable before attempting the potentially expensive operation." However, no actual performance comparison was included. Known users: - lsmem: Will group memory blocks based on the "removable" property. [1] - chmem: Indirect user. It has a RANGE mode where one can specify removable ranges identified via lsmem to be offlined. However, it also has a "SIZE" mode, which allows a sysadmin to skip the manual "identify removable blocks" step. [2] - powerpc-utils: Uses the "removable" attribute to skip some memory blocks right away when trying to find some to offline+remove. However, with ballooning enabled, it already skips this information completely (because it once resulted in many false negatives). Therefore, the implementation can deal with false positives properly already. [3] According to Nathan Fontenot, DLPAR on powerpc is nowadays no longer driven from userspace via the drmgr command (powerpc-utils). Nowadays it's managed in the kernel - including onlining/offlining of memory blocks - triggered by drmgr writing to /sys/kernel/dlpar. So the affected legacy userspace handling is only active on old kernels. Only very old versions of drmgr on a new kernel (unlikely) might execute slower - totally acceptable. With CONFIG_MEMORY_HOTREMOVE, always indicating "removable" should not break any user space tool. We implement a very bad heuristic now. Without CONFIG_MEMORY_HOTREMOVE we cannot offline anything, so report "not removable" as before. Original discussion can be found in [4] ("[PATCH RFC v1] mm: is_mem_section_removable() overhaul"). Other users of is_mem_section_removable() will be removed next, so that we can remove is_mem_section_removable() completely. [1] http://man7.org/linux/man-pages/man1/lsmem.1.html [2] http://man7.org/linux/man-pages/man8/chmem.8.html [3] https://github.com/ibm-power-utilities/powerpc-utils [4] https://lkml.kernel.org/r/20200117105759.27905-1-david@redhat.com Also, this patch probably fixes a crash reported by Steve. http://lkml.kernel.org/r/CAPcyv4jpdaNvJ67SkjyUJLBnBnXXQv686BiVW042g03FUmWLXw@mail.gmail.com Reported-by: "Scargall, Steve" Suggested-by: Michal Hocko Signed-off-by: David Hildenbrand Signed-off-by: Andrew Morton Reviewed-by: Nathan Fontenot Acked-by: Michal Hocko Cc: Dan Williams Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Cc: Badari Pulavarty Cc: Robert Jennings Cc: Heiko Carstens Cc: Karel Zak Cc: Link: http://lkml.kernel.org/r/20200128093542.6908-1-david@redhat.com Signed-off-by: Linus Torvalds --- drivers/base/memory.c | 23 +++-------------------- 1 file changed, 3 insertions(+), 20 deletions(-) diff --git a/drivers/base/memory.c b/drivers/base/memory.c index b9f474c11393..4086718f6876 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -97,30 +97,13 @@ static ssize_t phys_index_show(struct device *dev, } /* - * Show whether the memory block is likely to be offlineable (or is already - * offline). Once offline, the memory block could be removed. The return - * value does, however, not indicate that there is a way to remove the - * memory block. + * Legacy interface that we cannot remove. Always indicate "removable" + * with CONFIG_MEMORY_HOTREMOVE - bad heuristic. */ static ssize_t removable_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct memory_block *mem = to_memory_block(dev); - unsigned long pfn; - int ret = 1, i; - - if (mem->state != MEM_ONLINE) - goto out; - - for (i = 0; i < sections_per_block; i++) { - if (!present_section_nr(mem->start_section_nr + i)) - continue; - pfn = section_nr_to_pfn(mem->start_section_nr + i); - ret &= is_mem_section_removable(pfn, PAGES_PER_SECTION); - } - -out: - return sprintf(buf, "%d\n", ret); + return sprintf(buf, "%d\n", (int)IS_ENABLED(CONFIG_MEMORY_HOTREMOVE)); } /* -- cgit From 726b7bbeafd4e0219c37b74673d368b21ff3d039 Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Sat, 28 Mar 2020 19:17:22 -0700 Subject: hugetlb_cgroup: fix illegal access to memory This appears to be a mistake in commit faced7e0806cf ("mm: hugetlb controller for cgroups v2"). Essentially that commit does a hugetlb_cgroup_from_counter assuming that page_counter_try_charge has initialized counter. But if that has failed then it seems will not initialize counter, so hugetlb_cgroup_from_counter(counter) ends up pointing to random memory, causing kasan to complain. The solution is to simply use 'h_cg', instead of hugetlb_cgroup_from_counter(counter), since that is a reference to the hugetlb_cgroup anyway. After this change kasan ceases to complain. Fixes: faced7e0806cf ("mm: hugetlb controller for cgroups v2") Reported-by: syzbot+cac0c4e204952cf449b1@syzkaller.appspotmail.com Signed-off-by: Mina Almasry Signed-off-by: Andrew Morton Acked-by: Giuseppe Scrivano Acked-by: Tejun Heo Cc: Mike Kravetz Cc: David Rientjes Link: http://lkml.kernel.org/r/20200313223920.124230-1-almasrymina@google.com Signed-off-by: Linus Torvalds --- mm/hugetlb_cgroup.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index e434b05416c6..5280bcf459af 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c @@ -240,8 +240,7 @@ again: if (!page_counter_try_charge(&h_cg->hugepage[idx], nr_pages, &counter)) { ret = -ENOMEM; - hugetlb_event(hugetlb_cgroup_from_counter(counter, idx), idx, - HUGETLB_MAX); + hugetlb_event(h_cg, idx, HUGETLB_MAX); } css_put(&h_cg->css); done: -- cgit From 8380ce479010f2f779587b462a9b4681934297c3 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Sat, 28 Mar 2020 19:17:25 -0700 Subject: mm: fork: fix kernel_stack memcg stats for various stack implementations Depending on CONFIG_VMAP_STACK and the THREAD_SIZE / PAGE_SIZE ratio the space for task stacks can be allocated using __vmalloc_node_range(), alloc_pages_node() and kmem_cache_alloc_node(). In the first and the second cases page->mem_cgroup pointer is set, but in the third it's not: memcg membership of a slab page should be determined using the memcg_from_slab_page() function, which looks at page->slab_cache->memcg_params.memcg . In this case, using mod_memcg_page_state() (as in account_kernel_stack()) is incorrect: page->mem_cgroup pointer is NULL even for pages charged to a non-root memory cgroup. It can lead to kernel_stack per-memcg counters permanently showing 0 on some architectures (depending on the configuration). In order to fix it, let's introduce a mod_memcg_obj_state() helper, which takes a pointer to a kernel object as a first argument, uses mem_cgroup_from_obj() to get a RCU-protected memcg pointer and calls mod_memcg_state(). It allows to handle all possible configurations (CONFIG_VMAP_STACK and various THREAD_SIZE/PAGE_SIZE values) without spilling any memcg/kmem specifics into fork.c . Note: This is a special version of the patch created for stable backports. It contains code from the following two patches: - mm: memcg/slab: introduce mem_cgroup_from_obj() - mm: fork: fix kernel_stack memcg stats for various stack implementations [guro@fb.com: introduce mem_cgroup_from_obj()] Link: http://lkml.kernel.org/r/20200324004221.GA36662@carbon.dhcp.thefacebook.com Fixes: 4d96ba353075 ("mm: memcg/slab: stop setting page->mem_cgroup pointer for slab pages") Signed-off-by: Roman Gushchin Signed-off-by: Andrew Morton Reviewed-by: Shakeel Butt Acked-by: Johannes Weiner Cc: Michal Hocko Cc: Bharata B Rao Cc: Shakeel Butt Cc: Link: http://lkml.kernel.org/r/20200303233550.251375-1-guro@fb.com Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 12 ++++++++++++ kernel/fork.c | 4 ++-- mm/memcontrol.c | 38 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 52 insertions(+), 2 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index a7a0a1a5c8d5..e9ba01336d4e 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -695,6 +695,7 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec, void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val); void __mod_lruvec_slab_state(void *p, enum node_stat_item idx, int val); +void mod_memcg_obj_state(void *p, int idx, int val); static inline void mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val) @@ -1123,6 +1124,10 @@ static inline void __mod_lruvec_slab_state(void *p, enum node_stat_item idx, __mod_node_page_state(page_pgdat(page), idx, val); } +static inline void mod_memcg_obj_state(void *p, int idx, int val) +{ +} + static inline unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, gfp_t gfp_mask, @@ -1427,6 +1432,8 @@ static inline int memcg_cache_id(struct mem_cgroup *memcg) return memcg ? memcg->kmemcg_id : -1; } +struct mem_cgroup *mem_cgroup_from_obj(void *p); + #else static inline int memcg_kmem_charge(struct page *page, gfp_t gfp, int order) @@ -1468,6 +1475,11 @@ static inline void memcg_put_cache_ids(void) { } +static inline struct mem_cgroup *mem_cgroup_from_obj(void *p) +{ + return NULL; +} + #endif /* CONFIG_MEMCG_KMEM */ #endif /* _LINUX_MEMCONTROL_H */ diff --git a/kernel/fork.c b/kernel/fork.c index 86425305cd4a..d90af13431c7 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -397,8 +397,8 @@ static void account_kernel_stack(struct task_struct *tsk, int account) mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB, THREAD_SIZE / 1024 * account); - mod_memcg_page_state(first_page, MEMCG_KERNEL_STACK_KB, - account * (THREAD_SIZE / 1024)); + mod_memcg_obj_state(stack, MEMCG_KERNEL_STACK_KB, + account * (THREAD_SIZE / 1024)); } } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 7a4bd8b9adc2..7ddf91c4295f 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -777,6 +777,17 @@ void __mod_lruvec_slab_state(void *p, enum node_stat_item idx, int val) rcu_read_unlock(); } +void mod_memcg_obj_state(void *p, int idx, int val) +{ + struct mem_cgroup *memcg; + + rcu_read_lock(); + memcg = mem_cgroup_from_obj(p); + if (memcg) + mod_memcg_state(memcg, idx, val); + rcu_read_unlock(); +} + /** * __count_memcg_events - account VM events in a cgroup * @memcg: the memory cgroup @@ -2661,6 +2672,33 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg, } #ifdef CONFIG_MEMCG_KMEM +/* + * Returns a pointer to the memory cgroup to which the kernel object is charged. + * + * The caller must ensure the memcg lifetime, e.g. by taking rcu_read_lock(), + * cgroup_mutex, etc. + */ +struct mem_cgroup *mem_cgroup_from_obj(void *p) +{ + struct page *page; + + if (mem_cgroup_disabled()) + return NULL; + + page = virt_to_head_page(p); + + /* + * Slab pages don't have page->mem_cgroup set because corresponding + * kmem caches can be reparented during the lifetime. That's why + * memcg_from_slab_page() should be used instead. + */ + if (PageSlab(page)) + return memcg_from_slab_page(page); + + /* All other pages use page->mem_cgroup */ + return page->mem_cgroup; +} + static int memcg_alloc_cache_id(void) { int id, size; -- cgit From b943f045a9af9fd02f923e43fe8d7517e9961701 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Sat, 28 Mar 2020 19:17:29 -0700 Subject: mm/sparse: fix kernel crash with pfn_section_valid check Fix the crash like this: BUG: Kernel NULL pointer dereference on read at 0x00000000 Faulting instruction address: 0xc000000000c3447c Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries CPU: 11 PID: 7519 Comm: lt-ndctl Not tainted 5.6.0-rc7-autotest #1 ... NIP [c000000000c3447c] vmemmap_populated+0x98/0xc0 LR [c000000000088354] vmemmap_free+0x144/0x320 Call Trace: section_deactivate+0x220/0x240 __remove_pages+0x118/0x170 arch_remove_memory+0x3c/0x150 memunmap_pages+0x1cc/0x2f0 devm_action_release+0x30/0x50 release_nodes+0x2f8/0x3e0 device_release_driver_internal+0x168/0x270 unbind_store+0x130/0x170 drv_attr_store+0x44/0x60 sysfs_kf_write+0x68/0x80 kernfs_fop_write+0x100/0x290 __vfs_write+0x3c/0x70 vfs_write+0xcc/0x240 ksys_write+0x7c/0x140 system_call+0x5c/0x68 The crash is due to NULL dereference at test_bit(idx, ms->usage->subsection_map); due to ms->usage = NULL in pfn_section_valid() With commit d41e2f3bd546 ("mm/hotplug: fix hot remove failure in SPARSEMEM|!VMEMMAP case") section_mem_map is set to NULL after depopulate_section_mem(). This was done so that pfn_page() can work correctly with kernel config that disables SPARSEMEM_VMEMMAP. With that config pfn_to_page does __section_mem_map_addr(__sec) + __pfn; where static inline struct page *__section_mem_map_addr(struct mem_section *section) { unsigned long map = section->section_mem_map; map &= SECTION_MAP_MASK; return (struct page *)map; } Now with SPASEMEM_VMEMAP enabled, mem_section->usage->subsection_map is used to check the pfn validity (pfn_valid()). Since section_deactivate release mem_section->usage if a section is fully deactivated, pfn_valid() check after a subsection_deactivate cause a kernel crash. static inline int pfn_valid(unsigned long pfn) { ... return early_section(ms) || pfn_section_valid(ms, pfn); } where static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) { int idx = subsection_map_index(pfn); return test_bit(idx, ms->usage->subsection_map); } Avoid this by clearing SECTION_HAS_MEM_MAP when mem_section->usage is freed. For architectures like ppc64 where large pages are used for vmmemap mapping (16MB), a specific vmemmap mapping can cover multiple sections. Hence before a vmemmap mapping page can be freed, the kernel needs to make sure there are no valid sections within that mapping. Clearing the section valid bit before depopulate_section_memap enables this. [aneesh.kumar@linux.ibm.com: add comment] Link: http://lkml.kernel.org/r/20200326133235.343616-1-aneesh.kumar@linux.ibm.comLink: http://lkml.kernel.org/r/20200325031914.107660-1-aneesh.kumar@linux.ibm.com Fixes: d41e2f3bd546 ("mm/hotplug: fix hot remove failure in SPARSEMEM|!VMEMMAP case") Reported-by: Sachin Sant Signed-off-by: Aneesh Kumar K.V Signed-off-by: Andrew Morton Tested-by: Sachin Sant Reviewed-by: Baoquan He Reviewed-by: Wei Yang Acked-by: Michal Hocko Acked-by: Pankaj Gupta Cc: Michael Ellerman Cc: Dan Williams Cc: David Hildenbrand Cc: Oscar Salvador Cc: Mike Rapoport Cc: Signed-off-by: Linus Torvalds --- mm/sparse.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/mm/sparse.c b/mm/sparse.c index aadb7298dcef..65599e8bd636 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -781,6 +781,12 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages, ms->usage = NULL; } memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr); + /* + * Mark the section invalid so that valid_section() + * return false. This prevents code from dereferencing + * ms->usage array. + */ + ms->section_mem_map &= ~SECTION_HAS_MEM_MAP; } if (section_is_early && memmap) -- cgit