From 30b0a105d9f7141e4cbf72ae5511832457d89788 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 21 Nov 2013 14:31:58 -0800 Subject: mm: thp: give transparent hugepage code a separate copy_page Right now, the migration code in migrate_page_copy() uses copy_huge_page() for hugetlbfs and thp pages: if (PageHuge(page) || PageTransHuge(page)) copy_huge_page(newpage, page); So, yay for code reuse. But: void copy_huge_page(struct page *dst, struct page *src) { struct hstate *h = page_hstate(src); and a non-hugetlbfs page has no page_hstate(). This works 99% of the time because page_hstate() determines the hstate from the page order alone. Since the page order of a THP page matches the default hugetlbfs page order, it works. But, if you change the default huge page size on the boot command-line (say default_hugepagesz=1G), then we might not even *have* a 2MB hstate so page_hstate() returns null and copy_huge_page() oopses pretty fast since copy_huge_page() dereferences the hstate: void copy_huge_page(struct page *dst, struct page *src) { struct hstate *h = page_hstate(src); if (unlikely(pages_per_huge_page(h) > MAX_ORDER_NR_PAGES)) { ... Mel noticed that the migration code is really the only user of these functions. This moves all the copy code over to migrate.c and makes copy_huge_page() work for THP by checking for it explicitly. I believe the bug was introduced in commit b32967ff101a ("mm: numa: Add THP migration for the NUMA working set scanning fault case") [akpm@linux-foundation.org: fix coding-style and comment text, per Naoya Horiguchi] Signed-off-by: Dave Hansen Acked-by: Mel Gorman Reviewed-by: Naoya Horiguchi Cc: Hillf Danton Cc: Andrea Arcangeli Tested-by: Dave Jiang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index acd2010328f3..85e0c58bdfdf 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -69,7 +69,6 @@ int dequeue_hwpoisoned_huge_page(struct page *page); bool isolate_huge_page(struct page *page, struct list_head *list); void putback_active_hugepage(struct page *page); bool is_hugepage_active(struct page *page); -void copy_huge_page(struct page *dst, struct page *src); #ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud); @@ -140,9 +139,6 @@ static inline int dequeue_hwpoisoned_huge_page(struct page *page) #define isolate_huge_page(p, l) false #define putback_active_hugepage(p) do {} while (0) #define is_hugepage_active(x) false -static inline void copy_huge_page(struct page *dst, struct page *src) -{ -} static inline unsigned long hugetlb_change_protection(struct vm_area_struct *vma, unsigned long address, unsigned long end, pgprot_t newprot) -- cgit From 27c73ae759774e63313c1fbfeb17ba076cea64c5 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Thu, 21 Nov 2013 14:32:02 -0800 Subject: mm: hugetlbfs: fix hugetlbfs optimization Commit 7cb2ef56e6a8 ("mm: fix aio performance regression for database caused by THP") can cause dereference of a dangling pointer if split_huge_page runs during PageHuge() if there are updates to the tail_page->private field. Also it is repeating compound_head twice for hugetlbfs and it is running compound_head+compound_trans_head for THP when a single one is needed in both cases. The new code within the PageSlab() check doesn't need to verify that the THP page size is never bigger than the smallest hugetlbfs page size, to avoid memory corruption. A longstanding theoretical race condition was found while fixing the above (see the change right after the skip_unlock label, that is relevant for the compound_lock path too). By re-establishing the _mapcount tail refcounting for all compound pages, this also fixes the below problem: echo 0 >/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages BUG: Bad page state in process bash pfn:59a01 page:ffffea000139b038 count:0 mapcount:10 mapping: (null) index:0x0 page flags: 0x1c00000000008000(tail) Modules linked in: CPU: 6 PID: 2018 Comm: bash Not tainted 3.12.0+ #25 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 Call Trace: dump_stack+0x55/0x76 bad_page+0xd5/0x130 free_pages_prepare+0x213/0x280 __free_pages+0x36/0x80 update_and_free_page+0xc1/0xd0 free_pool_huge_page+0xc2/0xe0 set_max_huge_pages.part.58+0x14c/0x220 nr_hugepages_store_common.isra.60+0xd0/0xf0 nr_hugepages_store+0x13/0x20 kobj_attr_store+0xf/0x20 sysfs_write_file+0x189/0x1e0 vfs_write+0xc5/0x1f0 SyS_write+0x55/0xb0 system_call_fastpath+0x16/0x1b Signed-off-by: Khalid Aziz Signed-off-by: Andrea Arcangeli Tested-by: Khalid Aziz Cc: Pravin Shelar Cc: Greg Kroah-Hartman Cc: Ben Hutchings Cc: Christoph Lameter Cc: Johannes Weiner Cc: Mel Gorman Cc: Rik van Riel Cc: Andi Kleen Cc: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 85e0c58bdfdf..9649ff0c63f8 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -31,6 +31,7 @@ struct hugepage_subpool *hugepage_new_subpool(long nr_blocks); void hugepage_put_subpool(struct hugepage_subpool *spool); int PageHuge(struct page *page); +int PageHeadHuge(struct page *page_head); void reset_vma_resv_huge_pages(struct vm_area_struct *vma); int hugetlb_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); @@ -103,6 +104,11 @@ static inline int PageHuge(struct page *page) return 0; } +static inline int PageHeadHuge(struct page *page_head) +{ + return 0; +} + static inline void reset_vma_resv_huge_pages(struct vm_area_struct *vma) { } -- cgit From 7aa555bf26763b86332c7a3689701c999834b87a Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 21 Nov 2013 14:32:11 -0800 Subject: mm: place page->pmd_huge_pte to right union I don't know what went wrong, mis-merge or something, but ->pmd_huge_pte placed in wrong union within struct page. In original patch[1] it's placed to union with ->lru and ->slab, but in commit e009bb30c8df ("mm: implement split page table lock for PMD level") it's in union with ->index and ->freelist. That union seems also unused for pages with table tables and safe to re-use, but it's not what I've tested. Let's move it to original place. It fixes indentation at least. :) [1] https://lkml.org/lkml/2013/10/7/288 Signed-off-by: Kirill A. Shutemov Reviewed-by: Naoya Horiguchi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 10f5a7272b80..011eb85d7b0f 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -65,9 +65,6 @@ struct page { * this page is only used to * free other pages. */ -#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS - pgtable_t pmd_huge_pte; /* protected by page->ptl */ -#endif }; union { @@ -135,6 +132,9 @@ struct page { struct list_head list; /* slobs list of pages */ struct slab *slab_page; /* slab fields */ +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS + pgtable_t pmd_huge_pte; /* protected by page->ptl */ +#endif }; /* Remainder is not double word aligned */ -- cgit