From da25e628c4c231a281b1c1de3168a36ab9bfe473 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Thu, 17 Sep 2015 12:24:19 -0600 Subject: x86/mm: Fix page table dump to show PAT bit /sys/kernel/debug/kernel_page_tables does not show the PAT bit for PUD/PMD mappings. This is because walk_pud_level(), walk_pmd_level() and note_page() mask the flags with PTE_FLAGS_MASK, which does not cover their PAT bit, _PAGE_PAT_LARGE. Fix it by replacing the use of PTE_FLAGS_MASK with p?d_flags(), which masks the flags properly. Also change to show the PAT bit as "PAT" to be consistent with other bits. Reported-by: Robert Elliott Signed-off-by: Toshi Kani Cc: Andrew Morton Cc: Juergen Gross Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Borislav Petkov Cc: Konrad Wilk Cc: Robert Elliot Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/1442514264-12475-7-git-send-email-toshi.kani@hpe.com Signed-off-by: Thomas Gleixner --- arch/x86/mm/dump_pagetables.c | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index f0cedf3395af..71ab2d741024 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -155,7 +155,7 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg) pt_dump_cont_printf(m, dmsg, " "); if ((level == 4 && pr & _PAGE_PAT) || ((level == 3 || level == 2) && pr & _PAGE_PAT_LARGE)) - pt_dump_cont_printf(m, dmsg, "pat "); + pt_dump_cont_printf(m, dmsg, "PAT "); else pt_dump_cont_printf(m, dmsg, " "); if (pr & _PAGE_GLOBAL) @@ -198,8 +198,8 @@ static void note_page(struct seq_file *m, struct pg_state *st, * we have now. "break" is either changing perms, levels or * address space marker. */ - prot = pgprot_val(new_prot) & PTE_FLAGS_MASK; - cur = pgprot_val(st->current_prot) & PTE_FLAGS_MASK; + prot = pgprot_val(new_prot); + cur = pgprot_val(st->current_prot); if (!st->level) { /* First entry */ @@ -269,13 +269,13 @@ static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr, { int i; pte_t *start; + pgprotval_t prot; start = (pte_t *) pmd_page_vaddr(addr); for (i = 0; i < PTRS_PER_PTE; i++) { - pgprot_t prot = pte_pgprot(*start); - + prot = pte_flags(*start); st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT); - note_page(m, st, prot, 4); + note_page(m, st, __pgprot(prot), 4); start++; } } @@ -287,18 +287,19 @@ static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr, { int i; pmd_t *start; + pgprotval_t prot; start = (pmd_t *) pud_page_vaddr(addr); for (i = 0; i < PTRS_PER_PMD; i++) { st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT); if (!pmd_none(*start)) { - pgprotval_t prot = pmd_val(*start) & PTE_FLAGS_MASK; - - if (pmd_large(*start) || !pmd_present(*start)) + if (pmd_large(*start) || !pmd_present(*start)) { + prot = pmd_flags(*start); note_page(m, st, __pgprot(prot), 3); - else + } else { walk_pte_level(m, st, *start, P + i * PMD_LEVEL_MULT); + } } else note_page(m, st, __pgprot(0), 3); start++; @@ -318,19 +319,20 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr, { int i; pud_t *start; + pgprotval_t prot; start = (pud_t *) pgd_page_vaddr(addr); for (i = 0; i < PTRS_PER_PUD; i++) { st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT); if (!pud_none(*start)) { - pgprotval_t prot = pud_val(*start) & PTE_FLAGS_MASK; - - if (pud_large(*start) || !pud_present(*start)) + if (pud_large(*start) || !pud_present(*start)) { + prot = pud_flags(*start); note_page(m, st, __pgprot(prot), 2); - else + } else { walk_pmd_level(m, st, *start, P + i * PUD_LEVEL_MULT); + } } else note_page(m, st, __pgprot(0), 2); @@ -351,6 +353,7 @@ void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd) #else pgd_t *start = swapper_pg_dir; #endif + pgprotval_t prot; int i; struct pg_state st = {}; @@ -362,13 +365,13 @@ void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd) for (i = 0; i < PTRS_PER_PGD; i++) { st.current_address = normalize_addr(i * PGD_LEVEL_MULT); if (!pgd_none(*start)) { - pgprotval_t prot = pgd_val(*start) & PTE_FLAGS_MASK; - - if (pgd_large(*start) || !pgd_present(*start)) + if (pgd_large(*start) || !pgd_present(*start)) { + prot = pgd_flags(*start); note_page(m, &st, __pgprot(prot), 1); - else + } else { walk_pud_level(m, &st, *start, i * PGD_LEVEL_MULT); + } } else note_page(m, &st, __pgprot(0), 1); -- cgit From 34437e67a6727885bdf6cbfd8441b1ac43a1ee65 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Thu, 17 Sep 2015 12:24:20 -0600 Subject: x86/mm: Fix slow_virt_to_phys() to handle large PAT bit slow_virt_to_phys() calls lookup_address() to obtain *pte and its level. It then calls pte_pfn() to obtain a physical address for any level. However, this physical address is not correct when the large PAT bit is set because pte_pfn() does not mask the large PAT bit properly for PUD/PMD. Fix slow_virt_to_phys() to use pud_pfn() and pmd_pfn() for 1GB and 2MB mapping levels. Signed-off-by: Toshi Kani Cc: Andrew Morton Cc: Juergen Gross Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Borislav Petkov Cc: Konrad Wilk Cc: Robert Elliot Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/1442514264-12475-8-git-send-email-toshi.kani@hpe.com Signed-off-by: Thomas Gleixner --- arch/x86/mm/pageattr.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 2c44c0792301..1441368a7931 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -414,18 +414,28 @@ pmd_t *lookup_pmd_address(unsigned long address) phys_addr_t slow_virt_to_phys(void *__virt_addr) { unsigned long virt_addr = (unsigned long)__virt_addr; - phys_addr_t phys_addr; - unsigned long offset; + unsigned long phys_addr, offset; enum pg_level level; - unsigned long pmask; pte_t *pte; pte = lookup_address(virt_addr, &level); BUG_ON(!pte); - pmask = page_level_mask(level); - offset = virt_addr & ~pmask; - phys_addr = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT; - return (phys_addr | offset); + + switch (level) { + case PG_LEVEL_1G: + phys_addr = pud_pfn(*(pud_t *)pte) << PAGE_SHIFT; + offset = virt_addr & ~PUD_PAGE_MASK; + break; + case PG_LEVEL_2M: + phys_addr = pmd_pfn(*(pmd_t *)pte) << PAGE_SHIFT; + offset = virt_addr & ~PMD_PAGE_MASK; + break; + default: + phys_addr = pte_pfn(*pte) << PAGE_SHIFT; + offset = virt_addr & ~PAGE_MASK; + } + + return (phys_addr_t)(phys_addr | offset); } EXPORT_SYMBOL_GPL(slow_virt_to_phys); -- cgit From daf3e35c5888e8bd6a2f5ed15ed392b2df362ecf Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Thu, 17 Sep 2015 12:24:21 -0600 Subject: x86/mm: Fix gup_huge_p?d() to handle large PAT bit gup_huge_pud() and gup_huge_pmd() cast *pud and *pmd to *pte, and use pte_xxx() interfaces to obtain the flags and PFN. However, the pte_xxx() interface does not handle the large PAT bit properly for PUD/PMD. Fix gup_huge_pud() and gup_huge_pmd() to use pud_xxx() and pmd_xxx() interfaces according to their type. Signed-off-by: Toshi Kani Cc: Andrew Morton Cc: Juergen Gross Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Borislav Petkov Cc: Konrad Wilk Cc: Robert Elliot Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/1442514264-12475-9-git-send-email-toshi.kani@hpe.com Signed-off-by: Thomas Gleixner --- arch/x86/mm/gup.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index 81bf3d2af3eb..ae9a37bf1371 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -118,21 +118,20 @@ static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { unsigned long mask; - pte_t pte = *(pte_t *)&pmd; struct page *head, *page; int refs; mask = _PAGE_PRESENT|_PAGE_USER; if (write) mask |= _PAGE_RW; - if ((pte_flags(pte) & mask) != mask) + if ((pmd_flags(pmd) & mask) != mask) return 0; /* hugepages are never "special" */ - VM_BUG_ON(pte_flags(pte) & _PAGE_SPECIAL); - VM_BUG_ON(!pfn_valid(pte_pfn(pte))); + VM_BUG_ON(pmd_flags(pmd) & _PAGE_SPECIAL); + VM_BUG_ON(!pfn_valid(pmd_pfn(pmd))); refs = 0; - head = pte_page(pte); + head = pmd_page(pmd); page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); do { VM_BUG_ON_PAGE(compound_head(page) != head, page); @@ -195,21 +194,20 @@ static noinline int gup_huge_pud(pud_t pud, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { unsigned long mask; - pte_t pte = *(pte_t *)&pud; struct page *head, *page; int refs; mask = _PAGE_PRESENT|_PAGE_USER; if (write) mask |= _PAGE_RW; - if ((pte_flags(pte) & mask) != mask) + if ((pud_flags(pud) & mask) != mask) return 0; /* hugepages are never "special" */ - VM_BUG_ON(pte_flags(pte) & _PAGE_SPECIAL); - VM_BUG_ON(!pfn_valid(pte_pfn(pte))); + VM_BUG_ON(pud_flags(pud) & _PAGE_SPECIAL); + VM_BUG_ON(!pfn_valid(pud_pfn(pud))); refs = 0; - head = pte_page(pte); + head = pud_page(pud); page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT); do { VM_BUG_ON_PAGE(compound_head(page) != head, page); -- cgit From 3a19109efbfa7d887996a74257556a46e00525c2 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Thu, 17 Sep 2015 12:24:22 -0600 Subject: x86/mm: Fix try_preserve_large_page() to handle large PAT bit try_preserve_large_page() is called from __change_page_attr() to change the mapping attribute of a given large page. This function uses pte_pfn() and pte_pgprot() for PUD/PMD, which do not handle the large PAT bit properly. Fix try_preserve_large_page() by using the corresponding pud/pmd prot/pfn interfaces. Also remove '#ifdef CONFIG_X86_64', which is not necessary. Signed-off-by: Toshi Kani Cc: Andrew Morton Cc: Juergen Gross Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Borislav Petkov Cc: Konrad Wilk Cc: Robert Elliot Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/1442514264-12475-10-git-send-email-toshi.kani@hpe.com Signed-off-by: Thomas Gleixner --- arch/x86/mm/pageattr.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 1441368a7931..3f612713159a 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -468,7 +468,7 @@ static int try_preserve_large_page(pte_t *kpte, unsigned long address, struct cpa_data *cpa) { - unsigned long nextpage_addr, numpages, pmask, psize, addr, pfn; + unsigned long nextpage_addr, numpages, pmask, psize, addr, pfn, old_pfn; pte_t new_pte, old_pte, *tmp; pgprot_t old_prot, new_prot, req_prot; int i, do_split = 1; @@ -488,17 +488,21 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, switch (level) { case PG_LEVEL_2M: -#ifdef CONFIG_X86_64 + old_prot = pmd_pgprot(*(pmd_t *)kpte); + old_pfn = pmd_pfn(*(pmd_t *)kpte); + break; case PG_LEVEL_1G: -#endif - psize = page_level_size(level); - pmask = page_level_mask(level); + old_prot = pud_pgprot(*(pud_t *)kpte); + old_pfn = pud_pfn(*(pud_t *)kpte); break; default: do_split = -EINVAL; goto out_unlock; } + psize = page_level_size(level); + pmask = page_level_mask(level); + /* * Calculate the number of pages, which fit into this large * page starting at address: @@ -514,7 +518,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, * up accordingly. */ old_pte = *kpte; - old_prot = req_prot = pgprot_large_2_4k(pte_pgprot(old_pte)); + old_prot = req_prot = pgprot_large_2_4k(old_prot); pgprot_val(req_prot) &= ~pgprot_val(cpa->mask_clr); pgprot_val(req_prot) |= pgprot_val(cpa->mask_set); @@ -540,10 +544,10 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, req_prot = canon_pgprot(req_prot); /* - * old_pte points to the large page base address. So we need + * old_pfn points to the large page base pfn. So we need * to add the offset of the virtual address: */ - pfn = pte_pfn(old_pte) + ((address & (psize - 1)) >> PAGE_SHIFT); + pfn = old_pfn + ((address & (psize - 1)) >> PAGE_SHIFT); cpa->pfn = pfn; new_prot = static_protections(req_prot, address, pfn); @@ -554,7 +558,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, * the pages in the range we try to preserve: */ addr = address & pmask; - pfn = pte_pfn(old_pte); + pfn = old_pfn; for (i = 0; i < (psize >> PAGE_SHIFT); i++, addr += PAGE_SIZE, pfn++) { pgprot_t chk_prot = static_protections(req_prot, addr, pfn); @@ -584,7 +588,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, * The address is aligned and the number of pages * covers the full page. */ - new_pte = pfn_pte(pte_pfn(old_pte), new_prot); + new_pte = pfn_pte(old_pfn, new_prot); __set_pmd_pte(kpte, address, new_pte); cpa->flags |= CPA_FLUSHTLB; do_split = 0; -- cgit From d551aaa2f7e1387fa66093ce9914c2e91f283a50 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Thu, 17 Sep 2015 12:24:23 -0600 Subject: x86/mm: Fix __split_large_page() to handle large PAT bit __split_large_page() is called from __change_page_attr() to change the mapping attribute by splitting a given large page into smaller pages. This function uses pte_pfn() and pte_pgprot() for PUD/PMD, which do not handle the large PAT bit properly. Fix __split_large_page() by using the corresponding pud/pmd pfn/ pgprot interfaces. Also remove '#ifdef CONFIG_X86_64', which is not necessary. Signed-off-by: Toshi Kani Cc: Andrew Morton Cc: Juergen Gross Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Borislav Petkov Cc: Konrad Wilk Cc: Robert Elliot Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/1442514264-12475-11-git-send-email-toshi.kani@hpe.com Signed-off-by: Thomas Gleixner --- arch/x86/mm/pageattr.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 3f612713159a..ab9465b9160f 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -605,7 +605,7 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address, struct page *base) { pte_t *pbase = (pte_t *)page_address(base); - unsigned long pfn, pfninc = 1; + unsigned long ref_pfn, pfn, pfninc = 1; unsigned int i, level; pte_t *tmp; pgprot_t ref_prot; @@ -622,26 +622,33 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address, } paravirt_alloc_pte(&init_mm, page_to_pfn(base)); - ref_prot = pte_pgprot(pte_clrhuge(*kpte)); - /* promote PAT bit to correct position */ - if (level == PG_LEVEL_2M) + switch (level) { + case PG_LEVEL_2M: + ref_prot = pmd_pgprot(*(pmd_t *)kpte); + /* clear PSE and promote PAT bit to correct position */ ref_prot = pgprot_large_2_4k(ref_prot); + ref_pfn = pmd_pfn(*(pmd_t *)kpte); + break; -#ifdef CONFIG_X86_64 - if (level == PG_LEVEL_1G) { + case PG_LEVEL_1G: + ref_prot = pud_pgprot(*(pud_t *)kpte); + ref_pfn = pud_pfn(*(pud_t *)kpte); pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT; + /* - * Set the PSE flags only if the PRESENT flag is set + * Clear the PSE flags if the PRESENT flag is not set * otherwise pmd_present/pmd_huge will return true * even on a non present pmd. */ - if (pgprot_val(ref_prot) & _PAGE_PRESENT) - pgprot_val(ref_prot) |= _PAGE_PSE; - else + if (!(pgprot_val(ref_prot) & _PAGE_PRESENT)) pgprot_val(ref_prot) &= ~_PAGE_PSE; + break; + + default: + spin_unlock(&pgd_lock); + return 1; } -#endif /* * Set the GLOBAL flags only if the PRESENT flag is set @@ -657,7 +664,7 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address, /* * Get the target pfn from the original entry: */ - pfn = pte_pfn(*kpte); + pfn = ref_pfn; for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc) set_pte(&pbase[i], pfn_pte(pfn, canon_pgprot(ref_prot))); -- cgit From 55696b1f664e52b3036f21631f9c2247b667f587 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Thu, 17 Sep 2015 12:24:24 -0600 Subject: x86/mm: Fix no-change case in try_preserve_large_page() try_preserve_large_page() checks if new_prot is the same as old_prot. If so, it simply sets do_split to 0, and returns with no-operation. However, old_prot is set as a 4KB pgprot value while new_prot is a large page pgprot value. Now that old_prot is initially set from p?d_pgprot() as a large page pgprot value, fix it by not overwriting old_prot with a 4KB pgprot value. Signed-off-by: Toshi Kani Cc: Andrew Morton Cc: Juergen Gross Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Borislav Petkov Cc: Konrad Wilk Cc: Robert Elliot Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/1442514264-12475-12-git-send-email-toshi.kani@hpe.com Signed-off-by: Thomas Gleixner --- arch/x86/mm/pageattr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index ab9465b9160f..e2621a8e8213 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -518,7 +518,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, * up accordingly. */ old_pte = *kpte; - old_prot = req_prot = pgprot_large_2_4k(old_prot); + req_prot = pgprot_large_2_4k(old_prot); pgprot_val(req_prot) &= ~pgprot_val(cpa->mask_clr); pgprot_val(req_prot) |= pgprot_val(cpa->mask_set); -- cgit From e1a58320a38dfa72be48a0f1a3a92273663ba6db Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Mon, 5 Oct 2015 12:55:20 -0400 Subject: x86/mm: Warn on W^X mappings Warn on any residual W+X mappings after setting NX if DEBUG_WX is enabled. Introduce a separate X86_PTDUMP_CORE config that enables the code for dumping the page tables without enabling the debugfs interface, so that DEBUG_WX can be enabled without exposing the debugfs interface. Switch EFI_PGT_DUMP to using X86_PTDUMP_CORE so that it also does not require enabling the debugfs interface. On success it prints this to the kernel log: x86/mm: Checked W+X mappings: passed, no W+X pages found. On failure it prints a warning and a count of the failed pages: ------------[ cut here ]------------ WARNING: CPU: 1 PID: 1 at arch/x86/mm/dump_pagetables.c:226 note_page+0x610/0x7b0() x86/mm: Found insecure W+X mapping at address ffffffff81755000/__stop___ex_table+0xfa8/0xabfa8 [...] Call Trace: [] dump_stack+0x44/0x55 [] warn_slowpath_common+0x82/0xc0 [] warn_slowpath_fmt+0x5c/0x80 [] ? note_page+0x5c9/0x7b0 [] note_page+0x610/0x7b0 [] ptdump_walk_pgd_level_core+0x259/0x3c0 [] ptdump_walk_pgd_level_checkwx+0x17/0x20 [] mark_rodata_ro+0xf5/0x100 [] ? rest_init+0x80/0x80 [] kernel_init+0x1d/0xe0 [] ret_from_fork+0x3f/0x70 [] ? rest_init+0x80/0x80 ---[ end trace a1f23a1e42a2ac76 ]--- x86/mm: Checked W+X mappings: FAILED, 171 W+X pages found. Signed-off-by: Stephen Smalley Acked-by: Kees Cook Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Link: http://lkml.kernel.org/r/1444064120-11450-1-git-send-email-sds@tycho.nsa.gov [ Improved the Kconfig help text and made the new option default-y if CONFIG_DEBUG_RODATA=y, because it already found buggy mappings, so we really want people to have this on by default. ] Signed-off-by: Ingo Molnar --- arch/x86/mm/Makefile | 2 +- arch/x86/mm/dump_pagetables.c | 42 +++++++++++++++++++++++++++++++++++++++++- arch/x86/mm/init_32.c | 2 ++ arch/x86/mm/init_64.c | 2 ++ 4 files changed, 46 insertions(+), 2 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index a482d105172b..65c47fda26fc 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -14,7 +14,7 @@ obj-$(CONFIG_SMP) += tlb.o obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o -obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o +obj-$(CONFIG_X86_PTDUMP_CORE) += dump_pagetables.o obj-$(CONFIG_HIGHMEM) += highmem_32.o diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 71ab2d741024..1bf417e9cc13 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -32,6 +32,8 @@ struct pg_state { const struct addr_marker *marker; unsigned long lines; bool to_dmesg; + bool check_wx; + unsigned long wx_pages; }; struct addr_marker { @@ -214,6 +216,16 @@ static void note_page(struct seq_file *m, struct pg_state *st, const char *unit = units; unsigned long delta; int width = sizeof(unsigned long) * 2; + pgprotval_t pr = pgprot_val(st->current_prot); + + if (st->check_wx && (pr & _PAGE_RW) && !(pr & _PAGE_NX)) { + WARN_ONCE(1, + "x86/mm: Found insecure W+X mapping at address %p/%pS\n", + (void *)st->start_address, + (void *)st->start_address); + st->wx_pages += (st->current_address - + st->start_address) / PAGE_SIZE; + } /* * Now print the actual finished series @@ -346,7 +358,8 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr, #define pgd_none(a) pud_none(__pud(pgd_val(a))) #endif -void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd) +static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd, + bool checkwx) { #ifdef CONFIG_X86_64 pgd_t *start = (pgd_t *) &init_level4_pgt; @@ -362,6 +375,10 @@ void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd) st.to_dmesg = true; } + st.check_wx = checkwx; + if (checkwx) + st.wx_pages = 0; + for (i = 0; i < PTRS_PER_PGD; i++) { st.current_address = normalize_addr(i * PGD_LEVEL_MULT); if (!pgd_none(*start)) { @@ -381,8 +398,26 @@ void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd) /* Flush out the last page */ st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT); note_page(m, &st, __pgprot(0), 0); + if (!checkwx) + return; + if (st.wx_pages) + pr_info("x86/mm: Checked W+X mappings: FAILED, %lu W+X pages found.\n", + st.wx_pages); + else + pr_info("x86/mm: Checked W+X mappings: passed, no W+X pages found.\n"); +} + +void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd) +{ + ptdump_walk_pgd_level_core(m, pgd, false); } +void ptdump_walk_pgd_level_checkwx(void) +{ + ptdump_walk_pgd_level_core(NULL, NULL, true); +} + +#ifdef CONFIG_X86_PTDUMP static int ptdump_show(struct seq_file *m, void *v) { ptdump_walk_pgd_level(m, NULL); @@ -400,10 +435,13 @@ static const struct file_operations ptdump_fops = { .llseek = seq_lseek, .release = single_release, }; +#endif static int pt_dump_init(void) { +#ifdef CONFIG_X86_PTDUMP struct dentry *pe; +#endif #ifdef CONFIG_X86_32 /* Not a compile-time constant on x86-32 */ @@ -415,10 +453,12 @@ static int pt_dump_init(void) address_markers[FIXADDR_START_NR].start_address = FIXADDR_START; #endif +#ifdef CONFIG_X86_PTDUMP pe = debugfs_create_file("kernel_page_tables", 0600, NULL, NULL, &ptdump_fops); if (!pe) return -ENOMEM; +#endif return 0; } diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 7562f42914b4..cb4ef3de61f9 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -957,6 +957,8 @@ void mark_rodata_ro(void) set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); #endif mark_nxdata_nx(); + if (__supported_pte_mask & _PAGE_NX) + debug_checkwx(); } #endif diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 30564e2752d3..f8b157366700 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1150,6 +1150,8 @@ void mark_rodata_ro(void) free_init_pages("unused kernel", (unsigned long) __va(__pa_symbol(rodata_end)), (unsigned long) __va(__pa_symbol(_sdata))); + + debug_checkwx(); } #endif -- cgit