Diffstat (limited to 'arch/x86/mm/pgtable.c')
 -rw-r--r--  arch/x86/mm/pgtable.c | 366
 1 file changed, 165 insertions(+), 201 deletions(-)
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index e4f499eb0f29..2e5ecfdce73c 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -10,57 +10,23 @@
#ifdef CONFIG_DYNAMIC_PHYSICAL_MASK
phys_addr_t physical_mask __ro_after_init = (1ULL << __PHYSICAL_MASK_SHIFT) - 1;
EXPORT_SYMBOL(physical_mask);
+SYM_PIC_ALIAS(physical_mask);
#endif
-#ifdef CONFIG_HIGHPTE
-#define PGTABLE_HIGHMEM __GFP_HIGHMEM
-#else
-#define PGTABLE_HIGHMEM 0
-#endif
-
-#ifndef CONFIG_PARAVIRT
-static inline
-void paravirt_tlb_remove_table(struct mmu_gather *tlb, void *table)
-{
- tlb_remove_page(tlb, table);
-}
-#endif
-
-gfp_t __userpte_alloc_gfp = GFP_PGTABLE_USER | PGTABLE_HIGHMEM;
-
pgtable_t pte_alloc_one(struct mm_struct *mm)
{
- return __pte_alloc_one(mm, __userpte_alloc_gfp);
-}
-
-static int __init setup_userpte(char *arg)
-{
- if (!arg)
- return -EINVAL;
-
- /*
- * "userpte=nohigh" disables allocation of user pagetables in
- * high memory.
- */
- if (strcmp(arg, "nohigh") == 0)
- __userpte_alloc_gfp &= ~__GFP_HIGHMEM;
- else
- return -EINVAL;
- return 0;
+ return __pte_alloc_one(mm, GFP_PGTABLE_USER);
}
-early_param("userpte", setup_userpte);
void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
{
- pgtable_pte_page_dtor(pte);
paravirt_release_pte(page_to_pfn(pte));
- paravirt_tlb_remove_table(tlb, pte);
+ tlb_remove_ptdesc(tlb, page_ptdesc(pte));
}
#if CONFIG_PGTABLE_LEVELS > 2
void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
{
- struct page *page = virt_to_page(pmd);
paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT);
/*
* NOTE! For PAE, any changes to the top page-directory-pointer-table
@@ -69,22 +35,21 @@ void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
#ifdef CONFIG_X86_PAE
tlb->need_flush_all = 1;
#endif
- pgtable_pmd_page_dtor(page);
- paravirt_tlb_remove_table(tlb, page);
+ tlb_remove_ptdesc(tlb, virt_to_ptdesc(pmd));
}
#if CONFIG_PGTABLE_LEVELS > 3
void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
{
paravirt_release_pud(__pa(pud) >> PAGE_SHIFT);
- paravirt_tlb_remove_table(tlb, virt_to_page(pud));
+ tlb_remove_ptdesc(tlb, virt_to_ptdesc(pud));
}
#if CONFIG_PGTABLE_LEVELS > 4
void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d)
{
paravirt_release_p4d(__pa(p4d) >> PAGE_SHIFT);
- paravirt_tlb_remove_table(tlb, virt_to_page(p4d));
+ tlb_remove_ptdesc(tlb, virt_to_ptdesc(p4d));
}
#endif /* CONFIG_PGTABLE_LEVELS > 4 */
#endif /* CONFIG_PGTABLE_LEVELS > 3 */
@@ -92,59 +57,43 @@ void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d)
static inline void pgd_list_add(pgd_t *pgd)
{
- struct page *page = virt_to_page(pgd);
+ struct ptdesc *ptdesc = virt_to_ptdesc(pgd);
- list_add(&page->lru, &pgd_list);
+ list_add(&ptdesc->pt_list, &pgd_list);
}
static inline void pgd_list_del(pgd_t *pgd)
{
- struct page *page = virt_to_page(pgd);
+ struct ptdesc *ptdesc = virt_to_ptdesc(pgd);
- list_del(&page->lru);
+ list_del(&ptdesc->pt_list);
}
-#define UNSHARED_PTRS_PER_PGD \
- (SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)
-#define MAX_UNSHARED_PTRS_PER_PGD \
- max_t(size_t, KERNEL_PGD_BOUNDARY, PTRS_PER_PGD)
-
-
static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm)
{
- virt_to_page(pgd)->pt_mm = mm;
+ virt_to_ptdesc(pgd)->pt_mm = mm;
}
struct mm_struct *pgd_page_get_mm(struct page *page)
{
- return page->pt_mm;
+ return page_ptdesc(page)->pt_mm;
}
static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd)
{
- /* If the pgd points to a shared pagetable level (either the
- ptes in non-PAE, or shared PMD in PAE), then just copy the
- references from swapper_pg_dir. */
- if (CONFIG_PGTABLE_LEVELS == 2 ||
- (CONFIG_PGTABLE_LEVELS == 3 && SHARED_KERNEL_PMD) ||
- CONFIG_PGTABLE_LEVELS >= 4) {
+ /* PAE preallocates all its PMDs. No cloning needed. */
+ if (!IS_ENABLED(CONFIG_X86_PAE))
clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY,
swapper_pg_dir + KERNEL_PGD_BOUNDARY,
KERNEL_PGD_PTRS);
- }
- /* list required to sync kernel mapping updates */
- if (!SHARED_KERNEL_PMD) {
- pgd_set_mm(pgd, mm);
- pgd_list_add(pgd);
- }
+ /* List used to sync kernel mapping updates */
+ pgd_set_mm(pgd, mm);
+ pgd_list_add(pgd);
}
static void pgd_dtor(pgd_t *pgd)
{
- if (SHARED_KERNEL_PMD)
- return;
-
spin_lock(&pgd_lock);
pgd_list_del(pgd);
spin_unlock(&pgd_lock);
@@ -168,15 +117,15 @@ static void pgd_dtor(pgd_t *pgd)
* processor notices the update. Since this is expensive, and
* all 4 top-level entries are used almost immediately in a
* new process's life, we just pre-populate them here.
- *
- * Also, if we're in a paravirt environment where the kernel pmd is
- * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate
- * and initialize the kernel pmds here.
*/
-#define PREALLOCATED_PMDS UNSHARED_PTRS_PER_PGD
-#define MAX_PREALLOCATED_PMDS MAX_UNSHARED_PTRS_PER_PGD
+#define PREALLOCATED_PMDS PTRS_PER_PGD
/*
+ * "USER_PMDS" are the PMDs for the user copy of the page tables when
+ * PTI is enabled. They do not exist when PTI is disabled. Note that
+ * this is distinct from the user _portion_ of the kernel page tables
+ * which always exists.
+ *
* We allocate separate PMDs for the kernel part of the user page-table
* when PTI is enabled. We need them to map the per-process LDT into the
* user-space page-table.
@@ -205,7 +154,6 @@ void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
/* No need to prepopulate any pagetable entries in non-PAE modes. */
#define PREALLOCATED_PMDS 0
-#define MAX_PREALLOCATED_PMDS 0
#define PREALLOCATED_USER_PMDS 0
#define MAX_PREALLOCATED_USER_PMDS 0
#endif /* CONFIG_X86_PAE */
@@ -213,11 +161,14 @@ void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
static void free_pmds(struct mm_struct *mm, pmd_t *pmds[], int count)
{
int i;
+ struct ptdesc *ptdesc;
for (i = 0; i < count; i++)
if (pmds[i]) {
- pgtable_pmd_page_dtor(virt_to_page(pmds[i]));
- free_page((unsigned long)pmds[i]);
+ ptdesc = virt_to_ptdesc(pmds[i]);
+
+ pagetable_dtor(ptdesc);
+ pagetable_free(ptdesc);
mm_dec_nr_pmds(mm);
}
}
@@ -230,18 +181,24 @@ static int preallocate_pmds(struct mm_struct *mm, pmd_t *pmds[], int count)
if (mm == &init_mm)
gfp &= ~__GFP_ACCOUNT;
+ gfp &= ~__GFP_HIGHMEM;
for (i = 0; i < count; i++) {
- pmd_t *pmd = (pmd_t *)__get_free_page(gfp);
- if (!pmd)
+ pmd_t *pmd = NULL;
+ struct ptdesc *ptdesc = pagetable_alloc(gfp, 0);
+
+ if (!ptdesc)
failed = true;
- if (pmd && !pgtable_pmd_page_ctor(virt_to_page(pmd))) {
- free_page((unsigned long)pmd);
- pmd = NULL;
+ if (ptdesc && !pagetable_pmd_ctor(mm, ptdesc)) {
+ pagetable_free(ptdesc);
+ ptdesc = NULL;
failed = true;
}
- if (pmd)
+ if (ptdesc) {
mm_inc_nr_pmds(mm);
+ pmd = ptdesc_address(ptdesc);
+ }
+
pmds[i] = pmd;
}
@@ -281,7 +238,7 @@ static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
for (i = 0; i < PREALLOCATED_PMDS; i++)
mop_up_one_pmd(mm, &pgdp[i]);
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
if (!boot_cpu_has(X86_FEATURE_PTI))
return;
@@ -313,7 +270,7 @@ static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[])
}
}
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
static void pgd_prepopulate_user_pmd(struct mm_struct *mm,
pgd_t *k_pgd, pmd_t *pmds[])
{
@@ -345,86 +302,30 @@ static void pgd_prepopulate_user_pmd(struct mm_struct *mm,
{
}
#endif
-/*
- * Xen paravirt assumes pgd table should be in one page. 64 bit kernel also
- * assumes that pgd should be in one page.
- *
- * But kernel with PAE paging that is not running as a Xen domain
- * only needs to allocate 32 bytes for pgd instead of one page.
- */
-#ifdef CONFIG_X86_PAE
-
-#include <linux/slab.h>
-
-#define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t))
-#define PGD_ALIGN 32
-
-static struct kmem_cache *pgd_cache;
-
-void __init pgtable_cache_init(void)
-{
- /*
- * When PAE kernel is running as a Xen domain, it does not use
- * shared kernel pmd. And this requires a whole page for pgd.
- */
- if (!SHARED_KERNEL_PMD)
- return;
-
- /*
- * when PAE kernel is not running as a Xen domain, it uses
- * shared kernel pmd. Shared kernel pmd does not require a whole
- * page for pgd. We are able to just allocate a 32-byte for pgd.
- * During boot time, we create a 32-byte slab for pgd table allocation.
- */
- pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_ALIGN,
- SLAB_PANIC, NULL);
-}
-static inline pgd_t *_pgd_alloc(void)
+static inline pgd_t *_pgd_alloc(struct mm_struct *mm)
{
/*
- * If no SHARED_KERNEL_PMD, PAE kernel is running as a Xen domain.
- * We allocate one page for pgd.
- */
- if (!SHARED_KERNEL_PMD)
- return (pgd_t *)__get_free_pages(GFP_PGTABLE_USER,
- PGD_ALLOCATION_ORDER);
-
- /*
- * Now PAE kernel is not running as a Xen domain. We can allocate
- * a 32-byte slab for pgd to save memory space.
+ * PTI and Xen need a whole page for the PAE PGD
+ * even though the hardware only needs 32 bytes.
+ *
+ * For simplicity, allocate a page for all users.
*/
- return kmem_cache_alloc(pgd_cache, GFP_PGTABLE_USER);
-}
-
-static inline void _pgd_free(pgd_t *pgd)
-{
- if (!SHARED_KERNEL_PMD)
- free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER);
- else
- kmem_cache_free(pgd_cache, pgd);
-}
-#else
-
-static inline pgd_t *_pgd_alloc(void)
-{
- return (pgd_t *)__get_free_pages(GFP_PGTABLE_USER,
- PGD_ALLOCATION_ORDER);
+ return __pgd_alloc(mm, pgd_allocation_order());
}
-static inline void _pgd_free(pgd_t *pgd)
+static inline void _pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
- free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER);
+ __pgd_free(mm, pgd);
}
-#endif /* CONFIG_X86_PAE */
pgd_t *pgd_alloc(struct mm_struct *mm)
{
pgd_t *pgd;
pmd_t *u_pmds[MAX_PREALLOCATED_USER_PMDS];
- pmd_t *pmds[MAX_PREALLOCATED_PMDS];
+ pmd_t *pmds[PREALLOCATED_PMDS];
- pgd = _pgd_alloc();
+ pgd = _pgd_alloc(mm);
if (pgd == NULL)
goto out;
@@ -467,7 +368,7 @@ out_free_pmds:
if (sizeof(pmds) != 0)
free_pmds(mm, pmds, PREALLOCATED_PMDS);
out_free_pgd:
- _pgd_free(pgd);
+ _pgd_free(mm, pgd);
out:
return NULL;
}
@@ -477,7 +378,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
pgd_mop_up_pmds(mm, pgd);
pgd_dtor(pgd);
paravirt_pgd_free(mm, pgd);
- _pgd_free(pgd);
+ _pgd_free(mm, pgd);
}
/*
@@ -619,6 +520,8 @@ int pmdp_clear_flush_young(struct vm_area_struct *vma,
pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmdp)
{
+ VM_WARN_ON_ONCE(!pmd_present(*pmdp));
+
/*
* No flush is necessary. Once an invalid PTE is established, the PTE's
* access and dirty bits cannot be updated.
@@ -627,12 +530,24 @@ pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma, unsigned long address,
}
#endif
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
+ defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
+pud_t pudp_invalidate(struct vm_area_struct *vma, unsigned long address,
+ pud_t *pudp)
+{
+ VM_WARN_ON_ONCE(!pud_present(*pudp));
+ pud_t old = pudp_establish(vma, address, pudp, pud_mkinvalid(*pudp));
+ flush_pud_tlb_range(vma, address, address + HPAGE_PUD_SIZE);
+ return old;
+}
+#endif
+
/**
- * reserve_top_address - reserves a hole in the top of kernel address space
- * @reserve - size of hole to reserve
+ * reserve_top_address - Reserve a hole in the top of the kernel address space
+ * @reserve: Size of hole to reserve
*
* Can be used to relocate the fixmap area and poke a hole in the top
- * of kernel address space to make room for a hypervisor.
+ * of the kernel address space to make room for a hypervisor.
*/
void __init reserve_top_address(unsigned long reserve)
{
@@ -677,9 +592,12 @@ void native_set_fixmap(unsigned /* enum fixed_addresses */ idx,
}
#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
-#ifdef CONFIG_X86_5LEVEL
+#if CONFIG_PGTABLE_LEVELS > 4
/**
- * p4d_set_huge - setup kernel P4D mapping
+ * p4d_set_huge - Set up kernel P4D mapping
+ * @p4d: Pointer to the P4D entry
+ * @addr: Virtual address associated with the P4D entry
+ * @prot: Protection bits to use
*
* No 512GB pages yet -- always return 0
*/
@@ -689,9 +607,10 @@ int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
}
/**
- * p4d_clear_huge - clear kernel P4D mapping when it is set
+ * p4d_clear_huge - Clear kernel P4D mapping when it is set
+ * @p4d: Pointer to the P4D entry to clear
*
- * No 512GB pages yet -- always return 0
+ * No 512GB pages yet -- do nothing
*/
void p4d_clear_huge(p4d_t *p4d)
{
@@ -699,17 +618,14 @@ void p4d_clear_huge(p4d_t *p4d)
#endif
/**
- * pud_set_huge - setup kernel PUD mapping
+ * pud_set_huge - Set up kernel PUD mapping
+ * @pud: Pointer to the PUD entry
+ * @addr: Virtual address associated with the PUD entry
+ * @prot: Protection bits to use
*
* MTRRs can override PAT memory types with 4KiB granularity. Therefore, this
- * function sets up a huge page only if any of the following conditions are met:
- *
- * - MTRRs are disabled, or
- *
- * - MTRRs are enabled and the range is completely covered by a single MTRR, or
- *
- * - MTRRs are enabled and the corresponding MTRR memory type is WB, which
- * has no effect on the requested PAT memory type.
+ * function sets up a huge page only if the complete range has the same MTRR
+ * caching mode.
*
* Callers should try to decrease page size (1GB -> 2MB -> 4K) if the bigger
* page mapping attempt fails.
@@ -718,15 +634,14 @@ void p4d_clear_huge(p4d_t *p4d)
*/
int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
{
- u8 mtrr, uniform;
+ u8 uniform;
- mtrr = mtrr_type_lookup(addr, addr + PUD_SIZE, &uniform);
- if ((mtrr != MTRR_TYPE_INVALID) && (!uniform) &&
- (mtrr != MTRR_TYPE_WRBACK))
+ mtrr_type_lookup(addr, addr + PUD_SIZE, &uniform);
+ if (!uniform)
return 0;
/* Bail out if we are on a populated non-leaf entry: */
- if (pud_present(*pud) && !pud_huge(*pud))
+ if (pud_present(*pud) && !pud_leaf(*pud))
return 0;
set_pte((pte_t *)pud, pfn_pte(
@@ -737,7 +652,10 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
}
/**
- * pmd_set_huge - setup kernel PMD mapping
+ * pmd_set_huge - Set up kernel PMD mapping
+ * @pmd: Pointer to the PMD entry
+ * @addr: Virtual address associated with the PMD entry
+ * @prot: Protection bits to use
*
* See text over pud_set_huge() above.
*
@@ -745,18 +663,17 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
*/
int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
{
- u8 mtrr, uniform;
+ u8 uniform;
- mtrr = mtrr_type_lookup(addr, addr + PMD_SIZE, &uniform);
- if ((mtrr != MTRR_TYPE_INVALID) && (!uniform) &&
- (mtrr != MTRR_TYPE_WRBACK)) {
+ mtrr_type_lookup(addr, addr + PMD_SIZE, &uniform);
+ if (!uniform) {
pr_warn_once("%s: Cannot satisfy [mem %#010llx-%#010llx] with a huge-page mapping due to MTRR override.\n",
__func__, addr, addr + PMD_SIZE);
return 0;
}
/* Bail out if we are on a populated non-leaf entry: */
- if (pmd_present(*pmd) && !pmd_huge(*pmd))
+ if (pmd_present(*pmd) && !pmd_leaf(*pmd))
return 0;
set_pte((pte_t *)pmd, pfn_pte(
@@ -767,13 +684,14 @@ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
}
/**
- * pud_clear_huge - clear kernel PUD mapping when it is set
+ * pud_clear_huge - Clear kernel PUD mapping when it is set
+ * @pud: Pointer to the PUD entry to clear.
*
* Returns 1 on success and 0 on failure (no PUD map is found).
*/
int pud_clear_huge(pud_t *pud)
{
- if (pud_large(*pud)) {
+ if (pud_leaf(*pud)) {
pud_clear(pud);
return 1;
}
@@ -782,13 +700,14 @@ int pud_clear_huge(pud_t *pud)
}
/**
- * pmd_clear_huge - clear kernel PMD mapping when it is set
+ * pmd_clear_huge - Clear kernel PMD mapping when it is set
+ * @pmd: Pointer to the PMD entry to clear.
*
* Returns 1 on success and 0 on failure (no PMD map is found).
*/
int pmd_clear_huge(pmd_t *pmd)
{
- if (pmd_large(*pmd)) {
+ if (pmd_leaf(*pmd)) {
pmd_clear(pmd);
return 1;
}
@@ -798,11 +717,11 @@ int pmd_clear_huge(pmd_t *pmd)
#ifdef CONFIG_X86_64
/**
- * pud_free_pmd_page - Clear pud entry and free pmd page.
- * @pud: Pointer to a PUD.
- * @addr: Virtual address associated with pud.
+ * pud_free_pmd_page - Clear PUD entry and free PMD page
+ * @pud: Pointer to a PUD
+ * @addr: Virtual address associated with PUD
*
- * Context: The pud range has been unmapped and TLB purged.
+ * Context: The PUD range has been unmapped and TLB purged.
* Return: 1 if clearing the entry succeeded. 0 otherwise.
*
* NOTE: Callers must allow a single page allocation.
@@ -810,7 +729,7 @@ int pmd_clear_huge(pmd_t *pmd)
int pud_free_pmd_page(pud_t *pud, unsigned long addr)
{
pmd_t *pmd, *pmd_sv;
- pte_t *pte;
+ struct ptdesc *pt;
int i;
pmd = pud_pgtable(*pud);
@@ -831,38 +750,37 @@ int pud_free_pmd_page(pud_t *pud, unsigned long addr)
for (i = 0; i < PTRS_PER_PMD; i++) {
if (!pmd_none(pmd_sv[i])) {
- pte = (pte_t *)pmd_page_vaddr(pmd_sv[i]);
- free_page((unsigned long)pte);
+ pt = page_ptdesc(pmd_page(pmd_sv[i]));
+ pagetable_dtor_free(pt);
}
}
free_page((unsigned long)pmd_sv);
- pgtable_pmd_page_dtor(virt_to_page(pmd));
- free_page((unsigned long)pmd);
+ pmd_free(&init_mm, pmd);
return 1;
}
/**
- * pmd_free_pte_page - Clear pmd entry and free pte page.
- * @pmd: Pointer to a PMD.
- * @addr: Virtual address associated with pmd.
+ * pmd_free_pte_page - Clear PMD entry and free PTE page.
+ * @pmd: Pointer to the PMD
+ * @addr: Virtual address associated with PMD
*
- * Context: The pmd range has been unmapped and TLB purged.
+ * Context: The PMD range has been unmapped and TLB purged.
* Return: 1 if clearing the entry succeeded. 0 otherwise.
*/
int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
{
- pte_t *pte;
+ struct ptdesc *pt;
- pte = (pte_t *)pmd_page_vaddr(*pmd);
+ pt = page_ptdesc(pmd_page(*pmd));
pmd_clear(pmd);
/* INVLPG to clear all paging-structure caches */
flush_tlb_kernel_range(addr, addr + PAGE_SIZE-1);
- free_page((unsigned long)pte);
+ pagetable_dtor_free(pt);
return 1;
}
@@ -871,7 +789,7 @@ int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
/*
* Disable free page handling on x86-PAE. This assures that ioremap()
- * does not update sync'd pmd entries. See vmalloc_sync_one().
+ * does not update sync'd PMD entries. See vmalloc_sync_one().
*/
int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
{
@@ -880,3 +798,49 @@ int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
#endif /* CONFIG_X86_64 */
#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */
+
+pte_t pte_mkwrite(pte_t pte, struct vm_area_struct *vma)
+{
+ if (vma->vm_flags & VM_SHADOW_STACK)
+ return pte_mkwrite_shstk(pte);
+
+ pte = pte_mkwrite_novma(pte);
+
+ return pte_clear_saveddirty(pte);
+}
+
+pmd_t pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
+{
+ if (vma->vm_flags & VM_SHADOW_STACK)
+ return pmd_mkwrite_shstk(pmd);
+
+ pmd = pmd_mkwrite_novma(pmd);
+
+ return pmd_clear_saveddirty(pmd);
+}
+
+void arch_check_zapped_pte(struct vm_area_struct *vma, pte_t pte)
+{
+ /*
+ * Hardware before shadow stack can (rarely) set Dirty=1
+ * on a Write=0 PTE. So the below condition
+ * only indicates a software bug when shadow stack is
+ * supported by the HW. This checking is covered in
+ * pte_shstk().
+ */
+ VM_WARN_ON_ONCE(!(vma->vm_flags & VM_SHADOW_STACK) &&
+ pte_shstk(pte));
+}
+
+void arch_check_zapped_pmd(struct vm_area_struct *vma, pmd_t pmd)
+{
+ /* See note in arch_check_zapped_pte() */
+ VM_WARN_ON_ONCE(!(vma->vm_flags & VM_SHADOW_STACK) &&
+ pmd_shstk(pmd));
+}
+
+void arch_check_zapped_pud(struct vm_area_struct *vma, pud_t pud)
+{
+ /* See note in arch_check_zapped_pte() */
+ VM_WARN_ON_ONCE(!(vma->vm_flags & VM_SHADOW_STACK) && pud_shstk(pud));
+}