summaryrefslogtreecommitdiff
path: root/arch/x86/mm/pti.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/mm/pti.c')
-rw-r--r--arch/x86/mm/pti.c132
1 files changed, 76 insertions, 56 deletions
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index ffe3b3a087fe..b10d4d131dce 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -6,7 +6,7 @@
*
* https://github.com/IAIK/KAISER
*
- * The original work was written by and and signed off by for the Linux
+ * The original work was written by and signed off by for the Linux
* kernel by:
*
* Signed-off-by: Richard Fellner <richard.fellner@student.tugraz.at>
@@ -38,6 +38,7 @@
#include <asm/desc.h>
#include <asm/sections.h>
#include <asm/set_memory.h>
+#include <asm/bugs.h>
#undef pr_fmt
#define pr_fmt(fmt) "Kernel/User page tables isolation: " fmt
@@ -69,6 +70,7 @@ static void __init pti_print_if_secure(const char *reason)
pr_info("%s\n", reason);
}
+/* Assume mode is auto unless overridden via cmdline below. */
static enum pti_mode {
PTI_AUTO = 0,
PTI_FORCE_OFF,
@@ -77,50 +79,55 @@ static enum pti_mode {
void __init pti_check_boottime_disable(void)
{
- char arg[5];
- int ret;
-
- /* Assume mode is auto unless overridden. */
- pti_mode = PTI_AUTO;
-
if (hypervisor_is_type(X86_HYPER_XEN_PV)) {
pti_mode = PTI_FORCE_OFF;
pti_print_if_insecure("disabled on XEN PV.");
return;
}
- ret = cmdline_find_option(boot_command_line, "pti", arg, sizeof(arg));
- if (ret > 0) {
- if (ret == 3 && !strncmp(arg, "off", 3)) {
- pti_mode = PTI_FORCE_OFF;
- pti_print_if_insecure("disabled on command line.");
- return;
- }
- if (ret == 2 && !strncmp(arg, "on", 2)) {
- pti_mode = PTI_FORCE_ON;
- pti_print_if_secure("force enabled on command line.");
- goto enable;
- }
- if (ret == 4 && !strncmp(arg, "auto", 4)) {
- pti_mode = PTI_AUTO;
- goto autosel;
- }
- }
-
- if (cmdline_find_option_bool(boot_command_line, "nopti") ||
- cpu_mitigations_off()) {
+ if (pti_mode == PTI_AUTO &&
+ !cpu_attack_vector_mitigated(CPU_MITIGATE_USER_KERNEL))
pti_mode = PTI_FORCE_OFF;
+ if (pti_mode == PTI_FORCE_OFF) {
pti_print_if_insecure("disabled on command line.");
return;
}
-autosel:
- if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
+ if (pti_mode == PTI_FORCE_ON)
+ pti_print_if_secure("force enabled on command line.");
+
+ if (pti_mode == PTI_AUTO && !boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
return;
-enable:
+
setup_force_cpu_cap(X86_FEATURE_PTI);
+
+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) {
+ pr_debug("PTI enabled, disabling INVLPGB\n");
+ setup_clear_cpu_cap(X86_FEATURE_INVLPGB);
+ }
}
+static int __init pti_parse_cmdline(char *arg)
+{
+ if (!strcmp(arg, "off"))
+ pti_mode = PTI_FORCE_OFF;
+ else if (!strcmp(arg, "on"))
+ pti_mode = PTI_FORCE_ON;
+ else if (!strcmp(arg, "auto"))
+ pti_mode = PTI_AUTO;
+ else
+ return -EINVAL;
+ return 0;
+}
+early_param("pti", pti_parse_cmdline);
+
+static int __init pti_parse_cmdline_nopti(char *arg)
+{
+ pti_mode = PTI_FORCE_OFF;
+ return 0;
+}
+early_param("nopti", pti_parse_cmdline_nopti);
+
pgd_t __pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd)
{
/*
@@ -132,7 +139,7 @@ pgd_t __pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd)
* Top-level entries added to init_mm's usermode pgd after boot
* will not be automatically propagated to other mms.
*/
- if (!pgdp_maps_userspace(pgdp))
+ if (!pgdp_maps_userspace(pgdp) || (pgd.pgd & _PAGE_NOPTISHADOW))
return pgd;
/*
@@ -185,7 +192,7 @@ static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
}
- BUILD_BUG_ON(pgd_large(*pgd) != 0);
+ BUILD_BUG_ON(pgd_leaf(*pgd));
return p4d_offset(pgd, address);
}
@@ -206,7 +213,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
if (!p4d)
return NULL;
- BUILD_BUG_ON(p4d_large(*p4d) != 0);
+ BUILD_BUG_ON(p4d_leaf(*p4d));
if (p4d_none(*p4d)) {
unsigned long new_pud_page = __get_free_page(gfp);
if (WARN_ON_ONCE(!new_pud_page))
@@ -217,7 +224,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
pud = pud_offset(p4d, address);
/* The user page tables do not use large mappings: */
- if (pud_large(*pud)) {
+ if (pud_leaf(*pud)) {
WARN_ON(1);
return NULL;
}
@@ -241,7 +248,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
*
* Returns a pointer to a PTE on success, or NULL on failure.
*/
-static pte_t *pti_user_pagetable_walk_pte(unsigned long address)
+static pte_t *pti_user_pagetable_walk_pte(unsigned long address, bool late_text)
{
gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
pmd_t *pmd;
@@ -251,10 +258,15 @@ static pte_t *pti_user_pagetable_walk_pte(unsigned long address)
if (!pmd)
return NULL;
- /* We can't do anything sensible if we hit a large mapping. */
- if (pmd_large(*pmd)) {
- WARN_ON(1);
- return NULL;
+ /* Large PMD mapping found */
+ if (pmd_leaf(*pmd)) {
+ /* Clear the PMD if we hit a large mapping from the first round */
+ if (late_text) {
+ set_pmd(pmd, __pmd(0));
+ } else {
+ WARN_ON_ONCE(1);
+ return NULL;
+ }
}
if (pmd_none(*pmd)) {
@@ -283,7 +295,7 @@ static void __init pti_setup_vsyscall(void)
if (!pte || WARN_ON(level != PG_LEVEL_4K) || pte_none(*pte))
return;
- target_pte = pti_user_pagetable_walk_pte(VSYSCALL_ADDR);
+ target_pte = pti_user_pagetable_walk_pte(VSYSCALL_ADDR, false);
if (WARN_ON(!target_pte))
return;
@@ -301,7 +313,7 @@ enum pti_clone_level {
static void
pti_clone_pgtable(unsigned long start, unsigned long end,
- enum pti_clone_level level)
+ enum pti_clone_level level, bool late_text)
{
unsigned long addr;
@@ -341,7 +353,7 @@ pti_clone_pgtable(unsigned long start, unsigned long end,
continue;
}
- if (pmd_large(*pmd) || level == PTI_CLONE_PMD) {
+ if (pmd_leaf(*pmd) || level == PTI_CLONE_PMD) {
target_pmd = pti_user_pagetable_walk_pmd(addr);
if (WARN_ON(!target_pmd))
return;
@@ -374,14 +386,14 @@ pti_clone_pgtable(unsigned long start, unsigned long end,
*/
*target_pmd = *pmd;
- addr += PMD_SIZE;
+ addr = round_up(addr + 1, PMD_SIZE);
} else if (level == PTI_CLONE_PTE) {
/* Walk the page-table down to the pte level */
pte = pte_offset_kernel(pmd, addr);
if (pte_none(*pte)) {
- addr += PAGE_SIZE;
+ addr = round_up(addr + 1, PAGE_SIZE);
continue;
}
@@ -390,7 +402,7 @@ pti_clone_pgtable(unsigned long start, unsigned long end,
return;
/* Allocate PTE in the user page-table */
- target_pte = pti_user_pagetable_walk_pte(addr);
+ target_pte = pti_user_pagetable_walk_pte(addr, late_text);
if (WARN_ON(!target_pte))
return;
@@ -401,7 +413,7 @@ pti_clone_pgtable(unsigned long start, unsigned long end,
/* Clone the PTE */
*target_pte = *pte;
- addr += PAGE_SIZE;
+ addr = round_up(addr + 1, PAGE_SIZE);
} else {
BUG();
@@ -452,7 +464,7 @@ static void __init pti_clone_user_shared(void)
phys_addr_t pa = per_cpu_ptr_to_phys((void *)va);
pte_t *target_pte;
- target_pte = pti_user_pagetable_walk_pte(va);
+ target_pte = pti_user_pagetable_walk_pte(va, false);
if (WARN_ON(!target_pte))
return;
@@ -475,7 +487,7 @@ static void __init pti_clone_user_shared(void)
start = CPU_ENTRY_AREA_BASE;
end = start + (PAGE_SIZE * CPU_ENTRY_AREA_PAGES);
- pti_clone_pgtable(start, end, PTI_CLONE_PMD);
+ pti_clone_pgtable(start, end, PTI_CLONE_PMD, false);
}
#endif /* CONFIG_X86_64 */
@@ -492,11 +504,11 @@ static void __init pti_setup_espfix64(void)
/*
* Clone the populated PMDs of the entry text and force it RO.
*/
-static void pti_clone_entry_text(void)
+static void pti_clone_entry_text(bool late)
{
pti_clone_pgtable((unsigned long) __entry_text_start,
(unsigned long) __entry_text_end,
- PTI_CLONE_PMD);
+ PTI_LEVEL_KERNEL_IMAGE, late);
}
/*
@@ -571,7 +583,7 @@ static void pti_clone_kernel_text(void)
* pti_set_kernel_image_nonglobal() did to clear the
* global bit.
*/
- pti_clone_pgtable(start, end_clone, PTI_LEVEL_KERNEL_IMAGE);
+ pti_clone_pgtable(start, end_clone, PTI_LEVEL_KERNEL_IMAGE, false);
/*
* pti_clone_pgtable() will set the global bit in any PMDs
@@ -592,7 +604,7 @@ static void pti_set_kernel_image_nonglobal(void)
* of the image.
*/
unsigned long start = PFN_ALIGN(_text);
- unsigned long end = ALIGN((unsigned long)_end, PMD_PAGE_SIZE);
+ unsigned long end = ALIGN((unsigned long)_end, PMD_SIZE);
/*
* This clears _PAGE_GLOBAL from the entire kernel image.
@@ -638,8 +650,15 @@ void __init pti_init(void)
/* Undo all global bits from the init pagetables in head_64.S: */
pti_set_kernel_image_nonglobal();
+
/* Replace some of the global bits just for shared entry text: */
- pti_clone_entry_text();
+ /*
+ * This is very early in boot. Device and Late initcalls can do
+ * modprobe before free_initmem() and mark_readonly(). This
+ * pti_clone_entry_text() allows those user-mode-helpers to function,
+ * but notably the text is still RW.
+ */
+ pti_clone_entry_text(false);
pti_setup_espfix64();
pti_setup_vsyscall();
}
@@ -656,10 +675,11 @@ void pti_finalize(void)
if (!boot_cpu_has(X86_FEATURE_PTI))
return;
/*
- * We need to clone everything (again) that maps parts of the
- * kernel image.
+ * This is after free_initmem() (all initcalls are done) and we've done
+ * mark_readonly(). Text is now NX which might've split some PMDs
+ * relative to the early clone.
*/
- pti_clone_entry_text();
+ pti_clone_entry_text(true);
pti_clone_kernel_text();
debug_checkwx_user();