diff options
Diffstat (limited to 'arch/x86/platform/efi/efi_64.c')
| -rw-r--r-- | arch/x86/platform/efi/efi_64.c | 222 |
1 files changed, 87 insertions, 135 deletions
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 8e364c4c6768..b4409df2105a 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -33,7 +33,7 @@ #include <linux/reboot.h> #include <linux/slab.h> #include <linux/ucs2_string.h> -#include <linux/mem_encrypt.h> +#include <linux/cc_platform.h> #include <linux/sched/task.h> #include <asm/setup.h> @@ -47,16 +47,14 @@ #include <asm/realmode.h> #include <asm/time.h> #include <asm/pgalloc.h> +#include <asm/sev.h> /* * We allocate runtime services regions top-down, starting from -4G, i.e. * 0xffff_ffff_0000_0000 and limit EFI VA mapping space to 64G. */ static u64 efi_va = EFI_VA_START; - -struct efi_scratch efi_scratch; - -EXPORT_SYMBOL_GPL(efi_mm); +static struct mm_struct *efi_prev_mm; /* * We need our own copy of the higher levels of the page tables @@ -74,34 +72,34 @@ int __init efi_alloc_page_tables(void) pud_t *pud; gfp_t gfp_mask; - if (efi_have_uv1_memmap()) - return 0; - gfp_mask = GFP_KERNEL | __GFP_ZERO; - efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER); + efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, pgd_allocation_order()); if (!efi_pgd) - return -ENOMEM; + goto fail; pgd = efi_pgd + pgd_index(EFI_VA_END); p4d = p4d_alloc(&init_mm, pgd, EFI_VA_END); - if (!p4d) { - free_page((unsigned long)efi_pgd); - return -ENOMEM; - } + if (!p4d) + goto free_pgd; pud = pud_alloc(&init_mm, p4d, EFI_VA_END); - if (!pud) { - if (pgtable_l5_enabled()) - free_page((unsigned long) pgd_page_vaddr(*pgd)); - free_pages((unsigned long)efi_pgd, PGD_ALLOCATION_ORDER); - return -ENOMEM; - } + if (!pud) + goto free_p4d; efi_mm.pgd = efi_pgd; mm_init_cpumask(&efi_mm); init_new_context(NULL, &efi_mm); + set_notrack_mm(&efi_mm); return 0; + +free_p4d: + if (pgtable_l5_enabled()) + free_page((unsigned long)pgd_page_vaddr(*pgd)); +free_pgd: + free_pages((unsigned long)efi_pgd, pgd_allocation_order()); +fail: + return -ENOMEM; } /* @@ -115,34 +113,12 @@ void efi_sync_low_kernel_mappings(void) pud_t *pud_k, *pud_efi; pgd_t *efi_pgd = efi_mm.pgd; - if (efi_have_uv1_memmap()) - return; - - /* - * We can share all PGD entries apart from the one entry that - * covers the EFI runtime mapping space. - * - * Make sure the EFI runtime region mappings are guaranteed to - * only span a single PGD entry and that the entry also maps - * other important kernel regions. - */ - MAYBE_BUILD_BUG_ON(pgd_index(EFI_VA_END) != pgd_index(MODULES_END)); - MAYBE_BUILD_BUG_ON((EFI_VA_START & PGDIR_MASK) != - (EFI_VA_END & PGDIR_MASK)); - pgd_efi = efi_pgd + pgd_index(PAGE_OFFSET); pgd_k = pgd_offset_k(PAGE_OFFSET); num_entries = pgd_index(EFI_VA_END) - pgd_index(PAGE_OFFSET); memcpy(pgd_efi, pgd_k, sizeof(pgd_t) * num_entries); - /* - * As with PGDs, we share all P4D entries apart from the one entry - * that covers the EFI runtime mapping space. - */ - BUILD_BUG_ON(p4d_index(EFI_VA_END) != p4d_index(MODULES_END)); - BUILD_BUG_ON((EFI_VA_START & P4D_MASK) != (EFI_VA_END & P4D_MASK)); - pgd_efi = efi_pgd + pgd_index(EFI_VA_END); pgd_k = pgd_offset_k(EFI_VA_END); p4d_efi = p4d_offset(pgd_efi, 0); @@ -201,14 +177,12 @@ virt_to_phys_or_null_size(void *va, unsigned long size) int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) { - unsigned long pfn, text, pf, rodata; + extern const u8 __efi64_thunk_ret_tramp[]; + unsigned long pfn, text, pf, rodata, tramp; struct page *page; unsigned npages; pgd_t *pgd = efi_mm.pgd; - if (efi_have_uv1_memmap()) - return 0; - /* * It can happen that the physical address of new_memmap lands in memory * which is not mapped in the EFI page table. Therefore we need to go @@ -223,7 +197,7 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) } /* - * Certain firmware versions are way too sentimential and still believe + * Certain firmware versions are way too sentimental and still believe * they are exclusive and unquestionable owners of the first physical page, * even though they explicitly mark it as EFI_CONVENTIONAL_MEMORY * (but then write-access it later during SetVirtualAddressMap()). @@ -239,6 +213,15 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) } /* + * When SEV-ES is active, the GHCB as set by the kernel will be used + * by firmware. Create a 1:1 unencrypted mapping for each GHCB. + */ + if (sev_es_efi_map_ghcbs_cas(pgd)) { + pr_err("Failed to create 1:1 mapping for the GHCBs and CAs!\n"); + return 1; + } + + /* * When making calls to the firmware everything needs to be 1:1 * mapped and addressable with 32-bit pointers. Map the kernel * text and allocate a new stack because we can't rely on the @@ -253,26 +236,35 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) return 1; } - efi_scratch.phys_stack = page_to_phys(page + 1); /* stack grows down */ + efi_mixed_mode_stack_pa = page_to_phys(page + 1); /* stack grows down */ npages = (_etext - _text) >> PAGE_SHIFT; text = __pa(_text); - pfn = text >> PAGE_SHIFT; - pf = _PAGE_ENC; - if (kernel_map_pages_in_pgd(pgd, pfn, text, npages, pf)) { - pr_err("Failed to map kernel text 1:1\n"); + if (kernel_unmap_pages_in_pgd(pgd, text, npages)) { + pr_err("Failed to unmap kernel text 1:1 mapping\n"); return 1; } npages = (__end_rodata - __start_rodata) >> PAGE_SHIFT; rodata = __pa(__start_rodata); pfn = rodata >> PAGE_SHIFT; + + pf = _PAGE_NX | _PAGE_ENC; if (kernel_map_pages_in_pgd(pgd, pfn, rodata, npages, pf)) { pr_err("Failed to map kernel rodata 1:1\n"); return 1; } + tramp = __pa(__efi64_thunk_ret_tramp); + pfn = tramp >> PAGE_SHIFT; + + pf = _PAGE_ENC; + if (kernel_map_pages_in_pgd(pgd, pfn, tramp, 1, pf)) { + pr_err("Failed to map mixed mode return trampoline\n"); + return 1; + } + return 0; } @@ -301,7 +293,8 @@ static void __init __map_region(efi_memory_desc_t *md, u64 va) if (!(md->attribute & EFI_MEMORY_WB)) flags |= _PAGE_PCD; - if (sev_active() && md->type != EFI_MEMORY_MAPPED_IO) + if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT) && + md->type != EFI_MEMORY_MAPPED_IO) flags |= _PAGE_ENC; pfn = md->phys_addr >> PAGE_SHIFT; @@ -315,9 +308,6 @@ void __init efi_map_region(efi_memory_desc_t *md) unsigned long size = md->num_pages << PAGE_SHIFT; u64 pa = md->phys_addr; - if (efi_have_uv1_memmap()) - return old_map_region(md); - /* * Make sure the 1:1 mappings are present as a catch-all for b0rked * firmware which doesn't update all internal pointers after switching @@ -400,17 +390,22 @@ static int __init efi_update_mappings(efi_memory_desc_t *md, unsigned long pf) return err1 || err2; } -static int __init efi_update_mem_attr(struct mm_struct *mm, efi_memory_desc_t *md) +bool efi_disable_ibt_for_runtime __ro_after_init = true; + +static int __init efi_update_mem_attr(struct mm_struct *mm, efi_memory_desc_t *md, + bool has_ibt) { unsigned long pf = 0; + efi_disable_ibt_for_runtime |= !has_ibt; + if (md->attribute & EFI_MEMORY_XP) pf |= _PAGE_NX; if (!(md->attribute & EFI_MEMORY_RO)) pf |= _PAGE_RW; - if (sev_active()) + if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) pf |= _PAGE_ENC; return efi_update_mappings(md, pf); @@ -418,66 +413,16 @@ static int __init efi_update_mem_attr(struct mm_struct *mm, efi_memory_desc_t *m void __init efi_runtime_update_mappings(void) { - efi_memory_desc_t *md; - - if (efi_have_uv1_memmap()) { - if (__supported_pte_mask & _PAGE_NX) - runtime_code_page_mkexec(); - return; - } - - /* - * Use the EFI Memory Attribute Table for mapping permissions if it - * exists, since it is intended to supersede EFI_PROPERTIES_TABLE. - */ if (efi_enabled(EFI_MEM_ATTR)) { + efi_disable_ibt_for_runtime = false; efi_memattr_apply_permissions(NULL, efi_update_mem_attr); - return; - } - - /* - * EFI_MEMORY_ATTRIBUTES_TABLE is intended to replace - * EFI_PROPERTIES_TABLE. So, use EFI_PROPERTIES_TABLE to update - * permissions only if EFI_MEMORY_ATTRIBUTES_TABLE is not - * published by the firmware. Even if we find a buggy implementation of - * EFI_MEMORY_ATTRIBUTES_TABLE, don't fall back to - * EFI_PROPERTIES_TABLE, because of the same reason. - */ - - if (!efi_enabled(EFI_NX_PE_DATA)) - return; - - for_each_efi_memory_desc(md) { - unsigned long pf = 0; - - if (!(md->attribute & EFI_MEMORY_RUNTIME)) - continue; - - if (!(md->attribute & EFI_MEMORY_WB)) - pf |= _PAGE_PCD; - - if ((md->attribute & EFI_MEMORY_XP) || - (md->type == EFI_RUNTIME_SERVICES_DATA)) - pf |= _PAGE_NX; - - if (!(md->attribute & EFI_MEMORY_RO) && - (md->type != EFI_RUNTIME_SERVICES_CODE)) - pf |= _PAGE_RW; - - if (sev_active()) - pf |= _PAGE_ENC; - - efi_update_mappings(md, pf); } } void __init efi_dump_pagetable(void) { #ifdef CONFIG_EFI_PGT_DUMP - if (efi_have_uv1_memmap()) - ptdump_walk_pgd_level(NULL, &init_mm); - else - ptdump_walk_pgd_level(NULL, &efi_mm); + ptdump_walk_pgd_level(NULL, &efi_mm); #endif } @@ -486,13 +431,31 @@ void __init efi_dump_pagetable(void) * in a kernel thread and user context. Preemption needs to remain disabled * while the EFI-mm is borrowed. mmgrab()/mmdrop() is not used because the mm * can not change under us. - * It should be ensured that there are no concurent calls to this function. + * It should be ensured that there are no concurrent calls to this function. */ -void efi_switch_mm(struct mm_struct *mm) +static void efi_enter_mm(void) { - efi_scratch.prev_mm = current->active_mm; - current->active_mm = mm; - switch_mm(efi_scratch.prev_mm, mm, NULL); + efi_prev_mm = use_temporary_mm(&efi_mm); +} + +static void efi_leave_mm(void) +{ + unuse_temporary_mm(efi_prev_mm); +} + +void arch_efi_call_virt_setup(void) +{ + efi_sync_low_kernel_mappings(); + efi_fpu_begin(); + firmware_restrict_branch_speculation_start(); + efi_enter_mm(); +} + +void arch_efi_call_virt_teardown(void) +{ + efi_leave_mm(); + firmware_restrict_branch_speculation_end(); + efi_fpu_end(); } static DEFINE_SPINLOCK(efi_runtime_lock); @@ -556,12 +519,12 @@ efi_thunk_set_virtual_address_map(unsigned long memory_map_size, efi_sync_low_kernel_mappings(); local_irq_save(flags); - efi_switch_mm(&efi_mm); + efi_enter_mm(); status = __efi_thunk(set_virtual_address_map, memory_map_size, descriptor_size, descriptor_version, virtual_map); - efi_switch_mm(efi_scratch.prev_mm); + efi_leave_mm(); local_irq_restore(flags); return status; @@ -849,40 +812,29 @@ efi_set_virtual_address_map(unsigned long memory_map_size, const efi_system_table_t *systab = (efi_system_table_t *)systab_phys; efi_status_t status; unsigned long flags; - pgd_t *save_pgd = NULL; if (efi_is_mixed()) return efi_thunk_set_virtual_address_map(memory_map_size, descriptor_size, descriptor_version, virtual_map); + efi_enter_mm(); - if (efi_have_uv1_memmap()) { - save_pgd = efi_uv1_memmap_phys_prolog(); - if (!save_pgd) - return EFI_ABORTED; - } else { - efi_switch_mm(&efi_mm); - } - - kernel_fpu_begin(); + efi_fpu_begin(); /* Disable interrupts around EFI calls: */ local_irq_save(flags); - status = efi_call(efi.runtime->set_virtual_address_map, - memory_map_size, descriptor_size, - descriptor_version, virtual_map); + status = arch_efi_call_virt(efi.runtime, set_virtual_address_map, + memory_map_size, descriptor_size, + descriptor_version, virtual_map); local_irq_restore(flags); - kernel_fpu_end(); + efi_fpu_end(); /* grab the virtually remapped EFI runtime services table pointer */ efi.runtime = READ_ONCE(systab->runtime); - if (save_pgd) - efi_uv1_memmap_phys_epilog(save_pgd); - else - efi_switch_mm(efi_scratch.prev_mm); + efi_leave_mm(); return status; } |
