author     Ard Biesheuvel <ardb@kernel.org>  2020-01-13 18:22:39 +0100
committer  Ingo Molnar <mingo@kernel.org>    2020-01-20 08:13:01 +0100
commit     1f299fad1e312947c974c6a1d8a3a484f27a6111 (patch)
tree       87b1234b4391ae11fa9f6dd87844e7f8fc6ce6e3 /arch/x86/platform/uv
parent     97bb9cdc32108036170d9d0d208257168f80d9e9 (diff)
efi/x86: Limit EFI old memory map to SGI UV machines
We carry a quirk in the x86 EFI code to switch back to an older method of
mapping the EFI runtime services memory regions, because it was deemed risky
at the time to implement a new method without providing a fallback to the old
method in case problems arose.

Such problems did arise, but they appear to be limited to SGI UV1 machines,
and so these are the only ones for which the fallback gets enabled
automatically (via a DMI quirk). The fallback can be enabled manually as well,
by passing efi=old_map, but there is very little evidence that suggests that
this is something that is being relied upon in the field.

Given that UV1 support is not enabled by default by the distros (Ubuntu,
Fedora), there is no point in carrying this fallback code all the time if
there are no other users. So let's move it into the UV support code, and
document that efi=old_map now requires this support code to be enabled.

Note that efi=old_map has been used in the past on other SGI UV machines to
work around kernel regressions in production, so we keep the option to enable
it by hand, but only if the kernel was built with UV support.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20200113172245.27925-8-ardb@kernel.org
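For context, the automatic enablement mentioned above works by matching the
machine via DMI and then setting the new EFI_UV1_MEMMAP flag. A minimal
sketch of such a quirk follows; the table contents, function name and call
site are illustrative assumptions, not taken from this patch:

#include <linux/dmi.h>
#include <linux/efi.h>

/* Illustrative DMI table: the match string below is a placeholder. */
static const struct dmi_system_id uv1_dmi[] __initconst = {
        { NULL, "SGI UV1", { DMI_MATCH(DMI_PRODUCT_NAME, "SGI UV1") } },
        { }     /* terminating entry */
};

static void __init uv1_memmap_quirk(void)
{
        /* Fall back to the old (1:1) EFI memmap only on matching machines. */
        if (dmi_check_system(uv1_dmi))
                set_bit(EFI_UV1_MEMMAP, &efi.flags);
}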
Diffstat (limited to 'arch/x86/platform/uv')
-rw-r--r--  arch/x86/platform/uv/bios_uv.c  164
1 file changed, 162 insertions(+), 2 deletions(-)
diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c
index 5c0e2eb5d87c..7c2b8c5d0b49 100644
--- a/arch/x86/platform/uv/bios_uv.c
+++ b/arch/x86/platform/uv/bios_uv.c
@@ -31,10 +31,10 @@ static s64 __uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
                 return BIOS_STATUS_UNIMPLEMENTED;
 
         /*
-         * If EFI_OLD_MEMMAP is set, we need to fall back to using our old EFI
+         * If EFI_UV1_MEMMAP is set, we need to fall back to using our old EFI
          * callback method, which uses efi_call() directly, with the kernel page tables:
          */
-        if (unlikely(efi_enabled(EFI_OLD_MEMMAP))) {
+        if (unlikely(efi_enabled(EFI_UV1_MEMMAP))) {
                 kernel_fpu_begin();
                 ret = efi_call((void *)__va(tab->function), (u64)which, a1, a2, a3, a4, a5);
                 kernel_fpu_end();
@@ -217,3 +217,163 @@ int uv_bios_init(void)
pr_info("UV: UVsystab: Revision:%x\n", uv_systab->revision);
return 0;
}
+
+static void __init early_code_mapping_set_exec(int executable)
+{
+        efi_memory_desc_t *md;
+
+        if (!(__supported_pte_mask & _PAGE_NX))
+                return;
+
+        /* Make EFI service code area executable */
+        for_each_efi_memory_desc(md) {
+                if (md->type == EFI_RUNTIME_SERVICES_CODE ||
+                    md->type == EFI_BOOT_SERVICES_CODE)
+                        efi_set_executable(md, executable);
+        }
+}
+
+void __init efi_uv1_memmap_phys_epilog(pgd_t *save_pgd)
+{
+        /*
+         * After the lock is released, the original page table is restored.
+         */
+        int pgd_idx, i;
+        int nr_pgds;
+        pgd_t *pgd;
+        p4d_t *p4d;
+        pud_t *pud;
+
+        nr_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE);
+
+        for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++) {
+                pgd = pgd_offset_k(pgd_idx * PGDIR_SIZE);
+                set_pgd(pgd_offset_k(pgd_idx * PGDIR_SIZE), save_pgd[pgd_idx]);
+
+                if (!pgd_present(*pgd))
+                        continue;
+
+                for (i = 0; i < PTRS_PER_P4D; i++) {
+                        p4d = p4d_offset(pgd,
+                                         pgd_idx * PGDIR_SIZE + i * P4D_SIZE);
+
+                        if (!p4d_present(*p4d))
+                                continue;
+
+                        pud = (pud_t *)p4d_page_vaddr(*p4d);
+                        pud_free(&init_mm, pud);
+                }
+
+                p4d = (p4d_t *)pgd_page_vaddr(*pgd);
+                p4d_free(&init_mm, p4d);
+        }
+
+        kfree(save_pgd);
+
+        __flush_tlb_all();
+        early_code_mapping_set_exec(0);
+}
+
+pgd_t * __init efi_uv1_memmap_phys_prolog(void)
+{
+        unsigned long vaddr, addr_pgd, addr_p4d, addr_pud;
+        pgd_t *save_pgd, *pgd_k, *pgd_efi;
+        p4d_t *p4d, *p4d_k, *p4d_efi;
+        pud_t *pud;
+
+        int pgd;
+        int n_pgds, i, j;
+
+        early_code_mapping_set_exec(1);
+
+        n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE);
+        save_pgd = kmalloc_array(n_pgds, sizeof(*save_pgd), GFP_KERNEL);
+        if (!save_pgd)
+                return NULL;
+
+        /*
+         * Build 1:1 identity mapping for UV1 memmap usage. Note that
+         * PAGE_OFFSET is PGDIR_SIZE aligned when KASLR is disabled, while
+         * it is PUD_SIZE ALIGNED with KASLR enabled. So for a given physical
+         * address X, the pud_index(X) != pud_index(__va(X)), we can only copy
+         * PUD entry of __va(X) to fill in pud entry of X to build 1:1 mapping.
+         * This means here we can only reuse the PMD tables of the direct mapping.
+         */
+        for (pgd = 0; pgd < n_pgds; pgd++) {
+                addr_pgd = (unsigned long)(pgd * PGDIR_SIZE);
+                vaddr = (unsigned long)__va(pgd * PGDIR_SIZE);
+                pgd_efi = pgd_offset_k(addr_pgd);
+                save_pgd[pgd] = *pgd_efi;
+
+                p4d = p4d_alloc(&init_mm, pgd_efi, addr_pgd);
+                if (!p4d) {
+                        pr_err("Failed to allocate p4d table!\n");
+                        goto out;
+                }
+
+                for (i = 0; i < PTRS_PER_P4D; i++) {
+                        addr_p4d = addr_pgd + i * P4D_SIZE;
+                        p4d_efi = p4d + p4d_index(addr_p4d);
+
+                        pud = pud_alloc(&init_mm, p4d_efi, addr_p4d);
+                        if (!pud) {
+                                pr_err("Failed to allocate pud table!\n");
+                                goto out;
+                        }
+
+                        for (j = 0; j < PTRS_PER_PUD; j++) {
+                                addr_pud = addr_p4d + j * PUD_SIZE;
+
+                                if (addr_pud > (max_pfn << PAGE_SHIFT))
+                                        break;
+
+                                vaddr = (unsigned long)__va(addr_pud);
+
+                                pgd_k = pgd_offset_k(vaddr);
+                                p4d_k = p4d_offset(pgd_k, vaddr);
+                                pud[j] = *pud_offset(p4d_k, vaddr);
+                        }
+                }
+                pgd_offset_k(pgd * PGDIR_SIZE)->pgd &= ~_PAGE_NX;
+        }
+
+        __flush_tlb_all();
+        return save_pgd;
+out:
+        efi_uv1_memmap_phys_epilog(save_pgd);
+        return NULL;
+}
+
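+/*
+ * Map an EFI memory region for use with the UV1 memmap: MMIO regions are
+ * ioremap()ed, anything else is added to the kernel direct mapping (and
+ * marked uncached when the region lacks EFI_MEMORY_WB) and accessed via
+ * its __va() address.
+ */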
+void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size,
+                                 u32 type, u64 attribute)
+{
+        unsigned long last_map_pfn;
+
+        if (type == EFI_MEMORY_MAPPED_IO)
+                return ioremap(phys_addr, size);
+
+        last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size);
+        if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) {
+                unsigned long top = last_map_pfn << PAGE_SHIFT;
+                efi_ioremap(top, size - (top - phys_addr), type, attribute);
+        }
+
+        if (!(attribute & EFI_MEMORY_WB))
+                efi_memory_uc((u64)(unsigned long)__va(phys_addr), size);
+
+        return (void __iomem *)__va(phys_addr);
+}
+
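+/*
+ * "efi=old_map" is still accepted here, but now that this parser lives in
+ * the UV support code it only takes effect when the kernel is built with
+ * UV support.
+ */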
+static int __init arch_parse_efi_cmdline(char *str)
+{
+        if (!str) {
+                pr_warn("need at least one option\n");
+                return -EINVAL;
+        }
+
+        if (parse_option_str(str, "old_map"))
+                set_bit(EFI_UV1_MEMMAP, &efi.flags);
+
+        return 0;
+}
+early_param("efi", arch_parse_efi_cmdline);
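
The efi_uv1_memmap_phys_prolog()/efi_uv1_memmap_phys_epilog() pair added
above is intended to bracket EFI calls that must run with the kernel page
tables and a temporary 1:1 mapping of low memory. A minimal sketch of that
pairing, assuming a hypothetical caller (the helper name and its single
argument are illustrative, not taken from this patch):

/* Sketch only: shows the intended prolog/epilog pairing around efi_call(). */
static s64 __init uv1_efi_call_example(void *func, u64 arg)
{
        pgd_t *save_pgd;
        s64 ret;

        save_pgd = efi_uv1_memmap_phys_prolog();        /* install 1:1 mapping */
        if (!save_pgd)
                return BIOS_STATUS_UNIMPLEMENTED;

        kernel_fpu_begin();
        ret = efi_call(func, arg);                      /* as in __uv_bios_call() */
        kernel_fpu_end();

        efi_uv1_memmap_phys_epilog(save_pgd);           /* restore saved page tables */
        return ret;
}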