diff options
Diffstat (limited to 'arch/x86/mm/ioremap.c')
| -rw-r--r-- | arch/x86/mm/ioremap.c | 298 |
1 files changed, 185 insertions, 113 deletions
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 5378d10f1d31..12c8180ca1ba 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Re-map IO memory to kernel address space so that we can access it. * This is needed for high PCI addresses that aren't mapped in the @@ -10,26 +11,31 @@ #include <linux/init.h> #include <linux/io.h> #include <linux/ioport.h> +#include <linux/ioremap.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/mmiotrace.h> -#include <linux/mem_encrypt.h> +#include <linux/cc_platform.h> #include <linux/efi.h> +#include <linux/pgtable.h> +#include <linux/kmsan.h> #include <asm/set_memory.h> #include <asm/e820/api.h> +#include <asm/efi.h> #include <asm/fixmap.h> -#include <asm/pgtable.h> #include <asm/tlbflush.h> #include <asm/pgalloc.h> -#include <asm/pat.h> +#include <asm/memtype.h> #include <asm/setup.h> #include "physaddr.h" -struct ioremap_mem_flags { - bool system_ram; - bool desc_other; +/* + * Descriptor controlling ioremap() behavior. + */ +struct ioremap_desc { + unsigned int flags; }; /* @@ -61,59 +67,103 @@ int ioremap_change_attr(unsigned long vaddr, unsigned long size, return err; } -static bool __ioremap_check_ram(struct resource *res) +/* Does the range (or a subset of) contain normal RAM? */ +static unsigned int __ioremap_check_ram(struct resource *res) { unsigned long start_pfn, stop_pfn; - unsigned long i; + unsigned long pfn; if ((res->flags & IORESOURCE_SYSTEM_RAM) != IORESOURCE_SYSTEM_RAM) - return false; + return 0; start_pfn = (res->start + PAGE_SIZE - 1) >> PAGE_SHIFT; stop_pfn = (res->end + 1) >> PAGE_SHIFT; if (stop_pfn > start_pfn) { - for (i = 0; i < (stop_pfn - start_pfn); ++i) - if (pfn_valid(start_pfn + i) && - !PageReserved(pfn_to_page(start_pfn + i))) - return true; + for_each_valid_pfn(pfn, start_pfn, stop_pfn) + if (!PageReserved(pfn_to_page(pfn))) + return IORES_MAP_SYSTEM_RAM; } - return false; + return 0; } -static int __ioremap_check_desc_other(struct resource *res) +/* + * In a SEV guest, NONE and RESERVED should not be mapped encrypted because + * there the whole memory is already encrypted. + */ +static unsigned int __ioremap_check_encrypted(struct resource *res) +{ + if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) + return 0; + + switch (res->desc) { + case IORES_DESC_NONE: + case IORES_DESC_RESERVED: + break; + default: + return IORES_MAP_ENCRYPTED; + } + + return 0; +} + +/* + * The EFI runtime services data area is not covered by walk_mem_res(), but must + * be mapped encrypted when SEV is active. + */ +static void __ioremap_check_other(resource_size_t addr, struct ioremap_desc *desc) { - return (res->desc != IORES_DESC_NONE); + if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) + return; + + if (x86_platform.hyper.is_private_mmio(addr)) { + desc->flags |= IORES_MAP_ENCRYPTED; + return; + } + + if (!IS_ENABLED(CONFIG_EFI)) + return; + + if (efi_mem_type(addr) == EFI_RUNTIME_SERVICES_DATA || + (efi_mem_type(addr) == EFI_BOOT_SERVICES_DATA && + efi_mem_attributes(addr) & EFI_MEMORY_RUNTIME)) + desc->flags |= IORES_MAP_ENCRYPTED; } -static int __ioremap_res_check(struct resource *res, void *arg) +static int __ioremap_collect_map_flags(struct resource *res, void *arg) { - struct ioremap_mem_flags *flags = arg; + struct ioremap_desc *desc = arg; - if (!flags->system_ram) - flags->system_ram = __ioremap_check_ram(res); + if (!(desc->flags & IORES_MAP_SYSTEM_RAM)) + desc->flags |= __ioremap_check_ram(res); - if (!flags->desc_other) - flags->desc_other = __ioremap_check_desc_other(res); + if (!(desc->flags & IORES_MAP_ENCRYPTED)) + desc->flags |= __ioremap_check_encrypted(res); - return flags->system_ram && flags->desc_other; + return ((desc->flags & (IORES_MAP_SYSTEM_RAM | IORES_MAP_ENCRYPTED)) == + (IORES_MAP_SYSTEM_RAM | IORES_MAP_ENCRYPTED)); } /* * To avoid multiple resource walks, this function walks resources marked as * IORESOURCE_MEM and IORESOURCE_BUSY and looking for system RAM and/or a * resource described not as IORES_DESC_NONE (e.g. IORES_DESC_ACPI_TABLES). + * + * After that, deal with misc other ranges in __ioremap_check_other() which do + * not fall into the above category. */ static void __ioremap_check_mem(resource_size_t addr, unsigned long size, - struct ioremap_mem_flags *flags) + struct ioremap_desc *desc) { u64 start, end; start = (u64)addr; end = start + size - 1; - memset(flags, 0, sizeof(*flags)); + memset(desc, 0, sizeof(struct ioremap_desc)); - walk_mem_res(start, end, flags, __ioremap_res_check); + walk_mem_res(start, end, desc, __ioremap_collect_map_flags); + + __ioremap_check_other(addr, desc); } /* @@ -130,15 +180,15 @@ static void __ioremap_check_mem(resource_size_t addr, unsigned long size, * have to convert them into an offset in a page-aligned mapping, but the * caller shouldn't need to know that small detail. */ -static void __iomem *__ioremap_caller(resource_size_t phys_addr, - unsigned long size, enum page_cache_mode pcm, - void *caller, bool encrypted) +static void __iomem * +__ioremap_caller(resource_size_t phys_addr, unsigned long size, + enum page_cache_mode pcm, void *caller, bool encrypted) { unsigned long offset, vaddr; resource_size_t last_addr; const resource_size_t unaligned_phys_addr = phys_addr; const unsigned long unaligned_size = size; - struct ioremap_mem_flags mem_flags; + struct ioremap_desc io_desc; struct vm_struct *area; enum page_cache_mode new_pcm; pgprot_t prot; @@ -157,12 +207,12 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, return NULL; } - __ioremap_check_mem(phys_addr, size, &mem_flags); + __ioremap_check_mem(phys_addr, size, &io_desc); /* * Don't allow anybody to remap normal RAM that we're using.. */ - if (mem_flags.system_ram) { + if (io_desc.flags & IORES_MAP_SYSTEM_RAM) { WARN_ONCE(1, "ioremap on RAM at %pa - %pa\n", &phys_addr, &last_addr); return NULL; @@ -172,13 +222,19 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, * Mappings have to be page-aligned */ offset = phys_addr & ~PAGE_MASK; - phys_addr &= PHYSICAL_PAGE_MASK; + phys_addr &= PAGE_MASK; size = PAGE_ALIGN(last_addr+1) - phys_addr; - retval = reserve_memtype(phys_addr, (u64)phys_addr + size, + /* + * Mask out any bits not part of the actual physical + * address, like memory encryption bits. + */ + phys_addr &= PHYSICAL_PAGE_MASK; + + retval = memtype_reserve(phys_addr, (u64)phys_addr + size, pcm, &new_pcm); if (retval) { - printk(KERN_ERR "ioremap reserve_memtype failed %d\n", retval); + printk(KERN_ERR "ioremap memtype_reserve failed %d\n", retval); return NULL; } @@ -198,10 +254,15 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, * If the page being mapped is in memory and SEV is active then * make sure the memory encryption attribute is enabled in the * resulting mapping. + * In TDX guests, memory is marked private by default. If encryption + * is not requested (using encrypted), explicitly set decrypt + * attribute in all IOREMAPPED memory. */ prot = PAGE_KERNEL_IO; - if ((sev_active() && mem_flags.desc_other) || encrypted) + if ((io_desc.flags & IORES_MAP_ENCRYPTED) || encrypted) prot = pgprot_encrypted(prot); + else + prot = pgprot_decrypted(prot); switch (pcm) { case _PAGE_CACHE_MODE_UC: @@ -234,7 +295,7 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, area->phys_addr = phys_addr; vaddr = (unsigned long) area->addr; - if (kernel_map_sync_memtype(phys_addr, size, pcm)) + if (memtype_kernel_map_sync(phys_addr, size, pcm)) goto err_free_area; if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot)) @@ -254,16 +315,16 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, err_free_area: free_vm_area(area); err_free_memtype: - free_memtype(phys_addr, phys_addr + size); + memtype_free(phys_addr, phys_addr + size); return NULL; } /** - * ioremap_nocache - map bus memory into CPU space + * ioremap - map bus memory into CPU space * @phys_addr: bus address of the memory * @size: size of the resource to map * - * ioremap_nocache performs a platform specific sequence of operations to + * ioremap performs a platform specific sequence of operations to * make bus memory CPU accessible via the readb/readw/readl/writeb/ * writew/writel functions and the other mmio helpers. The returned * address is not guaranteed to be usable directly as a virtual @@ -279,7 +340,7 @@ err_free_memtype: * * Must be freed with iounmap. */ -void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size) +void __iomem *ioremap(resource_size_t phys_addr, unsigned long size) { /* * Ideally, this should be: @@ -294,7 +355,7 @@ void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size) return __ioremap_caller(phys_addr, size, pcm, __builtin_return_address(0), false); } -EXPORT_SYMBOL(ioremap_nocache); +EXPORT_SYMBOL(ioremap); /** * ioremap_uc - map bus memory into CPU space as strongly uncachable @@ -378,10 +439,10 @@ void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size) EXPORT_SYMBOL(ioremap_cache); void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size, - unsigned long prot_val) + pgprot_t prot) { return __ioremap_caller(phys_addr, size, - pgprot2cachemode(__pgprot(prot_val)), + pgprot2cachemode(prot), __builtin_return_address(0), false); } EXPORT_SYMBOL(ioremap_prot); @@ -396,7 +457,7 @@ void iounmap(volatile void __iomem *addr) { struct vm_struct *p, *o; - if ((void __force *)addr <= high_memory) + if (WARN_ON_ONCE(!is_ioremap_addr((void __force *)addr))) return; /* @@ -430,7 +491,9 @@ void iounmap(volatile void __iomem *addr) return; } - free_memtype(p->phys_addr, p->phys_addr + get_vm_area_size(p)); + kmsan_iounmap_page_range((unsigned long)addr, + (unsigned long)addr + get_vm_area_size(p)); + memtype_free(p->phys_addr, p->phys_addr + get_vm_area_size(p)); /* Finally remove it */ o = remove_vm_area((void __force *)addr); @@ -439,18 +502,12 @@ void iounmap(volatile void __iomem *addr) } EXPORT_SYMBOL(iounmap); -int __init arch_ioremap_pud_supported(void) +void *arch_memremap_wb(phys_addr_t phys_addr, size_t size, unsigned long flags) { -#ifdef CONFIG_X86_64 - return boot_cpu_has(X86_FEATURE_GBPAGES); -#else - return 0; -#endif -} + if ((flags & MEMREMAP_DEC) || cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) + return (void __force *)ioremap_cache(phys_addr, size); -int __init arch_ioremap_pmd_supported(void) -{ - return boot_cpu_has(X86_FEATURE_PSE); + return (void __force *)ioremap_encrypted(phys_addr, size); } /* @@ -478,6 +535,7 @@ void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr) memunmap((void *)((unsigned long)addr & PAGE_MASK)); } +#ifdef CONFIG_AMD_MEM_ENCRYPT /* * Examine the physical address to determine if it is an area of memory * that should be mapped decrypted. If the memory is not part of the @@ -525,9 +583,9 @@ static bool memremap_should_map_decrypted(resource_size_t phys_addr, case E820_TYPE_NVS: case E820_TYPE_UNUSABLE: /* For SEV, these areas are encrypted */ - if (sev_active()) + if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) break; - /* Fallthrough */ + fallthrough; case E820_TYPE_PRAM: return true; @@ -542,8 +600,7 @@ static bool memremap_should_map_decrypted(resource_size_t phys_addr, * Examine the physical address to determine if it is EFI data. Check * it against the boot params structure and EFI tables and memory types. */ -static bool memremap_is_efi_data(resource_size_t phys_addr, - unsigned long size) +static bool memremap_is_efi_data(resource_size_t phys_addr) { u64 paddr; @@ -581,26 +638,69 @@ static bool memremap_is_efi_data(resource_size_t phys_addr, * Examine the physical address to determine if it is boot data by checking * it against the boot params setup_data chain. */ -static bool memremap_is_setup_data(resource_size_t phys_addr, - unsigned long size) +static bool __ref __memremap_is_setup_data(resource_size_t phys_addr, bool early) { + unsigned int setup_data_sz = sizeof(struct setup_data); + struct setup_indirect *indirect; struct setup_data *data; u64 paddr, paddr_next; paddr = boot_params.hdr.setup_data; while (paddr) { - unsigned int len; + unsigned int len, size; if (phys_addr == paddr) return true; - data = memremap(paddr, sizeof(*data), - MEMREMAP_WB | MEMREMAP_DEC); + if (early) + data = early_memremap_decrypted(paddr, setup_data_sz); + else + data = memremap(paddr, setup_data_sz, MEMREMAP_WB | MEMREMAP_DEC); + if (!data) { + pr_warn("failed to remap setup_data entry\n"); + return false; + } + + size = setup_data_sz; paddr_next = data->next; len = data->len; - memunmap(data); + if ((phys_addr > paddr) && + (phys_addr < (paddr + setup_data_sz + len))) { + if (early) + early_memunmap(data, setup_data_sz); + else + memunmap(data); + return true; + } + + if (data->type == SETUP_INDIRECT) { + size += len; + if (early) { + early_memunmap(data, setup_data_sz); + data = early_memremap_decrypted(paddr, size); + } else { + memunmap(data); + data = memremap(paddr, size, MEMREMAP_WB | MEMREMAP_DEC); + } + if (!data) { + pr_warn("failed to remap indirect setup_data\n"); + return false; + } + + indirect = (struct setup_indirect *)data->data; + + if (indirect->type != SETUP_INDIRECT) { + paddr = indirect->addr; + len = indirect->len; + } + } + + if (early) + early_memunmap(data, size); + else + memunmap(data); if ((phys_addr > paddr) && (phys_addr < (paddr + len))) return true; @@ -611,37 +711,14 @@ static bool memremap_is_setup_data(resource_size_t phys_addr, return false; } -/* - * Examine the physical address to determine if it is boot data by checking - * it against the boot params setup_data chain (early boot version). - */ -static bool __init early_memremap_is_setup_data(resource_size_t phys_addr, - unsigned long size) +static bool memremap_is_setup_data(resource_size_t phys_addr) { - struct setup_data *data; - u64 paddr, paddr_next; - - paddr = boot_params.hdr.setup_data; - while (paddr) { - unsigned int len; - - if (phys_addr == paddr) - return true; - - data = early_memremap_decrypted(paddr, sizeof(*data)); - - paddr_next = data->next; - len = data->len; - - early_memunmap(data, sizeof(*data)); - - if ((phys_addr > paddr) && (phys_addr < (paddr + len))) - return true; - - paddr = paddr_next; - } + return __memremap_is_setup_data(phys_addr, false); +} - return false; +static bool __init early_memremap_is_setup_data(resource_size_t phys_addr) +{ + return __memremap_is_setup_data(phys_addr, true); } /* @@ -652,7 +729,7 @@ static bool __init early_memremap_is_setup_data(resource_size_t phys_addr, bool arch_memremap_can_ram_remap(resource_size_t phys_addr, unsigned long size, unsigned long flags) { - if (!mem_encrypt_active()) + if (!cc_platform_has(CC_ATTR_MEM_ENCRYPT)) return true; if (flags & MEMREMAP_ENC) @@ -661,9 +738,9 @@ bool arch_memremap_can_ram_remap(resource_size_t phys_addr, unsigned long size, if (flags & MEMREMAP_DEC) return false; - if (sme_active()) { - if (memremap_is_setup_data(phys_addr, size) || - memremap_is_efi_data(phys_addr, size)) + if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) { + if (memremap_is_setup_data(phys_addr) || + memremap_is_efi_data(phys_addr)) return false; } @@ -682,14 +759,14 @@ pgprot_t __init early_memremap_pgprot_adjust(resource_size_t phys_addr, { bool encrypted_prot; - if (!mem_encrypt_active()) + if (!cc_platform_has(CC_ATTR_MEM_ENCRYPT)) return prot; encrypted_prot = true; - if (sme_active()) { - if (early_memremap_is_setup_data(phys_addr, size) || - memremap_is_efi_data(phys_addr, size)) + if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) { + if (early_memremap_is_setup_data(phys_addr) || + memremap_is_efi_data(phys_addr)) encrypted_prot = false; } @@ -705,7 +782,6 @@ bool phys_mem_access_encrypted(unsigned long phys_addr, unsigned long size) return arch_memremap_can_ram_remap(phys_addr, size, 0); } -#ifdef CONFIG_ARCH_USE_MEMREMAP_PROT /* Remap memory with encryption */ void __init *early_memremap_encrypted(resource_size_t phys_addr, unsigned long size) @@ -720,10 +796,8 @@ void __init *early_memremap_encrypted(resource_size_t phys_addr, void __init *early_memremap_encrypted_wp(resource_size_t phys_addr, unsigned long size) { - /* Be sure the write-protect PAT entry is set for write-protect */ - if (__pte2cachemode_tbl[_PAGE_CACHE_MODE_WP] != _PAGE_CACHE_MODE_WP) + if (!x86_has_pat_wp()) return NULL; - return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_ENC_WP); } @@ -741,13 +815,11 @@ void __init *early_memremap_decrypted(resource_size_t phys_addr, void __init *early_memremap_decrypted_wp(resource_size_t phys_addr, unsigned long size) { - /* Be sure the write-protect PAT entry is set for write-protect */ - if (__pte2cachemode_tbl[_PAGE_CACHE_MODE_WP] != _PAGE_CACHE_MODE_WP) + if (!x86_has_pat_wp()) return NULL; - return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_NOENC_WP); } -#endif /* CONFIG_ARCH_USE_MEMREMAP_PROT */ +#endif /* CONFIG_AMD_MEM_ENCRYPT */ static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss; @@ -825,11 +897,11 @@ void __init __early_set_fixmap(enum fixed_addresses idx, pte = early_ioremap_pte(addr); /* Sanitize 'prot' against any unsupported bits: */ - pgprot_val(flags) &= __default_kernel_pte_mask; + pgprot_val(flags) &= __supported_pte_mask; if (pgprot_val(flags)) set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags)); else pte_clear(&init_mm, addr, pte); - __flush_tlb_one_kernel(addr); + flush_tlb_one_kernel(addr); } |
