Diffstat (limited to 'arch/x86/mm/ioremap.c')
-rw-r--r--	arch/x86/mm/ioremap.c	912
1 file changed, 598 insertions, 314 deletions
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 0215e2c563ef..12c8180ca1ba 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Re-map IO memory to kernel address space so that we can access it. * This is needed for high PCI addresses that aren't mapped in the @@ -6,43 +7,59 @@ * (C) Copyright 1995 1996 Linus Torvalds */ -#include <linux/bootmem.h> +#include <linux/memblock.h> #include <linux/init.h> #include <linux/io.h> -#include <linux/module.h> +#include <linux/ioport.h> +#include <linux/ioremap.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/mmiotrace.h> - -#include <asm/cacheflush.h> -#include <asm/e820.h> +#include <linux/cc_platform.h> +#include <linux/efi.h> +#include <linux/pgtable.h> +#include <linux/kmsan.h> + +#include <asm/set_memory.h> +#include <asm/e820/api.h> +#include <asm/efi.h> #include <asm/fixmap.h> -#include <asm/pgtable.h> #include <asm/tlbflush.h> #include <asm/pgalloc.h> -#include <asm/pat.h> +#include <asm/memtype.h> +#include <asm/setup.h> #include "physaddr.h" /* + * Descriptor controlling ioremap() behavior. + */ +struct ioremap_desc { + unsigned int flags; +}; + +/* * Fix up the linear direct mapping of the kernel to avoid cache attribute * conflicts. */ int ioremap_change_attr(unsigned long vaddr, unsigned long size, - unsigned long prot_val) + enum page_cache_mode pcm) { unsigned long nrpages = size >> PAGE_SHIFT; int err; - switch (prot_val) { - case _PAGE_CACHE_UC: + switch (pcm) { + case _PAGE_CACHE_MODE_UC: default: err = _set_memory_uc(vaddr, nrpages); break; - case _PAGE_CACHE_WC: + case _PAGE_CACHE_MODE_WC: err = _set_memory_wc(vaddr, nrpages); break; - case _PAGE_CACHE_WB: + case _PAGE_CACHE_MODE_WT: + err = _set_memory_wt(vaddr, nrpages); + break; + case _PAGE_CACHE_MODE_WB: err = _set_memory_wb(vaddr, nrpages); break; } @@ -50,24 +67,130 @@ int ioremap_change_attr(unsigned long vaddr, unsigned long size, return err; } +/* Does the range (or a subset of) contain normal RAM? */ +static unsigned int __ioremap_check_ram(struct resource *res) +{ + unsigned long start_pfn, stop_pfn; + unsigned long pfn; + + if ((res->flags & IORESOURCE_SYSTEM_RAM) != IORESOURCE_SYSTEM_RAM) + return 0; + + start_pfn = (res->start + PAGE_SIZE - 1) >> PAGE_SHIFT; + stop_pfn = (res->end + 1) >> PAGE_SHIFT; + if (stop_pfn > start_pfn) { + for_each_valid_pfn(pfn, start_pfn, stop_pfn) + if (!PageReserved(pfn_to_page(pfn))) + return IORES_MAP_SYSTEM_RAM; + } + + return 0; +} + +/* + * In a SEV guest, NONE and RESERVED should not be mapped encrypted because + * there the whole memory is already encrypted. + */ +static unsigned int __ioremap_check_encrypted(struct resource *res) +{ + if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) + return 0; + + switch (res->desc) { + case IORES_DESC_NONE: + case IORES_DESC_RESERVED: + break; + default: + return IORES_MAP_ENCRYPTED; + } + + return 0; +} + +/* + * The EFI runtime services data area is not covered by walk_mem_res(), but must + * be mapped encrypted when SEV is active. 
+ */ +static void __ioremap_check_other(resource_size_t addr, struct ioremap_desc *desc) +{ + if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) + return; + + if (x86_platform.hyper.is_private_mmio(addr)) { + desc->flags |= IORES_MAP_ENCRYPTED; + return; + } + + if (!IS_ENABLED(CONFIG_EFI)) + return; + + if (efi_mem_type(addr) == EFI_RUNTIME_SERVICES_DATA || + (efi_mem_type(addr) == EFI_BOOT_SERVICES_DATA && + efi_mem_attributes(addr) & EFI_MEMORY_RUNTIME)) + desc->flags |= IORES_MAP_ENCRYPTED; +} + +static int __ioremap_collect_map_flags(struct resource *res, void *arg) +{ + struct ioremap_desc *desc = arg; + + if (!(desc->flags & IORES_MAP_SYSTEM_RAM)) + desc->flags |= __ioremap_check_ram(res); + + if (!(desc->flags & IORES_MAP_ENCRYPTED)) + desc->flags |= __ioremap_check_encrypted(res); + + return ((desc->flags & (IORES_MAP_SYSTEM_RAM | IORES_MAP_ENCRYPTED)) == + (IORES_MAP_SYSTEM_RAM | IORES_MAP_ENCRYPTED)); +} + +/* + * To avoid multiple resource walks, this function walks resources marked as + * IORESOURCE_MEM and IORESOURCE_BUSY and looking for system RAM and/or a + * resource described not as IORES_DESC_NONE (e.g. IORES_DESC_ACPI_TABLES). + * + * After that, deal with misc other ranges in __ioremap_check_other() which do + * not fall into the above category. + */ +static void __ioremap_check_mem(resource_size_t addr, unsigned long size, + struct ioremap_desc *desc) +{ + u64 start, end; + + start = (u64)addr; + end = start + size - 1; + memset(desc, 0, sizeof(struct ioremap_desc)); + + walk_mem_res(start, end, desc, __ioremap_collect_map_flags); + + __ioremap_check_other(addr, desc); +} + /* * Remap an arbitrary physical address space into the kernel virtual - * address space. Needed when the kernel wants to access high addresses - * directly. + * address space. It transparently creates kernel huge I/O mapping when + * the physical address is aligned by a huge page size (1GB or 2MB) and + * the requested size is at least the huge page size. + * + * NOTE: MTRRs can override PAT memory types with a 4KB granularity. + * Therefore, the mapping code falls back to use a smaller page toward 4KB + * when a mapping range is covered by non-WB type of MTRRs. * * NOTE! We need to allow non-page-aligned mappings too: we will obviously * have to convert them into an offset in a page-aligned mapping, but the * caller shouldn't need to know that small detail. */ -static void __iomem *__ioremap_caller(resource_size_t phys_addr, - unsigned long size, unsigned long prot_val, void *caller) +static void __iomem * +__ioremap_caller(resource_size_t phys_addr, unsigned long size, + enum page_cache_mode pcm, void *caller, bool encrypted) { unsigned long offset, vaddr; - resource_size_t pfn, last_pfn, last_addr; + resource_size_t last_addr; const resource_size_t unaligned_phys_addr = phys_addr; const unsigned long unaligned_size = size; + struct ioremap_desc io_desc; struct vm_struct *area; - unsigned long new_prot_val; + enum page_cache_mode new_pcm; pgprot_t prot; int retval; void __iomem *ret_addr; @@ -84,64 +207,82 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, return NULL; } - /* - * Don't remap the low PCI/ISA area, it's always mapped.. - */ - if (is_ISA_range(phys_addr, last_addr)) - return (__force void __iomem *)phys_to_virt(phys_addr); + __ioremap_check_mem(phys_addr, size, &io_desc); /* * Don't allow anybody to remap normal RAM that we're using.. 
*/ - last_pfn = last_addr >> PAGE_SHIFT; - for (pfn = phys_addr >> PAGE_SHIFT; pfn <= last_pfn; pfn++) { - int is_ram = page_is_ram(pfn); - - if (is_ram && pfn_valid(pfn) && !PageReserved(pfn_to_page(pfn))) - return NULL; - WARN_ON_ONCE(is_ram); + if (io_desc.flags & IORES_MAP_SYSTEM_RAM) { + WARN_ONCE(1, "ioremap on RAM at %pa - %pa\n", + &phys_addr, &last_addr); + return NULL; } /* * Mappings have to be page-aligned */ offset = phys_addr & ~PAGE_MASK; - phys_addr &= PHYSICAL_PAGE_MASK; + phys_addr &= PAGE_MASK; size = PAGE_ALIGN(last_addr+1) - phys_addr; - retval = reserve_memtype(phys_addr, (u64)phys_addr + size, - prot_val, &new_prot_val); + /* + * Mask out any bits not part of the actual physical + * address, like memory encryption bits. + */ + phys_addr &= PHYSICAL_PAGE_MASK; + + retval = memtype_reserve(phys_addr, (u64)phys_addr + size, + pcm, &new_pcm); if (retval) { - printk(KERN_ERR "ioremap reserve_memtype failed %d\n", retval); + printk(KERN_ERR "ioremap memtype_reserve failed %d\n", retval); return NULL; } - if (prot_val != new_prot_val) { - if (!is_new_memtype_allowed(phys_addr, size, - prot_val, new_prot_val)) { + if (pcm != new_pcm) { + if (!is_new_memtype_allowed(phys_addr, size, pcm, new_pcm)) { printk(KERN_ERR - "ioremap error for 0x%llx-0x%llx, requested 0x%lx, got 0x%lx\n", + "ioremap error for 0x%llx-0x%llx, requested 0x%x, got 0x%x\n", (unsigned long long)phys_addr, (unsigned long long)(phys_addr + size), - prot_val, new_prot_val); + pcm, new_pcm); goto err_free_memtype; } - prot_val = new_prot_val; + pcm = new_pcm; } - switch (prot_val) { - case _PAGE_CACHE_UC: + /* + * If the page being mapped is in memory and SEV is active then + * make sure the memory encryption attribute is enabled in the + * resulting mapping. + * In TDX guests, memory is marked private by default. If encryption + * is not requested (using encrypted), explicitly set decrypt + * attribute in all IOREMAPPED memory. + */ + prot = PAGE_KERNEL_IO; + if ((io_desc.flags & IORES_MAP_ENCRYPTED) || encrypted) + prot = pgprot_encrypted(prot); + else + prot = pgprot_decrypted(prot); + + switch (pcm) { + case _PAGE_CACHE_MODE_UC: default: - prot = PAGE_KERNEL_IO_NOCACHE; + prot = __pgprot(pgprot_val(prot) | + cachemode2protval(_PAGE_CACHE_MODE_UC)); + break; + case _PAGE_CACHE_MODE_UC_MINUS: + prot = __pgprot(pgprot_val(prot) | + cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS)); break; - case _PAGE_CACHE_UC_MINUS: - prot = PAGE_KERNEL_IO_UC_MINUS; + case _PAGE_CACHE_MODE_WC: + prot = __pgprot(pgprot_val(prot) | + cachemode2protval(_PAGE_CACHE_MODE_WC)); break; - case _PAGE_CACHE_WC: - prot = PAGE_KERNEL_IO_WC; + case _PAGE_CACHE_MODE_WT: + prot = __pgprot(pgprot_val(prot) | + cachemode2protval(_PAGE_CACHE_MODE_WT)); break; - case _PAGE_CACHE_WB: - prot = PAGE_KERNEL_IO; + case _PAGE_CACHE_MODE_WB: break; } @@ -154,7 +295,7 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, area->phys_addr = phys_addr; vaddr = (unsigned long) area->addr; - if (kernel_map_sync_memtype(phys_addr, size, prot_val)) + if (memtype_kernel_map_sync(phys_addr, size, pcm)) goto err_free_area; if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot)) @@ -167,23 +308,23 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, * Check if the request spans more than any BAR in the iomem resource * tree. */ - WARN_ONCE(iomem_map_sanity_check(unaligned_phys_addr, unaligned_size), - KERN_INFO "Info: mapping multiple BARs. 
Your kernel is fine."); + if (iomem_map_sanity_check(unaligned_phys_addr, unaligned_size)) + pr_warn("caller %pS mapping multiple BARs\n", caller); return ret_addr; err_free_area: free_vm_area(area); err_free_memtype: - free_memtype(phys_addr, phys_addr + size); + memtype_free(phys_addr, phys_addr + size); return NULL; } /** - * ioremap_nocache - map bus memory into CPU space + * ioremap - map bus memory into CPU space * @phys_addr: bus address of the memory * @size: size of the resource to map * - * ioremap_nocache performs a platform specific sequence of operations to + * ioremap performs a platform specific sequence of operations to * make bus memory CPU accessible via the readb/readw/readl/writeb/ * writew/writel functions and the other mmio helpers. The returned * address is not guaranteed to be usable directly as a virtual @@ -199,21 +340,55 @@ err_free_memtype: * * Must be freed with iounmap. */ -void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size) +void __iomem *ioremap(resource_size_t phys_addr, unsigned long size) { /* * Ideally, this should be: - * pat_enabled ? _PAGE_CACHE_UC : _PAGE_CACHE_UC_MINUS; + * pat_enabled() ? _PAGE_CACHE_MODE_UC : _PAGE_CACHE_MODE_UC_MINUS; * * Till we fix all X drivers to use ioremap_wc(), we will use - * UC MINUS. + * UC MINUS. Drivers that are certain they need or can already + * be converted over to strong UC can use ioremap_uc(). */ - unsigned long val = _PAGE_CACHE_UC_MINUS; + enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC_MINUS; + + return __ioremap_caller(phys_addr, size, pcm, + __builtin_return_address(0), false); +} +EXPORT_SYMBOL(ioremap); + +/** + * ioremap_uc - map bus memory into CPU space as strongly uncachable + * @phys_addr: bus address of the memory + * @size: size of the resource to map + * + * ioremap_uc performs a platform specific sequence of operations to + * make bus memory CPU accessible via the readb/readw/readl/writeb/ + * writew/writel functions and the other mmio helpers. The returned + * address is not guaranteed to be usable directly as a virtual + * address. + * + * This version of ioremap ensures that the memory is marked with a strong + * preference as completely uncachable on the CPU when possible. For non-PAT + * systems this ends up setting page-attribute flags PCD=1, PWT=1. For PAT + * systems this will set the PAT entry for the pages as strong UC. This call + * will honor existing caching rules from things like the PCI bus. Note that + * there are other caches and buffers on many busses. In particular driver + * authors should read up on PCI writes. + * + * It's useful if some control registers are in such an area and + * write combining or read caching is not desirable: + * + * Must be freed with iounmap. 
+ */ +void __iomem *ioremap_uc(resource_size_t phys_addr, unsigned long size) +{ + enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC; - return __ioremap_caller(phys_addr, size, val, - __builtin_return_address(0)); + return __ioremap_caller(phys_addr, size, pcm, + __builtin_return_address(0), false); } -EXPORT_SYMBOL(ioremap_nocache); +EXPORT_SYMBOL_GPL(ioremap_uc); /** * ioremap_wc - map memory into CPU space write combined @@ -227,26 +402,48 @@ EXPORT_SYMBOL(ioremap_nocache); */ void __iomem *ioremap_wc(resource_size_t phys_addr, unsigned long size) { - if (pat_enabled) - return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WC, - __builtin_return_address(0)); - else - return ioremap_nocache(phys_addr, size); + return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WC, + __builtin_return_address(0), false); } EXPORT_SYMBOL(ioremap_wc); +/** + * ioremap_wt - map memory into CPU space write through + * @phys_addr: bus address of the memory + * @size: size of the resource to map + * + * This version of ioremap ensures that the memory is marked write through. + * Write through stores data into memory while keeping the cache up-to-date. + * + * Must be freed with iounmap. + */ +void __iomem *ioremap_wt(resource_size_t phys_addr, unsigned long size) +{ + return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WT, + __builtin_return_address(0), false); +} +EXPORT_SYMBOL(ioremap_wt); + +void __iomem *ioremap_encrypted(resource_size_t phys_addr, unsigned long size) +{ + return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WB, + __builtin_return_address(0), true); +} +EXPORT_SYMBOL(ioremap_encrypted); + void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size) { - return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WB, - __builtin_return_address(0)); + return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WB, + __builtin_return_address(0), false); } EXPORT_SYMBOL(ioremap_cache); void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size, - unsigned long prot_val) + pgprot_t prot) { - return __ioremap_caller(phys_addr, size, (prot_val & _PAGE_CACHE_MASK), - __builtin_return_address(0)); + return __ioremap_caller(phys_addr, size, + pgprot2cachemode(prot), + __builtin_return_address(0), false); } EXPORT_SYMBOL(ioremap_prot); @@ -260,23 +457,27 @@ void iounmap(volatile void __iomem *addr) { struct vm_struct *p, *o; - if ((void __force *)addr <= high_memory) + if (WARN_ON_ONCE(!is_ioremap_addr((void __force *)addr))) return; /* - * __ioremap special-cases the PCI/ISA range by not instantiating a - * vm_area and by simply returning an address into the kernel mapping - * of ISA space. So handle that here. + * The PCI/ISA range special-casing was removed from __ioremap() + * so this check, in theory, can be removed. However, there are + * cases where iounmap() is called for addresses not obtained via + * ioremap() (vga16fb for example). Add a warning so that these + * cases can be caught and fixed. */ if ((void __force *)addr >= phys_to_virt(ISA_START_ADDRESS) && - (void __force *)addr < phys_to_virt(ISA_END_ADDRESS)) + (void __force *)addr < phys_to_virt(ISA_END_ADDRESS)) { + WARN(1, "iounmap() called for ISA range not obtained using ioremap()\n"); return; + } + + mmiotrace_iounmap(addr); addr = (volatile void __iomem *) (PAGE_MASK & (unsigned long __force)addr); - mmiotrace_iounmap(addr); - /* Use the vm area unlocked, assuming the caller ensures there isn't another iounmap for the same address in parallel. 
Reuse of the virtual address is prevented by @@ -290,7 +491,9 @@ void iounmap(volatile void __iomem *addr) return; } - free_memtype(p->phys_addr, p->phys_addr + get_vm_area_size(p)); + kmsan_iounmap_page_range((unsigned long)addr, + (unsigned long)addr + get_vm_area_size(p)); + memtype_free(p->phys_addr, p->phys_addr + get_vm_area_size(p)); /* Finally remove it */ o = remove_vm_area((void __force *)addr); @@ -299,54 +502,334 @@ void iounmap(volatile void __iomem *addr) } EXPORT_SYMBOL(iounmap); +void *arch_memremap_wb(phys_addr_t phys_addr, size_t size, unsigned long flags) +{ + if ((flags & MEMREMAP_DEC) || cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) + return (void __force *)ioremap_cache(phys_addr, size); + + return (void __force *)ioremap_encrypted(phys_addr, size); +} + /* * Convert a physical pointer to a virtual kernel pointer for /dev/mem * access */ -void *xlate_dev_mem_ptr(unsigned long phys) +void *xlate_dev_mem_ptr(phys_addr_t phys) { - void *addr; - unsigned long start = phys & PAGE_MASK; + unsigned long start = phys & PAGE_MASK; + unsigned long offset = phys & ~PAGE_MASK; + void *vaddr; - /* If page is RAM, we can use __va. Otherwise ioremap and unmap. */ - if (page_is_ram(start >> PAGE_SHIFT)) - return __va(phys); + /* memremap() maps if RAM, otherwise falls back to ioremap() */ + vaddr = memremap(start, PAGE_SIZE, MEMREMAP_WB); - addr = (void __force *)ioremap_cache(start, PAGE_SIZE); - if (addr) - addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK)); + /* Only add the offset on success and return NULL if memremap() failed */ + if (vaddr) + vaddr += offset; - return addr; + return vaddr; } -void unxlate_dev_mem_ptr(unsigned long phys, void *addr) +void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr) { - if (page_is_ram(phys >> PAGE_SHIFT)) - return; + memunmap((void *)((unsigned long)addr & PAGE_MASK)); +} - iounmap((void __iomem *)((unsigned long)addr & PAGE_MASK)); - return; +#ifdef CONFIG_AMD_MEM_ENCRYPT +/* + * Examine the physical address to determine if it is an area of memory + * that should be mapped decrypted. If the memory is not part of the + * kernel usable area it was accessed and created decrypted, so these + * areas should be mapped decrypted. And since the encryption key can + * change across reboots, persistent memory should also be mapped + * decrypted. + * + * If SEV is active, that implies that BIOS/UEFI also ran encrypted so + * only persistent memory should be mapped decrypted. + */ +static bool memremap_should_map_decrypted(resource_size_t phys_addr, + unsigned long size) +{ + int is_pmem; + + /* + * Check if the address is part of a persistent memory region. + * This check covers areas added by E820, EFI and ACPI. + */ + is_pmem = region_intersects(phys_addr, size, IORESOURCE_MEM, + IORES_DESC_PERSISTENT_MEMORY); + if (is_pmem != REGION_DISJOINT) + return true; + + /* + * Check if the non-volatile attribute is set for an EFI + * reserved area. 
+ */ + if (efi_enabled(EFI_BOOT)) { + switch (efi_mem_type(phys_addr)) { + case EFI_RESERVED_TYPE: + if (efi_mem_attributes(phys_addr) & EFI_MEMORY_NV) + return true; + break; + default: + break; + } + } + + /* Check if the address is outside kernel usable area */ + switch (e820__get_entry_type(phys_addr, phys_addr + size - 1)) { + case E820_TYPE_RESERVED: + case E820_TYPE_ACPI: + case E820_TYPE_NVS: + case E820_TYPE_UNUSABLE: + /* For SEV, these areas are encrypted */ + if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) + break; + fallthrough; + + case E820_TYPE_PRAM: + return true; + default: + break; + } + + return false; } -static int __initdata early_ioremap_debug; +/* + * Examine the physical address to determine if it is EFI data. Check + * it against the boot params structure and EFI tables and memory types. + */ +static bool memremap_is_efi_data(resource_size_t phys_addr) +{ + u64 paddr; + + /* Check if the address is part of EFI boot/runtime data */ + if (!efi_enabled(EFI_BOOT)) + return false; + + paddr = boot_params.efi_info.efi_memmap_hi; + paddr <<= 32; + paddr |= boot_params.efi_info.efi_memmap; + if (phys_addr == paddr) + return true; + + paddr = boot_params.efi_info.efi_systab_hi; + paddr <<= 32; + paddr |= boot_params.efi_info.efi_systab; + if (phys_addr == paddr) + return true; + + if (efi_is_table_address(phys_addr)) + return true; + + switch (efi_mem_type(phys_addr)) { + case EFI_BOOT_SERVICES_DATA: + case EFI_RUNTIME_SERVICES_DATA: + return true; + default: + break; + } + + return false; +} -static int __init early_ioremap_debug_setup(char *str) +/* + * Examine the physical address to determine if it is boot data by checking + * it against the boot params setup_data chain. + */ +static bool __ref __memremap_is_setup_data(resource_size_t phys_addr, bool early) { - early_ioremap_debug = 1; + unsigned int setup_data_sz = sizeof(struct setup_data); + struct setup_indirect *indirect; + struct setup_data *data; + u64 paddr, paddr_next; + + paddr = boot_params.hdr.setup_data; + while (paddr) { + unsigned int len, size; + + if (phys_addr == paddr) + return true; + + if (early) + data = early_memremap_decrypted(paddr, setup_data_sz); + else + data = memremap(paddr, setup_data_sz, MEMREMAP_WB | MEMREMAP_DEC); + if (!data) { + pr_warn("failed to remap setup_data entry\n"); + return false; + } - return 0; + size = setup_data_sz; + + paddr_next = data->next; + len = data->len; + + if ((phys_addr > paddr) && + (phys_addr < (paddr + setup_data_sz + len))) { + if (early) + early_memunmap(data, setup_data_sz); + else + memunmap(data); + return true; + } + + if (data->type == SETUP_INDIRECT) { + size += len; + if (early) { + early_memunmap(data, setup_data_sz); + data = early_memremap_decrypted(paddr, size); + } else { + memunmap(data); + data = memremap(paddr, size, MEMREMAP_WB | MEMREMAP_DEC); + } + if (!data) { + pr_warn("failed to remap indirect setup_data\n"); + return false; + } + + indirect = (struct setup_indirect *)data->data; + + if (indirect->type != SETUP_INDIRECT) { + paddr = indirect->addr; + len = indirect->len; + } + } + + if (early) + early_memunmap(data, size); + else + memunmap(data); + + if ((phys_addr > paddr) && (phys_addr < (paddr + len))) + return true; + + paddr = paddr_next; + } + + return false; +} + +static bool memremap_is_setup_data(resource_size_t phys_addr) +{ + return __memremap_is_setup_data(phys_addr, false); +} + +static bool __init early_memremap_is_setup_data(resource_size_t phys_addr) +{ + return __memremap_is_setup_data(phys_addr, true); +} + +/* + 
* Architecture function to determine if RAM remap is allowed. By default, a + * RAM remap will map the data as encrypted. Determine if a RAM remap should + * not be done so that the data will be mapped decrypted. + */ +bool arch_memremap_can_ram_remap(resource_size_t phys_addr, unsigned long size, + unsigned long flags) +{ + if (!cc_platform_has(CC_ATTR_MEM_ENCRYPT)) + return true; + + if (flags & MEMREMAP_ENC) + return true; + + if (flags & MEMREMAP_DEC) + return false; + + if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) { + if (memremap_is_setup_data(phys_addr) || + memremap_is_efi_data(phys_addr)) + return false; + } + + return !memremap_should_map_decrypted(phys_addr, size); +} + +/* + * Architecture override of __weak function to adjust the protection attributes + * used when remapping memory. By default, early_memremap() will map the data + * as encrypted. Determine if an encrypted mapping should not be done and set + * the appropriate protection attributes. + */ +pgprot_t __init early_memremap_pgprot_adjust(resource_size_t phys_addr, + unsigned long size, + pgprot_t prot) +{ + bool encrypted_prot; + + if (!cc_platform_has(CC_ATTR_MEM_ENCRYPT)) + return prot; + + encrypted_prot = true; + + if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) { + if (early_memremap_is_setup_data(phys_addr) || + memremap_is_efi_data(phys_addr)) + encrypted_prot = false; + } + + if (encrypted_prot && memremap_should_map_decrypted(phys_addr, size)) + encrypted_prot = false; + + return encrypted_prot ? pgprot_encrypted(prot) + : pgprot_decrypted(prot); +} + +bool phys_mem_access_encrypted(unsigned long phys_addr, unsigned long size) +{ + return arch_memremap_can_ram_remap(phys_addr, size, 0); +} + +/* Remap memory with encryption */ +void __init *early_memremap_encrypted(resource_size_t phys_addr, + unsigned long size) +{ + return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_ENC); } -early_param("early_ioremap_debug", early_ioremap_debug_setup); -static __initdata int after_paging_init; +/* + * Remap memory with encryption and write-protected - cannot be called + * before pat_init() is called + */ +void __init *early_memremap_encrypted_wp(resource_size_t phys_addr, + unsigned long size) +{ + if (!x86_has_pat_wp()) + return NULL; + return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_ENC_WP); +} + +/* Remap memory without encryption */ +void __init *early_memremap_decrypted(resource_size_t phys_addr, + unsigned long size) +{ + return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_NOENC); +} + +/* + * Remap memory without encryption and write-protected - cannot be called + * before pat_init() is called + */ +void __init *early_memremap_decrypted_wp(resource_size_t phys_addr, + unsigned long size) +{ + if (!x86_has_pat_wp()) + return NULL; + return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_NOENC_WP); +} +#endif /* CONFIG_AMD_MEM_ENCRYPT */ + static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss; static inline pmd_t * __init early_ioremap_pmd(unsigned long addr) { /* Don't assume we're using swapper_pg_dir at this point */ - pgd_t *base = __va(read_cr3()); + pgd_t *base = __va(read_cr3_pa()); pgd_t *pgd = &base[pgd_index(addr)]; - pud_t *pud = pud_offset(pgd, addr); + p4d_t *p4d = p4d_offset(pgd, addr); + pud_t *pud = pud_offset(p4d, addr); pmd_t *pmd = pmd_offset(pud, addr); return pmd; @@ -362,18 +845,17 @@ bool __init is_early_ioremap_ptep(pte_t *ptep) return ptep >= &bm_pte[0] && ptep < &bm_pte[PAGE_SIZE/sizeof(pte_t)]; } -static unsigned long slot_virt[FIX_BTMAPS_SLOTS] 
__initdata; - void __init early_ioremap_init(void) { pmd_t *pmd; - int i; - if (early_ioremap_debug) - printk(KERN_INFO "early_ioremap_init()\n"); +#ifdef CONFIG_X86_64 + BUILD_BUG_ON((fix_to_virt(0) + PAGE_SIZE) & ((1 << PMD_SHIFT) - 1)); +#else + WARN_ON((fix_to_virt(0) + PAGE_SIZE) & ((1 << PMD_SHIFT) - 1)); +#endif - for (i = 0; i < FIX_BTMAPS_SLOTS; i++) - slot_virt[i] = __fix_to_virt(FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*i); + early_ioremap_setup(); pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); memset(bm_pte, 0, sizeof(bm_pte)); @@ -402,13 +884,8 @@ void __init early_ioremap_init(void) } } -void __init early_ioremap_reset(void) -{ - after_paging_init = 1; -} - -static void __init __early_set_fixmap(enum fixed_addresses idx, - phys_addr_t phys, pgprot_t flags) +void __init __early_set_fixmap(enum fixed_addresses idx, + phys_addr_t phys, pgprot_t flags) { unsigned long addr = __fix_to_virt(idx); pte_t *pte; @@ -419,205 +896,12 @@ static void __init __early_set_fixmap(enum fixed_addresses idx, } pte = early_ioremap_pte(addr); + /* Sanitize 'prot' against any unsupported bits: */ + pgprot_val(flags) &= __supported_pte_mask; + if (pgprot_val(flags)) set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags)); else pte_clear(&init_mm, addr, pte); - __flush_tlb_one(addr); -} - -static inline void __init early_set_fixmap(enum fixed_addresses idx, - phys_addr_t phys, pgprot_t prot) -{ - if (after_paging_init) - __set_fixmap(idx, phys, prot); - else - __early_set_fixmap(idx, phys, prot); -} - -static inline void __init early_clear_fixmap(enum fixed_addresses idx) -{ - if (after_paging_init) - clear_fixmap(idx); - else - __early_set_fixmap(idx, 0, __pgprot(0)); -} - -static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata; -static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata; - -void __init fixup_early_ioremap(void) -{ - int i; - - for (i = 0; i < FIX_BTMAPS_SLOTS; i++) { - if (prev_map[i]) { - WARN_ON(1); - break; - } - } - - early_ioremap_init(); -} - -static int __init check_early_ioremap_leak(void) -{ - int count = 0; - int i; - - for (i = 0; i < FIX_BTMAPS_SLOTS; i++) - if (prev_map[i]) - count++; - - if (!count) - return 0; - WARN(1, KERN_WARNING - "Debug warning: early ioremap leak of %d areas detected.\n", - count); - printk(KERN_WARNING - "please boot with early_ioremap_debug and report the dmesg.\n"); - - return 1; -} -late_initcall(check_early_ioremap_leak); - -static void __init __iomem * -__early_ioremap(resource_size_t phys_addr, unsigned long size, pgprot_t prot) -{ - unsigned long offset; - resource_size_t last_addr; - unsigned int nrpages; - enum fixed_addresses idx0, idx; - int i, slot; - - WARN_ON(system_state != SYSTEM_BOOTING); - - slot = -1; - for (i = 0; i < FIX_BTMAPS_SLOTS; i++) { - if (!prev_map[i]) { - slot = i; - break; - } - } - - if (slot < 0) { - printk(KERN_INFO "%s(%08llx, %08lx) not found slot\n", - __func__, (u64)phys_addr, size); - WARN_ON(1); - return NULL; - } - - if (early_ioremap_debug) { - printk(KERN_INFO "%s(%08llx, %08lx) [%d] => ", - __func__, (u64)phys_addr, size, slot); - dump_stack(); - } - - /* Don't allow wraparound or zero size */ - last_addr = phys_addr + size - 1; - if (!size || last_addr < phys_addr) { - WARN_ON(1); - return NULL; - } - - prev_size[slot] = size; - /* - * Mappings have to be page-aligned - */ - offset = phys_addr & ~PAGE_MASK; - phys_addr &= PAGE_MASK; - size = PAGE_ALIGN(last_addr + 1) - phys_addr; - - /* - * Mappings have to fit in the FIX_BTMAP area. 
- */ - nrpages = size >> PAGE_SHIFT; - if (nrpages > NR_FIX_BTMAPS) { - WARN_ON(1); - return NULL; - } - - /* - * Ok, go for it.. - */ - idx0 = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot; - idx = idx0; - while (nrpages > 0) { - early_set_fixmap(idx, phys_addr, prot); - phys_addr += PAGE_SIZE; - --idx; - --nrpages; - } - if (early_ioremap_debug) - printk(KERN_CONT "%08lx + %08lx\n", offset, slot_virt[slot]); - - prev_map[slot] = (void __iomem *)(offset + slot_virt[slot]); - return prev_map[slot]; -} - -/* Remap an IO device */ -void __init __iomem * -early_ioremap(resource_size_t phys_addr, unsigned long size) -{ - return __early_ioremap(phys_addr, size, PAGE_KERNEL_IO); -} - -/* Remap memory */ -void __init __iomem * -early_memremap(resource_size_t phys_addr, unsigned long size) -{ - return __early_ioremap(phys_addr, size, PAGE_KERNEL); -} - -void __init early_iounmap(void __iomem *addr, unsigned long size) -{ - unsigned long virt_addr; - unsigned long offset; - unsigned int nrpages; - enum fixed_addresses idx; - int i, slot; - - slot = -1; - for (i = 0; i < FIX_BTMAPS_SLOTS; i++) { - if (prev_map[i] == addr) { - slot = i; - break; - } - } - - if (slot < 0) { - printk(KERN_INFO "early_iounmap(%p, %08lx) not found slot\n", - addr, size); - WARN_ON(1); - return; - } - - if (prev_size[slot] != size) { - printk(KERN_INFO "early_iounmap(%p, %08lx) [%d] size not consistent %08lx\n", - addr, size, slot, prev_size[slot]); - WARN_ON(1); - return; - } - - if (early_ioremap_debug) { - printk(KERN_INFO "early_iounmap(%p, %08lx) [%d]\n", addr, - size, slot); - dump_stack(); - } - - virt_addr = (unsigned long)addr; - if (virt_addr < fix_to_virt(FIX_BTMAP_BEGIN)) { - WARN_ON(1); - return; - } - offset = virt_addr & ~PAGE_MASK; - nrpages = PAGE_ALIGN(offset + size) >> PAGE_SHIFT; - - idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot; - while (nrpages > 0) { - early_clear_fixmap(idx); - --idx; - --nrpages; - } - prev_map[slot] = NULL; + flush_tlb_one_kernel(addr); } |
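
For reference, a minimal usage sketch (not part of the commit) of the reworked mapping API above, where ioremap_nocache() is folded into ioremap() and every variant is paired with iounmap(). The device, physical base address, size and register offset below are invented for illustration only.

/*
 * Hypothetical driver snippet; the EXAMPLE_* values are made up and not part
 * of this patch. ioremap() returns a UC- mapping by default; ioremap_uc()
 * and ioremap_wc() give strong-UC and write-combined mappings respectively.
 */
#include <linux/io.h>
#include <linux/printk.h>
#include <linux/errno.h>

#define EXAMPLE_BAR_PHYS	0xd0000000UL	/* assumed MMIO base */
#define EXAMPLE_BAR_SIZE	0x1000UL
#define EXAMPLE_REG_STATUS	0x08		/* assumed register offset */

static void __iomem *example_regs;

static int example_map(void)
{
	example_regs = ioremap(EXAMPLE_BAR_PHYS, EXAMPLE_BAR_SIZE);
	if (!example_regs)
		return -ENOMEM;

	/* Always go through the MMIO accessors, never plain dereferences. */
	pr_info("example status: %#x\n",
		readl(example_regs + EXAMPLE_REG_STATUS));
	return 0;
}

static void example_unmap(void)
{
	iounmap(example_regs);		/* pairs with the ioremap() above */
}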

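Similarly, a hedged sketch of the memremap() pattern used by __memremap_is_setup_data() in this commit: a setup_data entry is mapped write-back but explicitly decrypted via MEMREMAP_DEC. The helper name below is invented; only the mapping flags and the structure access mirror the patch.

/*
 * Illustration only: example_first_setup_data_next() is not a kernel
 * function. It maps the head of the boot_params.hdr.setup_data list the
 * way this patch does (write-back, forced decrypted) and returns the
 * physical address of the next entry.
 */
#include <linux/io.h>
#include <asm/setup.h>		/* boot_params, struct setup_data */

static u64 example_first_setup_data_next(void)
{
	u64 paddr = boot_params.hdr.setup_data;
	struct setup_data *data;
	u64 next;

	if (!paddr)
		return 0;

	data = memremap(paddr, sizeof(*data), MEMREMAP_WB | MEMREMAP_DEC);
	if (!data)
		return 0;

	next = data->next;	/* 0 terminates the list */
	memunmap(data);
	return next;
}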