Diffstat (limited to 'arch/x86/mm/ioremap.c')
-rw-r--r--  arch/x86/mm/ioremap.c  912
1 file changed, 598 insertions(+), 314 deletions(-)
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 0215e2c563ef..12c8180ca1ba 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Re-map IO memory to kernel address space so that we can access it.
* This is needed for high PCI addresses that aren't mapped in the
@@ -6,43 +7,59 @@
* (C) Copyright 1995 1996 Linus Torvalds
*/
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/init.h>
#include <linux/io.h>
-#include <linux/module.h>
+#include <linux/ioport.h>
+#include <linux/ioremap.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mmiotrace.h>
-
-#include <asm/cacheflush.h>
-#include <asm/e820.h>
+#include <linux/cc_platform.h>
+#include <linux/efi.h>
+#include <linux/pgtable.h>
+#include <linux/kmsan.h>
+
+#include <asm/set_memory.h>
+#include <asm/e820/api.h>
+#include <asm/efi.h>
#include <asm/fixmap.h>
-#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/pgalloc.h>
-#include <asm/pat.h>
+#include <asm/memtype.h>
+#include <asm/setup.h>
#include "physaddr.h"
/*
+ * Descriptor controlling ioremap() behavior.
+ */
+struct ioremap_desc {
+ unsigned int flags;
+};
+
+/*
* Fix up the linear direct mapping of the kernel to avoid cache attribute
* conflicts.
*/
int ioremap_change_attr(unsigned long vaddr, unsigned long size,
- unsigned long prot_val)
+ enum page_cache_mode pcm)
{
unsigned long nrpages = size >> PAGE_SHIFT;
int err;
- switch (prot_val) {
- case _PAGE_CACHE_UC:
+ switch (pcm) {
+ case _PAGE_CACHE_MODE_UC:
default:
err = _set_memory_uc(vaddr, nrpages);
break;
- case _PAGE_CACHE_WC:
+ case _PAGE_CACHE_MODE_WC:
err = _set_memory_wc(vaddr, nrpages);
break;
- case _PAGE_CACHE_WB:
+ case _PAGE_CACHE_MODE_WT:
+ err = _set_memory_wt(vaddr, nrpages);
+ break;
+ case _PAGE_CACHE_MODE_WB:
err = _set_memory_wb(vaddr, nrpages);
break;
}
@@ -50,24 +67,130 @@ int ioremap_change_attr(unsigned long vaddr, unsigned long size,
return err;
}
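ioremap_change_attr() is not a driver-facing API; memtype_kernel_map_sync() in arch/x86/mm/pat/memtype.c uses it to keep the kernel's direct mapping of the same physical range consistent with a freshly created I/O mapping. A minimal sketch of that call pattern (phys_addr and size are hypothetical, and the range is assumed to lie in the direct map):

	unsigned long vaddr = (unsigned long)__va(phys_addr);	/* direct-map alias */

	if (ioremap_change_attr(vaddr, size, _PAGE_CACHE_MODE_WC))
		pr_warn("failed to sync direct-map attributes\n");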
+/* Does the range (or a subset of it) contain normal RAM? */
+static unsigned int __ioremap_check_ram(struct resource *res)
+{
+ unsigned long start_pfn, stop_pfn;
+ unsigned long pfn;
+
+ if ((res->flags & IORESOURCE_SYSTEM_RAM) != IORESOURCE_SYSTEM_RAM)
+ return 0;
+
+ start_pfn = (res->start + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ stop_pfn = (res->end + 1) >> PAGE_SHIFT;
+ if (stop_pfn > start_pfn) {
+ for_each_valid_pfn(pfn, start_pfn, stop_pfn)
+ if (!PageReserved(pfn_to_page(pfn)))
+ return IORES_MAP_SYSTEM_RAM;
+ }
+
+ return 0;
+}
+
+/*
+ * In a SEV guest, NONE and RESERVED should not be mapped encrypted, because
+ * in that case the whole of memory is already encrypted.
+ */
+static unsigned int __ioremap_check_encrypted(struct resource *res)
+{
+ if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
+ return 0;
+
+ switch (res->desc) {
+ case IORES_DESC_NONE:
+ case IORES_DESC_RESERVED:
+ break;
+ default:
+ return IORES_MAP_ENCRYPTED;
+ }
+
+ return 0;
+}
+
+/*
+ * The EFI runtime services data area is not covered by walk_mem_res(), but must
+ * be mapped encrypted when SEV is active.
+ */
+static void __ioremap_check_other(resource_size_t addr, struct ioremap_desc *desc)
+{
+ if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
+ return;
+
+ if (x86_platform.hyper.is_private_mmio(addr)) {
+ desc->flags |= IORES_MAP_ENCRYPTED;
+ return;
+ }
+
+ if (!IS_ENABLED(CONFIG_EFI))
+ return;
+
+ if (efi_mem_type(addr) == EFI_RUNTIME_SERVICES_DATA ||
+ (efi_mem_type(addr) == EFI_BOOT_SERVICES_DATA &&
+ efi_mem_attributes(addr) & EFI_MEMORY_RUNTIME))
+ desc->flags |= IORES_MAP_ENCRYPTED;
+}
+
+static int __ioremap_collect_map_flags(struct resource *res, void *arg)
+{
+ struct ioremap_desc *desc = arg;
+
+ if (!(desc->flags & IORES_MAP_SYSTEM_RAM))
+ desc->flags |= __ioremap_check_ram(res);
+
+ if (!(desc->flags & IORES_MAP_ENCRYPTED))
+ desc->flags |= __ioremap_check_encrypted(res);
+
+ return ((desc->flags & (IORES_MAP_SYSTEM_RAM | IORES_MAP_ENCRYPTED)) ==
+ (IORES_MAP_SYSTEM_RAM | IORES_MAP_ENCRYPTED));
+}
+
+/*
+ * To avoid multiple resource walks, this function walks resources marked as
+ * IORESOURCE_MEM and IORESOURCE_BUSY and looks for system RAM and/or a
+ * resource whose descriptor is not IORES_DESC_NONE (e.g. IORES_DESC_ACPI_TABLES).
+ *
+ * After that, deal with misc other ranges in __ioremap_check_other() which do
+ * not fall into the above category.
+ */
+static void __ioremap_check_mem(resource_size_t addr, unsigned long size,
+ struct ioremap_desc *desc)
+{
+ u64 start, end;
+
+ start = (u64)addr;
+ end = start + size - 1;
+ memset(desc, 0, sizeof(struct ioremap_desc));
+
+ walk_mem_res(start, end, desc, __ioremap_collect_map_flags);
+
+ __ioremap_check_other(addr, desc);
+}
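walk_mem_res() stops walking as soon as the callback returns a non-zero value, which is why __ioremap_collect_map_flags() returns true only once both flags have been gathered. A stripped-down, hypothetical callback showing the same early-stop idiom:

	/* Hypothetical: abort the walk at the first chunk of system RAM. */
	static int stop_on_ram(struct resource *res, void *arg)
	{
		return __ioremap_check_ram(res) == IORES_MAP_SYSTEM_RAM;
	}

	/* ... */
	walk_mem_res(start, end, NULL, stop_on_ram);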
+
/*
* Remap an arbitrary physical address space into the kernel virtual
- * address space. Needed when the kernel wants to access high addresses
- * directly.
+ * address space. It transparently creates a kernel huge I/O mapping when
+ * the physical address is aligned to a huge page size (1GB or 2MB) and
+ * the requested size is at least the huge page size.
+ *
+ * NOTE: MTRRs can override PAT memory types with 4KB granularity.
+ * Therefore, the mapping code falls back to smaller pages (down to 4KB)
+ * when a mapping range is covered by non-WB MTRRs.
*
* NOTE! We need to allow non-page-aligned mappings too: we will obviously
* have to convert them into an offset in a page-aligned mapping, but the
* caller shouldn't need to know that small detail.
*/
-static void __iomem *__ioremap_caller(resource_size_t phys_addr,
- unsigned long size, unsigned long prot_val, void *caller)
+static void __iomem *
+__ioremap_caller(resource_size_t phys_addr, unsigned long size,
+ enum page_cache_mode pcm, void *caller, bool encrypted)
{
unsigned long offset, vaddr;
- resource_size_t pfn, last_pfn, last_addr;
+ resource_size_t last_addr;
const resource_size_t unaligned_phys_addr = phys_addr;
const unsigned long unaligned_size = size;
+ struct ioremap_desc io_desc;
struct vm_struct *area;
- unsigned long new_prot_val;
+ enum page_cache_mode new_pcm;
pgprot_t prot;
int retval;
void __iomem *ret_addr;
@@ -84,64 +207,82 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
return NULL;
}
- /*
- * Don't remap the low PCI/ISA area, it's always mapped..
- */
- if (is_ISA_range(phys_addr, last_addr))
- return (__force void __iomem *)phys_to_virt(phys_addr);
+ __ioremap_check_mem(phys_addr, size, &io_desc);
/*
* Don't allow anybody to remap normal RAM that we're using..
*/
- last_pfn = last_addr >> PAGE_SHIFT;
- for (pfn = phys_addr >> PAGE_SHIFT; pfn <= last_pfn; pfn++) {
- int is_ram = page_is_ram(pfn);
-
- if (is_ram && pfn_valid(pfn) && !PageReserved(pfn_to_page(pfn)))
- return NULL;
- WARN_ON_ONCE(is_ram);
+ if (io_desc.flags & IORES_MAP_SYSTEM_RAM) {
+ WARN_ONCE(1, "ioremap on RAM at %pa - %pa\n",
+ &phys_addr, &last_addr);
+ return NULL;
}
/*
* Mappings have to be page-aligned
*/
offset = phys_addr & ~PAGE_MASK;
- phys_addr &= PHYSICAL_PAGE_MASK;
+ phys_addr &= PAGE_MASK;
size = PAGE_ALIGN(last_addr+1) - phys_addr;
- retval = reserve_memtype(phys_addr, (u64)phys_addr + size,
- prot_val, &new_prot_val);
+ /*
+ * Mask out any bits not part of the actual physical
+ * address, like memory encryption bits.
+ */
+ phys_addr &= PHYSICAL_PAGE_MASK;
+
+ retval = memtype_reserve(phys_addr, (u64)phys_addr + size,
+ pcm, &new_pcm);
if (retval) {
- printk(KERN_ERR "ioremap reserve_memtype failed %d\n", retval);
+ printk(KERN_ERR "ioremap memtype_reserve failed %d\n", retval);
return NULL;
}
- if (prot_val != new_prot_val) {
- if (!is_new_memtype_allowed(phys_addr, size,
- prot_val, new_prot_val)) {
+ if (pcm != new_pcm) {
+ if (!is_new_memtype_allowed(phys_addr, size, pcm, new_pcm)) {
printk(KERN_ERR
- "ioremap error for 0x%llx-0x%llx, requested 0x%lx, got 0x%lx\n",
+ "ioremap error for 0x%llx-0x%llx, requested 0x%x, got 0x%x\n",
(unsigned long long)phys_addr,
(unsigned long long)(phys_addr + size),
- prot_val, new_prot_val);
+ pcm, new_pcm);
goto err_free_memtype;
}
- prot_val = new_prot_val;
+ pcm = new_pcm;
}
- switch (prot_val) {
- case _PAGE_CACHE_UC:
+ /*
+ * If the page being mapped is in memory and SEV is active then
+ * make sure the memory encryption attribute is enabled in the
+ * resulting mapping.
+ * In TDX guests, memory is marked private by default. If encryption
+ * is not requested (via the 'encrypted' argument), explicitly set the
+ * decrypt attribute on all ioremapped memory.
+ */
+ prot = PAGE_KERNEL_IO;
+ if ((io_desc.flags & IORES_MAP_ENCRYPTED) || encrypted)
+ prot = pgprot_encrypted(prot);
+ else
+ prot = pgprot_decrypted(prot);
+
+ switch (pcm) {
+ case _PAGE_CACHE_MODE_UC:
default:
- prot = PAGE_KERNEL_IO_NOCACHE;
+ prot = __pgprot(pgprot_val(prot) |
+ cachemode2protval(_PAGE_CACHE_MODE_UC));
+ break;
+ case _PAGE_CACHE_MODE_UC_MINUS:
+ prot = __pgprot(pgprot_val(prot) |
+ cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS));
break;
- case _PAGE_CACHE_UC_MINUS:
- prot = PAGE_KERNEL_IO_UC_MINUS;
+ case _PAGE_CACHE_MODE_WC:
+ prot = __pgprot(pgprot_val(prot) |
+ cachemode2protval(_PAGE_CACHE_MODE_WC));
break;
- case _PAGE_CACHE_WC:
- prot = PAGE_KERNEL_IO_WC;
+ case _PAGE_CACHE_MODE_WT:
+ prot = __pgprot(pgprot_val(prot) |
+ cachemode2protval(_PAGE_CACHE_MODE_WT));
break;
- case _PAGE_CACHE_WB:
- prot = PAGE_KERNEL_IO;
+ case _PAGE_CACHE_MODE_WB:
break;
}
@@ -154,7 +295,7 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
area->phys_addr = phys_addr;
vaddr = (unsigned long) area->addr;
- if (kernel_map_sync_memtype(phys_addr, size, prot_val))
+ if (memtype_kernel_map_sync(phys_addr, size, pcm))
goto err_free_area;
if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot))
@@ -167,23 +308,23 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
* Check if the request spans more than any BAR in the iomem resource
* tree.
*/
- WARN_ONCE(iomem_map_sanity_check(unaligned_phys_addr, unaligned_size),
- KERN_INFO "Info: mapping multiple BARs. Your kernel is fine.");
+ if (iomem_map_sanity_check(unaligned_phys_addr, unaligned_size))
+ pr_warn("caller %pS mapping multiple BARs\n", caller);
return ret_addr;
err_free_area:
free_vm_area(area);
err_free_memtype:
- free_memtype(phys_addr, phys_addr + size);
+ memtype_free(phys_addr, phys_addr + size);
return NULL;
}
/**
- * ioremap_nocache - map bus memory into CPU space
+ * ioremap - map bus memory into CPU space
* @phys_addr: bus address of the memory
* @size: size of the resource to map
*
- * ioremap_nocache performs a platform specific sequence of operations to
+ * ioremap performs a platform specific sequence of operations to
* make bus memory CPU accessible via the readb/readw/readl/writeb/
* writew/writel functions and the other mmio helpers. The returned
* address is not guaranteed to be usable directly as a virtual
@@ -199,21 +340,55 @@ err_free_memtype:
*
* Must be freed with iounmap.
*/
-void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size)
+void __iomem *ioremap(resource_size_t phys_addr, unsigned long size)
{
/*
* Ideally, this should be:
- * pat_enabled ? _PAGE_CACHE_UC : _PAGE_CACHE_UC_MINUS;
+ * pat_enabled() ? _PAGE_CACHE_MODE_UC : _PAGE_CACHE_MODE_UC_MINUS;
*
* Till we fix all X drivers to use ioremap_wc(), we will use
- * UC MINUS.
+ * UC MINUS. Drivers that are certain they need or can already
+ * be converted over to strong UC can use ioremap_uc().
*/
- unsigned long val = _PAGE_CACHE_UC_MINUS;
+ enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC_MINUS;
+
+ return __ioremap_caller(phys_addr, size, pcm,
+ __builtin_return_address(0), false);
+}
+EXPORT_SYMBOL(ioremap);
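A hypothetical driver-side sketch of the pattern the kernel-doc above describes (res, status and the register offsets are made up; for PCI BARs, pci_iomap() is usually preferable):

	void __iomem *regs = ioremap(res->start, resource_size(res));

	if (!regs)
		return -ENOMEM;

	writel(1, regs + 0x10);		/* hypothetical enable register */
	status = readl(regs + 0x14);	/* hypothetical status register */
	iounmap(regs);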
+
+/**
+ * ioremap_uc - map bus memory into CPU space as strongly uncachable
+ * @phys_addr: bus address of the memory
+ * @size: size of the resource to map
+ *
+ * ioremap_uc performs a platform specific sequence of operations to
+ * make bus memory CPU accessible via the readb/readw/readl/writeb/
+ * writew/writel functions and the other mmio helpers. The returned
+ * address is not guaranteed to be usable directly as a virtual
+ * address.
+ *
+ * This version of ioremap ensures that the memory is marked with a strong
+ * preference as completely uncachable on the CPU when possible. For non-PAT
+ * systems this ends up setting page-attribute flags PCD=1, PWT=1. For PAT
+ * systems this will set the PAT entry for the pages as strong UC. This call
+ * will honor existing caching rules from things like the PCI bus. Note that
+ * there are other caches and buffers on many busses. In particular driver
+ * authors should read up on PCI writes.
+ *
+ * It's useful if some control registers are in such an area and
+ * write combining or read caching is not desirable:
+ *
+ * Must be freed with iounmap.
+ */
+void __iomem *ioremap_uc(resource_size_t phys_addr, unsigned long size)
+{
+ enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC;
- return __ioremap_caller(phys_addr, size, val,
- __builtin_return_address(0));
+ return __ioremap_caller(phys_addr, size, pcm,
+ __builtin_return_address(0), false);
}
-EXPORT_SYMBOL(ioremap_nocache);
+EXPORT_SYMBOL_GPL(ioremap_uc);
/**
* ioremap_wc - map memory into CPU space write combined
@@ -227,26 +402,48 @@ EXPORT_SYMBOL(ioremap_nocache);
*/
void __iomem *ioremap_wc(resource_size_t phys_addr, unsigned long size)
{
- if (pat_enabled)
- return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WC,
- __builtin_return_address(0));
- else
- return ioremap_nocache(phys_addr, size);
+ return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WC,
+ __builtin_return_address(0), false);
}
EXPORT_SYMBOL(ioremap_wc);
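A hedged sketch of the typical write-combined use case, a framebuffer-like aperture (fb_phys, fb_len, frame and frame_len are illustrative):

	void __iomem *fb = ioremap_wc(fb_phys, fb_len);

	if (fb) {
		memset_io(fb, 0, fb_len);		/* clear the aperture */
		memcpy_toio(fb, frame, frame_len);	/* bulk pixel upload */
		iounmap(fb);
	}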
+/**
+ * ioremap_wt - map memory into CPU space write through
+ * @phys_addr: bus address of the memory
+ * @size: size of the resource to map
+ *
+ * This version of ioremap ensures that the memory is marked write through.
+ * Write through stores data into memory while keeping the cache up-to-date.
+ *
+ * Must be freed with iounmap.
+ */
+void __iomem *ioremap_wt(resource_size_t phys_addr, unsigned long size)
+{
+ return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WT,
+ __builtin_return_address(0), false);
+}
+EXPORT_SYMBOL(ioremap_wt);
+
+void __iomem *ioremap_encrypted(resource_size_t phys_addr, unsigned long size)
+{
+ return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WB,
+ __builtin_return_address(0), true);
+}
+EXPORT_SYMBOL(ioremap_encrypted);
+
void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size)
{
- return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WB,
- __builtin_return_address(0));
+ return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WB,
+ __builtin_return_address(0), false);
}
EXPORT_SYMBOL(ioremap_cache);
void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size,
- unsigned long prot_val)
+ pgprot_t prot)
{
- return __ioremap_caller(phys_addr, size, (prot_val & _PAGE_CACHE_MASK),
- __builtin_return_address(0));
+ return __ioremap_caller(phys_addr, size,
+ pgprot2cachemode(prot),
+ __builtin_return_address(0), false);
}
EXPORT_SYMBOL(ioremap_prot);
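With the pgprot_t-based signature, the caller builds the protection value itself; a minimal sketch (phys and len are hypothetical) that, on PAT-capable systems, ends up equivalent to ioremap_wc():

	void __iomem *win = ioremap_prot(phys, len,
					 pgprot_writecombine(PAGE_KERNEL_IO));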
@@ -260,23 +457,27 @@ void iounmap(volatile void __iomem *addr)
{
struct vm_struct *p, *o;
- if ((void __force *)addr <= high_memory)
+ if (WARN_ON_ONCE(!is_ioremap_addr((void __force *)addr)))
return;
/*
- * __ioremap special-cases the PCI/ISA range by not instantiating a
- * vm_area and by simply returning an address into the kernel mapping
- * of ISA space. So handle that here.
+ * The PCI/ISA range special-casing was removed from __ioremap()
+ * so this check, in theory, can be removed. However, there are
+ * cases where iounmap() is called for addresses not obtained via
+ * ioremap() (vga16fb for example). Add a warning so that these
+ * cases can be caught and fixed.
*/
if ((void __force *)addr >= phys_to_virt(ISA_START_ADDRESS) &&
- (void __force *)addr < phys_to_virt(ISA_END_ADDRESS))
+ (void __force *)addr < phys_to_virt(ISA_END_ADDRESS)) {
+ WARN(1, "iounmap() called for ISA range not obtained using ioremap()\n");
return;
+ }
+
+ mmiotrace_iounmap(addr);
addr = (volatile void __iomem *)
(PAGE_MASK & (unsigned long __force)addr);
- mmiotrace_iounmap(addr);
-
/* Use the vm area unlocked, assuming the caller
ensures there isn't another iounmap for the same address
in parallel. Reuse of the virtual address is prevented by
@@ -290,7 +491,9 @@ void iounmap(volatile void __iomem *addr)
return;
}
- free_memtype(p->phys_addr, p->phys_addr + get_vm_area_size(p));
+ kmsan_iounmap_page_range((unsigned long)addr,
+ (unsigned long)addr + get_vm_area_size(p));
+ memtype_free(p->phys_addr, p->phys_addr + get_vm_area_size(p));
/* Finally remove it */
o = remove_vm_area((void __force *)addr);
@@ -299,54 +502,334 @@ void iounmap(volatile void __iomem *addr)
}
EXPORT_SYMBOL(iounmap);
+void *arch_memremap_wb(phys_addr_t phys_addr, size_t size, unsigned long flags)
+{
+ if ((flags & MEMREMAP_DEC) || cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
+ return (void __force *)ioremap_cache(phys_addr, size);
+
+ return (void __force *)ioremap_encrypted(phys_addr, size);
+}
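arch_memremap_wb() is the x86 hook behind memremap(..., MEMREMAP_WB) for ranges that cannot simply be reached through the linear mapping. A hedged caller-side sketch (fw_phys and fw_len are made up):

	void *tbl = memremap(fw_phys, fw_len, MEMREMAP_WB);

	if (tbl) {
		/* ... parse the firmware table through a plain pointer ... */
		memunmap(tbl);
	}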
+
/*
* Convert a physical pointer to a virtual kernel pointer for /dev/mem
* access
*/
-void *xlate_dev_mem_ptr(unsigned long phys)
+void *xlate_dev_mem_ptr(phys_addr_t phys)
{
- void *addr;
- unsigned long start = phys & PAGE_MASK;
+ unsigned long start = phys & PAGE_MASK;
+ unsigned long offset = phys & ~PAGE_MASK;
+ void *vaddr;
- /* If page is RAM, we can use __va. Otherwise ioremap and unmap. */
- if (page_is_ram(start >> PAGE_SHIFT))
- return __va(phys);
+ /* memremap() maps if RAM, otherwise falls back to ioremap() */
+ vaddr = memremap(start, PAGE_SIZE, MEMREMAP_WB);
- addr = (void __force *)ioremap_cache(start, PAGE_SIZE);
- if (addr)
- addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK));
+ /* Only add the offset on success and return NULL if memremap() failed */
+ if (vaddr)
+ vaddr += offset;
- return addr;
+ return vaddr;
}
-void unxlate_dev_mem_ptr(unsigned long phys, void *addr)
+void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)
{
- if (page_is_ram(phys >> PAGE_SHIFT))
- return;
+ memunmap((void *)((unsigned long)addr & PAGE_MASK));
+}
- iounmap((void __iomem *)((unsigned long)addr & PAGE_MASK));
- return;
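These helpers are paired by the /dev/mem code in drivers/char/mem.c; a trimmed-down sketch of that pairing (p, buf, sz and err are assumed locals):

	void *ptr = xlate_dev_mem_ptr(p);	/* p: physical address */

	if (!ptr)
		return -EFAULT;

	if (copy_to_user(buf, ptr, sz))
		err = -EFAULT;

	unxlate_dev_mem_ptr(p, ptr);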
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+/*
+ * Examine the physical address to determine if it is an area of memory
+ * that should be mapped decrypted. If the memory is not part of the
+ * kernel usable area it was accessed and created decrypted, so these
+ * areas should be mapped decrypted. And since the encryption key can
+ * change across reboots, persistent memory should also be mapped
+ * decrypted.
+ *
+ * If SEV is active, that implies that BIOS/UEFI also ran encrypted so
+ * only persistent memory should be mapped decrypted.
+ */
+static bool memremap_should_map_decrypted(resource_size_t phys_addr,
+ unsigned long size)
+{
+ int is_pmem;
+
+ /*
+ * Check if the address is part of a persistent memory region.
+ * This check covers areas added by E820, EFI and ACPI.
+ */
+ is_pmem = region_intersects(phys_addr, size, IORESOURCE_MEM,
+ IORES_DESC_PERSISTENT_MEMORY);
+ if (is_pmem != REGION_DISJOINT)
+ return true;
+
+ /*
+ * Check if the non-volatile attribute is set for an EFI
+ * reserved area.
+ */
+ if (efi_enabled(EFI_BOOT)) {
+ switch (efi_mem_type(phys_addr)) {
+ case EFI_RESERVED_TYPE:
+ if (efi_mem_attributes(phys_addr) & EFI_MEMORY_NV)
+ return true;
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* Check if the address is outside kernel usable area */
+ switch (e820__get_entry_type(phys_addr, phys_addr + size - 1)) {
+ case E820_TYPE_RESERVED:
+ case E820_TYPE_ACPI:
+ case E820_TYPE_NVS:
+ case E820_TYPE_UNUSABLE:
+ /* For SEV, these areas are encrypted */
+ if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
+ break;
+ fallthrough;
+
+ case E820_TYPE_PRAM:
+ return true;
+ default:
+ break;
+ }
+
+ return false;
}
-static int __initdata early_ioremap_debug;
+/*
+ * Examine the physical address to determine if it is EFI data. Check
+ * it against the boot params structure and EFI tables and memory types.
+ */
+static bool memremap_is_efi_data(resource_size_t phys_addr)
+{
+ u64 paddr;
+
+ /* Check if the address is part of EFI boot/runtime data */
+ if (!efi_enabled(EFI_BOOT))
+ return false;
+
+ paddr = boot_params.efi_info.efi_memmap_hi;
+ paddr <<= 32;
+ paddr |= boot_params.efi_info.efi_memmap;
+ if (phys_addr == paddr)
+ return true;
+
+ paddr = boot_params.efi_info.efi_systab_hi;
+ paddr <<= 32;
+ paddr |= boot_params.efi_info.efi_systab;
+ if (phys_addr == paddr)
+ return true;
+
+ if (efi_is_table_address(phys_addr))
+ return true;
+
+ switch (efi_mem_type(phys_addr)) {
+ case EFI_BOOT_SERVICES_DATA:
+ case EFI_RUNTIME_SERVICES_DATA:
+ return true;
+ default:
+ break;
+ }
+
+ return false;
+}
-static int __init early_ioremap_debug_setup(char *str)
+/*
+ * Examine the physical address to determine if it is boot data by checking
+ * it against the boot params setup_data chain.
+ */
+static bool __ref __memremap_is_setup_data(resource_size_t phys_addr, bool early)
{
- early_ioremap_debug = 1;
+ unsigned int setup_data_sz = sizeof(struct setup_data);
+ struct setup_indirect *indirect;
+ struct setup_data *data;
+ u64 paddr, paddr_next;
+
+ paddr = boot_params.hdr.setup_data;
+ while (paddr) {
+ unsigned int len, size;
+
+ if (phys_addr == paddr)
+ return true;
+
+ if (early)
+ data = early_memremap_decrypted(paddr, setup_data_sz);
+ else
+ data = memremap(paddr, setup_data_sz, MEMREMAP_WB | MEMREMAP_DEC);
+ if (!data) {
+ pr_warn("failed to remap setup_data entry\n");
+ return false;
+ }
- return 0;
+ size = setup_data_sz;
+
+ paddr_next = data->next;
+ len = data->len;
+
+ if ((phys_addr > paddr) &&
+ (phys_addr < (paddr + setup_data_sz + len))) {
+ if (early)
+ early_memunmap(data, setup_data_sz);
+ else
+ memunmap(data);
+ return true;
+ }
+
+ if (data->type == SETUP_INDIRECT) {
+ size += len;
+ if (early) {
+ early_memunmap(data, setup_data_sz);
+ data = early_memremap_decrypted(paddr, size);
+ } else {
+ memunmap(data);
+ data = memremap(paddr, size, MEMREMAP_WB | MEMREMAP_DEC);
+ }
+ if (!data) {
+ pr_warn("failed to remap indirect setup_data\n");
+ return false;
+ }
+
+ indirect = (struct setup_indirect *)data->data;
+
+ if (indirect->type != SETUP_INDIRECT) {
+ paddr = indirect->addr;
+ len = indirect->len;
+ }
+ }
+
+ if (early)
+ early_memunmap(data, size);
+ else
+ memunmap(data);
+
+ if ((phys_addr > paddr) && (phys_addr < (paddr + len)))
+ return true;
+
+ paddr = paddr_next;
+ }
+
+ return false;
+}
+
+static bool memremap_is_setup_data(resource_size_t phys_addr)
+{
+ return __memremap_is_setup_data(phys_addr, false);
+}
+
+static bool __init early_memremap_is_setup_data(resource_size_t phys_addr)
+{
+ return __memremap_is_setup_data(phys_addr, true);
+}
+
+/*
+ * Architecture function to determine if RAM remap is allowed. By default, a
+ * RAM remap will map the data as encrypted. Determine if a RAM remap should
+ * not be done so that the data will be mapped decrypted.
+ */
+bool arch_memremap_can_ram_remap(resource_size_t phys_addr, unsigned long size,
+ unsigned long flags)
+{
+ if (!cc_platform_has(CC_ATTR_MEM_ENCRYPT))
+ return true;
+
+ if (flags & MEMREMAP_ENC)
+ return true;
+
+ if (flags & MEMREMAP_DEC)
+ return false;
+
+ if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) {
+ if (memremap_is_setup_data(phys_addr) ||
+ memremap_is_efi_data(phys_addr))
+ return false;
+ }
+
+ return !memremap_should_map_decrypted(phys_addr, size);
+}
+
+/*
+ * Architecture override of __weak function to adjust the protection attributes
+ * used when remapping memory. By default, early_memremap() will map the data
+ * as encrypted. Determine if an encrypted mapping should not be done and set
+ * the appropriate protection attributes.
+ */
+pgprot_t __init early_memremap_pgprot_adjust(resource_size_t phys_addr,
+ unsigned long size,
+ pgprot_t prot)
+{
+ bool encrypted_prot;
+
+ if (!cc_platform_has(CC_ATTR_MEM_ENCRYPT))
+ return prot;
+
+ encrypted_prot = true;
+
+ if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) {
+ if (early_memremap_is_setup_data(phys_addr) ||
+ memremap_is_efi_data(phys_addr))
+ encrypted_prot = false;
+ }
+
+ if (encrypted_prot && memremap_should_map_decrypted(phys_addr, size))
+ encrypted_prot = false;
+
+ return encrypted_prot ? pgprot_encrypted(prot)
+ : pgprot_decrypted(prot);
+}
+
+bool phys_mem_access_encrypted(unsigned long phys_addr, unsigned long size)
+{
+ return arch_memremap_can_ram_remap(phys_addr, size, 0);
+}
+
+/* Remap memory with encryption */
+void __init *early_memremap_encrypted(resource_size_t phys_addr,
+ unsigned long size)
+{
+ return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_ENC);
}
-early_param("early_ioremap_debug", early_ioremap_debug_setup);
-static __initdata int after_paging_init;
+/*
+ * Remap memory with encryption and write-protected - cannot be called
+ * before pat_init() is called
+ */
+void __init *early_memremap_encrypted_wp(resource_size_t phys_addr,
+ unsigned long size)
+{
+ if (!x86_has_pat_wp())
+ return NULL;
+ return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_ENC_WP);
+}
+
+/* Remap memory without encryption */
+void __init *early_memremap_decrypted(resource_size_t phys_addr,
+ unsigned long size)
+{
+ return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_NOENC);
+}
+
+/*
+ * Remap memory without encryption and write-protected - cannot be called
+ * before pat_init() is called
+ */
+void __init *early_memremap_decrypted_wp(resource_size_t phys_addr,
+ unsigned long size)
+{
+ if (!x86_has_pat_wp())
+ return NULL;
+ return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_NOENC_WP);
+}
+#endif /* CONFIG_AMD_MEM_ENCRYPT */
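A hedged boot-time sketch of the decrypted variant (blob_pa and blob_len are hypothetical); the *_wp variants follow the same pattern but, as noted above, only after pat_init() has run:

	void *p = early_memremap_decrypted(blob_pa, blob_len);

	if (p) {
		/* ... inspect the firmware blob through the temporary mapping ... */
		early_memunmap(p, blob_len);
	}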
+
static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss;
static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
{
/* Don't assume we're using swapper_pg_dir at this point */
- pgd_t *base = __va(read_cr3());
+ pgd_t *base = __va(read_cr3_pa());
pgd_t *pgd = &base[pgd_index(addr)];
- pud_t *pud = pud_offset(pgd, addr);
+ p4d_t *p4d = p4d_offset(pgd, addr);
+ pud_t *pud = pud_offset(p4d, addr);
pmd_t *pmd = pmd_offset(pud, addr);
return pmd;
@@ -362,18 +845,17 @@ bool __init is_early_ioremap_ptep(pte_t *ptep)
return ptep >= &bm_pte[0] && ptep < &bm_pte[PAGE_SIZE/sizeof(pte_t)];
}
-static unsigned long slot_virt[FIX_BTMAPS_SLOTS] __initdata;
-
void __init early_ioremap_init(void)
{
pmd_t *pmd;
- int i;
- if (early_ioremap_debug)
- printk(KERN_INFO "early_ioremap_init()\n");
+#ifdef CONFIG_X86_64
+ BUILD_BUG_ON((fix_to_virt(0) + PAGE_SIZE) & ((1 << PMD_SHIFT) - 1));
+#else
+ WARN_ON((fix_to_virt(0) + PAGE_SIZE) & ((1 << PMD_SHIFT) - 1));
+#endif
- for (i = 0; i < FIX_BTMAPS_SLOTS; i++)
- slot_virt[i] = __fix_to_virt(FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*i);
+ early_ioremap_setup();
pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
memset(bm_pte, 0, sizeof(bm_pte));
@@ -402,13 +884,8 @@ void __init early_ioremap_init(void)
}
}
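early_ioremap_init() only prepares the fixmap PMD here; the early_ioremap()/early_iounmap() implementation that used to live below was consolidated into the generic mm/early_ioremap.c. A hypothetical early-boot usage sketch (table_pa is made up):

	void __iomem *hdr = early_ioremap(table_pa, sizeof(struct acpi_table_header));

	if (hdr) {
		/* ... read the table header fields ... */
		early_iounmap(hdr, sizeof(struct acpi_table_header));
	}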
-void __init early_ioremap_reset(void)
-{
- after_paging_init = 1;
-}
-
-static void __init __early_set_fixmap(enum fixed_addresses idx,
- phys_addr_t phys, pgprot_t flags)
+void __init __early_set_fixmap(enum fixed_addresses idx,
+ phys_addr_t phys, pgprot_t flags)
{
unsigned long addr = __fix_to_virt(idx);
pte_t *pte;
@@ -419,205 +896,12 @@ static void __init __early_set_fixmap(enum fixed_addresses idx,
}
pte = early_ioremap_pte(addr);
+ /* Sanitize 'prot' against any unsupported bits: */
+ pgprot_val(flags) &= __supported_pte_mask;
+
if (pgprot_val(flags))
set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
else
pte_clear(&init_mm, addr, pte);
- __flush_tlb_one(addr);
-}
-
-static inline void __init early_set_fixmap(enum fixed_addresses idx,
- phys_addr_t phys, pgprot_t prot)
-{
- if (after_paging_init)
- __set_fixmap(idx, phys, prot);
- else
- __early_set_fixmap(idx, phys, prot);
-}
-
-static inline void __init early_clear_fixmap(enum fixed_addresses idx)
-{
- if (after_paging_init)
- clear_fixmap(idx);
- else
- __early_set_fixmap(idx, 0, __pgprot(0));
-}
-
-static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata;
-static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata;
-
-void __init fixup_early_ioremap(void)
-{
- int i;
-
- for (i = 0; i < FIX_BTMAPS_SLOTS; i++) {
- if (prev_map[i]) {
- WARN_ON(1);
- break;
- }
- }
-
- early_ioremap_init();
-}
-
-static int __init check_early_ioremap_leak(void)
-{
- int count = 0;
- int i;
-
- for (i = 0; i < FIX_BTMAPS_SLOTS; i++)
- if (prev_map[i])
- count++;
-
- if (!count)
- return 0;
- WARN(1, KERN_WARNING
- "Debug warning: early ioremap leak of %d areas detected.\n",
- count);
- printk(KERN_WARNING
- "please boot with early_ioremap_debug and report the dmesg.\n");
-
- return 1;
-}
-late_initcall(check_early_ioremap_leak);
-
-static void __init __iomem *
-__early_ioremap(resource_size_t phys_addr, unsigned long size, pgprot_t prot)
-{
- unsigned long offset;
- resource_size_t last_addr;
- unsigned int nrpages;
- enum fixed_addresses idx0, idx;
- int i, slot;
-
- WARN_ON(system_state != SYSTEM_BOOTING);
-
- slot = -1;
- for (i = 0; i < FIX_BTMAPS_SLOTS; i++) {
- if (!prev_map[i]) {
- slot = i;
- break;
- }
- }
-
- if (slot < 0) {
- printk(KERN_INFO "%s(%08llx, %08lx) not found slot\n",
- __func__, (u64)phys_addr, size);
- WARN_ON(1);
- return NULL;
- }
-
- if (early_ioremap_debug) {
- printk(KERN_INFO "%s(%08llx, %08lx) [%d] => ",
- __func__, (u64)phys_addr, size, slot);
- dump_stack();
- }
-
- /* Don't allow wraparound or zero size */
- last_addr = phys_addr + size - 1;
- if (!size || last_addr < phys_addr) {
- WARN_ON(1);
- return NULL;
- }
-
- prev_size[slot] = size;
- /*
- * Mappings have to be page-aligned
- */
- offset = phys_addr & ~PAGE_MASK;
- phys_addr &= PAGE_MASK;
- size = PAGE_ALIGN(last_addr + 1) - phys_addr;
-
- /*
- * Mappings have to fit in the FIX_BTMAP area.
- */
- nrpages = size >> PAGE_SHIFT;
- if (nrpages > NR_FIX_BTMAPS) {
- WARN_ON(1);
- return NULL;
- }
-
- /*
- * Ok, go for it..
- */
- idx0 = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot;
- idx = idx0;
- while (nrpages > 0) {
- early_set_fixmap(idx, phys_addr, prot);
- phys_addr += PAGE_SIZE;
- --idx;
- --nrpages;
- }
- if (early_ioremap_debug)
- printk(KERN_CONT "%08lx + %08lx\n", offset, slot_virt[slot]);
-
- prev_map[slot] = (void __iomem *)(offset + slot_virt[slot]);
- return prev_map[slot];
-}
-
-/* Remap an IO device */
-void __init __iomem *
-early_ioremap(resource_size_t phys_addr, unsigned long size)
-{
- return __early_ioremap(phys_addr, size, PAGE_KERNEL_IO);
-}
-
-/* Remap memory */
-void __init __iomem *
-early_memremap(resource_size_t phys_addr, unsigned long size)
-{
- return __early_ioremap(phys_addr, size, PAGE_KERNEL);
-}
-
-void __init early_iounmap(void __iomem *addr, unsigned long size)
-{
- unsigned long virt_addr;
- unsigned long offset;
- unsigned int nrpages;
- enum fixed_addresses idx;
- int i, slot;
-
- slot = -1;
- for (i = 0; i < FIX_BTMAPS_SLOTS; i++) {
- if (prev_map[i] == addr) {
- slot = i;
- break;
- }
- }
-
- if (slot < 0) {
- printk(KERN_INFO "early_iounmap(%p, %08lx) not found slot\n",
- addr, size);
- WARN_ON(1);
- return;
- }
-
- if (prev_size[slot] != size) {
- printk(KERN_INFO "early_iounmap(%p, %08lx) [%d] size not consistent %08lx\n",
- addr, size, slot, prev_size[slot]);
- WARN_ON(1);
- return;
- }
-
- if (early_ioremap_debug) {
- printk(KERN_INFO "early_iounmap(%p, %08lx) [%d]\n", addr,
- size, slot);
- dump_stack();
- }
-
- virt_addr = (unsigned long)addr;
- if (virt_addr < fix_to_virt(FIX_BTMAP_BEGIN)) {
- WARN_ON(1);
- return;
- }
- offset = virt_addr & ~PAGE_MASK;
- nrpages = PAGE_ALIGN(offset + size) >> PAGE_SHIFT;
-
- idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot;
- while (nrpages > 0) {
- early_clear_fixmap(idx);
- --idx;
- --nrpages;
- }
- prev_map[slot] = NULL;
+ flush_tlb_one_kernel(addr);
}