diff options
Diffstat (limited to 'arch/powerpc/kexec/ranges.c')
| -rw-r--r-- | arch/powerpc/kexec/ranges.c | 729 |
1 files changed, 729 insertions, 0 deletions
diff --git a/arch/powerpc/kexec/ranges.c b/arch/powerpc/kexec/ranges.c new file mode 100644 index 000000000000..867135560e5c --- /dev/null +++ b/arch/powerpc/kexec/ranges.c @@ -0,0 +1,729 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * powerpc code to implement the kexec_file_load syscall + * + * Copyright (C) 2004 Adam Litke (agl@us.ibm.com) + * Copyright (C) 2004 IBM Corp. + * Copyright (C) 2004,2005 Milton D Miller II, IBM Corporation + * Copyright (C) 2005 R Sharada (sharada@in.ibm.com) + * Copyright (C) 2006 Mohan Kumar M (mohan@in.ibm.com) + * Copyright (C) 2020 IBM Corporation + * + * Based on kexec-tools' kexec-ppc64.c, fs2dt.c. + * Heavily modified for the kernel by + * Hari Bathini, IBM Corporation. + */ + +#define pr_fmt(fmt) "kexec ranges: " fmt + +#include <linux/sort.h> +#include <linux/kexec.h> +#include <linux/of.h> +#include <linux/slab.h> +#include <linux/memblock.h> +#include <linux/crash_core.h> +#include <asm/sections.h> +#include <asm/kexec_ranges.h> +#include <asm/crashdump-ppc64.h> + +#if defined(CONFIG_KEXEC_FILE) || defined(CONFIG_CRASH_DUMP) +/** + * get_max_nr_ranges - Get the max no. of ranges crash_mem structure + * could hold, given the size allocated for it. + * @size: Allocation size of crash_mem structure. + * + * Returns the maximum no. of ranges. + */ +static inline unsigned int get_max_nr_ranges(size_t size) +{ + return ((size - sizeof(struct crash_mem)) / + sizeof(struct range)); +} + +/** + * get_mem_rngs_size - Get the allocated size of mem_rngs based on + * max_nr_ranges and chunk size. + * @mem_rngs: Memory ranges. + * + * Returns the maximum size of @mem_rngs. + */ +static inline size_t get_mem_rngs_size(struct crash_mem *mem_rngs) +{ + size_t size; + + if (!mem_rngs) + return 0; + + size = (sizeof(struct crash_mem) + + (mem_rngs->max_nr_ranges * sizeof(struct range))); + + /* + * Memory is allocated in size multiple of MEM_RANGE_CHUNK_SZ. + * So, align to get the actual length. + */ + return ALIGN(size, MEM_RANGE_CHUNK_SZ); +} + +/** + * __add_mem_range - add a memory range to memory ranges list. + * @mem_ranges: Range list to add the memory range to. + * @base: Base address of the range to add. + * @size: Size of the memory range to add. + * + * (Re)allocates memory, if needed. + * + * Returns 0 on success, negative errno on error. + */ +static int __add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size) +{ + struct crash_mem *mem_rngs = *mem_ranges; + + if (!mem_rngs || (mem_rngs->nr_ranges == mem_rngs->max_nr_ranges)) { + mem_rngs = realloc_mem_ranges(mem_ranges); + if (!mem_rngs) + return -ENOMEM; + } + + mem_rngs->ranges[mem_rngs->nr_ranges].start = base; + mem_rngs->ranges[mem_rngs->nr_ranges].end = base + size - 1; + pr_debug("Added memory range [%#016llx - %#016llx] at index %d\n", + base, base + size - 1, mem_rngs->nr_ranges); + mem_rngs->nr_ranges++; + return 0; +} + +/** + * __merge_memory_ranges - Merges the given memory ranges list. + * @mem_rngs: Range list to merge. + * + * Assumes a sorted range list. + * + * Returns nothing. + */ +static void __merge_memory_ranges(struct crash_mem *mem_rngs) +{ + struct range *ranges; + int i, idx; + + if (!mem_rngs) + return; + + idx = 0; + ranges = &(mem_rngs->ranges[0]); + for (i = 1; i < mem_rngs->nr_ranges; i++) { + if (ranges[i].start <= (ranges[i-1].end + 1)) + ranges[idx].end = ranges[i].end; + else { + idx++; + if (i == idx) + continue; + + ranges[idx] = ranges[i]; + } + } + mem_rngs->nr_ranges = idx + 1; +} + +/* cmp_func_t callback to sort ranges with sort() */ +static int rngcmp(const void *_x, const void *_y) +{ + const struct range *x = _x, *y = _y; + + if (x->start > y->start) + return 1; + if (x->start < y->start) + return -1; + return 0; +} + +/** + * sort_memory_ranges - Sorts the given memory ranges list. + * @mem_rngs: Range list to sort. + * @merge: If true, merge the list after sorting. + * + * Returns nothing. + */ +void sort_memory_ranges(struct crash_mem *mem_rngs, bool merge) +{ + int i; + + if (!mem_rngs) + return; + + /* Sort the ranges in-place */ + sort(&(mem_rngs->ranges[0]), mem_rngs->nr_ranges, + sizeof(mem_rngs->ranges[0]), rngcmp, NULL); + + if (merge) + __merge_memory_ranges(mem_rngs); + + /* For debugging purpose */ + pr_debug("Memory ranges:\n"); + for (i = 0; i < mem_rngs->nr_ranges; i++) { + pr_debug("\t[%03d][%#016llx - %#016llx]\n", i, + mem_rngs->ranges[i].start, + mem_rngs->ranges[i].end); + } +} + +/** + * realloc_mem_ranges - reallocate mem_ranges with size incremented + * by MEM_RANGE_CHUNK_SZ. Frees up the old memory, + * if memory allocation fails. + * @mem_ranges: Memory ranges to reallocate. + * + * Returns pointer to reallocated memory on success, NULL otherwise. + */ +struct crash_mem *realloc_mem_ranges(struct crash_mem **mem_ranges) +{ + struct crash_mem *mem_rngs = *mem_ranges; + unsigned int nr_ranges; + size_t size; + + size = get_mem_rngs_size(mem_rngs); + nr_ranges = mem_rngs ? mem_rngs->nr_ranges : 0; + + size += MEM_RANGE_CHUNK_SZ; + mem_rngs = krealloc(*mem_ranges, size, GFP_KERNEL); + if (!mem_rngs) { + kfree(*mem_ranges); + *mem_ranges = NULL; + return NULL; + } + + mem_rngs->nr_ranges = nr_ranges; + mem_rngs->max_nr_ranges = get_max_nr_ranges(size); + *mem_ranges = mem_rngs; + + return mem_rngs; +} + +/** + * add_mem_range - Updates existing memory range, if there is an overlap. + * Else, adds a new memory range. + * @mem_ranges: Range list to add the memory range to. + * @base: Base address of the range to add. + * @size: Size of the memory range to add. + * + * (Re)allocates memory, if needed. + * + * Returns 0 on success, negative errno on error. + */ +int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size) +{ + struct crash_mem *mem_rngs = *mem_ranges; + u64 mstart, mend, end; + unsigned int i; + + if (!size) + return 0; + + end = base + size - 1; + + if (!mem_rngs || !(mem_rngs->nr_ranges)) + return __add_mem_range(mem_ranges, base, size); + + for (i = 0; i < mem_rngs->nr_ranges; i++) { + mstart = mem_rngs->ranges[i].start; + mend = mem_rngs->ranges[i].end; + if (base < mend && end > mstart) { + if (base < mstart) + mem_rngs->ranges[i].start = base; + if (end > mend) + mem_rngs->ranges[i].end = end; + return 0; + } + } + + return __add_mem_range(mem_ranges, base, size); +} + +#endif /* CONFIG_KEXEC_FILE || CONFIG_CRASH_DUMP */ + +#ifdef CONFIG_KEXEC_FILE +/** + * add_tce_mem_ranges - Adds tce-table range to the given memory ranges list. + * @mem_ranges: Range list to add the memory range(s) to. + * + * Returns 0 on success, negative errno on error. + */ +static int add_tce_mem_ranges(struct crash_mem **mem_ranges) +{ + struct device_node *dn = NULL; + int ret = 0; + + for_each_node_by_type(dn, "pci") { + u64 base; + u32 size; + + ret = of_property_read_u64(dn, "linux,tce-base", &base); + ret |= of_property_read_u32(dn, "linux,tce-size", &size); + if (ret) { + /* + * It is ok to have pci nodes without tce. So, ignore + * property does not exist error. + */ + if (ret == -EINVAL) { + ret = 0; + continue; + } + break; + } + + ret = add_mem_range(mem_ranges, base, size); + if (ret) + break; + } + + of_node_put(dn); + return ret; +} + +/** + * add_initrd_mem_range - Adds initrd range to the given memory ranges list, + * if the initrd was retained. + * @mem_ranges: Range list to add the memory range to. + * + * Returns 0 on success, negative errno on error. + */ +static int add_initrd_mem_range(struct crash_mem **mem_ranges) +{ + u64 base, end; + int ret; + + /* This range means something, only if initrd was retained */ + if (!strstr(saved_command_line, "retain_initrd")) + return 0; + + ret = of_property_read_u64(of_chosen, "linux,initrd-start", &base); + ret |= of_property_read_u64(of_chosen, "linux,initrd-end", &end); + if (!ret) + ret = add_mem_range(mem_ranges, base, end - base + 1); + + return ret; +} + +/** + * add_htab_mem_range - Adds htab range to the given memory ranges list, + * if it exists + * @mem_ranges: Range list to add the memory range to. + * + * Returns 0 on success, negative errno on error. + */ +static int add_htab_mem_range(struct crash_mem **mem_ranges) +{ + +#ifdef CONFIG_PPC_64S_HASH_MMU + if (!htab_address) + return 0; + + return add_mem_range(mem_ranges, __pa(htab_address), htab_size_bytes); +#else + return 0; +#endif +} + +/** + * add_kernel_mem_range - Adds kernel text region to the given + * memory ranges list. + * @mem_ranges: Range list to add the memory range to. + * + * Returns 0 on success, negative errno on error. + */ +static int add_kernel_mem_range(struct crash_mem **mem_ranges) +{ + return add_mem_range(mem_ranges, 0, __pa(_end)); +} +#endif /* CONFIG_KEXEC_FILE */ + +#if defined(CONFIG_KEXEC_FILE) || defined(CONFIG_CRASH_DUMP) +/** + * add_rtas_mem_range - Adds RTAS region to the given memory ranges list. + * @mem_ranges: Range list to add the memory range to. + * + * Returns 0 on success, negative errno on error. + */ +static int add_rtas_mem_range(struct crash_mem **mem_ranges) +{ + struct device_node *dn; + u32 base, size; + int ret = 0; + + dn = of_find_node_by_path("/rtas"); + if (!dn) + return 0; + + ret = of_property_read_u32(dn, "linux,rtas-base", &base); + ret |= of_property_read_u32(dn, "rtas-size", &size); + if (!ret) + ret = add_mem_range(mem_ranges, base, size); + + of_node_put(dn); + return ret; +} + +/** + * add_opal_mem_range - Adds OPAL region to the given memory ranges list. + * @mem_ranges: Range list to add the memory range to. + * + * Returns 0 on success, negative errno on error. + */ +static int add_opal_mem_range(struct crash_mem **mem_ranges) +{ + struct device_node *dn; + u64 base, size; + int ret; + + dn = of_find_node_by_path("/ibm,opal"); + if (!dn) + return 0; + + ret = of_property_read_u64(dn, "opal-base-address", &base); + ret |= of_property_read_u64(dn, "opal-runtime-size", &size); + if (!ret) + ret = add_mem_range(mem_ranges, base, size); + + of_node_put(dn); + return ret; +} +#endif /* CONFIG_KEXEC_FILE || CONFIG_CRASH_DUMP */ + +#ifdef CONFIG_KEXEC_FILE +/** + * add_reserved_mem_ranges - Adds "/reserved-ranges" regions exported by f/w + * to the given memory ranges list. + * @mem_ranges: Range list to add the memory ranges to. + * + * Returns 0 on success, negative errno on error. + */ +static int add_reserved_mem_ranges(struct crash_mem **mem_ranges) +{ + int n_mem_addr_cells, n_mem_size_cells, i, len, cells, ret = 0; + struct device_node *root = of_find_node_by_path("/"); + const __be32 *prop; + + prop = of_get_property(root, "reserved-ranges", &len); + n_mem_addr_cells = of_n_addr_cells(root); + n_mem_size_cells = of_n_size_cells(root); + of_node_put(root); + if (!prop) + return 0; + + cells = n_mem_addr_cells + n_mem_size_cells; + + /* Each reserved range is an (address,size) pair */ + for (i = 0; i < (len / (sizeof(u32) * cells)); i++) { + u64 base, size; + + base = of_read_number(prop + (i * cells), n_mem_addr_cells); + size = of_read_number(prop + (i * cells) + n_mem_addr_cells, + n_mem_size_cells); + + ret = add_mem_range(mem_ranges, base, size); + if (ret) + break; + } + + return ret; +} + +/** + * get_reserved_memory_ranges - Get reserve memory ranges. This list includes + * memory regions that should be added to the + * memory reserve map to ensure the region is + * protected from any mischief. + * @mem_ranges: Range list to add the memory ranges to. + * + * Returns 0 on success, negative errno on error. + */ +int get_reserved_memory_ranges(struct crash_mem **mem_ranges) +{ + int ret; + + ret = add_rtas_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_tce_mem_ranges(mem_ranges); + if (ret) + goto out; + + ret = add_reserved_mem_ranges(mem_ranges); +out: + if (ret) + pr_err("Failed to setup reserved memory ranges\n"); + return ret; +} + +/** + * get_exclude_memory_ranges - Get exclude memory ranges. This list includes + * regions like opal/rtas, tce-table, initrd, + * kernel, htab which should be avoided while + * setting up kexec load segments. + * @mem_ranges: Range list to add the memory ranges to. + * + * Returns 0 on success, negative errno on error. + */ +int get_exclude_memory_ranges(struct crash_mem **mem_ranges) +{ + int ret; + + ret = add_tce_mem_ranges(mem_ranges); + if (ret) + goto out; + + ret = add_initrd_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_htab_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_kernel_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_rtas_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_opal_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_reserved_mem_ranges(mem_ranges); + if (ret) + goto out; + + /* exclude memory ranges should be sorted for easy lookup */ + sort_memory_ranges(*mem_ranges, true); +out: + if (ret) + pr_err("Failed to setup exclude memory ranges\n"); + return ret; +} + +#ifdef CONFIG_CRASH_DUMP +/** + * get_usable_memory_ranges - Get usable memory ranges. This list includes + * regions like crashkernel, opal/rtas & tce-table, + * that kdump kernel could use. + * @mem_ranges: Range list to add the memory ranges to. + * + * Returns 0 on success, negative errno on error. + */ +int get_usable_memory_ranges(struct crash_mem **mem_ranges) +{ + int ret, i; + + /* + * Early boot failure observed on guests when low memory (first memory + * block?) is not added to usable memory. So, add [0, crashk_res.end] + * instead of [crashk_res.start, crashk_res.end] to workaround it. + * Also, crashed kernel's memory must be added to reserve map to + * avoid kdump kernel from using it. + */ + ret = add_mem_range(mem_ranges, 0, crashk_res.end + 1); + if (ret) + goto out; + + for (i = 0; i < crashk_cma_cnt; i++) { + ret = add_mem_range(mem_ranges, crashk_cma_ranges[i].start, + crashk_cma_ranges[i].end - crashk_cma_ranges[i].start + 1); + if (ret) + goto out; + } + + ret = add_rtas_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_opal_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_tce_mem_ranges(mem_ranges); +out: + if (ret) + pr_err("Failed to setup usable memory ranges\n"); + return ret; +} +#endif /* CONFIG_CRASH_DUMP */ +#endif /* CONFIG_KEXEC_FILE */ + +#ifdef CONFIG_CRASH_DUMP +static int crash_exclude_mem_range_guarded(struct crash_mem **mem_ranges, + unsigned long long mstart, + unsigned long long mend) +{ + struct crash_mem *tmem = *mem_ranges; + + /* Reallocate memory ranges if there is no space to split ranges */ + if (tmem && (tmem->nr_ranges == tmem->max_nr_ranges)) { + tmem = realloc_mem_ranges(mem_ranges); + if (!tmem) + return -ENOMEM; + } + + return crash_exclude_mem_range(tmem, mstart, mend); +} + +/** + * get_crash_memory_ranges - Get crash memory ranges. This list includes + * first/crashing kernel's memory regions that + * would be exported via an elfcore. + * @mem_ranges: Range list to add the memory ranges to. + * + * Returns 0 on success, negative errno on error. + */ +int get_crash_memory_ranges(struct crash_mem **mem_ranges) +{ + phys_addr_t base, end; + u64 i; + int ret; + + for_each_mem_range(i, &base, &end) { + u64 size = end - base; + + /* Skip backup memory region, which needs a separate entry */ + if (base == BACKUP_SRC_START) { + if (size > BACKUP_SRC_SIZE) { + base = BACKUP_SRC_END + 1; + size -= BACKUP_SRC_SIZE; + } else + continue; + } + + ret = add_mem_range(mem_ranges, base, size); + if (ret) + goto out; + + /* Try merging adjacent ranges before reallocation attempt */ + if ((*mem_ranges)->nr_ranges == (*mem_ranges)->max_nr_ranges) + sort_memory_ranges(*mem_ranges, true); + } + + /* Exclude crashkernel region */ + ret = crash_exclude_mem_range_guarded(mem_ranges, crashk_res.start, crashk_res.end); + if (ret) + goto out; + + for (i = 0; i < crashk_cma_cnt; ++i) { + ret = crash_exclude_mem_range_guarded(mem_ranges, crashk_cma_ranges[i].start, + crashk_cma_ranges[i].end); + if (ret) + goto out; + } + + /* + * FIXME: For now, stay in parity with kexec-tools but if RTAS/OPAL + * regions are exported to save their context at the time of + * crash, they should actually be backed up just like the + * first 64K bytes of memory. + */ + ret = add_rtas_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_opal_mem_range(mem_ranges); + if (ret) + goto out; + + /* create a separate program header for the backup region */ + ret = add_mem_range(mem_ranges, BACKUP_SRC_START, BACKUP_SRC_SIZE); + if (ret) + goto out; + + sort_memory_ranges(*mem_ranges, false); +out: + if (ret) + pr_err("Failed to setup crash memory ranges\n"); + return ret; +} + +/** + * remove_mem_range - Removes the given memory range from the range list. + * @mem_ranges: Range list to remove the memory range to. + * @base: Base address of the range to remove. + * @size: Size of the memory range to remove. + * + * (Re)allocates memory, if needed. + * + * Returns 0 on success, negative errno on error. + */ +int remove_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size) +{ + u64 end; + int ret = 0; + unsigned int i; + u64 mstart, mend; + struct crash_mem *mem_rngs = *mem_ranges; + + if (!size) + return 0; + + /* + * Memory range are stored as start and end address, use + * the same format to do remove operation. + */ + end = base + size - 1; + + for (i = 0; i < mem_rngs->nr_ranges; i++) { + mstart = mem_rngs->ranges[i].start; + mend = mem_rngs->ranges[i].end; + + /* + * Memory range to remove is not part of this range entry + * in the memory range list + */ + if (!(base >= mstart && end <= mend)) + continue; + + /* + * Memory range to remove is equivalent to this entry in the + * memory range list. Remove the range entry from the list. + */ + if (base == mstart && end == mend) { + for (; i < mem_rngs->nr_ranges - 1; i++) { + mem_rngs->ranges[i].start = mem_rngs->ranges[i+1].start; + mem_rngs->ranges[i].end = mem_rngs->ranges[i+1].end; + } + mem_rngs->nr_ranges--; + goto out; + } + /* + * Start address of the memory range to remove and the + * current memory range entry in the list is same. Just + * move the start address of the current memory range + * entry in the list to end + 1. + */ + else if (base == mstart) { + mem_rngs->ranges[i].start = end + 1; + goto out; + } + /* + * End address of the memory range to remove and the + * current memory range entry in the list is same. + * Just move the end address of the current memory + * range entry in the list to base - 1. + */ + else if (end == mend) { + mem_rngs->ranges[i].end = base - 1; + goto out; + } + /* + * Memory range to remove is not at the edge of current + * memory range entry. Split the current memory entry into + * two half. + */ + else { + size = mem_rngs->ranges[i].end - end + 1; + mem_rngs->ranges[i].end = base - 1; + ret = add_mem_range(mem_ranges, end + 1, size); + } + } +out: + return ret; +} +#endif /* CONFIG_CRASH_DUMP */ |
