summaryrefslogtreecommitdiff
path: root/arch/powerpc/kexec
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kexec')
-rw-r--r--arch/powerpc/kexec/Makefile18
-rw-r--r--arch/powerpc/kexec/core.c173
-rw-r--r--arch/powerpc/kexec/core_32.c5
-rw-r--r--arch/powerpc/kexec/core_64.c159
-rw-r--r--arch/powerpc/kexec/crash.c281
-rw-r--r--arch/powerpc/kexec/elf_64.c85
-rw-r--r--arch/powerpc/kexec/file_load.c215
-rw-r--r--arch/powerpc/kexec/file_load_64.c871
-rw-r--r--arch/powerpc/kexec/ima.c219
-rw-r--r--arch/powerpc/kexec/ranges.c708
-rw-r--r--arch/powerpc/kexec/relocate_32.S21
-rw-r--r--arch/powerpc/kexec/vmcore_info.c32
12 files changed, 2172 insertions, 615 deletions
diff --git a/arch/powerpc/kexec/Makefile b/arch/powerpc/kexec/Makefile
index 378f6108a414..470eb0453e17 100644
--- a/arch/powerpc/kexec/Makefile
+++ b/arch/powerpc/kexec/Makefile
@@ -3,23 +3,17 @@
# Makefile for the linux kernel.
#
-# Avoid clang warnings around longjmp/setjmp declarations
-CFLAGS_crash.o += -ffreestanding
-
-obj-y += core.o crash.o core_$(BITS).o
+obj-y += core.o core_$(BITS).o ranges.o
obj-$(CONFIG_PPC32) += relocate_32.o
-obj-$(CONFIG_KEXEC_FILE) += file_load.o elf_$(BITS).o
-
-ifdef CONFIG_HAVE_IMA_KEXEC
-ifdef CONFIG_IMA
-obj-y += ima.o
-endif
-endif
-
+obj-$(CONFIG_KEXEC_FILE) += file_load.o file_load_$(BITS).o elf_$(BITS).o
+obj-$(CONFIG_VMCORE_INFO) += vmcore_info.o
+obj-$(CONFIG_CRASH_DUMP) += crash.o
# Disable GCOV, KCOV & sanitizers in odd or sensitive code
GCOV_PROFILE_core_$(BITS).o := n
KCOV_INSTRUMENT_core_$(BITS).o := n
UBSAN_SANITIZE_core_$(BITS).o := n
+KASAN_SANITIZE_core.o := n
+KASAN_SANITIZE_core_$(BITS) := n
diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c
index 078fe3d76feb..00e9c267b912 100644
--- a/arch/powerpc/kexec/core.c
+++ b/arch/powerpc/kexec/core.c
@@ -18,77 +18,21 @@
#include <asm/kdump.h>
#include <asm/machdep.h>
#include <asm/pgalloc.h>
-#include <asm/prom.h>
#include <asm/sections.h>
+#include <asm/setup.h>
+#include <asm/firmware.h>
-void machine_kexec_mask_interrupts(void) {
- unsigned int i;
- struct irq_desc *desc;
-
- for_each_irq_desc(i, desc) {
- struct irq_chip *chip;
-
- chip = irq_desc_get_chip(desc);
- if (!chip)
- continue;
-
- if (chip->irq_eoi && irqd_irq_inprogress(&desc->irq_data))
- chip->irq_eoi(&desc->irq_data);
-
- if (chip->irq_mask)
- chip->irq_mask(&desc->irq_data);
-
- if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data))
- chip->irq_disable(&desc->irq_data);
- }
-}
-
+#ifdef CONFIG_CRASH_DUMP
void machine_crash_shutdown(struct pt_regs *regs)
{
default_machine_crash_shutdown(regs);
}
-
-/*
- * Do what every setup is needed on image and the
- * reboot code buffer to allow us to avoid allocations
- * later.
- */
-int machine_kexec_prepare(struct kimage *image)
-{
- if (ppc_md.machine_kexec_prepare)
- return ppc_md.machine_kexec_prepare(image);
- else
- return default_machine_kexec_prepare(image);
-}
+#endif
void machine_kexec_cleanup(struct kimage *image)
{
}
-void arch_crash_save_vmcoreinfo(void)
-{
-
-#ifdef CONFIG_NEED_MULTIPLE_NODES
- VMCOREINFO_SYMBOL(node_data);
- VMCOREINFO_LENGTH(node_data, MAX_NUMNODES);
-#endif
-#ifndef CONFIG_NEED_MULTIPLE_NODES
- VMCOREINFO_SYMBOL(contig_page_data);
-#endif
-#if defined(CONFIG_PPC64) && defined(CONFIG_SPARSEMEM_VMEMMAP)
- VMCOREINFO_SYMBOL(vmemmap_list);
- VMCOREINFO_SYMBOL(mmu_vmemmap_psize);
- VMCOREINFO_SYMBOL(mmu_psize_defs);
- VMCOREINFO_STRUCT_SIZE(vmemmap_backing);
- VMCOREINFO_OFFSET(vmemmap_backing, list);
- VMCOREINFO_OFFSET(vmemmap_backing, phys);
- VMCOREINFO_OFFSET(vmemmap_backing, virt_addr);
- VMCOREINFO_STRUCT_SIZE(mmu_psize_def);
- VMCOREINFO_OFFSET(mmu_psize_def, shift);
-#endif
- vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
-}
-
/*
* Do not allocate memory (or fail in any way) in machine_kexec().
* We are past the point of no return, committed to rebooting now.
@@ -113,90 +57,80 @@ void machine_kexec(struct kimage *image)
for(;;);
}
-void __init reserve_crashkernel(void)
-{
- unsigned long long crash_size, crash_base;
- int ret;
-
- /* use common parsing */
- ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
- &crash_size, &crash_base);
- if (ret == 0 && crash_size > 0) {
- crashk_res.start = crash_base;
- crashk_res.end = crash_base + crash_size - 1;
- }
-
- if (crashk_res.end == crashk_res.start) {
- crashk_res.start = crashk_res.end = 0;
- return;
- }
-
- /* We might have got these values via the command line or the
- * device tree, either way sanitise them now. */
+#ifdef CONFIG_CRASH_RESERVE
- crash_size = resource_size(&crashk_res);
+static unsigned long long __init get_crash_base(unsigned long long crash_base)
+{
#ifndef CONFIG_NONSTATIC_KERNEL
- if (crashk_res.start != KDUMP_KERNELBASE)
+ if (crash_base != KDUMP_KERNELBASE)
printk("Crash kernel location must be 0x%x\n",
KDUMP_KERNELBASE);
- crashk_res.start = KDUMP_KERNELBASE;
+ return KDUMP_KERNELBASE;
#else
- if (!crashk_res.start) {
+ unsigned long long crash_base_align;
+
+ if (!crash_base) {
#ifdef CONFIG_PPC64
/*
- * On 64bit we split the RMO in half but cap it at half of
- * a small SLB (128MB) since the crash kernel needs to place
- * itself and some stacks to be in the first segment.
+ * On the LPAR platform place the crash kernel to mid of
+ * RMA size (max. of 512MB) to ensure the crash kernel
+ * gets enough space to place itself and some stack to be
+ * in the first segment. At the same time normal kernel
+ * also get enough space to allocate memory for essential
+ * system resource in the first segment. Keep the crash
+ * kernel starts at 128MB offset on other platforms.
*/
- crashk_res.start = min(0x8000000ULL, (ppc64_rma_size / 2));
+ if (firmware_has_feature(FW_FEATURE_LPAR))
+ crash_base = min_t(u64, ppc64_rma_size / 2, SZ_512M);
+ else
+ crash_base = min_t(u64, ppc64_rma_size / 2, SZ_128M);
#else
- crashk_res.start = KDUMP_KERNELBASE;
+ crash_base = KDUMP_KERNELBASE;
#endif
}
- crash_base = PAGE_ALIGN(crashk_res.start);
- if (crash_base != crashk_res.start) {
- printk("Crash kernel base must be aligned to 0x%lx\n",
- PAGE_SIZE);
- crashk_res.start = crash_base;
- }
+ crash_base_align = PAGE_ALIGN(crash_base);
+ if (crash_base != crash_base_align)
+ pr_warn("Crash kernel base must be aligned to 0x%lx\n", PAGE_SIZE);
+ return crash_base_align;
#endif
- crash_size = PAGE_ALIGN(crash_size);
- crashk_res.end = crashk_res.start + crash_size - 1;
+}
- /* The crash region must not overlap the current kernel */
- if (overlaps_crashkernel(__pa(_stext), _end - _stext)) {
- printk(KERN_WARNING
- "Crash kernel can not overlap current kernel\n");
- crashk_res.start = crashk_res.end = 0;
+void __init arch_reserve_crashkernel(void)
+{
+ unsigned long long crash_size, crash_base, crash_end;
+ unsigned long long kernel_start, kernel_size;
+ unsigned long long total_mem_sz;
+ int ret;
+
+ total_mem_sz = memory_limit ? memory_limit : memblock_phys_mem_size();
+
+ /* use common parsing */
+ ret = parse_crashkernel(boot_command_line, total_mem_sz, &crash_size,
+ &crash_base, NULL, NULL);
+
+ if (ret)
return;
- }
- /* Crash kernel trumps memory limit */
- if (memory_limit && memory_limit <= crashk_res.end) {
- memory_limit = crashk_res.end + 1;
- printk("Adjusted memory limit for crashkernel, now 0x%llx\n",
- memory_limit);
- }
+ crash_base = get_crash_base(crash_base);
+ crash_end = crash_base + crash_size - 1;
- printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
- "for crashkernel (System RAM: %ldMB)\n",
- (unsigned long)(crash_size >> 20),
- (unsigned long)(crashk_res.start >> 20),
- (unsigned long)(memblock_phys_mem_size() >> 20));
+ kernel_start = __pa(_stext);
+ kernel_size = _end - _stext;
- if (!memblock_is_region_memory(crashk_res.start, crash_size) ||
- memblock_reserve(crashk_res.start, crash_size)) {
- pr_err("Failed to reserve memory for crashkernel!\n");
- crashk_res.start = crashk_res.end = 0;
+ /* The crash region must not overlap the current kernel */
+ if ((kernel_start + kernel_size > crash_base) && (kernel_start <= crash_end)) {
+ pr_warn("Crash kernel can not overlap current kernel\n");
return;
}
+
+ reserve_crashkernel_generic(crash_size, crash_base, 0, false);
}
-int overlaps_crashkernel(unsigned long start, unsigned long size)
+int __init overlaps_crashkernel(unsigned long start, unsigned long size)
{
return (start + size) > crashk_res.start && start <= crashk_res.end;
}
@@ -278,3 +212,4 @@ static int __init kexec_setup(void)
return 0;
}
late_initcall(kexec_setup);
+#endif /* CONFIG_CRASH_RESERVE */
diff --git a/arch/powerpc/kexec/core_32.c b/arch/powerpc/kexec/core_32.c
index bf9f1f906d64..deb28eb44f30 100644
--- a/arch/powerpc/kexec/core_32.c
+++ b/arch/powerpc/kexec/core_32.c
@@ -7,6 +7,7 @@
* Copyright (C) 2005 IBM Corporation.
*/
+#include <linux/irq.h>
#include <linux/kexec.h>
#include <linux/mm.h>
#include <linux/string.h>
@@ -55,7 +56,7 @@ void default_machine_kexec(struct kimage *image)
reboot_code_buffer + KEXEC_CONTROL_PAGE_SIZE);
printk(KERN_INFO "Bye!\n");
- if (!IS_ENABLED(CONFIG_FSL_BOOKE) && !IS_ENABLED(CONFIG_44x))
+ if (!IS_ENABLED(CONFIG_PPC_85xx) && !IS_ENABLED(CONFIG_44x))
relocate_new_kernel(page_list, reboot_code_buffer_phys, image->start);
/* now call it */
@@ -63,7 +64,7 @@ void default_machine_kexec(struct kimage *image)
(*rnk)(page_list, reboot_code_buffer_phys, image->start);
}
-int default_machine_kexec_prepare(struct kimage *image)
+int machine_kexec_prepare(struct kimage *image)
{
return 0;
}
diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c
index 04a7cba58eff..222aa326dace 100644
--- a/arch/powerpc/kexec/core_64.c
+++ b/arch/powerpc/kexec/core_64.c
@@ -16,6 +16,8 @@
#include <linux/kernel.h>
#include <linux/cpu.h>
#include <linux/hardirq.h>
+#include <linux/of.h>
+#include <linux/libfdt.h>
#include <asm/page.h>
#include <asm/current.h>
@@ -25,14 +27,14 @@
#include <asm/paca.h>
#include <asm/mmu.h>
#include <asm/sections.h> /* _end */
-#include <asm/prom.h>
+#include <asm/setup.h>
#include <asm/smp.h>
#include <asm/hw_breakpoint.h>
-#include <asm/asm-prototypes.h>
#include <asm/svm.h>
#include <asm/ultravisor.h>
+#include <asm/crashdump-ppc64.h>
-int default_machine_kexec_prepare(struct kimage *image)
+int machine_kexec_prepare(struct kimage *image)
{
int i;
unsigned long begin, end; /* limits of segment */
@@ -64,15 +66,18 @@ int default_machine_kexec_prepare(struct kimage *image)
begin = image->segment[i].mem;
end = begin + image->segment[i].memsz;
- if ((begin < high) && (end > low))
+ if ((begin < high) && (end > low)) {
+ of_node_put(node);
return -ETXTBSY;
+ }
}
}
return 0;
}
-static void copy_segments(unsigned long ind)
+/* Called during kexec sequence with MMU off */
+static notrace void copy_segments(unsigned long ind)
{
unsigned long entry;
unsigned long *ptr;
@@ -105,7 +110,8 @@ static void copy_segments(unsigned long ind)
}
}
-void kexec_copy_flush(struct kimage *image)
+/* Called during kexec sequence with MMU off */
+notrace void kexec_copy_flush(struct kimage *image)
{
long i, nr_segments = image->nr_segments;
struct kexec_segment ranges[KEXEC_SEGMENT_MAX];
@@ -152,6 +158,8 @@ static void kexec_smp_down(void *arg)
if (ppc_md.kexec_cpu_down)
ppc_md.kexec_cpu_down(0, 1);
+ reset_sprs();
+
kexec_smp_wait();
/* NOTREACHED */
}
@@ -212,7 +220,7 @@ static void wake_offline_cpus(void)
if (!cpu_online(cpu)) {
printk(KERN_INFO "kexec: Waking offline cpu %d.\n",
cpu);
- WARN_ON(cpu_up(cpu));
+ WARN_ON(add_cpu(cpu));
}
}
}
@@ -278,14 +286,13 @@ static void kexec_prepare_cpus(void)
* We could use a smaller stack if we don't care about anything using
* current, but that audit has not been performed.
*/
-static union thread_union kexec_stack __init_task_data =
- { };
+static union thread_union kexec_stack = { };
/*
* For similar reasons to the stack above, the kexecing CPU needs to be on a
* static PACA; we switch to kexec_paca.
*/
-struct paca_struct kexec_paca;
+static struct paca_struct kexec_paca;
/* Our assembly helper, in misc_64.S */
extern void kexec_sequence(void *newstack, unsigned long start,
@@ -311,6 +318,16 @@ void default_machine_kexec(struct kimage *image)
if (!kdump_in_progress())
kexec_prepare_cpus();
+#ifdef CONFIG_PPC_PSERIES
+ /*
+ * This must be done after other CPUs have shut down, otherwise they
+ * could execute the 'scv' instruction, which is not supported with
+ * reloc disabled (see configure_exceptions()).
+ */
+ if (firmware_has_feature(FW_FEATURE_SET_MODE))
+ pseries_disable_reloc_on_exc();
+#endif
+
printk("kexec: Starting switchover sequence.\n");
/* switch to a staticly allocated stack. Based on irq stack code.
@@ -355,7 +372,7 @@ void default_machine_kexec(struct kimage *image)
* the RMA. On BookE there is no real MMU off mode, so we have to
* keep it enabled as well (but then we have bolted TLB entries).
*/
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
copy_with_mmu_off = false;
#else
copy_with_mmu_off = radix_enabled() ||
@@ -372,10 +389,10 @@ void default_machine_kexec(struct kimage *image)
/* NOTREACHED */
}
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
/* Values we need to export to the second kernel via the device tree. */
-static unsigned long htab_base;
-static unsigned long htab_size;
+static __be64 htab_base;
+static __be64 htab_size;
static struct property htab_base_prop = {
.name = "linux,htab-base",
@@ -401,7 +418,7 @@ static int __init export_htab_values(void)
if (!node)
return -ENODEV;
- /* remove any stale propertys so ours can be found */
+ /* remove any stale properties so ours can be found */
of_remove_property(node, of_find_property(node, htab_base_prop.name, NULL));
of_remove_property(node, of_find_property(node, htab_size_prop.name, NULL));
@@ -414,4 +431,114 @@ static int __init export_htab_values(void)
return 0;
}
late_initcall(export_htab_values);
-#endif /* CONFIG_PPC_BOOK3S_64 */
+#endif /* CONFIG_PPC_64S_HASH_MMU */
+
+#if defined(CONFIG_KEXEC_FILE) || defined(CONFIG_CRASH_DUMP)
+/**
+ * add_node_props - Reads node properties from device node structure and add
+ * them to fdt.
+ * @fdt: Flattened device tree of the kernel
+ * @node_offset: offset of the node to add a property at
+ * @dn: device node pointer
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int add_node_props(void *fdt, int node_offset, const struct device_node *dn)
+{
+ int ret = 0;
+ struct property *pp;
+
+ if (!dn)
+ return -EINVAL;
+
+ for_each_property_of_node(dn, pp) {
+ ret = fdt_setprop(fdt, node_offset, pp->name, pp->value, pp->length);
+ if (ret < 0) {
+ pr_err("Unable to add %s property: %s\n", pp->name, fdt_strerror(ret));
+ return ret;
+ }
+ }
+ return ret;
+}
+
+/**
+ * update_cpus_node - Update cpus node of flattened device tree using of_root
+ * device node.
+ * @fdt: Flattened device tree of the kernel.
+ *
+ * Returns 0 on success, negative errno on error.
+ *
+ * Note: expecting no subnodes under /cpus/<node> with device_type == "cpu".
+ * If this changes, update this function to include them.
+ */
+int update_cpus_node(void *fdt)
+{
+ int prev_node_offset;
+ const char *device_type;
+ const struct fdt_property *prop;
+ struct device_node *cpus_node, *dn;
+ int cpus_offset, cpus_subnode_offset, ret = 0;
+
+ cpus_offset = fdt_path_offset(fdt, "/cpus");
+ if (cpus_offset < 0 && cpus_offset != -FDT_ERR_NOTFOUND) {
+ pr_err("Malformed device tree: error reading /cpus node: %s\n",
+ fdt_strerror(cpus_offset));
+ return cpus_offset;
+ }
+
+ prev_node_offset = cpus_offset;
+ /* Delete sub-nodes of /cpus node with device_type == "cpu" */
+ for (cpus_subnode_offset = fdt_first_subnode(fdt, cpus_offset); cpus_subnode_offset >= 0;) {
+ /* Ignore nodes that do not have a device_type property or device_type != "cpu" */
+ prop = fdt_get_property(fdt, cpus_subnode_offset, "device_type", NULL);
+ if (!prop || strcmp(prop->data, "cpu")) {
+ prev_node_offset = cpus_subnode_offset;
+ goto next_node;
+ }
+
+ ret = fdt_del_node(fdt, cpus_subnode_offset);
+ if (ret < 0) {
+ pr_err("Failed to delete a cpus sub-node: %s\n", fdt_strerror(ret));
+ return ret;
+ }
+next_node:
+ if (prev_node_offset == cpus_offset)
+ cpus_subnode_offset = fdt_first_subnode(fdt, cpus_offset);
+ else
+ cpus_subnode_offset = fdt_next_subnode(fdt, prev_node_offset);
+ }
+
+ cpus_node = of_find_node_by_path("/cpus");
+ /* Fail here to avoid kexec/kdump kernel boot hung */
+ if (!cpus_node) {
+ pr_err("No /cpus node found\n");
+ return -EINVAL;
+ }
+
+ /* Add all /cpus sub-nodes of device_type == "cpu" to FDT */
+ for_each_child_of_node(cpus_node, dn) {
+ /* Ignore device nodes that do not have a device_type property
+ * or device_type != "cpu".
+ */
+ device_type = of_get_property(dn, "device_type", NULL);
+ if (!device_type || strcmp(device_type, "cpu"))
+ continue;
+
+ cpus_subnode_offset = fdt_add_subnode(fdt, cpus_offset, dn->full_name);
+ if (cpus_subnode_offset < 0) {
+ pr_err("Unable to add %s subnode: %s\n", dn->full_name,
+ fdt_strerror(cpus_subnode_offset));
+ ret = cpus_subnode_offset;
+ goto out;
+ }
+
+ ret = add_node_props(fdt, cpus_subnode_offset, dn);
+ if (ret < 0)
+ goto out;
+ }
+out:
+ of_node_put(cpus_node);
+ of_node_put(dn);
+ return ret;
+}
+#endif /* CONFIG_KEXEC_FILE || CONFIG_CRASH_DUMP */
diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c
index d488311efab1..9ac3266e4965 100644
--- a/arch/powerpc/kexec/crash.c
+++ b/arch/powerpc/kexec/crash.c
@@ -16,14 +16,17 @@
#include <linux/delay.h>
#include <linux/irq.h>
#include <linux/types.h>
+#include <linux/libfdt.h>
+#include <linux/memory.h>
#include <asm/processor.h>
#include <asm/machdep.h>
#include <asm/kexec.h>
-#include <asm/prom.h>
#include <asm/smp.h>
#include <asm/setjmp.h>
#include <asm/debug.h>
+#include <asm/interrupt.h>
+#include <asm/kexec_ranges.h>
/*
* The primary CPU waits a while for all secondary CPUs to enter. This is to
@@ -40,6 +43,14 @@
#define REAL_MODE_TIMEOUT 10000
static int time_to_dump;
+
+/*
+ * In case of system reset, secondary CPUs enter crash_kexec_secondary with out
+ * having to send an IPI explicitly. So, indicate if the crash is via
+ * system reset to avoid sending another IPI.
+ */
+static int is_via_system_reset;
+
/*
* crash_wake_offline should be set to 1 by platforms that intend to wake
* up offline cpus prior to jumping to a kdump kernel. Currently powernv
@@ -101,11 +112,11 @@ void crash_ipi_callback(struct pt_regs *regs)
/* NOTREACHED */
}
-static void crash_kexec_prepare_cpus(int cpu)
+static void crash_kexec_prepare_cpus(void)
{
unsigned int msecs;
- unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */
- int tries = 0;
+ volatile unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */
+ volatile int tries = 0;
int (*old_handler)(struct pt_regs *regs);
printk(KERN_EMERG "Sending IPI to other CPUs\n");
@@ -113,7 +124,15 @@ static void crash_kexec_prepare_cpus(int cpu)
if (crash_wake_offline)
ncpus = num_present_cpus() - 1;
- crash_send_ipi(crash_ipi_callback);
+ /*
+ * If we came in via system reset, secondaries enter via crash_kexec_secondary().
+ * So, wait a while for the secondary CPUs to enter for that case.
+ * Else, send IPI to all other CPUs.
+ */
+ if (is_via_system_reset)
+ mdelay(PRIMARY_TIMEOUT);
+ else
+ crash_send_ipi(crash_ipi_callback);
smp_wmb();
again:
@@ -202,7 +221,7 @@ void crash_kexec_secondary(struct pt_regs *regs)
#else /* ! CONFIG_SMP */
-static void crash_kexec_prepare_cpus(int cpu)
+static void crash_kexec_prepare_cpus(void)
{
/*
* move the secondaries to us so that we can copy
@@ -224,7 +243,7 @@ void crash_kexec_secondary(struct pt_regs *regs)
/* wait for all the CPUs to hit real mode but timeout if they don't come in */
#if defined(CONFIG_SMP) && defined(CONFIG_PPC64)
-static void __maybe_unused crash_kexec_wait_realmode(int cpu)
+noinstr static void __maybe_unused crash_kexec_wait_realmode(int cpu)
{
unsigned int msecs;
int i;
@@ -248,6 +267,32 @@ static void __maybe_unused crash_kexec_wait_realmode(int cpu)
static inline void crash_kexec_wait_realmode(int cpu) {}
#endif /* CONFIG_SMP && CONFIG_PPC64 */
+void crash_kexec_prepare(void)
+{
+ /* Avoid hardlocking with irresponsive CPU holding logbuf_lock */
+ printk_deferred_enter();
+
+ /*
+ * This function is only called after the system
+ * has panicked or is otherwise in a critical state.
+ * The minimum amount of code to allow a kexec'd kernel
+ * to run successfully needs to happen here.
+ *
+ * In practice this means stopping other cpus in
+ * an SMP system.
+ * The kernel is broken so disable interrupts.
+ */
+ hard_irq_disable();
+
+ /*
+ * Make a note of crashing cpu. Will be used in machine_kexec
+ * such that another IPI will not be sent.
+ */
+ crashing_cpu = smp_processor_id();
+
+ crash_kexec_prepare_cpus();
+}
+
/*
* Register a function to be called on shutdown. Only use this if you
* can't reset your device in the second kernel.
@@ -308,35 +353,13 @@ EXPORT_SYMBOL(crash_shutdown_unregister);
void default_machine_crash_shutdown(struct pt_regs *regs)
{
- unsigned int i;
+ volatile unsigned int i;
int (*old_handler)(struct pt_regs *regs);
- /*
- * This function is only called after the system
- * has panicked or is otherwise in a critical state.
- * The minimum amount of code to allow a kexec'd kernel
- * to run successfully needs to happen here.
- *
- * In practice this means stopping other cpus in
- * an SMP system.
- * The kernel is broken so disable interrupts.
- */
- hard_irq_disable();
+ if (TRAP(regs) == INTERRUPT_SYSTEM_RESET)
+ is_via_system_reset = 1;
- /*
- * Make a note of crashing cpu. Will be used in machine_kexec
- * such that another IPI will not be sent.
- */
- crashing_cpu = smp_processor_id();
-
- /*
- * If we came in via system reset, wait a while for the secondary
- * CPUs to enter.
- */
- if (TRAP(regs) == 0x100)
- mdelay(PRIMARY_TIMEOUT);
-
- crash_kexec_prepare_cpus(crashing_cpu);
+ crash_smp_send_stop();
crash_save_cpu(regs, crashing_cpu);
@@ -372,3 +395,195 @@ void default_machine_crash_shutdown(struct pt_regs *regs)
if (ppc_md.kexec_cpu_down)
ppc_md.kexec_cpu_down(1, 0);
}
+
+#ifdef CONFIG_CRASH_HOTPLUG
+#undef pr_fmt
+#define pr_fmt(fmt) "crash hp: " fmt
+
+/*
+ * Advertise preferred elfcorehdr size to userspace via
+ * /sys/kernel/crash_elfcorehdr_size sysfs interface.
+ */
+unsigned int arch_crash_get_elfcorehdr_size(void)
+{
+ unsigned long phdr_cnt;
+
+ /* A program header for possible CPUs + vmcoreinfo */
+ phdr_cnt = num_possible_cpus() + 1;
+ if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG))
+ phdr_cnt += CONFIG_CRASH_MAX_MEMORY_RANGES;
+
+ return sizeof(struct elfhdr) + (phdr_cnt * sizeof(Elf64_Phdr));
+}
+
+/**
+ * update_crash_elfcorehdr() - Recreate the elfcorehdr and replace it with old
+ * elfcorehdr in the kexec segment array.
+ * @image: the active struct kimage
+ * @mn: struct memory_notify data handler
+ */
+static void update_crash_elfcorehdr(struct kimage *image, struct memory_notify *mn)
+{
+ int ret;
+ struct crash_mem *cmem = NULL;
+ struct kexec_segment *ksegment;
+ void *ptr, *mem, *elfbuf = NULL;
+ unsigned long elfsz, memsz, base_addr, size;
+
+ ksegment = &image->segment[image->elfcorehdr_index];
+ mem = (void *) ksegment->mem;
+ memsz = ksegment->memsz;
+
+ ret = get_crash_memory_ranges(&cmem);
+ if (ret) {
+ pr_err("Failed to get crash mem range\n");
+ return;
+ }
+
+ /*
+ * The hot unplugged memory is part of crash memory ranges,
+ * remove it here.
+ */
+ if (image->hp_action == KEXEC_CRASH_HP_REMOVE_MEMORY) {
+ base_addr = PFN_PHYS(mn->start_pfn);
+ size = mn->nr_pages * PAGE_SIZE;
+ ret = remove_mem_range(&cmem, base_addr, size);
+ if (ret) {
+ pr_err("Failed to remove hot-unplugged memory from crash memory ranges\n");
+ goto out;
+ }
+ }
+
+ ret = crash_prepare_elf64_headers(cmem, false, &elfbuf, &elfsz);
+ if (ret) {
+ pr_err("Failed to prepare elf header\n");
+ goto out;
+ }
+
+ /*
+ * It is unlikely that kernel hit this because elfcorehdr kexec
+ * segment (memsz) is built with addition space to accommodate growing
+ * number of crash memory ranges while loading the kdump kernel. It is
+ * Just to avoid any unforeseen case.
+ */
+ if (elfsz > memsz) {
+ pr_err("Updated crash elfcorehdr elfsz %lu > memsz %lu", elfsz, memsz);
+ goto out;
+ }
+
+ ptr = __va(mem);
+ if (ptr) {
+ /* Temporarily invalidate the crash image while it is replaced */
+ xchg(&kexec_crash_image, NULL);
+
+ /* Replace the old elfcorehdr with newly prepared elfcorehdr */
+ memcpy((void *)ptr, elfbuf, elfsz);
+
+ /* The crash image is now valid once again */
+ xchg(&kexec_crash_image, image);
+ }
+out:
+ kvfree(cmem);
+ kvfree(elfbuf);
+}
+
+/**
+ * get_fdt_index - Loop through the kexec segment array and find
+ * the index of the FDT segment.
+ * @image: a pointer to kexec_crash_image
+ *
+ * Returns the index of FDT segment in the kexec segment array
+ * if found; otherwise -1.
+ */
+static int get_fdt_index(struct kimage *image)
+{
+ void *ptr;
+ unsigned long mem;
+ int i, fdt_index = -1;
+
+ /* Find the FDT segment index in kexec segment array. */
+ for (i = 0; i < image->nr_segments; i++) {
+ mem = image->segment[i].mem;
+ ptr = __va(mem);
+
+ if (ptr && fdt_magic(ptr) == FDT_MAGIC) {
+ fdt_index = i;
+ break;
+ }
+ }
+
+ return fdt_index;
+}
+
+/**
+ * update_crash_fdt - updates the cpus node of the crash FDT.
+ *
+ * @image: a pointer to kexec_crash_image
+ */
+static void update_crash_fdt(struct kimage *image)
+{
+ void *fdt;
+ int fdt_index;
+
+ fdt_index = get_fdt_index(image);
+ if (fdt_index < 0) {
+ pr_err("Unable to locate FDT segment.\n");
+ return;
+ }
+
+ fdt = __va((void *)image->segment[fdt_index].mem);
+
+ /* Temporarily invalidate the crash image while it is replaced */
+ xchg(&kexec_crash_image, NULL);
+
+ /* update FDT to reflect changes in CPU resources */
+ if (update_cpus_node(fdt))
+ pr_err("Failed to update crash FDT");
+
+ /* The crash image is now valid once again */
+ xchg(&kexec_crash_image, image);
+}
+
+int arch_crash_hotplug_support(struct kimage *image, unsigned long kexec_flags)
+{
+#ifdef CONFIG_KEXEC_FILE
+ if (image->file_mode)
+ return 1;
+#endif
+ return kexec_flags & KEXEC_CRASH_HOTPLUG_SUPPORT;
+}
+
+/**
+ * arch_crash_handle_hotplug_event - Handle crash CPU/Memory hotplug events to update the
+ * necessary kexec segments based on the hotplug event.
+ * @image: a pointer to kexec_crash_image
+ * @arg: struct memory_notify handler for memory hotplug case and NULL for CPU hotplug case.
+ *
+ * Update the kdump image based on the type of hotplug event, represented by image->hp_action.
+ * CPU add: Update the FDT segment to include the newly added CPU.
+ * CPU remove: No action is needed, with the assumption that it's okay to have offline CPUs
+ * part of the FDT.
+ * Memory add/remove: No action is taken as this is not yet supported.
+ */
+void arch_crash_handle_hotplug_event(struct kimage *image, void *arg)
+{
+ struct memory_notify *mn;
+
+ switch (image->hp_action) {
+ case KEXEC_CRASH_HP_REMOVE_CPU:
+ return;
+
+ case KEXEC_CRASH_HP_ADD_CPU:
+ update_crash_fdt(image);
+ break;
+
+ case KEXEC_CRASH_HP_REMOVE_MEMORY:
+ case KEXEC_CRASH_HP_ADD_MEMORY:
+ mn = (struct memory_notify *)arg;
+ update_crash_elfcorehdr(image, mn);
+ return;
+ default:
+ pr_warn_once("Unknown hotplug action\n");
+ }
+}
+#endif /* CONFIG_CRASH_HOTPLUG */
diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c
index 3072fd6dbe94..5d6d616404cf 100644
--- a/arch/powerpc/kexec/elf_64.c
+++ b/arch/powerpc/kexec/elf_64.c
@@ -19,9 +19,11 @@
#include <linux/kexec.h>
#include <linux/libfdt.h>
#include <linux/module.h>
+#include <linux/of.h>
#include <linux/of_fdt.h>
#include <linux/slab.h>
#include <linux/types.h>
+#include <asm/kexec_ranges.h>
static void *elf64_load(struct kimage *image, char *kernel_buf,
unsigned long kernel_len, char *initrd,
@@ -29,12 +31,13 @@ static void *elf64_load(struct kimage *image, char *kernel_buf,
unsigned long cmdline_len)
{
int ret;
- unsigned int fdt_size;
unsigned long kernel_load_addr;
unsigned long initrd_load_addr = 0, fdt_load_addr;
void *fdt;
const void *slave_code;
struct elfhdr ehdr;
+ char *modified_cmdline = NULL;
+ struct crash_mem *rmem = NULL;
struct kexec_elf_info elf_info;
struct kexec_buf kbuf = { .image = image, .buf_min = 0,
.buf_max = ppc64_rma_size };
@@ -44,13 +47,21 @@ static void *elf64_load(struct kimage *image, char *kernel_buf,
ret = kexec_build_elf_info(kernel_buf, kernel_len, &ehdr, &elf_info);
if (ret)
- goto out;
+ return ERR_PTR(ret);
+
+ if (IS_ENABLED(CONFIG_CRASH_DUMP) && image->type == KEXEC_TYPE_CRASH) {
+ /* min & max buffer values for kdump case */
+ kbuf.buf_min = pbuf.buf_min = crashk_res.start;
+ kbuf.buf_max = pbuf.buf_max =
+ ((crashk_res.end < ppc64_rma_size) ?
+ crashk_res.end : (ppc64_rma_size - 1));
+ }
ret = kexec_elf_load(image, &ehdr, &elf_info, &kbuf, &kernel_load_addr);
if (ret)
goto out;
- pr_debug("Loaded the kernel at 0x%lx\n", kernel_load_addr);
+ kexec_dprintk("Loaded the kernel at 0x%lx\n", kernel_load_addr);
ret = kexec_load_purgatory(image, &pbuf);
if (ret) {
@@ -58,7 +69,26 @@ static void *elf64_load(struct kimage *image, char *kernel_buf,
goto out;
}
- pr_debug("Loaded purgatory at 0x%lx\n", pbuf.mem);
+ kexec_dprintk("Loaded purgatory at 0x%lx\n", pbuf.mem);
+
+ /* Load additional segments needed for panic kernel */
+ if (IS_ENABLED(CONFIG_CRASH_DUMP) && image->type == KEXEC_TYPE_CRASH) {
+ ret = load_crashdump_segments_ppc64(image, &kbuf);
+ if (ret) {
+ pr_err("Failed to load kdump kernel segments\n");
+ goto out;
+ }
+
+ /* Setup cmdline for kdump kernel case */
+ modified_cmdline = setup_kdump_cmdline(image, cmdline,
+ cmdline_len);
+ if (!modified_cmdline) {
+ pr_err("Setting up cmdline for kdump kernel failed\n");
+ ret = -EINVAL;
+ goto out;
+ }
+ cmdline = modified_cmdline;
+ }
if (initrd != NULL) {
kbuf.buffer = initrd;
@@ -71,52 +101,61 @@ static void *elf64_load(struct kimage *image, char *kernel_buf,
goto out;
initrd_load_addr = kbuf.mem;
- pr_debug("Loaded initrd at 0x%lx\n", initrd_load_addr);
+ kexec_dprintk("Loaded initrd at 0x%lx\n", initrd_load_addr);
}
- fdt_size = fdt_totalsize(initial_boot_params) * 2;
- fdt = kmalloc(fdt_size, GFP_KERNEL);
- if (!fdt) {
- pr_err("Not enough memory for the device tree.\n");
- ret = -ENOMEM;
+ ret = get_reserved_memory_ranges(&rmem);
+ if (ret)
goto out;
- }
- ret = fdt_open_into(initial_boot_params, fdt, fdt_size);
- if (ret < 0) {
+
+ fdt = of_kexec_alloc_and_setup_fdt(image, initrd_load_addr,
+ initrd_len, cmdline,
+ kexec_extra_fdt_size_ppc64(image, rmem));
+ if (!fdt) {
pr_err("Error setting up the new device tree.\n");
ret = -EINVAL;
goto out;
}
- ret = setup_new_fdt(image, fdt, initrd_load_addr, initrd_len, cmdline);
+ ret = setup_new_fdt_ppc64(image, fdt, rmem);
if (ret)
- goto out;
+ goto out_free_fdt;
- fdt_pack(fdt);
+ if (!IS_ENABLED(CONFIG_CRASH_HOTPLUG) || image->type != KEXEC_TYPE_CRASH)
+ fdt_pack(fdt);
kbuf.buffer = fdt;
- kbuf.bufsz = kbuf.memsz = fdt_size;
+ kbuf.bufsz = kbuf.memsz = fdt_totalsize(fdt);
kbuf.buf_align = PAGE_SIZE;
kbuf.top_down = true;
kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
ret = kexec_add_buffer(&kbuf);
if (ret)
- goto out;
+ goto out_free_fdt;
+
+ /* FDT will be freed in arch_kimage_file_post_load_cleanup */
+ image->arch.fdt = fdt;
+
fdt_load_addr = kbuf.mem;
- pr_debug("Loaded device tree at 0x%lx\n", fdt_load_addr);
+ kexec_dprintk("Loaded device tree at 0x%lx\n", fdt_load_addr);
slave_code = elf_info.buffer + elf_info.proghdrs[0].p_offset;
- ret = setup_purgatory(image, slave_code, fdt, kernel_load_addr,
- fdt_load_addr);
+ ret = setup_purgatory_ppc64(image, slave_code, fdt, kernel_load_addr,
+ fdt_load_addr);
if (ret)
pr_err("Error setting up the purgatory.\n");
+ goto out;
+
+out_free_fdt:
+ kvfree(fdt);
out:
+ kfree(rmem);
+ kfree(modified_cmdline);
kexec_free_elf_info(&elf_info);
- /* Make kimage_file_post_load_cleanup free the fdt buffer for us. */
- return ret ? ERR_PTR(ret) : fdt;
+ return ret ? ERR_PTR(ret) : NULL;
}
const struct kexec_file_ops kexec_elf64_ops = {
diff --git a/arch/powerpc/kexec/file_load.c b/arch/powerpc/kexec/file_load.c
index 143c91724617..4284f76cbef5 100644
--- a/arch/powerpc/kexec/file_load.c
+++ b/arch/powerpc/kexec/file_load.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * ppc64 code to implement the kexec_file_load syscall
+ * powerpc code to implement the kexec_file_load syscall
*
* Copyright (C) 2004 Adam Litke (agl@us.ibm.com)
* Copyright (C) 2004 IBM Corp.
@@ -18,23 +18,44 @@
#include <linux/kexec.h>
#include <linux/of_fdt.h>
#include <linux/libfdt.h>
-#include <asm/ima.h>
+#include <asm/setup.h>
-#define SLAVE_CODE_SIZE 256
+#define SLAVE_CODE_SIZE 256 /* First 0x100 bytes */
-const struct kexec_file_ops * const kexec_file_loaders[] = {
- &kexec_elf64_ops,
- NULL
-};
-
-int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
- unsigned long buf_len)
+/**
+ * setup_kdump_cmdline - Prepend "elfcorehdr=<addr> " to command line
+ * of kdump kernel for exporting the core.
+ * @image: Kexec image
+ * @cmdline: Command line parameters to update.
+ * @cmdline_len: Length of the cmdline parameters.
+ *
+ * kdump segment must be setup before calling this function.
+ *
+ * Returns new cmdline buffer for kdump kernel on success, NULL otherwise.
+ */
+char *setup_kdump_cmdline(struct kimage *image, char *cmdline,
+ unsigned long cmdline_len)
{
- /* We don't support crash kernels yet. */
- if (image->type == KEXEC_TYPE_CRASH)
- return -EOPNOTSUPP;
+ int elfcorehdr_strlen;
+ char *cmdline_ptr;
+
+ cmdline_ptr = kzalloc(COMMAND_LINE_SIZE, GFP_KERNEL);
+ if (!cmdline_ptr)
+ return NULL;
+
+ elfcorehdr_strlen = sprintf(cmdline_ptr, "elfcorehdr=0x%lx ",
+ image->elf_load_addr);
- return kexec_image_probe_default(image, buf, buf_len);
+ if (elfcorehdr_strlen + cmdline_len > COMMAND_LINE_SIZE) {
+ pr_err("Appending elfcorehdr=<addr> exceeds cmdline size\n");
+ kfree(cmdline_ptr);
+ return NULL;
+ }
+
+ memcpy(cmdline_ptr + elfcorehdr_strlen, cmdline, cmdline_len);
+ // Ensure it's nul terminated
+ cmdline_ptr[COMMAND_LINE_SIZE - 1] = '\0';
+ return cmdline_ptr;
}
/**
@@ -86,169 +107,3 @@ int setup_purgatory(struct kimage *image, const void *slave_code,
return 0;
}
-
-/**
- * delete_fdt_mem_rsv - delete memory reservation with given address and size
- *
- * Return: 0 on success, or negative errno on error.
- */
-int delete_fdt_mem_rsv(void *fdt, unsigned long start, unsigned long size)
-{
- int i, ret, num_rsvs = fdt_num_mem_rsv(fdt);
-
- for (i = 0; i < num_rsvs; i++) {
- uint64_t rsv_start, rsv_size;
-
- ret = fdt_get_mem_rsv(fdt, i, &rsv_start, &rsv_size);
- if (ret) {
- pr_err("Malformed device tree.\n");
- return -EINVAL;
- }
-
- if (rsv_start == start && rsv_size == size) {
- ret = fdt_del_mem_rsv(fdt, i);
- if (ret) {
- pr_err("Error deleting device tree reservation.\n");
- return -EINVAL;
- }
-
- return 0;
- }
- }
-
- return -ENOENT;
-}
-
-/*
- * setup_new_fdt - modify /chosen and memory reservation for the next kernel
- * @image: kexec image being loaded.
- * @fdt: Flattened device tree for the next kernel.
- * @initrd_load_addr: Address where the next initrd will be loaded.
- * @initrd_len: Size of the next initrd, or 0 if there will be none.
- * @cmdline: Command line for the next kernel, or NULL if there will
- * be none.
- *
- * Return: 0 on success, or negative errno on error.
- */
-int setup_new_fdt(const struct kimage *image, void *fdt,
- unsigned long initrd_load_addr, unsigned long initrd_len,
- const char *cmdline)
-{
- int ret, chosen_node;
- const void *prop;
-
- /* Remove memory reservation for the current device tree. */
- ret = delete_fdt_mem_rsv(fdt, __pa(initial_boot_params),
- fdt_totalsize(initial_boot_params));
- if (ret == 0)
- pr_debug("Removed old device tree reservation.\n");
- else if (ret != -ENOENT)
- return ret;
-
- chosen_node = fdt_path_offset(fdt, "/chosen");
- if (chosen_node == -FDT_ERR_NOTFOUND) {
- chosen_node = fdt_add_subnode(fdt, fdt_path_offset(fdt, "/"),
- "chosen");
- if (chosen_node < 0) {
- pr_err("Error creating /chosen.\n");
- return -EINVAL;
- }
- } else if (chosen_node < 0) {
- pr_err("Malformed device tree: error reading /chosen.\n");
- return -EINVAL;
- }
-
- /* Did we boot using an initrd? */
- prop = fdt_getprop(fdt, chosen_node, "linux,initrd-start", NULL);
- if (prop) {
- uint64_t tmp_start, tmp_end, tmp_size;
-
- tmp_start = fdt64_to_cpu(*((const fdt64_t *) prop));
-
- prop = fdt_getprop(fdt, chosen_node, "linux,initrd-end", NULL);
- if (!prop) {
- pr_err("Malformed device tree.\n");
- return -EINVAL;
- }
- tmp_end = fdt64_to_cpu(*((const fdt64_t *) prop));
-
- /*
- * kexec reserves exact initrd size, while firmware may
- * reserve a multiple of PAGE_SIZE, so check for both.
- */
- tmp_size = tmp_end - tmp_start;
- ret = delete_fdt_mem_rsv(fdt, tmp_start, tmp_size);
- if (ret == -ENOENT)
- ret = delete_fdt_mem_rsv(fdt, tmp_start,
- round_up(tmp_size, PAGE_SIZE));
- if (ret == 0)
- pr_debug("Removed old initrd reservation.\n");
- else if (ret != -ENOENT)
- return ret;
-
- /* If there's no new initrd, delete the old initrd's info. */
- if (initrd_len == 0) {
- ret = fdt_delprop(fdt, chosen_node,
- "linux,initrd-start");
- if (ret) {
- pr_err("Error deleting linux,initrd-start.\n");
- return -EINVAL;
- }
-
- ret = fdt_delprop(fdt, chosen_node, "linux,initrd-end");
- if (ret) {
- pr_err("Error deleting linux,initrd-end.\n");
- return -EINVAL;
- }
- }
- }
-
- if (initrd_len) {
- ret = fdt_setprop_u64(fdt, chosen_node,
- "linux,initrd-start",
- initrd_load_addr);
- if (ret < 0)
- goto err;
-
- /* initrd-end is the first address after the initrd image. */
- ret = fdt_setprop_u64(fdt, chosen_node, "linux,initrd-end",
- initrd_load_addr + initrd_len);
- if (ret < 0)
- goto err;
-
- ret = fdt_add_mem_rsv(fdt, initrd_load_addr, initrd_len);
- if (ret) {
- pr_err("Error reserving initrd memory: %s\n",
- fdt_strerror(ret));
- return -EINVAL;
- }
- }
-
- if (cmdline != NULL) {
- ret = fdt_setprop_string(fdt, chosen_node, "bootargs", cmdline);
- if (ret < 0)
- goto err;
- } else {
- ret = fdt_delprop(fdt, chosen_node, "bootargs");
- if (ret && ret != -FDT_ERR_NOTFOUND) {
- pr_err("Error deleting bootargs.\n");
- return -EINVAL;
- }
- }
-
- ret = setup_ima_buffer(image, fdt, chosen_node);
- if (ret) {
- pr_err("Error setting up the new device tree.\n");
- return ret;
- }
-
- ret = fdt_setprop(fdt, chosen_node, "linux,booted-from-kexec", NULL, 0);
- if (ret)
- goto err;
-
- return 0;
-
-err:
- pr_err("Error setting up the new device tree.\n");
- return -EINVAL;
-}
diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c
new file mode 100644
index 000000000000..e7ef8b2a2554
--- /dev/null
+++ b/arch/powerpc/kexec/file_load_64.c
@@ -0,0 +1,871 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * ppc64 code to implement the kexec_file_load syscall
+ *
+ * Copyright (C) 2004 Adam Litke (agl@us.ibm.com)
+ * Copyright (C) 2004 IBM Corp.
+ * Copyright (C) 2004,2005 Milton D Miller II, IBM Corporation
+ * Copyright (C) 2005 R Sharada (sharada@in.ibm.com)
+ * Copyright (C) 2006 Mohan Kumar M (mohan@in.ibm.com)
+ * Copyright (C) 2020 IBM Corporation
+ *
+ * Based on kexec-tools' kexec-ppc64.c, kexec-elf-rel-ppc64.c, fs2dt.c.
+ * Heavily modified for the kernel by
+ * Hari Bathini, IBM Corporation.
+ */
+
+#include <linux/kexec.h>
+#include <linux/of_fdt.h>
+#include <linux/libfdt.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/memblock.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <asm/setup.h>
+#include <asm/drmem.h>
+#include <asm/firmware.h>
+#include <asm/kexec_ranges.h>
+#include <asm/crashdump-ppc64.h>
+#include <asm/mmzone.h>
+#include <asm/iommu.h>
+#include <asm/prom.h>
+#include <asm/plpks.h>
+#include <asm/cputhreads.h>
+
+struct umem_info {
+ __be64 *buf; /* data buffer for usable-memory property */
+ u32 size; /* size allocated for the data buffer */
+ u32 max_entries; /* maximum no. of entries */
+ u32 idx; /* index of current entry */
+
+ /* usable memory ranges to look up */
+ unsigned int nr_ranges;
+ const struct range *ranges;
+};
+
+const struct kexec_file_ops * const kexec_file_loaders[] = {
+ &kexec_elf64_ops,
+ NULL
+};
+
+int arch_check_excluded_range(struct kimage *image, unsigned long start,
+ unsigned long end)
+{
+ struct crash_mem *emem;
+ int i;
+
+ emem = image->arch.exclude_ranges;
+ for (i = 0; i < emem->nr_ranges; i++)
+ if (start < emem->ranges[i].end && end > emem->ranges[i].start)
+ return 1;
+
+ return 0;
+}
+
+#ifdef CONFIG_CRASH_DUMP
+/**
+ * check_realloc_usable_mem - Reallocate buffer if it can't accommodate entries
+ * @um_info: Usable memory buffer and ranges info.
+ * @cnt: No. of entries to accommodate.
+ *
+ * Frees up the old buffer if memory reallocation fails.
+ *
+ * Returns buffer on success, NULL on error.
+ */
+static __be64 *check_realloc_usable_mem(struct umem_info *um_info, int cnt)
+{
+ u32 new_size;
+ __be64 *tbuf;
+
+ if ((um_info->idx + cnt) <= um_info->max_entries)
+ return um_info->buf;
+
+ new_size = um_info->size + MEM_RANGE_CHUNK_SZ;
+ tbuf = krealloc(um_info->buf, new_size, GFP_KERNEL);
+ if (tbuf) {
+ um_info->buf = tbuf;
+ um_info->size = new_size;
+ um_info->max_entries = (um_info->size / sizeof(u64));
+ }
+
+ return tbuf;
+}
+
+/**
+ * add_usable_mem - Add the usable memory ranges within the given memory range
+ * to the buffer
+ * @um_info: Usable memory buffer and ranges info.
+ * @base: Base address of memory range to look for.
+ * @end: End address of memory range to look for.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int add_usable_mem(struct umem_info *um_info, u64 base, u64 end)
+{
+ u64 loc_base, loc_end;
+ bool add;
+ int i;
+
+ for (i = 0; i < um_info->nr_ranges; i++) {
+ add = false;
+ loc_base = um_info->ranges[i].start;
+ loc_end = um_info->ranges[i].end;
+ if (loc_base >= base && loc_end <= end)
+ add = true;
+ else if (base < loc_end && end > loc_base) {
+ if (loc_base < base)
+ loc_base = base;
+ if (loc_end > end)
+ loc_end = end;
+ add = true;
+ }
+
+ if (add) {
+ if (!check_realloc_usable_mem(um_info, 2))
+ return -ENOMEM;
+
+ um_info->buf[um_info->idx++] = cpu_to_be64(loc_base);
+ um_info->buf[um_info->idx++] =
+ cpu_to_be64(loc_end - loc_base + 1);
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * kdump_setup_usable_lmb - This is a callback function that gets called by
+ * walk_drmem_lmbs for every LMB to set its
+ * usable memory ranges.
+ * @lmb: LMB info.
+ * @usm: linux,drconf-usable-memory property value.
+ * @data: Pointer to usable memory buffer and ranges info.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int kdump_setup_usable_lmb(struct drmem_lmb *lmb, const __be32 **usm,
+ void *data)
+{
+ struct umem_info *um_info;
+ int tmp_idx, ret;
+ u64 base, end;
+
+ /*
+ * kdump load isn't supported on kernels already booted with
+ * linux,drconf-usable-memory property.
+ */
+ if (*usm) {
+ pr_err("linux,drconf-usable-memory property already exists!");
+ return -EINVAL;
+ }
+
+ um_info = data;
+ tmp_idx = um_info->idx;
+ if (!check_realloc_usable_mem(um_info, 1))
+ return -ENOMEM;
+
+ um_info->idx++;
+ base = lmb->base_addr;
+ end = base + drmem_lmb_size() - 1;
+ ret = add_usable_mem(um_info, base, end);
+ if (!ret) {
+ /*
+ * Update the no. of ranges added. Two entries (base & size)
+ * for every range added.
+ */
+ um_info->buf[tmp_idx] =
+ cpu_to_be64((um_info->idx - tmp_idx - 1) / 2);
+ }
+
+ return ret;
+}
+
+#define NODE_PATH_LEN 256
+/**
+ * add_usable_mem_property - Add usable memory property for the given
+ * memory node.
+ * @fdt: Flattened device tree for the kdump kernel.
+ * @dn: Memory node.
+ * @um_info: Usable memory buffer and ranges info.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int add_usable_mem_property(void *fdt, struct device_node *dn,
+ struct umem_info *um_info)
+{
+ int node;
+ char path[NODE_PATH_LEN];
+ int i, ret;
+ u64 base, size;
+
+ of_node_get(dn);
+
+ if (snprintf(path, NODE_PATH_LEN, "%pOF", dn) > (NODE_PATH_LEN - 1)) {
+ pr_err("Buffer (%d) too small for memory node: %pOF\n",
+ NODE_PATH_LEN, dn);
+ return -EOVERFLOW;
+ }
+ kexec_dprintk("Memory node path: %s\n", path);
+
+ /* Now that we know the path, find its offset in kdump kernel's fdt */
+ node = fdt_path_offset(fdt, path);
+ if (node < 0) {
+ pr_err("Malformed device tree: error reading %s\n", path);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ um_info->idx = 0;
+ if (!check_realloc_usable_mem(um_info, 2)) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ /*
+ * "reg" property represents sequence of (addr,size) tuples
+ * each representing a memory range.
+ */
+ for (i = 0; ; i++) {
+ ret = of_property_read_reg(dn, i, &base, &size);
+ if (ret)
+ break;
+
+ ret = add_usable_mem(um_info, base, base + size - 1);
+ if (ret)
+ goto out;
+ }
+
+ // No reg or empty reg? Skip this node.
+ if (i == 0)
+ goto out;
+
+ /*
+ * No kdump kernel usable memory found in this memory node.
+ * Write (0,0) tuple in linux,usable-memory property for
+ * this region to be ignored.
+ */
+ if (um_info->idx == 0) {
+ um_info->buf[0] = 0;
+ um_info->buf[1] = 0;
+ um_info->idx = 2;
+ }
+
+ ret = fdt_setprop(fdt, node, "linux,usable-memory", um_info->buf,
+ (um_info->idx * sizeof(u64)));
+
+out:
+ of_node_put(dn);
+ return ret;
+}
+
+
+/**
+ * update_usable_mem_fdt - Updates kdump kernel's fdt with linux,usable-memory
+ * and linux,drconf-usable-memory DT properties as
+ * appropriate to restrict its memory usage.
+ * @fdt: Flattened device tree for the kdump kernel.
+ * @usable_mem: Usable memory ranges for kdump kernel.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int update_usable_mem_fdt(void *fdt, struct crash_mem *usable_mem)
+{
+ struct umem_info um_info;
+ struct device_node *dn;
+ int node, ret = 0;
+
+ if (!usable_mem) {
+ pr_err("Usable memory ranges for kdump kernel not found\n");
+ return -ENOENT;
+ }
+
+ node = fdt_path_offset(fdt, "/ibm,dynamic-reconfiguration-memory");
+ if (node == -FDT_ERR_NOTFOUND)
+ kexec_dprintk("No dynamic reconfiguration memory found\n");
+ else if (node < 0) {
+ pr_err("Malformed device tree: error reading /ibm,dynamic-reconfiguration-memory.\n");
+ return -EINVAL;
+ }
+
+ um_info.buf = NULL;
+ um_info.size = 0;
+ um_info.max_entries = 0;
+ um_info.idx = 0;
+ /* Memory ranges to look up */
+ um_info.ranges = &(usable_mem->ranges[0]);
+ um_info.nr_ranges = usable_mem->nr_ranges;
+
+ dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+ if (dn) {
+ ret = walk_drmem_lmbs(dn, &um_info, kdump_setup_usable_lmb);
+ of_node_put(dn);
+
+ if (ret) {
+ pr_err("Could not setup linux,drconf-usable-memory property for kdump\n");
+ goto out;
+ }
+
+ ret = fdt_setprop(fdt, node, "linux,drconf-usable-memory",
+ um_info.buf, (um_info.idx * sizeof(u64)));
+ if (ret) {
+ pr_err("Failed to update fdt with linux,drconf-usable-memory property: %s",
+ fdt_strerror(ret));
+ goto out;
+ }
+ }
+
+ /*
+ * Walk through each memory node and set linux,usable-memory property
+ * for the corresponding node in kdump kernel's fdt.
+ */
+ for_each_node_by_type(dn, "memory") {
+ ret = add_usable_mem_property(fdt, dn, &um_info);
+ if (ret) {
+ pr_err("Failed to set linux,usable-memory property for %s node",
+ dn->full_name);
+ of_node_put(dn);
+ goto out;
+ }
+ }
+
+out:
+ kfree(um_info.buf);
+ return ret;
+}
+
+/**
+ * load_backup_segment - Locate a memory hole to place the backup region.
+ * @image: Kexec image.
+ * @kbuf: Buffer contents and memory parameters.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int load_backup_segment(struct kimage *image, struct kexec_buf *kbuf)
+{
+ void *buf;
+ int ret;
+
+ /*
+ * Setup a source buffer for backup segment.
+ *
+ * A source buffer has no meaning for backup region as data will
+ * be copied from backup source, after crash, in the purgatory.
+ * But as load segment code doesn't recognize such segments,
+ * setup a dummy source buffer to keep it happy for now.
+ */
+ buf = vzalloc(BACKUP_SRC_SIZE);
+ if (!buf)
+ return -ENOMEM;
+
+ kbuf->buffer = buf;
+ kbuf->mem = KEXEC_BUF_MEM_UNKNOWN;
+ kbuf->bufsz = kbuf->memsz = BACKUP_SRC_SIZE;
+ kbuf->top_down = false;
+
+ ret = kexec_add_buffer(kbuf);
+ if (ret) {
+ vfree(buf);
+ return ret;
+ }
+
+ image->arch.backup_buf = buf;
+ image->arch.backup_start = kbuf->mem;
+ return 0;
+}
+
+/**
+ * update_backup_region_phdr - Update backup region's offset for the core to
+ * export the region appropriately.
+ * @image: Kexec image.
+ * @ehdr: ELF core header.
+ *
+ * Assumes an exclusive program header is setup for the backup region
+ * in the ELF headers
+ *
+ * Returns nothing.
+ */
+static void update_backup_region_phdr(struct kimage *image, Elf64_Ehdr *ehdr)
+{
+ Elf64_Phdr *phdr;
+ unsigned int i;
+
+ phdr = (Elf64_Phdr *)(ehdr + 1);
+ for (i = 0; i < ehdr->e_phnum; i++) {
+ if (phdr->p_paddr == BACKUP_SRC_START) {
+ phdr->p_offset = image->arch.backup_start;
+ kexec_dprintk("Backup region offset updated to 0x%lx\n",
+ image->arch.backup_start);
+ return;
+ }
+ }
+}
+
+static unsigned int kdump_extra_elfcorehdr_size(struct crash_mem *cmem)
+{
+#if defined(CONFIG_CRASH_HOTPLUG) && defined(CONFIG_MEMORY_HOTPLUG)
+ unsigned int extra_sz = 0;
+
+ if (CONFIG_CRASH_MAX_MEMORY_RANGES > (unsigned int)PN_XNUM)
+ pr_warn("Number of Phdrs %u exceeds max\n", CONFIG_CRASH_MAX_MEMORY_RANGES);
+ else if (cmem->nr_ranges >= CONFIG_CRASH_MAX_MEMORY_RANGES)
+ pr_warn("Configured crash mem ranges may not be enough\n");
+ else
+ extra_sz = (CONFIG_CRASH_MAX_MEMORY_RANGES - cmem->nr_ranges) * sizeof(Elf64_Phdr);
+
+ return extra_sz;
+#endif
+ return 0;
+}
+
+/**
+ * load_elfcorehdr_segment - Setup crash memory ranges and initialize elfcorehdr
+ * segment needed to load kdump kernel.
+ * @image: Kexec image.
+ * @kbuf: Buffer contents and memory parameters.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int load_elfcorehdr_segment(struct kimage *image, struct kexec_buf *kbuf)
+{
+ struct crash_mem *cmem = NULL;
+ unsigned long headers_sz;
+ void *headers = NULL;
+ int ret;
+
+ ret = get_crash_memory_ranges(&cmem);
+ if (ret)
+ goto out;
+
+ /* Setup elfcorehdr segment */
+ ret = crash_prepare_elf64_headers(cmem, false, &headers, &headers_sz);
+ if (ret) {
+ pr_err("Failed to prepare elf headers for the core\n");
+ goto out;
+ }
+
+ /* Fix the offset for backup region in the ELF header */
+ update_backup_region_phdr(image, headers);
+
+ kbuf->buffer = headers;
+ kbuf->mem = KEXEC_BUF_MEM_UNKNOWN;
+ kbuf->bufsz = headers_sz;
+ kbuf->memsz = headers_sz + kdump_extra_elfcorehdr_size(cmem);
+ kbuf->top_down = false;
+
+ ret = kexec_add_buffer(kbuf);
+ if (ret) {
+ vfree(headers);
+ goto out;
+ }
+
+ image->elf_load_addr = kbuf->mem;
+ image->elf_headers_sz = headers_sz;
+ image->elf_headers = headers;
+out:
+ kfree(cmem);
+ return ret;
+}
+
+/**
+ * load_crashdump_segments_ppc64 - Initialize the additional segements needed
+ * to load kdump kernel.
+ * @image: Kexec image.
+ * @kbuf: Buffer contents and memory parameters.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int load_crashdump_segments_ppc64(struct kimage *image,
+ struct kexec_buf *kbuf)
+{
+ int ret;
+
+ /* Load backup segment - first 64K bytes of the crashing kernel */
+ ret = load_backup_segment(image, kbuf);
+ if (ret) {
+ pr_err("Failed to load backup segment\n");
+ return ret;
+ }
+ kexec_dprintk("Loaded the backup region at 0x%lx\n", kbuf->mem);
+
+ /* Load elfcorehdr segment - to export crashing kernel's vmcore */
+ ret = load_elfcorehdr_segment(image, kbuf);
+ if (ret) {
+ pr_err("Failed to load elfcorehdr segment\n");
+ return ret;
+ }
+ kexec_dprintk("Loaded elf core header at 0x%lx, bufsz=0x%lx memsz=0x%lx\n",
+ image->elf_load_addr, kbuf->bufsz, kbuf->memsz);
+
+ return 0;
+}
+#endif
+
+/**
+ * setup_purgatory_ppc64 - initialize PPC64 specific purgatory's global
+ * variables and call setup_purgatory() to initialize
+ * common global variable.
+ * @image: kexec image.
+ * @slave_code: Slave code for the purgatory.
+ * @fdt: Flattened device tree for the next kernel.
+ * @kernel_load_addr: Address where the kernel is loaded.
+ * @fdt_load_addr: Address where the flattened device tree is loaded.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int setup_purgatory_ppc64(struct kimage *image, const void *slave_code,
+ const void *fdt, unsigned long kernel_load_addr,
+ unsigned long fdt_load_addr)
+{
+ struct device_node *dn = NULL;
+ int ret;
+
+ ret = setup_purgatory(image, slave_code, fdt, kernel_load_addr,
+ fdt_load_addr);
+ if (ret)
+ goto out;
+
+ if (image->type == KEXEC_TYPE_CRASH) {
+ u32 my_run_at_load = 1;
+
+ /*
+ * Tell relocatable kernel to run at load address
+ * via the word meant for that at 0x5c.
+ */
+ ret = kexec_purgatory_get_set_symbol(image, "run_at_load",
+ &my_run_at_load,
+ sizeof(my_run_at_load),
+ false);
+ if (ret)
+ goto out;
+ }
+
+ /* Tell purgatory where to look for backup region */
+ ret = kexec_purgatory_get_set_symbol(image, "backup_start",
+ &image->arch.backup_start,
+ sizeof(image->arch.backup_start),
+ false);
+ if (ret)
+ goto out;
+
+ /* Setup OPAL base & entry values */
+ dn = of_find_node_by_path("/ibm,opal");
+ if (dn) {
+ u64 val;
+
+ ret = of_property_read_u64(dn, "opal-base-address", &val);
+ if (ret)
+ goto out;
+
+ ret = kexec_purgatory_get_set_symbol(image, "opal_base", &val,
+ sizeof(val), false);
+ if (ret)
+ goto out;
+
+ ret = of_property_read_u64(dn, "opal-entry-address", &val);
+ if (ret)
+ goto out;
+ ret = kexec_purgatory_get_set_symbol(image, "opal_entry", &val,
+ sizeof(val), false);
+ }
+out:
+ if (ret)
+ pr_err("Failed to setup purgatory symbols");
+ of_node_put(dn);
+ return ret;
+}
+
+/**
+ * cpu_node_size - Compute the size of a CPU node in the FDT.
+ * This should be done only once and the value is stored in
+ * a static variable.
+ * Returns the max size of a CPU node in the FDT.
+ */
+static unsigned int cpu_node_size(void)
+{
+ static unsigned int size;
+ struct device_node *dn;
+ struct property *pp;
+
+ /*
+ * Don't compute it twice, we are assuming that the per CPU node size
+ * doesn't change during the system's life.
+ */
+ if (size)
+ return size;
+
+ dn = of_find_node_by_type(NULL, "cpu");
+ if (WARN_ON_ONCE(!dn)) {
+ // Unlikely to happen
+ return 0;
+ }
+
+ /*
+ * We compute the sub node size for a CPU node, assuming it
+ * will be the same for all.
+ */
+ size += strlen(dn->name) + 5;
+ for_each_property_of_node(dn, pp) {
+ size += strlen(pp->name);
+ size += pp->length;
+ }
+
+ of_node_put(dn);
+ return size;
+}
+
+static unsigned int kdump_extra_fdt_size_ppc64(struct kimage *image, unsigned int cpu_nodes)
+{
+ unsigned int extra_size = 0;
+ u64 usm_entries;
+#ifdef CONFIG_CRASH_HOTPLUG
+ unsigned int possible_cpu_nodes;
+#endif
+
+ if (!IS_ENABLED(CONFIG_CRASH_DUMP) || image->type != KEXEC_TYPE_CRASH)
+ return 0;
+
+ /*
+ * For kdump kernel, account for linux,usable-memory and
+ * linux,drconf-usable-memory properties. Get an approximate on the
+ * number of usable memory entries and use for FDT size estimation.
+ */
+ if (drmem_lmb_size()) {
+ usm_entries = ((memory_hotplug_max() / drmem_lmb_size()) +
+ (2 * (resource_size(&crashk_res) / drmem_lmb_size())));
+ extra_size += (unsigned int)(usm_entries * sizeof(u64));
+ }
+
+#ifdef CONFIG_CRASH_HOTPLUG
+ /*
+ * Make sure enough space is reserved to accommodate possible CPU nodes
+ * in the crash FDT. This allows packing possible CPU nodes which are
+ * not yet present in the system without regenerating the entire FDT.
+ */
+ if (image->type == KEXEC_TYPE_CRASH) {
+ possible_cpu_nodes = num_possible_cpus() / threads_per_core;
+ if (possible_cpu_nodes > cpu_nodes)
+ extra_size += (possible_cpu_nodes - cpu_nodes) * cpu_node_size();
+ }
+#endif
+
+ return extra_size;
+}
+
+/**
+ * kexec_extra_fdt_size_ppc64 - Return the estimated additional size needed to
+ * setup FDT for kexec/kdump kernel.
+ * @image: kexec image being loaded.
+ *
+ * Returns the estimated extra size needed for kexec/kdump kernel FDT.
+ */
+unsigned int kexec_extra_fdt_size_ppc64(struct kimage *image, struct crash_mem *rmem)
+{
+ struct device_node *dn;
+ unsigned int cpu_nodes = 0, extra_size = 0;
+
+ // Budget some space for the password blob. There's already extra space
+ // for the key name
+ if (plpks_is_available())
+ extra_size += (unsigned int)plpks_get_passwordlen();
+
+ /* Get the number of CPU nodes in the current device tree */
+ for_each_node_by_type(dn, "cpu") {
+ cpu_nodes++;
+ }
+
+ /* Consider extra space for CPU nodes added since the boot time */
+ if (cpu_nodes > boot_cpu_node_count)
+ extra_size += (cpu_nodes - boot_cpu_node_count) * cpu_node_size();
+
+ /* Consider extra space for reserved memory ranges if any */
+ if (rmem->nr_ranges > 0)
+ extra_size += sizeof(struct fdt_reserve_entry) * rmem->nr_ranges;
+
+ return extra_size + kdump_extra_fdt_size_ppc64(image, cpu_nodes);
+}
+
+static int copy_property(void *fdt, int node_offset, const struct device_node *dn,
+ const char *propname)
+{
+ const void *prop, *fdtprop;
+ int len = 0, fdtlen = 0;
+
+ prop = of_get_property(dn, propname, &len);
+ fdtprop = fdt_getprop(fdt, node_offset, propname, &fdtlen);
+
+ if (fdtprop && !prop)
+ return fdt_delprop(fdt, node_offset, propname);
+ else if (prop)
+ return fdt_setprop(fdt, node_offset, propname, prop, len);
+ else
+ return -FDT_ERR_NOTFOUND;
+}
+
+static int update_pci_dma_nodes(void *fdt, const char *dmapropname)
+{
+ struct device_node *dn;
+ int pci_offset, root_offset, ret = 0;
+
+ if (!firmware_has_feature(FW_FEATURE_LPAR))
+ return 0;
+
+ root_offset = fdt_path_offset(fdt, "/");
+ for_each_node_with_property(dn, dmapropname) {
+ pci_offset = fdt_subnode_offset(fdt, root_offset, of_node_full_name(dn));
+ if (pci_offset < 0)
+ continue;
+
+ ret = copy_property(fdt, pci_offset, dn, "ibm,dma-window");
+ if (ret < 0) {
+ of_node_put(dn);
+ break;
+ }
+ ret = copy_property(fdt, pci_offset, dn, dmapropname);
+ if (ret < 0) {
+ of_node_put(dn);
+ break;
+ }
+ }
+
+ return ret;
+}
+
+/**
+ * setup_new_fdt_ppc64 - Update the flattend device-tree of the kernel
+ * being loaded.
+ * @image: kexec image being loaded.
+ * @fdt: Flattened device tree for the next kernel.
+ * @rmem: Reserved memory ranges.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int setup_new_fdt_ppc64(const struct kimage *image, void *fdt, struct crash_mem *rmem)
+{
+ struct crash_mem *umem = NULL;
+ int i, nr_ranges, ret;
+
+#ifdef CONFIG_CRASH_DUMP
+ /*
+ * Restrict memory usage for kdump kernel by setting up
+ * usable memory ranges and memory reserve map.
+ */
+ if (image->type == KEXEC_TYPE_CRASH) {
+ ret = get_usable_memory_ranges(&umem);
+ if (ret)
+ goto out;
+
+ ret = update_usable_mem_fdt(fdt, umem);
+ if (ret) {
+ pr_err("Error setting up usable-memory property for kdump kernel\n");
+ goto out;
+ }
+
+ /*
+ * Ensure we don't touch crashed kernel's memory except the
+ * first 64K of RAM, which will be backed up.
+ */
+ ret = fdt_add_mem_rsv(fdt, BACKUP_SRC_END + 1,
+ crashk_res.start - BACKUP_SRC_SIZE);
+ if (ret) {
+ pr_err("Error reserving crash memory: %s\n",
+ fdt_strerror(ret));
+ goto out;
+ }
+
+ /* Ensure backup region is not used by kdump/capture kernel */
+ ret = fdt_add_mem_rsv(fdt, image->arch.backup_start,
+ BACKUP_SRC_SIZE);
+ if (ret) {
+ pr_err("Error reserving memory for backup: %s\n",
+ fdt_strerror(ret));
+ goto out;
+ }
+ }
+#endif
+
+ /* Update cpus nodes information to account hotplug CPUs. */
+ ret = update_cpus_node(fdt);
+ if (ret < 0)
+ goto out;
+
+ ret = update_pci_dma_nodes(fdt, DIRECT64_PROPNAME);
+ if (ret < 0)
+ goto out;
+
+ ret = update_pci_dma_nodes(fdt, DMA64_PROPNAME);
+ if (ret < 0)
+ goto out;
+
+ /* Update memory reserve map */
+ nr_ranges = rmem ? rmem->nr_ranges : 0;
+ for (i = 0; i < nr_ranges; i++) {
+ u64 base, size;
+
+ base = rmem->ranges[i].start;
+ size = rmem->ranges[i].end - base + 1;
+ ret = fdt_add_mem_rsv(fdt, base, size);
+ if (ret) {
+ pr_err("Error updating memory reserve map: %s\n",
+ fdt_strerror(ret));
+ goto out;
+ }
+ }
+
+ // If we have PLPKS active, we need to provide the password to the new kernel
+ if (plpks_is_available())
+ ret = plpks_populate_fdt(fdt);
+
+out:
+ kfree(umem);
+ return ret;
+}
+
+/**
+ * arch_kexec_kernel_image_probe - Does additional handling needed to setup
+ * kexec segments.
+ * @image: kexec image being loaded.
+ * @buf: Buffer pointing to elf data.
+ * @buf_len: Length of the buffer.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
+ unsigned long buf_len)
+{
+ int ret;
+
+ /* Get exclude memory ranges needed for setting up kexec segments */
+ ret = get_exclude_memory_ranges(&(image->arch.exclude_ranges));
+ if (ret) {
+ pr_err("Failed to setup exclude memory ranges for buffer lookup\n");
+ return ret;
+ }
+
+ return kexec_image_probe_default(image, buf, buf_len);
+}
+
+/**
+ * arch_kimage_file_post_load_cleanup - Frees up all the allocations done
+ * while loading the image.
+ * @image: kexec image being loaded.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int arch_kimage_file_post_load_cleanup(struct kimage *image)
+{
+ kfree(image->arch.exclude_ranges);
+ image->arch.exclude_ranges = NULL;
+
+ vfree(image->arch.backup_buf);
+ image->arch.backup_buf = NULL;
+
+ vfree(image->elf_headers);
+ image->elf_headers = NULL;
+ image->elf_headers_sz = 0;
+
+ kvfree(image->arch.fdt);
+ image->arch.fdt = NULL;
+
+ return kexec_image_post_load_cleanup_default(image);
+}
diff --git a/arch/powerpc/kexec/ima.c b/arch/powerpc/kexec/ima.c
deleted file mode 100644
index 720e50e490b6..000000000000
--- a/arch/powerpc/kexec/ima.c
+++ /dev/null
@@ -1,219 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (C) 2016 IBM Corporation
- *
- * Authors:
- * Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>
- */
-
-#include <linux/slab.h>
-#include <linux/kexec.h>
-#include <linux/of.h>
-#include <linux/memblock.h>
-#include <linux/libfdt.h>
-
-static int get_addr_size_cells(int *addr_cells, int *size_cells)
-{
- struct device_node *root;
-
- root = of_find_node_by_path("/");
- if (!root)
- return -EINVAL;
-
- *addr_cells = of_n_addr_cells(root);
- *size_cells = of_n_size_cells(root);
-
- of_node_put(root);
-
- return 0;
-}
-
-static int do_get_kexec_buffer(const void *prop, int len, unsigned long *addr,
- size_t *size)
-{
- int ret, addr_cells, size_cells;
-
- ret = get_addr_size_cells(&addr_cells, &size_cells);
- if (ret)
- return ret;
-
- if (len < 4 * (addr_cells + size_cells))
- return -ENOENT;
-
- *addr = of_read_number(prop, addr_cells);
- *size = of_read_number(prop + 4 * addr_cells, size_cells);
-
- return 0;
-}
-
-/**
- * ima_get_kexec_buffer - get IMA buffer from the previous kernel
- * @addr: On successful return, set to point to the buffer contents.
- * @size: On successful return, set to the buffer size.
- *
- * Return: 0 on success, negative errno on error.
- */
-int ima_get_kexec_buffer(void **addr, size_t *size)
-{
- int ret, len;
- unsigned long tmp_addr;
- size_t tmp_size;
- const void *prop;
-
- prop = of_get_property(of_chosen, "linux,ima-kexec-buffer", &len);
- if (!prop)
- return -ENOENT;
-
- ret = do_get_kexec_buffer(prop, len, &tmp_addr, &tmp_size);
- if (ret)
- return ret;
-
- *addr = __va(tmp_addr);
- *size = tmp_size;
-
- return 0;
-}
-
-/**
- * ima_free_kexec_buffer - free memory used by the IMA buffer
- */
-int ima_free_kexec_buffer(void)
-{
- int ret;
- unsigned long addr;
- size_t size;
- struct property *prop;
-
- prop = of_find_property(of_chosen, "linux,ima-kexec-buffer", NULL);
- if (!prop)
- return -ENOENT;
-
- ret = do_get_kexec_buffer(prop->value, prop->length, &addr, &size);
- if (ret)
- return ret;
-
- ret = of_remove_property(of_chosen, prop);
- if (ret)
- return ret;
-
- return memblock_free(addr, size);
-
-}
-
-/**
- * remove_ima_buffer - remove the IMA buffer property and reservation from @fdt
- *
- * The IMA measurement buffer is of no use to a subsequent kernel, so we always
- * remove it from the device tree.
- */
-void remove_ima_buffer(void *fdt, int chosen_node)
-{
- int ret, len;
- unsigned long addr;
- size_t size;
- const void *prop;
-
- prop = fdt_getprop(fdt, chosen_node, "linux,ima-kexec-buffer", &len);
- if (!prop)
- return;
-
- ret = do_get_kexec_buffer(prop, len, &addr, &size);
- fdt_delprop(fdt, chosen_node, "linux,ima-kexec-buffer");
- if (ret)
- return;
-
- ret = delete_fdt_mem_rsv(fdt, addr, size);
- if (!ret)
- pr_debug("Removed old IMA buffer reservation.\n");
-}
-
-#ifdef CONFIG_IMA_KEXEC
-/**
- * arch_ima_add_kexec_buffer - do arch-specific steps to add the IMA buffer
- *
- * Architectures should use this function to pass on the IMA buffer
- * information to the next kernel.
- *
- * Return: 0 on success, negative errno on error.
- */
-int arch_ima_add_kexec_buffer(struct kimage *image, unsigned long load_addr,
- size_t size)
-{
- image->arch.ima_buffer_addr = load_addr;
- image->arch.ima_buffer_size = size;
-
- return 0;
-}
-
-static int write_number(void *p, u64 value, int cells)
-{
- if (cells == 1) {
- u32 tmp;
-
- if (value > U32_MAX)
- return -EINVAL;
-
- tmp = cpu_to_be32(value);
- memcpy(p, &tmp, sizeof(tmp));
- } else if (cells == 2) {
- u64 tmp;
-
- tmp = cpu_to_be64(value);
- memcpy(p, &tmp, sizeof(tmp));
- } else
- return -EINVAL;
-
- return 0;
-}
-
-/**
- * setup_ima_buffer - add IMA buffer information to the fdt
- * @image: kexec image being loaded.
- * @fdt: Flattened device tree for the next kernel.
- * @chosen_node: Offset to the chosen node.
- *
- * Return: 0 on success, or negative errno on error.
- */
-int setup_ima_buffer(const struct kimage *image, void *fdt, int chosen_node)
-{
- int ret, addr_cells, size_cells, entry_size;
- u8 value[16];
-
- remove_ima_buffer(fdt, chosen_node);
- if (!image->arch.ima_buffer_size)
- return 0;
-
- ret = get_addr_size_cells(&addr_cells, &size_cells);
- if (ret)
- return ret;
-
- entry_size = 4 * (addr_cells + size_cells);
-
- if (entry_size > sizeof(value))
- return -EINVAL;
-
- ret = write_number(value, image->arch.ima_buffer_addr, addr_cells);
- if (ret)
- return ret;
-
- ret = write_number(value + 4 * addr_cells, image->arch.ima_buffer_size,
- size_cells);
- if (ret)
- return ret;
-
- ret = fdt_setprop(fdt, chosen_node, "linux,ima-kexec-buffer", value,
- entry_size);
- if (ret < 0)
- return -EINVAL;
-
- ret = fdt_add_mem_rsv(fdt, image->arch.ima_buffer_addr,
- image->arch.ima_buffer_size);
- if (ret)
- return -EINVAL;
-
- pr_debug("IMA buffer at 0x%llx, size = 0x%zx\n",
- image->arch.ima_buffer_addr, image->arch.ima_buffer_size);
-
- return 0;
-}
-#endif /* CONFIG_IMA_KEXEC */
diff --git a/arch/powerpc/kexec/ranges.c b/arch/powerpc/kexec/ranges.c
new file mode 100644
index 000000000000..3702b0bdab14
--- /dev/null
+++ b/arch/powerpc/kexec/ranges.c
@@ -0,0 +1,708 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * powerpc code to implement the kexec_file_load syscall
+ *
+ * Copyright (C) 2004 Adam Litke (agl@us.ibm.com)
+ * Copyright (C) 2004 IBM Corp.
+ * Copyright (C) 2004,2005 Milton D Miller II, IBM Corporation
+ * Copyright (C) 2005 R Sharada (sharada@in.ibm.com)
+ * Copyright (C) 2006 Mohan Kumar M (mohan@in.ibm.com)
+ * Copyright (C) 2020 IBM Corporation
+ *
+ * Based on kexec-tools' kexec-ppc64.c, fs2dt.c.
+ * Heavily modified for the kernel by
+ * Hari Bathini, IBM Corporation.
+ */
+
+#define pr_fmt(fmt) "kexec ranges: " fmt
+
+#include <linux/sort.h>
+#include <linux/kexec.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/memblock.h>
+#include <linux/crash_core.h>
+#include <asm/sections.h>
+#include <asm/kexec_ranges.h>
+#include <asm/crashdump-ppc64.h>
+
+#if defined(CONFIG_KEXEC_FILE) || defined(CONFIG_CRASH_DUMP)
+/**
+ * get_max_nr_ranges - Get the max no. of ranges crash_mem structure
+ * could hold, given the size allocated for it.
+ * @size: Allocation size of crash_mem structure.
+ *
+ * Returns the maximum no. of ranges.
+ */
+static inline unsigned int get_max_nr_ranges(size_t size)
+{
+ return ((size - sizeof(struct crash_mem)) /
+ sizeof(struct range));
+}
+
+/**
+ * get_mem_rngs_size - Get the allocated size of mem_rngs based on
+ * max_nr_ranges and chunk size.
+ * @mem_rngs: Memory ranges.
+ *
+ * Returns the maximum size of @mem_rngs.
+ */
+static inline size_t get_mem_rngs_size(struct crash_mem *mem_rngs)
+{
+ size_t size;
+
+ if (!mem_rngs)
+ return 0;
+
+ size = (sizeof(struct crash_mem) +
+ (mem_rngs->max_nr_ranges * sizeof(struct range)));
+
+ /*
+ * Memory is allocated in size multiple of MEM_RANGE_CHUNK_SZ.
+ * So, align to get the actual length.
+ */
+ return ALIGN(size, MEM_RANGE_CHUNK_SZ);
+}
+
+/**
+ * __add_mem_range - add a memory range to memory ranges list.
+ * @mem_ranges: Range list to add the memory range to.
+ * @base: Base address of the range to add.
+ * @size: Size of the memory range to add.
+ *
+ * (Re)allocates memory, if needed.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int __add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size)
+{
+ struct crash_mem *mem_rngs = *mem_ranges;
+
+ if (!mem_rngs || (mem_rngs->nr_ranges == mem_rngs->max_nr_ranges)) {
+ mem_rngs = realloc_mem_ranges(mem_ranges);
+ if (!mem_rngs)
+ return -ENOMEM;
+ }
+
+ mem_rngs->ranges[mem_rngs->nr_ranges].start = base;
+ mem_rngs->ranges[mem_rngs->nr_ranges].end = base + size - 1;
+ pr_debug("Added memory range [%#016llx - %#016llx] at index %d\n",
+ base, base + size - 1, mem_rngs->nr_ranges);
+ mem_rngs->nr_ranges++;
+ return 0;
+}
+
+/**
+ * __merge_memory_ranges - Merges the given memory ranges list.
+ * @mem_rngs: Range list to merge.
+ *
+ * Assumes a sorted range list.
+ *
+ * Returns nothing.
+ */
+static void __merge_memory_ranges(struct crash_mem *mem_rngs)
+{
+ struct range *ranges;
+ int i, idx;
+
+ if (!mem_rngs)
+ return;
+
+ idx = 0;
+ ranges = &(mem_rngs->ranges[0]);
+ for (i = 1; i < mem_rngs->nr_ranges; i++) {
+ if (ranges[i].start <= (ranges[i-1].end + 1))
+ ranges[idx].end = ranges[i].end;
+ else {
+ idx++;
+ if (i == idx)
+ continue;
+
+ ranges[idx] = ranges[i];
+ }
+ }
+ mem_rngs->nr_ranges = idx + 1;
+}
+
+/* cmp_func_t callback to sort ranges with sort() */
+static int rngcmp(const void *_x, const void *_y)
+{
+ const struct range *x = _x, *y = _y;
+
+ if (x->start > y->start)
+ return 1;
+ if (x->start < y->start)
+ return -1;
+ return 0;
+}
+
+/**
+ * sort_memory_ranges - Sorts the given memory ranges list.
+ * @mem_rngs: Range list to sort.
+ * @merge: If true, merge the list after sorting.
+ *
+ * Returns nothing.
+ */
+void sort_memory_ranges(struct crash_mem *mem_rngs, bool merge)
+{
+ int i;
+
+ if (!mem_rngs)
+ return;
+
+ /* Sort the ranges in-place */
+ sort(&(mem_rngs->ranges[0]), mem_rngs->nr_ranges,
+ sizeof(mem_rngs->ranges[0]), rngcmp, NULL);
+
+ if (merge)
+ __merge_memory_ranges(mem_rngs);
+
+ /* For debugging purpose */
+ pr_debug("Memory ranges:\n");
+ for (i = 0; i < mem_rngs->nr_ranges; i++) {
+ pr_debug("\t[%03d][%#016llx - %#016llx]\n", i,
+ mem_rngs->ranges[i].start,
+ mem_rngs->ranges[i].end);
+ }
+}
+
+/**
+ * realloc_mem_ranges - reallocate mem_ranges with size incremented
+ * by MEM_RANGE_CHUNK_SZ. Frees up the old memory,
+ * if memory allocation fails.
+ * @mem_ranges: Memory ranges to reallocate.
+ *
+ * Returns pointer to reallocated memory on success, NULL otherwise.
+ */
+struct crash_mem *realloc_mem_ranges(struct crash_mem **mem_ranges)
+{
+ struct crash_mem *mem_rngs = *mem_ranges;
+ unsigned int nr_ranges;
+ size_t size;
+
+ size = get_mem_rngs_size(mem_rngs);
+ nr_ranges = mem_rngs ? mem_rngs->nr_ranges : 0;
+
+ size += MEM_RANGE_CHUNK_SZ;
+ mem_rngs = krealloc(*mem_ranges, size, GFP_KERNEL);
+ if (!mem_rngs) {
+ kfree(*mem_ranges);
+ *mem_ranges = NULL;
+ return NULL;
+ }
+
+ mem_rngs->nr_ranges = nr_ranges;
+ mem_rngs->max_nr_ranges = get_max_nr_ranges(size);
+ *mem_ranges = mem_rngs;
+
+ return mem_rngs;
+}
+
+/**
+ * add_mem_range - Updates existing memory range, if there is an overlap.
+ * Else, adds a new memory range.
+ * @mem_ranges: Range list to add the memory range to.
+ * @base: Base address of the range to add.
+ * @size: Size of the memory range to add.
+ *
+ * (Re)allocates memory, if needed.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size)
+{
+ struct crash_mem *mem_rngs = *mem_ranges;
+ u64 mstart, mend, end;
+ unsigned int i;
+
+ if (!size)
+ return 0;
+
+ end = base + size - 1;
+
+ if (!mem_rngs || !(mem_rngs->nr_ranges))
+ return __add_mem_range(mem_ranges, base, size);
+
+ for (i = 0; i < mem_rngs->nr_ranges; i++) {
+ mstart = mem_rngs->ranges[i].start;
+ mend = mem_rngs->ranges[i].end;
+ if (base < mend && end > mstart) {
+ if (base < mstart)
+ mem_rngs->ranges[i].start = base;
+ if (end > mend)
+ mem_rngs->ranges[i].end = end;
+ return 0;
+ }
+ }
+
+ return __add_mem_range(mem_ranges, base, size);
+}
+
+#endif /* CONFIG_KEXEC_FILE || CONFIG_CRASH_DUMP */
+
+#ifdef CONFIG_KEXEC_FILE
+/**
+ * add_tce_mem_ranges - Adds tce-table range to the given memory ranges list.
+ * @mem_ranges: Range list to add the memory range(s) to.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int add_tce_mem_ranges(struct crash_mem **mem_ranges)
+{
+ struct device_node *dn = NULL;
+ int ret = 0;
+
+ for_each_node_by_type(dn, "pci") {
+ u64 base;
+ u32 size;
+
+ ret = of_property_read_u64(dn, "linux,tce-base", &base);
+ ret |= of_property_read_u32(dn, "linux,tce-size", &size);
+ if (ret) {
+ /*
+ * It is ok to have pci nodes without tce. So, ignore
+ * property does not exist error.
+ */
+ if (ret == -EINVAL) {
+ ret = 0;
+ continue;
+ }
+ break;
+ }
+
+ ret = add_mem_range(mem_ranges, base, size);
+ if (ret)
+ break;
+ }
+
+ of_node_put(dn);
+ return ret;
+}
+
+/**
+ * add_initrd_mem_range - Adds initrd range to the given memory ranges list,
+ * if the initrd was retained.
+ * @mem_ranges: Range list to add the memory range to.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int add_initrd_mem_range(struct crash_mem **mem_ranges)
+{
+ u64 base, end;
+ int ret;
+
+ /* This range means something, only if initrd was retained */
+ if (!strstr(saved_command_line, "retain_initrd"))
+ return 0;
+
+ ret = of_property_read_u64(of_chosen, "linux,initrd-start", &base);
+ ret |= of_property_read_u64(of_chosen, "linux,initrd-end", &end);
+ if (!ret)
+ ret = add_mem_range(mem_ranges, base, end - base + 1);
+
+ return ret;
+}
+
+/**
+ * add_htab_mem_range - Adds htab range to the given memory ranges list,
+ * if it exists
+ * @mem_ranges: Range list to add the memory range to.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int add_htab_mem_range(struct crash_mem **mem_ranges)
+{
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+ if (!htab_address)
+ return 0;
+
+ return add_mem_range(mem_ranges, __pa(htab_address), htab_size_bytes);
+#else
+ return 0;
+#endif
+}
+
+/**
+ * add_kernel_mem_range - Adds kernel text region to the given
+ * memory ranges list.
+ * @mem_ranges: Range list to add the memory range to.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int add_kernel_mem_range(struct crash_mem **mem_ranges)
+{
+ return add_mem_range(mem_ranges, 0, __pa(_end));
+}
+#endif /* CONFIG_KEXEC_FILE */
+
+#if defined(CONFIG_KEXEC_FILE) || defined(CONFIG_CRASH_DUMP)
+/**
+ * add_rtas_mem_range - Adds RTAS region to the given memory ranges list.
+ * @mem_ranges: Range list to add the memory range to.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int add_rtas_mem_range(struct crash_mem **mem_ranges)
+{
+ struct device_node *dn;
+ u32 base, size;
+ int ret = 0;
+
+ dn = of_find_node_by_path("/rtas");
+ if (!dn)
+ return 0;
+
+ ret = of_property_read_u32(dn, "linux,rtas-base", &base);
+ ret |= of_property_read_u32(dn, "rtas-size", &size);
+ if (!ret)
+ ret = add_mem_range(mem_ranges, base, size);
+
+ of_node_put(dn);
+ return ret;
+}
+
+/**
+ * add_opal_mem_range - Adds OPAL region to the given memory ranges list.
+ * @mem_ranges: Range list to add the memory range to.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int add_opal_mem_range(struct crash_mem **mem_ranges)
+{
+ struct device_node *dn;
+ u64 base, size;
+ int ret;
+
+ dn = of_find_node_by_path("/ibm,opal");
+ if (!dn)
+ return 0;
+
+ ret = of_property_read_u64(dn, "opal-base-address", &base);
+ ret |= of_property_read_u64(dn, "opal-runtime-size", &size);
+ if (!ret)
+ ret = add_mem_range(mem_ranges, base, size);
+
+ of_node_put(dn);
+ return ret;
+}
+#endif /* CONFIG_KEXEC_FILE || CONFIG_CRASH_DUMP */
+
+#ifdef CONFIG_KEXEC_FILE
+/**
+ * add_reserved_mem_ranges - Adds "/reserved-ranges" regions exported by f/w
+ * to the given memory ranges list.
+ * @mem_ranges: Range list to add the memory ranges to.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int add_reserved_mem_ranges(struct crash_mem **mem_ranges)
+{
+ int n_mem_addr_cells, n_mem_size_cells, i, len, cells, ret = 0;
+ struct device_node *root = of_find_node_by_path("/");
+ const __be32 *prop;
+
+ prop = of_get_property(root, "reserved-ranges", &len);
+ n_mem_addr_cells = of_n_addr_cells(root);
+ n_mem_size_cells = of_n_size_cells(root);
+ of_node_put(root);
+ if (!prop)
+ return 0;
+
+ cells = n_mem_addr_cells + n_mem_size_cells;
+
+ /* Each reserved range is an (address,size) pair */
+ for (i = 0; i < (len / (sizeof(u32) * cells)); i++) {
+ u64 base, size;
+
+ base = of_read_number(prop + (i * cells), n_mem_addr_cells);
+ size = of_read_number(prop + (i * cells) + n_mem_addr_cells,
+ n_mem_size_cells);
+
+ ret = add_mem_range(mem_ranges, base, size);
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
+/**
+ * get_reserved_memory_ranges - Get reserve memory ranges. This list includes
+ * memory regions that should be added to the
+ * memory reserve map to ensure the region is
+ * protected from any mischief.
+ * @mem_ranges: Range list to add the memory ranges to.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int get_reserved_memory_ranges(struct crash_mem **mem_ranges)
+{
+ int ret;
+
+ ret = add_rtas_mem_range(mem_ranges);
+ if (ret)
+ goto out;
+
+ ret = add_tce_mem_ranges(mem_ranges);
+ if (ret)
+ goto out;
+
+ ret = add_reserved_mem_ranges(mem_ranges);
+out:
+ if (ret)
+ pr_err("Failed to setup reserved memory ranges\n");
+ return ret;
+}
+
+/**
+ * get_exclude_memory_ranges - Get exclude memory ranges. This list includes
+ * regions like opal/rtas, tce-table, initrd,
+ * kernel, htab which should be avoided while
+ * setting up kexec load segments.
+ * @mem_ranges: Range list to add the memory ranges to.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int get_exclude_memory_ranges(struct crash_mem **mem_ranges)
+{
+ int ret;
+
+ ret = add_tce_mem_ranges(mem_ranges);
+ if (ret)
+ goto out;
+
+ ret = add_initrd_mem_range(mem_ranges);
+ if (ret)
+ goto out;
+
+ ret = add_htab_mem_range(mem_ranges);
+ if (ret)
+ goto out;
+
+ ret = add_kernel_mem_range(mem_ranges);
+ if (ret)
+ goto out;
+
+ ret = add_rtas_mem_range(mem_ranges);
+ if (ret)
+ goto out;
+
+ ret = add_opal_mem_range(mem_ranges);
+ if (ret)
+ goto out;
+
+ ret = add_reserved_mem_ranges(mem_ranges);
+ if (ret)
+ goto out;
+
+ /* exclude memory ranges should be sorted for easy lookup */
+ sort_memory_ranges(*mem_ranges, true);
+out:
+ if (ret)
+ pr_err("Failed to setup exclude memory ranges\n");
+ return ret;
+}
+
+#ifdef CONFIG_CRASH_DUMP
+/**
+ * get_usable_memory_ranges - Get usable memory ranges. This list includes
+ * regions like crashkernel, opal/rtas & tce-table,
+ * that kdump kernel could use.
+ * @mem_ranges: Range list to add the memory ranges to.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int get_usable_memory_ranges(struct crash_mem **mem_ranges)
+{
+ int ret;
+
+ /*
+ * Early boot failure observed on guests when low memory (first memory
+ * block?) is not added to usable memory. So, add [0, crashk_res.end]
+ * instead of [crashk_res.start, crashk_res.end] to workaround it.
+ * Also, crashed kernel's memory must be added to reserve map to
+ * avoid kdump kernel from using it.
+ */
+ ret = add_mem_range(mem_ranges, 0, crashk_res.end + 1);
+ if (ret)
+ goto out;
+
+ ret = add_rtas_mem_range(mem_ranges);
+ if (ret)
+ goto out;
+
+ ret = add_opal_mem_range(mem_ranges);
+ if (ret)
+ goto out;
+
+ ret = add_tce_mem_ranges(mem_ranges);
+out:
+ if (ret)
+ pr_err("Failed to setup usable memory ranges\n");
+ return ret;
+}
+#endif /* CONFIG_CRASH_DUMP */
+#endif /* CONFIG_KEXEC_FILE */
+
+#ifdef CONFIG_CRASH_DUMP
+/**
+ * get_crash_memory_ranges - Get crash memory ranges. This list includes
+ * first/crashing kernel's memory regions that
+ * would be exported via an elfcore.
+ * @mem_ranges: Range list to add the memory ranges to.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int get_crash_memory_ranges(struct crash_mem **mem_ranges)
+{
+ phys_addr_t base, end;
+ struct crash_mem *tmem;
+ u64 i;
+ int ret;
+
+ for_each_mem_range(i, &base, &end) {
+ u64 size = end - base;
+
+ /* Skip backup memory region, which needs a separate entry */
+ if (base == BACKUP_SRC_START) {
+ if (size > BACKUP_SRC_SIZE) {
+ base = BACKUP_SRC_END + 1;
+ size -= BACKUP_SRC_SIZE;
+ } else
+ continue;
+ }
+
+ ret = add_mem_range(mem_ranges, base, size);
+ if (ret)
+ goto out;
+
+ /* Try merging adjacent ranges before reallocation attempt */
+ if ((*mem_ranges)->nr_ranges == (*mem_ranges)->max_nr_ranges)
+ sort_memory_ranges(*mem_ranges, true);
+ }
+
+ /* Reallocate memory ranges if there is no space to split ranges */
+ tmem = *mem_ranges;
+ if (tmem && (tmem->nr_ranges == tmem->max_nr_ranges)) {
+ tmem = realloc_mem_ranges(mem_ranges);
+ if (!tmem)
+ goto out;
+ }
+
+ /* Exclude crashkernel region */
+ ret = crash_exclude_mem_range(tmem, crashk_res.start, crashk_res.end);
+ if (ret)
+ goto out;
+
+ /*
+ * FIXME: For now, stay in parity with kexec-tools but if RTAS/OPAL
+ * regions are exported to save their context at the time of
+ * crash, they should actually be backed up just like the
+ * first 64K bytes of memory.
+ */
+ ret = add_rtas_mem_range(mem_ranges);
+ if (ret)
+ goto out;
+
+ ret = add_opal_mem_range(mem_ranges);
+ if (ret)
+ goto out;
+
+ /* create a separate program header for the backup region */
+ ret = add_mem_range(mem_ranges, BACKUP_SRC_START, BACKUP_SRC_SIZE);
+ if (ret)
+ goto out;
+
+ sort_memory_ranges(*mem_ranges, false);
+out:
+ if (ret)
+ pr_err("Failed to setup crash memory ranges\n");
+ return ret;
+}
+
+/**
+ * remove_mem_range - Removes the given memory range from the range list.
+ * @mem_ranges: Range list to remove the memory range to.
+ * @base: Base address of the range to remove.
+ * @size: Size of the memory range to remove.
+ *
+ * (Re)allocates memory, if needed.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int remove_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size)
+{
+ u64 end;
+ int ret = 0;
+ unsigned int i;
+ u64 mstart, mend;
+ struct crash_mem *mem_rngs = *mem_ranges;
+
+ if (!size)
+ return 0;
+
+ /*
+ * Memory range are stored as start and end address, use
+ * the same format to do remove operation.
+ */
+ end = base + size - 1;
+
+ for (i = 0; i < mem_rngs->nr_ranges; i++) {
+ mstart = mem_rngs->ranges[i].start;
+ mend = mem_rngs->ranges[i].end;
+
+ /*
+ * Memory range to remove is not part of this range entry
+ * in the memory range list
+ */
+ if (!(base >= mstart && end <= mend))
+ continue;
+
+ /*
+ * Memory range to remove is equivalent to this entry in the
+ * memory range list. Remove the range entry from the list.
+ */
+ if (base == mstart && end == mend) {
+ for (; i < mem_rngs->nr_ranges - 1; i++) {
+ mem_rngs->ranges[i].start = mem_rngs->ranges[i+1].start;
+ mem_rngs->ranges[i].end = mem_rngs->ranges[i+1].end;
+ }
+ mem_rngs->nr_ranges--;
+ goto out;
+ }
+ /*
+ * Start address of the memory range to remove and the
+ * current memory range entry in the list is same. Just
+ * move the start address of the current memory range
+ * entry in the list to end + 1.
+ */
+ else if (base == mstart) {
+ mem_rngs->ranges[i].start = end + 1;
+ goto out;
+ }
+ /*
+ * End address of the memory range to remove and the
+ * current memory range entry in the list is same.
+ * Just move the end address of the current memory
+ * range entry in the list to base - 1.
+ */
+ else if (end == mend) {
+ mem_rngs->ranges[i].end = base - 1;
+ goto out;
+ }
+ /*
+ * Memory range to remove is not at the edge of current
+ * memory range entry. Split the current memory entry into
+ * two half.
+ */
+ else {
+ mem_rngs->ranges[i].end = base - 1;
+ size = mem_rngs->ranges[i].end - end;
+ ret = add_mem_range(mem_ranges, end + 1, size);
+ }
+ }
+out:
+ return ret;
+}
+#endif /* CONFIG_CRASH_DUMP */
diff --git a/arch/powerpc/kexec/relocate_32.S b/arch/powerpc/kexec/relocate_32.S
index 61946c19e07c..dd86e338307d 100644
--- a/arch/powerpc/kexec/relocate_32.S
+++ b/arch/powerpc/kexec/relocate_32.S
@@ -8,6 +8,7 @@
* Author: Suzuki Poulose <suzuki@in.ibm.com>
*/
+#include <linux/objtool.h>
#include <asm/reg.h>
#include <asm/page.h>
#include <asm/mmu.h>
@@ -25,14 +26,14 @@ relocate_new_kernel:
/* r4 = reboot_code_buffer */
/* r5 = start_address */
-#ifdef CONFIG_FSL_BOOKE
+#ifdef CONFIG_PPC_85xx
mr r29, r3
mr r30, r4
mr r31, r5
#define ENTRY_MAPPING_KEXEC_SETUP
-#include <kernel/fsl_booke_entry_mapping.S>
+#include <kernel/85xx_entry_mapping.S>
#undef ENTRY_MAPPING_KEXEC_SETUP
mr r3, r29
@@ -93,7 +94,7 @@ wmmucr:
* Invalidate all the TLB entries except the current entry
* where we are running from
*/
- bl 0f /* Find our address */
+ bcl 20,31,$+4 /* Find our address */
0: mflr r5 /* Make it accessible */
tlbsx r23,0,r5 /* Find entry we are in */
li r4,0 /* Start at TLB entry 0 */
@@ -158,7 +159,7 @@ write_out:
/* Switch to other address space in MSR */
insrwi r9, r7, 1, 26 /* Set MSR[IS] = r7 */
- bl 1f
+ bcl 20,31,$+4
1: mflr r8
addi r8, r8, (2f-1b) /* Find the target offset */
@@ -202,7 +203,7 @@ next_tlb:
li r9,0
insrwi r9, r7, 1, 26 /* Set MSR[IS] = r7 */
- bl 1f
+ bcl 20,31,$+4
1: mflr r8
and r8, r8, r11 /* Get our offset within page */
addi r8, r8, (2f-1b)
@@ -240,7 +241,7 @@ setup_map_47x:
sync
/* Find the entry we are running from */
- bl 2f
+ bcl 20,31,$+4
2: mflr r23
tlbsx r23, 0, r23
tlbre r24, r23, 0 /* TLB Word 0 */
@@ -296,7 +297,7 @@ clear_utlb_entry:
/* Update the msr to the new TS */
insrwi r5, r7, 1, 26
- bl 1f
+ bcl 20,31,$+4
1: mflr r6
addi r6, r6, (2f-1b)
@@ -347,15 +348,13 @@ write_utlb:
rlwinm r10, r24, 0, 22, 27
cmpwi r10, PPC47x_TLB0_4K
- bne 0f
li r10, 0x1000 /* r10 = 4k */
- bl 1f
+ beq 0f
-0:
/* Defaults to 256M */
lis r10, 0x1000
- bl 1f
+0: bcl 20,31,$+4
1: mflr r4
addi r4, r4, (2f-1b) /* virtual address of 2f */
diff --git a/arch/powerpc/kexec/vmcore_info.c b/arch/powerpc/kexec/vmcore_info.c
new file mode 100644
index 000000000000..2b65d2adca5e
--- /dev/null
+++ b/arch/powerpc/kexec/vmcore_info.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/vmcore_info.h>
+#include <asm/pgalloc.h>
+
+void arch_crash_save_vmcoreinfo(void)
+{
+
+#ifdef CONFIG_NUMA
+ VMCOREINFO_SYMBOL(node_data);
+ VMCOREINFO_LENGTH(node_data, MAX_NUMNODES);
+#endif
+#ifndef CONFIG_NUMA
+ VMCOREINFO_SYMBOL(contig_page_data);
+#endif
+#if defined(CONFIG_PPC64) && defined(CONFIG_SPARSEMEM_VMEMMAP)
+ VMCOREINFO_SYMBOL(vmemmap_list);
+ VMCOREINFO_SYMBOL(mmu_vmemmap_psize);
+ VMCOREINFO_SYMBOL(mmu_psize_defs);
+ VMCOREINFO_STRUCT_SIZE(vmemmap_backing);
+ VMCOREINFO_OFFSET(vmemmap_backing, list);
+ VMCOREINFO_OFFSET(vmemmap_backing, phys);
+ VMCOREINFO_OFFSET(vmemmap_backing, virt_addr);
+ VMCOREINFO_STRUCT_SIZE(mmu_psize_def);
+ VMCOREINFO_OFFSET(mmu_psize_def, shift);
+#endif
+ VMCOREINFO_SYMBOL(cur_cpu_spec);
+ VMCOREINFO_OFFSET(cpu_spec, cpu_features);
+ VMCOREINFO_OFFSET(cpu_spec, mmu_features);
+ vmcoreinfo_append_str("NUMBER(RADIX_MMU)=%d\n", early_radix_enabled());
+ vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
+}