From 4e62d1d86585e1b62b4f96ee586881dd45a443dc Mon Sep 17 00:00:00 2001 From: Youling Tang Date: Wed, 12 Oct 2022 16:36:19 +0800 Subject: LoongArch: Add kdump support This patch adds support for kdump. In the kdump case the normal kernel will reserve a region for the crash kernel and jump there on panic. Arch-specific functions are added to allow for implementing a crash dump file interface, /proc/vmcore, which can be viewed as an ELF file. A user-space tool, such as kexec-tools, is responsible for allocating a separate region for the core's ELF header within the crash kdump kernel memory and filling it in when executing kexec_load(). Then, its location will be advertised to the crash dump kernel via a command line argument "elfcorehdr=", and the crash dump kernel will preserve this region for later use with arch_reserve_vmcore() at boot time. At the same time, the crash kdump kernel is also limited within the "crashkernel" area via a command line argument "mem=", so as not to destroy the original kernel dump data. In the crash dump kernel environment, /proc/vmcore is used to access the primary kernel's memory with copy_oldmem_page(). 
I tested kdump on LoongArch machines (Loongson-3A5000) and it works as expected (suggested crashkernel parameter is "crashkernel=512M@2560M"), you may test it by triggering a crash through /proc/sysrq-trigger: $ sudo kexec -p /boot/vmlinux-kdump --reuse-cmdline --append="nr_cpus=1" # echo c > /proc/sysrq-trigger Signed-off-by: Youling Tang Signed-off-by: Huacai Chen --- arch/loongarch/kernel/machine_kexec.c | 98 +++++++++++++++++++++++++++++++++-- 1 file changed, 93 insertions(+), 5 deletions(-) (limited to 'arch/loongarch/kernel/machine_kexec.c') diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c index d5037573ed66..2dcb9e003657 100644 --- a/arch/loongarch/kernel/machine_kexec.c +++ b/arch/loongarch/kernel/machine_kexec.c @@ -7,10 +7,15 @@ #include #include #include -#include +#include #include +#include #include +#include #include +#include +#include +#include #include #include @@ -21,6 +26,7 @@ #define KEXEC_CMDLINE_ADDR TO_CACHE(0x108000UL) static unsigned long reboot_code_buffer; +static cpumask_t cpus_in_crash = CPU_MASK_NONE; #ifdef CONFIG_SMP static void (*relocated_kexec_smp_wait)(void *); @@ -78,7 +84,7 @@ int machine_kexec_prepare(struct kimage *kimage) return -EINVAL; } - /* kexec need a safe page to save reboot_code_buffer */ + /* kexec/kdump need a safe page to save reboot_code_buffer */ kimage->control_code_page = virt_to_page((void *)KEXEC_CONTROL_CODE); reboot_code_buffer = (unsigned long)page_address(kimage->control_code_page); @@ -102,7 +108,8 @@ void kexec_reboot(void) /* * We know we were online, and there will be no incoming IPIs at - * this point. + * this point. Mark online again before rebooting so that the crash + * analysis tool will see us correctly. 
*/ set_cpu_online(smp_processor_id(), true); @@ -147,7 +154,74 @@ static void kexec_shutdown_secondary(void *regs) kexec_reboot(); } -#endif + +static void crash_shutdown_secondary(void *passed_regs) +{ + int cpu = smp_processor_id(); + struct pt_regs *regs = passed_regs; + + /* + * If we are passed registers, use those. Otherwise get the + * regs from the last interrupt, which should be correct, as + * we are in an interrupt. But if the regs are not there, + * pull them from the top of the stack. They are probably + * wrong, but we need something to keep from crashing again. + */ + if (!regs) + regs = get_irq_regs(); + if (!regs) + regs = task_pt_regs(current); + + if (!cpu_online(cpu)) + return; + + /* We won't be sent IPIs any more. */ + set_cpu_online(cpu, false); + + local_irq_disable(); + if (!cpumask_test_cpu(cpu, &cpus_in_crash)) + crash_save_cpu(regs, cpu); + cpumask_set_cpu(cpu, &cpus_in_crash); + + while (!atomic_read(&kexec_ready_to_reboot)) + cpu_relax(); + + kexec_reboot(); +} + +void crash_smp_send_stop(void) +{ + unsigned int ncpus; + unsigned long timeout; + static int cpus_stopped; + + /* + * This function can be called twice in panic path, but obviously + * we should execute this only once. + */ + if (cpus_stopped) + return; + + cpus_stopped = 1; + + /* Excluding the panic cpu */ + ncpus = num_online_cpus() - 1; + + smp_call_function(crash_shutdown_secondary, NULL, 0); + smp_wmb(); + + /* + * The crash CPU sends an IPI and wait for other CPUs to + * respond. Delay of at least 10 seconds. 
+ */ + timeout = MSEC_PER_SEC * 10; + pr_emerg("Sending IPI to other cpus...\n"); + while ((cpumask_weight(&cpus_in_crash) < ncpus) && timeout--) { + mdelay(1); + cpu_relax(); + } +} +#endif /* defined(CONFIG_SMP) */ void machine_shutdown(void) { @@ -165,6 +239,19 @@ void machine_shutdown(void) void machine_crash_shutdown(struct pt_regs *regs) { + int crashing_cpu; + + local_irq_disable(); + + crashing_cpu = smp_processor_id(); + crash_save_cpu(regs, crashing_cpu); + +#ifdef CONFIG_SMP + crash_smp_send_stop(); +#endif + cpumask_set_cpu(crashing_cpu, &cpus_in_crash); + + pr_info("Starting crashdump kernel...\n"); } void machine_kexec(struct kimage *image) @@ -178,7 +265,8 @@ void machine_kexec(struct kimage *image) start_addr = (unsigned long)phys_to_virt(image->start); - first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK); + first_ind_entry = (image->type == KEXEC_TYPE_DEFAULT) ? + (unsigned long)phys_to_virt(image->head & PAGE_MASK) : 0; /* * The generic kexec code builds a page list with physical -- cgit