summaryrefslogtreecommitdiff
path: root/kernel/kexec_core.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/kexec_core.c')
-rw-r--r--kernel/kexec_core.c751
1 files changed, 454 insertions, 297 deletions
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 969e8f52f7da..0f92acdd354d 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -6,6 +6,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/btf.h>
#include <linux/capability.h>
#include <linux/mm.h>
#include <linux/file.h>
@@ -14,6 +15,7 @@
#include <linux/kexec.h>
#include <linux/mutex.h>
#include <linux/list.h>
+#include <linux/liveupdate.h>
#include <linux/highmem.h>
#include <linux/syscalls.h>
#include <linux/reboot.h>
@@ -39,6 +41,8 @@
#include <linux/hugetlb.h>
#include <linux/objtool.h>
#include <linux/kmsg_dump.h>
+#include <linux/dma-map-ops.h>
+#include <linux/sysfs.h>
#include <asm/page.h>
#include <asm/sections.h>
@@ -48,52 +52,10 @@
atomic_t __kexec_lock = ATOMIC_INIT(0);
-/* Per cpu memory for storing cpu states in case of system crash. */
-note_buf_t __percpu *crash_notes;
-
/* Flag to indicate we are going to kexec a new kernel */
bool kexec_in_progress = false;
-
-/* Location of the reserved area for the crash kernel */
-struct resource crashk_res = {
- .name = "Crash kernel",
- .start = 0,
- .end = 0,
- .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
- .desc = IORES_DESC_CRASH_KERNEL
-};
-struct resource crashk_low_res = {
- .name = "Crash kernel",
- .start = 0,
- .end = 0,
- .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
- .desc = IORES_DESC_CRASH_KERNEL
-};
-
-int kexec_should_crash(struct task_struct *p)
-{
- /*
- * If crash_kexec_post_notifiers is enabled, don't run
- * crash_kexec() here yet, which must be run after panic
- * notifiers in panic().
- */
- if (crash_kexec_post_notifiers)
- return 0;
- /*
- * There are 4 panic() calls in make_task_dead() path, each of which
- * corresponds to each of these 4 conditions.
- */
- if (in_interrupt() || !p->pid || is_global_init(p) || panic_on_oops)
- return 1;
- return 0;
-}
-
-int kexec_crash_loaded(void)
-{
- return !!kexec_crash_image;
-}
-EXPORT_SYMBOL_GPL(kexec_crash_loaded);
+bool kexec_file_dbg_print;
/*
* When kexec transitions to the new kernel there is a one-to-one
@@ -226,6 +188,7 @@ int sanity_check_segment_list(struct kimage *image)
if (total_pages > nr_pages / 2)
return -EINVAL;
+#ifdef CONFIG_CRASH_DUMP
/*
* Verify we have good destination addresses. Normally
* the caller is responsible for making certain we don't
@@ -248,6 +211,17 @@ int sanity_check_segment_list(struct kimage *image)
return -EADDRNOTAVAIL;
}
}
+#endif
+
+ /*
+ * The destination addresses are searched from system RAM rather than
+ * being allocated from the buddy allocator, so they are not guaranteed
+ * to be accepted by the current kernel. Accept the destination
+ * addresses before kexec swaps their content with the segments' source
+ * pages to avoid accessing memory before it is accepted.
+ */
+ for (i = 0; i < nr_segments; i++)
+ accept_memory(image->segment[i].mem, image->segment[i].memsz);
return 0;
}
@@ -261,7 +235,6 @@ struct kimage *do_kimage_alloc_init(void)
if (!image)
return NULL;
- image->head = 0;
image->entry = &image->head;
image->last_entry = &image->head;
image->control_page = ~0; /* By default this does not apply */
@@ -276,6 +249,12 @@ struct kimage *do_kimage_alloc_init(void)
/* Initialize the list of unusable pages */
INIT_LIST_HEAD(&image->unusable_pages);
+#ifdef CONFIG_CRASH_HOTPLUG
+ image->hp_action = KEXEC_CRASH_HP_NONE;
+ image->elfcorehdr_index = -1;
+ image->elfcorehdr_updated = false;
+#endif
+
return image;
}
@@ -289,8 +268,8 @@ int kimage_is_destination_range(struct kimage *image,
unsigned long mstart, mend;
mstart = image->segment[i].mem;
- mend = mstart + image->segment[i].memsz;
- if ((end > mstart) && (start < mend))
+ mend = mstart + image->segment[i].memsz - 1;
+ if ((end >= mstart) && (start <= mend))
return 1;
}
@@ -383,7 +362,7 @@ static struct page *kimage_alloc_normal_control_pages(struct kimage *image,
pfn = page_to_boot_pfn(pages);
epfn = pfn + count;
addr = pfn << PAGE_SHIFT;
- eaddr = epfn << PAGE_SHIFT;
+ eaddr = (epfn << PAGE_SHIFT) - 1;
if ((epfn >= (KEXEC_CONTROL_MEMORY_LIMIT >> PAGE_SHIFT)) ||
kimage_is_destination_range(image, addr, eaddr)) {
list_add(&pages->lru, &extra_pages);
@@ -414,6 +393,7 @@ static struct page *kimage_alloc_normal_control_pages(struct kimage *image,
return pages;
}
+#ifdef CONFIG_CRASH_DUMP
static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
unsigned int order)
{
@@ -443,7 +423,7 @@ static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
pages = NULL;
size = (1 << order) << PAGE_SHIFT;
- hole_start = (image->control_page + (size - 1)) & ~(size - 1);
+ hole_start = ALIGN(image->control_page, size);
hole_end = hole_start + size - 1;
while (hole_end <= crashk_res.end) {
unsigned long i;
@@ -460,7 +440,7 @@ static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
mend = mstart + image->segment[i].memsz - 1;
if ((hole_end >= mstart) && (hole_start <= mend)) {
/* Advance the hole to the end of the segment */
- hole_start = (mend + (size - 1)) & ~(size - 1);
+ hole_start = ALIGN(mend, size);
hole_end = hole_start + size - 1;
break;
}
@@ -468,7 +448,7 @@ static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
/* If I don't overlap any segments I have found my hole! */
if (i == image->nr_segments) {
pages = pfn_to_page(hole_start >> PAGE_SHIFT);
- image->control_page = hole_end;
+ image->control_page = hole_end + 1;
break;
}
}
@@ -479,6 +459,7 @@ static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
return pages;
}
+#endif
struct page *kimage_alloc_control_pages(struct kimage *image,
@@ -490,48 +471,16 @@ struct page *kimage_alloc_control_pages(struct kimage *image,
case KEXEC_TYPE_DEFAULT:
pages = kimage_alloc_normal_control_pages(image, order);
break;
+#ifdef CONFIG_CRASH_DUMP
case KEXEC_TYPE_CRASH:
pages = kimage_alloc_crash_control_pages(image, order);
break;
+#endif
}
return pages;
}
-int kimage_crash_copy_vmcoreinfo(struct kimage *image)
-{
- struct page *vmcoreinfo_page;
- void *safecopy;
-
- if (image->type != KEXEC_TYPE_CRASH)
- return 0;
-
- /*
- * For kdump, allocate one vmcoreinfo safe copy from the
- * crash memory. as we have arch_kexec_protect_crashkres()
- * after kexec syscall, we naturally protect it from write
- * (even read) access under kernel direct mapping. But on
- * the other hand, we still need to operate it when crash
- * happens to generate vmcoreinfo note, hereby we rely on
- * vmap for this purpose.
- */
- vmcoreinfo_page = kimage_alloc_control_pages(image, 0);
- if (!vmcoreinfo_page) {
- pr_warn("Could not allocate vmcoreinfo buffer\n");
- return -ENOMEM;
- }
- safecopy = vmap(&vmcoreinfo_page, 1, VM_MAP, PAGE_KERNEL);
- if (!safecopy) {
- pr_warn("Could not vmap vmcoreinfo buffer\n");
- return -ENOMEM;
- }
-
- image->vmcoreinfo_data_copy = safecopy;
- crash_update_vmcoreinfo_safecopy(safecopy);
-
- return 0;
-}
-
static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
{
if (*image->entry != 0)
@@ -606,6 +555,24 @@ static void kimage_free_entry(kimage_entry_t entry)
kimage_free_pages(page);
}
+static void kimage_free_cma(struct kimage *image)
+{
+ unsigned long i;
+
+ for (i = 0; i < image->nr_segments; i++) {
+ struct page *cma = image->segment_cma[i];
+ u32 nr_pages = image->segment[i].memsz >> PAGE_SHIFT;
+
+ if (!cma)
+ continue;
+
+ arch_kexec_pre_free_pages(page_address(cma), nr_pages);
+ dma_release_from_contiguous(NULL, cma, nr_pages);
+ image->segment_cma[i] = NULL;
+ }
+
+}
+
void kimage_free(struct kimage *image)
{
kimage_entry_t *ptr, entry;
@@ -614,10 +581,12 @@ void kimage_free(struct kimage *image)
if (!image)
return;
+#ifdef CONFIG_CRASH_DUMP
if (image->vmcoreinfo_data_copy) {
crash_update_vmcoreinfo_safecopy(NULL);
vunmap(image->vmcoreinfo_data_copy);
}
+#endif
kimage_free_extra_pages(image);
for_each_kimage_entry(image, ptr, entry) {
@@ -642,6 +611,9 @@ void kimage_free(struct kimage *image)
/* Free the kexec control pages... */
kimage_free_page_list(&image->control_pages);
+ /* Free CMA allocations */
+ kimage_free_cma(image);
+
/*
* Free up any temporary buffers allocated. This might hit if
* error occurred much later after buffer allocation.
@@ -729,7 +701,7 @@ static struct page *kimage_alloc_page(struct kimage *image,
/* If the page is not a destination page use it */
if (!kimage_is_destination_range(image, addr,
- addr + PAGE_SIZE))
+ addr + PAGE_SIZE - 1))
break;
/*
@@ -767,9 +739,64 @@ static struct page *kimage_alloc_page(struct kimage *image,
return page;
}
-static int kimage_load_normal_segment(struct kimage *image,
- struct kexec_segment *segment)
+static int kimage_load_cma_segment(struct kimage *image, int idx)
{
+ struct kexec_segment *segment = &image->segment[idx];
+ struct page *cma = image->segment_cma[idx];
+ char *ptr = page_address(cma);
+ size_t ubytes, mbytes;
+ int result = 0;
+ unsigned char __user *buf = NULL;
+ unsigned char *kbuf = NULL;
+
+ if (image->file_mode)
+ kbuf = segment->kbuf;
+ else
+ buf = segment->buf;
+ ubytes = segment->bufsz;
+ mbytes = segment->memsz;
+
+ /* Then copy from source buffer to the CMA one */
+ while (mbytes) {
+ size_t uchunk, mchunk;
+
+ mchunk = min_t(size_t, mbytes, PAGE_SIZE);
+ uchunk = min(ubytes, mchunk);
+
+ if (uchunk) {
+ /* For file based kexec, source pages are in kernel memory */
+ if (image->file_mode)
+ memcpy(ptr, kbuf, uchunk);
+ else
+ result = copy_from_user(ptr, buf, uchunk);
+ ubytes -= uchunk;
+ if (image->file_mode)
+ kbuf += uchunk;
+ else
+ buf += uchunk;
+ }
+
+ if (result) {
+ result = -EFAULT;
+ goto out;
+ }
+
+ ptr += mchunk;
+ mbytes -= mchunk;
+
+ cond_resched();
+ }
+
+ /* Clear any remainder */
+ memset(ptr, 0, mbytes);
+
+out:
+ return result;
+}
+
+static int kimage_load_normal_segment(struct kimage *image, int idx)
+{
+ struct kexec_segment *segment = &image->segment[idx];
unsigned long maddr;
size_t ubytes, mbytes;
int result;
@@ -784,6 +811,9 @@ static int kimage_load_normal_segment(struct kimage *image,
mbytes = segment->memsz;
maddr = segment->mem;
+ if (image->segment_cma[idx])
+ return kimage_load_cma_segment(image, idx);
+
result = kimage_set_destination(image, maddr);
if (result < 0)
goto out;
@@ -806,27 +836,27 @@ static int kimage_load_normal_segment(struct kimage *image,
ptr = kmap_local_page(page);
/* Start with a clear page */
clear_page(ptr);
- ptr += maddr & ~PAGE_MASK;
- mchunk = min_t(size_t, mbytes,
- PAGE_SIZE - (maddr & ~PAGE_MASK));
+ mchunk = min_t(size_t, mbytes, PAGE_SIZE);
uchunk = min(ubytes, mchunk);
- /* For file based kexec, source pages are in kernel memory */
- if (image->file_mode)
- memcpy(ptr, kbuf, uchunk);
- else
- result = copy_from_user(ptr, buf, uchunk);
+ if (uchunk) {
+ /* For file based kexec, source pages are in kernel memory */
+ if (image->file_mode)
+ memcpy(ptr, kbuf, uchunk);
+ else
+ result = copy_from_user(ptr, buf, uchunk);
+ ubytes -= uchunk;
+ if (image->file_mode)
+ kbuf += uchunk;
+ else
+ buf += uchunk;
+ }
kunmap_local(ptr);
if (result) {
result = -EFAULT;
goto out;
}
- ubytes -= uchunk;
maddr += mchunk;
- if (image->file_mode)
- kbuf += mchunk;
- else
- buf += mchunk;
mbytes -= mchunk;
cond_resched();
@@ -835,13 +865,14 @@ out:
return result;
}
-static int kimage_load_crash_segment(struct kimage *image,
- struct kexec_segment *segment)
+#ifdef CONFIG_CRASH_DUMP
+static int kimage_load_crash_segment(struct kimage *image, int idx)
{
/* For crash dumps kernels we simply copy the data from
* user space to it's destination.
* We do things a page at a time for the sake of kmap.
*/
+ struct kexec_segment *segment = &image->segment[idx];
unsigned long maddr;
size_t ubytes, mbytes;
int result;
@@ -868,20 +899,25 @@ static int kimage_load_crash_segment(struct kimage *image,
}
arch_kexec_post_alloc_pages(page_address(page), 1, 0);
ptr = kmap_local_page(page);
- ptr += maddr & ~PAGE_MASK;
- mchunk = min_t(size_t, mbytes,
- PAGE_SIZE - (maddr & ~PAGE_MASK));
+ mchunk = min_t(size_t, mbytes, PAGE_SIZE);
uchunk = min(ubytes, mchunk);
if (mchunk > uchunk) {
/* Zero the trailing part of the page */
memset(ptr + uchunk, 0, mchunk - uchunk);
}
- /* For file based kexec, source pages are in kernel memory */
- if (image->file_mode)
- memcpy(ptr, kbuf, uchunk);
- else
- result = copy_from_user(ptr, buf, uchunk);
+ if (uchunk) {
+ /* For file based kexec, source pages are in kernel memory */
+ if (image->file_mode)
+ memcpy(ptr, kbuf, uchunk);
+ else
+ result = copy_from_user(ptr, buf, uchunk);
+ ubytes -= uchunk;
+ if (image->file_mode)
+ kbuf += uchunk;
+ else
+ buf += uchunk;
+ }
kexec_flush_icache_page(page);
kunmap_local(ptr);
arch_kexec_pre_free_pages(page_address(page), 1);
@@ -889,12 +925,7 @@ static int kimage_load_crash_segment(struct kimage *image,
result = -EFAULT;
goto out;
}
- ubytes -= uchunk;
maddr += mchunk;
- if (image->file_mode)
- kbuf += mchunk;
- else
- buf += mchunk;
mbytes -= mchunk;
cond_resched();
@@ -902,225 +933,196 @@ static int kimage_load_crash_segment(struct kimage *image,
out:
return result;
}
+#endif
-int kimage_load_segment(struct kimage *image,
- struct kexec_segment *segment)
+int kimage_load_segment(struct kimage *image, int idx)
{
int result = -ENOMEM;
switch (image->type) {
case KEXEC_TYPE_DEFAULT:
- result = kimage_load_normal_segment(image, segment);
+ result = kimage_load_normal_segment(image, idx);
break;
+#ifdef CONFIG_CRASH_DUMP
case KEXEC_TYPE_CRASH:
- result = kimage_load_crash_segment(image, segment);
+ result = kimage_load_crash_segment(image, idx);
break;
+#endif
}
return result;
}
-struct kimage *kexec_image;
-struct kimage *kexec_crash_image;
-int kexec_load_disabled;
-#ifdef CONFIG_SYSCTL
-static struct ctl_table kexec_core_sysctls[] = {
- {
- .procname = "kexec_load_disabled",
- .data = &kexec_load_disabled,
- .maxlen = sizeof(int),
- .mode = 0644,
- /* only handle a transition from default "0" to "1" */
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ONE,
- .extra2 = SYSCTL_ONE,
- },
- { }
-};
-
-static int __init kexec_core_sysctl_init(void)
-{
- register_sysctl_init("kernel", kexec_core_sysctls);
- return 0;
-}
-late_initcall(kexec_core_sysctl_init);
-#endif
-
-/*
- * No panic_cpu check version of crash_kexec(). This function is called
- * only when panic_cpu holds the current CPU number; this is the only CPU
- * which processes crash_kexec routines.
- */
-void __noclone __crash_kexec(struct pt_regs *regs)
-{
- /* Take the kexec_lock here to prevent sys_kexec_load
- * running on one cpu from replacing the crash kernel
- * we are using after a panic on a different cpu.
- *
- * If the crash kernel was not located in a fixed area
- * of memory the xchg(&kexec_crash_image) would be
- * sufficient. But since I reuse the memory...
- */
- if (kexec_trylock()) {
- if (kexec_crash_image) {
- struct pt_regs fixed_regs;
-
- crash_setup_regs(&fixed_regs, regs);
- crash_save_vmcoreinfo();
- machine_crash_shutdown(&fixed_regs);
- machine_kexec(kexec_crash_image);
- }
- kexec_unlock();
- }
-}
-STACK_FRAME_NON_STANDARD(__crash_kexec);
-
-void crash_kexec(struct pt_regs *regs)
+void *kimage_map_segment(struct kimage *image,
+ unsigned long addr, unsigned long size)
{
- int old_cpu, this_cpu;
+ unsigned long src_page_addr, dest_page_addr = 0;
+ unsigned long eaddr = addr + size;
+ kimage_entry_t *ptr, entry;
+ struct page **src_pages;
+ unsigned int npages;
+ void *vaddr = NULL;
+ int i;
/*
- * Only one CPU is allowed to execute the crash_kexec() code as with
- * panic(). Otherwise parallel calls of panic() and crash_kexec()
- * may stop each other. To exclude them, we use panic_cpu here too.
+ * Collect the source pages and map them in a contiguous VA range.
*/
- this_cpu = raw_smp_processor_id();
- old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, this_cpu);
- if (old_cpu == PANIC_CPU_INVALID) {
- /* This is the 1st CPU which comes here, so go ahead. */
- __crash_kexec(regs);
+ npages = PFN_UP(eaddr) - PFN_DOWN(addr);
+ src_pages = kmalloc_array(npages, sizeof(*src_pages), GFP_KERNEL);
+ if (!src_pages) {
+ pr_err("Could not allocate ima pages array.\n");
+ return NULL;
+ }
- /*
- * Reset panic_cpu to allow another panic()/crash_kexec()
- * call.
- */
- atomic_set(&panic_cpu, PANIC_CPU_INVALID);
+ i = 0;
+ for_each_kimage_entry(image, ptr, entry) {
+ if (entry & IND_DESTINATION) {
+ dest_page_addr = entry & PAGE_MASK;
+ } else if (entry & IND_SOURCE) {
+ if (dest_page_addr >= addr && dest_page_addr < eaddr) {
+ src_page_addr = entry & PAGE_MASK;
+ src_pages[i++] =
+ virt_to_page(__va(src_page_addr));
+ if (i == npages)
+ break;
+ dest_page_addr += PAGE_SIZE;
+ }
+ }
}
-}
-ssize_t crash_get_memory_size(void)
-{
- ssize_t size = 0;
+ /* Sanity check. */
+ WARN_ON(i < npages);
- if (!kexec_trylock())
- return -EBUSY;
+ vaddr = vmap(src_pages, npages, VM_MAP, PAGE_KERNEL);
+ kfree(src_pages);
- if (crashk_res.end != crashk_res.start)
- size = resource_size(&crashk_res);
+ if (!vaddr)
+ pr_err("Could not map ima buffer.\n");
- kexec_unlock();
- return size;
+ return vaddr;
}
-int crash_shrink_memory(unsigned long new_size)
+void kimage_unmap_segment(void *segment_buffer)
{
- int ret = 0;
- unsigned long start, end;
- unsigned long old_size;
- struct resource *ram_res;
-
- if (!kexec_trylock())
- return -EBUSY;
+ vunmap(segment_buffer);
+}
- if (kexec_crash_image) {
- ret = -ENOENT;
- goto unlock;
- }
- start = crashk_res.start;
- end = crashk_res.end;
- old_size = (end == 0) ? 0 : end - start + 1;
- if (new_size >= old_size) {
- ret = (new_size == old_size) ? 0 : -EINVAL;
- goto unlock;
- }
+struct kexec_load_limit {
+ /* Mutex protects the limit count. */
+ struct mutex mutex;
+ int limit;
+};
- ram_res = kzalloc(sizeof(*ram_res), GFP_KERNEL);
- if (!ram_res) {
- ret = -ENOMEM;
- goto unlock;
- }
+static struct kexec_load_limit load_limit_reboot = {
+ .mutex = __MUTEX_INITIALIZER(load_limit_reboot.mutex),
+ .limit = -1,
+};
- start = roundup(start, KEXEC_CRASH_MEM_ALIGN);
- end = roundup(start + new_size, KEXEC_CRASH_MEM_ALIGN);
+static struct kexec_load_limit load_limit_panic = {
+ .mutex = __MUTEX_INITIALIZER(load_limit_panic.mutex),
+ .limit = -1,
+};
- crash_free_reserved_phys_range(end, crashk_res.end);
+struct kimage *kexec_image;
+struct kimage *kexec_crash_image;
+static int kexec_load_disabled;
- if ((start == end) && (crashk_res.parent != NULL))
- release_resource(&crashk_res);
+#ifdef CONFIG_SYSCTL
+static int kexec_limit_handler(const struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ struct kexec_load_limit *limit = table->data;
+ int val;
+ struct ctl_table tmp = {
+ .data = &val,
+ .maxlen = sizeof(val),
+ .mode = table->mode,
+ };
+ int ret;
+
+ if (write) {
+ ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);
+ if (ret)
+ return ret;
+
+ if (val < 0)
+ return -EINVAL;
- ram_res->start = end;
- ram_res->end = crashk_res.end;
- ram_res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;
- ram_res->name = "System RAM";
+ mutex_lock(&limit->mutex);
+ if (limit->limit != -1 && val >= limit->limit)
+ ret = -EINVAL;
+ else
+ limit->limit = val;
+ mutex_unlock(&limit->mutex);
- crashk_res.end = end - 1;
+ return ret;
+ }
- insert_resource(&iomem_resource, ram_res);
+ mutex_lock(&limit->mutex);
+ val = limit->limit;
+ mutex_unlock(&limit->mutex);
-unlock:
- kexec_unlock();
- return ret;
+ return proc_dointvec(&tmp, write, buffer, lenp, ppos);
}
-void crash_save_cpu(struct pt_regs *regs, int cpu)
-{
- struct elf_prstatus prstatus;
- u32 *buf;
-
- if ((cpu < 0) || (cpu >= nr_cpu_ids))
- return;
+static const struct ctl_table kexec_core_sysctls[] = {
+ {
+ .procname = "kexec_load_disabled",
+ .data = &kexec_load_disabled,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ /* only handle a transition from default "0" to "1" */
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ONE,
+ .extra2 = SYSCTL_ONE,
+ },
+ {
+ .procname = "kexec_load_limit_panic",
+ .data = &load_limit_panic,
+ .mode = 0644,
+ .proc_handler = kexec_limit_handler,
+ },
+ {
+ .procname = "kexec_load_limit_reboot",
+ .data = &load_limit_reboot,
+ .mode = 0644,
+ .proc_handler = kexec_limit_handler,
+ },
+};
- /* Using ELF notes here is opportunistic.
- * I need a well defined structure format
- * for the data I pass, and I need tags
- * on the data to indicate what information I have
- * squirrelled away. ELF notes happen to provide
- * all of that, so there is no need to invent something new.
- */
- buf = (u32 *)per_cpu_ptr(crash_notes, cpu);
- if (!buf)
- return;
- memset(&prstatus, 0, sizeof(prstatus));
- prstatus.common.pr_pid = current->pid;
- elf_core_copy_regs(&prstatus.pr_reg, regs);
- buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
- &prstatus, sizeof(prstatus));
- final_note(buf);
+static int __init kexec_core_sysctl_init(void)
+{
+ register_sysctl_init("kernel", kexec_core_sysctls);
+ return 0;
}
+late_initcall(kexec_core_sysctl_init);
+#endif
-static int __init crash_notes_memory_init(void)
+bool kexec_load_permitted(int kexec_image_type)
{
- /* Allocate memory for saving cpu registers. */
- size_t size, align;
+ struct kexec_load_limit *limit;
/*
- * crash_notes could be allocated across 2 vmalloc pages when percpu
- * is vmalloc based . vmalloc doesn't guarantee 2 continuous vmalloc
- * pages are also on 2 continuous physical pages. In this case the
- * 2nd part of crash_notes in 2nd page could be lost since only the
- * starting address and size of crash_notes are exported through sysfs.
- * Here round up the size of crash_notes to the nearest power of two
- * and pass it to __alloc_percpu as align value. This can make sure
- * crash_notes is allocated inside one physical page.
+ * Only the superuser can use the kexec syscall and if it has not
+ * been disabled.
*/
- size = sizeof(note_buf_t);
- align = min(roundup_pow_of_two(sizeof(note_buf_t)), PAGE_SIZE);
-
- /*
- * Break compile if size is bigger than PAGE_SIZE since crash_notes
- * definitely will be in 2 pages with that.
- */
- BUILD_BUG_ON(size > PAGE_SIZE);
-
- crash_notes = __alloc_percpu(size, align);
- if (!crash_notes) {
- pr_warn("Memory allocation for saving cpu register states failed\n");
- return -ENOMEM;
+ if (!capable(CAP_SYS_BOOT) || kexec_load_disabled)
+ return false;
+
+ /* Check limit counter and decrease it.*/
+ limit = (kexec_image_type == KEXEC_TYPE_CRASH) ?
+ &load_limit_panic : &load_limit_reboot;
+ mutex_lock(&limit->mutex);
+ if (!limit->limit) {
+ mutex_unlock(&limit->mutex);
+ return false;
}
- return 0;
-}
-subsys_initcall(crash_notes_memory_init);
+ if (limit->limit != -1)
+ limit->limit--;
+ mutex_unlock(&limit->mutex);
+ return true;
+}
/*
* Move into place and start executing a preloaded standalone
@@ -1137,24 +1139,32 @@ int kernel_kexec(void)
goto Unlock;
}
+ error = liveupdate_reboot();
+ if (error)
+ goto Unlock;
+
#ifdef CONFIG_KEXEC_JUMP
if (kexec_image->preserve_context) {
+ /*
+ * This flow is analogous to hibernation flows that occur
+ * before creating an image and before jumping from the
+ * restore kernel to the image one, so it uses the same
+ * device callbacks as those two flows.
+ */
pm_prepare_console();
error = freeze_processes();
if (error) {
error = -EBUSY;
goto Restore_console;
}
- suspend_console();
+ console_suspend_all();
error = dpm_suspend_start(PMSG_FREEZE);
if (error)
- goto Resume_console;
- /* At this point, dpm_suspend_start() has been called,
- * but *not* dpm_suspend_end(). We *must* call
- * dpm_suspend_end() now. Otherwise, drivers for
- * some devices (e.g. interrupt controllers) become
- * desynchronized with the actual state of the
- * hardware at resume time, and evil weirdness ensues.
+ goto Resume_devices;
+ /*
+ * dpm_suspend_end() must be called after dpm_suspend_start()
+ * to complete the transition, like in the hibernation flows
+ * mentioned above.
*/
error = dpm_suspend_end(PMSG_FREEZE);
if (error)
@@ -1172,6 +1182,7 @@ int kernel_kexec(void)
kexec_in_progress = true;
kernel_restart_prepare("kexec reboot");
migrate_to_reboot_cpu();
+ syscore_shutdown();
/*
* migrate_to_reboot_cpu() disables CPU hotplug assuming that
@@ -1189,6 +1200,13 @@ int kernel_kexec(void)
#ifdef CONFIG_KEXEC_JUMP
if (kexec_image->preserve_context) {
+ /*
+ * This flow is analogous to hibernation flows that occur after
+ * creating an image and after the image kernel has got control
+ * back, and in case the devices have been reset or otherwise
+ * manipulated in the meantime, it uses the device callbacks
+ * used by the latter.
+ */
syscore_resume();
Enable_irqs:
local_irq_enable();
@@ -1197,8 +1215,7 @@ int kernel_kexec(void)
dpm_resume_start(PMSG_RESTORE);
Resume_devices:
dpm_resume_end(PMSG_RESTORE);
- Resume_console:
- resume_console();
+ console_resume_all();
thaw_processes();
Restore_console:
pm_restore_console();
@@ -1209,3 +1226,143 @@ int kernel_kexec(void)
kexec_unlock();
return error;
}
+
+static ssize_t loaded_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", !!kexec_image);
+}
+static struct kobj_attribute loaded_attr = __ATTR_RO(loaded);
+
+#ifdef CONFIG_CRASH_DUMP
+static ssize_t crash_loaded_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", kexec_crash_loaded());
+}
+static struct kobj_attribute crash_loaded_attr = __ATTR_RO(crash_loaded);
+
+#ifdef CONFIG_CRASH_RESERVE
+static ssize_t crash_cma_ranges_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+
+ ssize_t len = 0;
+ int i;
+
+ for (i = 0; i < crashk_cma_cnt; ++i) {
+ len += sysfs_emit_at(buf, len, "%08llx-%08llx\n",
+ crashk_cma_ranges[i].start,
+ crashk_cma_ranges[i].end);
+ }
+ return len;
+}
+static struct kobj_attribute crash_cma_ranges_attr = __ATTR_RO(crash_cma_ranges);
+#endif
+
+static ssize_t crash_size_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ ssize_t size = crash_get_memory_size();
+
+ if (size < 0)
+ return size;
+
+ return sysfs_emit(buf, "%zd\n", size);
+}
+static ssize_t crash_size_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ unsigned long cnt;
+ int ret;
+
+ if (kstrtoul(buf, 0, &cnt))
+ return -EINVAL;
+
+ ret = crash_shrink_memory(cnt);
+ return ret < 0 ? ret : count;
+}
+static struct kobj_attribute crash_size_attr = __ATTR_RW(crash_size);
+
+#ifdef CONFIG_CRASH_HOTPLUG
+static ssize_t crash_elfcorehdr_size_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ unsigned int sz = crash_get_elfcorehdr_size();
+
+ return sysfs_emit(buf, "%u\n", sz);
+}
+static struct kobj_attribute crash_elfcorehdr_size_attr = __ATTR_RO(crash_elfcorehdr_size);
+
+#endif /* CONFIG_CRASH_HOTPLUG */
+#endif /* CONFIG_CRASH_DUMP */
+
+static struct attribute *kexec_attrs[] = {
+ &loaded_attr.attr,
+#ifdef CONFIG_CRASH_DUMP
+ &crash_loaded_attr.attr,
+ &crash_size_attr.attr,
+#ifdef CONFIG_CRASH_RESERVE
+ &crash_cma_ranges_attr.attr,
+#endif
+#ifdef CONFIG_CRASH_HOTPLUG
+ &crash_elfcorehdr_size_attr.attr,
+#endif
+#endif
+ NULL
+};
+
+struct kexec_link_entry {
+ const char *target;
+ const char *name;
+};
+
+static struct kexec_link_entry kexec_links[] = {
+ { "loaded", "kexec_loaded" },
+#ifdef CONFIG_CRASH_DUMP
+ { "crash_loaded", "kexec_crash_loaded" },
+ { "crash_size", "kexec_crash_size" },
+#ifdef CONFIG_CRASH_RESERVE
+ {"crash_cma_ranges", "kexec_crash_cma_ranges"},
+#endif
+#ifdef CONFIG_CRASH_HOTPLUG
+ { "crash_elfcorehdr_size", "crash_elfcorehdr_size" },
+#endif
+#endif
+};
+
+static struct kobject *kexec_kobj;
+ATTRIBUTE_GROUPS(kexec);
+
+static int __init init_kexec_sysctl(void)
+{
+ int error;
+ int i;
+
+ kexec_kobj = kobject_create_and_add("kexec", kernel_kobj);
+ if (!kexec_kobj) {
+ pr_err("failed to create kexec kobject\n");
+ return -ENOMEM;
+ }
+
+ error = sysfs_create_groups(kexec_kobj, kexec_groups);
+ if (error)
+ goto kset_exit;
+
+ for (i = 0; i < ARRAY_SIZE(kexec_links); i++) {
+ error = compat_only_sysfs_link_entry_to_kobj(kernel_kobj, kexec_kobj,
+ kexec_links[i].target,
+ kexec_links[i].name);
+ if (error)
+ pr_err("Unable to create %s symlink (%d)", kexec_links[i].name, error);
+ }
+
+ return 0;
+
+kset_exit:
+ kobject_put(kexec_kobj);
+ return error;
+}
+
+subsys_initcall(init_kexec_sysctl);