Diffstat (limited to 'kernel/kexec_core.c')
-rw-r--r--  kernel/kexec_core.c | 348
1 file changed, 321 insertions(+), 27 deletions(-)
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 9112d69d68b0..0f92acdd354d 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -15,6 +15,7 @@
#include <linux/kexec.h>
#include <linux/mutex.h>
#include <linux/list.h>
+#include <linux/liveupdate.h>
#include <linux/highmem.h>
#include <linux/syscalls.h>
#include <linux/reboot.h>
@@ -40,6 +41,8 @@
#include <linux/hugetlb.h>
#include <linux/objtool.h>
#include <linux/kmsg_dump.h>
+#include <linux/dma-map-ops.h>
+#include <linux/sysfs.h>
#include <asm/page.h>
#include <asm/sections.h>
@@ -210,6 +213,16 @@ int sanity_check_segment_list(struct kimage *image)
}
#endif
+ /*
+ * The destination addresses are searched from system RAM rather than
+ * being allocated from the buddy allocator, so they are not guaranteed
+ * to be accepted by the current kernel. Accept the destination
+ * addresses before kexec swaps their content with the segments' source
+ * pages to avoid accessing memory before it is accepted.
+ */
+ for (i = 0; i < nr_segments; i++)
+ accept_memory(image->segment[i].mem, image->segment[i].memsz);
+
return 0;
}
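
For context: segment destinations are found by searching System RAM (by userspace for kexec_load(), or by kexec_locate_mem_hole() for kexec_file_load()) rather than handed out by alloc_pages(), which is why they may still be unaccepted on confidential-computing guests. A minimal standalone sketch of the pattern above, with a hypothetical helper name, for illustration only:

/*
 * Illustration only (hypothetical helper): ranges that bypass the page
 * allocator must be accepted before their contents are touched; pages
 * returned by alloc_pages() are already accepted by the allocator.
 */
static void accept_destination_ranges(struct kimage *image)
{
	unsigned long i;

	for (i = 0; i < image->nr_segments; i++)
		accept_memory(image->segment[i].mem,
			      image->segment[i].memsz);
}
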
@@ -222,7 +235,6 @@ struct kimage *do_kimage_alloc_init(void)
if (!image)
return NULL;
- image->head = 0;
image->entry = &image->head;
image->last_entry = &image->head;
image->control_page = ~0; /* By default this does not apply */
@@ -543,6 +555,24 @@ static void kimage_free_entry(kimage_entry_t entry)
kimage_free_pages(page);
}
+static void kimage_free_cma(struct kimage *image)
+{
+ unsigned long i;
+
+ for (i = 0; i < image->nr_segments; i++) {
+ struct page *cma = image->segment_cma[i];
+ u32 nr_pages = image->segment[i].memsz >> PAGE_SHIFT;
+
+ if (!cma)
+ continue;
+
+ arch_kexec_pre_free_pages(page_address(cma), nr_pages);
+ dma_release_from_contiguous(NULL, cma, nr_pages);
+ image->segment_cma[i] = NULL;
+ }
+}
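
kimage_free_cma() is the teardown half of the new CMA path: it undoes an earlier dma_alloc_from_contiguous() plus arch_kexec_post_alloc_pages() pairing done on the allocation side of this series. A hedged sketch of that pairing, with a hypothetical helper name:

/*
 * Hypothetical sketch of the allocation-side counterpart; the real code
 * in this series lives in the segment allocation paths, not in a helper
 * by this name.
 */
static struct page *example_alloc_segment_cma(struct kimage *image, int idx)
{
	u32 nr_pages = image->segment[idx].memsz >> PAGE_SHIFT;
	struct page *cma;

	cma = dma_alloc_from_contiguous(NULL, nr_pages, 0, true);
	if (!cma)
		return NULL;

	if (arch_kexec_post_alloc_pages(page_address(cma), nr_pages, GFP_KERNEL)) {
		dma_release_from_contiguous(NULL, cma, nr_pages);
		return NULL;
	}

	image->segment_cma[idx] = cma;
	return cma;
}
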
+
void kimage_free(struct kimage *image)
{
kimage_entry_t *ptr, entry;
@@ -581,6 +611,9 @@ void kimage_free(struct kimage *image)
/* Free the kexec control pages... */
kimage_free_page_list(&image->control_pages);
+ /* Free CMA allocations */
+ kimage_free_cma(image);
+
/*
* Free up any temporary buffers allocated. This might hit if
* error occurred much later after buffer allocation.
@@ -706,9 +739,64 @@ static struct page *kimage_alloc_page(struct kimage *image,
return page;
}
-static int kimage_load_normal_segment(struct kimage *image,
- struct kexec_segment *segment)
+static int kimage_load_cma_segment(struct kimage *image, int idx)
+{
+ struct kexec_segment *segment = &image->segment[idx];
+ struct page *cma = image->segment_cma[idx];
+ char *ptr = page_address(cma);
+ size_t ubytes, mbytes;
+ int result = 0;
+ unsigned char __user *buf = NULL;
+ unsigned char *kbuf = NULL;
+
+ if (image->file_mode)
+ kbuf = segment->kbuf;
+ else
+ buf = segment->buf;
+ ubytes = segment->bufsz;
+ mbytes = segment->memsz;
+
+ /* Then copy from source buffer to the CMA one */
+ while (mbytes) {
+ size_t uchunk, mchunk;
+
+ mchunk = min_t(size_t, mbytes, PAGE_SIZE);
+ uchunk = min(ubytes, mchunk);
+
+ if (uchunk) {
+ /* For file based kexec, source pages are in kernel memory */
+ if (image->file_mode)
+ memcpy(ptr, kbuf, uchunk);
+ else
+ result = copy_from_user(ptr, buf, uchunk);
+ ubytes -= uchunk;
+ if (image->file_mode)
+ kbuf += uchunk;
+ else
+ buf += uchunk;
+ }
+
+ if (result) {
+ result = -EFAULT;
+ goto out;
+ }
+
+ /* Zero any part of the chunk not covered by the source buffer */
+ if (mchunk > uchunk)
+ memset(ptr + uchunk, 0, mchunk - uchunk);
+
+ ptr += mchunk;
+ mbytes -= mchunk;
+
+ cond_resched();
+ }
+
+out:
+ return result;
+}
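
For the copy_from_user() case, the chunked loop is semantically a single copy of bufsz bytes followed by zeroing the remaining memsz - bufsz bytes; the page-sized chunks exist so cond_resched() can run between pages. An unchunked equivalent, as an illustration only:

/* Illustration only: one-shot equivalent of the loop above for the
 * user-buffer case, without the per-page cond_resched(). */
static int example_load_cma_oneshot(struct kexec_segment *seg, char *dst)
{
	size_t ubytes = min(seg->bufsz, seg->memsz);

	if (copy_from_user(dst, seg->buf, ubytes))
		return -EFAULT;

	memset(dst + ubytes, 0, seg->memsz - ubytes);
	return 0;
}
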
+
+static int kimage_load_normal_segment(struct kimage *image, int idx)
{
+ struct kexec_segment *segment = &image->segment[idx];
unsigned long maddr;
size_t ubytes, mbytes;
int result;
@@ -723,6 +811,9 @@ static int kimage_load_normal_segment(struct kimage *image,
mbytes = segment->memsz;
maddr = segment->mem;
+ if (image->segment_cma[idx])
+ return kimage_load_cma_segment(image, idx);
+
result = kimage_set_destination(image, maddr);
if (result < 0)
goto out;
@@ -745,9 +836,7 @@ static int kimage_load_normal_segment(struct kimage *image,
ptr = kmap_local_page(page);
/* Start with a clear page */
clear_page(ptr);
- ptr += maddr & ~PAGE_MASK;
- mchunk = min_t(size_t, mbytes,
- PAGE_SIZE - (maddr & ~PAGE_MASK));
+ mchunk = min_t(size_t, mbytes, PAGE_SIZE);
uchunk = min(ubytes, mchunk);
if (uchunk) {
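
A note on the simplification here and in the crash-segment hunk below: sanity_check_segment_list() rejects segments whose start address is not page-aligned, so within these loops the offset arithmetic was always a no-op:

/*
 * With maddr page-aligned (enforced by sanity_check_segment_list()),
 * maddr & ~PAGE_MASK == 0, so the removed lines reduce to:
 *
 *	ptr += 0;
 *	mchunk = min_t(size_t, mbytes, PAGE_SIZE - 0);
 *
 * which is exactly the new min_t(size_t, mbytes, PAGE_SIZE).
 */
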
@@ -777,13 +866,13 @@ out:
}
#ifdef CONFIG_CRASH_DUMP
-static int kimage_load_crash_segment(struct kimage *image,
- struct kexec_segment *segment)
+static int kimage_load_crash_segment(struct kimage *image, int idx)
{
/* For crash dump kernels we simply copy the data from
* user space to its destination.
* We do things a page at a time for the sake of kmap.
*/
+ struct kexec_segment *segment = &image->segment[idx];
unsigned long maddr;
size_t ubytes, mbytes;
int result;
@@ -810,9 +899,7 @@ static int kimage_load_crash_segment(struct kimage *image,
}
arch_kexec_post_alloc_pages(page_address(page), 1, 0);
ptr = kmap_local_page(page);
- ptr += maddr & ~PAGE_MASK;
- mchunk = min_t(size_t, mbytes,
- PAGE_SIZE - (maddr & ~PAGE_MASK));
+ mchunk = min_t(size_t, mbytes, PAGE_SIZE);
uchunk = min(ubytes, mchunk);
if (mchunk > uchunk) {
/* Zero the trailing part of the page */
@@ -848,18 +935,17 @@ out:
}
#endif
-int kimage_load_segment(struct kimage *image,
- struct kexec_segment *segment)
+int kimage_load_segment(struct kimage *image, int idx)
{
int result = -ENOMEM;
switch (image->type) {
case KEXEC_TYPE_DEFAULT:
- result = kimage_load_normal_segment(image, segment);
+ result = kimage_load_normal_segment(image, idx);
break;
#ifdef CONFIG_CRASH_DUMP
case KEXEC_TYPE_CRASH:
- result = kimage_load_crash_segment(image, segment);
+ result = kimage_load_crash_segment(image, idx);
break;
#endif
}
@@ -867,6 +953,60 @@ int kimage_load_segment(struct kimage *image,
return result;
}
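
With kimage_load_segment() now taking a segment index, the loading loops in the syscall paths take this shape; a sketch only, since the real callers in kernel/kexec.c and kernel/kexec_file.c carry extra error handling:

/* Sketch of the caller-side loop after the signature change. */
static int example_load_all_segments(struct kimage *image)
{
	unsigned long i;
	int ret = 0;

	for (i = 0; i < image->nr_segments; i++) {
		ret = kimage_load_segment(image, i);
		if (ret)
			break;
	}

	return ret;
}
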
+void *kimage_map_segment(struct kimage *image,
+ unsigned long addr, unsigned long size)
+{
+ unsigned long src_page_addr, dest_page_addr = 0;
+ unsigned long eaddr = addr + size;
+ kimage_entry_t *ptr, entry;
+ struct page **src_pages;
+ unsigned int npages;
+ void *vaddr = NULL;
+ int i;
+
+ /*
+ * Collect the source pages and map them in a contiguous VA range.
+ */
+ npages = PFN_UP(eaddr) - PFN_DOWN(addr);
+ src_pages = kmalloc_array(npages, sizeof(*src_pages), GFP_KERNEL);
+ if (!src_pages) {
+ pr_err("Could not allocate ima pages array.\n");
+ return NULL;
+ }
+
+ i = 0;
+ for_each_kimage_entry(image, ptr, entry) {
+ if (entry & IND_DESTINATION) {
+ dest_page_addr = entry & PAGE_MASK;
+ } else if (entry & IND_SOURCE) {
+ if (dest_page_addr >= addr && dest_page_addr < eaddr) {
+ src_page_addr = entry & PAGE_MASK;
+ src_pages[i++] =
+ virt_to_page(__va(src_page_addr));
+ if (i == npages)
+ break;
+ dest_page_addr += PAGE_SIZE;
+ }
+ }
+ }
+
+ /* Sanity check. */
+ WARN_ON(i < npages);
+
+ vaddr = vmap(src_pages, npages, VM_MAP, PAGE_KERNEL);
+ kfree(src_pages);
+
+ if (!vaddr)
+ pr_err("Could not map ima buffer.\n");
+
+ return vaddr;
+}
+
+void kimage_unmap_segment(void *segment_buffer)
+{
+ vunmap(segment_buffer);
+}
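
kimage_map_segment() walks the kimage entry list, collects the scattered source pages that back [addr, addr + size), and vmap()s them so a caller (the IMA kexec buffer code, going by the error messages) can access a loaded segment through one linear mapping. A hedged usage sketch:

/* Hedged usage sketch: map a loaded segment, access it, unmap it. */
static int example_inspect_segment(struct kimage *image, int idx)
{
	struct kexec_segment *seg = &image->segment[idx];
	void *buf;

	buf = kimage_map_segment(image, seg->mem, seg->memsz);
	if (!buf)
		return -ENOMEM;

	/* ... read or update the segment contents through buf ... */

	kimage_unmap_segment(buf);
	return 0;
}
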
+
struct kexec_load_limit {
/* Mutex protects the limit count. */
struct mutex mutex;
@@ -888,7 +1028,7 @@ struct kimage *kexec_crash_image;
static int kexec_load_disabled;
#ifdef CONFIG_SYSCTL
-static int kexec_limit_handler(struct ctl_table *table, int write,
+static int kexec_limit_handler(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct kexec_load_limit *limit = table->data;
@@ -925,7 +1065,7 @@ static int kexec_limit_handler(struct ctl_table *table, int write,
return proc_dointvec(&tmp, write, buffer, lenp, ppos);
}
-static struct ctl_table kexec_core_sysctls[] = {
+static const struct ctl_table kexec_core_sysctls[] = {
{
.procname = "kexec_load_disabled",
.data = &kexec_load_disabled,
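
Both hunks above track the sysctl core's constification: handlers receive const struct ctl_table * and the tables themselves can live in rodata. A self-contained sketch of registering such a table, with hypothetical names and the standard register_sysctl_init() helper:

static int example_flag;

static const struct ctl_table example_sysctls[] = {
	{
		.procname	= "example_flag",
		.data		= &example_flag,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
};

/* Wired up via e.g. core_initcall(example_sysctl_init). */
static int __init example_sysctl_init(void)
{
	register_sysctl_init("kernel", example_sysctls);
	return 0;
}
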
@@ -999,24 +1139,32 @@ int kernel_kexec(void)
goto Unlock;
}
+ error = liveupdate_reboot();
+ if (error)
+ goto Unlock;
+
#ifdef CONFIG_KEXEC_JUMP
if (kexec_image->preserve_context) {
+ /*
+ * This flow is analogous to hibernation flows that occur
+ * before creating an image and before jumping from the
+ * restore kernel to the image one, so it uses the same
+ * device callbacks as those two flows.
+ */
pm_prepare_console();
error = freeze_processes();
if (error) {
error = -EBUSY;
goto Restore_console;
}
- suspend_console();
+ console_suspend_all();
error = dpm_suspend_start(PMSG_FREEZE);
if (error)
- goto Resume_console;
- /* At this point, dpm_suspend_start() has been called,
- * but *not* dpm_suspend_end(). We *must* call
- * dpm_suspend_end() now. Otherwise, drivers for
- * some devices (e.g. interrupt controllers) become
- * desynchronized with the actual state of the
- * hardware at resume time, and evil weirdness ensues.
+ goto Resume_devices;
+ /*
+ * dpm_suspend_end() must be called after dpm_suspend_start()
+ * to complete the transition, like in the hibernation flows
+ * mentioned above.
*/
error = dpm_suspend_end(PMSG_FREEZE);
if (error)
@@ -1052,6 +1200,13 @@ int kernel_kexec(void)
#ifdef CONFIG_KEXEC_JUMP
if (kexec_image->preserve_context) {
+ /*
+ * This flow is analogous to hibernation flows that occur after
+ * creating an image and after the image kernel has got control
+ * back, and in case the devices have been reset or otherwise
+ * manipulated in the meantime, it uses the device callbacks
+ * used by the latter.
+ */
syscore_resume();
Enable_irqs:
local_irq_enable();
@@ -1060,8 +1215,7 @@ int kernel_kexec(void)
dpm_resume_start(PMSG_RESTORE);
Resume_devices:
dpm_resume_end(PMSG_RESTORE);
- Resume_console:
- resume_console();
+ console_resume_all();
thaw_processes();
Restore_console:
pm_restore_console();
@@ -1072,3 +1226,143 @@ int kernel_kexec(void)
kexec_unlock();
return error;
}
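
Reading the two KEXEC_JUMP hunks together, the preserve_context suspend path and its resume side mirror each other. A summary of the ordering as implemented above:

/*
 * Suspend side: pm_prepare_console() -> freeze_processes()
 *   -> console_suspend_all() -> dpm_suspend_start(PMSG_FREEZE)
 *   -> dpm_suspend_end(PMSG_FREEZE) -> ... -> machine_kexec()
 *
 * Resume side: syscore_resume() -> local_irq_enable()
 *   -> dpm_resume_start(PMSG_RESTORE) -> dpm_resume_end(PMSG_RESTORE)
 *   -> console_resume_all() -> thaw_processes() -> pm_restore_console()
 */
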
+
+static ssize_t loaded_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", !!kexec_image);
+}
+static struct kobj_attribute loaded_attr = __ATTR_RO(loaded);
+
+#ifdef CONFIG_CRASH_DUMP
+static ssize_t crash_loaded_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", kexec_crash_loaded());
+}
+static struct kobj_attribute crash_loaded_attr = __ATTR_RO(crash_loaded);
+
+#ifdef CONFIG_CRASH_RESERVE
+static ssize_t crash_cma_ranges_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ ssize_t len = 0;
+ int i;
+
+ for (i = 0; i < crashk_cma_cnt; ++i) {
+ len += sysfs_emit_at(buf, len, "%08llx-%08llx\n",
+ crashk_cma_ranges[i].start,
+ crashk_cma_ranges[i].end);
+ }
+ return len;
+}
+static struct kobj_attribute crash_cma_ranges_attr = __ATTR_RO(crash_cma_ranges);
+#endif
+
+static ssize_t crash_size_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ ssize_t size = crash_get_memory_size();
+
+ if (size < 0)
+ return size;
+
+ return sysfs_emit(buf, "%zd\n", size);
+}
+static ssize_t crash_size_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ unsigned long cnt;
+ int ret;
+
+ if (kstrtoul(buf, 0, &cnt))
+ return -EINVAL;
+
+ ret = crash_shrink_memory(cnt);
+ return ret < 0 ? ret : count;
+}
+static struct kobj_attribute crash_size_attr = __ATTR_RW(crash_size);
+
+#ifdef CONFIG_CRASH_HOTPLUG
+static ssize_t crash_elfcorehdr_size_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ unsigned int sz = crash_get_elfcorehdr_size();
+
+ return sysfs_emit(buf, "%u\n", sz);
+}
+static struct kobj_attribute crash_elfcorehdr_size_attr = __ATTR_RO(crash_elfcorehdr_size);
+
+#endif /* CONFIG_CRASH_HOTPLUG */
+#endif /* CONFIG_CRASH_DUMP */
+
+static struct attribute *kexec_attrs[] = {
+ &loaded_attr.attr,
+#ifdef CONFIG_CRASH_DUMP
+ &crash_loaded_attr.attr,
+ &crash_size_attr.attr,
+#ifdef CONFIG_CRASH_RESERVE
+ &crash_cma_ranges_attr.attr,
+#endif
+#ifdef CONFIG_CRASH_HOTPLUG
+ &crash_elfcorehdr_size_attr.attr,
+#endif
+#endif
+ NULL
+};
+
+struct kexec_link_entry {
+ const char *target;
+ const char *name;
+};
+
+static struct kexec_link_entry kexec_links[] = {
+ { "loaded", "kexec_loaded" },
+#ifdef CONFIG_CRASH_DUMP
+ { "crash_loaded", "kexec_crash_loaded" },
+ { "crash_size", "kexec_crash_size" },
+#ifdef CONFIG_CRASH_RESERVE
+ {"crash_cma_ranges", "kexec_crash_cma_ranges"},
+#endif
+#ifdef CONFIG_CRASH_HOTPLUG
+ { "crash_elfcorehdr_size", "crash_elfcorehdr_size" },
+#endif
+#endif
+};
+
+static struct kobject *kexec_kobj;
+ATTRIBUTE_GROUPS(kexec);
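
For reference, ATTRIBUTE_GROUPS(kexec) expands to roughly the following (modulo the exact macro internals in <linux/sysfs.h>), which is where the kexec_groups used below comes from:

static const struct attribute_group kexec_group = {
	.attrs = kexec_attrs,
};

static const struct attribute_group *kexec_groups[] = {
	&kexec_group,
	NULL,
};
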
+
+static int __init init_kexec_sysctl(void)
+{
+ int error;
+ int i;
+
+ kexec_kobj = kobject_create_and_add("kexec", kernel_kobj);
+ if (!kexec_kobj) {
+ pr_err("failed to create kexec kobject\n");
+ return -ENOMEM;
+ }
+
+ error = sysfs_create_groups(kexec_kobj, kexec_groups);
+ if (error)
+ goto kset_exit;
+
+ for (i = 0; i < ARRAY_SIZE(kexec_links); i++) {
+ error = compat_only_sysfs_link_entry_to_kobj(kernel_kobj, kexec_kobj,
+ kexec_links[i].target,
+ kexec_links[i].name);
+ if (error)
+ pr_err("Unable to create %s symlink (%d)", kexec_links[i].name, error);
+ }
+
+ return 0;
+
+kset_exit:
+ kobject_put(kexec_kobj);
+ return error;
+}
+
+subsys_initcall(init_kexec_sysctl);
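
Once this initcall runs, the attributes live under /sys/kernel/kexec/ and the compat symlinks keep the legacy /sys/kernel/kexec_* names working. A hedged userspace illustration that reads the new location:

/* Userspace illustration: report whether a kexec image is loaded. */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/kernel/kexec/loaded", "r");
	int loaded = 0;

	if (!f) {
		perror("/sys/kernel/kexec/loaded");
		return 1;
	}
	if (fscanf(f, "%d", &loaded) != 1)
		loaded = 0;
	fclose(f);

	printf("kexec image %s\n", loaded ? "loaded" : "not loaded");
	return 0;
}
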