diff options
Diffstat (limited to 'kernel/kexec_core.c')
| -rw-r--r-- | kernel/kexec_core.c | 348 |
1 files changed, 321 insertions, 27 deletions
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 9112d69d68b0..0f92acdd354d 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -15,6 +15,7 @@ #include <linux/kexec.h> #include <linux/mutex.h> #include <linux/list.h> +#include <linux/liveupdate.h> #include <linux/highmem.h> #include <linux/syscalls.h> #include <linux/reboot.h> @@ -40,6 +41,8 @@ #include <linux/hugetlb.h> #include <linux/objtool.h> #include <linux/kmsg_dump.h> +#include <linux/dma-map-ops.h> +#include <linux/sysfs.h> #include <asm/page.h> #include <asm/sections.h> @@ -210,6 +213,16 @@ int sanity_check_segment_list(struct kimage *image) } #endif + /* + * The destination addresses are searched from system RAM rather than + * being allocated from the buddy allocator, so they are not guaranteed + * to be accepted by the current kernel. Accept the destination + * addresses before kexec swaps their content with the segments' source + * pages to avoid accessing memory before it is accepted. + */ + for (i = 0; i < nr_segments; i++) + accept_memory(image->segment[i].mem, image->segment[i].memsz); + return 0; } @@ -222,7 +235,6 @@ struct kimage *do_kimage_alloc_init(void) if (!image) return NULL; - image->head = 0; image->entry = &image->head; image->last_entry = &image->head; image->control_page = ~0; /* By default this does not apply */ @@ -543,6 +555,24 @@ static void kimage_free_entry(kimage_entry_t entry) kimage_free_pages(page); } +static void kimage_free_cma(struct kimage *image) +{ + unsigned long i; + + for (i = 0; i < image->nr_segments; i++) { + struct page *cma = image->segment_cma[i]; + u32 nr_pages = image->segment[i].memsz >> PAGE_SHIFT; + + if (!cma) + continue; + + arch_kexec_pre_free_pages(page_address(cma), nr_pages); + dma_release_from_contiguous(NULL, cma, nr_pages); + image->segment_cma[i] = NULL; + } + +} + void kimage_free(struct kimage *image) { kimage_entry_t *ptr, entry; @@ -581,6 +611,9 @@ void kimage_free(struct kimage *image) /* Free the kexec control pages... */ kimage_free_page_list(&image->control_pages); + /* Free CMA allocations */ + kimage_free_cma(image); + /* * Free up any temporary buffers allocated. This might hit if * error occurred much later after buffer allocation. @@ -706,9 +739,64 @@ static struct page *kimage_alloc_page(struct kimage *image, return page; } -static int kimage_load_normal_segment(struct kimage *image, - struct kexec_segment *segment) +static int kimage_load_cma_segment(struct kimage *image, int idx) +{ + struct kexec_segment *segment = &image->segment[idx]; + struct page *cma = image->segment_cma[idx]; + char *ptr = page_address(cma); + size_t ubytes, mbytes; + int result = 0; + unsigned char __user *buf = NULL; + unsigned char *kbuf = NULL; + + if (image->file_mode) + kbuf = segment->kbuf; + else + buf = segment->buf; + ubytes = segment->bufsz; + mbytes = segment->memsz; + + /* Then copy from source buffer to the CMA one */ + while (mbytes) { + size_t uchunk, mchunk; + + mchunk = min_t(size_t, mbytes, PAGE_SIZE); + uchunk = min(ubytes, mchunk); + + if (uchunk) { + /* For file based kexec, source pages are in kernel memory */ + if (image->file_mode) + memcpy(ptr, kbuf, uchunk); + else + result = copy_from_user(ptr, buf, uchunk); + ubytes -= uchunk; + if (image->file_mode) + kbuf += uchunk; + else + buf += uchunk; + } + + if (result) { + result = -EFAULT; + goto out; + } + + ptr += mchunk; + mbytes -= mchunk; + + cond_resched(); + } + + /* Clear any remainder */ + memset(ptr, 0, mbytes); + +out: + return result; +} + +static int kimage_load_normal_segment(struct kimage *image, int idx) { + struct kexec_segment *segment = &image->segment[idx]; unsigned long maddr; size_t ubytes, mbytes; int result; @@ -723,6 +811,9 @@ static int kimage_load_normal_segment(struct kimage *image, mbytes = segment->memsz; maddr = segment->mem; + if (image->segment_cma[idx]) + return kimage_load_cma_segment(image, idx); + result = kimage_set_destination(image, maddr); if (result < 0) goto out; @@ -745,9 +836,7 @@ static int kimage_load_normal_segment(struct kimage *image, ptr = kmap_local_page(page); /* Start with a clear page */ clear_page(ptr); - ptr += maddr & ~PAGE_MASK; - mchunk = min_t(size_t, mbytes, - PAGE_SIZE - (maddr & ~PAGE_MASK)); + mchunk = min_t(size_t, mbytes, PAGE_SIZE); uchunk = min(ubytes, mchunk); if (uchunk) { @@ -777,13 +866,13 @@ out: } #ifdef CONFIG_CRASH_DUMP -static int kimage_load_crash_segment(struct kimage *image, - struct kexec_segment *segment) +static int kimage_load_crash_segment(struct kimage *image, int idx) { /* For crash dumps kernels we simply copy the data from * user space to it's destination. * We do things a page at a time for the sake of kmap. */ + struct kexec_segment *segment = &image->segment[idx]; unsigned long maddr; size_t ubytes, mbytes; int result; @@ -810,9 +899,7 @@ static int kimage_load_crash_segment(struct kimage *image, } arch_kexec_post_alloc_pages(page_address(page), 1, 0); ptr = kmap_local_page(page); - ptr += maddr & ~PAGE_MASK; - mchunk = min_t(size_t, mbytes, - PAGE_SIZE - (maddr & ~PAGE_MASK)); + mchunk = min_t(size_t, mbytes, PAGE_SIZE); uchunk = min(ubytes, mchunk); if (mchunk > uchunk) { /* Zero the trailing part of the page */ @@ -848,18 +935,17 @@ out: } #endif -int kimage_load_segment(struct kimage *image, - struct kexec_segment *segment) +int kimage_load_segment(struct kimage *image, int idx) { int result = -ENOMEM; switch (image->type) { case KEXEC_TYPE_DEFAULT: - result = kimage_load_normal_segment(image, segment); + result = kimage_load_normal_segment(image, idx); break; #ifdef CONFIG_CRASH_DUMP case KEXEC_TYPE_CRASH: - result = kimage_load_crash_segment(image, segment); + result = kimage_load_crash_segment(image, idx); break; #endif } @@ -867,6 +953,60 @@ int kimage_load_segment(struct kimage *image, return result; } +void *kimage_map_segment(struct kimage *image, + unsigned long addr, unsigned long size) +{ + unsigned long src_page_addr, dest_page_addr = 0; + unsigned long eaddr = addr + size; + kimage_entry_t *ptr, entry; + struct page **src_pages; + unsigned int npages; + void *vaddr = NULL; + int i; + + /* + * Collect the source pages and map them in a contiguous VA range. + */ + npages = PFN_UP(eaddr) - PFN_DOWN(addr); + src_pages = kmalloc_array(npages, sizeof(*src_pages), GFP_KERNEL); + if (!src_pages) { + pr_err("Could not allocate ima pages array.\n"); + return NULL; + } + + i = 0; + for_each_kimage_entry(image, ptr, entry) { + if (entry & IND_DESTINATION) { + dest_page_addr = entry & PAGE_MASK; + } else if (entry & IND_SOURCE) { + if (dest_page_addr >= addr && dest_page_addr < eaddr) { + src_page_addr = entry & PAGE_MASK; + src_pages[i++] = + virt_to_page(__va(src_page_addr)); + if (i == npages) + break; + dest_page_addr += PAGE_SIZE; + } + } + } + + /* Sanity check. */ + WARN_ON(i < npages); + + vaddr = vmap(src_pages, npages, VM_MAP, PAGE_KERNEL); + kfree(src_pages); + + if (!vaddr) + pr_err("Could not map ima buffer.\n"); + + return vaddr; +} + +void kimage_unmap_segment(void *segment_buffer) +{ + vunmap(segment_buffer); +} + struct kexec_load_limit { /* Mutex protects the limit count. */ struct mutex mutex; @@ -888,7 +1028,7 @@ struct kimage *kexec_crash_image; static int kexec_load_disabled; #ifdef CONFIG_SYSCTL -static int kexec_limit_handler(struct ctl_table *table, int write, +static int kexec_limit_handler(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct kexec_load_limit *limit = table->data; @@ -925,7 +1065,7 @@ static int kexec_limit_handler(struct ctl_table *table, int write, return proc_dointvec(&tmp, write, buffer, lenp, ppos); } -static struct ctl_table kexec_core_sysctls[] = { +static const struct ctl_table kexec_core_sysctls[] = { { .procname = "kexec_load_disabled", .data = &kexec_load_disabled, @@ -999,24 +1139,32 @@ int kernel_kexec(void) goto Unlock; } + error = liveupdate_reboot(); + if (error) + goto Unlock; + #ifdef CONFIG_KEXEC_JUMP if (kexec_image->preserve_context) { + /* + * This flow is analogous to hibernation flows that occur + * before creating an image and before jumping from the + * restore kernel to the image one, so it uses the same + * device callbacks as those two flows. + */ pm_prepare_console(); error = freeze_processes(); if (error) { error = -EBUSY; goto Restore_console; } - suspend_console(); + console_suspend_all(); error = dpm_suspend_start(PMSG_FREEZE); if (error) - goto Resume_console; - /* At this point, dpm_suspend_start() has been called, - * but *not* dpm_suspend_end(). We *must* call - * dpm_suspend_end() now. Otherwise, drivers for - * some devices (e.g. interrupt controllers) become - * desynchronized with the actual state of the - * hardware at resume time, and evil weirdness ensues. + goto Resume_devices; + /* + * dpm_suspend_end() must be called after dpm_suspend_start() + * to complete the transition, like in the hibernation flows + * mentioned above. */ error = dpm_suspend_end(PMSG_FREEZE); if (error) @@ -1052,6 +1200,13 @@ int kernel_kexec(void) #ifdef CONFIG_KEXEC_JUMP if (kexec_image->preserve_context) { + /* + * This flow is analogous to hibernation flows that occur after + * creating an image and after the image kernel has got control + * back, and in case the devices have been reset or otherwise + * manipulated in the meantime, it uses the device callbacks + * used by the latter. + */ syscore_resume(); Enable_irqs: local_irq_enable(); @@ -1060,8 +1215,7 @@ int kernel_kexec(void) dpm_resume_start(PMSG_RESTORE); Resume_devices: dpm_resume_end(PMSG_RESTORE); - Resume_console: - resume_console(); + console_resume_all(); thaw_processes(); Restore_console: pm_restore_console(); @@ -1072,3 +1226,143 @@ int kernel_kexec(void) kexec_unlock(); return error; } + +static ssize_t loaded_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%d\n", !!kexec_image); +} +static struct kobj_attribute loaded_attr = __ATTR_RO(loaded); + +#ifdef CONFIG_CRASH_DUMP +static ssize_t crash_loaded_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%d\n", kexec_crash_loaded()); +} +static struct kobj_attribute crash_loaded_attr = __ATTR_RO(crash_loaded); + +#ifdef CONFIG_CRASH_RESERVE +static ssize_t crash_cma_ranges_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + + ssize_t len = 0; + int i; + + for (i = 0; i < crashk_cma_cnt; ++i) { + len += sysfs_emit_at(buf, len, "%08llx-%08llx\n", + crashk_cma_ranges[i].start, + crashk_cma_ranges[i].end); + } + return len; +} +static struct kobj_attribute crash_cma_ranges_attr = __ATTR_RO(crash_cma_ranges); +#endif + +static ssize_t crash_size_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + ssize_t size = crash_get_memory_size(); + + if (size < 0) + return size; + + return sysfs_emit(buf, "%zd\n", size); +} +static ssize_t crash_size_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + unsigned long cnt; + int ret; + + if (kstrtoul(buf, 0, &cnt)) + return -EINVAL; + + ret = crash_shrink_memory(cnt); + return ret < 0 ? ret : count; +} +static struct kobj_attribute crash_size_attr = __ATTR_RW(crash_size); + +#ifdef CONFIG_CRASH_HOTPLUG +static ssize_t crash_elfcorehdr_size_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + unsigned int sz = crash_get_elfcorehdr_size(); + + return sysfs_emit(buf, "%u\n", sz); +} +static struct kobj_attribute crash_elfcorehdr_size_attr = __ATTR_RO(crash_elfcorehdr_size); + +#endif /* CONFIG_CRASH_HOTPLUG */ +#endif /* CONFIG_CRASH_DUMP */ + +static struct attribute *kexec_attrs[] = { + &loaded_attr.attr, +#ifdef CONFIG_CRASH_DUMP + &crash_loaded_attr.attr, + &crash_size_attr.attr, +#ifdef CONFIG_CRASH_RESERVE + &crash_cma_ranges_attr.attr, +#endif +#ifdef CONFIG_CRASH_HOTPLUG + &crash_elfcorehdr_size_attr.attr, +#endif +#endif + NULL +}; + +struct kexec_link_entry { + const char *target; + const char *name; +}; + +static struct kexec_link_entry kexec_links[] = { + { "loaded", "kexec_loaded" }, +#ifdef CONFIG_CRASH_DUMP + { "crash_loaded", "kexec_crash_loaded" }, + { "crash_size", "kexec_crash_size" }, +#ifdef CONFIG_CRASH_RESERVE + {"crash_cma_ranges", "kexec_crash_cma_ranges"}, +#endif +#ifdef CONFIG_CRASH_HOTPLUG + { "crash_elfcorehdr_size", "crash_elfcorehdr_size" }, +#endif +#endif +}; + +static struct kobject *kexec_kobj; +ATTRIBUTE_GROUPS(kexec); + +static int __init init_kexec_sysctl(void) +{ + int error; + int i; + + kexec_kobj = kobject_create_and_add("kexec", kernel_kobj); + if (!kexec_kobj) { + pr_err("failed to create kexec kobject\n"); + return -ENOMEM; + } + + error = sysfs_create_groups(kexec_kobj, kexec_groups); + if (error) + goto kset_exit; + + for (i = 0; i < ARRAY_SIZE(kexec_links); i++) { + error = compat_only_sysfs_link_entry_to_kobj(kernel_kobj, kexec_kobj, + kexec_links[i].target, + kexec_links[i].name); + if (error) + pr_err("Unable to create %s symlink (%d)", kexec_links[i].name, error); + } + + return 0; + +kset_exit: + kobject_put(kexec_kobj); + return error; +} + +subsys_initcall(init_kexec_sysctl); |
