diff options
Diffstat (limited to 'kernel/kexec_core.c')
| -rw-r--r-- | kernel/kexec_core.c | 751 |
1 files changed, 454 insertions, 297 deletions
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 969e8f52f7da..0f92acdd354d 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -6,6 +6,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/btf.h> #include <linux/capability.h> #include <linux/mm.h> #include <linux/file.h> @@ -14,6 +15,7 @@ #include <linux/kexec.h> #include <linux/mutex.h> #include <linux/list.h> +#include <linux/liveupdate.h> #include <linux/highmem.h> #include <linux/syscalls.h> #include <linux/reboot.h> @@ -39,6 +41,8 @@ #include <linux/hugetlb.h> #include <linux/objtool.h> #include <linux/kmsg_dump.h> +#include <linux/dma-map-ops.h> +#include <linux/sysfs.h> #include <asm/page.h> #include <asm/sections.h> @@ -48,52 +52,10 @@ atomic_t __kexec_lock = ATOMIC_INIT(0); -/* Per cpu memory for storing cpu states in case of system crash. */ -note_buf_t __percpu *crash_notes; - /* Flag to indicate we are going to kexec a new kernel */ bool kexec_in_progress = false; - -/* Location of the reserved area for the crash kernel */ -struct resource crashk_res = { - .name = "Crash kernel", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, - .desc = IORES_DESC_CRASH_KERNEL -}; -struct resource crashk_low_res = { - .name = "Crash kernel", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, - .desc = IORES_DESC_CRASH_KERNEL -}; - -int kexec_should_crash(struct task_struct *p) -{ - /* - * If crash_kexec_post_notifiers is enabled, don't run - * crash_kexec() here yet, which must be run after panic - * notifiers in panic(). - */ - if (crash_kexec_post_notifiers) - return 0; - /* - * There are 4 panic() calls in make_task_dead() path, each of which - * corresponds to each of these 4 conditions. - */ - if (in_interrupt() || !p->pid || is_global_init(p) || panic_on_oops) - return 1; - return 0; -} - -int kexec_crash_loaded(void) -{ - return !!kexec_crash_image; -} -EXPORT_SYMBOL_GPL(kexec_crash_loaded); +bool kexec_file_dbg_print; /* * When kexec transitions to the new kernel there is a one-to-one @@ -226,6 +188,7 @@ int sanity_check_segment_list(struct kimage *image) if (total_pages > nr_pages / 2) return -EINVAL; +#ifdef CONFIG_CRASH_DUMP /* * Verify we have good destination addresses. Normally * the caller is responsible for making certain we don't @@ -248,6 +211,17 @@ int sanity_check_segment_list(struct kimage *image) return -EADDRNOTAVAIL; } } +#endif + + /* + * The destination addresses are searched from system RAM rather than + * being allocated from the buddy allocator, so they are not guaranteed + * to be accepted by the current kernel. Accept the destination + * addresses before kexec swaps their content with the segments' source + * pages to avoid accessing memory before it is accepted. + */ + for (i = 0; i < nr_segments; i++) + accept_memory(image->segment[i].mem, image->segment[i].memsz); return 0; } @@ -261,7 +235,6 @@ struct kimage *do_kimage_alloc_init(void) if (!image) return NULL; - image->head = 0; image->entry = &image->head; image->last_entry = &image->head; image->control_page = ~0; /* By default this does not apply */ @@ -276,6 +249,12 @@ struct kimage *do_kimage_alloc_init(void) /* Initialize the list of unusable pages */ INIT_LIST_HEAD(&image->unusable_pages); +#ifdef CONFIG_CRASH_HOTPLUG + image->hp_action = KEXEC_CRASH_HP_NONE; + image->elfcorehdr_index = -1; + image->elfcorehdr_updated = false; +#endif + return image; } @@ -289,8 +268,8 @@ int kimage_is_destination_range(struct kimage *image, unsigned long mstart, mend; mstart = image->segment[i].mem; - mend = mstart + image->segment[i].memsz; - if ((end > mstart) && (start < mend)) + mend = mstart + image->segment[i].memsz - 1; + if ((end >= mstart) && (start <= mend)) return 1; } @@ -383,7 +362,7 @@ static struct page *kimage_alloc_normal_control_pages(struct kimage *image, pfn = page_to_boot_pfn(pages); epfn = pfn + count; addr = pfn << PAGE_SHIFT; - eaddr = epfn << PAGE_SHIFT; + eaddr = (epfn << PAGE_SHIFT) - 1; if ((epfn >= (KEXEC_CONTROL_MEMORY_LIMIT >> PAGE_SHIFT)) || kimage_is_destination_range(image, addr, eaddr)) { list_add(&pages->lru, &extra_pages); @@ -414,6 +393,7 @@ static struct page *kimage_alloc_normal_control_pages(struct kimage *image, return pages; } +#ifdef CONFIG_CRASH_DUMP static struct page *kimage_alloc_crash_control_pages(struct kimage *image, unsigned int order) { @@ -443,7 +423,7 @@ static struct page *kimage_alloc_crash_control_pages(struct kimage *image, pages = NULL; size = (1 << order) << PAGE_SHIFT; - hole_start = (image->control_page + (size - 1)) & ~(size - 1); + hole_start = ALIGN(image->control_page, size); hole_end = hole_start + size - 1; while (hole_end <= crashk_res.end) { unsigned long i; @@ -460,7 +440,7 @@ static struct page *kimage_alloc_crash_control_pages(struct kimage *image, mend = mstart + image->segment[i].memsz - 1; if ((hole_end >= mstart) && (hole_start <= mend)) { /* Advance the hole to the end of the segment */ - hole_start = (mend + (size - 1)) & ~(size - 1); + hole_start = ALIGN(mend, size); hole_end = hole_start + size - 1; break; } @@ -468,7 +448,7 @@ static struct page *kimage_alloc_crash_control_pages(struct kimage *image, /* If I don't overlap any segments I have found my hole! */ if (i == image->nr_segments) { pages = pfn_to_page(hole_start >> PAGE_SHIFT); - image->control_page = hole_end; + image->control_page = hole_end + 1; break; } } @@ -479,6 +459,7 @@ static struct page *kimage_alloc_crash_control_pages(struct kimage *image, return pages; } +#endif struct page *kimage_alloc_control_pages(struct kimage *image, @@ -490,48 +471,16 @@ struct page *kimage_alloc_control_pages(struct kimage *image, case KEXEC_TYPE_DEFAULT: pages = kimage_alloc_normal_control_pages(image, order); break; +#ifdef CONFIG_CRASH_DUMP case KEXEC_TYPE_CRASH: pages = kimage_alloc_crash_control_pages(image, order); break; +#endif } return pages; } -int kimage_crash_copy_vmcoreinfo(struct kimage *image) -{ - struct page *vmcoreinfo_page; - void *safecopy; - - if (image->type != KEXEC_TYPE_CRASH) - return 0; - - /* - * For kdump, allocate one vmcoreinfo safe copy from the - * crash memory. as we have arch_kexec_protect_crashkres() - * after kexec syscall, we naturally protect it from write - * (even read) access under kernel direct mapping. But on - * the other hand, we still need to operate it when crash - * happens to generate vmcoreinfo note, hereby we rely on - * vmap for this purpose. - */ - vmcoreinfo_page = kimage_alloc_control_pages(image, 0); - if (!vmcoreinfo_page) { - pr_warn("Could not allocate vmcoreinfo buffer\n"); - return -ENOMEM; - } - safecopy = vmap(&vmcoreinfo_page, 1, VM_MAP, PAGE_KERNEL); - if (!safecopy) { - pr_warn("Could not vmap vmcoreinfo buffer\n"); - return -ENOMEM; - } - - image->vmcoreinfo_data_copy = safecopy; - crash_update_vmcoreinfo_safecopy(safecopy); - - return 0; -} - static int kimage_add_entry(struct kimage *image, kimage_entry_t entry) { if (*image->entry != 0) @@ -606,6 +555,24 @@ static void kimage_free_entry(kimage_entry_t entry) kimage_free_pages(page); } +static void kimage_free_cma(struct kimage *image) +{ + unsigned long i; + + for (i = 0; i < image->nr_segments; i++) { + struct page *cma = image->segment_cma[i]; + u32 nr_pages = image->segment[i].memsz >> PAGE_SHIFT; + + if (!cma) + continue; + + arch_kexec_pre_free_pages(page_address(cma), nr_pages); + dma_release_from_contiguous(NULL, cma, nr_pages); + image->segment_cma[i] = NULL; + } + +} + void kimage_free(struct kimage *image) { kimage_entry_t *ptr, entry; @@ -614,10 +581,12 @@ void kimage_free(struct kimage *image) if (!image) return; +#ifdef CONFIG_CRASH_DUMP if (image->vmcoreinfo_data_copy) { crash_update_vmcoreinfo_safecopy(NULL); vunmap(image->vmcoreinfo_data_copy); } +#endif kimage_free_extra_pages(image); for_each_kimage_entry(image, ptr, entry) { @@ -642,6 +611,9 @@ void kimage_free(struct kimage *image) /* Free the kexec control pages... */ kimage_free_page_list(&image->control_pages); + /* Free CMA allocations */ + kimage_free_cma(image); + /* * Free up any temporary buffers allocated. This might hit if * error occurred much later after buffer allocation. @@ -729,7 +701,7 @@ static struct page *kimage_alloc_page(struct kimage *image, /* If the page is not a destination page use it */ if (!kimage_is_destination_range(image, addr, - addr + PAGE_SIZE)) + addr + PAGE_SIZE - 1)) break; /* @@ -767,9 +739,64 @@ static struct page *kimage_alloc_page(struct kimage *image, return page; } -static int kimage_load_normal_segment(struct kimage *image, - struct kexec_segment *segment) +static int kimage_load_cma_segment(struct kimage *image, int idx) { + struct kexec_segment *segment = &image->segment[idx]; + struct page *cma = image->segment_cma[idx]; + char *ptr = page_address(cma); + size_t ubytes, mbytes; + int result = 0; + unsigned char __user *buf = NULL; + unsigned char *kbuf = NULL; + + if (image->file_mode) + kbuf = segment->kbuf; + else + buf = segment->buf; + ubytes = segment->bufsz; + mbytes = segment->memsz; + + /* Then copy from source buffer to the CMA one */ + while (mbytes) { + size_t uchunk, mchunk; + + mchunk = min_t(size_t, mbytes, PAGE_SIZE); + uchunk = min(ubytes, mchunk); + + if (uchunk) { + /* For file based kexec, source pages are in kernel memory */ + if (image->file_mode) + memcpy(ptr, kbuf, uchunk); + else + result = copy_from_user(ptr, buf, uchunk); + ubytes -= uchunk; + if (image->file_mode) + kbuf += uchunk; + else + buf += uchunk; + } + + if (result) { + result = -EFAULT; + goto out; + } + + ptr += mchunk; + mbytes -= mchunk; + + cond_resched(); + } + + /* Clear any remainder */ + memset(ptr, 0, mbytes); + +out: + return result; +} + +static int kimage_load_normal_segment(struct kimage *image, int idx) +{ + struct kexec_segment *segment = &image->segment[idx]; unsigned long maddr; size_t ubytes, mbytes; int result; @@ -784,6 +811,9 @@ static int kimage_load_normal_segment(struct kimage *image, mbytes = segment->memsz; maddr = segment->mem; + if (image->segment_cma[idx]) + return kimage_load_cma_segment(image, idx); + result = kimage_set_destination(image, maddr); if (result < 0) goto out; @@ -806,27 +836,27 @@ static int kimage_load_normal_segment(struct kimage *image, ptr = kmap_local_page(page); /* Start with a clear page */ clear_page(ptr); - ptr += maddr & ~PAGE_MASK; - mchunk = min_t(size_t, mbytes, - PAGE_SIZE - (maddr & ~PAGE_MASK)); + mchunk = min_t(size_t, mbytes, PAGE_SIZE); uchunk = min(ubytes, mchunk); - /* For file based kexec, source pages are in kernel memory */ - if (image->file_mode) - memcpy(ptr, kbuf, uchunk); - else - result = copy_from_user(ptr, buf, uchunk); + if (uchunk) { + /* For file based kexec, source pages are in kernel memory */ + if (image->file_mode) + memcpy(ptr, kbuf, uchunk); + else + result = copy_from_user(ptr, buf, uchunk); + ubytes -= uchunk; + if (image->file_mode) + kbuf += uchunk; + else + buf += uchunk; + } kunmap_local(ptr); if (result) { result = -EFAULT; goto out; } - ubytes -= uchunk; maddr += mchunk; - if (image->file_mode) - kbuf += mchunk; - else - buf += mchunk; mbytes -= mchunk; cond_resched(); @@ -835,13 +865,14 @@ out: return result; } -static int kimage_load_crash_segment(struct kimage *image, - struct kexec_segment *segment) +#ifdef CONFIG_CRASH_DUMP +static int kimage_load_crash_segment(struct kimage *image, int idx) { /* For crash dumps kernels we simply copy the data from * user space to it's destination. * We do things a page at a time for the sake of kmap. */ + struct kexec_segment *segment = &image->segment[idx]; unsigned long maddr; size_t ubytes, mbytes; int result; @@ -868,20 +899,25 @@ static int kimage_load_crash_segment(struct kimage *image, } arch_kexec_post_alloc_pages(page_address(page), 1, 0); ptr = kmap_local_page(page); - ptr += maddr & ~PAGE_MASK; - mchunk = min_t(size_t, mbytes, - PAGE_SIZE - (maddr & ~PAGE_MASK)); + mchunk = min_t(size_t, mbytes, PAGE_SIZE); uchunk = min(ubytes, mchunk); if (mchunk > uchunk) { /* Zero the trailing part of the page */ memset(ptr + uchunk, 0, mchunk - uchunk); } - /* For file based kexec, source pages are in kernel memory */ - if (image->file_mode) - memcpy(ptr, kbuf, uchunk); - else - result = copy_from_user(ptr, buf, uchunk); + if (uchunk) { + /* For file based kexec, source pages are in kernel memory */ + if (image->file_mode) + memcpy(ptr, kbuf, uchunk); + else + result = copy_from_user(ptr, buf, uchunk); + ubytes -= uchunk; + if (image->file_mode) + kbuf += uchunk; + else + buf += uchunk; + } kexec_flush_icache_page(page); kunmap_local(ptr); arch_kexec_pre_free_pages(page_address(page), 1); @@ -889,12 +925,7 @@ static int kimage_load_crash_segment(struct kimage *image, result = -EFAULT; goto out; } - ubytes -= uchunk; maddr += mchunk; - if (image->file_mode) - kbuf += mchunk; - else - buf += mchunk; mbytes -= mchunk; cond_resched(); @@ -902,225 +933,196 @@ static int kimage_load_crash_segment(struct kimage *image, out: return result; } +#endif -int kimage_load_segment(struct kimage *image, - struct kexec_segment *segment) +int kimage_load_segment(struct kimage *image, int idx) { int result = -ENOMEM; switch (image->type) { case KEXEC_TYPE_DEFAULT: - result = kimage_load_normal_segment(image, segment); + result = kimage_load_normal_segment(image, idx); break; +#ifdef CONFIG_CRASH_DUMP case KEXEC_TYPE_CRASH: - result = kimage_load_crash_segment(image, segment); + result = kimage_load_crash_segment(image, idx); break; +#endif } return result; } -struct kimage *kexec_image; -struct kimage *kexec_crash_image; -int kexec_load_disabled; -#ifdef CONFIG_SYSCTL -static struct ctl_table kexec_core_sysctls[] = { - { - .procname = "kexec_load_disabled", - .data = &kexec_load_disabled, - .maxlen = sizeof(int), - .mode = 0644, - /* only handle a transition from default "0" to "1" */ - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ONE, - .extra2 = SYSCTL_ONE, - }, - { } -}; - -static int __init kexec_core_sysctl_init(void) -{ - register_sysctl_init("kernel", kexec_core_sysctls); - return 0; -} -late_initcall(kexec_core_sysctl_init); -#endif - -/* - * No panic_cpu check version of crash_kexec(). This function is called - * only when panic_cpu holds the current CPU number; this is the only CPU - * which processes crash_kexec routines. - */ -void __noclone __crash_kexec(struct pt_regs *regs) -{ - /* Take the kexec_lock here to prevent sys_kexec_load - * running on one cpu from replacing the crash kernel - * we are using after a panic on a different cpu. - * - * If the crash kernel was not located in a fixed area - * of memory the xchg(&kexec_crash_image) would be - * sufficient. But since I reuse the memory... - */ - if (kexec_trylock()) { - if (kexec_crash_image) { - struct pt_regs fixed_regs; - - crash_setup_regs(&fixed_regs, regs); - crash_save_vmcoreinfo(); - machine_crash_shutdown(&fixed_regs); - machine_kexec(kexec_crash_image); - } - kexec_unlock(); - } -} -STACK_FRAME_NON_STANDARD(__crash_kexec); - -void crash_kexec(struct pt_regs *regs) +void *kimage_map_segment(struct kimage *image, + unsigned long addr, unsigned long size) { - int old_cpu, this_cpu; + unsigned long src_page_addr, dest_page_addr = 0; + unsigned long eaddr = addr + size; + kimage_entry_t *ptr, entry; + struct page **src_pages; + unsigned int npages; + void *vaddr = NULL; + int i; /* - * Only one CPU is allowed to execute the crash_kexec() code as with - * panic(). Otherwise parallel calls of panic() and crash_kexec() - * may stop each other. To exclude them, we use panic_cpu here too. + * Collect the source pages and map them in a contiguous VA range. */ - this_cpu = raw_smp_processor_id(); - old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, this_cpu); - if (old_cpu == PANIC_CPU_INVALID) { - /* This is the 1st CPU which comes here, so go ahead. */ - __crash_kexec(regs); + npages = PFN_UP(eaddr) - PFN_DOWN(addr); + src_pages = kmalloc_array(npages, sizeof(*src_pages), GFP_KERNEL); + if (!src_pages) { + pr_err("Could not allocate ima pages array.\n"); + return NULL; + } - /* - * Reset panic_cpu to allow another panic()/crash_kexec() - * call. - */ - atomic_set(&panic_cpu, PANIC_CPU_INVALID); + i = 0; + for_each_kimage_entry(image, ptr, entry) { + if (entry & IND_DESTINATION) { + dest_page_addr = entry & PAGE_MASK; + } else if (entry & IND_SOURCE) { + if (dest_page_addr >= addr && dest_page_addr < eaddr) { + src_page_addr = entry & PAGE_MASK; + src_pages[i++] = + virt_to_page(__va(src_page_addr)); + if (i == npages) + break; + dest_page_addr += PAGE_SIZE; + } + } } -} -ssize_t crash_get_memory_size(void) -{ - ssize_t size = 0; + /* Sanity check. */ + WARN_ON(i < npages); - if (!kexec_trylock()) - return -EBUSY; + vaddr = vmap(src_pages, npages, VM_MAP, PAGE_KERNEL); + kfree(src_pages); - if (crashk_res.end != crashk_res.start) - size = resource_size(&crashk_res); + if (!vaddr) + pr_err("Could not map ima buffer.\n"); - kexec_unlock(); - return size; + return vaddr; } -int crash_shrink_memory(unsigned long new_size) +void kimage_unmap_segment(void *segment_buffer) { - int ret = 0; - unsigned long start, end; - unsigned long old_size; - struct resource *ram_res; - - if (!kexec_trylock()) - return -EBUSY; + vunmap(segment_buffer); +} - if (kexec_crash_image) { - ret = -ENOENT; - goto unlock; - } - start = crashk_res.start; - end = crashk_res.end; - old_size = (end == 0) ? 0 : end - start + 1; - if (new_size >= old_size) { - ret = (new_size == old_size) ? 0 : -EINVAL; - goto unlock; - } +struct kexec_load_limit { + /* Mutex protects the limit count. */ + struct mutex mutex; + int limit; +}; - ram_res = kzalloc(sizeof(*ram_res), GFP_KERNEL); - if (!ram_res) { - ret = -ENOMEM; - goto unlock; - } +static struct kexec_load_limit load_limit_reboot = { + .mutex = __MUTEX_INITIALIZER(load_limit_reboot.mutex), + .limit = -1, +}; - start = roundup(start, KEXEC_CRASH_MEM_ALIGN); - end = roundup(start + new_size, KEXEC_CRASH_MEM_ALIGN); +static struct kexec_load_limit load_limit_panic = { + .mutex = __MUTEX_INITIALIZER(load_limit_panic.mutex), + .limit = -1, +}; - crash_free_reserved_phys_range(end, crashk_res.end); +struct kimage *kexec_image; +struct kimage *kexec_crash_image; +static int kexec_load_disabled; - if ((start == end) && (crashk_res.parent != NULL)) - release_resource(&crashk_res); +#ifdef CONFIG_SYSCTL +static int kexec_limit_handler(const struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + struct kexec_load_limit *limit = table->data; + int val; + struct ctl_table tmp = { + .data = &val, + .maxlen = sizeof(val), + .mode = table->mode, + }; + int ret; + + if (write) { + ret = proc_dointvec(&tmp, write, buffer, lenp, ppos); + if (ret) + return ret; + + if (val < 0) + return -EINVAL; - ram_res->start = end; - ram_res->end = crashk_res.end; - ram_res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM; - ram_res->name = "System RAM"; + mutex_lock(&limit->mutex); + if (limit->limit != -1 && val >= limit->limit) + ret = -EINVAL; + else + limit->limit = val; + mutex_unlock(&limit->mutex); - crashk_res.end = end - 1; + return ret; + } - insert_resource(&iomem_resource, ram_res); + mutex_lock(&limit->mutex); + val = limit->limit; + mutex_unlock(&limit->mutex); -unlock: - kexec_unlock(); - return ret; + return proc_dointvec(&tmp, write, buffer, lenp, ppos); } -void crash_save_cpu(struct pt_regs *regs, int cpu) -{ - struct elf_prstatus prstatus; - u32 *buf; - - if ((cpu < 0) || (cpu >= nr_cpu_ids)) - return; +static const struct ctl_table kexec_core_sysctls[] = { + { + .procname = "kexec_load_disabled", + .data = &kexec_load_disabled, + .maxlen = sizeof(int), + .mode = 0644, + /* only handle a transition from default "0" to "1" */ + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ONE, + .extra2 = SYSCTL_ONE, + }, + { + .procname = "kexec_load_limit_panic", + .data = &load_limit_panic, + .mode = 0644, + .proc_handler = kexec_limit_handler, + }, + { + .procname = "kexec_load_limit_reboot", + .data = &load_limit_reboot, + .mode = 0644, + .proc_handler = kexec_limit_handler, + }, +}; - /* Using ELF notes here is opportunistic. - * I need a well defined structure format - * for the data I pass, and I need tags - * on the data to indicate what information I have - * squirrelled away. ELF notes happen to provide - * all of that, so there is no need to invent something new. - */ - buf = (u32 *)per_cpu_ptr(crash_notes, cpu); - if (!buf) - return; - memset(&prstatus, 0, sizeof(prstatus)); - prstatus.common.pr_pid = current->pid; - elf_core_copy_regs(&prstatus.pr_reg, regs); - buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS, - &prstatus, sizeof(prstatus)); - final_note(buf); +static int __init kexec_core_sysctl_init(void) +{ + register_sysctl_init("kernel", kexec_core_sysctls); + return 0; } +late_initcall(kexec_core_sysctl_init); +#endif -static int __init crash_notes_memory_init(void) +bool kexec_load_permitted(int kexec_image_type) { - /* Allocate memory for saving cpu registers. */ - size_t size, align; + struct kexec_load_limit *limit; /* - * crash_notes could be allocated across 2 vmalloc pages when percpu - * is vmalloc based . vmalloc doesn't guarantee 2 continuous vmalloc - * pages are also on 2 continuous physical pages. In this case the - * 2nd part of crash_notes in 2nd page could be lost since only the - * starting address and size of crash_notes are exported through sysfs. - * Here round up the size of crash_notes to the nearest power of two - * and pass it to __alloc_percpu as align value. This can make sure - * crash_notes is allocated inside one physical page. + * Only the superuser can use the kexec syscall and if it has not + * been disabled. */ - size = sizeof(note_buf_t); - align = min(roundup_pow_of_two(sizeof(note_buf_t)), PAGE_SIZE); - - /* - * Break compile if size is bigger than PAGE_SIZE since crash_notes - * definitely will be in 2 pages with that. - */ - BUILD_BUG_ON(size > PAGE_SIZE); - - crash_notes = __alloc_percpu(size, align); - if (!crash_notes) { - pr_warn("Memory allocation for saving cpu register states failed\n"); - return -ENOMEM; + if (!capable(CAP_SYS_BOOT) || kexec_load_disabled) + return false; + + /* Check limit counter and decrease it.*/ + limit = (kexec_image_type == KEXEC_TYPE_CRASH) ? + &load_limit_panic : &load_limit_reboot; + mutex_lock(&limit->mutex); + if (!limit->limit) { + mutex_unlock(&limit->mutex); + return false; } - return 0; -} -subsys_initcall(crash_notes_memory_init); + if (limit->limit != -1) + limit->limit--; + mutex_unlock(&limit->mutex); + return true; +} /* * Move into place and start executing a preloaded standalone @@ -1137,24 +1139,32 @@ int kernel_kexec(void) goto Unlock; } + error = liveupdate_reboot(); + if (error) + goto Unlock; + #ifdef CONFIG_KEXEC_JUMP if (kexec_image->preserve_context) { + /* + * This flow is analogous to hibernation flows that occur + * before creating an image and before jumping from the + * restore kernel to the image one, so it uses the same + * device callbacks as those two flows. + */ pm_prepare_console(); error = freeze_processes(); if (error) { error = -EBUSY; goto Restore_console; } - suspend_console(); + console_suspend_all(); error = dpm_suspend_start(PMSG_FREEZE); if (error) - goto Resume_console; - /* At this point, dpm_suspend_start() has been called, - * but *not* dpm_suspend_end(). We *must* call - * dpm_suspend_end() now. Otherwise, drivers for - * some devices (e.g. interrupt controllers) become - * desynchronized with the actual state of the - * hardware at resume time, and evil weirdness ensues. + goto Resume_devices; + /* + * dpm_suspend_end() must be called after dpm_suspend_start() + * to complete the transition, like in the hibernation flows + * mentioned above. */ error = dpm_suspend_end(PMSG_FREEZE); if (error) @@ -1172,6 +1182,7 @@ int kernel_kexec(void) kexec_in_progress = true; kernel_restart_prepare("kexec reboot"); migrate_to_reboot_cpu(); + syscore_shutdown(); /* * migrate_to_reboot_cpu() disables CPU hotplug assuming that @@ -1189,6 +1200,13 @@ int kernel_kexec(void) #ifdef CONFIG_KEXEC_JUMP if (kexec_image->preserve_context) { + /* + * This flow is analogous to hibernation flows that occur after + * creating an image and after the image kernel has got control + * back, and in case the devices have been reset or otherwise + * manipulated in the meantime, it uses the device callbacks + * used by the latter. + */ syscore_resume(); Enable_irqs: local_irq_enable(); @@ -1197,8 +1215,7 @@ int kernel_kexec(void) dpm_resume_start(PMSG_RESTORE); Resume_devices: dpm_resume_end(PMSG_RESTORE); - Resume_console: - resume_console(); + console_resume_all(); thaw_processes(); Restore_console: pm_restore_console(); @@ -1209,3 +1226,143 @@ int kernel_kexec(void) kexec_unlock(); return error; } + +static ssize_t loaded_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%d\n", !!kexec_image); +} +static struct kobj_attribute loaded_attr = __ATTR_RO(loaded); + +#ifdef CONFIG_CRASH_DUMP +static ssize_t crash_loaded_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%d\n", kexec_crash_loaded()); +} +static struct kobj_attribute crash_loaded_attr = __ATTR_RO(crash_loaded); + +#ifdef CONFIG_CRASH_RESERVE +static ssize_t crash_cma_ranges_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + + ssize_t len = 0; + int i; + + for (i = 0; i < crashk_cma_cnt; ++i) { + len += sysfs_emit_at(buf, len, "%08llx-%08llx\n", + crashk_cma_ranges[i].start, + crashk_cma_ranges[i].end); + } + return len; +} +static struct kobj_attribute crash_cma_ranges_attr = __ATTR_RO(crash_cma_ranges); +#endif + +static ssize_t crash_size_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + ssize_t size = crash_get_memory_size(); + + if (size < 0) + return size; + + return sysfs_emit(buf, "%zd\n", size); +} +static ssize_t crash_size_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + unsigned long cnt; + int ret; + + if (kstrtoul(buf, 0, &cnt)) + return -EINVAL; + + ret = crash_shrink_memory(cnt); + return ret < 0 ? ret : count; +} +static struct kobj_attribute crash_size_attr = __ATTR_RW(crash_size); + +#ifdef CONFIG_CRASH_HOTPLUG +static ssize_t crash_elfcorehdr_size_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + unsigned int sz = crash_get_elfcorehdr_size(); + + return sysfs_emit(buf, "%u\n", sz); +} +static struct kobj_attribute crash_elfcorehdr_size_attr = __ATTR_RO(crash_elfcorehdr_size); + +#endif /* CONFIG_CRASH_HOTPLUG */ +#endif /* CONFIG_CRASH_DUMP */ + +static struct attribute *kexec_attrs[] = { + &loaded_attr.attr, +#ifdef CONFIG_CRASH_DUMP + &crash_loaded_attr.attr, + &crash_size_attr.attr, +#ifdef CONFIG_CRASH_RESERVE + &crash_cma_ranges_attr.attr, +#endif +#ifdef CONFIG_CRASH_HOTPLUG + &crash_elfcorehdr_size_attr.attr, +#endif +#endif + NULL +}; + +struct kexec_link_entry { + const char *target; + const char *name; +}; + +static struct kexec_link_entry kexec_links[] = { + { "loaded", "kexec_loaded" }, +#ifdef CONFIG_CRASH_DUMP + { "crash_loaded", "kexec_crash_loaded" }, + { "crash_size", "kexec_crash_size" }, +#ifdef CONFIG_CRASH_RESERVE + {"crash_cma_ranges", "kexec_crash_cma_ranges"}, +#endif +#ifdef CONFIG_CRASH_HOTPLUG + { "crash_elfcorehdr_size", "crash_elfcorehdr_size" }, +#endif +#endif +}; + +static struct kobject *kexec_kobj; +ATTRIBUTE_GROUPS(kexec); + +static int __init init_kexec_sysctl(void) +{ + int error; + int i; + + kexec_kobj = kobject_create_and_add("kexec", kernel_kobj); + if (!kexec_kobj) { + pr_err("failed to create kexec kobject\n"); + return -ENOMEM; + } + + error = sysfs_create_groups(kexec_kobj, kexec_groups); + if (error) + goto kset_exit; + + for (i = 0; i < ARRAY_SIZE(kexec_links); i++) { + error = compat_only_sysfs_link_entry_to_kobj(kernel_kobj, kexec_kobj, + kexec_links[i].target, + kexec_links[i].name); + if (error) + pr_err("Unable to create %s symlink (%d)", kexec_links[i].name, error); + } + + return 0; + +kset_exit: + kobject_put(kexec_kobj); + return error; +} + +subsys_initcall(init_kexec_sysctl); |
