diff options
Diffstat (limited to 'fs/proc/kcore.c')
| -rw-r--r-- | fs/proc/kcore.c | 767 |
1 files changed, 433 insertions, 334 deletions
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 06ea155e1a59..728630b10fdf 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/proc/kcore.c kernel ELF core dumper * @@ -9,6 +10,7 @@ * Safe accesses to vmalloc/direct-mapped discontiguous areas, Kanoj Sarcar <kanoj@sgi.com> */ +#include <linux/vmcore_info.h> #include <linux/mm.h> #include <linux/proc_fs.h> #include <linux/kcore.h> @@ -16,23 +18,22 @@ #include <linux/capability.h> #include <linux/elf.h> #include <linux/elfcore.h> -#include <linux/notifier.h> #include <linux/vmalloc.h> #include <linux/highmem.h> #include <linux/printk.h> -#include <linux/bootmem.h> +#include <linux/memblock.h> #include <linux/init.h> #include <linux/slab.h> -#include <asm/uaccess.h> +#include <linux/uio.h> #include <asm/io.h> #include <linux/list.h> #include <linux/ioport.h> #include <linux/memory.h> +#include <linux/sched/task.h> +#include <linux/security.h> #include <asm/sections.h> #include "internal.h" -#define CORE_STR "CORE" - #ifndef ELF_CORE_EFLAGS #define ELF_CORE_EFLAGS 0 #endif @@ -47,104 +48,100 @@ static struct proc_dir_entry *proc_root_kcore; #define kc_offset_to_vaddr(o) ((o) + PAGE_OFFSET) #endif -/* An ELF note in memory */ -struct memelfnote +#ifndef kc_xlate_dev_mem_ptr +#define kc_xlate_dev_mem_ptr kc_xlate_dev_mem_ptr +static inline void *kc_xlate_dev_mem_ptr(phys_addr_t phys) { - const char *name; - int type; - unsigned int datasz; - void *data; -}; + return __va(phys); +} +#endif +#ifndef kc_unxlate_dev_mem_ptr +#define kc_unxlate_dev_mem_ptr kc_unxlate_dev_mem_ptr +static inline void kc_unxlate_dev_mem_ptr(phys_addr_t phys, void *virt) +{ +} +#endif static LIST_HEAD(kclist_head); -static DEFINE_RWLOCK(kclist_lock); +static int kcore_nphdr; +static size_t kcore_phdrs_len; +static size_t kcore_notes_len; +static size_t kcore_data_offset; +DEFINE_STATIC_PERCPU_RWSEM(kclist_lock); static int kcore_need_update = 1; -void -kclist_add(struct kcore_list 
*new, void *addr, size_t size, int type) +/* + * Returns > 0 for RAM pages, 0 for non-RAM pages, < 0 on error + * Same as oldmem_pfn_is_ram in vmcore + */ +static int (*mem_pfn_is_ram)(unsigned long pfn); + +int __init register_mem_pfn_is_ram(int (*fn)(unsigned long pfn)) +{ + if (mem_pfn_is_ram) + return -EBUSY; + mem_pfn_is_ram = fn; + return 0; +} + +static int pfn_is_ram(unsigned long pfn) +{ + if (mem_pfn_is_ram) + return mem_pfn_is_ram(pfn); + else + return 1; +} + +/* This doesn't grab kclist_lock, so it should only be used at init time. */ +void __init kclist_add(struct kcore_list *new, void *addr, size_t size, + int type) { new->addr = (unsigned long)addr; new->size = size; new->type = type; - write_lock(&kclist_lock); list_add_tail(&new->list, &kclist_head); - write_unlock(&kclist_lock); } -static size_t get_kcore_size(int *nphdr, size_t *elf_buflen) +static void update_kcore_size(void) { size_t try, size; struct kcore_list *m; - *nphdr = 1; /* PT_NOTE */ + kcore_nphdr = 1; /* PT_NOTE */ size = 0; list_for_each_entry(m, &kclist_head, list) { try = kc_vaddr_to_offset((size_t)m->addr + m->size); if (try > size) size = try; - *nphdr = *nphdr + 1; + kcore_nphdr++; } - *elf_buflen = sizeof(struct elfhdr) + - (*nphdr + 2)*sizeof(struct elf_phdr) + - 3 * ((sizeof(struct elf_note)) + - roundup(sizeof(CORE_STR), 4)) + - roundup(sizeof(struct elf_prstatus), 4) + - roundup(sizeof(struct elf_prpsinfo), 4) + - roundup(sizeof(struct task_struct), 4); - *elf_buflen = PAGE_ALIGN(*elf_buflen); - return size + *elf_buflen; -} - -static void free_kclist_ents(struct list_head *head) -{ - struct kcore_list *tmp, *pos; - - list_for_each_entry_safe(pos, tmp, head, list) { - list_del(&pos->list); - kfree(pos); - } -} -/* - * Replace all KCORE_RAM/KCORE_VMEMMAP information with passed list. 
- */ -static void __kcore_update_ram(struct list_head *list) -{ - int nphdr; - size_t size; - struct kcore_list *tmp, *pos; - LIST_HEAD(garbage); - write_lock(&kclist_lock); - if (kcore_need_update) { - list_for_each_entry_safe(pos, tmp, &kclist_head, list) { - if (pos->type == KCORE_RAM - || pos->type == KCORE_VMEMMAP) - list_move(&pos->list, &garbage); - } - list_splice_tail(list, &kclist_head); - } else - list_splice(list, &garbage); - kcore_need_update = 0; - proc_root_kcore->size = get_kcore_size(&nphdr, &size); - write_unlock(&kclist_lock); - - free_kclist_ents(&garbage); + kcore_phdrs_len = kcore_nphdr * sizeof(struct elf_phdr); + kcore_notes_len = (4 * sizeof(struct elf_note) + + ALIGN(sizeof(NN_PRSTATUS), 4) + + ALIGN(sizeof(NN_PRPSINFO), 4) + + ALIGN(sizeof(NN_TASKSTRUCT), 4) + + VMCOREINFO_NOTE_NAME_BYTES + + ALIGN(sizeof(struct elf_prstatus), 4) + + ALIGN(sizeof(struct elf_prpsinfo), 4) + + ALIGN(arch_task_struct_size, 4) + + ALIGN(vmcoreinfo_size, 4)); + kcore_data_offset = PAGE_ALIGN(sizeof(struct elfhdr) + kcore_phdrs_len + + kcore_notes_len); + proc_root_kcore->size = kcore_data_offset + size; } - #ifdef CONFIG_HIGHMEM /* * If no highmem, we can assume [0...max_low_pfn) continuous range of memory * because memory hole is not as big as !HIGHMEM case. * (HIGHMEM is special because part of memory is _invisible_ from the kernel.) 
*/ -static int kcore_update_ram(void) +static int kcore_ram_list(struct list_head *head) { - LIST_HEAD(head); struct kcore_list *ent; - int ret = 0; ent = kmalloc(sizeof(*ent), GFP_KERNEL); if (!ent) @@ -152,9 +149,8 @@ static int kcore_update_ram(void) ent->addr = (unsigned long)__va(0); ent->size = max_low_pfn << PAGE_SHIFT; ent->type = KCORE_RAM; - list_add(&ent->list, &head); - __kcore_update_ram(&head); - return ret; + list_add(&ent->list, head); + return 0; } #else /* !CONFIG_HIGHMEM */ @@ -172,7 +168,7 @@ get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head) start = ((unsigned long)pfn_to_page(pfn)) & PAGE_MASK; end = ((unsigned long)pfn_to_page(pfn + nr_pages)) - 1; - end = ALIGN(end, PAGE_SIZE); + end = PAGE_ALIGN(end); /* overlap check (because we have to align page */ list_for_each_entry(tmp, head, list) { if (tmp->type != KCORE_VMEMMAP) @@ -207,25 +203,32 @@ kclist_add_private(unsigned long pfn, unsigned long nr_pages, void *arg) { struct list_head *head = (struct list_head *)arg; struct kcore_list *ent; + struct page *p; + + if (!pfn_valid(pfn)) + return 1; + + p = pfn_to_page(pfn); ent = kmalloc(sizeof(*ent), GFP_KERNEL); if (!ent) return -ENOMEM; - ent->addr = (unsigned long)__va((pfn << PAGE_SHIFT)); + ent->addr = (unsigned long)page_to_virt(p); ent->size = nr_pages << PAGE_SHIFT; - /* Sanity check: Can happen in 32bit arch...maybe */ - if (ent->addr < (unsigned long) __va(0)) + if (!virt_addr_valid((void *)ent->addr)) goto free_out; /* cut not-mapped area. ....from ppc-32 code. 
*/ if (ULONG_MAX - ent->addr < ent->size) ent->size = ULONG_MAX - ent->addr; - /* cut when vmalloc() area is higher than direct-map area */ - if (VMALLOC_START > (unsigned long)__va(0)) { - if (ent->addr > VMALLOC_START) - goto free_out; + /* + * We've already checked virt_addr_valid so we know this address + * is a valid pointer, therefore we can check against it to determine + * if we need to trim + */ + if (VMALLOC_START > ent->addr) { if (VMALLOC_START - ent->addr < ent->size) ent->size = VMALLOC_START - ent->addr; } @@ -244,327 +247,427 @@ free_out: return 1; } -static int kcore_update_ram(void) +static int kcore_ram_list(struct list_head *list) { int nid, ret; unsigned long end_pfn; - LIST_HEAD(head); - /* Not inialized....update now */ + /* Not initialized....update now */ /* find out "max pfn" */ end_pfn = 0; for_each_node_state(nid, N_MEMORY) { unsigned long node_end; - node_end = NODE_DATA(nid)->node_start_pfn + - NODE_DATA(nid)->node_spanned_pages; + node_end = node_end_pfn(nid); if (end_pfn < node_end) end_pfn = node_end; } /* scan 0 to max_pfn */ - ret = walk_system_ram_range(0, end_pfn, &head, kclist_add_private); - if (ret) { - free_kclist_ents(&head); + ret = walk_system_ram_range(0, end_pfn, list, kclist_add_private); + if (ret) return -ENOMEM; - } - __kcore_update_ram(&head); - return ret; + return 0; } #endif /* CONFIG_HIGHMEM */ -/*****************************************************************************/ -/* - * determine size of ELF note - */ -static int notesize(struct memelfnote *en) -{ - int sz; - - sz = sizeof(struct elf_note); - sz += roundup((strlen(en->name) + 1), 4); - sz += roundup(en->datasz, 4); - - return sz; -} /* end notesize() */ - -/*****************************************************************************/ -/* - * store a note in the header buffer - */ -static char *storenote(struct memelfnote *men, char *bufp) +static int kcore_update_ram(void) { - struct elf_note en; + LIST_HEAD(list); + LIST_HEAD(garbage); + struct 
kcore_list *tmp, *pos; + int ret = 0; -#define DUMP_WRITE(addr,nr) do { memcpy(bufp,addr,nr); bufp += nr; } while(0) + percpu_down_write(&kclist_lock); + if (!xchg(&kcore_need_update, 0)) + goto out; - en.n_namesz = strlen(men->name) + 1; - en.n_descsz = men->datasz; - en.n_type = men->type; + ret = kcore_ram_list(&list); + if (ret) { + /* Couldn't get the RAM list, try again next time. */ + WRITE_ONCE(kcore_need_update, 1); + list_splice_tail(&list, &garbage); + goto out; + } - DUMP_WRITE(&en, sizeof(en)); - DUMP_WRITE(men->name, en.n_namesz); + list_for_each_entry_safe(pos, tmp, &kclist_head, list) { + if (pos->type == KCORE_RAM || pos->type == KCORE_VMEMMAP) + list_move(&pos->list, &garbage); + } + list_splice_tail(&list, &kclist_head); - /* XXX - cast from long long to long to avoid need for libgcc.a */ - bufp = (char*) roundup((unsigned long)bufp,4); - DUMP_WRITE(men->data, men->datasz); - bufp = (char*) roundup((unsigned long)bufp,4); + update_kcore_size(); -#undef DUMP_WRITE +out: + percpu_up_write(&kclist_lock); + list_for_each_entry_safe(pos, tmp, &garbage, list) { + list_del(&pos->list); + kfree(pos); + } + return ret; +} - return bufp; -} /* end storenote() */ +static void append_kcore_note(char *notes, size_t *i, const char *name, + unsigned int type, const void *desc, + size_t descsz) +{ + struct elf_note *note = (struct elf_note *)&notes[*i]; + + note->n_namesz = strlen(name) + 1; + note->n_descsz = descsz; + note->n_type = type; + *i += sizeof(*note); + memcpy(&notes[*i], name, note->n_namesz); + *i = ALIGN(*i + note->n_namesz, 4); + memcpy(&notes[*i], desc, descsz); + *i = ALIGN(*i + descsz, 4); +} -/* - * store an ELF coredump header in the supplied buffer - * nphdr is the number of elf_phdr to insert - */ -static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff) +static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter) { - struct elf_prstatus prstatus; /* NT_PRSTATUS */ - struct elf_prpsinfo prpsinfo; /* NT_PRPSINFO */ - struct 
elf_phdr *nhdr, *phdr; - struct elfhdr *elf; - struct memelfnote notes[3]; - off_t offset = 0; + struct file *file = iocb->ki_filp; + char *buf = file->private_data; + loff_t *fpos = &iocb->ki_pos; + size_t phdrs_offset, notes_offset; + size_t page_offline_frozen = 1; struct kcore_list *m; + size_t tsz; + unsigned long start; + size_t buflen = iov_iter_count(iter); + size_t orig_buflen = buflen; + int ret = 0; - /* setup ELF header */ - elf = (struct elfhdr *) bufp; - bufp += sizeof(struct elfhdr); - offset += sizeof(struct elfhdr); - memcpy(elf->e_ident, ELFMAG, SELFMAG); - elf->e_ident[EI_CLASS] = ELF_CLASS; - elf->e_ident[EI_DATA] = ELF_DATA; - elf->e_ident[EI_VERSION]= EV_CURRENT; - elf->e_ident[EI_OSABI] = ELF_OSABI; - memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD); - elf->e_type = ET_CORE; - elf->e_machine = ELF_ARCH; - elf->e_version = EV_CURRENT; - elf->e_entry = 0; - elf->e_phoff = sizeof(struct elfhdr); - elf->e_shoff = 0; - elf->e_flags = ELF_CORE_EFLAGS; - elf->e_ehsize = sizeof(struct elfhdr); - elf->e_phentsize= sizeof(struct elf_phdr); - elf->e_phnum = nphdr; - elf->e_shentsize= 0; - elf->e_shnum = 0; - elf->e_shstrndx = 0; - - /* setup ELF PT_NOTE program header */ - nhdr = (struct elf_phdr *) bufp; - bufp += sizeof(struct elf_phdr); - offset += sizeof(struct elf_phdr); - nhdr->p_type = PT_NOTE; - nhdr->p_offset = 0; - nhdr->p_vaddr = 0; - nhdr->p_paddr = 0; - nhdr->p_filesz = 0; - nhdr->p_memsz = 0; - nhdr->p_flags = 0; - nhdr->p_align = 0; - - /* setup ELF PT_LOAD program header for every area */ - list_for_each_entry(m, &kclist_head, list) { - phdr = (struct elf_phdr *) bufp; - bufp += sizeof(struct elf_phdr); - offset += sizeof(struct elf_phdr); - - phdr->p_type = PT_LOAD; - phdr->p_flags = PF_R|PF_W|PF_X; - phdr->p_offset = kc_vaddr_to_offset(m->addr) + dataoff; - phdr->p_vaddr = (size_t)m->addr; - phdr->p_paddr = 0; - phdr->p_filesz = phdr->p_memsz = m->size; - phdr->p_align = PAGE_SIZE; - } - + percpu_down_read(&kclist_lock); /* - * Set up 
the notes in similar form to SVR4 core dumps made - * with info from their /proc. + * Don't race against drivers that set PageOffline() and expect no + * further page access. */ - nhdr->p_offset = offset; - - /* set up the process status */ - notes[0].name = CORE_STR; - notes[0].type = NT_PRSTATUS; - notes[0].datasz = sizeof(struct elf_prstatus); - notes[0].data = &prstatus; - - memset(&prstatus, 0, sizeof(struct elf_prstatus)); - - nhdr->p_filesz = notesize(&notes[0]); - bufp = storenote(&notes[0], bufp); - - /* set up the process info */ - notes[1].name = CORE_STR; - notes[1].type = NT_PRPSINFO; - notes[1].datasz = sizeof(struct elf_prpsinfo); - notes[1].data = &prpsinfo; - - memset(&prpsinfo, 0, sizeof(struct elf_prpsinfo)); - prpsinfo.pr_state = 0; - prpsinfo.pr_sname = 'R'; - prpsinfo.pr_zomb = 0; - - strcpy(prpsinfo.pr_fname, "vmlinux"); - strlcpy(prpsinfo.pr_psargs, saved_command_line, sizeof(prpsinfo.pr_psargs)); + page_offline_freeze(); + + phdrs_offset = sizeof(struct elfhdr); + notes_offset = phdrs_offset + kcore_phdrs_len; + + /* ELF file header. 
*/ + if (buflen && *fpos < sizeof(struct elfhdr)) { + struct elfhdr ehdr = { + .e_ident = { + [EI_MAG0] = ELFMAG0, + [EI_MAG1] = ELFMAG1, + [EI_MAG2] = ELFMAG2, + [EI_MAG3] = ELFMAG3, + [EI_CLASS] = ELF_CLASS, + [EI_DATA] = ELF_DATA, + [EI_VERSION] = EV_CURRENT, + [EI_OSABI] = ELF_OSABI, + }, + .e_type = ET_CORE, + .e_machine = ELF_ARCH, + .e_version = EV_CURRENT, + .e_phoff = sizeof(struct elfhdr), + .e_flags = ELF_CORE_EFLAGS, + .e_ehsize = sizeof(struct elfhdr), + .e_phentsize = sizeof(struct elf_phdr), + .e_phnum = kcore_nphdr, + }; + + tsz = min_t(size_t, buflen, sizeof(struct elfhdr) - *fpos); + if (copy_to_iter((char *)&ehdr + *fpos, tsz, iter) != tsz) { + ret = -EFAULT; + goto out; + } - nhdr->p_filesz += notesize(&notes[1]); - bufp = storenote(&notes[1], bufp); + buflen -= tsz; + *fpos += tsz; + } - /* set up the task structure */ - notes[2].name = CORE_STR; - notes[2].type = NT_TASKSTRUCT; - notes[2].datasz = sizeof(struct task_struct); - notes[2].data = current; + /* ELF program headers. 
*/ + if (buflen && *fpos < phdrs_offset + kcore_phdrs_len) { + struct elf_phdr *phdrs, *phdr; - nhdr->p_filesz += notesize(&notes[2]); - bufp = storenote(&notes[2], bufp); + phdrs = kzalloc(kcore_phdrs_len, GFP_KERNEL); + if (!phdrs) { + ret = -ENOMEM; + goto out; + } -} /* end elf_kcore_store_hdr() */ + phdrs[0].p_type = PT_NOTE; + phdrs[0].p_offset = notes_offset; + phdrs[0].p_filesz = kcore_notes_len; -/*****************************************************************************/ -/* - * read from the ELF header and then kernel memory - */ -static ssize_t -read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) -{ - ssize_t acc = 0; - size_t size, tsz; - size_t elf_buflen; - int nphdr; - unsigned long start; + phdr = &phdrs[1]; + list_for_each_entry(m, &kclist_head, list) { + phdr->p_type = PT_LOAD; + phdr->p_flags = PF_R | PF_W | PF_X; + phdr->p_offset = kc_vaddr_to_offset(m->addr) + + kcore_data_offset; + phdr->p_vaddr = (size_t)m->addr; + if (m->type == KCORE_RAM) + phdr->p_paddr = __pa(m->addr); + else if (m->type == KCORE_TEXT) + phdr->p_paddr = __pa_symbol(m->addr); + else + phdr->p_paddr = (elf_addr_t)-1; + phdr->p_filesz = phdr->p_memsz = m->size; + phdr->p_align = PAGE_SIZE; + phdr++; + } - read_lock(&kclist_lock); - size = get_kcore_size(&nphdr, &elf_buflen); + tsz = min_t(size_t, buflen, + phdrs_offset + kcore_phdrs_len - *fpos); + if (copy_to_iter((char *)phdrs + *fpos - phdrs_offset, tsz, + iter) != tsz) { + kfree(phdrs); + ret = -EFAULT; + goto out; + } + kfree(phdrs); - if (buflen == 0 || *fpos >= size) { - read_unlock(&kclist_lock); - return 0; + buflen -= tsz; + *fpos += tsz; } - /* trim buflen to not go beyond EOF */ - if (buflen > size - *fpos) - buflen = size - *fpos; - - /* construct an ELF core header if we'll need some of it */ - if (*fpos < elf_buflen) { - char * elf_buf; - - tsz = elf_buflen - *fpos; - if (buflen < tsz) - tsz = buflen; - elf_buf = kzalloc(elf_buflen, GFP_ATOMIC); - if (!elf_buf) { - 
read_unlock(&kclist_lock); - return -ENOMEM; + /* ELF note segment. */ + if (buflen && *fpos < notes_offset + kcore_notes_len) { + struct elf_prstatus prstatus = {}; + struct elf_prpsinfo prpsinfo = { + .pr_sname = 'R', + .pr_fname = "vmlinux", + }; + char *notes; + size_t i = 0; + + strscpy(prpsinfo.pr_psargs, saved_command_line, + sizeof(prpsinfo.pr_psargs)); + + notes = kzalloc(kcore_notes_len, GFP_KERNEL); + if (!notes) { + ret = -ENOMEM; + goto out; } - elf_kcore_store_hdr(elf_buf, nphdr, elf_buflen); - read_unlock(&kclist_lock); - if (copy_to_user(buffer, elf_buf + *fpos, tsz)) { - kfree(elf_buf); - return -EFAULT; + + append_kcore_note(notes, &i, NN_PRSTATUS, NT_PRSTATUS, &prstatus, + sizeof(prstatus)); + append_kcore_note(notes, &i, NN_PRPSINFO, NT_PRPSINFO, &prpsinfo, + sizeof(prpsinfo)); + append_kcore_note(notes, &i, NN_TASKSTRUCT, NT_TASKSTRUCT, current, + arch_task_struct_size); + /* + * vmcoreinfo_size is mostly constant after init time, but it + * can be changed by crash_save_vmcoreinfo(). Racing here with a + * panic on another CPU before the machine goes down is insanely + * unlikely, but it's better to not leave potential buffer + * overflows lying around, regardless. + */ + append_kcore_note(notes, &i, VMCOREINFO_NOTE_NAME, 0, + vmcoreinfo_data, + min(vmcoreinfo_size, kcore_notes_len - i)); + + tsz = min_t(size_t, buflen, + notes_offset + kcore_notes_len - *fpos); + if (copy_to_iter(notes + *fpos - notes_offset, tsz, iter) != tsz) { + kfree(notes); + ret = -EFAULT; + goto out; } - kfree(elf_buf); + kfree(notes); + buflen -= tsz; *fpos += tsz; - buffer += tsz; - acc += tsz; - - /* leave now if filled buffer already */ - if (buflen == 0) - return acc; - } else - read_unlock(&kclist_lock); + } /* * Check to see if our file offset matches with any of * the addresses in the elf_phdr on our list. 
*/ - start = kc_offset_to_vaddr(*fpos - elf_buflen); + start = kc_offset_to_vaddr(*fpos - kcore_data_offset); if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen) tsz = buflen; - + + m = NULL; while (buflen) { - struct kcore_list *m; + struct page *page; + unsigned long pfn; + phys_addr_t phys; + void *__start; + + /* + * If this is the first iteration or the address is not within + * the previous entry, search for a matching entry. + */ + if (!m || start < m->addr || start >= m->addr + m->size) { + struct kcore_list *pos; + + m = NULL; + list_for_each_entry(pos, &kclist_head, list) { + if (start >= pos->addr && + start < pos->addr + pos->size) { + m = pos; + break; + } + } + } - read_lock(&kclist_lock); - list_for_each_entry(m, &kclist_head, list) { - if (start >= m->addr && start < (m->addr+m->size)) - break; + if (page_offline_frozen++ % MAX_ORDER_NR_PAGES == 0) { + page_offline_thaw(); + cond_resched(); + page_offline_freeze(); + } + + if (!m) { + if (iov_iter_zero(tsz, iter) != tsz) { + ret = -EFAULT; + goto out; + } + goto skip; + } + + switch (m->type) { + case KCORE_VMALLOC: + { + const char *src = (char *)start; + size_t read = 0, left = tsz; + + /* + * vmalloc uses spinlocks, so we optimistically try to + * read memory. If this fails, fault pages in and try + * again until we are done. 
+ */ + while (true) { + read += vread_iter(iter, src, left); + if (read == tsz) + break; + + src += read; + left -= read; + + if (fault_in_iov_iter_writeable(iter, left)) { + ret = -EFAULT; + goto out; + } + } + break; } - read_unlock(&kclist_lock); - - if (&m->list == &kclist_head) { - if (clear_user(buffer, tsz)) - return -EFAULT; - } else if (is_vmalloc_or_module_addr((void *)start)) { - char * elf_buf; - - elf_buf = kzalloc(tsz, GFP_KERNEL); - if (!elf_buf) - return -ENOMEM; - vread(elf_buf, (char *)start, tsz); - /* we have to zero-fill user buffer even if no read */ - if (copy_to_user(buffer, elf_buf, tsz)) { - kfree(elf_buf); - return -EFAULT; + case KCORE_USER: + /* User page is handled prior to normal kernel page: */ + if (copy_to_iter((char *)start, tsz, iter) != tsz) { + ret = -EFAULT; + goto out; + } + break; + case KCORE_RAM: + phys = __pa(start); + pfn = phys >> PAGE_SHIFT; + page = pfn_to_online_page(pfn); + + /* + * Don't read offline sections, logically offline pages + * (e.g., inflated in a balloon), hwpoisoned pages, + * and explicitly excluded physical ranges. + */ + if (!page || PageOffline(page) || + is_page_hwpoison(page) || !pfn_is_ram(pfn) || + pfn_is_unaccepted_memory(pfn)) { + if (iov_iter_zero(tsz, iter) != tsz) { + ret = -EFAULT; + goto out; + } + break; } - kfree(elf_buf); - } else { - if (kern_addr_valid(start)) { - unsigned long n; - - n = copy_to_user(buffer, (char *)start, tsz); - /* - * We cannot distinguish between fault on source - * and fault on destination. When this happens - * we clear too and hope it will trigger the - * EFAULT again. 
- */ - if (n) { - if (clear_user(buffer + tsz - n, - n)) - return -EFAULT; + fallthrough; + case KCORE_VMEMMAP: + case KCORE_TEXT: + if (m->type == KCORE_RAM) { + __start = kc_xlate_dev_mem_ptr(phys); + if (!__start) { + ret = -ENOMEM; + if (iov_iter_zero(tsz, iter) != tsz) + ret = -EFAULT; + goto out; } } else { - if (clear_user(buffer, tsz)) - return -EFAULT; + __start = (void *)start; + } + + /* + * Sadly we must use a bounce buffer here to be able to + * make use of copy_from_kernel_nofault(), as these + * memory regions might not always be mapped on all + * architectures. + */ + ret = copy_from_kernel_nofault(buf, __start, tsz); + if (m->type == KCORE_RAM) + kc_unxlate_dev_mem_ptr(phys, __start); + if (ret) { + if (iov_iter_zero(tsz, iter) != tsz) { + ret = -EFAULT; + goto out; + } + ret = 0; + /* + * We know the bounce buffer is safe to copy from, so + * use _copy_to_iter() directly. + */ + } else if (_copy_to_iter(buf, tsz, iter) != tsz) { + ret = -EFAULT; + goto out; + } + break; + default: + pr_warn_once("Unhandled KCORE type: %d\n", m->type); + if (iov_iter_zero(tsz, iter) != tsz) { + ret = -EFAULT; + goto out; } } +skip: buflen -= tsz; *fpos += tsz; - buffer += tsz; - acc += tsz; start += tsz; tsz = (buflen > PAGE_SIZE ? 
PAGE_SIZE : buflen); } - return acc; +out: + page_offline_thaw(); + percpu_up_read(&kclist_lock); + if (ret) + return ret; + return orig_buflen - buflen; } - static int open_kcore(struct inode *inode, struct file *filp) { + int ret = security_locked_down(LOCKDOWN_KCORE); + if (!capable(CAP_SYS_RAWIO)) return -EPERM; + + if (ret) + return ret; + + filp->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!filp->private_data) + return -ENOMEM; + if (kcore_need_update) kcore_update_ram(); if (i_size_read(inode) != proc_root_kcore->size) { - mutex_lock(&inode->i_mutex); + inode_lock(inode); i_size_write(inode, proc_root_kcore->size); - mutex_unlock(&inode->i_mutex); + inode_unlock(inode); } return 0; } +static int release_kcore(struct inode *inode, struct file *file) +{ + kfree(file->private_data); + return 0; +} -static const struct file_operations proc_kcore_operations = { - .read = read_kcore, - .open = open_kcore, - .llseek = default_llseek, +static const struct proc_ops kcore_proc_ops = { + .proc_flags = PROC_ENTRY_PERMANENT, + .proc_read_iter = read_kcore_iter, + .proc_open = open_kcore, + .proc_release = release_kcore, + .proc_lseek = default_llseek, }; /* just remember that we have to update kcore */ @@ -574,17 +677,12 @@ static int __meminit kcore_callback(struct notifier_block *self, switch (action) { case MEM_ONLINE: case MEM_OFFLINE: - write_lock(&kclist_lock); kcore_need_update = 1; - write_unlock(&kclist_lock); + break; } return NOTIFY_OK; } -static struct notifier_block kcore_callback_nb __meminitdata = { - .notifier_call = kcore_callback, - .priority = 0, -}; static struct kcore_list kcore_vmalloc; @@ -608,11 +706,13 @@ static void __init proc_kcore_text_init(void) /* * MODULES_VADDR has no intersection with VMALLOC_ADDR. 
*/ -struct kcore_list kcore_modules; +static struct kcore_list kcore_modules; static void __init add_modules_range(void) { - kclist_add(&kcore_modules, (void *)MODULES_VADDR, + if (MODULES_VADDR != VMALLOC_START && MODULES_END != VMALLOC_END) { + kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_END - MODULES_VADDR, KCORE_VMALLOC); + } } #else static void __init add_modules_range(void) @@ -622,8 +722,7 @@ static void __init add_modules_range(void) static int __init proc_kcore_init(void) { - proc_root_kcore = proc_create("kcore", S_IRUSR, NULL, - &proc_kcore_operations); + proc_root_kcore = proc_create("kcore", S_IRUSR, NULL, &kcore_proc_ops); if (!proc_root_kcore) { pr_err("couldn't create /proc/kcore\n"); return 0; /* Always returns 0. */ @@ -636,8 +735,8 @@ static int __init proc_kcore_init(void) add_modules_range(); /* Store direct-map area from physical memory map */ kcore_update_ram(); - register_hotmemory_notifier(&kcore_callback_nb); + hotplug_memory_notifier(kcore_callback, DEFAULT_CALLBACK_PRI); return 0; } -module_init(proc_kcore_init); +fs_initcall(proc_kcore_init); |
