summaryrefslogtreecommitdiff
path: root/fs/proc/kcore.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/proc/kcore.c')
-rw-r--r--fs/proc/kcore.c767
1 files changed, 433 insertions, 334 deletions
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 06ea155e1a59..728630b10fdf 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* fs/proc/kcore.c kernel ELF core dumper
*
@@ -9,6 +10,7 @@
* Safe accesses to vmalloc/direct-mapped discontiguous areas, Kanoj Sarcar <kanoj@sgi.com>
*/
+#include <linux/vmcore_info.h>
#include <linux/mm.h>
#include <linux/proc_fs.h>
#include <linux/kcore.h>
@@ -16,23 +18,22 @@
#include <linux/capability.h>
#include <linux/elf.h>
#include <linux/elfcore.h>
-#include <linux/notifier.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <linux/printk.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/init.h>
#include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uio.h>
#include <asm/io.h>
#include <linux/list.h>
#include <linux/ioport.h>
#include <linux/memory.h>
+#include <linux/sched/task.h>
+#include <linux/security.h>
#include <asm/sections.h>
#include "internal.h"
-#define CORE_STR "CORE"
-
#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS 0
#endif
@@ -47,104 +48,100 @@ static struct proc_dir_entry *proc_root_kcore;
#define kc_offset_to_vaddr(o) ((o) + PAGE_OFFSET)
#endif
-/* An ELF note in memory */
-struct memelfnote
+#ifndef kc_xlate_dev_mem_ptr
+#define kc_xlate_dev_mem_ptr kc_xlate_dev_mem_ptr
+static inline void *kc_xlate_dev_mem_ptr(phys_addr_t phys)
{
- const char *name;
- int type;
- unsigned int datasz;
- void *data;
-};
+ return __va(phys);
+}
+#endif
+#ifndef kc_unxlate_dev_mem_ptr
+#define kc_unxlate_dev_mem_ptr kc_unxlate_dev_mem_ptr
+static inline void kc_unxlate_dev_mem_ptr(phys_addr_t phys, void *virt)
+{
+}
+#endif
static LIST_HEAD(kclist_head);
-static DEFINE_RWLOCK(kclist_lock);
+static int kcore_nphdr;
+static size_t kcore_phdrs_len;
+static size_t kcore_notes_len;
+static size_t kcore_data_offset;
+DEFINE_STATIC_PERCPU_RWSEM(kclist_lock);
static int kcore_need_update = 1;
-void
-kclist_add(struct kcore_list *new, void *addr, size_t size, int type)
+/*
+ * Returns > 0 for RAM pages, 0 for non-RAM pages, < 0 on error
+ * Same as oldmem_pfn_is_ram in vmcore
+ */
+static int (*mem_pfn_is_ram)(unsigned long pfn);
+
+int __init register_mem_pfn_is_ram(int (*fn)(unsigned long pfn))
+{
+ if (mem_pfn_is_ram)
+ return -EBUSY;
+ mem_pfn_is_ram = fn;
+ return 0;
+}
+
+static int pfn_is_ram(unsigned long pfn)
+{
+ if (mem_pfn_is_ram)
+ return mem_pfn_is_ram(pfn);
+ else
+ return 1;
+}
+
+/* This doesn't grab kclist_lock, so it should only be used at init time. */
+void __init kclist_add(struct kcore_list *new, void *addr, size_t size,
+ int type)
{
new->addr = (unsigned long)addr;
new->size = size;
new->type = type;
- write_lock(&kclist_lock);
list_add_tail(&new->list, &kclist_head);
- write_unlock(&kclist_lock);
}
-static size_t get_kcore_size(int *nphdr, size_t *elf_buflen)
+static void update_kcore_size(void)
{
size_t try, size;
struct kcore_list *m;
- *nphdr = 1; /* PT_NOTE */
+ kcore_nphdr = 1; /* PT_NOTE */
size = 0;
list_for_each_entry(m, &kclist_head, list) {
try = kc_vaddr_to_offset((size_t)m->addr + m->size);
if (try > size)
size = try;
- *nphdr = *nphdr + 1;
+ kcore_nphdr++;
}
- *elf_buflen = sizeof(struct elfhdr) +
- (*nphdr + 2)*sizeof(struct elf_phdr) +
- 3 * ((sizeof(struct elf_note)) +
- roundup(sizeof(CORE_STR), 4)) +
- roundup(sizeof(struct elf_prstatus), 4) +
- roundup(sizeof(struct elf_prpsinfo), 4) +
- roundup(sizeof(struct task_struct), 4);
- *elf_buflen = PAGE_ALIGN(*elf_buflen);
- return size + *elf_buflen;
-}
-
-static void free_kclist_ents(struct list_head *head)
-{
- struct kcore_list *tmp, *pos;
-
- list_for_each_entry_safe(pos, tmp, head, list) {
- list_del(&pos->list);
- kfree(pos);
- }
-}
-/*
- * Replace all KCORE_RAM/KCORE_VMEMMAP information with passed list.
- */
-static void __kcore_update_ram(struct list_head *list)
-{
- int nphdr;
- size_t size;
- struct kcore_list *tmp, *pos;
- LIST_HEAD(garbage);
- write_lock(&kclist_lock);
- if (kcore_need_update) {
- list_for_each_entry_safe(pos, tmp, &kclist_head, list) {
- if (pos->type == KCORE_RAM
- || pos->type == KCORE_VMEMMAP)
- list_move(&pos->list, &garbage);
- }
- list_splice_tail(list, &kclist_head);
- } else
- list_splice(list, &garbage);
- kcore_need_update = 0;
- proc_root_kcore->size = get_kcore_size(&nphdr, &size);
- write_unlock(&kclist_lock);
-
- free_kclist_ents(&garbage);
+ kcore_phdrs_len = kcore_nphdr * sizeof(struct elf_phdr);
+ kcore_notes_len = (4 * sizeof(struct elf_note) +
+ ALIGN(sizeof(NN_PRSTATUS), 4) +
+ ALIGN(sizeof(NN_PRPSINFO), 4) +
+ ALIGN(sizeof(NN_TASKSTRUCT), 4) +
+ VMCOREINFO_NOTE_NAME_BYTES +
+ ALIGN(sizeof(struct elf_prstatus), 4) +
+ ALIGN(sizeof(struct elf_prpsinfo), 4) +
+ ALIGN(arch_task_struct_size, 4) +
+ ALIGN(vmcoreinfo_size, 4));
+ kcore_data_offset = PAGE_ALIGN(sizeof(struct elfhdr) + kcore_phdrs_len +
+ kcore_notes_len);
+ proc_root_kcore->size = kcore_data_offset + size;
}
-
#ifdef CONFIG_HIGHMEM
/*
* If no highmem, we can assume [0...max_low_pfn) continuous range of memory
* because memory hole is not as big as !HIGHMEM case.
* (HIGHMEM is special because part of memory is _invisible_ from the kernel.)
*/
-static int kcore_update_ram(void)
+static int kcore_ram_list(struct list_head *head)
{
- LIST_HEAD(head);
struct kcore_list *ent;
- int ret = 0;
ent = kmalloc(sizeof(*ent), GFP_KERNEL);
if (!ent)
@@ -152,9 +149,8 @@ static int kcore_update_ram(void)
ent->addr = (unsigned long)__va(0);
ent->size = max_low_pfn << PAGE_SHIFT;
ent->type = KCORE_RAM;
- list_add(&ent->list, &head);
- __kcore_update_ram(&head);
- return ret;
+ list_add(&ent->list, head);
+ return 0;
}
#else /* !CONFIG_HIGHMEM */
@@ -172,7 +168,7 @@ get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head)
start = ((unsigned long)pfn_to_page(pfn)) & PAGE_MASK;
end = ((unsigned long)pfn_to_page(pfn + nr_pages)) - 1;
- end = ALIGN(end, PAGE_SIZE);
+ end = PAGE_ALIGN(end);
/* overlap check (because we have to align page */
list_for_each_entry(tmp, head, list) {
if (tmp->type != KCORE_VMEMMAP)
@@ -207,25 +203,32 @@ kclist_add_private(unsigned long pfn, unsigned long nr_pages, void *arg)
{
struct list_head *head = (struct list_head *)arg;
struct kcore_list *ent;
+ struct page *p;
+
+ if (!pfn_valid(pfn))
+ return 1;
+
+ p = pfn_to_page(pfn);
ent = kmalloc(sizeof(*ent), GFP_KERNEL);
if (!ent)
return -ENOMEM;
- ent->addr = (unsigned long)__va((pfn << PAGE_SHIFT));
+ ent->addr = (unsigned long)page_to_virt(p);
ent->size = nr_pages << PAGE_SHIFT;
- /* Sanity check: Can happen in 32bit arch...maybe */
- if (ent->addr < (unsigned long) __va(0))
+ if (!virt_addr_valid((void *)ent->addr))
goto free_out;
/* cut not-mapped area. ....from ppc-32 code. */
if (ULONG_MAX - ent->addr < ent->size)
ent->size = ULONG_MAX - ent->addr;
- /* cut when vmalloc() area is higher than direct-map area */
- if (VMALLOC_START > (unsigned long)__va(0)) {
- if (ent->addr > VMALLOC_START)
- goto free_out;
+ /*
+ * We've already checked virt_addr_valid so we know this address
+ * is a valid pointer, therefore we can check against it to determine
+ * if we need to trim
+ */
+ if (VMALLOC_START > ent->addr) {
if (VMALLOC_START - ent->addr < ent->size)
ent->size = VMALLOC_START - ent->addr;
}
@@ -244,327 +247,427 @@ free_out:
return 1;
}
-static int kcore_update_ram(void)
+static int kcore_ram_list(struct list_head *list)
{
int nid, ret;
unsigned long end_pfn;
- LIST_HEAD(head);
- /* Not inialized....update now */
+ /* Not initialized....update now */
/* find out "max pfn" */
end_pfn = 0;
for_each_node_state(nid, N_MEMORY) {
unsigned long node_end;
- node_end = NODE_DATA(nid)->node_start_pfn +
- NODE_DATA(nid)->node_spanned_pages;
+ node_end = node_end_pfn(nid);
if (end_pfn < node_end)
end_pfn = node_end;
}
/* scan 0 to max_pfn */
- ret = walk_system_ram_range(0, end_pfn, &head, kclist_add_private);
- if (ret) {
- free_kclist_ents(&head);
+ ret = walk_system_ram_range(0, end_pfn, list, kclist_add_private);
+ if (ret)
return -ENOMEM;
- }
- __kcore_update_ram(&head);
- return ret;
+ return 0;
}
#endif /* CONFIG_HIGHMEM */
-/*****************************************************************************/
-/*
- * determine size of ELF note
- */
-static int notesize(struct memelfnote *en)
-{
- int sz;
-
- sz = sizeof(struct elf_note);
- sz += roundup((strlen(en->name) + 1), 4);
- sz += roundup(en->datasz, 4);
-
- return sz;
-} /* end notesize() */
-
-/*****************************************************************************/
-/*
- * store a note in the header buffer
- */
-static char *storenote(struct memelfnote *men, char *bufp)
+static int kcore_update_ram(void)
{
- struct elf_note en;
+ LIST_HEAD(list);
+ LIST_HEAD(garbage);
+ struct kcore_list *tmp, *pos;
+ int ret = 0;
-#define DUMP_WRITE(addr,nr) do { memcpy(bufp,addr,nr); bufp += nr; } while(0)
+ percpu_down_write(&kclist_lock);
+ if (!xchg(&kcore_need_update, 0))
+ goto out;
- en.n_namesz = strlen(men->name) + 1;
- en.n_descsz = men->datasz;
- en.n_type = men->type;
+ ret = kcore_ram_list(&list);
+ if (ret) {
+ /* Couldn't get the RAM list, try again next time. */
+ WRITE_ONCE(kcore_need_update, 1);
+ list_splice_tail(&list, &garbage);
+ goto out;
+ }
- DUMP_WRITE(&en, sizeof(en));
- DUMP_WRITE(men->name, en.n_namesz);
+ list_for_each_entry_safe(pos, tmp, &kclist_head, list) {
+ if (pos->type == KCORE_RAM || pos->type == KCORE_VMEMMAP)
+ list_move(&pos->list, &garbage);
+ }
+ list_splice_tail(&list, &kclist_head);
- /* XXX - cast from long long to long to avoid need for libgcc.a */
- bufp = (char*) roundup((unsigned long)bufp,4);
- DUMP_WRITE(men->data, men->datasz);
- bufp = (char*) roundup((unsigned long)bufp,4);
+ update_kcore_size();
-#undef DUMP_WRITE
+out:
+ percpu_up_write(&kclist_lock);
+ list_for_each_entry_safe(pos, tmp, &garbage, list) {
+ list_del(&pos->list);
+ kfree(pos);
+ }
+ return ret;
+}
- return bufp;
-} /* end storenote() */
+static void append_kcore_note(char *notes, size_t *i, const char *name,
+ unsigned int type, const void *desc,
+ size_t descsz)
+{
+ struct elf_note *note = (struct elf_note *)&notes[*i];
+
+ note->n_namesz = strlen(name) + 1;
+ note->n_descsz = descsz;
+ note->n_type = type;
+ *i += sizeof(*note);
+ memcpy(&notes[*i], name, note->n_namesz);
+ *i = ALIGN(*i + note->n_namesz, 4);
+ memcpy(&notes[*i], desc, descsz);
+ *i = ALIGN(*i + descsz, 4);
+}
-/*
- * store an ELF coredump header in the supplied buffer
- * nphdr is the number of elf_phdr to insert
- */
-static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
+static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
{
- struct elf_prstatus prstatus; /* NT_PRSTATUS */
- struct elf_prpsinfo prpsinfo; /* NT_PRPSINFO */
- struct elf_phdr *nhdr, *phdr;
- struct elfhdr *elf;
- struct memelfnote notes[3];
- off_t offset = 0;
+ struct file *file = iocb->ki_filp;
+ char *buf = file->private_data;
+ loff_t *fpos = &iocb->ki_pos;
+ size_t phdrs_offset, notes_offset;
+ size_t page_offline_frozen = 1;
struct kcore_list *m;
+ size_t tsz;
+ unsigned long start;
+ size_t buflen = iov_iter_count(iter);
+ size_t orig_buflen = buflen;
+ int ret = 0;
- /* setup ELF header */
- elf = (struct elfhdr *) bufp;
- bufp += sizeof(struct elfhdr);
- offset += sizeof(struct elfhdr);
- memcpy(elf->e_ident, ELFMAG, SELFMAG);
- elf->e_ident[EI_CLASS] = ELF_CLASS;
- elf->e_ident[EI_DATA] = ELF_DATA;
- elf->e_ident[EI_VERSION]= EV_CURRENT;
- elf->e_ident[EI_OSABI] = ELF_OSABI;
- memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
- elf->e_type = ET_CORE;
- elf->e_machine = ELF_ARCH;
- elf->e_version = EV_CURRENT;
- elf->e_entry = 0;
- elf->e_phoff = sizeof(struct elfhdr);
- elf->e_shoff = 0;
- elf->e_flags = ELF_CORE_EFLAGS;
- elf->e_ehsize = sizeof(struct elfhdr);
- elf->e_phentsize= sizeof(struct elf_phdr);
- elf->e_phnum = nphdr;
- elf->e_shentsize= 0;
- elf->e_shnum = 0;
- elf->e_shstrndx = 0;
-
- /* setup ELF PT_NOTE program header */
- nhdr = (struct elf_phdr *) bufp;
- bufp += sizeof(struct elf_phdr);
- offset += sizeof(struct elf_phdr);
- nhdr->p_type = PT_NOTE;
- nhdr->p_offset = 0;
- nhdr->p_vaddr = 0;
- nhdr->p_paddr = 0;
- nhdr->p_filesz = 0;
- nhdr->p_memsz = 0;
- nhdr->p_flags = 0;
- nhdr->p_align = 0;
-
- /* setup ELF PT_LOAD program header for every area */
- list_for_each_entry(m, &kclist_head, list) {
- phdr = (struct elf_phdr *) bufp;
- bufp += sizeof(struct elf_phdr);
- offset += sizeof(struct elf_phdr);
-
- phdr->p_type = PT_LOAD;
- phdr->p_flags = PF_R|PF_W|PF_X;
- phdr->p_offset = kc_vaddr_to_offset(m->addr) + dataoff;
- phdr->p_vaddr = (size_t)m->addr;
- phdr->p_paddr = 0;
- phdr->p_filesz = phdr->p_memsz = m->size;
- phdr->p_align = PAGE_SIZE;
- }
-
+ percpu_down_read(&kclist_lock);
/*
- * Set up the notes in similar form to SVR4 core dumps made
- * with info from their /proc.
+ * Don't race against drivers that set PageOffline() and expect no
+ * further page access.
*/
- nhdr->p_offset = offset;
-
- /* set up the process status */
- notes[0].name = CORE_STR;
- notes[0].type = NT_PRSTATUS;
- notes[0].datasz = sizeof(struct elf_prstatus);
- notes[0].data = &prstatus;
-
- memset(&prstatus, 0, sizeof(struct elf_prstatus));
-
- nhdr->p_filesz = notesize(&notes[0]);
- bufp = storenote(&notes[0], bufp);
-
- /* set up the process info */
- notes[1].name = CORE_STR;
- notes[1].type = NT_PRPSINFO;
- notes[1].datasz = sizeof(struct elf_prpsinfo);
- notes[1].data = &prpsinfo;
-
- memset(&prpsinfo, 0, sizeof(struct elf_prpsinfo));
- prpsinfo.pr_state = 0;
- prpsinfo.pr_sname = 'R';
- prpsinfo.pr_zomb = 0;
-
- strcpy(prpsinfo.pr_fname, "vmlinux");
- strlcpy(prpsinfo.pr_psargs, saved_command_line, sizeof(prpsinfo.pr_psargs));
+ page_offline_freeze();
+
+ phdrs_offset = sizeof(struct elfhdr);
+ notes_offset = phdrs_offset + kcore_phdrs_len;
+
+ /* ELF file header. */
+ if (buflen && *fpos < sizeof(struct elfhdr)) {
+ struct elfhdr ehdr = {
+ .e_ident = {
+ [EI_MAG0] = ELFMAG0,
+ [EI_MAG1] = ELFMAG1,
+ [EI_MAG2] = ELFMAG2,
+ [EI_MAG3] = ELFMAG3,
+ [EI_CLASS] = ELF_CLASS,
+ [EI_DATA] = ELF_DATA,
+ [EI_VERSION] = EV_CURRENT,
+ [EI_OSABI] = ELF_OSABI,
+ },
+ .e_type = ET_CORE,
+ .e_machine = ELF_ARCH,
+ .e_version = EV_CURRENT,
+ .e_phoff = sizeof(struct elfhdr),
+ .e_flags = ELF_CORE_EFLAGS,
+ .e_ehsize = sizeof(struct elfhdr),
+ .e_phentsize = sizeof(struct elf_phdr),
+ .e_phnum = kcore_nphdr,
+ };
+
+ tsz = min_t(size_t, buflen, sizeof(struct elfhdr) - *fpos);
+ if (copy_to_iter((char *)&ehdr + *fpos, tsz, iter) != tsz) {
+ ret = -EFAULT;
+ goto out;
+ }
- nhdr->p_filesz += notesize(&notes[1]);
- bufp = storenote(&notes[1], bufp);
+ buflen -= tsz;
+ *fpos += tsz;
+ }
- /* set up the task structure */
- notes[2].name = CORE_STR;
- notes[2].type = NT_TASKSTRUCT;
- notes[2].datasz = sizeof(struct task_struct);
- notes[2].data = current;
+ /* ELF program headers. */
+ if (buflen && *fpos < phdrs_offset + kcore_phdrs_len) {
+ struct elf_phdr *phdrs, *phdr;
- nhdr->p_filesz += notesize(&notes[2]);
- bufp = storenote(&notes[2], bufp);
+ phdrs = kzalloc(kcore_phdrs_len, GFP_KERNEL);
+ if (!phdrs) {
+ ret = -ENOMEM;
+ goto out;
+ }
-} /* end elf_kcore_store_hdr() */
+ phdrs[0].p_type = PT_NOTE;
+ phdrs[0].p_offset = notes_offset;
+ phdrs[0].p_filesz = kcore_notes_len;
-/*****************************************************************************/
-/*
- * read from the ELF header and then kernel memory
- */
-static ssize_t
-read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
-{
- ssize_t acc = 0;
- size_t size, tsz;
- size_t elf_buflen;
- int nphdr;
- unsigned long start;
+ phdr = &phdrs[1];
+ list_for_each_entry(m, &kclist_head, list) {
+ phdr->p_type = PT_LOAD;
+ phdr->p_flags = PF_R | PF_W | PF_X;
+ phdr->p_offset = kc_vaddr_to_offset(m->addr)
+ + kcore_data_offset;
+ phdr->p_vaddr = (size_t)m->addr;
+ if (m->type == KCORE_RAM)
+ phdr->p_paddr = __pa(m->addr);
+ else if (m->type == KCORE_TEXT)
+ phdr->p_paddr = __pa_symbol(m->addr);
+ else
+ phdr->p_paddr = (elf_addr_t)-1;
+ phdr->p_filesz = phdr->p_memsz = m->size;
+ phdr->p_align = PAGE_SIZE;
+ phdr++;
+ }
- read_lock(&kclist_lock);
- size = get_kcore_size(&nphdr, &elf_buflen);
+ tsz = min_t(size_t, buflen,
+ phdrs_offset + kcore_phdrs_len - *fpos);
+ if (copy_to_iter((char *)phdrs + *fpos - phdrs_offset, tsz,
+ iter) != tsz) {
+ kfree(phdrs);
+ ret = -EFAULT;
+ goto out;
+ }
+ kfree(phdrs);
- if (buflen == 0 || *fpos >= size) {
- read_unlock(&kclist_lock);
- return 0;
+ buflen -= tsz;
+ *fpos += tsz;
}
- /* trim buflen to not go beyond EOF */
- if (buflen > size - *fpos)
- buflen = size - *fpos;
-
- /* construct an ELF core header if we'll need some of it */
- if (*fpos < elf_buflen) {
- char * elf_buf;
-
- tsz = elf_buflen - *fpos;
- if (buflen < tsz)
- tsz = buflen;
- elf_buf = kzalloc(elf_buflen, GFP_ATOMIC);
- if (!elf_buf) {
- read_unlock(&kclist_lock);
- return -ENOMEM;
+ /* ELF note segment. */
+ if (buflen && *fpos < notes_offset + kcore_notes_len) {
+ struct elf_prstatus prstatus = {};
+ struct elf_prpsinfo prpsinfo = {
+ .pr_sname = 'R',
+ .pr_fname = "vmlinux",
+ };
+ char *notes;
+ size_t i = 0;
+
+ strscpy(prpsinfo.pr_psargs, saved_command_line,
+ sizeof(prpsinfo.pr_psargs));
+
+ notes = kzalloc(kcore_notes_len, GFP_KERNEL);
+ if (!notes) {
+ ret = -ENOMEM;
+ goto out;
}
- elf_kcore_store_hdr(elf_buf, nphdr, elf_buflen);
- read_unlock(&kclist_lock);
- if (copy_to_user(buffer, elf_buf + *fpos, tsz)) {
- kfree(elf_buf);
- return -EFAULT;
+
+ append_kcore_note(notes, &i, NN_PRSTATUS, NT_PRSTATUS, &prstatus,
+ sizeof(prstatus));
+ append_kcore_note(notes, &i, NN_PRPSINFO, NT_PRPSINFO, &prpsinfo,
+ sizeof(prpsinfo));
+ append_kcore_note(notes, &i, NN_TASKSTRUCT, NT_TASKSTRUCT, current,
+ arch_task_struct_size);
+ /*
+ * vmcoreinfo_size is mostly constant after init time, but it
+ * can be changed by crash_save_vmcoreinfo(). Racing here with a
+ * panic on another CPU before the machine goes down is insanely
+ * unlikely, but it's better to not leave potential buffer
+ * overflows lying around, regardless.
+ */
+ append_kcore_note(notes, &i, VMCOREINFO_NOTE_NAME, 0,
+ vmcoreinfo_data,
+ min(vmcoreinfo_size, kcore_notes_len - i));
+
+ tsz = min_t(size_t, buflen,
+ notes_offset + kcore_notes_len - *fpos);
+ if (copy_to_iter(notes + *fpos - notes_offset, tsz, iter) != tsz) {
+ kfree(notes);
+ ret = -EFAULT;
+ goto out;
}
- kfree(elf_buf);
+ kfree(notes);
+
buflen -= tsz;
*fpos += tsz;
- buffer += tsz;
- acc += tsz;
-
- /* leave now if filled buffer already */
- if (buflen == 0)
- return acc;
- } else
- read_unlock(&kclist_lock);
+ }
/*
* Check to see if our file offset matches with any of
* the addresses in the elf_phdr on our list.
*/
- start = kc_offset_to_vaddr(*fpos - elf_buflen);
+ start = kc_offset_to_vaddr(*fpos - kcore_data_offset);
if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen)
tsz = buflen;
-
+
+ m = NULL;
while (buflen) {
- struct kcore_list *m;
+ struct page *page;
+ unsigned long pfn;
+ phys_addr_t phys;
+ void *__start;
+
+ /*
+ * If this is the first iteration or the address is not within
+ * the previous entry, search for a matching entry.
+ */
+ if (!m || start < m->addr || start >= m->addr + m->size) {
+ struct kcore_list *pos;
+
+ m = NULL;
+ list_for_each_entry(pos, &kclist_head, list) {
+ if (start >= pos->addr &&
+ start < pos->addr + pos->size) {
+ m = pos;
+ break;
+ }
+ }
+ }
- read_lock(&kclist_lock);
- list_for_each_entry(m, &kclist_head, list) {
- if (start >= m->addr && start < (m->addr+m->size))
- break;
+ if (page_offline_frozen++ % MAX_ORDER_NR_PAGES == 0) {
+ page_offline_thaw();
+ cond_resched();
+ page_offline_freeze();
+ }
+
+ if (!m) {
+ if (iov_iter_zero(tsz, iter) != tsz) {
+ ret = -EFAULT;
+ goto out;
+ }
+ goto skip;
+ }
+
+ switch (m->type) {
+ case KCORE_VMALLOC:
+ {
+ const char *src = (char *)start;
+ size_t read = 0, left = tsz;
+
+ /*
+ * vmalloc uses spinlocks, so we optimistically try to
+ * read memory. If this fails, fault pages in and try
+ * again until we are done.
+ */
+ while (true) {
+ read += vread_iter(iter, src, left);
+ if (read == tsz)
+ break;
+
+ src += read;
+ left -= read;
+
+ if (fault_in_iov_iter_writeable(iter, left)) {
+ ret = -EFAULT;
+ goto out;
+ }
+ }
+ break;
}
- read_unlock(&kclist_lock);
-
- if (&m->list == &kclist_head) {
- if (clear_user(buffer, tsz))
- return -EFAULT;
- } else if (is_vmalloc_or_module_addr((void *)start)) {
- char * elf_buf;
-
- elf_buf = kzalloc(tsz, GFP_KERNEL);
- if (!elf_buf)
- return -ENOMEM;
- vread(elf_buf, (char *)start, tsz);
- /* we have to zero-fill user buffer even if no read */
- if (copy_to_user(buffer, elf_buf, tsz)) {
- kfree(elf_buf);
- return -EFAULT;
+ case KCORE_USER:
+ /* User page is handled prior to normal kernel page: */
+ if (copy_to_iter((char *)start, tsz, iter) != tsz) {
+ ret = -EFAULT;
+ goto out;
+ }
+ break;
+ case KCORE_RAM:
+ phys = __pa(start);
+ pfn = phys >> PAGE_SHIFT;
+ page = pfn_to_online_page(pfn);
+
+ /*
+ * Don't read offline sections, logically offline pages
+ * (e.g., inflated in a balloon), hwpoisoned pages,
+ * and explicitly excluded physical ranges.
+ */
+ if (!page || PageOffline(page) ||
+ is_page_hwpoison(page) || !pfn_is_ram(pfn) ||
+ pfn_is_unaccepted_memory(pfn)) {
+ if (iov_iter_zero(tsz, iter) != tsz) {
+ ret = -EFAULT;
+ goto out;
+ }
+ break;
}
- kfree(elf_buf);
- } else {
- if (kern_addr_valid(start)) {
- unsigned long n;
-
- n = copy_to_user(buffer, (char *)start, tsz);
- /*
- * We cannot distinguish between fault on source
- * and fault on destination. When this happens
- * we clear too and hope it will trigger the
- * EFAULT again.
- */
- if (n) {
- if (clear_user(buffer + tsz - n,
- n))
- return -EFAULT;
+ fallthrough;
+ case KCORE_VMEMMAP:
+ case KCORE_TEXT:
+ if (m->type == KCORE_RAM) {
+ __start = kc_xlate_dev_mem_ptr(phys);
+ if (!__start) {
+ ret = -ENOMEM;
+ if (iov_iter_zero(tsz, iter) != tsz)
+ ret = -EFAULT;
+ goto out;
}
} else {
- if (clear_user(buffer, tsz))
- return -EFAULT;
+ __start = (void *)start;
+ }
+
+ /*
+ * Sadly we must use a bounce buffer here to be able to
+ * make use of copy_from_kernel_nofault(), as these
+ * memory regions might not always be mapped on all
+ * architectures.
+ */
+ ret = copy_from_kernel_nofault(buf, __start, tsz);
+ if (m->type == KCORE_RAM)
+ kc_unxlate_dev_mem_ptr(phys, __start);
+ if (ret) {
+ if (iov_iter_zero(tsz, iter) != tsz) {
+ ret = -EFAULT;
+ goto out;
+ }
+ ret = 0;
+ /*
+ * We know the bounce buffer is safe to copy from, so
+ * use _copy_to_iter() directly.
+ */
+ } else if (_copy_to_iter(buf, tsz, iter) != tsz) {
+ ret = -EFAULT;
+ goto out;
+ }
+ break;
+ default:
+ pr_warn_once("Unhandled KCORE type: %d\n", m->type);
+ if (iov_iter_zero(tsz, iter) != tsz) {
+ ret = -EFAULT;
+ goto out;
}
}
+skip:
buflen -= tsz;
*fpos += tsz;
- buffer += tsz;
- acc += tsz;
start += tsz;
tsz = (buflen > PAGE_SIZE ? PAGE_SIZE : buflen);
}
- return acc;
+out:
+ page_offline_thaw();
+ percpu_up_read(&kclist_lock);
+ if (ret)
+ return ret;
+ return orig_buflen - buflen;
}
-
static int open_kcore(struct inode *inode, struct file *filp)
{
+ int ret = security_locked_down(LOCKDOWN_KCORE);
+
if (!capable(CAP_SYS_RAWIO))
return -EPERM;
+
+ if (ret)
+ return ret;
+
+ filp->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!filp->private_data)
+ return -ENOMEM;
+
if (kcore_need_update)
kcore_update_ram();
if (i_size_read(inode) != proc_root_kcore->size) {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
i_size_write(inode, proc_root_kcore->size);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
return 0;
}
+static int release_kcore(struct inode *inode, struct file *file)
+{
+ kfree(file->private_data);
+ return 0;
+}
-static const struct file_operations proc_kcore_operations = {
- .read = read_kcore,
- .open = open_kcore,
- .llseek = default_llseek,
+static const struct proc_ops kcore_proc_ops = {
+ .proc_flags = PROC_ENTRY_PERMANENT,
+ .proc_read_iter = read_kcore_iter,
+ .proc_open = open_kcore,
+ .proc_release = release_kcore,
+ .proc_lseek = default_llseek,
};
/* just remember that we have to update kcore */
@@ -574,17 +677,12 @@ static int __meminit kcore_callback(struct notifier_block *self,
switch (action) {
case MEM_ONLINE:
case MEM_OFFLINE:
- write_lock(&kclist_lock);
kcore_need_update = 1;
- write_unlock(&kclist_lock);
+ break;
}
return NOTIFY_OK;
}
-static struct notifier_block kcore_callback_nb __meminitdata = {
- .notifier_call = kcore_callback,
- .priority = 0,
-};
static struct kcore_list kcore_vmalloc;
@@ -608,11 +706,13 @@ static void __init proc_kcore_text_init(void)
/*
* MODULES_VADDR has no intersection with VMALLOC_ADDR.
*/
-struct kcore_list kcore_modules;
+static struct kcore_list kcore_modules;
static void __init add_modules_range(void)
{
- kclist_add(&kcore_modules, (void *)MODULES_VADDR,
+ if (MODULES_VADDR != VMALLOC_START && MODULES_END != VMALLOC_END) {
+ kclist_add(&kcore_modules, (void *)MODULES_VADDR,
MODULES_END - MODULES_VADDR, KCORE_VMALLOC);
+ }
}
#else
static void __init add_modules_range(void)
@@ -622,8 +722,7 @@ static void __init add_modules_range(void)
static int __init proc_kcore_init(void)
{
- proc_root_kcore = proc_create("kcore", S_IRUSR, NULL,
- &proc_kcore_operations);
+ proc_root_kcore = proc_create("kcore", S_IRUSR, NULL, &kcore_proc_ops);
if (!proc_root_kcore) {
pr_err("couldn't create /proc/kcore\n");
return 0; /* Always returns 0. */
@@ -636,8 +735,8 @@ static int __init proc_kcore_init(void)
add_modules_range();
/* Store direct-map area from physical memory map */
kcore_update_ram();
- register_hotmemory_notifier(&kcore_callback_nb);
+ hotplug_memory_notifier(kcore_callback, DEFAULT_CALLBACK_PRI);
return 0;
}
-module_init(proc_kcore_init);
+fs_initcall(proc_kcore_init);