summaryrefslogtreecommitdiff
path: root/fs/binfmt_elf.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/binfmt_elf.c')
-rw-r--r--fs/binfmt_elf.c2183
1 files changed, 1043 insertions, 1140 deletions
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 100edcc5e312..3eb734c192e9 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* linux/fs/binfmt_elf.c
*
@@ -12,6 +13,7 @@
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
+#include <linux/log2.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
@@ -26,18 +28,32 @@
#include <linux/highuid.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
+#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/random.h>
#include <linux/elf.h>
+#include <linux/elf-randomize.h>
#include <linux/utsname.h>
#include <linux/coredump.h>
#include <linux/sched.h>
-#include <asm/uaccess.h>
+#include <linux/sched/coredump.h>
+#include <linux/sched/task_stack.h>
+#include <linux/sched/cputime.h>
+#include <linux/sizes.h>
+#include <linux/types.h>
+#include <linux/cred.h>
+#include <linux/dax.h>
+#include <linux/uaccess.h>
+#include <uapi/linux/rseq.h>
#include <asm/param.h>
#include <asm/page.h>
+#ifndef ELF_COMPAT
+#define ELF_COMPAT 0
+#endif
+
#ifndef user_long_t
#define user_long_t long
#endif
@@ -45,10 +61,12 @@
#define user_siginfo_t siginfo_t
#endif
+/* That's for binfmt_elf_fdpic to deal with */
+#ifndef elf_check_fdpic
+#define elf_check_fdpic(ex) false
+#endif
+
static int load_elf_binary(struct linux_binprm *bprm);
-static int load_elf_library(struct file *);
-static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
- int, int, unsigned long);
/*
* If we don't support core dumping, then supply a NULL so we
@@ -70,47 +88,49 @@ static int elf_core_dump(struct coredump_params *cprm);
#define ELF_CORE_EFLAGS 0
#endif
-#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
+#define ELF_PAGESTART(_v) ((_v) & ~(int)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
static struct linux_binfmt elf_format = {
.module = THIS_MODULE,
.load_binary = load_elf_binary,
- .load_shlib = load_elf_library,
+#ifdef CONFIG_COREDUMP
.core_dump = elf_core_dump,
.min_coredump = ELF_EXEC_PAGESIZE,
+#endif
};
-#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
+#define BAD_ADDR(x) (unlikely((unsigned long)(x) >= TASK_SIZE))
-static int set_brk(unsigned long start, unsigned long end)
+static inline void elf_coredump_set_mm_eflags(struct mm_struct *mm, u32 flags)
{
- start = ELF_PAGEALIGN(start);
- end = ELF_PAGEALIGN(end);
- if (end > start) {
- unsigned long addr;
- addr = vm_brk(start, end - start);
- if (BAD_ADDR(addr))
- return addr;
- }
- current->mm->start_brk = current->mm->brk = end;
- return 0;
+#ifdef CONFIG_ARCH_HAS_ELF_CORE_EFLAGS
+ mm->saved_e_flags = flags;
+#endif
}
-/* We need to explicitly zero any fractional pages
- after the data section (i.e. bss). This would
- contain the junk from the file that should not
- be in memory
+static inline u32 elf_coredump_get_mm_eflags(struct mm_struct *mm, u32 flags)
+{
+#ifdef CONFIG_ARCH_HAS_ELF_CORE_EFLAGS
+ flags = mm->saved_e_flags;
+#endif
+ return flags;
+}
+
+/*
+ * We need to explicitly zero any trailing portion of the page that follows
+ * p_filesz when it ends before the page ends (e.g. bss), otherwise this
+ * memory will contain the junk from the file that should not be present.
*/
-static int padzero(unsigned long elf_bss)
+static int padzero(unsigned long address)
{
unsigned long nbyte;
- nbyte = ELF_PAGEOFFSET(elf_bss);
+ nbyte = ELF_PAGEOFFSET(address);
if (nbyte) {
nbyte = ELF_MIN_ALIGN - nbyte;
- if (clear_user((void __user *) elf_bss, nbyte))
+ if (clear_user((void __user *)address, nbyte))
return -EFAULT;
}
return 0;
@@ -128,7 +148,7 @@ static int padzero(unsigned long elf_bss)
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
(((unsigned long) (sp - items)) &~ 15UL)
-#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
+#define STACK_ALLOC(sp, len) (sp -= len)
#endif
#ifndef ELF_BASE_PLATFORM
@@ -141,14 +161,14 @@ static int padzero(unsigned long elf_bss)
#endif
static int
-create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
- unsigned long load_addr, unsigned long interp_load_addr)
+create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec,
+ unsigned long interp_load_addr,
+ unsigned long e_entry, unsigned long phdr_addr)
{
+ struct mm_struct *mm = current->mm;
unsigned long p = bprm->p;
int argc = bprm->argc;
int envc = bprm->envc;
- elf_addr_t __user *argv;
- elf_addr_t __user *envp;
elf_addr_t __user *sp;
elf_addr_t __user *u_platform;
elf_addr_t __user *u_base_platform;
@@ -158,7 +178,8 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
unsigned char k_rand_bytes[16];
int items;
elf_addr_t *elf_info;
- int ei_index = 0;
+ elf_addr_t flags = 0;
+ int ei_index;
const struct cred *cred = current_cred();
struct vm_area_struct *vma;
@@ -181,7 +202,7 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
size_t len = strlen(k_platform) + 1;
u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
- if (__copy_to_user(u_platform, k_platform, len))
+ if (copy_to_user(u_platform, k_platform, len))
return -EFAULT;
}
@@ -194,7 +215,7 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
size_t len = strlen(k_base_platform) + 1;
u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
- if (__copy_to_user(u_base_platform, k_base_platform, len))
+ if (copy_to_user(u_base_platform, k_base_platform, len))
return -EFAULT;
}
@@ -204,20 +225,20 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
u_rand_bytes = (elf_addr_t __user *)
STACK_ALLOC(p, sizeof(k_rand_bytes));
- if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
+ if (copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
return -EFAULT;
/* Create the ELF interpreter info */
- elf_info = (elf_addr_t *)current->mm->saved_auxv;
+ elf_info = (elf_addr_t *)mm->saved_auxv;
/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
do { \
- elf_info[ei_index++] = id; \
- elf_info[ei_index++] = val; \
+ *elf_info++ = id; \
+ *elf_info++ = val; \
} while (0)
#ifdef ARCH_DLINFO
- /*
+ /*
* ARCH_DLINFO must come first so PPC can do its special alignment of
* AUXV.
* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
@@ -228,21 +249,29 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
- NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
+ NEW_AUX_ENT(AT_PHDR, phdr_addr);
NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
NEW_AUX_ENT(AT_BASE, interp_load_addr);
- NEW_AUX_ENT(AT_FLAGS, 0);
- NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
+ if (bprm->interp_flags & BINPRM_FLAGS_PRESERVE_ARGV0)
+ flags |= AT_FLAGS_PRESERVE_ARGV0;
+ NEW_AUX_ENT(AT_FLAGS, flags);
+ NEW_AUX_ENT(AT_ENTRY, e_entry);
NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
- NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
+ NEW_AUX_ENT(AT_SECURE, bprm->secureexec);
NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
#ifdef ELF_HWCAP2
NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
#endif
+#ifdef ELF_HWCAP3
+ NEW_AUX_ENT(AT_HWCAP3, ELF_HWCAP3);
+#endif
+#ifdef ELF_HWCAP4
+ NEW_AUX_ENT(AT_HWCAP4, ELF_HWCAP4);
+#endif
NEW_AUX_ENT(AT_EXECFN, bprm->exec);
if (k_platform) {
NEW_AUX_ENT(AT_PLATFORM,
@@ -252,17 +281,22 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
NEW_AUX_ENT(AT_BASE_PLATFORM,
(elf_addr_t)(unsigned long)u_base_platform);
}
- if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
- NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
+ if (bprm->have_execfd) {
+ NEW_AUX_ENT(AT_EXECFD, bprm->execfd);
}
+#ifdef CONFIG_RSEQ
+ NEW_AUX_ENT(AT_RSEQ_FEATURE_SIZE, offsetof(struct rseq, end));
+ NEW_AUX_ENT(AT_RSEQ_ALIGN, __alignof__(struct rseq));
+#endif
#undef NEW_AUX_ENT
/* AT_NULL is zero; clear the rest too */
- memset(&elf_info[ei_index], 0,
- sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
+ memset(elf_info, 0, (char *)mm->saved_auxv +
+ sizeof(mm->saved_auxv) - (char *)elf_info);
/* And advance past the AT_NULL entry. */
- ei_index += 2;
+ elf_info += 2;
+ ei_index = elf_info - (elf_addr_t *)mm->saved_auxv;
sp = STACK_ADD(p, ei_index);
items = (argc + 1) + (envc + 1) + 1;
@@ -281,54 +315,60 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
* Grow the stack manually; some architectures have a limit on how
* far ahead a user-space access may be in order to grow the stack.
*/
- vma = find_extend_vma(current->mm, bprm->p);
+ if (mmap_write_lock_killable(mm))
+ return -EINTR;
+ vma = find_extend_vma_locked(mm, bprm->p);
+ mmap_write_unlock(mm);
if (!vma)
return -EFAULT;
/* Now, let's put argc (and argv, envp if appropriate) on the stack */
- if (__put_user(argc, sp++))
+ if (put_user(argc, sp++))
return -EFAULT;
- argv = sp;
- envp = argv + argc + 1;
- /* Populate argv and envp */
- p = current->mm->arg_end = current->mm->arg_start;
+ /* Populate list of argv pointers back to argv strings. */
+ p = mm->arg_end = mm->arg_start;
while (argc-- > 0) {
size_t len;
- if (__put_user((elf_addr_t)p, argv++))
+ if (put_user((elf_addr_t)p, sp++))
return -EFAULT;
len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
if (!len || len > MAX_ARG_STRLEN)
return -EINVAL;
p += len;
}
- if (__put_user(0, argv))
+ if (put_user(0, sp++))
return -EFAULT;
- current->mm->arg_end = current->mm->env_start = p;
+ mm->arg_end = p;
+
+ /* Populate list of envp pointers back to envp strings. */
+ mm->env_end = mm->env_start = p;
while (envc-- > 0) {
size_t len;
- if (__put_user((elf_addr_t)p, envp++))
+ if (put_user((elf_addr_t)p, sp++))
return -EFAULT;
len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
if (!len || len > MAX_ARG_STRLEN)
return -EINVAL;
p += len;
}
- if (__put_user(0, envp))
+ if (put_user(0, sp++))
return -EFAULT;
- current->mm->env_end = p;
+ mm->env_end = p;
/* Put the elf_info on the stack in the right place. */
- sp = (elf_addr_t __user *)envp + 1;
- if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
+ if (copy_to_user(sp, mm->saved_auxv, ei_index * sizeof(elf_addr_t)))
return -EFAULT;
return 0;
}
-#ifndef elf_map
-
+/*
+ * Map "eppnt->p_filesz" bytes from "filep" offset "eppnt->p_offset"
+ * into memory at "addr". (Note that p_filesz is rounded up to the
+ * next page, so any extra bytes from the file must be wiped.)
+ */
static unsigned long elf_map(struct file *filep, unsigned long addr,
- struct elf_phdr *eppnt, int prot, int type,
+ const struct elf_phdr *eppnt, int prot, int type,
unsigned long total_size)
{
unsigned long map_addr;
@@ -358,29 +398,244 @@ static unsigned long elf_map(struct file *filep, unsigned long addr,
} else
map_addr = vm_mmap(filep, addr, size, prot, type, off);
+ if ((type & MAP_FIXED_NOREPLACE) &&
+ PTR_ERR((void *)map_addr) == -EEXIST)
+ pr_info("%d (%s): Uhuuh, elf segment at %px requested but the memory is mapped already\n",
+ task_pid_nr(current), current->comm, (void *)addr);
+
return(map_addr);
}
-#endif /* !elf_map */
+/*
+ * Map "eppnt->p_filesz" bytes from "filep" offset "eppnt->p_offset"
+ * into memory at "addr". Memory from "p_filesz" through "p_memsz"
+ * rounded up to the next page is zeroed.
+ */
+static unsigned long elf_load(struct file *filep, unsigned long addr,
+ const struct elf_phdr *eppnt, int prot, int type,
+ unsigned long total_size)
+{
+ unsigned long zero_start, zero_end;
+ unsigned long map_addr;
+
+ if (eppnt->p_filesz) {
+ map_addr = elf_map(filep, addr, eppnt, prot, type, total_size);
+ if (BAD_ADDR(map_addr))
+ return map_addr;
+ if (eppnt->p_memsz > eppnt->p_filesz) {
+ zero_start = map_addr + ELF_PAGEOFFSET(eppnt->p_vaddr) +
+ eppnt->p_filesz;
+ zero_end = map_addr + ELF_PAGEOFFSET(eppnt->p_vaddr) +
+ eppnt->p_memsz;
+
+ /*
+ * Zero the end of the last mapped page but ignore
+ * any errors if the segment isn't writable.
+ */
+ if (padzero(zero_start) && (prot & PROT_WRITE))
+ return -EFAULT;
+ }
+ } else {
+ map_addr = zero_start = ELF_PAGESTART(addr);
+ zero_end = zero_start + ELF_PAGEOFFSET(eppnt->p_vaddr) +
+ eppnt->p_memsz;
+ }
+ if (eppnt->p_memsz > eppnt->p_filesz) {
+ /*
+ * Map the last of the segment.
+ * If the header is requesting these pages to be
+ * executable, honour that (ppc32 needs this).
+ */
+ int error;
+
+ zero_start = ELF_PAGEALIGN(zero_start);
+ zero_end = ELF_PAGEALIGN(zero_end);
+
+ error = vm_brk_flags(zero_start, zero_end - zero_start,
+ prot & PROT_EXEC ? VM_EXEC : 0);
+ if (error)
+ map_addr = error;
+ }
+ return map_addr;
+}
+
+
+static unsigned long total_mapping_size(const struct elf_phdr *phdr, int nr)
+{
+ elf_addr_t min_addr = -1;
+ elf_addr_t max_addr = 0;
+ bool pt_load = false;
+ int i;
+
+ for (i = 0; i < nr; i++) {
+ if (phdr[i].p_type == PT_LOAD) {
+ min_addr = min(min_addr, ELF_PAGESTART(phdr[i].p_vaddr));
+ max_addr = max(max_addr, phdr[i].p_vaddr + phdr[i].p_memsz);
+ pt_load = true;
+ }
+ }
+ return pt_load ? (max_addr - min_addr) : 0;
+}
+
+static int elf_read(struct file *file, void *buf, size_t len, loff_t pos)
+{
+ ssize_t rv;
+
+ rv = kernel_read(file, buf, len, &pos);
+ if (unlikely(rv != len)) {
+ return (rv < 0) ? rv : -EIO;
+ }
+ return 0;
+}
-static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
+static unsigned long maximum_alignment(struct elf_phdr *cmds, int nr)
{
- int i, first_idx = -1, last_idx = -1;
+ unsigned long alignment = 0;
+ int i;
for (i = 0; i < nr; i++) {
if (cmds[i].p_type == PT_LOAD) {
- last_idx = i;
- if (first_idx == -1)
- first_idx = i;
+ unsigned long p_align = cmds[i].p_align;
+
+ /* skip non-power of two alignments as invalid */
+ if (!is_power_of_2(p_align))
+ continue;
+ alignment = max(alignment, p_align);
}
}
- if (first_idx == -1)
- return 0;
- return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
- ELF_PAGESTART(cmds[first_idx].p_vaddr);
+ /* ensure we align to at least one page */
+ return ELF_PAGEALIGN(alignment);
+}
+
+/**
+ * load_elf_phdrs() - load ELF program headers
+ * @elf_ex: ELF header of the binary whose program headers should be loaded
+ * @elf_file: the opened ELF binary file
+ *
+ * Loads ELF program headers from the binary file elf_file, which has the ELF
+ * header pointed to by elf_ex, into a newly allocated array. The caller is
+ * responsible for freeing the allocated data. Returns NULL upon failure.
+ */
+static struct elf_phdr *load_elf_phdrs(const struct elfhdr *elf_ex,
+ struct file *elf_file)
+{
+ struct elf_phdr *elf_phdata = NULL;
+ int retval = -1;
+ unsigned int size;
+
+ /*
+ * If the size of this structure has changed, then punt, since
+ * we will be doing the wrong thing.
+ */
+ if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
+ goto out;
+
+ /* Sanity check the number of program headers... */
+ /* ...and their total size. */
+ size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
+ if (size == 0 || size > 65536)
+ goto out;
+
+ elf_phdata = kmalloc(size, GFP_KERNEL);
+ if (!elf_phdata)
+ goto out;
+
+ /* Read in the program headers */
+ retval = elf_read(elf_file, elf_phdata, size, elf_ex->e_phoff);
+
+out:
+ if (retval) {
+ kfree(elf_phdata);
+ elf_phdata = NULL;
+ }
+ return elf_phdata;
+}
+
+#ifndef CONFIG_ARCH_BINFMT_ELF_STATE
+
+/**
+ * struct arch_elf_state - arch-specific ELF loading state
+ *
+ * This structure is used to preserve architecture specific data during
+ * the loading of an ELF file, throughout the checking of architecture
+ * specific ELF headers & through to the point where the ELF load is
+ * known to be proceeding (ie. SET_PERSONALITY).
+ *
+ * This implementation is a dummy for architectures which require no
+ * specific state.
+ */
+struct arch_elf_state {
+};
+
+#define INIT_ARCH_ELF_STATE {}
+
+/**
+ * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
+ * @ehdr: The main ELF header
+ * @phdr: The program header to check
+ * @elf: The open ELF file
+ * @is_interp: True if the phdr is from the interpreter of the ELF being
+ * loaded, else false.
+ * @state: Architecture-specific state preserved throughout the process
+ * of loading the ELF.
+ *
+ * Inspects the program header phdr to validate its correctness and/or
+ * suitability for the system. Called once per ELF program header in the
+ * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
+ * interpreter.
+ *
+ * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
+ * with that return code.
+ */
+static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
+ struct elf_phdr *phdr,
+ struct file *elf, bool is_interp,
+ struct arch_elf_state *state)
+{
+ /* Dummy implementation, always proceed */
+ return 0;
}
+/**
+ * arch_check_elf() - check an ELF executable
+ * @ehdr: The main ELF header
+ * @has_interp: True if the ELF has an interpreter, else false.
+ * @interp_ehdr: The interpreter's ELF header
+ * @state: Architecture-specific state preserved throughout the process
+ * of loading the ELF.
+ *
+ * Provides a final opportunity for architecture code to reject the loading
+ * of the ELF & cause an exec syscall to return an error. This is called after
+ * all program headers to be checked by arch_elf_pt_proc have been.
+ *
+ * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
+ * with that return code.
+ */
+static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
+ struct elfhdr *interp_ehdr,
+ struct arch_elf_state *state)
+{
+ /* Dummy implementation, always proceed */
+ return 0;
+}
+
+#endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */
+
+static inline int make_prot(u32 p_flags, struct arch_elf_state *arch_state,
+ bool has_interp, bool is_interp)
+{
+ int prot = 0;
+
+ if (p_flags & PF_R)
+ prot |= PROT_READ;
+ if (p_flags & PF_W)
+ prot |= PROT_WRITE;
+ if (p_flags & PF_X)
+ prot |= PROT_EXEC;
+
+ return arch_elf_adjust_prot(prot, arch_state, has_interp, is_interp);
+}
/* This is much more generalized than the library routine read function,
so we keep this separate. Technically the library read function
@@ -388,88 +643,55 @@ static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
an ELF header */
static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
- struct file *interpreter, unsigned long *interp_map_addr,
- unsigned long no_base)
+ struct file *interpreter,
+ unsigned long no_base, struct elf_phdr *interp_elf_phdata,
+ struct arch_elf_state *arch_state)
{
- struct elf_phdr *elf_phdata;
struct elf_phdr *eppnt;
unsigned long load_addr = 0;
int load_addr_set = 0;
- unsigned long last_bss = 0, elf_bss = 0;
unsigned long error = ~0UL;
unsigned long total_size;
- int retval, i, size;
+ int i;
/* First of all, some simple consistency checks */
if (interp_elf_ex->e_type != ET_EXEC &&
interp_elf_ex->e_type != ET_DYN)
goto out;
- if (!elf_check_arch(interp_elf_ex))
+ if (!elf_check_arch(interp_elf_ex) ||
+ elf_check_fdpic(interp_elf_ex))
goto out;
- if (!interpreter->f_op || !interpreter->f_op->mmap)
- goto out;
-
- /*
- * If the size of this structure has changed, then punt, since
- * we will be doing the wrong thing.
- */
- if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
- goto out;
- if (interp_elf_ex->e_phnum < 1 ||
- interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
- goto out;
-
- /* Now read in all of the header information */
- size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
- if (size > ELF_MIN_ALIGN)
- goto out;
- elf_phdata = kmalloc(size, GFP_KERNEL);
- if (!elf_phdata)
+ if (!can_mmap_file(interpreter))
goto out;
- retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
- (char *)elf_phdata, size);
- error = -EIO;
- if (retval != size) {
- if (retval < 0)
- error = retval;
- goto out_close;
- }
-
- total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
+ total_size = total_mapping_size(interp_elf_phdata,
+ interp_elf_ex->e_phnum);
if (!total_size) {
error = -EINVAL;
- goto out_close;
+ goto out;
}
- eppnt = elf_phdata;
+ eppnt = interp_elf_phdata;
for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
if (eppnt->p_type == PT_LOAD) {
- int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
- int elf_prot = 0;
+ int elf_type = MAP_PRIVATE;
+ int elf_prot = make_prot(eppnt->p_flags, arch_state,
+ true, true);
unsigned long vaddr = 0;
unsigned long k, map_addr;
- if (eppnt->p_flags & PF_R)
- elf_prot = PROT_READ;
- if (eppnt->p_flags & PF_W)
- elf_prot |= PROT_WRITE;
- if (eppnt->p_flags & PF_X)
- elf_prot |= PROT_EXEC;
vaddr = eppnt->p_vaddr;
if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
elf_type |= MAP_FIXED;
else if (no_base && interp_elf_ex->e_type == ET_DYN)
load_addr = -vaddr;
- map_addr = elf_map(interpreter, load_addr + vaddr,
+ map_addr = elf_load(interpreter, load_addr + vaddr,
eppnt, elf_prot, elf_type, total_size);
total_size = 0;
- if (!*interp_map_addr)
- *interp_map_addr = map_addr;
error = map_addr;
if (BAD_ADDR(map_addr))
- goto out_close;
+ goto out;
if (!load_addr_set &&
interp_elf_ex->e_type == ET_DYN) {
@@ -488,52 +710,12 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
eppnt->p_memsz > TASK_SIZE ||
TASK_SIZE - eppnt->p_memsz < k) {
error = -ENOMEM;
- goto out_close;
+ goto out;
}
-
- /*
- * Find the end of the file mapping for this phdr, and
- * keep track of the largest address we see for this.
- */
- k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
- if (k > elf_bss)
- elf_bss = k;
-
- /*
- * Do the same thing for the memory mapping - between
- * elf_bss and last_bss is the bss section.
- */
- k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
- if (k > last_bss)
- last_bss = k;
- }
- }
-
- if (last_bss > elf_bss) {
- /*
- * Now fill out the bss section. First pad the last page up
- * to the page boundary, and then perform a mmap to make sure
- * that there are zero-mapped pages up to and including the
- * last bss page.
- */
- if (padzero(elf_bss)) {
- error = -EFAULT;
- goto out_close;
}
-
- /* What we have mapped so far */
- elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
-
- /* Map the last of the bss segment */
- error = vm_brk(elf_bss, last_bss - elf_bss);
- if (BAD_ADDR(error))
- goto out_close;
}
error = load_addr;
-
-out_close:
- kfree(elf_phdata);
out:
return error;
}
@@ -543,195 +725,300 @@ out:
* libraries. There is no binary dependent code anywhere else.
*/
-#define INTERPRETER_NONE 0
-#define INTERPRETER_ELF 2
+static int parse_elf_property(const char *data, size_t *off, size_t datasz,
+ struct arch_elf_state *arch,
+ bool have_prev_type, u32 *prev_type)
+{
+ size_t o, step;
+ const struct gnu_property *pr;
+ int ret;
+
+ if (*off == datasz)
+ return -ENOENT;
+
+ if (WARN_ON_ONCE(*off > datasz || *off % ELF_GNU_PROPERTY_ALIGN))
+ return -EIO;
+ o = *off;
+ datasz -= *off;
+
+ if (datasz < sizeof(*pr))
+ return -ENOEXEC;
+ pr = (const struct gnu_property *)(data + o);
+ o += sizeof(*pr);
+ datasz -= sizeof(*pr);
+
+ if (pr->pr_datasz > datasz)
+ return -ENOEXEC;
+
+ WARN_ON_ONCE(o % ELF_GNU_PROPERTY_ALIGN);
+ step = round_up(pr->pr_datasz, ELF_GNU_PROPERTY_ALIGN);
+ if (step > datasz)
+ return -ENOEXEC;
+
+ /* Properties are supposed to be unique and sorted on pr_type: */
+ if (have_prev_type && pr->pr_type <= *prev_type)
+ return -ENOEXEC;
+ *prev_type = pr->pr_type;
+
+ ret = arch_parse_elf_property(pr->pr_type, data + o,
+ pr->pr_datasz, ELF_COMPAT, arch);
+ if (ret)
+ return ret;
-#ifndef STACK_RND_MASK
-#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12)) /* 8MB of VA */
-#endif
+ *off = o + step;
+ return 0;
+}
+
+#define NOTE_DATA_SZ SZ_1K
+#define NOTE_NAME_SZ (sizeof(NN_GNU_PROPERTY_TYPE_0))
-static unsigned long randomize_stack_top(unsigned long stack_top)
+static int parse_elf_properties(struct file *f, const struct elf_phdr *phdr,
+ struct arch_elf_state *arch)
{
- unsigned int random_variable = 0;
+ union {
+ struct elf_note nhdr;
+ char data[NOTE_DATA_SZ];
+ } note;
+ loff_t pos;
+ ssize_t n;
+ size_t off, datasz;
+ int ret;
+ bool have_prev_type;
+ u32 prev_type;
+
+ if (!IS_ENABLED(CONFIG_ARCH_USE_GNU_PROPERTY) || !phdr)
+ return 0;
- if ((current->flags & PF_RANDOMIZE) &&
- !(current->personality & ADDR_NO_RANDOMIZE)) {
- random_variable = get_random_int() & STACK_RND_MASK;
- random_variable <<= PAGE_SHIFT;
- }
-#ifdef CONFIG_STACK_GROWSUP
- return PAGE_ALIGN(stack_top) + random_variable;
-#else
- return PAGE_ALIGN(stack_top) - random_variable;
-#endif
+ /* load_elf_binary() shouldn't call us unless this is true... */
+ if (WARN_ON_ONCE(phdr->p_type != PT_GNU_PROPERTY))
+ return -ENOEXEC;
+
+ /* If the properties are crazy large, that's too bad (for now): */
+ if (phdr->p_filesz > sizeof(note))
+ return -ENOEXEC;
+
+ pos = phdr->p_offset;
+ n = kernel_read(f, &note, phdr->p_filesz, &pos);
+
+ BUILD_BUG_ON(sizeof(note) < sizeof(note.nhdr) + NOTE_NAME_SZ);
+ if (n < 0 || n < sizeof(note.nhdr) + NOTE_NAME_SZ)
+ return -EIO;
+
+ if (note.nhdr.n_type != NT_GNU_PROPERTY_TYPE_0 ||
+ note.nhdr.n_namesz != NOTE_NAME_SZ ||
+ strncmp(note.data + sizeof(note.nhdr),
+ NN_GNU_PROPERTY_TYPE_0, n - sizeof(note.nhdr)))
+ return -ENOEXEC;
+
+ off = round_up(sizeof(note.nhdr) + NOTE_NAME_SZ,
+ ELF_GNU_PROPERTY_ALIGN);
+ if (off > n)
+ return -ENOEXEC;
+
+ if (note.nhdr.n_descsz > n - off)
+ return -ENOEXEC;
+ datasz = off + note.nhdr.n_descsz;
+
+ have_prev_type = false;
+ do {
+ ret = parse_elf_property(note.data, &off, datasz, arch,
+ have_prev_type, &prev_type);
+ have_prev_type = true;
+ } while (!ret);
+
+ return ret == -ENOENT ? 0 : ret;
}
static int load_elf_binary(struct linux_binprm *bprm)
{
struct file *interpreter = NULL; /* to shut gcc up */
- unsigned long load_addr = 0, load_bias = 0;
- int load_addr_set = 0;
- char * elf_interpreter = NULL;
+ unsigned long load_bias = 0, phdr_addr = 0;
+ int first_pt_load = 1;
unsigned long error;
- struct elf_phdr *elf_ppnt, *elf_phdata;
- unsigned long elf_bss, elf_brk;
+ struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
+ struct elf_phdr *elf_property_phdata = NULL;
+ unsigned long elf_brk;
+ bool brk_moved = false;
int retval, i;
- unsigned int size;
unsigned long elf_entry;
+ unsigned long e_entry;
unsigned long interp_load_addr = 0;
unsigned long start_code, end_code, start_data, end_data;
unsigned long reloc_func_desc __maybe_unused = 0;
int executable_stack = EXSTACK_DEFAULT;
- unsigned long def_flags = 0;
- struct pt_regs *regs = current_pt_regs();
- struct {
- struct elfhdr elf_ex;
- struct elfhdr interp_elf_ex;
- } *loc;
-
- loc = kmalloc(sizeof(*loc), GFP_KERNEL);
- if (!loc) {
- retval = -ENOMEM;
- goto out_ret;
- }
-
- /* Get the exec-header */
- loc->elf_ex = *((struct elfhdr *)bprm->buf);
+ struct elfhdr *elf_ex = (struct elfhdr *)bprm->buf;
+ struct elfhdr *interp_elf_ex = NULL;
+ struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
+ struct mm_struct *mm;
+ struct pt_regs *regs;
retval = -ENOEXEC;
/* First of all, some simple consistency checks */
- if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
+ if (memcmp(elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
goto out;
- if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
- goto out;
- if (!elf_check_arch(&loc->elf_ex))
+ if (elf_ex->e_type != ET_EXEC && elf_ex->e_type != ET_DYN)
goto out;
- if (!bprm->file->f_op || !bprm->file->f_op->mmap)
+ if (!elf_check_arch(elf_ex))
goto out;
-
- /* Now read in all of the header information */
- if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
+ if (elf_check_fdpic(elf_ex))
goto out;
- if (loc->elf_ex.e_phnum < 1 ||
- loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
+ if (!can_mmap_file(bprm->file))
goto out;
- size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
- retval = -ENOMEM;
- elf_phdata = kmalloc(size, GFP_KERNEL);
+
+ elf_phdata = load_elf_phdrs(elf_ex, bprm->file);
if (!elf_phdata)
goto out;
- retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
- (char *)elf_phdata, size);
- if (retval != size) {
- if (retval >= 0)
- retval = -EIO;
- goto out_free_ph;
- }
-
elf_ppnt = elf_phdata;
- elf_bss = 0;
- elf_brk = 0;
+ for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++) {
+ char *elf_interpreter;
- start_code = ~0UL;
- end_code = 0;
- start_data = 0;
- end_data = 0;
+ if (elf_ppnt->p_type == PT_GNU_PROPERTY) {
+ elf_property_phdata = elf_ppnt;
+ continue;
+ }
- for (i = 0; i < loc->elf_ex.e_phnum; i++) {
- if (elf_ppnt->p_type == PT_INTERP) {
- /* This is the program interpreter used for
- * shared libraries - for now assume that this
- * is an a.out format binary
- */
- retval = -ENOEXEC;
- if (elf_ppnt->p_filesz > PATH_MAX ||
- elf_ppnt->p_filesz < 2)
- goto out_free_ph;
+ if (elf_ppnt->p_type != PT_INTERP)
+ continue;
- retval = -ENOMEM;
- elf_interpreter = kmalloc(elf_ppnt->p_filesz,
- GFP_KERNEL);
- if (!elf_interpreter)
- goto out_free_ph;
-
- retval = kernel_read(bprm->file, elf_ppnt->p_offset,
- elf_interpreter,
- elf_ppnt->p_filesz);
- if (retval != elf_ppnt->p_filesz) {
- if (retval >= 0)
- retval = -EIO;
- goto out_free_interp;
- }
- /* make sure path is NULL terminated */
- retval = -ENOEXEC;
- if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
- goto out_free_interp;
+ /*
+ * This is the program interpreter used for shared libraries -
+ * for now assume that this is an a.out format binary.
+ */
+ retval = -ENOEXEC;
+ if (elf_ppnt->p_filesz > PATH_MAX || elf_ppnt->p_filesz < 2)
+ goto out_free_ph;
- interpreter = open_exec(elf_interpreter);
- retval = PTR_ERR(interpreter);
- if (IS_ERR(interpreter))
- goto out_free_interp;
+ retval = -ENOMEM;
+ elf_interpreter = kmalloc(elf_ppnt->p_filesz, GFP_KERNEL);
+ if (!elf_interpreter)
+ goto out_free_ph;
- /*
- * If the binary is not readable then enforce
- * mm->dumpable = 0 regardless of the interpreter's
- * permissions.
- */
- would_dump(bprm, interpreter);
+ retval = elf_read(bprm->file, elf_interpreter, elf_ppnt->p_filesz,
+ elf_ppnt->p_offset);
+ if (retval < 0)
+ goto out_free_interp;
+ /* make sure path is NULL terminated */
+ retval = -ENOEXEC;
+ if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
+ goto out_free_interp;
- retval = kernel_read(interpreter, 0, bprm->buf,
- BINPRM_BUF_SIZE);
- if (retval != BINPRM_BUF_SIZE) {
- if (retval >= 0)
- retval = -EIO;
- goto out_free_dentry;
- }
+ interpreter = open_exec(elf_interpreter);
+ kfree(elf_interpreter);
+ retval = PTR_ERR(interpreter);
+ if (IS_ERR(interpreter))
+ goto out_free_ph;
- /* Get the exec headers */
- loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
- break;
+ /*
+ * If the binary is not readable then enforce mm->dumpable = 0
+ * regardless of the interpreter's permissions.
+ */
+ would_dump(bprm, interpreter);
+
+ interp_elf_ex = kmalloc(sizeof(*interp_elf_ex), GFP_KERNEL);
+ if (!interp_elf_ex) {
+ retval = -ENOMEM;
+ goto out_free_file;
}
- elf_ppnt++;
+
+ /* Get the exec headers */
+ retval = elf_read(interpreter, interp_elf_ex,
+ sizeof(*interp_elf_ex), 0);
+ if (retval < 0)
+ goto out_free_dentry;
+
+ break;
+
+out_free_interp:
+ kfree(elf_interpreter);
+ goto out_free_ph;
}
elf_ppnt = elf_phdata;
- for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
- if (elf_ppnt->p_type == PT_GNU_STACK) {
+ for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++)
+ switch (elf_ppnt->p_type) {
+ case PT_GNU_STACK:
if (elf_ppnt->p_flags & PF_X)
executable_stack = EXSTACK_ENABLE_X;
else
executable_stack = EXSTACK_DISABLE_X;
break;
+
+ case PT_LOPROC ... PT_HIPROC:
+ retval = arch_elf_pt_proc(elf_ex, elf_ppnt,
+ bprm->file, false,
+ &arch_state);
+ if (retval)
+ goto out_free_dentry;
+ break;
}
/* Some simple consistency checks for the interpreter */
- if (elf_interpreter) {
+ if (interpreter) {
retval = -ELIBBAD;
/* Not an ELF interpreter */
- if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
+ if (memcmp(interp_elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
goto out_free_dentry;
/* Verify the interpreter has a valid arch */
- if (!elf_check_arch(&loc->interp_elf_ex))
+ if (!elf_check_arch(interp_elf_ex) ||
+ elf_check_fdpic(interp_elf_ex))
goto out_free_dentry;
+
+ /* Load the interpreter program headers */
+ interp_elf_phdata = load_elf_phdrs(interp_elf_ex,
+ interpreter);
+ if (!interp_elf_phdata)
+ goto out_free_dentry;
+
+ /* Pass PT_LOPROC..PT_HIPROC headers to arch code */
+ elf_property_phdata = NULL;
+ elf_ppnt = interp_elf_phdata;
+ for (i = 0; i < interp_elf_ex->e_phnum; i++, elf_ppnt++)
+ switch (elf_ppnt->p_type) {
+ case PT_GNU_PROPERTY:
+ elf_property_phdata = elf_ppnt;
+ break;
+
+ case PT_LOPROC ... PT_HIPROC:
+ retval = arch_elf_pt_proc(interp_elf_ex,
+ elf_ppnt, interpreter,
+ true, &arch_state);
+ if (retval)
+ goto out_free_dentry;
+ break;
+ }
}
- /* Flush all traces of the currently running executable */
- retval = flush_old_exec(bprm);
+ retval = parse_elf_properties(interpreter ?: bprm->file,
+ elf_property_phdata, &arch_state);
if (retval)
goto out_free_dentry;
- /* OK, This is the point of no return */
- current->mm->def_flags = def_flags;
+ /*
+ * Allow arch code to reject the ELF at this point, whilst it's
+ * still possible to return an error to the code that invoked
+ * the exec syscall.
+ */
+ retval = arch_check_elf(elf_ex,
+ !!interpreter, interp_elf_ex,
+ &arch_state);
+ if (retval)
+ goto out_free_dentry;
+
+ /* Flush all traces of the currently running executable */
+ retval = begin_new_exec(bprm);
+ if (retval)
+ goto out_free_dentry;
/* Do this immediately, since STACK_TOP as used in setup_arg_pages
may depend on the personality. */
- SET_PERSONALITY(loc->elf_ex);
- if (elf_read_implies_exec(loc->elf_ex, executable_stack))
+ SET_PERSONALITY2(*elf_ex, &arch_state);
+ if (elf_read_implies_exec(*elf_ex, executable_stack))
current->personality |= READ_IMPLIES_EXEC;
- if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
+ const int snapshot_randomize_va_space = READ_ONCE(randomize_va_space);
+ if (!(current->personality & ADDR_NO_RANDOMIZE) && snapshot_randomize_va_space)
current->flags |= PF_RANDOMIZE;
setup_new_exec(bprm);
@@ -740,106 +1027,193 @@ static int load_elf_binary(struct linux_binprm *bprm)
change some of these later */
retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
executable_stack);
- if (retval < 0) {
- send_sig(SIGKILL, current, 0);
+ if (retval < 0)
goto out_free_dentry;
- }
-
- current->mm->start_stack = bprm->p;
+
+ elf_brk = 0;
+
+ start_code = ~0UL;
+ end_code = 0;
+ start_data = 0;
+ end_data = 0;
/* Now we do a little grungy work by mmapping the ELF image into
the correct location in memory. */
for(i = 0, elf_ppnt = elf_phdata;
- i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
- int elf_prot = 0, elf_flags;
+ i < elf_ex->e_phnum; i++, elf_ppnt++) {
+ int elf_prot, elf_flags;
unsigned long k, vaddr;
+ unsigned long total_size = 0;
+ unsigned long alignment;
if (elf_ppnt->p_type != PT_LOAD)
continue;
- if (unlikely (elf_brk > elf_bss)) {
- unsigned long nbyte;
-
- /* There was a PT_LOAD segment with p_memsz > p_filesz
- before this one. Map anonymous pages, if needed,
- and clear the area. */
- retval = set_brk(elf_bss + load_bias,
- elf_brk + load_bias);
- if (retval) {
- send_sig(SIGKILL, current, 0);
- goto out_free_dentry;
- }
- nbyte = ELF_PAGEOFFSET(elf_bss);
- if (nbyte) {
- nbyte = ELF_MIN_ALIGN - nbyte;
- if (nbyte > elf_brk - elf_bss)
- nbyte = elf_brk - elf_bss;
- if (clear_user((void __user *)elf_bss +
- load_bias, nbyte)) {
- /*
- * This bss-zeroing can fail if the ELF
- * file specifies odd protections. So
- * we don't check the return value
- */
- }
- }
- }
-
- if (elf_ppnt->p_flags & PF_R)
- elf_prot |= PROT_READ;
- if (elf_ppnt->p_flags & PF_W)
- elf_prot |= PROT_WRITE;
- if (elf_ppnt->p_flags & PF_X)
- elf_prot |= PROT_EXEC;
+ elf_prot = make_prot(elf_ppnt->p_flags, &arch_state,
+ !!interpreter, false);
- elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
+ elf_flags = MAP_PRIVATE;
vaddr = elf_ppnt->p_vaddr;
- if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
+ /*
+ * The first time through the loop, first_pt_load is true:
+ * layout will be calculated. Once set, use MAP_FIXED since
+ * we know we've already safely mapped the entire region with
+ * MAP_FIXED_NOREPLACE in the once-per-binary logic following.
+ */
+ if (!first_pt_load) {
elf_flags |= MAP_FIXED;
- } else if (loc->elf_ex.e_type == ET_DYN) {
- /* Try and get dynamic programs out of the way of the
- * default mmap base, as well as whatever program they
- * might try to exec. This is because the brk will
- * follow the loader, and is not movable. */
-#ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
- /* Memory randomization might have been switched off
- * in runtime via sysctl or explicit setting of
- * personality flags.
- * If that is the case, retain the original non-zero
- * load_bias value in order to establish proper
- * non-randomized mappings.
+ } else if (elf_ex->e_type == ET_EXEC) {
+ /*
+ * This logic is run once for the first LOAD Program
+ * Header for ET_EXEC binaries. No special handling
+ * is needed.
*/
- if (current->flags & PF_RANDOMIZE)
- load_bias = 0;
- else
- load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
-#else
- load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
-#endif
+ elf_flags |= MAP_FIXED_NOREPLACE;
+ } else if (elf_ex->e_type == ET_DYN) {
+ /*
+ * This logic is run once for the first LOAD Program
+ * Header for ET_DYN binaries to calculate the
+ * randomization (load_bias) for all the LOAD
+ * Program Headers.
+ */
+
+ /*
+ * Calculate the entire size of the ELF mapping
+ * (total_size), used for the initial mapping,
+ * due to load_addr_set which is set to true later
+ * once the initial mapping is performed.
+ *
+ * Note that this is only sensible when the LOAD
+ * segments are contiguous (or overlapping). If
+ * used for LOADs that are far apart, this would
+ * cause the holes between LOADs to be mapped,
+ * running the risk of having the mapping fail,
+ * as it would be larger than the ELF file itself.
+ *
+ * As a result, only ET_DYN does this, since
+ * some ET_EXEC (e.g. ia64) may have large virtual
+ * memory holes between LOADs.
+ *
+ */
+ total_size = total_mapping_size(elf_phdata,
+ elf_ex->e_phnum);
+ if (!total_size) {
+ retval = -EINVAL;
+ goto out_free_dentry;
+ }
+
+ /* Calculate any requested alignment. */
+ alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum);
+
+ /**
+ * DOC: PIE handling
+ *
+ * There are effectively two types of ET_DYN ELF
+ * binaries: programs (i.e. PIE: ET_DYN with
+ * PT_INTERP) and loaders (i.e. static PIE: ET_DYN
+ * without PT_INTERP, usually the ELF interpreter
+ * itself). Loaders must be loaded away from programs
+ * since the program may otherwise collide with the
+ * loader (especially for ET_EXEC which does not have
+ * a randomized position).
+ *
+ * For example, to handle invocations of
+ * "./ld.so someprog" to test out a new version of
+ * the loader, the subsequent program that the
+ * loader loads must avoid the loader itself, so
+ * they cannot share the same load range. Sufficient
+ * room for the brk must be allocated with the
+ * loader as well, since brk must be available with
+ * the loader.
+ *
+ * Therefore, programs are loaded offset from
+ * ELF_ET_DYN_BASE and loaders are loaded into the
+ * independently randomized mmap region (0 load_bias
+ * without MAP_FIXED nor MAP_FIXED_NOREPLACE).
+ *
+ * See below for "brk" handling details, which is
+ * also affected by program vs loader and ASLR.
+ */
+ if (interpreter) {
+ /* On ET_DYN with PT_INTERP, we do the ASLR. */
+ load_bias = ELF_ET_DYN_BASE;
+ if (current->flags & PF_RANDOMIZE)
+ load_bias += arch_mmap_rnd();
+ /* Adjust alignment as requested. */
+ if (alignment)
+ load_bias &= ~(alignment - 1);
+ elf_flags |= MAP_FIXED_NOREPLACE;
+ } else {
+ /*
+ * For ET_DYN without PT_INTERP, we rely on
+ * the architectures's (potentially ASLR) mmap
+ * base address (via a load_bias of 0).
+ *
+ * When a large alignment is requested, we
+ * must do the allocation at address "0" right
+ * now to discover where things will load so
+ * that we can adjust the resulting alignment.
+ * In this case (load_bias != 0), we can use
+ * MAP_FIXED_NOREPLACE to make sure the mapping
+ * doesn't collide with anything.
+ */
+ if (alignment > ELF_MIN_ALIGN) {
+ load_bias = elf_load(bprm->file, 0, elf_ppnt,
+ elf_prot, elf_flags, total_size);
+ if (BAD_ADDR(load_bias)) {
+ retval = IS_ERR_VALUE(load_bias) ?
+ PTR_ERR((void*)load_bias) : -EINVAL;
+ goto out_free_dentry;
+ }
+ vm_munmap(load_bias, total_size);
+ /* Adjust alignment as requested. */
+ if (alignment)
+ load_bias &= ~(alignment - 1);
+ elf_flags |= MAP_FIXED_NOREPLACE;
+ } else
+ load_bias = 0;
+ }
+
+ /*
+ * Since load_bias is used for all subsequent loading
+ * calculations, we must lower it by the first vaddr
+ * so that the remaining calculations based on the
+ * ELF vaddrs will be correctly offset. The result
+ * is then page aligned.
+ */
+ load_bias = ELF_PAGESTART(load_bias - vaddr);
}
- error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
- elf_prot, elf_flags, 0);
+ error = elf_load(bprm->file, load_bias + vaddr, elf_ppnt,
+ elf_prot, elf_flags, total_size);
if (BAD_ADDR(error)) {
- send_sig(SIGKILL, current, 0);
- retval = IS_ERR((void *)error) ?
+ retval = IS_ERR_VALUE(error) ?
PTR_ERR((void*)error) : -EINVAL;
goto out_free_dentry;
}
- if (!load_addr_set) {
- load_addr_set = 1;
- load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
- if (loc->elf_ex.e_type == ET_DYN) {
+ if (first_pt_load) {
+ first_pt_load = 0;
+ if (elf_ex->e_type == ET_DYN) {
load_bias += error -
ELF_PAGESTART(load_bias + vaddr);
- load_addr += load_bias;
reloc_func_desc = load_bias;
}
}
+
+ /*
+ * Figure out which segment in the file contains the Program
+ * Header table, and map to the associated memory address.
+ */
+ if (elf_ppnt->p_offset <= elf_ex->e_phoff &&
+ elf_ex->e_phoff < elf_ppnt->p_offset + elf_ppnt->p_filesz) {
+ phdr_addr = elf_ex->e_phoff - elf_ppnt->p_offset +
+ elf_ppnt->p_vaddr;
+ }
+
k = elf_ppnt->p_vaddr;
- if (k < start_code)
+ if ((elf_ppnt->p_flags & PF_X) && k < start_code)
start_code = k;
if (start_data < k)
start_data = k;
@@ -853,15 +1227,12 @@ static int load_elf_binary(struct linux_binprm *bprm)
elf_ppnt->p_memsz > TASK_SIZE ||
TASK_SIZE - elf_ppnt->p_memsz < k) {
/* set_brk can never work. Avoid overflows. */
- send_sig(SIGKILL, current, 0);
retval = -EINVAL;
goto out_free_dentry;
}
k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
- if (k > elf_bss)
- elf_bss = k;
if ((elf_ppnt->p_flags & PF_X) && end_code < k)
end_code = k;
if (end_data < k)
@@ -871,60 +1242,42 @@ static int load_elf_binary(struct linux_binprm *bprm)
elf_brk = k;
}
- loc->elf_ex.e_entry += load_bias;
- elf_bss += load_bias;
+ e_entry = elf_ex->e_entry + load_bias;
+ phdr_addr += load_bias;
elf_brk += load_bias;
start_code += load_bias;
end_code += load_bias;
start_data += load_bias;
end_data += load_bias;
- /* Calling set_brk effectively mmaps the pages that we need
- * for the bss and break sections. We must do this before
- * mapping in the interpreter, to make sure it doesn't wind
- * up getting placed where the bss needs to go.
- */
- retval = set_brk(elf_bss, elf_brk);
- if (retval) {
- send_sig(SIGKILL, current, 0);
- goto out_free_dentry;
- }
- if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
- send_sig(SIGSEGV, current, 0);
- retval = -EFAULT; /* Nobody gets to see this, but.. */
- goto out_free_dentry;
- }
-
- if (elf_interpreter) {
- unsigned long interp_map_addr = 0;
-
- elf_entry = load_elf_interp(&loc->interp_elf_ex,
+ if (interpreter) {
+ elf_entry = load_elf_interp(interp_elf_ex,
interpreter,
- &interp_map_addr,
- load_bias);
- if (!IS_ERR((void *)elf_entry)) {
+ load_bias, interp_elf_phdata,
+ &arch_state);
+ if (!IS_ERR_VALUE(elf_entry)) {
/*
* load_elf_interp() returns relocation
* adjustment
*/
interp_load_addr = elf_entry;
- elf_entry += loc->interp_elf_ex.e_entry;
+ elf_entry += interp_elf_ex->e_entry;
}
if (BAD_ADDR(elf_entry)) {
- force_sig(SIGSEGV, current);
- retval = IS_ERR((void *)elf_entry) ?
+ retval = IS_ERR_VALUE(elf_entry) ?
(int)elf_entry : -EINVAL;
goto out_free_dentry;
}
reloc_func_desc = interp_load_addr;
- allow_write_access(interpreter);
+ exe_file_allow_write_access(interpreter);
fput(interpreter);
- kfree(elf_interpreter);
+
+ kfree(interp_elf_ex);
+ kfree(interp_elf_phdata);
} else {
- elf_entry = loc->elf_ex.e_entry;
+ elf_entry = e_entry;
if (BAD_ADDR(elf_entry)) {
- force_sig(SIGSEGV, current);
retval = -EINVAL;
goto out_free_dentry;
}
@@ -935,35 +1288,62 @@ static int load_elf_binary(struct linux_binprm *bprm)
set_binfmt(&elf_format);
#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
- retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
- if (retval < 0) {
- send_sig(SIGKILL, current, 0);
+ retval = ARCH_SETUP_ADDITIONAL_PAGES(bprm, elf_ex, !!interpreter);
+ if (retval < 0)
goto out;
- }
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
- install_exec_creds(bprm);
- retval = create_elf_tables(bprm, &loc->elf_ex,
- load_addr, interp_load_addr);
- if (retval < 0) {
- send_sig(SIGKILL, current, 0);
+ retval = create_elf_tables(bprm, elf_ex, interp_load_addr,
+ e_entry, phdr_addr);
+ if (retval < 0)
goto out;
+
+ mm = current->mm;
+ mm->end_code = end_code;
+ mm->start_code = start_code;
+ mm->start_data = start_data;
+ mm->end_data = end_data;
+ mm->start_stack = bprm->p;
+
+ elf_coredump_set_mm_eflags(mm, elf_ex->e_flags);
+
+ /**
+ * DOC: "brk" handling
+ *
+ * For architectures with ELF randomization, when executing a
+ * loader directly (i.e. static PIE: ET_DYN without PT_INTERP),
+ * move the brk area out of the mmap region and into the unused
+ * ELF_ET_DYN_BASE region. Since "brk" grows up it may collide
+ * early with the stack growing down or other regions being put
+ * into the mmap region by the kernel (e.g. vdso).
+ *
+ * In the CONFIG_COMPAT_BRK case, though, everything is turned
+ * off because we're not allowed to move the brk at all.
+ */
+ if (!IS_ENABLED(CONFIG_COMPAT_BRK) &&
+ IS_ENABLED(CONFIG_ARCH_HAS_ELF_RANDOMIZE) &&
+ elf_ex->e_type == ET_DYN && !interpreter) {
+ elf_brk = ELF_ET_DYN_BASE;
+ /* This counts as moving the brk, so let brk(2) know. */
+ brk_moved = true;
}
- /* N.B. passed_fileno might not be initialized? */
- current->mm->end_code = end_code;
- current->mm->start_code = start_code;
- current->mm->start_data = start_data;
- current->mm->end_data = end_data;
- current->mm->start_stack = bprm->p;
-
-#ifdef arch_randomize_brk
- if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
- current->mm->brk = current->mm->start_brk =
- arch_randomize_brk(current->mm);
-#ifdef CONFIG_COMPAT_BRK
- current->brk_randomized = 1;
-#endif
+ mm->start_brk = mm->brk = ELF_PAGEALIGN(elf_brk);
+
+ if ((current->flags & PF_RANDOMIZE) && snapshot_randomize_va_space > 1) {
+ /*
+ * If we didn't move the brk to ELF_ET_DYN_BASE (above),
+ * leave a gap between .bss and brk.
+ */
+ if (!brk_moved)
+ mm->brk = mm->start_brk = mm->brk + PAGE_SIZE;
+
+ mm->brk = mm->start_brk = arch_randomize_brk(mm);
+ brk_moved = true;
}
+
+#ifdef compat_brk_randomized
+ if (brk_moved)
+ current->brk_randomized = 1;
#endif
if (current->personality & MMAP_PAGE_ZERO) {
@@ -973,8 +1353,14 @@ static int load_elf_binary(struct linux_binprm *bprm)
emulate the SVr4 behavior. Sigh. */
error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
MAP_FIXED | MAP_PRIVATE, 0);
+
+ retval = do_mseal(0, PAGE_SIZE, 0);
+ if (retval)
+ pr_warn_ratelimited("pid=%d, couldn't seal address 0, ret=%d.\n",
+ task_pid_nr(current), retval);
}
+ regs = current_pt_regs();
#ifdef ELF_PLAT_INIT
/*
* The ABI may specify that certain registers be set up in special
@@ -989,104 +1375,25 @@ static int load_elf_binary(struct linux_binprm *bprm)
ELF_PLAT_INIT(regs, reloc_func_desc);
#endif
- start_thread(regs, elf_entry, bprm->p);
+ finalize_exec(bprm);
+ START_THREAD(elf_ex, regs, elf_entry, bprm->p);
retval = 0;
out:
- kfree(loc);
-out_ret:
return retval;
/* error cleanup */
out_free_dentry:
- allow_write_access(interpreter);
+ kfree(interp_elf_ex);
+ kfree(interp_elf_phdata);
+out_free_file:
+ exe_file_allow_write_access(interpreter);
if (interpreter)
fput(interpreter);
-out_free_interp:
- kfree(elf_interpreter);
out_free_ph:
kfree(elf_phdata);
goto out;
}
-/* This is really simpleminded and specialized - we are loading an
- a.out library that is given an ELF header. */
-static int load_elf_library(struct file *file)
-{
- struct elf_phdr *elf_phdata;
- struct elf_phdr *eppnt;
- unsigned long elf_bss, bss, len;
- int retval, error, i, j;
- struct elfhdr elf_ex;
-
- error = -ENOEXEC;
- retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
- if (retval != sizeof(elf_ex))
- goto out;
-
- if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
- goto out;
-
- /* First of all, some simple consistency checks */
- if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
- !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
- goto out;
-
- /* Now read in all of the header information */
-
- j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
- /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
-
- error = -ENOMEM;
- elf_phdata = kmalloc(j, GFP_KERNEL);
- if (!elf_phdata)
- goto out;
-
- eppnt = elf_phdata;
- error = -ENOEXEC;
- retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
- if (retval != j)
- goto out_free_ph;
-
- for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
- if ((eppnt + i)->p_type == PT_LOAD)
- j++;
- if (j != 1)
- goto out_free_ph;
-
- while (eppnt->p_type != PT_LOAD)
- eppnt++;
-
- /* Now use mmap to map the library into memory. */
- error = vm_mmap(file,
- ELF_PAGESTART(eppnt->p_vaddr),
- (eppnt->p_filesz +
- ELF_PAGEOFFSET(eppnt->p_vaddr)),
- PROT_READ | PROT_WRITE | PROT_EXEC,
- MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
- (eppnt->p_offset -
- ELF_PAGEOFFSET(eppnt->p_vaddr)));
- if (error != ELF_PAGESTART(eppnt->p_vaddr))
- goto out_free_ph;
-
- elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
- if (padzero(elf_bss)) {
- error = -EFAULT;
- goto out_free_ph;
- }
-
- len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
- ELF_MIN_ALIGN - 1);
- bss = eppnt->p_memsz + eppnt->p_vaddr;
- if (bss > len)
- vm_brk(len, bss - len);
- error = 0;
-
-out_free_ph:
- kfree(elf_phdata);
-out:
- return error;
-}
-
#ifdef CONFIG_ELF_CORE
/*
* ELF core dumper
@@ -1095,116 +1402,6 @@ out:
* Jeremy Fitzhardinge <jeremy@sw.oz.au>
*/
-/*
- * The purpose of always_dump_vma() is to make sure that special kernel mappings
- * that are useful for post-mortem analysis are included in every core dump.
- * In that way we ensure that the core dump is fully interpretable later
- * without matching up the same kernel and hardware config to see what PC values
- * meant. These special mappings include - vDSO, vsyscall, and other
- * architecture specific mappings
- */
-static bool always_dump_vma(struct vm_area_struct *vma)
-{
- /* Any vsyscall mappings? */
- if (vma == get_gate_vma(vma->vm_mm))
- return true;
- /*
- * arch_vma_name() returns non-NULL for special architecture mappings,
- * such as vDSO sections.
- */
- if (arch_vma_name(vma))
- return true;
-
- return false;
-}
-
-/*
- * Decide what to dump of a segment, part, all or none.
- */
-static unsigned long vma_dump_size(struct vm_area_struct *vma,
- unsigned long mm_flags)
-{
-#define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type))
-
- /* always dump the vdso and vsyscall sections */
- if (always_dump_vma(vma))
- goto whole;
-
- if (vma->vm_flags & VM_DONTDUMP)
- return 0;
-
- /* Hugetlb memory check */
- if (vma->vm_flags & VM_HUGETLB) {
- if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
- goto whole;
- if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
- goto whole;
- return 0;
- }
-
- /* Do not dump I/O mapped devices or special mappings */
- if (vma->vm_flags & VM_IO)
- return 0;
-
- /* By default, dump shared memory if mapped from an anonymous file. */
- if (vma->vm_flags & VM_SHARED) {
- if (file_inode(vma->vm_file)->i_nlink == 0 ?
- FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
- goto whole;
- return 0;
- }
-
- /* Dump segments that have been written to. */
- if (vma->anon_vma && FILTER(ANON_PRIVATE))
- goto whole;
- if (vma->vm_file == NULL)
- return 0;
-
- if (FILTER(MAPPED_PRIVATE))
- goto whole;
-
- /*
- * If this looks like the beginning of a DSO or executable mapping,
- * check for an ELF header. If we find one, dump the first page to
- * aid in determining what was mapped here.
- */
- if (FILTER(ELF_HEADERS) &&
- vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
- u32 __user *header = (u32 __user *) vma->vm_start;
- u32 word;
- mm_segment_t fs = get_fs();
- /*
- * Doing it this way gets the constant folded by GCC.
- */
- union {
- u32 cmp;
- char elfmag[SELFMAG];
- } magic;
- BUILD_BUG_ON(SELFMAG != sizeof word);
- magic.elfmag[EI_MAG0] = ELFMAG0;
- magic.elfmag[EI_MAG1] = ELFMAG1;
- magic.elfmag[EI_MAG2] = ELFMAG2;
- magic.elfmag[EI_MAG3] = ELFMAG3;
- /*
- * Switch to the user "segment" for get_user(),
- * then put back what elf_core_dump() had in place.
- */
- set_fs(USER_DS);
- if (unlikely(get_user(word, header)))
- word = 0;
- set_fs(fs);
- if (word == magic.cmp)
- return PAGE_SIZE;
- }
-
-#undef FILTER
-
- return 0;
-
-whole:
- return vma->vm_end - vma->vm_start;
-}
-
/* An ELF note in memory */
struct memelfnote
{
@@ -1225,35 +1422,17 @@ static int notesize(struct memelfnote *en)
return sz;
}
-#define DUMP_WRITE(addr, nr, foffset) \
- do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)
-
-static int alignfile(struct file *file, loff_t *foffset)
-{
- static const char buf[4] = { 0, };
- DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
- return 1;
-}
-
-static int writenote(struct memelfnote *men, struct file *file,
- loff_t *foffset)
+static int writenote(struct memelfnote *men, struct coredump_params *cprm)
{
struct elf_note en;
en.n_namesz = strlen(men->name) + 1;
en.n_descsz = men->datasz;
en.n_type = men->type;
- DUMP_WRITE(&en, sizeof(en), foffset);
- DUMP_WRITE(men->name, en.n_namesz, foffset);
- if (!alignfile(file, foffset))
- return 0;
- DUMP_WRITE(men->data, men->datasz, foffset);
- if (!alignfile(file, foffset))
- return 0;
-
- return 1;
+ return dump_emit(cprm, &en, sizeof(en)) &&
+ dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
+ dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
}
-#undef DUMP_WRITE
static void fill_elf_header(struct elfhdr *elf, int segs,
u16 machine, u32 flags)
@@ -1274,8 +1453,6 @@ static void fill_elf_header(struct elfhdr *elf, int segs,
elf->e_ehsize = sizeof(struct elfhdr);
elf->e_phentsize = sizeof(struct elf_phdr);
elf->e_phnum = segs;
-
- return;
}
static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
@@ -1287,25 +1464,26 @@ static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
phdr->p_filesz = sz;
phdr->p_memsz = 0;
phdr->p_flags = 0;
- phdr->p_align = 0;
- return;
+ phdr->p_align = 4;
}
-static void fill_note(struct memelfnote *note, const char *name, int type,
- unsigned int sz, void *data)
+static void __fill_note(struct memelfnote *note, const char *name, int type,
+ unsigned int sz, void *data)
{
note->name = name;
note->type = type;
note->datasz = sz;
note->data = data;
- return;
}
+#define fill_note(note, type, sz, data) \
+ __fill_note(note, NN_ ## type, NT_ ## type, sz, data)
+
/*
* fill up all the fields in prstatus from the given task struct, except
* registers which need to be filled up separately.
*/
-static void fill_prstatus(struct elf_prstatus *prstatus,
+static void fill_prstatus(struct elf_prstatus_common *prstatus,
struct task_struct *p, long signr)
{
prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
@@ -1325,17 +1503,18 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
* group-wide total, not its individual thread total.
*/
thread_group_cputime(p, &cputime);
- cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
- cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
+ prstatus->pr_utime = ns_to_kernel_old_timeval(cputime.utime);
+ prstatus->pr_stime = ns_to_kernel_old_timeval(cputime.stime);
} else {
- cputime_t utime, stime;
+ u64 utime, stime;
task_cputime(p, &utime, &stime);
- cputime_to_timeval(utime, &prstatus->pr_utime);
- cputime_to_timeval(stime, &prstatus->pr_stime);
+ prstatus->pr_utime = ns_to_kernel_old_timeval(utime);
+ prstatus->pr_stime = ns_to_kernel_old_timeval(stime);
}
- cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
- cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
+
+ prstatus->pr_cutime = ns_to_kernel_old_timeval(p->signal->cutime);
+ prstatus->pr_cstime = ns_to_kernel_old_timeval(p->signal->cstime);
}
static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
@@ -1343,7 +1522,8 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
{
const struct cred *cred;
unsigned int i, len;
-
+ unsigned int state;
+
/* first copy the parameters from user space */
memset(psinfo, 0, sizeof(struct elf_prpsinfo));
@@ -1365,7 +1545,8 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
psinfo->pr_pgrp = task_pgrp_vnr(p);
psinfo->pr_sid = task_session_vnr(p);
- i = p->state ? ffz(~p->state) + 1 : 0;
+ state = READ_ONCE(p->__state);
+ i = state ? ffz(~state) + 1 : 0;
psinfo->pr_state = i;
psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
psinfo->pr_zomb = psinfo->pr_sname == 'Z';
@@ -1376,8 +1557,8 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
rcu_read_unlock();
- strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
-
+ get_task_comm(psinfo->pr_fname, p);
+
return 0;
}
@@ -1388,20 +1569,16 @@ static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
do
i += 2;
while (auxv[i - 2] != AT_NULL);
- fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
+ fill_note(note, AUXV, i * sizeof(elf_addr_t), auxv);
}
static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
- siginfo_t *siginfo)
+ const kernel_siginfo_t *siginfo)
{
- mm_segment_t old_fs = get_fs();
- set_fs(KERNEL_DS);
- copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
- set_fs(old_fs);
- fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
+ copy_siginfo_to_external(csigdata, siginfo);
+ fill_note(note, SIGINFO, sizeof(*csigdata), csigdata);
}
-#define MAX_FILE_NOTE_SIZE (4*1024*1024)
/*
* Format of NT_FILE note:
*
@@ -1413,58 +1590,69 @@ static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
* long file_ofs
* followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
*/
-static void fill_files_note(struct memelfnote *note)
+static int fill_files_note(struct memelfnote *note, struct coredump_params *cprm)
{
- struct vm_area_struct *vma;
unsigned count, size, names_ofs, remaining, n;
user_long_t *data;
user_long_t *start_end_ofs;
char *name_base, *name_curpos;
+ int i;
/* *Estimated* file count and total data size needed */
- count = current->mm->map_count;
+ count = cprm->vma_count;
+ if (count > UINT_MAX / 64)
+ return -EINVAL;
size = count * 64;
names_ofs = (2 + 3 * count) * sizeof(data[0]);
alloc:
- if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
- goto err;
+ /* paranoia check */
+ if (size >= core_file_note_size_limit) {
+ pr_warn_once("coredump Note size too large: %u (does kernel.core_file_note_size_limit sysctl need adjustment?\n",
+ size);
+ return -EINVAL;
+ }
size = round_up(size, PAGE_SIZE);
- data = vmalloc(size);
- if (!data)
- goto err;
+ /*
+ * "size" can be 0 here legitimately.
+ * Let it ENOMEM and omit NT_FILE section which will be empty anyway.
+ */
+ data = kvmalloc(size, GFP_KERNEL);
+ if (ZERO_OR_NULL_PTR(data))
+ return -ENOMEM;
start_end_ofs = data + 2;
name_base = name_curpos = ((char *)data) + names_ofs;
remaining = size - names_ofs;
count = 0;
- for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
+ for (i = 0; i < cprm->vma_count; i++) {
+ struct core_vma_metadata *m = &cprm->vma_meta[i];
struct file *file;
const char *filename;
- file = vma->vm_file;
+ file = m->file;
if (!file)
continue;
- filename = d_path(&file->f_path, name_curpos, remaining);
+ filename = file_path(file, name_curpos, remaining);
if (IS_ERR(filename)) {
if (PTR_ERR(filename) == -ENAMETOOLONG) {
- vfree(data);
+ kvfree(data);
size = size * 5 / 4;
goto alloc;
}
continue;
}
- /* d_path() fills at the end, move name down */
+ /* file_path() fills at the end, move name down */
/* n = strlen(filename) + 1: */
n = (name_curpos + remaining) - filename;
remaining = filename - name_curpos;
memmove(name_curpos, filename, n);
name_curpos += n;
- *start_end_ofs++ = vma->vm_start;
- *start_end_ofs++ = vma->vm_end;
- *start_end_ofs++ = vma->vm_pgoff;
+ *start_end_ofs++ = m->start;
+ *start_end_ofs++ = m->end;
+ *start_end_ofs++ = m->pgoff;
count++;
}
@@ -1472,10 +1660,10 @@ static void fill_files_note(struct memelfnote *note)
data[0] = count;
data[1] = PAGE_SIZE;
/*
- * Count usually is less than current->mm->map_count,
+ * Count usually is less than mm->map_count,
* we need to move filenames down.
*/
- n = current->mm->map_count - count;
+ n = cprm->vma_count - count;
if (n != 0) {
unsigned shift_bytes = n * 3 * sizeof(data[0]);
memmove(name_base - shift_bytes, name_base,
@@ -1484,18 +1672,17 @@ static void fill_files_note(struct memelfnote *note)
}
size = name_curpos - (char *)data;
- fill_note(note, "CORE", NT_FILE, size, data);
- err: ;
+ fill_note(note, FILE, size, data);
+ return 0;
}
-#ifdef CORE_DUMP_USE_REGSET
#include <linux/regset.h>
struct elf_thread_core_info {
struct elf_thread_core_info *next;
struct task_struct *task;
struct elf_prstatus prstatus;
- struct memelfnote notes[0];
+ struct memelfnote notes[];
};
struct elf_note_info {
@@ -1509,6 +1696,7 @@ struct elf_note_info {
int thread_notes;
};
+#ifdef CORE_DUMP_USE_REGSET
/*
* When a regset has a writeback hook, we call it on each thread before
* dumping user memory. On register window machines, this makes sure the
@@ -1521,109 +1709,134 @@ static void do_thread_regset_writeback(struct task_struct *task,
regset->writeback(task, regset, 1);
}
-#ifndef PR_REG_SIZE
-#define PR_REG_SIZE(S) sizeof(S)
-#endif
-
#ifndef PRSTATUS_SIZE
-#define PRSTATUS_SIZE(S) sizeof(S)
-#endif
-
-#ifndef PR_REG_PTR
-#define PR_REG_PTR(S) (&((S)->pr_reg))
+#define PRSTATUS_SIZE sizeof(struct elf_prstatus)
#endif
#ifndef SET_PR_FPVALID
-#define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
+#define SET_PR_FPVALID(S) ((S)->pr_fpvalid = 1)
#endif
static int fill_thread_core_info(struct elf_thread_core_info *t,
const struct user_regset_view *view,
- long signr, size_t *total)
+ long signr, struct elf_note_info *info)
{
- unsigned int i;
+ unsigned int note_iter, view_iter;
/*
* NT_PRSTATUS is the one special case, because the regset data
* goes into the pr_reg field inside the note contents, rather
- * than being the whole note contents. We fill the reset in here.
+ * than being the whole note contents. We fill the regset in here.
* We assume that regset 0 is NT_PRSTATUS.
*/
- fill_prstatus(&t->prstatus, t->task, signr);
- (void) view->regsets[0].get(t->task, &view->regsets[0],
- 0, PR_REG_SIZE(t->prstatus.pr_reg),
- PR_REG_PTR(&t->prstatus), NULL);
+ fill_prstatus(&t->prstatus.common, t->task, signr);
+ regset_get(t->task, &view->regsets[0],
+ sizeof(t->prstatus.pr_reg), &t->prstatus.pr_reg);
- fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
- PRSTATUS_SIZE(t->prstatus), &t->prstatus);
- *total += notesize(&t->notes[0]);
+ fill_note(&t->notes[0], PRSTATUS, PRSTATUS_SIZE, &t->prstatus);
+ info->size += notesize(&t->notes[0]);
do_thread_regset_writeback(t->task, &view->regsets[0]);
/*
* Each other regset might generate a note too. For each regset
- * that has no core_note_type or is inactive, we leave t->notes[i]
- * all zero and we'll know to skip writing it later.
+ * that has no core_note_type or is inactive, skip it.
*/
- for (i = 1; i < view->n; ++i) {
- const struct user_regset *regset = &view->regsets[i];
+ note_iter = 1;
+ for (view_iter = 1; view_iter < view->n; ++view_iter) {
+ const struct user_regset *regset = &view->regsets[view_iter];
+ int note_type = regset->core_note_type;
+ const char *note_name = regset->core_note_name;
+ bool is_fpreg = note_type == NT_PRFPREG;
+ void *data;
+ int ret;
+
do_thread_regset_writeback(t->task, regset);
- if (regset->core_note_type && regset->get &&
- (!regset->active || regset->active(t->task, regset))) {
- int ret;
- size_t size = regset->n * regset->size;
- void *data = kmalloc(size, GFP_KERNEL);
- if (unlikely(!data))
- return 0;
- ret = regset->get(t->task, regset,
- 0, size, data, NULL);
- if (unlikely(ret))
- kfree(data);
- else {
- if (regset->core_note_type != NT_PRFPREG)
- fill_note(&t->notes[i], "LINUX",
- regset->core_note_type,
- size, data);
- else {
- SET_PR_FPVALID(&t->prstatus, 1);
- fill_note(&t->notes[i], "CORE",
- NT_PRFPREG, size, data);
- }
- *total += notesize(&t->notes[i]);
- }
- }
+ if (!note_type) // not for coredumps
+ continue;
+ if (regset->active && regset->active(t->task, regset) <= 0)
+ continue;
+
+ ret = regset_get_alloc(t->task, regset, ~0U, &data);
+ if (ret < 0)
+ continue;
+
+ if (WARN_ON_ONCE(note_iter >= info->thread_notes))
+ break;
+
+ if (is_fpreg)
+ SET_PR_FPVALID(&t->prstatus);
+
+ /* There should be a note name, but if not, guess: */
+ if (WARN_ON_ONCE(!note_name))
+ note_name = "LINUX";
+ else
+ /* Warn on non-legacy-compatible names, for now. */
+ WARN_ON_ONCE(strcmp(note_name,
+ is_fpreg ? "CORE" : "LINUX"));
+
+ __fill_note(&t->notes[note_iter], note_name, note_type,
+ ret, data);
+
+ info->size += notesize(&t->notes[note_iter]);
+ note_iter++;
}
return 1;
}
+#else
+static int fill_thread_core_info(struct elf_thread_core_info *t,
+ const struct user_regset_view *view,
+ long signr, struct elf_note_info *info)
+{
+ struct task_struct *p = t->task;
+ elf_fpregset_t *fpu;
+
+ fill_prstatus(&t->prstatus.common, p, signr);
+ elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
+
+ fill_note(&t->notes[0], PRSTATUS, sizeof(t->prstatus), &t->prstatus);
+ info->size += notesize(&t->notes[0]);
+
+ fpu = kzalloc(sizeof(elf_fpregset_t), GFP_KERNEL);
+ if (!fpu || !elf_core_copy_task_fpregs(p, fpu)) {
+ kfree(fpu);
+ return 1;
+ }
+
+ t->prstatus.pr_fpvalid = 1;
+ fill_note(&t->notes[1], PRFPREG, sizeof(*fpu), fpu);
+ info->size += notesize(&t->notes[1]);
+
+ return 1;
+}
+#endif
static int fill_note_info(struct elfhdr *elf, int phdrs,
struct elf_note_info *info,
- siginfo_t *siginfo, struct pt_regs *regs)
+ struct coredump_params *cprm)
{
struct task_struct *dump_task = current;
- const struct user_regset_view *view = task_user_regset_view(dump_task);
+ const struct user_regset_view *view;
struct elf_thread_core_info *t;
struct elf_prpsinfo *psinfo;
struct core_thread *ct;
- unsigned int i;
-
- info->size = 0;
- info->thread = NULL;
+ u16 machine;
+ u32 flags;
psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
- if (psinfo == NULL) {
- info->psinfo.data = NULL; /* So we don't free this wrongly */
+ if (!psinfo)
return 0;
- }
+ fill_note(&info->psinfo, PRPSINFO, sizeof(*psinfo), psinfo);
- fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
+#ifdef CORE_DUMP_USE_REGSET
+ view = task_user_regset_view(dump_task);
/*
* Figure out how many notes we're going to need for each thread.
*/
info->thread_notes = 0;
- for (i = 0; i < view->n; ++i)
+ for (int i = 0; i < view->n; ++i)
if (view->regsets[i].core_note_type != 0)
++info->thread_notes;
@@ -1637,41 +1850,51 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
return 0;
}
+ machine = view->e_machine;
+ flags = view->e_flags;
+#else
+ view = NULL;
+ info->thread_notes = 2;
+ machine = ELF_ARCH;
+ flags = ELF_CORE_EFLAGS;
+#endif
+
+ /*
+ * Override ELF e_flags with value taken from process,
+ * if arch needs that.
+ */
+ flags = elf_coredump_get_mm_eflags(dump_task->mm, flags);
+
/*
* Initialize the ELF file header.
*/
- fill_elf_header(elf, phdrs,
- view->e_machine, view->e_flags);
+ fill_elf_header(elf, phdrs, machine, flags);
/*
* Allocate a structure for each thread.
*/
- for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
- t = kzalloc(offsetof(struct elf_thread_core_info,
- notes[info->thread_notes]),
+ info->thread = kzalloc(struct_size(info->thread, notes, info->thread_notes),
+ GFP_KERNEL);
+ if (unlikely(!info->thread))
+ return 0;
+
+ info->thread->task = dump_task;
+ for (ct = dump_task->signal->core_state->dumper.next; ct; ct = ct->next) {
+ t = kzalloc(struct_size(t, notes, info->thread_notes),
GFP_KERNEL);
if (unlikely(!t))
return 0;
t->task = ct->task;
- if (ct->task == dump_task || !info->thread) {
- t->next = info->thread;
- info->thread = t;
- } else {
- /*
- * Make sure to keep the original task at
- * the head of the list.
- */
- t->next = info->thread->next;
- info->thread->next = t;
- }
+ t->next = info->thread->next;
+ info->thread->next = t;
}
/*
* Now fill in each thread's information.
*/
for (t = info->thread; t != NULL; t = t->next)
- if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
+ if (!fill_thread_core_info(t, view, cprm->siginfo->si_signo, info))
return 0;
/*
@@ -1680,54 +1903,50 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
info->size += notesize(&info->psinfo);
- fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
+ fill_siginfo_note(&info->signote, &info->csigdata, cprm->siginfo);
info->size += notesize(&info->signote);
fill_auxv_note(&info->auxv, current->mm);
info->size += notesize(&info->auxv);
- fill_files_note(&info->files);
- info->size += notesize(&info->files);
+ if (fill_files_note(&info->files, cprm) == 0)
+ info->size += notesize(&info->files);
return 1;
}
-static size_t get_note_info_size(struct elf_note_info *info)
-{
- return info->size;
-}
-
/*
* Write all the notes for each thread. When writing the first thread, the
* process-wide notes are interleaved after the first thread-specific note.
*/
static int write_note_info(struct elf_note_info *info,
- struct file *file, loff_t *foffset)
+ struct coredump_params *cprm)
{
- bool first = 1;
+ bool first = true;
struct elf_thread_core_info *t = info->thread;
do {
int i;
- if (!writenote(&t->notes[0], file, foffset))
+ if (!writenote(&t->notes[0], cprm))
return 0;
- if (first && !writenote(&info->psinfo, file, foffset))
+ if (first && !writenote(&info->psinfo, cprm))
return 0;
- if (first && !writenote(&info->signote, file, foffset))
+ if (first && !writenote(&info->signote, cprm))
return 0;
- if (first && !writenote(&info->auxv, file, foffset))
+ if (first && !writenote(&info->auxv, cprm))
return 0;
- if (first && !writenote(&info->files, file, foffset))
+ if (first && info->files.data &&
+ !writenote(&info->files, cprm))
return 0;
for (i = 1; i < info->thread_notes; ++i)
if (t->notes[i].data &&
- !writenote(&t->notes[i], file, foffset))
+ !writenote(&t->notes[i], cprm))
return 0;
- first = 0;
+ first = false;
t = t->next;
} while (t);
@@ -1743,261 +1962,11 @@ static void free_note_info(struct elf_note_info *info)
threads = t->next;
WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
for (i = 1; i < info->thread_notes; ++i)
- kfree(t->notes[i].data);
+ kvfree(t->notes[i].data);
kfree(t);
}
kfree(info->psinfo.data);
- vfree(info->files.data);
-}
-
-#else
-
-/* Here is the structure in which status of each thread is captured. */
-struct elf_thread_status
-{
- struct list_head list;
- struct elf_prstatus prstatus; /* NT_PRSTATUS */
- elf_fpregset_t fpu; /* NT_PRFPREG */
- struct task_struct *thread;
-#ifdef ELF_CORE_COPY_XFPREGS
- elf_fpxregset_t xfpu; /* ELF_CORE_XFPREG_TYPE */
-#endif
- struct memelfnote notes[3];
- int num_notes;
-};
-
-/*
- * In order to add the specific thread information for the elf file format,
- * we need to keep a linked list of every threads pr_status and then create
- * a single section for them in the final core file.
- */
-static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
-{
- int sz = 0;
- struct task_struct *p = t->thread;
- t->num_notes = 0;
-
- fill_prstatus(&t->prstatus, p, signr);
- elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
-
- fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
- &(t->prstatus));
- t->num_notes++;
- sz += notesize(&t->notes[0]);
-
- if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
- &t->fpu))) {
- fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
- &(t->fpu));
- t->num_notes++;
- sz += notesize(&t->notes[1]);
- }
-
-#ifdef ELF_CORE_COPY_XFPREGS
- if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
- fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
- sizeof(t->xfpu), &t->xfpu);
- t->num_notes++;
- sz += notesize(&t->notes[2]);
- }
-#endif
- return sz;
-}
-
-struct elf_note_info {
- struct memelfnote *notes;
- struct elf_prstatus *prstatus; /* NT_PRSTATUS */
- struct elf_prpsinfo *psinfo; /* NT_PRPSINFO */
- struct list_head thread_list;
- elf_fpregset_t *fpu;
-#ifdef ELF_CORE_COPY_XFPREGS
- elf_fpxregset_t *xfpu;
-#endif
- user_siginfo_t csigdata;
- int thread_status_size;
- int numnote;
-};
-
-static int elf_note_info_init(struct elf_note_info *info)
-{
- memset(info, 0, sizeof(*info));
- INIT_LIST_HEAD(&info->thread_list);
-
- /* Allocate space for ELF notes */
- info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
- if (!info->notes)
- return 0;
- info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
- if (!info->psinfo)
- return 0;
- info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
- if (!info->prstatus)
- return 0;
- info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
- if (!info->fpu)
- return 0;
-#ifdef ELF_CORE_COPY_XFPREGS
- info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
- if (!info->xfpu)
- return 0;
-#endif
- return 1;
-}
-
-static int fill_note_info(struct elfhdr *elf, int phdrs,
- struct elf_note_info *info,
- siginfo_t *siginfo, struct pt_regs *regs)
-{
- struct list_head *t;
-
- if (!elf_note_info_init(info))
- return 0;
-
- if (siginfo->si_signo) {
- struct core_thread *ct;
- struct elf_thread_status *ets;
-
- for (ct = current->mm->core_state->dumper.next;
- ct; ct = ct->next) {
- ets = kzalloc(sizeof(*ets), GFP_KERNEL);
- if (!ets)
- return 0;
-
- ets->thread = ct->task;
- list_add(&ets->list, &info->thread_list);
- }
-
- list_for_each(t, &info->thread_list) {
- int sz;
-
- ets = list_entry(t, struct elf_thread_status, list);
- sz = elf_dump_thread_status(siginfo->si_signo, ets);
- info->thread_status_size += sz;
- }
- }
- /* now collect the dump for the current */
- memset(info->prstatus, 0, sizeof(*info->prstatus));
- fill_prstatus(info->prstatus, current, siginfo->si_signo);
- elf_core_copy_regs(&info->prstatus->pr_reg, regs);
-
- /* Set up header */
- fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
-
- /*
- * Set up the notes in similar form to SVR4 core dumps made
- * with info from their /proc.
- */
-
- fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
- sizeof(*info->prstatus), info->prstatus);
- fill_psinfo(info->psinfo, current->group_leader, current->mm);
- fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
- sizeof(*info->psinfo), info->psinfo);
-
- fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
- fill_auxv_note(info->notes + 3, current->mm);
- fill_files_note(info->notes + 4);
-
- info->numnote = 5;
-
- /* Try to dump the FPU. */
- info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
- info->fpu);
- if (info->prstatus->pr_fpvalid)
- fill_note(info->notes + info->numnote++,
- "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
-#ifdef ELF_CORE_COPY_XFPREGS
- if (elf_core_copy_task_xfpregs(current, info->xfpu))
- fill_note(info->notes + info->numnote++,
- "LINUX", ELF_CORE_XFPREG_TYPE,
- sizeof(*info->xfpu), info->xfpu);
-#endif
-
- return 1;
-}
-
-static size_t get_note_info_size(struct elf_note_info *info)
-{
- int sz = 0;
- int i;
-
- for (i = 0; i < info->numnote; i++)
- sz += notesize(info->notes + i);
-
- sz += info->thread_status_size;
-
- return sz;
-}
-
-static int write_note_info(struct elf_note_info *info,
- struct file *file, loff_t *foffset)
-{
- int i;
- struct list_head *t;
-
- for (i = 0; i < info->numnote; i++)
- if (!writenote(info->notes + i, file, foffset))
- return 0;
-
- /* write out the thread status notes section */
- list_for_each(t, &info->thread_list) {
- struct elf_thread_status *tmp =
- list_entry(t, struct elf_thread_status, list);
-
- for (i = 0; i < tmp->num_notes; i++)
- if (!writenote(&tmp->notes[i], file, foffset))
- return 0;
- }
-
- return 1;
-}
-
-static void free_note_info(struct elf_note_info *info)
-{
- while (!list_empty(&info->thread_list)) {
- struct list_head *tmp = info->thread_list.next;
- list_del(tmp);
- kfree(list_entry(tmp, struct elf_thread_status, list));
- }
-
- /* Free data allocated by fill_files_note(): */
- vfree(info->notes[4].data);
-
- kfree(info->prstatus);
- kfree(info->psinfo);
- kfree(info->notes);
- kfree(info->fpu);
-#ifdef ELF_CORE_COPY_XFPREGS
- kfree(info->xfpu);
-#endif
-}
-
-#endif
-
-static struct vm_area_struct *first_vma(struct task_struct *tsk,
- struct vm_area_struct *gate_vma)
-{
- struct vm_area_struct *ret = tsk->mm->mmap;
-
- if (ret)
- return ret;
- return gate_vma;
-}
-/*
- * Helper function for iterating across a vma list. It ensures that the caller
- * will visit `gate_vma' prior to terminating the search.
- */
-static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
- struct vm_area_struct *gate_vma)
-{
- struct vm_area_struct *ret;
-
- ret = this_vma->vm_next;
- if (ret)
- return ret;
- if (this_vma == gate_vma)
- return NULL;
- return gate_vma;
+ kvfree(info->files.data);
}
static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
@@ -2016,18 +1985,6 @@ static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
shdr4extnum->sh_info = segs;
}
-static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
- unsigned long mm_flags)
-{
- struct vm_area_struct *vma;
- size_t size = 0;
-
- for (vma = first_vma(current, gate_vma); vma != NULL;
- vma = next_vma(vma, gate_vma))
- size += vma_dump_size(vma, mm_flags);
- return size;
-}
-
/*
* Actual dumper
*
@@ -2038,44 +1995,20 @@ static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
static int elf_core_dump(struct coredump_params *cprm)
{
int has_dumped = 0;
- mm_segment_t fs;
- int segs;
- size_t size = 0;
- struct vm_area_struct *vma, *gate_vma;
- struct elfhdr *elf = NULL;
- loff_t offset = 0, dataoff, foffset;
- struct elf_note_info info;
+ int segs, i;
+ struct elfhdr elf;
+ loff_t offset = 0, dataoff;
+ struct elf_note_info info = { };
struct elf_phdr *phdr4note = NULL;
struct elf_shdr *shdr4extnum = NULL;
Elf_Half e_phnum;
elf_addr_t e_shoff;
/*
- * We no longer stop all VM operations.
- *
- * This is because those proceses that could possibly change map_count
- * or the mmap / vma pages are now blocked in do_exit on current
- * finishing this core dump.
- *
- * Only ptrace can touch these memory addresses, but it doesn't change
- * the map_count or the pages allocated. So no possibility of crashing
- * exists while dumping the mm->vm_next areas to the core file.
- */
-
- /* alloc memory for large data structures: too large to be on stack */
- elf = kmalloc(sizeof(*elf), GFP_KERNEL);
- if (!elf)
- goto out;
- /*
* The number of segs are recored into ELF header as 16bit value.
* Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
*/
- segs = current->mm->map_count;
- segs += elf_core_extra_phdrs();
-
- gate_vma = get_gate_vma(current->mm);
- if (gate_vma != NULL)
- segs++;
+ segs = cprm->vma_count + elf_core_extra_phdrs(cprm);
/* for notes section */
segs++;
@@ -2089,22 +2022,19 @@ static int elf_core_dump(struct coredump_params *cprm)
* Collect all the non-memory information about the process for the
* notes. This also sets up the file header.
*/
- if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
- goto cleanup;
+ if (!fill_note_info(&elf, e_phnum, &info, cprm))
+ goto end_coredump;
has_dumped = 1;
- fs = get_fs();
- set_fs(KERNEL_DS);
-
- offset += sizeof(*elf); /* Elf header */
+ offset += sizeof(elf); /* ELF header */
offset += segs * sizeof(struct elf_phdr); /* Program headers */
- foffset = offset;
/* Write notes phdr entry */
{
- size_t sz = get_note_info_size(&info);
+ size_t sz = info.size;
+ /* For cell spufs and x86 xstate */
sz += elf_coredump_extra_notes_size();
phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
@@ -2117,113 +2047,83 @@ static int elf_core_dump(struct coredump_params *cprm)
dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
- offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags);
- offset += elf_core_extra_data_size();
+ offset += cprm->vma_data_size;
+ offset += elf_core_extra_data_size(cprm);
e_shoff = offset;
if (e_phnum == PN_XNUM) {
shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
if (!shdr4extnum)
goto end_coredump;
- fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
+ fill_extnum_info(&elf, shdr4extnum, e_shoff, segs);
}
offset = dataoff;
- size += sizeof(*elf);
- if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
+ if (!dump_emit(cprm, &elf, sizeof(elf)))
goto end_coredump;
- size += sizeof(*phdr4note);
- if (size > cprm->limit
- || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note)))
+ if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
goto end_coredump;
/* Write program headers for segments dump */
- for (vma = first_vma(current, gate_vma); vma != NULL;
- vma = next_vma(vma, gate_vma)) {
+ for (i = 0; i < cprm->vma_count; i++) {
+ struct core_vma_metadata *meta = cprm->vma_meta + i;
struct elf_phdr phdr;
phdr.p_type = PT_LOAD;
phdr.p_offset = offset;
- phdr.p_vaddr = vma->vm_start;
+ phdr.p_vaddr = meta->start;
phdr.p_paddr = 0;
- phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags);
- phdr.p_memsz = vma->vm_end - vma->vm_start;
+ phdr.p_filesz = meta->dump_size;
+ phdr.p_memsz = meta->end - meta->start;
offset += phdr.p_filesz;
- phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
- if (vma->vm_flags & VM_WRITE)
+ phdr.p_flags = 0;
+ if (meta->flags & VM_READ)
+ phdr.p_flags |= PF_R;
+ if (meta->flags & VM_WRITE)
phdr.p_flags |= PF_W;
- if (vma->vm_flags & VM_EXEC)
+ if (meta->flags & VM_EXEC)
phdr.p_flags |= PF_X;
phdr.p_align = ELF_EXEC_PAGESIZE;
- size += sizeof(phdr);
- if (size > cprm->limit
- || !dump_write(cprm->file, &phdr, sizeof(phdr)))
+ if (!dump_emit(cprm, &phdr, sizeof(phdr)))
goto end_coredump;
}
- if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit))
+ if (!elf_core_write_extra_phdrs(cprm, offset))
goto end_coredump;
- /* write out the notes section */
- if (!write_note_info(&info, cprm->file, &foffset))
+ /* write out the notes section */
+ if (!write_note_info(&info, cprm))
goto end_coredump;
- if (elf_coredump_extra_notes_write(cprm->file, &foffset))
+ /* For cell spufs and x86 xstate */
+ if (elf_coredump_extra_notes_write(cprm))
goto end_coredump;
/* Align to page */
- if (!dump_seek(cprm->file, dataoff - foffset))
- goto end_coredump;
+ dump_skip_to(cprm, dataoff);
- for (vma = first_vma(current, gate_vma); vma != NULL;
- vma = next_vma(vma, gate_vma)) {
- unsigned long addr;
- unsigned long end;
-
- end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags);
-
- for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
- struct page *page;
- int stop;
-
- page = get_dump_page(addr);
- if (page) {
- void *kaddr = kmap(page);
- stop = ((size += PAGE_SIZE) > cprm->limit) ||
- !dump_write(cprm->file, kaddr,
- PAGE_SIZE);
- kunmap(page);
- page_cache_release(page);
- } else
- stop = !dump_seek(cprm->file, PAGE_SIZE);
- if (stop)
- goto end_coredump;
- }
+ for (i = 0; i < cprm->vma_count; i++) {
+ struct core_vma_metadata *meta = cprm->vma_meta + i;
+
+ if (!dump_user_range(cprm, meta->start, meta->dump_size))
+ goto end_coredump;
}
- if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
+ if (!elf_core_write_extra_data(cprm))
goto end_coredump;
if (e_phnum == PN_XNUM) {
- size += sizeof(*shdr4extnum);
- if (size > cprm->limit
- || !dump_write(cprm->file, shdr4extnum,
- sizeof(*shdr4extnum)))
+ if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
goto end_coredump;
}
end_coredump:
- set_fs(fs);
-
-cleanup:
free_note_info(&info);
kfree(shdr4extnum);
kfree(phdr4note);
- kfree(elf);
-out:
return has_dumped;
}
@@ -2243,4 +2143,7 @@ static void __exit exit_elf_binfmt(void)
core_initcall(init_elf_binfmt);
module_exit(exit_elf_binfmt);
-MODULE_LICENSE("GPL");
+
+#ifdef CONFIG_BINFMT_ELF_KUNIT_TEST
+#include "tests/binfmt_elf_kunit.c"
+#endif