diff options
Diffstat (limited to 'fs/binfmt_elf.c')
| -rw-r--r-- | fs/binfmt_elf.c | 2183 |
1 files changed, 1043 insertions, 1140 deletions
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 100edcc5e312..3eb734c192e9 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/binfmt_elf.c * @@ -12,6 +13,7 @@ #include <linux/module.h> #include <linux/kernel.h> #include <linux/fs.h> +#include <linux/log2.h> #include <linux/mm.h> #include <linux/mman.h> #include <linux/errno.h> @@ -26,18 +28,32 @@ #include <linux/highuid.h> #include <linux/compiler.h> #include <linux/highmem.h> +#include <linux/hugetlb.h> #include <linux/pagemap.h> #include <linux/vmalloc.h> #include <linux/security.h> #include <linux/random.h> #include <linux/elf.h> +#include <linux/elf-randomize.h> #include <linux/utsname.h> #include <linux/coredump.h> #include <linux/sched.h> -#include <asm/uaccess.h> +#include <linux/sched/coredump.h> +#include <linux/sched/task_stack.h> +#include <linux/sched/cputime.h> +#include <linux/sizes.h> +#include <linux/types.h> +#include <linux/cred.h> +#include <linux/dax.h> +#include <linux/uaccess.h> +#include <uapi/linux/rseq.h> #include <asm/param.h> #include <asm/page.h> +#ifndef ELF_COMPAT +#define ELF_COMPAT 0 +#endif + #ifndef user_long_t #define user_long_t long #endif @@ -45,10 +61,12 @@ #define user_siginfo_t siginfo_t #endif +/* That's for binfmt_elf_fdpic to deal with */ +#ifndef elf_check_fdpic +#define elf_check_fdpic(ex) false +#endif + static int load_elf_binary(struct linux_binprm *bprm); -static int load_elf_library(struct file *); -static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *, - int, int, unsigned long); /* * If we don't support core dumping, then supply a NULL so we @@ -70,47 +88,49 @@ static int elf_core_dump(struct coredump_params *cprm); #define ELF_CORE_EFLAGS 0 #endif -#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1)) +#define ELF_PAGESTART(_v) ((_v) & ~(int)(ELF_MIN_ALIGN-1)) #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1)) #define ELF_PAGEALIGN(_v) (((_v) + 
ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1)) static struct linux_binfmt elf_format = { .module = THIS_MODULE, .load_binary = load_elf_binary, - .load_shlib = load_elf_library, +#ifdef CONFIG_COREDUMP .core_dump = elf_core_dump, .min_coredump = ELF_EXEC_PAGESIZE, +#endif }; -#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE) +#define BAD_ADDR(x) (unlikely((unsigned long)(x) >= TASK_SIZE)) -static int set_brk(unsigned long start, unsigned long end) +static inline void elf_coredump_set_mm_eflags(struct mm_struct *mm, u32 flags) { - start = ELF_PAGEALIGN(start); - end = ELF_PAGEALIGN(end); - if (end > start) { - unsigned long addr; - addr = vm_brk(start, end - start); - if (BAD_ADDR(addr)) - return addr; - } - current->mm->start_brk = current->mm->brk = end; - return 0; +#ifdef CONFIG_ARCH_HAS_ELF_CORE_EFLAGS + mm->saved_e_flags = flags; +#endif } -/* We need to explicitly zero any fractional pages - after the data section (i.e. bss). This would - contain the junk from the file that should not - be in memory +static inline u32 elf_coredump_get_mm_eflags(struct mm_struct *mm, u32 flags) +{ +#ifdef CONFIG_ARCH_HAS_ELF_CORE_EFLAGS + flags = mm->saved_e_flags; +#endif + return flags; +} + +/* + * We need to explicitly zero any trailing portion of the page that follows + * p_filesz when it ends before the page ends (e.g. bss), otherwise this + * memory will contain the junk from the file that should not be present. 
*/ -static int padzero(unsigned long elf_bss) +static int padzero(unsigned long address) { unsigned long nbyte; - nbyte = ELF_PAGEOFFSET(elf_bss); + nbyte = ELF_PAGEOFFSET(address); if (nbyte) { nbyte = ELF_MIN_ALIGN - nbyte; - if (clear_user((void __user *) elf_bss, nbyte)) + if (clear_user((void __user *)address, nbyte)) return -EFAULT; } return 0; @@ -128,7 +148,7 @@ static int padzero(unsigned long elf_bss) #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items)) #define STACK_ROUND(sp, items) \ (((unsigned long) (sp - items)) &~ 15UL) -#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; }) +#define STACK_ALLOC(sp, len) (sp -= len) #endif #ifndef ELF_BASE_PLATFORM @@ -141,14 +161,14 @@ static int padzero(unsigned long elf_bss) #endif static int -create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, - unsigned long load_addr, unsigned long interp_load_addr) +create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec, + unsigned long interp_load_addr, + unsigned long e_entry, unsigned long phdr_addr) { + struct mm_struct *mm = current->mm; unsigned long p = bprm->p; int argc = bprm->argc; int envc = bprm->envc; - elf_addr_t __user *argv; - elf_addr_t __user *envp; elf_addr_t __user *sp; elf_addr_t __user *u_platform; elf_addr_t __user *u_base_platform; @@ -158,7 +178,8 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, unsigned char k_rand_bytes[16]; int items; elf_addr_t *elf_info; - int ei_index = 0; + elf_addr_t flags = 0; + int ei_index; const struct cred *cred = current_cred(); struct vm_area_struct *vma; @@ -181,7 +202,7 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, size_t len = strlen(k_platform) + 1; u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len); - if (__copy_to_user(u_platform, k_platform, len)) + if (copy_to_user(u_platform, k_platform, len)) return -EFAULT; } @@ -194,7 +215,7 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, size_t len = 
strlen(k_base_platform) + 1; u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len); - if (__copy_to_user(u_base_platform, k_base_platform, len)) + if (copy_to_user(u_base_platform, k_base_platform, len)) return -EFAULT; } @@ -204,20 +225,20 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes)); u_rand_bytes = (elf_addr_t __user *) STACK_ALLOC(p, sizeof(k_rand_bytes)); - if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes))) + if (copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes))) return -EFAULT; /* Create the ELF interpreter info */ - elf_info = (elf_addr_t *)current->mm->saved_auxv; + elf_info = (elf_addr_t *)mm->saved_auxv; /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */ #define NEW_AUX_ENT(id, val) \ do { \ - elf_info[ei_index++] = id; \ - elf_info[ei_index++] = val; \ + *elf_info++ = id; \ + *elf_info++ = val; \ } while (0) #ifdef ARCH_DLINFO - /* + /* * ARCH_DLINFO must come first so PPC can do its special alignment of * AUXV. 
* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in @@ -228,21 +249,29 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP); NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE); NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC); - NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff); + NEW_AUX_ENT(AT_PHDR, phdr_addr); NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr)); NEW_AUX_ENT(AT_PHNUM, exec->e_phnum); NEW_AUX_ENT(AT_BASE, interp_load_addr); - NEW_AUX_ENT(AT_FLAGS, 0); - NEW_AUX_ENT(AT_ENTRY, exec->e_entry); + if (bprm->interp_flags & BINPRM_FLAGS_PRESERVE_ARGV0) + flags |= AT_FLAGS_PRESERVE_ARGV0; + NEW_AUX_ENT(AT_FLAGS, flags); + NEW_AUX_ENT(AT_ENTRY, e_entry); NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid)); NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid)); NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid)); NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid)); - NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm)); + NEW_AUX_ENT(AT_SECURE, bprm->secureexec); NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes); #ifdef ELF_HWCAP2 NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2); #endif +#ifdef ELF_HWCAP3 + NEW_AUX_ENT(AT_HWCAP3, ELF_HWCAP3); +#endif +#ifdef ELF_HWCAP4 + NEW_AUX_ENT(AT_HWCAP4, ELF_HWCAP4); +#endif NEW_AUX_ENT(AT_EXECFN, bprm->exec); if (k_platform) { NEW_AUX_ENT(AT_PLATFORM, @@ -252,17 +281,22 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, NEW_AUX_ENT(AT_BASE_PLATFORM, (elf_addr_t)(unsigned long)u_base_platform); } - if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) { - NEW_AUX_ENT(AT_EXECFD, bprm->interp_data); + if (bprm->have_execfd) { + NEW_AUX_ENT(AT_EXECFD, bprm->execfd); } +#ifdef CONFIG_RSEQ + NEW_AUX_ENT(AT_RSEQ_FEATURE_SIZE, offsetof(struct rseq, end)); + NEW_AUX_ENT(AT_RSEQ_ALIGN, __alignof__(struct rseq)); +#endif #undef NEW_AUX_ENT /* AT_NULL is zero; clear the rest too */ - memset(&elf_info[ei_index], 0, - sizeof 
current->mm->saved_auxv - ei_index * sizeof elf_info[0]); + memset(elf_info, 0, (char *)mm->saved_auxv + + sizeof(mm->saved_auxv) - (char *)elf_info); /* And advance past the AT_NULL entry. */ - ei_index += 2; + elf_info += 2; + ei_index = elf_info - (elf_addr_t *)mm->saved_auxv; sp = STACK_ADD(p, ei_index); items = (argc + 1) + (envc + 1) + 1; @@ -281,54 +315,60 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, * Grow the stack manually; some architectures have a limit on how * far ahead a user-space access may be in order to grow the stack. */ - vma = find_extend_vma(current->mm, bprm->p); + if (mmap_write_lock_killable(mm)) + return -EINTR; + vma = find_extend_vma_locked(mm, bprm->p); + mmap_write_unlock(mm); if (!vma) return -EFAULT; /* Now, let's put argc (and argv, envp if appropriate) on the stack */ - if (__put_user(argc, sp++)) + if (put_user(argc, sp++)) return -EFAULT; - argv = sp; - envp = argv + argc + 1; - /* Populate argv and envp */ - p = current->mm->arg_end = current->mm->arg_start; + /* Populate list of argv pointers back to argv strings. */ + p = mm->arg_end = mm->arg_start; while (argc-- > 0) { size_t len; - if (__put_user((elf_addr_t)p, argv++)) + if (put_user((elf_addr_t)p, sp++)) return -EFAULT; len = strnlen_user((void __user *)p, MAX_ARG_STRLEN); if (!len || len > MAX_ARG_STRLEN) return -EINVAL; p += len; } - if (__put_user(0, argv)) + if (put_user(0, sp++)) return -EFAULT; - current->mm->arg_end = current->mm->env_start = p; + mm->arg_end = p; + + /* Populate list of envp pointers back to envp strings. 
*/ + mm->env_end = mm->env_start = p; while (envc-- > 0) { size_t len; - if (__put_user((elf_addr_t)p, envp++)) + if (put_user((elf_addr_t)p, sp++)) return -EFAULT; len = strnlen_user((void __user *)p, MAX_ARG_STRLEN); if (!len || len > MAX_ARG_STRLEN) return -EINVAL; p += len; } - if (__put_user(0, envp)) + if (put_user(0, sp++)) return -EFAULT; - current->mm->env_end = p; + mm->env_end = p; /* Put the elf_info on the stack in the right place. */ - sp = (elf_addr_t __user *)envp + 1; - if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t))) + if (copy_to_user(sp, mm->saved_auxv, ei_index * sizeof(elf_addr_t))) return -EFAULT; return 0; } -#ifndef elf_map - +/* + * Map "eppnt->p_filesz" bytes from "filep" offset "eppnt->p_offset" + * into memory at "addr". (Note that p_filesz is rounded up to the + * next page, so any extra bytes from the file must be wiped.) + */ static unsigned long elf_map(struct file *filep, unsigned long addr, - struct elf_phdr *eppnt, int prot, int type, + const struct elf_phdr *eppnt, int prot, int type, unsigned long total_size) { unsigned long map_addr; @@ -358,29 +398,244 @@ static unsigned long elf_map(struct file *filep, unsigned long addr, } else map_addr = vm_mmap(filep, addr, size, prot, type, off); + if ((type & MAP_FIXED_NOREPLACE) && + PTR_ERR((void *)map_addr) == -EEXIST) + pr_info("%d (%s): Uhuuh, elf segment at %px requested but the memory is mapped already\n", + task_pid_nr(current), current->comm, (void *)addr); + return(map_addr); } -#endif /* !elf_map */ +/* + * Map "eppnt->p_filesz" bytes from "filep" offset "eppnt->p_offset" + * into memory at "addr". Memory from "p_filesz" through "p_memsz" + * rounded up to the next page is zeroed. 
+ */ +static unsigned long elf_load(struct file *filep, unsigned long addr, + const struct elf_phdr *eppnt, int prot, int type, + unsigned long total_size) +{ + unsigned long zero_start, zero_end; + unsigned long map_addr; + + if (eppnt->p_filesz) { + map_addr = elf_map(filep, addr, eppnt, prot, type, total_size); + if (BAD_ADDR(map_addr)) + return map_addr; + if (eppnt->p_memsz > eppnt->p_filesz) { + zero_start = map_addr + ELF_PAGEOFFSET(eppnt->p_vaddr) + + eppnt->p_filesz; + zero_end = map_addr + ELF_PAGEOFFSET(eppnt->p_vaddr) + + eppnt->p_memsz; + + /* + * Zero the end of the last mapped page but ignore + * any errors if the segment isn't writable. + */ + if (padzero(zero_start) && (prot & PROT_WRITE)) + return -EFAULT; + } + } else { + map_addr = zero_start = ELF_PAGESTART(addr); + zero_end = zero_start + ELF_PAGEOFFSET(eppnt->p_vaddr) + + eppnt->p_memsz; + } + if (eppnt->p_memsz > eppnt->p_filesz) { + /* + * Map the last of the segment. + * If the header is requesting these pages to be + * executable, honour that (ppc32 needs this). + */ + int error; + + zero_start = ELF_PAGEALIGN(zero_start); + zero_end = ELF_PAGEALIGN(zero_end); + + error = vm_brk_flags(zero_start, zero_end - zero_start, + prot & PROT_EXEC ? VM_EXEC : 0); + if (error) + map_addr = error; + } + return map_addr; +} + + +static unsigned long total_mapping_size(const struct elf_phdr *phdr, int nr) +{ + elf_addr_t min_addr = -1; + elf_addr_t max_addr = 0; + bool pt_load = false; + int i; + + for (i = 0; i < nr; i++) { + if (phdr[i].p_type == PT_LOAD) { + min_addr = min(min_addr, ELF_PAGESTART(phdr[i].p_vaddr)); + max_addr = max(max_addr, phdr[i].p_vaddr + phdr[i].p_memsz); + pt_load = true; + } + } + return pt_load ? (max_addr - min_addr) : 0; +} + +static int elf_read(struct file *file, void *buf, size_t len, loff_t pos) +{ + ssize_t rv; + + rv = kernel_read(file, buf, len, &pos); + if (unlikely(rv != len)) { + return (rv < 0) ? 
rv : -EIO; + } + return 0; +} -static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr) +static unsigned long maximum_alignment(struct elf_phdr *cmds, int nr) { - int i, first_idx = -1, last_idx = -1; + unsigned long alignment = 0; + int i; for (i = 0; i < nr; i++) { if (cmds[i].p_type == PT_LOAD) { - last_idx = i; - if (first_idx == -1) - first_idx = i; + unsigned long p_align = cmds[i].p_align; + + /* skip non-power of two alignments as invalid */ + if (!is_power_of_2(p_align)) + continue; + alignment = max(alignment, p_align); } } - if (first_idx == -1) - return 0; - return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz - - ELF_PAGESTART(cmds[first_idx].p_vaddr); + /* ensure we align to at least one page */ + return ELF_PAGEALIGN(alignment); +} + +/** + * load_elf_phdrs() - load ELF program headers + * @elf_ex: ELF header of the binary whose program headers should be loaded + * @elf_file: the opened ELF binary file + * + * Loads ELF program headers from the binary file elf_file, which has the ELF + * header pointed to by elf_ex, into a newly allocated array. The caller is + * responsible for freeing the allocated data. Returns NULL upon failure. + */ +static struct elf_phdr *load_elf_phdrs(const struct elfhdr *elf_ex, + struct file *elf_file) +{ + struct elf_phdr *elf_phdata = NULL; + int retval = -1; + unsigned int size; + + /* + * If the size of this structure has changed, then punt, since + * we will be doing the wrong thing. + */ + if (elf_ex->e_phentsize != sizeof(struct elf_phdr)) + goto out; + + /* Sanity check the number of program headers... */ + /* ...and their total size. 
*/ + size = sizeof(struct elf_phdr) * elf_ex->e_phnum; + if (size == 0 || size > 65536) + goto out; + + elf_phdata = kmalloc(size, GFP_KERNEL); + if (!elf_phdata) + goto out; + + /* Read in the program headers */ + retval = elf_read(elf_file, elf_phdata, size, elf_ex->e_phoff); + +out: + if (retval) { + kfree(elf_phdata); + elf_phdata = NULL; + } + return elf_phdata; +} + +#ifndef CONFIG_ARCH_BINFMT_ELF_STATE + +/** + * struct arch_elf_state - arch-specific ELF loading state + * + * This structure is used to preserve architecture specific data during + * the loading of an ELF file, throughout the checking of architecture + * specific ELF headers & through to the point where the ELF load is + * known to be proceeding (ie. SET_PERSONALITY). + * + * This implementation is a dummy for architectures which require no + * specific state. + */ +struct arch_elf_state { +}; + +#define INIT_ARCH_ELF_STATE {} + +/** + * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header + * @ehdr: The main ELF header + * @phdr: The program header to check + * @elf: The open ELF file + * @is_interp: True if the phdr is from the interpreter of the ELF being + * loaded, else false. + * @state: Architecture-specific state preserved throughout the process + * of loading the ELF. + * + * Inspects the program header phdr to validate its correctness and/or + * suitability for the system. Called once per ELF program header in the + * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its + * interpreter. + * + * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load + * with that return code. + */ +static inline int arch_elf_pt_proc(struct elfhdr *ehdr, + struct elf_phdr *phdr, + struct file *elf, bool is_interp, + struct arch_elf_state *state) +{ + /* Dummy implementation, always proceed */ + return 0; } +/** + * arch_check_elf() - check an ELF executable + * @ehdr: The main ELF header + * @has_interp: True if the ELF has an interpreter, else false. 
+ * @interp_ehdr: The interpreter's ELF header + * @state: Architecture-specific state preserved throughout the process + * of loading the ELF. + * + * Provides a final opportunity for architecture code to reject the loading + * of the ELF & cause an exec syscall to return an error. This is called after + * all program headers to be checked by arch_elf_pt_proc have been. + * + * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load + * with that return code. + */ +static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp, + struct elfhdr *interp_ehdr, + struct arch_elf_state *state) +{ + /* Dummy implementation, always proceed */ + return 0; +} + +#endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */ + +static inline int make_prot(u32 p_flags, struct arch_elf_state *arch_state, + bool has_interp, bool is_interp) +{ + int prot = 0; + + if (p_flags & PF_R) + prot |= PROT_READ; + if (p_flags & PF_W) + prot |= PROT_WRITE; + if (p_flags & PF_X) + prot |= PROT_EXEC; + + return arch_elf_adjust_prot(prot, arch_state, has_interp, is_interp); +} /* This is much more generalized than the library routine read function, so we keep this separate. 
Technically the library read function @@ -388,88 +643,55 @@ static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr) an ELF header */ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, - struct file *interpreter, unsigned long *interp_map_addr, - unsigned long no_base) + struct file *interpreter, + unsigned long no_base, struct elf_phdr *interp_elf_phdata, + struct arch_elf_state *arch_state) { - struct elf_phdr *elf_phdata; struct elf_phdr *eppnt; unsigned long load_addr = 0; int load_addr_set = 0; - unsigned long last_bss = 0, elf_bss = 0; unsigned long error = ~0UL; unsigned long total_size; - int retval, i, size; + int i; /* First of all, some simple consistency checks */ if (interp_elf_ex->e_type != ET_EXEC && interp_elf_ex->e_type != ET_DYN) goto out; - if (!elf_check_arch(interp_elf_ex)) + if (!elf_check_arch(interp_elf_ex) || + elf_check_fdpic(interp_elf_ex)) goto out; - if (!interpreter->f_op || !interpreter->f_op->mmap) - goto out; - - /* - * If the size of this structure has changed, then punt, since - * we will be doing the wrong thing. 
- */ - if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr)) - goto out; - if (interp_elf_ex->e_phnum < 1 || - interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr)) - goto out; - - /* Now read in all of the header information */ - size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum; - if (size > ELF_MIN_ALIGN) - goto out; - elf_phdata = kmalloc(size, GFP_KERNEL); - if (!elf_phdata) + if (!can_mmap_file(interpreter)) goto out; - retval = kernel_read(interpreter, interp_elf_ex->e_phoff, - (char *)elf_phdata, size); - error = -EIO; - if (retval != size) { - if (retval < 0) - error = retval; - goto out_close; - } - - total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum); + total_size = total_mapping_size(interp_elf_phdata, + interp_elf_ex->e_phnum); if (!total_size) { error = -EINVAL; - goto out_close; + goto out; } - eppnt = elf_phdata; + eppnt = interp_elf_phdata; for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) { if (eppnt->p_type == PT_LOAD) { - int elf_type = MAP_PRIVATE | MAP_DENYWRITE; - int elf_prot = 0; + int elf_type = MAP_PRIVATE; + int elf_prot = make_prot(eppnt->p_flags, arch_state, + true, true); unsigned long vaddr = 0; unsigned long k, map_addr; - if (eppnt->p_flags & PF_R) - elf_prot = PROT_READ; - if (eppnt->p_flags & PF_W) - elf_prot |= PROT_WRITE; - if (eppnt->p_flags & PF_X) - elf_prot |= PROT_EXEC; vaddr = eppnt->p_vaddr; if (interp_elf_ex->e_type == ET_EXEC || load_addr_set) elf_type |= MAP_FIXED; else if (no_base && interp_elf_ex->e_type == ET_DYN) load_addr = -vaddr; - map_addr = elf_map(interpreter, load_addr + vaddr, + map_addr = elf_load(interpreter, load_addr + vaddr, eppnt, elf_prot, elf_type, total_size); total_size = 0; - if (!*interp_map_addr) - *interp_map_addr = map_addr; error = map_addr; if (BAD_ADDR(map_addr)) - goto out_close; + goto out; if (!load_addr_set && interp_elf_ex->e_type == ET_DYN) { @@ -488,52 +710,12 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, eppnt->p_memsz > 
TASK_SIZE || TASK_SIZE - eppnt->p_memsz < k) { error = -ENOMEM; - goto out_close; + goto out; } - - /* - * Find the end of the file mapping for this phdr, and - * keep track of the largest address we see for this. - */ - k = load_addr + eppnt->p_vaddr + eppnt->p_filesz; - if (k > elf_bss) - elf_bss = k; - - /* - * Do the same thing for the memory mapping - between - * elf_bss and last_bss is the bss section. - */ - k = load_addr + eppnt->p_memsz + eppnt->p_vaddr; - if (k > last_bss) - last_bss = k; - } - } - - if (last_bss > elf_bss) { - /* - * Now fill out the bss section. First pad the last page up - * to the page boundary, and then perform a mmap to make sure - * that there are zero-mapped pages up to and including the - * last bss page. - */ - if (padzero(elf_bss)) { - error = -EFAULT; - goto out_close; } - - /* What we have mapped so far */ - elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1); - - /* Map the last of the bss segment */ - error = vm_brk(elf_bss, last_bss - elf_bss); - if (BAD_ADDR(error)) - goto out_close; } error = load_addr; - -out_close: - kfree(elf_phdata); out: return error; } @@ -543,195 +725,300 @@ out: * libraries. There is no binary dependent code anywhere else. 
*/ -#define INTERPRETER_NONE 0 -#define INTERPRETER_ELF 2 +static int parse_elf_property(const char *data, size_t *off, size_t datasz, + struct arch_elf_state *arch, + bool have_prev_type, u32 *prev_type) +{ + size_t o, step; + const struct gnu_property *pr; + int ret; + + if (*off == datasz) + return -ENOENT; + + if (WARN_ON_ONCE(*off > datasz || *off % ELF_GNU_PROPERTY_ALIGN)) + return -EIO; + o = *off; + datasz -= *off; + + if (datasz < sizeof(*pr)) + return -ENOEXEC; + pr = (const struct gnu_property *)(data + o); + o += sizeof(*pr); + datasz -= sizeof(*pr); + + if (pr->pr_datasz > datasz) + return -ENOEXEC; + + WARN_ON_ONCE(o % ELF_GNU_PROPERTY_ALIGN); + step = round_up(pr->pr_datasz, ELF_GNU_PROPERTY_ALIGN); + if (step > datasz) + return -ENOEXEC; + + /* Properties are supposed to be unique and sorted on pr_type: */ + if (have_prev_type && pr->pr_type <= *prev_type) + return -ENOEXEC; + *prev_type = pr->pr_type; + + ret = arch_parse_elf_property(pr->pr_type, data + o, + pr->pr_datasz, ELF_COMPAT, arch); + if (ret) + return ret; -#ifndef STACK_RND_MASK -#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12)) /* 8MB of VA */ -#endif + *off = o + step; + return 0; +} + +#define NOTE_DATA_SZ SZ_1K +#define NOTE_NAME_SZ (sizeof(NN_GNU_PROPERTY_TYPE_0)) -static unsigned long randomize_stack_top(unsigned long stack_top) +static int parse_elf_properties(struct file *f, const struct elf_phdr *phdr, + struct arch_elf_state *arch) { - unsigned int random_variable = 0; + union { + struct elf_note nhdr; + char data[NOTE_DATA_SZ]; + } note; + loff_t pos; + ssize_t n; + size_t off, datasz; + int ret; + bool have_prev_type; + u32 prev_type; + + if (!IS_ENABLED(CONFIG_ARCH_USE_GNU_PROPERTY) || !phdr) + return 0; - if ((current->flags & PF_RANDOMIZE) && - !(current->personality & ADDR_NO_RANDOMIZE)) { - random_variable = get_random_int() & STACK_RND_MASK; - random_variable <<= PAGE_SHIFT; - } -#ifdef CONFIG_STACK_GROWSUP - return PAGE_ALIGN(stack_top) + random_variable; -#else - 
return PAGE_ALIGN(stack_top) - random_variable; -#endif + /* load_elf_binary() shouldn't call us unless this is true... */ + if (WARN_ON_ONCE(phdr->p_type != PT_GNU_PROPERTY)) + return -ENOEXEC; + + /* If the properties are crazy large, that's too bad (for now): */ + if (phdr->p_filesz > sizeof(note)) + return -ENOEXEC; + + pos = phdr->p_offset; + n = kernel_read(f, &note, phdr->p_filesz, &pos); + + BUILD_BUG_ON(sizeof(note) < sizeof(note.nhdr) + NOTE_NAME_SZ); + if (n < 0 || n < sizeof(note.nhdr) + NOTE_NAME_SZ) + return -EIO; + + if (note.nhdr.n_type != NT_GNU_PROPERTY_TYPE_0 || + note.nhdr.n_namesz != NOTE_NAME_SZ || + strncmp(note.data + sizeof(note.nhdr), + NN_GNU_PROPERTY_TYPE_0, n - sizeof(note.nhdr))) + return -ENOEXEC; + + off = round_up(sizeof(note.nhdr) + NOTE_NAME_SZ, + ELF_GNU_PROPERTY_ALIGN); + if (off > n) + return -ENOEXEC; + + if (note.nhdr.n_descsz > n - off) + return -ENOEXEC; + datasz = off + note.nhdr.n_descsz; + + have_prev_type = false; + do { + ret = parse_elf_property(note.data, &off, datasz, arch, + have_prev_type, &prev_type); + have_prev_type = true; + } while (!ret); + + return ret == -ENOENT ? 
0 : ret; } static int load_elf_binary(struct linux_binprm *bprm) { struct file *interpreter = NULL; /* to shut gcc up */ - unsigned long load_addr = 0, load_bias = 0; - int load_addr_set = 0; - char * elf_interpreter = NULL; + unsigned long load_bias = 0, phdr_addr = 0; + int first_pt_load = 1; unsigned long error; - struct elf_phdr *elf_ppnt, *elf_phdata; - unsigned long elf_bss, elf_brk; + struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL; + struct elf_phdr *elf_property_phdata = NULL; + unsigned long elf_brk; + bool brk_moved = false; int retval, i; - unsigned int size; unsigned long elf_entry; + unsigned long e_entry; unsigned long interp_load_addr = 0; unsigned long start_code, end_code, start_data, end_data; unsigned long reloc_func_desc __maybe_unused = 0; int executable_stack = EXSTACK_DEFAULT; - unsigned long def_flags = 0; - struct pt_regs *regs = current_pt_regs(); - struct { - struct elfhdr elf_ex; - struct elfhdr interp_elf_ex; - } *loc; - - loc = kmalloc(sizeof(*loc), GFP_KERNEL); - if (!loc) { - retval = -ENOMEM; - goto out_ret; - } - - /* Get the exec-header */ - loc->elf_ex = *((struct elfhdr *)bprm->buf); + struct elfhdr *elf_ex = (struct elfhdr *)bprm->buf; + struct elfhdr *interp_elf_ex = NULL; + struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE; + struct mm_struct *mm; + struct pt_regs *regs; retval = -ENOEXEC; /* First of all, some simple consistency checks */ - if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0) + if (memcmp(elf_ex->e_ident, ELFMAG, SELFMAG) != 0) goto out; - if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN) - goto out; - if (!elf_check_arch(&loc->elf_ex)) + if (elf_ex->e_type != ET_EXEC && elf_ex->e_type != ET_DYN) goto out; - if (!bprm->file->f_op || !bprm->file->f_op->mmap) + if (!elf_check_arch(elf_ex)) goto out; - - /* Now read in all of the header information */ - if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr)) + if (elf_check_fdpic(elf_ex)) goto out; - if 
(loc->elf_ex.e_phnum < 1 || - loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr)) + if (!can_mmap_file(bprm->file)) goto out; - size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr); - retval = -ENOMEM; - elf_phdata = kmalloc(size, GFP_KERNEL); + + elf_phdata = load_elf_phdrs(elf_ex, bprm->file); if (!elf_phdata) goto out; - retval = kernel_read(bprm->file, loc->elf_ex.e_phoff, - (char *)elf_phdata, size); - if (retval != size) { - if (retval >= 0) - retval = -EIO; - goto out_free_ph; - } - elf_ppnt = elf_phdata; - elf_bss = 0; - elf_brk = 0; + for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++) { + char *elf_interpreter; - start_code = ~0UL; - end_code = 0; - start_data = 0; - end_data = 0; + if (elf_ppnt->p_type == PT_GNU_PROPERTY) { + elf_property_phdata = elf_ppnt; + continue; + } - for (i = 0; i < loc->elf_ex.e_phnum; i++) { - if (elf_ppnt->p_type == PT_INTERP) { - /* This is the program interpreter used for - * shared libraries - for now assume that this - * is an a.out format binary - */ - retval = -ENOEXEC; - if (elf_ppnt->p_filesz > PATH_MAX || - elf_ppnt->p_filesz < 2) - goto out_free_ph; + if (elf_ppnt->p_type != PT_INTERP) + continue; - retval = -ENOMEM; - elf_interpreter = kmalloc(elf_ppnt->p_filesz, - GFP_KERNEL); - if (!elf_interpreter) - goto out_free_ph; - - retval = kernel_read(bprm->file, elf_ppnt->p_offset, - elf_interpreter, - elf_ppnt->p_filesz); - if (retval != elf_ppnt->p_filesz) { - if (retval >= 0) - retval = -EIO; - goto out_free_interp; - } - /* make sure path is NULL terminated */ - retval = -ENOEXEC; - if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0') - goto out_free_interp; + /* + * This is the program interpreter used for shared libraries - + * for now assume that this is an a.out format binary. 
+ */ + retval = -ENOEXEC; + if (elf_ppnt->p_filesz > PATH_MAX || elf_ppnt->p_filesz < 2) + goto out_free_ph; - interpreter = open_exec(elf_interpreter); - retval = PTR_ERR(interpreter); - if (IS_ERR(interpreter)) - goto out_free_interp; + retval = -ENOMEM; + elf_interpreter = kmalloc(elf_ppnt->p_filesz, GFP_KERNEL); + if (!elf_interpreter) + goto out_free_ph; - /* - * If the binary is not readable then enforce - * mm->dumpable = 0 regardless of the interpreter's - * permissions. - */ - would_dump(bprm, interpreter); + retval = elf_read(bprm->file, elf_interpreter, elf_ppnt->p_filesz, + elf_ppnt->p_offset); + if (retval < 0) + goto out_free_interp; + /* make sure path is NULL terminated */ + retval = -ENOEXEC; + if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0') + goto out_free_interp; - retval = kernel_read(interpreter, 0, bprm->buf, - BINPRM_BUF_SIZE); - if (retval != BINPRM_BUF_SIZE) { - if (retval >= 0) - retval = -EIO; - goto out_free_dentry; - } + interpreter = open_exec(elf_interpreter); + kfree(elf_interpreter); + retval = PTR_ERR(interpreter); + if (IS_ERR(interpreter)) + goto out_free_ph; - /* Get the exec headers */ - loc->interp_elf_ex = *((struct elfhdr *)bprm->buf); - break; + /* + * If the binary is not readable then enforce mm->dumpable = 0 + * regardless of the interpreter's permissions. 
+ */ + would_dump(bprm, interpreter); + + interp_elf_ex = kmalloc(sizeof(*interp_elf_ex), GFP_KERNEL); + if (!interp_elf_ex) { + retval = -ENOMEM; + goto out_free_file; } - elf_ppnt++; + + /* Get the exec headers */ + retval = elf_read(interpreter, interp_elf_ex, + sizeof(*interp_elf_ex), 0); + if (retval < 0) + goto out_free_dentry; + + break; + +out_free_interp: + kfree(elf_interpreter); + goto out_free_ph; } elf_ppnt = elf_phdata; - for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++) - if (elf_ppnt->p_type == PT_GNU_STACK) { + for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++) + switch (elf_ppnt->p_type) { + case PT_GNU_STACK: if (elf_ppnt->p_flags & PF_X) executable_stack = EXSTACK_ENABLE_X; else executable_stack = EXSTACK_DISABLE_X; break; + + case PT_LOPROC ... PT_HIPROC: + retval = arch_elf_pt_proc(elf_ex, elf_ppnt, + bprm->file, false, + &arch_state); + if (retval) + goto out_free_dentry; + break; } /* Some simple consistency checks for the interpreter */ - if (elf_interpreter) { + if (interpreter) { retval = -ELIBBAD; /* Not an ELF interpreter */ - if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0) + if (memcmp(interp_elf_ex->e_ident, ELFMAG, SELFMAG) != 0) goto out_free_dentry; /* Verify the interpreter has a valid arch */ - if (!elf_check_arch(&loc->interp_elf_ex)) + if (!elf_check_arch(interp_elf_ex) || + elf_check_fdpic(interp_elf_ex)) goto out_free_dentry; + + /* Load the interpreter program headers */ + interp_elf_phdata = load_elf_phdrs(interp_elf_ex, + interpreter); + if (!interp_elf_phdata) + goto out_free_dentry; + + /* Pass PT_LOPROC..PT_HIPROC headers to arch code */ + elf_property_phdata = NULL; + elf_ppnt = interp_elf_phdata; + for (i = 0; i < interp_elf_ex->e_phnum; i++, elf_ppnt++) + switch (elf_ppnt->p_type) { + case PT_GNU_PROPERTY: + elf_property_phdata = elf_ppnt; + break; + + case PT_LOPROC ... 
PT_HIPROC: + retval = arch_elf_pt_proc(interp_elf_ex, + elf_ppnt, interpreter, + true, &arch_state); + if (retval) + goto out_free_dentry; + break; + } } - /* Flush all traces of the currently running executable */ - retval = flush_old_exec(bprm); + retval = parse_elf_properties(interpreter ?: bprm->file, + elf_property_phdata, &arch_state); if (retval) goto out_free_dentry; - /* OK, This is the point of no return */ - current->mm->def_flags = def_flags; + /* + * Allow arch code to reject the ELF at this point, whilst it's + * still possible to return an error to the code that invoked + * the exec syscall. + */ + retval = arch_check_elf(elf_ex, + !!interpreter, interp_elf_ex, + &arch_state); + if (retval) + goto out_free_dentry; + + /* Flush all traces of the currently running executable */ + retval = begin_new_exec(bprm); + if (retval) + goto out_free_dentry; /* Do this immediately, since STACK_TOP as used in setup_arg_pages may depend on the personality. */ - SET_PERSONALITY(loc->elf_ex); - if (elf_read_implies_exec(loc->elf_ex, executable_stack)) + SET_PERSONALITY2(*elf_ex, &arch_state); + if (elf_read_implies_exec(*elf_ex, executable_stack)) current->personality |= READ_IMPLIES_EXEC; - if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) + const int snapshot_randomize_va_space = READ_ONCE(randomize_va_space); + if (!(current->personality & ADDR_NO_RANDOMIZE) && snapshot_randomize_va_space) current->flags |= PF_RANDOMIZE; setup_new_exec(bprm); @@ -740,106 +1027,193 @@ static int load_elf_binary(struct linux_binprm *bprm) change some of these later */ retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP), executable_stack); - if (retval < 0) { - send_sig(SIGKILL, current, 0); + if (retval < 0) goto out_free_dentry; - } - - current->mm->start_stack = bprm->p; + + elf_brk = 0; + + start_code = ~0UL; + end_code = 0; + start_data = 0; + end_data = 0; /* Now we do a little grungy work by mmapping the ELF image into the correct location in 
memory. */ for(i = 0, elf_ppnt = elf_phdata; - i < loc->elf_ex.e_phnum; i++, elf_ppnt++) { - int elf_prot = 0, elf_flags; + i < elf_ex->e_phnum; i++, elf_ppnt++) { + int elf_prot, elf_flags; unsigned long k, vaddr; + unsigned long total_size = 0; + unsigned long alignment; if (elf_ppnt->p_type != PT_LOAD) continue; - if (unlikely (elf_brk > elf_bss)) { - unsigned long nbyte; - - /* There was a PT_LOAD segment with p_memsz > p_filesz - before this one. Map anonymous pages, if needed, - and clear the area. */ - retval = set_brk(elf_bss + load_bias, - elf_brk + load_bias); - if (retval) { - send_sig(SIGKILL, current, 0); - goto out_free_dentry; - } - nbyte = ELF_PAGEOFFSET(elf_bss); - if (nbyte) { - nbyte = ELF_MIN_ALIGN - nbyte; - if (nbyte > elf_brk - elf_bss) - nbyte = elf_brk - elf_bss; - if (clear_user((void __user *)elf_bss + - load_bias, nbyte)) { - /* - * This bss-zeroing can fail if the ELF - * file specifies odd protections. So - * we don't check the return value - */ - } - } - } - - if (elf_ppnt->p_flags & PF_R) - elf_prot |= PROT_READ; - if (elf_ppnt->p_flags & PF_W) - elf_prot |= PROT_WRITE; - if (elf_ppnt->p_flags & PF_X) - elf_prot |= PROT_EXEC; + elf_prot = make_prot(elf_ppnt->p_flags, &arch_state, + !!interpreter, false); - elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE; + elf_flags = MAP_PRIVATE; vaddr = elf_ppnt->p_vaddr; - if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) { + /* + * The first time through the loop, first_pt_load is true: + * layout will be calculated. Once set, use MAP_FIXED since + * we know we've already safely mapped the entire region with + * MAP_FIXED_NOREPLACE in the once-per-binary logic following. + */ + if (!first_pt_load) { elf_flags |= MAP_FIXED; - } else if (loc->elf_ex.e_type == ET_DYN) { - /* Try and get dynamic programs out of the way of the - * default mmap base, as well as whatever program they - * might try to exec. This is because the brk will - * follow the loader, and is not movable. 
*/ -#ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE - /* Memory randomization might have been switched off - * in runtime via sysctl or explicit setting of - * personality flags. - * If that is the case, retain the original non-zero - * load_bias value in order to establish proper - * non-randomized mappings. + } else if (elf_ex->e_type == ET_EXEC) { + /* + * This logic is run once for the first LOAD Program + * Header for ET_EXEC binaries. No special handling + * is needed. */ - if (current->flags & PF_RANDOMIZE) - load_bias = 0; - else - load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr); -#else - load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr); -#endif + elf_flags |= MAP_FIXED_NOREPLACE; + } else if (elf_ex->e_type == ET_DYN) { + /* + * This logic is run once for the first LOAD Program + * Header for ET_DYN binaries to calculate the + * randomization (load_bias) for all the LOAD + * Program Headers. + */ + + /* + * Calculate the entire size of the ELF mapping + * (total_size), used for the initial mapping, + * due to load_addr_set which is set to true later + * once the initial mapping is performed. + * + * Note that this is only sensible when the LOAD + * segments are contiguous (or overlapping). If + * used for LOADs that are far apart, this would + * cause the holes between LOADs to be mapped, + * running the risk of having the mapping fail, + * as it would be larger than the ELF file itself. + * + * As a result, only ET_DYN does this, since + * some ET_EXEC (e.g. ia64) may have large virtual + * memory holes between LOADs. + * + */ + total_size = total_mapping_size(elf_phdata, + elf_ex->e_phnum); + if (!total_size) { + retval = -EINVAL; + goto out_free_dentry; + } + + /* Calculate any requested alignment. */ + alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum); + + /** + * DOC: PIE handling + * + * There are effectively two types of ET_DYN ELF + * binaries: programs (i.e. PIE: ET_DYN with + * PT_INTERP) and loaders (i.e. 
static PIE: ET_DYN + * without PT_INTERP, usually the ELF interpreter + * itself). Loaders must be loaded away from programs + * since the program may otherwise collide with the + * loader (especially for ET_EXEC which does not have + * a randomized position). + * + * For example, to handle invocations of + * "./ld.so someprog" to test out a new version of + * the loader, the subsequent program that the + * loader loads must avoid the loader itself, so + * they cannot share the same load range. Sufficient + * room for the brk must be allocated with the + * loader as well, since brk must be available with + * the loader. + * + * Therefore, programs are loaded offset from + * ELF_ET_DYN_BASE and loaders are loaded into the + * independently randomized mmap region (0 load_bias + * without MAP_FIXED nor MAP_FIXED_NOREPLACE). + * + * See below for "brk" handling details, which is + * also affected by program vs loader and ASLR. + */ + if (interpreter) { + /* On ET_DYN with PT_INTERP, we do the ASLR. */ + load_bias = ELF_ET_DYN_BASE; + if (current->flags & PF_RANDOMIZE) + load_bias += arch_mmap_rnd(); + /* Adjust alignment as requested. */ + if (alignment) + load_bias &= ~(alignment - 1); + elf_flags |= MAP_FIXED_NOREPLACE; + } else { + /* + * For ET_DYN without PT_INTERP, we rely on + * the architectures's (potentially ASLR) mmap + * base address (via a load_bias of 0). + * + * When a large alignment is requested, we + * must do the allocation at address "0" right + * now to discover where things will load so + * that we can adjust the resulting alignment. + * In this case (load_bias != 0), we can use + * MAP_FIXED_NOREPLACE to make sure the mapping + * doesn't collide with anything. + */ + if (alignment > ELF_MIN_ALIGN) { + load_bias = elf_load(bprm->file, 0, elf_ppnt, + elf_prot, elf_flags, total_size); + if (BAD_ADDR(load_bias)) { + retval = IS_ERR_VALUE(load_bias) ? 
+ PTR_ERR((void*)load_bias) : -EINVAL; + goto out_free_dentry; + } + vm_munmap(load_bias, total_size); + /* Adjust alignment as requested. */ + if (alignment) + load_bias &= ~(alignment - 1); + elf_flags |= MAP_FIXED_NOREPLACE; + } else + load_bias = 0; + } + + /* + * Since load_bias is used for all subsequent loading + * calculations, we must lower it by the first vaddr + * so that the remaining calculations based on the + * ELF vaddrs will be correctly offset. The result + * is then page aligned. + */ + load_bias = ELF_PAGESTART(load_bias - vaddr); } - error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, - elf_prot, elf_flags, 0); + error = elf_load(bprm->file, load_bias + vaddr, elf_ppnt, + elf_prot, elf_flags, total_size); if (BAD_ADDR(error)) { - send_sig(SIGKILL, current, 0); - retval = IS_ERR((void *)error) ? + retval = IS_ERR_VALUE(error) ? PTR_ERR((void*)error) : -EINVAL; goto out_free_dentry; } - if (!load_addr_set) { - load_addr_set = 1; - load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset); - if (loc->elf_ex.e_type == ET_DYN) { + if (first_pt_load) { + first_pt_load = 0; + if (elf_ex->e_type == ET_DYN) { load_bias += error - ELF_PAGESTART(load_bias + vaddr); - load_addr += load_bias; reloc_func_desc = load_bias; } } + + /* + * Figure out which segment in the file contains the Program + * Header table, and map to the associated memory address. + */ + if (elf_ppnt->p_offset <= elf_ex->e_phoff && + elf_ex->e_phoff < elf_ppnt->p_offset + elf_ppnt->p_filesz) { + phdr_addr = elf_ex->e_phoff - elf_ppnt->p_offset + + elf_ppnt->p_vaddr; + } + k = elf_ppnt->p_vaddr; - if (k < start_code) + if ((elf_ppnt->p_flags & PF_X) && k < start_code) start_code = k; if (start_data < k) start_data = k; @@ -853,15 +1227,12 @@ static int load_elf_binary(struct linux_binprm *bprm) elf_ppnt->p_memsz > TASK_SIZE || TASK_SIZE - elf_ppnt->p_memsz < k) { /* set_brk can never work. Avoid overflows. 
*/ - send_sig(SIGKILL, current, 0); retval = -EINVAL; goto out_free_dentry; } k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz; - if (k > elf_bss) - elf_bss = k; if ((elf_ppnt->p_flags & PF_X) && end_code < k) end_code = k; if (end_data < k) @@ -871,60 +1242,42 @@ static int load_elf_binary(struct linux_binprm *bprm) elf_brk = k; } - loc->elf_ex.e_entry += load_bias; - elf_bss += load_bias; + e_entry = elf_ex->e_entry + load_bias; + phdr_addr += load_bias; elf_brk += load_bias; start_code += load_bias; end_code += load_bias; start_data += load_bias; end_data += load_bias; - /* Calling set_brk effectively mmaps the pages that we need - * for the bss and break sections. We must do this before - * mapping in the interpreter, to make sure it doesn't wind - * up getting placed where the bss needs to go. - */ - retval = set_brk(elf_bss, elf_brk); - if (retval) { - send_sig(SIGKILL, current, 0); - goto out_free_dentry; - } - if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) { - send_sig(SIGSEGV, current, 0); - retval = -EFAULT; /* Nobody gets to see this, but.. */ - goto out_free_dentry; - } - - if (elf_interpreter) { - unsigned long interp_map_addr = 0; - - elf_entry = load_elf_interp(&loc->interp_elf_ex, + if (interpreter) { + elf_entry = load_elf_interp(interp_elf_ex, interpreter, - &interp_map_addr, - load_bias); - if (!IS_ERR((void *)elf_entry)) { + load_bias, interp_elf_phdata, + &arch_state); + if (!IS_ERR_VALUE(elf_entry)) { /* * load_elf_interp() returns relocation * adjustment */ interp_load_addr = elf_entry; - elf_entry += loc->interp_elf_ex.e_entry; + elf_entry += interp_elf_ex->e_entry; } if (BAD_ADDR(elf_entry)) { - force_sig(SIGSEGV, current); - retval = IS_ERR((void *)elf_entry) ? + retval = IS_ERR_VALUE(elf_entry) ? 
(int)elf_entry : -EINVAL; goto out_free_dentry; } reloc_func_desc = interp_load_addr; - allow_write_access(interpreter); + exe_file_allow_write_access(interpreter); fput(interpreter); - kfree(elf_interpreter); + + kfree(interp_elf_ex); + kfree(interp_elf_phdata); } else { - elf_entry = loc->elf_ex.e_entry; + elf_entry = e_entry; if (BAD_ADDR(elf_entry)) { - force_sig(SIGSEGV, current); retval = -EINVAL; goto out_free_dentry; } @@ -935,35 +1288,62 @@ static int load_elf_binary(struct linux_binprm *bprm) set_binfmt(&elf_format); #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES - retval = arch_setup_additional_pages(bprm, !!elf_interpreter); - if (retval < 0) { - send_sig(SIGKILL, current, 0); + retval = ARCH_SETUP_ADDITIONAL_PAGES(bprm, elf_ex, !!interpreter); + if (retval < 0) goto out; - } #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */ - install_exec_creds(bprm); - retval = create_elf_tables(bprm, &loc->elf_ex, - load_addr, interp_load_addr); - if (retval < 0) { - send_sig(SIGKILL, current, 0); + retval = create_elf_tables(bprm, elf_ex, interp_load_addr, + e_entry, phdr_addr); + if (retval < 0) goto out; + + mm = current->mm; + mm->end_code = end_code; + mm->start_code = start_code; + mm->start_data = start_data; + mm->end_data = end_data; + mm->start_stack = bprm->p; + + elf_coredump_set_mm_eflags(mm, elf_ex->e_flags); + + /** + * DOC: "brk" handling + * + * For architectures with ELF randomization, when executing a + * loader directly (i.e. static PIE: ET_DYN without PT_INTERP), + * move the brk area out of the mmap region and into the unused + * ELF_ET_DYN_BASE region. Since "brk" grows up it may collide + * early with the stack growing down or other regions being put + * into the mmap region by the kernel (e.g. vdso). + * + * In the CONFIG_COMPAT_BRK case, though, everything is turned + * off because we're not allowed to move the brk at all. 
+ */ + if (!IS_ENABLED(CONFIG_COMPAT_BRK) && + IS_ENABLED(CONFIG_ARCH_HAS_ELF_RANDOMIZE) && + elf_ex->e_type == ET_DYN && !interpreter) { + elf_brk = ELF_ET_DYN_BASE; + /* This counts as moving the brk, so let brk(2) know. */ + brk_moved = true; } - /* N.B. passed_fileno might not be initialized? */ - current->mm->end_code = end_code; - current->mm->start_code = start_code; - current->mm->start_data = start_data; - current->mm->end_data = end_data; - current->mm->start_stack = bprm->p; - -#ifdef arch_randomize_brk - if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) { - current->mm->brk = current->mm->start_brk = - arch_randomize_brk(current->mm); -#ifdef CONFIG_COMPAT_BRK - current->brk_randomized = 1; -#endif + mm->start_brk = mm->brk = ELF_PAGEALIGN(elf_brk); + + if ((current->flags & PF_RANDOMIZE) && snapshot_randomize_va_space > 1) { + /* + * If we didn't move the brk to ELF_ET_DYN_BASE (above), + * leave a gap between .bss and brk. + */ + if (!brk_moved) + mm->brk = mm->start_brk = mm->brk + PAGE_SIZE; + + mm->brk = mm->start_brk = arch_randomize_brk(mm); + brk_moved = true; } + +#ifdef compat_brk_randomized + if (brk_moved) + current->brk_randomized = 1; #endif if (current->personality & MMAP_PAGE_ZERO) { @@ -973,8 +1353,14 @@ static int load_elf_binary(struct linux_binprm *bprm) emulate the SVr4 behavior. Sigh. 
*/ error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC, MAP_FIXED | MAP_PRIVATE, 0); + + retval = do_mseal(0, PAGE_SIZE, 0); + if (retval) + pr_warn_ratelimited("pid=%d, couldn't seal address 0, ret=%d.\n", + task_pid_nr(current), retval); } + regs = current_pt_regs(); #ifdef ELF_PLAT_INIT /* * The ABI may specify that certain registers be set up in special @@ -989,104 +1375,25 @@ static int load_elf_binary(struct linux_binprm *bprm) ELF_PLAT_INIT(regs, reloc_func_desc); #endif - start_thread(regs, elf_entry, bprm->p); + finalize_exec(bprm); + START_THREAD(elf_ex, regs, elf_entry, bprm->p); retval = 0; out: - kfree(loc); -out_ret: return retval; /* error cleanup */ out_free_dentry: - allow_write_access(interpreter); + kfree(interp_elf_ex); + kfree(interp_elf_phdata); +out_free_file: + exe_file_allow_write_access(interpreter); if (interpreter) fput(interpreter); -out_free_interp: - kfree(elf_interpreter); out_free_ph: kfree(elf_phdata); goto out; } -/* This is really simpleminded and specialized - we are loading an - a.out library that is given an ELF header. 
*/ -static int load_elf_library(struct file *file) -{ - struct elf_phdr *elf_phdata; - struct elf_phdr *eppnt; - unsigned long elf_bss, bss, len; - int retval, error, i, j; - struct elfhdr elf_ex; - - error = -ENOEXEC; - retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex)); - if (retval != sizeof(elf_ex)) - goto out; - - if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0) - goto out; - - /* First of all, some simple consistency checks */ - if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 || - !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap) - goto out; - - /* Now read in all of the header information */ - - j = sizeof(struct elf_phdr) * elf_ex.e_phnum; - /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */ - - error = -ENOMEM; - elf_phdata = kmalloc(j, GFP_KERNEL); - if (!elf_phdata) - goto out; - - eppnt = elf_phdata; - error = -ENOEXEC; - retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j); - if (retval != j) - goto out_free_ph; - - for (j = 0, i = 0; i<elf_ex.e_phnum; i++) - if ((eppnt + i)->p_type == PT_LOAD) - j++; - if (j != 1) - goto out_free_ph; - - while (eppnt->p_type != PT_LOAD) - eppnt++; - - /* Now use mmap to map the library into memory. 
*/ - error = vm_mmap(file, - ELF_PAGESTART(eppnt->p_vaddr), - (eppnt->p_filesz + - ELF_PAGEOFFSET(eppnt->p_vaddr)), - PROT_READ | PROT_WRITE | PROT_EXEC, - MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE, - (eppnt->p_offset - - ELF_PAGEOFFSET(eppnt->p_vaddr))); - if (error != ELF_PAGESTART(eppnt->p_vaddr)) - goto out_free_ph; - - elf_bss = eppnt->p_vaddr + eppnt->p_filesz; - if (padzero(elf_bss)) { - error = -EFAULT; - goto out_free_ph; - } - - len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr + - ELF_MIN_ALIGN - 1); - bss = eppnt->p_memsz + eppnt->p_vaddr; - if (bss > len) - vm_brk(len, bss - len); - error = 0; - -out_free_ph: - kfree(elf_phdata); -out: - return error; -} - #ifdef CONFIG_ELF_CORE /* * ELF core dumper @@ -1095,116 +1402,6 @@ out: * Jeremy Fitzhardinge <jeremy@sw.oz.au> */ -/* - * The purpose of always_dump_vma() is to make sure that special kernel mappings - * that are useful for post-mortem analysis are included in every core dump. - * In that way we ensure that the core dump is fully interpretable later - * without matching up the same kernel and hardware config to see what PC values - * meant. These special mappings include - vDSO, vsyscall, and other - * architecture specific mappings - */ -static bool always_dump_vma(struct vm_area_struct *vma) -{ - /* Any vsyscall mappings? */ - if (vma == get_gate_vma(vma->vm_mm)) - return true; - /* - * arch_vma_name() returns non-NULL for special architecture mappings, - * such as vDSO sections. - */ - if (arch_vma_name(vma)) - return true; - - return false; -} - -/* - * Decide what to dump of a segment, part, all or none. 
- */ -static unsigned long vma_dump_size(struct vm_area_struct *vma, - unsigned long mm_flags) -{ -#define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type)) - - /* always dump the vdso and vsyscall sections */ - if (always_dump_vma(vma)) - goto whole; - - if (vma->vm_flags & VM_DONTDUMP) - return 0; - - /* Hugetlb memory check */ - if (vma->vm_flags & VM_HUGETLB) { - if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED)) - goto whole; - if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE)) - goto whole; - return 0; - } - - /* Do not dump I/O mapped devices or special mappings */ - if (vma->vm_flags & VM_IO) - return 0; - - /* By default, dump shared memory if mapped from an anonymous file. */ - if (vma->vm_flags & VM_SHARED) { - if (file_inode(vma->vm_file)->i_nlink == 0 ? - FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED)) - goto whole; - return 0; - } - - /* Dump segments that have been written to. */ - if (vma->anon_vma && FILTER(ANON_PRIVATE)) - goto whole; - if (vma->vm_file == NULL) - return 0; - - if (FILTER(MAPPED_PRIVATE)) - goto whole; - - /* - * If this looks like the beginning of a DSO or executable mapping, - * check for an ELF header. If we find one, dump the first page to - * aid in determining what was mapped here. - */ - if (FILTER(ELF_HEADERS) && - vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) { - u32 __user *header = (u32 __user *) vma->vm_start; - u32 word; - mm_segment_t fs = get_fs(); - /* - * Doing it this way gets the constant folded by GCC. - */ - union { - u32 cmp; - char elfmag[SELFMAG]; - } magic; - BUILD_BUG_ON(SELFMAG != sizeof word); - magic.elfmag[EI_MAG0] = ELFMAG0; - magic.elfmag[EI_MAG1] = ELFMAG1; - magic.elfmag[EI_MAG2] = ELFMAG2; - magic.elfmag[EI_MAG3] = ELFMAG3; - /* - * Switch to the user "segment" for get_user(), - * then put back what elf_core_dump() had in place. 
- */ - set_fs(USER_DS); - if (unlikely(get_user(word, header))) - word = 0; - set_fs(fs); - if (word == magic.cmp) - return PAGE_SIZE; - } - -#undef FILTER - - return 0; - -whole: - return vma->vm_end - vma->vm_start; -} - /* An ELF note in memory */ struct memelfnote { @@ -1225,35 +1422,17 @@ static int notesize(struct memelfnote *en) return sz; } -#define DUMP_WRITE(addr, nr, foffset) \ - do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0) - -static int alignfile(struct file *file, loff_t *foffset) -{ - static const char buf[4] = { 0, }; - DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset); - return 1; -} - -static int writenote(struct memelfnote *men, struct file *file, - loff_t *foffset) +static int writenote(struct memelfnote *men, struct coredump_params *cprm) { struct elf_note en; en.n_namesz = strlen(men->name) + 1; en.n_descsz = men->datasz; en.n_type = men->type; - DUMP_WRITE(&en, sizeof(en), foffset); - DUMP_WRITE(men->name, en.n_namesz, foffset); - if (!alignfile(file, foffset)) - return 0; - DUMP_WRITE(men->data, men->datasz, foffset); - if (!alignfile(file, foffset)) - return 0; - - return 1; + return dump_emit(cprm, &en, sizeof(en)) && + dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) && + dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4); } -#undef DUMP_WRITE static void fill_elf_header(struct elfhdr *elf, int segs, u16 machine, u32 flags) @@ -1274,8 +1453,6 @@ static void fill_elf_header(struct elfhdr *elf, int segs, elf->e_ehsize = sizeof(struct elfhdr); elf->e_phentsize = sizeof(struct elf_phdr); elf->e_phnum = segs; - - return; } static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset) @@ -1287,25 +1464,26 @@ static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset) phdr->p_filesz = sz; phdr->p_memsz = 0; phdr->p_flags = 0; - phdr->p_align = 0; - return; + phdr->p_align = 4; } -static void fill_note(struct memelfnote *note, const char *name, 
int type, - unsigned int sz, void *data) +static void __fill_note(struct memelfnote *note, const char *name, int type, + unsigned int sz, void *data) { note->name = name; note->type = type; note->datasz = sz; note->data = data; - return; } +#define fill_note(note, type, sz, data) \ + __fill_note(note, NN_ ## type, NT_ ## type, sz, data) + /* * fill up all the fields in prstatus from the given task struct, except * registers which need to be filled up separately. */ -static void fill_prstatus(struct elf_prstatus *prstatus, +static void fill_prstatus(struct elf_prstatus_common *prstatus, struct task_struct *p, long signr) { prstatus->pr_info.si_signo = prstatus->pr_cursig = signr; @@ -1325,17 +1503,18 @@ static void fill_prstatus(struct elf_prstatus *prstatus, * group-wide total, not its individual thread total. */ thread_group_cputime(p, &cputime); - cputime_to_timeval(cputime.utime, &prstatus->pr_utime); - cputime_to_timeval(cputime.stime, &prstatus->pr_stime); + prstatus->pr_utime = ns_to_kernel_old_timeval(cputime.utime); + prstatus->pr_stime = ns_to_kernel_old_timeval(cputime.stime); } else { - cputime_t utime, stime; + u64 utime, stime; task_cputime(p, &utime, &stime); - cputime_to_timeval(utime, &prstatus->pr_utime); - cputime_to_timeval(stime, &prstatus->pr_stime); + prstatus->pr_utime = ns_to_kernel_old_timeval(utime); + prstatus->pr_stime = ns_to_kernel_old_timeval(stime); } - cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime); - cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime); + + prstatus->pr_cutime = ns_to_kernel_old_timeval(p->signal->cutime); + prstatus->pr_cstime = ns_to_kernel_old_timeval(p->signal->cstime); } static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, @@ -1343,7 +1522,8 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, { const struct cred *cred; unsigned int i, len; - + unsigned int state; + /* first copy the parameters from user space */ memset(psinfo, 0, sizeof(struct 
elf_prpsinfo)); @@ -1365,7 +1545,8 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, psinfo->pr_pgrp = task_pgrp_vnr(p); psinfo->pr_sid = task_session_vnr(p); - i = p->state ? ffz(~p->state) + 1 : 0; + state = READ_ONCE(p->__state); + i = state ? ffz(~state) + 1 : 0; psinfo->pr_state = i; psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i]; psinfo->pr_zomb = psinfo->pr_sname == 'Z'; @@ -1376,8 +1557,8 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid)); SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid)); rcu_read_unlock(); - strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname)); - + get_task_comm(psinfo->pr_fname, p); + return 0; } @@ -1388,20 +1569,16 @@ static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm) do i += 2; while (auxv[i - 2] != AT_NULL); - fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv); + fill_note(note, AUXV, i * sizeof(elf_addr_t), auxv); } static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata, - siginfo_t *siginfo) + const kernel_siginfo_t *siginfo) { - mm_segment_t old_fs = get_fs(); - set_fs(KERNEL_DS); - copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo); - set_fs(old_fs); - fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata); + copy_siginfo_to_external(csigdata, siginfo); + fill_note(note, SIGINFO, sizeof(*csigdata), csigdata); } -#define MAX_FILE_NOTE_SIZE (4*1024*1024) /* * Format of NT_FILE note: * @@ -1413,58 +1590,69 @@ static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata, * long file_ofs * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL... 
*/ -static void fill_files_note(struct memelfnote *note) +static int fill_files_note(struct memelfnote *note, struct coredump_params *cprm) { - struct vm_area_struct *vma; unsigned count, size, names_ofs, remaining, n; user_long_t *data; user_long_t *start_end_ofs; char *name_base, *name_curpos; + int i; /* *Estimated* file count and total data size needed */ - count = current->mm->map_count; + count = cprm->vma_count; + if (count > UINT_MAX / 64) + return -EINVAL; size = count * 64; names_ofs = (2 + 3 * count) * sizeof(data[0]); alloc: - if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */ - goto err; + /* paranoia check */ + if (size >= core_file_note_size_limit) { + pr_warn_once("coredump Note size too large: %u (does kernel.core_file_note_size_limit sysctl need adjustment?\n", + size); + return -EINVAL; + } size = round_up(size, PAGE_SIZE); - data = vmalloc(size); - if (!data) - goto err; + /* + * "size" can be 0 here legitimately. + * Let it ENOMEM and omit NT_FILE section which will be empty anyway. 
+ */ + data = kvmalloc(size, GFP_KERNEL); + if (ZERO_OR_NULL_PTR(data)) + return -ENOMEM; start_end_ofs = data + 2; name_base = name_curpos = ((char *)data) + names_ofs; remaining = size - names_ofs; count = 0; - for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) { + for (i = 0; i < cprm->vma_count; i++) { + struct core_vma_metadata *m = &cprm->vma_meta[i]; struct file *file; const char *filename; - file = vma->vm_file; + file = m->file; if (!file) continue; - filename = d_path(&file->f_path, name_curpos, remaining); + filename = file_path(file, name_curpos, remaining); if (IS_ERR(filename)) { if (PTR_ERR(filename) == -ENAMETOOLONG) { - vfree(data); + kvfree(data); size = size * 5 / 4; goto alloc; } continue; } - /* d_path() fills at the end, move name down */ + /* file_path() fills at the end, move name down */ /* n = strlen(filename) + 1: */ n = (name_curpos + remaining) - filename; remaining = filename - name_curpos; memmove(name_curpos, filename, n); name_curpos += n; - *start_end_ofs++ = vma->vm_start; - *start_end_ofs++ = vma->vm_end; - *start_end_ofs++ = vma->vm_pgoff; + *start_end_ofs++ = m->start; + *start_end_ofs++ = m->end; + *start_end_ofs++ = m->pgoff; count++; } @@ -1472,10 +1660,10 @@ static void fill_files_note(struct memelfnote *note) data[0] = count; data[1] = PAGE_SIZE; /* - * Count usually is less than current->mm->map_count, + * Count usually is less than mm->map_count, * we need to move filenames down. 
*/ - n = current->mm->map_count - count; + n = cprm->vma_count - count; if (n != 0) { unsigned shift_bytes = n * 3 * sizeof(data[0]); memmove(name_base - shift_bytes, name_base, @@ -1484,18 +1672,17 @@ static void fill_files_note(struct memelfnote *note) } size = name_curpos - (char *)data; - fill_note(note, "CORE", NT_FILE, size, data); - err: ; + fill_note(note, FILE, size, data); + return 0; } -#ifdef CORE_DUMP_USE_REGSET #include <linux/regset.h> struct elf_thread_core_info { struct elf_thread_core_info *next; struct task_struct *task; struct elf_prstatus prstatus; - struct memelfnote notes[0]; + struct memelfnote notes[]; }; struct elf_note_info { @@ -1509,6 +1696,7 @@ struct elf_note_info { int thread_notes; }; +#ifdef CORE_DUMP_USE_REGSET /* * When a regset has a writeback hook, we call it on each thread before * dumping user memory. On register window machines, this makes sure the @@ -1521,109 +1709,134 @@ static void do_thread_regset_writeback(struct task_struct *task, regset->writeback(task, regset, 1); } -#ifndef PR_REG_SIZE -#define PR_REG_SIZE(S) sizeof(S) -#endif - #ifndef PRSTATUS_SIZE -#define PRSTATUS_SIZE(S) sizeof(S) -#endif - -#ifndef PR_REG_PTR -#define PR_REG_PTR(S) (&((S)->pr_reg)) +#define PRSTATUS_SIZE sizeof(struct elf_prstatus) #endif #ifndef SET_PR_FPVALID -#define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V)) +#define SET_PR_FPVALID(S) ((S)->pr_fpvalid = 1) #endif static int fill_thread_core_info(struct elf_thread_core_info *t, const struct user_regset_view *view, - long signr, size_t *total) + long signr, struct elf_note_info *info) { - unsigned int i; + unsigned int note_iter, view_iter; /* * NT_PRSTATUS is the one special case, because the regset data * goes into the pr_reg field inside the note contents, rather - * than being the whole note contents. We fill the reset in here. + * than being the whole note contents. We fill the regset in here. * We assume that regset 0 is NT_PRSTATUS. 
*/ - fill_prstatus(&t->prstatus, t->task, signr); - (void) view->regsets[0].get(t->task, &view->regsets[0], - 0, PR_REG_SIZE(t->prstatus.pr_reg), - PR_REG_PTR(&t->prstatus), NULL); + fill_prstatus(&t->prstatus.common, t->task, signr); + regset_get(t->task, &view->regsets[0], + sizeof(t->prstatus.pr_reg), &t->prstatus.pr_reg); - fill_note(&t->notes[0], "CORE", NT_PRSTATUS, - PRSTATUS_SIZE(t->prstatus), &t->prstatus); - *total += notesize(&t->notes[0]); + fill_note(&t->notes[0], PRSTATUS, PRSTATUS_SIZE, &t->prstatus); + info->size += notesize(&t->notes[0]); do_thread_regset_writeback(t->task, &view->regsets[0]); /* * Each other regset might generate a note too. For each regset - * that has no core_note_type or is inactive, we leave t->notes[i] - * all zero and we'll know to skip writing it later. + * that has no core_note_type or is inactive, skip it. */ - for (i = 1; i < view->n; ++i) { - const struct user_regset *regset = &view->regsets[i]; + note_iter = 1; + for (view_iter = 1; view_iter < view->n; ++view_iter) { + const struct user_regset *regset = &view->regsets[view_iter]; + int note_type = regset->core_note_type; + const char *note_name = regset->core_note_name; + bool is_fpreg = note_type == NT_PRFPREG; + void *data; + int ret; + do_thread_regset_writeback(t->task, regset); - if (regset->core_note_type && regset->get && - (!regset->active || regset->active(t->task, regset))) { - int ret; - size_t size = regset->n * regset->size; - void *data = kmalloc(size, GFP_KERNEL); - if (unlikely(!data)) - return 0; - ret = regset->get(t->task, regset, - 0, size, data, NULL); - if (unlikely(ret)) - kfree(data); - else { - if (regset->core_note_type != NT_PRFPREG) - fill_note(&t->notes[i], "LINUX", - regset->core_note_type, - size, data); - else { - SET_PR_FPVALID(&t->prstatus, 1); - fill_note(&t->notes[i], "CORE", - NT_PRFPREG, size, data); - } - *total += notesize(&t->notes[i]); - } - } + if (!note_type) // not for coredumps + continue; + if (regset->active && 
regset->active(t->task, regset) <= 0) + continue; + + ret = regset_get_alloc(t->task, regset, ~0U, &data); + if (ret < 0) + continue; + + if (WARN_ON_ONCE(note_iter >= info->thread_notes)) + break; + + if (is_fpreg) + SET_PR_FPVALID(&t->prstatus); + + /* There should be a note name, but if not, guess: */ + if (WARN_ON_ONCE(!note_name)) + note_name = "LINUX"; + else + /* Warn on non-legacy-compatible names, for now. */ + WARN_ON_ONCE(strcmp(note_name, + is_fpreg ? "CORE" : "LINUX")); + + __fill_note(&t->notes[note_iter], note_name, note_type, + ret, data); + + info->size += notesize(&t->notes[note_iter]); + note_iter++; } return 1; } +#else +static int fill_thread_core_info(struct elf_thread_core_info *t, + const struct user_regset_view *view, + long signr, struct elf_note_info *info) +{ + struct task_struct *p = t->task; + elf_fpregset_t *fpu; + + fill_prstatus(&t->prstatus.common, p, signr); + elf_core_copy_task_regs(p, &t->prstatus.pr_reg); + + fill_note(&t->notes[0], PRSTATUS, sizeof(t->prstatus), &t->prstatus); + info->size += notesize(&t->notes[0]); + + fpu = kzalloc(sizeof(elf_fpregset_t), GFP_KERNEL); + if (!fpu || !elf_core_copy_task_fpregs(p, fpu)) { + kfree(fpu); + return 1; + } + + t->prstatus.pr_fpvalid = 1; + fill_note(&t->notes[1], PRFPREG, sizeof(*fpu), fpu); + info->size += notesize(&t->notes[1]); + + return 1; +} +#endif static int fill_note_info(struct elfhdr *elf, int phdrs, struct elf_note_info *info, - siginfo_t *siginfo, struct pt_regs *regs) + struct coredump_params *cprm) { struct task_struct *dump_task = current; - const struct user_regset_view *view = task_user_regset_view(dump_task); + const struct user_regset_view *view; struct elf_thread_core_info *t; struct elf_prpsinfo *psinfo; struct core_thread *ct; - unsigned int i; - - info->size = 0; - info->thread = NULL; + u16 machine; + u32 flags; psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL); - if (psinfo == NULL) { - info->psinfo.data = NULL; /* So we don't free this wrongly */ + if 
(!psinfo) return 0; - } + fill_note(&info->psinfo, PRPSINFO, sizeof(*psinfo), psinfo); - fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo); +#ifdef CORE_DUMP_USE_REGSET + view = task_user_regset_view(dump_task); /* * Figure out how many notes we're going to need for each thread. */ info->thread_notes = 0; - for (i = 0; i < view->n; ++i) + for (int i = 0; i < view->n; ++i) if (view->regsets[i].core_note_type != 0) ++info->thread_notes; @@ -1637,41 +1850,51 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, return 0; } + machine = view->e_machine; + flags = view->e_flags; +#else + view = NULL; + info->thread_notes = 2; + machine = ELF_ARCH; + flags = ELF_CORE_EFLAGS; +#endif + + /* + * Override ELF e_flags with value taken from process, + * if arch needs that. + */ + flags = elf_coredump_get_mm_eflags(dump_task->mm, flags); + /* * Initialize the ELF file header. */ - fill_elf_header(elf, phdrs, - view->e_machine, view->e_flags); + fill_elf_header(elf, phdrs, machine, flags); /* * Allocate a structure for each thread. */ - for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) { - t = kzalloc(offsetof(struct elf_thread_core_info, - notes[info->thread_notes]), + info->thread = kzalloc(struct_size(info->thread, notes, info->thread_notes), + GFP_KERNEL); + if (unlikely(!info->thread)) + return 0; + + info->thread->task = dump_task; + for (ct = dump_task->signal->core_state->dumper.next; ct; ct = ct->next) { + t = kzalloc(struct_size(t, notes, info->thread_notes), GFP_KERNEL); if (unlikely(!t)) return 0; t->task = ct->task; - if (ct->task == dump_task || !info->thread) { - t->next = info->thread; - info->thread = t; - } else { - /* - * Make sure to keep the original task at - * the head of the list. - */ - t->next = info->thread->next; - info->thread->next = t; - } + t->next = info->thread->next; + info->thread->next = t; } /* * Now fill in each thread's information. 
*/ for (t = info->thread; t != NULL; t = t->next) - if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size)) + if (!fill_thread_core_info(t, view, cprm->siginfo->si_signo, info)) return 0; /* @@ -1680,54 +1903,50 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm); info->size += notesize(&info->psinfo); - fill_siginfo_note(&info->signote, &info->csigdata, siginfo); + fill_siginfo_note(&info->signote, &info->csigdata, cprm->siginfo); info->size += notesize(&info->signote); fill_auxv_note(&info->auxv, current->mm); info->size += notesize(&info->auxv); - fill_files_note(&info->files); - info->size += notesize(&info->files); + if (fill_files_note(&info->files, cprm) == 0) + info->size += notesize(&info->files); return 1; } -static size_t get_note_info_size(struct elf_note_info *info) -{ - return info->size; -} - /* * Write all the notes for each thread. When writing the first thread, the * process-wide notes are interleaved after the first thread-specific note. 
*/ static int write_note_info(struct elf_note_info *info, - struct file *file, loff_t *foffset) + struct coredump_params *cprm) { - bool first = 1; + bool first = true; struct elf_thread_core_info *t = info->thread; do { int i; - if (!writenote(&t->notes[0], file, foffset)) + if (!writenote(&t->notes[0], cprm)) return 0; - if (first && !writenote(&info->psinfo, file, foffset)) + if (first && !writenote(&info->psinfo, cprm)) return 0; - if (first && !writenote(&info->signote, file, foffset)) + if (first && !writenote(&info->signote, cprm)) return 0; - if (first && !writenote(&info->auxv, file, foffset)) + if (first && !writenote(&info->auxv, cprm)) return 0; - if (first && !writenote(&info->files, file, foffset)) + if (first && info->files.data && + !writenote(&info->files, cprm)) return 0; for (i = 1; i < info->thread_notes; ++i) if (t->notes[i].data && - !writenote(&t->notes[i], file, foffset)) + !writenote(&t->notes[i], cprm)) return 0; - first = 0; + first = false; t = t->next; } while (t); @@ -1743,261 +1962,11 @@ static void free_note_info(struct elf_note_info *info) threads = t->next; WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus); for (i = 1; i < info->thread_notes; ++i) - kfree(t->notes[i].data); + kvfree(t->notes[i].data); kfree(t); } kfree(info->psinfo.data); - vfree(info->files.data); -} - -#else - -/* Here is the structure in which status of each thread is captured. */ -struct elf_thread_status -{ - struct list_head list; - struct elf_prstatus prstatus; /* NT_PRSTATUS */ - elf_fpregset_t fpu; /* NT_PRFPREG */ - struct task_struct *thread; -#ifdef ELF_CORE_COPY_XFPREGS - elf_fpxregset_t xfpu; /* ELF_CORE_XFPREG_TYPE */ -#endif - struct memelfnote notes[3]; - int num_notes; -}; - -/* - * In order to add the specific thread information for the elf file format, - * we need to keep a linked list of every threads pr_status and then create - * a single section for them in the final core file. 
- */ -static int elf_dump_thread_status(long signr, struct elf_thread_status *t) -{ - int sz = 0; - struct task_struct *p = t->thread; - t->num_notes = 0; - - fill_prstatus(&t->prstatus, p, signr); - elf_core_copy_task_regs(p, &t->prstatus.pr_reg); - - fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus), - &(t->prstatus)); - t->num_notes++; - sz += notesize(&t->notes[0]); - - if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL, - &t->fpu))) { - fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu), - &(t->fpu)); - t->num_notes++; - sz += notesize(&t->notes[1]); - } - -#ifdef ELF_CORE_COPY_XFPREGS - if (elf_core_copy_task_xfpregs(p, &t->xfpu)) { - fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE, - sizeof(t->xfpu), &t->xfpu); - t->num_notes++; - sz += notesize(&t->notes[2]); - } -#endif - return sz; -} - -struct elf_note_info { - struct memelfnote *notes; - struct elf_prstatus *prstatus; /* NT_PRSTATUS */ - struct elf_prpsinfo *psinfo; /* NT_PRPSINFO */ - struct list_head thread_list; - elf_fpregset_t *fpu; -#ifdef ELF_CORE_COPY_XFPREGS - elf_fpxregset_t *xfpu; -#endif - user_siginfo_t csigdata; - int thread_status_size; - int numnote; -}; - -static int elf_note_info_init(struct elf_note_info *info) -{ - memset(info, 0, sizeof(*info)); - INIT_LIST_HEAD(&info->thread_list); - - /* Allocate space for ELF notes */ - info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL); - if (!info->notes) - return 0; - info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL); - if (!info->psinfo) - return 0; - info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL); - if (!info->prstatus) - return 0; - info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL); - if (!info->fpu) - return 0; -#ifdef ELF_CORE_COPY_XFPREGS - info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL); - if (!info->xfpu) - return 0; -#endif - return 1; -} - -static int fill_note_info(struct elfhdr *elf, int phdrs, - struct elf_note_info *info, - siginfo_t *siginfo, 
struct pt_regs *regs) -{ - struct list_head *t; - - if (!elf_note_info_init(info)) - return 0; - - if (siginfo->si_signo) { - struct core_thread *ct; - struct elf_thread_status *ets; - - for (ct = current->mm->core_state->dumper.next; - ct; ct = ct->next) { - ets = kzalloc(sizeof(*ets), GFP_KERNEL); - if (!ets) - return 0; - - ets->thread = ct->task; - list_add(&ets->list, &info->thread_list); - } - - list_for_each(t, &info->thread_list) { - int sz; - - ets = list_entry(t, struct elf_thread_status, list); - sz = elf_dump_thread_status(siginfo->si_signo, ets); - info->thread_status_size += sz; - } - } - /* now collect the dump for the current */ - memset(info->prstatus, 0, sizeof(*info->prstatus)); - fill_prstatus(info->prstatus, current, siginfo->si_signo); - elf_core_copy_regs(&info->prstatus->pr_reg, regs); - - /* Set up header */ - fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS); - - /* - * Set up the notes in similar form to SVR4 core dumps made - * with info from their /proc. - */ - - fill_note(info->notes + 0, "CORE", NT_PRSTATUS, - sizeof(*info->prstatus), info->prstatus); - fill_psinfo(info->psinfo, current->group_leader, current->mm); - fill_note(info->notes + 1, "CORE", NT_PRPSINFO, - sizeof(*info->psinfo), info->psinfo); - - fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo); - fill_auxv_note(info->notes + 3, current->mm); - fill_files_note(info->notes + 4); - - info->numnote = 5; - - /* Try to dump the FPU. 
*/ - info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs, - info->fpu); - if (info->prstatus->pr_fpvalid) - fill_note(info->notes + info->numnote++, - "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu); -#ifdef ELF_CORE_COPY_XFPREGS - if (elf_core_copy_task_xfpregs(current, info->xfpu)) - fill_note(info->notes + info->numnote++, - "LINUX", ELF_CORE_XFPREG_TYPE, - sizeof(*info->xfpu), info->xfpu); -#endif - - return 1; -} - -static size_t get_note_info_size(struct elf_note_info *info) -{ - int sz = 0; - int i; - - for (i = 0; i < info->numnote; i++) - sz += notesize(info->notes + i); - - sz += info->thread_status_size; - - return sz; -} - -static int write_note_info(struct elf_note_info *info, - struct file *file, loff_t *foffset) -{ - int i; - struct list_head *t; - - for (i = 0; i < info->numnote; i++) - if (!writenote(info->notes + i, file, foffset)) - return 0; - - /* write out the thread status notes section */ - list_for_each(t, &info->thread_list) { - struct elf_thread_status *tmp = - list_entry(t, struct elf_thread_status, list); - - for (i = 0; i < tmp->num_notes; i++) - if (!writenote(&tmp->notes[i], file, foffset)) - return 0; - } - - return 1; -} - -static void free_note_info(struct elf_note_info *info) -{ - while (!list_empty(&info->thread_list)) { - struct list_head *tmp = info->thread_list.next; - list_del(tmp); - kfree(list_entry(tmp, struct elf_thread_status, list)); - } - - /* Free data allocated by fill_files_note(): */ - vfree(info->notes[4].data); - - kfree(info->prstatus); - kfree(info->psinfo); - kfree(info->notes); - kfree(info->fpu); -#ifdef ELF_CORE_COPY_XFPREGS - kfree(info->xfpu); -#endif -} - -#endif - -static struct vm_area_struct *first_vma(struct task_struct *tsk, - struct vm_area_struct *gate_vma) -{ - struct vm_area_struct *ret = tsk->mm->mmap; - - if (ret) - return ret; - return gate_vma; -} -/* - * Helper function for iterating across a vma list. 
It ensures that the caller - * will visit `gate_vma' prior to terminating the search. - */ -static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma, - struct vm_area_struct *gate_vma) -{ - struct vm_area_struct *ret; - - ret = this_vma->vm_next; - if (ret) - return ret; - if (this_vma == gate_vma) - return NULL; - return gate_vma; + kvfree(info->files.data); } static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum, @@ -2016,18 +1985,6 @@ static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum, shdr4extnum->sh_info = segs; } -static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma, - unsigned long mm_flags) -{ - struct vm_area_struct *vma; - size_t size = 0; - - for (vma = first_vma(current, gate_vma); vma != NULL; - vma = next_vma(vma, gate_vma)) - size += vma_dump_size(vma, mm_flags); - return size; -} - /* * Actual dumper * @@ -2038,44 +1995,20 @@ static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma, static int elf_core_dump(struct coredump_params *cprm) { int has_dumped = 0; - mm_segment_t fs; - int segs; - size_t size = 0; - struct vm_area_struct *vma, *gate_vma; - struct elfhdr *elf = NULL; - loff_t offset = 0, dataoff, foffset; - struct elf_note_info info; + int segs, i; + struct elfhdr elf; + loff_t offset = 0, dataoff; + struct elf_note_info info = { }; struct elf_phdr *phdr4note = NULL; struct elf_shdr *shdr4extnum = NULL; Elf_Half e_phnum; elf_addr_t e_shoff; /* - * We no longer stop all VM operations. - * - * This is because those proceses that could possibly change map_count - * or the mmap / vma pages are now blocked in do_exit on current - * finishing this core dump. - * - * Only ptrace can touch these memory addresses, but it doesn't change - * the map_count or the pages allocated. So no possibility of crashing - * exists while dumping the mm->vm_next areas to the core file. 
- */ - - /* alloc memory for large data structures: too large to be on stack */ - elf = kmalloc(sizeof(*elf), GFP_KERNEL); - if (!elf) - goto out; - /* * The number of segs are recored into ELF header as 16bit value. * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here. */ - segs = current->mm->map_count; - segs += elf_core_extra_phdrs(); - - gate_vma = get_gate_vma(current->mm); - if (gate_vma != NULL) - segs++; + segs = cprm->vma_count + elf_core_extra_phdrs(cprm); /* for notes section */ segs++; @@ -2089,22 +2022,19 @@ static int elf_core_dump(struct coredump_params *cprm) * Collect all the non-memory information about the process for the * notes. This also sets up the file header. */ - if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs)) - goto cleanup; + if (!fill_note_info(&elf, e_phnum, &info, cprm)) + goto end_coredump; has_dumped = 1; - fs = get_fs(); - set_fs(KERNEL_DS); - - offset += sizeof(*elf); /* Elf header */ + offset += sizeof(elf); /* ELF header */ offset += segs * sizeof(struct elf_phdr); /* Program headers */ - foffset = offset; /* Write notes phdr entry */ { - size_t sz = get_note_info_size(&info); + size_t sz = info.size; + /* For cell spufs and x86 xstate */ sz += elf_coredump_extra_notes_size(); phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL); @@ -2117,113 +2047,83 @@ static int elf_core_dump(struct coredump_params *cprm) dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE); - offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags); - offset += elf_core_extra_data_size(); + offset += cprm->vma_data_size; + offset += elf_core_extra_data_size(cprm); e_shoff = offset; if (e_phnum == PN_XNUM) { shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL); if (!shdr4extnum) goto end_coredump; - fill_extnum_info(elf, shdr4extnum, e_shoff, segs); + fill_extnum_info(&elf, shdr4extnum, e_shoff, segs); } offset = dataoff; - size += sizeof(*elf); - if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf))) 
+ if (!dump_emit(cprm, &elf, sizeof(elf))) goto end_coredump; - size += sizeof(*phdr4note); - if (size > cprm->limit - || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note))) + if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note))) goto end_coredump; /* Write program headers for segments dump */ - for (vma = first_vma(current, gate_vma); vma != NULL; - vma = next_vma(vma, gate_vma)) { + for (i = 0; i < cprm->vma_count; i++) { + struct core_vma_metadata *meta = cprm->vma_meta + i; struct elf_phdr phdr; phdr.p_type = PT_LOAD; phdr.p_offset = offset; - phdr.p_vaddr = vma->vm_start; + phdr.p_vaddr = meta->start; phdr.p_paddr = 0; - phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags); - phdr.p_memsz = vma->vm_end - vma->vm_start; + phdr.p_filesz = meta->dump_size; + phdr.p_memsz = meta->end - meta->start; offset += phdr.p_filesz; - phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0; - if (vma->vm_flags & VM_WRITE) + phdr.p_flags = 0; + if (meta->flags & VM_READ) + phdr.p_flags |= PF_R; + if (meta->flags & VM_WRITE) phdr.p_flags |= PF_W; - if (vma->vm_flags & VM_EXEC) + if (meta->flags & VM_EXEC) phdr.p_flags |= PF_X; phdr.p_align = ELF_EXEC_PAGESIZE; - size += sizeof(phdr); - if (size > cprm->limit - || !dump_write(cprm->file, &phdr, sizeof(phdr))) + if (!dump_emit(cprm, &phdr, sizeof(phdr))) goto end_coredump; } - if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit)) + if (!elf_core_write_extra_phdrs(cprm, offset)) goto end_coredump; - /* write out the notes section */ - if (!write_note_info(&info, cprm->file, &foffset)) + /* write out the notes section */ + if (!write_note_info(&info, cprm)) goto end_coredump; - if (elf_coredump_extra_notes_write(cprm->file, &foffset)) + /* For cell spufs and x86 xstate */ + if (elf_coredump_extra_notes_write(cprm)) goto end_coredump; /* Align to page */ - if (!dump_seek(cprm->file, dataoff - foffset)) - goto end_coredump; + dump_skip_to(cprm, dataoff); - for (vma = first_vma(current, gate_vma); vma != NULL; - vma = 
next_vma(vma, gate_vma)) { - unsigned long addr; - unsigned long end; - - end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags); - - for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) { - struct page *page; - int stop; - - page = get_dump_page(addr); - if (page) { - void *kaddr = kmap(page); - stop = ((size += PAGE_SIZE) > cprm->limit) || - !dump_write(cprm->file, kaddr, - PAGE_SIZE); - kunmap(page); - page_cache_release(page); - } else - stop = !dump_seek(cprm->file, PAGE_SIZE); - if (stop) - goto end_coredump; - } + for (i = 0; i < cprm->vma_count; i++) { + struct core_vma_metadata *meta = cprm->vma_meta + i; + + if (!dump_user_range(cprm, meta->start, meta->dump_size)) + goto end_coredump; } - if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit)) + if (!elf_core_write_extra_data(cprm)) goto end_coredump; if (e_phnum == PN_XNUM) { - size += sizeof(*shdr4extnum); - if (size > cprm->limit - || !dump_write(cprm->file, shdr4extnum, - sizeof(*shdr4extnum))) + if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum))) goto end_coredump; } end_coredump: - set_fs(fs); - -cleanup: free_note_info(&info); kfree(shdr4extnum); kfree(phdr4note); - kfree(elf); -out: return has_dumped; } @@ -2243,4 +2143,7 @@ static void __exit exit_elf_binfmt(void) core_initcall(init_elf_binfmt); module_exit(exit_elf_binfmt); -MODULE_LICENSE("GPL"); + +#ifdef CONFIG_BINFMT_ELF_KUNIT_TEST +#include "tests/binfmt_elf_kunit.c" +#endif |
