Diffstat (limited to 'arch/x86/entry/vdso/vma.c')
-rw-r--r--	arch/x86/entry/vdso/vma.c	285
1 file changed, 285 insertions, 0 deletions
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
new file mode 100644
index 000000000000..afe105b2f907
--- /dev/null
+++ b/arch/x86/entry/vdso/vma.c
@@ -0,0 +1,285 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2007 Andi Kleen, SUSE Labs.
+ *
+ * This contains most of the x86 vDSO kernel-side code.
+ */
+#include <linux/mm.h>
+#include <linux/err.h>
+#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/random.h>
+#include <linux/elf.h>
+#include <linux/cpu.h>
+#include <linux/ptrace.h>
+#include <linux/vdso_datastore.h>
+
+#include <asm/pvclock.h>
+#include <asm/vgtod.h>
+#include <asm/proto.h>
+#include <asm/vdso.h>
+#include <asm/tlb.h>
+#include <asm/page.h>
+#include <asm/desc.h>
+#include <asm/cpufeature.h>
+#include <asm/vdso/vsyscall.h>
+#include <clocksource/hyperv_timer.h>
+
+static_assert(VDSO_NR_PAGES + VDSO_NR_VCLOCK_PAGES == __VDSO_PAGES);
+
+unsigned int vclocks_used __read_mostly;
+
+#if defined(CONFIG_X86_64)
+unsigned int __read_mostly vdso64_enabled = 1;
+#endif
+
+int __init init_vdso_image(const struct vdso_image *image)
+{
+	BUILD_BUG_ON(VDSO_CLOCKMODE_MAX >= 32);
+	BUG_ON(image->size % PAGE_SIZE != 0);
+
+	apply_alternatives((struct alt_instr *)(image->data + image->alt),
+			   (struct alt_instr *)(image->data + image->alt +
+						image->alt_len));
+
+	return 0;
+}
+
+struct linux_binprm;
+
+static vm_fault_t vdso_fault(const struct vm_special_mapping *sm,
+			     struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	const struct vdso_image *image = vma->vm_mm->context.vdso_image;
+
+	if (!image || (vmf->pgoff << PAGE_SHIFT) >= image->size)
+		return VM_FAULT_SIGBUS;
+
+	vmf->page = virt_to_page(image->data + (vmf->pgoff << PAGE_SHIFT));
+	get_page(vmf->page);
+	return 0;
+}
+
+static void vdso_fix_landing(const struct vdso_image *image,
+			     struct vm_area_struct *new_vma)
+{
+	if (in_ia32_syscall() && image == &vdso_image_32) {
+		struct pt_regs *regs = current_pt_regs();
+		unsigned long vdso_land = image->sym_int80_landing_pad;
+		unsigned long old_land_addr = vdso_land +
+			(unsigned long)current->mm->context.vdso;
+
+		/* Fixing userspace landing - look at do_fast_syscall_32 */
+		if (regs->ip == old_land_addr)
+			regs->ip = new_vma->vm_start + vdso_land;
+	}
+}
+
+static int vdso_mremap(const struct vm_special_mapping *sm,
+		       struct vm_area_struct *new_vma)
+{
+	const struct vdso_image *image = current->mm->context.vdso_image;
+
+	vdso_fix_landing(image, new_vma);
+	current->mm->context.vdso = (void __user *)new_vma->vm_start;
+
+	return 0;
+}
+
+static vm_fault_t vvar_vclock_fault(const struct vm_special_mapping *sm,
+				    struct vm_area_struct *vma,
+				    struct vm_fault *vmf)
+{
+	switch (vmf->pgoff) {
+#ifdef CONFIG_PARAVIRT_CLOCK
+	case VDSO_PAGE_PVCLOCK_OFFSET:
+	{
+		struct pvclock_vsyscall_time_info *pvti =
+			pvclock_get_pvti_cpu0_va();
+
+		if (pvti && vclock_was_used(VDSO_CLOCKMODE_PVCLOCK))
+			return vmf_insert_pfn_prot(vma, vmf->address,
+					__pa(pvti) >> PAGE_SHIFT,
+					pgprot_decrypted(vma->vm_page_prot));
+		break;
+	}
+#endif /* CONFIG_PARAVIRT_CLOCK */
+#ifdef CONFIG_HYPERV_TIMER
+	case VDSO_PAGE_HVCLOCK_OFFSET:
+	{
+		unsigned long pfn = hv_get_tsc_pfn();
+
+		if (pfn && vclock_was_used(VDSO_CLOCKMODE_HVCLOCK))
+			return vmf_insert_pfn(vma, vmf->address, pfn);
+		break;
+	}
+#endif /* CONFIG_HYPERV_TIMER */
+	}
+
+	return VM_FAULT_SIGBUS;
+}
+
+static const struct vm_special_mapping vdso_mapping = {
+	.name = "[vdso]",
+	.fault = vdso_fault,
+	.mremap = vdso_mremap,
+};
+static const struct vm_special_mapping vvar_vclock_mapping = {
+	.name = "[vvar_vclock]",
+	.fault = vvar_vclock_fault,
+};
+
+/*
+ * Add vdso and vvar mappings to current process.
+ * @image - blob to map
+ * @addr - request a specific address (zero to map at free addr)
+ */
+static int map_vdso(const struct vdso_image *image, unsigned long addr)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	unsigned long text_start;
+	int ret = 0;
+
+	if (mmap_write_lock_killable(mm))
+		return -EINTR;
+
+	addr = get_unmapped_area(NULL, addr,
+				 image->size + __VDSO_PAGES * PAGE_SIZE, 0, 0);
+	if (IS_ERR_VALUE(addr)) {
+		ret = addr;
+		goto up_fail;
+	}
+
+	text_start = addr + __VDSO_PAGES * PAGE_SIZE;
+
+	/*
+	 * MAYWRITE to allow gdb to COW and set breakpoints
+	 */
+	vma = _install_special_mapping(mm,
+				       text_start,
+				       image->size,
+				       VM_READ|VM_EXEC|
+				       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
+				       VM_SEALED_SYSMAP,
+				       &vdso_mapping);
+
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
+		goto up_fail;
+	}
+
+	vma = vdso_install_vvar_mapping(mm, addr);
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
+		do_munmap(mm, text_start, image->size, NULL);
+		goto up_fail;
+	}
+
+	vma = _install_special_mapping(mm,
+				       VDSO_VCLOCK_PAGES_START(addr),
+				       VDSO_NR_VCLOCK_PAGES * PAGE_SIZE,
+				       VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP|
+				       VM_PFNMAP|VM_SEALED_SYSMAP,
+				       &vvar_vclock_mapping);
+
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
+		do_munmap(mm, text_start, image->size, NULL);
+		do_munmap(mm, addr, image->size, NULL);
+		goto up_fail;
+	}
+
+	current->mm->context.vdso = (void __user *)text_start;
+	current->mm->context.vdso_image = image;
+
+up_fail:
+	mmap_write_unlock(mm);
+	return ret;
+}
+
+int map_vdso_once(const struct vdso_image *image, unsigned long addr)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	VMA_ITERATOR(vmi, mm, 0);
+
+	mmap_write_lock(mm);
+	/*
+	 * Check if we have already mapped vdso blob - fail to prevent
+	 * abusing from userspace install_special_mapping, which may
+	 * not do accounting and rlimit right.
+	 * We could search vma near context.vdso, but it's a slowpath,
+	 * so let's explicitly check all VMAs to be completely sure.
+	 */
+	for_each_vma(vmi, vma) {
+		if (vma_is_special_mapping(vma, &vdso_mapping) ||
+		    vma_is_special_mapping(vma, &vdso_vvar_mapping) ||
+		    vma_is_special_mapping(vma, &vvar_vclock_mapping)) {
+			mmap_write_unlock(mm);
+			return -EEXIST;
+		}
+	}
+	mmap_write_unlock(mm);
+
+	return map_vdso(image, addr);
+}
+
+static int load_vdso32(void)
+{
+	if (vdso32_enabled != 1)  /* Other values all mean "disabled" */
+		return 0;
+
+	return map_vdso(&vdso_image_32, 0);
+}
+
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+{
+	if (IS_ENABLED(CONFIG_X86_64)) {
+		if (!vdso64_enabled)
+			return 0;
+
+		return map_vdso(&vdso_image_64, 0);
+	}
+
+	return load_vdso32();
+}
+
+#ifdef CONFIG_COMPAT
+int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
+				       int uses_interp, bool x32)
+{
+	if (IS_ENABLED(CONFIG_X86_X32_ABI) && x32) {
+		if (!vdso64_enabled)
+			return 0;
+		return map_vdso(&vdso_image_x32, 0);
+	}
+
+	if (IS_ENABLED(CONFIG_IA32_EMULATION))
+		return load_vdso32();
+
+	return 0;
+}
+#endif
+
+bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs)
+{
+	const struct vdso_image *image = current->mm->context.vdso_image;
+	unsigned long vdso = (unsigned long) current->mm->context.vdso;
+
+	if (in_ia32_syscall() && image == &vdso_image_32) {
+		if (regs->ip == vdso + image->sym_vdso32_sigreturn_landing_pad ||
+		    regs->ip == vdso + image->sym_vdso32_rt_sigreturn_landing_pad)
+			return true;
+	}
+	return false;
+}
+
+#ifdef CONFIG_X86_64
+static __init int vdso_setup(char *s)
+{
+	vdso64_enabled = simple_strtoul(s, NULL, 0);
+	return 1;
+}
+__setup("vdso=", vdso_setup);
+#endif /* CONFIG_X86_64 */
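For context on what the mapping above hands to userspace: the [vdso] text address that map_vdso() installs is passed to each new process through the AT_SYSINFO_EHDR auxiliary-vector entry, and the mapping begins with an ordinary ELF header. A minimal userspace sketch illustrating this (not part of the patch; it assumes glibc's getauxval() and an x86-64 target):

	#include <stdio.h>
	#include <elf.h>
	#include <sys/auxv.h>

	int main(void)
	{
		/* Base of the [vdso] mapping set up by map_vdso() */
		const Elf64_Ehdr *vdso =
			(const Elf64_Ehdr *)getauxval(AT_SYSINFO_EHDR);

		if (!vdso) {
			/* No auxv entry, e.g. when the vDSO is disabled */
			puts("no vDSO mapped");
			return 1;
		}

		/* The mapping starts with a normal ELF header */
		printf("vDSO at %p, ELF class %d\n",
		       (const void *)vdso, vdso->e_ident[EI_CLASS]);
		return 0;
	}

The vdso_setup() handler at the bottom of the file is what lets booting with vdso=0 clear vdso64_enabled; in that case arch_setup_additional_pages() maps nothing and the getauxval() lookup above returns 0.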
