Diffstat (limited to 'arch/powerpc/lib')
37 files changed, 4287 insertions, 1393 deletions
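Much of the code-patching rework in the diff below moves branch creation and range checks onto the ppc_inst_t API. As background, here is a minimal standalone sketch of the PowerPC I-form branch encoding those helpers implement, using only the constants visible in the removed create_branch()/is_offset_in_branch_range() code; the function names in the sketch are illustrative, not the kernel's.

	/*
	 * Sketch of I-form branch encoding (opcode 18). LI is a signed
	 * 24-bit word offset, so the reachable relative range is
	 * [-0x2000000, 0x1fffffc] and the target must be word aligned.
	 * Illustrative names only; not the kernel API.
	 */
	#include <stdint.h>
	#include <stdio.h>
	#include <stdbool.h>

	#define BRANCH_SET_LINK 0x1
	#define BRANCH_ABSOLUTE 0x2

	static bool offset_in_branch_range(long offset)
	{
		return offset >= -0x2000000 && offset <= 0x1fffffc && !(offset & 0x3);
	}

	/* Build a b/ba/bl/bla instruction at 'addr' targeting 'target';
	 * returns 0 if the target is unreachable, mirroring the old
	 * create_branch() shown in the diff. */
	static uint32_t encode_branch(unsigned long addr, unsigned long target, int flags)
	{
		long offset = target;

		if (!(flags & BRANCH_ABSOLUTE))
			offset -= addr;

		if (!offset_in_branch_range(offset))
			return 0;

		/* opcode 18 (0x48000000) | AA/LK flags | 24-bit LI field */
		return 0x48000000 | (flags & 0x3) | (offset & 0x03FFFFFC);
	}

	int main(void)
	{
		unsigned long addr = 0xc000000000010000UL;

		printf("b .      -> 0x%08x\n", encode_branch(addr, addr, 0));
		printf("bl .-4   -> 0x%08x\n", encode_branch(addr, addr - 4, BRANCH_SET_LINK));
		printf("too far  -> 0x%08x\n", encode_branch(addr, addr + 0x2000000, 0));
		return 0;
	}

With the new API in the diff, the equivalent kernel call is create_branch(&instr, addr, target, flags), which reports an out-of-range target through its return value instead of returning a zero instruction word.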
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index b8de3be10eb4..0ab65eeb93ee 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -5,18 +5,31 @@ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) +CFLAGS_code-patching.o += -fno-stack-protector +CFLAGS_feature-fixups.o += -fno-stack-protector + CFLAGS_REMOVE_code-patching.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_feature-fixups.o = $(CC_FLAGS_FTRACE) KASAN_SANITIZE_code-patching.o := n KASAN_SANITIZE_feature-fixups.o := n +# restart_table.o contains functions called in the NMI interrupt path +# which can be in real mode. Disable KASAN. +KASAN_SANITIZE_restart_table.o := n +KCSAN_SANITIZE_code-patching.o := n +KCSAN_SANITIZE_feature-fixups.o := n ifdef CONFIG_KASAN CFLAGS_code-patching.o += -DDISABLE_BRANCH_PROFILING CFLAGS_feature-fixups.o += -DDISABLE_BRANCH_PROFILING endif -obj-y += alloc.o code-patching.o feature-fixups.o pmem.o +CFLAGS_code-patching.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) +CFLAGS_feature-fixups.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) + +obj-y += code-patching.o feature-fixups.o pmem.o + +obj-$(CONFIG_CODE_PATCHING_SELFTEST) += test-code-patching.o ifndef CONFIG_KASAN obj-y += string.o memcmp_$(BITS).o @@ -31,17 +44,22 @@ obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o # 64-bit linker creates .sfpr on demand for final link (vmlinux), # so it is only needed for modules, and only for older linkers which # do not support --save-restore-funcs -ifeq ($(call ld-ifversion, -lt, 225000000, y),y) -extra-$(CONFIG_PPC64) += crtsavres.o +ifndef CONFIG_LD_IS_BFD +always-$(CONFIG_PPC64) += crtsavres.o endif obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \ - memcpy_power7.o + memcpy_power7.o restart_table.o obj64-y += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \ - memcpy_64.o memcpy_mcsafe_64.o + memcpy_64.o copy_mc_64.o +ifdef CONFIG_PPC_QUEUED_SPINLOCKS +obj-$(CONFIG_SMP) += qspinlock.o +else obj64-$(CONFIG_SMP) += locks.o +endif + obj64-$(CONFIG_ALTIVEC) += vmx-helper.o obj64-$(CONFIG_KPROBES_SANITY_TEST) += test_emulate_step.o \ test_emulate_step_exec_instr.o @@ -58,6 +76,8 @@ obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o obj-$(CONFIG_FTR_FIXUP_SELFTEST) += feature-fixups-test.o obj-$(CONFIG_ALTIVEC) += xor_vmx.o xor_vmx_glue.o -CFLAGS_xor_vmx.o += -maltivec $(call cc-option,-mabi=altivec) +CFLAGS_xor_vmx.o += -mhard-float -maltivec $(call cc-option,-mabi=altivec) +# Enable <altivec.h> +CFLAGS_xor_vmx.o += -isystem $(shell $(CC) -print-file-name=include) obj-$(CONFIG_PPC64) += $(obj64-y) diff --git a/arch/powerpc/lib/alloc.c b/arch/powerpc/lib/alloc.c deleted file mode 100644 index ce180870bd52..000000000000 --- a/arch/powerpc/lib/alloc.c +++ /dev/null @@ -1,23 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <linux/types.h> -#include <linux/init.h> -#include <linux/slab.h> -#include <linux/memblock.h> -#include <linux/string.h> -#include <asm/setup.h> - - -void * __ref zalloc_maybe_bootmem(size_t size, gfp_t mask) -{ - void *p; - - if (slab_is_available()) - p = kzalloc(size, mask); - else { - p = memblock_alloc(size, SMP_CACHE_BYTES); - if (!p) - panic("%s: Failed to allocate %zu bytes\n", __func__, - size); - } - return p; -} diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S index ecd150dc3ed9..cd00b9bdd772 100644 --- a/arch/powerpc/lib/checksum_32.S +++ b/arch/powerpc/lib/checksum_32.S @@ -8,12 +8,12 @@ * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au). 
*/ +#include <linux/export.h> #include <linux/sys.h> #include <asm/processor.h> #include <asm/cache.h> #include <asm/errno.h> #include <asm/ppc_asm.h> -#include <asm/export.h> .text @@ -78,12 +78,10 @@ EXPORT_SYMBOL(__csum_partial) /* * Computes the checksum of a memory block at src, length len, - * and adds in "sum" (32-bit), while copying the block to dst. - * If an access exception occurs on src or dst, it stores -EFAULT - * to *src_err or *dst_err respectively, and (for an error on - * src) zeroes the rest of dst. + * and adds in 0xffffffff, while copying the block to dst. + * If an access exception occurs it returns zero. * - * csum_partial_copy_generic(src, dst, len, sum, src_err, dst_err) + * csum_partial_copy_generic(src, dst, len) */ #define CSUM_COPY_16_BYTES_WITHEX(n) \ 8 ## n ## 0: \ @@ -108,30 +106,24 @@ EXPORT_SYMBOL(__csum_partial) adde r12,r12,r10 #define CSUM_COPY_16_BYTES_EXCODE(n) \ - EX_TABLE(8 ## n ## 0b, src_error); \ - EX_TABLE(8 ## n ## 1b, src_error); \ - EX_TABLE(8 ## n ## 2b, src_error); \ - EX_TABLE(8 ## n ## 3b, src_error); \ - EX_TABLE(8 ## n ## 4b, dst_error); \ - EX_TABLE(8 ## n ## 5b, dst_error); \ - EX_TABLE(8 ## n ## 6b, dst_error); \ - EX_TABLE(8 ## n ## 7b, dst_error); + EX_TABLE(8 ## n ## 0b, fault); \ + EX_TABLE(8 ## n ## 1b, fault); \ + EX_TABLE(8 ## n ## 2b, fault); \ + EX_TABLE(8 ## n ## 3b, fault); \ + EX_TABLE(8 ## n ## 4b, fault); \ + EX_TABLE(8 ## n ## 5b, fault); \ + EX_TABLE(8 ## n ## 6b, fault); \ + EX_TABLE(8 ## n ## 7b, fault); .text - .stabs "arch/powerpc/lib/",N_SO,0,0,0f - .stabs "checksum_32.S",N_SO,0,0,0f -0: CACHELINE_BYTES = L1_CACHE_BYTES LG_CACHELINE_BYTES = L1_CACHE_SHIFT CACHELINE_MASK = (L1_CACHE_BYTES-1) _GLOBAL(csum_partial_copy_generic) - stwu r1,-16(r1) - stw r7,12(r1) - stw r8,8(r1) - - addic r12,r6,0 + li r12,-1 + addic r0,r0,0 /* clear carry */ addi r6,r4,-4 neg r0,r4 addi r4,r3,-4 @@ -241,39 +233,23 @@ _GLOBAL(csum_partial_copy_generic) slwi r0,r0,8 adde r12,r12,r0 66: addze r3,r12 - addi r1,r1,16 beqlr+ cr7 rlwinm r3,r3,8,0,31 /* odd destination address: rotate one byte */ blr -/* read fault */ -src_error: - lwz r7,12(r1) - addi r1,r1,16 - cmpwi cr0,r7,0 - beqlr - li r0,-EFAULT - stw r0,0(r7) - blr -/* write fault */ -dst_error: - lwz r8,8(r1) - addi r1,r1,16 - cmpwi cr0,r8,0 - beqlr - li r0,-EFAULT - stw r0,0(r8) +fault: + li r3,0 blr - EX_TABLE(70b, src_error); - EX_TABLE(71b, dst_error); - EX_TABLE(72b, src_error); - EX_TABLE(73b, dst_error); - EX_TABLE(54b, dst_error); + EX_TABLE(70b, fault); + EX_TABLE(71b, fault); + EX_TABLE(72b, fault); + EX_TABLE(73b, fault); + EX_TABLE(54b, fault); /* * this stuff handles faults in the cacheline loop and branches to either - * src_error (if in read part) or dst_error (if in write part) + * fault (if in read part) or fault (if in write part) */ CSUM_COPY_16_BYTES_EXCODE(0) #if L1_CACHE_BYTES >= 32 @@ -290,12 +266,12 @@ dst_error: #endif #endif - EX_TABLE(30b, src_error); - EX_TABLE(31b, dst_error); - EX_TABLE(40b, src_error); - EX_TABLE(41b, dst_error); - EX_TABLE(50b, src_error); - EX_TABLE(51b, dst_error); + EX_TABLE(30b, fault); + EX_TABLE(31b, fault); + EX_TABLE(40b, fault); + EX_TABLE(41b, fault); + EX_TABLE(50b, fault); + EX_TABLE(51b, fault); EXPORT_SYMBOL(csum_partial_copy_generic) diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S index 514978f908d4..d53d8f09a2c2 100644 --- a/arch/powerpc/lib/checksum_64.S +++ b/arch/powerpc/lib/checksum_64.S @@ -8,11 +8,11 @@ * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au). 
*/ +#include <linux/export.h> #include <linux/sys.h> #include <asm/processor.h> #include <asm/errno.h> #include <asm/ppc_asm.h> -#include <asm/export.h> /* * Computes the checksum of a memory block at buff, length len, @@ -182,34 +182,33 @@ EXPORT_SYMBOL(__csum_partial) .macro srcnr 100: - EX_TABLE(100b,.Lsrc_error_nr) + EX_TABLE(100b,.Lerror_nr) .endm .macro source 150: - EX_TABLE(150b,.Lsrc_error) + EX_TABLE(150b,.Lerror) .endm .macro dstnr 200: - EX_TABLE(200b,.Ldest_error_nr) + EX_TABLE(200b,.Lerror_nr) .endm .macro dest 250: - EX_TABLE(250b,.Ldest_error) + EX_TABLE(250b,.Lerror) .endm /* * Computes the checksum of a memory block at src, length len, - * and adds in "sum" (32-bit), while copying the block to dst. - * If an access exception occurs on src or dst, it stores -EFAULT - * to *src_err or *dst_err respectively. The caller must take any action - * required in this case (zeroing memory, recalculating partial checksum etc). + * and adds in 0xffffffff (32-bit), while copying the block to dst. + * If an access exception occurs, it returns 0. * - * csum_partial_copy_generic(r3=src, r4=dst, r5=len, r6=sum, r7=src_err, r8=dst_err) + * csum_partial_copy_generic(r3=src, r4=dst, r5=len) */ _GLOBAL(csum_partial_copy_generic) + li r6,-1 addic r0,r6,0 /* clear carry */ srdi. r6,r5,3 /* less than 8 bytes? */ @@ -401,29 +400,15 @@ dstnr; stb r6,0(r4) srdi r3,r3,32 blr -.Lsrc_error: +.Lerror: ld r14,STK_REG(R14)(r1) ld r15,STK_REG(R15)(r1) ld r16,STK_REG(R16)(r1) addi r1,r1,STACKFRAMESIZE -.Lsrc_error_nr: - cmpdi 0,r7,0 - beqlr - li r6,-EFAULT - stw r6,0(r7) +.Lerror_nr: + li r3,0 blr -.Ldest_error: - ld r14,STK_REG(R14)(r1) - ld r15,STK_REG(R15)(r1) - ld r16,STK_REG(R16)(r1) - addi r1,r1,STACKFRAMESIZE -.Ldest_error_nr: - cmpdi 0,r8,0 - beqlr - li r6,-EFAULT - stw r6,0(r8) - blr EXPORT_SYMBOL(csum_partial_copy_generic) /* diff --git a/arch/powerpc/lib/checksum_wrappers.c b/arch/powerpc/lib/checksum_wrappers.c index fabe4db28726..1a14c8780278 100644 --- a/arch/powerpc/lib/checksum_wrappers.c +++ b/arch/powerpc/lib/checksum_wrappers.c @@ -12,83 +12,28 @@ #include <linux/uaccess.h> __wsum csum_and_copy_from_user(const void __user *src, void *dst, - int len, __wsum sum, int *err_ptr) + int len) { - unsigned int csum; + __wsum csum; - might_sleep(); - allow_read_from_user(src, len); + if (unlikely(!user_read_access_begin(src, len))) + return 0; - *err_ptr = 0; + csum = csum_partial_copy_generic((void __force *)src, dst, len); - if (!len) { - csum = 0; - goto out; - } - - if (unlikely((len < 0) || !access_ok(src, len))) { - *err_ptr = -EFAULT; - csum = (__force unsigned int)sum; - goto out; - } - - csum = csum_partial_copy_generic((void __force *)src, dst, - len, sum, err_ptr, NULL); - - if (unlikely(*err_ptr)) { - int missing = __copy_from_user(dst, src, len); - - if (missing) { - memset(dst + len - missing, 0, missing); - *err_ptr = -EFAULT; - } else { - *err_ptr = 0; - } - - csum = csum_partial(dst, len, sum); - } - -out: - prevent_read_from_user(src, len); - return (__force __wsum)csum; + user_read_access_end(); + return csum; } -EXPORT_SYMBOL(csum_and_copy_from_user); -__wsum csum_and_copy_to_user(const void *src, void __user *dst, int len, - __wsum sum, int *err_ptr) +__wsum csum_and_copy_to_user(const void *src, void __user *dst, int len) { - unsigned int csum; - - might_sleep(); - allow_write_to_user(dst, len); - - *err_ptr = 0; - - if (!len) { - csum = 0; - goto out; - } - - if (unlikely((len < 0) || !access_ok(dst, len))) { - *err_ptr = -EFAULT; - csum = -1; /* invalid checksum */ - goto 
out; - } - - csum = csum_partial_copy_generic(src, (void __force *)dst, - len, sum, NULL, err_ptr); + __wsum csum; - if (unlikely(*err_ptr)) { - csum = csum_partial(src, len, sum); + if (unlikely(!user_write_access_begin(dst, len))) + return 0; - if (copy_to_user(dst, src, len)) { - *err_ptr = -EFAULT; - csum = -1; /* invalid checksum */ - } - } + csum = csum_partial_copy_generic(src, (void __force *)dst, len); -out: - prevent_write_to_user(dst, len); - return (__force __wsum)csum; + user_write_access_end(); + return csum; } -EXPORT_SYMBOL(csum_and_copy_to_user); diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 3345f039a876..c6ab46156cda 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -3,47 +3,108 @@ * Copyright 2008 Michael Ellerman, IBM Corporation. */ -#include <linux/kernel.h> #include <linux/kprobes.h> +#include <linux/mmu_context.h> +#include <linux/random.h> #include <linux/vmalloc.h> #include <linux/init.h> -#include <linux/mm.h> #include <linux/cpuhotplug.h> -#include <linux/slab.h> #include <linux/uaccess.h> +#include <linux/jump_label.h> -#include <asm/pgtable.h> +#include <asm/debug.h> +#include <asm/pgalloc.h> +#include <asm/tlb.h> #include <asm/tlbflush.h> #include <asm/page.h> #include <asm/code-patching.h> -#include <asm/setup.h> +#include <asm/inst.h> -static int __patch_instruction(unsigned int *exec_addr, unsigned int instr, - unsigned int *patch_addr) +static int __patch_instruction(u32 *exec_addr, ppc_inst_t instr, u32 *patch_addr) { - int err = 0; + if (!ppc_inst_prefixed(instr)) { + u32 val = ppc_inst_val(instr); - __put_user_asm(instr, patch_addr, err, "stw"); - if (err) - return err; + __put_kernel_nofault(patch_addr, &val, u32, failed); + } else { + u64 val = ppc_inst_as_ulong(instr); + + __put_kernel_nofault(patch_addr, &val, u64, failed); + } asm ("dcbst 0, %0; sync; icbi 0,%1; sync; isync" :: "r" (patch_addr), "r" (exec_addr)); return 0; + +failed: + mb(); /* sync */ + return -EPERM; } -int raw_patch_instruction(unsigned int *addr, unsigned int instr) +int raw_patch_instruction(u32 *addr, ppc_inst_t instr) { return __patch_instruction(addr, instr, addr); } -#ifdef CONFIG_STRICT_KERNEL_RWX -static DEFINE_PER_CPU(struct vm_struct *, text_poke_area); +struct patch_context { + union { + struct vm_struct *area; + struct mm_struct *mm; + }; + unsigned long addr; + pte_t *pte; +}; + +static DEFINE_PER_CPU(struct patch_context, cpu_patching_context); + +static int map_patch_area(void *addr, unsigned long text_poke_addr); +static void unmap_patch_area(unsigned long addr); + +static bool mm_patch_enabled(void) +{ + return IS_ENABLED(CONFIG_SMP) && radix_enabled(); +} + +/* + * The following applies for Radix MMU. Hash MMU has different requirements, + * and so is not supported. + * + * Changing mm requires context synchronising instructions on both sides of + * the context switch, as well as a hwsync between the last instruction for + * which the address of an associated storage access was translated using + * the current context. + * + * switch_mm_irqs_off() performs an isync after the context switch. It is + * the responsibility of the caller to perform the CSI and hwsync before + * starting/stopping the temp mm. 
+ */ +static struct mm_struct *start_using_temp_mm(struct mm_struct *temp_mm) +{ + struct mm_struct *orig_mm = current->active_mm; + + lockdep_assert_irqs_disabled(); + switch_mm_irqs_off(orig_mm, temp_mm, current); + + WARN_ON(!mm_is_thread_local(temp_mm)); + + suspend_breakpoints(); + return orig_mm; +} + +static void stop_using_temp_mm(struct mm_struct *temp_mm, + struct mm_struct *orig_mm) +{ + lockdep_assert_irqs_disabled(); + switch_mm_irqs_off(temp_mm, orig_mm, current); + restore_breakpoints(); +} static int text_area_cpu_up(unsigned int cpu) { struct vm_struct *area; + unsigned long addr; + int err; area = get_vm_area(PAGE_SIZE, VM_ALLOC); if (!area) { @@ -51,188 +112,426 @@ static int text_area_cpu_up(unsigned int cpu) cpu); return -1; } - this_cpu_write(text_poke_area, area); + + // Map/unmap the area to ensure all page tables are pre-allocated + addr = (unsigned long)area->addr; + err = map_patch_area(empty_zero_page, addr); + if (err) + return err; + + unmap_patch_area(addr); + + this_cpu_write(cpu_patching_context.area, area); + this_cpu_write(cpu_patching_context.addr, addr); + this_cpu_write(cpu_patching_context.pte, virt_to_kpte(addr)); return 0; } static int text_area_cpu_down(unsigned int cpu) { - free_vm_area(this_cpu_read(text_poke_area)); + free_vm_area(this_cpu_read(cpu_patching_context.area)); + this_cpu_write(cpu_patching_context.area, NULL); + this_cpu_write(cpu_patching_context.addr, 0); + this_cpu_write(cpu_patching_context.pte, NULL); return 0; } -/* - * Run as a late init call. This allows all the boot time patching to be done - * simply by patching the code, and then we're called here prior to - * mark_rodata_ro(), which happens after all init calls are run. Although - * BUG_ON() is rude, in this case it should only happen if ENOMEM, and we judge - * it as being preferable to a kernel that will crash later when someone tries - * to use patch_instruction(). - */ -static int __init setup_text_poke_area(void) +static void put_patching_mm(struct mm_struct *mm, unsigned long patching_addr) +{ + struct mmu_gather tlb; + + tlb_gather_mmu(&tlb, mm); + free_pgd_range(&tlb, patching_addr, patching_addr + PAGE_SIZE, 0, 0); + mmput(mm); +} + +static int text_area_cpu_up_mm(unsigned int cpu) +{ + struct mm_struct *mm; + unsigned long addr; + pte_t *pte; + spinlock_t *ptl; + + mm = mm_alloc(); + if (WARN_ON(!mm)) + goto fail_no_mm; + + /* + * Choose a random page-aligned address from the interval + * [PAGE_SIZE .. DEFAULT_MAP_WINDOW - PAGE_SIZE]. + * The lower address bound is PAGE_SIZE to avoid the zero-page. + */ + addr = (1 + (get_random_long() % (DEFAULT_MAP_WINDOW / PAGE_SIZE - 2))) << PAGE_SHIFT; + + /* + * PTE allocation uses GFP_KERNEL which means we need to + * pre-allocate the PTE here because we cannot do the + * allocation during patching when IRQs are disabled. + * + * Using get_locked_pte() to avoid open coding, the lock + * is unnecessary. 
+ */ + pte = get_locked_pte(mm, addr, &ptl); + if (!pte) + goto fail_no_pte; + pte_unmap_unlock(pte, ptl); + + this_cpu_write(cpu_patching_context.mm, mm); + this_cpu_write(cpu_patching_context.addr, addr); + + return 0; + +fail_no_pte: + put_patching_mm(mm, addr); +fail_no_mm: + return -ENOMEM; +} + +static int text_area_cpu_down_mm(unsigned int cpu) { - BUG_ON(!cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, - "powerpc/text_poke:online", text_area_cpu_up, - text_area_cpu_down)); + put_patching_mm(this_cpu_read(cpu_patching_context.mm), + this_cpu_read(cpu_patching_context.addr)); + + this_cpu_write(cpu_patching_context.mm, NULL); + this_cpu_write(cpu_patching_context.addr, 0); return 0; } -late_initcall(setup_text_poke_area); -/* - * This can be called for kernel text or a module. - */ -static int map_patch_area(void *addr, unsigned long text_poke_addr) +static __ro_after_init DEFINE_STATIC_KEY_FALSE(poking_init_done); + +void __init poking_init(void) { - unsigned long pfn; - int err; + int ret; - if (is_vmalloc_addr(addr)) - pfn = vmalloc_to_pfn(addr); + if (mm_patch_enabled()) + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, + "powerpc/text_poke_mm:online", + text_area_cpu_up_mm, + text_area_cpu_down_mm); else - pfn = __pa_symbol(addr) >> PAGE_SHIFT; + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, + "powerpc/text_poke:online", + text_area_cpu_up, + text_area_cpu_down); - err = map_kernel_page(text_poke_addr, (pfn << PAGE_SHIFT), PAGE_KERNEL); + /* cpuhp_setup_state returns >= 0 on success */ + if (WARN_ON(ret < 0)) + return; - pr_devel("Mapped addr %lx with pfn %lx:%d\n", text_poke_addr, pfn, err); - if (err) - return -1; + static_branch_enable(&poking_init_done); +} - return 0; +static unsigned long get_patch_pfn(void *addr) +{ + if (IS_ENABLED(CONFIG_MODULES) && is_vmalloc_or_module_addr(addr)) + return vmalloc_to_pfn(addr); + else + return __pa_symbol(addr) >> PAGE_SHIFT; } -static inline int unmap_patch_area(unsigned long addr) +/* + * This can be called for kernel text or a module. 
+ */ +static int map_patch_area(void *addr, unsigned long text_poke_addr) +{ + unsigned long pfn = get_patch_pfn(addr); + + return map_kernel_page(text_poke_addr, (pfn << PAGE_SHIFT), PAGE_KERNEL); +} + +static void unmap_patch_area(unsigned long addr) { pte_t *ptep; pmd_t *pmdp; pud_t *pudp; + p4d_t *p4dp; pgd_t *pgdp; pgdp = pgd_offset_k(addr); - if (unlikely(!pgdp)) - return -EINVAL; + if (WARN_ON(pgd_none(*pgdp))) + return; + + p4dp = p4d_offset(pgdp, addr); + if (WARN_ON(p4d_none(*p4dp))) + return; - pudp = pud_offset(pgdp, addr); - if (unlikely(!pudp)) - return -EINVAL; + pudp = pud_offset(p4dp, addr); + if (WARN_ON(pud_none(*pudp))) + return; pmdp = pmd_offset(pudp, addr); - if (unlikely(!pmdp)) - return -EINVAL; + if (WARN_ON(pmd_none(*pmdp))) + return; ptep = pte_offset_kernel(pmdp, addr); - if (unlikely(!ptep)) - return -EINVAL; - - pr_devel("clearing mm %p, pte %p, addr %lx\n", &init_mm, ptep, addr); + if (WARN_ON(pte_none(*ptep))) + return; /* * In hash, pte_clear flushes the tlb, in radix, we have to */ pte_clear(&init_mm, addr, ptep); flush_tlb_kernel_range(addr, addr + PAGE_SIZE); +} - return 0; +static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr) +{ + int err; + u32 *patch_addr; + unsigned long text_poke_addr; + pte_t *pte; + unsigned long pfn = get_patch_pfn(addr); + struct mm_struct *patching_mm; + struct mm_struct *orig_mm; + spinlock_t *ptl; + + patching_mm = __this_cpu_read(cpu_patching_context.mm); + text_poke_addr = __this_cpu_read(cpu_patching_context.addr); + patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr)); + + pte = get_locked_pte(patching_mm, text_poke_addr, &ptl); + if (!pte) + return -ENOMEM; + + __set_pte_at(patching_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0); + + /* order PTE update before use, also serves as the hwsync */ + asm volatile("ptesync": : :"memory"); + + /* order context switch after arbitrary prior code */ + isync(); + + orig_mm = start_using_temp_mm(patching_mm); + + err = __patch_instruction(addr, instr, patch_addr); + + /* context synchronisation performed by __patch_instruction (isync or exception) */ + stop_using_temp_mm(patching_mm, orig_mm); + + pte_clear(patching_mm, text_poke_addr, pte); + /* + * ptesync to order PTE update before TLB invalidation done + * by radix__local_flush_tlb_page_psize (in _tlbiel_va) + */ + local_flush_tlb_page_psize(patching_mm, text_poke_addr, mmu_virtual_psize); + + pte_unmap_unlock(pte, ptl); + + return err; } -static int do_patch_instruction(unsigned int *addr, unsigned int instr) +static int __do_patch_instruction(u32 *addr, ppc_inst_t instr) { int err; - unsigned int *patch_addr = NULL; - unsigned long flags; + u32 *patch_addr; unsigned long text_poke_addr; - unsigned long kaddr = (unsigned long)addr; + pte_t *pte; + unsigned long pfn = get_patch_pfn(addr); + + text_poke_addr = (unsigned long)__this_cpu_read(cpu_patching_context.addr) & PAGE_MASK; + patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr)); + + pte = __this_cpu_read(cpu_patching_context.pte); + __set_pte_at(&init_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0); + /* See ptesync comment in radix__set_pte_at() */ + if (radix_enabled()) + asm volatile("ptesync": : :"memory"); + + err = __patch_instruction(addr, instr, patch_addr); + + pte_clear(&init_mm, text_poke_addr, pte); + flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE); + + return err; +} + +int patch_instruction(u32 *addr, ppc_inst_t instr) +{ + int err; + unsigned long flags; /* * During early early boot 
patch_instruction is called * when text_poke_area is not ready, but we still need * to allow patching. We just do the plain old patching */ - if (!this_cpu_read(text_poke_area)) + if (!IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) || + !static_branch_likely(&poking_init_done)) return raw_patch_instruction(addr, instr); local_irq_save(flags); + if (mm_patch_enabled()) + err = __do_patch_instruction_mm(addr, instr); + else + err = __do_patch_instruction(addr, instr); + local_irq_restore(flags); + + return err; +} +NOKPROBE_SYMBOL(patch_instruction); + +static int __patch_instructions(u32 *patch_addr, u32 *code, size_t len, bool repeat_instr) +{ + unsigned long start = (unsigned long)patch_addr; - text_poke_addr = (unsigned long)__this_cpu_read(text_poke_area)->addr; - if (map_patch_area(addr, text_poke_addr)) { - err = -1; - goto out; + /* Repeat instruction */ + if (repeat_instr) { + ppc_inst_t instr = ppc_inst_read(code); + + if (ppc_inst_prefixed(instr)) { + u64 val = ppc_inst_as_ulong(instr); + + memset64((u64 *)patch_addr, val, len / 8); + } else { + u32 val = ppc_inst_val(instr); + + memset32(patch_addr, val, len / 4); + } + } else { + memcpy(patch_addr, code, len); } - patch_addr = (unsigned int *)(text_poke_addr) + - ((kaddr & ~PAGE_MASK) / sizeof(unsigned int)); + smp_wmb(); /* smp write barrier */ + flush_icache_range(start, start + len); + return 0; +} - __patch_instruction(addr, instr, patch_addr); +/* + * A page is mapped and instructions that fit the page are patched. + * Assumes 'len' to be (PAGE_SIZE - offset_in_page(addr)) or below. + */ +static int __do_patch_instructions_mm(u32 *addr, u32 *code, size_t len, bool repeat_instr) +{ + struct mm_struct *patching_mm, *orig_mm; + unsigned long pfn = get_patch_pfn(addr); + unsigned long text_poke_addr; + spinlock_t *ptl; + u32 *patch_addr; + pte_t *pte; + int err; - err = unmap_patch_area(text_poke_addr); - if (err) - pr_warn("failed to unmap %lx\n", text_poke_addr); + patching_mm = __this_cpu_read(cpu_patching_context.mm); + text_poke_addr = __this_cpu_read(cpu_patching_context.addr); + patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr)); -out: - local_irq_restore(flags); + pte = get_locked_pte(patching_mm, text_poke_addr, &ptl); + if (!pte) + return -ENOMEM; + + __set_pte_at(patching_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0); + + /* order PTE update before use, also serves as the hwsync */ + asm volatile("ptesync" ::: "memory"); + + /* order context switch after arbitrary prior code */ + isync(); + + orig_mm = start_using_temp_mm(patching_mm); + + err = __patch_instructions(patch_addr, code, len, repeat_instr); + + /* context synchronisation performed by __patch_instructions */ + stop_using_temp_mm(patching_mm, orig_mm); + + pte_clear(patching_mm, text_poke_addr, pte); + /* + * ptesync to order PTE update before TLB invalidation done + * by radix__local_flush_tlb_page_psize (in _tlbiel_va) + */ + local_flush_tlb_page_psize(patching_mm, text_poke_addr, mmu_virtual_psize); + + pte_unmap_unlock(pte, ptl); return err; } -#else /* !CONFIG_STRICT_KERNEL_RWX */ -static int do_patch_instruction(unsigned int *addr, unsigned int instr) +/* + * A page is mapped and instructions that fit the page are patched. + * Assumes 'len' to be (PAGE_SIZE - offset_in_page(addr)) or below. 
+ */ +static int __do_patch_instructions(u32 *addr, u32 *code, size_t len, bool repeat_instr) { - return raw_patch_instruction(addr, instr); -} + unsigned long pfn = get_patch_pfn(addr); + unsigned long text_poke_addr; + u32 *patch_addr; + pte_t *pte; + int err; -#endif /* CONFIG_STRICT_KERNEL_RWX */ + text_poke_addr = (unsigned long)__this_cpu_read(cpu_patching_context.addr) & PAGE_MASK; + patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr)); -int patch_instruction(unsigned int *addr, unsigned int instr) -{ - /* Make sure we aren't patching a freed init section */ - if (init_mem_is_free && init_section_contains(addr, 4)) { - pr_debug("Skipping init section patching addr: 0x%px\n", addr); - return 0; - } - return do_patch_instruction(addr, instr); + pte = __this_cpu_read(cpu_patching_context.pte); + __set_pte_at(&init_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0); + /* See ptesync comment in radix__set_pte_at() */ + if (radix_enabled()) + asm volatile("ptesync" ::: "memory"); + + err = __patch_instructions(patch_addr, code, len, repeat_instr); + + pte_clear(&init_mm, text_poke_addr, pte); + flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE); + + return err; } -NOKPROBE_SYMBOL(patch_instruction); -int patch_branch(unsigned int *addr, unsigned long target, int flags) -{ - return patch_instruction(addr, create_branch(addr, target, flags)); +/* + * Patch 'addr' with 'len' bytes of instructions from 'code'. + * + * If repeat_instr is true, the same instruction is filled for + * 'len' bytes. + */ +int patch_instructions(u32 *addr, u32 *code, size_t len, bool repeat_instr) +{ + while (len > 0) { + unsigned long flags; + size_t plen; + int err; + + plen = min_t(size_t, PAGE_SIZE - offset_in_page(addr), len); + + local_irq_save(flags); + if (mm_patch_enabled()) + err = __do_patch_instructions_mm(addr, code, plen, repeat_instr); + else + err = __do_patch_instructions(addr, code, plen, repeat_instr); + local_irq_restore(flags); + if (err) + return err; + + len -= plen; + addr = (u32 *)((unsigned long)addr + plen); + if (!repeat_instr) + code = (u32 *)((unsigned long)code + plen); + } + + return 0; } +NOKPROBE_SYMBOL(patch_instructions); -bool is_offset_in_branch_range(long offset) +int patch_branch(u32 *addr, unsigned long target, int flags) { - /* - * Powerpc branch instruction is : - * - * 0 6 30 31 - * +---------+----------------+---+---+ - * | opcode | LI |AA |LK | - * +---------+----------------+---+---+ - * Where AA = 0 and LK = 0 - * - * LI is a signed 24 bits integer. 
The real branch offset is computed - * by: imm32 = SignExtend(LI:'0b00', 32); - * - * So the maximum forward branch should be: - * (0x007fffff << 2) = 0x01fffffc = 0x1fffffc - * The maximum backward branch should be: - * (0xff800000 << 2) = 0xfe000000 = -0x2000000 - */ - return (offset >= -0x2000000 && offset <= 0x1fffffc && !(offset & 0x3)); + ppc_inst_t instr; + + if (create_branch(&instr, addr, target, flags)) + return -ERANGE; + + return patch_instruction(addr, instr); } /* * Helper to check if a given instruction is a conditional branch * Derived from the conditional checks in analyse_instr() */ -bool is_conditional_branch(unsigned int instr) +bool is_conditional_branch(ppc_inst_t instr) { - unsigned int opcode = instr >> 26; + unsigned int opcode = ppc_inst_primary_opcode(instr); if (opcode == 16) /* bc, bca, bcl, bcla */ return true; if (opcode == 19) { - switch ((instr >> 1) & 0x3ff) { + switch ((ppc_inst_val(instr) >> 1) & 0x3ff) { case 16: /* bclr, bclrl */ case 528: /* bcctr, bcctrl */ case 560: /* bctar, bctarl */ @@ -243,30 +542,9 @@ bool is_conditional_branch(unsigned int instr) } NOKPROBE_SYMBOL(is_conditional_branch); -unsigned int create_branch(const unsigned int *addr, - unsigned long target, int flags) -{ - unsigned int instruction; - long offset; - - offset = target; - if (! (flags & BRANCH_ABSOLUTE)) - offset = offset - (unsigned long)addr; - - /* Check we can represent the target in the instruction format */ - if (!is_offset_in_branch_range(offset)) - return 0; - - /* Mask out the flags and target, so they don't step on each other. */ - instruction = 0x48000000 | (flags & 0x3) | (offset & 0x03FFFFFC); - - return instruction; -} - -unsigned int create_cond_branch(const unsigned int *addr, - unsigned long target, int flags) +int create_cond_branch(ppc_inst_t *instr, const u32 *addr, + unsigned long target, int flags) { - unsigned int instruction; long offset; offset = target; @@ -274,413 +552,81 @@ unsigned int create_cond_branch(const unsigned int *addr, offset = offset - (unsigned long)addr; /* Check we can represent the target in the instruction format */ - if (offset < -0x8000 || offset > 0x7FFF || offset & 0x3) - return 0; + if (!is_offset_in_cond_branch_range(offset)) + return 1; /* Mask out the flags and target, so they don't step on each other. 
*/ - instruction = 0x40000000 | (flags & 0x3FF0003) | (offset & 0xFFFC); - - return instruction; -} + *instr = ppc_inst(0x40000000 | (flags & 0x3FF0003) | (offset & 0xFFFC)); -static unsigned int branch_opcode(unsigned int instr) -{ - return (instr >> 26) & 0x3F; -} - -static int instr_is_branch_iform(unsigned int instr) -{ - return branch_opcode(instr) == 18; -} - -static int instr_is_branch_bform(unsigned int instr) -{ - return branch_opcode(instr) == 16; + return 0; } -int instr_is_relative_branch(unsigned int instr) +int instr_is_relative_branch(ppc_inst_t instr) { - if (instr & BRANCH_ABSOLUTE) + if (ppc_inst_val(instr) & BRANCH_ABSOLUTE) return 0; return instr_is_branch_iform(instr) || instr_is_branch_bform(instr); } -int instr_is_relative_link_branch(unsigned int instr) +int instr_is_relative_link_branch(ppc_inst_t instr) { - return instr_is_relative_branch(instr) && (instr & BRANCH_SET_LINK); + return instr_is_relative_branch(instr) && (ppc_inst_val(instr) & BRANCH_SET_LINK); } -static unsigned long branch_iform_target(const unsigned int *instr) +static unsigned long branch_iform_target(const u32 *instr) { signed long imm; - imm = *instr & 0x3FFFFFC; + imm = ppc_inst_val(ppc_inst_read(instr)) & 0x3FFFFFC; /* If the top bit of the immediate value is set this is negative */ if (imm & 0x2000000) imm -= 0x4000000; - if ((*instr & BRANCH_ABSOLUTE) == 0) + if ((ppc_inst_val(ppc_inst_read(instr)) & BRANCH_ABSOLUTE) == 0) imm += (unsigned long)instr; return (unsigned long)imm; } -static unsigned long branch_bform_target(const unsigned int *instr) +static unsigned long branch_bform_target(const u32 *instr) { signed long imm; - imm = *instr & 0xFFFC; + imm = ppc_inst_val(ppc_inst_read(instr)) & 0xFFFC; /* If the top bit of the immediate value is set this is negative */ if (imm & 0x8000) imm -= 0x10000; - if ((*instr & BRANCH_ABSOLUTE) == 0) + if ((ppc_inst_val(ppc_inst_read(instr)) & BRANCH_ABSOLUTE) == 0) imm += (unsigned long)instr; return (unsigned long)imm; } -unsigned long branch_target(const unsigned int *instr) +unsigned long branch_target(const u32 *instr) { - if (instr_is_branch_iform(*instr)) + if (instr_is_branch_iform(ppc_inst_read(instr))) return branch_iform_target(instr); - else if (instr_is_branch_bform(*instr)) + else if (instr_is_branch_bform(ppc_inst_read(instr))) return branch_bform_target(instr); return 0; } -int instr_is_branch_to_addr(const unsigned int *instr, unsigned long addr) -{ - if (instr_is_branch_iform(*instr) || instr_is_branch_bform(*instr)) - return branch_target(instr) == addr; - - return 0; -} - -unsigned int translate_branch(const unsigned int *dest, const unsigned int *src) +int translate_branch(ppc_inst_t *instr, const u32 *dest, const u32 *src) { unsigned long target; - target = branch_target(src); - if (instr_is_branch_iform(*src)) - return create_branch(dest, target, *src); - else if (instr_is_branch_bform(*src)) - return create_cond_branch(dest, target, *src); + if (instr_is_branch_iform(ppc_inst_read(src))) + return create_branch(instr, dest, target, + ppc_inst_val(ppc_inst_read(src))); + else if (instr_is_branch_bform(ppc_inst_read(src))) + return create_cond_branch(instr, dest, target, + ppc_inst_val(ppc_inst_read(src))); - return 0; + return 1; } - -#ifdef CONFIG_PPC_BOOK3E_64 -void __patch_exception(int exc, unsigned long addr) -{ - extern unsigned int interrupt_base_book3e; - unsigned int *ibase = &interrupt_base_book3e; - - /* Our exceptions vectors start with a NOP and -then- a branch - * to deal with single stepping from userspace which 
stops on - * the second instruction. Thus we need to patch the second - * instruction of the exception, not the first one - */ - - patch_branch(ibase + (exc / 4) + 1, addr, 0); -} -#endif - -#ifdef CONFIG_CODE_PATCHING_SELFTEST - -static void __init test_trampoline(void) -{ - asm ("nop;\n"); -} - -#define check(x) \ - if (!(x)) printk("code-patching: test failed at line %d\n", __LINE__); - -static void __init test_branch_iform(void) -{ - unsigned int instr; - unsigned long addr; - - addr = (unsigned long)&instr; - - /* The simplest case, branch to self, no flags */ - check(instr_is_branch_iform(0x48000000)); - /* All bits of target set, and flags */ - check(instr_is_branch_iform(0x4bffffff)); - /* High bit of opcode set, which is wrong */ - check(!instr_is_branch_iform(0xcbffffff)); - /* Middle bits of opcode set, which is wrong */ - check(!instr_is_branch_iform(0x7bffffff)); - - /* Simplest case, branch to self with link */ - check(instr_is_branch_iform(0x48000001)); - /* All bits of targets set */ - check(instr_is_branch_iform(0x4bfffffd)); - /* Some bits of targets set */ - check(instr_is_branch_iform(0x4bff00fd)); - /* Must be a valid branch to start with */ - check(!instr_is_branch_iform(0x7bfffffd)); - - /* Absolute branch to 0x100 */ - instr = 0x48000103; - check(instr_is_branch_to_addr(&instr, 0x100)); - /* Absolute branch to 0x420fc */ - instr = 0x480420ff; - check(instr_is_branch_to_addr(&instr, 0x420fc)); - /* Maximum positive relative branch, + 20MB - 4B */ - instr = 0x49fffffc; - check(instr_is_branch_to_addr(&instr, addr + 0x1FFFFFC)); - /* Smallest negative relative branch, - 4B */ - instr = 0x4bfffffc; - check(instr_is_branch_to_addr(&instr, addr - 4)); - /* Largest negative relative branch, - 32 MB */ - instr = 0x4a000000; - check(instr_is_branch_to_addr(&instr, addr - 0x2000000)); - - /* Branch to self, with link */ - instr = create_branch(&instr, addr, BRANCH_SET_LINK); - check(instr_is_branch_to_addr(&instr, addr)); - - /* Branch to self - 0x100, with link */ - instr = create_branch(&instr, addr - 0x100, BRANCH_SET_LINK); - check(instr_is_branch_to_addr(&instr, addr - 0x100)); - - /* Branch to self + 0x100, no link */ - instr = create_branch(&instr, addr + 0x100, 0); - check(instr_is_branch_to_addr(&instr, addr + 0x100)); - - /* Maximum relative negative offset, - 32 MB */ - instr = create_branch(&instr, addr - 0x2000000, BRANCH_SET_LINK); - check(instr_is_branch_to_addr(&instr, addr - 0x2000000)); - - /* Out of range relative negative offset, - 32 MB + 4*/ - instr = create_branch(&instr, addr - 0x2000004, BRANCH_SET_LINK); - check(instr == 0); - - /* Out of range relative positive offset, + 32 MB */ - instr = create_branch(&instr, addr + 0x2000000, BRANCH_SET_LINK); - check(instr == 0); - - /* Unaligned target */ - instr = create_branch(&instr, addr + 3, BRANCH_SET_LINK); - check(instr == 0); - - /* Check flags are masked correctly */ - instr = create_branch(&instr, addr, 0xFFFFFFFC); - check(instr_is_branch_to_addr(&instr, addr)); - check(instr == 0x48000000); -} - -static void __init test_create_function_call(void) -{ - unsigned int *iptr; - unsigned long dest; - - /* Check we can create a function call */ - iptr = (unsigned int *)ppc_function_entry(test_trampoline); - dest = ppc_function_entry(test_create_function_call); - patch_instruction(iptr, create_branch(iptr, dest, BRANCH_SET_LINK)); - check(instr_is_branch_to_addr(iptr, dest)); -} - -static void __init test_branch_bform(void) -{ - unsigned long addr; - unsigned int *iptr, instr, flags; - - iptr = &instr; - 
addr = (unsigned long)iptr; - - /* The simplest case, branch to self, no flags */ - check(instr_is_branch_bform(0x40000000)); - /* All bits of target set, and flags */ - check(instr_is_branch_bform(0x43ffffff)); - /* High bit of opcode set, which is wrong */ - check(!instr_is_branch_bform(0xc3ffffff)); - /* Middle bits of opcode set, which is wrong */ - check(!instr_is_branch_bform(0x7bffffff)); - - /* Absolute conditional branch to 0x100 */ - instr = 0x43ff0103; - check(instr_is_branch_to_addr(&instr, 0x100)); - /* Absolute conditional branch to 0x20fc */ - instr = 0x43ff20ff; - check(instr_is_branch_to_addr(&instr, 0x20fc)); - /* Maximum positive relative conditional branch, + 32 KB - 4B */ - instr = 0x43ff7ffc; - check(instr_is_branch_to_addr(&instr, addr + 0x7FFC)); - /* Smallest negative relative conditional branch, - 4B */ - instr = 0x43fffffc; - check(instr_is_branch_to_addr(&instr, addr - 4)); - /* Largest negative relative conditional branch, - 32 KB */ - instr = 0x43ff8000; - check(instr_is_branch_to_addr(&instr, addr - 0x8000)); - - /* All condition code bits set & link */ - flags = 0x3ff000 | BRANCH_SET_LINK; - - /* Branch to self */ - instr = create_cond_branch(iptr, addr, flags); - check(instr_is_branch_to_addr(&instr, addr)); - - /* Branch to self - 0x100 */ - instr = create_cond_branch(iptr, addr - 0x100, flags); - check(instr_is_branch_to_addr(&instr, addr - 0x100)); - - /* Branch to self + 0x100 */ - instr = create_cond_branch(iptr, addr + 0x100, flags); - check(instr_is_branch_to_addr(&instr, addr + 0x100)); - - /* Maximum relative negative offset, - 32 KB */ - instr = create_cond_branch(iptr, addr - 0x8000, flags); - check(instr_is_branch_to_addr(&instr, addr - 0x8000)); - - /* Out of range relative negative offset, - 32 KB + 4*/ - instr = create_cond_branch(iptr, addr - 0x8004, flags); - check(instr == 0); - - /* Out of range relative positive offset, + 32 KB */ - instr = create_cond_branch(iptr, addr + 0x8000, flags); - check(instr == 0); - - /* Unaligned target */ - instr = create_cond_branch(iptr, addr + 3, flags); - check(instr == 0); - - /* Check flags are masked correctly */ - instr = create_cond_branch(iptr, addr, 0xFFFFFFFC); - check(instr_is_branch_to_addr(&instr, addr)); - check(instr == 0x43FF0000); -} - -static void __init test_translate_branch(void) -{ - unsigned long addr; - unsigned int *p, *q; - void *buf; - - buf = vmalloc(PAGE_ALIGN(0x2000000 + 1)); - check(buf); - if (!buf) - return; - - /* Simple case, branch to self moved a little */ - p = buf; - addr = (unsigned long)p; - patch_branch(p, addr, 0); - check(instr_is_branch_to_addr(p, addr)); - q = p + 1; - patch_instruction(q, translate_branch(q, p)); - check(instr_is_branch_to_addr(q, addr)); - - /* Maximum negative case, move b . 
to addr + 32 MB */ - p = buf; - addr = (unsigned long)p; - patch_branch(p, addr, 0); - q = buf + 0x2000000; - patch_instruction(q, translate_branch(q, p)); - check(instr_is_branch_to_addr(p, addr)); - check(instr_is_branch_to_addr(q, addr)); - check(*q == 0x4a000000); - - /* Maximum positive case, move x to x - 32 MB + 4 */ - p = buf + 0x2000000; - addr = (unsigned long)p; - patch_branch(p, addr, 0); - q = buf + 4; - patch_instruction(q, translate_branch(q, p)); - check(instr_is_branch_to_addr(p, addr)); - check(instr_is_branch_to_addr(q, addr)); - check(*q == 0x49fffffc); - - /* Jump to x + 16 MB moved to x + 20 MB */ - p = buf; - addr = 0x1000000 + (unsigned long)buf; - patch_branch(p, addr, BRANCH_SET_LINK); - q = buf + 0x1400000; - patch_instruction(q, translate_branch(q, p)); - check(instr_is_branch_to_addr(p, addr)); - check(instr_is_branch_to_addr(q, addr)); - - /* Jump to x + 16 MB moved to x - 16 MB + 4 */ - p = buf + 0x1000000; - addr = 0x2000000 + (unsigned long)buf; - patch_branch(p, addr, 0); - q = buf + 4; - patch_instruction(q, translate_branch(q, p)); - check(instr_is_branch_to_addr(p, addr)); - check(instr_is_branch_to_addr(q, addr)); - - - /* Conditional branch tests */ - - /* Simple case, branch to self moved a little */ - p = buf; - addr = (unsigned long)p; - patch_instruction(p, create_cond_branch(p, addr, 0)); - check(instr_is_branch_to_addr(p, addr)); - q = p + 1; - patch_instruction(q, translate_branch(q, p)); - check(instr_is_branch_to_addr(q, addr)); - - /* Maximum negative case, move b . to addr + 32 KB */ - p = buf; - addr = (unsigned long)p; - patch_instruction(p, create_cond_branch(p, addr, 0xFFFFFFFC)); - q = buf + 0x8000; - patch_instruction(q, translate_branch(q, p)); - check(instr_is_branch_to_addr(p, addr)); - check(instr_is_branch_to_addr(q, addr)); - check(*q == 0x43ff8000); - - /* Maximum positive case, move x to x - 32 KB + 4 */ - p = buf + 0x8000; - addr = (unsigned long)p; - patch_instruction(p, create_cond_branch(p, addr, 0xFFFFFFFC)); - q = buf + 4; - patch_instruction(q, translate_branch(q, p)); - check(instr_is_branch_to_addr(p, addr)); - check(instr_is_branch_to_addr(q, addr)); - check(*q == 0x43ff7ffc); - - /* Jump to x + 12 KB moved to x + 20 KB */ - p = buf; - addr = 0x3000 + (unsigned long)buf; - patch_instruction(p, create_cond_branch(p, addr, BRANCH_SET_LINK)); - q = buf + 0x5000; - patch_instruction(q, translate_branch(q, p)); - check(instr_is_branch_to_addr(p, addr)); - check(instr_is_branch_to_addr(q, addr)); - - /* Jump to x + 8 KB moved to x - 8 KB + 4 */ - p = buf + 0x2000; - addr = 0x4000 + (unsigned long)buf; - patch_instruction(p, create_cond_branch(p, addr, 0)); - q = buf + 4; - patch_instruction(q, translate_branch(q, p)); - check(instr_is_branch_to_addr(p, addr)); - check(instr_is_branch_to_addr(q, addr)); - - /* Free the buffer we were using */ - vfree(buf); -} - -static int __init test_code_patching(void) -{ - printk(KERN_DEBUG "Running code patching self-tests ...\n"); - - test_branch_iform(); - test_branch_bform(); - test_create_function_call(); - test_translate_branch(); - - return 0; -} -late_initcall(test_code_patching); - -#endif /* CONFIG_CODE_PATCHING_SELFTEST */ diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S index a3bcf4786e4a..933b685e7ab6 100644 --- a/arch/powerpc/lib/copy_32.S +++ b/arch/powerpc/lib/copy_32.S @@ -4,11 +4,11 @@ * * Copyright (C) 1996-2005 Paul Mackerras. 
*/ +#include <linux/export.h> #include <asm/processor.h> #include <asm/cache.h> #include <asm/errno.h> #include <asm/ppc_asm.h> -#include <asm/export.h> #include <asm/code-patching-asm.h> #include <asm/kasan.h> @@ -57,9 +57,6 @@ EX_TABLE(8 ## n ## 7b,9 ## n ## 1b) .text - .stabs "arch/powerpc/lib/",N_SO,0,0,0f - .stabs "copy_32.S",N_SO,0,0,0f -0: CACHELINE_BYTES = L1_CACHE_BYTES LG_CACHELINE_BYTES = L1_CACHE_SHIFT diff --git a/arch/powerpc/lib/memcpy_mcsafe_64.S b/arch/powerpc/lib/copy_mc_64.S index cb882d9a6d8a..bf1014b28fe8 100644 --- a/arch/powerpc/lib/memcpy_mcsafe_64.S +++ b/arch/powerpc/lib/copy_mc_64.S @@ -4,9 +4,9 @@ * Derived from copyuser_power7.s by Anton Blanchard <anton@au.ibm.com> * Author - Balbir Singh <bsingharora@gmail.com> */ +#include <linux/export.h> #include <asm/ppc_asm.h> #include <asm/errno.h> -#include <asm/export.h> .macro err1 100: @@ -50,7 +50,7 @@ err3; stb r0,0(r3) blr -_GLOBAL(memcpy_mcsafe) +_GLOBAL(copy_mc_generic) mr r7,r5 cmpldi r5,16 blt .Lshort_copy @@ -239,4 +239,4 @@ err1; stb r0,0(r3) 15: li r3,0 blr -EXPORT_SYMBOL_GPL(memcpy_mcsafe); +EXPORT_SYMBOL_GPL(copy_mc_generic); diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S index d1091b5ee5da..f33a2e6088e5 100644 --- a/arch/powerpc/lib/copypage_64.S +++ b/arch/powerpc/lib/copypage_64.S @@ -2,18 +2,13 @@ /* * Copyright (C) 2008 Mark Nelson, IBM Corp. */ +#include <linux/export.h> #include <asm/page.h> #include <asm/processor.h> #include <asm/ppc_asm.h> #include <asm/asm-offsets.h> -#include <asm/export.h> #include <asm/feature-fixups.h> - .section ".toc","aw" -PPC64_CACHES: - .tc ppc64_caches[TC],ppc64_caches - .section ".text" - _GLOBAL_TOC(copy_page) BEGIN_FTR_SECTION lis r5,PAGE_SIZE@h @@ -23,8 +18,18 @@ FTR_SECTION_ELSE #endif ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY) ori r5,r5,PAGE_SIZE@l +#ifdef CONFIG_PPC_KERNEL_PCREL + /* + * Hack for toolchain - prefixed instructions cause label difference to + * be non-constant even if 8 byte alignment is known, so they can not + * be put in FTR sections. 
+ */ + LOAD_REG_ADDR(r10, ppc64_caches) +BEGIN_FTR_SECTION +#else BEGIN_FTR_SECTION - ld r10,PPC64_CACHES@toc(r2) + LOAD_REG_ADDR(r10, ppc64_caches) +#endif lwz r11,DCACHEL1LOGBLOCKSIZE(r10) /* log2 of cache block size */ lwz r12,DCACHEL1BLOCKSIZE(r10) /* get cache block size */ li r9,0 diff --git a/arch/powerpc/lib/copypage_power7.S b/arch/powerpc/lib/copypage_power7.S index a9844c6353cf..07e7cec4d135 100644 --- a/arch/powerpc/lib/copypage_power7.S +++ b/arch/powerpc/lib/copypage_power7.S @@ -27,17 +27,7 @@ _GLOBAL(copypage_power7) #endif ori r10,r7,1 /* stream=1 */ - lis r8,0x8000 /* GO=1 */ - clrldi r8,r8,32 - - /* setup read stream 0 */ - dcbt 0,r4,0b01000 /* addr from */ - dcbt 0,r7,0b01010 /* length and depth from */ - /* setup write stream 1 */ - dcbtst 0,r9,0b01000 /* addr to */ - dcbtst 0,r10,0b01010 /* length and depth to */ - eieio - dcbt 0,r8,0b01010 /* all streams GO */ + DCBT_SETUP_STREAMS(r4, r7, r9, r10, r8) #ifdef CONFIG_ALTIVEC mflr r0 @@ -45,7 +35,7 @@ _GLOBAL(copypage_power7) std r4,-STACKFRAMESIZE+STK_REG(R30)(r1) std r0,16(r1) stdu r1,-STACKFRAMESIZE(r1) - bl enter_vmx_ops + bl CFUNC(enter_vmx_ops) cmpwi r3,0 ld r0,STACKFRAMESIZE+16(r1) ld r3,STK_REG(R31)(r1) @@ -88,7 +78,7 @@ _GLOBAL(copypage_power7) addi r3,r3,128 bdnz 1b - b exit_vmx_ops /* tail call optimise */ + b CFUNC(exit_vmx_ops) /* tail call optimise */ #else li r0,(PAGE_SIZE/128) diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S index db8719a14846..9af969d2cc0c 100644 --- a/arch/powerpc/lib/copyuser_64.S +++ b/arch/powerpc/lib/copyuser_64.S @@ -2,9 +2,9 @@ /* * Copyright (C) 2002 Paul Mackerras, IBM Corp. */ +#include <linux/export.h> #include <asm/processor.h> #include <asm/ppc_asm.h> -#include <asm/export.h> #include <asm/asm-compat.h> #include <asm/feature-fixups.h> diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S index 28f0be523c06..8474c682a178 100644 --- a/arch/powerpc/lib/copyuser_power7.S +++ b/arch/powerpc/lib/copyuser_power7.S @@ -47,7 +47,7 @@ ld r15,STK_REG(R15)(r1) ld r14,STK_REG(R14)(r1) .Ldo_err3: - bl exit_vmx_usercopy + bl CFUNC(exit_vmx_usercopy) ld r0,STACKFRAMESIZE+16(r1) mtlr r0 b .Lexit @@ -272,7 +272,7 @@ err1; stb r0,0(r3) mflr r0 std r0,16(r1) stdu r1,-STACKFRAMESIZE(r1) - bl enter_vmx_usercopy + bl CFUNC(enter_vmx_usercopy) cmpwi cr1,r3,0 ld r0,STACKFRAMESIZE+16(r1) ld r3,STK_REG(R31)(r1) @@ -298,17 +298,7 @@ err1; stb r0,0(r3) or r7,r7,r0 ori r10,r7,1 /* stream=1 */ - lis r8,0x8000 /* GO=1 */ - clrldi r8,r8,32 - - /* setup read stream 0 */ - dcbt 0,r6,0b01000 /* addr from */ - dcbt 0,r7,0b01010 /* length and depth from */ - /* setup write stream 1 */ - dcbtst 0,r9,0b01000 /* addr to */ - dcbtst 0,r10,0b01010 /* length and depth to */ - eieio - dcbt 0,r8,0b01010 /* all streams GO */ + DCBT_SETUP_STREAMS(r6, r7, r9, r10, r8) beq cr1,.Lunwind_stack_nonvmx_copy @@ -488,7 +478,7 @@ err3; lbz r0,0(r4) err3; stb r0,0(r3) 15: addi r1,r1,STACKFRAMESIZE - b exit_vmx_usercopy /* tail call optimise */ + b CFUNC(exit_vmx_usercopy) /* tail call optimise */ .Lvmx_unaligned_copy: /* Get the destination 16B aligned */ @@ -691,5 +681,5 @@ err3; lbz r0,0(r4) err3; stb r0,0(r3) 15: addi r1,r1,STACKFRAMESIZE - b exit_vmx_usercopy /* tail call optimise */ + b CFUNC(exit_vmx_usercopy) /* tail call optimise */ #endif /* CONFIG_ALTIVEC */ diff --git a/arch/powerpc/lib/error-inject.c b/arch/powerpc/lib/error-inject.c index 407b992fb02f..e834079d2b5c 100644 --- a/arch/powerpc/lib/error-inject.c +++ b/arch/powerpc/lib/error-inject.c @@ -11,6 
+11,6 @@ void override_function_with_return(struct pt_regs *regs) * function in the kernel/module, captured on a kprobe. We don't need * to worry about 32-bit userspace on a 64-bit kernel. */ - regs->nip = regs->link; + regs_set_return_ip(regs, regs->link); } NOKPROBE_SYMBOL(override_function_with_return); diff --git a/arch/powerpc/lib/feature-fixups-test.S b/arch/powerpc/lib/feature-fixups-test.S index b12168c2447a..480172fbd024 100644 --- a/arch/powerpc/lib/feature-fixups-test.S +++ b/arch/powerpc/lib/feature-fixups-test.S @@ -7,6 +7,7 @@ #include <asm/ppc_asm.h> #include <asm/synch.h> #include <asm/asm-compat.h> +#include <asm/ppc-opcode.h> .text @@ -791,3 +792,71 @@ globl(lwsync_fixup_test_expected_SYNC) 1: or 1,1,1 sync +globl(ftr_fixup_prefix1) + or 1,1,1 + .long OP_PREFIX << 26 + .long 0x0000000 + or 2,2,2 +globl(end_ftr_fixup_prefix1) + +globl(ftr_fixup_prefix1_orig) + or 1,1,1 + .long OP_PREFIX << 26 + .long 0x0000000 + or 2,2,2 + +globl(ftr_fixup_prefix1_expected) + or 1,1,1 + nop + nop + or 2,2,2 + +globl(ftr_fixup_prefix2) + or 1,1,1 + .long OP_PREFIX << 26 + .long 0x0000000 + or 2,2,2 +globl(end_ftr_fixup_prefix2) + +globl(ftr_fixup_prefix2_orig) + or 1,1,1 + .long OP_PREFIX << 26 + .long 0x0000000 + or 2,2,2 + +globl(ftr_fixup_prefix2_alt) + .long OP_PREFIX << 26 + .long 0x0000001 + +globl(ftr_fixup_prefix2_expected) + or 1,1,1 + .long OP_PREFIX << 26 + .long 0x0000001 + or 2,2,2 + +globl(ftr_fixup_prefix3) + or 1,1,1 + .long OP_PREFIX << 26 + .long 0x0000000 + or 2,2,2 + or 3,3,3 +globl(end_ftr_fixup_prefix3) + +globl(ftr_fixup_prefix3_orig) + or 1,1,1 + .long OP_PREFIX << 26 + .long 0x0000000 + or 2,2,2 + or 3,3,3 + +globl(ftr_fixup_prefix3_alt) + .long OP_PREFIX << 26 + .long 0x0000001 + nop + +globl(ftr_fixup_prefix3_expected) + or 1,1,1 + .long OP_PREFIX << 26 + .long 0x0000001 + nop + or 3,3,3 diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 4ba634b89ce5..4f82581ca203 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -14,13 +14,16 @@ #include <linux/string.h> #include <linux/init.h> #include <linux/sched/mm.h> +#include <linux/stop_machine.h> #include <asm/cputable.h> #include <asm/code-patching.h> +#include <asm/interrupt.h> #include <asm/page.h> #include <asm/sections.h> #include <asm/setup.h> #include <asm/security_features.h> #include <asm/firmware.h> +#include <asm/inst.h> struct fixup_entry { unsigned long mask; @@ -31,30 +34,30 @@ struct fixup_entry { long alt_end_off; }; -static unsigned int *calc_addr(struct fixup_entry *fcur, long offset) +static u32 *calc_addr(struct fixup_entry *fcur, long offset) { /* * We store the offset to the code as a negative offset from * the start of the alt_entry, to support the VDSO. This * routine converts that back into an actual address. 
*/ - return (unsigned int *)((unsigned long)fcur + offset); + return (u32 *)((unsigned long)fcur + offset); } -static int patch_alt_instruction(unsigned int *src, unsigned int *dest, - unsigned int *alt_start, unsigned int *alt_end) +static int patch_alt_instruction(u32 *src, u32 *dest, u32 *alt_start, u32 *alt_end) { - unsigned int instr; + int err; + ppc_inst_t instr; - instr = *src; + instr = ppc_inst_read(src); - if (instr_is_relative_branch(*src)) { - unsigned int *target = (unsigned int *)branch_target(src); + if (instr_is_relative_branch(ppc_inst_read(src))) { + u32 *target = (u32 *)branch_target(src); /* Branch within the section doesn't need translating */ if (target < alt_start || target > alt_end) { - instr = translate_branch(dest, src); - if (!instr) + err = translate_branch(&instr, dest, src); + if (err) return 1; } } @@ -64,9 +67,10 @@ static int patch_alt_instruction(unsigned int *src, unsigned int *dest, return 0; } -static int patch_feature_section(unsigned long value, struct fixup_entry *fcur) +static int patch_feature_section_mask(unsigned long value, unsigned long mask, + struct fixup_entry *fcur) { - unsigned int *start, *end, *alt_start, *alt_end, *src, *dest; + u32 *start, *end, *alt_start, *alt_end, *src, *dest; start = calc_addr(fcur, fcur->start_off); end = calc_addr(fcur, fcur->end_off); @@ -76,24 +80,26 @@ static int patch_feature_section(unsigned long value, struct fixup_entry *fcur) if ((alt_end - alt_start) > (end - start)) return 1; - if ((value & fcur->mask) == fcur->value) + if ((value & fcur->mask & mask) == (fcur->value & mask)) return 0; src = alt_start; dest = start; - for (; src < alt_end; src++, dest++) { + for (; src < alt_end; src = ppc_inst_next(src, src), + dest = ppc_inst_next(dest, dest)) { if (patch_alt_instruction(src, dest, alt_start, alt_end)) return 1; } for (; dest < end; dest++) - raw_patch_instruction(dest, PPC_INST_NOP); + raw_patch_instruction(dest, ppc_inst(PPC_RAW_NOP())); return 0; } -void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end) +static void do_feature_fixups_mask(unsigned long value, unsigned long mask, + void *fixup_start, void *fixup_end) { struct fixup_entry *fcur, *fend; @@ -101,7 +107,7 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end) fend = fixup_end; for (; fcur < fend; fcur++) { - if (patch_feature_section(value, fcur)) { + if (patch_feature_section_mask(value, mask, fcur)) { WARN_ON(1); printk("Unable to patch feature section at %p - %p" \ " with %p - %p\n", @@ -113,48 +119,94 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end) } } +void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end) +{ + do_feature_fixups_mask(value, ~0, fixup_start, fixup_end); +} + +#ifdef CONFIG_PPC_BARRIER_NOSPEC +static bool is_fixup_addr_valid(void *dest, size_t size) +{ + return system_state < SYSTEM_FREEING_INITMEM || + !init_section_contains(dest, size); +} + +static int do_patch_fixups(long *start, long *end, unsigned int *instrs, int num) +{ + int i; + + for (i = 0; start < end; start++, i++) { + int j; + unsigned int *dest = (void *)start + *start; + + if (!is_fixup_addr_valid(dest, sizeof(*instrs) * num)) + continue; + + pr_devel("patching dest %lx\n", (unsigned long)dest); + + for (j = 0; j < num; j++) + patch_instruction(dest + j, ppc_inst(instrs[j])); + } + return i; +} +#endif + #ifdef CONFIG_PPC_BOOK3S_64 +static int do_patch_entry_fixups(long *start, long *end, unsigned int *instrs, + bool do_fallback, void 
*fallback) +{ + int i; + + for (i = 0; start < end; start++, i++) { + unsigned int *dest = (void *)start + *start; + + if (!is_fixup_addr_valid(dest, sizeof(*instrs) * 3)) + continue; + + pr_devel("patching dest %lx\n", (unsigned long)dest); + + // See comment in do_entry_flush_fixups() RE order of patching + if (do_fallback) { + patch_instruction(dest, ppc_inst(instrs[0])); + patch_instruction(dest + 2, ppc_inst(instrs[2])); + patch_branch(dest + 1, (unsigned long)fallback, BRANCH_SET_LINK); + } else { + patch_instruction(dest + 1, ppc_inst(instrs[1])); + patch_instruction(dest + 2, ppc_inst(instrs[2])); + patch_instruction(dest, ppc_inst(instrs[0])); + } + } + return i; +} + static void do_stf_entry_barrier_fixups(enum stf_barrier_type types) { - unsigned int instrs[3], *dest; + unsigned int instrs[3]; long *start, *end; int i; - start = PTRRELOC(&__start___stf_entry_barrier_fixup), + start = PTRRELOC(&__start___stf_entry_barrier_fixup); end = PTRRELOC(&__stop___stf_entry_barrier_fixup); - instrs[0] = 0x60000000; /* nop */ - instrs[1] = 0x60000000; /* nop */ - instrs[2] = 0x60000000; /* nop */ + instrs[0] = PPC_RAW_NOP(); + instrs[1] = PPC_RAW_NOP(); + instrs[2] = PPC_RAW_NOP(); i = 0; if (types & STF_BARRIER_FALLBACK) { - instrs[i++] = 0x7d4802a6; /* mflr r10 */ - instrs[i++] = 0x60000000; /* branch patched below */ - instrs[i++] = 0x7d4803a6; /* mtlr r10 */ + instrs[i++] = PPC_RAW_MFLR(_R10); + instrs[i++] = PPC_RAW_NOP(); /* branch patched below */ + instrs[i++] = PPC_RAW_MTLR(_R10); } else if (types & STF_BARRIER_EIEIO) { - instrs[i++] = 0x7e0006ac; /* eieio + bit 6 hint */ + instrs[i++] = PPC_RAW_EIEIO() | 0x02000000; /* eieio + bit 6 hint */ } else if (types & STF_BARRIER_SYNC_ORI) { - instrs[i++] = 0x7c0004ac; /* hwsync */ - instrs[i++] = 0xe94d0000; /* ld r10,0(r13) */ - instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ + instrs[i++] = PPC_RAW_SYNC(); + instrs[i++] = PPC_RAW_LD(_R10, _R13, 0); + instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */ } - for (i = 0; start < end; start++, i++) { - dest = (void *)start + *start; - - pr_devel("patching dest %lx\n", (unsigned long)dest); - - patch_instruction(dest, instrs[0]); - - if (types & STF_BARRIER_FALLBACK) - patch_branch(dest + 1, (unsigned long)&stf_barrier_fallback, - BRANCH_SET_LINK); - else - patch_instruction(dest + 1, instrs[1]); - - patch_instruction(dest + 2, instrs[2]); - } + i = do_patch_entry_fixups(start, end, instrs, types & STF_BARRIER_FALLBACK, + &stf_barrier_fallback); printk(KERN_DEBUG "stf-barrier: patched %d entry locations (%s barrier)\n", i, (types == STF_BARRIER_NONE) ? 
"no" : @@ -166,53 +218,42 @@ static void do_stf_entry_barrier_fixups(enum stf_barrier_type types) static void do_stf_exit_barrier_fixups(enum stf_barrier_type types) { - unsigned int instrs[6], *dest; + unsigned int instrs[6]; long *start, *end; int i; - start = PTRRELOC(&__start___stf_exit_barrier_fixup), + start = PTRRELOC(&__start___stf_exit_barrier_fixup); end = PTRRELOC(&__stop___stf_exit_barrier_fixup); - instrs[0] = 0x60000000; /* nop */ - instrs[1] = 0x60000000; /* nop */ - instrs[2] = 0x60000000; /* nop */ - instrs[3] = 0x60000000; /* nop */ - instrs[4] = 0x60000000; /* nop */ - instrs[5] = 0x60000000; /* nop */ + instrs[0] = PPC_RAW_NOP(); + instrs[1] = PPC_RAW_NOP(); + instrs[2] = PPC_RAW_NOP(); + instrs[3] = PPC_RAW_NOP(); + instrs[4] = PPC_RAW_NOP(); + instrs[5] = PPC_RAW_NOP(); i = 0; if (types & STF_BARRIER_FALLBACK || types & STF_BARRIER_SYNC_ORI) { if (cpu_has_feature(CPU_FTR_HVMODE)) { - instrs[i++] = 0x7db14ba6; /* mtspr 0x131, r13 (HSPRG1) */ - instrs[i++] = 0x7db04aa6; /* mfspr r13, 0x130 (HSPRG0) */ + instrs[i++] = PPC_RAW_MTSPR(SPRN_HSPRG1, _R13); + instrs[i++] = PPC_RAW_MFSPR(_R13, SPRN_HSPRG0); } else { - instrs[i++] = 0x7db243a6; /* mtsprg 2,r13 */ - instrs[i++] = 0x7db142a6; /* mfsprg r13,1 */ + instrs[i++] = PPC_RAW_MTSPR(SPRN_SPRG2, _R13); + instrs[i++] = PPC_RAW_MFSPR(_R13, SPRN_SPRG1); } - instrs[i++] = 0x7c0004ac; /* hwsync */ - instrs[i++] = 0xe9ad0000; /* ld r13,0(r13) */ - instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ - if (cpu_has_feature(CPU_FTR_HVMODE)) { - instrs[i++] = 0x7db14aa6; /* mfspr r13, 0x131 (HSPRG1) */ - } else { - instrs[i++] = 0x7db242a6; /* mfsprg r13,2 */ - } + instrs[i++] = PPC_RAW_SYNC(); + instrs[i++] = PPC_RAW_LD(_R13, _R13, 0); + instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */ + if (cpu_has_feature(CPU_FTR_HVMODE)) + instrs[i++] = PPC_RAW_MFSPR(_R13, SPRN_HSPRG1); + else + instrs[i++] = PPC_RAW_MFSPR(_R13, SPRN_SPRG2); } else if (types & STF_BARRIER_EIEIO) { - instrs[i++] = 0x7e0006ac; /* eieio + bit 6 hint */ + instrs[i++] = PPC_RAW_EIEIO() | 0x02000000; /* eieio + bit 6 hint */ } - for (i = 0; start < end; start++, i++) { - dest = (void *)start + *start; - - pr_devel("patching dest %lx\n", (unsigned long)dest); + i = do_patch_fixups(start, end, instrs, ARRAY_SIZE(instrs)); - patch_instruction(dest, instrs[0]); - patch_instruction(dest + 1, instrs[1]); - patch_instruction(dest + 2, instrs[2]); - patch_instruction(dest + 3, instrs[3]); - patch_instruction(dest + 4, instrs[4]); - patch_instruction(dest + 5, instrs[5]); - } printk(KERN_DEBUG "stf-barrier: patched %d exit locations (%s barrier)\n", i, (types == STF_BARRIER_NONE) ? "no" : (types == STF_BARRIER_FALLBACK) ? "fallback" : @@ -221,49 +262,205 @@ static void do_stf_exit_barrier_fixups(enum stf_barrier_type types) : "unknown"); } +static bool stf_exit_reentrant = false; +static bool rfi_exit_reentrant = false; +static DEFINE_MUTEX(exit_flush_lock); + +static int __do_stf_barrier_fixups(void *data) +{ + enum stf_barrier_type *types = data; + + do_stf_entry_barrier_fixups(*types); + do_stf_exit_barrier_fixups(*types); + + return 0; +} void do_stf_barrier_fixups(enum stf_barrier_type types) { - do_stf_entry_barrier_fixups(types); - do_stf_exit_barrier_fixups(types); + /* + * The call to the fallback entry flush, and the fallback/sync-ori exit + * flush can not be safely patched in/out while other CPUs are + * executing them. 
So call __do_stf_barrier_fixups() on one CPU while + * all other CPUs spin in the stop machine core with interrupts hard + * disabled. + * + * The branch to mark interrupt exits non-reentrant is enabled first, + * then stop_machine runs which will ensure all CPUs are out of the + * low level interrupt exit code before patching. After the patching, + * if allowed, then flip the branch to allow fast exits. + */ + + // Prevent static key update races with do_rfi_flush_fixups() + mutex_lock(&exit_flush_lock); + static_branch_enable(&interrupt_exit_not_reentrant); + + stop_machine(__do_stf_barrier_fixups, &types, NULL); + + if ((types & STF_BARRIER_FALLBACK) || (types & STF_BARRIER_SYNC_ORI)) + stf_exit_reentrant = false; + else + stf_exit_reentrant = true; + + if (stf_exit_reentrant && rfi_exit_reentrant) + static_branch_disable(&interrupt_exit_not_reentrant); + + mutex_unlock(&exit_flush_lock); } -void do_rfi_flush_fixups(enum l1d_flush_type types) +void do_uaccess_flush_fixups(enum l1d_flush_type types) { - unsigned int instrs[3], *dest; + unsigned int instrs[4]; long *start, *end; int i; - start = PTRRELOC(&__start___rfi_flush_fixup), - end = PTRRELOC(&__stop___rfi_flush_fixup); + start = PTRRELOC(&__start___uaccess_flush_fixup); + end = PTRRELOC(&__stop___uaccess_flush_fixup); - instrs[0] = 0x60000000; /* nop */ - instrs[1] = 0x60000000; /* nop */ - instrs[2] = 0x60000000; /* nop */ + instrs[0] = PPC_RAW_NOP(); + instrs[1] = PPC_RAW_NOP(); + instrs[2] = PPC_RAW_NOP(); + instrs[3] = PPC_RAW_BLR(); - if (types & L1D_FLUSH_FALLBACK) - /* b .+16 to fallback flush */ - instrs[0] = 0x48000010; + i = 0; + if (types == L1D_FLUSH_FALLBACK) { + instrs[3] = PPC_RAW_NOP(); + /* fallthrough to fallback flush */ + } + + if (types & L1D_FLUSH_ORI) { + instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */ + instrs[i++] = PPC_RAW_ORI(_R30, _R30, 0); /* L1d flush */ + } + + if (types & L1D_FLUSH_MTTRIG) + instrs[i++] = PPC_RAW_MTSPR(SPRN_TRIG2, _R0); + + i = do_patch_fixups(start, end, instrs, ARRAY_SIZE(instrs)); + + printk(KERN_DEBUG "uaccess-flush: patched %d locations (%s flush)\n", i, + (types == L1D_FLUSH_NONE) ? "no" : + (types == L1D_FLUSH_FALLBACK) ? "fallback displacement" : + (types & L1D_FLUSH_ORI) ? (types & L1D_FLUSH_MTTRIG) + ? "ori+mttrig type" + : "ori type" : + (types & L1D_FLUSH_MTTRIG) ? "mttrig type" + : "unknown"); +} + +static int __do_entry_flush_fixups(void *data) +{ + enum l1d_flush_type types = *(enum l1d_flush_type *)data; + unsigned int instrs[3]; + long *start, *end; + int i; + + instrs[0] = PPC_RAW_NOP(); + instrs[1] = PPC_RAW_NOP(); + instrs[2] = PPC_RAW_NOP(); i = 0; + if (types == L1D_FLUSH_FALLBACK) { + instrs[i++] = PPC_RAW_MFLR(_R10); + instrs[i++] = PPC_RAW_NOP(); /* branch patched below */ + instrs[i++] = PPC_RAW_MTLR(_R10); + } + if (types & L1D_FLUSH_ORI) { - instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ - instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/ + instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */ + instrs[i++] = PPC_RAW_ORI(_R30, _R30, 0); /* L1d flush */ } if (types & L1D_FLUSH_MTTRIG) - instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */ + instrs[i++] = PPC_RAW_MTSPR(SPRN_TRIG2, _R0); - for (i = 0; start < end; start++, i++) { - dest = (void *)start + *start; + /* + * If we're patching in or out the fallback flush we need to be careful about the + * order in which we patch instructions. 
That's because it's possible we could + * take a page fault after patching one instruction, so the sequence of + * instructions must be safe even in a half patched state. + * + * To make that work, when patching in the fallback flush we patch in this order: + * - the mflr (dest) + * - the mtlr (dest + 2) + * - the branch (dest + 1) + * + * That ensures the sequence is safe to execute at any point. In contrast if we + * patch the mtlr last, it's possible we could return from the branch and not + * restore LR, leading to a crash later. + * + * When patching out the fallback flush (either with nops or another flush type), + * we patch in this order: + * - the branch (dest + 1) + * - the mtlr (dest + 2) + * - the mflr (dest) + * + * Note we are protected by stop_machine() from other CPUs executing the code in a + * semi-patched state. + */ - pr_devel("patching dest %lx\n", (unsigned long)dest); + start = PTRRELOC(&__start___entry_flush_fixup); + end = PTRRELOC(&__stop___entry_flush_fixup); + i = do_patch_entry_fixups(start, end, instrs, types == L1D_FLUSH_FALLBACK, + &entry_flush_fallback); + + start = PTRRELOC(&__start___scv_entry_flush_fixup); + end = PTRRELOC(&__stop___scv_entry_flush_fixup); + i += do_patch_entry_fixups(start, end, instrs, types == L1D_FLUSH_FALLBACK, + &scv_entry_flush_fallback); - patch_instruction(dest, instrs[0]); - patch_instruction(dest + 1, instrs[1]); - patch_instruction(dest + 2, instrs[2]); + printk(KERN_DEBUG "entry-flush: patched %d locations (%s flush)\n", i, + (types == L1D_FLUSH_NONE) ? "no" : + (types == L1D_FLUSH_FALLBACK) ? "fallback displacement" : + (types & L1D_FLUSH_ORI) ? (types & L1D_FLUSH_MTTRIG) + ? "ori+mttrig type" + : "ori type" : + (types & L1D_FLUSH_MTTRIG) ? "mttrig type" + : "unknown"); + + return 0; +} + +void do_entry_flush_fixups(enum l1d_flush_type types) +{ + /* + * The call to the fallback flush can not be safely patched in/out while + * other CPUs are executing it. So call __do_entry_flush_fixups() on one + * CPU while all other CPUs spin in the stop machine core with interrupts + * hard disabled. + */ + stop_machine(__do_entry_flush_fixups, &types, NULL); +} + +static int __do_rfi_flush_fixups(void *data) +{ + enum l1d_flush_type types = *(enum l1d_flush_type *)data; + unsigned int instrs[3]; + long *start, *end; + int i; + + start = PTRRELOC(&__start___rfi_flush_fixup); + end = PTRRELOC(&__stop___rfi_flush_fixup); + + instrs[0] = PPC_RAW_NOP(); + instrs[1] = PPC_RAW_NOP(); + instrs[2] = PPC_RAW_NOP(); + + if (types & L1D_FLUSH_FALLBACK) + /* b .+16 to fallback flush */ + instrs[0] = PPC_RAW_BRANCH(16); + + i = 0; + if (types & L1D_FLUSH_ORI) { + instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */ + instrs[i++] = PPC_RAW_ORI(_R30, _R30, 0); /* L1d flush */ } + if (types & L1D_FLUSH_MTTRIG) + instrs[i++] = PPC_RAW_MTSPR(SPRN_TRIG2, _R0); + + i = do_patch_fixups(start, end, instrs, ARRAY_SIZE(instrs)); + printk(KERN_DEBUG "rfi-flush: patched %d locations (%s flush)\n", i, (types == L1D_FLUSH_NONE) ? "no" : (types == L1D_FLUSH_FALLBACK) ? "fallback displacement" : @@ -272,30 +469,53 @@ void do_rfi_flush_fixups(enum l1d_flush_type types) : "ori type" : (types & L1D_FLUSH_MTTRIG) ? "mttrig type" : "unknown"); + + return 0; +} + +void do_rfi_flush_fixups(enum l1d_flush_type types) +{ + /* + * stop_machine gets all CPUs out of the interrupt exit handler same + * as do_stf_barrier_fixups. 
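Both do_stf_barrier_fixups() and do_rfi_flush_fixups() wrap their stop_machine() call in the same outer sequence; distilled (the __do_xxx_fixups name is a placeholder, the other identifiers are the ones in this patch):

	mutex_lock(&exit_flush_lock);			/* serialise stf vs rfi updates */
	static_branch_enable(&interrupt_exit_not_reentrant); /* force slow, safe exits */

	stop_machine(__do_xxx_fixups, &types, NULL);	/* patch on one CPU */

	/* fast exits come back only once both the stf and rfi sides are reentrant */
	if (stf_exit_reentrant && rfi_exit_reentrant)
		static_branch_disable(&interrupt_exit_not_reentrant);

	mutex_unlock(&exit_flush_lock);
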
do_rfi_flush_fixups patching can run + * without stop_machine, so this could be achieved with a broadcast + * IPI instead, but this matches the stf sequence. + */ + + // Prevent static key update races with do_stf_barrier_fixups() + mutex_lock(&exit_flush_lock); + static_branch_enable(&interrupt_exit_not_reentrant); + + stop_machine(__do_rfi_flush_fixups, &types, NULL); + + if (types & L1D_FLUSH_FALLBACK) + rfi_exit_reentrant = false; + else + rfi_exit_reentrant = true; + + if (stf_exit_reentrant && rfi_exit_reentrant) + static_branch_disable(&interrupt_exit_not_reentrant); + + mutex_unlock(&exit_flush_lock); } void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_end) { - unsigned int instr, *dest; + unsigned int instr; long *start, *end; int i; start = fixup_start; end = fixup_end; - instr = 0x60000000; /* nop */ + instr = PPC_RAW_NOP(); if (enable) { pr_info("barrier-nospec: using ORI speculation barrier\n"); - instr = 0x63ff0000; /* ori 31,31,0 speculation barrier */ + instr = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */ } - for (i = 0; start < end; start++, i++) { - dest = (void *)start + *start; - - pr_devel("patching dest %lx\n", (unsigned long)dest); - patch_instruction(dest, instr); - } + i = do_patch_fixups(start, end, &instr, 1); printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i); } @@ -307,44 +527,38 @@ void do_barrier_nospec_fixups(bool enable) { void *start, *end; - start = PTRRELOC(&__start___barrier_nospec_fixup), + start = PTRRELOC(&__start___barrier_nospec_fixup); end = PTRRELOC(&__stop___barrier_nospec_fixup); do_barrier_nospec_fixups_range(enable, start, end); } #endif /* CONFIG_PPC_BARRIER_NOSPEC */ -#ifdef CONFIG_PPC_FSL_BOOK3E +#ifdef CONFIG_PPC_E500 void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_end) { - unsigned int instr[2], *dest; + unsigned int instr[2]; long *start, *end; int i; start = fixup_start; end = fixup_end; - instr[0] = PPC_INST_NOP; - instr[1] = PPC_INST_NOP; + instr[0] = PPC_RAW_NOP(); + instr[1] = PPC_RAW_NOP(); if (enable) { pr_info("barrier-nospec: using isync; sync as speculation barrier\n"); - instr[0] = PPC_INST_ISYNC; - instr[1] = PPC_INST_SYNC; + instr[0] = PPC_RAW_ISYNC(); + instr[1] = PPC_RAW_SYNC(); } - for (i = 0; start < end; start++, i++) { - dest = (void *)start + *start; - - pr_devel("patching dest %lx\n", (unsigned long)dest); - patch_instruction(dest, instr[0]); - patch_instruction(dest + 1, instr[1]); - } + i = do_patch_fixups(start, end, instr, ARRAY_SIZE(instr)); printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i); } -static void patch_btb_flush_section(long *curr) +static void __init patch_btb_flush_section(long *curr) { unsigned int *start, *end; @@ -352,11 +566,11 @@ static void patch_btb_flush_section(long *curr) end = (void *)curr + *(curr + 1); for (; start < end; start++) { pr_devel("patching dest %lx\n", (unsigned long)start); - patch_instruction(start, PPC_INST_NOP); + patch_instruction(start, ppc_inst(PPC_RAW_NOP())); } } -void do_btb_flush_fixups(void) +void __init do_btb_flush_fixups(void) { long *start, *end; @@ -366,12 +580,12 @@ void do_btb_flush_fixups(void) for (; start < end; start += 2) patch_btb_flush_section(start); } -#endif /* CONFIG_PPC_FSL_BOOK3E */ +#endif /* CONFIG_PPC_E500 */ void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end) { long *start, *end; - unsigned int *dest; + u32 *dest; if (!(value & CPU_FTR_LWSYNC)) return ; @@ -381,27 +595,28 @@ void do_lwsync_fixups(unsigned long value, 
void *fixup_start, void *fixup_end) for (; start < end; start++) { dest = (void *)start + *start; - raw_patch_instruction(dest, PPC_INST_LWSYNC); + raw_patch_instruction(dest, ppc_inst(PPC_INST_LWSYNC)); } } -static void do_final_fixups(void) +static void __init do_final_fixups(void) { #if defined(CONFIG_PPC64) && defined(CONFIG_RELOCATABLE) - int *src, *dest; - unsigned long length; + ppc_inst_t inst; + u32 *src, *dest, *end; if (PHYSICAL_START == 0) return; - src = (int *)(KERNELBASE + PHYSICAL_START); - dest = (int *)KERNELBASE; - length = (__end_interrupts - _stext) / sizeof(int); + src = (u32 *)(KERNELBASE + PHYSICAL_START); + dest = (u32 *)KERNELBASE; + end = (void *)src + (__end_interrupts - _stext); - while (length--) { - raw_patch_instruction(dest, *src); - src++; - dest++; + while (src < end) { + inst = ppc_inst_read(src); + raw_patch_instruction(dest, inst); + src = ppc_inst_next(src, src); + dest = ppc_inst_next(dest, dest); } #endif } @@ -443,6 +658,17 @@ void __init apply_feature_fixups(void) do_final_fixups(); } +void __init update_mmu_feature_fixups(unsigned long mask) +{ + saved_mmu_features &= ~mask; + saved_mmu_features |= cur_cpu_spec->mmu_features & mask; + + do_feature_fixups_mask(cur_cpu_spec->mmu_features, mask, + PTRRELOC(&__start___mmu_ftr_fixup), + PTRRELOC(&__stop___mmu_ftr_fixup)); + mmu_feature_keys_init(); +} + void __init setup_feature_keys(void) { /* @@ -475,15 +701,20 @@ late_initcall(check_features); #define check(x) \ if (!(x)) printk("feature-fixups: test failed at line %d\n", __LINE__); +static int patch_feature_section(unsigned long value, struct fixup_entry *fcur) +{ + return patch_feature_section_mask(value, ~0, fcur); +} + /* This must be after the text it fixes up, vmlinux.lds.S enforces that atm */ static struct fixup_entry fixup; -static long calc_offset(struct fixup_entry *entry, unsigned int *p) +static long __init calc_offset(struct fixup_entry *entry, unsigned int *p) { return (unsigned long)p - (unsigned long)entry; } -static void test_basic_patching(void) +static void __init test_basic_patching(void) { extern unsigned int ftr_fixup_test1[]; extern unsigned int end_ftr_fixup_test1[]; @@ -514,7 +745,7 @@ static void test_basic_patching(void) check(memcmp(ftr_fixup_test1, ftr_fixup_test1_expected, size) == 0); } -static void test_alternative_patching(void) +static void __init test_alternative_patching(void) { extern unsigned int ftr_fixup_test2[]; extern unsigned int end_ftr_fixup_test2[]; @@ -547,7 +778,7 @@ static void test_alternative_patching(void) check(memcmp(ftr_fixup_test2, ftr_fixup_test2_expected, size) == 0); } -static void test_alternative_case_too_big(void) +static void __init test_alternative_case_too_big(void) { extern unsigned int ftr_fixup_test3[]; extern unsigned int end_ftr_fixup_test3[]; @@ -573,7 +804,7 @@ static void test_alternative_case_too_big(void) check(memcmp(ftr_fixup_test3, ftr_fixup_test3_orig, size) == 0); } -static void test_alternative_case_too_small(void) +static void __init test_alternative_case_too_small(void) { extern unsigned int ftr_fixup_test4[]; extern unsigned int end_ftr_fixup_test4[]; @@ -619,7 +850,7 @@ static void test_alternative_case_with_branch(void) check(memcmp(ftr_fixup_test5, ftr_fixup_test5_expected, size) == 0); } -static void test_alternative_case_with_external_branch(void) +static void __init test_alternative_case_with_external_branch(void) { extern unsigned int ftr_fixup_test6[]; extern unsigned int end_ftr_fixup_test6[]; @@ -629,7 +860,7 @@ static void 
test_alternative_case_with_external_branch(void) check(memcmp(ftr_fixup_test6, ftr_fixup_test6_expected, size) == 0); } -static void test_alternative_case_with_branch_to_end(void) +static void __init test_alternative_case_with_branch_to_end(void) { extern unsigned int ftr_fixup_test7[]; extern unsigned int end_ftr_fixup_test7[]; @@ -639,7 +870,7 @@ static void test_alternative_case_with_branch_to_end(void) check(memcmp(ftr_fixup_test7, ftr_fixup_test7_expected, size) == 0); } -static void test_cpu_macros(void) +static void __init test_cpu_macros(void) { extern u8 ftr_fixup_test_FTR_macros[]; extern u8 ftr_fixup_test_FTR_macros_expected[]; @@ -651,7 +882,7 @@ static void test_cpu_macros(void) ftr_fixup_test_FTR_macros_expected, size) == 0); } -static void test_fw_macros(void) +static void __init test_fw_macros(void) { #ifdef CONFIG_PPC64 extern u8 ftr_fixup_test_FW_FTR_macros[]; @@ -665,7 +896,7 @@ static void test_fw_macros(void) #endif } -static void test_lwsync_macros(void) +static void __init test_lwsync_macros(void) { extern u8 lwsync_fixup_test[]; extern u8 end_lwsync_fixup_test[]; @@ -684,6 +915,78 @@ static void test_lwsync_macros(void) } } +#ifdef CONFIG_PPC64 +static void __init test_prefix_patching(void) +{ + extern unsigned int ftr_fixup_prefix1[]; + extern unsigned int end_ftr_fixup_prefix1[]; + extern unsigned int ftr_fixup_prefix1_orig[]; + extern unsigned int ftr_fixup_prefix1_expected[]; + int size = sizeof(unsigned int) * (end_ftr_fixup_prefix1 - ftr_fixup_prefix1); + + fixup.value = fixup.mask = 8; + fixup.start_off = calc_offset(&fixup, ftr_fixup_prefix1 + 1); + fixup.end_off = calc_offset(&fixup, ftr_fixup_prefix1 + 3); + fixup.alt_start_off = fixup.alt_end_off = 0; + + /* Sanity check */ + check(memcmp(ftr_fixup_prefix1, ftr_fixup_prefix1_orig, size) == 0); + + patch_feature_section(0, &fixup); + check(memcmp(ftr_fixup_prefix1, ftr_fixup_prefix1_expected, size) == 0); + check(memcmp(ftr_fixup_prefix1, ftr_fixup_prefix1_orig, size) != 0); +} + +static void __init test_prefix_alt_patching(void) +{ + extern unsigned int ftr_fixup_prefix2[]; + extern unsigned int end_ftr_fixup_prefix2[]; + extern unsigned int ftr_fixup_prefix2_orig[]; + extern unsigned int ftr_fixup_prefix2_expected[]; + extern unsigned int ftr_fixup_prefix2_alt[]; + int size = sizeof(unsigned int) * (end_ftr_fixup_prefix2 - ftr_fixup_prefix2); + + fixup.value = fixup.mask = 8; + fixup.start_off = calc_offset(&fixup, ftr_fixup_prefix2 + 1); + fixup.end_off = calc_offset(&fixup, ftr_fixup_prefix2 + 3); + fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_prefix2_alt); + fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_prefix2_alt + 2); + /* Sanity check */ + check(memcmp(ftr_fixup_prefix2, ftr_fixup_prefix2_orig, size) == 0); + + patch_feature_section(0, &fixup); + check(memcmp(ftr_fixup_prefix2, ftr_fixup_prefix2_expected, size) == 0); + check(memcmp(ftr_fixup_prefix2, ftr_fixup_prefix2_orig, size) != 0); +} + +static void __init test_prefix_word_alt_patching(void) +{ + extern unsigned int ftr_fixup_prefix3[]; + extern unsigned int end_ftr_fixup_prefix3[]; + extern unsigned int ftr_fixup_prefix3_orig[]; + extern unsigned int ftr_fixup_prefix3_expected[]; + extern unsigned int ftr_fixup_prefix3_alt[]; + int size = sizeof(unsigned int) * (end_ftr_fixup_prefix3 - ftr_fixup_prefix3); + + fixup.value = fixup.mask = 8; + fixup.start_off = calc_offset(&fixup, ftr_fixup_prefix3 + 1); + fixup.end_off = calc_offset(&fixup, ftr_fixup_prefix3 + 4); + fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_prefix3_alt); 
+ fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_prefix3_alt + 3); + /* Sanity check */ + check(memcmp(ftr_fixup_prefix3, ftr_fixup_prefix3_orig, size) == 0); + + patch_feature_section(0, &fixup); + check(memcmp(ftr_fixup_prefix3, ftr_fixup_prefix3_expected, size) == 0); + patch_feature_section(0, &fixup); + check(memcmp(ftr_fixup_prefix3, ftr_fixup_prefix3_orig, size) != 0); +} +#else +static inline void test_prefix_patching(void) {} +static inline void test_prefix_alt_patching(void) {} +static inline void test_prefix_word_alt_patching(void) {} +#endif /* CONFIG_PPC64 */ + static int __init test_feature_fixups(void) { printk(KERN_DEBUG "Running feature fixup self-tests ...\n"); @@ -698,6 +1001,9 @@ static int __init test_feature_fixups(void) test_cpu_macros(); test_fw_macros(); test_lwsync_macros(); + test_prefix_patching(); + test_prefix_alt_patching(); + test_prefix_word_alt_patching(); return 0; } diff --git a/arch/powerpc/lib/hweight_64.S b/arch/powerpc/lib/hweight_64.S index 6effad901ef7..151875050da9 100644 --- a/arch/powerpc/lib/hweight_64.S +++ b/arch/powerpc/lib/hweight_64.S @@ -5,16 +5,16 @@ * * Author: Anton Blanchard <anton@au.ibm.com> */ +#include <linux/export.h> #include <asm/processor.h> #include <asm/ppc_asm.h> -#include <asm/export.h> #include <asm/feature-fixups.h> /* Note: This code relies on -mminimal-toc */ _GLOBAL(__arch_hweight8) BEGIN_FTR_SECTION - b __sw_hweight8 + b CFUNC(__sw_hweight8) nop nop FTR_SECTION_ELSE @@ -26,7 +26,7 @@ EXPORT_SYMBOL(__arch_hweight8) _GLOBAL(__arch_hweight16) BEGIN_FTR_SECTION - b __sw_hweight16 + b CFUNC(__sw_hweight16) nop nop nop @@ -49,7 +49,7 @@ EXPORT_SYMBOL(__arch_hweight16) _GLOBAL(__arch_hweight32) BEGIN_FTR_SECTION - b __sw_hweight32 + b CFUNC(__sw_hweight32) nop nop nop @@ -75,7 +75,7 @@ EXPORT_SYMBOL(__arch_hweight32) _GLOBAL(__arch_hweight64) BEGIN_FTR_SECTION - b __sw_hweight64 + b CFUNC(__sw_hweight64) nop nop nop diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c index 6440d5943c00..04165b7a163f 100644 --- a/arch/powerpc/lib/locks.c +++ b/arch/powerpc/lib/locks.c @@ -27,14 +27,14 @@ void splpar_spin_yield(arch_spinlock_t *lock) return; holder_cpu = lock_value & 0xffff; BUG_ON(holder_cpu >= NR_CPUS); - yield_count = be32_to_cpu(lppaca_of(holder_cpu).yield_count); + + yield_count = yield_count_of(holder_cpu); if ((yield_count & 1) == 0) return; /* virtual cpu is currently running */ rmb(); if (lock->slock != lock_value) return; /* something has changed */ - plpar_hcall_norets(H_CONFER, - get_hard_smp_processor_id(holder_cpu), yield_count); + yield_to_preempted(holder_cpu, yield_count); } EXPORT_SYMBOL_GPL(splpar_spin_yield); @@ -53,13 +53,13 @@ void splpar_rw_yield(arch_rwlock_t *rw) return; /* no write lock at present */ holder_cpu = lock_value & 0xffff; BUG_ON(holder_cpu >= NR_CPUS); - yield_count = be32_to_cpu(lppaca_of(holder_cpu).yield_count); + + yield_count = yield_count_of(holder_cpu); if ((yield_count & 1) == 0) return; /* virtual cpu is currently running */ rmb(); if (rw->lock != lock_value) return; /* something has changed */ - plpar_hcall_norets(H_CONFER, - get_hard_smp_processor_id(holder_cpu), yield_count); + yield_to_preempted(holder_cpu, yield_count); } #endif diff --git a/arch/powerpc/lib/mem_64.S b/arch/powerpc/lib/mem_64.S index 9351ffab409c..6fd06cd20faa 100644 --- a/arch/powerpc/lib/mem_64.S +++ b/arch/powerpc/lib/mem_64.S @@ -4,10 +4,10 @@ * * Copyright (C) 1996 Paul Mackerras. 
*/ +#include <linux/export.h> #include <asm/processor.h> #include <asm/errno.h> #include <asm/ppc_asm.h> -#include <asm/export.h> #include <asm/kasan.h> #ifndef CONFIG_KASAN diff --git a/arch/powerpc/lib/memcmp_32.S b/arch/powerpc/lib/memcmp_32.S index 5010e376f7b8..f6fca5664e91 100644 --- a/arch/powerpc/lib/memcmp_32.S +++ b/arch/powerpc/lib/memcmp_32.S @@ -7,8 +7,8 @@ * */ +#include <linux/export.h> #include <asm/ppc_asm.h> -#include <asm/export.h> .text diff --git a/arch/powerpc/lib/memcmp_64.S b/arch/powerpc/lib/memcmp_64.S index 384218df71ba..142c666d3897 100644 --- a/arch/powerpc/lib/memcmp_64.S +++ b/arch/powerpc/lib/memcmp_64.S @@ -3,8 +3,8 @@ * Author: Anton Blanchard <anton@au.ibm.com> * Copyright 2015 IBM Corporation. */ +#include <linux/export.h> #include <asm/ppc_asm.h> -#include <asm/export.h> #include <asm/ppc-opcode.h> #define off8 r6 @@ -44,7 +44,7 @@ std r5,-STACKFRAMESIZE+STK_REG(R29)(r1); \ std r0,16(r1); \ stdu r1,-STACKFRAMESIZE(r1); \ - bl enter_vmx_ops; \ + bl CFUNC(enter_vmx_ops); \ cmpwi cr1,r3,0; \ ld r0,STACKFRAMESIZE+16(r1); \ ld r3,STK_REG(R31)(r1); \ @@ -60,7 +60,7 @@ std r5,-STACKFRAMESIZE+STK_REG(R29)(r1); \ std r0,16(r1); \ stdu r1,-STACKFRAMESIZE(r1); \ - bl exit_vmx_ops; \ + bl CFUNC(exit_vmx_ops); \ ld r0,STACKFRAMESIZE+16(r1); \ ld r3,STK_REG(R31)(r1); \ ld r4,STK_REG(R30)(r1); \ diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S index 016c91e958d8..b5a67e20143f 100644 --- a/arch/powerpc/lib/memcpy_64.S +++ b/arch/powerpc/lib/memcpy_64.S @@ -2,9 +2,9 @@ /* * Copyright (C) 2002 Paul Mackerras, IBM Corp. */ +#include <linux/export.h> #include <asm/processor.h> #include <asm/ppc_asm.h> -#include <asm/export.h> #include <asm/asm-compat.h> #include <asm/feature-fixups.h> #include <asm/kasan.h> diff --git a/arch/powerpc/lib/memcpy_power7.S b/arch/powerpc/lib/memcpy_power7.S index 54f226333c94..b7c5e7fca8b9 100644 --- a/arch/powerpc/lib/memcpy_power7.S +++ b/arch/powerpc/lib/memcpy_power7.S @@ -218,7 +218,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) std r5,-STACKFRAMESIZE+STK_REG(R29)(r1) std r0,16(r1) stdu r1,-STACKFRAMESIZE(r1) - bl enter_vmx_ops + bl CFUNC(enter_vmx_ops) cmpwi cr1,r3,0 ld r0,STACKFRAMESIZE+16(r1) ld r3,STK_REG(R31)(r1) @@ -244,15 +244,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) or r7,r7,r0 ori r10,r7,1 /* stream=1 */ - lis r8,0x8000 /* GO=1 */ - clrldi r8,r8,32 - - dcbt 0,r6,0b01000 - dcbt 0,r7,0b01010 - dcbtst 0,r9,0b01000 - dcbtst 0,r10,0b01010 - eieio - dcbt 0,r8,0b01010 /* GO */ + DCBT_SETUP_STREAMS(r6, r7, r9, r10, r8) beq cr1,.Lunwind_stack_nonvmx_copy @@ -433,7 +425,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) 15: addi r1,r1,STACKFRAMESIZE ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1) - b exit_vmx_ops /* tail call optimise */ + b CFUNC(exit_vmx_ops) /* tail call optimise */ .Lvmx_unaligned_copy: /* Get the destination 16B aligned */ @@ -637,5 +629,5 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) 15: addi r1,r1,STACKFRAMESIZE ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1) - b exit_vmx_ops /* tail call optimise */ + b CFUNC(exit_vmx_ops) /* tail call optimise */ #endif /* CONFIG_ALTIVEC */ diff --git a/arch/powerpc/lib/pmem.c b/arch/powerpc/lib/pmem.c index 0666a8d29596..4e724c4c01ad 100644 --- a/arch/powerpc/lib/pmem.c +++ b/arch/powerpc/lib/pmem.c @@ -6,23 +6,60 @@ #include <linux/string.h> #include <linux/export.h> #include <linux/uaccess.h> +#include <linux/libnvdimm.h> #include <asm/cacheflush.h> +static inline void __clean_pmem_range(unsigned long start, unsigned long stop) +{ + unsigned long shift = l1_dcache_shift(); + 
unsigned long bytes = l1_dcache_bytes(); + void *addr = (void *)(start & ~(bytes - 1)); + unsigned long size = stop - (unsigned long)addr + (bytes - 1); + unsigned long i; + + for (i = 0; i < size >> shift; i++, addr += bytes) + asm volatile(PPC_DCBSTPS(%0, %1): :"i"(0), "r"(addr): "memory"); +} + +static inline void __flush_pmem_range(unsigned long start, unsigned long stop) +{ + unsigned long shift = l1_dcache_shift(); + unsigned long bytes = l1_dcache_bytes(); + void *addr = (void *)(start & ~(bytes - 1)); + unsigned long size = stop - (unsigned long)addr + (bytes - 1); + unsigned long i; + + for (i = 0; i < size >> shift; i++, addr += bytes) + asm volatile(PPC_DCBFPS(%0, %1): :"i"(0), "r"(addr): "memory"); +} + +static inline void clean_pmem_range(unsigned long start, unsigned long stop) +{ + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + return __clean_pmem_range(start, stop); +} + +static inline void flush_pmem_range(unsigned long start, unsigned long stop) +{ + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + return __flush_pmem_range(start, stop); +} + /* * CONFIG_ARCH_HAS_PMEM_API symbols */ void arch_wb_cache_pmem(void *addr, size_t size) { unsigned long start = (unsigned long) addr; - flush_dcache_range(start, start + size); + clean_pmem_range(start, start + size); } EXPORT_SYMBOL_GPL(arch_wb_cache_pmem); void arch_invalidate_pmem(void *addr, size_t size) { unsigned long start = (unsigned long) addr; - flush_dcache_range(start, start + size); + flush_pmem_range(start, start + size); } EXPORT_SYMBOL_GPL(arch_invalidate_pmem); @@ -35,25 +72,16 @@ long __copy_from_user_flushcache(void *dest, const void __user *src, unsigned long copied, start = (unsigned long) dest; copied = __copy_from_user(dest, src, size); - flush_dcache_range(start, start + size); + clean_pmem_range(start, start + size); return copied; } -void *memcpy_flushcache(void *dest, const void *src, size_t size) +void memcpy_flushcache(void *dest, const void *src, size_t size) { unsigned long start = (unsigned long) dest; memcpy(dest, src, size); - flush_dcache_range(start, start + size); - - return dest; + clean_pmem_range(start, start + size); } EXPORT_SYMBOL(memcpy_flushcache); - -void memcpy_page_flushcache(char *to, struct page *page, size_t offset, - size_t len) -{ - memcpy_flushcache(to, page_to_virt(page) + offset, len); -} -EXPORT_SYMBOL(memcpy_page_flushcache); diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c new file mode 100644 index 000000000000..5de4dd549f6e --- /dev/null +++ b/arch/powerpc/lib/qspinlock.c @@ -0,0 +1,989 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#include <linux/bug.h> +#include <linux/compiler.h> +#include <linux/export.h> +#include <linux/percpu.h> +#include <linux/processor.h> +#include <linux/smp.h> +#include <linux/topology.h> +#include <linux/sched/clock.h> +#include <asm/qspinlock.h> +#include <asm/paravirt.h> + +#define MAX_NODES 4 + +struct qnode { + struct qnode *next; + struct qspinlock *lock; + int cpu; + u8 sleepy; /* 1 if the previous vCPU was preempted or + * if the previous node was sleepy */ + u8 locked; /* 1 if lock acquired */ +}; + +struct qnodes { + int count; + struct qnode nodes[MAX_NODES]; +}; + +/* Tuning parameters */ +static int steal_spins __read_mostly = (1 << 5); +static int remote_steal_spins __read_mostly = (1 << 2); +#if _Q_SPIN_TRY_LOCK_STEAL == 1 +static const bool maybe_stealers = true; +#else +static bool maybe_stealers __read_mostly = true; +#endif +static int head_spins __read_mostly = (1 << 8); + +static bool pv_yield_owner 
__read_mostly = true; +static bool pv_yield_allow_steal __read_mostly = false; +static bool pv_spin_on_preempted_owner __read_mostly = false; +static bool pv_sleepy_lock __read_mostly = true; +static bool pv_sleepy_lock_sticky __read_mostly = false; +static u64 pv_sleepy_lock_interval_ns __read_mostly = 0; +static int pv_sleepy_lock_factor __read_mostly = 256; +static bool pv_yield_prev __read_mostly = true; +static bool pv_yield_sleepy_owner __read_mostly = true; +static bool pv_prod_head __read_mostly = false; + +static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes); +static DEFINE_PER_CPU_ALIGNED(u64, sleepy_lock_seen_clock); + +#if _Q_SPIN_SPEC_BARRIER == 1 +#define spec_barrier() do { asm volatile("ori 31,31,0" ::: "memory"); } while (0) +#else +#define spec_barrier() do { } while (0) +#endif + +static __always_inline bool recently_sleepy(void) +{ + /* pv_sleepy_lock is true when this is called */ + if (pv_sleepy_lock_interval_ns) { + u64 seen = this_cpu_read(sleepy_lock_seen_clock); + + if (seen) { + u64 delta = sched_clock() - seen; + if (delta < pv_sleepy_lock_interval_ns) + return true; + this_cpu_write(sleepy_lock_seen_clock, 0); + } + } + + return false; +} + +static __always_inline int get_steal_spins(bool paravirt, bool sleepy) +{ + if (paravirt && sleepy) + return steal_spins * pv_sleepy_lock_factor; + else + return steal_spins; +} + +static __always_inline int get_remote_steal_spins(bool paravirt, bool sleepy) +{ + if (paravirt && sleepy) + return remote_steal_spins * pv_sleepy_lock_factor; + else + return remote_steal_spins; +} + +static __always_inline int get_head_spins(bool paravirt, bool sleepy) +{ + if (paravirt && sleepy) + return head_spins * pv_sleepy_lock_factor; + else + return head_spins; +} + +static inline u32 encode_tail_cpu(int cpu) +{ + return (cpu + 1) << _Q_TAIL_CPU_OFFSET; +} + +static inline int decode_tail_cpu(u32 val) +{ + return (val >> _Q_TAIL_CPU_OFFSET) - 1; +} + +static inline int get_owner_cpu(u32 val) +{ + return (val & _Q_OWNER_CPU_MASK) >> _Q_OWNER_CPU_OFFSET; +} + +/* + * Try to acquire the lock if it was not already locked. If the tail matches + * mytail then clear it, otherwise leave it unchnaged. Return previous value. + * + * This is used by the head of the queue to acquire the lock and clean up + * its tail if it was the last one queued. + */ +static __always_inline u32 trylock_clean_tail(struct qspinlock *lock, u32 tail) +{ + u32 newval = queued_spin_encode_locked_val(); + u32 prev, tmp; + + asm volatile( +"1: lwarx %0,0,%2,%7 # trylock_clean_tail \n" + /* This test is necessary if there could be stealers */ +" andi. %1,%0,%5 \n" +" bne 3f \n" + /* Test whether the lock tail == mytail */ +" and %1,%0,%6 \n" +" cmpw 0,%1,%3 \n" + /* Merge the new locked value */ +" or %1,%1,%4 \n" +" bne 2f \n" + /* If the lock tail matched, then clear it, otherwise leave it. */ +" andc %1,%1,%6 \n" +"2: stwcx. %1,0,%2 \n" +" bne- 1b \n" +"\t" PPC_ACQUIRE_BARRIER " \n" +"3: \n" + : "=&r" (prev), "=&r" (tmp) + : "r" (&lock->val), "r"(tail), "r" (newval), + "i" (_Q_LOCKED_VAL), + "r" (_Q_TAIL_CPU_MASK), + "i" (_Q_SPIN_EH_HINT) + : "cr0", "memory"); + + return prev; +} + +/* + * Publish our tail, replacing previous tail. Return previous value. + * + * This provides a release barrier for publishing node, this pairs with the + * acquire barrier in get_tail_qnode() when the next CPU finds this tail + * value. 
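Roughly, the release/acquire pairing described here, shown as a two-CPU timeline (simplified, not the actual code paths):

	CPU A (enqueues first)                     CPU B (enqueues next)
	----------------------                     ---------------------
	node->lock = lock; node->cpu = A; ...
	publish_tail_cpu(lock, tail_A)
	    lwsync                 (release)
	    lock->val gains tail_A
	                                           old = publish_tail_cpu(lock, tail_B)
	                                           (old contains tail_A)
	                                           smp_acquire__after_ctrl_dep()
	                                           prev = get_tail_qnode(lock, A)
	                                           prev->lock / prev->cpu reads are
	                                           ordered after A's stores above
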
+ */ +static __always_inline u32 publish_tail_cpu(struct qspinlock *lock, u32 tail) +{ + u32 prev, tmp; + + kcsan_release(); + + asm volatile( +"\t" PPC_RELEASE_BARRIER " \n" +"1: lwarx %0,0,%2 # publish_tail_cpu \n" +" andc %1,%0,%4 \n" +" or %1,%1,%3 \n" +" stwcx. %1,0,%2 \n" +" bne- 1b \n" + : "=&r" (prev), "=&r"(tmp) + : "r" (&lock->val), "r" (tail), "r"(_Q_TAIL_CPU_MASK) + : "cr0", "memory"); + + return prev; +} + +static __always_inline u32 set_mustq(struct qspinlock *lock) +{ + u32 prev; + + asm volatile( +"1: lwarx %0,0,%1 # set_mustq \n" +" or %0,%0,%2 \n" +" stwcx. %0,0,%1 \n" +" bne- 1b \n" + : "=&r" (prev) + : "r" (&lock->val), "r" (_Q_MUST_Q_VAL) + : "cr0", "memory"); + + return prev; +} + +static __always_inline u32 clear_mustq(struct qspinlock *lock) +{ + u32 prev; + + asm volatile( +"1: lwarx %0,0,%1 # clear_mustq \n" +" andc %0,%0,%2 \n" +" stwcx. %0,0,%1 \n" +" bne- 1b \n" + : "=&r" (prev) + : "r" (&lock->val), "r" (_Q_MUST_Q_VAL) + : "cr0", "memory"); + + return prev; +} + +static __always_inline bool try_set_sleepy(struct qspinlock *lock, u32 old) +{ + u32 prev; + u32 new = old | _Q_SLEEPY_VAL; + + BUG_ON(!(old & _Q_LOCKED_VAL)); + BUG_ON(old & _Q_SLEEPY_VAL); + + asm volatile( +"1: lwarx %0,0,%1 # try_set_sleepy \n" +" cmpw 0,%0,%2 \n" +" bne- 2f \n" +" stwcx. %3,0,%1 \n" +" bne- 1b \n" +"2: \n" + : "=&r" (prev) + : "r" (&lock->val), "r"(old), "r" (new) + : "cr0", "memory"); + + return likely(prev == old); +} + +static __always_inline void seen_sleepy_owner(struct qspinlock *lock, u32 val) +{ + if (pv_sleepy_lock) { + if (pv_sleepy_lock_interval_ns) + this_cpu_write(sleepy_lock_seen_clock, sched_clock()); + if (!(val & _Q_SLEEPY_VAL)) + try_set_sleepy(lock, val); + } +} + +static __always_inline void seen_sleepy_lock(void) +{ + if (pv_sleepy_lock && pv_sleepy_lock_interval_ns) + this_cpu_write(sleepy_lock_seen_clock, sched_clock()); +} + +static __always_inline void seen_sleepy_node(void) +{ + if (pv_sleepy_lock) { + if (pv_sleepy_lock_interval_ns) + this_cpu_write(sleepy_lock_seen_clock, sched_clock()); + /* Don't set sleepy because we likely have a stale val */ + } +} + +static struct qnode *get_tail_qnode(struct qspinlock *lock, int prev_cpu) +{ + struct qnodes *qnodesp = per_cpu_ptr(&qnodes, prev_cpu); + int idx; + + /* + * After publishing the new tail and finding a previous tail in the + * previous val (which is the control dependency), this barrier + * orders the release barrier in publish_tail_cpu performed by the + * last CPU, with subsequently looking at its qnode structures + * after the barrier. + */ + smp_acquire__after_ctrl_dep(); + + for (idx = 0; idx < MAX_NODES; idx++) { + struct qnode *qnode = &qnodesp->nodes[idx]; + if (qnode->lock == lock) + return qnode; + } + + BUG(); +} + +/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */ +static __always_inline bool __yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool mustq) +{ + int owner; + u32 yield_count; + bool preempted = false; + + BUG_ON(!(val & _Q_LOCKED_VAL)); + + if (!paravirt) + goto relax; + + if (!pv_yield_owner) + goto relax; + + owner = get_owner_cpu(val); + yield_count = yield_count_of(owner); + + if ((yield_count & 1) == 0) + goto relax; /* owner vcpu is running */ + + spin_end(); + + seen_sleepy_owner(lock, val); + preempted = true; + + /* + * Read the lock word after sampling the yield count. 
On the other side + * there may a wmb because the yield count update is done by the + * hypervisor preemption and the value update by the OS, however this + * ordering might reduce the chance of out of order accesses and + * improve the heuristic. + */ + smp_rmb(); + + if (READ_ONCE(lock->val) == val) { + if (mustq) + clear_mustq(lock); + yield_to_preempted(owner, yield_count); + if (mustq) + set_mustq(lock); + spin_begin(); + + /* Don't relax if we yielded. Maybe we should? */ + return preempted; + } + spin_begin(); +relax: + spin_cpu_relax(); + + return preempted; +} + +/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */ +static __always_inline bool yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt) +{ + return __yield_to_locked_owner(lock, val, paravirt, false); +} + +/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */ +static __always_inline bool yield_head_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt) +{ + bool mustq = false; + + if ((val & _Q_MUST_Q_VAL) && pv_yield_allow_steal) + mustq = true; + + return __yield_to_locked_owner(lock, val, paravirt, mustq); +} + +static __always_inline void propagate_sleepy(struct qnode *node, u32 val, bool paravirt) +{ + struct qnode *next; + int owner; + + if (!paravirt) + return; + if (!pv_yield_sleepy_owner) + return; + + next = READ_ONCE(node->next); + if (!next) + return; + + if (next->sleepy) + return; + + owner = get_owner_cpu(val); + if (vcpu_is_preempted(owner)) + next->sleepy = 1; +} + +/* Called inside spin_begin() */ +static __always_inline bool yield_to_prev(struct qspinlock *lock, struct qnode *node, int prev_cpu, bool paravirt) +{ + u32 yield_count; + bool preempted = false; + + if (!paravirt) + goto relax; + + if (!pv_yield_sleepy_owner) + goto yield_prev; + + /* + * If the previous waiter was preempted it might not be able to + * propagate sleepy to us, so check the lock in that case too. + */ + if (node->sleepy || vcpu_is_preempted(prev_cpu)) { + u32 val = READ_ONCE(lock->val); + + if (val & _Q_LOCKED_VAL) { + if (node->next && !node->next->sleepy) { + /* + * Propagate sleepy to next waiter. Only if + * owner is preempted, which allows the queue + * to become "non-sleepy" if vCPU preemption + * ceases to occur, even if the lock remains + * highly contended. 
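The vCPU-preemption test used here, and throughout this file and in locks.c, relies on the dispatch ("yield") count convention: an even count means the vCPU is currently dispatched, an odd count means it is preempted. A waiter samples the count, confers only while it is odd, and hands the sampled value back so the hypervisor can discard a yield that has gone stale; a minimal sketch:

	u32 yc = yield_count_of(owner);
	if (yc & 1)				/* odd: owner vCPU is not running */
		yield_to_preempted(owner, yc);	/* H_CONFER with the sampled count */
	else
		spin_cpu_relax();		/* owner is running, just spin */
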
+ */ + if (vcpu_is_preempted(get_owner_cpu(val))) + node->next->sleepy = 1; + } + + preempted = yield_to_locked_owner(lock, val, paravirt); + if (preempted) + return preempted; + } + node->sleepy = false; + } + +yield_prev: + if (!pv_yield_prev) + goto relax; + + yield_count = yield_count_of(prev_cpu); + if ((yield_count & 1) == 0) + goto relax; /* owner vcpu is running */ + + spin_end(); + + preempted = true; + seen_sleepy_node(); + + smp_rmb(); /* See __yield_to_locked_owner comment */ + + if (!READ_ONCE(node->locked)) { + yield_to_preempted(prev_cpu, yield_count); + spin_begin(); + return preempted; + } + spin_begin(); + +relax: + spin_cpu_relax(); + + return preempted; +} + +static __always_inline bool steal_break(u32 val, int iters, bool paravirt, bool sleepy) +{ + if (iters >= get_steal_spins(paravirt, sleepy)) + return true; + + if (IS_ENABLED(CONFIG_NUMA) && + (iters >= get_remote_steal_spins(paravirt, sleepy))) { + int cpu = get_owner_cpu(val); + if (numa_node_id() != cpu_to_node(cpu)) + return true; + } + return false; +} + +static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt) +{ + bool seen_preempted = false; + bool sleepy = false; + int iters = 0; + u32 val; + + if (!steal_spins) { + /* XXX: should spin_on_preempted_owner do anything here? */ + return false; + } + + /* Attempt to steal the lock */ + spin_begin(); + do { + bool preempted = false; + + val = READ_ONCE(lock->val); + if (val & _Q_MUST_Q_VAL) + break; + spec_barrier(); + + if (unlikely(!(val & _Q_LOCKED_VAL))) { + spin_end(); + if (__queued_spin_trylock_steal(lock)) + return true; + spin_begin(); + } else { + preempted = yield_to_locked_owner(lock, val, paravirt); + } + + if (paravirt && pv_sleepy_lock) { + if (!sleepy) { + if (val & _Q_SLEEPY_VAL) { + seen_sleepy_lock(); + sleepy = true; + } else if (recently_sleepy()) { + sleepy = true; + } + } + if (pv_sleepy_lock_sticky && seen_preempted && + !(val & _Q_SLEEPY_VAL)) { + if (try_set_sleepy(lock, val)) + val |= _Q_SLEEPY_VAL; + } + } + + if (preempted) { + seen_preempted = true; + sleepy = true; + if (!pv_spin_on_preempted_owner) + iters++; + /* + * pv_spin_on_preempted_owner don't increase iters + * while the owner is preempted -- we won't interfere + * with it by definition. This could introduce some + * latency issue if we continually observe preempted + * owners, but hopefully that's a rare corner case of + * a badly oversubscribed system. + */ + } else { + iters++; + } + } while (!steal_break(val, iters, paravirt, sleepy)); + + spin_end(); + + return false; +} + +static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, bool paravirt) +{ + struct qnodes *qnodesp; + struct qnode *next, *node; + u32 val, old, tail; + bool seen_preempted = false; + bool sleepy = false; + bool mustq = false; + int idx; + int iters = 0; + + BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS)); + + qnodesp = this_cpu_ptr(&qnodes); + if (unlikely(qnodesp->count >= MAX_NODES)) { + spec_barrier(); + while (!queued_spin_trylock(lock)) + cpu_relax(); + return; + } + + idx = qnodesp->count++; + /* + * Ensure that we increment the head node->count before initialising + * the actual node. If the compiler is kind enough to reorder these + * stores, then an IRQ could overwrite our assignments. 
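A compressed illustration of the hazard this ordering avoids: an interrupt on the same CPU can itself enter the slowpath and grab a slot from the same per-CPU nodes[] array, so the slot must be reserved (count incremented) before the node is written.

	task context                               interrupt on the same CPU
	------------                               -------------------------
	idx = qnodesp->count++;
	barrier();                                 <-- IRQ taken here
	                                           sees count already bumped,
	                                           so it uses nodes[idx + 1]
	node = &qnodesp->nodes[idx];               (IRQ returns, nodes[idx] untouched)
	node->lock = lock; node->cpu = ...;

Without the barrier() the compiler could sink the count update below the node stores, and the interrupt would then pick the same index and overwrite them.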
+ */ + barrier(); + node = &qnodesp->nodes[idx]; + node->next = NULL; + node->lock = lock; + node->cpu = smp_processor_id(); + node->sleepy = 0; + node->locked = 0; + + tail = encode_tail_cpu(node->cpu); + + /* + * Assign all attributes of a node before it can be published. + * Issues an lwsync, serving as a release barrier, as well as a + * compiler barrier. + */ + old = publish_tail_cpu(lock, tail); + + /* + * If there was a previous node; link it and wait until reaching the + * head of the waitqueue. + */ + if (old & _Q_TAIL_CPU_MASK) { + int prev_cpu = decode_tail_cpu(old); + struct qnode *prev = get_tail_qnode(lock, prev_cpu); + + /* Link @node into the waitqueue. */ + WRITE_ONCE(prev->next, node); + + /* Wait for mcs node lock to be released */ + spin_begin(); + while (!READ_ONCE(node->locked)) { + spec_barrier(); + + if (yield_to_prev(lock, node, prev_cpu, paravirt)) + seen_preempted = true; + } + spec_barrier(); + spin_end(); + + smp_rmb(); /* acquire barrier for the mcs lock */ + + /* + * Generic qspinlocks have this prefetch here, but it seems + * like it could cause additional line transitions because + * the waiter will keep loading from it. + */ + if (_Q_SPIN_PREFETCH_NEXT) { + next = READ_ONCE(node->next); + if (next) + prefetchw(next); + } + } + + /* We're at the head of the waitqueue, wait for the lock. */ +again: + spin_begin(); + for (;;) { + bool preempted; + + val = READ_ONCE(lock->val); + if (!(val & _Q_LOCKED_VAL)) + break; + spec_barrier(); + + if (paravirt && pv_sleepy_lock && maybe_stealers) { + if (!sleepy) { + if (val & _Q_SLEEPY_VAL) { + seen_sleepy_lock(); + sleepy = true; + } else if (recently_sleepy()) { + sleepy = true; + } + } + if (pv_sleepy_lock_sticky && seen_preempted && + !(val & _Q_SLEEPY_VAL)) { + if (try_set_sleepy(lock, val)) + val |= _Q_SLEEPY_VAL; + } + } + + propagate_sleepy(node, val, paravirt); + preempted = yield_head_to_locked_owner(lock, val, paravirt); + if (!maybe_stealers) + continue; + + if (preempted) + seen_preempted = true; + + if (paravirt && preempted) { + sleepy = true; + + if (!pv_spin_on_preempted_owner) + iters++; + } else { + iters++; + } + + if (!mustq && iters >= get_head_spins(paravirt, sleepy)) { + mustq = true; + set_mustq(lock); + val |= _Q_MUST_Q_VAL; + } + } + spec_barrier(); + spin_end(); + + /* If we're the last queued, must clean up the tail. */ + old = trylock_clean_tail(lock, tail); + if (unlikely(old & _Q_LOCKED_VAL)) { + BUG_ON(!maybe_stealers); + goto again; /* Can only be true if maybe_stealers. */ + } + + if ((old & _Q_TAIL_CPU_MASK) == tail) + goto release; /* We were the tail, no next. */ + + /* There is a next, must wait for node->next != NULL (MCS protocol) */ + next = READ_ONCE(node->next); + if (!next) { + spin_begin(); + while (!(next = READ_ONCE(node->next))) + cpu_relax(); + spin_end(); + } + spec_barrier(); + + /* + * Unlock the next mcs waiter node. Release barrier is not required + * here because the acquirer is only accessing the lock word, and + * the acquire barrier we took the lock with orders that update vs + * this store to locked. The corresponding barrier is the smp_rmb() + * acquire barrier for mcs lock, above. 
+ */ + if (paravirt && pv_prod_head) { + int next_cpu = next->cpu; + WRITE_ONCE(next->locked, 1); + if (_Q_SPIN_MISO) + asm volatile("miso" ::: "memory"); + if (vcpu_is_preempted(next_cpu)) + prod_cpu(next_cpu); + } else { + WRITE_ONCE(next->locked, 1); + if (_Q_SPIN_MISO) + asm volatile("miso" ::: "memory"); + } + +release: + qnodesp->count--; /* release the node */ +} + +void queued_spin_lock_slowpath(struct qspinlock *lock) +{ + /* + * This looks funny, but it induces the compiler to inline both + * sides of the branch rather than share code as when the condition + * is passed as the paravirt argument to the functions. + */ + if (IS_ENABLED(CONFIG_PARAVIRT_SPINLOCKS) && is_shared_processor()) { + if (try_to_steal_lock(lock, true)) { + spec_barrier(); + return; + } + queued_spin_lock_mcs_queue(lock, true); + } else { + if (try_to_steal_lock(lock, false)) { + spec_barrier(); + return; + } + queued_spin_lock_mcs_queue(lock, false); + } +} +EXPORT_SYMBOL(queued_spin_lock_slowpath); + +#ifdef CONFIG_PARAVIRT_SPINLOCKS +void pv_spinlocks_init(void) +{ +} +#endif + +#include <linux/debugfs.h> +static int steal_spins_set(void *data, u64 val) +{ +#if _Q_SPIN_TRY_LOCK_STEAL == 1 + /* MAYBE_STEAL remains true */ + steal_spins = val; +#else + static DEFINE_MUTEX(lock); + + /* + * The lock slow path has a !maybe_stealers case that can assume + * the head of queue will not see concurrent waiters. That waiter + * is unsafe in the presence of stealers, so must keep them away + * from one another. + */ + + mutex_lock(&lock); + if (val && !steal_spins) { + maybe_stealers = true; + /* wait for queue head waiter to go away */ + synchronize_rcu(); + steal_spins = val; + } else if (!val && steal_spins) { + steal_spins = val; + /* wait for all possible stealers to go away */ + synchronize_rcu(); + maybe_stealers = false; + } else { + steal_spins = val; + } + mutex_unlock(&lock); +#endif + + return 0; +} + +static int steal_spins_get(void *data, u64 *val) +{ + *val = steal_spins; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_steal_spins, steal_spins_get, steal_spins_set, "%llu\n"); + +static int remote_steal_spins_set(void *data, u64 val) +{ + remote_steal_spins = val; + + return 0; +} + +static int remote_steal_spins_get(void *data, u64 *val) +{ + *val = remote_steal_spins; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_remote_steal_spins, remote_steal_spins_get, remote_steal_spins_set, "%llu\n"); + +static int head_spins_set(void *data, u64 val) +{ + head_spins = val; + + return 0; +} + +static int head_spins_get(void *data, u64 *val) +{ + *val = head_spins; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_head_spins, head_spins_get, head_spins_set, "%llu\n"); + +static int pv_yield_owner_set(void *data, u64 val) +{ + pv_yield_owner = !!val; + + return 0; +} + +static int pv_yield_owner_get(void *data, u64 *val) +{ + *val = pv_yield_owner; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_owner, pv_yield_owner_get, pv_yield_owner_set, "%llu\n"); + +static int pv_yield_allow_steal_set(void *data, u64 val) +{ + pv_yield_allow_steal = !!val; + + return 0; +} + +static int pv_yield_allow_steal_get(void *data, u64 *val) +{ + *val = pv_yield_allow_steal; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_allow_steal, pv_yield_allow_steal_get, pv_yield_allow_steal_set, "%llu\n"); + +static int pv_spin_on_preempted_owner_set(void *data, u64 val) +{ + pv_spin_on_preempted_owner = !!val; + + return 0; +} + +static int pv_spin_on_preempted_owner_get(void *data, u64 *val) +{ + *val = 
pv_spin_on_preempted_owner; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_spin_on_preempted_owner, pv_spin_on_preempted_owner_get, pv_spin_on_preempted_owner_set, "%llu\n"); + +static int pv_sleepy_lock_set(void *data, u64 val) +{ + pv_sleepy_lock = !!val; + + return 0; +} + +static int pv_sleepy_lock_get(void *data, u64 *val) +{ + *val = pv_sleepy_lock; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock, pv_sleepy_lock_get, pv_sleepy_lock_set, "%llu\n"); + +static int pv_sleepy_lock_sticky_set(void *data, u64 val) +{ + pv_sleepy_lock_sticky = !!val; + + return 0; +} + +static int pv_sleepy_lock_sticky_get(void *data, u64 *val) +{ + *val = pv_sleepy_lock_sticky; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_sticky, pv_sleepy_lock_sticky_get, pv_sleepy_lock_sticky_set, "%llu\n"); + +static int pv_sleepy_lock_interval_ns_set(void *data, u64 val) +{ + pv_sleepy_lock_interval_ns = val; + + return 0; +} + +static int pv_sleepy_lock_interval_ns_get(void *data, u64 *val) +{ + *val = pv_sleepy_lock_interval_ns; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_interval_ns, pv_sleepy_lock_interval_ns_get, pv_sleepy_lock_interval_ns_set, "%llu\n"); + +static int pv_sleepy_lock_factor_set(void *data, u64 val) +{ + pv_sleepy_lock_factor = val; + + return 0; +} + +static int pv_sleepy_lock_factor_get(void *data, u64 *val) +{ + *val = pv_sleepy_lock_factor; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_factor, pv_sleepy_lock_factor_get, pv_sleepy_lock_factor_set, "%llu\n"); + +static int pv_yield_prev_set(void *data, u64 val) +{ + pv_yield_prev = !!val; + + return 0; +} + +static int pv_yield_prev_get(void *data, u64 *val) +{ + *val = pv_yield_prev; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_prev, pv_yield_prev_get, pv_yield_prev_set, "%llu\n"); + +static int pv_yield_sleepy_owner_set(void *data, u64 val) +{ + pv_yield_sleepy_owner = !!val; + + return 0; +} + +static int pv_yield_sleepy_owner_get(void *data, u64 *val) +{ + *val = pv_yield_sleepy_owner; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_sleepy_owner, pv_yield_sleepy_owner_get, pv_yield_sleepy_owner_set, "%llu\n"); + +static int pv_prod_head_set(void *data, u64 val) +{ + pv_prod_head = !!val; + + return 0; +} + +static int pv_prod_head_get(void *data, u64 *val) +{ + *val = pv_prod_head; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_prod_head, pv_prod_head_get, pv_prod_head_set, "%llu\n"); + +static __init int spinlock_debugfs_init(void) +{ + debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins); + debugfs_create_file("qspl_remote_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_remote_steal_spins); + debugfs_create_file("qspl_head_spins", 0600, arch_debugfs_dir, NULL, &fops_head_spins); + if (is_shared_processor()) { + debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner); + debugfs_create_file("qspl_pv_yield_allow_steal", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_allow_steal); + debugfs_create_file("qspl_pv_spin_on_preempted_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_spin_on_preempted_owner); + debugfs_create_file("qspl_pv_sleepy_lock", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock); + debugfs_create_file("qspl_pv_sleepy_lock_sticky", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_sticky); + debugfs_create_file("qspl_pv_sleepy_lock_interval_ns", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_interval_ns); + 
debugfs_create_file("qspl_pv_sleepy_lock_factor", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_factor); + debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev); + debugfs_create_file("qspl_pv_yield_sleepy_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_sleepy_owner); + debugfs_create_file("qspl_pv_prod_head", 0600, arch_debugfs_dir, NULL, &fops_pv_prod_head); + } + + return 0; +} +device_initcall(spinlock_debugfs_init); diff --git a/arch/powerpc/lib/restart_table.c b/arch/powerpc/lib/restart_table.c new file mode 100644 index 000000000000..bccb662c1b7b --- /dev/null +++ b/arch/powerpc/lib/restart_table.c @@ -0,0 +1,56 @@ +#include <asm/interrupt.h> +#include <asm/kprobes.h> + +struct soft_mask_table_entry { + unsigned long start; + unsigned long end; +}; + +struct restart_table_entry { + unsigned long start; + unsigned long end; + unsigned long fixup; +}; + +extern struct soft_mask_table_entry __start___soft_mask_table[]; +extern struct soft_mask_table_entry __stop___soft_mask_table[]; + +extern struct restart_table_entry __start___restart_table[]; +extern struct restart_table_entry __stop___restart_table[]; + +/* Given an address, look for it in the soft mask table */ +bool search_kernel_soft_mask_table(unsigned long addr) +{ + struct soft_mask_table_entry *smte = __start___soft_mask_table; + + while (smte < __stop___soft_mask_table) { + unsigned long start = smte->start; + unsigned long end = smte->end; + + if (addr >= start && addr < end) + return true; + + smte++; + } + return false; +} +NOKPROBE_SYMBOL(search_kernel_soft_mask_table); + +/* Given an address, look for it in the kernel exception table */ +unsigned long search_kernel_restart_table(unsigned long addr) +{ + struct restart_table_entry *rte = __start___restart_table; + + while (rte < __stop___restart_table) { + unsigned long start = rte->start; + unsigned long end = rte->end; + unsigned long fixup = rte->fixup; + + if (addr >= start && addr < end) + return fixup; + + rte++; + } + return 0; +} +NOKPROBE_SYMBOL(search_kernel_restart_table); diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index c077acb983a1..e65f3fb68d06 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -13,8 +13,7 @@ #include <linux/uaccess.h> #include <asm/cpu_has_feature.h> #include <asm/cputable.h> - -extern char system_call_common[]; +#include <asm/disassemble.h> #ifdef CONFIG_PPC64 /* Bits in SRR1 that are copied from MSR */ @@ -30,6 +29,10 @@ extern char system_call_common[]; #define XER_OV32 0x00080000U #define XER_CA32 0x00040000U +#ifdef CONFIG_VSX +#define VSX_REGISTER_XTP(rd) ((((rd) & 1) << 5) | ((rd) & 0xfe)) +#endif + #ifdef CONFIG_PPC_FPU /* * Functions in ldstfp.S @@ -69,10 +72,8 @@ extern int do_stqcx(unsigned long ea, unsigned long val0, unsigned long val1, static nokprobe_inline unsigned long truncate_if_32bit(unsigned long msr, unsigned long val) { -#ifdef __powerpc64__ if ((msr & MSR_64BIT) == 0) val &= 0xffffffffUL; -#endif return val; } @@ -106,11 +107,11 @@ static nokprobe_inline long address_ok(struct pt_regs *regs, { if (!user_mode(regs)) return 1; - if (__access_ok(ea, nb, USER_DS)) + if (access_ok((void __user *)ea, nb)) return 1; - if (__access_ok(ea, 1, USER_DS)) + if (access_ok((void __user *)ea, 1)) /* Access overlaps the end of the user region */ - regs->dar = USER_DS.seg; + regs->dar = TASK_SIZE_MAX - 1; else regs->dar = ea; return 0; @@ -188,6 +189,47 @@ static nokprobe_inline unsigned long xform_ea(unsigned int instr, } /* + * Calculate 
effective address for a MLS:D-form / 8LS:D-form + * prefixed instruction + */ +static nokprobe_inline unsigned long mlsd_8lsd_ea(unsigned int instr, + unsigned int suffix, + const struct pt_regs *regs) +{ + int ra, prefix_r; + unsigned int dd; + unsigned long ea, d0, d1, d; + + prefix_r = GET_PREFIX_R(instr); + ra = GET_PREFIX_RA(suffix); + + d0 = instr & 0x3ffff; + d1 = suffix & 0xffff; + d = (d0 << 16) | d1; + + /* + * sign extend a 34 bit number + */ + dd = (unsigned int)(d >> 2); + ea = (signed int)dd; + ea = (ea << 2) | (d & 0x3); + + if (!prefix_r && ra) + ea += regs->gpr[ra]; + else if (!prefix_r && !ra) + ; /* Leave ea as is */ + else if (prefix_r) + ea += regs->nip; + + /* + * (prefix_r && ra) is an invalid form. Should already be + * checked for by caller! + */ + + return ea; +} + +/* * Return the largest power of 2, not greater than sizeof(unsigned long), * such that x is a multiple of it. */ @@ -236,39 +278,70 @@ static nokprobe_inline void do_byte_reverse(void *ptr, int nb) up[1] = tmp; break; } + case 32: { + unsigned long *up = (unsigned long *)ptr; + unsigned long tmp; + + tmp = byterev_8(up[0]); + up[0] = byterev_8(up[3]); + up[3] = tmp; + tmp = byterev_8(up[2]); + up[2] = byterev_8(up[1]); + up[1] = tmp; + break; + } + #endif default: WARN_ON_ONCE(1); } } -static nokprobe_inline int read_mem_aligned(unsigned long *dest, - unsigned long ea, int nb, - struct pt_regs *regs) +static __always_inline int +__read_mem_aligned(unsigned long *dest, unsigned long ea, int nb, struct pt_regs *regs) { - int err = 0; unsigned long x = 0; switch (nb) { case 1: - err = __get_user(x, (unsigned char __user *) ea); + unsafe_get_user(x, (unsigned char __user *)ea, Efault); break; case 2: - err = __get_user(x, (unsigned short __user *) ea); + unsafe_get_user(x, (unsigned short __user *)ea, Efault); break; case 4: - err = __get_user(x, (unsigned int __user *) ea); + unsafe_get_user(x, (unsigned int __user *)ea, Efault); break; #ifdef __powerpc64__ case 8: - err = __get_user(x, (unsigned long __user *) ea); + unsafe_get_user(x, (unsigned long __user *)ea, Efault); break; #endif } - if (!err) - *dest = x; - else + *dest = x; + return 0; + +Efault: + regs->dar = ea; + return -EFAULT; +} + +static nokprobe_inline int +read_mem_aligned(unsigned long *dest, unsigned long ea, int nb, struct pt_regs *regs) +{ + int err; + + if (is_kernel_addr(ea)) + return __read_mem_aligned(dest, ea, nb, regs); + + if (user_read_access_begin((void __user *)ea, nb)) { + err = __read_mem_aligned(dest, ea, nb, regs); + user_read_access_end(); + } else { + err = -EFAULT; regs->dar = ea; + } + return err; } @@ -276,10 +349,8 @@ static nokprobe_inline int read_mem_aligned(unsigned long *dest, * Copy from userspace to a buffer, using the largest possible * aligned accesses, up to sizeof(long). 
*/ -static nokprobe_inline int copy_mem_in(u8 *dest, unsigned long ea, int nb, - struct pt_regs *regs) +static __always_inline int __copy_mem_in(u8 *dest, unsigned long ea, int nb, struct pt_regs *regs) { - int err = 0; int c; for (; nb > 0; nb -= c) { @@ -288,31 +359,46 @@ static nokprobe_inline int copy_mem_in(u8 *dest, unsigned long ea, int nb, c = max_align(nb); switch (c) { case 1: - err = __get_user(*dest, (unsigned char __user *) ea); + unsafe_get_user(*dest, (u8 __user *)ea, Efault); break; case 2: - err = __get_user(*(u16 *)dest, - (unsigned short __user *) ea); + unsafe_get_user(*(u16 *)dest, (u16 __user *)ea, Efault); break; case 4: - err = __get_user(*(u32 *)dest, - (unsigned int __user *) ea); + unsafe_get_user(*(u32 *)dest, (u32 __user *)ea, Efault); break; #ifdef __powerpc64__ case 8: - err = __get_user(*(unsigned long *)dest, - (unsigned long __user *) ea); + unsafe_get_user(*(u64 *)dest, (u64 __user *)ea, Efault); break; #endif } - if (err) { - regs->dar = ea; - return err; - } dest += c; ea += c; } return 0; + +Efault: + regs->dar = ea; + return -EFAULT; +} + +static nokprobe_inline int copy_mem_in(u8 *dest, unsigned long ea, int nb, struct pt_regs *regs) +{ + int err; + + if (is_kernel_addr(ea)) + return __copy_mem_in(dest, ea, nb, regs); + + if (user_read_access_begin((void __user *)ea, nb)) { + err = __copy_mem_in(dest, ea, nb, regs); + user_read_access_end(); + } else { + err = -EFAULT; + regs->dar = ea; + } + + return err; } static nokprobe_inline int read_mem_unaligned(unsigned long *dest, @@ -350,30 +436,48 @@ static int read_mem(unsigned long *dest, unsigned long ea, int nb, } NOKPROBE_SYMBOL(read_mem); -static nokprobe_inline int write_mem_aligned(unsigned long val, - unsigned long ea, int nb, - struct pt_regs *regs) +static __always_inline int +__write_mem_aligned(unsigned long val, unsigned long ea, int nb, struct pt_regs *regs) { - int err = 0; - switch (nb) { case 1: - err = __put_user(val, (unsigned char __user *) ea); + unsafe_put_user(val, (unsigned char __user *)ea, Efault); break; case 2: - err = __put_user(val, (unsigned short __user *) ea); + unsafe_put_user(val, (unsigned short __user *)ea, Efault); break; case 4: - err = __put_user(val, (unsigned int __user *) ea); + unsafe_put_user(val, (unsigned int __user *)ea, Efault); break; #ifdef __powerpc64__ case 8: - err = __put_user(val, (unsigned long __user *) ea); + unsafe_put_user(val, (unsigned long __user *)ea, Efault); break; #endif } - if (err) + return 0; + +Efault: + regs->dar = ea; + return -EFAULT; +} + +static nokprobe_inline int +write_mem_aligned(unsigned long val, unsigned long ea, int nb, struct pt_regs *regs) +{ + int err; + + if (is_kernel_addr(ea)) + return __write_mem_aligned(val, ea, nb, regs); + + if (user_write_access_begin((void __user *)ea, nb)) { + err = __write_mem_aligned(val, ea, nb, regs); + user_write_access_end(); + } else { + err = -EFAULT; regs->dar = ea; + } + return err; } @@ -381,10 +485,8 @@ static nokprobe_inline int write_mem_aligned(unsigned long val, * Copy from a buffer to userspace, using the largest possible * aligned accesses, up to sizeof(long). 
*/ -static nokprobe_inline int copy_mem_out(u8 *dest, unsigned long ea, int nb, - struct pt_regs *regs) +static __always_inline int __copy_mem_out(u8 *dest, unsigned long ea, int nb, struct pt_regs *regs) { - int err = 0; int c; for (; nb > 0; nb -= c) { @@ -393,31 +495,46 @@ static nokprobe_inline int copy_mem_out(u8 *dest, unsigned long ea, int nb, c = max_align(nb); switch (c) { case 1: - err = __put_user(*dest, (unsigned char __user *) ea); + unsafe_put_user(*dest, (u8 __user *)ea, Efault); break; case 2: - err = __put_user(*(u16 *)dest, - (unsigned short __user *) ea); + unsafe_put_user(*(u16 *)dest, (u16 __user *)ea, Efault); break; case 4: - err = __put_user(*(u32 *)dest, - (unsigned int __user *) ea); + unsafe_put_user(*(u32 *)dest, (u32 __user *)ea, Efault); break; #ifdef __powerpc64__ case 8: - err = __put_user(*(unsigned long *)dest, - (unsigned long __user *) ea); + unsafe_put_user(*(u64 *)dest, (u64 __user *)ea, Efault); break; #endif } - if (err) { - regs->dar = ea; - return err; - } dest += c; ea += c; } return 0; + +Efault: + regs->dar = ea; + return -EFAULT; +} + +static nokprobe_inline int copy_mem_out(u8 *dest, unsigned long ea, int nb, struct pt_regs *regs) +{ + int err; + + if (is_kernel_addr(ea)) + return __copy_mem_out(dest, ea, nb, regs); + + if (user_write_access_begin((void __user *)ea, nb)) { + err = __copy_mem_out(dest, ea, nb, regs); + user_write_access_end(); + } else { + err = -EFAULT; + regs->dar = ea; + } + + return err; } static nokprobe_inline int write_mem_unaligned(unsigned long val, @@ -469,6 +586,8 @@ static int do_fp_load(struct instruction_op *op, unsigned long ea, } u; nb = GETSIZE(op->type); + if (nb > sizeof(u)) + return -EINVAL; if (!address_ok(regs, ea, nb)) return -EFAULT; rn = op->reg; @@ -519,6 +638,8 @@ static int do_fp_store(struct instruction_op *op, unsigned long ea, } u; nb = GETSIZE(op->type); + if (nb > sizeof(u)) + return -EINVAL; if (!address_ok(regs, ea, nb)) return -EFAULT; rn = op->reg; @@ -563,6 +684,9 @@ static nokprobe_inline int do_vec_load(int rn, unsigned long ea, u8 b[sizeof(__vector128)]; } u = {}; + if (size > sizeof(u)) + return -EINVAL; + if (!address_ok(regs, ea & ~0xfUL, 16)) return -EFAULT; /* align to multiple of size */ @@ -571,7 +695,7 @@ static nokprobe_inline int do_vec_load(int rn, unsigned long ea, if (err) return err; if (unlikely(cross_endian)) - do_byte_reverse(&u.b[ea & 0xf], size); + do_byte_reverse(&u.b[ea & 0xf], min_t(size_t, size, sizeof(u))); preempt_disable(); if (regs->msr & MSR_VEC) put_vr(rn, &u.v); @@ -590,6 +714,9 @@ static nokprobe_inline int do_vec_store(int rn, unsigned long ea, u8 b[sizeof(__vector128)]; } u; + if (size > sizeof(u)) + return -EINVAL; + if (!address_ok(regs, ea & ~0xfUL, 16)) return -EFAULT; /* align to multiple of size */ @@ -602,7 +729,7 @@ static nokprobe_inline int do_vec_store(int rn, unsigned long ea, u.v = current->thread.vr_state.vr[rn]; preempt_enable(); if (unlikely(cross_endian)) - do_byte_reverse(&u.b[ea & 0xf], size); + do_byte_reverse(&u.b[ea & 0xf], min_t(size_t, size, sizeof(u))); return copy_mem_out(&u.b[ea & 0xf], ea, size, regs); } #endif /* CONFIG_ALTIVEC */ @@ -666,6 +793,8 @@ void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg, reg->d[0] = reg->d[1] = 0; switch (op->element_size) { + case 32: + /* [p]lxvp[x] */ case 16: /* whole vector; lxv[x] or lxvl[l] */ if (size == 0) @@ -674,7 +803,7 @@ void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg, if (IS_LE && (op->vsx_flags & VSX_LDLEFT)) rev = !rev; if (rev) - 
do_byte_reverse(reg, 16); + do_byte_reverse(reg, size); break; case 8: /* scalar loads, lxvd2x, lxvdsx */ @@ -750,6 +879,22 @@ void emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg, size = GETSIZE(op->type); switch (op->element_size) { + case 32: + /* [p]stxvp[x] */ + if (size == 0) + break; + if (rev) { + /* reverse 32 bytes */ + union vsx_reg buf32[2]; + buf32[0].d[0] = byterev_8(reg[1].d[1]); + buf32[0].d[1] = byterev_8(reg[1].d[0]); + buf32[1].d[0] = byterev_8(reg[0].d[1]); + buf32[1].d[1] = byterev_8(reg[0].d[0]); + memcpy(mem, buf32, size); + } else { + memcpy(mem, reg, size); + } + break; case 16: /* stxv, stxvx, stxvl, stxvll */ if (size == 0) @@ -818,28 +963,43 @@ static nokprobe_inline int do_vsx_load(struct instruction_op *op, bool cross_endian) { int reg = op->reg; - u8 mem[16]; - union vsx_reg buf; + int i, j, nr_vsx_regs; + u8 mem[32]; + union vsx_reg buf[2]; int size = GETSIZE(op->type); if (!address_ok(regs, ea, size) || copy_mem_in(mem, ea, size, regs)) return -EFAULT; - emulate_vsx_load(op, &buf, mem, cross_endian); + nr_vsx_regs = max(1ul, size / sizeof(__vector128)); + emulate_vsx_load(op, buf, mem, cross_endian); preempt_disable(); if (reg < 32) { /* FP regs + extensions */ if (regs->msr & MSR_FP) { - load_vsrn(reg, &buf); + for (i = 0; i < nr_vsx_regs; i++) { + j = IS_LE ? nr_vsx_regs - i - 1 : i; + load_vsrn(reg + i, &buf[j].v); + } } else { - current->thread.fp_state.fpr[reg][0] = buf.d[0]; - current->thread.fp_state.fpr[reg][1] = buf.d[1]; + for (i = 0; i < nr_vsx_regs; i++) { + j = IS_LE ? nr_vsx_regs - i - 1 : i; + current->thread.fp_state.fpr[reg + i][0] = buf[j].d[0]; + current->thread.fp_state.fpr[reg + i][1] = buf[j].d[1]; + } } } else { - if (regs->msr & MSR_VEC) - load_vsrn(reg, &buf); - else - current->thread.vr_state.vr[reg - 32] = buf.v; + if (regs->msr & MSR_VEC) { + for (i = 0; i < nr_vsx_regs; i++) { + j = IS_LE ? nr_vsx_regs - i - 1 : i; + load_vsrn(reg + i, &buf[j].v); + } + } else { + for (i = 0; i < nr_vsx_regs; i++) { + j = IS_LE ? nr_vsx_regs - i - 1 : i; + current->thread.vr_state.vr[reg - 32 + i] = buf[j].v; + } + } } preempt_enable(); return 0; @@ -850,63 +1010,96 @@ static nokprobe_inline int do_vsx_store(struct instruction_op *op, bool cross_endian) { int reg = op->reg; - u8 mem[16]; - union vsx_reg buf; + int i, j, nr_vsx_regs; + u8 mem[32]; + union vsx_reg buf[2]; int size = GETSIZE(op->type); if (!address_ok(regs, ea, size)) return -EFAULT; + nr_vsx_regs = max(1ul, size / sizeof(__vector128)); preempt_disable(); if (reg < 32) { /* FP regs + extensions */ if (regs->msr & MSR_FP) { - store_vsrn(reg, &buf); + for (i = 0; i < nr_vsx_regs; i++) { + j = IS_LE ? nr_vsx_regs - i - 1 : i; + store_vsrn(reg + i, &buf[j].v); + } } else { - buf.d[0] = current->thread.fp_state.fpr[reg][0]; - buf.d[1] = current->thread.fp_state.fpr[reg][1]; + for (i = 0; i < nr_vsx_regs; i++) { + j = IS_LE ? nr_vsx_regs - i - 1 : i; + buf[j].d[0] = current->thread.fp_state.fpr[reg + i][0]; + buf[j].d[1] = current->thread.fp_state.fpr[reg + i][1]; + } } } else { - if (regs->msr & MSR_VEC) - store_vsrn(reg, &buf); - else - buf.v = current->thread.vr_state.vr[reg - 32]; + if (regs->msr & MSR_VEC) { + for (i = 0; i < nr_vsx_regs; i++) { + j = IS_LE ? nr_vsx_regs - i - 1 : i; + store_vsrn(reg + i, &buf[j].v); + } + } else { + for (i = 0; i < nr_vsx_regs; i++) { + j = IS_LE ? 
nr_vsx_regs - i - 1 : i; + buf[j].v = current->thread.vr_state.vr[reg - 32 + i]; + } + } } preempt_enable(); - emulate_vsx_store(op, &buf, mem, cross_endian); + emulate_vsx_store(op, buf, mem, cross_endian); return copy_mem_out(mem, ea, size, regs); } #endif /* CONFIG_VSX */ +static __always_inline int __emulate_dcbz(unsigned long ea) +{ + unsigned long i; + unsigned long size = l1_dcache_bytes(); + + for (i = 0; i < size; i += sizeof(long)) + unsafe_put_user(0, (unsigned long __user *)(ea + i), Efault); + + return 0; + +Efault: + return -EFAULT; +} + int emulate_dcbz(unsigned long ea, struct pt_regs *regs) { int err; - unsigned long i, size; + unsigned long size = l1_dcache_bytes(); -#ifdef __powerpc64__ - size = ppc64_caches.l1d.block_size; - if (!(regs->msr & MSR_64BIT)) - ea &= 0xffffffffUL; -#else - size = L1_CACHE_BYTES; -#endif + ea = truncate_if_32bit(regs->msr, ea); ea &= ~(size - 1); if (!address_ok(regs, ea, size)) return -EFAULT; - for (i = 0; i < size; i += sizeof(long)) { - err = __put_user(0, (unsigned long __user *) (ea + i)); - if (err) { - regs->dar = ea; - return err; - } + + if (is_kernel_addr(ea)) { + err = __emulate_dcbz(ea); + } else if (user_write_access_begin((void __user *)ea, size)) { + err = __emulate_dcbz(ea); + user_write_access_end(); + } else { + err = -EFAULT; } - return 0; + + if (err) + regs->dar = ea; + + + return err; } NOKPROBE_SYMBOL(emulate_dcbz); #define __put_user_asmx(x, addr, err, op, cr) \ __asm__ __volatile__( \ + ".machine push\n" \ + ".machine power8\n" \ "1: " op " %2,0,%3\n" \ + ".machine pop\n" \ " mfcr %1\n" \ "2:\n" \ ".section .fixup,\"ax\"\n" \ @@ -919,7 +1112,10 @@ NOKPROBE_SYMBOL(emulate_dcbz); #define __get_user_asmx(x, addr, err, op) \ __asm__ __volatile__( \ + ".machine push\n" \ + ".machine power8\n" \ "1: "op" %1,0,%2\n" \ + ".machine pop\n" \ "2:\n" \ ".section .fixup,\"ax\"\n" \ "3: li %0,%3\n" \ @@ -948,10 +1144,8 @@ static nokprobe_inline void set_cr0(const struct pt_regs *regs, op->type |= SETCC; op->ccval = (regs->ccr & 0x0fffffff) | ((regs->xer >> 3) & 0x10000000); -#ifdef __powerpc64__ if (!(regs->msr & MSR_64BIT)) val = (int) val; -#endif if (val < 0) op->ccval |= 0x80000000; else if (val > 0) @@ -979,15 +1173,11 @@ static nokprobe_inline void add_with_carry(const struct pt_regs *regs, if (carry_in) ++val; - op->type = COMPUTE + SETREG + SETXER; + op->type = COMPUTE | SETREG | SETXER; op->reg = rd; op->val = val; -#ifdef __powerpc64__ - if (!(regs->msr & MSR_64BIT)) { - val = (unsigned int) val; - val1 = (unsigned int) val1; - } -#endif + val = truncate_if_32bit(regs->msr, val); + val1 = truncate_if_32bit(regs->msr, val1); op->xerval = regs->xer; if (val < val1 || (carry_in && val == val1)) op->xerval |= XER_CA; @@ -1004,7 +1194,7 @@ static nokprobe_inline void do_cmp_signed(const struct pt_regs *regs, { unsigned int crval, shift; - op->type = COMPUTE + SETCC; + op->type = COMPUTE | SETCC; crval = (regs->xer >> 31) & 1; /* get SO bit */ if (v1 < v2) crval |= 8; @@ -1023,7 +1213,7 @@ static nokprobe_inline void do_cmp_unsigned(const struct pt_regs *regs, { unsigned int crval, shift; - op->type = COMPUTE + SETCC; + op->type = COMPUTE | SETCC; crval = (regs->xer >> 31) & 1; /* get SO bit */ if (v1 < v2) crval |= 8; @@ -1163,54 +1353,64 @@ static nokprobe_inline int trap_compare(long v1, long v2) * otherwise. 
*/ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, - unsigned int instr) + ppc_inst_t instr) { +#ifdef CONFIG_PPC64 + unsigned int suffixopcode, prefixtype, prefix_r; +#endif unsigned int opcode, ra, rb, rc, rd, spr, u; unsigned long int imm; unsigned long int val, val2; unsigned int mb, me, sh; + unsigned int word, suffix; long ival; + word = ppc_inst_val(instr); + suffix = ppc_inst_suffix(instr); + op->type = COMPUTE; - opcode = instr >> 26; + opcode = ppc_inst_primary_opcode(instr); switch (opcode) { case 16: /* bc */ op->type = BRANCH; - imm = (signed short)(instr & 0xfffc); - if ((instr & 2) == 0) + imm = (signed short)(word & 0xfffc); + if ((word & 2) == 0) imm += regs->nip; op->val = truncate_if_32bit(regs->msr, imm); - if (instr & 1) + if (word & 1) op->type |= SETLK; - if (branch_taken(instr, regs, op)) + if (branch_taken(word, regs, op)) op->type |= BRTAKEN; return 1; -#ifdef CONFIG_PPC64 case 17: /* sc */ - if ((instr & 0xfe2) == 2) + if ((word & 0xfe2) == 2) op->type = SYSCALL; - else + else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && + (word & 0xfe3) == 1) { /* scv */ + op->type = SYSCALL_VECTORED_0; + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + } else op->type = UNKNOWN; return 0; -#endif case 18: /* b */ op->type = BRANCH | BRTAKEN; - imm = instr & 0x03fffffc; + imm = word & 0x03fffffc; if (imm & 0x02000000) imm -= 0x04000000; - if ((instr & 2) == 0) + if ((word & 2) == 0) imm += regs->nip; op->val = truncate_if_32bit(regs->msr, imm); - if (instr & 1) + if (word & 1) op->type |= SETLK; return 1; case 19: - switch ((instr >> 1) & 0x3ff) { + switch ((word >> 1) & 0x3ff) { case 0: /* mcrf */ op->type = COMPUTE + SETCC; - rd = 7 - ((instr >> 23) & 0x7); - ra = 7 - ((instr >> 18) & 0x7); + rd = 7 - ((word >> 23) & 0x7); + ra = 7 - ((word >> 18) & 0x7); rd *= 4; ra *= 4; val = (regs->ccr >> ra) & 0xf; @@ -1220,16 +1420,16 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 16: /* bclr */ case 528: /* bcctr */ op->type = BRANCH; - imm = (instr & 0x400)? regs->ctr: regs->link; + imm = (word & 0x400)? 
regs->ctr: regs->link; op->val = truncate_if_32bit(regs->msr, imm); - if (instr & 1) + if (word & 1) op->type |= SETLK; - if (branch_taken(instr, regs, op)) + if (branch_taken(word, regs, op)) op->type |= BRTAKEN; return 1; case 18: /* rfid, scary */ - if (regs->msr & MSR_PR) + if (user_mode(regs)) goto priv; op->type = RFI; return 0; @@ -1247,23 +1447,23 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 417: /* crorc */ case 449: /* cror */ op->type = COMPUTE + SETCC; - ra = (instr >> 16) & 0x1f; - rb = (instr >> 11) & 0x1f; - rd = (instr >> 21) & 0x1f; + ra = (word >> 16) & 0x1f; + rb = (word >> 11) & 0x1f; + rd = (word >> 21) & 0x1f; ra = (regs->ccr >> (31 - ra)) & 1; rb = (regs->ccr >> (31 - rb)) & 1; - val = (instr >> (6 + ra * 2 + rb)) & 1; + val = (word >> (6 + ra * 2 + rb)) & 1; op->ccval = (regs->ccr & ~(1UL << (31 - rd))) | (val << (31 - rd)); return 1; } break; case 31: - switch ((instr >> 1) & 0x3ff) { + switch ((word >> 1) & 0x3ff) { case 598: /* sync */ op->type = BARRIER + BARRIER_SYNC; #ifdef __powerpc64__ - switch ((instr >> 21) & 3) { + switch ((word >> 21) & 3) { case 1: /* lwsync */ op->type = BARRIER + BARRIER_LWSYNC; break; @@ -1281,33 +1481,57 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; } - /* Following cases refer to regs->gpr[], so we need all regs */ - if (!FULL_REGS(regs)) - return -1; - - rd = (instr >> 21) & 0x1f; - ra = (instr >> 16) & 0x1f; - rb = (instr >> 11) & 0x1f; - rc = (instr >> 6) & 0x1f; + rd = (word >> 21) & 0x1f; + ra = (word >> 16) & 0x1f; + rb = (word >> 11) & 0x1f; + rc = (word >> 6) & 0x1f; switch (opcode) { #ifdef __powerpc64__ + case 1: + if (!cpu_has_feature(CPU_FTR_ARCH_31)) + goto unknown_opcode; + + prefix_r = GET_PREFIX_R(word); + ra = GET_PREFIX_RA(suffix); + rd = (suffix >> 21) & 0x1f; + op->reg = rd; + op->val = regs->gpr[rd]; + suffixopcode = get_op(suffix); + prefixtype = (word >> 24) & 0x3; + switch (prefixtype) { + case 2: + if (prefix_r && ra) + return 0; + switch (suffixopcode) { + case 14: /* paddi */ + op->type = COMPUTE | PREFIXED; + op->val = mlsd_8lsd_ea(word, suffix, regs); + goto compute_done; + } + } + break; case 2: /* tdi */ - if (rd & trap_compare(regs->gpr[ra], (short) instr)) + if (rd & trap_compare(regs->gpr[ra], (short) word)) goto trap; return 1; #endif case 3: /* twi */ - if (rd & trap_compare((int)regs->gpr[ra], (short) instr)) + if (rd & trap_compare((int)regs->gpr[ra], (short) word)) goto trap; return 1; #ifdef __powerpc64__ case 4: + /* + * There are very many instructions with this primary opcode + * introduced in the ISA as early as v2.03. However, the ones + * we currently emulate were all introduced with ISA 3.0 + */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; - switch (instr & 0x3f) { + switch (word & 0x3f) { case 48: /* maddhd */ asm volatile(PPC_MADDHD(%0, %1, %2, %3) : "=r" (op->val) : "r" (regs->gpr[ra]), @@ -1331,20 +1555,20 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, * There are other instructions from ISA 3.0 with the same * primary opcode which do not have emulation support yet. 
*/ - return -1; + goto unknown_opcode; #endif case 7: /* mulli */ - op->val = regs->gpr[ra] * (short) instr; + op->val = regs->gpr[ra] * (short) word; goto compute_done; case 8: /* subfic */ - imm = (short) instr; + imm = (short) word; add_with_carry(regs, op, rd, ~regs->gpr[ra], imm, 1); return 1; case 10: /* cmpli */ - imm = (unsigned short) instr; + imm = (unsigned short) word; val = regs->gpr[ra]; #ifdef __powerpc64__ if ((rd & 1) == 0) @@ -1354,7 +1578,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, return 1; case 11: /* cmpi */ - imm = (short) instr; + imm = (short) word; val = regs->gpr[ra]; #ifdef __powerpc64__ if ((rd & 1) == 0) @@ -1364,35 +1588,37 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, return 1; case 12: /* addic */ - imm = (short) instr; + imm = (short) word; add_with_carry(regs, op, rd, regs->gpr[ra], imm, 0); return 1; case 13: /* addic. */ - imm = (short) instr; + imm = (short) word; add_with_carry(regs, op, rd, regs->gpr[ra], imm, 0); set_cr0(regs, op); return 1; case 14: /* addi */ - imm = (short) instr; + imm = (short) word; if (ra) imm += regs->gpr[ra]; op->val = imm; goto compute_done; case 15: /* addis */ - imm = ((short) instr) << 16; + imm = ((short) word) << 16; if (ra) imm += regs->gpr[ra]; op->val = imm; goto compute_done; case 19: - if (((instr >> 1) & 0x1f) == 2) { + if (((word >> 1) & 0x1f) == 2) { /* addpcis */ - imm = (short) (instr & 0xffc1); /* d0 + d2 fields */ - imm |= (instr >> 15) & 0x3e; /* d1 field */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + imm = (short) (word & 0xffc1); /* d0 + d2 fields */ + imm |= (word >> 15) & 0x3e; /* d1 field */ op->val = regs->nip + (imm << 16) + 4; goto compute_done; } @@ -1400,65 +1626,65 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, return 0; case 20: /* rlwimi */ - mb = (instr >> 6) & 0x1f; - me = (instr >> 1) & 0x1f; + mb = (word >> 6) & 0x1f; + me = (word >> 1) & 0x1f; val = DATA32(regs->gpr[rd]); imm = MASK32(mb, me); op->val = (regs->gpr[ra] & ~imm) | (ROTATE(val, rb) & imm); goto logical_done; case 21: /* rlwinm */ - mb = (instr >> 6) & 0x1f; - me = (instr >> 1) & 0x1f; + mb = (word >> 6) & 0x1f; + me = (word >> 1) & 0x1f; val = DATA32(regs->gpr[rd]); op->val = ROTATE(val, rb) & MASK32(mb, me); goto logical_done; case 23: /* rlwnm */ - mb = (instr >> 6) & 0x1f; - me = (instr >> 1) & 0x1f; + mb = (word >> 6) & 0x1f; + me = (word >> 1) & 0x1f; rb = regs->gpr[rb] & 0x1f; val = DATA32(regs->gpr[rd]); op->val = ROTATE(val, rb) & MASK32(mb, me); goto logical_done; case 24: /* ori */ - op->val = regs->gpr[rd] | (unsigned short) instr; + op->val = regs->gpr[rd] | (unsigned short) word; goto logical_done_nocc; case 25: /* oris */ - imm = (unsigned short) instr; + imm = (unsigned short) word; op->val = regs->gpr[rd] | (imm << 16); goto logical_done_nocc; case 26: /* xori */ - op->val = regs->gpr[rd] ^ (unsigned short) instr; + op->val = regs->gpr[rd] ^ (unsigned short) word; goto logical_done_nocc; case 27: /* xoris */ - imm = (unsigned short) instr; + imm = (unsigned short) word; op->val = regs->gpr[rd] ^ (imm << 16); goto logical_done_nocc; case 28: /* andi. */ - op->val = regs->gpr[rd] & (unsigned short) instr; + op->val = regs->gpr[rd] & (unsigned short) word; set_cr0(regs, op); goto logical_done_nocc; case 29: /* andis. 
*/ - imm = (unsigned short) instr; + imm = (unsigned short) word; op->val = regs->gpr[rd] & (imm << 16); set_cr0(regs, op); goto logical_done_nocc; #ifdef __powerpc64__ case 30: /* rld* */ - mb = ((instr >> 6) & 0x1f) | (instr & 0x20); + mb = ((word >> 6) & 0x1f) | (word & 0x20); val = regs->gpr[rd]; - if ((instr & 0x10) == 0) { - sh = rb | ((instr & 2) << 4); + if ((word & 0x10) == 0) { + sh = rb | ((word & 2) << 4); val = ROTATE(val, sh); - switch ((instr >> 2) & 3) { + switch ((word >> 2) & 3) { case 0: /* rldicl */ val &= MASK64_L(mb); break; @@ -1478,7 +1704,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, } else { sh = regs->gpr[rb] & 0x3f; val = ROTATE(val, sh); - switch ((instr >> 1) & 7) { + switch ((word >> 1) & 7) { case 0: /* rldcl */ op->val = val & MASK64_L(mb); goto logical_done; @@ -1493,8 +1719,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 31: /* isel occupies 32 minor opcodes */ - if (((instr >> 1) & 0x1f) == 15) { - mb = (instr >> 6) & 0x1f; /* bc field */ + if (((word >> 1) & 0x1f) == 15) { + mb = (word >> 6) & 0x1f; /* bc field */ val = (regs->ccr >> (31 - mb)) & 1; val2 = (ra) ? regs->gpr[ra] : 0; @@ -1502,7 +1728,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, goto compute_done; } - switch ((instr >> 1) & 0x3ff) { + switch ((word >> 1) & 0x3ff) { case 4: /* tw */ if (rd == 0x1f || (rd & trap_compare((int)regs->gpr[ra], @@ -1516,13 +1742,13 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, return 1; #endif case 83: /* mfmsr */ - if (regs->msr & MSR_PR) + if (user_mode(regs)) goto priv; op->type = MFMSR; op->reg = rd; return 0; case 146: /* mtmsr */ - if (regs->msr & MSR_PR) + if (user_mode(regs)) goto priv; op->type = MTMSR; op->reg = rd; @@ -1530,23 +1756,23 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, return 0; #ifdef CONFIG_PPC64 case 178: /* mtmsrd */ - if (regs->msr & MSR_PR) + if (user_mode(regs)) goto priv; op->type = MTMSR; op->reg = rd; /* only MSR_EE and MSR_RI get changed if bit 15 set */ /* mtmsrd doesn't change MSR_HV, MSR_ME or MSR_LE */ - imm = (instr & 0x10000)? 0x8002: 0xefffffffffffeffeUL; + imm = (word & 0x10000)? 0x8002: 0xefffffffffffeffeUL; op->val = imm; return 0; #endif case 19: /* mfcr */ imm = 0xffffffffUL; - if ((instr >> 20) & 1) { + if ((word >> 20) & 1) { imm = 0xf0000000UL; for (sh = 0; sh < 8; ++sh) { - if (instr & (0x80000 >> sh)) + if (word & (0x80000 >> sh)) break; imm >>= 4; } @@ -1554,13 +1780,35 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, op->val = regs->ccr & imm; goto compute_done; + case 128: /* setb */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + /* + * 'ra' encodes the CR field number (bfa) in the top 3 bits. + * Since each CR field is 4 bits, + * we can simply mask off the bottom two bits (bfa * 4) + * to yield the first bit in the CR field. 
+ */ + ra = ra & ~0x3; + /* 'val' stores bits of the CR field (bfa) */ + val = regs->ccr >> (CR0_SHIFT - ra); + /* checks if the LT bit of CR field (bfa) is set */ + if (val & 8) + op->val = -1; + /* checks if the GT bit of CR field (bfa) is set */ + else if (val & 4) + op->val = 1; + else + op->val = 0; + goto compute_done; + case 144: /* mtcrf */ op->type = COMPUTE + SETCC; imm = 0xf0000000UL; val = regs->gpr[rd]; op->ccval = regs->ccr; for (sh = 0; sh < 8; ++sh) { - if (instr & (0x80000 >> sh)) + if (word & (0x80000 >> sh)) op->ccval = (op->ccval & ~imm) | (val & imm); imm >>= 4; @@ -1568,7 +1816,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, return 1; case 339: /* mfspr */ - spr = ((instr >> 16) & 0x1f) | ((instr >> 6) & 0x3e0); + spr = ((word >> 16) & 0x1f) | ((word >> 6) & 0x3e0); op->type = MFSPR; op->reg = rd; op->spr = spr; @@ -1578,7 +1826,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, return 0; case 467: /* mtspr */ - spr = ((instr >> 16) & 0x1f) | ((instr >> 6) & 0x3e0); + spr = ((word >> 16) & 0x1f) | ((word >> 6) & 0x3e0); op->type = MTSPR; op->val = regs->gpr[rd]; op->spr = spr; @@ -1703,7 +1951,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #ifdef __powerpc64__ case 265: /* modud */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; op->val = regs->gpr[ra] % regs->gpr[rb]; goto compute_done; #endif @@ -1713,7 +1961,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 267: /* moduw */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; op->val = (unsigned int) regs->gpr[ra] % (unsigned int) regs->gpr[rb]; goto compute_done; @@ -1736,10 +1984,21 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, op->val = (int) regs->gpr[ra] / (int) regs->gpr[rb]; goto arith_done; - +#ifdef __powerpc64__ + case 425: /* divde[.] */ + asm volatile(PPC_DIVDE(%0, %1, %2) : + "=r" (op->val) : "r" (regs->gpr[ra]), + "r" (regs->gpr[rb])); + goto arith_done; + case 393: /* divdeu[.] */ + asm volatile(PPC_DIVDEU(%0, %1, %2) : + "=r" (op->val) : "r" (regs->gpr[ra]), + "r" (regs->gpr[rb])); + goto arith_done; +#endif case 755: /* darn */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; switch (ra & 0x3) { case 0: /* 32-bit conditioned */ @@ -1757,18 +2016,18 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, goto compute_done; } - return -1; + goto unknown_opcode; #ifdef __powerpc64__ case 777: /* modsd */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; op->val = (long int) regs->gpr[ra] % (long int) regs->gpr[rb]; goto compute_done; #endif case 779: /* modsw */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; op->val = (int) regs->gpr[ra] % (int) regs->gpr[rb]; goto compute_done; @@ -1845,14 +2104,14 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #endif case 538: /* cnttzw */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; val = (unsigned int) regs->gpr[rd]; op->val = (val ? __builtin_ctz(val) : 32); goto logical_done; #ifdef __powerpc64__ case 570: /* cnttzd */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; val = regs->gpr[rd]; op->val = (val ? 
__builtin_ctzl(val) : 64); goto logical_done; @@ -1948,7 +2207,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 826: /* sradi with sh_5 = 0 */ case 827: /* sradi with sh_5 = 1 */ op->type = COMPUTE + SETREG + SETXER; - sh = rb | ((instr & 2) << 4); + sh = rb | ((word & 2) << 4); ival = (signed long int) regs->gpr[rd]; op->val = ival >> sh; op->xerval = regs->xer; @@ -1962,9 +2221,9 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 890: /* extswsli with sh_5 = 0 */ case 891: /* extswsli with sh_5 = 1 */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; op->type = COMPUTE + SETREG; - sh = rb | ((instr & 2) << 4); + sh = rb | ((word & 2) << 4); val = (signed int) regs->gpr[rd]; if (sh) op->val = ROTATE(val, sh) & MASK64(0, 63 - sh); @@ -1979,34 +2238,34 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, */ case 54: /* dcbst */ op->type = MKOP(CACHEOP, DCBST, 0); - op->ea = xform_ea(instr, regs); + op->ea = xform_ea(word, regs); return 0; case 86: /* dcbf */ op->type = MKOP(CACHEOP, DCBF, 0); - op->ea = xform_ea(instr, regs); + op->ea = xform_ea(word, regs); return 0; case 246: /* dcbtst */ op->type = MKOP(CACHEOP, DCBTST, 0); - op->ea = xform_ea(instr, regs); + op->ea = xform_ea(word, regs); op->reg = rd; return 0; case 278: /* dcbt */ op->type = MKOP(CACHEOP, DCBTST, 0); - op->ea = xform_ea(instr, regs); + op->ea = xform_ea(word, regs); op->reg = rd; return 0; case 982: /* icbi */ op->type = MKOP(CACHEOP, ICBI, 0); - op->ea = xform_ea(instr, regs); + op->ea = xform_ea(word, regs); return 0; case 1014: /* dcbz */ op->type = MKOP(CACHEOP, DCBZ, 0); - op->ea = xform_ea(instr, regs); + op->ea = xform_ea(word, regs); return 0; } break; @@ -2019,14 +2278,14 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, op->update_reg = ra; op->reg = rd; op->val = regs->gpr[rd]; - u = (instr >> 20) & UPDATE; + u = (word >> 20) & UPDATE; op->vsx_flags = 0; switch (opcode) { case 31: - u = instr & UPDATE; - op->ea = xform_ea(instr, regs); - switch ((instr >> 1) & 0x3ff) { + u = word & UPDATE; + op->ea = xform_ea(word, regs); + switch ((word >> 1) & 0x3ff) { case 20: /* lwarx */ op->type = MKOP(LARX, 0, 4); break; @@ -2035,15 +2294,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, op->type = MKOP(STCX, 0, 4); break; -#ifdef __powerpc64__ - case 84: /* ldarx */ - op->type = MKOP(LARX, 0, 8); - break; - - case 214: /* stdcx. */ - op->type = MKOP(STCX, 0, 8); - break; - +#ifdef CONFIG_PPC_HAS_LBARX_LHARX case 52: /* lbarx */ op->type = MKOP(LARX, 0, 1); break; @@ -2059,6 +2310,15 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 726: /* sthcx. */ op->type = MKOP(STCX, 0, 2); break; +#endif +#ifdef __powerpc64__ + case 84: /* ldarx */ + op->type = MKOP(LARX, 0, 8); + break; + + case 214: /* stdcx. 
*/ + op->type = MKOP(STCX, 0, 8); + break; case 276: /* lqarx */ if (!((rd & 1) || rd == ra || rd == rb)) @@ -2271,25 +2531,27 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #ifdef CONFIG_VSX case 12: /* lxsiwzx */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 4); op->element_size = 8; break; case 76: /* lxsiwax */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, SIGNEXT, 4); op->element_size = 8; break; case 140: /* stxsiwx */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 4); op->element_size = 8; break; case 268: /* lxvx */ - op->reg = rd | ((instr & 1) << 5); + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 16); op->element_size = 16; op->vsx_flags = VSX_CHECK_VEC; @@ -2298,33 +2560,47 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 269: /* lxvl */ case 301: { /* lxvll */ int nb; - op->reg = rd | ((instr & 1) << 5); + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->reg = rd | ((word & 1) << 5); op->ea = ra ? regs->gpr[ra] : 0; nb = regs->gpr[rb] & 0xff; if (nb > 16) nb = 16; op->type = MKOP(LOAD_VSX, 0, nb); op->element_size = 16; - op->vsx_flags = ((instr & 0x20) ? VSX_LDLEFT : 0) | + op->vsx_flags = ((word & 0x20) ? VSX_LDLEFT : 0) | VSX_CHECK_VEC; break; } case 332: /* lxvdsx */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 8); op->element_size = 8; op->vsx_flags = VSX_SPLAT; break; + case 333: /* lxvpx */ + if (!cpu_has_feature(CPU_FTR_ARCH_31)) + goto unknown_opcode; + op->reg = VSX_REGISTER_XTP(rd); + op->type = MKOP(LOAD_VSX, 0, 32); + op->element_size = 32; + break; + case 364: /* lxvwsx */ - op->reg = rd | ((instr & 1) << 5); + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 4); op->element_size = 4; op->vsx_flags = VSX_SPLAT | VSX_CHECK_VEC; break; case 396: /* stxvx */ - op->reg = rd | ((instr & 1) << 5); + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 16); op->element_size = 16; op->vsx_flags = VSX_CHECK_VEC; @@ -2333,118 +2609,143 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 397: /* stxvl */ case 429: { /* stxvll */ int nb; - op->reg = rd | ((instr & 1) << 5); + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->reg = rd | ((word & 1) << 5); op->ea = ra ? regs->gpr[ra] : 0; nb = regs->gpr[rb] & 0xff; if (nb > 16) nb = 16; op->type = MKOP(STORE_VSX, 0, nb); op->element_size = 16; - op->vsx_flags = ((instr & 0x20) ? VSX_LDLEFT : 0) | + op->vsx_flags = ((word & 0x20) ? 
VSX_LDLEFT : 0) | VSX_CHECK_VEC; break; } + case 461: /* stxvpx */ + if (!cpu_has_feature(CPU_FTR_ARCH_31)) + goto unknown_opcode; + op->reg = VSX_REGISTER_XTP(rd); + op->type = MKOP(STORE_VSX, 0, 32); + op->element_size = 32; + break; case 524: /* lxsspx */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 4); op->element_size = 8; op->vsx_flags = VSX_FPCONV; break; case 588: /* lxsdx */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 8); op->element_size = 8; break; case 652: /* stxsspx */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 4); op->element_size = 8; op->vsx_flags = VSX_FPCONV; break; case 716: /* stxsdx */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 8); op->element_size = 8; break; case 780: /* lxvw4x */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 16); op->element_size = 4; break; case 781: /* lxsibzx */ - op->reg = rd | ((instr & 1) << 5); + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 1); op->element_size = 8; op->vsx_flags = VSX_CHECK_VEC; break; case 812: /* lxvh8x */ - op->reg = rd | ((instr & 1) << 5); + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 16); op->element_size = 2; op->vsx_flags = VSX_CHECK_VEC; break; case 813: /* lxsihzx */ - op->reg = rd | ((instr & 1) << 5); + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 2); op->element_size = 8; op->vsx_flags = VSX_CHECK_VEC; break; case 844: /* lxvd2x */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 16); op->element_size = 8; break; case 876: /* lxvb16x */ - op->reg = rd | ((instr & 1) << 5); + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 16); op->element_size = 1; op->vsx_flags = VSX_CHECK_VEC; break; case 908: /* stxvw4x */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 16); op->element_size = 4; break; case 909: /* stxsibx */ - op->reg = rd | ((instr & 1) << 5); + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 1); op->element_size = 8; op->vsx_flags = VSX_CHECK_VEC; break; case 940: /* stxvh8x */ - op->reg = rd | ((instr & 1) << 5); + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 16); op->element_size = 2; op->vsx_flags = VSX_CHECK_VEC; break; case 941: /* stxsihx */ - op->reg = rd | ((instr & 1) << 5); + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 2); op->element_size = 8; op->vsx_flags = VSX_CHECK_VEC; break; case 972: /* stxvd2x */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 16); op->element_size = 8; break; case 1004: /* stxvb16x */ - op->reg = rd | ((instr & 1) << 5); + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 16); 
op->element_size = 1; op->vsx_flags = VSX_CHECK_VEC; @@ -2457,80 +2758,80 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 32: /* lwz */ case 33: /* lwzu */ op->type = MKOP(LOAD, u, 4); - op->ea = dform_ea(instr, regs); + op->ea = dform_ea(word, regs); break; case 34: /* lbz */ case 35: /* lbzu */ op->type = MKOP(LOAD, u, 1); - op->ea = dform_ea(instr, regs); + op->ea = dform_ea(word, regs); break; case 36: /* stw */ case 37: /* stwu */ op->type = MKOP(STORE, u, 4); - op->ea = dform_ea(instr, regs); + op->ea = dform_ea(word, regs); break; case 38: /* stb */ case 39: /* stbu */ op->type = MKOP(STORE, u, 1); - op->ea = dform_ea(instr, regs); + op->ea = dform_ea(word, regs); break; case 40: /* lhz */ case 41: /* lhzu */ op->type = MKOP(LOAD, u, 2); - op->ea = dform_ea(instr, regs); + op->ea = dform_ea(word, regs); break; case 42: /* lha */ case 43: /* lhau */ op->type = MKOP(LOAD, SIGNEXT | u, 2); - op->ea = dform_ea(instr, regs); + op->ea = dform_ea(word, regs); break; case 44: /* sth */ case 45: /* sthu */ op->type = MKOP(STORE, u, 2); - op->ea = dform_ea(instr, regs); + op->ea = dform_ea(word, regs); break; case 46: /* lmw */ if (ra >= rd) break; /* invalid form, ra in range to load */ op->type = MKOP(LOAD_MULTI, 0, 4 * (32 - rd)); - op->ea = dform_ea(instr, regs); + op->ea = dform_ea(word, regs); break; case 47: /* stmw */ op->type = MKOP(STORE_MULTI, 0, 4 * (32 - rd)); - op->ea = dform_ea(instr, regs); + op->ea = dform_ea(word, regs); break; #ifdef CONFIG_PPC_FPU case 48: /* lfs */ case 49: /* lfsu */ op->type = MKOP(LOAD_FP, u | FPCONV, 4); - op->ea = dform_ea(instr, regs); + op->ea = dform_ea(word, regs); break; case 50: /* lfd */ case 51: /* lfdu */ op->type = MKOP(LOAD_FP, u, 8); - op->ea = dform_ea(instr, regs); + op->ea = dform_ea(word, regs); break; case 52: /* stfs */ case 53: /* stfsu */ op->type = MKOP(STORE_FP, u | FPCONV, 4); - op->ea = dform_ea(instr, regs); + op->ea = dform_ea(word, regs); break; case 54: /* stfd */ case 55: /* stfdu */ op->type = MKOP(STORE_FP, u, 8); - op->ea = dform_ea(instr, regs); + op->ea = dform_ea(word, regs); break; #endif @@ -2538,26 +2839,30 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 56: /* lq */ if (!((rd & 1) || (rd == ra))) op->type = MKOP(LOAD, 0, 16); - op->ea = dqform_ea(instr, regs); + op->ea = dqform_ea(word, regs); break; #endif #ifdef CONFIG_VSX case 57: /* lfdp, lxsd, lxssp */ - op->ea = dsform_ea(instr, regs); - switch (instr & 3) { + op->ea = dsform_ea(word, regs); + switch (word & 3) { case 0: /* lfdp */ if (rd & 1) break; /* reg must be even */ op->type = MKOP(LOAD_FP, 0, 16); break; case 2: /* lxsd */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd + 32; op->type = MKOP(LOAD_VSX, 0, 8); op->element_size = 8; op->vsx_flags = VSX_CHECK_VEC; break; case 3: /* lxssp */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd + 32; op->type = MKOP(LOAD_VSX, 0, 4); op->element_size = 8; @@ -2569,8 +2874,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #ifdef __powerpc64__ case 58: /* ld[u], lwa */ - op->ea = dsform_ea(instr, regs); - switch (instr & 3) { + op->ea = dsform_ea(word, regs); + switch (word & 3) { case 0: /* ld */ op->type = MKOP(LOAD, 0, 8); break; @@ -2585,17 +2890,35 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #endif #ifdef CONFIG_VSX + case 6: + if (!cpu_has_feature(CPU_FTR_ARCH_31)) + goto unknown_opcode; + op->ea = dqform_ea(word, regs); + 
op->reg = VSX_REGISTER_XTP(rd); + op->element_size = 32; + switch (word & 0xf) { + case 0: /* lxvp */ + op->type = MKOP(LOAD_VSX, 0, 32); + break; + case 1: /* stxvp */ + op->type = MKOP(STORE_VSX, 0, 32); + break; + } + break; + case 61: /* stfdp, lxv, stxsd, stxssp, stxv */ - switch (instr & 7) { + switch (word & 7) { case 0: /* stfdp with LSB of DS field = 0 */ case 4: /* stfdp with LSB of DS field = 1 */ - op->ea = dsform_ea(instr, regs); + op->ea = dsform_ea(word, regs); op->type = MKOP(STORE_FP, 0, 16); break; case 1: /* lxv */ - op->ea = dqform_ea(instr, regs); - if (instr & 8) + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->ea = dqform_ea(word, regs); + if (word & 8) op->reg = rd + 32; op->type = MKOP(LOAD_VSX, 0, 16); op->element_size = 16; @@ -2604,7 +2927,9 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 2: /* stxsd with LSB of DS field = 0 */ case 6: /* stxsd with LSB of DS field = 1 */ - op->ea = dsform_ea(instr, regs); + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->ea = dsform_ea(word, regs); op->reg = rd + 32; op->type = MKOP(STORE_VSX, 0, 8); op->element_size = 8; @@ -2613,7 +2938,9 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 3: /* stxssp with LSB of DS field = 0 */ case 7: /* stxssp with LSB of DS field = 1 */ - op->ea = dsform_ea(instr, regs); + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->ea = dsform_ea(word, regs); op->reg = rd + 32; op->type = MKOP(STORE_VSX, 0, 4); op->element_size = 8; @@ -2621,8 +2948,10 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 5: /* stxv */ - op->ea = dqform_ea(instr, regs); - if (instr & 8) + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->ea = dqform_ea(word, regs); + if (word & 8) op->reg = rd + 32; op->type = MKOP(STORE_VSX, 0, 16); op->element_size = 16; @@ -2634,8 +2963,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #ifdef __powerpc64__ case 62: /* std[u] */ - op->ea = dsform_ea(instr, regs); - switch (instr & 3) { + op->ea = dsform_ea(word, regs); + switch (word & 3) { case 0: /* std */ op->type = MKOP(STORE, 0, 8); break; @@ -2648,10 +2977,161 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; } break; + case 1: /* Prefixed instructions */ + if (!cpu_has_feature(CPU_FTR_ARCH_31)) + goto unknown_opcode; + + prefix_r = GET_PREFIX_R(word); + ra = GET_PREFIX_RA(suffix); + op->update_reg = ra; + rd = (suffix >> 21) & 0x1f; + op->reg = rd; + op->val = regs->gpr[rd]; + + suffixopcode = get_op(suffix); + prefixtype = (word >> 24) & 0x3; + switch (prefixtype) { + case 0: /* Type 00 Eight-Byte Load/Store */ + if (prefix_r && ra) + break; + op->ea = mlsd_8lsd_ea(word, suffix, regs); + switch (suffixopcode) { + case 41: /* plwa */ + op->type = MKOP(LOAD, PREFIXED | SIGNEXT, 4); + break; +#ifdef CONFIG_VSX + case 42: /* plxsd */ + op->reg = rd + 32; + op->type = MKOP(LOAD_VSX, PREFIXED, 8); + op->element_size = 8; + op->vsx_flags = VSX_CHECK_VEC; + break; + case 43: /* plxssp */ + op->reg = rd + 32; + op->type = MKOP(LOAD_VSX, PREFIXED, 4); + op->element_size = 8; + op->vsx_flags = VSX_FPCONV | VSX_CHECK_VEC; + break; + case 46: /* pstxsd */ + op->reg = rd + 32; + op->type = MKOP(STORE_VSX, PREFIXED, 8); + op->element_size = 8; + op->vsx_flags = VSX_CHECK_VEC; + break; + case 47: /* pstxssp */ + op->reg = rd + 32; + op->type = MKOP(STORE_VSX, PREFIXED, 4); + op->element_size = 8; + 
op->vsx_flags = VSX_FPCONV | VSX_CHECK_VEC; + break; + case 51: /* plxv1 */ + op->reg += 32; + fallthrough; + case 50: /* plxv0 */ + op->type = MKOP(LOAD_VSX, PREFIXED, 16); + op->element_size = 16; + op->vsx_flags = VSX_CHECK_VEC; + break; + case 55: /* pstxv1 */ + op->reg = rd + 32; + fallthrough; + case 54: /* pstxv0 */ + op->type = MKOP(STORE_VSX, PREFIXED, 16); + op->element_size = 16; + op->vsx_flags = VSX_CHECK_VEC; + break; +#endif /* CONFIG_VSX */ + case 56: /* plq */ + op->type = MKOP(LOAD, PREFIXED, 16); + break; + case 57: /* pld */ + op->type = MKOP(LOAD, PREFIXED, 8); + break; +#ifdef CONFIG_VSX + case 58: /* plxvp */ + op->reg = VSX_REGISTER_XTP(rd); + op->type = MKOP(LOAD_VSX, PREFIXED, 32); + op->element_size = 32; + break; +#endif /* CONFIG_VSX */ + case 60: /* pstq */ + op->type = MKOP(STORE, PREFIXED, 16); + break; + case 61: /* pstd */ + op->type = MKOP(STORE, PREFIXED, 8); + break; +#ifdef CONFIG_VSX + case 62: /* pstxvp */ + op->reg = VSX_REGISTER_XTP(rd); + op->type = MKOP(STORE_VSX, PREFIXED, 32); + op->element_size = 32; + break; +#endif /* CONFIG_VSX */ + } + break; + case 1: /* Type 01 Eight-Byte Register-to-Register */ + break; + case 2: /* Type 10 Modified Load/Store */ + if (prefix_r && ra) + break; + op->ea = mlsd_8lsd_ea(word, suffix, regs); + switch (suffixopcode) { + case 32: /* plwz */ + op->type = MKOP(LOAD, PREFIXED, 4); + break; + case 34: /* plbz */ + op->type = MKOP(LOAD, PREFIXED, 1); + break; + case 36: /* pstw */ + op->type = MKOP(STORE, PREFIXED, 4); + break; + case 38: /* pstb */ + op->type = MKOP(STORE, PREFIXED, 1); + break; + case 40: /* plhz */ + op->type = MKOP(LOAD, PREFIXED, 2); + break; + case 42: /* plha */ + op->type = MKOP(LOAD, PREFIXED | SIGNEXT, 2); + break; + case 44: /* psth */ + op->type = MKOP(STORE, PREFIXED, 2); + break; + case 48: /* plfs */ + op->type = MKOP(LOAD_FP, PREFIXED | FPCONV, 4); + break; + case 50: /* plfd */ + op->type = MKOP(LOAD_FP, PREFIXED, 8); + break; + case 52: /* pstfs */ + op->type = MKOP(STORE_FP, PREFIXED | FPCONV, 4); + break; + case 54: /* pstfd */ + op->type = MKOP(STORE_FP, PREFIXED, 8); + break; + } + break; + case 3: /* Type 11 Modified Register-to-Register */ + break; + } #endif /* __powerpc64__ */ } + if (OP_IS_LOAD_STORE(op->type) && (op->type & UPDATE)) { + switch (GETTYPE(op->type)) { + case LOAD: + if (ra == rd) + goto unknown_opcode; + fallthrough; + case STORE: + case LOAD_FP: + case STORE_FP: + if (ra == 0) + goto unknown_opcode; + } + } + #ifdef CONFIG_VSX if ((GETTYPE(op->type) == LOAD_VSX || GETTYPE(op->type) == STORE_VSX) && @@ -2662,8 +3142,12 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, return 0; + unknown_opcode: + op->type = UNKNOWN; + return 0; + logical_done: - if (instr & 1) + if (word & 1) set_cr0(regs, op); logical_done_nocc: op->reg = ra; @@ -2671,7 +3155,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, return 1; arith_done: - if (instr & 1) + if (word & 1) set_cr0(regs, op); compute_done: op->reg = rd; @@ -2701,15 +3185,6 @@ NOKPROBE_SYMBOL(analyse_instr); */ static nokprobe_inline int handle_stack_update(unsigned long ea, struct pt_regs *regs) { -#ifdef CONFIG_PPC32 - /* - * Check if we will touch kernel stack overflow - */ - if (ea - STACK_INT_FRAME_SIZE <= current->thread.ksp_limit) { - printk(KERN_CRIT "Can't kprobe this since kernel stack would overflow.\n"); - return -EINVAL; - } -#endif /* CONFIG_PPC32 */ /* * Check if we already set since that means we'll * lose the previous value. 
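[Editor's note, not part of the patch] The prefixed load/store cases in the hunk above (plwz, plha, pld, plxvp, ...) all derive their effective address from mlsd_8lsd_ea(), which assembles an 18-bit field from the prefix word and a 16-bit field from the suffix into a 34-bit displacement and sign-extends it before adding the base (regs->gpr[ra] or, for R=1, regs->nip). A minimal standalone C sketch of that sign extension, assuming only the bit layout shown in the patch; the helper name sext34() and the test values are illustrative, not kernel code:

#include <stdint.h>
#include <stdio.h>

/* Combine the 18-bit prefix field (d0) and the 16-bit suffix field (d1)
 * into a 34-bit displacement, then sign-extend it to 64 bits. This mirrors
 * the arithmetic mlsd_8lsd_ea() performs, using a shift pair instead of
 * the kernel's two-step 32-bit cast. */
static int64_t sext34(uint32_t prefix_word, uint32_t suffix)
{
	uint64_t d = ((uint64_t)(prefix_word & 0x3ffff) << 16) | (suffix & 0xffff);

	/* move bit 33 up to bit 63, then arithmetic-shift back down */
	return (int64_t)(d << 30) >> 30;
}

int main(void)
{
	printf("%lld\n", (long long)sext34(0x3ffff, 0xffff)); /* all ones -> -1 */
	printf("%lld\n", (long long)sext34(0x00000, 0x0001)); /* 1 stays 1   */
	return 0;
}

The resulting displacement is then added to the register- or PC-relative base exactly as the patch does, and the R=1 with RA!=0 combination remains an invalid form that the callers reject. [End note]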
@@ -2756,7 +3231,7 @@ void emulate_update_regs(struct pt_regs *regs, struct instruction_op *op) { unsigned long next_pc; - next_pc = truncate_if_32bit(regs->msr, regs->nip + 4); + next_pc = truncate_if_32bit(regs->msr, regs->nip + GETLENGTH(op->type)); switch (GETTYPE(op->type)) { case COMPUTE: if (op->type & SETREG) @@ -2787,12 +3262,14 @@ void emulate_update_regs(struct pt_regs *regs, struct instruction_op *op) case BARRIER_EIEIO: eieio(); break; +#ifdef CONFIG_PPC64 case BARRIER_LWSYNC: asm volatile("lwsync" : : : "memory"); break; case BARRIER_PTESYNC: asm volatile("ptesync" : : : "memory"); break; +#endif } break; @@ -2831,7 +3308,7 @@ void emulate_update_regs(struct pt_regs *regs, struct instruction_op *op) default: WARN_ON_ONCE(1); } - regs->nip = next_pc; + regs_set_return_ip(regs, next_pc); } NOKPROBE_SYMBOL(emulate_update_regs); @@ -2868,7 +3345,7 @@ int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op) err = 0; val = 0; switch (size) { -#ifdef __powerpc64__ +#ifdef CONFIG_PPC_HAS_LBARX_LHARX case 1: __get_user_asmx(val, ea, err, "lbarx"); break; @@ -2910,7 +3387,7 @@ int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op) __put_user_asmx(op->val, ea, err, "stbcx.", cr); break; case 2: - __put_user_asmx(op->val, ea, err, "stbcx.", cr); + __put_user_asmx(op->val, ea, err, "sthcx.", cr); break; #endif case 4: @@ -2960,14 +3437,14 @@ int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op) * stored in the thread_struct. If the instruction is in * the kernel, we must not touch the state in the thread_struct. */ - if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_FP)) + if (!user_mode(regs) && !(regs->msr & MSR_FP)) return 0; err = do_fp_load(op, ea, regs, cross_endian); break; #endif #ifdef CONFIG_ALTIVEC case LOAD_VMX: - if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_VEC)) + if (!user_mode(regs) && !(regs->msr & MSR_VEC)) return 0; err = do_vec_load(op->reg, ea, size, regs, cross_endian); break; @@ -2982,7 +3459,7 @@ int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op) */ if (op->reg >= 32 && (op->vsx_flags & VSX_CHECK_VEC)) msrbit = MSR_VEC; - if (!(regs->msr & MSR_PR) && !(regs->msr & msrbit)) + if (!user_mode(regs) && !(regs->msr & msrbit)) return 0; err = do_vsx_load(op, ea, regs, cross_endian); break; @@ -3018,8 +3495,7 @@ int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op) } #endif if ((op->type & UPDATE) && size == sizeof(long) && - op->reg == 1 && op->update_reg == 1 && - !(regs->msr & MSR_PR) && + op->reg == 1 && op->update_reg == 1 && !user_mode(regs) && ea >= regs->gpr[1] - STACK_INT_FRAME_SIZE) { err = handle_stack_update(ea, regs); break; @@ -3031,14 +3507,14 @@ int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op) #ifdef CONFIG_PPC_FPU case STORE_FP: - if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_FP)) + if (!user_mode(regs) && !(regs->msr & MSR_FP)) return 0; err = do_fp_store(op, ea, regs, cross_endian); break; #endif #ifdef CONFIG_ALTIVEC case STORE_VMX: - if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_VEC)) + if (!user_mode(regs) && !(regs->msr & MSR_VEC)) return 0; err = do_vec_store(op->reg, ea, size, regs, cross_endian); break; @@ -3053,7 +3529,7 @@ int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op) */ if (op->reg >= 32 && (op->vsx_flags & VSX_CHECK_VEC)) msrbit = MSR_VEC; - if (!(regs->msr & MSR_PR) && !(regs->msr & msrbit)) + if (!user_mode(regs) && !(regs->msr & msrbit)) return 0; err = do_vsx_store(op, ea, regs, cross_endian); break; @@ 
-3101,7 +3577,7 @@ NOKPROBE_SYMBOL(emulate_loadstore); * or -1 if the instruction is one that should not be stepped, * such as an rfid, or a mtmsrd that would clear MSR_RI. */ -int emulate_step(struct pt_regs *regs, unsigned int instr) +int emulate_step(struct pt_regs *regs, ppc_inst_t instr) { struct instruction_op op; int r, err, type; @@ -3169,38 +3645,31 @@ int emulate_step(struct pt_regs *regs, unsigned int instr) /* can't step mtmsr[d] that would clear MSR_RI */ return -1; /* here op.val is the mask of bits to change */ - regs->msr = (regs->msr & ~op.val) | (val & op.val); + regs_set_return_msr(regs, (regs->msr & ~op.val) | (val & op.val)); goto instr_done; -#ifdef CONFIG_PPC64 case SYSCALL: /* sc */ /* - * N.B. this uses knowledge about how the syscall - * entry code works. If that is changed, this will - * need to be changed also. + * Per ISA v3.1, section 7.5.15 'Trace Interrupt', we can't + * single step a system call instruction: + * + * Successful completion for an instruction means that the + * instruction caused no other interrupt. Thus a Trace + * interrupt never occurs for a System Call or System Call + * Vectored instruction, or for a Trap instruction that + * traps. */ - if (regs->gpr[0] == 0x1ebe && - cpu_has_feature(CPU_FTR_REAL_LE)) { - regs->msr ^= MSR_LE; - goto instr_done; - } - regs->gpr[9] = regs->gpr[13]; - regs->gpr[10] = MSR_KERNEL; - regs->gpr[11] = regs->nip + 4; - regs->gpr[12] = regs->msr & MSR_MASK; - regs->gpr[13] = (unsigned long) get_paca(); - regs->nip = (unsigned long) &system_call_common; - regs->msr = MSR_KERNEL; - return 1; - + return -1; + case SYSCALL_VECTORED_0: /* scv 0 */ + return -1; case RFI: return -1; -#endif } return 0; instr_done: - regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4); + regs_set_return_ip(regs, + truncate_if_32bit(regs->msr, regs->nip + GETLENGTH(op.type))); return 1; } NOKPROBE_SYMBOL(emulate_step); diff --git a/arch/powerpc/lib/string.S b/arch/powerpc/lib/string.S index 2752b1cc1d45..daa72061dc0c 100644 --- a/arch/powerpc/lib/string.S +++ b/arch/powerpc/lib/string.S @@ -4,8 +4,8 @@ * * Copyright (C) 1996 Paul Mackerras. */ +#include <linux/export.h> #include <asm/ppc_asm.h> -#include <asm/export.h> #include <asm/cache.h> .text diff --git a/arch/powerpc/lib/string_32.S b/arch/powerpc/lib/string_32.S index f69a6aab7bfb..3ee45619a3f8 100644 --- a/arch/powerpc/lib/string_32.S +++ b/arch/powerpc/lib/string_32.S @@ -7,8 +7,8 @@ * */ +#include <linux/export.h> #include <asm/ppc_asm.h> -#include <asm/export.h> #include <asm/cache.h> .text @@ -17,7 +17,7 @@ CACHELINE_BYTES = L1_CACHE_BYTES LG_CACHELINE_BYTES = L1_CACHE_SHIFT CACHELINE_MASK = (L1_CACHE_BYTES-1) -_GLOBAL(__clear_user) +_GLOBAL(__arch_clear_user) /* * Use dcbz on the complete cache lines in the destination * to set them to zero. This requires that the destination @@ -87,4 +87,4 @@ _GLOBAL(__clear_user) EX_TABLE(8b, 91b) EX_TABLE(9b, 91b) -EXPORT_SYMBOL(__clear_user) +EXPORT_SYMBOL(__arch_clear_user) diff --git a/arch/powerpc/lib/string_64.S b/arch/powerpc/lib/string_64.S index 507b18b1660e..a25eb8588434 100644 --- a/arch/powerpc/lib/string_64.S +++ b/arch/powerpc/lib/string_64.S @@ -6,18 +6,13 @@ * Author: Anton Blanchard <anton@au.ibm.com> */ +#include <linux/export.h> #include <asm/ppc_asm.h> #include <asm/linkage.h> #include <asm/asm-offsets.h> -#include <asm/export.h> - - .section ".toc","aw" -PPC64_CACHES: - .tc ppc64_caches[TC],ppc64_caches - .section ".text" /** - * __clear_user: - Zero a block of memory in user space, with less checking. 
+ * __arch_clear_user: - Zero a block of memory in user space, with less checking. * @to: Destination address, in user space. * @n: Number of bytes to zero. * @@ -58,7 +53,7 @@ err3; stb r0,0(r3) mr r3,r4 blr -_GLOBAL_TOC(__clear_user) +_GLOBAL_TOC(__arch_clear_user) cmpdi r4,32 neg r6,r3 li r0,0 @@ -133,7 +128,7 @@ err1; stb r0,0(r3) blr .Llong_clear: - ld r5,PPC64_CACHES@toc(r2) + LOAD_REG_ADDR(r5, ppc64_caches) bf cr7*4+0,11f err2; std r0,0(r3) @@ -181,4 +176,4 @@ err1; dcbz 0,r3 cmpdi r4,32 blt .Lshort_clear b .Lmedium_clear -EXPORT_SYMBOL(__clear_user) +EXPORT_SYMBOL(__arch_clear_user) diff --git a/arch/powerpc/lib/strlen_32.S b/arch/powerpc/lib/strlen_32.S index 0a8d3f64d493..bbd24feb233f 100644 --- a/arch/powerpc/lib/strlen_32.S +++ b/arch/powerpc/lib/strlen_32.S @@ -6,8 +6,8 @@ * * Inspired from glibc implementation */ +#include <linux/export.h> #include <asm/ppc_asm.h> -#include <asm/export.h> #include <asm/cache.h> .text diff --git a/arch/powerpc/lib/test-code-patching.c b/arch/powerpc/lib/test-code-patching.c new file mode 100644 index 000000000000..c44823292f73 --- /dev/null +++ b/arch/powerpc/lib/test-code-patching.c @@ -0,0 +1,362 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2008 Michael Ellerman, IBM Corporation. + */ + +#include <linux/vmalloc.h> +#include <linux/init.h> + +#include <asm/code-patching.h> + +static int __init instr_is_branch_to_addr(const u32 *instr, unsigned long addr) +{ + if (instr_is_branch_iform(ppc_inst_read(instr)) || + instr_is_branch_bform(ppc_inst_read(instr))) + return branch_target(instr) == addr; + + return 0; +} + +static void __init test_trampoline(void) +{ + asm ("nop;nop;\n"); +} + +#define check(x) do { \ + if (!(x)) \ + pr_err("code-patching: test failed at line %d\n", __LINE__); \ +} while (0) + +static void __init test_branch_iform(void) +{ + int err; + ppc_inst_t instr; + u32 tmp[2]; + u32 *iptr = tmp; + unsigned long addr = (unsigned long)tmp; + + /* The simplest case, branch to self, no flags */ + check(instr_is_branch_iform(ppc_inst(0x48000000))); + /* All bits of target set, and flags */ + check(instr_is_branch_iform(ppc_inst(0x4bffffff))); + /* High bit of opcode set, which is wrong */ + check(!instr_is_branch_iform(ppc_inst(0xcbffffff))); + /* Middle bits of opcode set, which is wrong */ + check(!instr_is_branch_iform(ppc_inst(0x7bffffff))); + + /* Simplest case, branch to self with link */ + check(instr_is_branch_iform(ppc_inst(0x48000001))); + /* All bits of targets set */ + check(instr_is_branch_iform(ppc_inst(0x4bfffffd))); + /* Some bits of targets set */ + check(instr_is_branch_iform(ppc_inst(0x4bff00fd))); + /* Must be a valid branch to start with */ + check(!instr_is_branch_iform(ppc_inst(0x7bfffffd))); + + /* Absolute branch to 0x100 */ + ppc_inst_write(iptr, ppc_inst(0x48000103)); + check(instr_is_branch_to_addr(iptr, 0x100)); + /* Absolute branch to 0x420fc */ + ppc_inst_write(iptr, ppc_inst(0x480420ff)); + check(instr_is_branch_to_addr(iptr, 0x420fc)); + /* Maximum positive relative branch, + 20MB - 4B */ + ppc_inst_write(iptr, ppc_inst(0x49fffffc)); + check(instr_is_branch_to_addr(iptr, addr + 0x1FFFFFC)); + /* Smallest negative relative branch, - 4B */ + ppc_inst_write(iptr, ppc_inst(0x4bfffffc)); + check(instr_is_branch_to_addr(iptr, addr - 4)); + /* Largest negative relative branch, - 32 MB */ + ppc_inst_write(iptr, ppc_inst(0x4a000000)); + check(instr_is_branch_to_addr(iptr, addr - 0x2000000)); + + /* Branch to self, with link */ + err = create_branch(&instr, iptr, addr, BRANCH_SET_LINK); + 
ppc_inst_write(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr)); + + /* Branch to self - 0x100, with link */ + err = create_branch(&instr, iptr, addr - 0x100, BRANCH_SET_LINK); + ppc_inst_write(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr - 0x100)); + + /* Branch to self + 0x100, no link */ + err = create_branch(&instr, iptr, addr + 0x100, 0); + ppc_inst_write(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr + 0x100)); + + /* Maximum relative negative offset, - 32 MB */ + err = create_branch(&instr, iptr, addr - 0x2000000, BRANCH_SET_LINK); + ppc_inst_write(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr - 0x2000000)); + + /* Out of range relative negative offset, - 32 MB + 4*/ + err = create_branch(&instr, iptr, addr - 0x2000004, BRANCH_SET_LINK); + check(err); + + /* Out of range relative positive offset, + 32 MB */ + err = create_branch(&instr, iptr, addr + 0x2000000, BRANCH_SET_LINK); + check(err); + + /* Unaligned target */ + err = create_branch(&instr, iptr, addr + 3, BRANCH_SET_LINK); + check(err); + + /* Check flags are masked correctly */ + err = create_branch(&instr, iptr, addr, 0xFFFFFFFC); + ppc_inst_write(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr)); + check(ppc_inst_equal(instr, ppc_inst(0x48000000))); +} + +static void __init test_create_function_call(void) +{ + u32 *iptr; + unsigned long dest; + ppc_inst_t instr; + + /* Check we can create a function call */ + iptr = (u32 *)ppc_function_entry(test_trampoline); + dest = ppc_function_entry(test_create_function_call); + create_branch(&instr, iptr, dest, BRANCH_SET_LINK); + patch_instruction(iptr, instr); + check(instr_is_branch_to_addr(iptr, dest)); +} + +static void __init test_branch_bform(void) +{ + int err; + unsigned long addr; + ppc_inst_t instr; + u32 tmp[2]; + u32 *iptr = tmp; + unsigned int flags; + + addr = (unsigned long)iptr; + + /* The simplest case, branch to self, no flags */ + check(instr_is_branch_bform(ppc_inst(0x40000000))); + /* All bits of target set, and flags */ + check(instr_is_branch_bform(ppc_inst(0x43ffffff))); + /* High bit of opcode set, which is wrong */ + check(!instr_is_branch_bform(ppc_inst(0xc3ffffff))); + /* Middle bits of opcode set, which is wrong */ + check(!instr_is_branch_bform(ppc_inst(0x7bffffff))); + + /* Absolute conditional branch to 0x100 */ + ppc_inst_write(iptr, ppc_inst(0x43ff0103)); + check(instr_is_branch_to_addr(iptr, 0x100)); + /* Absolute conditional branch to 0x20fc */ + ppc_inst_write(iptr, ppc_inst(0x43ff20ff)); + check(instr_is_branch_to_addr(iptr, 0x20fc)); + /* Maximum positive relative conditional branch, + 32 KB - 4B */ + ppc_inst_write(iptr, ppc_inst(0x43ff7ffc)); + check(instr_is_branch_to_addr(iptr, addr + 0x7FFC)); + /* Smallest negative relative conditional branch, - 4B */ + ppc_inst_write(iptr, ppc_inst(0x43fffffc)); + check(instr_is_branch_to_addr(iptr, addr - 4)); + /* Largest negative relative conditional branch, - 32 KB */ + ppc_inst_write(iptr, ppc_inst(0x43ff8000)); + check(instr_is_branch_to_addr(iptr, addr - 0x8000)); + + /* All condition code bits set & link */ + flags = 0x3ff000 | BRANCH_SET_LINK; + + /* Branch to self */ + err = create_cond_branch(&instr, iptr, addr, flags); + ppc_inst_write(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr)); + + /* Branch to self - 0x100 */ + err = create_cond_branch(&instr, iptr, addr - 0x100, flags); + ppc_inst_write(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr - 0x100)); + + /* Branch to self + 0x100 */ + err = create_cond_branch(&instr, 
iptr, addr + 0x100, flags); + ppc_inst_write(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr + 0x100)); + + /* Maximum relative negative offset, - 32 KB */ + err = create_cond_branch(&instr, iptr, addr - 0x8000, flags); + ppc_inst_write(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr - 0x8000)); + + /* Out of range relative negative offset, - 32 KB + 4*/ + err = create_cond_branch(&instr, iptr, addr - 0x8004, flags); + check(err); + + /* Out of range relative positive offset, + 32 KB */ + err = create_cond_branch(&instr, iptr, addr + 0x8000, flags); + check(err); + + /* Unaligned target */ + err = create_cond_branch(&instr, iptr, addr + 3, flags); + check(err); + + /* Check flags are masked correctly */ + err = create_cond_branch(&instr, iptr, addr, 0xFFFFFFFC); + ppc_inst_write(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr)); + check(ppc_inst_equal(instr, ppc_inst(0x43FF0000))); +} + +static void __init test_translate_branch(void) +{ + unsigned long addr; + void *p, *q; + ppc_inst_t instr; + void *buf; + + buf = vmalloc(PAGE_ALIGN(0x2000000 + 1)); + check(buf); + if (!buf) + return; + + /* Simple case, branch to self moved a little */ + p = buf; + addr = (unsigned long)p; + create_branch(&instr, p, addr, 0); + ppc_inst_write(p, instr); + check(instr_is_branch_to_addr(p, addr)); + q = p + 4; + translate_branch(&instr, q, p); + ppc_inst_write(q, instr); + check(instr_is_branch_to_addr(q, addr)); + + /* Maximum negative case, move b . to addr + 32 MB */ + p = buf; + addr = (unsigned long)p; + create_branch(&instr, p, addr, 0); + ppc_inst_write(p, instr); + q = buf + 0x2000000; + translate_branch(&instr, q, p); + ppc_inst_write(q, instr); + check(instr_is_branch_to_addr(p, addr)); + check(instr_is_branch_to_addr(q, addr)); + check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x4a000000))); + + /* Maximum positive case, move x to x - 32 MB + 4 */ + p = buf + 0x2000000; + addr = (unsigned long)p; + create_branch(&instr, p, addr, 0); + ppc_inst_write(p, instr); + q = buf + 4; + translate_branch(&instr, q, p); + ppc_inst_write(q, instr); + check(instr_is_branch_to_addr(p, addr)); + check(instr_is_branch_to_addr(q, addr)); + check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x49fffffc))); + + /* Jump to x + 16 MB moved to x + 20 MB */ + p = buf; + addr = 0x1000000 + (unsigned long)buf; + create_branch(&instr, p, addr, BRANCH_SET_LINK); + ppc_inst_write(p, instr); + q = buf + 0x1400000; + translate_branch(&instr, q, p); + ppc_inst_write(q, instr); + check(instr_is_branch_to_addr(p, addr)); + check(instr_is_branch_to_addr(q, addr)); + + /* Jump to x + 16 MB moved to x - 16 MB + 4 */ + p = buf + 0x1000000; + addr = 0x2000000 + (unsigned long)buf; + create_branch(&instr, p, addr, 0); + ppc_inst_write(p, instr); + q = buf + 4; + translate_branch(&instr, q, p); + ppc_inst_write(q, instr); + check(instr_is_branch_to_addr(p, addr)); + check(instr_is_branch_to_addr(q, addr)); + + + /* Conditional branch tests */ + + /* Simple case, branch to self moved a little */ + p = buf; + addr = (unsigned long)p; + create_cond_branch(&instr, p, addr, 0); + ppc_inst_write(p, instr); + check(instr_is_branch_to_addr(p, addr)); + q = buf + 4; + translate_branch(&instr, q, p); + ppc_inst_write(q, instr); + check(instr_is_branch_to_addr(q, addr)); + + /* Maximum negative case, move b . 
to addr + 32 KB */ + p = buf; + addr = (unsigned long)p; + create_cond_branch(&instr, p, addr, 0xFFFFFFFC); + ppc_inst_write(p, instr); + q = buf + 0x8000; + translate_branch(&instr, q, p); + ppc_inst_write(q, instr); + check(instr_is_branch_to_addr(p, addr)); + check(instr_is_branch_to_addr(q, addr)); + check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x43ff8000))); + + /* Maximum positive case, move x to x - 32 KB + 4 */ + p = buf + 0x8000; + addr = (unsigned long)p; + create_cond_branch(&instr, p, addr, 0xFFFFFFFC); + ppc_inst_write(p, instr); + q = buf + 4; + translate_branch(&instr, q, p); + ppc_inst_write(q, instr); + check(instr_is_branch_to_addr(p, addr)); + check(instr_is_branch_to_addr(q, addr)); + check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x43ff7ffc))); + + /* Jump to x + 12 KB moved to x + 20 KB */ + p = buf; + addr = 0x3000 + (unsigned long)buf; + create_cond_branch(&instr, p, addr, BRANCH_SET_LINK); + ppc_inst_write(p, instr); + q = buf + 0x5000; + translate_branch(&instr, q, p); + ppc_inst_write(q, instr); + check(instr_is_branch_to_addr(p, addr)); + check(instr_is_branch_to_addr(q, addr)); + + /* Jump to x + 8 KB moved to x - 8 KB + 4 */ + p = buf + 0x2000; + addr = 0x4000 + (unsigned long)buf; + create_cond_branch(&instr, p, addr, 0); + ppc_inst_write(p, instr); + q = buf + 4; + translate_branch(&instr, q, p); + ppc_inst_write(q, instr); + check(instr_is_branch_to_addr(p, addr)); + check(instr_is_branch_to_addr(q, addr)); + + /* Free the buffer we were using */ + vfree(buf); +} + +static void __init test_prefixed_patching(void) +{ + u32 *iptr = (u32 *)ppc_function_entry(test_trampoline); + u32 expected[2] = {OP_PREFIX << 26, 0}; + ppc_inst_t inst = ppc_inst_prefix(OP_PREFIX << 26, 0); + + if (!IS_ENABLED(CONFIG_PPC64)) + return; + + patch_instruction(iptr, inst); + + check(!memcmp(iptr, expected, sizeof(expected))); +} + +static int __init test_code_patching(void) +{ + pr_info("Running code patching self-tests ...\n"); + + test_branch_iform(); + test_branch_bform(); + test_create_function_call(); + test_translate_branch(); + test_prefixed_patching(); + + return 0; +} +late_initcall(test_code_patching); diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c index 42347067739c..23c7805fb7b3 100644 --- a/arch/powerpc/lib/test_emulate_step.c +++ b/arch/powerpc/lib/test_emulate_step.c @@ -8,57 +8,50 @@ #define pr_fmt(fmt) "emulate_step_test: " fmt #include <linux/ptrace.h> +#include <asm/cpu_has_feature.h> #include <asm/sstep.h> #include <asm/ppc-opcode.h> #include <asm/code-patching.h> - -#define IMM_L(i) ((uintptr_t)(i) & 0xffff) - -/* - * Defined with TEST_ prefix so it does not conflict with other - * definitions. 
- */ -#define TEST_LD(r, base, i) (PPC_INST_LD | ___PPC_RT(r) | \ - ___PPC_RA(base) | IMM_L(i)) -#define TEST_LWZ(r, base, i) (PPC_INST_LWZ | ___PPC_RT(r) | \ - ___PPC_RA(base) | IMM_L(i)) -#define TEST_LWZX(t, a, b) (PPC_INST_LWZX | ___PPC_RT(t) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_STD(r, base, i) (PPC_INST_STD | ___PPC_RS(r) | \ - ___PPC_RA(base) | ((i) & 0xfffc)) -#define TEST_LDARX(t, a, b, eh) (PPC_INST_LDARX | ___PPC_RT(t) | \ - ___PPC_RA(a) | ___PPC_RB(b) | \ - __PPC_EH(eh)) -#define TEST_STDCX(s, a, b) (PPC_INST_STDCX | ___PPC_RS(s) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_LFSX(t, a, b) (PPC_INST_LFSX | ___PPC_RT(t) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_STFSX(s, a, b) (PPC_INST_STFSX | ___PPC_RS(s) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_LFDX(t, a, b) (PPC_INST_LFDX | ___PPC_RT(t) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_STFDX(s, a, b) (PPC_INST_STFDX | ___PPC_RS(s) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_LVX(t, a, b) (PPC_INST_LVX | ___PPC_RT(t) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_STVX(s, a, b) (PPC_INST_STVX | ___PPC_RS(s) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_LXVD2X(s, a, b) (PPC_INST_LXVD2X | VSX_XX1((s), R##a, R##b)) -#define TEST_STXVD2X(s, a, b) (PPC_INST_STXVD2X | VSX_XX1((s), R##a, R##b)) -#define TEST_ADD(t, a, b) (PPC_INST_ADD | ___PPC_RT(t) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_ADD_DOT(t, a, b) (PPC_INST_ADD | ___PPC_RT(t) | \ - ___PPC_RA(a) | ___PPC_RB(b) | 0x1) -#define TEST_ADDC(t, a, b) (PPC_INST_ADDC | ___PPC_RT(t) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_ADDC_DOT(t, a, b) (PPC_INST_ADDC | ___PPC_RT(t) | \ - ___PPC_RA(a) | ___PPC_RB(b) | 0x1) +#include <asm/inst.h> #define MAX_SUBTESTS 16 #define IGNORE_GPR(n) (0x1UL << (n)) #define IGNORE_XER (0x1UL << 32) #define IGNORE_CCR (0x1UL << 33) +#define NEGATIVE_TEST (0x1UL << 63) + +#define TEST_PLD(r, base, i, pr) \ + ppc_inst_prefix(PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_H(i), \ + PPC_INST_PLD | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) + +#define TEST_PLWZ(r, base, i, pr) \ + ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \ + PPC_RAW_LWZ(r, base, i)) + +#define TEST_PSTD(r, base, i, pr) \ + ppc_inst_prefix(PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_H(i), \ + PPC_INST_PSTD | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) + +#define TEST_PLFS(r, base, i, pr) \ + ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \ + PPC_INST_LFS | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) + +#define TEST_PSTFS(r, base, i, pr) \ + ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \ + PPC_INST_STFS | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) + +#define TEST_PLFD(r, base, i, pr) \ + ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \ + PPC_INST_LFD | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) + +#define TEST_PSTFD(r, base, i, pr) \ + ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \ + PPC_INST_STFD | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) + +#define TEST_PADDI(t, a, i, pr) \ + ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \ + PPC_RAW_ADDI(t, a, i)) static void __init init_pt_regs(struct pt_regs *regs) { @@ -103,7 +96,7 @@ static void __init test_ld(void) regs.gpr[3] = (unsigned long) &a; /* ld r5, 0(r3) */ - stepped = emulate_step(®s, TEST_LD(5, 3, 0)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LD(5, 3, 0))); if (stepped == 1 && regs.gpr[5] == a) show_result("ld", "PASS"); @@ -111,6 +104,29 @@ static void __init test_ld(void) show_result("ld", "FAIL"); } 
+static void __init test_pld(void) +{ + struct pt_regs regs; + unsigned long a = 0x23; + int stepped = -1; + + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + show_result("pld", "SKIP (!CPU_FTR_ARCH_31)"); + return; + } + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long)&a; + + /* pld r5, 0(r3), 0 */ + stepped = emulate_step(®s, TEST_PLD(5, 3, 0, 0)); + + if (stepped == 1 && regs.gpr[5] == a) + show_result("pld", "PASS"); + else + show_result("pld", "FAIL"); +} + static void __init test_lwz(void) { struct pt_regs regs; @@ -121,7 +137,7 @@ static void __init test_lwz(void) regs.gpr[3] = (unsigned long) &a; /* lwz r5, 0(r3) */ - stepped = emulate_step(®s, TEST_LWZ(5, 3, 0)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LWZ(5, 3, 0))); if (stepped == 1 && regs.gpr[5] == a) show_result("lwz", "PASS"); @@ -129,6 +145,30 @@ static void __init test_lwz(void) show_result("lwz", "FAIL"); } +static void __init test_plwz(void) +{ + struct pt_regs regs; + unsigned int a = 0x4545; + int stepped = -1; + + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + show_result("plwz", "SKIP (!CPU_FTR_ARCH_31)"); + return; + } + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long)&a; + + /* plwz r5, 0(r3), 0 */ + + stepped = emulate_step(®s, TEST_PLWZ(5, 3, 0, 0)); + + if (stepped == 1 && regs.gpr[5] == a) + show_result("plwz", "PASS"); + else + show_result("plwz", "FAIL"); +} + static void __init test_lwzx(void) { struct pt_regs regs; @@ -141,7 +181,7 @@ static void __init test_lwzx(void) regs.gpr[5] = 0x8765; /* lwzx r5, r3, r4 */ - stepped = emulate_step(®s, TEST_LWZX(5, 3, 4)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LWZX(5, 3, 4))); if (stepped == 1 && regs.gpr[5] == a[2]) show_result("lwzx", "PASS"); else @@ -159,13 +199,36 @@ static void __init test_std(void) regs.gpr[5] = 0x5678; /* std r5, 0(r3) */ - stepped = emulate_step(®s, TEST_STD(5, 3, 0)); - if (stepped == 1 || regs.gpr[5] == a) + stepped = emulate_step(®s, ppc_inst(PPC_RAW_STD(5, 3, 0))); + if (stepped == 1 && regs.gpr[5] == a) show_result("std", "PASS"); else show_result("std", "FAIL"); } +static void __init test_pstd(void) +{ + struct pt_regs regs; + unsigned long a = 0x1234; + int stepped = -1; + + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + show_result("pstd", "SKIP (!CPU_FTR_ARCH_31)"); + return; + } + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long)&a; + regs.gpr[5] = 0x5678; + + /* pstd r5, 0(r3), 0 */ + stepped = emulate_step(®s, TEST_PSTD(5, 3, 0, 0)); + if (stepped == 1 || regs.gpr[5] == a) + show_result("pstd", "PASS"); + else + show_result("pstd", "FAIL"); +} + static void __init test_ldarx_stdcx(void) { struct pt_regs regs; @@ -184,7 +247,7 @@ static void __init test_ldarx_stdcx(void) regs.gpr[5] = 0x5678; /* ldarx r5, r3, r4, 0 */ - stepped = emulate_step(®s, TEST_LDARX(5, 3, 4, 0)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LDARX(5, 3, 4, 0))); /* * Don't touch 'a' here. Touching 'a' can do Load/store @@ -202,7 +265,7 @@ static void __init test_ldarx_stdcx(void) regs.gpr[5] = 0x9ABC; /* stdcx. 
r5, r3, r4 */ - stepped = emulate_step(®s, TEST_STDCX(5, 3, 4)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_STDCX(5, 3, 4))); /* * Two possible scenarios that indicates successful emulation @@ -242,7 +305,7 @@ static void __init test_lfsx_stfsx(void) regs.gpr[4] = 0; /* lfsx frt10, r3, r4 */ - stepped = emulate_step(®s, TEST_LFSX(10, 3, 4)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LFSX(10, 3, 4))); if (stepped == 1) show_result("lfsx", "PASS"); @@ -255,7 +318,7 @@ static void __init test_lfsx_stfsx(void) c.a = 678.91; /* stfsx frs10, r3, r4 */ - stepped = emulate_step(®s, TEST_STFSX(10, 3, 4)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_STFSX(10, 3, 4))); if (stepped == 1 && c.b == cached_b) show_result("stfsx", "PASS"); @@ -263,6 +326,53 @@ static void __init test_lfsx_stfsx(void) show_result("stfsx", "FAIL"); } +static void __init test_plfs_pstfs(void) +{ + struct pt_regs regs; + union { + float a; + int b; + } c; + int cached_b; + int stepped = -1; + + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + show_result("pld", "SKIP (!CPU_FTR_ARCH_31)"); + return; + } + + init_pt_regs(®s); + + + /*** plfs ***/ + + c.a = 123.45; + cached_b = c.b; + + regs.gpr[3] = (unsigned long)&c.a; + + /* plfs frt10, 0(r3), 0 */ + stepped = emulate_step(®s, TEST_PLFS(10, 3, 0, 0)); + + if (stepped == 1) + show_result("plfs", "PASS"); + else + show_result("plfs", "FAIL"); + + + /*** pstfs ***/ + + c.a = 678.91; + + /* pstfs frs10, 0(r3), 0 */ + stepped = emulate_step(®s, TEST_PSTFS(10, 3, 0, 0)); + + if (stepped == 1 && c.b == cached_b) + show_result("pstfs", "PASS"); + else + show_result("pstfs", "FAIL"); +} + static void __init test_lfdx_stfdx(void) { struct pt_regs regs; @@ -285,7 +395,7 @@ static void __init test_lfdx_stfdx(void) regs.gpr[4] = 0; /* lfdx frt10, r3, r4 */ - stepped = emulate_step(®s, TEST_LFDX(10, 3, 4)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LFDX(10, 3, 4))); if (stepped == 1) show_result("lfdx", "PASS"); @@ -298,13 +408,60 @@ static void __init test_lfdx_stfdx(void) c.a = 987654.32; /* stfdx frs10, r3, r4 */ - stepped = emulate_step(®s, TEST_STFDX(10, 3, 4)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_STFDX(10, 3, 4))); if (stepped == 1 && c.b == cached_b) show_result("stfdx", "PASS"); else show_result("stfdx", "FAIL"); } + +static void __init test_plfd_pstfd(void) +{ + struct pt_regs regs; + union { + double a; + long b; + } c; + long cached_b; + int stepped = -1; + + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + show_result("pld", "SKIP (!CPU_FTR_ARCH_31)"); + return; + } + + init_pt_regs(®s); + + + /*** plfd ***/ + + c.a = 123456.78; + cached_b = c.b; + + regs.gpr[3] = (unsigned long)&c.a; + + /* plfd frt10, 0(r3), 0 */ + stepped = emulate_step(®s, TEST_PLFD(10, 3, 0, 0)); + + if (stepped == 1) + show_result("plfd", "PASS"); + else + show_result("plfd", "FAIL"); + + + /*** pstfd ***/ + + c.a = 987654.32; + + /* pstfd frs10, 0(r3), 0 */ + stepped = emulate_step(®s, TEST_PSTFD(10, 3, 0, 0)); + + if (stepped == 1 && c.b == cached_b) + show_result("pstfd", "PASS"); + else + show_result("pstfd", "FAIL"); +} #else static void __init test_lfsx_stfsx(void) { @@ -312,11 +469,23 @@ static void __init test_lfsx_stfsx(void) show_result("stfsx", "SKIP (CONFIG_PPC_FPU is not set)"); } +static void __init test_plfs_pstfs(void) +{ + show_result("plfs", "SKIP (CONFIG_PPC_FPU is not set)"); + show_result("pstfs", "SKIP (CONFIG_PPC_FPU is not set)"); +} + static void __init test_lfdx_stfdx(void) { show_result("lfdx", "SKIP (CONFIG_PPC_FPU is not set)"); show_result("stfdx", "SKIP 
(CONFIG_PPC_FPU is not set)"); } + +static void __init test_plfd_pstfd(void) +{ + show_result("plfd", "SKIP (CONFIG_PPC_FPU is not set)"); + show_result("pstfd", "SKIP (CONFIG_PPC_FPU is not set)"); +} #endif /* CONFIG_PPC_FPU */ #ifdef CONFIG_ALTIVEC @@ -344,7 +513,7 @@ static void __init test_lvx_stvx(void) regs.gpr[4] = 0; /* lvx vrt10, r3, r4 */ - stepped = emulate_step(®s, TEST_LVX(10, 3, 4)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LVX(10, 3, 4))); if (stepped == 1) show_result("lvx", "PASS"); @@ -360,7 +529,7 @@ static void __init test_lvx_stvx(void) c.b[3] = 498532; /* stvx vrs10, r3, r4 */ - stepped = emulate_step(®s, TEST_STVX(10, 3, 4)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_STVX(10, 3, 4))); if (stepped == 1 && cached_b[0] == c.b[0] && cached_b[1] == c.b[1] && cached_b[2] == c.b[2] && cached_b[3] == c.b[3]) @@ -401,7 +570,7 @@ static void __init test_lxvd2x_stxvd2x(void) regs.gpr[4] = 0; /* lxvd2x vsr39, r3, r4 */ - stepped = emulate_step(®s, TEST_LXVD2X(39, 3, 4)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LXVD2X(39, R3, R4))); if (stepped == 1 && cpu_has_feature(CPU_FTR_VSX)) { show_result("lxvd2x", "PASS"); @@ -421,7 +590,7 @@ static void __init test_lxvd2x_stxvd2x(void) c.b[3] = 4; /* stxvd2x vsr39, r3, r4 */ - stepped = emulate_step(®s, TEST_STXVD2X(39, 3, 4)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_STXVD2X(39, R3, R4))); if (stepped == 1 && cached_b[0] == c.b[0] && cached_b[1] == c.b[1] && cached_b[2] == c.b[2] && cached_b[3] == c.b[3] && @@ -442,36 +611,315 @@ static void __init test_lxvd2x_stxvd2x(void) } #endif /* CONFIG_VSX */ +#ifdef CONFIG_VSX +static void __init test_lxvp_stxvp(void) +{ + struct pt_regs regs; + union { + vector128 a; + u32 b[4]; + } c[2]; + u32 cached_b[8]; + int stepped = -1; + + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + show_result("lxvp", "SKIP (!CPU_FTR_ARCH_31)"); + show_result("stxvp", "SKIP (!CPU_FTR_ARCH_31)"); + return; + } + + init_pt_regs(®s); + + /*** lxvp ***/ + + cached_b[0] = c[0].b[0] = 18233; + cached_b[1] = c[0].b[1] = 34863571; + cached_b[2] = c[0].b[2] = 834; + cached_b[3] = c[0].b[3] = 6138911; + cached_b[4] = c[1].b[0] = 1234; + cached_b[5] = c[1].b[1] = 5678; + cached_b[6] = c[1].b[2] = 91011; + cached_b[7] = c[1].b[3] = 121314; + + regs.gpr[4] = (unsigned long)&c[0].a; + + /* + * lxvp XTp,DQ(RA) + * XTp = 32xTX + 2xTp + * let TX=1 Tp=1 RA=4 DQ=0 + */ + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LXVP(34, 4, 0))); + + if (stepped == 1 && cpu_has_feature(CPU_FTR_VSX)) { + show_result("lxvp", "PASS"); + } else { + if (!cpu_has_feature(CPU_FTR_VSX)) + show_result("lxvp", "PASS (!CPU_FTR_VSX)"); + else + show_result("lxvp", "FAIL"); + } + + /*** stxvp ***/ + + c[0].b[0] = 21379463; + c[0].b[1] = 87; + c[0].b[2] = 374234; + c[0].b[3] = 4; + c[1].b[0] = 90; + c[1].b[1] = 122; + c[1].b[2] = 555; + c[1].b[3] = 32144; + + /* + * stxvp XSp,DQ(RA) + * XSp = 32xSX + 2xSp + * let SX=1 Sp=1 RA=4 DQ=0 + */ + stepped = emulate_step(®s, ppc_inst(PPC_RAW_STXVP(34, 4, 0))); + + if (stepped == 1 && cached_b[0] == c[0].b[0] && cached_b[1] == c[0].b[1] && + cached_b[2] == c[0].b[2] && cached_b[3] == c[0].b[3] && + cached_b[4] == c[1].b[0] && cached_b[5] == c[1].b[1] && + cached_b[6] == c[1].b[2] && cached_b[7] == c[1].b[3] && + cpu_has_feature(CPU_FTR_VSX)) { + show_result("stxvp", "PASS"); + } else { + if (!cpu_has_feature(CPU_FTR_VSX)) + show_result("stxvp", "PASS (!CPU_FTR_VSX)"); + else + show_result("stxvp", "FAIL"); + } +} +#else +static void __init test_lxvp_stxvp(void) +{ + show_result("lxvp", "SKIP (CONFIG_VSX is 
not set)"); + show_result("stxvp", "SKIP (CONFIG_VSX is not set)"); +} +#endif /* CONFIG_VSX */ + +#ifdef CONFIG_VSX +static void __init test_lxvpx_stxvpx(void) +{ + struct pt_regs regs; + union { + vector128 a; + u32 b[4]; + } c[2]; + u32 cached_b[8]; + int stepped = -1; + + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + show_result("lxvpx", "SKIP (!CPU_FTR_ARCH_31)"); + show_result("stxvpx", "SKIP (!CPU_FTR_ARCH_31)"); + return; + } + + init_pt_regs(®s); + + /*** lxvpx ***/ + + cached_b[0] = c[0].b[0] = 18233; + cached_b[1] = c[0].b[1] = 34863571; + cached_b[2] = c[0].b[2] = 834; + cached_b[3] = c[0].b[3] = 6138911; + cached_b[4] = c[1].b[0] = 1234; + cached_b[5] = c[1].b[1] = 5678; + cached_b[6] = c[1].b[2] = 91011; + cached_b[7] = c[1].b[3] = 121314; + + regs.gpr[3] = (unsigned long)&c[0].a; + regs.gpr[4] = 0; + + /* + * lxvpx XTp,RA,RB + * XTp = 32xTX + 2xTp + * let TX=1 Tp=1 RA=3 RB=4 + */ + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LXVPX(34, 3, 4))); + + if (stepped == 1 && cpu_has_feature(CPU_FTR_VSX)) { + show_result("lxvpx", "PASS"); + } else { + if (!cpu_has_feature(CPU_FTR_VSX)) + show_result("lxvpx", "PASS (!CPU_FTR_VSX)"); + else + show_result("lxvpx", "FAIL"); + } + + /*** stxvpx ***/ + + c[0].b[0] = 21379463; + c[0].b[1] = 87; + c[0].b[2] = 374234; + c[0].b[3] = 4; + c[1].b[0] = 90; + c[1].b[1] = 122; + c[1].b[2] = 555; + c[1].b[3] = 32144; + + /* + * stxvpx XSp,RA,RB + * XSp = 32xSX + 2xSp + * let SX=1 Sp=1 RA=3 RB=4 + */ + stepped = emulate_step(®s, ppc_inst(PPC_RAW_STXVPX(34, 3, 4))); + + if (stepped == 1 && cached_b[0] == c[0].b[0] && cached_b[1] == c[0].b[1] && + cached_b[2] == c[0].b[2] && cached_b[3] == c[0].b[3] && + cached_b[4] == c[1].b[0] && cached_b[5] == c[1].b[1] && + cached_b[6] == c[1].b[2] && cached_b[7] == c[1].b[3] && + cpu_has_feature(CPU_FTR_VSX)) { + show_result("stxvpx", "PASS"); + } else { + if (!cpu_has_feature(CPU_FTR_VSX)) + show_result("stxvpx", "PASS (!CPU_FTR_VSX)"); + else + show_result("stxvpx", "FAIL"); + } +} +#else +static void __init test_lxvpx_stxvpx(void) +{ + show_result("lxvpx", "SKIP (CONFIG_VSX is not set)"); + show_result("stxvpx", "SKIP (CONFIG_VSX is not set)"); +} +#endif /* CONFIG_VSX */ + +#ifdef CONFIG_VSX +static void __init test_plxvp_pstxvp(void) +{ + ppc_inst_t instr; + struct pt_regs regs; + union { + vector128 a; + u32 b[4]; + } c[2]; + u32 cached_b[8]; + int stepped = -1; + + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + show_result("plxvp", "SKIP (!CPU_FTR_ARCH_31)"); + show_result("pstxvp", "SKIP (!CPU_FTR_ARCH_31)"); + return; + } + + /*** plxvp ***/ + + cached_b[0] = c[0].b[0] = 18233; + cached_b[1] = c[0].b[1] = 34863571; + cached_b[2] = c[0].b[2] = 834; + cached_b[3] = c[0].b[3] = 6138911; + cached_b[4] = c[1].b[0] = 1234; + cached_b[5] = c[1].b[1] = 5678; + cached_b[6] = c[1].b[2] = 91011; + cached_b[7] = c[1].b[3] = 121314; + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long)&c[0].a; + + /* + * plxvp XTp,D(RA),R + * XTp = 32xTX + 2xTp + * let RA=3 R=0 D=d0||d1=0 R=0 Tp=1 TX=1 + */ + instr = ppc_inst_prefix(PPC_RAW_PLXVP_P(34, 0, 3, 0), PPC_RAW_PLXVP_S(34, 0, 3, 0)); + + stepped = emulate_step(®s, instr); + if (stepped == 1 && cpu_has_feature(CPU_FTR_VSX)) { + show_result("plxvp", "PASS"); + } else { + if (!cpu_has_feature(CPU_FTR_VSX)) + show_result("plxvp", "PASS (!CPU_FTR_VSX)"); + else + show_result("plxvp", "FAIL"); + } + + /*** pstxvp ***/ + + c[0].b[0] = 21379463; + c[0].b[1] = 87; + c[0].b[2] = 374234; + c[0].b[3] = 4; + c[1].b[0] = 90; + c[1].b[1] = 122; + c[1].b[2] = 555; + c[1].b[3] = 32144; + + /* + * 
pstxvp XSp,D(RA),R + * XSp = 32xSX + 2xSp + * let RA=3 D=d0||d1=0 R=0 Sp=1 SX=1 + */ + instr = ppc_inst_prefix(PPC_RAW_PSTXVP_P(34, 0, 3, 0), PPC_RAW_PSTXVP_S(34, 0, 3, 0)); + + stepped = emulate_step(®s, instr); + + if (stepped == 1 && cached_b[0] == c[0].b[0] && cached_b[1] == c[0].b[1] && + cached_b[2] == c[0].b[2] && cached_b[3] == c[0].b[3] && + cached_b[4] == c[1].b[0] && cached_b[5] == c[1].b[1] && + cached_b[6] == c[1].b[2] && cached_b[7] == c[1].b[3] && + cpu_has_feature(CPU_FTR_VSX)) { + show_result("pstxvp", "PASS"); + } else { + if (!cpu_has_feature(CPU_FTR_VSX)) + show_result("pstxvp", "PASS (!CPU_FTR_VSX)"); + else + show_result("pstxvp", "FAIL"); + } +} +#else +static void __init test_plxvp_pstxvp(void) +{ + show_result("plxvp", "SKIP (CONFIG_VSX is not set)"); + show_result("pstxvp", "SKIP (CONFIG_VSX is not set)"); +} +#endif /* CONFIG_VSX */ + static void __init run_tests_load_store(void) { test_ld(); + test_pld(); test_lwz(); + test_plwz(); test_lwzx(); test_std(); + test_pstd(); test_ldarx_stdcx(); test_lfsx_stfsx(); + test_plfs_pstfs(); test_lfdx_stfdx(); + test_plfd_pstfd(); test_lvx_stvx(); test_lxvd2x_stxvd2x(); + test_lxvp_stxvp(); + test_lxvpx_stxvpx(); + test_plxvp_pstxvp(); } struct compute_test { char *mnemonic; + unsigned long cpu_feature; struct { char *descr; unsigned long flags; - unsigned int instr; + ppc_inst_t instr; struct pt_regs regs; } subtests[MAX_SUBTESTS + 1]; }; +/* Extreme values for si0||si1 (the MLS:D-form 34 bit immediate field) */ +#define SI_MIN BIT(33) +#define SI_MAX (BIT(33) - 1) +#define SI_UMAX (BIT(34) - 1) + static struct compute_test compute_tests[] = { { .mnemonic = "nop", .subtests = { { .descr = "R0 = LONG_MAX", - .instr = PPC_INST_NOP, + .instr = ppc_inst(PPC_RAW_NOP()), .regs = { .gpr[0] = LONG_MAX, } @@ -479,11 +927,38 @@ static struct compute_test compute_tests[] = { } }, { + .mnemonic = "setb", + .cpu_feature = CPU_FTR_ARCH_300, + .subtests = { + { + .descr = "BFA = 1, CR = GT", + .instr = ppc_inst(PPC_RAW_SETB(20, 1)), + .regs = { + .ccr = 0x4000000, + } + }, + { + .descr = "BFA = 4, CR = LT", + .instr = ppc_inst(PPC_RAW_SETB(20, 4)), + .regs = { + .ccr = 0x8000, + } + }, + { + .descr = "BFA = 5, CR = EQ", + .instr = ppc_inst(PPC_RAW_SETB(20, 5)), + .regs = { + .ccr = 0x200, + } + } + } + }, + { .mnemonic = "add", .subtests = { { .descr = "RA = LONG_MIN, RB = LONG_MIN", - .instr = TEST_ADD(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), .regs = { .gpr[21] = LONG_MIN, .gpr[22] = LONG_MIN, @@ -491,7 +966,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = LONG_MIN, RB = LONG_MAX", - .instr = TEST_ADD(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), .regs = { .gpr[21] = LONG_MIN, .gpr[22] = LONG_MAX, @@ -499,7 +974,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = LONG_MAX, RB = LONG_MAX", - .instr = TEST_ADD(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), .regs = { .gpr[21] = LONG_MAX, .gpr[22] = LONG_MAX, @@ -507,7 +982,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = ULONG_MAX, RB = ULONG_MAX", - .instr = TEST_ADD(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), .regs = { .gpr[21] = ULONG_MAX, .gpr[22] = ULONG_MAX, @@ -515,7 +990,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = ULONG_MAX, RB = 0x1", - .instr = TEST_ADD(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), .regs = { .gpr[21] = ULONG_MAX, .gpr[22] = 0x1, @@ -523,7 +998,7 @@ static struct compute_test compute_tests[] = { }, { 
.descr = "RA = INT_MIN, RB = INT_MIN", - .instr = TEST_ADD(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), .regs = { .gpr[21] = INT_MIN, .gpr[22] = INT_MIN, @@ -531,7 +1006,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MIN, RB = INT_MAX", - .instr = TEST_ADD(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), .regs = { .gpr[21] = INT_MIN, .gpr[22] = INT_MAX, @@ -539,7 +1014,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MAX, RB = INT_MAX", - .instr = TEST_ADD(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), .regs = { .gpr[21] = INT_MAX, .gpr[22] = INT_MAX, @@ -547,7 +1022,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = UINT_MAX, RB = UINT_MAX", - .instr = TEST_ADD(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), .regs = { .gpr[21] = UINT_MAX, .gpr[22] = UINT_MAX, @@ -555,7 +1030,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = UINT_MAX, RB = 0x1", - .instr = TEST_ADD(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), .regs = { .gpr[21] = UINT_MAX, .gpr[22] = 0x1, @@ -569,7 +1044,7 @@ static struct compute_test compute_tests[] = { { .descr = "RA = LONG_MIN, RB = LONG_MIN", .flags = IGNORE_CCR, - .instr = TEST_ADD_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), .regs = { .gpr[21] = LONG_MIN, .gpr[22] = LONG_MIN, @@ -577,7 +1052,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = LONG_MIN, RB = LONG_MAX", - .instr = TEST_ADD_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), .regs = { .gpr[21] = LONG_MIN, .gpr[22] = LONG_MAX, @@ -586,7 +1061,7 @@ static struct compute_test compute_tests[] = { { .descr = "RA = LONG_MAX, RB = LONG_MAX", .flags = IGNORE_CCR, - .instr = TEST_ADD_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), .regs = { .gpr[21] = LONG_MAX, .gpr[22] = LONG_MAX, @@ -594,7 +1069,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = ULONG_MAX, RB = ULONG_MAX", - .instr = TEST_ADD_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), .regs = { .gpr[21] = ULONG_MAX, .gpr[22] = ULONG_MAX, @@ -602,7 +1077,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = ULONG_MAX, RB = 0x1", - .instr = TEST_ADD_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), .regs = { .gpr[21] = ULONG_MAX, .gpr[22] = 0x1, @@ -610,7 +1085,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MIN, RB = INT_MIN", - .instr = TEST_ADD_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), .regs = { .gpr[21] = INT_MIN, .gpr[22] = INT_MIN, @@ -618,7 +1093,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MIN, RB = INT_MAX", - .instr = TEST_ADD_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), .regs = { .gpr[21] = INT_MIN, .gpr[22] = INT_MAX, @@ -626,7 +1101,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MAX, RB = INT_MAX", - .instr = TEST_ADD_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), .regs = { .gpr[21] = INT_MAX, .gpr[22] = INT_MAX, @@ -634,7 +1109,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = UINT_MAX, RB = UINT_MAX", - .instr = TEST_ADD_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), .regs = { .gpr[21] = UINT_MAX, .gpr[22] = UINT_MAX, @@ -642,7 +1117,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = UINT_MAX, RB = 0x1", - .instr = 
TEST_ADD_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), .regs = { .gpr[21] = UINT_MAX, .gpr[22] = 0x1, @@ -655,7 +1130,7 @@ static struct compute_test compute_tests[] = { .subtests = { { .descr = "RA = LONG_MIN, RB = LONG_MIN", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = LONG_MIN, .gpr[22] = LONG_MIN, @@ -663,7 +1138,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = LONG_MIN, RB = LONG_MAX", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = LONG_MIN, .gpr[22] = LONG_MAX, @@ -671,7 +1146,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = LONG_MAX, RB = LONG_MAX", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = LONG_MAX, .gpr[22] = LONG_MAX, @@ -679,7 +1154,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = ULONG_MAX, RB = ULONG_MAX", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = ULONG_MAX, .gpr[22] = ULONG_MAX, @@ -687,7 +1162,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = ULONG_MAX, RB = 0x1", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = ULONG_MAX, .gpr[22] = 0x1, @@ -695,7 +1170,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MIN, RB = INT_MIN", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = INT_MIN, .gpr[22] = INT_MIN, @@ -703,7 +1178,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MIN, RB = INT_MAX", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = INT_MIN, .gpr[22] = INT_MAX, @@ -711,7 +1186,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MAX, RB = INT_MAX", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = INT_MAX, .gpr[22] = INT_MAX, @@ -719,7 +1194,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = UINT_MAX, RB = UINT_MAX", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = UINT_MAX, .gpr[22] = UINT_MAX, @@ -727,7 +1202,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = UINT_MAX, RB = 0x1", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = UINT_MAX, .gpr[22] = 0x1, @@ -735,7 +1210,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = LONG_MIN | INT_MIN, RB = LONG_MIN | INT_MIN", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = LONG_MIN | (uint)INT_MIN, .gpr[22] = LONG_MIN | (uint)INT_MIN, @@ -749,7 +1224,7 @@ static struct compute_test compute_tests[] = { { .descr = "RA = LONG_MIN, RB = LONG_MIN", .flags = IGNORE_CCR, - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = LONG_MIN, .gpr[22] = LONG_MIN, @@ -757,7 +1232,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = LONG_MIN, RB = LONG_MAX", - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = LONG_MIN, .gpr[22] = LONG_MAX, @@ -766,7 +1241,7 @@ static struct compute_test compute_tests[] = { { .descr = "RA = LONG_MAX, RB = LONG_MAX", .flags = IGNORE_CCR, - .instr = 
TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = LONG_MAX, .gpr[22] = LONG_MAX, @@ -774,7 +1249,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = ULONG_MAX, RB = ULONG_MAX", - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = ULONG_MAX, .gpr[22] = ULONG_MAX, @@ -782,7 +1257,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = ULONG_MAX, RB = 0x1", - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = ULONG_MAX, .gpr[22] = 0x1, @@ -790,7 +1265,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MIN, RB = INT_MIN", - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = INT_MIN, .gpr[22] = INT_MIN, @@ -798,7 +1273,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MIN, RB = INT_MAX", - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = INT_MIN, .gpr[22] = INT_MAX, @@ -806,7 +1281,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MAX, RB = INT_MAX", - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = INT_MAX, .gpr[22] = INT_MAX, @@ -814,7 +1289,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = UINT_MAX, RB = UINT_MAX", - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = UINT_MAX, .gpr[22] = UINT_MAX, @@ -822,7 +1297,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = UINT_MAX, RB = 0x1", - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = UINT_MAX, .gpr[22] = 0x1, @@ -830,47 +1305,336 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = LONG_MIN | INT_MIN, RB = LONG_MIN | INT_MIN", - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = LONG_MIN | (uint)INT_MIN, .gpr[22] = LONG_MIN | (uint)INT_MIN, } } } + }, + { + .mnemonic = "divde", + .subtests = { + { + .descr = "RA = LONG_MIN, RB = LONG_MIN", + .instr = ppc_inst(PPC_RAW_DIVDE(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MIN, + } + }, + { + .descr = "RA = 1L, RB = 0", + .instr = ppc_inst(PPC_RAW_DIVDE(20, 21, 22)), + .flags = IGNORE_GPR(20), + .regs = { + .gpr[21] = 1L, + .gpr[22] = 0, + } + }, + { + .descr = "RA = LONG_MIN, RB = LONG_MAX", + .instr = ppc_inst(PPC_RAW_DIVDE(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MAX, + } + } + } + }, + { + .mnemonic = "divde.", + .subtests = { + { + .descr = "RA = LONG_MIN, RB = LONG_MIN", + .instr = ppc_inst(PPC_RAW_DIVDE_DOT(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MIN, + } + }, + { + .descr = "RA = 1L, RB = 0", + .instr = ppc_inst(PPC_RAW_DIVDE_DOT(20, 21, 22)), + .flags = IGNORE_GPR(20), + .regs = { + .gpr[21] = 1L, + .gpr[22] = 0, + } + }, + { + .descr = "RA = LONG_MIN, RB = LONG_MAX", + .instr = ppc_inst(PPC_RAW_DIVDE_DOT(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MAX, + } + } + } + }, + { + .mnemonic = "divdeu", + .subtests = { + { + .descr = "RA = LONG_MIN, RB = LONG_MIN", + .instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)), + .flags = IGNORE_GPR(20), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MIN, + } + }, + { + .descr = "RA = 
1L, RB = 0", + .instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)), + .flags = IGNORE_GPR(20), + .regs = { + .gpr[21] = 1L, + .gpr[22] = 0, + } + }, + { + .descr = "RA = LONG_MIN, RB = LONG_MAX", + .instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MAX, + } + }, + { + .descr = "RA = LONG_MAX - 1, RB = LONG_MAX", + .instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MAX - 1, + .gpr[22] = LONG_MAX, + } + }, + { + .descr = "RA = LONG_MIN + 1, RB = LONG_MIN", + .instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)), + .flags = IGNORE_GPR(20), + .regs = { + .gpr[21] = LONG_MIN + 1, + .gpr[22] = LONG_MIN, + } + } + } + }, + { + .mnemonic = "divdeu.", + .subtests = { + { + .descr = "RA = LONG_MIN, RB = LONG_MIN", + .instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)), + .flags = IGNORE_GPR(20), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MIN, + } + }, + { + .descr = "RA = 1L, RB = 0", + .instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)), + .flags = IGNORE_GPR(20), + .regs = { + .gpr[21] = 1L, + .gpr[22] = 0, + } + }, + { + .descr = "RA = LONG_MIN, RB = LONG_MAX", + .instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MAX, + } + }, + { + .descr = "RA = LONG_MAX - 1, RB = LONG_MAX", + .instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MAX - 1, + .gpr[22] = LONG_MAX, + } + }, + { + .descr = "RA = LONG_MIN + 1, RB = LONG_MIN", + .instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)), + .flags = IGNORE_GPR(20), + .regs = { + .gpr[21] = LONG_MIN + 1, + .gpr[22] = LONG_MIN, + } + } + } + }, + { + .mnemonic = "paddi", + .cpu_feature = CPU_FTR_ARCH_31, + .subtests = { + { + .descr = "RA = LONG_MIN, SI = SI_MIN, R = 0", + .instr = TEST_PADDI(21, 22, SI_MIN, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = LONG_MIN, + } + }, + { + .descr = "RA = LONG_MIN, SI = SI_MAX, R = 0", + .instr = TEST_PADDI(21, 22, SI_MAX, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = LONG_MIN, + } + }, + { + .descr = "RA = LONG_MAX, SI = SI_MAX, R = 0", + .instr = TEST_PADDI(21, 22, SI_MAX, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = LONG_MAX, + } + }, + { + .descr = "RA = ULONG_MAX, SI = SI_UMAX, R = 0", + .instr = TEST_PADDI(21, 22, SI_UMAX, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = ULONG_MAX, + } + }, + { + .descr = "RA = ULONG_MAX, SI = 0x1, R = 0", + .instr = TEST_PADDI(21, 22, 0x1, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = ULONG_MAX, + } + }, + { + .descr = "RA = INT_MIN, SI = SI_MIN, R = 0", + .instr = TEST_PADDI(21, 22, SI_MIN, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = INT_MIN, + } + }, + { + .descr = "RA = INT_MIN, SI = SI_MAX, R = 0", + .instr = TEST_PADDI(21, 22, SI_MAX, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = INT_MIN, + } + }, + { + .descr = "RA = INT_MAX, SI = SI_MAX, R = 0", + .instr = TEST_PADDI(21, 22, SI_MAX, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = INT_MAX, + } + }, + { + .descr = "RA = UINT_MAX, SI = 0x1, R = 0", + .instr = TEST_PADDI(21, 22, 0x1, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = UINT_MAX, + } + }, + { + .descr = "RA = UINT_MAX, SI = SI_MAX, R = 0", + .instr = TEST_PADDI(21, 22, SI_MAX, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = UINT_MAX, + } + }, + { + .descr = "RA is r0, SI = SI_MIN, R = 0", + .instr = TEST_PADDI(21, 0, SI_MIN, 0), + .regs = { + .gpr[21] = 0x0, + } + }, + { + .descr = "RA = 0, SI = SI_MIN, R = 0", + .instr = TEST_PADDI(21, 22, SI_MIN, 0), + .regs = { + .gpr[21] = 0x0, + .gpr[22] = 0x0, + } + }, + { + .descr = "RA is r0, SI = 0, R 
= 1", + .instr = TEST_PADDI(21, 0, 0, 1), + .regs = { + .gpr[21] = 0, + } + }, + { + .descr = "RA is r0, SI = SI_MIN, R = 1", + .instr = TEST_PADDI(21, 0, SI_MIN, 1), + .regs = { + .gpr[21] = 0, + } + }, + /* Invalid instruction form with R = 1 and RA != 0 */ + { + .descr = "RA = R22(0), SI = 0, R = 1", + .instr = TEST_PADDI(21, 22, 0, 1), + .flags = NEGATIVE_TEST, + .regs = { + .gpr[21] = 0, + .gpr[22] = 0, + } + } + } } }; static int __init emulate_compute_instr(struct pt_regs *regs, - unsigned int instr) + ppc_inst_t instr, + bool negative) { + int analysed; struct instruction_op op; - if (!regs || !instr) + if (!regs || !ppc_inst_val(instr)) return -EINVAL; - if (analyse_instr(&op, regs, instr) != 1 || - GETTYPE(op.type) != COMPUTE) { - pr_info("emulation failed, instruction = 0x%08x\n", instr); + /* This is not a return frame regs */ + regs->nip = patch_site_addr(&patch__exec_instr); + + analysed = analyse_instr(&op, regs, instr); + if (analysed != 1 || GETTYPE(op.type) != COMPUTE) { + if (negative) + return -EFAULT; + pr_info("emulation failed, instruction = %08lx\n", ppc_inst_as_ulong(instr)); return -EFAULT; } - - emulate_update_regs(regs, &op); + if (analysed == 1 && negative) + pr_info("negative test failed, instruction = %08lx\n", ppc_inst_as_ulong(instr)); + if (!negative) + emulate_update_regs(regs, &op); return 0; } static int __init execute_compute_instr(struct pt_regs *regs, - unsigned int instr) + ppc_inst_t instr) { extern int exec_instr(struct pt_regs *regs); - extern s32 patch__exec_instr; - if (!regs || !instr) + if (!regs || !ppc_inst_val(instr)) return -EINVAL; /* Patch the NOP with the actual instruction */ patch_instruction_site(&patch__exec_instr, instr); if (exec_instr(regs)) { - pr_info("execution failed, instruction = 0x%08x\n", instr); + pr_info("execution failed, instruction = %08lx\n", ppc_inst_as_ulong(instr)); return -EFAULT; } @@ -890,16 +1654,23 @@ static void __init run_tests_compute(void) unsigned long flags; struct compute_test *test; struct pt_regs *regs, exp, got; - unsigned int i, j, k, instr; - bool ignore_gpr, ignore_xer, ignore_ccr, passed; + unsigned int i, j, k; + ppc_inst_t instr; + bool ignore_gpr, ignore_xer, ignore_ccr, passed, rc, negative; for (i = 0; i < ARRAY_SIZE(compute_tests); i++) { test = &compute_tests[i]; + if (test->cpu_feature && !early_cpu_has_feature(test->cpu_feature)) { + show_result(test->mnemonic, "SKIP (!CPU_FTR)"); + continue; + } + for (j = 0; j < MAX_SUBTESTS && test->subtests[j].descr; j++) { instr = test->subtests[j].instr; flags = test->subtests[j].flags; regs = &test->subtests[j].regs; + negative = flags & NEGATIVE_TEST; ignore_xer = flags & IGNORE_XER; ignore_ccr = flags & IGNORE_CCR; passed = true; @@ -914,8 +1685,12 @@ static void __init run_tests_compute(void) exp.msr = MSR_KERNEL; got.msr = MSR_KERNEL; - if (emulate_compute_instr(&got, instr) || - execute_compute_instr(&exp, instr)) { + rc = emulate_compute_instr(&got, instr, negative) != 0; + if (negative) { + /* skip executing instruction */ + passed = rc; + goto print; + } else if (rc || execute_compute_instr(&exp, instr)) { passed = false; goto print; } diff --git a/arch/powerpc/lib/test_emulate_step_exec_instr.S b/arch/powerpc/lib/test_emulate_step_exec_instr.S index 1580f34f4f4f..e2b646a4f7fa 100644 --- a/arch/powerpc/lib/test_emulate_step_exec_instr.S +++ b/arch/powerpc/lib/test_emulate_step_exec_instr.S @@ -16,7 +16,7 @@ _GLOBAL(exec_instr) /* * Stack frame layout (INT_FRAME_SIZE bytes) - * In-memory pt_regs (SP + STACK_FRAME_OVERHEAD) + * In-memory 
pt_regs (SP + STACK_INT_FRAME_REGS) * Scratch space (SP + 8) * Back chain (SP + 0) */ @@ -37,7 +37,7 @@ _GLOBAL(exec_instr) * The stack pointer (GPR1) and the thread pointer (GPR13) are not * saved as these should not be modified anyway. */ - SAVE_2GPRS(2, r1) + SAVE_GPRS(2, 3, r1) SAVE_NVGPRS(r1) /* @@ -75,12 +75,13 @@ _GLOBAL(exec_instr) /* Load GPRs from pt_regs */ REST_GPR(0, r31) - REST_10GPRS(2, r31) - REST_GPR(12, r31) + REST_GPRS(2, 12, r31) REST_NVGPRS(r31) /* Placeholder for the test instruction */ + .balign 64 1: nop + nop patch_site 1b patch__exec_instr /* @@ -97,8 +98,7 @@ _GLOBAL(exec_instr) subi r3, r3, GPR0 SAVE_GPR(0, r3) SAVE_GPR(2, r3) - SAVE_8GPRS(4, r3) - SAVE_GPR(12, r3) + SAVE_GPRS(4, 12, r3) SAVE_NVGPRS(r3) /* Save resulting LR to pt_regs */ diff --git a/arch/powerpc/lib/vmx-helper.c b/arch/powerpc/lib/vmx-helper.c index 62e6c3045252..d491da8d1838 100644 --- a/arch/powerpc/lib/vmx-helper.c +++ b/arch/powerpc/lib/vmx-helper.c @@ -9,7 +9,6 @@ #include <linux/uaccess.h> #include <linux/hardirq.h> #include <asm/switch_to.h> -#include <asm/asm-prototypes.h> int enter_vmx_usercopy(void) { @@ -37,7 +36,17 @@ int exit_vmx_usercopy(void) { disable_kernel_altivec(); pagefault_enable(); - preempt_enable(); + preempt_enable_no_resched(); + /* + * Must never explicitly call schedule (including preempt_enable()) + * while in a kuap-unlocked user copy, because the AMR register will + * not be saved and restored across context switch. However preempt + * kernels need to be preempted as soon as possible if need_resched is + * set and we are preemptible. The hack here is to schedule a + * decrementer to fire here and reschedule for us if necessary. + */ + if (IS_ENABLED(CONFIG_PREEMPT) && need_resched()) + set_dec(1); return 0; } diff --git a/arch/powerpc/lib/xor_vmx.c b/arch/powerpc/lib/xor_vmx.c index 54e61979e80e..aab49d056d18 100644 --- a/arch/powerpc/lib/xor_vmx.c +++ b/arch/powerpc/lib/xor_vmx.c @@ -49,8 +49,9 @@ typedef vector signed char unative_t; V1##_3 = vec_xor(V1##_3, V2##_3); \ } while (0) -void __xor_altivec_2(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in) +void __xor_altivec_2(unsigned long bytes, + unsigned long * __restrict v1_in, + const unsigned long * __restrict v2_in) { DEFINE(v1); DEFINE(v2); @@ -67,8 +68,10 @@ void __xor_altivec_2(unsigned long bytes, unsigned long *v1_in, } while (--lines > 0); } -void __xor_altivec_3(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in, unsigned long *v3_in) +void __xor_altivec_3(unsigned long bytes, + unsigned long * __restrict v1_in, + const unsigned long * __restrict v2_in, + const unsigned long * __restrict v3_in) { DEFINE(v1); DEFINE(v2); @@ -89,9 +92,11 @@ void __xor_altivec_3(unsigned long bytes, unsigned long *v1_in, } while (--lines > 0); } -void __xor_altivec_4(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in, unsigned long *v3_in, - unsigned long *v4_in) +void __xor_altivec_4(unsigned long bytes, + unsigned long * __restrict v1_in, + const unsigned long * __restrict v2_in, + const unsigned long * __restrict v3_in, + const unsigned long * __restrict v4_in) { DEFINE(v1); DEFINE(v2); @@ -116,9 +121,12 @@ void __xor_altivec_4(unsigned long bytes, unsigned long *v1_in, } while (--lines > 0); } -void __xor_altivec_5(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in, unsigned long *v3_in, - unsigned long *v4_in, unsigned long *v5_in) +void __xor_altivec_5(unsigned long bytes, + unsigned long * __restrict v1_in, + const unsigned long * __restrict v2_in, + 
const unsigned long * __restrict v3_in, + const unsigned long * __restrict v4_in, + const unsigned long * __restrict v5_in) { DEFINE(v1); DEFINE(v2); diff --git a/arch/powerpc/lib/xor_vmx.h b/arch/powerpc/lib/xor_vmx.h index 5c2b0839b179..573c41d90dac 100644 --- a/arch/powerpc/lib/xor_vmx.h +++ b/arch/powerpc/lib/xor_vmx.h @@ -6,16 +6,17 @@ * outside of the enable/disable altivec block. */ -void __xor_altivec_2(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in); - -void __xor_altivec_3(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in, unsigned long *v3_in); - -void __xor_altivec_4(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in, unsigned long *v3_in, - unsigned long *v4_in); - -void __xor_altivec_5(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in, unsigned long *v3_in, - unsigned long *v4_in, unsigned long *v5_in); +void __xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2); +void __xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3); +void __xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4); +void __xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5); diff --git a/arch/powerpc/lib/xor_vmx_glue.c b/arch/powerpc/lib/xor_vmx_glue.c index 80dba916c367..35d917ece4d1 100644 --- a/arch/powerpc/lib/xor_vmx_glue.c +++ b/arch/powerpc/lib/xor_vmx_glue.c @@ -12,47 +12,51 @@ #include <asm/xor_altivec.h> #include "xor_vmx.h" -void xor_altivec_2(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in) +void xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) { preempt_disable(); enable_kernel_altivec(); - __xor_altivec_2(bytes, v1_in, v2_in); + __xor_altivec_2(bytes, p1, p2); disable_kernel_altivec(); preempt_enable(); } EXPORT_SYMBOL(xor_altivec_2); -void xor_altivec_3(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in, unsigned long *v3_in) +void xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) { preempt_disable(); enable_kernel_altivec(); - __xor_altivec_3(bytes, v1_in, v2_in, v3_in); + __xor_altivec_3(bytes, p1, p2, p3); disable_kernel_altivec(); preempt_enable(); } EXPORT_SYMBOL(xor_altivec_3); -void xor_altivec_4(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in, unsigned long *v3_in, - unsigned long *v4_in) +void xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) { preempt_disable(); enable_kernel_altivec(); - __xor_altivec_4(bytes, v1_in, v2_in, v3_in, v4_in); + __xor_altivec_4(bytes, p1, p2, p3, p4); disable_kernel_altivec(); preempt_enable(); } EXPORT_SYMBOL(xor_altivec_4); -void xor_altivec_5(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in, unsigned long *v3_in, - unsigned long *v4_in, unsigned long *v5_in) +void xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, 
+ const unsigned long * __restrict p4, + const unsigned long * __restrict p5) { preempt_disable(); enable_kernel_altivec(); - __xor_altivec_5(bytes, v1_in, v2_in, v3_in, v4_in, v5_in); + __xor_altivec_5(bytes, p1, p2, p3, p4, p5); disable_kernel_altivec(); preempt_enable(); } |
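Note (editorial, not part of the patch above): the test-code-patching.c self-tests rely on the Power ISA I-form branch encoding — a 6-bit opcode, a 24-bit LI displacement implicitly shifted left by 2, and the AA (absolute) and LK (link) bits in the low two bits. The stand-alone sketch below shows one way to decode a branch target consistent with the expectations in test_branch_iform(), e.g. 0x48000103 decoding to an absolute branch to 0x100 and 0x4bfffffc to a branch to (addr - 4). The helper name decode_iform_target is invented for illustration and does not exist in the kernel.

/* Illustrative only -- not from the kernel patch above. */
#include <stdint.h>
#include <stdio.h>

/* Decode the target of a PowerPC I-form branch (b/ba/bl/bla). */
static uint64_t decode_iform_target(uint32_t instr, uint64_t addr)
{
	/* LI occupies bits 6-29; the low two bits are AA and LK. */
	int64_t off = instr & 0x03FFFFFC;

	/* Sign-extend the 26-bit displacement (LI || 0b00). */
	if (off & 0x02000000)
		off -= 0x04000000;

	/* AA = 1 means an absolute target, otherwise it is PC-relative. */
	return (instr & 2) ? (uint64_t)off : addr + off;
}

int main(void)
{
	uint64_t addr = 0x10000000;

	/* Matches the expectations in test_branch_iform(): */
	printf("%llx\n", (unsigned long long)decode_iform_target(0x48000103, addr)); /* 0x100            */
	printf("%llx\n", (unsigned long long)decode_iform_target(0x4bfffffc, addr)); /* addr - 4         */
	printf("%llx\n", (unsigned long long)decode_iform_target(0x49fffffc, addr)); /* addr + 0x1fffffc */
	return 0;
}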