Diffstat (limited to 'arch/powerpc/lib')
43 files changed, 7122 insertions, 1406 deletions
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index b8de3be10eb4..dd8a4b52a0cc 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -3,20 +3,31 @@ # Makefile for ppc-specific library files.. # -ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) +CFLAGS_code-patching.o += -fno-stack-protector +CFLAGS_feature-fixups.o += -fno-stack-protector CFLAGS_REMOVE_code-patching.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_feature-fixups.o = $(CC_FLAGS_FTRACE) KASAN_SANITIZE_code-patching.o := n KASAN_SANITIZE_feature-fixups.o := n +# restart_table.o contains functions called in the NMI interrupt path +# which can be in real mode. Disable KASAN. +KASAN_SANITIZE_restart_table.o := n +KCSAN_SANITIZE_code-patching.o := n +KCSAN_SANITIZE_feature-fixups.o := n ifdef CONFIG_KASAN CFLAGS_code-patching.o += -DDISABLE_BRANCH_PROFILING CFLAGS_feature-fixups.o += -DDISABLE_BRANCH_PROFILING endif -obj-y += alloc.o code-patching.o feature-fixups.o pmem.o +CFLAGS_code-patching.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) +CFLAGS_feature-fixups.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) + +obj-y += code-patching.o feature-fixups.o pmem.o + +obj-$(CONFIG_CODE_PATCHING_SELFTEST) += test-code-patching.o ifndef CONFIG_KASAN obj-y += string.o memcmp_$(BITS).o @@ -31,17 +42,22 @@ obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o # 64-bit linker creates .sfpr on demand for final link (vmlinux), # so it is only needed for modules, and only for older linkers which # do not support --save-restore-funcs -ifeq ($(call ld-ifversion, -lt, 225000000, y),y) -extra-$(CONFIG_PPC64) += crtsavres.o +ifndef CONFIG_LD_IS_BFD +always-$(CONFIG_PPC64) += crtsavres.o endif obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \ - memcpy_power7.o + memcpy_power7.o restart_table.o obj64-y += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \ - memcpy_64.o memcpy_mcsafe_64.o + memcpy_64.o copy_mc_64.o +ifdef CONFIG_PPC_QUEUED_SPINLOCKS +obj-$(CONFIG_SMP) += qspinlock.o +else obj64-$(CONFIG_SMP) += locks.o +endif + obj64-$(CONFIG_ALTIVEC) += vmx-helper.o obj64-$(CONFIG_KPROBES_SANITY_TEST) += test_emulate_step.o \ test_emulate_step_exec_instr.o @@ -58,6 +74,14 @@ obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o obj-$(CONFIG_FTR_FIXUP_SELFTEST) += feature-fixups-test.o obj-$(CONFIG_ALTIVEC) += xor_vmx.o xor_vmx_glue.o -CFLAGS_xor_vmx.o += -maltivec $(call cc-option,-mabi=altivec) +CFLAGS_xor_vmx.o += -mhard-float -maltivec $(call cc-option,-mabi=altivec) +# Enable <altivec.h> +CFLAGS_xor_vmx.o += -isystem $(shell $(CC) -print-file-name=include) + +obj-$(CONFIG_CRC32_ARCH) += crc32-powerpc.o +crc32-powerpc-y := crc32-glue.o crc32c-vpmsum_asm.o + +obj-$(CONFIG_CRC_T10DIF_ARCH) += crc-t10dif-powerpc.o +crc-t10dif-powerpc-y := crc-t10dif-glue.o crct10dif-vpmsum_asm.o obj-$(CONFIG_PPC64) += $(obj64-y) diff --git a/arch/powerpc/lib/alloc.c b/arch/powerpc/lib/alloc.c deleted file mode 100644 index ce180870bd52..000000000000 --- a/arch/powerpc/lib/alloc.c +++ /dev/null @@ -1,23 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <linux/types.h> -#include <linux/init.h> -#include <linux/slab.h> -#include <linux/memblock.h> -#include <linux/string.h> -#include <asm/setup.h> - - -void * __ref zalloc_maybe_bootmem(size_t size, gfp_t mask) -{ - void *p; - - if (slab_is_available()) - p = kzalloc(size, mask); - else { - p = memblock_alloc(size, SMP_CACHE_BYTES); - if (!p) - panic("%s: Failed to allocate %zu bytes\n", __func__, - size); - } - return p; -} diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S 
index ecd150dc3ed9..cd00b9bdd772 100644 --- a/arch/powerpc/lib/checksum_32.S +++ b/arch/powerpc/lib/checksum_32.S @@ -8,12 +8,12 @@ * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au). */ +#include <linux/export.h> #include <linux/sys.h> #include <asm/processor.h> #include <asm/cache.h> #include <asm/errno.h> #include <asm/ppc_asm.h> -#include <asm/export.h> .text @@ -78,12 +78,10 @@ EXPORT_SYMBOL(__csum_partial) /* * Computes the checksum of a memory block at src, length len, - * and adds in "sum" (32-bit), while copying the block to dst. - * If an access exception occurs on src or dst, it stores -EFAULT - * to *src_err or *dst_err respectively, and (for an error on - * src) zeroes the rest of dst. + * and adds in 0xffffffff, while copying the block to dst. + * If an access exception occurs it returns zero. * - * csum_partial_copy_generic(src, dst, len, sum, src_err, dst_err) + * csum_partial_copy_generic(src, dst, len) */ #define CSUM_COPY_16_BYTES_WITHEX(n) \ 8 ## n ## 0: \ @@ -108,30 +106,24 @@ EXPORT_SYMBOL(__csum_partial) adde r12,r12,r10 #define CSUM_COPY_16_BYTES_EXCODE(n) \ - EX_TABLE(8 ## n ## 0b, src_error); \ - EX_TABLE(8 ## n ## 1b, src_error); \ - EX_TABLE(8 ## n ## 2b, src_error); \ - EX_TABLE(8 ## n ## 3b, src_error); \ - EX_TABLE(8 ## n ## 4b, dst_error); \ - EX_TABLE(8 ## n ## 5b, dst_error); \ - EX_TABLE(8 ## n ## 6b, dst_error); \ - EX_TABLE(8 ## n ## 7b, dst_error); + EX_TABLE(8 ## n ## 0b, fault); \ + EX_TABLE(8 ## n ## 1b, fault); \ + EX_TABLE(8 ## n ## 2b, fault); \ + EX_TABLE(8 ## n ## 3b, fault); \ + EX_TABLE(8 ## n ## 4b, fault); \ + EX_TABLE(8 ## n ## 5b, fault); \ + EX_TABLE(8 ## n ## 6b, fault); \ + EX_TABLE(8 ## n ## 7b, fault); .text - .stabs "arch/powerpc/lib/",N_SO,0,0,0f - .stabs "checksum_32.S",N_SO,0,0,0f -0: CACHELINE_BYTES = L1_CACHE_BYTES LG_CACHELINE_BYTES = L1_CACHE_SHIFT CACHELINE_MASK = (L1_CACHE_BYTES-1) _GLOBAL(csum_partial_copy_generic) - stwu r1,-16(r1) - stw r7,12(r1) - stw r8,8(r1) - - addic r12,r6,0 + li r12,-1 + addic r0,r0,0 /* clear carry */ addi r6,r4,-4 neg r0,r4 addi r4,r3,-4 @@ -241,39 +233,23 @@ _GLOBAL(csum_partial_copy_generic) slwi r0,r0,8 adde r12,r12,r0 66: addze r3,r12 - addi r1,r1,16 beqlr+ cr7 rlwinm r3,r3,8,0,31 /* odd destination address: rotate one byte */ blr -/* read fault */ -src_error: - lwz r7,12(r1) - addi r1,r1,16 - cmpwi cr0,r7,0 - beqlr - li r0,-EFAULT - stw r0,0(r7) - blr -/* write fault */ -dst_error: - lwz r8,8(r1) - addi r1,r1,16 - cmpwi cr0,r8,0 - beqlr - li r0,-EFAULT - stw r0,0(r8) +fault: + li r3,0 blr - EX_TABLE(70b, src_error); - EX_TABLE(71b, dst_error); - EX_TABLE(72b, src_error); - EX_TABLE(73b, dst_error); - EX_TABLE(54b, dst_error); + EX_TABLE(70b, fault); + EX_TABLE(71b, fault); + EX_TABLE(72b, fault); + EX_TABLE(73b, fault); + EX_TABLE(54b, fault); /* * this stuff handles faults in the cacheline loop and branches to either - * src_error (if in read part) or dst_error (if in write part) + * fault (if in read part) or fault (if in write part) */ CSUM_COPY_16_BYTES_EXCODE(0) #if L1_CACHE_BYTES >= 32 @@ -290,12 +266,12 @@ dst_error: #endif #endif - EX_TABLE(30b, src_error); - EX_TABLE(31b, dst_error); - EX_TABLE(40b, src_error); - EX_TABLE(41b, dst_error); - EX_TABLE(50b, src_error); - EX_TABLE(51b, dst_error); + EX_TABLE(30b, fault); + EX_TABLE(31b, fault); + EX_TABLE(40b, fault); + EX_TABLE(41b, fault); + EX_TABLE(50b, fault); + EX_TABLE(51b, fault); EXPORT_SYMBOL(csum_partial_copy_generic) diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S index 
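Both the 32-bit routine above and the 64-bit routine that follows implement the same new contract: csum_partial_copy_generic() now takes only (src, dst, len), primes its accumulator with 0xffffffff, and returns 0 if any access faults. A minimal caller sketch, assuming only what that contract guarantees (the helper name below is illustrative, not part of the patch); because the sum is primed with ~0, a successful copy never folds to 0, so 0 can double as the error indication:

	/* Sketch: copy from user space while checksumming, using the new
	 * three-argument interface. 0 from csum_and_copy_from_user() means
	 * a fault was taken somewhere in the copy. */
	static int sum_user_buffer(const void __user *src, void *dst, int len,
				   __sum16 *out)
	{
		__wsum sum = csum_and_copy_from_user(src, dst, len);

		if (!sum)
			return -EFAULT;

		*out = csum_fold(sum);	/* fold the 32-bit partial sum to 16 bits */
		return 0;
	}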
514978f908d4..d53d8f09a2c2 100644 --- a/arch/powerpc/lib/checksum_64.S +++ b/arch/powerpc/lib/checksum_64.S @@ -8,11 +8,11 @@ * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au). */ +#include <linux/export.h> #include <linux/sys.h> #include <asm/processor.h> #include <asm/errno.h> #include <asm/ppc_asm.h> -#include <asm/export.h> /* * Computes the checksum of a memory block at buff, length len, @@ -182,34 +182,33 @@ EXPORT_SYMBOL(__csum_partial) .macro srcnr 100: - EX_TABLE(100b,.Lsrc_error_nr) + EX_TABLE(100b,.Lerror_nr) .endm .macro source 150: - EX_TABLE(150b,.Lsrc_error) + EX_TABLE(150b,.Lerror) .endm .macro dstnr 200: - EX_TABLE(200b,.Ldest_error_nr) + EX_TABLE(200b,.Lerror_nr) .endm .macro dest 250: - EX_TABLE(250b,.Ldest_error) + EX_TABLE(250b,.Lerror) .endm /* * Computes the checksum of a memory block at src, length len, - * and adds in "sum" (32-bit), while copying the block to dst. - * If an access exception occurs on src or dst, it stores -EFAULT - * to *src_err or *dst_err respectively. The caller must take any action - * required in this case (zeroing memory, recalculating partial checksum etc). + * and adds in 0xffffffff (32-bit), while copying the block to dst. + * If an access exception occurs, it returns 0. * - * csum_partial_copy_generic(r3=src, r4=dst, r5=len, r6=sum, r7=src_err, r8=dst_err) + * csum_partial_copy_generic(r3=src, r4=dst, r5=len) */ _GLOBAL(csum_partial_copy_generic) + li r6,-1 addic r0,r6,0 /* clear carry */ srdi. r6,r5,3 /* less than 8 bytes? */ @@ -401,29 +400,15 @@ dstnr; stb r6,0(r4) srdi r3,r3,32 blr -.Lsrc_error: +.Lerror: ld r14,STK_REG(R14)(r1) ld r15,STK_REG(R15)(r1) ld r16,STK_REG(R16)(r1) addi r1,r1,STACKFRAMESIZE -.Lsrc_error_nr: - cmpdi 0,r7,0 - beqlr - li r6,-EFAULT - stw r6,0(r7) +.Lerror_nr: + li r3,0 blr -.Ldest_error: - ld r14,STK_REG(R14)(r1) - ld r15,STK_REG(R15)(r1) - ld r16,STK_REG(R16)(r1) - addi r1,r1,STACKFRAMESIZE -.Ldest_error_nr: - cmpdi 0,r8,0 - beqlr - li r6,-EFAULT - stw r6,0(r8) - blr EXPORT_SYMBOL(csum_partial_copy_generic) /* diff --git a/arch/powerpc/lib/checksum_wrappers.c b/arch/powerpc/lib/checksum_wrappers.c index fabe4db28726..1a14c8780278 100644 --- a/arch/powerpc/lib/checksum_wrappers.c +++ b/arch/powerpc/lib/checksum_wrappers.c @@ -12,83 +12,28 @@ #include <linux/uaccess.h> __wsum csum_and_copy_from_user(const void __user *src, void *dst, - int len, __wsum sum, int *err_ptr) + int len) { - unsigned int csum; + __wsum csum; - might_sleep(); - allow_read_from_user(src, len); + if (unlikely(!user_read_access_begin(src, len))) + return 0; - *err_ptr = 0; + csum = csum_partial_copy_generic((void __force *)src, dst, len); - if (!len) { - csum = 0; - goto out; - } - - if (unlikely((len < 0) || !access_ok(src, len))) { - *err_ptr = -EFAULT; - csum = (__force unsigned int)sum; - goto out; - } - - csum = csum_partial_copy_generic((void __force *)src, dst, - len, sum, err_ptr, NULL); - - if (unlikely(*err_ptr)) { - int missing = __copy_from_user(dst, src, len); - - if (missing) { - memset(dst + len - missing, 0, missing); - *err_ptr = -EFAULT; - } else { - *err_ptr = 0; - } - - csum = csum_partial(dst, len, sum); - } - -out: - prevent_read_from_user(src, len); - return (__force __wsum)csum; + user_read_access_end(); + return csum; } -EXPORT_SYMBOL(csum_and_copy_from_user); -__wsum csum_and_copy_to_user(const void *src, void __user *dst, int len, - __wsum sum, int *err_ptr) +__wsum csum_and_copy_to_user(const void *src, void __user *dst, int len) { - unsigned int csum; - - might_sleep(); - 
allow_write_to_user(dst, len); - - *err_ptr = 0; - - if (!len) { - csum = 0; - goto out; - } - - if (unlikely((len < 0) || !access_ok(dst, len))) { - *err_ptr = -EFAULT; - csum = -1; /* invalid checksum */ - goto out; - } - - csum = csum_partial_copy_generic(src, (void __force *)dst, - len, sum, NULL, err_ptr); + __wsum csum; - if (unlikely(*err_ptr)) { - csum = csum_partial(src, len, sum); + if (unlikely(!user_write_access_begin(dst, len))) + return 0; - if (copy_to_user(dst, src, len)) { - *err_ptr = -EFAULT; - csum = -1; /* invalid checksum */ - } - } + csum = csum_partial_copy_generic(src, (void __force *)dst, len); -out: - prevent_write_to_user(dst, len); - return (__force __wsum)csum; + user_write_access_end(); + return csum; } -EXPORT_SYMBOL(csum_and_copy_to_user); diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 3345f039a876..f84e0337cc02 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -3,236 +3,600 @@ * Copyright 2008 Michael Ellerman, IBM Corporation. */ -#include <linux/kernel.h> #include <linux/kprobes.h> +#include <linux/mmu_context.h> +#include <linux/random.h> #include <linux/vmalloc.h> #include <linux/init.h> -#include <linux/mm.h> #include <linux/cpuhotplug.h> -#include <linux/slab.h> #include <linux/uaccess.h> +#include <linux/jump_label.h> -#include <asm/pgtable.h> +#include <asm/debug.h> +#include <asm/pgalloc.h> +#include <asm/tlb.h> #include <asm/tlbflush.h> #include <asm/page.h> -#include <asm/code-patching.h> -#include <asm/setup.h> +#include <asm/text-patching.h> +#include <asm/inst.h> -static int __patch_instruction(unsigned int *exec_addr, unsigned int instr, - unsigned int *patch_addr) +static int __patch_mem(void *exec_addr, unsigned long val, void *patch_addr, bool is_dword) { - int err = 0; + if (!IS_ENABLED(CONFIG_PPC64) || likely(!is_dword)) { + /* For big endian correctness: plain address would use the wrong half */ + u32 val32 = val; - __put_user_asm(instr, patch_addr, err, "stw"); - if (err) - return err; + __put_kernel_nofault(patch_addr, &val32, u32, failed); + } else { + __put_kernel_nofault(patch_addr, &val, u64, failed); + } asm ("dcbst 0, %0; sync; icbi 0,%1; sync; isync" :: "r" (patch_addr), "r" (exec_addr)); return 0; + +failed: + mb(); /* sync */ + return -EPERM; } -int raw_patch_instruction(unsigned int *addr, unsigned int instr) +int raw_patch_instruction(u32 *addr, ppc_inst_t instr) { - return __patch_instruction(addr, instr, addr); + if (ppc_inst_prefixed(instr)) + return __patch_mem(addr, ppc_inst_as_ulong(instr), addr, true); + else + return __patch_mem(addr, ppc_inst_val(instr), addr, false); } -#ifdef CONFIG_STRICT_KERNEL_RWX -static DEFINE_PER_CPU(struct vm_struct *, text_poke_area); +struct patch_context { + union { + struct vm_struct *area; + struct mm_struct *mm; + }; + unsigned long addr; + pte_t *pte; +}; + +static DEFINE_PER_CPU(struct patch_context, cpu_patching_context); + +static int map_patch_area(void *addr, unsigned long text_poke_addr); +static void unmap_patch_area(unsigned long addr); + +static bool mm_patch_enabled(void) +{ + return IS_ENABLED(CONFIG_SMP) && radix_enabled(); +} + +/* + * The following applies for Radix MMU. Hash MMU has different requirements, + * and so is not supported. + * + * Changing mm requires context synchronising instructions on both sides of + * the context switch, as well as a hwsync between the last instruction for + * which the address of an associated storage access was translated using + * the current context. 
+ * + * switch_mm_irqs_off() performs an isync after the context switch. It is + * the responsibility of the caller to perform the CSI and hwsync before + * starting/stopping the temp mm. + */ +static struct mm_struct *start_using_temp_mm(struct mm_struct *temp_mm) +{ + struct mm_struct *orig_mm = current->active_mm; + + lockdep_assert_irqs_disabled(); + switch_mm_irqs_off(orig_mm, temp_mm, current); + + WARN_ON(!mm_is_thread_local(temp_mm)); + + suspend_breakpoints(); + return orig_mm; +} + +static void stop_using_temp_mm(struct mm_struct *temp_mm, + struct mm_struct *orig_mm) +{ + lockdep_assert_irqs_disabled(); + switch_mm_irqs_off(temp_mm, orig_mm, current); + restore_breakpoints(); +} static int text_area_cpu_up(unsigned int cpu) { struct vm_struct *area; + unsigned long addr; + int err; - area = get_vm_area(PAGE_SIZE, VM_ALLOC); + area = get_vm_area(PAGE_SIZE, 0); if (!area) { WARN_ONCE(1, "Failed to create text area for cpu %d\n", cpu); return -1; } - this_cpu_write(text_poke_area, area); + + // Map/unmap the area to ensure all page tables are pre-allocated + addr = (unsigned long)area->addr; + err = map_patch_area(empty_zero_page, addr); + if (err) + return err; + + unmap_patch_area(addr); + + this_cpu_write(cpu_patching_context.area, area); + this_cpu_write(cpu_patching_context.addr, addr); + this_cpu_write(cpu_patching_context.pte, virt_to_kpte(addr)); return 0; } static int text_area_cpu_down(unsigned int cpu) { - free_vm_area(this_cpu_read(text_poke_area)); + free_vm_area(this_cpu_read(cpu_patching_context.area)); + this_cpu_write(cpu_patching_context.area, NULL); + this_cpu_write(cpu_patching_context.addr, 0); + this_cpu_write(cpu_patching_context.pte, NULL); return 0; } -/* - * Run as a late init call. This allows all the boot time patching to be done - * simply by patching the code, and then we're called here prior to - * mark_rodata_ro(), which happens after all init calls are run. Although - * BUG_ON() is rude, in this case it should only happen if ENOMEM, and we judge - * it as being preferable to a kernel that will crash later when someone tries - * to use patch_instruction(). - */ -static int __init setup_text_poke_area(void) +static void put_patching_mm(struct mm_struct *mm, unsigned long patching_addr) { - BUG_ON(!cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, - "powerpc/text_poke:online", text_area_cpu_up, - text_area_cpu_down)); + struct mmu_gather tlb; + + tlb_gather_mmu(&tlb, mm); + free_pgd_range(&tlb, patching_addr, patching_addr + PAGE_SIZE, 0, 0); + mmput(mm); +} + +static int text_area_cpu_up_mm(unsigned int cpu) +{ + struct mm_struct *mm; + unsigned long addr; + pte_t *pte; + spinlock_t *ptl; + + mm = mm_alloc(); + if (WARN_ON(!mm)) + goto fail_no_mm; + + /* + * Choose a random page-aligned address from the interval + * [PAGE_SIZE .. DEFAULT_MAP_WINDOW - PAGE_SIZE]. + * The lower address bound is PAGE_SIZE to avoid the zero-page. + */ + addr = (1 + (get_random_long() % (DEFAULT_MAP_WINDOW / PAGE_SIZE - 2))) << PAGE_SHIFT; + + /* + * PTE allocation uses GFP_KERNEL which means we need to + * pre-allocate the PTE here because we cannot do the + * allocation during patching when IRQs are disabled. + * + * Using get_locked_pte() to avoid open coding, the lock + * is unnecessary. 
+ */ + pte = get_locked_pte(mm, addr, &ptl); + if (!pte) + goto fail_no_pte; + pte_unmap_unlock(pte, ptl); + + this_cpu_write(cpu_patching_context.mm, mm); + this_cpu_write(cpu_patching_context.addr, addr); return 0; + +fail_no_pte: + put_patching_mm(mm, addr); +fail_no_mm: + return -ENOMEM; } -late_initcall(setup_text_poke_area); -/* - * This can be called for kernel text or a module. - */ -static int map_patch_area(void *addr, unsigned long text_poke_addr) +static int text_area_cpu_down_mm(unsigned int cpu) { - unsigned long pfn; - int err; + put_patching_mm(this_cpu_read(cpu_patching_context.mm), + this_cpu_read(cpu_patching_context.addr)); + + this_cpu_write(cpu_patching_context.mm, NULL); + this_cpu_write(cpu_patching_context.addr, 0); + + return 0; +} + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(poking_init_done); + +void __init poking_init(void) +{ + int ret; - if (is_vmalloc_addr(addr)) - pfn = vmalloc_to_pfn(addr); + if (mm_patch_enabled()) + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, + "powerpc/text_poke_mm:online", + text_area_cpu_up_mm, + text_area_cpu_down_mm); else - pfn = __pa_symbol(addr) >> PAGE_SHIFT; + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, + "powerpc/text_poke:online", + text_area_cpu_up, + text_area_cpu_down); - err = map_kernel_page(text_poke_addr, (pfn << PAGE_SHIFT), PAGE_KERNEL); + /* cpuhp_setup_state returns >= 0 on success */ + if (WARN_ON(ret < 0)) + return; - pr_devel("Mapped addr %lx with pfn %lx:%d\n", text_poke_addr, pfn, err); - if (err) - return -1; + static_branch_enable(&poking_init_done); +} - return 0; +static unsigned long get_patch_pfn(void *addr) +{ + if (IS_ENABLED(CONFIG_EXECMEM) && is_vmalloc_or_module_addr(addr)) + return vmalloc_to_pfn(addr); + else + return __pa_symbol(addr) >> PAGE_SHIFT; } -static inline int unmap_patch_area(unsigned long addr) +/* + * This can be called for kernel text or a module. 
+ */ +static int map_patch_area(void *addr, unsigned long text_poke_addr) +{ + unsigned long pfn = get_patch_pfn(addr); + + return map_kernel_page(text_poke_addr, (pfn << PAGE_SHIFT), PAGE_KERNEL); +} + +static void unmap_patch_area(unsigned long addr) { pte_t *ptep; pmd_t *pmdp; pud_t *pudp; + p4d_t *p4dp; pgd_t *pgdp; pgdp = pgd_offset_k(addr); - if (unlikely(!pgdp)) - return -EINVAL; + if (WARN_ON(pgd_none(*pgdp))) + return; - pudp = pud_offset(pgdp, addr); - if (unlikely(!pudp)) - return -EINVAL; + p4dp = p4d_offset(pgdp, addr); + if (WARN_ON(p4d_none(*p4dp))) + return; + + pudp = pud_offset(p4dp, addr); + if (WARN_ON(pud_none(*pudp))) + return; pmdp = pmd_offset(pudp, addr); - if (unlikely(!pmdp)) - return -EINVAL; + if (WARN_ON(pmd_none(*pmdp))) + return; ptep = pte_offset_kernel(pmdp, addr); - if (unlikely(!ptep)) - return -EINVAL; - - pr_devel("clearing mm %p, pte %p, addr %lx\n", &init_mm, ptep, addr); + if (WARN_ON(pte_none(*ptep))) + return; /* * In hash, pte_clear flushes the tlb, in radix, we have to */ pte_clear(&init_mm, addr, ptep); flush_tlb_kernel_range(addr, addr + PAGE_SIZE); +} - return 0; +static int __do_patch_mem_mm(void *addr, unsigned long val, bool is_dword) +{ + int err; + u32 *patch_addr; + unsigned long text_poke_addr; + pte_t *pte; + unsigned long pfn = get_patch_pfn(addr); + struct mm_struct *patching_mm; + struct mm_struct *orig_mm; + spinlock_t *ptl; + + patching_mm = __this_cpu_read(cpu_patching_context.mm); + text_poke_addr = __this_cpu_read(cpu_patching_context.addr); + patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr)); + + pte = get_locked_pte(patching_mm, text_poke_addr, &ptl); + if (!pte) + return -ENOMEM; + + __set_pte_at(patching_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0); + + /* order PTE update before use, also serves as the hwsync */ + asm volatile("ptesync": : :"memory"); + + /* order context switch after arbitrary prior code */ + isync(); + + orig_mm = start_using_temp_mm(patching_mm); + + err = __patch_mem(addr, val, patch_addr, is_dword); + + /* context synchronisation performed by __patch_instruction (isync or exception) */ + stop_using_temp_mm(patching_mm, orig_mm); + + pte_clear(patching_mm, text_poke_addr, pte); + /* + * ptesync to order PTE update before TLB invalidation done + * by radix__local_flush_tlb_page_psize (in _tlbiel_va) + */ + local_flush_tlb_page_psize(patching_mm, text_poke_addr, mmu_virtual_psize); + + pte_unmap_unlock(pte, ptl); + + return err; } -static int do_patch_instruction(unsigned int *addr, unsigned int instr) +static int __do_patch_mem(void *addr, unsigned long val, bool is_dword) { int err; - unsigned int *patch_addr = NULL; - unsigned long flags; + u32 *patch_addr; unsigned long text_poke_addr; - unsigned long kaddr = (unsigned long)addr; + pte_t *pte; + unsigned long pfn = get_patch_pfn(addr); + + text_poke_addr = (unsigned long)__this_cpu_read(cpu_patching_context.addr) & PAGE_MASK; + patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr)); + + pte = __this_cpu_read(cpu_patching_context.pte); + __set_pte_at(&init_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0); + /* See ptesync comment in radix__set_pte_at() */ + if (radix_enabled()) + asm volatile("ptesync": : :"memory"); + + err = __patch_mem(addr, val, patch_addr, is_dword); + + pte_clear(&init_mm, text_poke_addr, pte); + flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE); + + return err; +} + +static int patch_mem(void *addr, unsigned long val, bool is_dword) +{ + int err; + unsigned long flags; /* * 
During early early boot patch_instruction is called * when text_poke_area is not ready, but we still need * to allow patching. We just do the plain old patching */ - if (!this_cpu_read(text_poke_area)) - return raw_patch_instruction(addr, instr); + if (!IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) || + !static_branch_likely(&poking_init_done)) + return __patch_mem(addr, val, addr, is_dword); local_irq_save(flags); + if (mm_patch_enabled()) + err = __do_patch_mem_mm(addr, val, is_dword); + else + err = __do_patch_mem(addr, val, is_dword); + local_irq_restore(flags); - text_poke_addr = (unsigned long)__this_cpu_read(text_poke_area)->addr; - if (map_patch_area(addr, text_poke_addr)) { - err = -1; - goto out; - } + return err; +} - patch_addr = (unsigned int *)(text_poke_addr) + - ((kaddr & ~PAGE_MASK) / sizeof(unsigned int)); +#ifdef CONFIG_PPC64 - __patch_instruction(addr, instr, patch_addr); +int patch_instruction(u32 *addr, ppc_inst_t instr) +{ + if (ppc_inst_prefixed(instr)) + return patch_mem(addr, ppc_inst_as_ulong(instr), true); + else + return patch_mem(addr, ppc_inst_val(instr), false); +} +NOKPROBE_SYMBOL(patch_instruction); - err = unmap_patch_area(text_poke_addr); - if (err) - pr_warn("failed to unmap %lx\n", text_poke_addr); +int patch_uint(void *addr, unsigned int val) +{ + if (!IS_ALIGNED((unsigned long)addr, sizeof(unsigned int))) + return -EINVAL; -out: - local_irq_restore(flags); + return patch_mem(addr, val, false); +} +NOKPROBE_SYMBOL(patch_uint); - return err; +int patch_ulong(void *addr, unsigned long val) +{ + if (!IS_ALIGNED((unsigned long)addr, sizeof(unsigned long))) + return -EINVAL; + + return patch_mem(addr, val, true); } -#else /* !CONFIG_STRICT_KERNEL_RWX */ +NOKPROBE_SYMBOL(patch_ulong); -static int do_patch_instruction(unsigned int *addr, unsigned int instr) +#else + +int patch_instruction(u32 *addr, ppc_inst_t instr) { - return raw_patch_instruction(addr, instr); + return patch_mem(addr, ppc_inst_val(instr), false); } +NOKPROBE_SYMBOL(patch_instruction) -#endif /* CONFIG_STRICT_KERNEL_RWX */ +#endif -int patch_instruction(unsigned int *addr, unsigned int instr) +static int patch_memset64(u64 *addr, u64 val, size_t count) { - /* Make sure we aren't patching a freed init section */ - if (init_mem_is_free && init_section_contains(addr, 4)) { - pr_debug("Skipping init section patching addr: 0x%px\n", addr); - return 0; - } - return do_patch_instruction(addr, instr); + for (u64 *end = addr + count; addr < end; addr++) + __put_kernel_nofault(addr, &val, u64, failed); + + return 0; + +failed: + return -EPERM; } -NOKPROBE_SYMBOL(patch_instruction); -int patch_branch(unsigned int *addr, unsigned long target, int flags) +static int patch_memset32(u32 *addr, u32 val, size_t count) { - return patch_instruction(addr, create_branch(addr, target, flags)); + for (u32 *end = addr + count; addr < end; addr++) + __put_kernel_nofault(addr, &val, u32, failed); + + return 0; + +failed: + return -EPERM; } -bool is_offset_in_branch_range(long offset) +static int __patch_instructions(u32 *patch_addr, u32 *code, size_t len, bool repeat_instr) { + unsigned long start = (unsigned long)patch_addr; + int err; + + /* Repeat instruction */ + if (repeat_instr) { + ppc_inst_t instr = ppc_inst_read(code); + + if (ppc_inst_prefixed(instr)) { + u64 val = ppc_inst_as_ulong(instr); + + err = patch_memset64((u64 *)patch_addr, val, len / 8); + } else { + u32 val = ppc_inst_val(instr); + + err = patch_memset32(patch_addr, val, len / 4); + } + } else { + err = copy_to_kernel_nofault(patch_addr, code, len); + 
} + + smp_wmb(); /* smp write barrier */ + flush_icache_range(start, start + len); + return err; +} + +/* + * A page is mapped and instructions that fit the page are patched. + * Assumes 'len' to be (PAGE_SIZE - offset_in_page(addr)) or below. + */ +static int __do_patch_instructions_mm(u32 *addr, u32 *code, size_t len, bool repeat_instr) +{ + struct mm_struct *patching_mm, *orig_mm; + unsigned long pfn = get_patch_pfn(addr); + unsigned long text_poke_addr; + spinlock_t *ptl; + u32 *patch_addr; + pte_t *pte; + int err; + + patching_mm = __this_cpu_read(cpu_patching_context.mm); + text_poke_addr = __this_cpu_read(cpu_patching_context.addr); + patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr)); + + pte = get_locked_pte(patching_mm, text_poke_addr, &ptl); + if (!pte) + return -ENOMEM; + + __set_pte_at(patching_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0); + + /* order PTE update before use, also serves as the hwsync */ + asm volatile("ptesync" ::: "memory"); + + /* order context switch after arbitrary prior code */ + isync(); + + orig_mm = start_using_temp_mm(patching_mm); + + kasan_disable_current(); + err = __patch_instructions(patch_addr, code, len, repeat_instr); + kasan_enable_current(); + + /* context synchronisation performed by __patch_instructions */ + stop_using_temp_mm(patching_mm, orig_mm); + + pte_clear(patching_mm, text_poke_addr, pte); /* - * Powerpc branch instruction is : - * - * 0 6 30 31 - * +---------+----------------+---+---+ - * | opcode | LI |AA |LK | - * +---------+----------------+---+---+ - * Where AA = 0 and LK = 0 - * - * LI is a signed 24 bits integer. The real branch offset is computed - * by: imm32 = SignExtend(LI:'0b00', 32); - * - * So the maximum forward branch should be: - * (0x007fffff << 2) = 0x01fffffc = 0x1fffffc - * The maximum backward branch should be: - * (0xff800000 << 2) = 0xfe000000 = -0x2000000 + * ptesync to order PTE update before TLB invalidation done + * by radix__local_flush_tlb_page_psize (in _tlbiel_va) */ - return (offset >= -0x2000000 && offset <= 0x1fffffc && !(offset & 0x3)); + local_flush_tlb_page_psize(patching_mm, text_poke_addr, mmu_virtual_psize); + + pte_unmap_unlock(pte, ptl); + + return err; +} + +/* + * A page is mapped and instructions that fit the page are patched. + * Assumes 'len' to be (PAGE_SIZE - offset_in_page(addr)) or below. + */ +static int __do_patch_instructions(u32 *addr, u32 *code, size_t len, bool repeat_instr) +{ + unsigned long pfn = get_patch_pfn(addr); + unsigned long text_poke_addr; + u32 *patch_addr; + pte_t *pte; + int err; + + text_poke_addr = (unsigned long)__this_cpu_read(cpu_patching_context.addr) & PAGE_MASK; + patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr)); + + pte = __this_cpu_read(cpu_patching_context.pte); + __set_pte_at(&init_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0); + /* See ptesync comment in radix__set_pte_at() */ + if (radix_enabled()) + asm volatile("ptesync" ::: "memory"); + + err = __patch_instructions(patch_addr, code, len, repeat_instr); + + pte_clear(&init_mm, text_poke_addr, pte); + flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE); + + return err; +} + +/* + * Patch 'addr' with 'len' bytes of instructions from 'code'. + * + * If repeat_instr is true, the same instruction is filled for + * 'len' bytes. 
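A short usage sketch of the patch_instructions() interface described just above (the buffer names and sizes are hypothetical, and 0x60000000 is the plain "ori 0,0,0" nop encoding):

	/* Fill a 16-byte gap with nops by repeating one 4-byte instruction,
	 * then patch a real code sequence into place. */
	u32 nop = 0x60000000;
	int err;

	err = patch_instructions(gap, &nop, 16, true);	/* repeat_instr: fill */
	if (!err)
		err = patch_instructions(dst, code_buf, code_len, false);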
+ */ +int patch_instructions(u32 *addr, u32 *code, size_t len, bool repeat_instr) +{ + while (len > 0) { + unsigned long flags; + size_t plen; + int err; + + plen = min_t(size_t, PAGE_SIZE - offset_in_page(addr), len); + + local_irq_save(flags); + if (mm_patch_enabled()) + err = __do_patch_instructions_mm(addr, code, plen, repeat_instr); + else + err = __do_patch_instructions(addr, code, plen, repeat_instr); + local_irq_restore(flags); + if (err) + return err; + + len -= plen; + addr = (u32 *)((unsigned long)addr + plen); + if (!repeat_instr) + code = (u32 *)((unsigned long)code + plen); + } + + return 0; +} +NOKPROBE_SYMBOL(patch_instructions); + +int patch_branch(u32 *addr, unsigned long target, int flags) +{ + ppc_inst_t instr; + + if (create_branch(&instr, addr, target, flags)) + return -ERANGE; + + return patch_instruction(addr, instr); } /* * Helper to check if a given instruction is a conditional branch * Derived from the conditional checks in analyse_instr() */ -bool is_conditional_branch(unsigned int instr) +bool is_conditional_branch(ppc_inst_t instr) { - unsigned int opcode = instr >> 26; + unsigned int opcode = ppc_inst_primary_opcode(instr); if (opcode == 16) /* bc, bca, bcl, bcla */ return true; if (opcode == 19) { - switch ((instr >> 1) & 0x3ff) { + switch ((ppc_inst_val(instr) >> 1) & 0x3ff) { case 16: /* bclr, bclrl */ case 528: /* bcctr, bcctrl */ case 560: /* bctar, bctarl */ @@ -243,30 +607,9 @@ bool is_conditional_branch(unsigned int instr) } NOKPROBE_SYMBOL(is_conditional_branch); -unsigned int create_branch(const unsigned int *addr, - unsigned long target, int flags) -{ - unsigned int instruction; - long offset; - - offset = target; - if (! (flags & BRANCH_ABSOLUTE)) - offset = offset - (unsigned long)addr; - - /* Check we can represent the target in the instruction format */ - if (!is_offset_in_branch_range(offset)) - return 0; - - /* Mask out the flags and target, so they don't step on each other. */ - instruction = 0x48000000 | (flags & 0x3) | (offset & 0x03FFFFFC); - - return instruction; -} - -unsigned int create_cond_branch(const unsigned int *addr, - unsigned long target, int flags) +int create_cond_branch(ppc_inst_t *instr, const u32 *addr, + unsigned long target, int flags) { - unsigned int instruction; long offset; offset = target; @@ -274,413 +617,81 @@ unsigned int create_cond_branch(const unsigned int *addr, offset = offset - (unsigned long)addr; /* Check we can represent the target in the instruction format */ - if (offset < -0x8000 || offset > 0x7FFF || offset & 0x3) - return 0; + if (!is_offset_in_cond_branch_range(offset)) + return 1; /* Mask out the flags and target, so they don't step on each other. 
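The is_offset_in_cond_branch_range() test used above replaces the removed open-coded comparison; a sketch of the check it is expected to perform, reconstructed from that removed comparison rather than copied from the new header:

	/* B-form (bc) branches carry a 16-bit, word-aligned, sign-extended
	 * displacement, so the reachable window is -0x8000 .. 0x7ffc bytes. */
	static inline bool cond_branch_offset_ok(long offset)
	{
		return offset >= -0x8000 && offset <= 0x7fff && !(offset & 0x3);
	}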
*/ - instruction = 0x40000000 | (flags & 0x3FF0003) | (offset & 0xFFFC); - - return instruction; -} - -static unsigned int branch_opcode(unsigned int instr) -{ - return (instr >> 26) & 0x3F; -} - -static int instr_is_branch_iform(unsigned int instr) -{ - return branch_opcode(instr) == 18; -} + *instr = ppc_inst(0x40000000 | (flags & 0x3FF0003) | (offset & 0xFFFC)); -static int instr_is_branch_bform(unsigned int instr) -{ - return branch_opcode(instr) == 16; + return 0; } -int instr_is_relative_branch(unsigned int instr) +int instr_is_relative_branch(ppc_inst_t instr) { - if (instr & BRANCH_ABSOLUTE) + if (ppc_inst_val(instr) & BRANCH_ABSOLUTE) return 0; return instr_is_branch_iform(instr) || instr_is_branch_bform(instr); } -int instr_is_relative_link_branch(unsigned int instr) +int instr_is_relative_link_branch(ppc_inst_t instr) { - return instr_is_relative_branch(instr) && (instr & BRANCH_SET_LINK); + return instr_is_relative_branch(instr) && (ppc_inst_val(instr) & BRANCH_SET_LINK); } -static unsigned long branch_iform_target(const unsigned int *instr) +static unsigned long branch_iform_target(const u32 *instr) { signed long imm; - imm = *instr & 0x3FFFFFC; + imm = ppc_inst_val(ppc_inst_read(instr)) & 0x3FFFFFC; /* If the top bit of the immediate value is set this is negative */ if (imm & 0x2000000) imm -= 0x4000000; - if ((*instr & BRANCH_ABSOLUTE) == 0) + if ((ppc_inst_val(ppc_inst_read(instr)) & BRANCH_ABSOLUTE) == 0) imm += (unsigned long)instr; return (unsigned long)imm; } -static unsigned long branch_bform_target(const unsigned int *instr) +static unsigned long branch_bform_target(const u32 *instr) { signed long imm; - imm = *instr & 0xFFFC; + imm = ppc_inst_val(ppc_inst_read(instr)) & 0xFFFC; /* If the top bit of the immediate value is set this is negative */ if (imm & 0x8000) imm -= 0x10000; - if ((*instr & BRANCH_ABSOLUTE) == 0) + if ((ppc_inst_val(ppc_inst_read(instr)) & BRANCH_ABSOLUTE) == 0) imm += (unsigned long)instr; return (unsigned long)imm; } -unsigned long branch_target(const unsigned int *instr) +unsigned long branch_target(const u32 *instr) { - if (instr_is_branch_iform(*instr)) + if (instr_is_branch_iform(ppc_inst_read(instr))) return branch_iform_target(instr); - else if (instr_is_branch_bform(*instr)) + else if (instr_is_branch_bform(ppc_inst_read(instr))) return branch_bform_target(instr); return 0; } -int instr_is_branch_to_addr(const unsigned int *instr, unsigned long addr) -{ - if (instr_is_branch_iform(*instr) || instr_is_branch_bform(*instr)) - return branch_target(instr) == addr; - - return 0; -} - -unsigned int translate_branch(const unsigned int *dest, const unsigned int *src) +int translate_branch(ppc_inst_t *instr, const u32 *dest, const u32 *src) { unsigned long target; - target = branch_target(src); - if (instr_is_branch_iform(*src)) - return create_branch(dest, target, *src); - else if (instr_is_branch_bform(*src)) - return create_cond_branch(dest, target, *src); - - return 0; -} - -#ifdef CONFIG_PPC_BOOK3E_64 -void __patch_exception(int exc, unsigned long addr) -{ - extern unsigned int interrupt_base_book3e; - unsigned int *ibase = &interrupt_base_book3e; - - /* Our exceptions vectors start with a NOP and -then- a branch - * to deal with single stepping from userspace which stops on - * the second instruction. 
Thus we need to patch the second - * instruction of the exception, not the first one - */ - - patch_branch(ibase + (exc / 4) + 1, addr, 0); -} -#endif - -#ifdef CONFIG_CODE_PATCHING_SELFTEST - -static void __init test_trampoline(void) -{ - asm ("nop;\n"); -} - -#define check(x) \ - if (!(x)) printk("code-patching: test failed at line %d\n", __LINE__); - -static void __init test_branch_iform(void) -{ - unsigned int instr; - unsigned long addr; - - addr = (unsigned long)&instr; - - /* The simplest case, branch to self, no flags */ - check(instr_is_branch_iform(0x48000000)); - /* All bits of target set, and flags */ - check(instr_is_branch_iform(0x4bffffff)); - /* High bit of opcode set, which is wrong */ - check(!instr_is_branch_iform(0xcbffffff)); - /* Middle bits of opcode set, which is wrong */ - check(!instr_is_branch_iform(0x7bffffff)); - - /* Simplest case, branch to self with link */ - check(instr_is_branch_iform(0x48000001)); - /* All bits of targets set */ - check(instr_is_branch_iform(0x4bfffffd)); - /* Some bits of targets set */ - check(instr_is_branch_iform(0x4bff00fd)); - /* Must be a valid branch to start with */ - check(!instr_is_branch_iform(0x7bfffffd)); - - /* Absolute branch to 0x100 */ - instr = 0x48000103; - check(instr_is_branch_to_addr(&instr, 0x100)); - /* Absolute branch to 0x420fc */ - instr = 0x480420ff; - check(instr_is_branch_to_addr(&instr, 0x420fc)); - /* Maximum positive relative branch, + 20MB - 4B */ - instr = 0x49fffffc; - check(instr_is_branch_to_addr(&instr, addr + 0x1FFFFFC)); - /* Smallest negative relative branch, - 4B */ - instr = 0x4bfffffc; - check(instr_is_branch_to_addr(&instr, addr - 4)); - /* Largest negative relative branch, - 32 MB */ - instr = 0x4a000000; - check(instr_is_branch_to_addr(&instr, addr - 0x2000000)); - - /* Branch to self, with link */ - instr = create_branch(&instr, addr, BRANCH_SET_LINK); - check(instr_is_branch_to_addr(&instr, addr)); - - /* Branch to self - 0x100, with link */ - instr = create_branch(&instr, addr - 0x100, BRANCH_SET_LINK); - check(instr_is_branch_to_addr(&instr, addr - 0x100)); - - /* Branch to self + 0x100, no link */ - instr = create_branch(&instr, addr + 0x100, 0); - check(instr_is_branch_to_addr(&instr, addr + 0x100)); - - /* Maximum relative negative offset, - 32 MB */ - instr = create_branch(&instr, addr - 0x2000000, BRANCH_SET_LINK); - check(instr_is_branch_to_addr(&instr, addr - 0x2000000)); - - /* Out of range relative negative offset, - 32 MB + 4*/ - instr = create_branch(&instr, addr - 0x2000004, BRANCH_SET_LINK); - check(instr == 0); - - /* Out of range relative positive offset, + 32 MB */ - instr = create_branch(&instr, addr + 0x2000000, BRANCH_SET_LINK); - check(instr == 0); - - /* Unaligned target */ - instr = create_branch(&instr, addr + 3, BRANCH_SET_LINK); - check(instr == 0); - - /* Check flags are masked correctly */ - instr = create_branch(&instr, addr, 0xFFFFFFFC); - check(instr_is_branch_to_addr(&instr, addr)); - check(instr == 0x48000000); -} - -static void __init test_create_function_call(void) -{ - unsigned int *iptr; - unsigned long dest; - - /* Check we can create a function call */ - iptr = (unsigned int *)ppc_function_entry(test_trampoline); - dest = ppc_function_entry(test_create_function_call); - patch_instruction(iptr, create_branch(iptr, dest, BRANCH_SET_LINK)); - check(instr_is_branch_to_addr(iptr, dest)); -} - -static void __init test_branch_bform(void) -{ - unsigned long addr; - unsigned int *iptr, instr, flags; - - iptr = &instr; - addr = (unsigned long)iptr; - - /* The 
simplest case, branch to self, no flags */ - check(instr_is_branch_bform(0x40000000)); - /* All bits of target set, and flags */ - check(instr_is_branch_bform(0x43ffffff)); - /* High bit of opcode set, which is wrong */ - check(!instr_is_branch_bform(0xc3ffffff)); - /* Middle bits of opcode set, which is wrong */ - check(!instr_is_branch_bform(0x7bffffff)); - - /* Absolute conditional branch to 0x100 */ - instr = 0x43ff0103; - check(instr_is_branch_to_addr(&instr, 0x100)); - /* Absolute conditional branch to 0x20fc */ - instr = 0x43ff20ff; - check(instr_is_branch_to_addr(&instr, 0x20fc)); - /* Maximum positive relative conditional branch, + 32 KB - 4B */ - instr = 0x43ff7ffc; - check(instr_is_branch_to_addr(&instr, addr + 0x7FFC)); - /* Smallest negative relative conditional branch, - 4B */ - instr = 0x43fffffc; - check(instr_is_branch_to_addr(&instr, addr - 4)); - /* Largest negative relative conditional branch, - 32 KB */ - instr = 0x43ff8000; - check(instr_is_branch_to_addr(&instr, addr - 0x8000)); - - /* All condition code bits set & link */ - flags = 0x3ff000 | BRANCH_SET_LINK; - - /* Branch to self */ - instr = create_cond_branch(iptr, addr, flags); - check(instr_is_branch_to_addr(&instr, addr)); - - /* Branch to self - 0x100 */ - instr = create_cond_branch(iptr, addr - 0x100, flags); - check(instr_is_branch_to_addr(&instr, addr - 0x100)); - - /* Branch to self + 0x100 */ - instr = create_cond_branch(iptr, addr + 0x100, flags); - check(instr_is_branch_to_addr(&instr, addr + 0x100)); - - /* Maximum relative negative offset, - 32 KB */ - instr = create_cond_branch(iptr, addr - 0x8000, flags); - check(instr_is_branch_to_addr(&instr, addr - 0x8000)); - - /* Out of range relative negative offset, - 32 KB + 4*/ - instr = create_cond_branch(iptr, addr - 0x8004, flags); - check(instr == 0); - - /* Out of range relative positive offset, + 32 KB */ - instr = create_cond_branch(iptr, addr + 0x8000, flags); - check(instr == 0); - - /* Unaligned target */ - instr = create_cond_branch(iptr, addr + 3, flags); - check(instr == 0); - - /* Check flags are masked correctly */ - instr = create_cond_branch(iptr, addr, 0xFFFFFFFC); - check(instr_is_branch_to_addr(&instr, addr)); - check(instr == 0x43FF0000); -} - -static void __init test_translate_branch(void) -{ - unsigned long addr; - unsigned int *p, *q; - void *buf; - - buf = vmalloc(PAGE_ALIGN(0x2000000 + 1)); - check(buf); - if (!buf) - return; - - /* Simple case, branch to self moved a little */ - p = buf; - addr = (unsigned long)p; - patch_branch(p, addr, 0); - check(instr_is_branch_to_addr(p, addr)); - q = p + 1; - patch_instruction(q, translate_branch(q, p)); - check(instr_is_branch_to_addr(q, addr)); - - /* Maximum negative case, move b . 
to addr + 32 MB */ - p = buf; - addr = (unsigned long)p; - patch_branch(p, addr, 0); - q = buf + 0x2000000; - patch_instruction(q, translate_branch(q, p)); - check(instr_is_branch_to_addr(p, addr)); - check(instr_is_branch_to_addr(q, addr)); - check(*q == 0x4a000000); - - /* Maximum positive case, move x to x - 32 MB + 4 */ - p = buf + 0x2000000; - addr = (unsigned long)p; - patch_branch(p, addr, 0); - q = buf + 4; - patch_instruction(q, translate_branch(q, p)); - check(instr_is_branch_to_addr(p, addr)); - check(instr_is_branch_to_addr(q, addr)); - check(*q == 0x49fffffc); - - /* Jump to x + 16 MB moved to x + 20 MB */ - p = buf; - addr = 0x1000000 + (unsigned long)buf; - patch_branch(p, addr, BRANCH_SET_LINK); - q = buf + 0x1400000; - patch_instruction(q, translate_branch(q, p)); - check(instr_is_branch_to_addr(p, addr)); - check(instr_is_branch_to_addr(q, addr)); - - /* Jump to x + 16 MB moved to x - 16 MB + 4 */ - p = buf + 0x1000000; - addr = 0x2000000 + (unsigned long)buf; - patch_branch(p, addr, 0); - q = buf + 4; - patch_instruction(q, translate_branch(q, p)); - check(instr_is_branch_to_addr(p, addr)); - check(instr_is_branch_to_addr(q, addr)); - - - /* Conditional branch tests */ - - /* Simple case, branch to self moved a little */ - p = buf; - addr = (unsigned long)p; - patch_instruction(p, create_cond_branch(p, addr, 0)); - check(instr_is_branch_to_addr(p, addr)); - q = p + 1; - patch_instruction(q, translate_branch(q, p)); - check(instr_is_branch_to_addr(q, addr)); - - /* Maximum negative case, move b . to addr + 32 KB */ - p = buf; - addr = (unsigned long)p; - patch_instruction(p, create_cond_branch(p, addr, 0xFFFFFFFC)); - q = buf + 0x8000; - patch_instruction(q, translate_branch(q, p)); - check(instr_is_branch_to_addr(p, addr)); - check(instr_is_branch_to_addr(q, addr)); - check(*q == 0x43ff8000); - - /* Maximum positive case, move x to x - 32 KB + 4 */ - p = buf + 0x8000; - addr = (unsigned long)p; - patch_instruction(p, create_cond_branch(p, addr, 0xFFFFFFFC)); - q = buf + 4; - patch_instruction(q, translate_branch(q, p)); - check(instr_is_branch_to_addr(p, addr)); - check(instr_is_branch_to_addr(q, addr)); - check(*q == 0x43ff7ffc); - - /* Jump to x + 12 KB moved to x + 20 KB */ - p = buf; - addr = 0x3000 + (unsigned long)buf; - patch_instruction(p, create_cond_branch(p, addr, BRANCH_SET_LINK)); - q = buf + 0x5000; - patch_instruction(q, translate_branch(q, p)); - check(instr_is_branch_to_addr(p, addr)); - check(instr_is_branch_to_addr(q, addr)); - - /* Jump to x + 8 KB moved to x - 8 KB + 4 */ - p = buf + 0x2000; - addr = 0x4000 + (unsigned long)buf; - patch_instruction(p, create_cond_branch(p, addr, 0)); - q = buf + 4; - patch_instruction(q, translate_branch(q, p)); - check(instr_is_branch_to_addr(p, addr)); - check(instr_is_branch_to_addr(q, addr)); - - /* Free the buffer we were using */ - vfree(buf); -} - -static int __init test_code_patching(void) -{ - printk(KERN_DEBUG "Running code patching self-tests ...\n"); - - test_branch_iform(); - test_branch_bform(); - test_create_function_call(); - test_translate_branch(); + if (instr_is_branch_iform(ppc_inst_read(src))) + return create_branch(instr, dest, target, + ppc_inst_val(ppc_inst_read(src))); + else if (instr_is_branch_bform(ppc_inst_read(src))) + return create_cond_branch(instr, dest, target, + ppc_inst_val(ppc_inst_read(src))); - return 0; + return 1; } -late_initcall(test_code_patching); - -#endif /* CONFIG_CODE_PATCHING_SELFTEST */ diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S index 
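With this conversion, translate_branch() and create_branch()/create_cond_branch() report failure through their return value and hand the encoded instruction back through a pointer, instead of returning a zero instruction word. A hedged sketch of the calling pattern these signatures imply (the dest/src pointers are illustrative):

	/* Relocate a relative branch from 'src' to 'dest': re-encode the
	 * offset for the new location, and only patch if it is still in
	 * range for that branch form (nonzero return = unreachable). */
	ppc_inst_t instr;

	if (translate_branch(&instr, dest, src))
		return -ERANGE;

	return patch_instruction(dest, instr);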
a3bcf4786e4a..933b685e7ab6 100644 --- a/arch/powerpc/lib/copy_32.S +++ b/arch/powerpc/lib/copy_32.S @@ -4,11 +4,11 @@ * * Copyright (C) 1996-2005 Paul Mackerras. */ +#include <linux/export.h> #include <asm/processor.h> #include <asm/cache.h> #include <asm/errno.h> #include <asm/ppc_asm.h> -#include <asm/export.h> #include <asm/code-patching-asm.h> #include <asm/kasan.h> @@ -57,9 +57,6 @@ EX_TABLE(8 ## n ## 7b,9 ## n ## 1b) .text - .stabs "arch/powerpc/lib/",N_SO,0,0,0f - .stabs "copy_32.S",N_SO,0,0,0f -0: CACHELINE_BYTES = L1_CACHE_BYTES LG_CACHELINE_BYTES = L1_CACHE_SHIFT diff --git a/arch/powerpc/lib/memcpy_mcsafe_64.S b/arch/powerpc/lib/copy_mc_64.S index cb882d9a6d8a..bf1014b28fe8 100644 --- a/arch/powerpc/lib/memcpy_mcsafe_64.S +++ b/arch/powerpc/lib/copy_mc_64.S @@ -4,9 +4,9 @@ * Derived from copyuser_power7.s by Anton Blanchard <anton@au.ibm.com> * Author - Balbir Singh <bsingharora@gmail.com> */ +#include <linux/export.h> #include <asm/ppc_asm.h> #include <asm/errno.h> -#include <asm/export.h> .macro err1 100: @@ -50,7 +50,7 @@ err3; stb r0,0(r3) blr -_GLOBAL(memcpy_mcsafe) +_GLOBAL(copy_mc_generic) mr r7,r5 cmpldi r5,16 blt .Lshort_copy @@ -239,4 +239,4 @@ err1; stb r0,0(r3) 15: li r3,0 blr -EXPORT_SYMBOL_GPL(memcpy_mcsafe); +EXPORT_SYMBOL_GPL(copy_mc_generic); diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S index d1091b5ee5da..f33a2e6088e5 100644 --- a/arch/powerpc/lib/copypage_64.S +++ b/arch/powerpc/lib/copypage_64.S @@ -2,18 +2,13 @@ /* * Copyright (C) 2008 Mark Nelson, IBM Corp. */ +#include <linux/export.h> #include <asm/page.h> #include <asm/processor.h> #include <asm/ppc_asm.h> #include <asm/asm-offsets.h> -#include <asm/export.h> #include <asm/feature-fixups.h> - .section ".toc","aw" -PPC64_CACHES: - .tc ppc64_caches[TC],ppc64_caches - .section ".text" - _GLOBAL_TOC(copy_page) BEGIN_FTR_SECTION lis r5,PAGE_SIZE@h @@ -23,8 +18,18 @@ FTR_SECTION_ELSE #endif ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY) ori r5,r5,PAGE_SIZE@l +#ifdef CONFIG_PPC_KERNEL_PCREL + /* + * Hack for toolchain - prefixed instructions cause label difference to + * be non-constant even if 8 byte alignment is known, so they can not + * be put in FTR sections. 
+ */ + LOAD_REG_ADDR(r10, ppc64_caches) +BEGIN_FTR_SECTION +#else BEGIN_FTR_SECTION - ld r10,PPC64_CACHES@toc(r2) + LOAD_REG_ADDR(r10, ppc64_caches) +#endif lwz r11,DCACHEL1LOGBLOCKSIZE(r10) /* log2 of cache block size */ lwz r12,DCACHEL1BLOCKSIZE(r10) /* get cache block size */ li r9,0 diff --git a/arch/powerpc/lib/copypage_power7.S b/arch/powerpc/lib/copypage_power7.S index a9844c6353cf..07e7cec4d135 100644 --- a/arch/powerpc/lib/copypage_power7.S +++ b/arch/powerpc/lib/copypage_power7.S @@ -27,17 +27,7 @@ _GLOBAL(copypage_power7) #endif ori r10,r7,1 /* stream=1 */ - lis r8,0x8000 /* GO=1 */ - clrldi r8,r8,32 - - /* setup read stream 0 */ - dcbt 0,r4,0b01000 /* addr from */ - dcbt 0,r7,0b01010 /* length and depth from */ - /* setup write stream 1 */ - dcbtst 0,r9,0b01000 /* addr to */ - dcbtst 0,r10,0b01010 /* length and depth to */ - eieio - dcbt 0,r8,0b01010 /* all streams GO */ + DCBT_SETUP_STREAMS(r4, r7, r9, r10, r8) #ifdef CONFIG_ALTIVEC mflr r0 @@ -45,7 +35,7 @@ _GLOBAL(copypage_power7) std r4,-STACKFRAMESIZE+STK_REG(R30)(r1) std r0,16(r1) stdu r1,-STACKFRAMESIZE(r1) - bl enter_vmx_ops + bl CFUNC(enter_vmx_ops) cmpwi r3,0 ld r0,STACKFRAMESIZE+16(r1) ld r3,STK_REG(R31)(r1) @@ -88,7 +78,7 @@ _GLOBAL(copypage_power7) addi r3,r3,128 bdnz 1b - b exit_vmx_ops /* tail call optimise */ + b CFUNC(exit_vmx_ops) /* tail call optimise */ #else li r0,(PAGE_SIZE/128) diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S index db8719a14846..9af969d2cc0c 100644 --- a/arch/powerpc/lib/copyuser_64.S +++ b/arch/powerpc/lib/copyuser_64.S @@ -2,9 +2,9 @@ /* * Copyright (C) 2002 Paul Mackerras, IBM Corp. */ +#include <linux/export.h> #include <asm/processor.h> #include <asm/ppc_asm.h> -#include <asm/export.h> #include <asm/asm-compat.h> #include <asm/feature-fixups.h> diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S index 28f0be523c06..8474c682a178 100644 --- a/arch/powerpc/lib/copyuser_power7.S +++ b/arch/powerpc/lib/copyuser_power7.S @@ -47,7 +47,7 @@ ld r15,STK_REG(R15)(r1) ld r14,STK_REG(R14)(r1) .Ldo_err3: - bl exit_vmx_usercopy + bl CFUNC(exit_vmx_usercopy) ld r0,STACKFRAMESIZE+16(r1) mtlr r0 b .Lexit @@ -272,7 +272,7 @@ err1; stb r0,0(r3) mflr r0 std r0,16(r1) stdu r1,-STACKFRAMESIZE(r1) - bl enter_vmx_usercopy + bl CFUNC(enter_vmx_usercopy) cmpwi cr1,r3,0 ld r0,STACKFRAMESIZE+16(r1) ld r3,STK_REG(R31)(r1) @@ -298,17 +298,7 @@ err1; stb r0,0(r3) or r7,r7,r0 ori r10,r7,1 /* stream=1 */ - lis r8,0x8000 /* GO=1 */ - clrldi r8,r8,32 - - /* setup read stream 0 */ - dcbt 0,r6,0b01000 /* addr from */ - dcbt 0,r7,0b01010 /* length and depth from */ - /* setup write stream 1 */ - dcbtst 0,r9,0b01000 /* addr to */ - dcbtst 0,r10,0b01010 /* length and depth to */ - eieio - dcbt 0,r8,0b01010 /* all streams GO */ + DCBT_SETUP_STREAMS(r6, r7, r9, r10, r8) beq cr1,.Lunwind_stack_nonvmx_copy @@ -488,7 +478,7 @@ err3; lbz r0,0(r4) err3; stb r0,0(r3) 15: addi r1,r1,STACKFRAMESIZE - b exit_vmx_usercopy /* tail call optimise */ + b CFUNC(exit_vmx_usercopy) /* tail call optimise */ .Lvmx_unaligned_copy: /* Get the destination 16B aligned */ @@ -691,5 +681,5 @@ err3; lbz r0,0(r4) err3; stb r0,0(r3) 15: addi r1,r1,STACKFRAMESIZE - b exit_vmx_usercopy /* tail call optimise */ + b CFUNC(exit_vmx_usercopy) /* tail call optimise */ #endif /* CONFIG_ALTIVEC */ diff --git a/arch/powerpc/lib/crc-t10dif-glue.c b/arch/powerpc/lib/crc-t10dif-glue.c new file mode 100644 index 000000000000..f411b0120cc5 --- /dev/null +++ b/arch/powerpc/lib/crc-t10dif-glue.c @@ -0,0 +1,83 
@@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Calculate a CRC T10-DIF with vpmsum acceleration + * + * Copyright 2017, Daniel Axtens, IBM Corporation. + * [based on crc32c-vpmsum_glue.c] + */ + +#include <linux/crc-t10dif.h> +#include <crypto/internal/simd.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <linux/cpufeature.h> +#include <asm/simd.h> +#include <asm/switch_to.h> + +#define VMX_ALIGN 16 +#define VMX_ALIGN_MASK (VMX_ALIGN-1) + +#define VECTOR_BREAKPOINT 64 + +static DEFINE_STATIC_KEY_FALSE(have_vec_crypto); + +u32 __crct10dif_vpmsum(u32 crc, unsigned char const *p, size_t len); + +u16 crc_t10dif_arch(u16 crci, const u8 *p, size_t len) +{ + unsigned int prealign; + unsigned int tail; + u32 crc = crci; + + if (len < (VECTOR_BREAKPOINT + VMX_ALIGN) || + !static_branch_likely(&have_vec_crypto) || !crypto_simd_usable()) + return crc_t10dif_generic(crc, p, len); + + if ((unsigned long)p & VMX_ALIGN_MASK) { + prealign = VMX_ALIGN - ((unsigned long)p & VMX_ALIGN_MASK); + crc = crc_t10dif_generic(crc, p, prealign); + len -= prealign; + p += prealign; + } + + if (len & ~VMX_ALIGN_MASK) { + crc <<= 16; + preempt_disable(); + pagefault_disable(); + enable_kernel_altivec(); + crc = __crct10dif_vpmsum(crc, p, len & ~VMX_ALIGN_MASK); + disable_kernel_altivec(); + pagefault_enable(); + preempt_enable(); + crc >>= 16; + } + + tail = len & VMX_ALIGN_MASK; + if (tail) { + p += len & ~VMX_ALIGN_MASK; + crc = crc_t10dif_generic(crc, p, tail); + } + + return crc & 0xffff; +} +EXPORT_SYMBOL(crc_t10dif_arch); + +static int __init crc_t10dif_powerpc_init(void) +{ + if (cpu_has_feature(CPU_FTR_ARCH_207S) && + (cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_VEC_CRYPTO)) + static_branch_enable(&have_vec_crypto); + return 0; +} +arch_initcall(crc_t10dif_powerpc_init); + +static void __exit crc_t10dif_powerpc_exit(void) +{ +} +module_exit(crc_t10dif_powerpc_exit); + +MODULE_AUTHOR("Daniel Axtens <dja@axtens.net>"); +MODULE_DESCRIPTION("CRCT10DIF using vector polynomial multiply-sum instructions"); +MODULE_LICENSE("GPL"); diff --git a/arch/powerpc/lib/crc32-glue.c b/arch/powerpc/lib/crc32-glue.c new file mode 100644 index 000000000000..dbd10f339183 --- /dev/null +++ b/arch/powerpc/lib/crc32-glue.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <linux/crc32.h> +#include <crypto/internal/simd.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/cpufeature.h> +#include <asm/simd.h> +#include <asm/switch_to.h> + +#define VMX_ALIGN 16 +#define VMX_ALIGN_MASK (VMX_ALIGN-1) + +#define VECTOR_BREAKPOINT 512 + +static DEFINE_STATIC_KEY_FALSE(have_vec_crypto); + +u32 __crc32c_vpmsum(u32 crc, const u8 *p, size_t len); + +u32 crc32_le_arch(u32 crc, const u8 *p, size_t len) +{ + return crc32_le_base(crc, p, len); +} +EXPORT_SYMBOL(crc32_le_arch); + +u32 crc32c_arch(u32 crc, const u8 *p, size_t len) +{ + unsigned int prealign; + unsigned int tail; + + if (len < (VECTOR_BREAKPOINT + VMX_ALIGN) || + !static_branch_likely(&have_vec_crypto) || !crypto_simd_usable()) + return crc32c_base(crc, p, len); + + if ((unsigned long)p & VMX_ALIGN_MASK) { + prealign = VMX_ALIGN - ((unsigned long)p & VMX_ALIGN_MASK); + crc = crc32c_base(crc, p, prealign); + len -= prealign; + p += prealign; + } + + if (len & ~VMX_ALIGN_MASK) { + preempt_disable(); + pagefault_disable(); + enable_kernel_altivec(); + crc = __crc32c_vpmsum(crc, p, len & ~VMX_ALIGN_MASK); + disable_kernel_altivec(); + 
pagefault_enable(); + preempt_enable(); + } + + tail = len & VMX_ALIGN_MASK; + if (tail) { + p += len & ~VMX_ALIGN_MASK; + crc = crc32c_base(crc, p, tail); + } + + return crc; +} +EXPORT_SYMBOL(crc32c_arch); + +u32 crc32_be_arch(u32 crc, const u8 *p, size_t len) +{ + return crc32_be_base(crc, p, len); +} +EXPORT_SYMBOL(crc32_be_arch); + +static int __init crc32_powerpc_init(void) +{ + if (cpu_has_feature(CPU_FTR_ARCH_207S) && + (cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_VEC_CRYPTO)) + static_branch_enable(&have_vec_crypto); + return 0; +} +arch_initcall(crc32_powerpc_init); + +static void __exit crc32_powerpc_exit(void) +{ +} +module_exit(crc32_powerpc_exit); + +u32 crc32_optimizations(void) +{ + if (static_key_enabled(&have_vec_crypto)) + return CRC32C_OPTIMIZATION; + return 0; +} +EXPORT_SYMBOL(crc32_optimizations); + +MODULE_AUTHOR("Anton Blanchard <anton@samba.org>"); +MODULE_DESCRIPTION("CRC32C using vector polynomial multiply-sum instructions"); +MODULE_LICENSE("GPL"); diff --git a/arch/powerpc/lib/crc32-vpmsum_core.S b/arch/powerpc/lib/crc32-vpmsum_core.S new file mode 100644 index 000000000000..b0f87f595b26 --- /dev/null +++ b/arch/powerpc/lib/crc32-vpmsum_core.S @@ -0,0 +1,746 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Core of the accelerated CRC algorithm. + * In your file, define the constants and CRC_FUNCTION_NAME + * Then include this file. + * + * Calculate the checksum of data that is 16 byte aligned and a multiple of + * 16 bytes. + * + * The first step is to reduce it to 1024 bits. We do this in 8 parallel + * chunks in order to mask the latency of the vpmsum instructions. If we + * have more than 32 kB of data to checksum we repeat this step multiple + * times, passing in the previous 1024 bits. + * + * The next step is to reduce the 1024 bits to 64 bits. This step adds + * 32 bits of 0s to the end - this matches what a CRC does. We just + * calculate constants that land the data in this 32 bits. + * + * We then use fixed point Barrett reduction to compute a mod n over GF(2) + * for n = CRC using POWER8 instructions. We use x = 32. 
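Stepping back to the two glue files above (crc32-glue.c and crc-t10dif-glue.c): both wrap this core with the same head/middle/tail split, so the vector routine only ever sees a 16-byte-aligned buffer whose length is a multiple of 16. A small sketch of that split (variable names are illustrative, not from the patch):

	/* head: bytes handled by the generic CRC until p is 16-byte aligned;
	 * middle: the aligned multiple-of-16 region fed to the vpmsum core;
	 * tail: leftover bytes finished with the generic CRC again. */
	size_t head = -(uintptr_t)p & VMX_ALIGN_MASK;
	size_t middle, tail;

	if (head > len)
		head = len;
	middle = (len - head) & ~(size_t)VMX_ALIGN_MASK;
	tail = len - head - middle;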
+ * + * https://en.wikipedia.org/wiki/Barrett_reduction + * + * Copyright (C) 2015 Anton Blanchard <anton@au.ibm.com>, IBM +*/ + +#include <asm/ppc_asm.h> +#include <asm/ppc-opcode.h> + +#define MAX_SIZE 32768 + + .text + +#if defined(__BIG_ENDIAN__) && defined(REFLECT) +#define BYTESWAP_DATA +#elif defined(__LITTLE_ENDIAN__) && !defined(REFLECT) +#define BYTESWAP_DATA +#else +#undef BYTESWAP_DATA +#endif + +#define off16 r25 +#define off32 r26 +#define off48 r27 +#define off64 r28 +#define off80 r29 +#define off96 r30 +#define off112 r31 + +#define const1 v24 +#define const2 v25 + +#define byteswap v26 +#define mask_32bit v27 +#define mask_64bit v28 +#define zeroes v29 + +#ifdef BYTESWAP_DATA +#define VPERM(A, B, C, D) vperm A, B, C, D +#else +#define VPERM(A, B, C, D) +#endif + +/* unsigned int CRC_FUNCTION_NAME(unsigned int crc, void *p, unsigned long len) */ +FUNC_START(CRC_FUNCTION_NAME) + std r31,-8(r1) + std r30,-16(r1) + std r29,-24(r1) + std r28,-32(r1) + std r27,-40(r1) + std r26,-48(r1) + std r25,-56(r1) + + li off16,16 + li off32,32 + li off48,48 + li off64,64 + li off80,80 + li off96,96 + li off112,112 + li r0,0 + + /* Enough room for saving 10 non volatile VMX registers */ + subi r6,r1,56+10*16 + subi r7,r1,56+2*16 + + stvx v20,0,r6 + stvx v21,off16,r6 + stvx v22,off32,r6 + stvx v23,off48,r6 + stvx v24,off64,r6 + stvx v25,off80,r6 + stvx v26,off96,r6 + stvx v27,off112,r6 + stvx v28,0,r7 + stvx v29,off16,r7 + + mr r10,r3 + + vxor zeroes,zeroes,zeroes + vspltisw v0,-1 + + vsldoi mask_32bit,zeroes,v0,4 + vsldoi mask_64bit,zeroes,v0,8 + + /* Get the initial value into v8 */ + vxor v8,v8,v8 + MTVRD(v8, R3) +#ifdef REFLECT + vsldoi v8,zeroes,v8,8 /* shift into bottom 32 bits */ +#else + vsldoi v8,v8,zeroes,4 /* shift into top 32 bits */ +#endif + +#ifdef BYTESWAP_DATA + LOAD_REG_ADDR(r3, .byteswap_constant) + lvx byteswap,0,r3 + addi r3,r3,16 +#endif + + cmpdi r5,256 + blt .Lshort + + rldicr r6,r5,0,56 + + /* Checksum in blocks of MAX_SIZE */ +1: lis r7,MAX_SIZE@h + ori r7,r7,MAX_SIZE@l + mr r9,r7 + cmpd r6,r7 + bgt 2f + mr r7,r6 +2: subf r6,r7,r6 + + /* our main loop does 128 bytes at a time */ + srdi r7,r7,7 + + /* + * Work out the offset into the constants table to start at. Each + * constant is 16 bytes, and it is used against 128 bytes of input + * data - 128 / 16 = 8 + */ + sldi r8,r7,4 + srdi r9,r9,3 + subf r8,r8,r9 + + /* We reduce our final 128 bytes in a separate step */ + addi r7,r7,-1 + mtctr r7 + + LOAD_REG_ADDR(r3, .constants) + + /* Find the start of our constants */ + add r3,r3,r8 + + /* zero v0-v7 which will contain our checksums */ + vxor v0,v0,v0 + vxor v1,v1,v1 + vxor v2,v2,v2 + vxor v3,v3,v3 + vxor v4,v4,v4 + vxor v5,v5,v5 + vxor v6,v6,v6 + vxor v7,v7,v7 + + lvx const1,0,r3 + + /* + * If we are looping back to consume more data we use the values + * already in v16-v23. 
+ */ + cmpdi r0,1 + beq 2f + + /* First warm up pass */ + lvx v16,0,r4 + lvx v17,off16,r4 + VPERM(v16,v16,v16,byteswap) + VPERM(v17,v17,v17,byteswap) + lvx v18,off32,r4 + lvx v19,off48,r4 + VPERM(v18,v18,v18,byteswap) + VPERM(v19,v19,v19,byteswap) + lvx v20,off64,r4 + lvx v21,off80,r4 + VPERM(v20,v20,v20,byteswap) + VPERM(v21,v21,v21,byteswap) + lvx v22,off96,r4 + lvx v23,off112,r4 + VPERM(v22,v22,v22,byteswap) + VPERM(v23,v23,v23,byteswap) + addi r4,r4,8*16 + + /* xor in initial value */ + vxor v16,v16,v8 + +2: bdz .Lfirst_warm_up_done + + addi r3,r3,16 + lvx const2,0,r3 + + /* Second warm up pass */ + VPMSUMD(v8,v16,const1) + lvx v16,0,r4 + VPERM(v16,v16,v16,byteswap) + ori r2,r2,0 + + VPMSUMD(v9,v17,const1) + lvx v17,off16,r4 + VPERM(v17,v17,v17,byteswap) + ori r2,r2,0 + + VPMSUMD(v10,v18,const1) + lvx v18,off32,r4 + VPERM(v18,v18,v18,byteswap) + ori r2,r2,0 + + VPMSUMD(v11,v19,const1) + lvx v19,off48,r4 + VPERM(v19,v19,v19,byteswap) + ori r2,r2,0 + + VPMSUMD(v12,v20,const1) + lvx v20,off64,r4 + VPERM(v20,v20,v20,byteswap) + ori r2,r2,0 + + VPMSUMD(v13,v21,const1) + lvx v21,off80,r4 + VPERM(v21,v21,v21,byteswap) + ori r2,r2,0 + + VPMSUMD(v14,v22,const1) + lvx v22,off96,r4 + VPERM(v22,v22,v22,byteswap) + ori r2,r2,0 + + VPMSUMD(v15,v23,const1) + lvx v23,off112,r4 + VPERM(v23,v23,v23,byteswap) + + addi r4,r4,8*16 + + bdz .Lfirst_cool_down + + /* + * main loop. We modulo schedule it such that it takes three iterations + * to complete - first iteration load, second iteration vpmsum, third + * iteration xor. + */ + .balign 16 +4: lvx const1,0,r3 + addi r3,r3,16 + ori r2,r2,0 + + vxor v0,v0,v8 + VPMSUMD(v8,v16,const2) + lvx v16,0,r4 + VPERM(v16,v16,v16,byteswap) + ori r2,r2,0 + + vxor v1,v1,v9 + VPMSUMD(v9,v17,const2) + lvx v17,off16,r4 + VPERM(v17,v17,v17,byteswap) + ori r2,r2,0 + + vxor v2,v2,v10 + VPMSUMD(v10,v18,const2) + lvx v18,off32,r4 + VPERM(v18,v18,v18,byteswap) + ori r2,r2,0 + + vxor v3,v3,v11 + VPMSUMD(v11,v19,const2) + lvx v19,off48,r4 + VPERM(v19,v19,v19,byteswap) + lvx const2,0,r3 + ori r2,r2,0 + + vxor v4,v4,v12 + VPMSUMD(v12,v20,const1) + lvx v20,off64,r4 + VPERM(v20,v20,v20,byteswap) + ori r2,r2,0 + + vxor v5,v5,v13 + VPMSUMD(v13,v21,const1) + lvx v21,off80,r4 + VPERM(v21,v21,v21,byteswap) + ori r2,r2,0 + + vxor v6,v6,v14 + VPMSUMD(v14,v22,const1) + lvx v22,off96,r4 + VPERM(v22,v22,v22,byteswap) + ori r2,r2,0 + + vxor v7,v7,v15 + VPMSUMD(v15,v23,const1) + lvx v23,off112,r4 + VPERM(v23,v23,v23,byteswap) + + addi r4,r4,8*16 + + bdnz 4b + +.Lfirst_cool_down: + /* First cool down pass */ + lvx const1,0,r3 + addi r3,r3,16 + + vxor v0,v0,v8 + VPMSUMD(v8,v16,const1) + ori r2,r2,0 + + vxor v1,v1,v9 + VPMSUMD(v9,v17,const1) + ori r2,r2,0 + + vxor v2,v2,v10 + VPMSUMD(v10,v18,const1) + ori r2,r2,0 + + vxor v3,v3,v11 + VPMSUMD(v11,v19,const1) + ori r2,r2,0 + + vxor v4,v4,v12 + VPMSUMD(v12,v20,const1) + ori r2,r2,0 + + vxor v5,v5,v13 + VPMSUMD(v13,v21,const1) + ori r2,r2,0 + + vxor v6,v6,v14 + VPMSUMD(v14,v22,const1) + ori r2,r2,0 + + vxor v7,v7,v15 + VPMSUMD(v15,v23,const1) + ori r2,r2,0 + +.Lsecond_cool_down: + /* Second cool down pass */ + vxor v0,v0,v8 + vxor v1,v1,v9 + vxor v2,v2,v10 + vxor v3,v3,v11 + vxor v4,v4,v12 + vxor v5,v5,v13 + vxor v6,v6,v14 + vxor v7,v7,v15 + +#ifdef REFLECT + /* + * vpmsumd produces a 96 bit result in the least significant bits + * of the register. Since we are bit reflected we have to shift it + * left 32 bits so it occupies the least significant bits in the + * bit reflected domain. 
+ */ + vsldoi v0,v0,zeroes,4 + vsldoi v1,v1,zeroes,4 + vsldoi v2,v2,zeroes,4 + vsldoi v3,v3,zeroes,4 + vsldoi v4,v4,zeroes,4 + vsldoi v5,v5,zeroes,4 + vsldoi v6,v6,zeroes,4 + vsldoi v7,v7,zeroes,4 +#endif + + /* xor with last 1024 bits */ + lvx v8,0,r4 + lvx v9,off16,r4 + VPERM(v8,v8,v8,byteswap) + VPERM(v9,v9,v9,byteswap) + lvx v10,off32,r4 + lvx v11,off48,r4 + VPERM(v10,v10,v10,byteswap) + VPERM(v11,v11,v11,byteswap) + lvx v12,off64,r4 + lvx v13,off80,r4 + VPERM(v12,v12,v12,byteswap) + VPERM(v13,v13,v13,byteswap) + lvx v14,off96,r4 + lvx v15,off112,r4 + VPERM(v14,v14,v14,byteswap) + VPERM(v15,v15,v15,byteswap) + + addi r4,r4,8*16 + + vxor v16,v0,v8 + vxor v17,v1,v9 + vxor v18,v2,v10 + vxor v19,v3,v11 + vxor v20,v4,v12 + vxor v21,v5,v13 + vxor v22,v6,v14 + vxor v23,v7,v15 + + li r0,1 + cmpdi r6,0 + addi r6,r6,128 + bne 1b + + /* Work out how many bytes we have left */ + andi. r5,r5,127 + + /* Calculate where in the constant table we need to start */ + subfic r6,r5,128 + add r3,r3,r6 + + /* How many 16 byte chunks are in the tail */ + srdi r7,r5,4 + mtctr r7 + + /* + * Reduce the previously calculated 1024 bits to 64 bits, shifting + * 32 bits to include the trailing 32 bits of zeros + */ + lvx v0,0,r3 + lvx v1,off16,r3 + lvx v2,off32,r3 + lvx v3,off48,r3 + lvx v4,off64,r3 + lvx v5,off80,r3 + lvx v6,off96,r3 + lvx v7,off112,r3 + addi r3,r3,8*16 + + VPMSUMW(v0,v16,v0) + VPMSUMW(v1,v17,v1) + VPMSUMW(v2,v18,v2) + VPMSUMW(v3,v19,v3) + VPMSUMW(v4,v20,v4) + VPMSUMW(v5,v21,v5) + VPMSUMW(v6,v22,v6) + VPMSUMW(v7,v23,v7) + + /* Now reduce the tail (0 - 112 bytes) */ + cmpdi r7,0 + beq 1f + + lvx v16,0,r4 + lvx v17,0,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + bdz 1f + + lvx v16,off16,r4 + lvx v17,off16,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + bdz 1f + + lvx v16,off32,r4 + lvx v17,off32,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + bdz 1f + + lvx v16,off48,r4 + lvx v17,off48,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + bdz 1f + + lvx v16,off64,r4 + lvx v17,off64,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + bdz 1f + + lvx v16,off80,r4 + lvx v17,off80,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + bdz 1f + + lvx v16,off96,r4 + lvx v17,off96,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + + /* Now xor all the parallel chunks together */ +1: vxor v0,v0,v1 + vxor v2,v2,v3 + vxor v4,v4,v5 + vxor v6,v6,v7 + + vxor v0,v0,v2 + vxor v4,v4,v6 + + vxor v0,v0,v4 + +.Lbarrett_reduction: + /* Barrett constants */ + LOAD_REG_ADDR(r3, .barrett_constants) + + lvx const1,0,r3 + lvx const2,off16,r3 + + vsldoi v1,v0,v0,8 + vxor v0,v0,v1 /* xor two 64 bit results together */ + +#ifdef REFLECT + /* shift left one bit */ + vspltisb v1,1 + vsl v0,v0,v1 +#endif + + vand v0,v0,mask_64bit +#ifndef REFLECT + /* + * Now for the Barrett reduction algorithm. The idea is to calculate q, + * the multiple of our polynomial that we need to subtract. By + * doing the computation 2x bits higher (ie 64 bits) and shifting the + * result back down 2x bits, we round down to the nearest multiple. + */ + VPMSUMD(v1,v0,const1) /* ma */ + vsldoi v1,zeroes,v1,8 /* q = floor(ma/(2^64)) */ + VPMSUMD(v1,v1,const2) /* qn */ + vxor v0,v0,v1 /* a - qn, subtraction is xor in GF(2) */ + + /* + * Get the result into r3. 
We need to shift it left 8 bytes: + * V0 [ 0 1 2 X ] + * V0 [ 0 X 2 3 ] + */ + vsldoi v0,v0,zeroes,8 /* shift result into top 64 bits */ +#else + /* + * The reflected version of Barrett reduction. Instead of bit + * reflecting our data (which is expensive to do), we bit reflect our + * constants and our algorithm, which means the intermediate data in + * our vector registers goes from 0-63 instead of 63-0. We can reflect + * the algorithm because we don't carry in mod 2 arithmetic. + */ + vand v1,v0,mask_32bit /* bottom 32 bits of a */ + VPMSUMD(v1,v1,const1) /* ma */ + vand v1,v1,mask_32bit /* bottom 32bits of ma */ + VPMSUMD(v1,v1,const2) /* qn */ + vxor v0,v0,v1 /* a - qn, subtraction is xor in GF(2) */ + + /* + * Since we are bit reflected, the result (ie the low 32 bits) is in + * the high 32 bits. We just need to shift it left 4 bytes + * V0 [ 0 1 X 3 ] + * V0 [ 0 X 2 3 ] + */ + vsldoi v0,v0,zeroes,4 /* shift result into top 64 bits of */ +#endif + + /* Get it into r3 */ + MFVRD(R3, v0) + +.Lout: + subi r6,r1,56+10*16 + subi r7,r1,56+2*16 + + lvx v20,0,r6 + lvx v21,off16,r6 + lvx v22,off32,r6 + lvx v23,off48,r6 + lvx v24,off64,r6 + lvx v25,off80,r6 + lvx v26,off96,r6 + lvx v27,off112,r6 + lvx v28,0,r7 + lvx v29,off16,r7 + + ld r31,-8(r1) + ld r30,-16(r1) + ld r29,-24(r1) + ld r28,-32(r1) + ld r27,-40(r1) + ld r26,-48(r1) + ld r25,-56(r1) + + blr + +.Lfirst_warm_up_done: + lvx const1,0,r3 + addi r3,r3,16 + + VPMSUMD(v8,v16,const1) + VPMSUMD(v9,v17,const1) + VPMSUMD(v10,v18,const1) + VPMSUMD(v11,v19,const1) + VPMSUMD(v12,v20,const1) + VPMSUMD(v13,v21,const1) + VPMSUMD(v14,v22,const1) + VPMSUMD(v15,v23,const1) + + b .Lsecond_cool_down + +.Lshort: + cmpdi r5,0 + beq .Lzero + + LOAD_REG_ADDR(r3, .short_constants) + + /* Calculate where in the constant table we need to start */ + subfic r6,r5,256 + add r3,r3,r6 + + /* How many 16 byte chunks? 
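The Barrett constants above let the final "divide by p(x)" be done with two carry-less multiplies instead of a bit-at-a-time division. As a reference for what is being computed, the same reduction can be written as plain GF(2) long division; a minimal sketch (unreflected form, assuming a 33-bit polynomial such as 0x11EDC6F41 for CRC32C, where the .barrett_constants table stores the bit-reflected equivalents) is:

#include <stdint.h>

/*
 * Plain GF(2) long division: reduce a 64-bit remainder modulo a 33-bit
 * CRC polynomial.  This is the result the two-multiply Barrett step
 * computes without a per-bit loop.  poly33 includes the x^32 term.
 */
static uint32_t gf2_mod(uint64_t a, uint64_t poly33)
{
	for (int i = 63; i >= 32; i--)
		if (a & (1ULL << i))
			a ^= poly33 << (i - 32);
	return (uint32_t)a;
}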
*/ + srdi r7,r5,4 + mtctr r7 + + vxor v19,v19,v19 + vxor v20,v20,v20 + + lvx v0,0,r4 + lvx v16,0,r3 + VPERM(v0,v0,v16,byteswap) + vxor v0,v0,v8 /* xor in initial value */ + VPMSUMW(v0,v0,v16) + bdz .Lv0 + + lvx v1,off16,r4 + lvx v17,off16,r3 + VPERM(v1,v1,v17,byteswap) + VPMSUMW(v1,v1,v17) + bdz .Lv1 + + lvx v2,off32,r4 + lvx v16,off32,r3 + VPERM(v2,v2,v16,byteswap) + VPMSUMW(v2,v2,v16) + bdz .Lv2 + + lvx v3,off48,r4 + lvx v17,off48,r3 + VPERM(v3,v3,v17,byteswap) + VPMSUMW(v3,v3,v17) + bdz .Lv3 + + lvx v4,off64,r4 + lvx v16,off64,r3 + VPERM(v4,v4,v16,byteswap) + VPMSUMW(v4,v4,v16) + bdz .Lv4 + + lvx v5,off80,r4 + lvx v17,off80,r3 + VPERM(v5,v5,v17,byteswap) + VPMSUMW(v5,v5,v17) + bdz .Lv5 + + lvx v6,off96,r4 + lvx v16,off96,r3 + VPERM(v6,v6,v16,byteswap) + VPMSUMW(v6,v6,v16) + bdz .Lv6 + + lvx v7,off112,r4 + lvx v17,off112,r3 + VPERM(v7,v7,v17,byteswap) + VPMSUMW(v7,v7,v17) + bdz .Lv7 + + addi r3,r3,128 + addi r4,r4,128 + + lvx v8,0,r4 + lvx v16,0,r3 + VPERM(v8,v8,v16,byteswap) + VPMSUMW(v8,v8,v16) + bdz .Lv8 + + lvx v9,off16,r4 + lvx v17,off16,r3 + VPERM(v9,v9,v17,byteswap) + VPMSUMW(v9,v9,v17) + bdz .Lv9 + + lvx v10,off32,r4 + lvx v16,off32,r3 + VPERM(v10,v10,v16,byteswap) + VPMSUMW(v10,v10,v16) + bdz .Lv10 + + lvx v11,off48,r4 + lvx v17,off48,r3 + VPERM(v11,v11,v17,byteswap) + VPMSUMW(v11,v11,v17) + bdz .Lv11 + + lvx v12,off64,r4 + lvx v16,off64,r3 + VPERM(v12,v12,v16,byteswap) + VPMSUMW(v12,v12,v16) + bdz .Lv12 + + lvx v13,off80,r4 + lvx v17,off80,r3 + VPERM(v13,v13,v17,byteswap) + VPMSUMW(v13,v13,v17) + bdz .Lv13 + + lvx v14,off96,r4 + lvx v16,off96,r3 + VPERM(v14,v14,v16,byteswap) + VPMSUMW(v14,v14,v16) + bdz .Lv14 + + lvx v15,off112,r4 + lvx v17,off112,r3 + VPERM(v15,v15,v17,byteswap) + VPMSUMW(v15,v15,v17) + +.Lv15: vxor v19,v19,v15 +.Lv14: vxor v20,v20,v14 +.Lv13: vxor v19,v19,v13 +.Lv12: vxor v20,v20,v12 +.Lv11: vxor v19,v19,v11 +.Lv10: vxor v20,v20,v10 +.Lv9: vxor v19,v19,v9 +.Lv8: vxor v20,v20,v8 +.Lv7: vxor v19,v19,v7 +.Lv6: vxor v20,v20,v6 +.Lv5: vxor v19,v19,v5 +.Lv4: vxor v20,v20,v4 +.Lv3: vxor v19,v19,v3 +.Lv2: vxor v20,v20,v2 +.Lv1: vxor v19,v19,v1 +.Lv0: vxor v20,v20,v0 + + vxor v0,v19,v20 + + b .Lbarrett_reduction + +.Lzero: + mr r3,r10 + b .Lout + +FUNC_END(CRC_FUNCTION_NAME) diff --git a/arch/powerpc/lib/crc32c-vpmsum_asm.S b/arch/powerpc/lib/crc32c-vpmsum_asm.S new file mode 100644 index 000000000000..bf442004ea1f --- /dev/null +++ b/arch/powerpc/lib/crc32c-vpmsum_asm.S @@ -0,0 +1,842 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Calculate a crc32c with vpmsum acceleration + * + * Copyright (C) 2015 Anton Blanchard <anton@au.ibm.com>, IBM + */ + .section .rodata +.balign 16 + +.byteswap_constant: + /* byte reverse permute constant */ + .octa 0x0F0E0D0C0B0A09080706050403020100 + +.constants: + + /* Reduce 262144 kbits to 1024 bits */ + /* x^261120 mod p(x)` << 1, x^261184 mod p(x)` << 1 */ + .octa 0x00000000b6ca9e20000000009c37c408 + + /* x^260096 mod p(x)` << 1, x^260160 mod p(x)` << 1 */ + .octa 0x00000000350249a800000001b51df26c + + /* x^259072 mod p(x)` << 1, x^259136 mod p(x)` << 1 */ + .octa 0x00000001862dac54000000000724b9d0 + + /* x^258048 mod p(x)` << 1, x^258112 mod p(x)` << 1 */ + .octa 0x00000001d87fb48c00000001c00532fe + + /* x^257024 mod p(x)` << 1, x^257088 mod p(x)` << 1 */ + .octa 0x00000001f39b699e00000000f05a9362 + + /* x^256000 mod p(x)` << 1, x^256064 mod p(x)` << 1 */ + .octa 0x0000000101da11b400000001e1007970 + + /* x^254976 mod p(x)` << 1, x^255040 mod p(x)` << 1 */ + .octa 0x00000001cab571e000000000a57366ee + + /* x^253952 mod 
p(x)` << 1, x^254016 mod p(x)` << 1 */ + .octa 0x00000000c7020cfe0000000192011284 + + /* x^252928 mod p(x)` << 1, x^252992 mod p(x)` << 1 */ + .octa 0x00000000cdaed1ae0000000162716d9a + + /* x^251904 mod p(x)` << 1, x^251968 mod p(x)` << 1 */ + .octa 0x00000001e804effc00000000cd97ecde + + /* x^250880 mod p(x)` << 1, x^250944 mod p(x)` << 1 */ + .octa 0x0000000077c3ea3a0000000058812bc0 + + /* x^249856 mod p(x)` << 1, x^249920 mod p(x)` << 1 */ + .octa 0x0000000068df31b40000000088b8c12e + + /* x^248832 mod p(x)` << 1, x^248896 mod p(x)` << 1 */ + .octa 0x00000000b059b6c200000001230b234c + + /* x^247808 mod p(x)` << 1, x^247872 mod p(x)` << 1 */ + .octa 0x0000000145fb8ed800000001120b416e + + /* x^246784 mod p(x)` << 1, x^246848 mod p(x)` << 1 */ + .octa 0x00000000cbc0916800000001974aecb0 + + /* x^245760 mod p(x)` << 1, x^245824 mod p(x)` << 1 */ + .octa 0x000000005ceeedc2000000008ee3f226 + + /* x^244736 mod p(x)` << 1, x^244800 mod p(x)` << 1 */ + .octa 0x0000000047d74e8600000001089aba9a + + /* x^243712 mod p(x)` << 1, x^243776 mod p(x)` << 1 */ + .octa 0x00000001407e9e220000000065113872 + + /* x^242688 mod p(x)` << 1, x^242752 mod p(x)` << 1 */ + .octa 0x00000001da967bda000000005c07ec10 + + /* x^241664 mod p(x)` << 1, x^241728 mod p(x)` << 1 */ + .octa 0x000000006c8983680000000187590924 + + /* x^240640 mod p(x)` << 1, x^240704 mod p(x)` << 1 */ + .octa 0x00000000f2d14c9800000000e35da7c6 + + /* x^239616 mod p(x)` << 1, x^239680 mod p(x)` << 1 */ + .octa 0x00000001993c6ad4000000000415855a + + /* x^238592 mod p(x)` << 1, x^238656 mod p(x)` << 1 */ + .octa 0x000000014683d1ac0000000073617758 + + /* x^237568 mod p(x)` << 1, x^237632 mod p(x)` << 1 */ + .octa 0x00000001a7c93e6c0000000176021d28 + + /* x^236544 mod p(x)` << 1, x^236608 mod p(x)` << 1 */ + .octa 0x000000010211e90a00000001c358fd0a + + /* x^235520 mod p(x)` << 1, x^235584 mod p(x)` << 1 */ + .octa 0x000000001119403e00000001ff7a2c18 + + /* x^234496 mod p(x)` << 1, x^234560 mod p(x)` << 1 */ + .octa 0x000000001c3261aa00000000f2d9f7e4 + + /* x^233472 mod p(x)` << 1, x^233536 mod p(x)` << 1 */ + .octa 0x000000014e37a634000000016cf1f9c8 + + /* x^232448 mod p(x)` << 1, x^232512 mod p(x)` << 1 */ + .octa 0x0000000073786c0c000000010af9279a + + /* x^231424 mod p(x)` << 1, x^231488 mod p(x)` << 1 */ + .octa 0x000000011dc037f80000000004f101e8 + + /* x^230400 mod p(x)` << 1, x^230464 mod p(x)` << 1 */ + .octa 0x0000000031433dfc0000000070bcf184 + + /* x^229376 mod p(x)` << 1, x^229440 mod p(x)` << 1 */ + .octa 0x000000009cde8348000000000a8de642 + + /* x^228352 mod p(x)` << 1, x^228416 mod p(x)` << 1 */ + .octa 0x0000000038d3c2a60000000062ea130c + + /* x^227328 mod p(x)` << 1, x^227392 mod p(x)` << 1 */ + .octa 0x000000011b25f26000000001eb31cbb2 + + /* x^226304 mod p(x)` << 1, x^226368 mod p(x)` << 1 */ + .octa 0x000000001629e6f00000000170783448 + + /* x^225280 mod p(x)` << 1, x^225344 mod p(x)` << 1 */ + .octa 0x0000000160838b4c00000001a684b4c6 + + /* x^224256 mod p(x)` << 1, x^224320 mod p(x)` << 1 */ + .octa 0x000000007a44011c00000000253ca5b4 + + /* x^223232 mod p(x)` << 1, x^223296 mod p(x)` << 1 */ + .octa 0x00000000226f417a0000000057b4b1e2 + + /* x^222208 mod p(x)` << 1, x^222272 mod p(x)` << 1 */ + .octa 0x0000000045eb2eb400000000b6bd084c + + /* x^221184 mod p(x)` << 1, x^221248 mod p(x)` << 1 */ + .octa 0x000000014459d70c0000000123c2d592 + + /* x^220160 mod p(x)` << 1, x^220224 mod p(x)` << 1 */ + .octa 0x00000001d406ed8200000000159dafce + + /* x^219136 mod p(x)` << 1, x^219200 mod p(x)` << 1 */ + .octa 0x0000000160c8e1a80000000127e1a64e + + 
/* x^218112 mod p(x)` << 1, x^218176 mod p(x)` << 1 */ + .octa 0x0000000027ba80980000000056860754 + + /* x^217088 mod p(x)` << 1, x^217152 mod p(x)` << 1 */ + .octa 0x000000006d92d01800000001e661aae8 + + /* x^216064 mod p(x)` << 1, x^216128 mod p(x)` << 1 */ + .octa 0x000000012ed7e3f200000000f82c6166 + + /* x^215040 mod p(x)` << 1, x^215104 mod p(x)` << 1 */ + .octa 0x000000002dc8778800000000c4f9c7ae + + /* x^214016 mod p(x)` << 1, x^214080 mod p(x)` << 1 */ + .octa 0x0000000018240bb80000000074203d20 + + /* x^212992 mod p(x)` << 1, x^213056 mod p(x)` << 1 */ + .octa 0x000000001ad381580000000198173052 + + /* x^211968 mod p(x)` << 1, x^212032 mod p(x)` << 1 */ + .octa 0x00000001396b78f200000001ce8aba54 + + /* x^210944 mod p(x)` << 1, x^211008 mod p(x)` << 1 */ + .octa 0x000000011a68133400000001850d5d94 + + /* x^209920 mod p(x)` << 1, x^209984 mod p(x)` << 1 */ + .octa 0x000000012104732e00000001d609239c + + /* x^208896 mod p(x)` << 1, x^208960 mod p(x)` << 1 */ + .octa 0x00000000a140d90c000000001595f048 + + /* x^207872 mod p(x)` << 1, x^207936 mod p(x)` << 1 */ + .octa 0x00000001b7215eda0000000042ccee08 + + /* x^206848 mod p(x)` << 1, x^206912 mod p(x)` << 1 */ + .octa 0x00000001aaf1df3c000000010a389d74 + + /* x^205824 mod p(x)` << 1, x^205888 mod p(x)` << 1 */ + .octa 0x0000000029d15b8a000000012a840da6 + + /* x^204800 mod p(x)` << 1, x^204864 mod p(x)` << 1 */ + .octa 0x00000000f1a96922000000001d181c0c + + /* x^203776 mod p(x)` << 1, x^203840 mod p(x)` << 1 */ + .octa 0x00000001ac80d03c0000000068b7d1f6 + + /* x^202752 mod p(x)` << 1, x^202816 mod p(x)` << 1 */ + .octa 0x000000000f11d56a000000005b0f14fc + + /* x^201728 mod p(x)` << 1, x^201792 mod p(x)` << 1 */ + .octa 0x00000001f1c022a20000000179e9e730 + + /* x^200704 mod p(x)` << 1, x^200768 mod p(x)` << 1 */ + .octa 0x0000000173d00ae200000001ce1368d6 + + /* x^199680 mod p(x)` << 1, x^199744 mod p(x)` << 1 */ + .octa 0x00000001d4ffe4ac0000000112c3a84c + + /* x^198656 mod p(x)` << 1, x^198720 mod p(x)` << 1 */ + .octa 0x000000016edc5ae400000000de940fee + + /* x^197632 mod p(x)` << 1, x^197696 mod p(x)` << 1 */ + .octa 0x00000001f1a0214000000000fe896b7e + + /* x^196608 mod p(x)` << 1, x^196672 mod p(x)` << 1 */ + .octa 0x00000000ca0b28a000000001f797431c + + /* x^195584 mod p(x)` << 1, x^195648 mod p(x)` << 1 */ + .octa 0x00000001928e30a20000000053e989ba + + /* x^194560 mod p(x)` << 1, x^194624 mod p(x)` << 1 */ + .octa 0x0000000097b1b002000000003920cd16 + + /* x^193536 mod p(x)` << 1, x^193600 mod p(x)` << 1 */ + .octa 0x00000000b15bf90600000001e6f579b8 + + /* x^192512 mod p(x)` << 1, x^192576 mod p(x)` << 1 */ + .octa 0x00000000411c5d52000000007493cb0a + + /* x^191488 mod p(x)` << 1, x^191552 mod p(x)` << 1 */ + .octa 0x00000001c36f330000000001bdd376d8 + + /* x^190464 mod p(x)` << 1, x^190528 mod p(x)` << 1 */ + .octa 0x00000001119227e0000000016badfee6 + + /* x^189440 mod p(x)` << 1, x^189504 mod p(x)` << 1 */ + .octa 0x00000000114d47020000000071de5c58 + + /* x^188416 mod p(x)` << 1, x^188480 mod p(x)` << 1 */ + .octa 0x00000000458b5b9800000000453f317c + + /* x^187392 mod p(x)` << 1, x^187456 mod p(x)` << 1 */ + .octa 0x000000012e31fb8e0000000121675cce + + /* x^186368 mod p(x)` << 1, x^186432 mod p(x)` << 1 */ + .octa 0x000000005cf619d800000001f409ee92 + + /* x^185344 mod p(x)` << 1, x^185408 mod p(x)` << 1 */ + .octa 0x0000000063f4d8b200000000f36b9c88 + + /* x^184320 mod p(x)` << 1, x^184384 mod p(x)` << 1 */ + .octa 0x000000004138dc8a0000000036b398f4 + + /* x^183296 mod p(x)` << 1, x^183360 mod p(x)` << 1 */ + .octa 
0x00000001d29ee8e000000001748f9adc + + /* x^182272 mod p(x)` << 1, x^182336 mod p(x)` << 1 */ + .octa 0x000000006a08ace800000001be94ec00 + + /* x^181248 mod p(x)` << 1, x^181312 mod p(x)` << 1 */ + .octa 0x0000000127d4201000000000b74370d6 + + /* x^180224 mod p(x)` << 1, x^180288 mod p(x)` << 1 */ + .octa 0x0000000019d76b6200000001174d0b98 + + /* x^179200 mod p(x)` << 1, x^179264 mod p(x)` << 1 */ + .octa 0x00000001b1471f6e00000000befc06a4 + + /* x^178176 mod p(x)` << 1, x^178240 mod p(x)` << 1 */ + .octa 0x00000001f64c19cc00000001ae125288 + + /* x^177152 mod p(x)` << 1, x^177216 mod p(x)` << 1 */ + .octa 0x00000000003c0ea00000000095c19b34 + + /* x^176128 mod p(x)` << 1, x^176192 mod p(x)` << 1 */ + .octa 0x000000014d73abf600000001a78496f2 + + /* x^175104 mod p(x)` << 1, x^175168 mod p(x)` << 1 */ + .octa 0x00000001620eb84400000001ac5390a0 + + /* x^174080 mod p(x)` << 1, x^174144 mod p(x)` << 1 */ + .octa 0x0000000147655048000000002a80ed6e + + /* x^173056 mod p(x)` << 1, x^173120 mod p(x)` << 1 */ + .octa 0x0000000067b5077e00000001fa9b0128 + + /* x^172032 mod p(x)` << 1, x^172096 mod p(x)` << 1 */ + .octa 0x0000000010ffe20600000001ea94929e + + /* x^171008 mod p(x)` << 1, x^171072 mod p(x)` << 1 */ + .octa 0x000000000fee8f1e0000000125f4305c + + /* x^169984 mod p(x)` << 1, x^170048 mod p(x)` << 1 */ + .octa 0x00000001da26fbae00000001471e2002 + + /* x^168960 mod p(x)` << 1, x^169024 mod p(x)` << 1 */ + .octa 0x00000001b3a8bd880000000132d2253a + + /* x^167936 mod p(x)` << 1, x^168000 mod p(x)` << 1 */ + .octa 0x00000000e8f3898e00000000f26b3592 + + /* x^166912 mod p(x)` << 1, x^166976 mod p(x)` << 1 */ + .octa 0x00000000b0d0d28c00000000bc8b67b0 + + /* x^165888 mod p(x)` << 1, x^165952 mod p(x)` << 1 */ + .octa 0x0000000030f2a798000000013a826ef2 + + /* x^164864 mod p(x)` << 1, x^164928 mod p(x)` << 1 */ + .octa 0x000000000fba10020000000081482c84 + + /* x^163840 mod p(x)` << 1, x^163904 mod p(x)` << 1 */ + .octa 0x00000000bdb9bd7200000000e77307c2 + + /* x^162816 mod p(x)` << 1, x^162880 mod p(x)` << 1 */ + .octa 0x0000000075d3bf5a00000000d4a07ec8 + + /* x^161792 mod p(x)` << 1, x^161856 mod p(x)` << 1 */ + .octa 0x00000000ef1f98a00000000017102100 + + /* x^160768 mod p(x)` << 1, x^160832 mod p(x)` << 1 */ + .octa 0x00000000689c760200000000db406486 + + /* x^159744 mod p(x)` << 1, x^159808 mod p(x)` << 1 */ + .octa 0x000000016d5fa5fe0000000192db7f88 + + /* x^158720 mod p(x)` << 1, x^158784 mod p(x)` << 1 */ + .octa 0x00000001d0d2b9ca000000018bf67b1e + + /* x^157696 mod p(x)` << 1, x^157760 mod p(x)` << 1 */ + .octa 0x0000000041e7b470000000007c09163e + + /* x^156672 mod p(x)` << 1, x^156736 mod p(x)` << 1 */ + .octa 0x00000001cbb6495e000000000adac060 + + /* x^155648 mod p(x)` << 1, x^155712 mod p(x)` << 1 */ + .octa 0x000000010052a0b000000000bd8316ae + + /* x^154624 mod p(x)` << 1, x^154688 mod p(x)` << 1 */ + .octa 0x00000001d8effb5c000000019f09ab54 + + /* x^153600 mod p(x)` << 1, x^153664 mod p(x)` << 1 */ + .octa 0x00000001d969853c0000000125155542 + + /* x^152576 mod p(x)` << 1, x^152640 mod p(x)` << 1 */ + .octa 0x00000000523ccce2000000018fdb5882 + + /* x^151552 mod p(x)` << 1, x^151616 mod p(x)` << 1 */ + .octa 0x000000001e2436bc00000000e794b3f4 + + /* x^150528 mod p(x)` << 1, x^150592 mod p(x)` << 1 */ + .octa 0x00000000ddd1c3a2000000016f9bb022 + + /* x^149504 mod p(x)` << 1, x^149568 mod p(x)` << 1 */ + .octa 0x0000000019fcfe3800000000290c9978 + + /* x^148480 mod p(x)` << 1, x^148544 mod p(x)` << 1 */ + .octa 0x00000001ce95db640000000083c0f350 + + /* x^147456 mod p(x)` << 1, x^147520 mod p(x)` 
<< 1 */ + .octa 0x00000000af5828060000000173ea6628 + + /* x^146432 mod p(x)` << 1, x^146496 mod p(x)` << 1 */ + .octa 0x00000001006388f600000001c8b4e00a + + /* x^145408 mod p(x)` << 1, x^145472 mod p(x)` << 1 */ + .octa 0x0000000179eca00a00000000de95d6aa + + /* x^144384 mod p(x)` << 1, x^144448 mod p(x)` << 1 */ + .octa 0x0000000122410a6a000000010b7f7248 + + /* x^143360 mod p(x)` << 1, x^143424 mod p(x)` << 1 */ + .octa 0x000000004288e87c00000001326e3a06 + + /* x^142336 mod p(x)` << 1, x^142400 mod p(x)` << 1 */ + .octa 0x000000016c5490da00000000bb62c2e6 + + /* x^141312 mod p(x)` << 1, x^141376 mod p(x)` << 1 */ + .octa 0x00000000d1c71f6e0000000156a4b2c2 + + /* x^140288 mod p(x)` << 1, x^140352 mod p(x)` << 1 */ + .octa 0x00000001b4ce08a6000000011dfe763a + + /* x^139264 mod p(x)` << 1, x^139328 mod p(x)` << 1 */ + .octa 0x00000001466ba60c000000007bcca8e2 + + /* x^138240 mod p(x)` << 1, x^138304 mod p(x)` << 1 */ + .octa 0x00000001f6c488a40000000186118faa + + /* x^137216 mod p(x)` << 1, x^137280 mod p(x)` << 1 */ + .octa 0x000000013bfb06820000000111a65a88 + + /* x^136192 mod p(x)` << 1, x^136256 mod p(x)` << 1 */ + .octa 0x00000000690e9e54000000003565e1c4 + + /* x^135168 mod p(x)` << 1, x^135232 mod p(x)` << 1 */ + .octa 0x00000000281346b6000000012ed02a82 + + /* x^134144 mod p(x)` << 1, x^134208 mod p(x)` << 1 */ + .octa 0x000000015646402400000000c486ecfc + + /* x^133120 mod p(x)` << 1, x^133184 mod p(x)` << 1 */ + .octa 0x000000016063a8dc0000000001b951b2 + + /* x^132096 mod p(x)` << 1, x^132160 mod p(x)` << 1 */ + .octa 0x0000000116a663620000000048143916 + + /* x^131072 mod p(x)` << 1, x^131136 mod p(x)` << 1 */ + .octa 0x000000017e8aa4d200000001dc2ae124 + + /* x^130048 mod p(x)` << 1, x^130112 mod p(x)` << 1 */ + .octa 0x00000001728eb10c00000001416c58d6 + + /* x^129024 mod p(x)` << 1, x^129088 mod p(x)` << 1 */ + .octa 0x00000001b08fd7fa00000000a479744a + + /* x^128000 mod p(x)` << 1, x^128064 mod p(x)` << 1 */ + .octa 0x00000001092a16e80000000096ca3a26 + + /* x^126976 mod p(x)` << 1, x^127040 mod p(x)` << 1 */ + .octa 0x00000000a505637c00000000ff223d4e + + /* x^125952 mod p(x)` << 1, x^126016 mod p(x)` << 1 */ + .octa 0x00000000d94869b2000000010e84da42 + + /* x^124928 mod p(x)` << 1, x^124992 mod p(x)` << 1 */ + .octa 0x00000001c8b203ae00000001b61ba3d0 + + /* x^123904 mod p(x)` << 1, x^123968 mod p(x)` << 1 */ + .octa 0x000000005704aea000000000680f2de8 + + /* x^122880 mod p(x)` << 1, x^122944 mod p(x)` << 1 */ + .octa 0x000000012e295fa2000000008772a9a8 + + /* x^121856 mod p(x)` << 1, x^121920 mod p(x)` << 1 */ + .octa 0x000000011d0908bc0000000155f295bc + + /* x^120832 mod p(x)` << 1, x^120896 mod p(x)` << 1 */ + .octa 0x0000000193ed97ea00000000595f9282 + + /* x^119808 mod p(x)` << 1, x^119872 mod p(x)` << 1 */ + .octa 0x000000013a0f1c520000000164b1c25a + + /* x^118784 mod p(x)` << 1, x^118848 mod p(x)` << 1 */ + .octa 0x000000010c2c40c000000000fbd67c50 + + /* x^117760 mod p(x)` << 1, x^117824 mod p(x)` << 1 */ + .octa 0x00000000ff6fac3e0000000096076268 + + /* x^116736 mod p(x)` << 1, x^116800 mod p(x)` << 1 */ + .octa 0x000000017b3609c000000001d288e4cc + + /* x^115712 mod p(x)` << 1, x^115776 mod p(x)` << 1 */ + .octa 0x0000000088c8c92200000001eaac1bdc + + /* x^114688 mod p(x)` << 1, x^114752 mod p(x)` << 1 */ + .octa 0x00000001751baae600000001f1ea39e2 + + /* x^113664 mod p(x)` << 1, x^113728 mod p(x)` << 1 */ + .octa 0x000000010795297200000001eb6506fc + + /* x^112640 mod p(x)` << 1, x^112704 mod p(x)` << 1 */ + .octa 0x0000000162b00abe000000010f806ffe + + /* x^111616 mod p(x)` << 1, 
x^111680 mod p(x)` << 1 */ + .octa 0x000000000d7b404c000000010408481e + + /* x^110592 mod p(x)` << 1, x^110656 mod p(x)` << 1 */ + .octa 0x00000000763b13d40000000188260534 + + /* x^109568 mod p(x)` << 1, x^109632 mod p(x)` << 1 */ + .octa 0x00000000f6dc22d80000000058fc73e0 + + /* x^108544 mod p(x)` << 1, x^108608 mod p(x)` << 1 */ + .octa 0x000000007daae06000000000391c59b8 + + /* x^107520 mod p(x)` << 1, x^107584 mod p(x)` << 1 */ + .octa 0x000000013359ab7c000000018b638400 + + /* x^106496 mod p(x)` << 1, x^106560 mod p(x)` << 1 */ + .octa 0x000000008add438a000000011738f5c4 + + /* x^105472 mod p(x)` << 1, x^105536 mod p(x)` << 1 */ + .octa 0x00000001edbefdea000000008cf7c6da + + /* x^104448 mod p(x)` << 1, x^104512 mod p(x)` << 1 */ + .octa 0x000000004104e0f800000001ef97fb16 + + /* x^103424 mod p(x)` << 1, x^103488 mod p(x)` << 1 */ + .octa 0x00000000b48a82220000000102130e20 + + /* x^102400 mod p(x)` << 1, x^102464 mod p(x)` << 1 */ + .octa 0x00000001bcb4684400000000db968898 + + /* x^101376 mod p(x)` << 1, x^101440 mod p(x)` << 1 */ + .octa 0x000000013293ce0a00000000b5047b5e + + /* x^100352 mod p(x)` << 1, x^100416 mod p(x)` << 1 */ + .octa 0x00000001710d0844000000010b90fdb2 + + /* x^99328 mod p(x)` << 1, x^99392 mod p(x)` << 1 */ + .octa 0x0000000117907f6e000000004834a32e + + /* x^98304 mod p(x)` << 1, x^98368 mod p(x)` << 1 */ + .octa 0x0000000087ddf93e0000000059c8f2b0 + + /* x^97280 mod p(x)` << 1, x^97344 mod p(x)` << 1 */ + .octa 0x000000005970e9b00000000122cec508 + + /* x^96256 mod p(x)` << 1, x^96320 mod p(x)` << 1 */ + .octa 0x0000000185b2b7d0000000000a330cda + + /* x^95232 mod p(x)` << 1, x^95296 mod p(x)` << 1 */ + .octa 0x00000001dcee0efc000000014a47148c + + /* x^94208 mod p(x)` << 1, x^94272 mod p(x)` << 1 */ + .octa 0x0000000030da27220000000042c61cb8 + + /* x^93184 mod p(x)` << 1, x^93248 mod p(x)` << 1 */ + .octa 0x000000012f925a180000000012fe6960 + + /* x^92160 mod p(x)` << 1, x^92224 mod p(x)` << 1 */ + .octa 0x00000000dd2e357c00000000dbda2c20 + + /* x^91136 mod p(x)` << 1, x^91200 mod p(x)` << 1 */ + .octa 0x00000000071c80de000000011122410c + + /* x^90112 mod p(x)` << 1, x^90176 mod p(x)` << 1 */ + .octa 0x000000011513140a00000000977b2070 + + /* x^89088 mod p(x)` << 1, x^89152 mod p(x)` << 1 */ + .octa 0x00000001df876e8e000000014050438e + + /* x^88064 mod p(x)` << 1, x^88128 mod p(x)` << 1 */ + .octa 0x000000015f81d6ce0000000147c840e8 + + /* x^87040 mod p(x)` << 1, x^87104 mod p(x)` << 1 */ + .octa 0x000000019dd94dbe00000001cc7c88ce + + /* x^86016 mod p(x)` << 1, x^86080 mod p(x)` << 1 */ + .octa 0x00000001373d206e00000001476b35a4 + + /* x^84992 mod p(x)` << 1, x^85056 mod p(x)` << 1 */ + .octa 0x00000000668ccade000000013d52d508 + + /* x^83968 mod p(x)` << 1, x^84032 mod p(x)` << 1 */ + .octa 0x00000001b192d268000000008e4be32e + + /* x^82944 mod p(x)` << 1, x^83008 mod p(x)` << 1 */ + .octa 0x00000000e30f3a7800000000024120fe + + /* x^81920 mod p(x)` << 1, x^81984 mod p(x)` << 1 */ + .octa 0x000000010ef1f7bc00000000ddecddb4 + + /* x^80896 mod p(x)` << 1, x^80960 mod p(x)` << 1 */ + .octa 0x00000001f5ac738000000000d4d403bc + + /* x^79872 mod p(x)` << 1, x^79936 mod p(x)` << 1 */ + .octa 0x000000011822ea7000000001734b89aa + + /* x^78848 mod p(x)` << 1, x^78912 mod p(x)` << 1 */ + .octa 0x00000000c3a33848000000010e7a58d6 + + /* x^77824 mod p(x)` << 1, x^77888 mod p(x)` << 1 */ + .octa 0x00000001bd151c2400000001f9f04e9c + + /* x^76800 mod p(x)` << 1, x^76864 mod p(x)` << 1 */ + .octa 0x0000000056002d7600000000b692225e + + /* x^75776 mod p(x)` << 1, x^75840 mod p(x)` << 1 */ + 
.octa 0x000000014657c4f4000000019b8d3f3e + + /* x^74752 mod p(x)` << 1, x^74816 mod p(x)` << 1 */ + .octa 0x0000000113742d7c00000001a874f11e + + /* x^73728 mod p(x)` << 1, x^73792 mod p(x)` << 1 */ + .octa 0x000000019c5920ba000000010d5a4254 + + /* x^72704 mod p(x)` << 1, x^72768 mod p(x)` << 1 */ + .octa 0x000000005216d2d600000000bbb2f5d6 + + /* x^71680 mod p(x)` << 1, x^71744 mod p(x)` << 1 */ + .octa 0x0000000136f5ad8a0000000179cc0e36 + + /* x^70656 mod p(x)` << 1, x^70720 mod p(x)` << 1 */ + .octa 0x000000018b07beb600000001dca1da4a + + /* x^69632 mod p(x)` << 1, x^69696 mod p(x)` << 1 */ + .octa 0x00000000db1e93b000000000feb1a192 + + /* x^68608 mod p(x)` << 1, x^68672 mod p(x)` << 1 */ + .octa 0x000000000b96fa3a00000000d1eeedd6 + + /* x^67584 mod p(x)` << 1, x^67648 mod p(x)` << 1 */ + .octa 0x00000001d9968af0000000008fad9bb4 + + /* x^66560 mod p(x)` << 1, x^66624 mod p(x)` << 1 */ + .octa 0x000000000e4a77a200000001884938e4 + + /* x^65536 mod p(x)` << 1, x^65600 mod p(x)` << 1 */ + .octa 0x00000000508c2ac800000001bc2e9bc0 + + /* x^64512 mod p(x)` << 1, x^64576 mod p(x)` << 1 */ + .octa 0x0000000021572a8000000001f9658a68 + + /* x^63488 mod p(x)` << 1, x^63552 mod p(x)` << 1 */ + .octa 0x00000001b859daf2000000001b9224fc + + /* x^62464 mod p(x)` << 1, x^62528 mod p(x)` << 1 */ + .octa 0x000000016f7884740000000055b2fb84 + + /* x^61440 mod p(x)` << 1, x^61504 mod p(x)` << 1 */ + .octa 0x00000001b438810e000000018b090348 + + /* x^60416 mod p(x)` << 1, x^60480 mod p(x)` << 1 */ + .octa 0x0000000095ddc6f2000000011ccbd5ea + + /* x^59392 mod p(x)` << 1, x^59456 mod p(x)` << 1 */ + .octa 0x00000001d977c20c0000000007ae47f8 + + /* x^58368 mod p(x)` << 1, x^58432 mod p(x)` << 1 */ + .octa 0x00000000ebedb99a0000000172acbec0 + + /* x^57344 mod p(x)` << 1, x^57408 mod p(x)` << 1 */ + .octa 0x00000001df9e9e9200000001c6e3ff20 + + /* x^56320 mod p(x)` << 1, x^56384 mod p(x)` << 1 */ + .octa 0x00000001a4a3f95200000000e1b38744 + + /* x^55296 mod p(x)` << 1, x^55360 mod p(x)` << 1 */ + .octa 0x00000000e2f5122000000000791585b2 + + /* x^54272 mod p(x)` << 1, x^54336 mod p(x)` << 1 */ + .octa 0x000000004aa01f3e00000000ac53b894 + + /* x^53248 mod p(x)` << 1, x^53312 mod p(x)` << 1 */ + .octa 0x00000000b3e90a5800000001ed5f2cf4 + + /* x^52224 mod p(x)` << 1, x^52288 mod p(x)` << 1 */ + .octa 0x000000000c9ca2aa00000001df48b2e0 + + /* x^51200 mod p(x)` << 1, x^51264 mod p(x)` << 1 */ + .octa 0x000000015168231600000000049c1c62 + + /* x^50176 mod p(x)` << 1, x^50240 mod p(x)` << 1 */ + .octa 0x0000000036fce78c000000017c460c12 + + /* x^49152 mod p(x)` << 1, x^49216 mod p(x)` << 1 */ + .octa 0x000000009037dc10000000015be4da7e + + /* x^48128 mod p(x)` << 1, x^48192 mod p(x)` << 1 */ + .octa 0x00000000d3298582000000010f38f668 + + /* x^47104 mod p(x)` << 1, x^47168 mod p(x)` << 1 */ + .octa 0x00000001b42e8ad60000000039f40a00 + + /* x^46080 mod p(x)` << 1, x^46144 mod p(x)` << 1 */ + .octa 0x00000000142a983800000000bd4c10c4 + + /* x^45056 mod p(x)` << 1, x^45120 mod p(x)` << 1 */ + .octa 0x0000000109c7f1900000000042db1d98 + + /* x^44032 mod p(x)` << 1, x^44096 mod p(x)` << 1 */ + .octa 0x0000000056ff931000000001c905bae6 + + /* x^43008 mod p(x)` << 1, x^43072 mod p(x)` << 1 */ + .octa 0x00000001594513aa00000000069d40ea + + /* x^41984 mod p(x)` << 1, x^42048 mod p(x)` << 1 */ + .octa 0x00000001e3b5b1e8000000008e4fbad0 + + /* x^40960 mod p(x)` << 1, x^41024 mod p(x)` << 1 */ + .octa 0x000000011dd5fc080000000047bedd46 + + /* x^39936 mod p(x)` << 1, x^40000 mod p(x)` << 1 */ + .octa 0x00000001675f0cc20000000026396bf8 + + /* 
x^38912 mod p(x)` << 1, x^38976 mod p(x)` << 1 */ + .octa 0x00000000d1c8dd4400000000379beb92 + + /* x^37888 mod p(x)` << 1, x^37952 mod p(x)` << 1 */ + .octa 0x0000000115ebd3d8000000000abae54a + + /* x^36864 mod p(x)` << 1, x^36928 mod p(x)` << 1 */ + .octa 0x00000001ecbd0dac0000000007e6a128 + + /* x^35840 mod p(x)` << 1, x^35904 mod p(x)` << 1 */ + .octa 0x00000000cdf67af2000000000ade29d2 + + /* x^34816 mod p(x)` << 1, x^34880 mod p(x)` << 1 */ + .octa 0x000000004c01ff4c00000000f974c45c + + /* x^33792 mod p(x)` << 1, x^33856 mod p(x)` << 1 */ + .octa 0x00000000f2d8657e00000000e77ac60a + + /* x^32768 mod p(x)` << 1, x^32832 mod p(x)` << 1 */ + .octa 0x000000006bae74c40000000145895816 + + /* x^31744 mod p(x)` << 1, x^31808 mod p(x)` << 1 */ + .octa 0x0000000152af8aa00000000038e362be + + /* x^30720 mod p(x)` << 1, x^30784 mod p(x)` << 1 */ + .octa 0x0000000004663802000000007f991a64 + + /* x^29696 mod p(x)` << 1, x^29760 mod p(x)` << 1 */ + .octa 0x00000001ab2f5afc00000000fa366d3a + + /* x^28672 mod p(x)` << 1, x^28736 mod p(x)` << 1 */ + .octa 0x0000000074a4ebd400000001a2bb34f0 + + /* x^27648 mod p(x)` << 1, x^27712 mod p(x)` << 1 */ + .octa 0x00000001d7ab3a4c0000000028a9981e + + /* x^26624 mod p(x)` << 1, x^26688 mod p(x)` << 1 */ + .octa 0x00000001a8da60c600000001dbc672be + + /* x^25600 mod p(x)` << 1, x^25664 mod p(x)` << 1 */ + .octa 0x000000013cf6382000000000b04d77f6 + + /* x^24576 mod p(x)` << 1, x^24640 mod p(x)` << 1 */ + .octa 0x00000000bec12e1e0000000124400d96 + + /* x^23552 mod p(x)` << 1, x^23616 mod p(x)` << 1 */ + .octa 0x00000001c6368010000000014ca4b414 + + /* x^22528 mod p(x)` << 1, x^22592 mod p(x)` << 1 */ + .octa 0x00000001e6e78758000000012fe2c938 + + /* x^21504 mod p(x)` << 1, x^21568 mod p(x)` << 1 */ + .octa 0x000000008d7f2b3c00000001faed01e6 + + /* x^20480 mod p(x)` << 1, x^20544 mod p(x)` << 1 */ + .octa 0x000000016b4a156e000000007e80ecfe + + /* x^19456 mod p(x)` << 1, x^19520 mod p(x)` << 1 */ + .octa 0x00000001c63cfeb60000000098daee94 + + /* x^18432 mod p(x)` << 1, x^18496 mod p(x)` << 1 */ + .octa 0x000000015f902670000000010a04edea + + /* x^17408 mod p(x)` << 1, x^17472 mod p(x)` << 1 */ + .octa 0x00000001cd5de11e00000001c00b4524 + + /* x^16384 mod p(x)` << 1, x^16448 mod p(x)` << 1 */ + .octa 0x000000001acaec540000000170296550 + + /* x^15360 mod p(x)` << 1, x^15424 mod p(x)` << 1 */ + .octa 0x000000002bd0ca780000000181afaa48 + + /* x^14336 mod p(x)` << 1, x^14400 mod p(x)` << 1 */ + .octa 0x0000000032d63d5c0000000185a31ffa + + /* x^13312 mod p(x)` << 1, x^13376 mod p(x)` << 1 */ + .octa 0x000000001c6d4e4c000000002469f608 + + /* x^12288 mod p(x)` << 1, x^12352 mod p(x)` << 1 */ + .octa 0x0000000106a60b92000000006980102a + + /* x^11264 mod p(x)` << 1, x^11328 mod p(x)` << 1 */ + .octa 0x00000000d3855e120000000111ea9ca8 + + /* x^10240 mod p(x)` << 1, x^10304 mod p(x)` << 1 */ + .octa 0x00000000e312563600000001bd1d29ce + + /* x^9216 mod p(x)` << 1, x^9280 mod p(x)` << 1 */ + .octa 0x000000009e8f7ea400000001b34b9580 + + /* x^8192 mod p(x)` << 1, x^8256 mod p(x)` << 1 */ + .octa 0x00000001c82e562c000000003076054e + + /* x^7168 mod p(x)` << 1, x^7232 mod p(x)` << 1 */ + .octa 0x00000000ca9f09ce000000012a608ea4 + + /* x^6144 mod p(x)` << 1, x^6208 mod p(x)` << 1 */ + .octa 0x00000000c63764e600000000784d05fe + + /* x^5120 mod p(x)` << 1, x^5184 mod p(x)` << 1 */ + .octa 0x0000000168d2e49e000000016ef0d82a + + /* x^4096 mod p(x)` << 1, x^4160 mod p(x)` << 1 */ + .octa 0x00000000e986c1480000000075bda454 + + /* x^3072 mod p(x)` << 1, x^3136 mod p(x)` << 1 */ + .octa 
0x00000000cfb65894000000003dc0a1c4 + + /* x^2048 mod p(x)` << 1, x^2112 mod p(x)` << 1 */ + .octa 0x0000000111cadee400000000e9a5d8be + + /* x^1024 mod p(x)` << 1, x^1088 mod p(x)` << 1 */ + .octa 0x0000000171fb63ce00000001609bc4b4 + +.short_constants: + + /* Reduce final 1024-2048 bits to 64 bits, shifting 32 bits to include the trailing 32 bits of zeros */ + /* x^1952 mod p(x)`, x^1984 mod p(x)`, x^2016 mod p(x)`, x^2048 mod p(x)` */ + .octa 0x7fec2963e5bf80485cf015c388e56f72 + + /* x^1824 mod p(x)`, x^1856 mod p(x)`, x^1888 mod p(x)`, x^1920 mod p(x)` */ + .octa 0x38e888d4844752a9963a18920246e2e6 + + /* x^1696 mod p(x)`, x^1728 mod p(x)`, x^1760 mod p(x)`, x^1792 mod p(x)` */ + .octa 0x42316c00730206ad419a441956993a31 + + /* x^1568 mod p(x)`, x^1600 mod p(x)`, x^1632 mod p(x)`, x^1664 mod p(x)` */ + .octa 0x543d5c543e65ddf9924752ba2b830011 + + /* x^1440 mod p(x)`, x^1472 mod p(x)`, x^1504 mod p(x)`, x^1536 mod p(x)` */ + .octa 0x78e87aaf56767c9255bd7f9518e4a304 + + /* x^1312 mod p(x)`, x^1344 mod p(x)`, x^1376 mod p(x)`, x^1408 mod p(x)` */ + .octa 0x8f68fcec1903da7f6d76739fe0553f1e + + /* x^1184 mod p(x)`, x^1216 mod p(x)`, x^1248 mod p(x)`, x^1280 mod p(x)` */ + .octa 0x3f4840246791d588c133722b1fe0b5c3 + + /* x^1056 mod p(x)`, x^1088 mod p(x)`, x^1120 mod p(x)`, x^1152 mod p(x)` */ + .octa 0x34c96751b04de25a64b67ee0e55ef1f3 + + /* x^928 mod p(x)`, x^960 mod p(x)`, x^992 mod p(x)`, x^1024 mod p(x)` */ + .octa 0x156c8e180b4a395b069db049b8fdb1e7 + + /* x^800 mod p(x)`, x^832 mod p(x)`, x^864 mod p(x)`, x^896 mod p(x)` */ + .octa 0xe0b99ccbe661f7bea11bfaf3c9e90b9e + + /* x^672 mod p(x)`, x^704 mod p(x)`, x^736 mod p(x)`, x^768 mod p(x)` */ + .octa 0x041d37768cd75659817cdc5119b29a35 + + /* x^544 mod p(x)`, x^576 mod p(x)`, x^608 mod p(x)`, x^640 mod p(x)` */ + .octa 0x3a0777818cfaa9651ce9d94b36c41f1c + + /* x^416 mod p(x)`, x^448 mod p(x)`, x^480 mod p(x)`, x^512 mod p(x)` */ + .octa 0x0e148e8252377a554f256efcb82be955 + + /* x^288 mod p(x)`, x^320 mod p(x)`, x^352 mod p(x)`, x^384 mod p(x)` */ + .octa 0x9c25531d19e65ddeec1631edb2dea967 + + /* x^160 mod p(x)`, x^192 mod p(x)`, x^224 mod p(x)`, x^256 mod p(x)` */ + .octa 0x790606ff9957c0a65d27e147510ac59a + + /* x^32 mod p(x)`, x^64 mod p(x)`, x^96 mod p(x)`, x^128 mod p(x)` */ + .octa 0x82f63b786ea2d55ca66805eb18b8ea18 + + +.barrett_constants: + /* 33 bit reflected Barrett constant m - (4^32)/n */ + .octa 0x000000000000000000000000dea713f1 /* x^64 div p(x)` */ + /* 33 bit reflected Barrett constant n */ + .octa 0x00000000000000000000000105ec76f1 + +#define CRC_FUNCTION_NAME __crc32c_vpmsum +#define REFLECT +#include "crc32-vpmsum_core.S" diff --git a/arch/powerpc/lib/crct10dif-vpmsum_asm.S b/arch/powerpc/lib/crct10dif-vpmsum_asm.S new file mode 100644 index 000000000000..f0b93a0fe168 --- /dev/null +++ b/arch/powerpc/lib/crct10dif-vpmsum_asm.S @@ -0,0 +1,845 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Calculate a CRC T10DIF with vpmsum acceleration + * + * Constants generated by crc32-vpmsum, available at + * https://github.com/antonblanchard/crc32-vpmsum + * + * crc32-vpmsum is + * Copyright (C) 2015 Anton Blanchard <anton@au.ibm.com>, IBM + */ + .section .rodata +.balign 16 + +.byteswap_constant: + /* byte reverse permute constant */ + .octa 0x0F0E0D0C0B0A09080706050403020100 + +.constants: + + /* Reduce 262144 kbits to 1024 bits */ + /* x^261184 mod p(x), x^261120 mod p(x) */ + .octa 0x0000000056d300000000000052550000 + + /* x^260160 mod p(x), x^260096 mod p(x) */ + .octa 0x00000000ee67000000000000a1e40000 + + /* x^259136 
mod p(x), x^259072 mod p(x) */ + .octa 0x0000000060830000000000004ad10000 + + /* x^258112 mod p(x), x^258048 mod p(x) */ + .octa 0x000000008cfe0000000000009ab40000 + + /* x^257088 mod p(x), x^257024 mod p(x) */ + .octa 0x000000003e93000000000000fdb50000 + + /* x^256064 mod p(x), x^256000 mod p(x) */ + .octa 0x000000003c2000000000000045480000 + + /* x^255040 mod p(x), x^254976 mod p(x) */ + .octa 0x00000000b1fc0000000000008d690000 + + /* x^254016 mod p(x), x^253952 mod p(x) */ + .octa 0x00000000f82b00000000000024ad0000 + + /* x^252992 mod p(x), x^252928 mod p(x) */ + .octa 0x0000000044420000000000009f1a0000 + + /* x^251968 mod p(x), x^251904 mod p(x) */ + .octa 0x00000000e88c00000000000066ec0000 + + /* x^250944 mod p(x), x^250880 mod p(x) */ + .octa 0x00000000385c000000000000c87d0000 + + /* x^249920 mod p(x), x^249856 mod p(x) */ + .octa 0x000000003227000000000000c8ff0000 + + /* x^248896 mod p(x), x^248832 mod p(x) */ + .octa 0x00000000a9a900000000000033440000 + + /* x^247872 mod p(x), x^247808 mod p(x) */ + .octa 0x00000000abaa00000000000066eb0000 + + /* x^246848 mod p(x), x^246784 mod p(x) */ + .octa 0x000000001ac3000000000000c4ef0000 + + /* x^245824 mod p(x), x^245760 mod p(x) */ + .octa 0x0000000063f000000000000056f30000 + + /* x^244800 mod p(x), x^244736 mod p(x) */ + .octa 0x0000000032cc00000000000002050000 + + /* x^243776 mod p(x), x^243712 mod p(x) */ + .octa 0x00000000f8b5000000000000568e0000 + + /* x^242752 mod p(x), x^242688 mod p(x) */ + .octa 0x000000008db100000000000064290000 + + /* x^241728 mod p(x), x^241664 mod p(x) */ + .octa 0x0000000059ca0000000000006b660000 + + /* x^240704 mod p(x), x^240640 mod p(x) */ + .octa 0x000000005f5c00000000000018f80000 + + /* x^239680 mod p(x), x^239616 mod p(x) */ + .octa 0x0000000061af000000000000b6090000 + + /* x^238656 mod p(x), x^238592 mod p(x) */ + .octa 0x00000000e29e000000000000099a0000 + + /* x^237632 mod p(x), x^237568 mod p(x) */ + .octa 0x000000000975000000000000a8360000 + + /* x^236608 mod p(x), x^236544 mod p(x) */ + .octa 0x0000000043900000000000004f570000 + + /* x^235584 mod p(x), x^235520 mod p(x) */ + .octa 0x00000000f9cd000000000000134c0000 + + /* x^234560 mod p(x), x^234496 mod p(x) */ + .octa 0x000000007c29000000000000ec380000 + + /* x^233536 mod p(x), x^233472 mod p(x) */ + .octa 0x000000004c6a000000000000b0d10000 + + /* x^232512 mod p(x), x^232448 mod p(x) */ + .octa 0x00000000e7290000000000007d3e0000 + + /* x^231488 mod p(x), x^231424 mod p(x) */ + .octa 0x00000000f1ab000000000000f0b20000 + + /* x^230464 mod p(x), x^230400 mod p(x) */ + .octa 0x0000000039db0000000000009c270000 + + /* x^229440 mod p(x), x^229376 mod p(x) */ + .octa 0x000000005e2800000000000092890000 + + /* x^228416 mod p(x), x^228352 mod p(x) */ + .octa 0x00000000d44e000000000000d5ee0000 + + /* x^227392 mod p(x), x^227328 mod p(x) */ + .octa 0x00000000cd0a00000000000041f50000 + + /* x^226368 mod p(x), x^226304 mod p(x) */ + .octa 0x00000000c5b400000000000010520000 + + /* x^225344 mod p(x), x^225280 mod p(x) */ + .octa 0x00000000fd2100000000000042170000 + + /* x^224320 mod p(x), x^224256 mod p(x) */ + .octa 0x000000002f2500000000000095c20000 + + /* x^223296 mod p(x), x^223232 mod p(x) */ + .octa 0x000000001b0100000000000001ce0000 + + /* x^222272 mod p(x), x^222208 mod p(x) */ + .octa 0x000000000d430000000000002aca0000 + + /* x^221248 mod p(x), x^221184 mod p(x) */ + .octa 0x0000000030a6000000000000385e0000 + + /* x^220224 mod p(x), x^220160 mod p(x) */ + .octa 0x00000000e37b0000000000006f7a0000 + + /* x^219200 mod p(x), x^219136 mod p(x) */ + .octa 
0x00000000873600000000000024320000 + + /* x^218176 mod p(x), x^218112 mod p(x) */ + .octa 0x00000000e9fb000000000000bd9c0000 + + /* x^217152 mod p(x), x^217088 mod p(x) */ + .octa 0x000000003b9500000000000054bc0000 + + /* x^216128 mod p(x), x^216064 mod p(x) */ + .octa 0x00000000133e000000000000a4660000 + + /* x^215104 mod p(x), x^215040 mod p(x) */ + .octa 0x00000000784500000000000079930000 + + /* x^214080 mod p(x), x^214016 mod p(x) */ + .octa 0x00000000b9800000000000001bb80000 + + /* x^213056 mod p(x), x^212992 mod p(x) */ + .octa 0x00000000687600000000000024400000 + + /* x^212032 mod p(x), x^211968 mod p(x) */ + .octa 0x00000000aff300000000000029e10000 + + /* x^211008 mod p(x), x^210944 mod p(x) */ + .octa 0x0000000024b50000000000005ded0000 + + /* x^209984 mod p(x), x^209920 mod p(x) */ + .octa 0x0000000017e8000000000000b12e0000 + + /* x^208960 mod p(x), x^208896 mod p(x) */ + .octa 0x00000000128400000000000026d20000 + + /* x^207936 mod p(x), x^207872 mod p(x) */ + .octa 0x000000002115000000000000a32a0000 + + /* x^206912 mod p(x), x^206848 mod p(x) */ + .octa 0x000000009595000000000000a1210000 + + /* x^205888 mod p(x), x^205824 mod p(x) */ + .octa 0x00000000281e000000000000ee8b0000 + + /* x^204864 mod p(x), x^204800 mod p(x) */ + .octa 0x0000000006010000000000003d0d0000 + + /* x^203840 mod p(x), x^203776 mod p(x) */ + .octa 0x00000000e2b600000000000034e90000 + + /* x^202816 mod p(x), x^202752 mod p(x) */ + .octa 0x000000001bd40000000000004cdb0000 + + /* x^201792 mod p(x), x^201728 mod p(x) */ + .octa 0x00000000df2800000000000030e90000 + + /* x^200768 mod p(x), x^200704 mod p(x) */ + .octa 0x0000000049c200000000000042590000 + + /* x^199744 mod p(x), x^199680 mod p(x) */ + .octa 0x000000009b97000000000000df950000 + + /* x^198720 mod p(x), x^198656 mod p(x) */ + .octa 0x000000006184000000000000da7b0000 + + /* x^197696 mod p(x), x^197632 mod p(x) */ + .octa 0x00000000461700000000000012510000 + + /* x^196672 mod p(x), x^196608 mod p(x) */ + .octa 0x000000009b40000000000000f37e0000 + + /* x^195648 mod p(x), x^195584 mod p(x) */ + .octa 0x00000000eeb2000000000000ecf10000 + + /* x^194624 mod p(x), x^194560 mod p(x) */ + .octa 0x00000000b2e800000000000050f20000 + + /* x^193600 mod p(x), x^193536 mod p(x) */ + .octa 0x00000000f59a000000000000e0b30000 + + /* x^192576 mod p(x), x^192512 mod p(x) */ + .octa 0x00000000467f0000000000004d5a0000 + + /* x^191552 mod p(x), x^191488 mod p(x) */ + .octa 0x00000000da92000000000000bb010000 + + /* x^190528 mod p(x), x^190464 mod p(x) */ + .octa 0x000000001e1000000000000022a40000 + + /* x^189504 mod p(x), x^189440 mod p(x) */ + .octa 0x0000000058fe000000000000836f0000 + + /* x^188480 mod p(x), x^188416 mod p(x) */ + .octa 0x00000000b9ce000000000000d78d0000 + + /* x^187456 mod p(x), x^187392 mod p(x) */ + .octa 0x0000000022210000000000004f8d0000 + + /* x^186432 mod p(x), x^186368 mod p(x) */ + .octa 0x00000000744600000000000033760000 + + /* x^185408 mod p(x), x^185344 mod p(x) */ + .octa 0x000000001c2e000000000000a1e50000 + + /* x^184384 mod p(x), x^184320 mod p(x) */ + .octa 0x00000000dcc8000000000000a1a40000 + + /* x^183360 mod p(x), x^183296 mod p(x) */ + .octa 0x00000000910f00000000000019a20000 + + /* x^182336 mod p(x), x^182272 mod p(x) */ + .octa 0x0000000055d5000000000000f6ae0000 + + /* x^181312 mod p(x), x^181248 mod p(x) */ + .octa 0x00000000c8ba000000000000a7ac0000 + + /* x^180288 mod p(x), x^180224 mod p(x) */ + .octa 0x0000000031f8000000000000eea20000 + + /* x^179264 mod p(x), x^179200 mod p(x) */ + .octa 0x000000001966000000000000c4d90000 + + /* 
x^178240 mod p(x), x^178176 mod p(x) */ + .octa 0x00000000b9810000000000002b470000 + + /* x^177216 mod p(x), x^177152 mod p(x) */ + .octa 0x000000008303000000000000f7cf0000 + + /* x^176192 mod p(x), x^176128 mod p(x) */ + .octa 0x000000002ce500000000000035b30000 + + /* x^175168 mod p(x), x^175104 mod p(x) */ + .octa 0x000000002fae0000000000000c7c0000 + + /* x^174144 mod p(x), x^174080 mod p(x) */ + .octa 0x00000000f50c0000000000009edf0000 + + /* x^173120 mod p(x), x^173056 mod p(x) */ + .octa 0x00000000714f00000000000004cd0000 + + /* x^172096 mod p(x), x^172032 mod p(x) */ + .octa 0x00000000c161000000000000541b0000 + + /* x^171072 mod p(x), x^171008 mod p(x) */ + .octa 0x0000000021c8000000000000e2700000 + + /* x^170048 mod p(x), x^169984 mod p(x) */ + .octa 0x00000000b93d00000000000009a60000 + + /* x^169024 mod p(x), x^168960 mod p(x) */ + .octa 0x00000000fbcf000000000000761c0000 + + /* x^168000 mod p(x), x^167936 mod p(x) */ + .octa 0x0000000026350000000000009db30000 + + /* x^166976 mod p(x), x^166912 mod p(x) */ + .octa 0x00000000b64f0000000000003e9f0000 + + /* x^165952 mod p(x), x^165888 mod p(x) */ + .octa 0x00000000bd0e00000000000078590000 + + /* x^164928 mod p(x), x^164864 mod p(x) */ + .octa 0x00000000d9360000000000008bc80000 + + /* x^163904 mod p(x), x^163840 mod p(x) */ + .octa 0x000000002f140000000000008c9f0000 + + /* x^162880 mod p(x), x^162816 mod p(x) */ + .octa 0x000000006a270000000000006af70000 + + /* x^161856 mod p(x), x^161792 mod p(x) */ + .octa 0x000000006685000000000000e5210000 + + /* x^160832 mod p(x), x^160768 mod p(x) */ + .octa 0x0000000062da00000000000008290000 + + /* x^159808 mod p(x), x^159744 mod p(x) */ + .octa 0x00000000bb4b000000000000e4d00000 + + /* x^158784 mod p(x), x^158720 mod p(x) */ + .octa 0x00000000d2490000000000004ae10000 + + /* x^157760 mod p(x), x^157696 mod p(x) */ + .octa 0x00000000c85b00000000000000e70000 + + /* x^156736 mod p(x), x^156672 mod p(x) */ + .octa 0x00000000c37a00000000000015650000 + + /* x^155712 mod p(x), x^155648 mod p(x) */ + .octa 0x0000000018530000000000001c2f0000 + + /* x^154688 mod p(x), x^154624 mod p(x) */ + .octa 0x00000000b46600000000000037bd0000 + + /* x^153664 mod p(x), x^153600 mod p(x) */ + .octa 0x00000000439b00000000000012190000 + + /* x^152640 mod p(x), x^152576 mod p(x) */ + .octa 0x00000000b1260000000000005ece0000 + + /* x^151616 mod p(x), x^151552 mod p(x) */ + .octa 0x00000000d8110000000000002a5e0000 + + /* x^150592 mod p(x), x^150528 mod p(x) */ + .octa 0x00000000099f00000000000052330000 + + /* x^149568 mod p(x), x^149504 mod p(x) */ + .octa 0x00000000f9f9000000000000f9120000 + + /* x^148544 mod p(x), x^148480 mod p(x) */ + .octa 0x000000005cc00000000000000ddc0000 + + /* x^147520 mod p(x), x^147456 mod p(x) */ + .octa 0x00000000343b00000000000012200000 + + /* x^146496 mod p(x), x^146432 mod p(x) */ + .octa 0x000000009222000000000000d12b0000 + + /* x^145472 mod p(x), x^145408 mod p(x) */ + .octa 0x00000000d781000000000000eb2d0000 + + /* x^144448 mod p(x), x^144384 mod p(x) */ + .octa 0x000000000bf400000000000058970000 + + /* x^143424 mod p(x), x^143360 mod p(x) */ + .octa 0x00000000094200000000000013690000 + + /* x^142400 mod p(x), x^142336 mod p(x) */ + .octa 0x00000000d55100000000000051950000 + + /* x^141376 mod p(x), x^141312 mod p(x) */ + .octa 0x000000008f11000000000000954b0000 + + /* x^140352 mod p(x), x^140288 mod p(x) */ + .octa 0x00000000140f000000000000b29e0000 + + /* x^139328 mod p(x), x^139264 mod p(x) */ + .octa 0x00000000c6db000000000000db5d0000 + + /* x^138304 mod p(x), x^138240 mod p(x) */ + 
.octa 0x00000000715b000000000000dfaf0000 + + /* x^137280 mod p(x), x^137216 mod p(x) */ + .octa 0x000000000dea000000000000e3b60000 + + /* x^136256 mod p(x), x^136192 mod p(x) */ + .octa 0x000000006f94000000000000ddaf0000 + + /* x^135232 mod p(x), x^135168 mod p(x) */ + .octa 0x0000000024e1000000000000e4f70000 + + /* x^134208 mod p(x), x^134144 mod p(x) */ + .octa 0x000000008810000000000000aa110000 + + /* x^133184 mod p(x), x^133120 mod p(x) */ + .octa 0x0000000030c2000000000000a8e60000 + + /* x^132160 mod p(x), x^132096 mod p(x) */ + .octa 0x00000000e6d0000000000000ccf30000 + + /* x^131136 mod p(x), x^131072 mod p(x) */ + .octa 0x000000004da000000000000079bf0000 + + /* x^130112 mod p(x), x^130048 mod p(x) */ + .octa 0x000000007759000000000000b3a30000 + + /* x^129088 mod p(x), x^129024 mod p(x) */ + .octa 0x00000000597400000000000028790000 + + /* x^128064 mod p(x), x^128000 mod p(x) */ + .octa 0x000000007acd000000000000b5820000 + + /* x^127040 mod p(x), x^126976 mod p(x) */ + .octa 0x00000000e6e400000000000026ad0000 + + /* x^126016 mod p(x), x^125952 mod p(x) */ + .octa 0x000000006d49000000000000985b0000 + + /* x^124992 mod p(x), x^124928 mod p(x) */ + .octa 0x000000000f0800000000000011520000 + + /* x^123968 mod p(x), x^123904 mod p(x) */ + .octa 0x000000002c7f000000000000846c0000 + + /* x^122944 mod p(x), x^122880 mod p(x) */ + .octa 0x000000005ce7000000000000ae1d0000 + + /* x^121920 mod p(x), x^121856 mod p(x) */ + .octa 0x00000000d4cb000000000000e21d0000 + + /* x^120896 mod p(x), x^120832 mod p(x) */ + .octa 0x000000003a2300000000000019bb0000 + + /* x^119872 mod p(x), x^119808 mod p(x) */ + .octa 0x000000000e1700000000000095290000 + + /* x^118848 mod p(x), x^118784 mod p(x) */ + .octa 0x000000006e6400000000000050d20000 + + /* x^117824 mod p(x), x^117760 mod p(x) */ + .octa 0x000000008d5c0000000000000cd10000 + + /* x^116800 mod p(x), x^116736 mod p(x) */ + .octa 0x00000000ef310000000000007b570000 + + /* x^115776 mod p(x), x^115712 mod p(x) */ + .octa 0x00000000645d00000000000053d60000 + + /* x^114752 mod p(x), x^114688 mod p(x) */ + .octa 0x0000000018fc00000000000077510000 + + /* x^113728 mod p(x), x^113664 mod p(x) */ + .octa 0x000000000cb3000000000000a7b70000 + + /* x^112704 mod p(x), x^112640 mod p(x) */ + .octa 0x00000000991b000000000000d0780000 + + /* x^111680 mod p(x), x^111616 mod p(x) */ + .octa 0x00000000845a000000000000be3c0000 + + /* x^110656 mod p(x), x^110592 mod p(x) */ + .octa 0x00000000d3a9000000000000df020000 + + /* x^109632 mod p(x), x^109568 mod p(x) */ + .octa 0x0000000017d7000000000000063e0000 + + /* x^108608 mod p(x), x^108544 mod p(x) */ + .octa 0x000000007a860000000000008ab40000 + + /* x^107584 mod p(x), x^107520 mod p(x) */ + .octa 0x00000000fd7c000000000000c7bd0000 + + /* x^106560 mod p(x), x^106496 mod p(x) */ + .octa 0x00000000a56b000000000000efd60000 + + /* x^105536 mod p(x), x^105472 mod p(x) */ + .octa 0x0000000010e400000000000071380000 + + /* x^104512 mod p(x), x^104448 mod p(x) */ + .octa 0x00000000994500000000000004d30000 + + /* x^103488 mod p(x), x^103424 mod p(x) */ + .octa 0x00000000b83c0000000000003b0e0000 + + /* x^102464 mod p(x), x^102400 mod p(x) */ + .octa 0x00000000d6c10000000000008b020000 + + /* x^101440 mod p(x), x^101376 mod p(x) */ + .octa 0x000000009efc000000000000da940000 + + /* x^100416 mod p(x), x^100352 mod p(x) */ + .octa 0x000000005e87000000000000f9f70000 + + /* x^99392 mod p(x), x^99328 mod p(x) */ + .octa 0x000000006c9b00000000000045e40000 + + /* x^98368 mod p(x), x^98304 mod p(x) */ + .octa 0x00000000178a00000000000083940000 + + /* 
x^97344 mod p(x), x^97280 mod p(x) */ + .octa 0x00000000f0c8000000000000f0a00000 + + /* x^96320 mod p(x), x^96256 mod p(x) */ + .octa 0x00000000f699000000000000b74b0000 + + /* x^95296 mod p(x), x^95232 mod p(x) */ + .octa 0x00000000316d000000000000c1cf0000 + + /* x^94272 mod p(x), x^94208 mod p(x) */ + .octa 0x00000000987e00000000000072680000 + + /* x^93248 mod p(x), x^93184 mod p(x) */ + .octa 0x00000000acff000000000000e0ab0000 + + /* x^92224 mod p(x), x^92160 mod p(x) */ + .octa 0x00000000a1f6000000000000c5a80000 + + /* x^91200 mod p(x), x^91136 mod p(x) */ + .octa 0x0000000061bd000000000000cf690000 + + /* x^90176 mod p(x), x^90112 mod p(x) */ + .octa 0x00000000c9f2000000000000cbcc0000 + + /* x^89152 mod p(x), x^89088 mod p(x) */ + .octa 0x000000005a33000000000000de050000 + + /* x^88128 mod p(x), x^88064 mod p(x) */ + .octa 0x00000000e416000000000000ccd70000 + + /* x^87104 mod p(x), x^87040 mod p(x) */ + .octa 0x0000000058930000000000002f670000 + + /* x^86080 mod p(x), x^86016 mod p(x) */ + .octa 0x00000000a9d3000000000000152f0000 + + /* x^85056 mod p(x), x^84992 mod p(x) */ + .octa 0x00000000c114000000000000ecc20000 + + /* x^84032 mod p(x), x^83968 mod p(x) */ + .octa 0x00000000b9270000000000007c890000 + + /* x^83008 mod p(x), x^82944 mod p(x) */ + .octa 0x000000002e6000000000000006ee0000 + + /* x^81984 mod p(x), x^81920 mod p(x) */ + .octa 0x00000000dfc600000000000009100000 + + /* x^80960 mod p(x), x^80896 mod p(x) */ + .octa 0x000000004911000000000000ad4e0000 + + /* x^79936 mod p(x), x^79872 mod p(x) */ + .octa 0x00000000ae1b000000000000b04d0000 + + /* x^78912 mod p(x), x^78848 mod p(x) */ + .octa 0x0000000005fa000000000000e9900000 + + /* x^77888 mod p(x), x^77824 mod p(x) */ + .octa 0x0000000004a1000000000000cc6f0000 + + /* x^76864 mod p(x), x^76800 mod p(x) */ + .octa 0x00000000af73000000000000ed110000 + + /* x^75840 mod p(x), x^75776 mod p(x) */ + .octa 0x0000000082530000000000008f7e0000 + + /* x^74816 mod p(x), x^74752 mod p(x) */ + .octa 0x00000000cfdc000000000000594f0000 + + /* x^73792 mod p(x), x^73728 mod p(x) */ + .octa 0x00000000a6b6000000000000a8750000 + + /* x^72768 mod p(x), x^72704 mod p(x) */ + .octa 0x00000000fd76000000000000aa0c0000 + + /* x^71744 mod p(x), x^71680 mod p(x) */ + .octa 0x0000000006f500000000000071db0000 + + /* x^70720 mod p(x), x^70656 mod p(x) */ + .octa 0x0000000037ca000000000000ab0c0000 + + /* x^69696 mod p(x), x^69632 mod p(x) */ + .octa 0x00000000d7ab000000000000b7a00000 + + /* x^68672 mod p(x), x^68608 mod p(x) */ + .octa 0x00000000440800000000000090d30000 + + /* x^67648 mod p(x), x^67584 mod p(x) */ + .octa 0x00000000186100000000000054730000 + + /* x^66624 mod p(x), x^66560 mod p(x) */ + .octa 0x000000007368000000000000a3a20000 + + /* x^65600 mod p(x), x^65536 mod p(x) */ + .octa 0x0000000026d0000000000000f9040000 + + /* x^64576 mod p(x), x^64512 mod p(x) */ + .octa 0x00000000fe770000000000009c0a0000 + + /* x^63552 mod p(x), x^63488 mod p(x) */ + .octa 0x000000002cba000000000000d1e70000 + + /* x^62528 mod p(x), x^62464 mod p(x) */ + .octa 0x00000000f8bd0000000000005ac10000 + + /* x^61504 mod p(x), x^61440 mod p(x) */ + .octa 0x000000007372000000000000d68d0000 + + /* x^60480 mod p(x), x^60416 mod p(x) */ + .octa 0x00000000f37f00000000000089f60000 + + /* x^59456 mod p(x), x^59392 mod p(x) */ + .octa 0x00000000078400000000000008a90000 + + /* x^58432 mod p(x), x^58368 mod p(x) */ + .octa 0x00000000d3e400000000000042360000 + + /* x^57408 mod p(x), x^57344 mod p(x) */ + .octa 0x00000000eba800000000000092d50000 + + /* x^56384 mod p(x), x^56320 mod p(x) 
*/ + .octa 0x00000000afbe000000000000b4d50000 + + /* x^55360 mod p(x), x^55296 mod p(x) */ + .octa 0x00000000d8ca000000000000c9060000 + + /* x^54336 mod p(x), x^54272 mod p(x) */ + .octa 0x00000000c2d00000000000008f4f0000 + + /* x^53312 mod p(x), x^53248 mod p(x) */ + .octa 0x00000000373200000000000028690000 + + /* x^52288 mod p(x), x^52224 mod p(x) */ + .octa 0x0000000046ae000000000000c3b30000 + + /* x^51264 mod p(x), x^51200 mod p(x) */ + .octa 0x00000000b243000000000000f8700000 + + /* x^50240 mod p(x), x^50176 mod p(x) */ + .octa 0x00000000f7f500000000000029eb0000 + + /* x^49216 mod p(x), x^49152 mod p(x) */ + .octa 0x000000000c7e000000000000fe730000 + + /* x^48192 mod p(x), x^48128 mod p(x) */ + .octa 0x00000000c38200000000000096000000 + + /* x^47168 mod p(x), x^47104 mod p(x) */ + .octa 0x000000008956000000000000683c0000 + + /* x^46144 mod p(x), x^46080 mod p(x) */ + .octa 0x00000000422d0000000000005f1e0000 + + /* x^45120 mod p(x), x^45056 mod p(x) */ + .octa 0x00000000ac0f0000000000006f810000 + + /* x^44096 mod p(x), x^44032 mod p(x) */ + .octa 0x00000000ce30000000000000031f0000 + + /* x^43072 mod p(x), x^43008 mod p(x) */ + .octa 0x000000003d43000000000000455a0000 + + /* x^42048 mod p(x), x^41984 mod p(x) */ + .octa 0x000000007ebe000000000000a6050000 + + /* x^41024 mod p(x), x^40960 mod p(x) */ + .octa 0x00000000976e00000000000077eb0000 + + /* x^40000 mod p(x), x^39936 mod p(x) */ + .octa 0x000000000872000000000000389c0000 + + /* x^38976 mod p(x), x^38912 mod p(x) */ + .octa 0x000000008979000000000000c7b20000 + + /* x^37952 mod p(x), x^37888 mod p(x) */ + .octa 0x000000005c1e0000000000001d870000 + + /* x^36928 mod p(x), x^36864 mod p(x) */ + .octa 0x00000000aebb00000000000045810000 + + /* x^35904 mod p(x), x^35840 mod p(x) */ + .octa 0x000000004f7e0000000000006d4a0000 + + /* x^34880 mod p(x), x^34816 mod p(x) */ + .octa 0x00000000ea98000000000000b9200000 + + /* x^33856 mod p(x), x^33792 mod p(x) */ + .octa 0x00000000f39600000000000022f20000 + + /* x^32832 mod p(x), x^32768 mod p(x) */ + .octa 0x000000000bc500000000000041ca0000 + + /* x^31808 mod p(x), x^31744 mod p(x) */ + .octa 0x00000000786400000000000078500000 + + /* x^30784 mod p(x), x^30720 mod p(x) */ + .octa 0x00000000be970000000000009e7e0000 + + /* x^29760 mod p(x), x^29696 mod p(x) */ + .octa 0x00000000dd6d000000000000a53c0000 + + /* x^28736 mod p(x), x^28672 mod p(x) */ + .octa 0x000000004c3f00000000000039340000 + + /* x^27712 mod p(x), x^27648 mod p(x) */ + .octa 0x0000000093a4000000000000b58e0000 + + /* x^26688 mod p(x), x^26624 mod p(x) */ + .octa 0x0000000050fb00000000000062d40000 + + /* x^25664 mod p(x), x^25600 mod p(x) */ + .octa 0x00000000f505000000000000a26f0000 + + /* x^24640 mod p(x), x^24576 mod p(x) */ + .octa 0x0000000064f900000000000065e60000 + + /* x^23616 mod p(x), x^23552 mod p(x) */ + .octa 0x00000000e8c2000000000000aad90000 + + /* x^22592 mod p(x), x^22528 mod p(x) */ + .octa 0x00000000720b000000000000a3b00000 + + /* x^21568 mod p(x), x^21504 mod p(x) */ + .octa 0x00000000e992000000000000d2680000 + + /* x^20544 mod p(x), x^20480 mod p(x) */ + .octa 0x000000009132000000000000cf4c0000 + + /* x^19520 mod p(x), x^19456 mod p(x) */ + .octa 0x00000000608a00000000000076610000 + + /* x^18496 mod p(x), x^18432 mod p(x) */ + .octa 0x000000009948000000000000fb9f0000 + + /* x^17472 mod p(x), x^17408 mod p(x) */ + .octa 0x00000000173000000000000003770000 + + /* x^16448 mod p(x), x^16384 mod p(x) */ + .octa 0x000000006fe300000000000004880000 + + /* x^15424 mod p(x), x^15360 mod p(x) */ + .octa 
0x00000000e15300000000000056a70000 + + /* x^14400 mod p(x), x^14336 mod p(x) */ + .octa 0x0000000092d60000000000009dfd0000 + + /* x^13376 mod p(x), x^13312 mod p(x) */ + .octa 0x0000000002fd00000000000074c80000 + + /* x^12352 mod p(x), x^12288 mod p(x) */ + .octa 0x00000000c78b000000000000a3ec0000 + + /* x^11328 mod p(x), x^11264 mod p(x) */ + .octa 0x000000009262000000000000b3530000 + + /* x^10304 mod p(x), x^10240 mod p(x) */ + .octa 0x0000000084f200000000000047bf0000 + + /* x^9280 mod p(x), x^9216 mod p(x) */ + .octa 0x0000000067ee000000000000e97c0000 + + /* x^8256 mod p(x), x^8192 mod p(x) */ + .octa 0x00000000535b00000000000091e10000 + + /* x^7232 mod p(x), x^7168 mod p(x) */ + .octa 0x000000007ebb00000000000055060000 + + /* x^6208 mod p(x), x^6144 mod p(x) */ + .octa 0x00000000c6a1000000000000fd360000 + + /* x^5184 mod p(x), x^5120 mod p(x) */ + .octa 0x000000001be500000000000055860000 + + /* x^4160 mod p(x), x^4096 mod p(x) */ + .octa 0x00000000ae0e0000000000005bd00000 + + /* x^3136 mod p(x), x^3072 mod p(x) */ + .octa 0x0000000022040000000000008db20000 + + /* x^2112 mod p(x), x^2048 mod p(x) */ + .octa 0x00000000c9eb000000000000efe20000 + + /* x^1088 mod p(x), x^1024 mod p(x) */ + .octa 0x0000000039b400000000000051d10000 + +.short_constants: + + /* Reduce final 1024-2048 bits to 64 bits, shifting 32 bits to include the trailing 32 bits of zeros */ + /* x^2048 mod p(x), x^2016 mod p(x), x^1984 mod p(x), x^1952 mod p(x) */ + .octa 0xefe20000dccf00009440000033590000 + + /* x^1920 mod p(x), x^1888 mod p(x), x^1856 mod p(x), x^1824 mod p(x) */ + .octa 0xee6300002f3f000062180000e0ed0000 + + /* x^1792 mod p(x), x^1760 mod p(x), x^1728 mod p(x), x^1696 mod p(x) */ + .octa 0xcf5f000017ef0000ccbe000023d30000 + + /* x^1664 mod p(x), x^1632 mod p(x), x^1600 mod p(x), x^1568 mod p(x) */ + .octa 0x6d0c0000a30e00000920000042630000 + + /* x^1536 mod p(x), x^1504 mod p(x), x^1472 mod p(x), x^1440 mod p(x) */ + .octa 0x21d30000932b0000a7a00000efcc0000 + + /* x^1408 mod p(x), x^1376 mod p(x), x^1344 mod p(x), x^1312 mod p(x) */ + .octa 0x10be00000b310000666f00000d1c0000 + + /* x^1280 mod p(x), x^1248 mod p(x), x^1216 mod p(x), x^1184 mod p(x) */ + .octa 0x1f240000ce9e0000caad0000589e0000 + + /* x^1152 mod p(x), x^1120 mod p(x), x^1088 mod p(x), x^1056 mod p(x) */ + .octa 0x29610000d02b000039b400007cf50000 + + /* x^1024 mod p(x), x^992 mod p(x), x^960 mod p(x), x^928 mod p(x) */ + .octa 0x51d100009d9d00003c0e0000bfd60000 + + /* x^896 mod p(x), x^864 mod p(x), x^832 mod p(x), x^800 mod p(x) */ + .octa 0xda390000ceae000013830000713c0000 + + /* x^768 mod p(x), x^736 mod p(x), x^704 mod p(x), x^672 mod p(x) */ + .octa 0xb67800001e16000085c0000080a60000 + + /* x^640 mod p(x), x^608 mod p(x), x^576 mod p(x), x^544 mod p(x) */ + .octa 0x0db40000f7f90000371d0000e6580000 + + /* x^512 mod p(x), x^480 mod p(x), x^448 mod p(x), x^416 mod p(x) */ + .octa 0x87e70000044c0000aadb0000a4970000 + + /* x^384 mod p(x), x^352 mod p(x), x^320 mod p(x), x^288 mod p(x) */ + .octa 0x1f990000ad180000d8b30000e7b50000 + + /* x^256 mod p(x), x^224 mod p(x), x^192 mod p(x), x^160 mod p(x) */ + .octa 0xbe6c00006ee300004c1a000006df0000 + + /* x^128 mod p(x), x^96 mod p(x), x^64 mod p(x), x^32 mod p(x) */ + .octa 0xfb0b00002d560000136800008bb70000 + + +.barrett_constants: + /* Barrett constant m - (4^32)/n */ + .octa 0x000000000000000000000001f65a57f8 /* x^64 div p(x) */ + /* Barrett constant n */ + .octa 0x0000000000000000000000018bb70000 + +#define CRC_FUNCTION_NAME __crct10dif_vpmsum +#include "crc32-vpmsum_core.S" diff --git 
a/arch/powerpc/lib/crtsavres.S b/arch/powerpc/lib/crtsavres.S index 7e5e1c28e56a..8967903c15e9 100644 --- a/arch/powerpc/lib/crtsavres.S +++ b/arch/powerpc/lib/crtsavres.S @@ -46,7 +46,7 @@ .section ".text" -#ifndef CONFIG_PPC64 +#ifndef __powerpc64__ /* Routines for saving integer registers, called by the compiler. */ /* Called with r11 pointing to the stack header word of the caller of the */ diff --git a/arch/powerpc/lib/error-inject.c b/arch/powerpc/lib/error-inject.c index 407b992fb02f..e834079d2b5c 100644 --- a/arch/powerpc/lib/error-inject.c +++ b/arch/powerpc/lib/error-inject.c @@ -11,6 +11,6 @@ void override_function_with_return(struct pt_regs *regs) * function in the kernel/module, captured on a kprobe. We don't need * to worry about 32-bit userspace on a 64-bit kernel. */ - regs->nip = regs->link; + regs_set_return_ip(regs, regs->link); } NOKPROBE_SYMBOL(override_function_with_return); diff --git a/arch/powerpc/lib/feature-fixups-test.S b/arch/powerpc/lib/feature-fixups-test.S index b12168c2447a..480172fbd024 100644 --- a/arch/powerpc/lib/feature-fixups-test.S +++ b/arch/powerpc/lib/feature-fixups-test.S @@ -7,6 +7,7 @@ #include <asm/ppc_asm.h> #include <asm/synch.h> #include <asm/asm-compat.h> +#include <asm/ppc-opcode.h> .text @@ -791,3 +792,71 @@ globl(lwsync_fixup_test_expected_SYNC) 1: or 1,1,1 sync +globl(ftr_fixup_prefix1) + or 1,1,1 + .long OP_PREFIX << 26 + .long 0x0000000 + or 2,2,2 +globl(end_ftr_fixup_prefix1) + +globl(ftr_fixup_prefix1_orig) + or 1,1,1 + .long OP_PREFIX << 26 + .long 0x0000000 + or 2,2,2 + +globl(ftr_fixup_prefix1_expected) + or 1,1,1 + nop + nop + or 2,2,2 + +globl(ftr_fixup_prefix2) + or 1,1,1 + .long OP_PREFIX << 26 + .long 0x0000000 + or 2,2,2 +globl(end_ftr_fixup_prefix2) + +globl(ftr_fixup_prefix2_orig) + or 1,1,1 + .long OP_PREFIX << 26 + .long 0x0000000 + or 2,2,2 + +globl(ftr_fixup_prefix2_alt) + .long OP_PREFIX << 26 + .long 0x0000001 + +globl(ftr_fixup_prefix2_expected) + or 1,1,1 + .long OP_PREFIX << 26 + .long 0x0000001 + or 2,2,2 + +globl(ftr_fixup_prefix3) + or 1,1,1 + .long OP_PREFIX << 26 + .long 0x0000000 + or 2,2,2 + or 3,3,3 +globl(end_ftr_fixup_prefix3) + +globl(ftr_fixup_prefix3_orig) + or 1,1,1 + .long OP_PREFIX << 26 + .long 0x0000000 + or 2,2,2 + or 3,3,3 + +globl(ftr_fixup_prefix3_alt) + .long OP_PREFIX << 26 + .long 0x0000001 + nop + +globl(ftr_fixup_prefix3_expected) + or 1,1,1 + .long OP_PREFIX << 26 + .long 0x0000001 + nop + or 3,3,3 diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 4ba634b89ce5..587c8cf1230f 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -14,13 +14,23 @@ #include <linux/string.h> #include <linux/init.h> #include <linux/sched/mm.h> +#include <linux/stop_machine.h> #include <asm/cputable.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> +#include <asm/interrupt.h> #include <asm/page.h> #include <asm/sections.h> #include <asm/setup.h> #include <asm/security_features.h> #include <asm/firmware.h> +#include <asm/inst.h> + +/* + * Used to generate warnings if mmu or cpu feature check functions that + * use static keys before they are initialized. 
+ */ +bool static_key_feature_checks_initialized __read_mostly; +EXPORT_SYMBOL_GPL(static_key_feature_checks_initialized); struct fixup_entry { unsigned long mask; @@ -31,30 +41,30 @@ struct fixup_entry { long alt_end_off; }; -static unsigned int *calc_addr(struct fixup_entry *fcur, long offset) +static u32 *calc_addr(struct fixup_entry *fcur, long offset) { /* * We store the offset to the code as a negative offset from * the start of the alt_entry, to support the VDSO. This * routine converts that back into an actual address. */ - return (unsigned int *)((unsigned long)fcur + offset); + return (u32 *)((unsigned long)fcur + offset); } -static int patch_alt_instruction(unsigned int *src, unsigned int *dest, - unsigned int *alt_start, unsigned int *alt_end) +static int patch_alt_instruction(u32 *src, u32 *dest, u32 *alt_start, u32 *alt_end) { - unsigned int instr; + int err; + ppc_inst_t instr; - instr = *src; + instr = ppc_inst_read(src); - if (instr_is_relative_branch(*src)) { - unsigned int *target = (unsigned int *)branch_target(src); + if (instr_is_relative_branch(ppc_inst_read(src))) { + u32 *target = (u32 *)branch_target(src); /* Branch within the section doesn't need translating */ if (target < alt_start || target > alt_end) { - instr = translate_branch(dest, src); - if (!instr) + err = translate_branch(&instr, dest, src); + if (err) return 1; } } @@ -64,9 +74,10 @@ static int patch_alt_instruction(unsigned int *src, unsigned int *dest, return 0; } -static int patch_feature_section(unsigned long value, struct fixup_entry *fcur) +static int patch_feature_section_mask(unsigned long value, unsigned long mask, + struct fixup_entry *fcur) { - unsigned int *start, *end, *alt_start, *alt_end, *src, *dest; + u32 *start, *end, *alt_start, *alt_end, *src, *dest; start = calc_addr(fcur, fcur->start_off); end = calc_addr(fcur, fcur->end_off); @@ -76,24 +87,26 @@ static int patch_feature_section(unsigned long value, struct fixup_entry *fcur) if ((alt_end - alt_start) > (end - start)) return 1; - if ((value & fcur->mask) == fcur->value) + if ((value & fcur->mask & mask) == (fcur->value & mask)) return 0; src = alt_start; dest = start; - for (; src < alt_end; src++, dest++) { + for (; src < alt_end; src = ppc_inst_next(src, src), + dest = ppc_inst_next(dest, dest)) { if (patch_alt_instruction(src, dest, alt_start, alt_end)) return 1; } for (; dest < end; dest++) - raw_patch_instruction(dest, PPC_INST_NOP); + raw_patch_instruction(dest, ppc_inst(PPC_RAW_NOP())); return 0; } -void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end) +static void do_feature_fixups_mask(unsigned long value, unsigned long mask, + void *fixup_start, void *fixup_end) { struct fixup_entry *fcur, *fend; @@ -101,7 +114,7 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end) fend = fixup_end; for (; fcur < fend; fcur++) { - if (patch_feature_section(value, fcur)) { + if (patch_feature_section_mask(value, mask, fcur)) { WARN_ON(1); printk("Unable to patch feature section at %p - %p" \ " with %p - %p\n", @@ -113,48 +126,94 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end) } } +void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end) +{ + do_feature_fixups_mask(value, ~0, fixup_start, fixup_end); +} + +#ifdef CONFIG_PPC_BARRIER_NOSPEC +static bool is_fixup_addr_valid(void *dest, size_t size) +{ + return system_state < SYSTEM_FREEING_INITMEM || + !init_section_contains(dest, size); +} + +static int do_patch_fixups(long 
*start, long *end, unsigned int *instrs, int num) +{ + int i; + + for (i = 0; start < end; start++, i++) { + int j; + unsigned int *dest = (void *)start + *start; + + if (!is_fixup_addr_valid(dest, sizeof(*instrs) * num)) + continue; + + pr_devel("patching dest %lx\n", (unsigned long)dest); + + for (j = 0; j < num; j++) + patch_instruction(dest + j, ppc_inst(instrs[j])); + } + return i; +} +#endif + #ifdef CONFIG_PPC_BOOK3S_64 +static int do_patch_entry_fixups(long *start, long *end, unsigned int *instrs, + bool do_fallback, void *fallback) +{ + int i; + + for (i = 0; start < end; start++, i++) { + unsigned int *dest = (void *)start + *start; + + if (!is_fixup_addr_valid(dest, sizeof(*instrs) * 3)) + continue; + + pr_devel("patching dest %lx\n", (unsigned long)dest); + + // See comment in do_entry_flush_fixups() RE order of patching + if (do_fallback) { + patch_instruction(dest, ppc_inst(instrs[0])); + patch_instruction(dest + 2, ppc_inst(instrs[2])); + patch_branch(dest + 1, (unsigned long)fallback, BRANCH_SET_LINK); + } else { + patch_instruction(dest + 1, ppc_inst(instrs[1])); + patch_instruction(dest + 2, ppc_inst(instrs[2])); + patch_instruction(dest, ppc_inst(instrs[0])); + } + } + return i; +} + static void do_stf_entry_barrier_fixups(enum stf_barrier_type types) { - unsigned int instrs[3], *dest; + unsigned int instrs[3]; long *start, *end; int i; - start = PTRRELOC(&__start___stf_entry_barrier_fixup), + start = PTRRELOC(&__start___stf_entry_barrier_fixup); end = PTRRELOC(&__stop___stf_entry_barrier_fixup); - instrs[0] = 0x60000000; /* nop */ - instrs[1] = 0x60000000; /* nop */ - instrs[2] = 0x60000000; /* nop */ + instrs[0] = PPC_RAW_NOP(); + instrs[1] = PPC_RAW_NOP(); + instrs[2] = PPC_RAW_NOP(); i = 0; if (types & STF_BARRIER_FALLBACK) { - instrs[i++] = 0x7d4802a6; /* mflr r10 */ - instrs[i++] = 0x60000000; /* branch patched below */ - instrs[i++] = 0x7d4803a6; /* mtlr r10 */ + instrs[i++] = PPC_RAW_MFLR(_R10); + instrs[i++] = PPC_RAW_NOP(); /* branch patched below */ + instrs[i++] = PPC_RAW_MTLR(_R10); } else if (types & STF_BARRIER_EIEIO) { - instrs[i++] = 0x7e0006ac; /* eieio + bit 6 hint */ + instrs[i++] = PPC_RAW_EIEIO() | 0x02000000; /* eieio + bit 6 hint */ } else if (types & STF_BARRIER_SYNC_ORI) { - instrs[i++] = 0x7c0004ac; /* hwsync */ - instrs[i++] = 0xe94d0000; /* ld r10,0(r13) */ - instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ + instrs[i++] = PPC_RAW_SYNC(); + instrs[i++] = PPC_RAW_LD(_R10, _R13, 0); + instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */ } - for (i = 0; start < end; start++, i++) { - dest = (void *)start + *start; - - pr_devel("patching dest %lx\n", (unsigned long)dest); - - patch_instruction(dest, instrs[0]); - - if (types & STF_BARRIER_FALLBACK) - patch_branch(dest + 1, (unsigned long)&stf_barrier_fallback, - BRANCH_SET_LINK); - else - patch_instruction(dest + 1, instrs[1]); - - patch_instruction(dest + 2, instrs[2]); - } + i = do_patch_entry_fixups(start, end, instrs, types & STF_BARRIER_FALLBACK, + &stf_barrier_fallback); printk(KERN_DEBUG "stf-barrier: patched %d entry locations (%s barrier)\n", i, (types == STF_BARRIER_NONE) ? 
"no" : @@ -166,53 +225,42 @@ static void do_stf_entry_barrier_fixups(enum stf_barrier_type types) static void do_stf_exit_barrier_fixups(enum stf_barrier_type types) { - unsigned int instrs[6], *dest; + unsigned int instrs[6]; long *start, *end; int i; - start = PTRRELOC(&__start___stf_exit_barrier_fixup), + start = PTRRELOC(&__start___stf_exit_barrier_fixup); end = PTRRELOC(&__stop___stf_exit_barrier_fixup); - instrs[0] = 0x60000000; /* nop */ - instrs[1] = 0x60000000; /* nop */ - instrs[2] = 0x60000000; /* nop */ - instrs[3] = 0x60000000; /* nop */ - instrs[4] = 0x60000000; /* nop */ - instrs[5] = 0x60000000; /* nop */ + instrs[0] = PPC_RAW_NOP(); + instrs[1] = PPC_RAW_NOP(); + instrs[2] = PPC_RAW_NOP(); + instrs[3] = PPC_RAW_NOP(); + instrs[4] = PPC_RAW_NOP(); + instrs[5] = PPC_RAW_NOP(); i = 0; if (types & STF_BARRIER_FALLBACK || types & STF_BARRIER_SYNC_ORI) { if (cpu_has_feature(CPU_FTR_HVMODE)) { - instrs[i++] = 0x7db14ba6; /* mtspr 0x131, r13 (HSPRG1) */ - instrs[i++] = 0x7db04aa6; /* mfspr r13, 0x130 (HSPRG0) */ + instrs[i++] = PPC_RAW_MTSPR(SPRN_HSPRG1, _R13); + instrs[i++] = PPC_RAW_MFSPR(_R13, SPRN_HSPRG0); } else { - instrs[i++] = 0x7db243a6; /* mtsprg 2,r13 */ - instrs[i++] = 0x7db142a6; /* mfsprg r13,1 */ + instrs[i++] = PPC_RAW_MTSPR(SPRN_SPRG2, _R13); + instrs[i++] = PPC_RAW_MFSPR(_R13, SPRN_SPRG1); } - instrs[i++] = 0x7c0004ac; /* hwsync */ - instrs[i++] = 0xe9ad0000; /* ld r13,0(r13) */ - instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ - if (cpu_has_feature(CPU_FTR_HVMODE)) { - instrs[i++] = 0x7db14aa6; /* mfspr r13, 0x131 (HSPRG1) */ - } else { - instrs[i++] = 0x7db242a6; /* mfsprg r13,2 */ - } + instrs[i++] = PPC_RAW_SYNC(); + instrs[i++] = PPC_RAW_LD(_R13, _R13, 0); + instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */ + if (cpu_has_feature(CPU_FTR_HVMODE)) + instrs[i++] = PPC_RAW_MFSPR(_R13, SPRN_HSPRG1); + else + instrs[i++] = PPC_RAW_MFSPR(_R13, SPRN_SPRG2); } else if (types & STF_BARRIER_EIEIO) { - instrs[i++] = 0x7e0006ac; /* eieio + bit 6 hint */ + instrs[i++] = PPC_RAW_EIEIO() | 0x02000000; /* eieio + bit 6 hint */ } - for (i = 0; start < end; start++, i++) { - dest = (void *)start + *start; + i = do_patch_fixups(start, end, instrs, ARRAY_SIZE(instrs)); - pr_devel("patching dest %lx\n", (unsigned long)dest); - - patch_instruction(dest, instrs[0]); - patch_instruction(dest + 1, instrs[1]); - patch_instruction(dest + 2, instrs[2]); - patch_instruction(dest + 3, instrs[3]); - patch_instruction(dest + 4, instrs[4]); - patch_instruction(dest + 5, instrs[5]); - } printk(KERN_DEBUG "stf-barrier: patched %d exit locations (%s barrier)\n", i, (types == STF_BARRIER_NONE) ? "no" : (types == STF_BARRIER_FALLBACK) ? "fallback" : @@ -221,49 +269,205 @@ static void do_stf_exit_barrier_fixups(enum stf_barrier_type types) : "unknown"); } +static bool stf_exit_reentrant = false; +static bool rfi_exit_reentrant = false; +static DEFINE_MUTEX(exit_flush_lock); + +static int __do_stf_barrier_fixups(void *data) +{ + enum stf_barrier_type *types = data; + + do_stf_entry_barrier_fixups(*types); + do_stf_exit_barrier_fixups(*types); + + return 0; +} void do_stf_barrier_fixups(enum stf_barrier_type types) { - do_stf_entry_barrier_fixups(types); - do_stf_exit_barrier_fixups(types); + /* + * The call to the fallback entry flush, and the fallback/sync-ori exit + * flush can not be safely patched in/out while other CPUs are + * executing them. 
So call __do_stf_barrier_fixups() on one CPU while + * all other CPUs spin in the stop machine core with interrupts hard + * disabled. + * + * The branch to mark interrupt exits non-reentrant is enabled first, + * then stop_machine runs which will ensure all CPUs are out of the + * low level interrupt exit code before patching. After the patching, + * if allowed, then flip the branch to allow fast exits. + */ + + // Prevent static key update races with do_rfi_flush_fixups() + mutex_lock(&exit_flush_lock); + static_branch_enable(&interrupt_exit_not_reentrant); + + stop_machine(__do_stf_barrier_fixups, &types, NULL); + + if ((types & STF_BARRIER_FALLBACK) || (types & STF_BARRIER_SYNC_ORI)) + stf_exit_reentrant = false; + else + stf_exit_reentrant = true; + + if (stf_exit_reentrant && rfi_exit_reentrant) + static_branch_disable(&interrupt_exit_not_reentrant); + + mutex_unlock(&exit_flush_lock); } -void do_rfi_flush_fixups(enum l1d_flush_type types) +void do_uaccess_flush_fixups(enum l1d_flush_type types) { - unsigned int instrs[3], *dest; + unsigned int instrs[4]; long *start, *end; int i; - start = PTRRELOC(&__start___rfi_flush_fixup), - end = PTRRELOC(&__stop___rfi_flush_fixup); + start = PTRRELOC(&__start___uaccess_flush_fixup); + end = PTRRELOC(&__stop___uaccess_flush_fixup); - instrs[0] = 0x60000000; /* nop */ - instrs[1] = 0x60000000; /* nop */ - instrs[2] = 0x60000000; /* nop */ + instrs[0] = PPC_RAW_NOP(); + instrs[1] = PPC_RAW_NOP(); + instrs[2] = PPC_RAW_NOP(); + instrs[3] = PPC_RAW_BLR(); - if (types & L1D_FLUSH_FALLBACK) - /* b .+16 to fallback flush */ - instrs[0] = 0x48000010; + i = 0; + if (types == L1D_FLUSH_FALLBACK) { + instrs[3] = PPC_RAW_NOP(); + /* fallthrough to fallback flush */ + } + + if (types & L1D_FLUSH_ORI) { + instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */ + instrs[i++] = PPC_RAW_ORI(_R30, _R30, 0); /* L1d flush */ + } + + if (types & L1D_FLUSH_MTTRIG) + instrs[i++] = PPC_RAW_MTSPR(SPRN_TRIG2, _R0); + + i = do_patch_fixups(start, end, instrs, ARRAY_SIZE(instrs)); + + printk(KERN_DEBUG "uaccess-flush: patched %d locations (%s flush)\n", i, + (types == L1D_FLUSH_NONE) ? "no" : + (types == L1D_FLUSH_FALLBACK) ? "fallback displacement" : + (types & L1D_FLUSH_ORI) ? (types & L1D_FLUSH_MTTRIG) + ? "ori+mttrig type" + : "ori type" : + (types & L1D_FLUSH_MTTRIG) ? "mttrig type" + : "unknown"); +} + +static int __do_entry_flush_fixups(void *data) +{ + enum l1d_flush_type types = *(enum l1d_flush_type *)data; + unsigned int instrs[3]; + long *start, *end; + int i; + + instrs[0] = PPC_RAW_NOP(); + instrs[1] = PPC_RAW_NOP(); + instrs[2] = PPC_RAW_NOP(); i = 0; + if (types == L1D_FLUSH_FALLBACK) { + instrs[i++] = PPC_RAW_MFLR(_R10); + instrs[i++] = PPC_RAW_NOP(); /* branch patched below */ + instrs[i++] = PPC_RAW_MTLR(_R10); + } + if (types & L1D_FLUSH_ORI) { - instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ - instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/ + instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */ + instrs[i++] = PPC_RAW_ORI(_R30, _R30, 0); /* L1d flush */ } if (types & L1D_FLUSH_MTTRIG) - instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */ + instrs[i++] = PPC_RAW_MTSPR(SPRN_TRIG2, _R0); - for (i = 0; start < end; start++, i++) { - dest = (void *)start + *start; + /* + * If we're patching in or out the fallback flush we need to be careful about the + * order in which we patch instructions. 
That's because it's possible we could + * take a page fault after patching one instruction, so the sequence of + * instructions must be safe even in a half patched state. + * + * To make that work, when patching in the fallback flush we patch in this order: + * - the mflr (dest) + * - the mtlr (dest + 2) + * - the branch (dest + 1) + * + * That ensures the sequence is safe to execute at any point. In contrast if we + * patch the mtlr last, it's possible we could return from the branch and not + * restore LR, leading to a crash later. + * + * When patching out the fallback flush (either with nops or another flush type), + * we patch in this order: + * - the branch (dest + 1) + * - the mtlr (dest + 2) + * - the mflr (dest) + * + * Note we are protected by stop_machine() from other CPUs executing the code in a + * semi-patched state. + */ - pr_devel("patching dest %lx\n", (unsigned long)dest); + start = PTRRELOC(&__start___entry_flush_fixup); + end = PTRRELOC(&__stop___entry_flush_fixup); + i = do_patch_entry_fixups(start, end, instrs, types == L1D_FLUSH_FALLBACK, + &entry_flush_fallback); + + start = PTRRELOC(&__start___scv_entry_flush_fixup); + end = PTRRELOC(&__stop___scv_entry_flush_fixup); + i += do_patch_entry_fixups(start, end, instrs, types == L1D_FLUSH_FALLBACK, + &scv_entry_flush_fallback); + + printk(KERN_DEBUG "entry-flush: patched %d locations (%s flush)\n", i, + (types == L1D_FLUSH_NONE) ? "no" : + (types == L1D_FLUSH_FALLBACK) ? "fallback displacement" : + (types & L1D_FLUSH_ORI) ? (types & L1D_FLUSH_MTTRIG) + ? "ori+mttrig type" + : "ori type" : + (types & L1D_FLUSH_MTTRIG) ? "mttrig type" + : "unknown"); + + return 0; +} + +void do_entry_flush_fixups(enum l1d_flush_type types) +{ + /* + * The call to the fallback flush can not be safely patched in/out while + * other CPUs are executing it. So call __do_entry_flush_fixups() on one + * CPU while all other CPUs spin in the stop machine core with interrupts + * hard disabled. + */ + stop_machine(__do_entry_flush_fixups, &types, NULL); +} + +static int __do_rfi_flush_fixups(void *data) +{ + enum l1d_flush_type types = *(enum l1d_flush_type *)data; + unsigned int instrs[3]; + long *start, *end; + int i; - patch_instruction(dest, instrs[0]); - patch_instruction(dest + 1, instrs[1]); - patch_instruction(dest + 2, instrs[2]); + start = PTRRELOC(&__start___rfi_flush_fixup); + end = PTRRELOC(&__stop___rfi_flush_fixup); + + instrs[0] = PPC_RAW_NOP(); + instrs[1] = PPC_RAW_NOP(); + instrs[2] = PPC_RAW_NOP(); + + if (types & L1D_FLUSH_FALLBACK) + /* b .+16 to fallback flush */ + instrs[0] = PPC_RAW_BRANCH(16); + + i = 0; + if (types & L1D_FLUSH_ORI) { + instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */ + instrs[i++] = PPC_RAW_ORI(_R30, _R30, 0); /* L1d flush */ } + if (types & L1D_FLUSH_MTTRIG) + instrs[i++] = PPC_RAW_MTSPR(SPRN_TRIG2, _R0); + + i = do_patch_fixups(start, end, instrs, ARRAY_SIZE(instrs)); + printk(KERN_DEBUG "rfi-flush: patched %d locations (%s flush)\n", i, (types == L1D_FLUSH_NONE) ? "no" : (types == L1D_FLUSH_FALLBACK) ? "fallback displacement" : @@ -272,30 +476,53 @@ void do_rfi_flush_fixups(enum l1d_flush_type types) : "ori type" : (types & L1D_FLUSH_MTTRIG) ? "mttrig type" : "unknown"); + + return 0; +} + +void do_rfi_flush_fixups(enum l1d_flush_type types) +{ + /* + * stop_machine gets all CPUs out of the interrupt exit handler same + * as do_stf_barrier_fixups. 
do_rfi_flush_fixups patching can run + * without stop_machine, so this could be achieved with a broadcast + * IPI instead, but this matches the stf sequence. + */ + + // Prevent static key update races with do_stf_barrier_fixups() + mutex_lock(&exit_flush_lock); + static_branch_enable(&interrupt_exit_not_reentrant); + + stop_machine(__do_rfi_flush_fixups, &types, NULL); + + if (types & L1D_FLUSH_FALLBACK) + rfi_exit_reentrant = false; + else + rfi_exit_reentrant = true; + + if (stf_exit_reentrant && rfi_exit_reentrant) + static_branch_disable(&interrupt_exit_not_reentrant); + + mutex_unlock(&exit_flush_lock); } void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_end) { - unsigned int instr, *dest; + unsigned int instr; long *start, *end; int i; start = fixup_start; end = fixup_end; - instr = 0x60000000; /* nop */ + instr = PPC_RAW_NOP(); if (enable) { pr_info("barrier-nospec: using ORI speculation barrier\n"); - instr = 0x63ff0000; /* ori 31,31,0 speculation barrier */ + instr = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */ } - for (i = 0; start < end; start++, i++) { - dest = (void *)start + *start; - - pr_devel("patching dest %lx\n", (unsigned long)dest); - patch_instruction(dest, instr); - } + i = do_patch_fixups(start, end, &instr, 1); printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i); } @@ -307,44 +534,38 @@ void do_barrier_nospec_fixups(bool enable) { void *start, *end; - start = PTRRELOC(&__start___barrier_nospec_fixup), + start = PTRRELOC(&__start___barrier_nospec_fixup); end = PTRRELOC(&__stop___barrier_nospec_fixup); do_barrier_nospec_fixups_range(enable, start, end); } #endif /* CONFIG_PPC_BARRIER_NOSPEC */ -#ifdef CONFIG_PPC_FSL_BOOK3E +#ifdef CONFIG_PPC_E500 void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_end) { - unsigned int instr[2], *dest; + unsigned int instr[2]; long *start, *end; int i; start = fixup_start; end = fixup_end; - instr[0] = PPC_INST_NOP; - instr[1] = PPC_INST_NOP; + instr[0] = PPC_RAW_NOP(); + instr[1] = PPC_RAW_NOP(); if (enable) { pr_info("barrier-nospec: using isync; sync as speculation barrier\n"); - instr[0] = PPC_INST_ISYNC; - instr[1] = PPC_INST_SYNC; + instr[0] = PPC_RAW_ISYNC(); + instr[1] = PPC_RAW_SYNC(); } - for (i = 0; start < end; start++, i++) { - dest = (void *)start + *start; - - pr_devel("patching dest %lx\n", (unsigned long)dest); - patch_instruction(dest, instr[0]); - patch_instruction(dest + 1, instr[1]); - } + i = do_patch_fixups(start, end, instr, ARRAY_SIZE(instr)); printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i); } -static void patch_btb_flush_section(long *curr) +static void __init patch_btb_flush_section(long *curr) { unsigned int *start, *end; @@ -352,11 +573,11 @@ static void patch_btb_flush_section(long *curr) end = (void *)curr + *(curr + 1); for (; start < end; start++) { pr_devel("patching dest %lx\n", (unsigned long)start); - patch_instruction(start, PPC_INST_NOP); + patch_instruction(start, ppc_inst(PPC_RAW_NOP())); } } -void do_btb_flush_fixups(void) +void __init do_btb_flush_fixups(void) { long *start, *end; @@ -366,12 +587,12 @@ void do_btb_flush_fixups(void) for (; start < end; start += 2) patch_btb_flush_section(start); } -#endif /* CONFIG_PPC_FSL_BOOK3E */ +#endif /* CONFIG_PPC_E500 */ void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end) { long *start, *end; - unsigned int *dest; + u32 *dest; if (!(value & CPU_FTR_LWSYNC)) return ; @@ -381,27 +602,28 @@ void do_lwsync_fixups(unsigned long value, 
void *fixup_start, void *fixup_end) for (; start < end; start++) { dest = (void *)start + *start; - raw_patch_instruction(dest, PPC_INST_LWSYNC); + raw_patch_instruction(dest, ppc_inst(PPC_INST_LWSYNC)); } } -static void do_final_fixups(void) +static void __init do_final_fixups(void) { #if defined(CONFIG_PPC64) && defined(CONFIG_RELOCATABLE) - int *src, *dest; - unsigned long length; + ppc_inst_t inst; + u32 *src, *dest, *end; if (PHYSICAL_START == 0) return; - src = (int *)(KERNELBASE + PHYSICAL_START); - dest = (int *)KERNELBASE; - length = (__end_interrupts - _stext) / sizeof(int); + src = (u32 *)(KERNELBASE + PHYSICAL_START); + dest = (u32 *)KERNELBASE; + end = (void *)src + (__end_interrupts - _stext); - while (length--) { - raw_patch_instruction(dest, *src); - src++; - dest++; + while (src < end) { + inst = ppc_inst_read(src); + raw_patch_instruction(dest, inst); + src = ppc_inst_next(src, src); + dest = ppc_inst_next(dest, dest); } #endif } @@ -443,6 +665,17 @@ void __init apply_feature_fixups(void) do_final_fixups(); } +void __init update_mmu_feature_fixups(unsigned long mask) +{ + saved_mmu_features &= ~mask; + saved_mmu_features |= cur_cpu_spec->mmu_features & mask; + + do_feature_fixups_mask(cur_cpu_spec->mmu_features, mask, + PTRRELOC(&__start___mmu_ftr_fixup), + PTRRELOC(&__stop___mmu_ftr_fixup)); + mmu_feature_keys_init(); +} + void __init setup_feature_keys(void) { /* @@ -453,6 +686,7 @@ void __init setup_feature_keys(void) jump_label_init(); cpu_feature_keys_init(); mmu_feature_keys_init(); + static_key_feature_checks_initialized = true; } static int __init check_features(void) @@ -475,15 +709,20 @@ late_initcall(check_features); #define check(x) \ if (!(x)) printk("feature-fixups: test failed at line %d\n", __LINE__); +static int patch_feature_section(unsigned long value, struct fixup_entry *fcur) +{ + return patch_feature_section_mask(value, ~0, fcur); +} + /* This must be after the text it fixes up, vmlinux.lds.S enforces that atm */ static struct fixup_entry fixup; -static long calc_offset(struct fixup_entry *entry, unsigned int *p) +static long __init calc_offset(struct fixup_entry *entry, unsigned int *p) { return (unsigned long)p - (unsigned long)entry; } -static void test_basic_patching(void) +static void __init test_basic_patching(void) { extern unsigned int ftr_fixup_test1[]; extern unsigned int end_ftr_fixup_test1[]; @@ -514,7 +753,7 @@ static void test_basic_patching(void) check(memcmp(ftr_fixup_test1, ftr_fixup_test1_expected, size) == 0); } -static void test_alternative_patching(void) +static void __init test_alternative_patching(void) { extern unsigned int ftr_fixup_test2[]; extern unsigned int end_ftr_fixup_test2[]; @@ -547,7 +786,7 @@ static void test_alternative_patching(void) check(memcmp(ftr_fixup_test2, ftr_fixup_test2_expected, size) == 0); } -static void test_alternative_case_too_big(void) +static void __init test_alternative_case_too_big(void) { extern unsigned int ftr_fixup_test3[]; extern unsigned int end_ftr_fixup_test3[]; @@ -573,7 +812,7 @@ static void test_alternative_case_too_big(void) check(memcmp(ftr_fixup_test3, ftr_fixup_test3_orig, size) == 0); } -static void test_alternative_case_too_small(void) +static void __init test_alternative_case_too_small(void) { extern unsigned int ftr_fixup_test4[]; extern unsigned int end_ftr_fixup_test4[]; @@ -619,7 +858,7 @@ static void test_alternative_case_with_branch(void) check(memcmp(ftr_fixup_test5, ftr_fixup_test5_expected, size) == 0); } -static void test_alternative_case_with_external_branch(void) 
+static void __init test_alternative_case_with_external_branch(void) { extern unsigned int ftr_fixup_test6[]; extern unsigned int end_ftr_fixup_test6[]; @@ -629,7 +868,7 @@ static void test_alternative_case_with_external_branch(void) check(memcmp(ftr_fixup_test6, ftr_fixup_test6_expected, size) == 0); } -static void test_alternative_case_with_branch_to_end(void) +static void __init test_alternative_case_with_branch_to_end(void) { extern unsigned int ftr_fixup_test7[]; extern unsigned int end_ftr_fixup_test7[]; @@ -639,7 +878,7 @@ static void test_alternative_case_with_branch_to_end(void) check(memcmp(ftr_fixup_test7, ftr_fixup_test7_expected, size) == 0); } -static void test_cpu_macros(void) +static void __init test_cpu_macros(void) { extern u8 ftr_fixup_test_FTR_macros[]; extern u8 ftr_fixup_test_FTR_macros_expected[]; @@ -651,7 +890,7 @@ static void test_cpu_macros(void) ftr_fixup_test_FTR_macros_expected, size) == 0); } -static void test_fw_macros(void) +static void __init test_fw_macros(void) { #ifdef CONFIG_PPC64 extern u8 ftr_fixup_test_FW_FTR_macros[]; @@ -665,7 +904,7 @@ static void test_fw_macros(void) #endif } -static void test_lwsync_macros(void) +static void __init test_lwsync_macros(void) { extern u8 lwsync_fixup_test[]; extern u8 end_lwsync_fixup_test[]; @@ -684,6 +923,78 @@ static void test_lwsync_macros(void) } } +#ifdef CONFIG_PPC64 +static void __init test_prefix_patching(void) +{ + extern unsigned int ftr_fixup_prefix1[]; + extern unsigned int end_ftr_fixup_prefix1[]; + extern unsigned int ftr_fixup_prefix1_orig[]; + extern unsigned int ftr_fixup_prefix1_expected[]; + int size = sizeof(unsigned int) * (end_ftr_fixup_prefix1 - ftr_fixup_prefix1); + + fixup.value = fixup.mask = 8; + fixup.start_off = calc_offset(&fixup, ftr_fixup_prefix1 + 1); + fixup.end_off = calc_offset(&fixup, ftr_fixup_prefix1 + 3); + fixup.alt_start_off = fixup.alt_end_off = 0; + + /* Sanity check */ + check(memcmp(ftr_fixup_prefix1, ftr_fixup_prefix1_orig, size) == 0); + + patch_feature_section(0, &fixup); + check(memcmp(ftr_fixup_prefix1, ftr_fixup_prefix1_expected, size) == 0); + check(memcmp(ftr_fixup_prefix1, ftr_fixup_prefix1_orig, size) != 0); +} + +static void __init test_prefix_alt_patching(void) +{ + extern unsigned int ftr_fixup_prefix2[]; + extern unsigned int end_ftr_fixup_prefix2[]; + extern unsigned int ftr_fixup_prefix2_orig[]; + extern unsigned int ftr_fixup_prefix2_expected[]; + extern unsigned int ftr_fixup_prefix2_alt[]; + int size = sizeof(unsigned int) * (end_ftr_fixup_prefix2 - ftr_fixup_prefix2); + + fixup.value = fixup.mask = 8; + fixup.start_off = calc_offset(&fixup, ftr_fixup_prefix2 + 1); + fixup.end_off = calc_offset(&fixup, ftr_fixup_prefix2 + 3); + fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_prefix2_alt); + fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_prefix2_alt + 2); + /* Sanity check */ + check(memcmp(ftr_fixup_prefix2, ftr_fixup_prefix2_orig, size) == 0); + + patch_feature_section(0, &fixup); + check(memcmp(ftr_fixup_prefix2, ftr_fixup_prefix2_expected, size) == 0); + check(memcmp(ftr_fixup_prefix2, ftr_fixup_prefix2_orig, size) != 0); +} + +static void __init test_prefix_word_alt_patching(void) +{ + extern unsigned int ftr_fixup_prefix3[]; + extern unsigned int end_ftr_fixup_prefix3[]; + extern unsigned int ftr_fixup_prefix3_orig[]; + extern unsigned int ftr_fixup_prefix3_expected[]; + extern unsigned int ftr_fixup_prefix3_alt[]; + int size = sizeof(unsigned int) * (end_ftr_fixup_prefix3 - ftr_fixup_prefix3); + + fixup.value = fixup.mask = 8; + 
fixup.start_off = calc_offset(&fixup, ftr_fixup_prefix3 + 1); + fixup.end_off = calc_offset(&fixup, ftr_fixup_prefix3 + 4); + fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_prefix3_alt); + fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_prefix3_alt + 3); + /* Sanity check */ + check(memcmp(ftr_fixup_prefix3, ftr_fixup_prefix3_orig, size) == 0); + + patch_feature_section(0, &fixup); + check(memcmp(ftr_fixup_prefix3, ftr_fixup_prefix3_expected, size) == 0); + patch_feature_section(0, &fixup); + check(memcmp(ftr_fixup_prefix3, ftr_fixup_prefix3_orig, size) != 0); +} +#else +static inline void test_prefix_patching(void) {} +static inline void test_prefix_alt_patching(void) {} +static inline void test_prefix_word_alt_patching(void) {} +#endif /* CONFIG_PPC64 */ + static int __init test_feature_fixups(void) { printk(KERN_DEBUG "Running feature fixup self-tests ...\n"); @@ -698,6 +1009,9 @@ static int __init test_feature_fixups(void) test_cpu_macros(); test_fw_macros(); test_lwsync_macros(); + test_prefix_patching(); + test_prefix_alt_patching(); + test_prefix_word_alt_patching(); return 0; } diff --git a/arch/powerpc/lib/hweight_64.S b/arch/powerpc/lib/hweight_64.S index 6effad901ef7..151875050da9 100644 --- a/arch/powerpc/lib/hweight_64.S +++ b/arch/powerpc/lib/hweight_64.S @@ -5,16 +5,16 @@ * * Author: Anton Blanchard <anton@au.ibm.com> */ +#include <linux/export.h> #include <asm/processor.h> #include <asm/ppc_asm.h> -#include <asm/export.h> #include <asm/feature-fixups.h> /* Note: This code relies on -mminimal-toc */ _GLOBAL(__arch_hweight8) BEGIN_FTR_SECTION - b __sw_hweight8 + b CFUNC(__sw_hweight8) nop nop FTR_SECTION_ELSE @@ -26,7 +26,7 @@ EXPORT_SYMBOL(__arch_hweight8) _GLOBAL(__arch_hweight16) BEGIN_FTR_SECTION - b __sw_hweight16 + b CFUNC(__sw_hweight16) nop nop nop @@ -49,7 +49,7 @@ EXPORT_SYMBOL(__arch_hweight16) _GLOBAL(__arch_hweight32) BEGIN_FTR_SECTION - b __sw_hweight32 + b CFUNC(__sw_hweight32) nop nop nop @@ -75,7 +75,7 @@ EXPORT_SYMBOL(__arch_hweight32) _GLOBAL(__arch_hweight64) BEGIN_FTR_SECTION - b __sw_hweight64 + b CFUNC(__sw_hweight64) nop nop nop diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c index 6440d5943c00..04165b7a163f 100644 --- a/arch/powerpc/lib/locks.c +++ b/arch/powerpc/lib/locks.c @@ -27,14 +27,14 @@ void splpar_spin_yield(arch_spinlock_t *lock) return; holder_cpu = lock_value & 0xffff; BUG_ON(holder_cpu >= NR_CPUS); - yield_count = be32_to_cpu(lppaca_of(holder_cpu).yield_count); + + yield_count = yield_count_of(holder_cpu); if ((yield_count & 1) == 0) return; /* virtual cpu is currently running */ rmb(); if (lock->slock != lock_value) return; /* something has changed */ - plpar_hcall_norets(H_CONFER, - get_hard_smp_processor_id(holder_cpu), yield_count); + yield_to_preempted(holder_cpu, yield_count); } EXPORT_SYMBOL_GPL(splpar_spin_yield); @@ -53,13 +53,13 @@ void splpar_rw_yield(arch_rwlock_t *rw) return; /* no write lock at present */ holder_cpu = lock_value & 0xffff; BUG_ON(holder_cpu >= NR_CPUS); - yield_count = be32_to_cpu(lppaca_of(holder_cpu).yield_count); + + yield_count = yield_count_of(holder_cpu); if ((yield_count & 1) == 0) return; /* virtual cpu is currently running */ rmb(); if (rw->lock != lock_value) return; /* something has changed */ - plpar_hcall_norets(H_CONFER, - get_hard_smp_processor_id(holder_cpu), yield_count); + yield_to_preempted(holder_cpu, yield_count); } #endif diff --git a/arch/powerpc/lib/mem_64.S b/arch/powerpc/lib/mem_64.S index 9351ffab409c..6fd06cd20faa 100644 --- a/arch/powerpc/lib/mem_64.S +++ 
b/arch/powerpc/lib/mem_64.S @@ -4,10 +4,10 @@ * * Copyright (C) 1996 Paul Mackerras. */ +#include <linux/export.h> #include <asm/processor.h> #include <asm/errno.h> #include <asm/ppc_asm.h> -#include <asm/export.h> #include <asm/kasan.h> #ifndef CONFIG_KASAN diff --git a/arch/powerpc/lib/memcmp_32.S b/arch/powerpc/lib/memcmp_32.S index 5010e376f7b8..f6fca5664e91 100644 --- a/arch/powerpc/lib/memcmp_32.S +++ b/arch/powerpc/lib/memcmp_32.S @@ -7,8 +7,8 @@ * */ +#include <linux/export.h> #include <asm/ppc_asm.h> -#include <asm/export.h> .text diff --git a/arch/powerpc/lib/memcmp_64.S b/arch/powerpc/lib/memcmp_64.S index 384218df71ba..142c666d3897 100644 --- a/arch/powerpc/lib/memcmp_64.S +++ b/arch/powerpc/lib/memcmp_64.S @@ -3,8 +3,8 @@ * Author: Anton Blanchard <anton@au.ibm.com> * Copyright 2015 IBM Corporation. */ +#include <linux/export.h> #include <asm/ppc_asm.h> -#include <asm/export.h> #include <asm/ppc-opcode.h> #define off8 r6 @@ -44,7 +44,7 @@ std r5,-STACKFRAMESIZE+STK_REG(R29)(r1); \ std r0,16(r1); \ stdu r1,-STACKFRAMESIZE(r1); \ - bl enter_vmx_ops; \ + bl CFUNC(enter_vmx_ops); \ cmpwi cr1,r3,0; \ ld r0,STACKFRAMESIZE+16(r1); \ ld r3,STK_REG(R31)(r1); \ @@ -60,7 +60,7 @@ std r5,-STACKFRAMESIZE+STK_REG(R29)(r1); \ std r0,16(r1); \ stdu r1,-STACKFRAMESIZE(r1); \ - bl exit_vmx_ops; \ + bl CFUNC(exit_vmx_ops); \ ld r0,STACKFRAMESIZE+16(r1); \ ld r3,STK_REG(R31)(r1); \ ld r4,STK_REG(R30)(r1); \ diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S index 016c91e958d8..b5a67e20143f 100644 --- a/arch/powerpc/lib/memcpy_64.S +++ b/arch/powerpc/lib/memcpy_64.S @@ -2,9 +2,9 @@ /* * Copyright (C) 2002 Paul Mackerras, IBM Corp. */ +#include <linux/export.h> #include <asm/processor.h> #include <asm/ppc_asm.h> -#include <asm/export.h> #include <asm/asm-compat.h> #include <asm/feature-fixups.h> #include <asm/kasan.h> diff --git a/arch/powerpc/lib/memcpy_power7.S b/arch/powerpc/lib/memcpy_power7.S index 54f226333c94..b7c5e7fca8b9 100644 --- a/arch/powerpc/lib/memcpy_power7.S +++ b/arch/powerpc/lib/memcpy_power7.S @@ -218,7 +218,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) std r5,-STACKFRAMESIZE+STK_REG(R29)(r1) std r0,16(r1) stdu r1,-STACKFRAMESIZE(r1) - bl enter_vmx_ops + bl CFUNC(enter_vmx_ops) cmpwi cr1,r3,0 ld r0,STACKFRAMESIZE+16(r1) ld r3,STK_REG(R31)(r1) @@ -244,15 +244,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) or r7,r7,r0 ori r10,r7,1 /* stream=1 */ - lis r8,0x8000 /* GO=1 */ - clrldi r8,r8,32 - - dcbt 0,r6,0b01000 - dcbt 0,r7,0b01010 - dcbtst 0,r9,0b01000 - dcbtst 0,r10,0b01010 - eieio - dcbt 0,r8,0b01010 /* GO */ + DCBT_SETUP_STREAMS(r6, r7, r9, r10, r8) beq cr1,.Lunwind_stack_nonvmx_copy @@ -433,7 +425,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) 15: addi r1,r1,STACKFRAMESIZE ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1) - b exit_vmx_ops /* tail call optimise */ + b CFUNC(exit_vmx_ops) /* tail call optimise */ .Lvmx_unaligned_copy: /* Get the destination 16B aligned */ @@ -637,5 +629,5 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) 15: addi r1,r1,STACKFRAMESIZE ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1) - b exit_vmx_ops /* tail call optimise */ + b CFUNC(exit_vmx_ops) /* tail call optimise */ #endif /* CONFIG_ALTIVEC */ diff --git a/arch/powerpc/lib/pmem.c b/arch/powerpc/lib/pmem.c index 0666a8d29596..4e724c4c01ad 100644 --- a/arch/powerpc/lib/pmem.c +++ b/arch/powerpc/lib/pmem.c @@ -6,23 +6,60 @@ #include <linux/string.h> #include <linux/export.h> #include <linux/uaccess.h> +#include <linux/libnvdimm.h> #include <asm/cacheflush.h> +static inline void __clean_pmem_range(unsigned 
long start, unsigned long stop) +{ + unsigned long shift = l1_dcache_shift(); + unsigned long bytes = l1_dcache_bytes(); + void *addr = (void *)(start & ~(bytes - 1)); + unsigned long size = stop - (unsigned long)addr + (bytes - 1); + unsigned long i; + + for (i = 0; i < size >> shift; i++, addr += bytes) + asm volatile(PPC_DCBSTPS(%0, %1): :"i"(0), "r"(addr): "memory"); +} + +static inline void __flush_pmem_range(unsigned long start, unsigned long stop) +{ + unsigned long shift = l1_dcache_shift(); + unsigned long bytes = l1_dcache_bytes(); + void *addr = (void *)(start & ~(bytes - 1)); + unsigned long size = stop - (unsigned long)addr + (bytes - 1); + unsigned long i; + + for (i = 0; i < size >> shift; i++, addr += bytes) + asm volatile(PPC_DCBFPS(%0, %1): :"i"(0), "r"(addr): "memory"); +} + +static inline void clean_pmem_range(unsigned long start, unsigned long stop) +{ + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + return __clean_pmem_range(start, stop); +} + +static inline void flush_pmem_range(unsigned long start, unsigned long stop) +{ + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + return __flush_pmem_range(start, stop); +} + /* * CONFIG_ARCH_HAS_PMEM_API symbols */ void arch_wb_cache_pmem(void *addr, size_t size) { unsigned long start = (unsigned long) addr; - flush_dcache_range(start, start + size); + clean_pmem_range(start, start + size); } EXPORT_SYMBOL_GPL(arch_wb_cache_pmem); void arch_invalidate_pmem(void *addr, size_t size) { unsigned long start = (unsigned long) addr; - flush_dcache_range(start, start + size); + flush_pmem_range(start, start + size); } EXPORT_SYMBOL_GPL(arch_invalidate_pmem); @@ -35,25 +72,16 @@ long __copy_from_user_flushcache(void *dest, const void __user *src, unsigned long copied, start = (unsigned long) dest; copied = __copy_from_user(dest, src, size); - flush_dcache_range(start, start + size); + clean_pmem_range(start, start + size); return copied; } -void *memcpy_flushcache(void *dest, const void *src, size_t size) +void memcpy_flushcache(void *dest, const void *src, size_t size) { unsigned long start = (unsigned long) dest; memcpy(dest, src, size); - flush_dcache_range(start, start + size); - - return dest; + clean_pmem_range(start, start + size); } EXPORT_SYMBOL(memcpy_flushcache); - -void memcpy_page_flushcache(char *to, struct page *page, size_t offset, - size_t len) -{ - memcpy_flushcache(to, page_to_virt(page) + offset, len); -} -EXPORT_SYMBOL(memcpy_page_flushcache); diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c new file mode 100644 index 000000000000..bcc7e4dff8c3 --- /dev/null +++ b/arch/powerpc/lib/qspinlock.c @@ -0,0 +1,997 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#include <linux/bug.h> +#include <linux/compiler.h> +#include <linux/export.h> +#include <linux/percpu.h> +#include <linux/processor.h> +#include <linux/smp.h> +#include <linux/topology.h> +#include <linux/sched/clock.h> +#include <asm/qspinlock.h> +#include <asm/paravirt.h> + +#define MAX_NODES 4 + +struct qnode { + struct qnode *next; + struct qspinlock *lock; + int cpu; + u8 sleepy; /* 1 if the previous vCPU was preempted or + * if the previous node was sleepy */ + u8 locked; /* 1 if lock acquired */ +}; + +struct qnodes { + int count; + struct qnode nodes[MAX_NODES]; +}; + +/* Tuning parameters */ +static int steal_spins __read_mostly = (1 << 5); +static int remote_steal_spins __read_mostly = (1 << 2); +#if _Q_SPIN_TRY_LOCK_STEAL == 1 +static const bool maybe_stealers = true; +#else +static bool maybe_stealers __read_mostly = true; +#endif 
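A minimal sketch of how the per-CPU queue nodes introduced above are handed out (a hypothetical standalone helper, not part of the patch; the real allocation happens inline in queued_spin_lock_mcs_queue() further down). A CPU can be waiting on at most one queued lock per context level -- task, soft interrupt, hard interrupt and NMI -- which is why MAX_NODES is 4 and a simple per-CPU counter suffices to pick a free slot:

	/* Hypothetical illustration only: claim the next free qnode for this CPU.
	 * The counter is bumped before the node is initialised, so an interrupt
	 * arriving in between sees a consistent count and takes the next slot. */
	static struct qnode *pick_qnode(struct qspinlock *lock)
	{
		struct qnodes *qnodesp = this_cpu_ptr(&qnodes);
		struct qnode *node;

		if (qnodesp->count >= MAX_NODES)
			return NULL;	/* caller falls back to trylock spinning */

		node = &qnodesp->nodes[qnodesp->count++];
		node->lock = lock;
		node->cpu = smp_processor_id();
		node->sleepy = 0;
		node->locked = 0;
		return node;
	}

When no slot is free (an unlikely over-nesting case), the real code simply spins on queued_spin_trylock() instead of joining the queue, as shown near the end of this file.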
+static int head_spins __read_mostly = (1 << 8); + +static bool pv_yield_owner __read_mostly = true; +static bool pv_yield_allow_steal __read_mostly = false; +static bool pv_spin_on_preempted_owner __read_mostly = false; +static bool pv_sleepy_lock __read_mostly = true; +static bool pv_sleepy_lock_sticky __read_mostly = false; +static u64 pv_sleepy_lock_interval_ns __read_mostly = 0; +static int pv_sleepy_lock_factor __read_mostly = 256; +static bool pv_yield_prev __read_mostly = true; +static bool pv_yield_sleepy_owner __read_mostly = true; +static bool pv_prod_head __read_mostly = false; + +static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes); +static DEFINE_PER_CPU_ALIGNED(u64, sleepy_lock_seen_clock); + +#if _Q_SPIN_SPEC_BARRIER == 1 +#define spec_barrier() do { asm volatile("ori 31,31,0" ::: "memory"); } while (0) +#else +#define spec_barrier() do { } while (0) +#endif + +static __always_inline bool recently_sleepy(void) +{ + /* pv_sleepy_lock is true when this is called */ + if (pv_sleepy_lock_interval_ns) { + u64 seen = this_cpu_read(sleepy_lock_seen_clock); + + if (seen) { + u64 delta = sched_clock() - seen; + if (delta < pv_sleepy_lock_interval_ns) + return true; + this_cpu_write(sleepy_lock_seen_clock, 0); + } + } + + return false; +} + +static __always_inline int get_steal_spins(bool paravirt, bool sleepy) +{ + if (paravirt && sleepy) + return steal_spins * pv_sleepy_lock_factor; + else + return steal_spins; +} + +static __always_inline int get_remote_steal_spins(bool paravirt, bool sleepy) +{ + if (paravirt && sleepy) + return remote_steal_spins * pv_sleepy_lock_factor; + else + return remote_steal_spins; +} + +static __always_inline int get_head_spins(bool paravirt, bool sleepy) +{ + if (paravirt && sleepy) + return head_spins * pv_sleepy_lock_factor; + else + return head_spins; +} + +static inline u32 encode_tail_cpu(int cpu) +{ + return (cpu + 1) << _Q_TAIL_CPU_OFFSET; +} + +static inline int decode_tail_cpu(u32 val) +{ + return (val >> _Q_TAIL_CPU_OFFSET) - 1; +} + +static inline int get_owner_cpu(u32 val) +{ + return (val & _Q_OWNER_CPU_MASK) >> _Q_OWNER_CPU_OFFSET; +} + +/* + * Try to acquire the lock if it was not already locked. If the tail matches + * mytail then clear it, otherwise leave it unchnaged. Return previous value. + * + * This is used by the head of the queue to acquire the lock and clean up + * its tail if it was the last one queued. + */ +static __always_inline u32 trylock_clean_tail(struct qspinlock *lock, u32 tail) +{ + u32 newval = queued_spin_encode_locked_val(); + u32 prev, tmp; + + asm volatile( +"1: lwarx %0,0,%2,%7 # trylock_clean_tail \n" + /* This test is necessary if there could be stealers */ +" andi. %1,%0,%5 \n" +" bne 3f \n" + /* Test whether the lock tail == mytail */ +" and %1,%0,%6 \n" +" cmpw 0,%1,%3 \n" + /* Merge the new locked value */ +" or %1,%1,%4 \n" +" bne 2f \n" + /* If the lock tail matched, then clear it, otherwise leave it. */ +" andc %1,%1,%6 \n" +"2: stwcx. %1,0,%2 \n" +" bne- 1b \n" +"\t" PPC_ACQUIRE_BARRIER " \n" +"3: \n" + : "=&r" (prev), "=&r" (tmp) + : "r" (&lock->val), "r"(tail), "r" (newval), + "i" (_Q_LOCKED_VAL), + "r" (_Q_TAIL_CPU_MASK), + "i" (_Q_SPIN_EH_HINT) + : "cr0", "memory"); + + return prev; +} + +/* + * Publish our tail, replacing previous tail. Return previous value. + * + * This provides a release barrier for publishing node, this pairs with the + * acquire barrier in get_tail_qnode() when the next CPU finds this tail + * value. 
+ */ +static __always_inline u32 publish_tail_cpu(struct qspinlock *lock, u32 tail) +{ + u32 prev, tmp; + + kcsan_release(); + + asm volatile( +"\t" PPC_RELEASE_BARRIER " \n" +"1: lwarx %0,0,%2 # publish_tail_cpu \n" +" andc %1,%0,%4 \n" +" or %1,%1,%3 \n" +" stwcx. %1,0,%2 \n" +" bne- 1b \n" + : "=&r" (prev), "=&r"(tmp) + : "r" (&lock->val), "r" (tail), "r"(_Q_TAIL_CPU_MASK) + : "cr0", "memory"); + + return prev; +} + +static __always_inline u32 set_mustq(struct qspinlock *lock) +{ + u32 prev; + + asm volatile( +"1: lwarx %0,0,%1 # set_mustq \n" +" or %0,%0,%2 \n" +" stwcx. %0,0,%1 \n" +" bne- 1b \n" + : "=&r" (prev) + : "r" (&lock->val), "r" (_Q_MUST_Q_VAL) + : "cr0", "memory"); + + return prev; +} + +static __always_inline u32 clear_mustq(struct qspinlock *lock) +{ + u32 prev; + + asm volatile( +"1: lwarx %0,0,%1 # clear_mustq \n" +" andc %0,%0,%2 \n" +" stwcx. %0,0,%1 \n" +" bne- 1b \n" + : "=&r" (prev) + : "r" (&lock->val), "r" (_Q_MUST_Q_VAL) + : "cr0", "memory"); + + return prev; +} + +static __always_inline bool try_set_sleepy(struct qspinlock *lock, u32 old) +{ + u32 prev; + u32 new = old | _Q_SLEEPY_VAL; + + BUG_ON(!(old & _Q_LOCKED_VAL)); + BUG_ON(old & _Q_SLEEPY_VAL); + + asm volatile( +"1: lwarx %0,0,%1 # try_set_sleepy \n" +" cmpw 0,%0,%2 \n" +" bne- 2f \n" +" stwcx. %3,0,%1 \n" +" bne- 1b \n" +"2: \n" + : "=&r" (prev) + : "r" (&lock->val), "r"(old), "r" (new) + : "cr0", "memory"); + + return likely(prev == old); +} + +static __always_inline void seen_sleepy_owner(struct qspinlock *lock, u32 val) +{ + if (pv_sleepy_lock) { + if (pv_sleepy_lock_interval_ns) + this_cpu_write(sleepy_lock_seen_clock, sched_clock()); + if (!(val & _Q_SLEEPY_VAL)) + try_set_sleepy(lock, val); + } +} + +static __always_inline void seen_sleepy_lock(void) +{ + if (pv_sleepy_lock && pv_sleepy_lock_interval_ns) + this_cpu_write(sleepy_lock_seen_clock, sched_clock()); +} + +static __always_inline void seen_sleepy_node(void) +{ + if (pv_sleepy_lock) { + if (pv_sleepy_lock_interval_ns) + this_cpu_write(sleepy_lock_seen_clock, sched_clock()); + /* Don't set sleepy because we likely have a stale val */ + } +} + +static struct qnode *get_tail_qnode(struct qspinlock *lock, int prev_cpu) +{ + struct qnodes *qnodesp = per_cpu_ptr(&qnodes, prev_cpu); + int idx; + + /* + * After publishing the new tail and finding a previous tail in the + * previous val (which is the control dependency), this barrier + * orders the release barrier in publish_tail_cpu performed by the + * last CPU, with subsequently looking at its qnode structures + * after the barrier. + */ + smp_acquire__after_ctrl_dep(); + + for (idx = 0; idx < MAX_NODES; idx++) { + struct qnode *qnode = &qnodesp->nodes[idx]; + if (qnode->lock == lock) + return qnode; + } + + BUG(); +} + +/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */ +static __always_inline bool __yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool mustq) +{ + int owner; + u32 yield_count; + bool preempted = false; + + BUG_ON(!(val & _Q_LOCKED_VAL)); + + if (!paravirt) + goto relax; + + if (!pv_yield_owner) + goto relax; + + owner = get_owner_cpu(val); + yield_count = yield_count_of(owner); + + if ((yield_count & 1) == 0) + goto relax; /* owner vcpu is running */ + + spin_end(); + + seen_sleepy_owner(lock, val); + preempted = true; + + /* + * Read the lock word after sampling the yield count. 
On the other side + * there may a wmb because the yield count update is done by the + * hypervisor preemption and the value update by the OS, however this + * ordering might reduce the chance of out of order accesses and + * improve the heuristic. + */ + smp_rmb(); + + if (READ_ONCE(lock->val) == val) { + if (mustq) + clear_mustq(lock); + yield_to_preempted(owner, yield_count); + if (mustq) + set_mustq(lock); + spin_begin(); + + /* Don't relax if we yielded. Maybe we should? */ + return preempted; + } + spin_begin(); +relax: + spin_cpu_relax(); + + return preempted; +} + +/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */ +static __always_inline bool yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt) +{ + return __yield_to_locked_owner(lock, val, paravirt, false); +} + +/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */ +static __always_inline bool yield_head_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt) +{ + bool mustq = false; + + if ((val & _Q_MUST_Q_VAL) && pv_yield_allow_steal) + mustq = true; + + return __yield_to_locked_owner(lock, val, paravirt, mustq); +} + +static __always_inline void propagate_sleepy(struct qnode *node, u32 val, bool paravirt) +{ + struct qnode *next; + int owner; + + if (!paravirt) + return; + if (!pv_yield_sleepy_owner) + return; + + next = READ_ONCE(node->next); + if (!next) + return; + + if (next->sleepy) + return; + + owner = get_owner_cpu(val); + if (vcpu_is_preempted(owner)) + next->sleepy = 1; +} + +/* Called inside spin_begin() */ +static __always_inline bool yield_to_prev(struct qspinlock *lock, struct qnode *node, int prev_cpu, bool paravirt) +{ + u32 yield_count; + bool preempted = false; + + if (!paravirt) + goto relax; + + if (!pv_yield_sleepy_owner) + goto yield_prev; + + /* + * If the previous waiter was preempted it might not be able to + * propagate sleepy to us, so check the lock in that case too. + */ + if (node->sleepy || vcpu_is_preempted(prev_cpu)) { + u32 val = READ_ONCE(lock->val); + + if (val & _Q_LOCKED_VAL) { + if (node->next && !node->next->sleepy) { + /* + * Propagate sleepy to next waiter. Only if + * owner is preempted, which allows the queue + * to become "non-sleepy" if vCPU preemption + * ceases to occur, even if the lock remains + * highly contended. 
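The paravirt yield heuristic used by __yield_to_locked_owner() and yield_to_prev() has the same shape in every caller: sample the target vCPU's dispatch count, treat an odd value as "currently preempted", re-check that the situation we sampled is still current, and only then confer our timeslice. A condensed sketch (hv_yield_count() and hv_confer() are made-up stand-ins for the real yield_count_of()/yield_to_preempted() helpers):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

extern uint32_t hv_yield_count(int cpu);		/* assumed: odd while the vCPU is preempted */
extern void hv_confer(int cpu, uint32_t yield_count);	/* assumed: donate our timeslice to that vCPU */

static bool sketch_yield_to_owner(_Atomic uint32_t *lock, uint32_t seen, int owner)
{
	uint32_t yc = hv_yield_count(owner);

	if ((yc & 1) == 0)
		return false;	/* even count: owner vCPU is running, keep spinning */

	/*
	 * Re-read the lock word (the real code orders this with smp_rmb())
	 * so we do not donate our timeslice when the owner already moved on.
	 */
	if (atomic_load_explicit(lock, memory_order_relaxed) != seen)
		return false;

	hv_confer(owner, yc);
	return true;
}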
+ */ + if (vcpu_is_preempted(get_owner_cpu(val))) + node->next->sleepy = 1; + } + + preempted = yield_to_locked_owner(lock, val, paravirt); + if (preempted) + return preempted; + } + node->sleepy = false; + } + +yield_prev: + if (!pv_yield_prev) + goto relax; + + yield_count = yield_count_of(prev_cpu); + if ((yield_count & 1) == 0) + goto relax; /* owner vcpu is running */ + + spin_end(); + + preempted = true; + seen_sleepy_node(); + + smp_rmb(); /* See __yield_to_locked_owner comment */ + + if (!READ_ONCE(node->locked)) { + yield_to_preempted(prev_cpu, yield_count); + spin_begin(); + return preempted; + } + spin_begin(); + +relax: + spin_cpu_relax(); + + return preempted; +} + +static __always_inline bool steal_break(u32 val, int iters, bool paravirt, bool sleepy) +{ + if (iters >= get_steal_spins(paravirt, sleepy)) + return true; + + if (IS_ENABLED(CONFIG_NUMA) && + (iters >= get_remote_steal_spins(paravirt, sleepy))) { + int cpu = get_owner_cpu(val); + if (numa_node_id() != cpu_to_node(cpu)) + return true; + } + return false; +} + +static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt) +{ + bool seen_preempted = false; + bool sleepy = false; + int iters = 0; + u32 val; + + if (!steal_spins) { + /* XXX: should spin_on_preempted_owner do anything here? */ + return false; + } + + /* Attempt to steal the lock */ + spin_begin(); + do { + bool preempted = false; + + val = READ_ONCE(lock->val); + if (val & _Q_MUST_Q_VAL) + break; + spec_barrier(); + + if (unlikely(!(val & _Q_LOCKED_VAL))) { + spin_end(); + if (__queued_spin_trylock_steal(lock)) + return true; + spin_begin(); + } else { + preempted = yield_to_locked_owner(lock, val, paravirt); + } + + if (paravirt && pv_sleepy_lock) { + if (!sleepy) { + if (val & _Q_SLEEPY_VAL) { + seen_sleepy_lock(); + sleepy = true; + } else if (recently_sleepy()) { + sleepy = true; + } + } + if (pv_sleepy_lock_sticky && seen_preempted && + !(val & _Q_SLEEPY_VAL)) { + if (try_set_sleepy(lock, val)) + val |= _Q_SLEEPY_VAL; + } + } + + if (preempted) { + seen_preempted = true; + sleepy = true; + if (!pv_spin_on_preempted_owner) + iters++; + /* + * pv_spin_on_preempted_owner don't increase iters + * while the owner is preempted -- we won't interfere + * with it by definition. This could introduce some + * latency issue if we continually observe preempted + * owners, but hopefully that's a rare corner case of + * a badly oversubscribed system. + */ + } else { + iters++; + } + } while (!steal_break(val, iters, paravirt, sleepy)); + + spin_end(); + + return false; +} + +static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, bool paravirt) +{ + struct qnodes *qnodesp; + struct qnode *next, *node; + u32 val, old, tail; + bool seen_preempted = false; + bool sleepy = false; + bool mustq = false; + int idx; + int iters = 0; + + BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS)); + + qnodesp = this_cpu_ptr(&qnodes); + if (unlikely(qnodesp->count >= MAX_NODES)) { + spec_barrier(); + while (!queued_spin_trylock(lock)) + cpu_relax(); + return; + } + + idx = qnodesp->count++; + /* + * Ensure that we increment the head node->count before initialising + * the actual node. If the compiler is kind enough to reorder these + * stores, then an IRQ could overwrite our assignments. 
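The count-then-initialise ordering described in the comment above matters because the same per-CPU node array is shared with interrupt context. In isolation the pattern looks like the sketch below (illustrative names only; the real struct also records the owning CPU and a next pointer, and the caller falls back to spinning on trylock when all slots are busy):

#include <stddef.h>

#define SKETCH_MAX_NODES 4	/* task, soft-IRQ, hard-IRQ and NMI nesting */

struct sketch_qnode  { void *lock; int locked; };
struct sketch_qnodes { struct sketch_qnode nodes[SKETCH_MAX_NODES]; int count; };

static struct sketch_qnode *sketch_claim_node(struct sketch_qnodes *q)
{
	int idx = q->count++;			/* claim a slot first ...             */

	__asm__ volatile("" ::: "memory");	/* ... compiler barrier, as barrier() */

	q->nodes[idx].lock = NULL;		/* ... then initialise it: an IRQ     */
	q->nodes[idx].locked = 0;		/* arriving now sees the new count    */
	return &q->nodes[idx];			/* and claims the next slot instead   */
}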
+ */ + barrier(); + node = &qnodesp->nodes[idx]; + node->next = NULL; + node->lock = lock; + node->cpu = smp_processor_id(); + node->sleepy = 0; + node->locked = 0; + + tail = encode_tail_cpu(node->cpu); + + /* + * Assign all attributes of a node before it can be published. + * Issues an lwsync, serving as a release barrier, as well as a + * compiler barrier. + */ + old = publish_tail_cpu(lock, tail); + + /* + * If there was a previous node; link it and wait until reaching the + * head of the waitqueue. + */ + if (old & _Q_TAIL_CPU_MASK) { + int prev_cpu = decode_tail_cpu(old); + struct qnode *prev = get_tail_qnode(lock, prev_cpu); + + /* Link @node into the waitqueue. */ + WRITE_ONCE(prev->next, node); + + /* Wait for mcs node lock to be released */ + spin_begin(); + while (!READ_ONCE(node->locked)) { + spec_barrier(); + + if (yield_to_prev(lock, node, prev_cpu, paravirt)) + seen_preempted = true; + } + spec_barrier(); + spin_end(); + + smp_rmb(); /* acquire barrier for the mcs lock */ + + /* + * Generic qspinlocks have this prefetch here, but it seems + * like it could cause additional line transitions because + * the waiter will keep loading from it. + */ + if (_Q_SPIN_PREFETCH_NEXT) { + next = READ_ONCE(node->next); + if (next) + prefetchw(next); + } + } + + /* We're at the head of the waitqueue, wait for the lock. */ +again: + spin_begin(); + for (;;) { + bool preempted; + + val = READ_ONCE(lock->val); + if (!(val & _Q_LOCKED_VAL)) + break; + spec_barrier(); + + if (paravirt && pv_sleepy_lock && maybe_stealers) { + if (!sleepy) { + if (val & _Q_SLEEPY_VAL) { + seen_sleepy_lock(); + sleepy = true; + } else if (recently_sleepy()) { + sleepy = true; + } + } + if (pv_sleepy_lock_sticky && seen_preempted && + !(val & _Q_SLEEPY_VAL)) { + if (try_set_sleepy(lock, val)) + val |= _Q_SLEEPY_VAL; + } + } + + propagate_sleepy(node, val, paravirt); + preempted = yield_head_to_locked_owner(lock, val, paravirt); + if (!maybe_stealers) + continue; + + if (preempted) + seen_preempted = true; + + if (paravirt && preempted) { + sleepy = true; + + if (!pv_spin_on_preempted_owner) + iters++; + } else { + iters++; + } + + if (!mustq && iters >= get_head_spins(paravirt, sleepy)) { + mustq = true; + set_mustq(lock); + val |= _Q_MUST_Q_VAL; + } + } + spec_barrier(); + spin_end(); + + /* If we're the last queued, must clean up the tail. */ + old = trylock_clean_tail(lock, tail); + if (unlikely(old & _Q_LOCKED_VAL)) { + BUG_ON(!maybe_stealers); + goto again; /* Can only be true if maybe_stealers. */ + } + + if ((old & _Q_TAIL_CPU_MASK) == tail) + goto release; /* We were the tail, no next. */ + + /* There is a next, must wait for node->next != NULL (MCS protocol) */ + next = READ_ONCE(node->next); + if (!next) { + spin_begin(); + while (!(next = READ_ONCE(node->next))) + cpu_relax(); + spin_end(); + } + spec_barrier(); + + /* + * Unlock the next mcs waiter node. Release barrier is not required + * here because the acquirer is only accessing the lock word, and + * the acquire barrier we took the lock with orders that update vs + * this store to locked. The corresponding barrier is the smp_rmb() + * acquire barrier for mcs lock, above. 
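The queueing logic above is the classic MCS protocol, specialised so that the tail lives inside the 32-bit lock word and decorated with the stealing and paravirt-yield policies. Stripped of those, the underlying protocol is the textbook sketch below (C11 atomics, not the kernel code):

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

struct mcs_node {
	struct mcs_node *_Atomic next;
	_Atomic bool locked;		/* set by our predecessor on handoff */
};

static void mcs_lock(struct mcs_node *_Atomic *tail, struct mcs_node *me)
{
	struct mcs_node *prev;

	atomic_store_explicit(&me->next, NULL, memory_order_relaxed);
	atomic_store_explicit(&me->locked, false, memory_order_relaxed);

	/* Publish ourselves as the new tail; the old tail is our predecessor. */
	prev = atomic_exchange_explicit(tail, me, memory_order_acq_rel);
	if (prev) {
		atomic_store_explicit(&prev->next, me, memory_order_release);
		while (!atomic_load_explicit(&me->locked, memory_order_acquire))
			;		/* spin on our own flag only */
	}
}

static void mcs_unlock(struct mcs_node *_Atomic *tail, struct mcs_node *me)
{
	struct mcs_node *next = atomic_load_explicit(&me->next, memory_order_acquire);

	if (!next) {
		struct mcs_node *expected = me;

		/* No successor visible: try to swing the tail back to empty. */
		if (atomic_compare_exchange_strong_explicit(tail, &expected, NULL,
							    memory_order_release,
							    memory_order_relaxed))
			return;
		/* A successor exists but has not linked itself yet; wait for it. */
		while (!(next = atomic_load_explicit(&me->next, memory_order_acquire)))
			;
	}
	atomic_store_explicit(&next->locked, true, memory_order_release);
}

The powerpc variant replaces the pointer-sized tail with an encoded CPU number packed into the lock word (publish_tail_cpu/trylock_clean_tail above), so the whole lock stays a single 32-bit word.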
+ */ + if (paravirt && pv_prod_head) { + int next_cpu = next->cpu; + WRITE_ONCE(next->locked, 1); + if (_Q_SPIN_MISO) + asm volatile("miso" ::: "memory"); + if (vcpu_is_preempted(next_cpu)) + prod_cpu(next_cpu); + } else { + WRITE_ONCE(next->locked, 1); + if (_Q_SPIN_MISO) + asm volatile("miso" ::: "memory"); + } + +release: + /* + * Clear the lock before releasing the node, as another CPU might see stale + * values if an interrupt occurs after we increment qnodesp->count + * but before node->lock is initialized. The barrier ensures that + * there are no further stores to the node after it has been released. + */ + node->lock = NULL; + barrier(); + qnodesp->count--; +} + +void queued_spin_lock_slowpath(struct qspinlock *lock) +{ + /* + * This looks funny, but it induces the compiler to inline both + * sides of the branch rather than share code as when the condition + * is passed as the paravirt argument to the functions. + */ + if (IS_ENABLED(CONFIG_PARAVIRT_SPINLOCKS) && is_shared_processor()) { + if (try_to_steal_lock(lock, true)) { + spec_barrier(); + return; + } + queued_spin_lock_mcs_queue(lock, true); + } else { + if (try_to_steal_lock(lock, false)) { + spec_barrier(); + return; + } + queued_spin_lock_mcs_queue(lock, false); + } +} +EXPORT_SYMBOL(queued_spin_lock_slowpath); + +#ifdef CONFIG_PARAVIRT_SPINLOCKS +void pv_spinlocks_init(void) +{ +} +#endif + +#include <linux/debugfs.h> +static int steal_spins_set(void *data, u64 val) +{ +#if _Q_SPIN_TRY_LOCK_STEAL == 1 + /* MAYBE_STEAL remains true */ + steal_spins = val; +#else + static DEFINE_MUTEX(lock); + + /* + * The lock slow path has a !maybe_stealers case that can assume + * the head of queue will not see concurrent waiters. That waiter + * is unsafe in the presence of stealers, so must keep them away + * from one another. 
+ */ + + mutex_lock(&lock); + if (val && !steal_spins) { + maybe_stealers = true; + /* wait for queue head waiter to go away */ + synchronize_rcu(); + steal_spins = val; + } else if (!val && steal_spins) { + steal_spins = val; + /* wait for all possible stealers to go away */ + synchronize_rcu(); + maybe_stealers = false; + } else { + steal_spins = val; + } + mutex_unlock(&lock); +#endif + + return 0; +} + +static int steal_spins_get(void *data, u64 *val) +{ + *val = steal_spins; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_steal_spins, steal_spins_get, steal_spins_set, "%llu\n"); + +static int remote_steal_spins_set(void *data, u64 val) +{ + remote_steal_spins = val; + + return 0; +} + +static int remote_steal_spins_get(void *data, u64 *val) +{ + *val = remote_steal_spins; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_remote_steal_spins, remote_steal_spins_get, remote_steal_spins_set, "%llu\n"); + +static int head_spins_set(void *data, u64 val) +{ + head_spins = val; + + return 0; +} + +static int head_spins_get(void *data, u64 *val) +{ + *val = head_spins; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_head_spins, head_spins_get, head_spins_set, "%llu\n"); + +static int pv_yield_owner_set(void *data, u64 val) +{ + pv_yield_owner = !!val; + + return 0; +} + +static int pv_yield_owner_get(void *data, u64 *val) +{ + *val = pv_yield_owner; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_owner, pv_yield_owner_get, pv_yield_owner_set, "%llu\n"); + +static int pv_yield_allow_steal_set(void *data, u64 val) +{ + pv_yield_allow_steal = !!val; + + return 0; +} + +static int pv_yield_allow_steal_get(void *data, u64 *val) +{ + *val = pv_yield_allow_steal; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_allow_steal, pv_yield_allow_steal_get, pv_yield_allow_steal_set, "%llu\n"); + +static int pv_spin_on_preempted_owner_set(void *data, u64 val) +{ + pv_spin_on_preempted_owner = !!val; + + return 0; +} + +static int pv_spin_on_preempted_owner_get(void *data, u64 *val) +{ + *val = pv_spin_on_preempted_owner; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_spin_on_preempted_owner, pv_spin_on_preempted_owner_get, pv_spin_on_preempted_owner_set, "%llu\n"); + +static int pv_sleepy_lock_set(void *data, u64 val) +{ + pv_sleepy_lock = !!val; + + return 0; +} + +static int pv_sleepy_lock_get(void *data, u64 *val) +{ + *val = pv_sleepy_lock; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock, pv_sleepy_lock_get, pv_sleepy_lock_set, "%llu\n"); + +static int pv_sleepy_lock_sticky_set(void *data, u64 val) +{ + pv_sleepy_lock_sticky = !!val; + + return 0; +} + +static int pv_sleepy_lock_sticky_get(void *data, u64 *val) +{ + *val = pv_sleepy_lock_sticky; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_sticky, pv_sleepy_lock_sticky_get, pv_sleepy_lock_sticky_set, "%llu\n"); + +static int pv_sleepy_lock_interval_ns_set(void *data, u64 val) +{ + pv_sleepy_lock_interval_ns = val; + + return 0; +} + +static int pv_sleepy_lock_interval_ns_get(void *data, u64 *val) +{ + *val = pv_sleepy_lock_interval_ns; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_interval_ns, pv_sleepy_lock_interval_ns_get, pv_sleepy_lock_interval_ns_set, "%llu\n"); + +static int pv_sleepy_lock_factor_set(void *data, u64 val) +{ + pv_sleepy_lock_factor = val; + + return 0; +} + +static int pv_sleepy_lock_factor_get(void *data, u64 *val) +{ + *val = pv_sleepy_lock_factor; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_factor, pv_sleepy_lock_factor_get, 
pv_sleepy_lock_factor_set, "%llu\n"); + +static int pv_yield_prev_set(void *data, u64 val) +{ + pv_yield_prev = !!val; + + return 0; +} + +static int pv_yield_prev_get(void *data, u64 *val) +{ + *val = pv_yield_prev; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_prev, pv_yield_prev_get, pv_yield_prev_set, "%llu\n"); + +static int pv_yield_sleepy_owner_set(void *data, u64 val) +{ + pv_yield_sleepy_owner = !!val; + + return 0; +} + +static int pv_yield_sleepy_owner_get(void *data, u64 *val) +{ + *val = pv_yield_sleepy_owner; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_sleepy_owner, pv_yield_sleepy_owner_get, pv_yield_sleepy_owner_set, "%llu\n"); + +static int pv_prod_head_set(void *data, u64 val) +{ + pv_prod_head = !!val; + + return 0; +} + +static int pv_prod_head_get(void *data, u64 *val) +{ + *val = pv_prod_head; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_prod_head, pv_prod_head_get, pv_prod_head_set, "%llu\n"); + +static __init int spinlock_debugfs_init(void) +{ + debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins); + debugfs_create_file("qspl_remote_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_remote_steal_spins); + debugfs_create_file("qspl_head_spins", 0600, arch_debugfs_dir, NULL, &fops_head_spins); + if (is_shared_processor()) { + debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner); + debugfs_create_file("qspl_pv_yield_allow_steal", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_allow_steal); + debugfs_create_file("qspl_pv_spin_on_preempted_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_spin_on_preempted_owner); + debugfs_create_file("qspl_pv_sleepy_lock", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock); + debugfs_create_file("qspl_pv_sleepy_lock_sticky", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_sticky); + debugfs_create_file("qspl_pv_sleepy_lock_interval_ns", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_interval_ns); + debugfs_create_file("qspl_pv_sleepy_lock_factor", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_factor); + debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev); + debugfs_create_file("qspl_pv_yield_sleepy_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_sleepy_owner); + debugfs_create_file("qspl_pv_prod_head", 0600, arch_debugfs_dir, NULL, &fops_pv_prod_head); + } + + return 0; +} +device_initcall(spinlock_debugfs_init); diff --git a/arch/powerpc/lib/restart_table.c b/arch/powerpc/lib/restart_table.c new file mode 100644 index 000000000000..bccb662c1b7b --- /dev/null +++ b/arch/powerpc/lib/restart_table.c @@ -0,0 +1,56 @@ +#include <asm/interrupt.h> +#include <asm/kprobes.h> + +struct soft_mask_table_entry { + unsigned long start; + unsigned long end; +}; + +struct restart_table_entry { + unsigned long start; + unsigned long end; + unsigned long fixup; +}; + +extern struct soft_mask_table_entry __start___soft_mask_table[]; +extern struct soft_mask_table_entry __stop___soft_mask_table[]; + +extern struct restart_table_entry __start___restart_table[]; +extern struct restart_table_entry __stop___restart_table[]; + +/* Given an address, look for it in the soft mask table */ +bool search_kernel_soft_mask_table(unsigned long addr) +{ + struct soft_mask_table_entry *smte = __start___soft_mask_table; + + while (smte < __stop___soft_mask_table) { + unsigned long start = smte->start; + unsigned long end = smte->end; + + if (addr >= start && addr < end) + return true; + + smte++; + } + return 
false; +} +NOKPROBE_SYMBOL(search_kernel_soft_mask_table); + +/* Given an address, look for it in the kernel exception table */ +unsigned long search_kernel_restart_table(unsigned long addr) +{ + struct restart_table_entry *rte = __start___restart_table; + + while (rte < __stop___restart_table) { + unsigned long start = rte->start; + unsigned long end = rte->end; + unsigned long fixup = rte->fixup; + + if (addr >= start && addr < end) + return fixup; + + rte++; + } + return 0; +} +NOKPROBE_SYMBOL(search_kernel_restart_table); diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index c077acb983a1..ac3ee19531d8 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -13,8 +13,7 @@ #include <linux/uaccess.h> #include <asm/cpu_has_feature.h> #include <asm/cputable.h> - -extern char system_call_common[]; +#include <asm/disassemble.h> #ifdef CONFIG_PPC64 /* Bits in SRR1 that are copied from MSR */ @@ -30,6 +29,10 @@ extern char system_call_common[]; #define XER_OV32 0x00080000U #define XER_CA32 0x00040000U +#ifdef CONFIG_VSX +#define VSX_REGISTER_XTP(rd) ((((rd) & 1) << 5) | ((rd) & 0xfe)) +#endif + #ifdef CONFIG_PPC_FPU /* * Functions in ldstfp.S @@ -69,10 +72,8 @@ extern int do_stqcx(unsigned long ea, unsigned long val0, unsigned long val1, static nokprobe_inline unsigned long truncate_if_32bit(unsigned long msr, unsigned long val) { -#ifdef __powerpc64__ if ((msr & MSR_64BIT) == 0) val &= 0xffffffffUL; -#endif return val; } @@ -106,11 +107,11 @@ static nokprobe_inline long address_ok(struct pt_regs *regs, { if (!user_mode(regs)) return 1; - if (__access_ok(ea, nb, USER_DS)) + if (access_ok((void __user *)ea, nb)) return 1; - if (__access_ok(ea, 1, USER_DS)) + if (access_ok((void __user *)ea, 1)) /* Access overlaps the end of the user region */ - regs->dar = USER_DS.seg; + regs->dar = TASK_SIZE_MAX - 1; else regs->dar = ea; return 0; @@ -188,6 +189,47 @@ static nokprobe_inline unsigned long xform_ea(unsigned int instr, } /* + * Calculate effective address for a MLS:D-form / 8LS:D-form + * prefixed instruction + */ +static nokprobe_inline unsigned long mlsd_8lsd_ea(unsigned int instr, + unsigned int suffix, + const struct pt_regs *regs) +{ + int ra, prefix_r; + unsigned int dd; + unsigned long ea, d0, d1, d; + + prefix_r = GET_PREFIX_R(instr); + ra = GET_PREFIX_RA(suffix); + + d0 = instr & 0x3ffff; + d1 = suffix & 0xffff; + d = (d0 << 16) | d1; + + /* + * sign extend a 34 bit number + */ + dd = (unsigned int)(d >> 2); + ea = (signed int)dd; + ea = (ea << 2) | (d & 0x3); + + if (!prefix_r && ra) + ea += regs->gpr[ra]; + else if (!prefix_r && !ra) + ; /* Leave ea as is */ + else if (prefix_r) + ea += regs->nip; + + /* + * (prefix_r && ra) is an invalid form. Should already be + * checked for by caller! + */ + + return ea; +} + +/* * Return the largest power of 2, not greater than sizeof(unsigned long), * such that x is a multiple of it. 
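The 34-bit displacement handling in mlsd_8lsd_ea() above packs 18 bits from the prefix word and 16 bits from the suffix word, then sign-extends from bit 33 via a cast-and-shift trick. An equivalent, more direct formulation (a sketch; the field masks follow the code above, the helper names are made up):

#include <stdint.h>

static int64_t sext34(uint64_t d)	/* sign-extend a 34-bit quantity */
{
	const uint64_t sign = 1ULL << 33;

	return (int64_t)((d & (2 * sign - 1)) ^ sign) - (int64_t)sign;
}

static int64_t sketch_prefixed_disp(uint32_t prefix, uint32_t suffix)
{
	uint64_t d0 = prefix & 0x3ffff;	/* high 18 bits, from the prefix word */
	uint64_t d1 = suffix & 0xffff;	/* low 16 bits, from the suffix word */

	return sext34((d0 << 16) | d1);
}

The effective address is then this displacement plus gpr[RA] for the R=0 form, or plus regs->nip for the R=1 (PC-relative) form, exactly as selected in the function above.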
*/ @@ -236,39 +278,70 @@ static nokprobe_inline void do_byte_reverse(void *ptr, int nb) up[1] = tmp; break; } + case 32: { + unsigned long *up = (unsigned long *)ptr; + unsigned long tmp; + + tmp = byterev_8(up[0]); + up[0] = byterev_8(up[3]); + up[3] = tmp; + tmp = byterev_8(up[2]); + up[2] = byterev_8(up[1]); + up[1] = tmp; + break; + } + #endif default: WARN_ON_ONCE(1); } } -static nokprobe_inline int read_mem_aligned(unsigned long *dest, - unsigned long ea, int nb, - struct pt_regs *regs) +static __always_inline int +__read_mem_aligned(unsigned long *dest, unsigned long ea, int nb, struct pt_regs *regs) { - int err = 0; unsigned long x = 0; switch (nb) { case 1: - err = __get_user(x, (unsigned char __user *) ea); + unsafe_get_user(x, (unsigned char __user *)ea, Efault); break; case 2: - err = __get_user(x, (unsigned short __user *) ea); + unsafe_get_user(x, (unsigned short __user *)ea, Efault); break; case 4: - err = __get_user(x, (unsigned int __user *) ea); + unsafe_get_user(x, (unsigned int __user *)ea, Efault); break; #ifdef __powerpc64__ case 8: - err = __get_user(x, (unsigned long __user *) ea); + unsafe_get_user(x, (unsigned long __user *)ea, Efault); break; #endif } - if (!err) - *dest = x; - else + *dest = x; + return 0; + +Efault: + regs->dar = ea; + return -EFAULT; +} + +static nokprobe_inline int +read_mem_aligned(unsigned long *dest, unsigned long ea, int nb, struct pt_regs *regs) +{ + int err; + + if (is_kernel_addr(ea)) + return __read_mem_aligned(dest, ea, nb, regs); + + if (user_read_access_begin((void __user *)ea, nb)) { + err = __read_mem_aligned(dest, ea, nb, regs); + user_read_access_end(); + } else { + err = -EFAULT; regs->dar = ea; + } + return err; } @@ -276,10 +349,8 @@ static nokprobe_inline int read_mem_aligned(unsigned long *dest, * Copy from userspace to a buffer, using the largest possible * aligned accesses, up to sizeof(long). 
*/ -static nokprobe_inline int copy_mem_in(u8 *dest, unsigned long ea, int nb, - struct pt_regs *regs) +static __always_inline int __copy_mem_in(u8 *dest, unsigned long ea, int nb, struct pt_regs *regs) { - int err = 0; int c; for (; nb > 0; nb -= c) { @@ -288,31 +359,46 @@ static nokprobe_inline int copy_mem_in(u8 *dest, unsigned long ea, int nb, c = max_align(nb); switch (c) { case 1: - err = __get_user(*dest, (unsigned char __user *) ea); + unsafe_get_user(*dest, (u8 __user *)ea, Efault); break; case 2: - err = __get_user(*(u16 *)dest, - (unsigned short __user *) ea); + unsafe_get_user(*(u16 *)dest, (u16 __user *)ea, Efault); break; case 4: - err = __get_user(*(u32 *)dest, - (unsigned int __user *) ea); + unsafe_get_user(*(u32 *)dest, (u32 __user *)ea, Efault); break; #ifdef __powerpc64__ case 8: - err = __get_user(*(unsigned long *)dest, - (unsigned long __user *) ea); + unsafe_get_user(*(u64 *)dest, (u64 __user *)ea, Efault); break; #endif } - if (err) { - regs->dar = ea; - return err; - } dest += c; ea += c; } return 0; + +Efault: + regs->dar = ea; + return -EFAULT; +} + +static nokprobe_inline int copy_mem_in(u8 *dest, unsigned long ea, int nb, struct pt_regs *regs) +{ + int err; + + if (is_kernel_addr(ea)) + return __copy_mem_in(dest, ea, nb, regs); + + if (user_read_access_begin((void __user *)ea, nb)) { + err = __copy_mem_in(dest, ea, nb, regs); + user_read_access_end(); + } else { + err = -EFAULT; + regs->dar = ea; + } + + return err; } static nokprobe_inline int read_mem_unaligned(unsigned long *dest, @@ -350,30 +436,48 @@ static int read_mem(unsigned long *dest, unsigned long ea, int nb, } NOKPROBE_SYMBOL(read_mem); -static nokprobe_inline int write_mem_aligned(unsigned long val, - unsigned long ea, int nb, - struct pt_regs *regs) +static __always_inline int +__write_mem_aligned(unsigned long val, unsigned long ea, int nb, struct pt_regs *regs) { - int err = 0; - switch (nb) { case 1: - err = __put_user(val, (unsigned char __user *) ea); + unsafe_put_user(val, (unsigned char __user *)ea, Efault); break; case 2: - err = __put_user(val, (unsigned short __user *) ea); + unsafe_put_user(val, (unsigned short __user *)ea, Efault); break; case 4: - err = __put_user(val, (unsigned int __user *) ea); + unsafe_put_user(val, (unsigned int __user *)ea, Efault); break; #ifdef __powerpc64__ case 8: - err = __put_user(val, (unsigned long __user *) ea); + unsafe_put_user(val, (unsigned long __user *)ea, Efault); break; #endif } - if (err) + return 0; + +Efault: + regs->dar = ea; + return -EFAULT; +} + +static nokprobe_inline int +write_mem_aligned(unsigned long val, unsigned long ea, int nb, struct pt_regs *regs) +{ + int err; + + if (is_kernel_addr(ea)) + return __write_mem_aligned(val, ea, nb, regs); + + if (user_write_access_begin((void __user *)ea, nb)) { + err = __write_mem_aligned(val, ea, nb, regs); + user_write_access_end(); + } else { + err = -EFAULT; regs->dar = ea; + } + return err; } @@ -381,10 +485,8 @@ static nokprobe_inline int write_mem_aligned(unsigned long val, * Copy from a buffer to userspace, using the largest possible * aligned accesses, up to sizeof(long). 
*/ -static nokprobe_inline int copy_mem_out(u8 *dest, unsigned long ea, int nb, - struct pt_regs *regs) +static __always_inline int __copy_mem_out(u8 *dest, unsigned long ea, int nb, struct pt_regs *regs) { - int err = 0; int c; for (; nb > 0; nb -= c) { @@ -393,31 +495,46 @@ static nokprobe_inline int copy_mem_out(u8 *dest, unsigned long ea, int nb, c = max_align(nb); switch (c) { case 1: - err = __put_user(*dest, (unsigned char __user *) ea); + unsafe_put_user(*dest, (u8 __user *)ea, Efault); break; case 2: - err = __put_user(*(u16 *)dest, - (unsigned short __user *) ea); + unsafe_put_user(*(u16 *)dest, (u16 __user *)ea, Efault); break; case 4: - err = __put_user(*(u32 *)dest, - (unsigned int __user *) ea); + unsafe_put_user(*(u32 *)dest, (u32 __user *)ea, Efault); break; #ifdef __powerpc64__ case 8: - err = __put_user(*(unsigned long *)dest, - (unsigned long __user *) ea); + unsafe_put_user(*(u64 *)dest, (u64 __user *)ea, Efault); break; #endif } - if (err) { - regs->dar = ea; - return err; - } dest += c; ea += c; } return 0; + +Efault: + regs->dar = ea; + return -EFAULT; +} + +static nokprobe_inline int copy_mem_out(u8 *dest, unsigned long ea, int nb, struct pt_regs *regs) +{ + int err; + + if (is_kernel_addr(ea)) + return __copy_mem_out(dest, ea, nb, regs); + + if (user_write_access_begin((void __user *)ea, nb)) { + err = __copy_mem_out(dest, ea, nb, regs); + user_write_access_end(); + } else { + err = -EFAULT; + regs->dar = ea; + } + + return err; } static nokprobe_inline int write_mem_unaligned(unsigned long val, @@ -469,6 +586,8 @@ static int do_fp_load(struct instruction_op *op, unsigned long ea, } u; nb = GETSIZE(op->type); + if (nb > sizeof(u)) + return -EINVAL; if (!address_ok(regs, ea, nb)) return -EFAULT; rn = op->reg; @@ -519,6 +638,8 @@ static int do_fp_store(struct instruction_op *op, unsigned long ea, } u; nb = GETSIZE(op->type); + if (nb > sizeof(u)) + return -EINVAL; if (!address_ok(regs, ea, nb)) return -EFAULT; rn = op->reg; @@ -563,6 +684,9 @@ static nokprobe_inline int do_vec_load(int rn, unsigned long ea, u8 b[sizeof(__vector128)]; } u = {}; + if (size > sizeof(u)) + return -EINVAL; + if (!address_ok(regs, ea & ~0xfUL, 16)) return -EFAULT; /* align to multiple of size */ @@ -571,7 +695,7 @@ static nokprobe_inline int do_vec_load(int rn, unsigned long ea, if (err) return err; if (unlikely(cross_endian)) - do_byte_reverse(&u.b[ea & 0xf], size); + do_byte_reverse(&u.b[ea & 0xf], min_t(size_t, size, sizeof(u))); preempt_disable(); if (regs->msr & MSR_VEC) put_vr(rn, &u.v); @@ -590,6 +714,9 @@ static nokprobe_inline int do_vec_store(int rn, unsigned long ea, u8 b[sizeof(__vector128)]; } u; + if (size > sizeof(u)) + return -EINVAL; + if (!address_ok(regs, ea & ~0xfUL, 16)) return -EFAULT; /* align to multiple of size */ @@ -602,7 +729,7 @@ static nokprobe_inline int do_vec_store(int rn, unsigned long ea, u.v = current->thread.vr_state.vr[rn]; preempt_enable(); if (unlikely(cross_endian)) - do_byte_reverse(&u.b[ea & 0xf], size); + do_byte_reverse(&u.b[ea & 0xf], min_t(size_t, size, sizeof(u))); return copy_mem_out(&u.b[ea & 0xf], ea, size, regs); } #endif /* CONFIG_ALTIVEC */ @@ -653,8 +780,8 @@ static nokprobe_inline int emulate_stq(struct pt_regs *regs, unsigned long ea, #endif /* __powerpc64 */ #ifdef CONFIG_VSX -void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg, - const void *mem, bool rev) +static nokprobe_inline void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg, + const void *mem, bool rev) { int size, read_size; int i, j; @@ 
-666,6 +793,8 @@ void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg, reg->d[0] = reg->d[1] = 0; switch (op->element_size) { + case 32: + /* [p]lxvp[x] */ case 16: /* whole vector; lxv[x] or lxvl[l] */ if (size == 0) @@ -674,7 +803,7 @@ void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg, if (IS_LE && (op->vsx_flags & VSX_LDLEFT)) rev = !rev; if (rev) - do_byte_reverse(reg, 16); + do_byte_reverse(reg, size); break; case 8: /* scalar loads, lxvd2x, lxvdsx */ @@ -734,11 +863,9 @@ void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg, break; } } -EXPORT_SYMBOL_GPL(emulate_vsx_load); -NOKPROBE_SYMBOL(emulate_vsx_load); -void emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg, - void *mem, bool rev) +static nokprobe_inline void emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg, + void *mem, bool rev) { int size, write_size; int i, j; @@ -750,6 +877,22 @@ void emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg, size = GETSIZE(op->type); switch (op->element_size) { + case 32: + /* [p]stxvp[x] */ + if (size == 0) + break; + if (rev) { + /* reverse 32 bytes */ + union vsx_reg buf32[2]; + buf32[0].d[0] = byterev_8(reg[1].d[1]); + buf32[0].d[1] = byterev_8(reg[1].d[0]); + buf32[1].d[0] = byterev_8(reg[0].d[1]); + buf32[1].d[1] = byterev_8(reg[0].d[0]); + memcpy(mem, buf32, size); + } else { + memcpy(mem, reg, size); + } + break; case 16: /* stxv, stxvx, stxvl, stxvll */ if (size == 0) @@ -810,36 +953,49 @@ void emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg, break; } } -EXPORT_SYMBOL_GPL(emulate_vsx_store); -NOKPROBE_SYMBOL(emulate_vsx_store); static nokprobe_inline int do_vsx_load(struct instruction_op *op, unsigned long ea, struct pt_regs *regs, bool cross_endian) { int reg = op->reg; - u8 mem[16]; - union vsx_reg buf; + int i, j, nr_vsx_regs; + u8 mem[32]; + union vsx_reg buf[2]; int size = GETSIZE(op->type); if (!address_ok(regs, ea, size) || copy_mem_in(mem, ea, size, regs)) return -EFAULT; - emulate_vsx_load(op, &buf, mem, cross_endian); + nr_vsx_regs = max(1ul, size / sizeof(__vector128)); + emulate_vsx_load(op, buf, mem, cross_endian); preempt_disable(); if (reg < 32) { /* FP regs + extensions */ if (regs->msr & MSR_FP) { - load_vsrn(reg, &buf); + for (i = 0; i < nr_vsx_regs; i++) { + j = IS_LE ? nr_vsx_regs - i - 1 : i; + load_vsrn(reg + i, &buf[j].v); + } } else { - current->thread.fp_state.fpr[reg][0] = buf.d[0]; - current->thread.fp_state.fpr[reg][1] = buf.d[1]; + for (i = 0; i < nr_vsx_regs; i++) { + j = IS_LE ? nr_vsx_regs - i - 1 : i; + current->thread.fp_state.fpr[reg + i][0] = buf[j].d[0]; + current->thread.fp_state.fpr[reg + i][1] = buf[j].d[1]; + } } } else { - if (regs->msr & MSR_VEC) - load_vsrn(reg, &buf); - else - current->thread.vr_state.vr[reg - 32] = buf.v; + if (regs->msr & MSR_VEC) { + for (i = 0; i < nr_vsx_regs; i++) { + j = IS_LE ? nr_vsx_regs - i - 1 : i; + load_vsrn(reg + i, &buf[j].v); + } + } else { + for (i = 0; i < nr_vsx_regs; i++) { + j = IS_LE ? 
nr_vsx_regs - i - 1 : i; + current->thread.vr_state.vr[reg - 32 + i] = buf[j].v; + } + } } preempt_enable(); return 0; @@ -850,63 +1006,96 @@ static nokprobe_inline int do_vsx_store(struct instruction_op *op, bool cross_endian) { int reg = op->reg; - u8 mem[16]; - union vsx_reg buf; + int i, j, nr_vsx_regs; + u8 mem[32]; + union vsx_reg buf[2]; int size = GETSIZE(op->type); if (!address_ok(regs, ea, size)) return -EFAULT; + nr_vsx_regs = max(1ul, size / sizeof(__vector128)); preempt_disable(); if (reg < 32) { /* FP regs + extensions */ if (regs->msr & MSR_FP) { - store_vsrn(reg, &buf); + for (i = 0; i < nr_vsx_regs; i++) { + j = IS_LE ? nr_vsx_regs - i - 1 : i; + store_vsrn(reg + i, &buf[j].v); + } } else { - buf.d[0] = current->thread.fp_state.fpr[reg][0]; - buf.d[1] = current->thread.fp_state.fpr[reg][1]; + for (i = 0; i < nr_vsx_regs; i++) { + j = IS_LE ? nr_vsx_regs - i - 1 : i; + buf[j].d[0] = current->thread.fp_state.fpr[reg + i][0]; + buf[j].d[1] = current->thread.fp_state.fpr[reg + i][1]; + } } } else { - if (regs->msr & MSR_VEC) - store_vsrn(reg, &buf); - else - buf.v = current->thread.vr_state.vr[reg - 32]; + if (regs->msr & MSR_VEC) { + for (i = 0; i < nr_vsx_regs; i++) { + j = IS_LE ? nr_vsx_regs - i - 1 : i; + store_vsrn(reg + i, &buf[j].v); + } + } else { + for (i = 0; i < nr_vsx_regs; i++) { + j = IS_LE ? nr_vsx_regs - i - 1 : i; + buf[j].v = current->thread.vr_state.vr[reg - 32 + i]; + } + } } preempt_enable(); - emulate_vsx_store(op, &buf, mem, cross_endian); + emulate_vsx_store(op, buf, mem, cross_endian); return copy_mem_out(mem, ea, size, regs); } #endif /* CONFIG_VSX */ +static __always_inline int __emulate_dcbz(unsigned long ea) +{ + unsigned long i; + unsigned long size = l1_dcache_bytes(); + + for (i = 0; i < size; i += sizeof(long)) + unsafe_put_user(0, (unsigned long __user *)(ea + i), Efault); + + return 0; + +Efault: + return -EFAULT; +} + int emulate_dcbz(unsigned long ea, struct pt_regs *regs) { int err; - unsigned long i, size; + unsigned long size = l1_dcache_bytes(); -#ifdef __powerpc64__ - size = ppc64_caches.l1d.block_size; - if (!(regs->msr & MSR_64BIT)) - ea &= 0xffffffffUL; -#else - size = L1_CACHE_BYTES; -#endif + ea = truncate_if_32bit(regs->msr, ea); ea &= ~(size - 1); if (!address_ok(regs, ea, size)) return -EFAULT; - for (i = 0; i < size; i += sizeof(long)) { - err = __put_user(0, (unsigned long __user *) (ea + i)); - if (err) { - regs->dar = ea; - return err; - } + + if (is_kernel_addr(ea)) { + err = __emulate_dcbz(ea); + } else if (user_write_access_begin((void __user *)ea, size)) { + err = __emulate_dcbz(ea); + user_write_access_end(); + } else { + err = -EFAULT; } - return 0; + + if (err) + regs->dar = ea; + + + return err; } NOKPROBE_SYMBOL(emulate_dcbz); #define __put_user_asmx(x, addr, err, op, cr) \ __asm__ __volatile__( \ + ".machine push\n" \ + ".machine power8\n" \ "1: " op " %2,0,%3\n" \ + ".machine pop\n" \ " mfcr %1\n" \ "2:\n" \ ".section .fixup,\"ax\"\n" \ @@ -919,7 +1108,10 @@ NOKPROBE_SYMBOL(emulate_dcbz); #define __get_user_asmx(x, addr, err, op) \ __asm__ __volatile__( \ + ".machine push\n" \ + ".machine power8\n" \ "1: "op" %1,0,%2\n" \ + ".machine pop\n" \ "2:\n" \ ".section .fixup,\"ax\"\n" \ "3: li %0,%3\n" \ @@ -948,10 +1140,8 @@ static nokprobe_inline void set_cr0(const struct pt_regs *regs, op->type |= SETCC; op->ccval = (regs->ccr & 0x0fffffff) | ((regs->xer >> 3) & 0x10000000); -#ifdef __powerpc64__ if (!(regs->msr & MSR_64BIT)) val = (int) val; -#endif if (val < 0) op->ccval |= 0x80000000; else if (val > 0) @@ -979,15 
+1169,11 @@ static nokprobe_inline void add_with_carry(const struct pt_regs *regs, if (carry_in) ++val; - op->type = COMPUTE + SETREG + SETXER; + op->type = COMPUTE | SETREG | SETXER; op->reg = rd; op->val = val; -#ifdef __powerpc64__ - if (!(regs->msr & MSR_64BIT)) { - val = (unsigned int) val; - val1 = (unsigned int) val1; - } -#endif + val = truncate_if_32bit(regs->msr, val); + val1 = truncate_if_32bit(regs->msr, val1); op->xerval = regs->xer; if (val < val1 || (carry_in && val == val1)) op->xerval |= XER_CA; @@ -1004,7 +1190,7 @@ static nokprobe_inline void do_cmp_signed(const struct pt_regs *regs, { unsigned int crval, shift; - op->type = COMPUTE + SETCC; + op->type = COMPUTE | SETCC; crval = (regs->xer >> 31) & 1; /* get SO bit */ if (v1 < v2) crval |= 8; @@ -1023,7 +1209,7 @@ static nokprobe_inline void do_cmp_unsigned(const struct pt_regs *regs, { unsigned int crval, shift; - op->type = COMPUTE + SETCC; + op->type = COMPUTE | SETCC; crval = (regs->xer >> 31) & 1; /* get SO bit */ if (v1 < v2) crval |= 8; @@ -1163,54 +1349,64 @@ static nokprobe_inline int trap_compare(long v1, long v2) * otherwise. */ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, - unsigned int instr) + ppc_inst_t instr) { +#ifdef CONFIG_PPC64 + unsigned int suffixopcode, prefixtype, prefix_r; +#endif unsigned int opcode, ra, rb, rc, rd, spr, u; unsigned long int imm; unsigned long int val, val2; unsigned int mb, me, sh; + unsigned int word, suffix; long ival; + word = ppc_inst_val(instr); + suffix = ppc_inst_suffix(instr); + op->type = COMPUTE; - opcode = instr >> 26; + opcode = ppc_inst_primary_opcode(instr); switch (opcode) { case 16: /* bc */ op->type = BRANCH; - imm = (signed short)(instr & 0xfffc); - if ((instr & 2) == 0) + imm = (signed short)(word & 0xfffc); + if ((word & 2) == 0) imm += regs->nip; op->val = truncate_if_32bit(regs->msr, imm); - if (instr & 1) + if (word & 1) op->type |= SETLK; - if (branch_taken(instr, regs, op)) + if (branch_taken(word, regs, op)) op->type |= BRTAKEN; return 1; -#ifdef CONFIG_PPC64 case 17: /* sc */ - if ((instr & 0xfe2) == 2) + if ((word & 0xfe2) == 2) op->type = SYSCALL; - else + else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && + (word & 0xfe3) == 1) { /* scv */ + op->type = SYSCALL_VECTORED_0; + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + } else op->type = UNKNOWN; return 0; -#endif case 18: /* b */ op->type = BRANCH | BRTAKEN; - imm = instr & 0x03fffffc; + imm = word & 0x03fffffc; if (imm & 0x02000000) imm -= 0x04000000; - if ((instr & 2) == 0) + if ((word & 2) == 0) imm += regs->nip; op->val = truncate_if_32bit(regs->msr, imm); - if (instr & 1) + if (word & 1) op->type |= SETLK; return 1; case 19: - switch ((instr >> 1) & 0x3ff) { + switch ((word >> 1) & 0x3ff) { case 0: /* mcrf */ op->type = COMPUTE + SETCC; - rd = 7 - ((instr >> 23) & 0x7); - ra = 7 - ((instr >> 18) & 0x7); + rd = 7 - ((word >> 23) & 0x7); + ra = 7 - ((word >> 18) & 0x7); rd *= 4; ra *= 4; val = (regs->ccr >> ra) & 0xf; @@ -1220,16 +1416,16 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 16: /* bclr */ case 528: /* bcctr */ op->type = BRANCH; - imm = (instr & 0x400)? regs->ctr: regs->link; + imm = (word & 0x400)? 
regs->ctr: regs->link; op->val = truncate_if_32bit(regs->msr, imm); - if (instr & 1) + if (word & 1) op->type |= SETLK; - if (branch_taken(instr, regs, op)) + if (branch_taken(word, regs, op)) op->type |= BRTAKEN; return 1; case 18: /* rfid, scary */ - if (regs->msr & MSR_PR) + if (user_mode(regs)) goto priv; op->type = RFI; return 0; @@ -1247,23 +1443,23 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 417: /* crorc */ case 449: /* cror */ op->type = COMPUTE + SETCC; - ra = (instr >> 16) & 0x1f; - rb = (instr >> 11) & 0x1f; - rd = (instr >> 21) & 0x1f; + ra = (word >> 16) & 0x1f; + rb = (word >> 11) & 0x1f; + rd = (word >> 21) & 0x1f; ra = (regs->ccr >> (31 - ra)) & 1; rb = (regs->ccr >> (31 - rb)) & 1; - val = (instr >> (6 + ra * 2 + rb)) & 1; + val = (word >> (6 + ra * 2 + rb)) & 1; op->ccval = (regs->ccr & ~(1UL << (31 - rd))) | (val << (31 - rd)); return 1; } break; case 31: - switch ((instr >> 1) & 0x3ff) { + switch ((word >> 1) & 0x3ff) { case 598: /* sync */ op->type = BARRIER + BARRIER_SYNC; #ifdef __powerpc64__ - switch ((instr >> 21) & 3) { + switch ((word >> 21) & 3) { case 1: /* lwsync */ op->type = BARRIER + BARRIER_LWSYNC; break; @@ -1281,33 +1477,57 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; } - /* Following cases refer to regs->gpr[], so we need all regs */ - if (!FULL_REGS(regs)) - return -1; - - rd = (instr >> 21) & 0x1f; - ra = (instr >> 16) & 0x1f; - rb = (instr >> 11) & 0x1f; - rc = (instr >> 6) & 0x1f; + rd = (word >> 21) & 0x1f; + ra = (word >> 16) & 0x1f; + rb = (word >> 11) & 0x1f; + rc = (word >> 6) & 0x1f; switch (opcode) { #ifdef __powerpc64__ + case 1: + if (!cpu_has_feature(CPU_FTR_ARCH_31)) + goto unknown_opcode; + + prefix_r = GET_PREFIX_R(word); + ra = GET_PREFIX_RA(suffix); + rd = (suffix >> 21) & 0x1f; + op->reg = rd; + op->val = regs->gpr[rd]; + suffixopcode = get_op(suffix); + prefixtype = (word >> 24) & 0x3; + switch (prefixtype) { + case 2: + if (prefix_r && ra) + return 0; + switch (suffixopcode) { + case 14: /* paddi */ + op->type = COMPUTE | PREFIXED; + op->val = mlsd_8lsd_ea(word, suffix, regs); + goto compute_done; + } + } + break; case 2: /* tdi */ - if (rd & trap_compare(regs->gpr[ra], (short) instr)) + if (rd & trap_compare(regs->gpr[ra], (short) word)) goto trap; return 1; #endif case 3: /* twi */ - if (rd & trap_compare((int)regs->gpr[ra], (short) instr)) + if (rd & trap_compare((int)regs->gpr[ra], (short) word)) goto trap; return 1; #ifdef __powerpc64__ case 4: + /* + * There are very many instructions with this primary opcode + * introduced in the ISA as early as v2.03. However, the ones + * we currently emulate were all introduced with ISA 3.0 + */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; - switch (instr & 0x3f) { + switch (word & 0x3f) { case 48: /* maddhd */ asm volatile(PPC_MADDHD(%0, %1, %2, %3) : "=r" (op->val) : "r" (regs->gpr[ra]), @@ -1331,20 +1551,20 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, * There are other instructions from ISA 3.0 with the same * primary opcode which do not have emulation support yet. 
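The maddhd emulation above simply executes the instruction on the host CPU via inline assembly. For reference, what it computes is the high doubleword of a signed 128-bit multiply-accumulate; the related maddld instruction yields the low doubleword. A sketch of the same arithmetic using the compiler's __int128 extension (illustrative only, not how the kernel does it):

#include <stdint.h>

static int64_t sketch_maddhd(int64_t a, int64_t b, int64_t c)
{
	return (int64_t)((((__int128)a * b) + c) >> 64);	/* high doubleword */
}

static int64_t sketch_maddld(int64_t a, int64_t b, int64_t c)
{
	return (int64_t)(((__int128)a * b) + c);		/* low doubleword */
}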
*/ - return -1; + goto unknown_opcode; #endif case 7: /* mulli */ - op->val = regs->gpr[ra] * (short) instr; + op->val = regs->gpr[ra] * (short) word; goto compute_done; case 8: /* subfic */ - imm = (short) instr; + imm = (short) word; add_with_carry(regs, op, rd, ~regs->gpr[ra], imm, 1); return 1; case 10: /* cmpli */ - imm = (unsigned short) instr; + imm = (unsigned short) word; val = regs->gpr[ra]; #ifdef __powerpc64__ if ((rd & 1) == 0) @@ -1354,7 +1574,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, return 1; case 11: /* cmpi */ - imm = (short) instr; + imm = (short) word; val = regs->gpr[ra]; #ifdef __powerpc64__ if ((rd & 1) == 0) @@ -1364,35 +1584,37 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, return 1; case 12: /* addic */ - imm = (short) instr; + imm = (short) word; add_with_carry(regs, op, rd, regs->gpr[ra], imm, 0); return 1; case 13: /* addic. */ - imm = (short) instr; + imm = (short) word; add_with_carry(regs, op, rd, regs->gpr[ra], imm, 0); set_cr0(regs, op); return 1; case 14: /* addi */ - imm = (short) instr; + imm = (short) word; if (ra) imm += regs->gpr[ra]; op->val = imm; goto compute_done; case 15: /* addis */ - imm = ((short) instr) << 16; + imm = ((short) word) << 16; if (ra) imm += regs->gpr[ra]; op->val = imm; goto compute_done; case 19: - if (((instr >> 1) & 0x1f) == 2) { + if (((word >> 1) & 0x1f) == 2) { /* addpcis */ - imm = (short) (instr & 0xffc1); /* d0 + d2 fields */ - imm |= (instr >> 15) & 0x3e; /* d1 field */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + imm = (short) (word & 0xffc1); /* d0 + d2 fields */ + imm |= (word >> 15) & 0x3e; /* d1 field */ op->val = regs->nip + (imm << 16) + 4; goto compute_done; } @@ -1400,65 +1622,65 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, return 0; case 20: /* rlwimi */ - mb = (instr >> 6) & 0x1f; - me = (instr >> 1) & 0x1f; + mb = (word >> 6) & 0x1f; + me = (word >> 1) & 0x1f; val = DATA32(regs->gpr[rd]); imm = MASK32(mb, me); op->val = (regs->gpr[ra] & ~imm) | (ROTATE(val, rb) & imm); goto logical_done; case 21: /* rlwinm */ - mb = (instr >> 6) & 0x1f; - me = (instr >> 1) & 0x1f; + mb = (word >> 6) & 0x1f; + me = (word >> 1) & 0x1f; val = DATA32(regs->gpr[rd]); op->val = ROTATE(val, rb) & MASK32(mb, me); goto logical_done; case 23: /* rlwnm */ - mb = (instr >> 6) & 0x1f; - me = (instr >> 1) & 0x1f; + mb = (word >> 6) & 0x1f; + me = (word >> 1) & 0x1f; rb = regs->gpr[rb] & 0x1f; val = DATA32(regs->gpr[rd]); op->val = ROTATE(val, rb) & MASK32(mb, me); goto logical_done; case 24: /* ori */ - op->val = regs->gpr[rd] | (unsigned short) instr; + op->val = regs->gpr[rd] | (unsigned short) word; goto logical_done_nocc; case 25: /* oris */ - imm = (unsigned short) instr; + imm = (unsigned short) word; op->val = regs->gpr[rd] | (imm << 16); goto logical_done_nocc; case 26: /* xori */ - op->val = regs->gpr[rd] ^ (unsigned short) instr; + op->val = regs->gpr[rd] ^ (unsigned short) word; goto logical_done_nocc; case 27: /* xoris */ - imm = (unsigned short) instr; + imm = (unsigned short) word; op->val = regs->gpr[rd] ^ (imm << 16); goto logical_done_nocc; case 28: /* andi. */ - op->val = regs->gpr[rd] & (unsigned short) instr; + op->val = regs->gpr[rd] & (unsigned short) word; set_cr0(regs, op); goto logical_done_nocc; case 29: /* andis. 
*/ - imm = (unsigned short) instr; + imm = (unsigned short) word; op->val = regs->gpr[rd] & (imm << 16); set_cr0(regs, op); goto logical_done_nocc; #ifdef __powerpc64__ case 30: /* rld* */ - mb = ((instr >> 6) & 0x1f) | (instr & 0x20); + mb = ((word >> 6) & 0x1f) | (word & 0x20); val = regs->gpr[rd]; - if ((instr & 0x10) == 0) { - sh = rb | ((instr & 2) << 4); + if ((word & 0x10) == 0) { + sh = rb | ((word & 2) << 4); val = ROTATE(val, sh); - switch ((instr >> 2) & 3) { + switch ((word >> 2) & 3) { case 0: /* rldicl */ val &= MASK64_L(mb); break; @@ -1478,7 +1700,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, } else { sh = regs->gpr[rb] & 0x3f; val = ROTATE(val, sh); - switch ((instr >> 1) & 7) { + switch ((word >> 1) & 7) { case 0: /* rldcl */ op->val = val & MASK64_L(mb); goto logical_done; @@ -1493,8 +1715,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 31: /* isel occupies 32 minor opcodes */ - if (((instr >> 1) & 0x1f) == 15) { - mb = (instr >> 6) & 0x1f; /* bc field */ + if (((word >> 1) & 0x1f) == 15) { + mb = (word >> 6) & 0x1f; /* bc field */ val = (regs->ccr >> (31 - mb)) & 1; val2 = (ra) ? regs->gpr[ra] : 0; @@ -1502,7 +1724,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, goto compute_done; } - switch ((instr >> 1) & 0x3ff) { + switch ((word >> 1) & 0x3ff) { case 4: /* tw */ if (rd == 0x1f || (rd & trap_compare((int)regs->gpr[ra], @@ -1516,13 +1738,13 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, return 1; #endif case 83: /* mfmsr */ - if (regs->msr & MSR_PR) + if (user_mode(regs)) goto priv; op->type = MFMSR; op->reg = rd; return 0; case 146: /* mtmsr */ - if (regs->msr & MSR_PR) + if (user_mode(regs)) goto priv; op->type = MTMSR; op->reg = rd; @@ -1530,23 +1752,23 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, return 0; #ifdef CONFIG_PPC64 case 178: /* mtmsrd */ - if (regs->msr & MSR_PR) + if (user_mode(regs)) goto priv; op->type = MTMSR; op->reg = rd; /* only MSR_EE and MSR_RI get changed if bit 15 set */ /* mtmsrd doesn't change MSR_HV, MSR_ME or MSR_LE */ - imm = (instr & 0x10000)? 0x8002: 0xefffffffffffeffeUL; + imm = (word & 0x10000)? 0x8002: 0xefffffffffffeffeUL; op->val = imm; return 0; #endif case 19: /* mfcr */ imm = 0xffffffffUL; - if ((instr >> 20) & 1) { + if ((word >> 20) & 1) { imm = 0xf0000000UL; for (sh = 0; sh < 8; ++sh) { - if (instr & (0x80000 >> sh)) + if (word & (0x80000 >> sh)) break; imm >>= 4; } @@ -1554,13 +1776,35 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, op->val = regs->ccr & imm; goto compute_done; + case 128: /* setb */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + /* + * 'ra' encodes the CR field number (bfa) in the top 3 bits. + * Since each CR field is 4 bits, + * we can simply mask off the bottom two bits (bfa * 4) + * to yield the first bit in the CR field. 
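The setb decoding that follows reads one 4-bit CR field and produces -1, 1 or 0 from its LT and GT bits. The same computation written out directly (a sketch; sketch_setb() and its arguments are illustrative):

#include <stdint.h>

static int64_t sketch_setb(uint32_t ccr, unsigned int bfa)	/* bfa = CR field number, 0..7 */
{
	uint32_t field = (ccr >> (28 - 4 * bfa)) & 0xf;		/* CR0 is the top nibble */

	if (field & 8)
		return -1;	/* LT bit set */
	if (field & 4)
		return 1;	/* GT bit set */
	return 0;		/* neither */
}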
+ */ + ra = ra & ~0x3; + /* 'val' stores bits of the CR field (bfa) */ + val = regs->ccr >> (CR0_SHIFT - ra); + /* checks if the LT bit of CR field (bfa) is set */ + if (val & 8) + op->val = -1; + /* checks if the GT bit of CR field (bfa) is set */ + else if (val & 4) + op->val = 1; + else + op->val = 0; + goto compute_done; + case 144: /* mtcrf */ op->type = COMPUTE + SETCC; imm = 0xf0000000UL; val = regs->gpr[rd]; op->ccval = regs->ccr; for (sh = 0; sh < 8; ++sh) { - if (instr & (0x80000 >> sh)) + if (word & (0x80000 >> sh)) op->ccval = (op->ccval & ~imm) | (val & imm); imm >>= 4; @@ -1568,7 +1812,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, return 1; case 339: /* mfspr */ - spr = ((instr >> 16) & 0x1f) | ((instr >> 6) & 0x3e0); + spr = ((word >> 16) & 0x1f) | ((word >> 6) & 0x3e0); op->type = MFSPR; op->reg = rd; op->spr = spr; @@ -1578,7 +1822,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, return 0; case 467: /* mtspr */ - spr = ((instr >> 16) & 0x1f) | ((instr >> 6) & 0x3e0); + spr = ((word >> 16) & 0x1f) | ((word >> 6) & 0x3e0); op->type = MTSPR; op->val = regs->gpr[rd]; op->spr = spr; @@ -1703,7 +1947,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #ifdef __powerpc64__ case 265: /* modud */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; op->val = regs->gpr[ra] % regs->gpr[rb]; goto compute_done; #endif @@ -1713,7 +1957,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 267: /* moduw */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; op->val = (unsigned int) regs->gpr[ra] % (unsigned int) regs->gpr[rb]; goto compute_done; @@ -1736,10 +1980,21 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, op->val = (int) regs->gpr[ra] / (int) regs->gpr[rb]; goto arith_done; - +#ifdef __powerpc64__ + case 425: /* divde[.] */ + asm volatile(PPC_DIVDE(%0, %1, %2) : + "=r" (op->val) : "r" (regs->gpr[ra]), + "r" (regs->gpr[rb])); + goto arith_done; + case 393: /* divdeu[.] */ + asm volatile(PPC_DIVDEU(%0, %1, %2) : + "=r" (op->val) : "r" (regs->gpr[ra]), + "r" (regs->gpr[rb])); + goto arith_done; +#endif case 755: /* darn */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; switch (ra & 0x3) { case 0: /* 32-bit conditioned */ @@ -1757,18 +2012,18 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, goto compute_done; } - return -1; + goto unknown_opcode; #ifdef __powerpc64__ case 777: /* modsd */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; op->val = (long int) regs->gpr[ra] % (long int) regs->gpr[rb]; goto compute_done; #endif case 779: /* modsw */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; op->val = (int) regs->gpr[ra] % (int) regs->gpr[rb]; goto compute_done; @@ -1845,14 +2100,14 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #endif case 538: /* cnttzw */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; val = (unsigned int) regs->gpr[rd]; op->val = (val ? __builtin_ctz(val) : 32); goto logical_done; #ifdef __powerpc64__ case 570: /* cnttzd */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; val = regs->gpr[rd]; op->val = (val ? 
__builtin_ctzl(val) : 64); goto logical_done; @@ -1948,7 +2203,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 826: /* sradi with sh_5 = 0 */ case 827: /* sradi with sh_5 = 1 */ op->type = COMPUTE + SETREG + SETXER; - sh = rb | ((instr & 2) << 4); + sh = rb | ((word & 2) << 4); ival = (signed long int) regs->gpr[rd]; op->val = ival >> sh; op->xerval = regs->xer; @@ -1962,9 +2217,9 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 890: /* extswsli with sh_5 = 0 */ case 891: /* extswsli with sh_5 = 1 */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; op->type = COMPUTE + SETREG; - sh = rb | ((instr & 2) << 4); + sh = rb | ((word & 2) << 4); val = (signed int) regs->gpr[rd]; if (sh) op->val = ROTATE(val, sh) & MASK64(0, 63 - sh); @@ -1979,34 +2234,34 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, */ case 54: /* dcbst */ op->type = MKOP(CACHEOP, DCBST, 0); - op->ea = xform_ea(instr, regs); + op->ea = xform_ea(word, regs); return 0; case 86: /* dcbf */ op->type = MKOP(CACHEOP, DCBF, 0); - op->ea = xform_ea(instr, regs); + op->ea = xform_ea(word, regs); return 0; case 246: /* dcbtst */ op->type = MKOP(CACHEOP, DCBTST, 0); - op->ea = xform_ea(instr, regs); + op->ea = xform_ea(word, regs); op->reg = rd; return 0; case 278: /* dcbt */ op->type = MKOP(CACHEOP, DCBTST, 0); - op->ea = xform_ea(instr, regs); + op->ea = xform_ea(word, regs); op->reg = rd; return 0; case 982: /* icbi */ op->type = MKOP(CACHEOP, ICBI, 0); - op->ea = xform_ea(instr, regs); + op->ea = xform_ea(word, regs); return 0; case 1014: /* dcbz */ op->type = MKOP(CACHEOP, DCBZ, 0); - op->ea = xform_ea(instr, regs); + op->ea = xform_ea(word, regs); return 0; } break; @@ -2019,14 +2274,14 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, op->update_reg = ra; op->reg = rd; op->val = regs->gpr[rd]; - u = (instr >> 20) & UPDATE; + u = (word >> 20) & UPDATE; op->vsx_flags = 0; switch (opcode) { case 31: - u = instr & UPDATE; - op->ea = xform_ea(instr, regs); - switch ((instr >> 1) & 0x3ff) { + u = word & UPDATE; + op->ea = xform_ea(word, regs); + switch ((word >> 1) & 0x3ff) { case 20: /* lwarx */ op->type = MKOP(LARX, 0, 4); break; @@ -2035,15 +2290,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, op->type = MKOP(STCX, 0, 4); break; -#ifdef __powerpc64__ - case 84: /* ldarx */ - op->type = MKOP(LARX, 0, 8); - break; - - case 214: /* stdcx. */ - op->type = MKOP(STCX, 0, 8); - break; - +#ifdef CONFIG_PPC_HAS_LBARX_LHARX case 52: /* lbarx */ op->type = MKOP(LARX, 0, 1); break; @@ -2059,6 +2306,15 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 726: /* sthcx. */ op->type = MKOP(STCX, 0, 2); break; +#endif +#ifdef __powerpc64__ + case 84: /* ldarx */ + op->type = MKOP(LARX, 0, 8); + break; + + case 214: /* stdcx. 
*/ + op->type = MKOP(STCX, 0, 8); + break; case 276: /* lqarx */ if (!((rd & 1) || rd == ra || rd == rb)) @@ -2271,25 +2527,27 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #ifdef CONFIG_VSX case 12: /* lxsiwzx */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 4); op->element_size = 8; break; case 76: /* lxsiwax */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, SIGNEXT, 4); op->element_size = 8; break; case 140: /* stxsiwx */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 4); op->element_size = 8; break; case 268: /* lxvx */ - op->reg = rd | ((instr & 1) << 5); + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 16); op->element_size = 16; op->vsx_flags = VSX_CHECK_VEC; @@ -2298,33 +2556,47 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 269: /* lxvl */ case 301: { /* lxvll */ int nb; - op->reg = rd | ((instr & 1) << 5); + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->reg = rd | ((word & 1) << 5); op->ea = ra ? regs->gpr[ra] : 0; nb = regs->gpr[rb] & 0xff; if (nb > 16) nb = 16; op->type = MKOP(LOAD_VSX, 0, nb); op->element_size = 16; - op->vsx_flags = ((instr & 0x20) ? VSX_LDLEFT : 0) | + op->vsx_flags = ((word & 0x20) ? VSX_LDLEFT : 0) | VSX_CHECK_VEC; break; } case 332: /* lxvdsx */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 8); op->element_size = 8; op->vsx_flags = VSX_SPLAT; break; + case 333: /* lxvpx */ + if (!cpu_has_feature(CPU_FTR_ARCH_31)) + goto unknown_opcode; + op->reg = VSX_REGISTER_XTP(rd); + op->type = MKOP(LOAD_VSX, 0, 32); + op->element_size = 32; + break; + case 364: /* lxvwsx */ - op->reg = rd | ((instr & 1) << 5); + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 4); op->element_size = 4; op->vsx_flags = VSX_SPLAT | VSX_CHECK_VEC; break; case 396: /* stxvx */ - op->reg = rd | ((instr & 1) << 5); + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 16); op->element_size = 16; op->vsx_flags = VSX_CHECK_VEC; @@ -2333,118 +2605,143 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 397: /* stxvl */ case 429: { /* stxvll */ int nb; - op->reg = rd | ((instr & 1) << 5); + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->reg = rd | ((word & 1) << 5); op->ea = ra ? regs->gpr[ra] : 0; nb = regs->gpr[rb] & 0xff; if (nb > 16) nb = 16; op->type = MKOP(STORE_VSX, 0, nb); op->element_size = 16; - op->vsx_flags = ((instr & 0x20) ? VSX_LDLEFT : 0) | + op->vsx_flags = ((word & 0x20) ? 
VSX_LDLEFT : 0) | VSX_CHECK_VEC; break; } + case 461: /* stxvpx */ + if (!cpu_has_feature(CPU_FTR_ARCH_31)) + goto unknown_opcode; + op->reg = VSX_REGISTER_XTP(rd); + op->type = MKOP(STORE_VSX, 0, 32); + op->element_size = 32; + break; case 524: /* lxsspx */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 4); op->element_size = 8; op->vsx_flags = VSX_FPCONV; break; case 588: /* lxsdx */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 8); op->element_size = 8; break; case 652: /* stxsspx */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 4); op->element_size = 8; op->vsx_flags = VSX_FPCONV; break; case 716: /* stxsdx */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 8); op->element_size = 8; break; case 780: /* lxvw4x */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 16); op->element_size = 4; break; case 781: /* lxsibzx */ - op->reg = rd | ((instr & 1) << 5); + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 1); op->element_size = 8; op->vsx_flags = VSX_CHECK_VEC; break; case 812: /* lxvh8x */ - op->reg = rd | ((instr & 1) << 5); + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 16); op->element_size = 2; op->vsx_flags = VSX_CHECK_VEC; break; case 813: /* lxsihzx */ - op->reg = rd | ((instr & 1) << 5); + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 2); op->element_size = 8; op->vsx_flags = VSX_CHECK_VEC; break; case 844: /* lxvd2x */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 16); op->element_size = 8; break; case 876: /* lxvb16x */ - op->reg = rd | ((instr & 1) << 5); + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 16); op->element_size = 1; op->vsx_flags = VSX_CHECK_VEC; break; case 908: /* stxvw4x */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 16); op->element_size = 4; break; case 909: /* stxsibx */ - op->reg = rd | ((instr & 1) << 5); + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 1); op->element_size = 8; op->vsx_flags = VSX_CHECK_VEC; break; case 940: /* stxvh8x */ - op->reg = rd | ((instr & 1) << 5); + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 16); op->element_size = 2; op->vsx_flags = VSX_CHECK_VEC; break; case 941: /* stxsihx */ - op->reg = rd | ((instr & 1) << 5); + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 2); op->element_size = 8; op->vsx_flags = VSX_CHECK_VEC; break; case 972: /* stxvd2x */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 16); op->element_size = 8; break; case 1004: /* stxvb16x */ - op->reg = rd | ((instr & 1) << 5); + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 16); 
op->element_size = 1; op->vsx_flags = VSX_CHECK_VEC; @@ -2457,80 +2754,80 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 32: /* lwz */ case 33: /* lwzu */ op->type = MKOP(LOAD, u, 4); - op->ea = dform_ea(instr, regs); + op->ea = dform_ea(word, regs); break; case 34: /* lbz */ case 35: /* lbzu */ op->type = MKOP(LOAD, u, 1); - op->ea = dform_ea(instr, regs); + op->ea = dform_ea(word, regs); break; case 36: /* stw */ case 37: /* stwu */ op->type = MKOP(STORE, u, 4); - op->ea = dform_ea(instr, regs); + op->ea = dform_ea(word, regs); break; case 38: /* stb */ case 39: /* stbu */ op->type = MKOP(STORE, u, 1); - op->ea = dform_ea(instr, regs); + op->ea = dform_ea(word, regs); break; case 40: /* lhz */ case 41: /* lhzu */ op->type = MKOP(LOAD, u, 2); - op->ea = dform_ea(instr, regs); + op->ea = dform_ea(word, regs); break; case 42: /* lha */ case 43: /* lhau */ op->type = MKOP(LOAD, SIGNEXT | u, 2); - op->ea = dform_ea(instr, regs); + op->ea = dform_ea(word, regs); break; case 44: /* sth */ case 45: /* sthu */ op->type = MKOP(STORE, u, 2); - op->ea = dform_ea(instr, regs); + op->ea = dform_ea(word, regs); break; case 46: /* lmw */ if (ra >= rd) break; /* invalid form, ra in range to load */ op->type = MKOP(LOAD_MULTI, 0, 4 * (32 - rd)); - op->ea = dform_ea(instr, regs); + op->ea = dform_ea(word, regs); break; case 47: /* stmw */ op->type = MKOP(STORE_MULTI, 0, 4 * (32 - rd)); - op->ea = dform_ea(instr, regs); + op->ea = dform_ea(word, regs); break; #ifdef CONFIG_PPC_FPU case 48: /* lfs */ case 49: /* lfsu */ op->type = MKOP(LOAD_FP, u | FPCONV, 4); - op->ea = dform_ea(instr, regs); + op->ea = dform_ea(word, regs); break; case 50: /* lfd */ case 51: /* lfdu */ op->type = MKOP(LOAD_FP, u, 8); - op->ea = dform_ea(instr, regs); + op->ea = dform_ea(word, regs); break; case 52: /* stfs */ case 53: /* stfsu */ op->type = MKOP(STORE_FP, u | FPCONV, 4); - op->ea = dform_ea(instr, regs); + op->ea = dform_ea(word, regs); break; case 54: /* stfd */ case 55: /* stfdu */ op->type = MKOP(STORE_FP, u, 8); - op->ea = dform_ea(instr, regs); + op->ea = dform_ea(word, regs); break; #endif @@ -2538,26 +2835,30 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 56: /* lq */ if (!((rd & 1) || (rd == ra))) op->type = MKOP(LOAD, 0, 16); - op->ea = dqform_ea(instr, regs); + op->ea = dqform_ea(word, regs); break; #endif #ifdef CONFIG_VSX case 57: /* lfdp, lxsd, lxssp */ - op->ea = dsform_ea(instr, regs); - switch (instr & 3) { + op->ea = dsform_ea(word, regs); + switch (word & 3) { case 0: /* lfdp */ if (rd & 1) break; /* reg must be even */ op->type = MKOP(LOAD_FP, 0, 16); break; case 2: /* lxsd */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd + 32; op->type = MKOP(LOAD_VSX, 0, 8); op->element_size = 8; op->vsx_flags = VSX_CHECK_VEC; break; case 3: /* lxssp */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd + 32; op->type = MKOP(LOAD_VSX, 0, 4); op->element_size = 8; @@ -2569,8 +2870,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #ifdef __powerpc64__ case 58: /* ld[u], lwa */ - op->ea = dsform_ea(instr, regs); - switch (instr & 3) { + op->ea = dsform_ea(word, regs); + switch (word & 3) { case 0: /* ld */ op->type = MKOP(LOAD, 0, 8); break; @@ -2585,17 +2886,35 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #endif #ifdef CONFIG_VSX + case 6: + if (!cpu_has_feature(CPU_FTR_ARCH_31)) + goto unknown_opcode; + op->ea = dqform_ea(word, regs); + 
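/*
 * Editor's note (illustrative, not taken from the patch): the loads and
 * stores above use three closely related displacement encodings. D-form
 * keeps a full signed 16-bit offset, DS-form reuses the low two bits as a
 * sub-opcode (hence the "switch (word & 3)" selections), and DQ-form
 * reuses the low four bits, forcing 16-byte alignment. The extracted
 * displacement is then added to GPR[RA], or to zero when RA == 0:
 */
static long d_displacement(unsigned int word)  { return (signed short)word; }
static long ds_displacement(unsigned int word) { return (signed short)(word & ~0x3); }
static long dq_displacement(unsigned int word) { return (signed short)(word & ~0xf); }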
op->reg = VSX_REGISTER_XTP(rd); + op->element_size = 32; + switch (word & 0xf) { + case 0: /* lxvp */ + op->type = MKOP(LOAD_VSX, 0, 32); + break; + case 1: /* stxvp */ + op->type = MKOP(STORE_VSX, 0, 32); + break; + } + break; + case 61: /* stfdp, lxv, stxsd, stxssp, stxv */ - switch (instr & 7) { + switch (word & 7) { case 0: /* stfdp with LSB of DS field = 0 */ case 4: /* stfdp with LSB of DS field = 1 */ - op->ea = dsform_ea(instr, regs); + op->ea = dsform_ea(word, regs); op->type = MKOP(STORE_FP, 0, 16); break; case 1: /* lxv */ - op->ea = dqform_ea(instr, regs); - if (instr & 8) + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->ea = dqform_ea(word, regs); + if (word & 8) op->reg = rd + 32; op->type = MKOP(LOAD_VSX, 0, 16); op->element_size = 16; @@ -2604,7 +2923,9 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 2: /* stxsd with LSB of DS field = 0 */ case 6: /* stxsd with LSB of DS field = 1 */ - op->ea = dsform_ea(instr, regs); + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->ea = dsform_ea(word, regs); op->reg = rd + 32; op->type = MKOP(STORE_VSX, 0, 8); op->element_size = 8; @@ -2613,7 +2934,9 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 3: /* stxssp with LSB of DS field = 0 */ case 7: /* stxssp with LSB of DS field = 1 */ - op->ea = dsform_ea(instr, regs); + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->ea = dsform_ea(word, regs); op->reg = rd + 32; op->type = MKOP(STORE_VSX, 0, 4); op->element_size = 8; @@ -2621,8 +2944,10 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 5: /* stxv */ - op->ea = dqform_ea(instr, regs); - if (instr & 8) + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->ea = dqform_ea(word, regs); + if (word & 8) op->reg = rd + 32; op->type = MKOP(STORE_VSX, 0, 16); op->element_size = 16; @@ -2634,8 +2959,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #ifdef __powerpc64__ case 62: /* std[u] */ - op->ea = dsform_ea(instr, regs); - switch (instr & 3) { + op->ea = dsform_ea(word, regs); + switch (word & 3) { case 0: /* std */ op->type = MKOP(STORE, 0, 8); break; @@ -2648,10 +2973,161 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; } break; + case 1: /* Prefixed instructions */ + if (!cpu_has_feature(CPU_FTR_ARCH_31)) + goto unknown_opcode; + + prefix_r = GET_PREFIX_R(word); + ra = GET_PREFIX_RA(suffix); + op->update_reg = ra; + rd = (suffix >> 21) & 0x1f; + op->reg = rd; + op->val = regs->gpr[rd]; + + suffixopcode = get_op(suffix); + prefixtype = (word >> 24) & 0x3; + switch (prefixtype) { + case 0: /* Type 00 Eight-Byte Load/Store */ + if (prefix_r && ra) + break; + op->ea = mlsd_8lsd_ea(word, suffix, regs); + switch (suffixopcode) { + case 41: /* plwa */ + op->type = MKOP(LOAD, PREFIXED | SIGNEXT, 4); + break; +#ifdef CONFIG_VSX + case 42: /* plxsd */ + op->reg = rd + 32; + op->type = MKOP(LOAD_VSX, PREFIXED, 8); + op->element_size = 8; + op->vsx_flags = VSX_CHECK_VEC; + break; + case 43: /* plxssp */ + op->reg = rd + 32; + op->type = MKOP(LOAD_VSX, PREFIXED, 4); + op->element_size = 8; + op->vsx_flags = VSX_FPCONV | VSX_CHECK_VEC; + break; + case 46: /* pstxsd */ + op->reg = rd + 32; + op->type = MKOP(STORE_VSX, PREFIXED, 8); + op->element_size = 8; + op->vsx_flags = VSX_CHECK_VEC; + break; + case 47: /* pstxssp */ + op->reg = rd + 32; + op->type = MKOP(STORE_VSX, PREFIXED, 4); + op->element_size = 8; + 
op->vsx_flags = VSX_FPCONV | VSX_CHECK_VEC; + break; + case 51: /* plxv1 */ + op->reg += 32; + fallthrough; + case 50: /* plxv0 */ + op->type = MKOP(LOAD_VSX, PREFIXED, 16); + op->element_size = 16; + op->vsx_flags = VSX_CHECK_VEC; + break; + case 55: /* pstxv1 */ + op->reg = rd + 32; + fallthrough; + case 54: /* pstxv0 */ + op->type = MKOP(STORE_VSX, PREFIXED, 16); + op->element_size = 16; + op->vsx_flags = VSX_CHECK_VEC; + break; +#endif /* CONFIG_VSX */ + case 56: /* plq */ + op->type = MKOP(LOAD, PREFIXED, 16); + break; + case 57: /* pld */ + op->type = MKOP(LOAD, PREFIXED, 8); + break; +#ifdef CONFIG_VSX + case 58: /* plxvp */ + op->reg = VSX_REGISTER_XTP(rd); + op->type = MKOP(LOAD_VSX, PREFIXED, 32); + op->element_size = 32; + break; +#endif /* CONFIG_VSX */ + case 60: /* pstq */ + op->type = MKOP(STORE, PREFIXED, 16); + break; + case 61: /* pstd */ + op->type = MKOP(STORE, PREFIXED, 8); + break; +#ifdef CONFIG_VSX + case 62: /* pstxvp */ + op->reg = VSX_REGISTER_XTP(rd); + op->type = MKOP(STORE_VSX, PREFIXED, 32); + op->element_size = 32; + break; +#endif /* CONFIG_VSX */ + } + break; + case 1: /* Type 01 Eight-Byte Register-to-Register */ + break; + case 2: /* Type 10 Modified Load/Store */ + if (prefix_r && ra) + break; + op->ea = mlsd_8lsd_ea(word, suffix, regs); + switch (suffixopcode) { + case 32: /* plwz */ + op->type = MKOP(LOAD, PREFIXED, 4); + break; + case 34: /* plbz */ + op->type = MKOP(LOAD, PREFIXED, 1); + break; + case 36: /* pstw */ + op->type = MKOP(STORE, PREFIXED, 4); + break; + case 38: /* pstb */ + op->type = MKOP(STORE, PREFIXED, 1); + break; + case 40: /* plhz */ + op->type = MKOP(LOAD, PREFIXED, 2); + break; + case 42: /* plha */ + op->type = MKOP(LOAD, PREFIXED | SIGNEXT, 2); + break; + case 44: /* psth */ + op->type = MKOP(STORE, PREFIXED, 2); + break; + case 48: /* plfs */ + op->type = MKOP(LOAD_FP, PREFIXED | FPCONV, 4); + break; + case 50: /* plfd */ + op->type = MKOP(LOAD_FP, PREFIXED, 8); + break; + case 52: /* pstfs */ + op->type = MKOP(STORE_FP, PREFIXED | FPCONV, 4); + break; + case 54: /* pstfd */ + op->type = MKOP(STORE_FP, PREFIXED, 8); + break; + } + break; + case 3: /* Type 11 Modified Register-to-Register */ + break; + } #endif /* __powerpc64__ */ } + if (OP_IS_LOAD_STORE(op->type) && (op->type & UPDATE)) { + switch (GETTYPE(op->type)) { + case LOAD: + if (ra == rd) + goto unknown_opcode; + fallthrough; + case STORE: + case LOAD_FP: + case STORE_FP: + if (ra == 0) + goto unknown_opcode; + } + } + #ifdef CONFIG_VSX if ((GETTYPE(op->type) == LOAD_VSX || GETTYPE(op->type) == STORE_VSX) && @@ -2662,8 +3138,12 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, return 0; + unknown_opcode: + op->type = UNKNOWN; + return 0; + logical_done: - if (instr & 1) + if (word & 1) set_cr0(regs, op); logical_done_nocc: op->reg = ra; @@ -2671,7 +3151,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, return 1; arith_done: - if (instr & 1) + if (word & 1) set_cr0(regs, op); compute_done: op->reg = rd; @@ -2701,15 +3181,6 @@ NOKPROBE_SYMBOL(analyse_instr); */ static nokprobe_inline int handle_stack_update(unsigned long ea, struct pt_regs *regs) { -#ifdef CONFIG_PPC32 - /* - * Check if we will touch kernel stack overflow - */ - if (ea - STACK_INT_FRAME_SIZE <= current->thread.ksp_limit) { - printk(KERN_CRIT "Can't kprobe this since kernel stack would overflow.\n"); - return -EINVAL; - } -#endif /* CONFIG_PPC32 */ /* * Check if we already set since that means we'll * lose the previous value. 
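/*
 * Editor's sketch of the addressing behind the prefixed cases above.
 * mlsd_8lsd_ea() is the patch's real helper; this outline (names and
 * structure assumed, <linux/bitops.h> assumed for sign_extend64()) only
 * shows the idea: the prefix word carries the high 18 bits of a 34-bit
 * signed displacement, the suffix carries the low 16 bits, and the
 * prefix R bit selects PC-relative addressing.
 */
static unsigned long prefixed_ea_sketch(unsigned int prefix, unsigned int suffix,
					const struct pt_regs *regs)
{
	u64 d = ((u64)(prefix & 0x3ffff) << 16) | (suffix & 0xffff);
	long disp = sign_extend64(d, 33);	/* 34-bit signed immediate */
	unsigned int ra = (suffix >> 16) & 0x1f;

	if (prefix & 0x100000)			/* R bit: cia-relative form */
		return regs->nip + disp;

	return (ra ? regs->gpr[ra] : 0) + disp;
}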
@@ -2756,7 +3227,7 @@ void emulate_update_regs(struct pt_regs *regs, struct instruction_op *op) { unsigned long next_pc; - next_pc = truncate_if_32bit(regs->msr, regs->nip + 4); + next_pc = truncate_if_32bit(regs->msr, regs->nip + GETLENGTH(op->type)); switch (GETTYPE(op->type)) { case COMPUTE: if (op->type & SETREG) @@ -2787,12 +3258,14 @@ void emulate_update_regs(struct pt_regs *regs, struct instruction_op *op) case BARRIER_EIEIO: eieio(); break; +#ifdef CONFIG_PPC64 case BARRIER_LWSYNC: asm volatile("lwsync" : : : "memory"); break; case BARRIER_PTESYNC: asm volatile("ptesync" : : : "memory"); break; +#endif } break; @@ -2831,7 +3304,7 @@ void emulate_update_regs(struct pt_regs *regs, struct instruction_op *op) default: WARN_ON_ONCE(1); } - regs->nip = next_pc; + regs_set_return_ip(regs, next_pc); } NOKPROBE_SYMBOL(emulate_update_regs); @@ -2868,7 +3341,7 @@ int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op) err = 0; val = 0; switch (size) { -#ifdef __powerpc64__ +#ifdef CONFIG_PPC_HAS_LBARX_LHARX case 1: __get_user_asmx(val, ea, err, "lbarx"); break; @@ -2910,7 +3383,7 @@ int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op) __put_user_asmx(op->val, ea, err, "stbcx.", cr); break; case 2: - __put_user_asmx(op->val, ea, err, "stbcx.", cr); + __put_user_asmx(op->val, ea, err, "sthcx.", cr); break; #endif case 4: @@ -2960,14 +3433,14 @@ int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op) * stored in the thread_struct. If the instruction is in * the kernel, we must not touch the state in the thread_struct. */ - if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_FP)) + if (!user_mode(regs) && !(regs->msr & MSR_FP)) return 0; err = do_fp_load(op, ea, regs, cross_endian); break; #endif #ifdef CONFIG_ALTIVEC case LOAD_VMX: - if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_VEC)) + if (!user_mode(regs) && !(regs->msr & MSR_VEC)) return 0; err = do_vec_load(op->reg, ea, size, regs, cross_endian); break; @@ -2982,7 +3455,7 @@ int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op) */ if (op->reg >= 32 && (op->vsx_flags & VSX_CHECK_VEC)) msrbit = MSR_VEC; - if (!(regs->msr & MSR_PR) && !(regs->msr & msrbit)) + if (!user_mode(regs) && !(regs->msr & msrbit)) return 0; err = do_vsx_load(op, ea, regs, cross_endian); break; @@ -3018,8 +3491,7 @@ int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op) } #endif if ((op->type & UPDATE) && size == sizeof(long) && - op->reg == 1 && op->update_reg == 1 && - !(regs->msr & MSR_PR) && + op->reg == 1 && op->update_reg == 1 && !user_mode(regs) && ea >= regs->gpr[1] - STACK_INT_FRAME_SIZE) { err = handle_stack_update(ea, regs); break; @@ -3031,14 +3503,14 @@ int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op) #ifdef CONFIG_PPC_FPU case STORE_FP: - if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_FP)) + if (!user_mode(regs) && !(regs->msr & MSR_FP)) return 0; err = do_fp_store(op, ea, regs, cross_endian); break; #endif #ifdef CONFIG_ALTIVEC case STORE_VMX: - if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_VEC)) + if (!user_mode(regs) && !(regs->msr & MSR_VEC)) return 0; err = do_vec_store(op->reg, ea, size, regs, cross_endian); break; @@ -3053,7 +3525,7 @@ int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op) */ if (op->reg >= 32 && (op->vsx_flags & VSX_CHECK_VEC)) msrbit = MSR_VEC; - if (!(regs->msr & MSR_PR) && !(regs->msr & msrbit)) + if (!user_mode(regs) && !(regs->msr & msrbit)) return 0; err = do_vsx_store(op, ea, regs, cross_endian); break; @@ 
-3101,7 +3573,7 @@ NOKPROBE_SYMBOL(emulate_loadstore); * or -1 if the instruction is one that should not be stepped, * such as an rfid, or a mtmsrd that would clear MSR_RI. */ -int emulate_step(struct pt_regs *regs, unsigned int instr) +int emulate_step(struct pt_regs *regs, ppc_inst_t instr) { struct instruction_op op; int r, err, type; @@ -3169,38 +3641,31 @@ int emulate_step(struct pt_regs *regs, unsigned int instr) /* can't step mtmsr[d] that would clear MSR_RI */ return -1; /* here op.val is the mask of bits to change */ - regs->msr = (regs->msr & ~op.val) | (val & op.val); + regs_set_return_msr(regs, (regs->msr & ~op.val) | (val & op.val)); goto instr_done; -#ifdef CONFIG_PPC64 case SYSCALL: /* sc */ /* - * N.B. this uses knowledge about how the syscall - * entry code works. If that is changed, this will - * need to be changed also. + * Per ISA v3.1, section 7.5.15 'Trace Interrupt', we can't + * single step a system call instruction: + * + * Successful completion for an instruction means that the + * instruction caused no other interrupt. Thus a Trace + * interrupt never occurs for a System Call or System Call + * Vectored instruction, or for a Trap instruction that + * traps. */ - if (regs->gpr[0] == 0x1ebe && - cpu_has_feature(CPU_FTR_REAL_LE)) { - regs->msr ^= MSR_LE; - goto instr_done; - } - regs->gpr[9] = regs->gpr[13]; - regs->gpr[10] = MSR_KERNEL; - regs->gpr[11] = regs->nip + 4; - regs->gpr[12] = regs->msr & MSR_MASK; - regs->gpr[13] = (unsigned long) get_paca(); - regs->nip = (unsigned long) &system_call_common; - regs->msr = MSR_KERNEL; - return 1; - + return -1; + case SYSCALL_VECTORED_0: /* scv 0 */ + return -1; case RFI: return -1; -#endif } return 0; instr_done: - regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4); + regs_set_return_ip(regs, + truncate_if_32bit(regs->msr, regs->nip + GETLENGTH(op.type))); return 1; } NOKPROBE_SYMBOL(emulate_step); diff --git a/arch/powerpc/lib/string.S b/arch/powerpc/lib/string.S index 2752b1cc1d45..daa72061dc0c 100644 --- a/arch/powerpc/lib/string.S +++ b/arch/powerpc/lib/string.S @@ -4,8 +4,8 @@ * * Copyright (C) 1996 Paul Mackerras. */ +#include <linux/export.h> #include <asm/ppc_asm.h> -#include <asm/export.h> #include <asm/cache.h> .text diff --git a/arch/powerpc/lib/string_32.S b/arch/powerpc/lib/string_32.S index f69a6aab7bfb..3ee45619a3f8 100644 --- a/arch/powerpc/lib/string_32.S +++ b/arch/powerpc/lib/string_32.S @@ -7,8 +7,8 @@ * */ +#include <linux/export.h> #include <asm/ppc_asm.h> -#include <asm/export.h> #include <asm/cache.h> .text @@ -17,7 +17,7 @@ CACHELINE_BYTES = L1_CACHE_BYTES LG_CACHELINE_BYTES = L1_CACHE_SHIFT CACHELINE_MASK = (L1_CACHE_BYTES-1) -_GLOBAL(__clear_user) +_GLOBAL(__arch_clear_user) /* * Use dcbz on the complete cache lines in the destination * to set them to zero. This requires that the destination @@ -87,4 +87,4 @@ _GLOBAL(__clear_user) EX_TABLE(8b, 91b) EX_TABLE(9b, 91b) -EXPORT_SYMBOL(__clear_user) +EXPORT_SYMBOL(__arch_clear_user) diff --git a/arch/powerpc/lib/string_64.S b/arch/powerpc/lib/string_64.S index 507b18b1660e..a25eb8588434 100644 --- a/arch/powerpc/lib/string_64.S +++ b/arch/powerpc/lib/string_64.S @@ -6,18 +6,13 @@ * Author: Anton Blanchard <anton@au.ibm.com> */ +#include <linux/export.h> #include <asm/ppc_asm.h> #include <asm/linkage.h> #include <asm/asm-offsets.h> -#include <asm/export.h> - - .section ".toc","aw" -PPC64_CACHES: - .tc ppc64_caches[TC],ppc64_caches - .section ".text" /** - * __clear_user: - Zero a block of memory in user space, with less checking. 
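/*
 * Editor's sketch of how a caller consumes emulate_step()'s tri-state
 * return after the changes above (the wrapper below is an assumption for
 * illustration, not part of the patch): 1 means the instruction was
 * emulated and regs->nip already points past it (4 or 8 bytes, via
 * GETLENGTH()); 0 means it was not handled and should be single stepped
 * for real; -1 means it must not be stepped at all (sc, scv, rfid, or a
 * mtmsrd that would clear MSR_RI).
 */
static bool try_emulate_step(struct pt_regs *regs, ppc_inst_t instr)
{
	int r = emulate_step(regs, instr);

	if (r == 1)
		return true;	/* emulated, nip already advanced */

	return false;		/* 0: step it natively; -1: do not step */
}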
+ * __arch_clear_user: - Zero a block of memory in user space, with less checking. * @to: Destination address, in user space. * @n: Number of bytes to zero. * @@ -58,7 +53,7 @@ err3; stb r0,0(r3) mr r3,r4 blr -_GLOBAL_TOC(__clear_user) +_GLOBAL_TOC(__arch_clear_user) cmpdi r4,32 neg r6,r3 li r0,0 @@ -133,7 +128,7 @@ err1; stb r0,0(r3) blr .Llong_clear: - ld r5,PPC64_CACHES@toc(r2) + LOAD_REG_ADDR(r5, ppc64_caches) bf cr7*4+0,11f err2; std r0,0(r3) @@ -181,4 +176,4 @@ err1; dcbz 0,r3 cmpdi r4,32 blt .Lshort_clear b .Lmedium_clear -EXPORT_SYMBOL(__clear_user) +EXPORT_SYMBOL(__arch_clear_user) diff --git a/arch/powerpc/lib/strlen_32.S b/arch/powerpc/lib/strlen_32.S index 0a8d3f64d493..bbd24feb233f 100644 --- a/arch/powerpc/lib/strlen_32.S +++ b/arch/powerpc/lib/strlen_32.S @@ -6,8 +6,8 @@ * * Inspired from glibc implementation */ +#include <linux/export.h> #include <asm/ppc_asm.h> -#include <asm/export.h> #include <asm/cache.h> .text diff --git a/arch/powerpc/lib/test-code-patching.c b/arch/powerpc/lib/test-code-patching.c new file mode 100644 index 000000000000..1440d99630b3 --- /dev/null +++ b/arch/powerpc/lib/test-code-patching.c @@ -0,0 +1,495 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2008 Michael Ellerman, IBM Corporation. + */ + +#include <linux/vmalloc.h> +#include <linux/init.h> + +#include <asm/text-patching.h> + +static int __init instr_is_branch_to_addr(const u32 *instr, unsigned long addr) +{ + if (instr_is_branch_iform(ppc_inst_read(instr)) || + instr_is_branch_bform(ppc_inst_read(instr))) + return branch_target(instr) == addr; + + return 0; +} + +static void __init test_trampoline(void) +{ + asm ("nop;nop;\n"); +} + +#define check(x) do { \ + if (!(x)) \ + pr_err("code-patching: test failed at line %d\n", __LINE__); \ +} while (0) + +static void __init test_branch_iform(void) +{ + int err; + ppc_inst_t instr; + u32 tmp[2]; + u32 *iptr = tmp; + unsigned long addr = (unsigned long)tmp; + + /* The simplest case, branch to self, no flags */ + check(instr_is_branch_iform(ppc_inst(0x48000000))); + /* All bits of target set, and flags */ + check(instr_is_branch_iform(ppc_inst(0x4bffffff))); + /* High bit of opcode set, which is wrong */ + check(!instr_is_branch_iform(ppc_inst(0xcbffffff))); + /* Middle bits of opcode set, which is wrong */ + check(!instr_is_branch_iform(ppc_inst(0x7bffffff))); + + /* Simplest case, branch to self with link */ + check(instr_is_branch_iform(ppc_inst(0x48000001))); + /* All bits of targets set */ + check(instr_is_branch_iform(ppc_inst(0x4bfffffd))); + /* Some bits of targets set */ + check(instr_is_branch_iform(ppc_inst(0x4bff00fd))); + /* Must be a valid branch to start with */ + check(!instr_is_branch_iform(ppc_inst(0x7bfffffd))); + + /* Absolute branch to 0x100 */ + ppc_inst_write(iptr, ppc_inst(0x48000103)); + check(instr_is_branch_to_addr(iptr, 0x100)); + /* Absolute branch to 0x420fc */ + ppc_inst_write(iptr, ppc_inst(0x480420ff)); + check(instr_is_branch_to_addr(iptr, 0x420fc)); + /* Maximum positive relative branch, + 20MB - 4B */ + ppc_inst_write(iptr, ppc_inst(0x49fffffc)); + check(instr_is_branch_to_addr(iptr, addr + 0x1FFFFFC)); + /* Smallest negative relative branch, - 4B */ + ppc_inst_write(iptr, ppc_inst(0x4bfffffc)); + check(instr_is_branch_to_addr(iptr, addr - 4)); + /* Largest negative relative branch, - 32 MB */ + ppc_inst_write(iptr, ppc_inst(0x4a000000)); + check(instr_is_branch_to_addr(iptr, addr - 0x2000000)); + + /* Branch to self, with link */ + err = create_branch(&instr, iptr, addr, BRANCH_SET_LINK); + 
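/*
 * Editor's note on the magic constants used by the I-form checks above
 * (the helper below is illustrative, not part of the patch): an I-form
 * branch is opcode 18 in the top six bits, a signed 24-bit word offset in
 * LI, plus the AA (absolute) and LK (link) bits. 0x48000000 is therefore
 * "b ." and 0x48000001 is "bl ."; 0x48000103 is an absolute branch-and-link
 * to 0x100, matching the first ppc_inst_write() test above.
 */
static unsigned int iform_branch(long offset, int absolute, int link)
{
	return (18u << 26) | ((unsigned int)offset & 0x03fffffc) |
	       (absolute ? 2 : 0) | (link ? 1 : 0);
}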
ppc_inst_write(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr)); + + /* Branch to self - 0x100, with link */ + err = create_branch(&instr, iptr, addr - 0x100, BRANCH_SET_LINK); + ppc_inst_write(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr - 0x100)); + + /* Branch to self + 0x100, no link */ + err = create_branch(&instr, iptr, addr + 0x100, 0); + ppc_inst_write(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr + 0x100)); + + /* Maximum relative negative offset, - 32 MB */ + err = create_branch(&instr, iptr, addr - 0x2000000, BRANCH_SET_LINK); + ppc_inst_write(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr - 0x2000000)); + + /* Out of range relative negative offset, - 32 MB + 4*/ + err = create_branch(&instr, iptr, addr - 0x2000004, BRANCH_SET_LINK); + check(err); + + /* Out of range relative positive offset, + 32 MB */ + err = create_branch(&instr, iptr, addr + 0x2000000, BRANCH_SET_LINK); + check(err); + + /* Unaligned target */ + err = create_branch(&instr, iptr, addr + 3, BRANCH_SET_LINK); + check(err); + + /* Check flags are masked correctly */ + err = create_branch(&instr, iptr, addr, 0xFFFFFFFC); + ppc_inst_write(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr)); + check(ppc_inst_equal(instr, ppc_inst(0x48000000))); +} + +static void __init test_create_function_call(void) +{ + u32 *iptr; + unsigned long dest; + ppc_inst_t instr; + + /* Check we can create a function call */ + iptr = (u32 *)ppc_function_entry(test_trampoline); + dest = ppc_function_entry(test_create_function_call); + create_branch(&instr, iptr, dest, BRANCH_SET_LINK); + patch_instruction(iptr, instr); + check(instr_is_branch_to_addr(iptr, dest)); +} + +static void __init test_branch_bform(void) +{ + int err; + unsigned long addr; + ppc_inst_t instr; + u32 tmp[2]; + u32 *iptr = tmp; + unsigned int flags; + + addr = (unsigned long)iptr; + + /* The simplest case, branch to self, no flags */ + check(instr_is_branch_bform(ppc_inst(0x40000000))); + /* All bits of target set, and flags */ + check(instr_is_branch_bform(ppc_inst(0x43ffffff))); + /* High bit of opcode set, which is wrong */ + check(!instr_is_branch_bform(ppc_inst(0xc3ffffff))); + /* Middle bits of opcode set, which is wrong */ + check(!instr_is_branch_bform(ppc_inst(0x7bffffff))); + + /* Absolute conditional branch to 0x100 */ + ppc_inst_write(iptr, ppc_inst(0x43ff0103)); + check(instr_is_branch_to_addr(iptr, 0x100)); + /* Absolute conditional branch to 0x20fc */ + ppc_inst_write(iptr, ppc_inst(0x43ff20ff)); + check(instr_is_branch_to_addr(iptr, 0x20fc)); + /* Maximum positive relative conditional branch, + 32 KB - 4B */ + ppc_inst_write(iptr, ppc_inst(0x43ff7ffc)); + check(instr_is_branch_to_addr(iptr, addr + 0x7FFC)); + /* Smallest negative relative conditional branch, - 4B */ + ppc_inst_write(iptr, ppc_inst(0x43fffffc)); + check(instr_is_branch_to_addr(iptr, addr - 4)); + /* Largest negative relative conditional branch, - 32 KB */ + ppc_inst_write(iptr, ppc_inst(0x43ff8000)); + check(instr_is_branch_to_addr(iptr, addr - 0x8000)); + + /* All condition code bits set & link */ + flags = 0x3ff000 | BRANCH_SET_LINK; + + /* Branch to self */ + err = create_cond_branch(&instr, iptr, addr, flags); + ppc_inst_write(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr)); + + /* Branch to self - 0x100 */ + err = create_cond_branch(&instr, iptr, addr - 0x100, flags); + ppc_inst_write(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr - 0x100)); + + /* Branch to self + 0x100 */ + err = create_cond_branch(&instr, 
iptr, addr + 0x100, flags); + ppc_inst_write(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr + 0x100)); + + /* Maximum relative negative offset, - 32 KB */ + err = create_cond_branch(&instr, iptr, addr - 0x8000, flags); + ppc_inst_write(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr - 0x8000)); + + /* Out of range relative negative offset, - 32 KB + 4*/ + err = create_cond_branch(&instr, iptr, addr - 0x8004, flags); + check(err); + + /* Out of range relative positive offset, + 32 KB */ + err = create_cond_branch(&instr, iptr, addr + 0x8000, flags); + check(err); + + /* Unaligned target */ + err = create_cond_branch(&instr, iptr, addr + 3, flags); + check(err); + + /* Check flags are masked correctly */ + err = create_cond_branch(&instr, iptr, addr, 0xFFFFFFFC); + ppc_inst_write(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr)); + check(ppc_inst_equal(instr, ppc_inst(0x43FF0000))); +} + +static void __init test_translate_branch(void) +{ + unsigned long addr; + void *p, *q; + ppc_inst_t instr; + void *buf; + + buf = vmalloc(PAGE_ALIGN(0x2000000 + 1)); + check(buf); + if (!buf) + return; + + /* Simple case, branch to self moved a little */ + p = buf; + addr = (unsigned long)p; + create_branch(&instr, p, addr, 0); + ppc_inst_write(p, instr); + check(instr_is_branch_to_addr(p, addr)); + q = p + 4; + translate_branch(&instr, q, p); + ppc_inst_write(q, instr); + check(instr_is_branch_to_addr(q, addr)); + + /* Maximum negative case, move b . to addr + 32 MB */ + p = buf; + addr = (unsigned long)p; + create_branch(&instr, p, addr, 0); + ppc_inst_write(p, instr); + q = buf + 0x2000000; + translate_branch(&instr, q, p); + ppc_inst_write(q, instr); + check(instr_is_branch_to_addr(p, addr)); + check(instr_is_branch_to_addr(q, addr)); + check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x4a000000))); + + /* Maximum positive case, move x to x - 32 MB + 4 */ + p = buf + 0x2000000; + addr = (unsigned long)p; + create_branch(&instr, p, addr, 0); + ppc_inst_write(p, instr); + q = buf + 4; + translate_branch(&instr, q, p); + ppc_inst_write(q, instr); + check(instr_is_branch_to_addr(p, addr)); + check(instr_is_branch_to_addr(q, addr)); + check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x49fffffc))); + + /* Jump to x + 16 MB moved to x + 20 MB */ + p = buf; + addr = 0x1000000 + (unsigned long)buf; + create_branch(&instr, p, addr, BRANCH_SET_LINK); + ppc_inst_write(p, instr); + q = buf + 0x1400000; + translate_branch(&instr, q, p); + ppc_inst_write(q, instr); + check(instr_is_branch_to_addr(p, addr)); + check(instr_is_branch_to_addr(q, addr)); + + /* Jump to x + 16 MB moved to x - 16 MB + 4 */ + p = buf + 0x1000000; + addr = 0x2000000 + (unsigned long)buf; + create_branch(&instr, p, addr, 0); + ppc_inst_write(p, instr); + q = buf + 4; + translate_branch(&instr, q, p); + ppc_inst_write(q, instr); + check(instr_is_branch_to_addr(p, addr)); + check(instr_is_branch_to_addr(q, addr)); + + + /* Conditional branch tests */ + + /* Simple case, branch to self moved a little */ + p = buf; + addr = (unsigned long)p; + create_cond_branch(&instr, p, addr, 0); + ppc_inst_write(p, instr); + check(instr_is_branch_to_addr(p, addr)); + q = buf + 4; + translate_branch(&instr, q, p); + ppc_inst_write(q, instr); + check(instr_is_branch_to_addr(q, addr)); + + /* Maximum negative case, move b . 
to addr + 32 KB */ + p = buf; + addr = (unsigned long)p; + create_cond_branch(&instr, p, addr, 0xFFFFFFFC); + ppc_inst_write(p, instr); + q = buf + 0x8000; + translate_branch(&instr, q, p); + ppc_inst_write(q, instr); + check(instr_is_branch_to_addr(p, addr)); + check(instr_is_branch_to_addr(q, addr)); + check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x43ff8000))); + + /* Maximum positive case, move x to x - 32 KB + 4 */ + p = buf + 0x8000; + addr = (unsigned long)p; + create_cond_branch(&instr, p, addr, 0xFFFFFFFC); + ppc_inst_write(p, instr); + q = buf + 4; + translate_branch(&instr, q, p); + ppc_inst_write(q, instr); + check(instr_is_branch_to_addr(p, addr)); + check(instr_is_branch_to_addr(q, addr)); + check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x43ff7ffc))); + + /* Jump to x + 12 KB moved to x + 20 KB */ + p = buf; + addr = 0x3000 + (unsigned long)buf; + create_cond_branch(&instr, p, addr, BRANCH_SET_LINK); + ppc_inst_write(p, instr); + q = buf + 0x5000; + translate_branch(&instr, q, p); + ppc_inst_write(q, instr); + check(instr_is_branch_to_addr(p, addr)); + check(instr_is_branch_to_addr(q, addr)); + + /* Jump to x + 8 KB moved to x - 8 KB + 4 */ + p = buf + 0x2000; + addr = 0x4000 + (unsigned long)buf; + create_cond_branch(&instr, p, addr, 0); + ppc_inst_write(p, instr); + q = buf + 4; + translate_branch(&instr, q, p); + ppc_inst_write(q, instr); + check(instr_is_branch_to_addr(p, addr)); + check(instr_is_branch_to_addr(q, addr)); + + /* Free the buffer we were using */ + vfree(buf); +} + +static void __init test_prefixed_patching(void) +{ + u32 *iptr = (u32 *)ppc_function_entry(test_trampoline); + u32 expected[2] = {OP_PREFIX << 26, 0}; + ppc_inst_t inst = ppc_inst_prefix(OP_PREFIX << 26, 0); + + if (!IS_ENABLED(CONFIG_PPC64)) + return; + + patch_instruction(iptr, inst); + + check(!memcmp(iptr, expected, sizeof(expected))); +} + +static void __init test_multi_instruction_patching(void) +{ + u32 code[32]; + void *buf; + u32 *addr32; + u64 *addr64; + ppc_inst_t inst64 = ppc_inst_prefix(OP_PREFIX << 26 | 3UL << 24, PPC_RAW_TRAP()); + u32 inst32 = PPC_RAW_NOP(); + + buf = vzalloc(PAGE_SIZE * 8); + check(buf); + if (!buf) + return; + + /* Test single page 32-bit repeated instruction */ + addr32 = buf + PAGE_SIZE; + check(!patch_instructions(addr32 + 1, &inst32, 12, true)); + + check(addr32[0] == 0); + check(addr32[1] == inst32); + check(addr32[2] == inst32); + check(addr32[3] == inst32); + check(addr32[4] == 0); + + /* Test single page 64-bit repeated instruction */ + if (IS_ENABLED(CONFIG_PPC64)) { + check(ppc_inst_prefixed(inst64)); + + addr64 = buf + PAGE_SIZE * 2; + ppc_inst_write(code, inst64); + check(!patch_instructions((u32 *)(addr64 + 1), code, 24, true)); + + check(addr64[0] == 0); + check(ppc_inst_equal(ppc_inst_read((u32 *)&addr64[1]), inst64)); + check(ppc_inst_equal(ppc_inst_read((u32 *)&addr64[2]), inst64)); + check(ppc_inst_equal(ppc_inst_read((u32 *)&addr64[3]), inst64)); + check(addr64[4] == 0); + } + + /* Test single page memcpy */ + addr32 = buf + PAGE_SIZE * 3; + + for (int i = 0; i < ARRAY_SIZE(code); i++) + code[i] = i + 1; + + check(!patch_instructions(addr32 + 1, code, sizeof(code), false)); + + check(addr32[0] == 0); + check(!memcmp(&addr32[1], code, sizeof(code))); + check(addr32[ARRAY_SIZE(code) + 1] == 0); + + /* Test multipage 32-bit repeated instruction */ + addr32 = buf + PAGE_SIZE * 4 - 8; + check(!patch_instructions(addr32 + 1, &inst32, 12, true)); + + check(addr32[0] == 0); + check(addr32[1] == inst32); + check(addr32[2] == inst32); + 
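/*
 * Editor's sketch of what the translate_branch() calls above rely on
 * (this mirrors the helper's intent; treat the body as an outline rather
 * than the kernel's exact code): relocating a branch means re-encoding it
 * at the destination so that its absolute target is preserved.
 */
static int translate_branch_sketch(ppc_inst_t *instr, const u32 *dest, const u32 *src)
{
	unsigned long target = branch_target(src);	/* absolute target of the branch at src */

	if (instr_is_branch_iform(ppc_inst_read(src)))
		return create_branch(instr, dest, target, ppc_inst_val(ppc_inst_read(src)));
	if (instr_is_branch_bform(ppc_inst_read(src)))
		return create_cond_branch(instr, dest, target, ppc_inst_val(ppc_inst_read(src)));

	return 1;	/* not a branch we know how to relocate */
}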
check(addr32[3] == inst32); + check(addr32[4] == 0); + + /* Test multipage 64-bit repeated instruction */ + if (IS_ENABLED(CONFIG_PPC64)) { + check(ppc_inst_prefixed(inst64)); + + addr64 = buf + PAGE_SIZE * 5 - 8; + ppc_inst_write(code, inst64); + check(!patch_instructions((u32 *)(addr64 + 1), code, 24, true)); + + check(addr64[0] == 0); + check(ppc_inst_equal(ppc_inst_read((u32 *)&addr64[1]), inst64)); + check(ppc_inst_equal(ppc_inst_read((u32 *)&addr64[2]), inst64)); + check(ppc_inst_equal(ppc_inst_read((u32 *)&addr64[3]), inst64)); + check(addr64[4] == 0); + } + + /* Test multipage memcpy */ + addr32 = buf + PAGE_SIZE * 6 - 12; + + for (int i = 0; i < ARRAY_SIZE(code); i++) + code[i] = i + 1; + + check(!patch_instructions(addr32 + 1, code, sizeof(code), false)); + + check(addr32[0] == 0); + check(!memcmp(&addr32[1], code, sizeof(code))); + check(addr32[ARRAY_SIZE(code) + 1] == 0); + + vfree(buf); +} + +static void __init test_data_patching(void) +{ + void *buf; + u32 *addr32; + + buf = vzalloc(PAGE_SIZE); + check(buf); + if (!buf) + return; + + addr32 = buf + 128; + + addr32[1] = 0xA0A1A2A3; + addr32[2] = 0xB0B1B2B3; + + check(!patch_uint(&addr32[1], 0xC0C1C2C3)); + + check(addr32[0] == 0); + check(addr32[1] == 0xC0C1C2C3); + check(addr32[2] == 0xB0B1B2B3); + check(addr32[3] == 0); + + /* Unaligned patch_ulong() should fail */ + if (IS_ENABLED(CONFIG_PPC64)) + check(patch_ulong(&addr32[1], 0xD0D1D2D3) == -EINVAL); + + check(!patch_ulong(&addr32[2], 0xD0D1D2D3)); + + check(addr32[0] == 0); + check(addr32[1] == 0xC0C1C2C3); + check(*(unsigned long *)(&addr32[2]) == 0xD0D1D2D3); + + if (!IS_ENABLED(CONFIG_PPC64)) + check(addr32[3] == 0); + + check(addr32[4] == 0); + + vfree(buf); +} + +static int __init test_code_patching(void) +{ + pr_info("Running code patching self-tests ...\n"); + + test_branch_iform(); + test_branch_bform(); + test_create_function_call(); + test_translate_branch(); + test_prefixed_patching(); + test_multi_instruction_patching(); + test_data_patching(); + + return 0; +} +late_initcall(test_code_patching); diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c index 42347067739c..66b5b4fa1686 100644 --- a/arch/powerpc/lib/test_emulate_step.c +++ b/arch/powerpc/lib/test_emulate_step.c @@ -8,57 +8,50 @@ #define pr_fmt(fmt) "emulate_step_test: " fmt #include <linux/ptrace.h> +#include <asm/cpu_has_feature.h> #include <asm/sstep.h> #include <asm/ppc-opcode.h> -#include <asm/code-patching.h> - -#define IMM_L(i) ((uintptr_t)(i) & 0xffff) - -/* - * Defined with TEST_ prefix so it does not conflict with other - * definitions. 
- */ -#define TEST_LD(r, base, i) (PPC_INST_LD | ___PPC_RT(r) | \ - ___PPC_RA(base) | IMM_L(i)) -#define TEST_LWZ(r, base, i) (PPC_INST_LWZ | ___PPC_RT(r) | \ - ___PPC_RA(base) | IMM_L(i)) -#define TEST_LWZX(t, a, b) (PPC_INST_LWZX | ___PPC_RT(t) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_STD(r, base, i) (PPC_INST_STD | ___PPC_RS(r) | \ - ___PPC_RA(base) | ((i) & 0xfffc)) -#define TEST_LDARX(t, a, b, eh) (PPC_INST_LDARX | ___PPC_RT(t) | \ - ___PPC_RA(a) | ___PPC_RB(b) | \ - __PPC_EH(eh)) -#define TEST_STDCX(s, a, b) (PPC_INST_STDCX | ___PPC_RS(s) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_LFSX(t, a, b) (PPC_INST_LFSX | ___PPC_RT(t) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_STFSX(s, a, b) (PPC_INST_STFSX | ___PPC_RS(s) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_LFDX(t, a, b) (PPC_INST_LFDX | ___PPC_RT(t) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_STFDX(s, a, b) (PPC_INST_STFDX | ___PPC_RS(s) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_LVX(t, a, b) (PPC_INST_LVX | ___PPC_RT(t) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_STVX(s, a, b) (PPC_INST_STVX | ___PPC_RS(s) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_LXVD2X(s, a, b) (PPC_INST_LXVD2X | VSX_XX1((s), R##a, R##b)) -#define TEST_STXVD2X(s, a, b) (PPC_INST_STXVD2X | VSX_XX1((s), R##a, R##b)) -#define TEST_ADD(t, a, b) (PPC_INST_ADD | ___PPC_RT(t) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_ADD_DOT(t, a, b) (PPC_INST_ADD | ___PPC_RT(t) | \ - ___PPC_RA(a) | ___PPC_RB(b) | 0x1) -#define TEST_ADDC(t, a, b) (PPC_INST_ADDC | ___PPC_RT(t) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_ADDC_DOT(t, a, b) (PPC_INST_ADDC | ___PPC_RT(t) | \ - ___PPC_RA(a) | ___PPC_RB(b) | 0x1) +#include <asm/text-patching.h> +#include <asm/inst.h> #define MAX_SUBTESTS 16 #define IGNORE_GPR(n) (0x1UL << (n)) #define IGNORE_XER (0x1UL << 32) #define IGNORE_CCR (0x1UL << 33) +#define NEGATIVE_TEST (0x1UL << 63) + +#define TEST_PLD(r, base, i, pr) \ + ppc_inst_prefix(PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_H(i), \ + PPC_INST_PLD | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) + +#define TEST_PLWZ(r, base, i, pr) \ + ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \ + PPC_RAW_LWZ(r, base, i)) + +#define TEST_PSTD(r, base, i, pr) \ + ppc_inst_prefix(PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_H(i), \ + PPC_INST_PSTD | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) + +#define TEST_PLFS(r, base, i, pr) \ + ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \ + PPC_INST_LFS | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) + +#define TEST_PSTFS(r, base, i, pr) \ + ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \ + PPC_INST_STFS | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) + +#define TEST_PLFD(r, base, i, pr) \ + ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \ + PPC_INST_LFD | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) + +#define TEST_PSTFD(r, base, i, pr) \ + ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \ + PPC_INST_STFD | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) + +#define TEST_PADDI(t, a, i, pr) \ + ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \ + PPC_RAW_ADDI(t, a, i)) static void __init init_pt_regs(struct pt_regs *regs) { @@ -103,7 +96,7 @@ static void __init test_ld(void) regs.gpr[3] = (unsigned long) &a; /* ld r5, 0(r3) */ - stepped = emulate_step(®s, TEST_LD(5, 3, 0)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LD(5, 3, 0))); if (stepped == 1 && regs.gpr[5] == a) show_result("ld", "PASS"); @@ -111,6 +104,29 @@ static void __init 
test_ld(void) show_result("ld", "FAIL"); } +static void __init test_pld(void) +{ + struct pt_regs regs; + unsigned long a = 0x23; + int stepped = -1; + + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + show_result("pld", "SKIP (!CPU_FTR_ARCH_31)"); + return; + } + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long)&a; + + /* pld r5, 0(r3), 0 */ + stepped = emulate_step(®s, TEST_PLD(5, 3, 0, 0)); + + if (stepped == 1 && regs.gpr[5] == a) + show_result("pld", "PASS"); + else + show_result("pld", "FAIL"); +} + static void __init test_lwz(void) { struct pt_regs regs; @@ -121,7 +137,7 @@ static void __init test_lwz(void) regs.gpr[3] = (unsigned long) &a; /* lwz r5, 0(r3) */ - stepped = emulate_step(®s, TEST_LWZ(5, 3, 0)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LWZ(5, 3, 0))); if (stepped == 1 && regs.gpr[5] == a) show_result("lwz", "PASS"); @@ -129,6 +145,30 @@ static void __init test_lwz(void) show_result("lwz", "FAIL"); } +static void __init test_plwz(void) +{ + struct pt_regs regs; + unsigned int a = 0x4545; + int stepped = -1; + + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + show_result("plwz", "SKIP (!CPU_FTR_ARCH_31)"); + return; + } + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long)&a; + + /* plwz r5, 0(r3), 0 */ + + stepped = emulate_step(®s, TEST_PLWZ(5, 3, 0, 0)); + + if (stepped == 1 && regs.gpr[5] == a) + show_result("plwz", "PASS"); + else + show_result("plwz", "FAIL"); +} + static void __init test_lwzx(void) { struct pt_regs regs; @@ -141,7 +181,7 @@ static void __init test_lwzx(void) regs.gpr[5] = 0x8765; /* lwzx r5, r3, r4 */ - stepped = emulate_step(®s, TEST_LWZX(5, 3, 4)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LWZX(5, 3, 4))); if (stepped == 1 && regs.gpr[5] == a[2]) show_result("lwzx", "PASS"); else @@ -159,13 +199,36 @@ static void __init test_std(void) regs.gpr[5] = 0x5678; /* std r5, 0(r3) */ - stepped = emulate_step(®s, TEST_STD(5, 3, 0)); - if (stepped == 1 || regs.gpr[5] == a) + stepped = emulate_step(®s, ppc_inst(PPC_RAW_STD(5, 3, 0))); + if (stepped == 1 && regs.gpr[5] == a) show_result("std", "PASS"); else show_result("std", "FAIL"); } +static void __init test_pstd(void) +{ + struct pt_regs regs; + unsigned long a = 0x1234; + int stepped = -1; + + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + show_result("pstd", "SKIP (!CPU_FTR_ARCH_31)"); + return; + } + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long)&a; + regs.gpr[5] = 0x5678; + + /* pstd r5, 0(r3), 0 */ + stepped = emulate_step(®s, TEST_PSTD(5, 3, 0, 0)); + if (stepped == 1 || regs.gpr[5] == a) + show_result("pstd", "PASS"); + else + show_result("pstd", "FAIL"); +} + static void __init test_ldarx_stdcx(void) { struct pt_regs regs; @@ -184,7 +247,7 @@ static void __init test_ldarx_stdcx(void) regs.gpr[5] = 0x5678; /* ldarx r5, r3, r4, 0 */ - stepped = emulate_step(®s, TEST_LDARX(5, 3, 4, 0)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LDARX(5, 3, 4, 0))); /* * Don't touch 'a' here. Touching 'a' can do Load/store @@ -202,7 +265,7 @@ static void __init test_ldarx_stdcx(void) regs.gpr[5] = 0x9ABC; /* stdcx. 
r5, r3, r4 */ - stepped = emulate_step(®s, TEST_STDCX(5, 3, 4)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_STDCX(5, 3, 4))); /* * Two possible scenarios that indicates successful emulation @@ -242,7 +305,7 @@ static void __init test_lfsx_stfsx(void) regs.gpr[4] = 0; /* lfsx frt10, r3, r4 */ - stepped = emulate_step(®s, TEST_LFSX(10, 3, 4)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LFSX(10, 3, 4))); if (stepped == 1) show_result("lfsx", "PASS"); @@ -255,7 +318,7 @@ static void __init test_lfsx_stfsx(void) c.a = 678.91; /* stfsx frs10, r3, r4 */ - stepped = emulate_step(®s, TEST_STFSX(10, 3, 4)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_STFSX(10, 3, 4))); if (stepped == 1 && c.b == cached_b) show_result("stfsx", "PASS"); @@ -263,6 +326,53 @@ static void __init test_lfsx_stfsx(void) show_result("stfsx", "FAIL"); } +static void __init test_plfs_pstfs(void) +{ + struct pt_regs regs; + union { + float a; + int b; + } c; + int cached_b; + int stepped = -1; + + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + show_result("pld", "SKIP (!CPU_FTR_ARCH_31)"); + return; + } + + init_pt_regs(®s); + + + /*** plfs ***/ + + c.a = 123.45; + cached_b = c.b; + + regs.gpr[3] = (unsigned long)&c.a; + + /* plfs frt10, 0(r3), 0 */ + stepped = emulate_step(®s, TEST_PLFS(10, 3, 0, 0)); + + if (stepped == 1) + show_result("plfs", "PASS"); + else + show_result("plfs", "FAIL"); + + + /*** pstfs ***/ + + c.a = 678.91; + + /* pstfs frs10, 0(r3), 0 */ + stepped = emulate_step(®s, TEST_PSTFS(10, 3, 0, 0)); + + if (stepped == 1 && c.b == cached_b) + show_result("pstfs", "PASS"); + else + show_result("pstfs", "FAIL"); +} + static void __init test_lfdx_stfdx(void) { struct pt_regs regs; @@ -285,7 +395,7 @@ static void __init test_lfdx_stfdx(void) regs.gpr[4] = 0; /* lfdx frt10, r3, r4 */ - stepped = emulate_step(®s, TEST_LFDX(10, 3, 4)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LFDX(10, 3, 4))); if (stepped == 1) show_result("lfdx", "PASS"); @@ -298,13 +408,60 @@ static void __init test_lfdx_stfdx(void) c.a = 987654.32; /* stfdx frs10, r3, r4 */ - stepped = emulate_step(®s, TEST_STFDX(10, 3, 4)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_STFDX(10, 3, 4))); if (stepped == 1 && c.b == cached_b) show_result("stfdx", "PASS"); else show_result("stfdx", "FAIL"); } + +static void __init test_plfd_pstfd(void) +{ + struct pt_regs regs; + union { + double a; + long b; + } c; + long cached_b; + int stepped = -1; + + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + show_result("pld", "SKIP (!CPU_FTR_ARCH_31)"); + return; + } + + init_pt_regs(®s); + + + /*** plfd ***/ + + c.a = 123456.78; + cached_b = c.b; + + regs.gpr[3] = (unsigned long)&c.a; + + /* plfd frt10, 0(r3), 0 */ + stepped = emulate_step(®s, TEST_PLFD(10, 3, 0, 0)); + + if (stepped == 1) + show_result("plfd", "PASS"); + else + show_result("plfd", "FAIL"); + + + /*** pstfd ***/ + + c.a = 987654.32; + + /* pstfd frs10, 0(r3), 0 */ + stepped = emulate_step(®s, TEST_PSTFD(10, 3, 0, 0)); + + if (stepped == 1 && c.b == cached_b) + show_result("pstfd", "PASS"); + else + show_result("pstfd", "FAIL"); +} #else static void __init test_lfsx_stfsx(void) { @@ -312,11 +469,23 @@ static void __init test_lfsx_stfsx(void) show_result("stfsx", "SKIP (CONFIG_PPC_FPU is not set)"); } +static void __init test_plfs_pstfs(void) +{ + show_result("plfs", "SKIP (CONFIG_PPC_FPU is not set)"); + show_result("pstfs", "SKIP (CONFIG_PPC_FPU is not set)"); +} + static void __init test_lfdx_stfdx(void) { show_result("lfdx", "SKIP (CONFIG_PPC_FPU is not set)"); show_result("stfdx", "SKIP 
(CONFIG_PPC_FPU is not set)"); } + +static void __init test_plfd_pstfd(void) +{ + show_result("plfd", "SKIP (CONFIG_PPC_FPU is not set)"); + show_result("pstfd", "SKIP (CONFIG_PPC_FPU is not set)"); +} #endif /* CONFIG_PPC_FPU */ #ifdef CONFIG_ALTIVEC @@ -344,7 +513,7 @@ static void __init test_lvx_stvx(void) regs.gpr[4] = 0; /* lvx vrt10, r3, r4 */ - stepped = emulate_step(®s, TEST_LVX(10, 3, 4)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LVX(10, 3, 4))); if (stepped == 1) show_result("lvx", "PASS"); @@ -360,7 +529,7 @@ static void __init test_lvx_stvx(void) c.b[3] = 498532; /* stvx vrs10, r3, r4 */ - stepped = emulate_step(®s, TEST_STVX(10, 3, 4)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_STVX(10, 3, 4))); if (stepped == 1 && cached_b[0] == c.b[0] && cached_b[1] == c.b[1] && cached_b[2] == c.b[2] && cached_b[3] == c.b[3]) @@ -401,7 +570,7 @@ static void __init test_lxvd2x_stxvd2x(void) regs.gpr[4] = 0; /* lxvd2x vsr39, r3, r4 */ - stepped = emulate_step(®s, TEST_LXVD2X(39, 3, 4)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LXVD2X(39, R3, R4))); if (stepped == 1 && cpu_has_feature(CPU_FTR_VSX)) { show_result("lxvd2x", "PASS"); @@ -421,7 +590,7 @@ static void __init test_lxvd2x_stxvd2x(void) c.b[3] = 4; /* stxvd2x vsr39, r3, r4 */ - stepped = emulate_step(®s, TEST_STXVD2X(39, 3, 4)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_STXVD2X(39, R3, R4))); if (stepped == 1 && cached_b[0] == c.b[0] && cached_b[1] == c.b[1] && cached_b[2] == c.b[2] && cached_b[3] == c.b[3] && @@ -442,36 +611,315 @@ static void __init test_lxvd2x_stxvd2x(void) } #endif /* CONFIG_VSX */ +#ifdef CONFIG_VSX +static void __init test_lxvp_stxvp(void) +{ + struct pt_regs regs; + union { + vector128 a; + u32 b[4]; + } c[2]; + u32 cached_b[8]; + int stepped = -1; + + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + show_result("lxvp", "SKIP (!CPU_FTR_ARCH_31)"); + show_result("stxvp", "SKIP (!CPU_FTR_ARCH_31)"); + return; + } + + init_pt_regs(®s); + + /*** lxvp ***/ + + cached_b[0] = c[0].b[0] = 18233; + cached_b[1] = c[0].b[1] = 34863571; + cached_b[2] = c[0].b[2] = 834; + cached_b[3] = c[0].b[3] = 6138911; + cached_b[4] = c[1].b[0] = 1234; + cached_b[5] = c[1].b[1] = 5678; + cached_b[6] = c[1].b[2] = 91011; + cached_b[7] = c[1].b[3] = 121314; + + regs.gpr[4] = (unsigned long)&c[0].a; + + /* + * lxvp XTp,DQ(RA) + * XTp = 32xTX + 2xTp + * let TX=1 Tp=1 RA=4 DQ=0 + */ + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LXVP(34, 4, 0))); + + if (stepped == 1 && cpu_has_feature(CPU_FTR_VSX)) { + show_result("lxvp", "PASS"); + } else { + if (!cpu_has_feature(CPU_FTR_VSX)) + show_result("lxvp", "PASS (!CPU_FTR_VSX)"); + else + show_result("lxvp", "FAIL"); + } + + /*** stxvp ***/ + + c[0].b[0] = 21379463; + c[0].b[1] = 87; + c[0].b[2] = 374234; + c[0].b[3] = 4; + c[1].b[0] = 90; + c[1].b[1] = 122; + c[1].b[2] = 555; + c[1].b[3] = 32144; + + /* + * stxvp XSp,DQ(RA) + * XSp = 32xSX + 2xSp + * let SX=1 Sp=1 RA=4 DQ=0 + */ + stepped = emulate_step(®s, ppc_inst(PPC_RAW_STXVP(34, 4, 0))); + + if (stepped == 1 && cached_b[0] == c[0].b[0] && cached_b[1] == c[0].b[1] && + cached_b[2] == c[0].b[2] && cached_b[3] == c[0].b[3] && + cached_b[4] == c[1].b[0] && cached_b[5] == c[1].b[1] && + cached_b[6] == c[1].b[2] && cached_b[7] == c[1].b[3] && + cpu_has_feature(CPU_FTR_VSX)) { + show_result("stxvp", "PASS"); + } else { + if (!cpu_has_feature(CPU_FTR_VSX)) + show_result("stxvp", "PASS (!CPU_FTR_VSX)"); + else + show_result("stxvp", "FAIL"); + } +} +#else +static void __init test_lxvp_stxvp(void) +{ + show_result("lxvp", "SKIP (CONFIG_VSX is 
not set)"); + show_result("stxvp", "SKIP (CONFIG_VSX is not set)"); +} +#endif /* CONFIG_VSX */ + +#ifdef CONFIG_VSX +static void __init test_lxvpx_stxvpx(void) +{ + struct pt_regs regs; + union { + vector128 a; + u32 b[4]; + } c[2]; + u32 cached_b[8]; + int stepped = -1; + + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + show_result("lxvpx", "SKIP (!CPU_FTR_ARCH_31)"); + show_result("stxvpx", "SKIP (!CPU_FTR_ARCH_31)"); + return; + } + + init_pt_regs(®s); + + /*** lxvpx ***/ + + cached_b[0] = c[0].b[0] = 18233; + cached_b[1] = c[0].b[1] = 34863571; + cached_b[2] = c[0].b[2] = 834; + cached_b[3] = c[0].b[3] = 6138911; + cached_b[4] = c[1].b[0] = 1234; + cached_b[5] = c[1].b[1] = 5678; + cached_b[6] = c[1].b[2] = 91011; + cached_b[7] = c[1].b[3] = 121314; + + regs.gpr[3] = (unsigned long)&c[0].a; + regs.gpr[4] = 0; + + /* + * lxvpx XTp,RA,RB + * XTp = 32xTX + 2xTp + * let TX=1 Tp=1 RA=3 RB=4 + */ + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LXVPX(34, 3, 4))); + + if (stepped == 1 && cpu_has_feature(CPU_FTR_VSX)) { + show_result("lxvpx", "PASS"); + } else { + if (!cpu_has_feature(CPU_FTR_VSX)) + show_result("lxvpx", "PASS (!CPU_FTR_VSX)"); + else + show_result("lxvpx", "FAIL"); + } + + /*** stxvpx ***/ + + c[0].b[0] = 21379463; + c[0].b[1] = 87; + c[0].b[2] = 374234; + c[0].b[3] = 4; + c[1].b[0] = 90; + c[1].b[1] = 122; + c[1].b[2] = 555; + c[1].b[3] = 32144; + + /* + * stxvpx XSp,RA,RB + * XSp = 32xSX + 2xSp + * let SX=1 Sp=1 RA=3 RB=4 + */ + stepped = emulate_step(®s, ppc_inst(PPC_RAW_STXVPX(34, 3, 4))); + + if (stepped == 1 && cached_b[0] == c[0].b[0] && cached_b[1] == c[0].b[1] && + cached_b[2] == c[0].b[2] && cached_b[3] == c[0].b[3] && + cached_b[4] == c[1].b[0] && cached_b[5] == c[1].b[1] && + cached_b[6] == c[1].b[2] && cached_b[7] == c[1].b[3] && + cpu_has_feature(CPU_FTR_VSX)) { + show_result("stxvpx", "PASS"); + } else { + if (!cpu_has_feature(CPU_FTR_VSX)) + show_result("stxvpx", "PASS (!CPU_FTR_VSX)"); + else + show_result("stxvpx", "FAIL"); + } +} +#else +static void __init test_lxvpx_stxvpx(void) +{ + show_result("lxvpx", "SKIP (CONFIG_VSX is not set)"); + show_result("stxvpx", "SKIP (CONFIG_VSX is not set)"); +} +#endif /* CONFIG_VSX */ + +#ifdef CONFIG_VSX +static void __init test_plxvp_pstxvp(void) +{ + ppc_inst_t instr; + struct pt_regs regs; + union { + vector128 a; + u32 b[4]; + } c[2]; + u32 cached_b[8]; + int stepped = -1; + + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + show_result("plxvp", "SKIP (!CPU_FTR_ARCH_31)"); + show_result("pstxvp", "SKIP (!CPU_FTR_ARCH_31)"); + return; + } + + /*** plxvp ***/ + + cached_b[0] = c[0].b[0] = 18233; + cached_b[1] = c[0].b[1] = 34863571; + cached_b[2] = c[0].b[2] = 834; + cached_b[3] = c[0].b[3] = 6138911; + cached_b[4] = c[1].b[0] = 1234; + cached_b[5] = c[1].b[1] = 5678; + cached_b[6] = c[1].b[2] = 91011; + cached_b[7] = c[1].b[3] = 121314; + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long)&c[0].a; + + /* + * plxvp XTp,D(RA),R + * XTp = 32xTX + 2xTp + * let RA=3 R=0 D=d0||d1=0 R=0 Tp=1 TX=1 + */ + instr = ppc_inst_prefix(PPC_RAW_PLXVP_P(34, 0, 3, 0), PPC_RAW_PLXVP_S(34, 0, 3, 0)); + + stepped = emulate_step(®s, instr); + if (stepped == 1 && cpu_has_feature(CPU_FTR_VSX)) { + show_result("plxvp", "PASS"); + } else { + if (!cpu_has_feature(CPU_FTR_VSX)) + show_result("plxvp", "PASS (!CPU_FTR_VSX)"); + else + show_result("plxvp", "FAIL"); + } + + /*** pstxvp ***/ + + c[0].b[0] = 21379463; + c[0].b[1] = 87; + c[0].b[2] = 374234; + c[0].b[3] = 4; + c[1].b[0] = 90; + c[1].b[1] = 122; + c[1].b[2] = 555; + c[1].b[3] = 32144; + + /* + * 
pstxvp XSp,D(RA),R + * XSp = 32xSX + 2xSp + * let RA=3 D=d0||d1=0 R=0 Sp=1 SX=1 + */ + instr = ppc_inst_prefix(PPC_RAW_PSTXVP_P(34, 0, 3, 0), PPC_RAW_PSTXVP_S(34, 0, 3, 0)); + + stepped = emulate_step(®s, instr); + + if (stepped == 1 && cached_b[0] == c[0].b[0] && cached_b[1] == c[0].b[1] && + cached_b[2] == c[0].b[2] && cached_b[3] == c[0].b[3] && + cached_b[4] == c[1].b[0] && cached_b[5] == c[1].b[1] && + cached_b[6] == c[1].b[2] && cached_b[7] == c[1].b[3] && + cpu_has_feature(CPU_FTR_VSX)) { + show_result("pstxvp", "PASS"); + } else { + if (!cpu_has_feature(CPU_FTR_VSX)) + show_result("pstxvp", "PASS (!CPU_FTR_VSX)"); + else + show_result("pstxvp", "FAIL"); + } +} +#else +static void __init test_plxvp_pstxvp(void) +{ + show_result("plxvp", "SKIP (CONFIG_VSX is not set)"); + show_result("pstxvp", "SKIP (CONFIG_VSX is not set)"); +} +#endif /* CONFIG_VSX */ + static void __init run_tests_load_store(void) { test_ld(); + test_pld(); test_lwz(); + test_plwz(); test_lwzx(); test_std(); + test_pstd(); test_ldarx_stdcx(); test_lfsx_stfsx(); + test_plfs_pstfs(); test_lfdx_stfdx(); + test_plfd_pstfd(); test_lvx_stvx(); test_lxvd2x_stxvd2x(); + test_lxvp_stxvp(); + test_lxvpx_stxvpx(); + test_plxvp_pstxvp(); } struct compute_test { char *mnemonic; + unsigned long cpu_feature; struct { char *descr; unsigned long flags; - unsigned int instr; + ppc_inst_t instr; struct pt_regs regs; } subtests[MAX_SUBTESTS + 1]; }; +/* Extreme values for si0||si1 (the MLS:D-form 34 bit immediate field) */ +#define SI_MIN BIT(33) +#define SI_MAX (BIT(33) - 1) +#define SI_UMAX (BIT(34) - 1) + static struct compute_test compute_tests[] = { { .mnemonic = "nop", .subtests = { { .descr = "R0 = LONG_MAX", - .instr = PPC_INST_NOP, + .instr = ppc_inst(PPC_RAW_NOP()), .regs = { .gpr[0] = LONG_MAX, } @@ -479,11 +927,38 @@ static struct compute_test compute_tests[] = { } }, { + .mnemonic = "setb", + .cpu_feature = CPU_FTR_ARCH_300, + .subtests = { + { + .descr = "BFA = 1, CR = GT", + .instr = ppc_inst(PPC_RAW_SETB(20, 1)), + .regs = { + .ccr = 0x4000000, + } + }, + { + .descr = "BFA = 4, CR = LT", + .instr = ppc_inst(PPC_RAW_SETB(20, 4)), + .regs = { + .ccr = 0x8000, + } + }, + { + .descr = "BFA = 5, CR = EQ", + .instr = ppc_inst(PPC_RAW_SETB(20, 5)), + .regs = { + .ccr = 0x200, + } + } + } + }, + { .mnemonic = "add", .subtests = { { .descr = "RA = LONG_MIN, RB = LONG_MIN", - .instr = TEST_ADD(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), .regs = { .gpr[21] = LONG_MIN, .gpr[22] = LONG_MIN, @@ -491,7 +966,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = LONG_MIN, RB = LONG_MAX", - .instr = TEST_ADD(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), .regs = { .gpr[21] = LONG_MIN, .gpr[22] = LONG_MAX, @@ -499,7 +974,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = LONG_MAX, RB = LONG_MAX", - .instr = TEST_ADD(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), .regs = { .gpr[21] = LONG_MAX, .gpr[22] = LONG_MAX, @@ -507,7 +982,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = ULONG_MAX, RB = ULONG_MAX", - .instr = TEST_ADD(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), .regs = { .gpr[21] = ULONG_MAX, .gpr[22] = ULONG_MAX, @@ -515,7 +990,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = ULONG_MAX, RB = 0x1", - .instr = TEST_ADD(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), .regs = { .gpr[21] = ULONG_MAX, .gpr[22] = 0x1, @@ -523,7 +998,7 @@ static struct compute_test compute_tests[] = { }, { 
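/*
 * Editor's note on the new cpu_feature field introduced above: the test
 * runner (outside this hunk) presumably gates each mnemonic on it, along
 * the lines of the assumed helper below, so feature-dependent entries such
 * as setb (CPU_FTR_ARCH_300) are skipped rather than failed on older CPUs.
 */
static bool subtest_applicable(const struct compute_test *test)
{
	return !test->cpu_feature || cpu_has_feature(test->cpu_feature);
}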
.descr = "RA = INT_MIN, RB = INT_MIN", - .instr = TEST_ADD(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), .regs = { .gpr[21] = INT_MIN, .gpr[22] = INT_MIN, @@ -531,7 +1006,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MIN, RB = INT_MAX", - .instr = TEST_ADD(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), .regs = { .gpr[21] = INT_MIN, .gpr[22] = INT_MAX, @@ -539,7 +1014,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MAX, RB = INT_MAX", - .instr = TEST_ADD(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), .regs = { .gpr[21] = INT_MAX, .gpr[22] = INT_MAX, @@ -547,7 +1022,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = UINT_MAX, RB = UINT_MAX", - .instr = TEST_ADD(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), .regs = { .gpr[21] = UINT_MAX, .gpr[22] = UINT_MAX, @@ -555,7 +1030,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = UINT_MAX, RB = 0x1", - .instr = TEST_ADD(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), .regs = { .gpr[21] = UINT_MAX, .gpr[22] = 0x1, @@ -569,7 +1044,7 @@ static struct compute_test compute_tests[] = { { .descr = "RA = LONG_MIN, RB = LONG_MIN", .flags = IGNORE_CCR, - .instr = TEST_ADD_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), .regs = { .gpr[21] = LONG_MIN, .gpr[22] = LONG_MIN, @@ -577,7 +1052,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = LONG_MIN, RB = LONG_MAX", - .instr = TEST_ADD_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), .regs = { .gpr[21] = LONG_MIN, .gpr[22] = LONG_MAX, @@ -586,7 +1061,7 @@ static struct compute_test compute_tests[] = { { .descr = "RA = LONG_MAX, RB = LONG_MAX", .flags = IGNORE_CCR, - .instr = TEST_ADD_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), .regs = { .gpr[21] = LONG_MAX, .gpr[22] = LONG_MAX, @@ -594,7 +1069,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = ULONG_MAX, RB = ULONG_MAX", - .instr = TEST_ADD_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), .regs = { .gpr[21] = ULONG_MAX, .gpr[22] = ULONG_MAX, @@ -602,7 +1077,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = ULONG_MAX, RB = 0x1", - .instr = TEST_ADD_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), .regs = { .gpr[21] = ULONG_MAX, .gpr[22] = 0x1, @@ -610,7 +1085,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MIN, RB = INT_MIN", - .instr = TEST_ADD_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), .regs = { .gpr[21] = INT_MIN, .gpr[22] = INT_MIN, @@ -618,7 +1093,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MIN, RB = INT_MAX", - .instr = TEST_ADD_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), .regs = { .gpr[21] = INT_MIN, .gpr[22] = INT_MAX, @@ -626,7 +1101,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MAX, RB = INT_MAX", - .instr = TEST_ADD_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), .regs = { .gpr[21] = INT_MAX, .gpr[22] = INT_MAX, @@ -634,7 +1109,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = UINT_MAX, RB = UINT_MAX", - .instr = TEST_ADD_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), .regs = { .gpr[21] = UINT_MAX, .gpr[22] = UINT_MAX, @@ -642,7 +1117,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = UINT_MAX, RB = 0x1", - .instr = 
TEST_ADD_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), .regs = { .gpr[21] = UINT_MAX, .gpr[22] = 0x1, @@ -655,7 +1130,7 @@ static struct compute_test compute_tests[] = { .subtests = { { .descr = "RA = LONG_MIN, RB = LONG_MIN", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = LONG_MIN, .gpr[22] = LONG_MIN, @@ -663,7 +1138,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = LONG_MIN, RB = LONG_MAX", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = LONG_MIN, .gpr[22] = LONG_MAX, @@ -671,7 +1146,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = LONG_MAX, RB = LONG_MAX", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = LONG_MAX, .gpr[22] = LONG_MAX, @@ -679,7 +1154,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = ULONG_MAX, RB = ULONG_MAX", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = ULONG_MAX, .gpr[22] = ULONG_MAX, @@ -687,7 +1162,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = ULONG_MAX, RB = 0x1", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = ULONG_MAX, .gpr[22] = 0x1, @@ -695,7 +1170,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MIN, RB = INT_MIN", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = INT_MIN, .gpr[22] = INT_MIN, @@ -703,7 +1178,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MIN, RB = INT_MAX", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = INT_MIN, .gpr[22] = INT_MAX, @@ -711,7 +1186,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MAX, RB = INT_MAX", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = INT_MAX, .gpr[22] = INT_MAX, @@ -719,7 +1194,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = UINT_MAX, RB = UINT_MAX", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = UINT_MAX, .gpr[22] = UINT_MAX, @@ -727,7 +1202,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = UINT_MAX, RB = 0x1", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = UINT_MAX, .gpr[22] = 0x1, @@ -735,7 +1210,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = LONG_MIN | INT_MIN, RB = LONG_MIN | INT_MIN", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = LONG_MIN | (uint)INT_MIN, .gpr[22] = LONG_MIN | (uint)INT_MIN, @@ -749,7 +1224,7 @@ static struct compute_test compute_tests[] = { { .descr = "RA = LONG_MIN, RB = LONG_MIN", .flags = IGNORE_CCR, - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = LONG_MIN, .gpr[22] = LONG_MIN, @@ -757,7 +1232,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = LONG_MIN, RB = LONG_MAX", - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = LONG_MIN, .gpr[22] = LONG_MAX, @@ -766,7 +1241,7 @@ static struct compute_test compute_tests[] = { { .descr = "RA = LONG_MAX, RB = LONG_MAX", .flags = IGNORE_CCR, - .instr = 
TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = LONG_MAX, .gpr[22] = LONG_MAX, @@ -774,7 +1249,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = ULONG_MAX, RB = ULONG_MAX", - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = ULONG_MAX, .gpr[22] = ULONG_MAX, @@ -782,7 +1257,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = ULONG_MAX, RB = 0x1", - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = ULONG_MAX, .gpr[22] = 0x1, @@ -790,7 +1265,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MIN, RB = INT_MIN", - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = INT_MIN, .gpr[22] = INT_MIN, @@ -798,7 +1273,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MIN, RB = INT_MAX", - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = INT_MIN, .gpr[22] = INT_MAX, @@ -806,7 +1281,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MAX, RB = INT_MAX", - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = INT_MAX, .gpr[22] = INT_MAX, @@ -814,7 +1289,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = UINT_MAX, RB = UINT_MAX", - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = UINT_MAX, .gpr[22] = UINT_MAX, @@ -822,7 +1297,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = UINT_MAX, RB = 0x1", - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = UINT_MAX, .gpr[22] = 0x1, @@ -830,47 +1305,336 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = LONG_MIN | INT_MIN, RB = LONG_MIN | INT_MIN", - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = LONG_MIN | (uint)INT_MIN, .gpr[22] = LONG_MIN | (uint)INT_MIN, } } } + }, + { + .mnemonic = "divde", + .subtests = { + { + .descr = "RA = LONG_MIN, RB = LONG_MIN", + .instr = ppc_inst(PPC_RAW_DIVDE(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MIN, + } + }, + { + .descr = "RA = 1L, RB = 0", + .instr = ppc_inst(PPC_RAW_DIVDE(20, 21, 22)), + .flags = IGNORE_GPR(20), + .regs = { + .gpr[21] = 1L, + .gpr[22] = 0, + } + }, + { + .descr = "RA = LONG_MIN, RB = LONG_MAX", + .instr = ppc_inst(PPC_RAW_DIVDE(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MAX, + } + } + } + }, + { + .mnemonic = "divde.", + .subtests = { + { + .descr = "RA = LONG_MIN, RB = LONG_MIN", + .instr = ppc_inst(PPC_RAW_DIVDE_DOT(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MIN, + } + }, + { + .descr = "RA = 1L, RB = 0", + .instr = ppc_inst(PPC_RAW_DIVDE_DOT(20, 21, 22)), + .flags = IGNORE_GPR(20), + .regs = { + .gpr[21] = 1L, + .gpr[22] = 0, + } + }, + { + .descr = "RA = LONG_MIN, RB = LONG_MAX", + .instr = ppc_inst(PPC_RAW_DIVDE_DOT(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MAX, + } + } + } + }, + { + .mnemonic = "divdeu", + .subtests = { + { + .descr = "RA = LONG_MIN, RB = LONG_MIN", + .instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)), + .flags = IGNORE_GPR(20), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MIN, + } + }, + { + .descr = "RA = 
1L, RB = 0", + .instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)), + .flags = IGNORE_GPR(20), + .regs = { + .gpr[21] = 1L, + .gpr[22] = 0, + } + }, + { + .descr = "RA = LONG_MIN, RB = LONG_MAX", + .instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MAX, + } + }, + { + .descr = "RA = LONG_MAX - 1, RB = LONG_MAX", + .instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MAX - 1, + .gpr[22] = LONG_MAX, + } + }, + { + .descr = "RA = LONG_MIN + 1, RB = LONG_MIN", + .instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)), + .flags = IGNORE_GPR(20), + .regs = { + .gpr[21] = LONG_MIN + 1, + .gpr[22] = LONG_MIN, + } + } + } + }, + { + .mnemonic = "divdeu.", + .subtests = { + { + .descr = "RA = LONG_MIN, RB = LONG_MIN", + .instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)), + .flags = IGNORE_GPR(20), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MIN, + } + }, + { + .descr = "RA = 1L, RB = 0", + .instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)), + .flags = IGNORE_GPR(20), + .regs = { + .gpr[21] = 1L, + .gpr[22] = 0, + } + }, + { + .descr = "RA = LONG_MIN, RB = LONG_MAX", + .instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MAX, + } + }, + { + .descr = "RA = LONG_MAX - 1, RB = LONG_MAX", + .instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MAX - 1, + .gpr[22] = LONG_MAX, + } + }, + { + .descr = "RA = LONG_MIN + 1, RB = LONG_MIN", + .instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)), + .flags = IGNORE_GPR(20), + .regs = { + .gpr[21] = LONG_MIN + 1, + .gpr[22] = LONG_MIN, + } + } + } + }, + { + .mnemonic = "paddi", + .cpu_feature = CPU_FTR_ARCH_31, + .subtests = { + { + .descr = "RA = LONG_MIN, SI = SI_MIN, R = 0", + .instr = TEST_PADDI(21, 22, SI_MIN, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = LONG_MIN, + } + }, + { + .descr = "RA = LONG_MIN, SI = SI_MAX, R = 0", + .instr = TEST_PADDI(21, 22, SI_MAX, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = LONG_MIN, + } + }, + { + .descr = "RA = LONG_MAX, SI = SI_MAX, R = 0", + .instr = TEST_PADDI(21, 22, SI_MAX, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = LONG_MAX, + } + }, + { + .descr = "RA = ULONG_MAX, SI = SI_UMAX, R = 0", + .instr = TEST_PADDI(21, 22, SI_UMAX, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = ULONG_MAX, + } + }, + { + .descr = "RA = ULONG_MAX, SI = 0x1, R = 0", + .instr = TEST_PADDI(21, 22, 0x1, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = ULONG_MAX, + } + }, + { + .descr = "RA = INT_MIN, SI = SI_MIN, R = 0", + .instr = TEST_PADDI(21, 22, SI_MIN, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = INT_MIN, + } + }, + { + .descr = "RA = INT_MIN, SI = SI_MAX, R = 0", + .instr = TEST_PADDI(21, 22, SI_MAX, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = INT_MIN, + } + }, + { + .descr = "RA = INT_MAX, SI = SI_MAX, R = 0", + .instr = TEST_PADDI(21, 22, SI_MAX, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = INT_MAX, + } + }, + { + .descr = "RA = UINT_MAX, SI = 0x1, R = 0", + .instr = TEST_PADDI(21, 22, 0x1, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = UINT_MAX, + } + }, + { + .descr = "RA = UINT_MAX, SI = SI_MAX, R = 0", + .instr = TEST_PADDI(21, 22, SI_MAX, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = UINT_MAX, + } + }, + { + .descr = "RA is r0, SI = SI_MIN, R = 0", + .instr = TEST_PADDI(21, 0, SI_MIN, 0), + .regs = { + .gpr[21] = 0x0, + } + }, + { + .descr = "RA = 0, SI = SI_MIN, R = 0", + .instr = TEST_PADDI(21, 22, SI_MIN, 0), + .regs = { + .gpr[21] = 0x0, + .gpr[22] = 0x0, + } + }, + { + .descr = "RA is r0, SI = 0, R 
= 1", + .instr = TEST_PADDI(21, 0, 0, 1), + .regs = { + .gpr[21] = 0, + } + }, + { + .descr = "RA is r0, SI = SI_MIN, R = 1", + .instr = TEST_PADDI(21, 0, SI_MIN, 1), + .regs = { + .gpr[21] = 0, + } + }, + /* Invalid instruction form with R = 1 and RA != 0 */ + { + .descr = "RA = R22(0), SI = 0, R = 1", + .instr = TEST_PADDI(21, 22, 0, 1), + .flags = NEGATIVE_TEST, + .regs = { + .gpr[21] = 0, + .gpr[22] = 0, + } + } + } } }; static int __init emulate_compute_instr(struct pt_regs *regs, - unsigned int instr) + ppc_inst_t instr, + bool negative) { + int analysed; struct instruction_op op; - if (!regs || !instr) + if (!regs || !ppc_inst_val(instr)) return -EINVAL; - if (analyse_instr(&op, regs, instr) != 1 || - GETTYPE(op.type) != COMPUTE) { - pr_info("emulation failed, instruction = 0x%08x\n", instr); + /* This is not a return frame regs */ + regs->nip = patch_site_addr(&patch__exec_instr); + + analysed = analyse_instr(&op, regs, instr); + if (analysed != 1 || GETTYPE(op.type) != COMPUTE) { + if (negative) + return -EFAULT; + pr_info("emulation failed, instruction = %08lx\n", ppc_inst_as_ulong(instr)); return -EFAULT; } - - emulate_update_regs(regs, &op); + if (analysed == 1 && negative) + pr_info("negative test failed, instruction = %08lx\n", ppc_inst_as_ulong(instr)); + if (!negative) + emulate_update_regs(regs, &op); return 0; } static int __init execute_compute_instr(struct pt_regs *regs, - unsigned int instr) + ppc_inst_t instr) { extern int exec_instr(struct pt_regs *regs); - extern s32 patch__exec_instr; - if (!regs || !instr) + if (!regs || !ppc_inst_val(instr)) return -EINVAL; /* Patch the NOP with the actual instruction */ patch_instruction_site(&patch__exec_instr, instr); if (exec_instr(regs)) { - pr_info("execution failed, instruction = 0x%08x\n", instr); + pr_info("execution failed, instruction = %08lx\n", ppc_inst_as_ulong(instr)); return -EFAULT; } @@ -890,16 +1654,23 @@ static void __init run_tests_compute(void) unsigned long flags; struct compute_test *test; struct pt_regs *regs, exp, got; - unsigned int i, j, k, instr; - bool ignore_gpr, ignore_xer, ignore_ccr, passed; + unsigned int i, j, k; + ppc_inst_t instr; + bool ignore_gpr, ignore_xer, ignore_ccr, passed, rc, negative; for (i = 0; i < ARRAY_SIZE(compute_tests); i++) { test = &compute_tests[i]; + if (test->cpu_feature && !early_cpu_has_feature(test->cpu_feature)) { + show_result(test->mnemonic, "SKIP (!CPU_FTR)"); + continue; + } + for (j = 0; j < MAX_SUBTESTS && test->subtests[j].descr; j++) { instr = test->subtests[j].instr; flags = test->subtests[j].flags; regs = &test->subtests[j].regs; + negative = flags & NEGATIVE_TEST; ignore_xer = flags & IGNORE_XER; ignore_ccr = flags & IGNORE_CCR; passed = true; @@ -914,8 +1685,12 @@ static void __init run_tests_compute(void) exp.msr = MSR_KERNEL; got.msr = MSR_KERNEL; - if (emulate_compute_instr(&got, instr) || - execute_compute_instr(&exp, instr)) { + rc = emulate_compute_instr(&got, instr, negative) != 0; + if (negative) { + /* skip executing instruction */ + passed = rc; + goto print; + } else if (rc || execute_compute_instr(&exp, instr)) { passed = false; goto print; } diff --git a/arch/powerpc/lib/test_emulate_step_exec_instr.S b/arch/powerpc/lib/test_emulate_step_exec_instr.S index 1580f34f4f4f..e2b646a4f7fa 100644 --- a/arch/powerpc/lib/test_emulate_step_exec_instr.S +++ b/arch/powerpc/lib/test_emulate_step_exec_instr.S @@ -16,7 +16,7 @@ _GLOBAL(exec_instr) /* * Stack frame layout (INT_FRAME_SIZE bytes) - * In-memory pt_regs (SP + STACK_FRAME_OVERHEAD) + * In-memory 
pt_regs (SP + STACK_INT_FRAME_REGS) * Scratch space (SP + 8) * Back chain (SP + 0) */ @@ -37,7 +37,7 @@ _GLOBAL(exec_instr) * The stack pointer (GPR1) and the thread pointer (GPR13) are not * saved as these should not be modified anyway. */ - SAVE_2GPRS(2, r1) + SAVE_GPRS(2, 3, r1) SAVE_NVGPRS(r1) /* @@ -75,12 +75,13 @@ _GLOBAL(exec_instr) /* Load GPRs from pt_regs */ REST_GPR(0, r31) - REST_10GPRS(2, r31) - REST_GPR(12, r31) + REST_GPRS(2, 12, r31) REST_NVGPRS(r31) /* Placeholder for the test instruction */ + .balign 64 1: nop + nop patch_site 1b patch__exec_instr /* @@ -97,8 +98,7 @@ _GLOBAL(exec_instr) subi r3, r3, GPR0 SAVE_GPR(0, r3) SAVE_GPR(2, r3) - SAVE_8GPRS(4, r3) - SAVE_GPR(12, r3) + SAVE_GPRS(4, 12, r3) SAVE_NVGPRS(r3) /* Save resulting LR to pt_regs */ diff --git a/arch/powerpc/lib/vmx-helper.c b/arch/powerpc/lib/vmx-helper.c index 62e6c3045252..58ed6bd613a6 100644 --- a/arch/powerpc/lib/vmx-helper.c +++ b/arch/powerpc/lib/vmx-helper.c @@ -9,7 +9,6 @@ #include <linux/uaccess.h> #include <linux/hardirq.h> #include <asm/switch_to.h> -#include <asm/asm-prototypes.h> int enter_vmx_usercopy(void) { @@ -37,7 +36,17 @@ int exit_vmx_usercopy(void) { disable_kernel_altivec(); pagefault_enable(); - preempt_enable(); + preempt_enable_no_resched(); + /* + * Must never explicitly call schedule (including preempt_enable()) + * while in a kuap-unlocked user copy, because the AMR register will + * not be saved and restored across context switch. However preempt + * kernels need to be preempted as soon as possible if need_resched is + * set and we are preemptible. The hack here is to schedule a + * decrementer to fire here and reschedule for us if necessary. + */ + if (IS_ENABLED(CONFIG_PREEMPTION) && need_resched()) + set_dec(1); return 0; } diff --git a/arch/powerpc/lib/xor_vmx.c b/arch/powerpc/lib/xor_vmx.c index 54e61979e80e..aab49d056d18 100644 --- a/arch/powerpc/lib/xor_vmx.c +++ b/arch/powerpc/lib/xor_vmx.c @@ -49,8 +49,9 @@ typedef vector signed char unative_t; V1##_3 = vec_xor(V1##_3, V2##_3); \ } while (0) -void __xor_altivec_2(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in) +void __xor_altivec_2(unsigned long bytes, + unsigned long * __restrict v1_in, + const unsigned long * __restrict v2_in) { DEFINE(v1); DEFINE(v2); @@ -67,8 +68,10 @@ void __xor_altivec_2(unsigned long bytes, unsigned long *v1_in, } while (--lines > 0); } -void __xor_altivec_3(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in, unsigned long *v3_in) +void __xor_altivec_3(unsigned long bytes, + unsigned long * __restrict v1_in, + const unsigned long * __restrict v2_in, + const unsigned long * __restrict v3_in) { DEFINE(v1); DEFINE(v2); @@ -89,9 +92,11 @@ void __xor_altivec_3(unsigned long bytes, unsigned long *v1_in, } while (--lines > 0); } -void __xor_altivec_4(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in, unsigned long *v3_in, - unsigned long *v4_in) +void __xor_altivec_4(unsigned long bytes, + unsigned long * __restrict v1_in, + const unsigned long * __restrict v2_in, + const unsigned long * __restrict v3_in, + const unsigned long * __restrict v4_in) { DEFINE(v1); DEFINE(v2); @@ -116,9 +121,12 @@ void __xor_altivec_4(unsigned long bytes, unsigned long *v1_in, } while (--lines > 0); } -void __xor_altivec_5(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in, unsigned long *v3_in, - unsigned long *v4_in, unsigned long *v5_in) +void __xor_altivec_5(unsigned long bytes, + unsigned long * __restrict v1_in, + const unsigned long * __restrict v2_in, 
+ const unsigned long * __restrict v3_in, + const unsigned long * __restrict v4_in, + const unsigned long * __restrict v5_in) { DEFINE(v1); DEFINE(v2); diff --git a/arch/powerpc/lib/xor_vmx.h b/arch/powerpc/lib/xor_vmx.h index 5c2b0839b179..573c41d90dac 100644 --- a/arch/powerpc/lib/xor_vmx.h +++ b/arch/powerpc/lib/xor_vmx.h @@ -6,16 +6,17 @@ * outside of the enable/disable altivec block. */ -void __xor_altivec_2(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in); - -void __xor_altivec_3(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in, unsigned long *v3_in); - -void __xor_altivec_4(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in, unsigned long *v3_in, - unsigned long *v4_in); - -void __xor_altivec_5(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in, unsigned long *v3_in, - unsigned long *v4_in, unsigned long *v5_in); +void __xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2); +void __xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3); +void __xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4); +void __xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5); diff --git a/arch/powerpc/lib/xor_vmx_glue.c b/arch/powerpc/lib/xor_vmx_glue.c index 80dba916c367..35d917ece4d1 100644 --- a/arch/powerpc/lib/xor_vmx_glue.c +++ b/arch/powerpc/lib/xor_vmx_glue.c @@ -12,47 +12,51 @@ #include <asm/xor_altivec.h> #include "xor_vmx.h" -void xor_altivec_2(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in) +void xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) { preempt_disable(); enable_kernel_altivec(); - __xor_altivec_2(bytes, v1_in, v2_in); + __xor_altivec_2(bytes, p1, p2); disable_kernel_altivec(); preempt_enable(); } EXPORT_SYMBOL(xor_altivec_2); -void xor_altivec_3(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in, unsigned long *v3_in) +void xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) { preempt_disable(); enable_kernel_altivec(); - __xor_altivec_3(bytes, v1_in, v2_in, v3_in); + __xor_altivec_3(bytes, p1, p2, p3); disable_kernel_altivec(); preempt_enable(); } EXPORT_SYMBOL(xor_altivec_3); -void xor_altivec_4(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in, unsigned long *v3_in, - unsigned long *v4_in) +void xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) { preempt_disable(); enable_kernel_altivec(); - __xor_altivec_4(bytes, v1_in, v2_in, v3_in, v4_in); + __xor_altivec_4(bytes, p1, p2, p3, p4); disable_kernel_altivec(); preempt_enable(); } EXPORT_SYMBOL(xor_altivec_4); -void xor_altivec_5(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in, unsigned long *v3_in, - unsigned long *v4_in, unsigned long *v5_in) +void xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict 
p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) { preempt_disable(); enable_kernel_altivec(); - __xor_altivec_5(bytes, v1_in, v2_in, v3_in, v4_in, v5_in); + __xor_altivec_5(bytes, p1, p2, p3, p4, p5); disable_kernel_altivec(); preempt_enable(); } 
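
The xor_vmx prototype changes above replace plain pointer parameters with restrict-qualified ones, telling the compiler that the destination and source blocks never alias. As a purely illustrative aside (not part of the patch; the function name below is hypothetical), a minimal stand-alone C sketch of the same contract for the two-buffer case is:

/*
 * Illustrative sketch only -- a plain-C analogue of a 2-source XOR,
 * not the kernel's AltiVec implementation. The restrict qualifiers
 * mirror the guarantee the new __xor_altivec_*() prototypes encode:
 * p1 and p2 do not overlap, so the loop's loads and stores may be
 * reordered and vectorised freely.
 */
#include <stddef.h>
#include <stdio.h>

static void xor_blocks_2(size_t bytes,
                         unsigned long * restrict p1,
                         const unsigned long * restrict p2)
{
        size_t words = bytes / sizeof(unsigned long);
        size_t i;

        for (i = 0; i < words; i++)
                p1[i] ^= p2[i];
}

int main(void)
{
        unsigned long a[4] = { 0xff, 0x00, 0xf0, 0x0f };
        unsigned long b[4] = { 0x0f, 0xff, 0x0f, 0xf0 };

        /* XOR b into a in place; prints "f0 ff ff ff" */
        xor_blocks_2(sizeof(a), a, b);
        printf("%lx %lx %lx %lx\n", a[0], a[1], a[2], a[3]);
        return 0;
}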