Diffstat (limited to 'arch/powerpc/lib')
45 files changed, 10847 insertions, 3168 deletions
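One of the larger behavioural changes in this diff is the code-patching API: patch_instruction() and patch_branch() now take u32 * / ppc_inst_t arguments and report failure through their return value (-ERANGE for an out-of-range branch target, -EPERM or -ENOMEM from the patching path) instead of handing back a raw instruction word. The following is a minimal caller sketch under those conventions; the function names and the call site are assumptions for illustration only, not code from this series.

/*
 * Hypothetical caller sketch (not part of this series): redirect a call
 * site to a handler, or restore a nop, using the return-value
 * conventions shown in code-patching.c below.
 */
#include <asm/text-patching.h>	/* patch_instruction(), patch_branch() -- per this series */
#include <asm/inst.h>		/* ppc_inst() */

static int redirect_call_site(u32 *site, unsigned long handler)
{
	/*
	 * patch_branch() returns -ERANGE when 'handler' is outside the
	 * +/-32 MB relative branch range of 'site', and the error code
	 * from the patching machinery otherwise.
	 */
	return patch_branch(site, handler, BRANCH_SET_LINK);
}

static int restore_nop(u32 *site)
{
	/* 0x60000000 is the canonical nop (ori 0,0,0) encoding. */
	return patch_instruction(site, ppc_inst(0x60000000));
}

A caller is expected to check the return value rather than compare the result against 0 as with the old create_branch()/create_cond_branch() helpers, which previously returned 0 to signal an unrepresentable offset.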
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 450433276699..f14ecab674a3 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -1,33 +1,81 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for ppc-specific library files.. # -subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror +CFLAGS_code-patching.o += -fno-stack-protector +CFLAGS_feature-fixups.o += -fno-stack-protector -ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) +CFLAGS_REMOVE_code-patching.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_feature-fixups.o = $(CC_FLAGS_FTRACE) -CFLAGS_REMOVE_code-patching.o = -pg -CFLAGS_REMOVE_feature-fixups.o = -pg +KASAN_SANITIZE_code-patching.o := n +KASAN_SANITIZE_feature-fixups.o := n +# restart_table.o contains functions called in the NMI interrupt path +# which can be in real mode. Disable KASAN. +KASAN_SANITIZE_restart_table.o := n +KCSAN_SANITIZE_code-patching.o := n +KCSAN_SANITIZE_feature-fixups.o := n -obj-y := string.o alloc.o \ - checksum_$(CONFIG_WORD_SIZE).o crtsavres.o -obj-$(CONFIG_PPC32) += div64.o copy_32.o -obj-$(CONFIG_HAS_IOMEM) += devres.o +ifdef CONFIG_KASAN +CFLAGS_code-patching.o += -DDISABLE_BRANCH_PROFILING +CFLAGS_feature-fixups.o += -DDISABLE_BRANCH_PROFILING +endif + +CFLAGS_code-patching.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) +CFLAGS_feature-fixups.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) + +obj-y += code-patching.o feature-fixups.o pmem.o + +obj-$(CONFIG_CODE_PATCHING_SELFTEST) += test-code-patching.o + +ifndef CONFIG_KASAN +obj-y += string.o memcmp_$(BITS).o +obj-$(CONFIG_PPC32) += strlen_32.o +endif + +obj-$(CONFIG_PPC32) += div64.o copy_32.o crtsavres.o + +obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o -obj-$(CONFIG_PPC64) += copypage_64.o copyuser_64.o \ - memcpy_64.o usercopy_64.o mem_64.o string.o \ - checksum_wrappers_64.o hweight_64.o \ - copyuser_power7.o string_64.o copypage_power7.o \ - memcpy_power7.o -obj-$(CONFIG_PPC_EMULATE_SSTEP) += sstep.o ldstfp.o +# See corresponding test in arch/powerpc/Makefile +# 64-bit linker creates .sfpr on demand for final link (vmlinux), +# so it is only needed for modules, and only for older linkers which +# do not support --save-restore-funcs +ifndef CONFIG_LD_IS_BFD +always-$(CONFIG_PPC64) += crtsavres.o +endif + +obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \ + memcpy_power7.o restart_table.o -ifeq ($(CONFIG_PPC64),y) -obj-$(CONFIG_SMP) += locks.o -obj-$(CONFIG_ALTIVEC) += vmx-helper.o +obj64-y += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \ + memcpy_64.o copy_mc_64.o + +ifdef CONFIG_PPC_QUEUED_SPINLOCKS +obj-$(CONFIG_SMP) += qspinlock.o +else +obj64-$(CONFIG_SMP) += locks.o endif +obj64-$(CONFIG_ALTIVEC) += vmx-helper.o +obj64-$(CONFIG_KPROBES_SANITY_TEST) += test_emulate_step.o \ + test_emulate_step_exec_instr.o + +obj-y += checksum_$(BITS).o checksum_wrappers.o \ + string_$(BITS).o + +obj-y += sstep.o +obj-$(CONFIG_PPC_FPU) += ldstfp.o +obj64-y += quad.o + obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o -obj-y += code-patching.o -obj-y += feature-fixups.o obj-$(CONFIG_FTR_FIXUP_SELFTEST) += feature-fixups-test.o + +obj-$(CONFIG_ALTIVEC) += xor_vmx.o xor_vmx_glue.o +CFLAGS_xor_vmx.o += -mhard-float -maltivec $(call cc-option,-mabi=altivec) +# Enable <altivec.h> +CFLAGS_xor_vmx.o += -isystem $(shell $(CC) -print-file-name=include) + +obj-$(CONFIG_PPC64) += $(obj64-y) diff --git a/arch/powerpc/lib/alloc.c b/arch/powerpc/lib/alloc.c deleted file mode 100644 index da22c84a8fed..000000000000 --- a/arch/powerpc/lib/alloc.c +++ /dev/null @@ -1,21 +0,0 @@ -#include 
<linux/types.h> -#include <linux/init.h> -#include <linux/slab.h> -#include <linux/bootmem.h> -#include <linux/string.h> -#include <asm/setup.h> - - -void * __init_refok zalloc_maybe_bootmem(size_t size, gfp_t mask) -{ - void *p; - - if (mem_init_done) - p = kzalloc(size, mask); - else { - p = alloc_bootmem(size); - if (p) - memset(p, 0, size); - } - return p; -} diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S index 7874e8a80455..cd00b9bdd772 100644 --- a/arch/powerpc/lib/checksum_32.S +++ b/arch/powerpc/lib/checksum_32.S @@ -1,225 +1,309 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * This file contains assembly-language implementations * of IP-style 1's complement checksum routines. * * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au). */ +#include <linux/export.h> #include <linux/sys.h> #include <asm/processor.h> +#include <asm/cache.h> #include <asm/errno.h> #include <asm/ppc_asm.h> .text /* - * ip_fast_csum(buf, len) -- Optimized for IP header - * len is in words and is always >= 5. - */ -_GLOBAL(ip_fast_csum) - lwz r0,0(r3) - lwzu r5,4(r3) - addic. r4,r4,-2 - addc r0,r0,r5 - mtctr r4 - blelr- -1: lwzu r4,4(r3) - adde r0,r0,r4 - bdnz 1b - addze r0,r0 /* add in final carry */ - rlwinm r3,r0,16,0,31 /* fold two halves together */ - add r3,r0,r3 - not r3,r3 - srwi r3,r3,16 - blr - -/* - * Compute checksum of TCP or UDP pseudo-header: - * csum_tcpudp_magic(saddr, daddr, len, proto, sum) - */ -_GLOBAL(csum_tcpudp_magic) - rlwimi r5,r6,16,0,15 /* put proto in upper half of len */ - addc r0,r3,r4 /* add 4 32-bit words together */ - adde r0,r0,r5 - adde r0,r0,r7 - addze r0,r0 /* add in final carry */ - rlwinm r3,r0,16,0,31 /* fold two halves together */ - add r3,r0,r3 - not r3,r3 - srwi r3,r3,16 - blr - -/* * computes the checksum of a memory block at buff, length len, * and adds in "sum" (32-bit) * - * csum_partial(buff, len, sum) + * __csum_partial(buff, len, sum) */ -_GLOBAL(csum_partial) - addic r0,r5,0 +_GLOBAL(__csum_partial) subi r3,r3,4 - srwi. r6,r4,2 + srawi. r6,r4,2 /* Divide len by 4 and also clear carry */ beq 3f /* if we're doing < 4 bytes */ - andi. r5,r3,2 /* Align buffer to longword boundary */ + andi. r0,r3,2 /* Align buffer to longword boundary */ beq+ 1f - lhz r5,4(r3) /* do 2 bytes to get aligned */ - addi r3,r3,2 + lhz r0,4(r3) /* do 2 bytes to get aligned */ subi r4,r4,2 - addc r0,r0,r5 + addi r3,r3,2 srwi. r6,r4,2 /* # words to do */ + adde r5,r5,r0 beq 3f -1: mtctr r6 -2: lwzu r5,4(r3) /* the bdnz has zero overhead, so it should */ - adde r0,r0,r5 /* be unnecessary to unroll this loop */ +1: andi. r6,r6,3 /* Prepare to handle words 4 by 4 */ + beq 21f + mtctr r6 +2: lwzu r0,4(r3) + adde r5,r5,r0 bdnz 2b - andi. r4,r4,3 -3: cmpwi 0,r4,2 - blt+ 4f - lhz r5,4(r3) +21: srwi. r6,r4,4 /* # blocks of 4 words to do */ + beq 3f + lwz r0,4(r3) + mtctr r6 + lwz r6,8(r3) + adde r5,r5,r0 + lwz r7,12(r3) + adde r5,r5,r6 + lwzu r8,16(r3) + adde r5,r5,r7 + bdz 23f +22: lwz r0,4(r3) + adde r5,r5,r8 + lwz r6,8(r3) + adde r5,r5,r0 + lwz r7,12(r3) + adde r5,r5,r6 + lwzu r8,16(r3) + adde r5,r5,r7 + bdnz 22b +23: adde r5,r5,r8 +3: andi. 
r0,r4,2 + beq+ 4f + lhz r0,4(r3) addi r3,r3,2 - subi r4,r4,2 - adde r0,r0,r5 -4: cmpwi 0,r4,1 - bne+ 5f - lbz r5,4(r3) - slwi r5,r5,8 /* Upper byte of word */ - adde r0,r0,r5 -5: addze r3,r0 /* add in final carry */ + adde r5,r5,r0 +4: andi. r0,r4,1 + beq+ 5f + lbz r0,4(r3) + slwi r0,r0,8 /* Upper byte of word */ + adde r5,r5,r0 +5: addze r3,r5 /* add in final carry */ blr +EXPORT_SYMBOL(__csum_partial) /* * Computes the checksum of a memory block at src, length len, - * and adds in "sum" (32-bit), while copying the block to dst. - * If an access exception occurs on src or dst, it stores -EFAULT - * to *src_err or *dst_err respectively, and (for an error on - * src) zeroes the rest of dst. + * and adds in 0xffffffff, while copying the block to dst. + * If an access exception occurs it returns zero. * - * csum_partial_copy_generic(src, dst, len, sum, src_err, dst_err) + * csum_partial_copy_generic(src, dst, len) */ +#define CSUM_COPY_16_BYTES_WITHEX(n) \ +8 ## n ## 0: \ + lwz r7,4(r4); \ +8 ## n ## 1: \ + lwz r8,8(r4); \ +8 ## n ## 2: \ + lwz r9,12(r4); \ +8 ## n ## 3: \ + lwzu r10,16(r4); \ +8 ## n ## 4: \ + stw r7,4(r6); \ + adde r12,r12,r7; \ +8 ## n ## 5: \ + stw r8,8(r6); \ + adde r12,r12,r8; \ +8 ## n ## 6: \ + stw r9,12(r6); \ + adde r12,r12,r9; \ +8 ## n ## 7: \ + stwu r10,16(r6); \ + adde r12,r12,r10 + +#define CSUM_COPY_16_BYTES_EXCODE(n) \ + EX_TABLE(8 ## n ## 0b, fault); \ + EX_TABLE(8 ## n ## 1b, fault); \ + EX_TABLE(8 ## n ## 2b, fault); \ + EX_TABLE(8 ## n ## 3b, fault); \ + EX_TABLE(8 ## n ## 4b, fault); \ + EX_TABLE(8 ## n ## 5b, fault); \ + EX_TABLE(8 ## n ## 6b, fault); \ + EX_TABLE(8 ## n ## 7b, fault); + + .text + +CACHELINE_BYTES = L1_CACHE_BYTES +LG_CACHELINE_BYTES = L1_CACHE_SHIFT +CACHELINE_MASK = (L1_CACHE_BYTES-1) + _GLOBAL(csum_partial_copy_generic) - addic r0,r6,0 - subi r3,r3,4 - subi r4,r4,4 - srwi. r6,r5,2 - beq 3f /* if we're doing < 4 bytes */ - andi. r9,r4,2 /* Align dst to longword boundary */ - beq+ 1f -81: lhz r6,4(r3) /* do 2 bytes to get aligned */ - addi r3,r3,2 - subi r5,r5,2 -91: sth r6,4(r4) - addi r4,r4,2 - addc r0,r0,r6 - srwi. r6,r5,2 /* # words to do */ - beq 3f -1: srwi. r6,r5,4 /* # groups of 4 words to do */ - beq 10f - mtctr r6 -71: lwz r6,4(r3) -72: lwz r9,8(r3) -73: lwz r10,12(r3) -74: lwzu r11,16(r3) - adde r0,r0,r6 -75: stw r6,4(r4) - adde r0,r0,r9 -76: stw r9,8(r4) - adde r0,r0,r10 -77: stw r10,12(r4) - adde r0,r0,r11 -78: stwu r11,16(r4) - bdnz 71b -10: rlwinm. r6,r5,30,30,31 /* # words left to do */ - beq 13f - mtctr r6 -82: lwzu r9,4(r3) -92: stwu r9,4(r4) - adde r0,r0,r9 - bdnz 82b -13: andi. r5,r5,3 -3: cmpwi 0,r5,2 - blt+ 4f -83: lhz r6,4(r3) - addi r3,r3,2 - subi r5,r5,2 -93: sth r6,4(r4) - addi r4,r4,2 - adde r0,r0,r6 -4: cmpwi 0,r5,1 - bne+ 5f -84: lbz r6,4(r3) -94: stb r6,4(r4) - slwi r6,r6,8 /* Upper byte of word */ - adde r0,r0,r6 -5: addze r3,r0 /* add in final carry */ - blr + li r12,-1 + addic r0,r0,0 /* clear carry */ + addi r6,r4,-4 + neg r0,r4 + addi r4,r3,-4 + andi. r0,r0,CACHELINE_MASK /* # bytes to start of cache line */ + crset 4*cr7+eq + beq 58f + + cmplw 0,r5,r0 /* is this more than total to do? */ + blt 63f /* if not much to do */ + rlwinm r7,r6,3,0x8 + rlwnm r12,r12,r7,0,31 /* odd destination address: rotate one byte */ + cmplwi cr7,r7,0 /* is destination address even ? */ + andi. 
r8,r0,3 /* get it word-aligned first */ + mtctr r8 + beq+ 61f + li r3,0 +70: lbz r9,4(r4) /* do some bytes */ + addi r4,r4,1 + slwi r3,r3,8 + rlwimi r3,r9,0,24,31 +71: stb r9,4(r6) + addi r6,r6,1 + bdnz 70b + adde r12,r12,r3 +61: subf r5,r0,r5 + srwi. r0,r0,2 + mtctr r0 + beq 58f +72: lwzu r9,4(r4) /* do some words */ + adde r12,r12,r9 +73: stwu r9,4(r6) + bdnz 72b + +58: srwi. r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */ + clrlwi r5,r5,32-LG_CACHELINE_BYTES + li r11,4 + beq 63f + + /* Here we decide how far ahead to prefetch the source */ + li r3,4 + cmpwi r0,1 + li r7,0 + ble 114f + li r7,1 +#if MAX_COPY_PREFETCH > 1 + /* Heuristically, for large transfers we prefetch + MAX_COPY_PREFETCH cachelines ahead. For small transfers + we prefetch 1 cacheline ahead. */ + cmpwi r0,MAX_COPY_PREFETCH + ble 112f + li r7,MAX_COPY_PREFETCH +112: mtctr r7 +111: dcbt r3,r4 + addi r3,r3,CACHELINE_BYTES + bdnz 111b +#else + dcbt r3,r4 + addi r3,r3,CACHELINE_BYTES +#endif /* MAX_COPY_PREFETCH > 1 */ + +114: subf r8,r7,r0 + mr r0,r7 + mtctr r8 + +53: dcbt r3,r4 +54: dcbz r11,r6 +/* the main body of the cacheline loop */ + CSUM_COPY_16_BYTES_WITHEX(0) +#if L1_CACHE_BYTES >= 32 + CSUM_COPY_16_BYTES_WITHEX(1) +#if L1_CACHE_BYTES >= 64 + CSUM_COPY_16_BYTES_WITHEX(2) + CSUM_COPY_16_BYTES_WITHEX(3) +#if L1_CACHE_BYTES >= 128 + CSUM_COPY_16_BYTES_WITHEX(4) + CSUM_COPY_16_BYTES_WITHEX(5) + CSUM_COPY_16_BYTES_WITHEX(6) + CSUM_COPY_16_BYTES_WITHEX(7) +#endif +#endif +#endif + bdnz 53b + cmpwi r0,0 + li r3,4 + li r7,0 + bne 114b + +63: srwi. r0,r5,2 + mtctr r0 + beq 64f +30: lwzu r0,4(r4) + adde r12,r12,r0 +31: stwu r0,4(r6) + bdnz 30b -/* These shouldn't go in the fixup section, since that would - cause the ex_table addresses to get out of order. */ - -src_error_4: - mfctr r6 /* update # bytes remaining from ctr */ - rlwimi r5,r6,4,0,27 - b 79f -src_error_1: - li r6,0 - subi r5,r5,2 -95: sth r6,4(r4) +64: andi. r0,r5,2 + beq+ 65f +40: lhz r0,4(r4) addi r4,r4,2 -79: srwi. r6,r5,2 - beq 3f - mtctr r6 -src_error_2: - li r6,0 -96: stwu r6,4(r4) - bdnz 96b -3: andi. r5,r5,3 - beq src_error -src_error_3: - li r6,0 - mtctr r5 - addi r4,r4,3 -97: stbu r6,1(r4) - bdnz 97b -src_error: - cmpwi 0,r7,0 - beq 1f - li r6,-EFAULT - stw r6,0(r7) -1: addze r3,r0 +41: sth r0,4(r6) + adde r12,r12,r0 + addi r6,r6,2 +65: andi. 
r0,r5,1 + beq+ 66f +50: lbz r0,4(r4) +51: stb r0,4(r6) + slwi r0,r0,8 + adde r12,r12,r0 +66: addze r3,r12 + beqlr+ cr7 + rlwinm r3,r3,8,0,31 /* odd destination address: rotate one byte */ blr -dst_error: - cmpwi 0,r8,0 - beq 1f - li r6,-EFAULT - stw r6,0(r8) -1: addze r3,r0 +fault: + li r3,0 blr -.section __ex_table,"a" - .long 81b,src_error_1 - .long 91b,dst_error - .long 71b,src_error_4 - .long 72b,src_error_4 - .long 73b,src_error_4 - .long 74b,src_error_4 - .long 75b,dst_error - .long 76b,dst_error - .long 77b,dst_error - .long 78b,dst_error - .long 82b,src_error_2 - .long 92b,dst_error - .long 83b,src_error_3 - .long 93b,dst_error - .long 84b,src_error_3 - .long 94b,dst_error - .long 95b,dst_error - .long 96b,dst_error - .long 97b,dst_error + EX_TABLE(70b, fault); + EX_TABLE(71b, fault); + EX_TABLE(72b, fault); + EX_TABLE(73b, fault); + EX_TABLE(54b, fault); + +/* + * this stuff handles faults in the cacheline loop and branches to either + * fault (if in read part) or fault (if in write part) + */ + CSUM_COPY_16_BYTES_EXCODE(0) +#if L1_CACHE_BYTES >= 32 + CSUM_COPY_16_BYTES_EXCODE(1) +#if L1_CACHE_BYTES >= 64 + CSUM_COPY_16_BYTES_EXCODE(2) + CSUM_COPY_16_BYTES_EXCODE(3) +#if L1_CACHE_BYTES >= 128 + CSUM_COPY_16_BYTES_EXCODE(4) + CSUM_COPY_16_BYTES_EXCODE(5) + CSUM_COPY_16_BYTES_EXCODE(6) + CSUM_COPY_16_BYTES_EXCODE(7) +#endif +#endif +#endif + + EX_TABLE(30b, fault); + EX_TABLE(31b, fault); + EX_TABLE(40b, fault); + EX_TABLE(41b, fault); + EX_TABLE(50b, fault); + EX_TABLE(51b, fault); + +EXPORT_SYMBOL(csum_partial_copy_generic) + +/* + * __sum16 csum_ipv6_magic(const struct in6_addr *saddr, + * const struct in6_addr *daddr, + * __u32 len, __u8 proto, __wsum sum) + */ + +_GLOBAL(csum_ipv6_magic) + lwz r8, 0(r3) + lwz r9, 4(r3) + addc r0, r7, r8 + lwz r10, 8(r3) + adde r0, r0, r9 + lwz r11, 12(r3) + adde r0, r0, r10 + lwz r8, 0(r4) + adde r0, r0, r11 + lwz r9, 4(r4) + adde r0, r0, r8 + lwz r10, 8(r4) + adde r0, r0, r9 + lwz r11, 12(r4) + adde r0, r0, r10 + add r5, r5, r6 /* assumption: len + proto doesn't carry */ + adde r0, r0, r11 + adde r0, r0, r5 + addze r0, r0 + rotlwi r3, r0, 16 + add r3, r0, r3 + not r3, r3 + rlwinm r3, r3, 16, 16, 31 + blr +EXPORT_SYMBOL(csum_ipv6_magic) diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S index 167f72555d60..d53d8f09a2c2 100644 --- a/arch/powerpc/lib/checksum_64.S +++ b/arch/powerpc/lib/checksum_64.S @@ -1,77 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * This file contains assembly-language implementations * of IP-style 1's complement checksum routines. * * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au). */ +#include <linux/export.h> #include <linux/sys.h> #include <asm/processor.h> #include <asm/errno.h> #include <asm/ppc_asm.h> /* - * ip_fast_csum(r3=buf, r4=len) -- Optimized for IP header - * len is in words and is always >= 5. - * - * In practice len == 5, but this is not guaranteed. So this code does not - * attempt to use doubleword instructions. - */ -_GLOBAL(ip_fast_csum) - lwz r0,0(r3) - lwzu r5,4(r3) - addic. 
r4,r4,-2 - addc r0,r0,r5 - mtctr r4 - blelr- -1: lwzu r4,4(r3) - adde r0,r0,r4 - bdnz 1b - addze r0,r0 /* add in final carry */ - rldicl r4,r0,32,0 /* fold two 32-bit halves together */ - add r0,r0,r4 - srdi r0,r0,32 - rlwinm r3,r0,16,0,31 /* fold two halves together */ - add r3,r0,r3 - not r3,r3 - srwi r3,r3,16 - blr - -/* - * Compute checksum of TCP or UDP pseudo-header: - * csum_tcpudp_magic(r3=saddr, r4=daddr, r5=len, r6=proto, r7=sum) - * No real gain trying to do this specially for 64 bit, but - * the 32 bit addition may spill into the upper bits of - * the doubleword so we still must fold it down from 64. - */ -_GLOBAL(csum_tcpudp_magic) - rlwimi r5,r6,16,0,15 /* put proto in upper half of len */ - addc r0,r3,r4 /* add 4 32-bit words together */ - adde r0,r0,r5 - adde r0,r0,r7 - rldicl r4,r0,32,0 /* fold 64 bit value */ - add r0,r4,r0 - srdi r0,r0,32 - rlwinm r3,r0,16,0,31 /* fold two halves together */ - add r3,r0,r3 - not r3,r3 - srwi r3,r3,16 - blr - -/* * Computes the checksum of a memory block at buff, length len, * and adds in "sum" (32-bit). * - * csum_partial(r3=buff, r4=len, r5=sum) + * __csum_partial(r3=buff, r4=len, r5=sum) */ -_GLOBAL(csum_partial) +_GLOBAL(__csum_partial) addic r0,r5,0 /* clear carry */ srdi. r6,r4,3 /* less than 8 bytes? */ @@ -83,7 +32,7 @@ _GLOBAL(csum_partial) * work to calculate the correct checksum, we ignore that case * and take the potential slowdown of unaligned loads. */ - rldicl. r6,r3,64-1,64-2 /* r6 = (r3 & 0x3) >> 1 */ + rldicl. r6,r3,64-1,64-2 /* r6 = (r3 >> 1) & 0x3 */ beq .Lcsum_aligned li r7,4 @@ -122,9 +71,9 @@ _GLOBAL(csum_partial) ld r11,24(r3) /* - * On POWER6 and POWER7 back to back addes take 2 cycles because of - * the XER dependency. This means the fastest this loop can go is - * 16 cycles per iteration. The scheduling of the loop below has + * On POWER6 and POWER7 back to back adde instructions take 2 cycles + * because of the XER dependency. This means the fastest this loop can + * go is 16 cycles per iteration. The scheduling of the loop below has * been shown to hit this on both POWER6 and POWER7. */ .align 5 @@ -215,8 +164,12 @@ _GLOBAL(csum_partial) beq .Lcsum_finish lbz r6,0(r3) +#ifdef __BIG_ENDIAN__ sldi r9,r6,8 /* Pad the byte out to 16 bits */ adde r0,r0,r9 +#else + adde r0,r0,r6 +#endif .Lcsum_finish: addze r0,r0 /* add in final carry */ @@ -224,34 +177,38 @@ _GLOBAL(csum_partial) add r3,r4,r0 srdi r3,r3,32 blr +EXPORT_SYMBOL(__csum_partial) - .macro source + .macro srcnr 100: - .section __ex_table,"a" - .align 3 - .llong 100b,.Lsrc_error - .previous + EX_TABLE(100b,.Lerror_nr) .endm - .macro dest + .macro source +150: + EX_TABLE(150b,.Lerror) + .endm + + .macro dstnr 200: - .section __ex_table,"a" - .align 3 - .llong 200b,.Ldest_error - .previous + EX_TABLE(200b,.Lerror_nr) + .endm + + .macro dest +250: + EX_TABLE(250b,.Lerror) .endm /* * Computes the checksum of a memory block at src, length len, - * and adds in "sum" (32-bit), while copying the block to dst. - * If an access exception occurs on src or dst, it stores -EFAULT - * to *src_err or *dst_err respectively. The caller must take any action - * required in this case (zeroing memory, recalculating partial checksum etc). + * and adds in 0xffffffff (32-bit), while copying the block to dst. + * If an access exception occurs, it returns 0. 
* - * csum_partial_copy_generic(r3=src, r4=dst, r5=len, r6=sum, r7=src_err, r8=dst_err) + * csum_partial_copy_generic(r3=src, r4=dst, r5=len) */ _GLOBAL(csum_partial_copy_generic) + li r6,-1 addic r0,r6,0 /* clear carry */ srdi. r6,r5,3 /* less than 8 bytes? */ @@ -266,19 +223,19 @@ _GLOBAL(csum_partial_copy_generic) * If the source and destination are relatively unaligned we only * align the source. This keeps things simple. */ - rldicl. r6,r3,64-1,64-2 /* r6 = (r3 & 0x3) >> 1 */ + rldicl. r6,r3,64-1,64-2 /* r6 = (r3 >> 1) & 0x3 */ beq .Lcopy_aligned - li r7,4 - sub r6,r7,r6 + li r9,4 + sub r6,r9,r6 mtctr r6 1: -source; lhz r6,0(r3) /* align to doubleword */ +srcnr; lhz r6,0(r3) /* align to doubleword */ subi r5,r5,2 addi r3,r3,2 adde r0,r0,r6 -dest; sth r6,0(r4) +dstnr; sth r6,0(r4) addi r4,r4,2 bdnz 1b @@ -307,9 +264,9 @@ source; ld r10,16(r3) source; ld r11,24(r3) /* - * On POWER6 and POWER7 back to back addes take 2 cycles because of - * the XER dependency. This means the fastest this loop can go is - * 16 cycles per iteration. The scheduling of the loop below has + * On POWER6 and POWER7 back to back adde instructions take 2 cycles + * because of the XER dependency. This means the fastest this loop can + * go is 16 cycles per iteration. The scheduling of the loop below has * been shown to hit this on both POWER6 and POWER7. */ .align 5 @@ -392,10 +349,10 @@ dest; std r16,56(r4) mtctr r6 3: -source; ld r6,0(r3) +srcnr; ld r6,0(r3) addi r3,r3,8 adde r0,r0,r6 -dest; std r6,0(r4) +dstnr; std r6,0(r4) addi r4,r4,8 bdnz 3b @@ -405,10 +362,10 @@ dest; std r6,0(r4) srdi. r6,r5,2 beq .Lcopy_tail_halfword -source; lwz r6,0(r3) +srcnr; lwz r6,0(r3) addi r3,r3,4 adde r0,r0,r6 -dest; stw r6,0(r4) +dstnr; stw r6,0(r4) addi r4,r4,4 subi r5,r5,4 @@ -416,10 +373,10 @@ dest; stw r6,0(r4) srdi. r6,r5,1 beq .Lcopy_tail_byte -source; lhz r6,0(r3) +srcnr; lhz r6,0(r3) addi r3,r3,2 adde r0,r0,r6 -dest; sth r6,0(r4) +dstnr; sth r6,0(r4) addi r4,r4,2 subi r5,r5,2 @@ -427,10 +384,14 @@ dest; sth r6,0(r4) andi. 
r6,r5,1 beq .Lcopy_finish -source; lbz r6,0(r3) +srcnr; lbz r6,0(r3) +#ifdef __BIG_ENDIAN__ sldi r9,r6,8 /* Pad the byte out to 16 bits */ adde r0,r0,r9 -dest; stb r6,0(r4) +#else + adde r0,r0,r6 +#endif +dstnr; stb r6,0(r4) .Lcopy_finish: addze r0,r0 /* add in final carry */ @@ -439,16 +400,44 @@ dest; stb r6,0(r4) srdi r3,r3,32 blr -.Lsrc_error: - cmpdi 0,r7,0 - beqlr - li r6,-EFAULT - stw r6,0(r7) +.Lerror: + ld r14,STK_REG(R14)(r1) + ld r15,STK_REG(R15)(r1) + ld r16,STK_REG(R16)(r1) + addi r1,r1,STACKFRAMESIZE +.Lerror_nr: + li r3,0 blr -.Ldest_error: - cmpdi 0,r8,0 - beqlr - li r6,-EFAULT - stw r6,0(r8) +EXPORT_SYMBOL(csum_partial_copy_generic) + +/* + * __sum16 csum_ipv6_magic(const struct in6_addr *saddr, + * const struct in6_addr *daddr, + * __u32 len, __u8 proto, __wsum sum) + */ + +_GLOBAL(csum_ipv6_magic) + ld r8, 0(r3) + ld r9, 8(r3) + add r5, r5, r6 + addc r0, r8, r9 + ld r10, 0(r4) + ld r11, 8(r4) +#ifdef CONFIG_CPU_LITTLE_ENDIAN + rotldi r5, r5, 8 +#endif + adde r0, r0, r10 + add r5, r5, r7 + adde r0, r0, r11 + adde r0, r0, r5 + addze r0, r0 + rotldi r3, r0, 32 /* fold two 32 bit halves together */ + add r3, r0, r3 + srdi r0, r3, 32 + rotlwi r3, r0, 16 /* fold two 16 bit halves together */ + add r3, r0, r3 + not r3, r3 + rlwinm r3, r3, 16, 16, 31 blr +EXPORT_SYMBOL(csum_ipv6_magic) diff --git a/arch/powerpc/lib/checksum_wrappers.c b/arch/powerpc/lib/checksum_wrappers.c new file mode 100644 index 000000000000..1a14c8780278 --- /dev/null +++ b/arch/powerpc/lib/checksum_wrappers.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * + * Copyright (C) IBM Corporation, 2010 + * + * Author: Anton Blanchard <anton@au.ibm.com> + */ +#include <linux/export.h> +#include <linux/compiler.h> +#include <linux/types.h> +#include <asm/checksum.h> +#include <linux/uaccess.h> + +__wsum csum_and_copy_from_user(const void __user *src, void *dst, + int len) +{ + __wsum csum; + + if (unlikely(!user_read_access_begin(src, len))) + return 0; + + csum = csum_partial_copy_generic((void __force *)src, dst, len); + + user_read_access_end(); + return csum; +} + +__wsum csum_and_copy_to_user(const void *src, void __user *dst, int len) +{ + __wsum csum; + + if (unlikely(!user_write_access_begin(dst, len))) + return 0; + + csum = csum_partial_copy_generic(src, (void __force *)dst, len); + + user_write_access_end(); + return csum; +} diff --git a/arch/powerpc/lib/checksum_wrappers_64.c b/arch/powerpc/lib/checksum_wrappers_64.c deleted file mode 100644 index 08e3a3356c40..000000000000 --- a/arch/powerpc/lib/checksum_wrappers_64.c +++ /dev/null @@ -1,102 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
- * - * Copyright (C) IBM Corporation, 2010 - * - * Author: Anton Blanchard <anton@au.ibm.com> - */ -#include <linux/export.h> -#include <linux/compiler.h> -#include <linux/types.h> -#include <asm/checksum.h> -#include <asm/uaccess.h> - -__wsum csum_and_copy_from_user(const void __user *src, void *dst, - int len, __wsum sum, int *err_ptr) -{ - unsigned int csum; - - might_sleep(); - - *err_ptr = 0; - - if (!len) { - csum = 0; - goto out; - } - - if (unlikely((len < 0) || !access_ok(VERIFY_READ, src, len))) { - *err_ptr = -EFAULT; - csum = (__force unsigned int)sum; - goto out; - } - - csum = csum_partial_copy_generic((void __force *)src, dst, - len, sum, err_ptr, NULL); - - if (unlikely(*err_ptr)) { - int missing = __copy_from_user(dst, src, len); - - if (missing) { - memset(dst + len - missing, 0, missing); - *err_ptr = -EFAULT; - } else { - *err_ptr = 0; - } - - csum = csum_partial(dst, len, sum); - } - -out: - return (__force __wsum)csum; -} -EXPORT_SYMBOL(csum_and_copy_from_user); - -__wsum csum_and_copy_to_user(const void *src, void __user *dst, int len, - __wsum sum, int *err_ptr) -{ - unsigned int csum; - - might_sleep(); - - *err_ptr = 0; - - if (!len) { - csum = 0; - goto out; - } - - if (unlikely((len < 0) || !access_ok(VERIFY_WRITE, dst, len))) { - *err_ptr = -EFAULT; - csum = -1; /* invalid checksum */ - goto out; - } - - csum = csum_partial_copy_generic(src, (void __force *)dst, - len, sum, NULL, err_ptr); - - if (unlikely(*err_ptr)) { - csum = csum_partial(src, len, sum); - - if (copy_to_user(dst, src, len)) { - *err_ptr = -EFAULT; - csum = -1; /* invalid checksum */ - } - } - -out: - return (__force __wsum)csum; -} -EXPORT_SYMBOL(csum_and_copy_to_user); diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 17e5b2364312..f84e0337cc02 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -1,455 +1,697 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright 2008 Michael Ellerman, IBM Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
*/ -#include <linux/kernel.h> +#include <linux/kprobes.h> +#include <linux/mmu_context.h> +#include <linux/random.h> #include <linux/vmalloc.h> #include <linux/init.h> -#include <linux/mm.h> +#include <linux/cpuhotplug.h> +#include <linux/uaccess.h> +#include <linux/jump_label.h> + +#include <asm/debug.h> +#include <asm/pgalloc.h> +#include <asm/tlb.h> +#include <asm/tlbflush.h> #include <asm/page.h> -#include <asm/code-patching.h> -#include <asm/uaccess.h> +#include <asm/text-patching.h> +#include <asm/inst.h> + +static int __patch_mem(void *exec_addr, unsigned long val, void *patch_addr, bool is_dword) +{ + if (!IS_ENABLED(CONFIG_PPC64) || likely(!is_dword)) { + /* For big endian correctness: plain address would use the wrong half */ + u32 val32 = val; + + __put_kernel_nofault(patch_addr, &val32, u32, failed); + } else { + __put_kernel_nofault(patch_addr, &val, u64, failed); + } + + asm ("dcbst 0, %0; sync; icbi 0,%1; sync; isync" :: "r" (patch_addr), + "r" (exec_addr)); + + return 0; + +failed: + mb(); /* sync */ + return -EPERM; +} + +int raw_patch_instruction(u32 *addr, ppc_inst_t instr) +{ + if (ppc_inst_prefixed(instr)) + return __patch_mem(addr, ppc_inst_as_ulong(instr), addr, true); + else + return __patch_mem(addr, ppc_inst_val(instr), addr, false); +} + +struct patch_context { + union { + struct vm_struct *area; + struct mm_struct *mm; + }; + unsigned long addr; + pte_t *pte; +}; +static DEFINE_PER_CPU(struct patch_context, cpu_patching_context); + +static int map_patch_area(void *addr, unsigned long text_poke_addr); +static void unmap_patch_area(unsigned long addr); + +static bool mm_patch_enabled(void) +{ + return IS_ENABLED(CONFIG_SMP) && radix_enabled(); +} + +/* + * The following applies for Radix MMU. Hash MMU has different requirements, + * and so is not supported. + * + * Changing mm requires context synchronising instructions on both sides of + * the context switch, as well as a hwsync between the last instruction for + * which the address of an associated storage access was translated using + * the current context. + * + * switch_mm_irqs_off() performs an isync after the context switch. It is + * the responsibility of the caller to perform the CSI and hwsync before + * starting/stopping the temp mm. 
+ */ +static struct mm_struct *start_using_temp_mm(struct mm_struct *temp_mm) +{ + struct mm_struct *orig_mm = current->active_mm; + + lockdep_assert_irqs_disabled(); + switch_mm_irqs_off(orig_mm, temp_mm, current); + + WARN_ON(!mm_is_thread_local(temp_mm)); + + suspend_breakpoints(); + return orig_mm; +} -int patch_instruction(unsigned int *addr, unsigned int instr) +static void stop_using_temp_mm(struct mm_struct *temp_mm, + struct mm_struct *orig_mm) { + lockdep_assert_irqs_disabled(); + switch_mm_irqs_off(temp_mm, orig_mm, current); + restore_breakpoints(); +} + +static int text_area_cpu_up(unsigned int cpu) +{ + struct vm_struct *area; + unsigned long addr; int err; - __put_user_size(instr, addr, 4, err); + area = get_vm_area(PAGE_SIZE, 0); + if (!area) { + WARN_ONCE(1, "Failed to create text area for cpu %d\n", + cpu); + return -1; + } + + // Map/unmap the area to ensure all page tables are pre-allocated + addr = (unsigned long)area->addr; + err = map_patch_area(empty_zero_page, addr); if (err) return err; - asm ("dcbst 0, %0; sync; icbi 0,%0; sync; isync" : : "r" (addr)); + + unmap_patch_area(addr); + + this_cpu_write(cpu_patching_context.area, area); + this_cpu_write(cpu_patching_context.addr, addr); + this_cpu_write(cpu_patching_context.pte, virt_to_kpte(addr)); + return 0; } -int patch_branch(unsigned int *addr, unsigned long target, int flags) +static int text_area_cpu_down(unsigned int cpu) { - return patch_instruction(addr, create_branch(addr, target, flags)); + free_vm_area(this_cpu_read(cpu_patching_context.area)); + this_cpu_write(cpu_patching_context.area, NULL); + this_cpu_write(cpu_patching_context.addr, 0); + this_cpu_write(cpu_patching_context.pte, NULL); + return 0; } -unsigned int create_branch(const unsigned int *addr, - unsigned long target, int flags) +static void put_patching_mm(struct mm_struct *mm, unsigned long patching_addr) { - unsigned int instruction; - long offset; + struct mmu_gather tlb; - offset = target; - if (! (flags & BRANCH_ABSOLUTE)) - offset = offset - (unsigned long)addr; + tlb_gather_mmu(&tlb, mm); + free_pgd_range(&tlb, patching_addr, patching_addr + PAGE_SIZE, 0, 0); + mmput(mm); +} - /* Check we can represent the target in the instruction format */ - if (offset < -0x2000000 || offset > 0x1fffffc || offset & 0x3) - return 0; +static int text_area_cpu_up_mm(unsigned int cpu) +{ + struct mm_struct *mm; + unsigned long addr; + pte_t *pte; + spinlock_t *ptl; + + mm = mm_alloc(); + if (WARN_ON(!mm)) + goto fail_no_mm; + + /* + * Choose a random page-aligned address from the interval + * [PAGE_SIZE .. DEFAULT_MAP_WINDOW - PAGE_SIZE]. + * The lower address bound is PAGE_SIZE to avoid the zero-page. + */ + addr = (1 + (get_random_long() % (DEFAULT_MAP_WINDOW / PAGE_SIZE - 2))) << PAGE_SHIFT; + + /* + * PTE allocation uses GFP_KERNEL which means we need to + * pre-allocate the PTE here because we cannot do the + * allocation during patching when IRQs are disabled. + * + * Using get_locked_pte() to avoid open coding, the lock + * is unnecessary. + */ + pte = get_locked_pte(mm, addr, &ptl); + if (!pte) + goto fail_no_pte; + pte_unmap_unlock(pte, ptl); + + this_cpu_write(cpu_patching_context.mm, mm); + this_cpu_write(cpu_patching_context.addr, addr); - /* Mask out the flags and target, so they don't step on each other. 
*/ - instruction = 0x48000000 | (flags & 0x3) | (offset & 0x03FFFFFC); + return 0; - return instruction; +fail_no_pte: + put_patching_mm(mm, addr); +fail_no_mm: + return -ENOMEM; } -unsigned int create_cond_branch(const unsigned int *addr, - unsigned long target, int flags) +static int text_area_cpu_down_mm(unsigned int cpu) { - unsigned int instruction; - long offset; + put_patching_mm(this_cpu_read(cpu_patching_context.mm), + this_cpu_read(cpu_patching_context.addr)); - offset = target; - if (! (flags & BRANCH_ABSOLUTE)) - offset = offset - (unsigned long)addr; + this_cpu_write(cpu_patching_context.mm, NULL); + this_cpu_write(cpu_patching_context.addr, 0); - /* Check we can represent the target in the instruction format */ - if (offset < -0x8000 || offset > 0x7FFF || offset & 0x3) - return 0; + return 0; +} - /* Mask out the flags and target, so they don't step on each other. */ - instruction = 0x40000000 | (flags & 0x3FF0003) | (offset & 0xFFFC); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(poking_init_done); - return instruction; +void __init poking_init(void) +{ + int ret; + + if (mm_patch_enabled()) + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, + "powerpc/text_poke_mm:online", + text_area_cpu_up_mm, + text_area_cpu_down_mm); + else + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, + "powerpc/text_poke:online", + text_area_cpu_up, + text_area_cpu_down); + + /* cpuhp_setup_state returns >= 0 on success */ + if (WARN_ON(ret < 0)) + return; + + static_branch_enable(&poking_init_done); } -static unsigned int branch_opcode(unsigned int instr) +static unsigned long get_patch_pfn(void *addr) { - return (instr >> 26) & 0x3F; + if (IS_ENABLED(CONFIG_EXECMEM) && is_vmalloc_or_module_addr(addr)) + return vmalloc_to_pfn(addr); + else + return __pa_symbol(addr) >> PAGE_SHIFT; } -static int instr_is_branch_iform(unsigned int instr) +/* + * This can be called for kernel text or a module. 
+ */ +static int map_patch_area(void *addr, unsigned long text_poke_addr) { - return branch_opcode(instr) == 18; + unsigned long pfn = get_patch_pfn(addr); + + return map_kernel_page(text_poke_addr, (pfn << PAGE_SHIFT), PAGE_KERNEL); } -static int instr_is_branch_bform(unsigned int instr) +static void unmap_patch_area(unsigned long addr) { - return branch_opcode(instr) == 16; + pte_t *ptep; + pmd_t *pmdp; + pud_t *pudp; + p4d_t *p4dp; + pgd_t *pgdp; + + pgdp = pgd_offset_k(addr); + if (WARN_ON(pgd_none(*pgdp))) + return; + + p4dp = p4d_offset(pgdp, addr); + if (WARN_ON(p4d_none(*p4dp))) + return; + + pudp = pud_offset(p4dp, addr); + if (WARN_ON(pud_none(*pudp))) + return; + + pmdp = pmd_offset(pudp, addr); + if (WARN_ON(pmd_none(*pmdp))) + return; + + ptep = pte_offset_kernel(pmdp, addr); + if (WARN_ON(pte_none(*ptep))) + return; + + /* + * In hash, pte_clear flushes the tlb, in radix, we have to + */ + pte_clear(&init_mm, addr, ptep); + flush_tlb_kernel_range(addr, addr + PAGE_SIZE); } -int instr_is_relative_branch(unsigned int instr) +static int __do_patch_mem_mm(void *addr, unsigned long val, bool is_dword) { - if (instr & BRANCH_ABSOLUTE) - return 0; + int err; + u32 *patch_addr; + unsigned long text_poke_addr; + pte_t *pte; + unsigned long pfn = get_patch_pfn(addr); + struct mm_struct *patching_mm; + struct mm_struct *orig_mm; + spinlock_t *ptl; - return instr_is_branch_iform(instr) || instr_is_branch_bform(instr); + patching_mm = __this_cpu_read(cpu_patching_context.mm); + text_poke_addr = __this_cpu_read(cpu_patching_context.addr); + patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr)); + + pte = get_locked_pte(patching_mm, text_poke_addr, &ptl); + if (!pte) + return -ENOMEM; + + __set_pte_at(patching_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0); + + /* order PTE update before use, also serves as the hwsync */ + asm volatile("ptesync": : :"memory"); + + /* order context switch after arbitrary prior code */ + isync(); + + orig_mm = start_using_temp_mm(patching_mm); + + err = __patch_mem(addr, val, patch_addr, is_dword); + + /* context synchronisation performed by __patch_instruction (isync or exception) */ + stop_using_temp_mm(patching_mm, orig_mm); + + pte_clear(patching_mm, text_poke_addr, pte); + /* + * ptesync to order PTE update before TLB invalidation done + * by radix__local_flush_tlb_page_psize (in _tlbiel_va) + */ + local_flush_tlb_page_psize(patching_mm, text_poke_addr, mmu_virtual_psize); + + pte_unmap_unlock(pte, ptl); + + return err; } -static unsigned long branch_iform_target(const unsigned int *instr) +static int __do_patch_mem(void *addr, unsigned long val, bool is_dword) { - signed long imm; + int err; + u32 *patch_addr; + unsigned long text_poke_addr; + pte_t *pte; + unsigned long pfn = get_patch_pfn(addr); - imm = *instr & 0x3FFFFFC; + text_poke_addr = (unsigned long)__this_cpu_read(cpu_patching_context.addr) & PAGE_MASK; + patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr)); - /* If the top bit of the immediate value is set this is negative */ - if (imm & 0x2000000) - imm -= 0x4000000; + pte = __this_cpu_read(cpu_patching_context.pte); + __set_pte_at(&init_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0); + /* See ptesync comment in radix__set_pte_at() */ + if (radix_enabled()) + asm volatile("ptesync": : :"memory"); - if ((*instr & BRANCH_ABSOLUTE) == 0) - imm += (unsigned long)instr; + err = __patch_mem(addr, val, patch_addr, is_dword); - return (unsigned long)imm; + pte_clear(&init_mm, text_poke_addr, pte); + 
flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE); + + return err; } -static unsigned long branch_bform_target(const unsigned int *instr) +static int patch_mem(void *addr, unsigned long val, bool is_dword) { - signed long imm; + int err; + unsigned long flags; + + /* + * During early early boot patch_instruction is called + * when text_poke_area is not ready, but we still need + * to allow patching. We just do the plain old patching + */ + if (!IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) || + !static_branch_likely(&poking_init_done)) + return __patch_mem(addr, val, addr, is_dword); + + local_irq_save(flags); + if (mm_patch_enabled()) + err = __do_patch_mem_mm(addr, val, is_dword); + else + err = __do_patch_mem(addr, val, is_dword); + local_irq_restore(flags); + + return err; +} - imm = *instr & 0xFFFC; +#ifdef CONFIG_PPC64 - /* If the top bit of the immediate value is set this is negative */ - if (imm & 0x8000) - imm -= 0x10000; +int patch_instruction(u32 *addr, ppc_inst_t instr) +{ + if (ppc_inst_prefixed(instr)) + return patch_mem(addr, ppc_inst_as_ulong(instr), true); + else + return patch_mem(addr, ppc_inst_val(instr), false); +} +NOKPROBE_SYMBOL(patch_instruction); - if ((*instr & BRANCH_ABSOLUTE) == 0) - imm += (unsigned long)instr; +int patch_uint(void *addr, unsigned int val) +{ + if (!IS_ALIGNED((unsigned long)addr, sizeof(unsigned int))) + return -EINVAL; - return (unsigned long)imm; + return patch_mem(addr, val, false); } +NOKPROBE_SYMBOL(patch_uint); -unsigned long branch_target(const unsigned int *instr) +int patch_ulong(void *addr, unsigned long val) { - if (instr_is_branch_iform(*instr)) - return branch_iform_target(instr); - else if (instr_is_branch_bform(*instr)) - return branch_bform_target(instr); + if (!IS_ALIGNED((unsigned long)addr, sizeof(unsigned long))) + return -EINVAL; + + return patch_mem(addr, val, true); +} +NOKPROBE_SYMBOL(patch_ulong); + +#else + +int patch_instruction(u32 *addr, ppc_inst_t instr) +{ + return patch_mem(addr, ppc_inst_val(instr), false); +} +NOKPROBE_SYMBOL(patch_instruction) + +#endif + +static int patch_memset64(u64 *addr, u64 val, size_t count) +{ + for (u64 *end = addr + count; addr < end; addr++) + __put_kernel_nofault(addr, &val, u64, failed); return 0; + +failed: + return -EPERM; } -int instr_is_branch_to_addr(const unsigned int *instr, unsigned long addr) +static int patch_memset32(u32 *addr, u32 val, size_t count) { - if (instr_is_branch_iform(*instr) || instr_is_branch_bform(*instr)) - return branch_target(instr) == addr; + for (u32 *end = addr + count; addr < end; addr++) + __put_kernel_nofault(addr, &val, u32, failed); return 0; + +failed: + return -EPERM; } -unsigned int translate_branch(const unsigned int *dest, const unsigned int *src) +static int __patch_instructions(u32 *patch_addr, u32 *code, size_t len, bool repeat_instr) { - unsigned long target; + unsigned long start = (unsigned long)patch_addr; + int err; - target = branch_target(src); + /* Repeat instruction */ + if (repeat_instr) { + ppc_inst_t instr = ppc_inst_read(code); + + if (ppc_inst_prefixed(instr)) { + u64 val = ppc_inst_as_ulong(instr); + + err = patch_memset64((u64 *)patch_addr, val, len / 8); + } else { + u32 val = ppc_inst_val(instr); + + err = patch_memset32(patch_addr, val, len / 4); + } + } else { + err = copy_to_kernel_nofault(patch_addr, code, len); + } + + smp_wmb(); /* smp write barrier */ + flush_icache_range(start, start + len); + return err; +} + +/* + * A page is mapped and instructions that fit the page are patched. 
+ * Assumes 'len' to be (PAGE_SIZE - offset_in_page(addr)) or below. + */ +static int __do_patch_instructions_mm(u32 *addr, u32 *code, size_t len, bool repeat_instr) +{ + struct mm_struct *patching_mm, *orig_mm; + unsigned long pfn = get_patch_pfn(addr); + unsigned long text_poke_addr; + spinlock_t *ptl; + u32 *patch_addr; + pte_t *pte; + int err; + + patching_mm = __this_cpu_read(cpu_patching_context.mm); + text_poke_addr = __this_cpu_read(cpu_patching_context.addr); + patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr)); + + pte = get_locked_pte(patching_mm, text_poke_addr, &ptl); + if (!pte) + return -ENOMEM; + + __set_pte_at(patching_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0); + + /* order PTE update before use, also serves as the hwsync */ + asm volatile("ptesync" ::: "memory"); + + /* order context switch after arbitrary prior code */ + isync(); + + orig_mm = start_using_temp_mm(patching_mm); - if (instr_is_branch_iform(*src)) - return create_branch(dest, target, *src); - else if (instr_is_branch_bform(*src)) - return create_cond_branch(dest, target, *src); + kasan_disable_current(); + err = __patch_instructions(patch_addr, code, len, repeat_instr); + kasan_enable_current(); + + /* context synchronisation performed by __patch_instructions */ + stop_using_temp_mm(patching_mm, orig_mm); + + pte_clear(patching_mm, text_poke_addr, pte); + /* + * ptesync to order PTE update before TLB invalidation done + * by radix__local_flush_tlb_page_psize (in _tlbiel_va) + */ + local_flush_tlb_page_psize(patching_mm, text_poke_addr, mmu_virtual_psize); + + pte_unmap_unlock(pte, ptl); + + return err; +} + +/* + * A page is mapped and instructions that fit the page are patched. + * Assumes 'len' to be (PAGE_SIZE - offset_in_page(addr)) or below. + */ +static int __do_patch_instructions(u32 *addr, u32 *code, size_t len, bool repeat_instr) +{ + unsigned long pfn = get_patch_pfn(addr); + unsigned long text_poke_addr; + u32 *patch_addr; + pte_t *pte; + int err; + + text_poke_addr = (unsigned long)__this_cpu_read(cpu_patching_context.addr) & PAGE_MASK; + patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr)); + + pte = __this_cpu_read(cpu_patching_context.pte); + __set_pte_at(&init_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0); + /* See ptesync comment in radix__set_pte_at() */ + if (radix_enabled()) + asm volatile("ptesync" ::: "memory"); + + err = __patch_instructions(patch_addr, code, len, repeat_instr); + + pte_clear(&init_mm, text_poke_addr, pte); + flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE); + + return err; +} + +/* + * Patch 'addr' with 'len' bytes of instructions from 'code'. + * + * If repeat_instr is true, the same instruction is filled for + * 'len' bytes. 
+ */ +int patch_instructions(u32 *addr, u32 *code, size_t len, bool repeat_instr) +{ + while (len > 0) { + unsigned long flags; + size_t plen; + int err; + + plen = min_t(size_t, PAGE_SIZE - offset_in_page(addr), len); + + local_irq_save(flags); + if (mm_patch_enabled()) + err = __do_patch_instructions_mm(addr, code, plen, repeat_instr); + else + err = __do_patch_instructions(addr, code, plen, repeat_instr); + local_irq_restore(flags); + if (err) + return err; + + len -= plen; + addr = (u32 *)((unsigned long)addr + plen); + if (!repeat_instr) + code = (u32 *)((unsigned long)code + plen); + } return 0; } +NOKPROBE_SYMBOL(patch_instructions); +int patch_branch(u32 *addr, unsigned long target, int flags) +{ + ppc_inst_t instr; + + if (create_branch(&instr, addr, target, flags)) + return -ERANGE; -#ifdef CONFIG_CODE_PATCHING_SELFTEST + return patch_instruction(addr, instr); +} -static void __init test_trampoline(void) +/* + * Helper to check if a given instruction is a conditional branch + * Derived from the conditional checks in analyse_instr() + */ +bool is_conditional_branch(ppc_inst_t instr) { - asm ("nop;\n"); + unsigned int opcode = ppc_inst_primary_opcode(instr); + + if (opcode == 16) /* bc, bca, bcl, bcla */ + return true; + if (opcode == 19) { + switch ((ppc_inst_val(instr) >> 1) & 0x3ff) { + case 16: /* bclr, bclrl */ + case 528: /* bcctr, bcctrl */ + case 560: /* bctar, bctarl */ + return true; + } + } + return false; } +NOKPROBE_SYMBOL(is_conditional_branch); -#define check(x) \ - if (!(x)) printk("code-patching: test failed at line %d\n", __LINE__); +int create_cond_branch(ppc_inst_t *instr, const u32 *addr, + unsigned long target, int flags) +{ + long offset; + + offset = target; + if (! (flags & BRANCH_ABSOLUTE)) + offset = offset - (unsigned long)addr; + + /* Check we can represent the target in the instruction format */ + if (!is_offset_in_cond_branch_range(offset)) + return 1; -static void __init test_branch_iform(void) + /* Mask out the flags and target, so they don't step on each other. 
*/ + *instr = ppc_inst(0x40000000 | (flags & 0x3FF0003) | (offset & 0xFFFC)); + + return 0; +} + +int instr_is_relative_branch(ppc_inst_t instr) { - unsigned int instr; - unsigned long addr; + if (ppc_inst_val(instr) & BRANCH_ABSOLUTE) + return 0; - addr = (unsigned long)&instr; - - /* The simplest case, branch to self, no flags */ - check(instr_is_branch_iform(0x48000000)); - /* All bits of target set, and flags */ - check(instr_is_branch_iform(0x4bffffff)); - /* High bit of opcode set, which is wrong */ - check(!instr_is_branch_iform(0xcbffffff)); - /* Middle bits of opcode set, which is wrong */ - check(!instr_is_branch_iform(0x7bffffff)); - - /* Simplest case, branch to self with link */ - check(instr_is_branch_iform(0x48000001)); - /* All bits of targets set */ - check(instr_is_branch_iform(0x4bfffffd)); - /* Some bits of targets set */ - check(instr_is_branch_iform(0x4bff00fd)); - /* Must be a valid branch to start with */ - check(!instr_is_branch_iform(0x7bfffffd)); - - /* Absolute branch to 0x100 */ - instr = 0x48000103; - check(instr_is_branch_to_addr(&instr, 0x100)); - /* Absolute branch to 0x420fc */ - instr = 0x480420ff; - check(instr_is_branch_to_addr(&instr, 0x420fc)); - /* Maximum positive relative branch, + 20MB - 4B */ - instr = 0x49fffffc; - check(instr_is_branch_to_addr(&instr, addr + 0x1FFFFFC)); - /* Smallest negative relative branch, - 4B */ - instr = 0x4bfffffc; - check(instr_is_branch_to_addr(&instr, addr - 4)); - /* Largest negative relative branch, - 32 MB */ - instr = 0x4a000000; - check(instr_is_branch_to_addr(&instr, addr - 0x2000000)); - - /* Branch to self, with link */ - instr = create_branch(&instr, addr, BRANCH_SET_LINK); - check(instr_is_branch_to_addr(&instr, addr)); - - /* Branch to self - 0x100, with link */ - instr = create_branch(&instr, addr - 0x100, BRANCH_SET_LINK); - check(instr_is_branch_to_addr(&instr, addr - 0x100)); - - /* Branch to self + 0x100, no link */ - instr = create_branch(&instr, addr + 0x100, 0); - check(instr_is_branch_to_addr(&instr, addr + 0x100)); - - /* Maximum relative negative offset, - 32 MB */ - instr = create_branch(&instr, addr - 0x2000000, BRANCH_SET_LINK); - check(instr_is_branch_to_addr(&instr, addr - 0x2000000)); - - /* Out of range relative negative offset, - 32 MB + 4*/ - instr = create_branch(&instr, addr - 0x2000004, BRANCH_SET_LINK); - check(instr == 0); - - /* Out of range relative positive offset, + 32 MB */ - instr = create_branch(&instr, addr + 0x2000000, BRANCH_SET_LINK); - check(instr == 0); - - /* Unaligned target */ - instr = create_branch(&instr, addr + 3, BRANCH_SET_LINK); - check(instr == 0); - - /* Check flags are masked correctly */ - instr = create_branch(&instr, addr, 0xFFFFFFFC); - check(instr_is_branch_to_addr(&instr, addr)); - check(instr == 0x48000000); -} - -static void __init test_create_function_call(void) -{ - unsigned int *iptr; - unsigned long dest; - - /* Check we can create a function call */ - iptr = (unsigned int *)ppc_function_entry(test_trampoline); - dest = ppc_function_entry(test_create_function_call); - patch_instruction(iptr, create_branch(iptr, dest, BRANCH_SET_LINK)); - check(instr_is_branch_to_addr(iptr, dest)); -} - -static void __init test_branch_bform(void) + return instr_is_branch_iform(instr) || instr_is_branch_bform(instr); +} + +int instr_is_relative_link_branch(ppc_inst_t instr) { - unsigned long addr; - unsigned int *iptr, instr, flags; - - iptr = &instr; - addr = (unsigned long)iptr; - - /* The simplest case, branch to self, no flags */ - 
check(instr_is_branch_bform(0x40000000)); - /* All bits of target set, and flags */ - check(instr_is_branch_bform(0x43ffffff)); - /* High bit of opcode set, which is wrong */ - check(!instr_is_branch_bform(0xc3ffffff)); - /* Middle bits of opcode set, which is wrong */ - check(!instr_is_branch_bform(0x7bffffff)); - - /* Absolute conditional branch to 0x100 */ - instr = 0x43ff0103; - check(instr_is_branch_to_addr(&instr, 0x100)); - /* Absolute conditional branch to 0x20fc */ - instr = 0x43ff20ff; - check(instr_is_branch_to_addr(&instr, 0x20fc)); - /* Maximum positive relative conditional branch, + 32 KB - 4B */ - instr = 0x43ff7ffc; - check(instr_is_branch_to_addr(&instr, addr + 0x7FFC)); - /* Smallest negative relative conditional branch, - 4B */ - instr = 0x43fffffc; - check(instr_is_branch_to_addr(&instr, addr - 4)); - /* Largest negative relative conditional branch, - 32 KB */ - instr = 0x43ff8000; - check(instr_is_branch_to_addr(&instr, addr - 0x8000)); - - /* All condition code bits set & link */ - flags = 0x3ff000 | BRANCH_SET_LINK; - - /* Branch to self */ - instr = create_cond_branch(iptr, addr, flags); - check(instr_is_branch_to_addr(&instr, addr)); - - /* Branch to self - 0x100 */ - instr = create_cond_branch(iptr, addr - 0x100, flags); - check(instr_is_branch_to_addr(&instr, addr - 0x100)); - - /* Branch to self + 0x100 */ - instr = create_cond_branch(iptr, addr + 0x100, flags); - check(instr_is_branch_to_addr(&instr, addr + 0x100)); - - /* Maximum relative negative offset, - 32 KB */ - instr = create_cond_branch(iptr, addr - 0x8000, flags); - check(instr_is_branch_to_addr(&instr, addr - 0x8000)); - - /* Out of range relative negative offset, - 32 KB + 4*/ - instr = create_cond_branch(iptr, addr - 0x8004, flags); - check(instr == 0); - - /* Out of range relative positive offset, + 32 KB */ - instr = create_cond_branch(iptr, addr + 0x8000, flags); - check(instr == 0); - - /* Unaligned target */ - instr = create_cond_branch(iptr, addr + 3, flags); - check(instr == 0); - - /* Check flags are masked correctly */ - instr = create_cond_branch(iptr, addr, 0xFFFFFFFC); - check(instr_is_branch_to_addr(&instr, addr)); - check(instr == 0x43FF0000); -} - -static void __init test_translate_branch(void) + return instr_is_relative_branch(instr) && (ppc_inst_val(instr) & BRANCH_SET_LINK); +} + +static unsigned long branch_iform_target(const u32 *instr) { - unsigned long addr; - unsigned int *p, *q; - void *buf; + signed long imm; - buf = vmalloc(PAGE_ALIGN(0x2000000 + 1)); - check(buf); - if (!buf) - return; + imm = ppc_inst_val(ppc_inst_read(instr)) & 0x3FFFFFC; + + /* If the top bit of the immediate value is set this is negative */ + if (imm & 0x2000000) + imm -= 0x4000000; - /* Simple case, branch to self moved a little */ - p = buf; - addr = (unsigned long)p; - patch_branch(p, addr, 0); - check(instr_is_branch_to_addr(p, addr)); - q = p + 1; - patch_instruction(q, translate_branch(q, p)); - check(instr_is_branch_to_addr(q, addr)); - - /* Maximum negative case, move b . 
to addr + 32 MB */ - p = buf; - addr = (unsigned long)p; - patch_branch(p, addr, 0); - q = buf + 0x2000000; - patch_instruction(q, translate_branch(q, p)); - check(instr_is_branch_to_addr(p, addr)); - check(instr_is_branch_to_addr(q, addr)); - check(*q == 0x4a000000); - - /* Maximum positive case, move x to x - 32 MB + 4 */ - p = buf + 0x2000000; - addr = (unsigned long)p; - patch_branch(p, addr, 0); - q = buf + 4; - patch_instruction(q, translate_branch(q, p)); - check(instr_is_branch_to_addr(p, addr)); - check(instr_is_branch_to_addr(q, addr)); - check(*q == 0x49fffffc); - - /* Jump to x + 16 MB moved to x + 20 MB */ - p = buf; - addr = 0x1000000 + (unsigned long)buf; - patch_branch(p, addr, BRANCH_SET_LINK); - q = buf + 0x1400000; - patch_instruction(q, translate_branch(q, p)); - check(instr_is_branch_to_addr(p, addr)); - check(instr_is_branch_to_addr(q, addr)); - - /* Jump to x + 16 MB moved to x - 16 MB + 4 */ - p = buf + 0x1000000; - addr = 0x2000000 + (unsigned long)buf; - patch_branch(p, addr, 0); - q = buf + 4; - patch_instruction(q, translate_branch(q, p)); - check(instr_is_branch_to_addr(p, addr)); - check(instr_is_branch_to_addr(q, addr)); - - - /* Conditional branch tests */ - - /* Simple case, branch to self moved a little */ - p = buf; - addr = (unsigned long)p; - patch_instruction(p, create_cond_branch(p, addr, 0)); - check(instr_is_branch_to_addr(p, addr)); - q = p + 1; - patch_instruction(q, translate_branch(q, p)); - check(instr_is_branch_to_addr(q, addr)); - - /* Maximum negative case, move b . to addr + 32 KB */ - p = buf; - addr = (unsigned long)p; - patch_instruction(p, create_cond_branch(p, addr, 0xFFFFFFFC)); - q = buf + 0x8000; - patch_instruction(q, translate_branch(q, p)); - check(instr_is_branch_to_addr(p, addr)); - check(instr_is_branch_to_addr(q, addr)); - check(*q == 0x43ff8000); - - /* Maximum positive case, move x to x - 32 KB + 4 */ - p = buf + 0x8000; - addr = (unsigned long)p; - patch_instruction(p, create_cond_branch(p, addr, 0xFFFFFFFC)); - q = buf + 4; - patch_instruction(q, translate_branch(q, p)); - check(instr_is_branch_to_addr(p, addr)); - check(instr_is_branch_to_addr(q, addr)); - check(*q == 0x43ff7ffc); - - /* Jump to x + 12 KB moved to x + 20 KB */ - p = buf; - addr = 0x3000 + (unsigned long)buf; - patch_instruction(p, create_cond_branch(p, addr, BRANCH_SET_LINK)); - q = buf + 0x5000; - patch_instruction(q, translate_branch(q, p)); - check(instr_is_branch_to_addr(p, addr)); - check(instr_is_branch_to_addr(q, addr)); - - /* Jump to x + 8 KB moved to x - 8 KB + 4 */ - p = buf + 0x2000; - addr = 0x4000 + (unsigned long)buf; - patch_instruction(p, create_cond_branch(p, addr, 0)); - q = buf + 4; - patch_instruction(q, translate_branch(q, p)); - check(instr_is_branch_to_addr(p, addr)); - check(instr_is_branch_to_addr(q, addr)); - - /* Free the buffer we were using */ - vfree(buf); -} - -static int __init test_code_patching(void) -{ - printk(KERN_DEBUG "Running code patching self-tests ...\n"); - - test_branch_iform(); - test_branch_bform(); - test_create_function_call(); - test_translate_branch(); + if ((ppc_inst_val(ppc_inst_read(instr)) & BRANCH_ABSOLUTE) == 0) + imm += (unsigned long)instr; + + return (unsigned long)imm; +} + +static unsigned long branch_bform_target(const u32 *instr) +{ + signed long imm; + + imm = ppc_inst_val(ppc_inst_read(instr)) & 0xFFFC; + + /* If the top bit of the immediate value is set this is negative */ + if (imm & 0x8000) + imm -= 0x10000; + + if ((ppc_inst_val(ppc_inst_read(instr)) & BRANCH_ABSOLUTE) == 0) + imm += 
(unsigned long)instr; + + return (unsigned long)imm; +} + +unsigned long branch_target(const u32 *instr) +{ + if (instr_is_branch_iform(ppc_inst_read(instr))) + return branch_iform_target(instr); + else if (instr_is_branch_bform(ppc_inst_read(instr))) + return branch_bform_target(instr); return 0; } -late_initcall(test_code_patching); -#endif /* CONFIG_CODE_PATCHING_SELFTEST */ +int translate_branch(ppc_inst_t *instr, const u32 *dest, const u32 *src) +{ + unsigned long target; + target = branch_target(src); + + if (instr_is_branch_iform(ppc_inst_read(src))) + return create_branch(instr, dest, target, + ppc_inst_val(ppc_inst_read(src))); + else if (instr_is_branch_bform(ppc_inst_read(src))) + return create_cond_branch(instr, dest, target, + ppc_inst_val(ppc_inst_read(src))); + + return 1; +} diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S index 55f19f9fd708..933b685e7ab6 100644 --- a/arch/powerpc/lib/copy_32.S +++ b/arch/powerpc/lib/copy_32.S @@ -1,17 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Memory copy functions for 32-bit PowerPC. * * Copyright (C) 1996-2005 Paul Mackerras. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ +#include <linux/export.h> #include <asm/processor.h> #include <asm/cache.h> #include <asm/errno.h> #include <asm/ppc_asm.h> +#include <asm/code-patching-asm.h> +#include <asm/kasan.h> #define COPY_16_BYTES \ lwz r7,4(r4); \ @@ -48,43 +47,66 @@ 9 ## n ## 1: \ addi r5,r5,-(16 * n); \ b 105f; \ -.section __ex_table,"a"; \ - .align 2; \ - .long 8 ## n ## 0b,9 ## n ## 0b; \ - .long 8 ## n ## 1b,9 ## n ## 0b; \ - .long 8 ## n ## 2b,9 ## n ## 0b; \ - .long 8 ## n ## 3b,9 ## n ## 0b; \ - .long 8 ## n ## 4b,9 ## n ## 1b; \ - .long 8 ## n ## 5b,9 ## n ## 1b; \ - .long 8 ## n ## 6b,9 ## n ## 1b; \ - .long 8 ## n ## 7b,9 ## n ## 1b; \ - .text + EX_TABLE(8 ## n ## 0b,9 ## n ## 0b); \ + EX_TABLE(8 ## n ## 1b,9 ## n ## 0b); \ + EX_TABLE(8 ## n ## 2b,9 ## n ## 0b); \ + EX_TABLE(8 ## n ## 3b,9 ## n ## 0b); \ + EX_TABLE(8 ## n ## 4b,9 ## n ## 1b); \ + EX_TABLE(8 ## n ## 5b,9 ## n ## 1b); \ + EX_TABLE(8 ## n ## 6b,9 ## n ## 1b); \ + EX_TABLE(8 ## n ## 7b,9 ## n ## 1b) .text - .stabs "arch/powerpc/lib/",N_SO,0,0,0f - .stabs "copy_32.S",N_SO,0,0,0f -0: CACHELINE_BYTES = L1_CACHE_BYTES LG_CACHELINE_BYTES = L1_CACHE_SHIFT CACHELINE_MASK = (L1_CACHE_BYTES-1) +#ifndef CONFIG_KASAN +_GLOBAL(memset16) + rlwinm. r0 ,r5, 31, 1, 31 + addi r6, r3, -4 + beq- 2f + rlwimi r4 ,r4 ,16 ,0 ,15 + mtctr r0 +1: stwu r4, 4(r6) + bdnz 1b +2: andi. r0, r5, 1 + beqlr + sth r4, 4(r6) + blr +EXPORT_SYMBOL(memset16) +#endif + /* * Use dcbz on the complete cache lines in the destination * to set them to zero. This requires that the destination * area is cacheable. -- paulus + * + * During early init, cache might not be active yet, so dcbz cannot be used. + * We therefore skip the optimised bloc that uses dcbz. This jump is + * replaced by a nop once cache is active. This is done in machine_init() */ -_GLOBAL(cacheable_memzero) - mr r5,r4 - li r4,0 - addi r6,r3,-4 +_GLOBAL_KASAN(memset) cmplwi 0,r5,4 blt 7f - stwu r4,4(r6) + + rlwimi r4,r4,8,16,23 + rlwimi r4,r4,16,0,15 + + stw r4,0(r3) beqlr - andi. r0,r6,3 + andi. r0,r3,3 add r5,r0,r5 - subf r6,r0,r6 + subf r6,r0,r3 + cmplwi 0,r4,0 + /* + * Skip optimised bloc until cache is enabled. 
Will be replaced + * by 'bne' during boot to use normal procedure if r4 is not zero + */ +5: b 2f + patch_site 5b, patch__memset_nocache + clrlwi r7,r6,32-LG_CACHELINE_BYTES add r8,r7,r5 srwi r9,r8,LG_CACHELINE_BYTES @@ -103,13 +125,13 @@ _GLOBAL(cacheable_memzero) bdnz 10b clrlwi r5,r8,32-LG_CACHELINE_BYTES addi r5,r5,4 + 2: srwi r0,r5,2 mtctr r0 bdz 6f 1: stwu r4,4(r6) bdnz 1b 6: andi. r5,r5,3 -7: cmpwi 0,r5,0 beqlr mtctr r5 addi r6,r6,3 @@ -117,30 +139,15 @@ _GLOBAL(cacheable_memzero) bdnz 8b blr -_GLOBAL(memset) - rlwimi r4,r4,8,16,23 - rlwimi r4,r4,16,0,15 - addi r6,r3,-4 - cmplwi 0,r5,4 - blt 7f - stwu r4,4(r6) - beqlr - andi. r0,r6,3 - add r5,r0,r5 - subf r6,r0,r6 - srwi r0,r5,2 - mtctr r0 - bdz 6f -1: stwu r4,4(r6) - bdnz 1b -6: andi. r5,r5,3 7: cmpwi 0,r5,0 beqlr mtctr r5 - addi r6,r6,3 -8: stbu r4,1(r6) - bdnz 8b + addi r6,r3,-1 +9: stbu r4,1(r6) + bdnz 9b blr +EXPORT_SYMBOL(memset) +EXPORT_SYMBOL_KASAN(memset) /* * This version uses dcbz on the complete cache lines in the @@ -148,14 +155,26 @@ _GLOBAL(memset) * the destination area is cacheable. * We only use this version if the source and dest don't overlap. * -- paulus. + * + * During early init, cache might not be active yet, so dcbz cannot be used. + * We therefore jump to generic_memcpy which doesn't use dcbz. This jump is + * replaced by a nop once cache is active. This is done in machine_init() */ -_GLOBAL(cacheable_memcpy) +_GLOBAL_KASAN(memmove) + cmplw 0,r3,r4 + bgt backwards_memcpy + /* fall through */ + +_GLOBAL_KASAN(memcpy) +1: b generic_memcpy + patch_site 1b, patch__memcpy_nocache + add r7,r3,r5 /* test if the src & dst overlap */ add r8,r4,r5 cmplw 0,r4,r7 cmplw 1,r3,r8 crand 0,0,4 /* cr0.lt &= cr1.lt */ - blt memcpy /* if regions overlap */ + blt generic_memcpy /* if regions overlap */ addi r4,r4,-4 addi r6,r3,-4 @@ -170,9 +189,9 @@ _GLOBAL(cacheable_memcpy) mtctr r8 beq+ 61f 70: lbz r9,4(r4) /* do some bytes */ - stb r9,4(r6) addi r4,r4,1 addi r6,r6,1 + stb r9,3(r6) bdnz 70b 61: srwi. r0,r0,2 mtctr r0 @@ -214,19 +233,18 @@ _GLOBAL(cacheable_memcpy) 64: andi. r0,r5,3 mtctr r0 beq+ 65f -40: lbz r0,4(r4) - stb r0,4(r6) - addi r4,r4,1 - addi r6,r6,1 + addi r4,r4,3 + addi r6,r6,3 +40: lbzu r0,1(r4) + stbu r0,1(r6) bdnz 40b 65: blr +EXPORT_SYMBOL(memcpy) +EXPORT_SYMBOL(memmove) +EXPORT_SYMBOL_KASAN(memcpy) +EXPORT_SYMBOL_KASAN(memmove) -_GLOBAL(memmove) - cmplw 0,r3,r4 - bgt backwards_memcpy - /* fall through */ - -_GLOBAL(memcpy) +generic_memcpy: srwi. r7,r5,3 addi r6,r3,-4 addi r4,r4,-4 @@ -328,13 +346,10 @@ _GLOBAL(__copy_tofrom_user) 73: stwu r9,4(r6) bdnz 72b - .section __ex_table,"a" - .align 2 - .long 70b,100f - .long 71b,101f - .long 72b,102f - .long 73b,103f - .text + EX_TABLE(70b,100f) + EX_TABLE(71b,101f) + EX_TABLE(72b,102f) + EX_TABLE(73b,103f) 58: srwi. r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */ clrlwi r5,r5,32-LG_CACHELINE_BYTES @@ -369,10 +384,7 @@ _GLOBAL(__copy_tofrom_user) 53: dcbt r3,r4 54: dcbz r11,r6 - .section __ex_table,"a" - .align 2 - .long 54b,105f - .text + EX_TABLE(54b,105f) /* the main body of the cacheline loop */ COPY_16_BYTES_WITHEX(0) #if L1_CACHE_BYTES >= 32 @@ -491,28 +503,13 @@ _GLOBAL(__copy_tofrom_user) bdnz 130b /* then clear out the destination: r3 bytes starting at 4(r6) */ 132: mfctr r3 - srwi. r0,r3,2 - li r9,0 - mtctr r0 - beq 113f -112: stwu r9,4(r6) - bdnz 112b -113: andi. 
r0,r3,3 - mtctr r0 - beq 120f -114: stb r9,4(r6) - addi r6,r6,1 - bdnz 114b 120: blr - .section __ex_table,"a" - .align 2 - .long 30b,108b - .long 31b,109b - .long 40b,110b - .long 41b,111b - .long 130b,132b - .long 131b,120b - .long 112b,120b - .long 114b,120b - .text + EX_TABLE(30b,108b) + EX_TABLE(31b,109b) + EX_TABLE(40b,110b) + EX_TABLE(41b,111b) + EX_TABLE(130b,132b) + EX_TABLE(131b,120b) + +EXPORT_SYMBOL(__copy_tofrom_user) diff --git a/arch/powerpc/lib/copy_mc_64.S b/arch/powerpc/lib/copy_mc_64.S new file mode 100644 index 000000000000..bf1014b28fe8 --- /dev/null +++ b/arch/powerpc/lib/copy_mc_64.S @@ -0,0 +1,242 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) IBM Corporation, 2011 + * Derived from copyuser_power7.s by Anton Blanchard <anton@au.ibm.com> + * Author - Balbir Singh <bsingharora@gmail.com> + */ +#include <linux/export.h> +#include <asm/ppc_asm.h> +#include <asm/errno.h> + + .macro err1 +100: + EX_TABLE(100b,.Ldo_err1) + .endm + + .macro err2 +200: + EX_TABLE(200b,.Ldo_err2) + .endm + + .macro err3 +300: EX_TABLE(300b,.Ldone) + .endm + +.Ldo_err2: + ld r22,STK_REG(R22)(r1) + ld r21,STK_REG(R21)(r1) + ld r20,STK_REG(R20)(r1) + ld r19,STK_REG(R19)(r1) + ld r18,STK_REG(R18)(r1) + ld r17,STK_REG(R17)(r1) + ld r16,STK_REG(R16)(r1) + ld r15,STK_REG(R15)(r1) + ld r14,STK_REG(R14)(r1) + addi r1,r1,STACKFRAMESIZE +.Ldo_err1: + /* Do a byte by byte copy to get the exact remaining size */ + mtctr r7 +46: +err3; lbz r0,0(r4) + addi r4,r4,1 +err3; stb r0,0(r3) + addi r3,r3,1 + bdnz 46b + li r3,0 + blr + +.Ldone: + mfctr r3 + blr + + +_GLOBAL(copy_mc_generic) + mr r7,r5 + cmpldi r5,16 + blt .Lshort_copy + +.Lcopy: + /* Get the source 8B aligned */ + neg r6,r4 + mtocrf 0x01,r6 + clrldi r6,r6,(64-3) + + bf cr7*4+3,1f +err1; lbz r0,0(r4) + addi r4,r4,1 +err1; stb r0,0(r3) + addi r3,r3,1 + subi r7,r7,1 + +1: bf cr7*4+2,2f +err1; lhz r0,0(r4) + addi r4,r4,2 +err1; sth r0,0(r3) + addi r3,r3,2 + subi r7,r7,2 + +2: bf cr7*4+1,3f +err1; lwz r0,0(r4) + addi r4,r4,4 +err1; stw r0,0(r3) + addi r3,r3,4 + subi r7,r7,4 + +3: sub r5,r5,r6 + cmpldi r5,128 + + mflr r0 + stdu r1,-STACKFRAMESIZE(r1) + std r14,STK_REG(R14)(r1) + std r15,STK_REG(R15)(r1) + std r16,STK_REG(R16)(r1) + std r17,STK_REG(R17)(r1) + std r18,STK_REG(R18)(r1) + std r19,STK_REG(R19)(r1) + std r20,STK_REG(R20)(r1) + std r21,STK_REG(R21)(r1) + std r22,STK_REG(R22)(r1) + std r0,STACKFRAMESIZE+16(r1) + + blt 5f + srdi r6,r5,7 + mtctr r6 + + /* Now do cacheline (128B) sized loads and stores. 
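The loop that follows copies one 128-byte cache line per iteration; the block count and tail length come from a shift and a mask on the total size ("srdi r6,r5,7" and "clrldi r5,r5,(64-7)"). A small sketch of that arithmetic, with illustrative names only:

#include <stdio.h>

/* Split a copy length into full 128-byte blocks plus a remainder. */
static void chunk_plan(unsigned long n)
{
        unsigned long blocks = n >> 7;   /* number of 128-byte blocks */
        unsigned long tail = n & 127;    /* bytes left for the tail code */

        printf("%lu bytes -> %lu blocks + %lu tail\n", n, blocks, tail);
}

int main(void)
{
        chunk_plan(4096);
        chunk_plan(300);
        return 0;
}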
*/ + .align 5 +4: +err2; ld r0,0(r4) +err2; ld r6,8(r4) +err2; ld r8,16(r4) +err2; ld r9,24(r4) +err2; ld r10,32(r4) +err2; ld r11,40(r4) +err2; ld r12,48(r4) +err2; ld r14,56(r4) +err2; ld r15,64(r4) +err2; ld r16,72(r4) +err2; ld r17,80(r4) +err2; ld r18,88(r4) +err2; ld r19,96(r4) +err2; ld r20,104(r4) +err2; ld r21,112(r4) +err2; ld r22,120(r4) + addi r4,r4,128 +err2; std r0,0(r3) +err2; std r6,8(r3) +err2; std r8,16(r3) +err2; std r9,24(r3) +err2; std r10,32(r3) +err2; std r11,40(r3) +err2; std r12,48(r3) +err2; std r14,56(r3) +err2; std r15,64(r3) +err2; std r16,72(r3) +err2; std r17,80(r3) +err2; std r18,88(r3) +err2; std r19,96(r3) +err2; std r20,104(r3) +err2; std r21,112(r3) +err2; std r22,120(r3) + addi r3,r3,128 + subi r7,r7,128 + bdnz 4b + + clrldi r5,r5,(64-7) + + /* Up to 127B to go */ +5: srdi r6,r5,4 + mtocrf 0x01,r6 + +6: bf cr7*4+1,7f +err2; ld r0,0(r4) +err2; ld r6,8(r4) +err2; ld r8,16(r4) +err2; ld r9,24(r4) +err2; ld r10,32(r4) +err2; ld r11,40(r4) +err2; ld r12,48(r4) +err2; ld r14,56(r4) + addi r4,r4,64 +err2; std r0,0(r3) +err2; std r6,8(r3) +err2; std r8,16(r3) +err2; std r9,24(r3) +err2; std r10,32(r3) +err2; std r11,40(r3) +err2; std r12,48(r3) +err2; std r14,56(r3) + addi r3,r3,64 + subi r7,r7,64 + +7: ld r14,STK_REG(R14)(r1) + ld r15,STK_REG(R15)(r1) + ld r16,STK_REG(R16)(r1) + ld r17,STK_REG(R17)(r1) + ld r18,STK_REG(R18)(r1) + ld r19,STK_REG(R19)(r1) + ld r20,STK_REG(R20)(r1) + ld r21,STK_REG(R21)(r1) + ld r22,STK_REG(R22)(r1) + addi r1,r1,STACKFRAMESIZE + + /* Up to 63B to go */ + bf cr7*4+2,8f +err1; ld r0,0(r4) +err1; ld r6,8(r4) +err1; ld r8,16(r4) +err1; ld r9,24(r4) + addi r4,r4,32 +err1; std r0,0(r3) +err1; std r6,8(r3) +err1; std r8,16(r3) +err1; std r9,24(r3) + addi r3,r3,32 + subi r7,r7,32 + + /* Up to 31B to go */ +8: bf cr7*4+3,9f +err1; ld r0,0(r4) +err1; ld r6,8(r4) + addi r4,r4,16 +err1; std r0,0(r3) +err1; std r6,8(r3) + addi r3,r3,16 + subi r7,r7,16 + +9: clrldi r5,r5,(64-4) + + /* Up to 15B to go */ +.Lshort_copy: + mtocrf 0x01,r5 + bf cr7*4+0,12f +err1; lwz r0,0(r4) /* Less chance of a reject with word ops */ +err1; lwz r6,4(r4) + addi r4,r4,8 +err1; stw r0,0(r3) +err1; stw r6,4(r3) + addi r3,r3,8 + subi r7,r7,8 + +12: bf cr7*4+1,13f +err1; lwz r0,0(r4) + addi r4,r4,4 +err1; stw r0,0(r3) + addi r3,r3,4 + subi r7,r7,4 + +13: bf cr7*4+2,14f +err1; lhz r0,0(r4) + addi r4,r4,2 +err1; sth r0,0(r3) + addi r3,r3,2 + subi r7,r7,2 + +14: bf cr7*4+3,15f +err1; lbz r0,0(r4) +err1; stb r0,0(r3) + +15: li r3,0 + blr + +EXPORT_SYMBOL_GPL(copy_mc_generic); diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S index 9f9434a85264..f33a2e6088e5 100644 --- a/arch/powerpc/lib/copypage_64.S +++ b/arch/powerpc/lib/copypage_64.S @@ -1,32 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright (C) 2008 Mark Nelson, IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
*/ +#include <linux/export.h> #include <asm/page.h> #include <asm/processor.h> #include <asm/ppc_asm.h> #include <asm/asm-offsets.h> +#include <asm/feature-fixups.h> - .section ".toc","aw" -PPC64_CACHES: - .tc ppc64_caches[TC],ppc64_caches - .section ".text" - -_GLOBAL(copy_page) +_GLOBAL_TOC(copy_page) BEGIN_FTR_SECTION lis r5,PAGE_SIZE@h FTR_SECTION_ELSE - b .copypage_power7 +#ifdef CONFIG_PPC_BOOK3S_64 + b copypage_power7 +#endif ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY) ori r5,r5,PAGE_SIZE@l +#ifdef CONFIG_PPC_KERNEL_PCREL + /* + * Hack for toolchain - prefixed instructions cause label difference to + * be non-constant even if 8 byte alignment is known, so they can not + * be put in FTR sections. + */ + LOAD_REG_ADDR(r10, ppc64_caches) +BEGIN_FTR_SECTION +#else BEGIN_FTR_SECTION - ld r10,PPC64_CACHES@toc(r2) - lwz r11,DCACHEL1LOGLINESIZE(r10) /* log2 of cache line size */ - lwz r12,DCACHEL1LINESIZE(r10) /* get cache line size */ + LOAD_REG_ADDR(r10, ppc64_caches) +#endif + lwz r11,DCACHEL1LOGBLOCKSIZE(r10) /* log2 of cache block size */ + lwz r12,DCACHEL1BLOCKSIZE(r10) /* get cache block size */ li r9,0 srd r8,r5,r11 @@ -110,3 +115,4 @@ END_FTR_SECTION_IFSET(CPU_FTR_CP_USE_DCBTZ) std r11,120(r3) std r12,128(r3) blr +EXPORT_SYMBOL(copy_page) diff --git a/arch/powerpc/lib/copypage_power7.S b/arch/powerpc/lib/copypage_power7.S index 395c594722a2..07e7cec4d135 100644 --- a/arch/powerpc/lib/copypage_power7.S +++ b/arch/powerpc/lib/copypage_power7.S @@ -1,17 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
* * Copyright (C) IBM Corporation, 2012 * @@ -39,32 +27,19 @@ _GLOBAL(copypage_power7) #endif ori r10,r7,1 /* stream=1 */ - lis r8,0x8000 /* GO=1 */ - clrldi r8,r8,32 - -.machine push -.machine "power4" - /* setup read stream 0 */ - dcbt r0,r4,0b01000 /* addr from */ - dcbt r0,r7,0b01010 /* length and depth from */ - /* setup write stream 1 */ - dcbtst r0,r9,0b01000 /* addr to */ - dcbtst r0,r10,0b01010 /* length and depth to */ - eieio - dcbt r0,r8,0b01010 /* all streams GO */ -.machine pop + DCBT_SETUP_STREAMS(r4, r7, r9, r10, r8) #ifdef CONFIG_ALTIVEC mflr r0 - std r3,48(r1) - std r4,56(r1) + std r3,-STACKFRAMESIZE+STK_REG(R31)(r1) + std r4,-STACKFRAMESIZE+STK_REG(R30)(r1) std r0,16(r1) stdu r1,-STACKFRAMESIZE(r1) - bl .enter_vmx_copy + bl CFUNC(enter_vmx_ops) cmpwi r3,0 ld r0,STACKFRAMESIZE+16(r1) - ld r3,STACKFRAMESIZE+48(r1) - ld r4,STACKFRAMESIZE+56(r1) + ld r3,STK_REG(R31)(r1) + ld r4,STK_REG(R30)(r1) mtlr r0 li r0,(PAGE_SIZE/128) @@ -83,27 +58,27 @@ _GLOBAL(copypage_power7) li r12,112 .align 5 -1: lvx vr7,r0,r4 - lvx vr6,r4,r6 - lvx vr5,r4,r7 - lvx vr4,r4,r8 - lvx vr3,r4,r9 - lvx vr2,r4,r10 - lvx vr1,r4,r11 - lvx vr0,r4,r12 +1: lvx v7,0,r4 + lvx v6,r4,r6 + lvx v5,r4,r7 + lvx v4,r4,r8 + lvx v3,r4,r9 + lvx v2,r4,r10 + lvx v1,r4,r11 + lvx v0,r4,r12 addi r4,r4,128 - stvx vr7,r0,r3 - stvx vr6,r3,r6 - stvx vr5,r3,r7 - stvx vr4,r3,r8 - stvx vr3,r3,r9 - stvx vr2,r3,r10 - stvx vr1,r3,r11 - stvx vr0,r3,r12 + stvx v7,0,r3 + stvx v6,r3,r6 + stvx v5,r3,r7 + stvx v4,r3,r8 + stvx v3,r3,r9 + stvx v2,r3,r10 + stvx v1,r3,r11 + stvx v0,r3,r12 addi r3,r3,128 bdnz 1b - b .exit_vmx_copy /* tail call optimise */ + b CFUNC(exit_vmx_ops) /* tail call optimise */ #else li r0,(PAGE_SIZE/128) diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S index d73a59014900..9af969d2cc0c 100644 --- a/arch/powerpc/lib/copyuser_64.S +++ b/arch/powerpc/lib/copyuser_64.S @@ -1,23 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright (C) 2002 Paul Mackerras, IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ +#include <linux/export.h> #include <asm/processor.h> #include <asm/ppc_asm.h> +#include <asm/asm-compat.h> +#include <asm/feature-fixups.h> + +#ifndef SELFTEST_CASE +/* 0 == most CPUs, 1 == POWER6, 2 == Cell */ +#define SELFTEST_CASE 0 +#endif + +#ifdef __BIG_ENDIAN__ +#define sLd sld /* Shift towards low-numbered address. */ +#define sHd srd /* Shift towards high-numbered address. */ +#else +#define sLd srd /* Shift towards low-numbered address. */ +#define sHd sld /* Shift towards high-numbered address. */ +#endif + +/* + * These macros are used to generate exception table entries. + * The exception handlers below use the original arguments + * (stored on the stack) and the point where we're up to in + * the destination buffer, i.e. the address of the first + * unmodified byte. Generally r3 points into the destination + * buffer, but the first unmodified byte is at a variable + * offset from r3. In the code below, the symbol r3_offset + * is set to indicate the current offset at each point in + * the code. This offset is then used as a negative offset + * from the exception handler code, and those instructions + * before the exception handlers are addi instructions that + * adjust r3 to point to the correct place. 
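The lex/stex macros that follow emit exception-table entries pairing each possibly-faulting load or store with a recovery address. A user-space sketch of that lookup, under the assumption of a simple linear table (the struct and function names here are illustrative, not the kernel's EX_TABLE implementation):

#include <stddef.h>
#include <stdio.h>

/* Illustrative model of an exception ("fixup") table: each entry pairs
 * an instruction address that may fault with the address to resume at.
 * The real entries above additionally bias the handler address by
 * r3_offset so one shared handler can recover the first unmodified
 * destination byte. */
struct extable_sketch {
        unsigned long insn;     /* address of the possibly-faulting access */
        unsigned long fixup;    /* where to continue after the fault */
};

static unsigned long find_fixup(const struct extable_sketch *tbl, size_t n,
                                unsigned long fault_addr)
{
        size_t i;

        for (i = 0; i < n; i++)
                if (tbl[i].insn == fault_addr)
                        return tbl[i].fixup;
        return 0;       /* no entry: the fault is not recoverable */
}

int main(void)
{
        const struct extable_sketch tbl[] = {
                { 0x1000, 0x2000 },     /* hypothetical addresses */
                { 0x1008, 0x2000 },
        };

        printf("resume at 0x%lx\n", find_fixup(tbl, 2, 0x1008));
        return 0;
}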
+ */ + .macro lex /* exception handler for load */ +100: EX_TABLE(100b, .Lld_exc - r3_offset) + .endm + + .macro stex /* exception handler for store */ +100: EX_TABLE(100b, .Lst_exc - r3_offset) + .endm .align 7 -_GLOBAL(__copy_tofrom_user) +_GLOBAL_TOC(__copy_tofrom_user) +#ifdef CONFIG_PPC_BOOK3S_64 BEGIN_FTR_SECTION nop FTR_SECTION_ELSE b __copy_tofrom_user_power7 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY) +#endif _GLOBAL(__copy_tofrom_user_base) - /* first check for a whole page copy on a page boundary */ + /* first check for a 4kB copy on a 4kB boundary */ cmpldi cr1,r5,16 cmpdi cr6,r5,4096 or r0,r3,r4 @@ -38,6 +74,7 @@ _GLOBAL(__copy_tofrom_user_base) * At the time of writing the only CPU that has this combination of bits * set is Power6. */ +test_feature = (SELFTEST_CASE == 1) BEGIN_FTR_SECTION nop FTR_SECTION_ELSE @@ -46,6 +83,8 @@ ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \ CPU_FTR_UNALIGNED_LD_STD) .Ldst_aligned: addi r3,r3,-16 +r3_offset = 16 +test_feature = (SELFTEST_CASE == 0) BEGIN_FTR_SECTION andi. r0,r4,7 bne .Lsrc_unaligned @@ -53,57 +92,69 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) blt cr1,.Ldo_tail /* if < 16 bytes to copy */ srdi r0,r5,5 cmpdi cr1,r0,0 -20: ld r7,0(r4) -220: ld r6,8(r4) +lex; ld r7,0(r4) +lex; ld r6,8(r4) addi r4,r4,16 mtctr r0 andi. r0,r5,0x10 beq 22f addi r3,r3,16 +r3_offset = 0 addi r4,r4,-16 mr r9,r7 mr r8,r6 beq cr1,72f -21: ld r7,16(r4) -221: ld r6,24(r4) +21: +lex; ld r7,16(r4) +lex; ld r6,24(r4) addi r4,r4,32 -70: std r9,0(r3) -270: std r8,8(r3) -22: ld r9,0(r4) -222: ld r8,8(r4) -71: std r7,16(r3) -271: std r6,24(r3) +stex; std r9,0(r3) +r3_offset = 8 +stex; std r8,8(r3) +r3_offset = 16 +22: +lex; ld r9,0(r4) +lex; ld r8,8(r4) +stex; std r7,16(r3) +r3_offset = 24 +stex; std r6,24(r3) addi r3,r3,32 +r3_offset = 0 bdnz 21b -72: std r9,0(r3) -272: std r8,8(r3) +72: +stex; std r9,0(r3) +r3_offset = 8 +stex; std r8,8(r3) +r3_offset = 16 andi. 
r5,r5,0xf beq+ 3f addi r4,r4,16 .Ldo_tail: addi r3,r3,16 +r3_offset = 0 bf cr7*4+0,246f -244: ld r9,0(r4) +lex; ld r9,0(r4) addi r4,r4,8 -245: std r9,0(r3) +stex; std r9,0(r3) addi r3,r3,8 246: bf cr7*4+1,1f -23: lwz r9,0(r4) +lex; lwz r9,0(r4) addi r4,r4,4 -73: stw r9,0(r3) +stex; stw r9,0(r3) addi r3,r3,4 1: bf cr7*4+2,2f -44: lhz r9,0(r4) +lex; lhz r9,0(r4) addi r4,r4,2 -74: sth r9,0(r3) +stex; sth r9,0(r3) addi r3,r3,2 2: bf cr7*4+3,3f -45: lbz r9,0(r4) -75: stb r9,0(r3) +lex; lbz r9,0(r4) +stex; stb r9,0(r3) 3: li r3,0 blr .Lsrc_unaligned: +r3_offset = 16 srdi r6,r5,3 addi r5,r5,-16 subf r4,r0,r4 @@ -116,117 +167,151 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) add r5,r5,r0 bt cr7*4+0,28f -24: ld r9,0(r4) /* 3+2n loads, 2+2n stores */ -25: ld r0,8(r4) - sld r6,r9,r10 -26: ldu r9,16(r4) - srd r7,r0,r11 - sld r8,r0,r10 +lex; ld r9,0(r4) /* 3+2n loads, 2+2n stores */ +lex; ld r0,8(r4) + sLd r6,r9,r10 +lex; ldu r9,16(r4) + sHd r7,r0,r11 + sLd r8,r0,r10 or r7,r7,r6 blt cr6,79f -27: ld r0,8(r4) +lex; ld r0,8(r4) b 2f -28: ld r0,0(r4) /* 4+2n loads, 3+2n stores */ -29: ldu r9,8(r4) - sld r8,r0,r10 +28: +lex; ld r0,0(r4) /* 4+2n loads, 3+2n stores */ +lex; ldu r9,8(r4) + sLd r8,r0,r10 addi r3,r3,-8 +r3_offset = 24 blt cr6,5f -30: ld r0,8(r4) - srd r12,r9,r11 - sld r6,r9,r10 -31: ldu r9,16(r4) +lex; ld r0,8(r4) + sHd r12,r9,r11 + sLd r6,r9,r10 +lex; ldu r9,16(r4) or r12,r8,r12 - srd r7,r0,r11 - sld r8,r0,r10 + sHd r7,r0,r11 + sLd r8,r0,r10 addi r3,r3,16 +r3_offset = 8 beq cr6,78f 1: or r7,r7,r6 -32: ld r0,8(r4) -76: std r12,8(r3) -2: srd r12,r9,r11 - sld r6,r9,r10 -33: ldu r9,16(r4) +lex; ld r0,8(r4) +stex; std r12,8(r3) +r3_offset = 16 +2: sHd r12,r9,r11 + sLd r6,r9,r10 +lex; ldu r9,16(r4) or r12,r8,r12 -77: stdu r7,16(r3) - srd r7,r0,r11 - sld r8,r0,r10 +stex; stdu r7,16(r3) +r3_offset = 8 + sHd r7,r0,r11 + sLd r8,r0,r10 bdnz 1b -78: std r12,8(r3) +78: +stex; std r12,8(r3) +r3_offset = 16 or r7,r7,r6 -79: std r7,16(r3) -5: srd r12,r9,r11 +79: +stex; std r7,16(r3) +r3_offset = 24 +5: sHd r12,r9,r11 or r12,r8,r12 -80: std r12,24(r3) +stex; std r12,24(r3) +r3_offset = 32 bne 6f li r3,0 blr 6: cmpwi cr1,r5,8 addi r3,r3,32 - sld r9,r9,r10 +r3_offset = 0 + sLd r9,r9,r10 ble cr1,7f -34: ld r0,8(r4) - srd r7,r0,r11 +lex; ld r0,8(r4) + sHd r7,r0,r11 or r9,r7,r9 7: bf cr7*4+1,1f +#ifdef __BIG_ENDIAN__ rotldi r9,r9,32 -94: stw r9,0(r3) +#endif +stex; stw r9,0(r3) +#ifdef __LITTLE_ENDIAN__ + rotrdi r9,r9,32 +#endif addi r3,r3,4 1: bf cr7*4+2,2f +#ifdef __BIG_ENDIAN__ rotldi r9,r9,16 -95: sth r9,0(r3) +#endif +stex; sth r9,0(r3) +#ifdef __LITTLE_ENDIAN__ + rotrdi r9,r9,16 +#endif addi r3,r3,2 2: bf cr7*4+3,3f +#ifdef __BIG_ENDIAN__ rotldi r9,r9,8 -96: stb r9,0(r3) +#endif +stex; stb r9,0(r3) +#ifdef __LITTLE_ENDIAN__ + rotrdi r9,r9,8 +#endif 3: li r3,0 blr .Ldst_unaligned: +r3_offset = 0 PPC_MTOCRF(0x01,r6) /* put #bytes to 8B bdry into cr7 */ subf r5,r6,r5 li r7,0 cmpldi cr1,r5,16 bf cr7*4+3,1f -35: lbz r0,0(r4) -81: stb r0,0(r3) +100: EX_TABLE(100b, .Lld_exc_r7) + lbz r0,0(r4) +100: EX_TABLE(100b, .Lst_exc_r7) + stb r0,0(r3) addi r7,r7,1 1: bf cr7*4+2,2f -36: lhzx r0,r7,r4 -82: sthx r0,r7,r3 +100: EX_TABLE(100b, .Lld_exc_r7) + lhzx r0,r7,r4 +100: EX_TABLE(100b, .Lst_exc_r7) + sthx r0,r7,r3 addi r7,r7,2 2: bf cr7*4+1,3f -37: lwzx r0,r7,r4 -83: stwx r0,r7,r3 +100: EX_TABLE(100b, .Lld_exc_r7) + lwzx r0,r7,r4 +100: EX_TABLE(100b, .Lst_exc_r7) + stwx r0,r7,r3 3: PPC_MTOCRF(0x01,r5) add r4,r6,r4 add r3,r6,r3 b .Ldst_aligned .Lshort_copy: +r3_offset = 0 bf cr7*4+0,1f -38: lwz r0,0(r4) -39: lwz r9,4(r4) 
+lex; lwz r0,0(r4) +lex; lwz r9,4(r4) addi r4,r4,8 -84: stw r0,0(r3) -85: stw r9,4(r3) +stex; stw r0,0(r3) +stex; stw r9,4(r3) addi r3,r3,8 1: bf cr7*4+1,2f -40: lwz r0,0(r4) +lex; lwz r0,0(r4) addi r4,r4,4 -86: stw r0,0(r3) +stex; stw r0,0(r3) addi r3,r3,4 2: bf cr7*4+2,3f -41: lhz r0,0(r4) +lex; lhz r0,0(r4) addi r4,r4,2 -87: sth r0,0(r3) +stex; sth r0,0(r3) addi r3,r3,2 3: bf cr7*4+3,4f -42: lbz r0,0(r4) -88: stb r0,0(r3) +lex; lbz r0,0(r4) +stex; stb r0,0(r3) 4: li r3,0 blr @@ -234,48 +319,34 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) * exception handlers follow * we have to return the number of bytes not copied * for an exception on a load, we set the rest of the destination to 0 + * Note that the number of bytes of instructions for adjusting r3 needs + * to equal the amount of the adjustment, due to the trick of using + * .Lld_exc - r3_offset as the handler address. */ -136: -137: +.Lld_exc_r7: add r3,r3,r7 - b 1f -130: -131: + b .Lld_exc + + /* adjust by 24 */ addi r3,r3,8 -120: -320: -122: -322: -124: -125: -126: -127: -128: -129: -133: + nop + /* adjust by 16 */ addi r3,r3,8 -132: + nop + /* adjust by 8 */ addi r3,r3,8 -121: -321: -344: -134: -135: -138: -139: -140: -141: -142: -123: -144: -145: + nop /* - * here we have had a fault on a load and r3 points to the first - * unmodified byte of the destination + * Here we have had a fault on a load and r3 points to the first + * unmodified byte of the destination. We use the original arguments + * and r3 to work out how much wasn't copied. Since we load some + * distance ahead of the stores, we continue copying byte-by-byte until + * we hit the load fault again in order to copy as much as possible. */ -1: ld r6,-24(r1) +.Lld_exc: + ld r6,-24(r1) ld r4,-16(r1) ld r5,-8(r1) subf r6,r6,r3 @@ -286,161 +357,76 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) * first see if we can copy any more bytes before hitting another exception */ mtctr r5 +r3_offset = 0 +100: EX_TABLE(100b, .Ldone) 43: lbz r0,0(r4) addi r4,r4,1 -89: stb r0,0(r3) +stex; stb r0,0(r3) addi r3,r3,1 bdnz 43b li r3,0 /* huh? all copied successfully this time? */ blr /* - * here we have trapped again, need to clear ctr bytes starting at r3 + * here we have trapped again, amount remaining is in ctr. */ -143: mfctr r5 - li r0,0 - mr r4,r3 - mr r3,r5 /* return the number of bytes not copied */ -1: andi. r9,r4,7 - beq 3f -90: stb r0,0(r4) - addic. r5,r5,-1 - addi r4,r4,1 - bne 1b - blr -3: cmpldi cr1,r5,8 - srdi r9,r5,3 - andi. r5,r5,7 - blt cr1,93f - mtctr r9 -91: std r0,0(r4) - addi r4,r4,8 - bdnz 91b -93: beqlr - mtctr r5 -92: stb r0,0(r4) - addi r4,r4,1 - bdnz 92b +.Ldone: + mfctr r3 blr /* - * exception handlers for stores: we just need to work - * out how many bytes weren't copied + * exception handlers for stores: we need to work out how many bytes + * weren't copied, and we may need to copy some more. + * Note that the number of bytes of instructions for adjusting r3 needs + * to equal the amount of the adjustment, due to the trick of using + * .Lst_exc - r3_offset as the handler address. 
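What the store-fault path ultimately returns is the count of bytes it never managed to store. That arithmetic can be sketched in plain C (bytes_not_copied and its argument names are illustrative):

#include <stdio.h>

/* On a store fault the handler reloads the original destination and
 * length saved on the stack, and reports everything from the first
 * unmodified byte to the end of the buffer as "not copied". */
static unsigned long bytes_not_copied(unsigned long orig_dest,
                                      unsigned long len,
                                      unsigned long first_unmodified)
{
        return orig_dest + len - first_unmodified;
}

int main(void)
{
        /* a 4096-byte copy that faulted after 100 bytes had been stored */
        printf("%lu\n", bytes_not_copied(0x10000, 4096, 0x10000 + 100));
        return 0;
}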
*/ -182: -183: +.Lst_exc_r7: add r3,r3,r7 - b 1f -371: -180: + b .Lst_exc + + /* adjust by 24 */ addi r3,r3,8 -171: -177: + nop + /* adjust by 16 */ addi r3,r3,8 -370: -372: -176: -178: + nop + /* adjust by 8 */ addi r3,r3,4 -185: + /* adjust by 4 */ addi r3,r3,4 -170: -172: -345: -173: -174: -175: -179: -181: -184: -186: -187: -188: -189: -194: -195: -196: -1: - ld r6,-24(r1) - ld r5,-8(r1) - add r6,r6,r5 - subf r3,r3,r6 /* #bytes not copied */ -190: -191: -192: - blr /* #bytes not copied in r3 */ - - .section __ex_table,"a" - .align 3 - .llong 20b,120b - .llong 220b,320b - .llong 21b,121b - .llong 221b,321b - .llong 70b,170b - .llong 270b,370b - .llong 22b,122b - .llong 222b,322b - .llong 71b,171b - .llong 271b,371b - .llong 72b,172b - .llong 272b,372b - .llong 244b,344b - .llong 245b,345b - .llong 23b,123b - .llong 73b,173b - .llong 44b,144b - .llong 74b,174b - .llong 45b,145b - .llong 75b,175b - .llong 24b,124b - .llong 25b,125b - .llong 26b,126b - .llong 27b,127b - .llong 28b,128b - .llong 29b,129b - .llong 30b,130b - .llong 31b,131b - .llong 32b,132b - .llong 76b,176b - .llong 33b,133b - .llong 77b,177b - .llong 78b,178b - .llong 79b,179b - .llong 80b,180b - .llong 34b,134b - .llong 94b,194b - .llong 95b,195b - .llong 96b,196b - .llong 35b,135b - .llong 81b,181b - .llong 36b,136b - .llong 82b,182b - .llong 37b,137b - .llong 83b,183b - .llong 38b,138b - .llong 39b,139b - .llong 84b,184b - .llong 85b,185b - .llong 40b,140b - .llong 86b,186b - .llong 41b,141b - .llong 87b,187b - .llong 42b,142b - .llong 88b,188b - .llong 43b,143b - .llong 89b,189b - .llong 90b,190b - .llong 91b,191b - .llong 92b,192b - - .text +.Lst_exc: + ld r6,-24(r1) /* original destination pointer */ + ld r4,-16(r1) /* original source pointer */ + ld r5,-8(r1) /* original number of bytes */ + add r7,r6,r5 + /* + * If the destination pointer isn't 8-byte aligned, + * we may have got the exception as a result of a + * store that overlapped a page boundary, so we may be + * able to copy a few more bytes. + */ +17: andi. r0,r3,7 + beq 19f + subf r8,r6,r3 /* #bytes copied */ +100: EX_TABLE(100b,19f) + lbzx r0,r8,r4 +100: EX_TABLE(100b,19f) + stb r0,0(r3) + addi r3,r3,1 + cmpld r3,r7 + blt 17b +19: subf r3,r3,r7 /* #bytes not copied in r3 */ + blr /* * Routine to copy a whole page of data, optimized for POWER4. * On POWER4 it is more than 50% faster than the simple loop - * above (following the .Ldst_aligned label) but it runs slightly - * slower on POWER3. + * above (following the .Ldst_aligned label). 
*/ + .macro exc +100: EX_TABLE(100b, .Labort) + .endm .Lcopy_page_4K: std r31,-32(1) std r30,-40(1) @@ -459,86 +445,86 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) li r0,5 0: addi r5,r5,-24 mtctr r0 -20: ld r22,640(4) -21: ld r21,512(4) -22: ld r20,384(4) -23: ld r11,256(4) -24: ld r9,128(4) -25: ld r7,0(4) -26: ld r25,648(4) -27: ld r24,520(4) -28: ld r23,392(4) -29: ld r10,264(4) -30: ld r8,136(4) -31: ldu r6,8(4) +exc; ld r22,640(4) +exc; ld r21,512(4) +exc; ld r20,384(4) +exc; ld r11,256(4) +exc; ld r9,128(4) +exc; ld r7,0(4) +exc; ld r25,648(4) +exc; ld r24,520(4) +exc; ld r23,392(4) +exc; ld r10,264(4) +exc; ld r8,136(4) +exc; ldu r6,8(4) cmpwi r5,24 1: -32: std r22,648(3) -33: std r21,520(3) -34: std r20,392(3) -35: std r11,264(3) -36: std r9,136(3) -37: std r7,8(3) -38: ld r28,648(4) -39: ld r27,520(4) -40: ld r26,392(4) -41: ld r31,264(4) -42: ld r30,136(4) -43: ld r29,8(4) -44: std r25,656(3) -45: std r24,528(3) -46: std r23,400(3) -47: std r10,272(3) -48: std r8,144(3) -49: std r6,16(3) -50: ld r22,656(4) -51: ld r21,528(4) -52: ld r20,400(4) -53: ld r11,272(4) -54: ld r9,144(4) -55: ld r7,16(4) -56: std r28,664(3) -57: std r27,536(3) -58: std r26,408(3) -59: std r31,280(3) -60: std r30,152(3) -61: stdu r29,24(3) -62: ld r25,664(4) -63: ld r24,536(4) -64: ld r23,408(4) -65: ld r10,280(4) -66: ld r8,152(4) -67: ldu r6,24(4) +exc; std r22,648(3) +exc; std r21,520(3) +exc; std r20,392(3) +exc; std r11,264(3) +exc; std r9,136(3) +exc; std r7,8(3) +exc; ld r28,648(4) +exc; ld r27,520(4) +exc; ld r26,392(4) +exc; ld r31,264(4) +exc; ld r30,136(4) +exc; ld r29,8(4) +exc; std r25,656(3) +exc; std r24,528(3) +exc; std r23,400(3) +exc; std r10,272(3) +exc; std r8,144(3) +exc; std r6,16(3) +exc; ld r22,656(4) +exc; ld r21,528(4) +exc; ld r20,400(4) +exc; ld r11,272(4) +exc; ld r9,144(4) +exc; ld r7,16(4) +exc; std r28,664(3) +exc; std r27,536(3) +exc; std r26,408(3) +exc; std r31,280(3) +exc; std r30,152(3) +exc; stdu r29,24(3) +exc; ld r25,664(4) +exc; ld r24,536(4) +exc; ld r23,408(4) +exc; ld r10,280(4) +exc; ld r8,152(4) +exc; ldu r6,24(4) bdnz 1b -68: std r22,648(3) -69: std r21,520(3) -70: std r20,392(3) -71: std r11,264(3) -72: std r9,136(3) -73: std r7,8(3) -74: addi r4,r4,640 -75: addi r3,r3,648 +exc; std r22,648(3) +exc; std r21,520(3) +exc; std r20,392(3) +exc; std r11,264(3) +exc; std r9,136(3) +exc; std r7,8(3) + addi r4,r4,640 + addi r3,r3,648 bge 0b mtctr r5 -76: ld r7,0(4) -77: ld r8,8(4) -78: ldu r9,16(4) +exc; ld r7,0(4) +exc; ld r8,8(4) +exc; ldu r9,16(4) 3: -79: ld r10,8(4) -80: std r7,8(3) -81: ld r7,16(4) -82: std r8,16(3) -83: ld r8,24(4) -84: std r9,24(3) -85: ldu r9,32(4) -86: stdu r10,32(3) +exc; ld r10,8(4) +exc; std r7,8(3) +exc; ld r7,16(4) +exc; std r8,16(3) +exc; ld r8,24(4) +exc; std r9,24(3) +exc; ldu r9,32(4) +exc; stdu r10,32(3) bdnz 3b 4: -87: ld r10,8(4) -88: std r7,8(3) -89: std r8,16(3) -90: std r9,24(3) -91: std r10,32(3) +exc; ld r10,8(4) +exc; std r7,8(3) +exc; std r8,16(3) +exc; std r9,24(3) +exc; std r10,32(3) 9: ld r20,-120(1) ld r21,-112(1) ld r22,-104(1) @@ -558,7 +544,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) * on an exception, reset to the beginning and jump back into the * standard __copy_tofrom_user */ -100: ld r20,-120(1) +.Labort: + ld r20,-120(1) ld r21,-112(1) ld r22,-104(1) ld r23,-96(1) @@ -574,78 +561,4 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) ld r4,-16(r1) li r5,4096 b .Ldst_aligned - - .section __ex_table,"a" - .align 3 - .llong 20b,100b - .llong 21b,100b - .llong 22b,100b - .llong 23b,100b - .llong 
24b,100b - .llong 25b,100b - .llong 26b,100b - .llong 27b,100b - .llong 28b,100b - .llong 29b,100b - .llong 30b,100b - .llong 31b,100b - .llong 32b,100b - .llong 33b,100b - .llong 34b,100b - .llong 35b,100b - .llong 36b,100b - .llong 37b,100b - .llong 38b,100b - .llong 39b,100b - .llong 40b,100b - .llong 41b,100b - .llong 42b,100b - .llong 43b,100b - .llong 44b,100b - .llong 45b,100b - .llong 46b,100b - .llong 47b,100b - .llong 48b,100b - .llong 49b,100b - .llong 50b,100b - .llong 51b,100b - .llong 52b,100b - .llong 53b,100b - .llong 54b,100b - .llong 55b,100b - .llong 56b,100b - .llong 57b,100b - .llong 58b,100b - .llong 59b,100b - .llong 60b,100b - .llong 61b,100b - .llong 62b,100b - .llong 63b,100b - .llong 64b,100b - .llong 65b,100b - .llong 66b,100b - .llong 67b,100b - .llong 68b,100b - .llong 69b,100b - .llong 70b,100b - .llong 71b,100b - .llong 72b,100b - .llong 73b,100b - .llong 74b,100b - .llong 75b,100b - .llong 76b,100b - .llong 77b,100b - .llong 78b,100b - .llong 79b,100b - .llong 80b,100b - .llong 81b,100b - .llong 82b,100b - .llong 83b,100b - .llong 84b,100b - .llong 85b,100b - .llong 86b,100b - .llong 87b,100b - .llong 88b,100b - .llong 89b,100b - .llong 90b,100b - .llong 91b,100b +EXPORT_SYMBOL(__copy_tofrom_user) diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S index d1f11795a7ad..8474c682a178 100644 --- a/arch/powerpc/lib/copyuser_power7.S +++ b/arch/powerpc/lib/copyuser_power7.S @@ -1,17 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
* * Copyright (C) IBM Corporation, 2011 * @@ -19,37 +7,38 @@ */ #include <asm/ppc_asm.h> +#ifndef SELFTEST_CASE +/* 0 == don't use VMX, 1 == use VMX */ +#define SELFTEST_CASE 0 +#endif + +#ifdef __BIG_ENDIAN__ +#define LVS(VRT,RA,RB) lvsl VRT,RA,RB +#define VPERM(VRT,VRA,VRB,VRC) vperm VRT,VRA,VRB,VRC +#else +#define LVS(VRT,RA,RB) lvsr VRT,RA,RB +#define VPERM(VRT,VRA,VRB,VRC) vperm VRT,VRB,VRA,VRC +#endif + .macro err1 100: - .section __ex_table,"a" - .align 3 - .llong 100b,.Ldo_err1 - .previous + EX_TABLE(100b,.Ldo_err1) .endm .macro err2 200: - .section __ex_table,"a" - .align 3 - .llong 200b,.Ldo_err2 - .previous + EX_TABLE(200b,.Ldo_err2) .endm #ifdef CONFIG_ALTIVEC .macro err3 300: - .section __ex_table,"a" - .align 3 - .llong 300b,.Ldo_err3 - .previous + EX_TABLE(300b,.Ldo_err3) .endm .macro err4 400: - .section __ex_table,"a" - .align 3 - .llong 400b,.Ldo_err4 - .previous + EX_TABLE(400b,.Ldo_err4) .endm @@ -58,7 +47,7 @@ ld r15,STK_REG(R15)(r1) ld r14,STK_REG(R14)(r1) .Ldo_err3: - bl .exit_vmx_usercopy + bl CFUNC(exit_vmx_usercopy) ld r0,STACKFRAMESIZE+16(r1) mtlr r0 b .Lexit @@ -77,31 +66,27 @@ .Lexit: addi r1,r1,STACKFRAMESIZE .Ldo_err1: - ld r3,48(r1) - ld r4,56(r1) - ld r5,64(r1) + ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1) + ld r4,-STACKFRAMESIZE+STK_REG(R30)(r1) + ld r5,-STACKFRAMESIZE+STK_REG(R29)(r1) b __copy_tofrom_user_base _GLOBAL(__copy_tofrom_user_power7) -#ifdef CONFIG_ALTIVEC cmpldi r5,16 - cmpldi cr1,r5,4096 + cmpldi cr1,r5,3328 - std r3,48(r1) - std r4,56(r1) - std r5,64(r1) + std r3,-STACKFRAMESIZE+STK_REG(R31)(r1) + std r4,-STACKFRAMESIZE+STK_REG(R30)(r1) + std r5,-STACKFRAMESIZE+STK_REG(R29)(r1) blt .Lshort_copy - bgt cr1,.Lvmx_copy -#else - cmpldi r5,16 - - std r3,48(r1) - std r4,56(r1) - std r5,64(r1) - blt .Lshort_copy +#ifdef CONFIG_ALTIVEC +test_feature = SELFTEST_CASE +BEGIN_FTR_SECTION + bgt cr1,.Lvmx_copy +END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif .Lnonvmx_copy: @@ -282,17 +267,17 @@ err1; stb r0,0(r3) addi r1,r1,STACKFRAMESIZE b .Lnonvmx_copy -#ifdef CONFIG_ALTIVEC .Lvmx_copy: +#ifdef CONFIG_ALTIVEC mflr r0 std r0,16(r1) stdu r1,-STACKFRAMESIZE(r1) - bl .enter_vmx_usercopy + bl CFUNC(enter_vmx_usercopy) cmpwi cr1,r3,0 ld r0,STACKFRAMESIZE+16(r1) - ld r3,STACKFRAMESIZE+48(r1) - ld r4,STACKFRAMESIZE+56(r1) - ld r5,STACKFRAMESIZE+64(r1) + ld r3,STK_REG(R31)(r1) + ld r4,STK_REG(R30)(r1) + ld r5,STK_REG(R29)(r1) mtlr r0 /* @@ -313,20 +298,7 @@ err1; stb r0,0(r3) or r7,r7,r0 ori r10,r7,1 /* stream=1 */ - lis r8,0x8000 /* GO=1 */ - clrldi r8,r8,32 - -.machine push -.machine "power4" - /* setup read stream 0 */ - dcbt r0,r6,0b01000 /* addr from */ - dcbt r0,r7,0b01010 /* length and depth from */ - /* setup write stream 1 */ - dcbtst r0,r9,0b01000 /* addr to */ - dcbtst r0,r10,0b01010 /* length and depth to */ - eieio - dcbt r0,r8,0b01010 /* all streams GO */ -.machine pop + DCBT_SETUP_STREAMS(r6, r7, r9, r10, r8) beq cr1,.Lunwind_stack_nonvmx_copy @@ -380,29 +352,29 @@ err3; std r0,0(r3) li r11,48 bf cr7*4+3,5f -err3; lvx vr1,r0,r4 +err3; lvx v1,0,r4 addi r4,r4,16 -err3; stvx vr1,r0,r3 +err3; stvx v1,0,r3 addi r3,r3,16 5: bf cr7*4+2,6f -err3; lvx vr1,r0,r4 -err3; lvx vr0,r4,r9 +err3; lvx v1,0,r4 +err3; lvx v0,r4,r9 addi r4,r4,32 -err3; stvx vr1,r0,r3 -err3; stvx vr0,r3,r9 +err3; stvx v1,0,r3 +err3; stvx v0,r3,r9 addi r3,r3,32 6: bf cr7*4+1,7f -err3; lvx vr3,r0,r4 -err3; lvx vr2,r4,r9 -err3; lvx vr1,r4,r10 -err3; lvx vr0,r4,r11 +err3; lvx v3,0,r4 +err3; lvx v2,r4,r9 +err3; lvx v1,r4,r10 +err3; lvx v0,r4,r11 addi r4,r4,64 -err3; stvx vr3,r0,r3 -err3; stvx 
vr2,r3,r9 -err3; stvx vr1,r3,r10 -err3; stvx vr0,r3,r11 +err3; stvx v3,0,r3 +err3; stvx v2,r3,r9 +err3; stvx v1,r3,r10 +err3; stvx v0,r3,r11 addi r3,r3,64 7: sub r5,r5,r6 @@ -425,23 +397,23 @@ err3; stvx vr0,r3,r11 */ .align 5 8: -err4; lvx vr7,r0,r4 -err4; lvx vr6,r4,r9 -err4; lvx vr5,r4,r10 -err4; lvx vr4,r4,r11 -err4; lvx vr3,r4,r12 -err4; lvx vr2,r4,r14 -err4; lvx vr1,r4,r15 -err4; lvx vr0,r4,r16 +err4; lvx v7,0,r4 +err4; lvx v6,r4,r9 +err4; lvx v5,r4,r10 +err4; lvx v4,r4,r11 +err4; lvx v3,r4,r12 +err4; lvx v2,r4,r14 +err4; lvx v1,r4,r15 +err4; lvx v0,r4,r16 addi r4,r4,128 -err4; stvx vr7,r0,r3 -err4; stvx vr6,r3,r9 -err4; stvx vr5,r3,r10 -err4; stvx vr4,r3,r11 -err4; stvx vr3,r3,r12 -err4; stvx vr2,r3,r14 -err4; stvx vr1,r3,r15 -err4; stvx vr0,r3,r16 +err4; stvx v7,0,r3 +err4; stvx v6,r3,r9 +err4; stvx v5,r3,r10 +err4; stvx v4,r3,r11 +err4; stvx v3,r3,r12 +err4; stvx v2,r3,r14 +err4; stvx v1,r3,r15 +err4; stvx v0,r3,r16 addi r3,r3,128 bdnz 8b @@ -455,29 +427,29 @@ err4; stvx vr0,r3,r16 mtocrf 0x01,r6 bf cr7*4+1,9f -err3; lvx vr3,r0,r4 -err3; lvx vr2,r4,r9 -err3; lvx vr1,r4,r10 -err3; lvx vr0,r4,r11 +err3; lvx v3,0,r4 +err3; lvx v2,r4,r9 +err3; lvx v1,r4,r10 +err3; lvx v0,r4,r11 addi r4,r4,64 -err3; stvx vr3,r0,r3 -err3; stvx vr2,r3,r9 -err3; stvx vr1,r3,r10 -err3; stvx vr0,r3,r11 +err3; stvx v3,0,r3 +err3; stvx v2,r3,r9 +err3; stvx v1,r3,r10 +err3; stvx v0,r3,r11 addi r3,r3,64 9: bf cr7*4+2,10f -err3; lvx vr1,r0,r4 -err3; lvx vr0,r4,r9 +err3; lvx v1,0,r4 +err3; lvx v0,r4,r9 addi r4,r4,32 -err3; stvx vr1,r0,r3 -err3; stvx vr0,r3,r9 +err3; stvx v1,0,r3 +err3; stvx v0,r3,r9 addi r3,r3,32 10: bf cr7*4+3,11f -err3; lvx vr1,r0,r4 +err3; lvx v1,0,r4 addi r4,r4,16 -err3; stvx vr1,r0,r3 +err3; stvx v1,0,r3 addi r3,r3,16 /* Up to 15B to go */ @@ -506,7 +478,7 @@ err3; lbz r0,0(r4) err3; stb r0,0(r3) 15: addi r1,r1,STACKFRAMESIZE - b .exit_vmx_usercopy /* tail call optimise */ + b CFUNC(exit_vmx_usercopy) /* tail call optimise */ .Lvmx_unaligned_copy: /* Get the destination 16B aligned */ @@ -552,42 +524,42 @@ err3; stw r7,4(r3) li r10,32 li r11,48 - lvsl vr16,0,r4 /* Setup permute control vector */ -err3; lvx vr0,0,r4 + LVS(v16,0,r4) /* Setup permute control vector */ +err3; lvx v0,0,r4 addi r4,r4,16 bf cr7*4+3,5f -err3; lvx vr1,r0,r4 - vperm vr8,vr0,vr1,vr16 +err3; lvx v1,0,r4 + VPERM(v8,v0,v1,v16) addi r4,r4,16 -err3; stvx vr8,r0,r3 +err3; stvx v8,0,r3 addi r3,r3,16 - vor vr0,vr1,vr1 + vor v0,v1,v1 5: bf cr7*4+2,6f -err3; lvx vr1,r0,r4 - vperm vr8,vr0,vr1,vr16 -err3; lvx vr0,r4,r9 - vperm vr9,vr1,vr0,vr16 +err3; lvx v1,0,r4 + VPERM(v8,v0,v1,v16) +err3; lvx v0,r4,r9 + VPERM(v9,v1,v0,v16) addi r4,r4,32 -err3; stvx vr8,r0,r3 -err3; stvx vr9,r3,r9 +err3; stvx v8,0,r3 +err3; stvx v9,r3,r9 addi r3,r3,32 6: bf cr7*4+1,7f -err3; lvx vr3,r0,r4 - vperm vr8,vr0,vr3,vr16 -err3; lvx vr2,r4,r9 - vperm vr9,vr3,vr2,vr16 -err3; lvx vr1,r4,r10 - vperm vr10,vr2,vr1,vr16 -err3; lvx vr0,r4,r11 - vperm vr11,vr1,vr0,vr16 +err3; lvx v3,0,r4 + VPERM(v8,v0,v3,v16) +err3; lvx v2,r4,r9 + VPERM(v9,v3,v2,v16) +err3; lvx v1,r4,r10 + VPERM(v10,v2,v1,v16) +err3; lvx v0,r4,r11 + VPERM(v11,v1,v0,v16) addi r4,r4,64 -err3; stvx vr8,r0,r3 -err3; stvx vr9,r3,r9 -err3; stvx vr10,r3,r10 -err3; stvx vr11,r3,r11 +err3; stvx v8,0,r3 +err3; stvx v9,r3,r9 +err3; stvx v10,r3,r10 +err3; stvx v11,r3,r11 addi r3,r3,64 7: sub r5,r5,r6 @@ -610,31 +582,31 @@ err3; stvx vr11,r3,r11 */ .align 5 8: -err4; lvx vr7,r0,r4 - vperm vr8,vr0,vr7,vr16 -err4; lvx vr6,r4,r9 - vperm vr9,vr7,vr6,vr16 -err4; lvx vr5,r4,r10 - vperm vr10,vr6,vr5,vr16 -err4; lvx 
vr4,r4,r11 - vperm vr11,vr5,vr4,vr16 -err4; lvx vr3,r4,r12 - vperm vr12,vr4,vr3,vr16 -err4; lvx vr2,r4,r14 - vperm vr13,vr3,vr2,vr16 -err4; lvx vr1,r4,r15 - vperm vr14,vr2,vr1,vr16 -err4; lvx vr0,r4,r16 - vperm vr15,vr1,vr0,vr16 +err4; lvx v7,0,r4 + VPERM(v8,v0,v7,v16) +err4; lvx v6,r4,r9 + VPERM(v9,v7,v6,v16) +err4; lvx v5,r4,r10 + VPERM(v10,v6,v5,v16) +err4; lvx v4,r4,r11 + VPERM(v11,v5,v4,v16) +err4; lvx v3,r4,r12 + VPERM(v12,v4,v3,v16) +err4; lvx v2,r4,r14 + VPERM(v13,v3,v2,v16) +err4; lvx v1,r4,r15 + VPERM(v14,v2,v1,v16) +err4; lvx v0,r4,r16 + VPERM(v15,v1,v0,v16) addi r4,r4,128 -err4; stvx vr8,r0,r3 -err4; stvx vr9,r3,r9 -err4; stvx vr10,r3,r10 -err4; stvx vr11,r3,r11 -err4; stvx vr12,r3,r12 -err4; stvx vr13,r3,r14 -err4; stvx vr14,r3,r15 -err4; stvx vr15,r3,r16 +err4; stvx v8,0,r3 +err4; stvx v9,r3,r9 +err4; stvx v10,r3,r10 +err4; stvx v11,r3,r11 +err4; stvx v12,r3,r12 +err4; stvx v13,r3,r14 +err4; stvx v14,r3,r15 +err4; stvx v15,r3,r16 addi r3,r3,128 bdnz 8b @@ -648,36 +620,36 @@ err4; stvx vr15,r3,r16 mtocrf 0x01,r6 bf cr7*4+1,9f -err3; lvx vr3,r0,r4 - vperm vr8,vr0,vr3,vr16 -err3; lvx vr2,r4,r9 - vperm vr9,vr3,vr2,vr16 -err3; lvx vr1,r4,r10 - vperm vr10,vr2,vr1,vr16 -err3; lvx vr0,r4,r11 - vperm vr11,vr1,vr0,vr16 +err3; lvx v3,0,r4 + VPERM(v8,v0,v3,v16) +err3; lvx v2,r4,r9 + VPERM(v9,v3,v2,v16) +err3; lvx v1,r4,r10 + VPERM(v10,v2,v1,v16) +err3; lvx v0,r4,r11 + VPERM(v11,v1,v0,v16) addi r4,r4,64 -err3; stvx vr8,r0,r3 -err3; stvx vr9,r3,r9 -err3; stvx vr10,r3,r10 -err3; stvx vr11,r3,r11 +err3; stvx v8,0,r3 +err3; stvx v9,r3,r9 +err3; stvx v10,r3,r10 +err3; stvx v11,r3,r11 addi r3,r3,64 9: bf cr7*4+2,10f -err3; lvx vr1,r0,r4 - vperm vr8,vr0,vr1,vr16 -err3; lvx vr0,r4,r9 - vperm vr9,vr1,vr0,vr16 +err3; lvx v1,0,r4 + VPERM(v8,v0,v1,v16) +err3; lvx v0,r4,r9 + VPERM(v9,v1,v0,v16) addi r4,r4,32 -err3; stvx vr8,r0,r3 -err3; stvx vr9,r3,r9 +err3; stvx v8,0,r3 +err3; stvx v9,r3,r9 addi r3,r3,32 10: bf cr7*4+3,11f -err3; lvx vr1,r0,r4 - vperm vr8,vr0,vr1,vr16 +err3; lvx v1,0,r4 + VPERM(v8,v0,v1,v16) addi r4,r4,16 -err3; stvx vr8,r0,r3 +err3; stvx v8,0,r3 addi r3,r3,16 /* Up to 15B to go */ @@ -709,5 +681,5 @@ err3; lbz r0,0(r4) err3; stb r0,0(r3) 15: addi r1,r1,STACKFRAMESIZE - b .exit_vmx_usercopy /* tail call optimise */ -#endif /* CONFiG_ALTIVEC */ + b CFUNC(exit_vmx_usercopy) /* tail call optimise */ +#endif /* CONFIG_ALTIVEC */ diff --git a/arch/powerpc/lib/crtsavres.S b/arch/powerpc/lib/crtsavres.S index b2c68ce139ae..8967903c15e9 100644 --- a/arch/powerpc/lib/crtsavres.S +++ b/arch/powerpc/lib/crtsavres.S @@ -44,10 +44,10 @@ #ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE -#ifndef CONFIG_PPC64 - .section ".text" +#ifndef __powerpc64__ + /* Routines for saving integer registers, called by the compiler. */ /* Called with r11 pointing to the stack header word of the caller of the */ /* function, just beyond the end of the integer save area. */ @@ -231,9 +231,88 @@ _GLOBAL(_rest32gpr_31_x) mr 1,11 blr -#else /* CONFIG_PPC64 */ +#ifdef CONFIG_ALTIVEC +/* Called with r0 pointing just beyond the end of the vector save area. 
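Each _savevr_N/_restvr_N entry point below addresses a fixed 16-byte slot at a negative offset from r0 and falls through to the next register, so calling _savevr_20 saves v20..v31. The offsets follow a simple rule, sketched here (vr_slot_offset is an illustrative helper, not kernel code):

#include <stdio.h>

/* v20..v31 each get a 16-byte slot below r0, which points just past the
 * end of the save area, so vN lives at -16 * (32 - N) bytes from r0. */
static int vr_slot_offset(int vr)
{
        return -16 * (32 - vr);
}

int main(void)
{
        int vr;

        for (vr = 20; vr <= 31; vr++)
                printf("v%d at %d(r0)\n", vr, vr_slot_offset(vr));   /* -192 .. -16 */
        return 0;
}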
*/ + +_GLOBAL(_savevr_20) + li r11,-192 + stvx v20,r11,r0 +_GLOBAL(_savevr_21) + li r11,-176 + stvx v21,r11,r0 +_GLOBAL(_savevr_22) + li r11,-160 + stvx v22,r11,r0 +_GLOBAL(_savevr_23) + li r11,-144 + stvx v23,r11,r0 +_GLOBAL(_savevr_24) + li r11,-128 + stvx v24,r11,r0 +_GLOBAL(_savevr_25) + li r11,-112 + stvx v25,r11,r0 +_GLOBAL(_savevr_26) + li r11,-96 + stvx v26,r11,r0 +_GLOBAL(_savevr_27) + li r11,-80 + stvx v27,r11,r0 +_GLOBAL(_savevr_28) + li r11,-64 + stvx v28,r11,r0 +_GLOBAL(_savevr_29) + li r11,-48 + stvx v29,r11,r0 +_GLOBAL(_savevr_30) + li r11,-32 + stvx v30,r11,r0 +_GLOBAL(_savevr_31) + li r11,-16 + stvx v31,r11,r0 + blr + +_GLOBAL(_restvr_20) + li r11,-192 + lvx v20,r11,r0 +_GLOBAL(_restvr_21) + li r11,-176 + lvx v21,r11,r0 +_GLOBAL(_restvr_22) + li r11,-160 + lvx v22,r11,r0 +_GLOBAL(_restvr_23) + li r11,-144 + lvx v23,r11,r0 +_GLOBAL(_restvr_24) + li r11,-128 + lvx v24,r11,r0 +_GLOBAL(_restvr_25) + li r11,-112 + lvx v25,r11,r0 +_GLOBAL(_restvr_26) + li r11,-96 + lvx v26,r11,r0 +_GLOBAL(_restvr_27) + li r11,-80 + lvx v27,r11,r0 +_GLOBAL(_restvr_28) + li r11,-64 + lvx v28,r11,r0 +_GLOBAL(_restvr_29) + li r11,-48 + lvx v29,r11,r0 +_GLOBAL(_restvr_30) + li r11,-32 + lvx v30,r11,r0 +_GLOBAL(_restvr_31) + li r11,-16 + lvx v31,r11,r0 + blr - .section ".text.save.restore","ax",@progbits +#endif /* CONFIG_ALTIVEC */ + +#else /* CONFIG_PPC64 */ .globl _savegpr0_14 _savegpr0_14: @@ -356,6 +435,111 @@ _restgpr0_31: mtlr r0 blr +#ifdef CONFIG_ALTIVEC +/* Called with r0 pointing just beyond the end of the vector save area. */ + +.globl _savevr_20 +_savevr_20: + li r12,-192 + stvx v20,r12,r0 +.globl _savevr_21 +_savevr_21: + li r12,-176 + stvx v21,r12,r0 +.globl _savevr_22 +_savevr_22: + li r12,-160 + stvx v22,r12,r0 +.globl _savevr_23 +_savevr_23: + li r12,-144 + stvx v23,r12,r0 +.globl _savevr_24 +_savevr_24: + li r12,-128 + stvx v24,r12,r0 +.globl _savevr_25 +_savevr_25: + li r12,-112 + stvx v25,r12,r0 +.globl _savevr_26 +_savevr_26: + li r12,-96 + stvx v26,r12,r0 +.globl _savevr_27 +_savevr_27: + li r12,-80 + stvx v27,r12,r0 +.globl _savevr_28 +_savevr_28: + li r12,-64 + stvx v28,r12,r0 +.globl _savevr_29 +_savevr_29: + li r12,-48 + stvx v29,r12,r0 +.globl _savevr_30 +_savevr_30: + li r12,-32 + stvx v30,r12,r0 +.globl _savevr_31 +_savevr_31: + li r12,-16 + stvx v31,r12,r0 + blr + +.globl _restvr_20 +_restvr_20: + li r12,-192 + lvx v20,r12,r0 +.globl _restvr_21 +_restvr_21: + li r12,-176 + lvx v21,r12,r0 +.globl _restvr_22 +_restvr_22: + li r12,-160 + lvx v22,r12,r0 +.globl _restvr_23 +_restvr_23: + li r12,-144 + lvx v23,r12,r0 +.globl _restvr_24 +_restvr_24: + li r12,-128 + lvx v24,r12,r0 +.globl _restvr_25 +_restvr_25: + li r12,-112 + lvx v25,r12,r0 +.globl _restvr_26 +_restvr_26: + li r12,-96 + lvx v26,r12,r0 +.globl _restvr_27 +_restvr_27: + li r12,-80 + lvx v27,r12,r0 +.globl _restvr_28 +_restvr_28: + li r12,-64 + lvx v28,r12,r0 +.globl _restvr_29 +_restvr_29: + li r12,-48 + lvx v29,r12,r0 +.globl _restvr_30 +_restvr_30: + li r12,-32 + lvx v30,r12,r0 +.globl _restvr_31 +_restvr_31: + li r12,-16 + lvx v31,r12,r0 + blr + +#endif /* CONFIG_ALTIVEC */ + #endif /* CONFIG_PPC64 */ #endif diff --git a/arch/powerpc/lib/devres.c b/arch/powerpc/lib/devres.c deleted file mode 100644 index 8df55fc3aad6..000000000000 --- a/arch/powerpc/lib/devres.c +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (C) 2008 Freescale Semiconductor, Inc. 
- * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/device.h> /* devres_*(), devm_ioremap_release() */ -#include <linux/gfp.h> -#include <linux/io.h> /* ioremap_prot() */ -#include <linux/export.h> /* EXPORT_SYMBOL() */ - -/** - * devm_ioremap_prot - Managed ioremap_prot() - * @dev: Generic device to remap IO address for - * @offset: BUS offset to map - * @size: Size of map - * @flags: Page flags - * - * Managed ioremap_prot(). Map is automatically unmapped on driver - * detach. - */ -void __iomem *devm_ioremap_prot(struct device *dev, resource_size_t offset, - size_t size, unsigned long flags) -{ - void __iomem **ptr, *addr; - - ptr = devres_alloc(devm_ioremap_release, sizeof(*ptr), GFP_KERNEL); - if (!ptr) - return NULL; - - addr = ioremap_prot(offset, size, flags); - if (addr) { - *ptr = addr; - devres_add(dev, ptr); - } else - devres_free(ptr); - - return addr; -} -EXPORT_SYMBOL(devm_ioremap_prot); diff --git a/arch/powerpc/lib/div64.S b/arch/powerpc/lib/div64.S index 83d9832fd919..3d5426e7dcc4 100644 --- a/arch/powerpc/lib/div64.S +++ b/arch/powerpc/lib/div64.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Divide a 64-bit unsigned number by a 32-bit unsigned number. * This routine assumes that the top 32 bits of the dividend are @@ -7,11 +8,6 @@ * On exit, r3 contains the remainder. * * Copyright (C) 2002 Paul Mackerras, IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <asm/ppc_asm.h> #include <asm/processor.h> diff --git a/arch/powerpc/lib/error-inject.c b/arch/powerpc/lib/error-inject.c new file mode 100644 index 000000000000..e834079d2b5c --- /dev/null +++ b/arch/powerpc/lib/error-inject.c @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include <linux/error-injection.h> +#include <linux/kprobes.h> +#include <linux/uaccess.h> + +void override_function_with_return(struct pt_regs *regs) +{ + /* + * Emulate 'blr'. 'regs' represents the state on entry of a predefined + * function in the kernel/module, captured on a kprobe. We don't need + * to worry about 32-bit userspace on a 64-bit kernel. + */ + regs_set_return_ip(regs, regs->link); +} +NOKPROBE_SYMBOL(override_function_with_return); diff --git a/arch/powerpc/lib/feature-fixups-test.S b/arch/powerpc/lib/feature-fixups-test.S index f4613118132e..480172fbd024 100644 --- a/arch/powerpc/lib/feature-fixups-test.S +++ b/arch/powerpc/lib/feature-fixups-test.S @@ -1,16 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright 2008 Michael Ellerman, IBM Corporation. - * - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
*/ #include <asm/feature-fixups.h> #include <asm/ppc_asm.h> #include <asm/synch.h> +#include <asm/asm-compat.h> +#include <asm/ppc-opcode.h> .text @@ -167,16 +164,52 @@ globl(ftr_fixup_test6_expected) blt 2b b 3f b 1b -2: or 1,1,1 +3: or 1,1,1 or 2,2,2 -3: or 3,3,3 + or 3,3,3 +globl(ftr_fixup_test7) + or 1,1,1 +BEGIN_FTR_SECTION + or 2,2,2 + or 2,2,2 + or 2,2,2 + or 2,2,2 + or 2,2,2 + or 2,2,2 + or 2,2,2 +FTR_SECTION_ELSE +2: b 3f +3: or 5,5,5 + beq 3b + b 1f + or 6,6,6 + b 2b + bdnz 3b +1: +ALT_FTR_SECTION_END(0, 1) + or 1,1,1 + or 1,1,1 + +globl(end_ftr_fixup_test7) + nop + +globl(ftr_fixup_test7_expected) + or 1,1,1 +2: b 3f +3: or 5,5,5 + beq 3b + b 1f + or 6,6,6 + b 2b + bdnz 3b +1: or 1,1,1 #if 0 /* Test that if we have a larger else case the assembler spots it and * reports an error. #if 0'ed so as not to break the build normally. */ -ftr_fixup_test7: +ftr_fixup_test_too_big: or 1,1,1 BEGIN_FTR_SECTION or 2,2,2 @@ -759,3 +792,71 @@ globl(lwsync_fixup_test_expected_SYNC) 1: or 1,1,1 sync +globl(ftr_fixup_prefix1) + or 1,1,1 + .long OP_PREFIX << 26 + .long 0x0000000 + or 2,2,2 +globl(end_ftr_fixup_prefix1) + +globl(ftr_fixup_prefix1_orig) + or 1,1,1 + .long OP_PREFIX << 26 + .long 0x0000000 + or 2,2,2 + +globl(ftr_fixup_prefix1_expected) + or 1,1,1 + nop + nop + or 2,2,2 + +globl(ftr_fixup_prefix2) + or 1,1,1 + .long OP_PREFIX << 26 + .long 0x0000000 + or 2,2,2 +globl(end_ftr_fixup_prefix2) + +globl(ftr_fixup_prefix2_orig) + or 1,1,1 + .long OP_PREFIX << 26 + .long 0x0000000 + or 2,2,2 + +globl(ftr_fixup_prefix2_alt) + .long OP_PREFIX << 26 + .long 0x0000001 + +globl(ftr_fixup_prefix2_expected) + or 1,1,1 + .long OP_PREFIX << 26 + .long 0x0000001 + or 2,2,2 + +globl(ftr_fixup_prefix3) + or 1,1,1 + .long OP_PREFIX << 26 + .long 0x0000000 + or 2,2,2 + or 3,3,3 +globl(end_ftr_fixup_prefix3) + +globl(ftr_fixup_prefix3_orig) + or 1,1,1 + .long OP_PREFIX << 26 + .long 0x0000000 + or 2,2,2 + or 3,3,3 + +globl(ftr_fixup_prefix3_alt) + .long OP_PREFIX << 26 + .long 0x0000001 + nop + +globl(ftr_fixup_prefix3_expected) + or 1,1,1 + .long OP_PREFIX << 26 + .long 0x0000001 + nop + or 3,3,3 diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 7a8a7487cee8..587c8cf1230f 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org) * @@ -5,22 +6,31 @@ * Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com> * * Copyright 2008 Michael Ellerman, IBM Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/types.h> +#include <linux/jump_label.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/init.h> +#include <linux/sched/mm.h> +#include <linux/stop_machine.h> #include <asm/cputable.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> +#include <asm/interrupt.h> #include <asm/page.h> #include <asm/sections.h> +#include <asm/setup.h> +#include <asm/security_features.h> +#include <asm/firmware.h> +#include <asm/inst.h> +/* + * Used to generate warnings if mmu or cpu feature check functions that + * use static keys before they are initialized. 
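Beyond the static-key bookkeeping above, the core job of this file is applying fixup entries, each recording a feature mask/value pair plus the bounds of a code section and its alternative. A rough user-space model of the decision patch_feature_section_mask() makes (all names and the example values are illustrative; branch translation inside the alternative is ignored):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define NOP_WORD 0x60000000u    /* "ori 0,0,0", the PowerPC nop */

/* If the CPU feature word does not match, copy the alternative
 * instructions over the start of the section and nop-fill the rest;
 * otherwise leave the section untouched. */
static int patch_section_sketch(unsigned long value, unsigned long mask,
                                unsigned long want,
                                uint32_t *start, size_t len,
                                const uint32_t *alt, size_t alt_len)
{
        size_t i;

        if (alt_len > len)
                return 1;               /* alternative doesn't fit */
        if ((value & mask) == want)
                return 0;               /* feature present: keep original code */

        for (i = 0; i < alt_len; i++)
                start[i] = alt[i];
        for (; i < len; i++)
                start[i] = NOP_WORD;
        return 0;
}

int main(void)
{
        uint32_t code[4] = { 1, 2, 3, 4 };      /* stand-ins for instructions */
        const uint32_t alt[2] = { 9, 8 };

        patch_section_sketch(0, 0x1, 0x1, code, 4, alt, 2);
        printf("%08x %08x %08x %08x\n", (unsigned)code[0], (unsigned)code[1],
               (unsigned)code[2], (unsigned)code[3]);
        return 0;
}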
+ */ +bool static_key_feature_checks_initialized __read_mostly; +EXPORT_SYMBOL_GPL(static_key_feature_checks_initialized); struct fixup_entry { unsigned long mask; @@ -31,42 +41,43 @@ struct fixup_entry { long alt_end_off; }; -static unsigned int *calc_addr(struct fixup_entry *fcur, long offset) +static u32 *calc_addr(struct fixup_entry *fcur, long offset) { /* * We store the offset to the code as a negative offset from * the start of the alt_entry, to support the VDSO. This * routine converts that back into an actual address. */ - return (unsigned int *)((unsigned long)fcur + offset); + return (u32 *)((unsigned long)fcur + offset); } -static int patch_alt_instruction(unsigned int *src, unsigned int *dest, - unsigned int *alt_start, unsigned int *alt_end) +static int patch_alt_instruction(u32 *src, u32 *dest, u32 *alt_start, u32 *alt_end) { - unsigned int instr; + int err; + ppc_inst_t instr; - instr = *src; + instr = ppc_inst_read(src); - if (instr_is_relative_branch(*src)) { - unsigned int *target = (unsigned int *)branch_target(src); + if (instr_is_relative_branch(ppc_inst_read(src))) { + u32 *target = (u32 *)branch_target(src); /* Branch within the section doesn't need translating */ - if (target < alt_start || target >= alt_end) { - instr = translate_branch(dest, src); - if (!instr) + if (target < alt_start || target > alt_end) { + err = translate_branch(&instr, dest, src); + if (err) return 1; } } - patch_instruction(dest, instr); + raw_patch_instruction(dest, instr); return 0; } -static int patch_feature_section(unsigned long value, struct fixup_entry *fcur) +static int patch_feature_section_mask(unsigned long value, unsigned long mask, + struct fixup_entry *fcur) { - unsigned int *start, *end, *alt_start, *alt_end, *src, *dest; + u32 *start, *end, *alt_start, *alt_end, *src, *dest; start = calc_addr(fcur, fcur->start_off); end = calc_addr(fcur, fcur->end_off); @@ -76,24 +87,26 @@ static int patch_feature_section(unsigned long value, struct fixup_entry *fcur) if ((alt_end - alt_start) > (end - start)) return 1; - if ((value & fcur->mask) == fcur->value) + if ((value & fcur->mask & mask) == (fcur->value & mask)) return 0; src = alt_start; dest = start; - for (; src < alt_end; src++, dest++) { + for (; src < alt_end; src = ppc_inst_next(src, src), + dest = ppc_inst_next(dest, dest)) { if (patch_alt_instruction(src, dest, alt_start, alt_end)) return 1; } for (; dest < end; dest++) - patch_instruction(dest, PPC_INST_NOP); + raw_patch_instruction(dest, ppc_inst(PPC_RAW_NOP())); return 0; } -void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end) +static void do_feature_fixups_mask(unsigned long value, unsigned long mask, + void *fixup_start, void *fixup_end) { struct fixup_entry *fcur, *fend; @@ -101,7 +114,7 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end) fend = fixup_end; for (; fcur < fend; fcur++) { - if (patch_feature_section(value, fcur)) { + if (patch_feature_section_mask(value, mask, fcur)) { WARN_ON(1); printk("Unable to patch feature section at %p - %p" \ " with %p - %p\n", @@ -113,10 +126,473 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end) } } +void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end) +{ + do_feature_fixups_mask(value, ~0, fixup_start, fixup_end); +} + +#ifdef CONFIG_PPC_BARRIER_NOSPEC +static bool is_fixup_addr_valid(void *dest, size_t size) +{ + return system_state < SYSTEM_FREEING_INITMEM || + !init_section_contains(dest, size); +} + +static 
int do_patch_fixups(long *start, long *end, unsigned int *instrs, int num) +{ + int i; + + for (i = 0; start < end; start++, i++) { + int j; + unsigned int *dest = (void *)start + *start; + + if (!is_fixup_addr_valid(dest, sizeof(*instrs) * num)) + continue; + + pr_devel("patching dest %lx\n", (unsigned long)dest); + + for (j = 0; j < num; j++) + patch_instruction(dest + j, ppc_inst(instrs[j])); + } + return i; +} +#endif + +#ifdef CONFIG_PPC_BOOK3S_64 +static int do_patch_entry_fixups(long *start, long *end, unsigned int *instrs, + bool do_fallback, void *fallback) +{ + int i; + + for (i = 0; start < end; start++, i++) { + unsigned int *dest = (void *)start + *start; + + if (!is_fixup_addr_valid(dest, sizeof(*instrs) * 3)) + continue; + + pr_devel("patching dest %lx\n", (unsigned long)dest); + + // See comment in do_entry_flush_fixups() RE order of patching + if (do_fallback) { + patch_instruction(dest, ppc_inst(instrs[0])); + patch_instruction(dest + 2, ppc_inst(instrs[2])); + patch_branch(dest + 1, (unsigned long)fallback, BRANCH_SET_LINK); + } else { + patch_instruction(dest + 1, ppc_inst(instrs[1])); + patch_instruction(dest + 2, ppc_inst(instrs[2])); + patch_instruction(dest, ppc_inst(instrs[0])); + } + } + return i; +} + +static void do_stf_entry_barrier_fixups(enum stf_barrier_type types) +{ + unsigned int instrs[3]; + long *start, *end; + int i; + + start = PTRRELOC(&__start___stf_entry_barrier_fixup); + end = PTRRELOC(&__stop___stf_entry_barrier_fixup); + + instrs[0] = PPC_RAW_NOP(); + instrs[1] = PPC_RAW_NOP(); + instrs[2] = PPC_RAW_NOP(); + + i = 0; + if (types & STF_BARRIER_FALLBACK) { + instrs[i++] = PPC_RAW_MFLR(_R10); + instrs[i++] = PPC_RAW_NOP(); /* branch patched below */ + instrs[i++] = PPC_RAW_MTLR(_R10); + } else if (types & STF_BARRIER_EIEIO) { + instrs[i++] = PPC_RAW_EIEIO() | 0x02000000; /* eieio + bit 6 hint */ + } else if (types & STF_BARRIER_SYNC_ORI) { + instrs[i++] = PPC_RAW_SYNC(); + instrs[i++] = PPC_RAW_LD(_R10, _R13, 0); + instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */ + } + + i = do_patch_entry_fixups(start, end, instrs, types & STF_BARRIER_FALLBACK, + &stf_barrier_fallback); + + printk(KERN_DEBUG "stf-barrier: patched %d entry locations (%s barrier)\n", i, + (types == STF_BARRIER_NONE) ? "no" : + (types == STF_BARRIER_FALLBACK) ? "fallback" : + (types == STF_BARRIER_EIEIO) ? "eieio" : + (types == (STF_BARRIER_SYNC_ORI)) ? 
"hwsync" + : "unknown"); +} + +static void do_stf_exit_barrier_fixups(enum stf_barrier_type types) +{ + unsigned int instrs[6]; + long *start, *end; + int i; + + start = PTRRELOC(&__start___stf_exit_barrier_fixup); + end = PTRRELOC(&__stop___stf_exit_barrier_fixup); + + instrs[0] = PPC_RAW_NOP(); + instrs[1] = PPC_RAW_NOP(); + instrs[2] = PPC_RAW_NOP(); + instrs[3] = PPC_RAW_NOP(); + instrs[4] = PPC_RAW_NOP(); + instrs[5] = PPC_RAW_NOP(); + + i = 0; + if (types & STF_BARRIER_FALLBACK || types & STF_BARRIER_SYNC_ORI) { + if (cpu_has_feature(CPU_FTR_HVMODE)) { + instrs[i++] = PPC_RAW_MTSPR(SPRN_HSPRG1, _R13); + instrs[i++] = PPC_RAW_MFSPR(_R13, SPRN_HSPRG0); + } else { + instrs[i++] = PPC_RAW_MTSPR(SPRN_SPRG2, _R13); + instrs[i++] = PPC_RAW_MFSPR(_R13, SPRN_SPRG1); + } + instrs[i++] = PPC_RAW_SYNC(); + instrs[i++] = PPC_RAW_LD(_R13, _R13, 0); + instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */ + if (cpu_has_feature(CPU_FTR_HVMODE)) + instrs[i++] = PPC_RAW_MFSPR(_R13, SPRN_HSPRG1); + else + instrs[i++] = PPC_RAW_MFSPR(_R13, SPRN_SPRG2); + } else if (types & STF_BARRIER_EIEIO) { + instrs[i++] = PPC_RAW_EIEIO() | 0x02000000; /* eieio + bit 6 hint */ + } + + i = do_patch_fixups(start, end, instrs, ARRAY_SIZE(instrs)); + + printk(KERN_DEBUG "stf-barrier: patched %d exit locations (%s barrier)\n", i, + (types == STF_BARRIER_NONE) ? "no" : + (types == STF_BARRIER_FALLBACK) ? "fallback" : + (types == STF_BARRIER_EIEIO) ? "eieio" : + (types == (STF_BARRIER_SYNC_ORI)) ? "hwsync" + : "unknown"); +} + +static bool stf_exit_reentrant = false; +static bool rfi_exit_reentrant = false; +static DEFINE_MUTEX(exit_flush_lock); + +static int __do_stf_barrier_fixups(void *data) +{ + enum stf_barrier_type *types = data; + + do_stf_entry_barrier_fixups(*types); + do_stf_exit_barrier_fixups(*types); + + return 0; +} + +void do_stf_barrier_fixups(enum stf_barrier_type types) +{ + /* + * The call to the fallback entry flush, and the fallback/sync-ori exit + * flush can not be safely patched in/out while other CPUs are + * executing them. So call __do_stf_barrier_fixups() on one CPU while + * all other CPUs spin in the stop machine core with interrupts hard + * disabled. + * + * The branch to mark interrupt exits non-reentrant is enabled first, + * then stop_machine runs which will ensure all CPUs are out of the + * low level interrupt exit code before patching. After the patching, + * if allowed, then flip the branch to allow fast exits. 
+ */ + + // Prevent static key update races with do_rfi_flush_fixups() + mutex_lock(&exit_flush_lock); + static_branch_enable(&interrupt_exit_not_reentrant); + + stop_machine(__do_stf_barrier_fixups, &types, NULL); + + if ((types & STF_BARRIER_FALLBACK) || (types & STF_BARRIER_SYNC_ORI)) + stf_exit_reentrant = false; + else + stf_exit_reentrant = true; + + if (stf_exit_reentrant && rfi_exit_reentrant) + static_branch_disable(&interrupt_exit_not_reentrant); + + mutex_unlock(&exit_flush_lock); +} + +void do_uaccess_flush_fixups(enum l1d_flush_type types) +{ + unsigned int instrs[4]; + long *start, *end; + int i; + + start = PTRRELOC(&__start___uaccess_flush_fixup); + end = PTRRELOC(&__stop___uaccess_flush_fixup); + + instrs[0] = PPC_RAW_NOP(); + instrs[1] = PPC_RAW_NOP(); + instrs[2] = PPC_RAW_NOP(); + instrs[3] = PPC_RAW_BLR(); + + i = 0; + if (types == L1D_FLUSH_FALLBACK) { + instrs[3] = PPC_RAW_NOP(); + /* fallthrough to fallback flush */ + } + + if (types & L1D_FLUSH_ORI) { + instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */ + instrs[i++] = PPC_RAW_ORI(_R30, _R30, 0); /* L1d flush */ + } + + if (types & L1D_FLUSH_MTTRIG) + instrs[i++] = PPC_RAW_MTSPR(SPRN_TRIG2, _R0); + + i = do_patch_fixups(start, end, instrs, ARRAY_SIZE(instrs)); + + printk(KERN_DEBUG "uaccess-flush: patched %d locations (%s flush)\n", i, + (types == L1D_FLUSH_NONE) ? "no" : + (types == L1D_FLUSH_FALLBACK) ? "fallback displacement" : + (types & L1D_FLUSH_ORI) ? (types & L1D_FLUSH_MTTRIG) + ? "ori+mttrig type" + : "ori type" : + (types & L1D_FLUSH_MTTRIG) ? "mttrig type" + : "unknown"); +} + +static int __do_entry_flush_fixups(void *data) +{ + enum l1d_flush_type types = *(enum l1d_flush_type *)data; + unsigned int instrs[3]; + long *start, *end; + int i; + + instrs[0] = PPC_RAW_NOP(); + instrs[1] = PPC_RAW_NOP(); + instrs[2] = PPC_RAW_NOP(); + + i = 0; + if (types == L1D_FLUSH_FALLBACK) { + instrs[i++] = PPC_RAW_MFLR(_R10); + instrs[i++] = PPC_RAW_NOP(); /* branch patched below */ + instrs[i++] = PPC_RAW_MTLR(_R10); + } + + if (types & L1D_FLUSH_ORI) { + instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */ + instrs[i++] = PPC_RAW_ORI(_R30, _R30, 0); /* L1d flush */ + } + + if (types & L1D_FLUSH_MTTRIG) + instrs[i++] = PPC_RAW_MTSPR(SPRN_TRIG2, _R0); + + /* + * If we're patching in or out the fallback flush we need to be careful about the + * order in which we patch instructions. That's because it's possible we could + * take a page fault after patching one instruction, so the sequence of + * instructions must be safe even in a half patched state. + * + * To make that work, when patching in the fallback flush we patch in this order: + * - the mflr (dest) + * - the mtlr (dest + 2) + * - the branch (dest + 1) + * + * That ensures the sequence is safe to execute at any point. In contrast if we + * patch the mtlr last, it's possible we could return from the branch and not + * restore LR, leading to a crash later. + * + * When patching out the fallback flush (either with nops or another flush type), + * we patch in this order: + * - the branch (dest + 1) + * - the mtlr (dest + 2) + * - the mflr (dest) + * + * Note we are protected by stop_machine() from other CPUs executing the code in a + * semi-patched state. 
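The ordering argument in the comment above is easy to check by hand. Below is a throwaway userspace sketch, not kernel code: the three array slots stand in for the patched instructions at dest, dest + 1 and dest + 2, the strings are just labels, and the program prints every intermediate state of the enable and disable sequences, so it is visible that the branch is only ever live while the mflr/mtlr pair is already in place.

#include <stdio.h>

static void show(const char *slot[3], const char *when)
{
	printf("%-8s: %-10s | %-12s | %s\n", when, slot[0], slot[1], slot[2]);
}

int main(void)
{
	/* flush disabled: all three patch sites hold nops */
	const char *slot[3] = { "nop", "nop", "nop" };

	show(slot, "initial");

	/* enable order: dest, then dest + 2, then the branch at dest + 1 */
	slot[0] = "mflr r10";    show(slot, "step 1");
	slot[2] = "mtlr r10";    show(slot, "step 2");
	slot[1] = "bl fallback"; show(slot, "step 3");

	/* disable order is the reverse: dest + 1, then dest + 2, then dest */
	slot[1] = "nop"; show(slot, "undo 1");
	slot[2] = "nop"; show(slot, "undo 2");
	slot[0] = "nop"; show(slot, "undo 3");

	return 0;
}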
+ */ + + start = PTRRELOC(&__start___entry_flush_fixup); + end = PTRRELOC(&__stop___entry_flush_fixup); + i = do_patch_entry_fixups(start, end, instrs, types == L1D_FLUSH_FALLBACK, + &entry_flush_fallback); + + start = PTRRELOC(&__start___scv_entry_flush_fixup); + end = PTRRELOC(&__stop___scv_entry_flush_fixup); + i += do_patch_entry_fixups(start, end, instrs, types == L1D_FLUSH_FALLBACK, + &scv_entry_flush_fallback); + + printk(KERN_DEBUG "entry-flush: patched %d locations (%s flush)\n", i, + (types == L1D_FLUSH_NONE) ? "no" : + (types == L1D_FLUSH_FALLBACK) ? "fallback displacement" : + (types & L1D_FLUSH_ORI) ? (types & L1D_FLUSH_MTTRIG) + ? "ori+mttrig type" + : "ori type" : + (types & L1D_FLUSH_MTTRIG) ? "mttrig type" + : "unknown"); + + return 0; +} + +void do_entry_flush_fixups(enum l1d_flush_type types) +{ + /* + * The call to the fallback flush can not be safely patched in/out while + * other CPUs are executing it. So call __do_entry_flush_fixups() on one + * CPU while all other CPUs spin in the stop machine core with interrupts + * hard disabled. + */ + stop_machine(__do_entry_flush_fixups, &types, NULL); +} + +static int __do_rfi_flush_fixups(void *data) +{ + enum l1d_flush_type types = *(enum l1d_flush_type *)data; + unsigned int instrs[3]; + long *start, *end; + int i; + + start = PTRRELOC(&__start___rfi_flush_fixup); + end = PTRRELOC(&__stop___rfi_flush_fixup); + + instrs[0] = PPC_RAW_NOP(); + instrs[1] = PPC_RAW_NOP(); + instrs[2] = PPC_RAW_NOP(); + + if (types & L1D_FLUSH_FALLBACK) + /* b .+16 to fallback flush */ + instrs[0] = PPC_RAW_BRANCH(16); + + i = 0; + if (types & L1D_FLUSH_ORI) { + instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */ + instrs[i++] = PPC_RAW_ORI(_R30, _R30, 0); /* L1d flush */ + } + + if (types & L1D_FLUSH_MTTRIG) + instrs[i++] = PPC_RAW_MTSPR(SPRN_TRIG2, _R0); + + i = do_patch_fixups(start, end, instrs, ARRAY_SIZE(instrs)); + + printk(KERN_DEBUG "rfi-flush: patched %d locations (%s flush)\n", i, + (types == L1D_FLUSH_NONE) ? "no" : + (types == L1D_FLUSH_FALLBACK) ? "fallback displacement" : + (types & L1D_FLUSH_ORI) ? (types & L1D_FLUSH_MTTRIG) + ? "ori+mttrig type" + : "ori type" : + (types & L1D_FLUSH_MTTRIG) ? "mttrig type" + : "unknown"); + + return 0; +} + +void do_rfi_flush_fixups(enum l1d_flush_type types) +{ + /* + * stop_machine gets all CPUs out of the interrupt exit handler same + * as do_stf_barrier_fixups. do_rfi_flush_fixups patching can run + * without stop_machine, so this could be achieved with a broadcast + * IPI instead, but this matches the stf sequence. 
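Stepping back: calc_addr(), patch_feature_section_mask() and the do_patch_fixups()/do_patch_entry_fixups() helpers above all share the same shape — tables whose entries are offsets relative to the table entry itself (so they survive relocation and work for the VDSO), plus a masked feature compare that decides whether a section gets nopped out. A minimal userspace C model of that shape, with invented values and no kernel APIs:

#include <stdio.h>
#include <stdint.h>

struct fixup_entry {
	unsigned long mask;
	unsigned long value;
	long start_off;		/* offset of the patch site, relative to this entry */
	long end_off;
};

/* analogue of calc_addr(): resolve a self-relative offset */
static uint32_t *calc_addr(struct fixup_entry *fcur, long offset)
{
	return (uint32_t *)((unsigned long)fcur + offset);
}

/* analogue of the masked test in patch_feature_section_mask() */
static int needs_patch(unsigned long cpu_features, unsigned long mask,
		       const struct fixup_entry *fcur)
{
	/* the section is left alone when the masked feature bits match */
	return (cpu_features & fcur->mask & mask) != (fcur->value & mask);
}

int main(void)
{
	static uint32_t code[4] = { 1, 2, 3, 4 };	/* stand-in "instructions" */
	struct fixup_entry fe = { .mask = 0x8, .value = 0x8 };

	/* offsets are stored relative to the entry itself */
	fe.start_off = (long)((unsigned long)&code[1] - (unsigned long)&fe);
	fe.end_off   = (long)((unsigned long)&code[3] - (unsigned long)&fe);

	uint32_t *start = calc_addr(&fe, fe.start_off);
	uint32_t *end   = calc_addr(&fe, fe.end_off);

	if (needs_patch(0x0, ~0UL, &fe)) {
		/* no alternative section in this toy, so nop the whole range,
		 * as the kernel does for the leftover tail */
		for (uint32_t *p = start; p < end; p++)
			*p = 0x60000000;	/* PPC nop encoding */
	}

	for (int i = 0; i < 4; i++)
		printf("%08x\n", code[i]);
	return 0;
}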
+ */ + + // Prevent static key update races with do_stf_barrier_fixups() + mutex_lock(&exit_flush_lock); + static_branch_enable(&interrupt_exit_not_reentrant); + + stop_machine(__do_rfi_flush_fixups, &types, NULL); + + if (types & L1D_FLUSH_FALLBACK) + rfi_exit_reentrant = false; + else + rfi_exit_reentrant = true; + + if (stf_exit_reentrant && rfi_exit_reentrant) + static_branch_disable(&interrupt_exit_not_reentrant); + + mutex_unlock(&exit_flush_lock); +} + +void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_end) +{ + unsigned int instr; + long *start, *end; + int i; + + start = fixup_start; + end = fixup_end; + + instr = PPC_RAW_NOP(); + + if (enable) { + pr_info("barrier-nospec: using ORI speculation barrier\n"); + instr = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */ + } + + i = do_patch_fixups(start, end, &instr, 1); + + printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i); +} + +#endif /* CONFIG_PPC_BOOK3S_64 */ + +#ifdef CONFIG_PPC_BARRIER_NOSPEC +void do_barrier_nospec_fixups(bool enable) +{ + void *start, *end; + + start = PTRRELOC(&__start___barrier_nospec_fixup); + end = PTRRELOC(&__stop___barrier_nospec_fixup); + + do_barrier_nospec_fixups_range(enable, start, end); +} +#endif /* CONFIG_PPC_BARRIER_NOSPEC */ + +#ifdef CONFIG_PPC_E500 +void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_end) +{ + unsigned int instr[2]; + long *start, *end; + int i; + + start = fixup_start; + end = fixup_end; + + instr[0] = PPC_RAW_NOP(); + instr[1] = PPC_RAW_NOP(); + + if (enable) { + pr_info("barrier-nospec: using isync; sync as speculation barrier\n"); + instr[0] = PPC_RAW_ISYNC(); + instr[1] = PPC_RAW_SYNC(); + } + + i = do_patch_fixups(start, end, instr, ARRAY_SIZE(instr)); + + printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i); +} + +static void __init patch_btb_flush_section(long *curr) +{ + unsigned int *start, *end; + + start = (void *)curr + *curr; + end = (void *)curr + *(curr + 1); + for (; start < end; start++) { + pr_devel("patching dest %lx\n", (unsigned long)start); + patch_instruction(start, ppc_inst(PPC_RAW_NOP())); + } +} + +void __init do_btb_flush_fixups(void) +{ + long *start, *end; + + start = PTRRELOC(&__start__btb_flush_fixup); + end = PTRRELOC(&__stop__btb_flush_fixup); + + for (; start < end; start += 2) + patch_btb_flush_section(start); +} +#endif /* CONFIG_PPC_E500 */ + void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end) { long *start, *end; - unsigned int *dest; + u32 *dest; if (!(value & CPU_FTR_LWSYNC)) return ; @@ -126,235 +602,399 @@ void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end) for (; start < end; start++) { dest = (void *)start + *start; - patch_instruction(dest, PPC_INST_LWSYNC); + raw_patch_instruction(dest, ppc_inst(PPC_INST_LWSYNC)); } } -void do_final_fixups(void) +static void __init do_final_fixups(void) { #if defined(CONFIG_PPC64) && defined(CONFIG_RELOCATABLE) - int *src, *dest; - unsigned long length; + ppc_inst_t inst; + u32 *src, *dest, *end; if (PHYSICAL_START == 0) return; - src = (int *)(KERNELBASE + PHYSICAL_START); - dest = (int *)KERNELBASE; - length = (__end_interrupts - _stext) / sizeof(int); + src = (u32 *)(KERNELBASE + PHYSICAL_START); + dest = (u32 *)KERNELBASE; + end = (void *)src + (__end_interrupts - _stext); - while (length--) { - patch_instruction(dest, *src); - src++; - dest++; + while (src < end) { + inst = ppc_inst_read(src); + raw_patch_instruction(dest, inst); + src = 
ppc_inst_next(src, src); + dest = ppc_inst_next(dest, dest); } #endif } +static unsigned long __initdata saved_cpu_features; +static unsigned int __initdata saved_mmu_features; +#ifdef CONFIG_PPC64 +static unsigned long __initdata saved_firmware_features; +#endif + +void __init apply_feature_fixups(void) +{ + struct cpu_spec *spec = PTRRELOC(*PTRRELOC(&cur_cpu_spec)); + + *PTRRELOC(&saved_cpu_features) = spec->cpu_features; + *PTRRELOC(&saved_mmu_features) = spec->mmu_features; + + /* + * Apply the CPU-specific and firmware specific fixups to kernel text + * (nop out sections not relevant to this CPU or this firmware). + */ + do_feature_fixups(spec->cpu_features, + PTRRELOC(&__start___ftr_fixup), + PTRRELOC(&__stop___ftr_fixup)); + + do_feature_fixups(spec->mmu_features, + PTRRELOC(&__start___mmu_ftr_fixup), + PTRRELOC(&__stop___mmu_ftr_fixup)); + + do_lwsync_fixups(spec->cpu_features, + PTRRELOC(&__start___lwsync_fixup), + PTRRELOC(&__stop___lwsync_fixup)); + +#ifdef CONFIG_PPC64 + saved_firmware_features = powerpc_firmware_features; + do_feature_fixups(powerpc_firmware_features, + &__start___fw_ftr_fixup, &__stop___fw_ftr_fixup); +#endif + do_final_fixups(); +} + +void __init update_mmu_feature_fixups(unsigned long mask) +{ + saved_mmu_features &= ~mask; + saved_mmu_features |= cur_cpu_spec->mmu_features & mask; + + do_feature_fixups_mask(cur_cpu_spec->mmu_features, mask, + PTRRELOC(&__start___mmu_ftr_fixup), + PTRRELOC(&__stop___mmu_ftr_fixup)); + mmu_feature_keys_init(); +} + +void __init setup_feature_keys(void) +{ + /* + * Initialise jump label. This causes all the cpu/mmu_has_feature() + * checks to take on their correct polarity based on the current set of + * CPU/MMU features. + */ + jump_label_init(); + cpu_feature_keys_init(); + mmu_feature_keys_init(); + static_key_feature_checks_initialized = true; +} + +static int __init check_features(void) +{ + WARN(saved_cpu_features != cur_cpu_spec->cpu_features, + "CPU features changed after feature patching!\n"); + WARN(saved_mmu_features != cur_cpu_spec->mmu_features, + "MMU features changed after feature patching!\n"); +#ifdef CONFIG_PPC64 + WARN(saved_firmware_features != powerpc_firmware_features, + "Firmware features changed after feature patching!\n"); +#endif + + return 0; +} +late_initcall(check_features); + #ifdef CONFIG_FTR_FIXUP_SELFTEST #define check(x) \ if (!(x)) printk("feature-fixups: test failed at line %d\n", __LINE__); +static int patch_feature_section(unsigned long value, struct fixup_entry *fcur) +{ + return patch_feature_section_mask(value, ~0, fcur); +} + /* This must be after the text it fixes up, vmlinux.lds.S enforces that atm */ static struct fixup_entry fixup; -static long calc_offset(struct fixup_entry *entry, unsigned int *p) +static long __init calc_offset(struct fixup_entry *entry, unsigned int *p) { return (unsigned long)p - (unsigned long)entry; } -void test_basic_patching(void) +static void __init test_basic_patching(void) { - extern unsigned int ftr_fixup_test1; - extern unsigned int end_ftr_fixup_test1; - extern unsigned int ftr_fixup_test1_orig; - extern unsigned int ftr_fixup_test1_expected; - int size = &end_ftr_fixup_test1 - &ftr_fixup_test1; + extern unsigned int ftr_fixup_test1[]; + extern unsigned int end_ftr_fixup_test1[]; + extern unsigned int ftr_fixup_test1_orig[]; + extern unsigned int ftr_fixup_test1_expected[]; + int size = 4 * (end_ftr_fixup_test1 - ftr_fixup_test1); fixup.value = fixup.mask = 8; - fixup.start_off = calc_offset(&fixup, &ftr_fixup_test1 + 1); - fixup.end_off = 
calc_offset(&fixup, &ftr_fixup_test1 + 2); + fixup.start_off = calc_offset(&fixup, ftr_fixup_test1 + 1); + fixup.end_off = calc_offset(&fixup, ftr_fixup_test1 + 2); fixup.alt_start_off = fixup.alt_end_off = 0; /* Sanity check */ - check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_orig, size) == 0); + check(memcmp(ftr_fixup_test1, ftr_fixup_test1_orig, size) == 0); /* Check we don't patch if the value matches */ patch_feature_section(8, &fixup); - check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_orig, size) == 0); + check(memcmp(ftr_fixup_test1, ftr_fixup_test1_orig, size) == 0); /* Check we do patch if the value doesn't match */ patch_feature_section(0, &fixup); - check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_expected, size) == 0); + check(memcmp(ftr_fixup_test1, ftr_fixup_test1_expected, size) == 0); /* Check we do patch if the mask doesn't match */ - memcpy(&ftr_fixup_test1, &ftr_fixup_test1_orig, size); - check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_orig, size) == 0); + memcpy(ftr_fixup_test1, ftr_fixup_test1_orig, size); + check(memcmp(ftr_fixup_test1, ftr_fixup_test1_orig, size) == 0); patch_feature_section(~8, &fixup); - check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_expected, size) == 0); + check(memcmp(ftr_fixup_test1, ftr_fixup_test1_expected, size) == 0); } -static void test_alternative_patching(void) +static void __init test_alternative_patching(void) { - extern unsigned int ftr_fixup_test2; - extern unsigned int end_ftr_fixup_test2; - extern unsigned int ftr_fixup_test2_orig; - extern unsigned int ftr_fixup_test2_alt; - extern unsigned int ftr_fixup_test2_expected; - int size = &end_ftr_fixup_test2 - &ftr_fixup_test2; + extern unsigned int ftr_fixup_test2[]; + extern unsigned int end_ftr_fixup_test2[]; + extern unsigned int ftr_fixup_test2_orig[]; + extern unsigned int ftr_fixup_test2_alt[]; + extern unsigned int ftr_fixup_test2_expected[]; + int size = 4 * (end_ftr_fixup_test2 - ftr_fixup_test2); fixup.value = fixup.mask = 0xF; - fixup.start_off = calc_offset(&fixup, &ftr_fixup_test2 + 1); - fixup.end_off = calc_offset(&fixup, &ftr_fixup_test2 + 2); - fixup.alt_start_off = calc_offset(&fixup, &ftr_fixup_test2_alt); - fixup.alt_end_off = calc_offset(&fixup, &ftr_fixup_test2_alt + 1); + fixup.start_off = calc_offset(&fixup, ftr_fixup_test2 + 1); + fixup.end_off = calc_offset(&fixup, ftr_fixup_test2 + 2); + fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_test2_alt); + fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_test2_alt + 1); /* Sanity check */ - check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_orig, size) == 0); + check(memcmp(ftr_fixup_test2, ftr_fixup_test2_orig, size) == 0); /* Check we don't patch if the value matches */ patch_feature_section(0xF, &fixup); - check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_orig, size) == 0); + check(memcmp(ftr_fixup_test2, ftr_fixup_test2_orig, size) == 0); /* Check we do patch if the value doesn't match */ patch_feature_section(0, &fixup); - check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_expected, size) == 0); + check(memcmp(ftr_fixup_test2, ftr_fixup_test2_expected, size) == 0); /* Check we do patch if the mask doesn't match */ - memcpy(&ftr_fixup_test2, &ftr_fixup_test2_orig, size); - check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_orig, size) == 0); + memcpy(ftr_fixup_test2, ftr_fixup_test2_orig, size); + check(memcmp(ftr_fixup_test2, ftr_fixup_test2_orig, size) == 0); patch_feature_section(~0xF, &fixup); - check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_expected, size) == 0); + check(memcmp(ftr_fixup_test2, 
ftr_fixup_test2_expected, size) == 0); } -static void test_alternative_case_too_big(void) +static void __init test_alternative_case_too_big(void) { - extern unsigned int ftr_fixup_test3; - extern unsigned int end_ftr_fixup_test3; - extern unsigned int ftr_fixup_test3_orig; - extern unsigned int ftr_fixup_test3_alt; - int size = &end_ftr_fixup_test3 - &ftr_fixup_test3; + extern unsigned int ftr_fixup_test3[]; + extern unsigned int end_ftr_fixup_test3[]; + extern unsigned int ftr_fixup_test3_orig[]; + extern unsigned int ftr_fixup_test3_alt[]; + int size = 4 * (end_ftr_fixup_test3 - ftr_fixup_test3); fixup.value = fixup.mask = 0xC; - fixup.start_off = calc_offset(&fixup, &ftr_fixup_test3 + 1); - fixup.end_off = calc_offset(&fixup, &ftr_fixup_test3 + 2); - fixup.alt_start_off = calc_offset(&fixup, &ftr_fixup_test3_alt); - fixup.alt_end_off = calc_offset(&fixup, &ftr_fixup_test3_alt + 2); + fixup.start_off = calc_offset(&fixup, ftr_fixup_test3 + 1); + fixup.end_off = calc_offset(&fixup, ftr_fixup_test3 + 2); + fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_test3_alt); + fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_test3_alt + 2); /* Sanity check */ - check(memcmp(&ftr_fixup_test3, &ftr_fixup_test3_orig, size) == 0); + check(memcmp(ftr_fixup_test3, ftr_fixup_test3_orig, size) == 0); /* Expect nothing to be patched, and the error returned to us */ check(patch_feature_section(0xF, &fixup) == 1); - check(memcmp(&ftr_fixup_test3, &ftr_fixup_test3_orig, size) == 0); + check(memcmp(ftr_fixup_test3, ftr_fixup_test3_orig, size) == 0); check(patch_feature_section(0, &fixup) == 1); - check(memcmp(&ftr_fixup_test3, &ftr_fixup_test3_orig, size) == 0); + check(memcmp(ftr_fixup_test3, ftr_fixup_test3_orig, size) == 0); check(patch_feature_section(~0xF, &fixup) == 1); - check(memcmp(&ftr_fixup_test3, &ftr_fixup_test3_orig, size) == 0); + check(memcmp(ftr_fixup_test3, ftr_fixup_test3_orig, size) == 0); } -static void test_alternative_case_too_small(void) +static void __init test_alternative_case_too_small(void) { - extern unsigned int ftr_fixup_test4; - extern unsigned int end_ftr_fixup_test4; - extern unsigned int ftr_fixup_test4_orig; - extern unsigned int ftr_fixup_test4_alt; - extern unsigned int ftr_fixup_test4_expected; - int size = &end_ftr_fixup_test4 - &ftr_fixup_test4; + extern unsigned int ftr_fixup_test4[]; + extern unsigned int end_ftr_fixup_test4[]; + extern unsigned int ftr_fixup_test4_orig[]; + extern unsigned int ftr_fixup_test4_alt[]; + extern unsigned int ftr_fixup_test4_expected[]; + int size = 4 * (end_ftr_fixup_test4 - ftr_fixup_test4); unsigned long flag; /* Check a high-bit flag */ flag = 1UL << ((sizeof(unsigned long) - 1) * 8); fixup.value = fixup.mask = flag; - fixup.start_off = calc_offset(&fixup, &ftr_fixup_test4 + 1); - fixup.end_off = calc_offset(&fixup, &ftr_fixup_test4 + 5); - fixup.alt_start_off = calc_offset(&fixup, &ftr_fixup_test4_alt); - fixup.alt_end_off = calc_offset(&fixup, &ftr_fixup_test4_alt + 2); + fixup.start_off = calc_offset(&fixup, ftr_fixup_test4 + 1); + fixup.end_off = calc_offset(&fixup, ftr_fixup_test4 + 5); + fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_test4_alt); + fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_test4_alt + 2); /* Sanity check */ - check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_orig, size) == 0); + check(memcmp(ftr_fixup_test4, ftr_fixup_test4_orig, size) == 0); /* Check we don't patch if the value matches */ patch_feature_section(flag, &fixup); - check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_orig, size) == 0); + 
check(memcmp(ftr_fixup_test4, ftr_fixup_test4_orig, size) == 0); /* Check we do patch if the value doesn't match */ patch_feature_section(0, &fixup); - check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_expected, size) == 0); + check(memcmp(ftr_fixup_test4, ftr_fixup_test4_expected, size) == 0); /* Check we do patch if the mask doesn't match */ - memcpy(&ftr_fixup_test4, &ftr_fixup_test4_orig, size); - check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_orig, size) == 0); + memcpy(ftr_fixup_test4, ftr_fixup_test4_orig, size); + check(memcmp(ftr_fixup_test4, ftr_fixup_test4_orig, size) == 0); patch_feature_section(~flag, &fixup); - check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_expected, size) == 0); + check(memcmp(ftr_fixup_test4, ftr_fixup_test4_expected, size) == 0); } static void test_alternative_case_with_branch(void) { - extern unsigned int ftr_fixup_test5; - extern unsigned int end_ftr_fixup_test5; - extern unsigned int ftr_fixup_test5_expected; - int size = &end_ftr_fixup_test5 - &ftr_fixup_test5; + extern unsigned int ftr_fixup_test5[]; + extern unsigned int end_ftr_fixup_test5[]; + extern unsigned int ftr_fixup_test5_expected[]; + int size = 4 * (end_ftr_fixup_test5 - ftr_fixup_test5); + + check(memcmp(ftr_fixup_test5, ftr_fixup_test5_expected, size) == 0); +} + +static void __init test_alternative_case_with_external_branch(void) +{ + extern unsigned int ftr_fixup_test6[]; + extern unsigned int end_ftr_fixup_test6[]; + extern unsigned int ftr_fixup_test6_expected[]; + int size = 4 * (end_ftr_fixup_test6 - ftr_fixup_test6); - check(memcmp(&ftr_fixup_test5, &ftr_fixup_test5_expected, size) == 0); + check(memcmp(ftr_fixup_test6, ftr_fixup_test6_expected, size) == 0); } -static void test_alternative_case_with_external_branch(void) +static void __init test_alternative_case_with_branch_to_end(void) { - extern unsigned int ftr_fixup_test6; - extern unsigned int end_ftr_fixup_test6; - extern unsigned int ftr_fixup_test6_expected; - int size = &end_ftr_fixup_test6 - &ftr_fixup_test6; + extern unsigned int ftr_fixup_test7[]; + extern unsigned int end_ftr_fixup_test7[]; + extern unsigned int ftr_fixup_test7_expected[]; + int size = 4 * (end_ftr_fixup_test7 - ftr_fixup_test7); - check(memcmp(&ftr_fixup_test6, &ftr_fixup_test6_expected, size) == 0); + check(memcmp(ftr_fixup_test7, ftr_fixup_test7_expected, size) == 0); } -static void test_cpu_macros(void) +static void __init test_cpu_macros(void) { - extern u8 ftr_fixup_test_FTR_macros; - extern u8 ftr_fixup_test_FTR_macros_expected; - unsigned long size = &ftr_fixup_test_FTR_macros_expected - - &ftr_fixup_test_FTR_macros; + extern u8 ftr_fixup_test_FTR_macros[]; + extern u8 ftr_fixup_test_FTR_macros_expected[]; + unsigned long size = ftr_fixup_test_FTR_macros_expected - + ftr_fixup_test_FTR_macros; /* The fixups have already been done for us during boot */ - check(memcmp(&ftr_fixup_test_FTR_macros, - &ftr_fixup_test_FTR_macros_expected, size) == 0); + check(memcmp(ftr_fixup_test_FTR_macros, + ftr_fixup_test_FTR_macros_expected, size) == 0); } -static void test_fw_macros(void) +static void __init test_fw_macros(void) { #ifdef CONFIG_PPC64 - extern u8 ftr_fixup_test_FW_FTR_macros; - extern u8 ftr_fixup_test_FW_FTR_macros_expected; - unsigned long size = &ftr_fixup_test_FW_FTR_macros_expected - - &ftr_fixup_test_FW_FTR_macros; + extern u8 ftr_fixup_test_FW_FTR_macros[]; + extern u8 ftr_fixup_test_FW_FTR_macros_expected[]; + unsigned long size = ftr_fixup_test_FW_FTR_macros_expected - + ftr_fixup_test_FW_FTR_macros; /* The fixups have already been done 
for us during boot */ - check(memcmp(&ftr_fixup_test_FW_FTR_macros, - &ftr_fixup_test_FW_FTR_macros_expected, size) == 0); + check(memcmp(ftr_fixup_test_FW_FTR_macros, + ftr_fixup_test_FW_FTR_macros_expected, size) == 0); #endif } -static void test_lwsync_macros(void) +static void __init test_lwsync_macros(void) { - extern u8 lwsync_fixup_test; - extern u8 end_lwsync_fixup_test; - extern u8 lwsync_fixup_test_expected_LWSYNC; - extern u8 lwsync_fixup_test_expected_SYNC; - unsigned long size = &end_lwsync_fixup_test - - &lwsync_fixup_test; + extern u8 lwsync_fixup_test[]; + extern u8 end_lwsync_fixup_test[]; + extern u8 lwsync_fixup_test_expected_LWSYNC[]; + extern u8 lwsync_fixup_test_expected_SYNC[]; + unsigned long size = end_lwsync_fixup_test - + lwsync_fixup_test; /* The fixups have already been done for us during boot */ if (cur_cpu_spec->cpu_features & CPU_FTR_LWSYNC) { - check(memcmp(&lwsync_fixup_test, - &lwsync_fixup_test_expected_LWSYNC, size) == 0); + check(memcmp(lwsync_fixup_test, + lwsync_fixup_test_expected_LWSYNC, size) == 0); } else { - check(memcmp(&lwsync_fixup_test, - &lwsync_fixup_test_expected_SYNC, size) == 0); + check(memcmp(lwsync_fixup_test, + lwsync_fixup_test_expected_SYNC, size) == 0); } } +#ifdef CONFIG_PPC64 +static void __init test_prefix_patching(void) +{ + extern unsigned int ftr_fixup_prefix1[]; + extern unsigned int end_ftr_fixup_prefix1[]; + extern unsigned int ftr_fixup_prefix1_orig[]; + extern unsigned int ftr_fixup_prefix1_expected[]; + int size = sizeof(unsigned int) * (end_ftr_fixup_prefix1 - ftr_fixup_prefix1); + + fixup.value = fixup.mask = 8; + fixup.start_off = calc_offset(&fixup, ftr_fixup_prefix1 + 1); + fixup.end_off = calc_offset(&fixup, ftr_fixup_prefix1 + 3); + fixup.alt_start_off = fixup.alt_end_off = 0; + + /* Sanity check */ + check(memcmp(ftr_fixup_prefix1, ftr_fixup_prefix1_orig, size) == 0); + + patch_feature_section(0, &fixup); + check(memcmp(ftr_fixup_prefix1, ftr_fixup_prefix1_expected, size) == 0); + check(memcmp(ftr_fixup_prefix1, ftr_fixup_prefix1_orig, size) != 0); +} + +static void __init test_prefix_alt_patching(void) +{ + extern unsigned int ftr_fixup_prefix2[]; + extern unsigned int end_ftr_fixup_prefix2[]; + extern unsigned int ftr_fixup_prefix2_orig[]; + extern unsigned int ftr_fixup_prefix2_expected[]; + extern unsigned int ftr_fixup_prefix2_alt[]; + int size = sizeof(unsigned int) * (end_ftr_fixup_prefix2 - ftr_fixup_prefix2); + + fixup.value = fixup.mask = 8; + fixup.start_off = calc_offset(&fixup, ftr_fixup_prefix2 + 1); + fixup.end_off = calc_offset(&fixup, ftr_fixup_prefix2 + 3); + fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_prefix2_alt); + fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_prefix2_alt + 2); + /* Sanity check */ + check(memcmp(ftr_fixup_prefix2, ftr_fixup_prefix2_orig, size) == 0); + + patch_feature_section(0, &fixup); + check(memcmp(ftr_fixup_prefix2, ftr_fixup_prefix2_expected, size) == 0); + check(memcmp(ftr_fixup_prefix2, ftr_fixup_prefix2_orig, size) != 0); +} + +static void __init test_prefix_word_alt_patching(void) +{ + extern unsigned int ftr_fixup_prefix3[]; + extern unsigned int end_ftr_fixup_prefix3[]; + extern unsigned int ftr_fixup_prefix3_orig[]; + extern unsigned int ftr_fixup_prefix3_expected[]; + extern unsigned int ftr_fixup_prefix3_alt[]; + int size = sizeof(unsigned int) * (end_ftr_fixup_prefix3 - ftr_fixup_prefix3); + + fixup.value = fixup.mask = 8; + fixup.start_off = calc_offset(&fixup, ftr_fixup_prefix3 + 1); + fixup.end_off = calc_offset(&fixup, ftr_fixup_prefix3 + 
4); + fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_prefix3_alt); + fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_prefix3_alt + 3); + /* Sanity check */ + check(memcmp(ftr_fixup_prefix3, ftr_fixup_prefix3_orig, size) == 0); + + patch_feature_section(0, &fixup); + check(memcmp(ftr_fixup_prefix3, ftr_fixup_prefix3_expected, size) == 0); + patch_feature_section(0, &fixup); + check(memcmp(ftr_fixup_prefix3, ftr_fixup_prefix3_orig, size) != 0); +} +#else +static inline void test_prefix_patching(void) {} +static inline void test_prefix_alt_patching(void) {} +static inline void test_prefix_word_alt_patching(void) {} +#endif /* CONFIG_PPC64 */ + static int __init test_feature_fixups(void) { printk(KERN_DEBUG "Running feature fixup self-tests ...\n"); @@ -365,9 +1005,13 @@ static int __init test_feature_fixups(void) test_alternative_case_too_small(); test_alternative_case_with_branch(); test_alternative_case_with_external_branch(); + test_alternative_case_with_branch_to_end(); test_cpu_macros(); test_fw_macros(); test_lwsync_macros(); + test_prefix_patching(); + test_prefix_alt_patching(); + test_prefix_word_alt_patching(); return 0; } diff --git a/arch/powerpc/lib/hweight_64.S b/arch/powerpc/lib/hweight_64.S index 9b96ff2ecd4d..151875050da9 100644 --- a/arch/powerpc/lib/hweight_64.S +++ b/arch/powerpc/lib/hweight_64.S @@ -1,30 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
* * Copyright (C) IBM Corporation, 2010 * * Author: Anton Blanchard <anton@au.ibm.com> */ +#include <linux/export.h> #include <asm/processor.h> #include <asm/ppc_asm.h> +#include <asm/feature-fixups.h> /* Note: This code relies on -mminimal-toc */ _GLOBAL(__arch_hweight8) BEGIN_FTR_SECTION - b .__sw_hweight8 + b CFUNC(__sw_hweight8) nop nop FTR_SECTION_ELSE @@ -32,10 +22,11 @@ FTR_SECTION_ELSE clrldi r3,r3,64-8 blr ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB) +EXPORT_SYMBOL(__arch_hweight8) _GLOBAL(__arch_hweight16) BEGIN_FTR_SECTION - b .__sw_hweight16 + b CFUNC(__sw_hweight16) nop nop nop @@ -54,10 +45,11 @@ FTR_SECTION_ELSE blr ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_POPCNTD, 50) ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB) +EXPORT_SYMBOL(__arch_hweight16) _GLOBAL(__arch_hweight32) BEGIN_FTR_SECTION - b .__sw_hweight32 + b CFUNC(__sw_hweight32) nop nop nop @@ -79,10 +71,11 @@ FTR_SECTION_ELSE blr ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_POPCNTD, 51) ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB) +EXPORT_SYMBOL(__arch_hweight32) _GLOBAL(__arch_hweight64) BEGIN_FTR_SECTION - b .__sw_hweight64 + b CFUNC(__sw_hweight64) nop nop nop @@ -108,3 +101,4 @@ FTR_SECTION_ELSE blr ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_POPCNTD, 52) ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB) +EXPORT_SYMBOL(__arch_hweight64) diff --git a/arch/powerpc/lib/ldstfp.S b/arch/powerpc/lib/ldstfp.S index 85aec08ab234..e00abeabc54d 100644 --- a/arch/powerpc/lib/ldstfp.S +++ b/arch/powerpc/lib/ldstfp.S @@ -1,13 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Floating-point, VMX/Altivec and VSX loads and stores * for use in instruction emulation. * * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <asm/processor.h> @@ -15,39 +11,24 @@ #include <asm/ppc-opcode.h> #include <asm/reg.h> #include <asm/asm-offsets.h> +#include <asm/asm-compat.h> #include <linux/errno.h> -#ifdef CONFIG_PPC_FPU - #define STKFRM (PPC_MIN_STKFRM + 16) - .macro extab instr,handler - .section __ex_table,"a" - PPC_LONG \instr,\handler - .previous - .endm - - .macro inst32 op -reg = 0 - .rept 32 -20: \op reg,0,r4 - b 3f - extab 20b,99f -reg = reg + 1 - .endr - .endm - -/* Get the contents of frN into fr0; N is in r3. */ +/* Get the contents of frN into *p; N is in r3 and p is in r4. */ _GLOBAL(get_fpr) mflr r0 + mfmsr r6 + ori r7, r6, MSR_FP + MTMSRD(r7) + isync rlwinm r3,r3,3,0xf8 bcl 20,31,1f - blr /* fr0 is already in fr0 */ - nop -reg = 1 - .rept 31 - fmr fr0,reg - blr +reg = 0 + .rept 32 + stfd reg, 0(r4) + b 2f reg = reg + 1 .endr 1: mflr r5 @@ -55,18 +36,23 @@ reg = reg + 1 mtctr r5 mtlr r0 bctr +2: MTMSRD(r6) + isync + blr -/* Put the contents of fr0 into frN; N is in r3. */ +/* Put the contents of *p into frN; N is in r3 and p is in r4. */ _GLOBAL(put_fpr) mflr r0 + mfmsr r6 + ori r7, r6, MSR_FP + MTMSRD(r7) + isync rlwinm r3,r3,3,0xf8 bcl 20,31,1f - blr /* fr0 is already in fr0 */ - nop -reg = 1 - .rept 31 - fmr reg,fr0 - blr +reg = 0 + .rept 32 + lfd reg, 0(r4) + b 2f reg = reg + 1 .endr 1: mflr r5 @@ -74,127 +60,24 @@ reg = reg + 1 mtctr r5 mtlr r0 bctr - -/* Load FP reg N from float at *p. N is in r3, p in r4. 
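For context on the hweight_64.S hunk above: each __arch_hweight* entry point only selects, via the feature sections, between a branch to the generic software population count and the popcntb/popcntd instructions when the CPU feature bits say they exist; the hunk also adds the EXPORT_SYMBOL()s. The software fallback looks roughly like the following C — an illustration of the generic bit-counting algorithm, not a copy of the kernel's lib/hweight.c:

#include <stdio.h>
#include <stdint.h>

/* Generic population count, in the same spirit as __sw_hweight64(). */
static unsigned int sw_hweight64(uint64_t w)
{
	w -= (w >> 1) & 0x5555555555555555ULL;			/* pairs of bits */
	w  = (w & 0x3333333333333333ULL) +
	     ((w >> 2) & 0x3333333333333333ULL);		/* nibbles */
	w  = (w + (w >> 4)) & 0x0f0f0f0f0f0f0f0fULL;		/* bytes */
	return (unsigned int)((w * 0x0101010101010101ULL) >> 56); /* sum the bytes */
}

int main(void)
{
	uint64_t samples[] = { 0, 1, 0xffULL, 0x8000000000000001ULL, ~0ULL };

	for (unsigned int i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
		printf("hweight64(%#018llx) = %u\n",
		       (unsigned long long)samples[i], sw_hweight64(samples[i]));
	return 0;
}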
*/ -_GLOBAL(do_lfs) - PPC_STLU r1,-STKFRM(r1) - mflr r0 - PPC_STL r0,STKFRM+PPC_LR_STKOFF(r1) - mfmsr r6 - ori r7,r6,MSR_FP - cmpwi cr7,r3,0 - MTMSRD(r7) - isync - beq cr7,1f - stfd fr0,STKFRM-16(r1) -1: li r9,-EFAULT -2: lfs fr0,0(r4) - li r9,0 -3: bl put_fpr - beq cr7,4f - lfd fr0,STKFRM-16(r1) -4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1) - mtlr r0 - MTMSRD(r6) +2: MTMSRD(r6) isync - mr r3,r9 - addi r1,r1,STKFRM - blr - extab 2b,3b - -/* Load FP reg N from double at *p. N is in r3, p in r4. */ -_GLOBAL(do_lfd) - PPC_STLU r1,-STKFRM(r1) - mflr r0 - PPC_STL r0,STKFRM+PPC_LR_STKOFF(r1) - mfmsr r6 - ori r7,r6,MSR_FP - cmpwi cr7,r3,0 - MTMSRD(r7) - isync - beq cr7,1f - stfd fr0,STKFRM-16(r1) -1: li r9,-EFAULT -2: lfd fr0,0(r4) - li r9,0 -3: beq cr7,4f - bl put_fpr - lfd fr0,STKFRM-16(r1) -4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1) - mtlr r0 - MTMSRD(r6) - isync - mr r3,r9 - addi r1,r1,STKFRM blr - extab 2b,3b -/* Store FP reg N to float at *p. N is in r3, p in r4. */ -_GLOBAL(do_stfs) - PPC_STLU r1,-STKFRM(r1) - mflr r0 - PPC_STL r0,STKFRM+PPC_LR_STKOFF(r1) - mfmsr r6 - ori r7,r6,MSR_FP - cmpwi cr7,r3,0 - MTMSRD(r7) - isync - beq cr7,1f - stfd fr0,STKFRM-16(r1) - bl get_fpr -1: li r9,-EFAULT -2: stfs fr0,0(r4) - li r9,0 -3: beq cr7,4f - lfd fr0,STKFRM-16(r1) -4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1) - mtlr r0 - MTMSRD(r6) - isync - mr r3,r9 - addi r1,r1,STKFRM - blr - extab 2b,3b - -/* Store FP reg N to double at *p. N is in r3, p in r4. */ -_GLOBAL(do_stfd) - PPC_STLU r1,-STKFRM(r1) +#ifdef CONFIG_ALTIVEC +/* Get the contents of vrN into *p; N is in r3 and p is in r4. */ +_GLOBAL(get_vr) mflr r0 - PPC_STL r0,STKFRM+PPC_LR_STKOFF(r1) mfmsr r6 - ori r7,r6,MSR_FP - cmpwi cr7,r3,0 + oris r7, r6, MSR_VEC@h MTMSRD(r7) isync - beq cr7,1f - stfd fr0,STKFRM-16(r1) - bl get_fpr -1: li r9,-EFAULT -2: stfd fr0,0(r4) - li r9,0 -3: beq cr7,4f - lfd fr0,STKFRM-16(r1) -4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1) - mtlr r0 - MTMSRD(r6) - isync - mr r3,r9 - addi r1,r1,STKFRM - blr - extab 2b,3b - -#ifdef CONFIG_ALTIVEC -/* Get the contents of vrN into vr0; N is in r3. */ -_GLOBAL(get_vr) - mflr r0 rlwinm r3,r3,3,0xf8 bcl 20,31,1f - blr /* vr0 is already in vr0 */ - nop -reg = 1 - .rept 31 - vor vr0,reg,reg /* assembler doesn't know vmr? */ - blr +reg = 0 + .rept 32 + stvx reg, 0, r4 + b 2f reg = reg + 1 .endr 1: mflr r5 @@ -202,18 +85,23 @@ reg = reg + 1 mtctr r5 mtlr r0 bctr +2: MTMSRD(r6) + isync + blr -/* Put the contents of vr0 into vrN; N is in r3. */ +/* Put the contents of *p into vrN; N is in r3 and p is in r4. */ _GLOBAL(put_vr) mflr r0 + mfmsr r6 + oris r7, r6, MSR_VEC@h + MTMSRD(r7) + isync rlwinm r3,r3,3,0xf8 bcl 20,31,1f - blr /* vr0 is already in vr0 */ - nop -reg = 1 - .rept 31 - vor reg,vr0,vr0 - blr +reg = 0 + .rept 32 + lvx reg, 0, r4 + b 2f reg = reg + 1 .endr 1: mflr r5 @@ -221,71 +109,18 @@ reg = reg + 1 mtctr r5 mtlr r0 bctr - -/* Load vector reg N from *p. N is in r3, p in r4. */ -_GLOBAL(do_lvx) - PPC_STLU r1,-STKFRM(r1) - mflr r0 - PPC_STL r0,STKFRM+PPC_LR_STKOFF(r1) - mfmsr r6 - oris r7,r6,MSR_VEC@h - cmpwi cr7,r3,0 - li r8,STKFRM-16 - MTMSRD(r7) - isync - beq cr7,1f - stvx vr0,r1,r8 -1: li r9,-EFAULT -2: lvx vr0,0,r4 - li r9,0 -3: beq cr7,4f - bl put_vr - lvx vr0,r1,r8 -4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1) - mtlr r0 - MTMSRD(r6) - isync - mr r3,r9 - addi r1,r1,STKFRM - blr - extab 2b,3b - -/* Store vector reg N to *p. N is in r3, p in r4. 
*/ -_GLOBAL(do_stvx) - PPC_STLU r1,-STKFRM(r1) - mflr r0 - PPC_STL r0,STKFRM+PPC_LR_STKOFF(r1) - mfmsr r6 - oris r7,r6,MSR_VEC@h - cmpwi cr7,r3,0 - li r8,STKFRM-16 - MTMSRD(r7) - isync - beq cr7,1f - stvx vr0,r1,r8 - bl get_vr -1: li r9,-EFAULT -2: stvx vr0,0,r4 - li r9,0 -3: beq cr7,4f - lvx vr0,r1,r8 -4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1) - mtlr r0 - MTMSRD(r6) +2: MTMSRD(r6) isync - mr r3,r9 - addi r1,r1,STKFRM blr - extab 2b,3b #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_VSX -/* Get the contents of vsrN into vsr0; N is in r3. */ +/* Get the contents of vsN into vs0; N is in r3. */ _GLOBAL(get_vsr) mflr r0 rlwinm r3,r3,3,0x1f8 bcl 20,31,1f - blr /* vsr0 is already in vsr0 */ + blr /* vs0 is already in vs0 */ nop reg = 1 .rept 63 @@ -299,12 +134,12 @@ reg = reg + 1 mtlr r0 bctr -/* Put the contents of vsr0 into vsrN; N is in r3. */ +/* Put the contents of vs0 into vsN; N is in r3. */ _GLOBAL(put_vsr) mflr r0 rlwinm r3,r3,3,0x1f8 bcl 20,31,1f - blr /* vr0 is already in vr0 */ + blr /* v0 is already in v0 */ nop reg = 1 .rept 63 @@ -319,7 +154,7 @@ reg = reg + 1 bctr /* Load VSX reg N from vector doubleword *p. N is in r3, p in r4. */ -_GLOBAL(do_lxvd2x) +_GLOBAL(load_vsrn) PPC_STLU r1,-STKFRM(r1) mflr r0 PPC_STL r0,STKFRM+PPC_LR_STKOFF(r1) @@ -331,49 +166,72 @@ _GLOBAL(do_lxvd2x) isync beq cr7,1f STXVD2X(0,R1,R8) -1: li r9,-EFAULT -2: LXVD2X(0,R0,R4) - li r9,0 -3: beq cr7,4f +1: LXVD2X(0,R0,R4) +#ifdef __LITTLE_ENDIAN__ + XXSWAPD(0,0) +#endif + beq cr7,4f bl put_vsr LXVD2X(0,R1,R8) 4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1) mtlr r0 MTMSRD(r6) isync - mr r3,r9 addi r1,r1,STKFRM blr - extab 2b,3b /* Store VSX reg N to vector doubleword *p. N is in r3, p in r4. */ -_GLOBAL(do_stxvd2x) +_GLOBAL(store_vsrn) PPC_STLU r1,-STKFRM(r1) mflr r0 PPC_STL r0,STKFRM+PPC_LR_STKOFF(r1) mfmsr r6 oris r7,r6,MSR_VSX@h - cmpwi cr7,r3,0 li r8,STKFRM-16 MTMSRD(r7) isync - beq cr7,1f STXVD2X(0,R1,R8) bl get_vsr -1: li r9,-EFAULT -2: STXVD2X(0,R0,R4) - li r9,0 -3: beq cr7,4f +#ifdef __LITTLE_ENDIAN__ + XXSWAPD(0,0) +#endif + STXVD2X(0,R0,R4) LXVD2X(0,R1,R8) -4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1) + PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1) mtlr r0 MTMSRD(r6) isync mr r3,r9 addi r1,r1,STKFRM blr - extab 2b,3b - #endif /* CONFIG_VSX */ -#endif /* CONFIG_PPC_FPU */ +/* Convert single-precision to double, without disturbing FPRs. */ +/* conv_sp_to_dp(float *sp, double *dp) */ +_GLOBAL(conv_sp_to_dp) + mfmsr r6 + ori r7, r6, MSR_FP + MTMSRD(r7) + isync + stfd fr0, -16(r1) + lfs fr0, 0(r3) + stfd fr0, 0(r4) + lfd fr0, -16(r1) + MTMSRD(r6) + isync + blr + +/* Convert single-precision to double, without disturbing FPRs. */ +/* conv_sp_to_dp(double *dp, float *sp) */ +_GLOBAL(conv_dp_to_sp) + mfmsr r6 + ori r7, r6, MSR_FP + MTMSRD(r7) + isync + stfd fr0, -16(r1) + lfd fr0, 0(r3) + stfs fr0, 0(r4) + lfd fr0, -16(r1) + MTMSRD(r6) + isync + blr diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c index bb7cfecf2788..04165b7a163f 100644 --- a/arch/powerpc/lib/locks.c +++ b/arch/powerpc/lib/locks.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Spin and read/write lock operations. 
* @@ -5,17 +6,11 @@ * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM * Copyright (C) 2002 Dave Engebretsen <engebret@us.ibm.com>, IBM * Rework to support virtual processors - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/kernel.h> #include <linux/spinlock.h> #include <linux/export.h> -#include <linux/stringify.h> #include <linux/smp.h> /* waiting for a spinlock... */ @@ -23,7 +18,7 @@ #include <asm/hvcall.h> #include <asm/smp.h> -void __spin_yield(arch_spinlock_t *lock) +void splpar_spin_yield(arch_spinlock_t *lock) { unsigned int lock_value, holder_cpu, yield_count; @@ -32,22 +27,23 @@ void __spin_yield(arch_spinlock_t *lock) return; holder_cpu = lock_value & 0xffff; BUG_ON(holder_cpu >= NR_CPUS); - yield_count = lppaca_of(holder_cpu).yield_count; + + yield_count = yield_count_of(holder_cpu); if ((yield_count & 1) == 0) return; /* virtual cpu is currently running */ rmb(); if (lock->slock != lock_value) return; /* something has changed */ - plpar_hcall_norets(H_CONFER, - get_hard_smp_processor_id(holder_cpu), yield_count); + yield_to_preempted(holder_cpu, yield_count); } +EXPORT_SYMBOL_GPL(splpar_spin_yield); /* * Waiting for a read lock or a write lock on a rwlock... * This turns out to be the same for read and write locks, since * we only know the holder if it is write-locked. */ -void __rw_yield(arch_rwlock_t *rw) +void splpar_rw_yield(arch_rwlock_t *rw) { int lock_value; unsigned int holder_cpu, yield_count; @@ -57,25 +53,13 @@ void __rw_yield(arch_rwlock_t *rw) return; /* no write lock at present */ holder_cpu = lock_value & 0xffff; BUG_ON(holder_cpu >= NR_CPUS); - yield_count = lppaca_of(holder_cpu).yield_count; + + yield_count = yield_count_of(holder_cpu); if ((yield_count & 1) == 0) return; /* virtual cpu is currently running */ rmb(); if (rw->lock != lock_value) return; /* something has changed */ - plpar_hcall_norets(H_CONFER, - get_hard_smp_processor_id(holder_cpu), yield_count); + yield_to_preempted(holder_cpu, yield_count); } #endif - -void arch_spin_unlock_wait(arch_spinlock_t *lock) -{ - while (lock->slock) { - HMT_low(); - if (SHARED_PROCESSOR) - __spin_yield(lock); - } - HMT_medium(); -} - -EXPORT_SYMBOL(arch_spin_unlock_wait); diff --git a/arch/powerpc/lib/mem_64.S b/arch/powerpc/lib/mem_64.S index f4fcb0bc6563..6fd06cd20faa 100644 --- a/arch/powerpc/lib/mem_64.S +++ b/arch/powerpc/lib/mem_64.S @@ -1,28 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * String handling functions for PowerPC. * * Copyright (C) 1996 Paul Mackerras. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ +#include <linux/export.h> #include <asm/processor.h> #include <asm/errno.h> #include <asm/ppc_asm.h> +#include <asm/kasan.h> + +#ifndef CONFIG_KASAN +_GLOBAL(__memset16) + rlwimi r4,r4,16,0,15 + /* fall through */ + +_GLOBAL(__memset32) + rldimi r4,r4,32,0 + /* fall through */ -_GLOBAL(memset) +_GLOBAL(__memset64) + neg r0,r3 + andi. r0,r0,7 + cmplw cr1,r5,r0 + b .Lms +EXPORT_SYMBOL(__memset16) +EXPORT_SYMBOL(__memset32) +EXPORT_SYMBOL(__memset64) +#endif + +_GLOBAL_KASAN(memset) neg r0,r3 rlwimi r4,r4,8,16,23 andi. 
r0,r0,7 /* # bytes to be 8-byte aligned */ rlwimi r4,r4,16,0,15 cmplw cr1,r5,r0 /* do we get that far? */ rldimi r4,r4,32,0 - PPC_MTOCRF(1,r0) +.Lms: PPC_MTOCRF(1,r0) mr r6,r3 blt cr1,8f - beq+ 3f /* if already 8-byte aligned */ + beq 3f /* if already 8-byte aligned */ subf r5,r0,r5 bf 31,1f stb r4,0(r6) @@ -37,6 +54,7 @@ _GLOBAL(memset) clrldi r5,r5,58 mtctr r0 beq 5f + .balign 16 4: std r4,0(r6) std r4,8(r6) std r4,16(r6) @@ -66,7 +84,7 @@ _GLOBAL(memset) addi r6,r6,8 8: cmpwi r5,0 PPC_MTOCRF(1,r5) - beqlr+ + beqlr bf 29,9f stw r4,0(r6) addi r6,r6,4 @@ -76,11 +94,13 @@ _GLOBAL(memset) 10: bflr 31 stb r4,0(r6) blr +EXPORT_SYMBOL(memset) +EXPORT_SYMBOL_KASAN(memset) -_GLOBAL(memmove) +_GLOBAL_TOC_KASAN(memmove) cmplw 0,r3,r4 - bgt .backwards_memcpy - b .memcpy + bgt backwards_memcpy + b memcpy _GLOBAL(backwards_memcpy) rlwinm. r7,r5,32-3,3,31 /* r0 = r5 >> 3 */ @@ -90,6 +110,7 @@ _GLOBAL(backwards_memcpy) andi. r0,r6,3 mtctr r7 bne 5f + .balign 16 1: lwz r7,-4(r4) lwzu r8,-8(r4) stw r7,-4(r6) @@ -117,3 +138,5 @@ _GLOBAL(backwards_memcpy) beq 2b mtctr r7 b 1b +EXPORT_SYMBOL(memmove) +EXPORT_SYMBOL_KASAN(memmove) diff --git a/arch/powerpc/lib/memcmp_32.S b/arch/powerpc/lib/memcmp_32.S new file mode 100644 index 000000000000..f6fca5664e91 --- /dev/null +++ b/arch/powerpc/lib/memcmp_32.S @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * memcmp for PowerPC32 + * + * Copyright (C) 1996 Paul Mackerras. + * + */ + +#include <linux/export.h> +#include <asm/ppc_asm.h> + + .text + +_GLOBAL(memcmp) + srawi. r7, r5, 2 /* Divide len by 4 */ + mr r6, r3 + beq- 3f + mtctr r7 + li r7, 0 +1: lwzx r3, r6, r7 + lwzx r0, r4, r7 + addi r7, r7, 4 + cmplw cr0, r3, r0 + bdnzt eq, 1b + bne 5f +3: andi. r3, r5, 3 + beqlr + cmplwi cr1, r3, 2 + blt- cr1, 4f + lhzx r3, r6, r7 + lhzx r0, r4, r7 + addi r7, r7, 2 + subf. r3, r0, r3 + beqlr cr1 + bnelr +4: lbzx r3, r6, r7 + lbzx r0, r4, r7 + subf. r3, r0, r3 + blr +5: li r3, 1 + bgtlr + li r3, -1 + blr +EXPORT_SYMBOL(memcmp) diff --git a/arch/powerpc/lib/memcmp_64.S b/arch/powerpc/lib/memcmp_64.S new file mode 100644 index 000000000000..142c666d3897 --- /dev/null +++ b/arch/powerpc/lib/memcmp_64.S @@ -0,0 +1,638 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Author: Anton Blanchard <anton@au.ibm.com> + * Copyright 2015 IBM Corporation. 
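The memset prologue in the mem_64.S hunk above ("neg r0,r3; andi. r0,r0,7; cmplw cr1,r5,r0") computes how many single bytes must be stored before the destination reaches the next 8-byte boundary, and whether the length even gets that far. The same idiom in C, as a standalone illustration (nothing here is kernel code):

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

/* "neg r0,r3; andi. r0,r0,7": bytes needed to reach the next 8-byte boundary */
static size_t bytes_to_align8(const void *p)
{
	return (size_t)(-(uintptr_t)p & 7);
}

int main(void)
{
	char buf[32];
	size_t len = 5;

	for (size_t i = 0; i < 8; i++) {
		size_t head = bytes_to_align8(buf + i);

		/* "cmplw cr1,r5,r0": a short memset may end inside the head */
		if (head > len)
			head = len;
		printf("dst=%p head=%zu then %zu aligned byte(s)\n",
		       (void *)(buf + i), head, len - head);
	}
	return 0;
}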
+ */ +#include <linux/export.h> +#include <asm/ppc_asm.h> +#include <asm/ppc-opcode.h> + +#define off8 r6 +#define off16 r7 +#define off24 r8 + +#define rA r9 +#define rB r10 +#define rC r11 +#define rD r27 +#define rE r28 +#define rF r29 +#define rG r30 +#define rH r31 + +#ifdef __LITTLE_ENDIAN__ +#define LH lhbrx +#define LW lwbrx +#define LD ldbrx +#define LVS lvsr +#define VPERM(_VRT,_VRA,_VRB,_VRC) \ + vperm _VRT,_VRB,_VRA,_VRC +#else +#define LH lhzx +#define LW lwzx +#define LD ldx +#define LVS lvsl +#define VPERM(_VRT,_VRA,_VRB,_VRC) \ + vperm _VRT,_VRA,_VRB,_VRC +#endif + +#define VMX_THRESH 4096 +#define ENTER_VMX_OPS \ + mflr r0; \ + std r3,-STACKFRAMESIZE+STK_REG(R31)(r1); \ + std r4,-STACKFRAMESIZE+STK_REG(R30)(r1); \ + std r5,-STACKFRAMESIZE+STK_REG(R29)(r1); \ + std r0,16(r1); \ + stdu r1,-STACKFRAMESIZE(r1); \ + bl CFUNC(enter_vmx_ops); \ + cmpwi cr1,r3,0; \ + ld r0,STACKFRAMESIZE+16(r1); \ + ld r3,STK_REG(R31)(r1); \ + ld r4,STK_REG(R30)(r1); \ + ld r5,STK_REG(R29)(r1); \ + addi r1,r1,STACKFRAMESIZE; \ + mtlr r0 + +#define EXIT_VMX_OPS \ + mflr r0; \ + std r3,-STACKFRAMESIZE+STK_REG(R31)(r1); \ + std r4,-STACKFRAMESIZE+STK_REG(R30)(r1); \ + std r5,-STACKFRAMESIZE+STK_REG(R29)(r1); \ + std r0,16(r1); \ + stdu r1,-STACKFRAMESIZE(r1); \ + bl CFUNC(exit_vmx_ops); \ + ld r0,STACKFRAMESIZE+16(r1); \ + ld r3,STK_REG(R31)(r1); \ + ld r4,STK_REG(R30)(r1); \ + ld r5,STK_REG(R29)(r1); \ + addi r1,r1,STACKFRAMESIZE; \ + mtlr r0 + +/* + * LD_VSR_CROSS16B load the 2nd 16 bytes for _vaddr which is unaligned with + * 16 bytes boundary and permute the result with the 1st 16 bytes. + + * | y y y y y y y y y y y y y 0 1 2 | 3 4 5 6 7 8 9 a b c d e f z z z | + * ^ ^ ^ + * 0xbbbb10 0xbbbb20 0xbbb30 + * ^ + * _vaddr + * + * + * _vmask is the mask generated by LVS + * _v1st_qw is the 1st aligned QW of current addr which is already loaded. + * for example: 0xyyyyyyyyyyyyy012 for big endian + * _v2nd_qw is the 2nd aligned QW of cur _vaddr to be loaded. + * for example: 0x3456789abcdefzzz for big endian + * The permute result is saved in _v_res. + * for example: 0x0123456789abcdef for big endian. + */ +#define LD_VSR_CROSS16B(_vaddr,_vmask,_v1st_qw,_v2nd_qw,_v_res) \ + lvx _v2nd_qw,_vaddr,off16; \ + VPERM(_v_res,_v1st_qw,_v2nd_qw,_vmask) + +/* + * There are 2 categories for memcmp: + * 1) src/dst has the same offset to the 8 bytes boundary. The handlers + * are named like .Lsameoffset_xxxx + * 2) src/dst has different offset to the 8 bytes boundary. The handlers + * are named like .Ldiffoffset_xxxx + */ +_GLOBAL_TOC(memcmp) + cmpdi cr1,r5,0 + + /* Use the short loop if the src/dst addresses are not + * with the same offset of 8 bytes align boundary. + */ + xor r6,r3,r4 + andi. r6,r6,7 + + /* Fall back to short loop if compare at aligned addrs + * with less than 8 bytes. + */ + cmpdi cr6,r5,7 + + beq cr1,.Lzero + bgt cr6,.Lno_short + +.Lshort: + mtctr r5 +1: lbz rA,0(r3) + lbz rB,0(r4) + subf. rC,rB,rA + bne .Lnon_zero + bdz .Lzero + + lbz rA,1(r3) + lbz rB,1(r4) + subf. rC,rB,rA + bne .Lnon_zero + bdz .Lzero + + lbz rA,2(r3) + lbz rB,2(r4) + subf. rC,rB,rA + bne .Lnon_zero + bdz .Lzero + + lbz rA,3(r3) + lbz rB,3(r4) + subf. rC,rB,rA + bne .Lnon_zero + + addi r3,r3,4 + addi r4,r4,4 + + bdnz 1b + +.Lzero: + li r3,0 + blr + +.Lno_short: + dcbt 0,r3 + dcbt 0,r4 + bne .Ldiffoffset_8bytes_make_align_start + + +.Lsameoffset_8bytes_make_align_start: + /* attempt to compare bytes not aligned with 8 bytes so that + * rest comparison can run based on 8 bytes alignment. + */ + andi. 
r6,r3,7 + + /* Try to compare the first double word which is not 8 bytes aligned: + * load the first double word at (src & ~7UL) and shift left appropriate + * bits before comparision. + */ + rlwinm r6,r3,3,26,28 + beq .Lsameoffset_8bytes_aligned + clrrdi r3,r3,3 + clrrdi r4,r4,3 + LD rA,0,r3 + LD rB,0,r4 + sld rA,rA,r6 + sld rB,rB,r6 + cmpld cr0,rA,rB + srwi r6,r6,3 + bne cr0,.LcmpAB_lightweight + subfic r6,r6,8 + subf. r5,r6,r5 + addi r3,r3,8 + addi r4,r4,8 + beq .Lzero + +.Lsameoffset_8bytes_aligned: + /* now we are aligned with 8 bytes. + * Use .Llong loop if left cmp bytes are equal or greater than 32B. + */ + cmpdi cr6,r5,31 + bgt cr6,.Llong + +.Lcmp_lt32bytes: + /* compare 1 ~ 31 bytes, at least r3 addr is 8 bytes aligned now */ + cmpdi cr5,r5,7 + srdi r0,r5,3 + ble cr5,.Lcmp_rest_lt8bytes + + /* handle 8 ~ 31 bytes */ + clrldi r5,r5,61 + mtctr r0 +2: + LD rA,0,r3 + LD rB,0,r4 + cmpld cr0,rA,rB + addi r3,r3,8 + addi r4,r4,8 + bne cr0,.LcmpAB_lightweight + bdnz 2b + + cmpwi r5,0 + beq .Lzero + +.Lcmp_rest_lt8bytes: + /* + * Here we have less than 8 bytes to compare. At least s1 is aligned to + * 8 bytes, but s2 may not be. We must make sure s2 + 7 doesn't cross a + * page boundary, otherwise we might read past the end of the buffer and + * trigger a page fault. We use 4K as the conservative minimum page + * size. If we detect that case we go to the byte-by-byte loop. + * + * Otherwise the next double word is loaded from s1 and s2, and shifted + * right to compare the appropriate bits. + */ + clrldi r6,r4,(64-12) // r6 = r4 & 0xfff + cmpdi r6,0xff8 + bgt .Lshort + + subfic r6,r5,8 + slwi r6,r6,3 + LD rA,0,r3 + LD rB,0,r4 + srd rA,rA,r6 + srd rB,rB,r6 + cmpld cr0,rA,rB + bne cr0,.LcmpAB_lightweight + b .Lzero + +.Lnon_zero: + mr r3,rC + blr + +.Llong: +#ifdef CONFIG_ALTIVEC +BEGIN_FTR_SECTION + /* Try to use vmx loop if length is equal or greater than 4K */ + cmpldi cr6,r5,VMX_THRESH + bge cr6,.Lsameoffset_vmx_cmp +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) + +.Llong_novmx_cmp: +#endif + /* At least s1 addr is aligned with 8 bytes */ + li off8,8 + li off16,16 + li off24,24 + + std r31,-8(r1) + std r30,-16(r1) + std r29,-24(r1) + std r28,-32(r1) + std r27,-40(r1) + + srdi r0,r5,5 + mtctr r0 + andi. 
r5,r5,31 + + LD rA,0,r3 + LD rB,0,r4 + + LD rC,off8,r3 + LD rD,off8,r4 + + LD rE,off16,r3 + LD rF,off16,r4 + + LD rG,off24,r3 + LD rH,off24,r4 + cmpld cr0,rA,rB + + addi r3,r3,32 + addi r4,r4,32 + + bdz .Lfirst32 + + LD rA,0,r3 + LD rB,0,r4 + cmpld cr1,rC,rD + + LD rC,off8,r3 + LD rD,off8,r4 + cmpld cr6,rE,rF + + LD rE,off16,r3 + LD rF,off16,r4 + cmpld cr7,rG,rH + bne cr0,.LcmpAB + + LD rG,off24,r3 + LD rH,off24,r4 + cmpld cr0,rA,rB + bne cr1,.LcmpCD + + addi r3,r3,32 + addi r4,r4,32 + + bdz .Lsecond32 + + .balign 16 + +1: LD rA,0,r3 + LD rB,0,r4 + cmpld cr1,rC,rD + bne cr6,.LcmpEF + + LD rC,off8,r3 + LD rD,off8,r4 + cmpld cr6,rE,rF + bne cr7,.LcmpGH + + LD rE,off16,r3 + LD rF,off16,r4 + cmpld cr7,rG,rH + bne cr0,.LcmpAB + + LD rG,off24,r3 + LD rH,off24,r4 + cmpld cr0,rA,rB + bne cr1,.LcmpCD + + addi r3,r3,32 + addi r4,r4,32 + + bdnz 1b + +.Lsecond32: + cmpld cr1,rC,rD + bne cr6,.LcmpEF + + cmpld cr6,rE,rF + bne cr7,.LcmpGH + + cmpld cr7,rG,rH + bne cr0,.LcmpAB + + bne cr1,.LcmpCD + bne cr6,.LcmpEF + bne cr7,.LcmpGH + +.Ltail: + ld r31,-8(r1) + ld r30,-16(r1) + ld r29,-24(r1) + ld r28,-32(r1) + ld r27,-40(r1) + + cmpdi r5,0 + beq .Lzero + b .Lshort + +.Lfirst32: + cmpld cr1,rC,rD + cmpld cr6,rE,rF + cmpld cr7,rG,rH + + bne cr0,.LcmpAB + bne cr1,.LcmpCD + bne cr6,.LcmpEF + bne cr7,.LcmpGH + + b .Ltail + +.LcmpAB: + li r3,1 + bgt cr0,.Lout + li r3,-1 + b .Lout + +.LcmpCD: + li r3,1 + bgt cr1,.Lout + li r3,-1 + b .Lout + +.LcmpEF: + li r3,1 + bgt cr6,.Lout + li r3,-1 + b .Lout + +.LcmpGH: + li r3,1 + bgt cr7,.Lout + li r3,-1 + +.Lout: + ld r31,-8(r1) + ld r30,-16(r1) + ld r29,-24(r1) + ld r28,-32(r1) + ld r27,-40(r1) + blr + +.LcmpAB_lightweight: /* skip NV GPRS restore */ + li r3,1 + bgtlr + li r3,-1 + blr + +#ifdef CONFIG_ALTIVEC +.Lsameoffset_vmx_cmp: + /* Enter with src/dst addrs has the same offset with 8 bytes + * align boundary. + * + * There is an optimization based on following fact: memcmp() + * prones to fail early at the first 32 bytes. + * Before applying VMX instructions which will lead to 32x128bits + * VMX regs load/restore penalty, we compare the first 32 bytes + * so that we can catch the ~80% fail cases. + */ + + li r0,4 + mtctr r0 +.Lsameoffset_prechk_32B_loop: + LD rA,0,r3 + LD rB,0,r4 + cmpld cr0,rA,rB + addi r3,r3,8 + addi r4,r4,8 + bne cr0,.LcmpAB_lightweight + addi r5,r5,-8 + bdnz .Lsameoffset_prechk_32B_loop + + ENTER_VMX_OPS + beq cr1,.Llong_novmx_cmp + +3: + /* need to check whether r4 has the same offset with r3 + * for 16 bytes boundary. + */ + xor r0,r3,r4 + andi. r0,r0,0xf + bne .Ldiffoffset_vmx_cmp_start + + /* len is no less than 4KB. Need to align with 16 bytes further. + */ + andi. 
rA,r3,8 + LD rA,0,r3 + beq 4f + LD rB,0,r4 + cmpld cr0,rA,rB + addi r3,r3,8 + addi r4,r4,8 + addi r5,r5,-8 + + beq cr0,4f + /* save and restore cr0 */ + mfocrf r5,128 + EXIT_VMX_OPS + mtocrf 128,r5 + b .LcmpAB_lightweight + +4: + /* compare 32 bytes for each loop */ + srdi r0,r5,5 + mtctr r0 + clrldi r5,r5,59 + li off16,16 + +.balign 16 +5: + lvx v0,0,r3 + lvx v1,0,r4 + VCMPEQUD_RC(v0,v0,v1) + bnl cr6,7f + lvx v0,off16,r3 + lvx v1,off16,r4 + VCMPEQUD_RC(v0,v0,v1) + bnl cr6,6f + addi r3,r3,32 + addi r4,r4,32 + bdnz 5b + + EXIT_VMX_OPS + cmpdi r5,0 + beq .Lzero + b .Lcmp_lt32bytes + +6: + addi r3,r3,16 + addi r4,r4,16 + +7: + /* diff the last 16 bytes */ + EXIT_VMX_OPS + LD rA,0,r3 + LD rB,0,r4 + cmpld cr0,rA,rB + li off8,8 + bne cr0,.LcmpAB_lightweight + + LD rA,off8,r3 + LD rB,off8,r4 + cmpld cr0,rA,rB + bne cr0,.LcmpAB_lightweight + b .Lzero +#endif + +.Ldiffoffset_8bytes_make_align_start: + /* now try to align s1 with 8 bytes */ + rlwinm r6,r3,3,26,28 + beq .Ldiffoffset_align_s1_8bytes + + clrrdi r3,r3,3 + LD rA,0,r3 + LD rB,0,r4 /* unaligned load */ + sld rA,rA,r6 + srd rA,rA,r6 + srd rB,rB,r6 + cmpld cr0,rA,rB + srwi r6,r6,3 + bne cr0,.LcmpAB_lightweight + + subfic r6,r6,8 + subf. r5,r6,r5 + addi r3,r3,8 + add r4,r4,r6 + + beq .Lzero + +.Ldiffoffset_align_s1_8bytes: + /* now s1 is aligned with 8 bytes. */ +#ifdef CONFIG_ALTIVEC +BEGIN_FTR_SECTION + /* only do vmx ops when the size equal or greater than 4K bytes */ + cmpdi cr5,r5,VMX_THRESH + bge cr5,.Ldiffoffset_vmx_cmp +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) + +.Ldiffoffset_novmx_cmp: +#endif + + + cmpdi cr5,r5,31 + ble cr5,.Lcmp_lt32bytes + +#ifdef CONFIG_ALTIVEC + b .Llong_novmx_cmp +#else + b .Llong +#endif + +#ifdef CONFIG_ALTIVEC +.Ldiffoffset_vmx_cmp: + /* perform a 32 bytes pre-checking before + * enable VMX operations. + */ + li r0,4 + mtctr r0 +.Ldiffoffset_prechk_32B_loop: + LD rA,0,r3 + LD rB,0,r4 + cmpld cr0,rA,rB + addi r3,r3,8 + addi r4,r4,8 + bne cr0,.LcmpAB_lightweight + addi r5,r5,-8 + bdnz .Ldiffoffset_prechk_32B_loop + + ENTER_VMX_OPS + beq cr1,.Ldiffoffset_novmx_cmp + +.Ldiffoffset_vmx_cmp_start: + /* Firstly try to align r3 with 16 bytes */ + andi. 
r6,r3,0xf + li off16,16 + beq .Ldiffoffset_vmx_s1_16bytes_align + + LVS v3,0,r3 + LVS v4,0,r4 + + lvx v5,0,r3 + lvx v6,0,r4 + LD_VSR_CROSS16B(r3,v3,v5,v7,v9) + LD_VSR_CROSS16B(r4,v4,v6,v8,v10) + + VCMPEQUB_RC(v7,v9,v10) + bnl cr6,.Ldiffoffset_vmx_diff_found + + subfic r6,r6,16 + subf r5,r6,r5 + add r3,r3,r6 + add r4,r4,r6 + +.Ldiffoffset_vmx_s1_16bytes_align: + /* now s1 is aligned with 16 bytes */ + lvx v6,0,r4 + LVS v4,0,r4 + srdi r6,r5,5 /* loop for 32 bytes each */ + clrldi r5,r5,59 + mtctr r6 + +.balign 16 +.Ldiffoffset_vmx_32bytesloop: + /* the first qw of r4 was saved in v6 */ + lvx v9,0,r3 + LD_VSR_CROSS16B(r4,v4,v6,v8,v10) + VCMPEQUB_RC(v7,v9,v10) + vor v6,v8,v8 + bnl cr6,.Ldiffoffset_vmx_diff_found + + addi r3,r3,16 + addi r4,r4,16 + + lvx v9,0,r3 + LD_VSR_CROSS16B(r4,v4,v6,v8,v10) + VCMPEQUB_RC(v7,v9,v10) + vor v6,v8,v8 + bnl cr6,.Ldiffoffset_vmx_diff_found + + addi r3,r3,16 + addi r4,r4,16 + + bdnz .Ldiffoffset_vmx_32bytesloop + + EXIT_VMX_OPS + + cmpdi r5,0 + beq .Lzero + b .Lcmp_lt32bytes + +.Ldiffoffset_vmx_diff_found: + EXIT_VMX_OPS + /* anyway, the diff will appear in next 16 bytes */ + li r5,16 + b .Lcmp_lt32bytes + +#endif +EXPORT_SYMBOL(memcmp) diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S index d2bbbc8d7dc0..b5a67e20143f 100644 --- a/arch/powerpc/lib/memcpy_64.S +++ b/arch/powerpc/lib/memcpy_64.S @@ -1,21 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright (C) 2002 Paul Mackerras, IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ +#include <linux/export.h> #include <asm/processor.h> #include <asm/ppc_asm.h> +#include <asm/asm-compat.h> +#include <asm/feature-fixups.h> +#include <asm/kasan.h> + +#ifndef SELFTEST_CASE +/* For big-endian, 0 == most CPUs, 1 == POWER6, 2 == Cell */ +#define SELFTEST_CASE 0 +#endif .align 7 -_GLOBAL(memcpy) +_GLOBAL_TOC_KASAN(memcpy) BEGIN_FTR_SECTION - std r3,48(r1) /* save destination pointer for return value */ +#ifdef __LITTLE_ENDIAN__ + cmpdi cr7,r5,0 +#else + std r3,-STACKFRAMESIZE+STK_REG(R31)(r1) /* save destination pointer for return value */ +#endif FTR_SECTION_ELSE +#ifdef CONFIG_PPC_BOOK3S_64 b memcpy_power7 +#endif ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY) +#ifdef __LITTLE_ENDIAN__ + /* dumb little-endian memcpy that will get replaced at runtime */ + addi r9,r3,-1 + addi r4,r4,-1 + beqlr cr7 + mtctr r5 +1: lbzu r10,1(r4) + stbu r10,1(r9) + bdnz 1b + blr +#else PPC_MTOCRF(0x01,r5) cmpldi cr1,r5,16 neg r6,r3 # LS 3 bits = # bytes to 8-byte dest bdry @@ -27,6 +49,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY) cleared. At the time of writing the only CPU that has this combination of bits set is Power6. */ +test_feature = (SELFTEST_CASE == 1) BEGIN_FTR_SECTION nop FTR_SECTION_ELSE @@ -35,6 +58,7 @@ ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \ CPU_FTR_UNALIGNED_LD_STD) .Ldst_aligned: addi r3,r3,-16 +test_feature = (SELFTEST_CASE == 0) BEGIN_FTR_SECTION andi. 
r0,r4,7 bne .Lsrc_unaligned @@ -71,7 +95,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) 2: bf cr7*4+3,3f lbz r9,8(r4) stb r9,0(r3) -3: ld r3,48(r1) /* return dest pointer */ +3: ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1) /* return dest pointer */ blr .Lsrc_unaligned: @@ -154,7 +178,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) 2: bf cr7*4+3,3f rotldi r9,r9,8 stb r9,0(r3) -3: ld r3,48(r1) /* return dest pointer */ +3: ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1) /* return dest pointer */ blr .Ldst_unaligned: @@ -199,5 +223,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) 3: bf cr7*4+3,4f lbz r0,0(r4) stb r0,0(r3) -4: ld r3,48(r1) /* return dest pointer */ +4: ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1) /* return dest pointer */ blr +#endif +EXPORT_SYMBOL(memcpy) +EXPORT_SYMBOL_KASAN(memcpy) diff --git a/arch/powerpc/lib/memcpy_power7.S b/arch/powerpc/lib/memcpy_power7.S index 0663630baf3b..b7c5e7fca8b9 100644 --- a/arch/powerpc/lib/memcpy_power7.S +++ b/arch/powerpc/lib/memcpy_power7.S @@ -1,17 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
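The memcmp_64.S hunks above gate the Altivec path behind a scalar pre-check: the first 32 bytes are compared with ordinary 8-byte loads before ENTER_VMX_OPS is reached, on the rationale stated in the code that most mismatching buffers differ early, so the vector register save/restore cost is usually never paid. A minimal C sketch of that idea follows (illustrative only; the byte-wise loop, the helper name and the flat 32-byte cut-off are simplifications of the word-at-a-time assembly, which also requires large, suitably aligned buffers before taking the vector path):

        #include <stddef.h>
        #include <string.h>

        static int memcmp_with_precheck(const unsigned char *s1,
                                        const unsigned char *s2, size_t n)
        {
                size_t i, head = n < 32 ? n : 32;

                /* cheap scalar pre-check: catches most mismatches early */
                for (i = 0; i < head; i++) {
                        if (s1[i] != s2[i])
                                return s1[i] < s2[i] ? -1 : 1;
                }
                if (n <= 32)
                        return 0;

                /* the real code would now enter the VMX loop for the rest */
                return memcmp(s1 + 32, s2 + 32, n - 32);
        }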
* * Copyright (C) IBM Corporation, 2012 * @@ -19,21 +7,30 @@ */ #include <asm/ppc_asm.h> +#ifndef SELFTEST_CASE +/* 0 == don't use VMX, 1 == use VMX */ +#define SELFTEST_CASE 0 +#endif + +#ifdef __BIG_ENDIAN__ +#define LVS(VRT,RA,RB) lvsl VRT,RA,RB +#define VPERM(VRT,VRA,VRB,VRC) vperm VRT,VRA,VRB,VRC +#else +#define LVS(VRT,RA,RB) lvsr VRT,RA,RB +#define VPERM(VRT,VRA,VRB,VRC) vperm VRT,VRB,VRA,VRC +#endif + _GLOBAL(memcpy_power7) -#ifdef CONFIG_ALTIVEC cmpldi r5,16 cmpldi cr1,r5,4096 - - std r3,48(r1) - + std r3,-STACKFRAMESIZE+STK_REG(R31)(r1) blt .Lshort_copy - bgt cr1,.Lvmx_copy -#else - cmpldi r5,16 - std r3,48(r1) - - blt .Lshort_copy +#ifdef CONFIG_ALTIVEC +test_feature = SELFTEST_CASE +BEGIN_FTR_SECTION + bgt cr1, .Lvmx_copy +END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif .Lnonvmx_copy: @@ -207,26 +204,26 @@ _GLOBAL(memcpy_power7) lbz r0,0(r4) stb r0,0(r3) -15: ld r3,48(r1) +15: ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1) blr .Lunwind_stack_nonvmx_copy: addi r1,r1,STACKFRAMESIZE b .Lnonvmx_copy -#ifdef CONFIG_ALTIVEC .Lvmx_copy: +#ifdef CONFIG_ALTIVEC mflr r0 - std r4,56(r1) - std r5,64(r1) + std r4,-STACKFRAMESIZE+STK_REG(R30)(r1) + std r5,-STACKFRAMESIZE+STK_REG(R29)(r1) std r0,16(r1) stdu r1,-STACKFRAMESIZE(r1) - bl .enter_vmx_copy + bl CFUNC(enter_vmx_ops) cmpwi cr1,r3,0 ld r0,STACKFRAMESIZE+16(r1) - ld r3,STACKFRAMESIZE+48(r1) - ld r4,STACKFRAMESIZE+56(r1) - ld r5,STACKFRAMESIZE+64(r1) + ld r3,STK_REG(R31)(r1) + ld r4,STK_REG(R30)(r1) + ld r5,STK_REG(R29)(r1) mtlr r0 /* @@ -247,18 +244,7 @@ _GLOBAL(memcpy_power7) or r7,r7,r0 ori r10,r7,1 /* stream=1 */ - lis r8,0x8000 /* GO=1 */ - clrldi r8,r8,32 - -.machine push -.machine "power4" - dcbt r0,r6,0b01000 - dcbt r0,r7,0b01010 - dcbtst r0,r9,0b01000 - dcbtst r0,r10,0b01010 - eieio - dcbt r0,r8,0b01010 /* GO */ -.machine pop + DCBT_SETUP_STREAMS(r6, r7, r9, r10, r8) beq cr1,.Lunwind_stack_nonvmx_copy @@ -312,29 +298,29 @@ _GLOBAL(memcpy_power7) li r11,48 bf cr7*4+3,5f - lvx vr1,r0,r4 + lvx v1,0,r4 addi r4,r4,16 - stvx vr1,r0,r3 + stvx v1,0,r3 addi r3,r3,16 5: bf cr7*4+2,6f - lvx vr1,r0,r4 - lvx vr0,r4,r9 + lvx v1,0,r4 + lvx v0,r4,r9 addi r4,r4,32 - stvx vr1,r0,r3 - stvx vr0,r3,r9 + stvx v1,0,r3 + stvx v0,r3,r9 addi r3,r3,32 6: bf cr7*4+1,7f - lvx vr3,r0,r4 - lvx vr2,r4,r9 - lvx vr1,r4,r10 - lvx vr0,r4,r11 + lvx v3,0,r4 + lvx v2,r4,r9 + lvx v1,r4,r10 + lvx v0,r4,r11 addi r4,r4,64 - stvx vr3,r0,r3 - stvx vr2,r3,r9 - stvx vr1,r3,r10 - stvx vr0,r3,r11 + stvx v3,0,r3 + stvx v2,r3,r9 + stvx v1,r3,r10 + stvx v0,r3,r11 addi r3,r3,64 7: sub r5,r5,r6 @@ -357,23 +343,23 @@ _GLOBAL(memcpy_power7) */ .align 5 8: - lvx vr7,r0,r4 - lvx vr6,r4,r9 - lvx vr5,r4,r10 - lvx vr4,r4,r11 - lvx vr3,r4,r12 - lvx vr2,r4,r14 - lvx vr1,r4,r15 - lvx vr0,r4,r16 + lvx v7,0,r4 + lvx v6,r4,r9 + lvx v5,r4,r10 + lvx v4,r4,r11 + lvx v3,r4,r12 + lvx v2,r4,r14 + lvx v1,r4,r15 + lvx v0,r4,r16 addi r4,r4,128 - stvx vr7,r0,r3 - stvx vr6,r3,r9 - stvx vr5,r3,r10 - stvx vr4,r3,r11 - stvx vr3,r3,r12 - stvx vr2,r3,r14 - stvx vr1,r3,r15 - stvx vr0,r3,r16 + stvx v7,0,r3 + stvx v6,r3,r9 + stvx v5,r3,r10 + stvx v4,r3,r11 + stvx v3,r3,r12 + stvx v2,r3,r14 + stvx v1,r3,r15 + stvx v0,r3,r16 addi r3,r3,128 bdnz 8b @@ -387,29 +373,29 @@ _GLOBAL(memcpy_power7) mtocrf 0x01,r6 bf cr7*4+1,9f - lvx vr3,r0,r4 - lvx vr2,r4,r9 - lvx vr1,r4,r10 - lvx vr0,r4,r11 + lvx v3,0,r4 + lvx v2,r4,r9 + lvx v1,r4,r10 + lvx v0,r4,r11 addi r4,r4,64 - stvx vr3,r0,r3 - stvx vr2,r3,r9 - stvx vr1,r3,r10 - stvx vr0,r3,r11 + stvx v3,0,r3 + stvx v2,r3,r9 + stvx v1,r3,r10 + stvx v0,r3,r11 addi r3,r3,64 9: bf cr7*4+2,10f - 
lvx vr1,r0,r4 - lvx vr0,r4,r9 + lvx v1,0,r4 + lvx v0,r4,r9 addi r4,r4,32 - stvx vr1,r0,r3 - stvx vr0,r3,r9 + stvx v1,0,r3 + stvx v0,r3,r9 addi r3,r3,32 10: bf cr7*4+3,11f - lvx vr1,r0,r4 + lvx v1,0,r4 addi r4,r4,16 - stvx vr1,r0,r3 + stvx v1,0,r3 addi r3,r3,16 /* Up to 15B to go */ @@ -438,8 +424,8 @@ _GLOBAL(memcpy_power7) stb r0,0(r3) 15: addi r1,r1,STACKFRAMESIZE - ld r3,48(r1) - b .exit_vmx_copy /* tail call optimise */ + ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1) + b CFUNC(exit_vmx_ops) /* tail call optimise */ .Lvmx_unaligned_copy: /* Get the destination 16B aligned */ @@ -485,42 +471,42 @@ _GLOBAL(memcpy_power7) li r10,32 li r11,48 - lvsl vr16,0,r4 /* Setup permute control vector */ - lvx vr0,0,r4 + LVS(v16,0,r4) /* Setup permute control vector */ + lvx v0,0,r4 addi r4,r4,16 bf cr7*4+3,5f - lvx vr1,r0,r4 - vperm vr8,vr0,vr1,vr16 + lvx v1,0,r4 + VPERM(v8,v0,v1,v16) addi r4,r4,16 - stvx vr8,r0,r3 + stvx v8,0,r3 addi r3,r3,16 - vor vr0,vr1,vr1 + vor v0,v1,v1 5: bf cr7*4+2,6f - lvx vr1,r0,r4 - vperm vr8,vr0,vr1,vr16 - lvx vr0,r4,r9 - vperm vr9,vr1,vr0,vr16 + lvx v1,0,r4 + VPERM(v8,v0,v1,v16) + lvx v0,r4,r9 + VPERM(v9,v1,v0,v16) addi r4,r4,32 - stvx vr8,r0,r3 - stvx vr9,r3,r9 + stvx v8,0,r3 + stvx v9,r3,r9 addi r3,r3,32 6: bf cr7*4+1,7f - lvx vr3,r0,r4 - vperm vr8,vr0,vr3,vr16 - lvx vr2,r4,r9 - vperm vr9,vr3,vr2,vr16 - lvx vr1,r4,r10 - vperm vr10,vr2,vr1,vr16 - lvx vr0,r4,r11 - vperm vr11,vr1,vr0,vr16 + lvx v3,0,r4 + VPERM(v8,v0,v3,v16) + lvx v2,r4,r9 + VPERM(v9,v3,v2,v16) + lvx v1,r4,r10 + VPERM(v10,v2,v1,v16) + lvx v0,r4,r11 + VPERM(v11,v1,v0,v16) addi r4,r4,64 - stvx vr8,r0,r3 - stvx vr9,r3,r9 - stvx vr10,r3,r10 - stvx vr11,r3,r11 + stvx v8,0,r3 + stvx v9,r3,r9 + stvx v10,r3,r10 + stvx v11,r3,r11 addi r3,r3,64 7: sub r5,r5,r6 @@ -543,31 +529,31 @@ _GLOBAL(memcpy_power7) */ .align 5 8: - lvx vr7,r0,r4 - vperm vr8,vr0,vr7,vr16 - lvx vr6,r4,r9 - vperm vr9,vr7,vr6,vr16 - lvx vr5,r4,r10 - vperm vr10,vr6,vr5,vr16 - lvx vr4,r4,r11 - vperm vr11,vr5,vr4,vr16 - lvx vr3,r4,r12 - vperm vr12,vr4,vr3,vr16 - lvx vr2,r4,r14 - vperm vr13,vr3,vr2,vr16 - lvx vr1,r4,r15 - vperm vr14,vr2,vr1,vr16 - lvx vr0,r4,r16 - vperm vr15,vr1,vr0,vr16 + lvx v7,0,r4 + VPERM(v8,v0,v7,v16) + lvx v6,r4,r9 + VPERM(v9,v7,v6,v16) + lvx v5,r4,r10 + VPERM(v10,v6,v5,v16) + lvx v4,r4,r11 + VPERM(v11,v5,v4,v16) + lvx v3,r4,r12 + VPERM(v12,v4,v3,v16) + lvx v2,r4,r14 + VPERM(v13,v3,v2,v16) + lvx v1,r4,r15 + VPERM(v14,v2,v1,v16) + lvx v0,r4,r16 + VPERM(v15,v1,v0,v16) addi r4,r4,128 - stvx vr8,r0,r3 - stvx vr9,r3,r9 - stvx vr10,r3,r10 - stvx vr11,r3,r11 - stvx vr12,r3,r12 - stvx vr13,r3,r14 - stvx vr14,r3,r15 - stvx vr15,r3,r16 + stvx v8,0,r3 + stvx v9,r3,r9 + stvx v10,r3,r10 + stvx v11,r3,r11 + stvx v12,r3,r12 + stvx v13,r3,r14 + stvx v14,r3,r15 + stvx v15,r3,r16 addi r3,r3,128 bdnz 8b @@ -581,36 +567,36 @@ _GLOBAL(memcpy_power7) mtocrf 0x01,r6 bf cr7*4+1,9f - lvx vr3,r0,r4 - vperm vr8,vr0,vr3,vr16 - lvx vr2,r4,r9 - vperm vr9,vr3,vr2,vr16 - lvx vr1,r4,r10 - vperm vr10,vr2,vr1,vr16 - lvx vr0,r4,r11 - vperm vr11,vr1,vr0,vr16 + lvx v3,0,r4 + VPERM(v8,v0,v3,v16) + lvx v2,r4,r9 + VPERM(v9,v3,v2,v16) + lvx v1,r4,r10 + VPERM(v10,v2,v1,v16) + lvx v0,r4,r11 + VPERM(v11,v1,v0,v16) addi r4,r4,64 - stvx vr8,r0,r3 - stvx vr9,r3,r9 - stvx vr10,r3,r10 - stvx vr11,r3,r11 + stvx v8,0,r3 + stvx v9,r3,r9 + stvx v10,r3,r10 + stvx v11,r3,r11 addi r3,r3,64 9: bf cr7*4+2,10f - lvx vr1,r0,r4 - vperm vr8,vr0,vr1,vr16 - lvx vr0,r4,r9 - vperm vr9,vr1,vr0,vr16 + lvx v1,0,r4 + VPERM(v8,v0,v1,v16) + lvx v0,r4,r9 + VPERM(v9,v1,v0,v16) addi r4,r4,32 - stvx vr8,r0,r3 - 
stvx vr9,r3,r9 + stvx v8,0,r3 + stvx v9,r3,r9 addi r3,r3,32 10: bf cr7*4+3,11f - lvx vr1,r0,r4 - vperm vr8,vr0,vr1,vr16 + lvx v1,0,r4 + VPERM(v8,v0,v1,v16) addi r4,r4,16 - stvx vr8,r0,r3 + stvx v8,0,r3 addi r3,r3,16 /* Up to 15B to go */ @@ -642,6 +628,6 @@ _GLOBAL(memcpy_power7) stb r0,0(r3) 15: addi r1,r1,STACKFRAMESIZE - ld r3,48(r1) - b .exit_vmx_copy /* tail call optimise */ -#endif /* CONFiG_ALTIVEC */ + ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1) + b CFUNC(exit_vmx_ops) /* tail call optimise */ +#endif /* CONFIG_ALTIVEC */ diff --git a/arch/powerpc/lib/pmem.c b/arch/powerpc/lib/pmem.c new file mode 100644 index 000000000000..4e724c4c01ad --- /dev/null +++ b/arch/powerpc/lib/pmem.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright(c) 2017 IBM Corporation. All rights reserved. + */ + +#include <linux/string.h> +#include <linux/export.h> +#include <linux/uaccess.h> +#include <linux/libnvdimm.h> + +#include <asm/cacheflush.h> + +static inline void __clean_pmem_range(unsigned long start, unsigned long stop) +{ + unsigned long shift = l1_dcache_shift(); + unsigned long bytes = l1_dcache_bytes(); + void *addr = (void *)(start & ~(bytes - 1)); + unsigned long size = stop - (unsigned long)addr + (bytes - 1); + unsigned long i; + + for (i = 0; i < size >> shift; i++, addr += bytes) + asm volatile(PPC_DCBSTPS(%0, %1): :"i"(0), "r"(addr): "memory"); +} + +static inline void __flush_pmem_range(unsigned long start, unsigned long stop) +{ + unsigned long shift = l1_dcache_shift(); + unsigned long bytes = l1_dcache_bytes(); + void *addr = (void *)(start & ~(bytes - 1)); + unsigned long size = stop - (unsigned long)addr + (bytes - 1); + unsigned long i; + + for (i = 0; i < size >> shift; i++, addr += bytes) + asm volatile(PPC_DCBFPS(%0, %1): :"i"(0), "r"(addr): "memory"); +} + +static inline void clean_pmem_range(unsigned long start, unsigned long stop) +{ + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + return __clean_pmem_range(start, stop); +} + +static inline void flush_pmem_range(unsigned long start, unsigned long stop) +{ + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + return __flush_pmem_range(start, stop); +} + +/* + * CONFIG_ARCH_HAS_PMEM_API symbols + */ +void arch_wb_cache_pmem(void *addr, size_t size) +{ + unsigned long start = (unsigned long) addr; + clean_pmem_range(start, start + size); +} +EXPORT_SYMBOL_GPL(arch_wb_cache_pmem); + +void arch_invalidate_pmem(void *addr, size_t size) +{ + unsigned long start = (unsigned long) addr; + flush_pmem_range(start, start + size); +} +EXPORT_SYMBOL_GPL(arch_invalidate_pmem); + +/* + * CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE symbols + */ +long __copy_from_user_flushcache(void *dest, const void __user *src, + unsigned size) +{ + unsigned long copied, start = (unsigned long) dest; + + copied = __copy_from_user(dest, src, size); + clean_pmem_range(start, start + size); + + return copied; +} + +void memcpy_flushcache(void *dest, const void *src, size_t size) +{ + unsigned long start = (unsigned long) dest; + + memcpy(dest, src, size); + clean_pmem_range(start, start + size); +} +EXPORT_SYMBOL(memcpy_flushcache); diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c new file mode 100644 index 000000000000..95ab4cdf582e --- /dev/null +++ b/arch/powerpc/lib/qspinlock.c @@ -0,0 +1,998 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#include <linux/bug.h> +#include <linux/compiler.h> +#include <linux/export.h> +#include <linux/percpu.h> +#include <linux/processor.h> +#include <linux/smp.h> +#include <linux/topology.h> 
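The new pmem.c above rounds the requested range out to whole L1 cache lines before issuing one dcbstps/dcbfps per line. A small worked example of that arithmetic, assuming a 128-byte line (the helper name and the hard-coded geometry are illustrative, standing in for l1_dcache_bytes()/l1_dcache_shift()):

        #include <stdint.h>

        /* how many lines __clean_pmem_range() would touch for [start, stop) */
        static unsigned long pmem_lines_touched(unsigned long start, unsigned long stop)
        {
                unsigned long bytes = 128;                      /* l1_dcache_bytes() */
                unsigned long shift = 7;                        /* l1_dcache_shift() */
                unsigned long addr = start & ~(bytes - 1);      /* round start down */
                unsigned long size = stop - addr + (bytes - 1); /* round the span up */

                return size >> shift;   /* e.g. start=0x1005, stop=0x1085 -> 2 lines */
        }

memcpy_flushcache() and __copy_from_user_flushcache() then reduce to "copy, then clean the destination range", which is why both simply call clean_pmem_range() on the destination after the copy.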
+#include <linux/sched/clock.h> +#include <asm/qspinlock.h> +#include <asm/paravirt.h> +#include <trace/events/lock.h> + +#define MAX_NODES 4 + +struct qnode { + struct qnode *next; + struct qspinlock *lock; + int cpu; + u8 sleepy; /* 1 if the previous vCPU was preempted or + * if the previous node was sleepy */ + u8 locked; /* 1 if lock acquired */ +}; + +struct qnodes { + int count; + struct qnode nodes[MAX_NODES]; +}; + +/* Tuning parameters */ +static int steal_spins __read_mostly = (1 << 5); +static int remote_steal_spins __read_mostly = (1 << 2); +#if _Q_SPIN_TRY_LOCK_STEAL == 1 +static const bool maybe_stealers = true; +#else +static bool maybe_stealers __read_mostly = true; +#endif +static int head_spins __read_mostly = (1 << 8); + +static bool pv_yield_owner __read_mostly = true; +static bool pv_yield_allow_steal __read_mostly = false; +static bool pv_spin_on_preempted_owner __read_mostly = false; +static bool pv_sleepy_lock __read_mostly = true; +static bool pv_sleepy_lock_sticky __read_mostly = false; +static u64 pv_sleepy_lock_interval_ns __read_mostly = 0; +static int pv_sleepy_lock_factor __read_mostly = 256; +static bool pv_yield_prev __read_mostly = true; +static bool pv_yield_sleepy_owner __read_mostly = true; +static bool pv_prod_head __read_mostly = false; + +static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes); +static DEFINE_PER_CPU_ALIGNED(u64, sleepy_lock_seen_clock); + +#if _Q_SPIN_SPEC_BARRIER == 1 +#define spec_barrier() do { asm volatile("ori 31,31,0" ::: "memory"); } while (0) +#else +#define spec_barrier() do { } while (0) +#endif + +static __always_inline bool recently_sleepy(void) +{ + /* pv_sleepy_lock is true when this is called */ + if (pv_sleepy_lock_interval_ns) { + u64 seen = this_cpu_read(sleepy_lock_seen_clock); + + if (seen) { + u64 delta = sched_clock() - seen; + if (delta < pv_sleepy_lock_interval_ns) + return true; + this_cpu_write(sleepy_lock_seen_clock, 0); + } + } + + return false; +} + +static __always_inline int get_steal_spins(bool paravirt, bool sleepy) +{ + if (paravirt && sleepy) + return steal_spins * pv_sleepy_lock_factor; + else + return steal_spins; +} + +static __always_inline int get_remote_steal_spins(bool paravirt, bool sleepy) +{ + if (paravirt && sleepy) + return remote_steal_spins * pv_sleepy_lock_factor; + else + return remote_steal_spins; +} + +static __always_inline int get_head_spins(bool paravirt, bool sleepy) +{ + if (paravirt && sleepy) + return head_spins * pv_sleepy_lock_factor; + else + return head_spins; +} + +static inline u32 encode_tail_cpu(int cpu) +{ + return (cpu + 1) << _Q_TAIL_CPU_OFFSET; +} + +static inline int decode_tail_cpu(u32 val) +{ + return (val >> _Q_TAIL_CPU_OFFSET) - 1; +} + +static inline int get_owner_cpu(u32 val) +{ + return (val & _Q_OWNER_CPU_MASK) >> _Q_OWNER_CPU_OFFSET; +} + +/* + * Try to acquire the lock if it was not already locked. If the tail matches + * mytail then clear it, otherwise leave it unchnaged. Return previous value. + * + * This is used by the head of the queue to acquire the lock and clean up + * its tail if it was the last one queued. + */ +static __always_inline u32 trylock_clean_tail(struct qspinlock *lock, u32 tail) +{ + u32 newval = queued_spin_encode_locked_val(); + u32 prev, tmp; + + asm volatile( +"1: lwarx %0,0,%2,%7 # trylock_clean_tail \n" + /* This test is necessary if there could be stealers */ +" andi. 
%1,%0,%5 \n" +" bne 3f \n" + /* Test whether the lock tail == mytail */ +" and %1,%0,%6 \n" +" cmpw 0,%1,%3 \n" + /* Merge the new locked value */ +" or %1,%1,%4 \n" +" bne 2f \n" + /* If the lock tail matched, then clear it, otherwise leave it. */ +" andc %1,%1,%6 \n" +"2: stwcx. %1,0,%2 \n" +" bne- 1b \n" +"\t" PPC_ACQUIRE_BARRIER " \n" +"3: \n" + : "=&r" (prev), "=&r" (tmp) + : "r" (&lock->val), "r"(tail), "r" (newval), + "i" (_Q_LOCKED_VAL), + "r" (_Q_TAIL_CPU_MASK), + "i" (_Q_SPIN_EH_HINT) + : "cr0", "memory"); + + return prev; +} + +/* + * Publish our tail, replacing previous tail. Return previous value. + * + * This provides a release barrier for publishing node, this pairs with the + * acquire barrier in get_tail_qnode() when the next CPU finds this tail + * value. + */ +static __always_inline u32 publish_tail_cpu(struct qspinlock *lock, u32 tail) +{ + u32 prev, tmp; + + kcsan_release(); + + asm volatile( +"\t" PPC_RELEASE_BARRIER " \n" +"1: lwarx %0,0,%2 # publish_tail_cpu \n" +" andc %1,%0,%4 \n" +" or %1,%1,%3 \n" +" stwcx. %1,0,%2 \n" +" bne- 1b \n" + : "=&r" (prev), "=&r"(tmp) + : "r" (&lock->val), "r" (tail), "r"(_Q_TAIL_CPU_MASK) + : "cr0", "memory"); + + return prev; +} + +static __always_inline u32 set_mustq(struct qspinlock *lock) +{ + u32 prev; + + asm volatile( +"1: lwarx %0,0,%1 # set_mustq \n" +" or %0,%0,%2 \n" +" stwcx. %0,0,%1 \n" +" bne- 1b \n" + : "=&r" (prev) + : "r" (&lock->val), "r" (_Q_MUST_Q_VAL) + : "cr0", "memory"); + + return prev; +} + +static __always_inline u32 clear_mustq(struct qspinlock *lock) +{ + u32 prev; + + asm volatile( +"1: lwarx %0,0,%1 # clear_mustq \n" +" andc %0,%0,%2 \n" +" stwcx. %0,0,%1 \n" +" bne- 1b \n" + : "=&r" (prev) + : "r" (&lock->val), "r" (_Q_MUST_Q_VAL) + : "cr0", "memory"); + + return prev; +} + +static __always_inline bool try_set_sleepy(struct qspinlock *lock, u32 old) +{ + u32 prev; + u32 new = old | _Q_SLEEPY_VAL; + + BUG_ON(!(old & _Q_LOCKED_VAL)); + BUG_ON(old & _Q_SLEEPY_VAL); + + asm volatile( +"1: lwarx %0,0,%1 # try_set_sleepy \n" +" cmpw 0,%0,%2 \n" +" bne- 2f \n" +" stwcx. %3,0,%1 \n" +" bne- 1b \n" +"2: \n" + : "=&r" (prev) + : "r" (&lock->val), "r"(old), "r" (new) + : "cr0", "memory"); + + return likely(prev == old); +} + +static __always_inline void seen_sleepy_owner(struct qspinlock *lock, u32 val) +{ + if (pv_sleepy_lock) { + if (pv_sleepy_lock_interval_ns) + this_cpu_write(sleepy_lock_seen_clock, sched_clock()); + if (!(val & _Q_SLEEPY_VAL)) + try_set_sleepy(lock, val); + } +} + +static __always_inline void seen_sleepy_lock(void) +{ + if (pv_sleepy_lock && pv_sleepy_lock_interval_ns) + this_cpu_write(sleepy_lock_seen_clock, sched_clock()); +} + +static __always_inline void seen_sleepy_node(void) +{ + if (pv_sleepy_lock) { + if (pv_sleepy_lock_interval_ns) + this_cpu_write(sleepy_lock_seen_clock, sched_clock()); + /* Don't set sleepy because we likely have a stale val */ + } +} + +static struct qnode *get_tail_qnode(struct qspinlock *lock, int prev_cpu) +{ + struct qnodes *qnodesp = per_cpu_ptr(&qnodes, prev_cpu); + int idx; + + /* + * After publishing the new tail and finding a previous tail in the + * previous val (which is the control dependency), this barrier + * orders the release barrier in publish_tail_cpu performed by the + * last CPU, with subsequently looking at its qnode structures + * after the barrier. 
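publish_tail_cpu() above is a larx/stcx. loop that swaps the tail field of the 32-bit lock word and hands back the previous value. Expressed with generic C11 atomics as a sketch only (uint32_t, the mask parameter and the function name are placeholders, and the real code additionally issues kcsan_release() and an lwsync release barrier before the loop):

        #include <stdatomic.h>
        #include <stdint.h>

        static uint32_t publish_tail_model(_Atomic uint32_t *val, uint32_t tail,
                                           uint32_t tail_mask)
        {
                uint32_t old = atomic_load_explicit(val, memory_order_relaxed);
                uint32_t new;

                do {
                        /* replace the tail field, keep lock/mustq/sleepy bits */
                        new = (old & ~tail_mask) | tail;
                } while (!atomic_compare_exchange_weak_explicit(val, &old, new,
                                                                memory_order_release,
                                                                memory_order_relaxed));
                return old;     /* previous value, as the asm returns in %0 */
        }

The tail value itself is just (cpu + 1) << _Q_TAIL_CPU_OFFSET, as encode_tail_cpu() shows, so a zero tail field means "no queue".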
+ */ + smp_acquire__after_ctrl_dep(); + + for (idx = 0; idx < MAX_NODES; idx++) { + struct qnode *qnode = &qnodesp->nodes[idx]; + if (qnode->lock == lock) + return qnode; + } + + BUG(); +} + +/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */ +static __always_inline bool __yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool mustq) +{ + int owner; + u32 yield_count; + bool preempted = false; + + BUG_ON(!(val & _Q_LOCKED_VAL)); + + if (!paravirt) + goto relax; + + if (!pv_yield_owner) + goto relax; + + owner = get_owner_cpu(val); + yield_count = yield_count_of(owner); + + if ((yield_count & 1) == 0) + goto relax; /* owner vcpu is running */ + + spin_end(); + + seen_sleepy_owner(lock, val); + preempted = true; + + /* + * Read the lock word after sampling the yield count. On the other side + * there may a wmb because the yield count update is done by the + * hypervisor preemption and the value update by the OS, however this + * ordering might reduce the chance of out of order accesses and + * improve the heuristic. + */ + smp_rmb(); + + if (READ_ONCE(lock->val) == val) { + if (mustq) + clear_mustq(lock); + yield_to_preempted(owner, yield_count); + if (mustq) + set_mustq(lock); + spin_begin(); + + /* Don't relax if we yielded. Maybe we should? */ + return preempted; + } + spin_begin(); +relax: + spin_cpu_relax(); + + return preempted; +} + +/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */ +static __always_inline bool yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt) +{ + return __yield_to_locked_owner(lock, val, paravirt, false); +} + +/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */ +static __always_inline bool yield_head_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt) +{ + bool mustq = false; + + if ((val & _Q_MUST_Q_VAL) && pv_yield_allow_steal) + mustq = true; + + return __yield_to_locked_owner(lock, val, paravirt, mustq); +} + +static __always_inline void propagate_sleepy(struct qnode *node, u32 val, bool paravirt) +{ + struct qnode *next; + int owner; + + if (!paravirt) + return; + if (!pv_yield_sleepy_owner) + return; + + next = READ_ONCE(node->next); + if (!next) + return; + + if (next->sleepy) + return; + + owner = get_owner_cpu(val); + if (vcpu_is_preempted(owner)) + next->sleepy = 1; +} + +/* Called inside spin_begin() */ +static __always_inline bool yield_to_prev(struct qspinlock *lock, struct qnode *node, int prev_cpu, bool paravirt) +{ + u32 yield_count; + bool preempted = false; + + if (!paravirt) + goto relax; + + if (!pv_yield_sleepy_owner) + goto yield_prev; + + /* + * If the previous waiter was preempted it might not be able to + * propagate sleepy to us, so check the lock in that case too. + */ + if (node->sleepy || vcpu_is_preempted(prev_cpu)) { + u32 val = READ_ONCE(lock->val); + + if (val & _Q_LOCKED_VAL) { + if (node->next && !node->next->sleepy) { + /* + * Propagate sleepy to next waiter. Only if + * owner is preempted, which allows the queue + * to become "non-sleepy" if vCPU preemption + * ceases to occur, even if the lock remains + * highly contended. 
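The paravirt yield helpers above key off the low bit of the hypervisor-maintained yield count: an even value is treated as "the owner vCPU is currently running", so the waiter just relaxes, while an odd value triggers a directed yield after re-checking the lock word. A tiny sketch of that test (the function name is made up; yield_count_of() and yield_to_preempted() are the hooks the real code uses):

        /* parity test applied before yielding to a lock owner's vCPU */
        static inline int owner_looks_preempted(unsigned int yield_count)
        {
                return (yield_count & 1) != 0;  /* even => owner vCPU is running */
        }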
+ */ + if (vcpu_is_preempted(get_owner_cpu(val))) + node->next->sleepy = 1; + } + + preempted = yield_to_locked_owner(lock, val, paravirt); + if (preempted) + return preempted; + } + node->sleepy = false; + } + +yield_prev: + if (!pv_yield_prev) + goto relax; + + yield_count = yield_count_of(prev_cpu); + if ((yield_count & 1) == 0) + goto relax; /* owner vcpu is running */ + + spin_end(); + + preempted = true; + seen_sleepy_node(); + + smp_rmb(); /* See __yield_to_locked_owner comment */ + + if (!READ_ONCE(node->locked)) { + yield_to_preempted(prev_cpu, yield_count); + spin_begin(); + return preempted; + } + spin_begin(); + +relax: + spin_cpu_relax(); + + return preempted; +} + +static __always_inline bool steal_break(u32 val, int iters, bool paravirt, bool sleepy) +{ + if (iters >= get_steal_spins(paravirt, sleepy)) + return true; + + if (IS_ENABLED(CONFIG_NUMA) && + (iters >= get_remote_steal_spins(paravirt, sleepy))) { + int cpu = get_owner_cpu(val); + if (numa_node_id() != cpu_to_node(cpu)) + return true; + } + return false; +} + +static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt) +{ + bool seen_preempted = false; + bool sleepy = false; + int iters = 0; + u32 val; + + if (!steal_spins) { + /* XXX: should spin_on_preempted_owner do anything here? */ + return false; + } + + /* Attempt to steal the lock */ + spin_begin(); + do { + bool preempted = false; + + val = READ_ONCE(lock->val); + if (val & _Q_MUST_Q_VAL) + break; + spec_barrier(); + + if (unlikely(!(val & _Q_LOCKED_VAL))) { + spin_end(); + if (__queued_spin_trylock_steal(lock)) + return true; + spin_begin(); + } else { + preempted = yield_to_locked_owner(lock, val, paravirt); + } + + if (paravirt && pv_sleepy_lock) { + if (!sleepy) { + if (val & _Q_SLEEPY_VAL) { + seen_sleepy_lock(); + sleepy = true; + } else if (recently_sleepy()) { + sleepy = true; + } + } + if (pv_sleepy_lock_sticky && seen_preempted && + !(val & _Q_SLEEPY_VAL)) { + if (try_set_sleepy(lock, val)) + val |= _Q_SLEEPY_VAL; + } + } + + if (preempted) { + seen_preempted = true; + sleepy = true; + if (!pv_spin_on_preempted_owner) + iters++; + /* + * pv_spin_on_preempted_owner don't increase iters + * while the owner is preempted -- we won't interfere + * with it by definition. This could introduce some + * latency issue if we continually observe preempted + * owners, but hopefully that's a rare corner case of + * a badly oversubscribed system. + */ + } else { + iters++; + } + } while (!steal_break(val, iters, paravirt, sleepy)); + + spin_end(); + + return false; +} + +static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, bool paravirt) +{ + struct qnodes *qnodesp; + struct qnode *next, *node; + u32 val, old, tail; + bool seen_preempted = false; + bool sleepy = false; + bool mustq = false; + int idx; + int iters = 0; + + BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS)); + + qnodesp = this_cpu_ptr(&qnodes); + if (unlikely(qnodesp->count >= MAX_NODES)) { + spec_barrier(); + while (!queued_spin_trylock(lock)) + cpu_relax(); + return; + } + + idx = qnodesp->count++; + /* + * Ensure that we increment the head node->count before initialising + * the actual node. If the compiler is kind enough to reorder these + * stores, then an IRQ could overwrite our assignments. 
+ */ + barrier(); + node = &qnodesp->nodes[idx]; + node->next = NULL; + node->lock = lock; + node->cpu = smp_processor_id(); + node->sleepy = 0; + node->locked = 0; + + tail = encode_tail_cpu(node->cpu); + + /* + * Assign all attributes of a node before it can be published. + * Issues an lwsync, serving as a release barrier, as well as a + * compiler barrier. + */ + old = publish_tail_cpu(lock, tail); + + /* + * If there was a previous node; link it and wait until reaching the + * head of the waitqueue. + */ + if (old & _Q_TAIL_CPU_MASK) { + int prev_cpu = decode_tail_cpu(old); + struct qnode *prev = get_tail_qnode(lock, prev_cpu); + + /* Link @node into the waitqueue. */ + WRITE_ONCE(prev->next, node); + + /* Wait for mcs node lock to be released */ + spin_begin(); + while (!READ_ONCE(node->locked)) { + spec_barrier(); + + if (yield_to_prev(lock, node, prev_cpu, paravirt)) + seen_preempted = true; + } + spec_barrier(); + spin_end(); + + smp_rmb(); /* acquire barrier for the mcs lock */ + + /* + * Generic qspinlocks have this prefetch here, but it seems + * like it could cause additional line transitions because + * the waiter will keep loading from it. + */ + if (_Q_SPIN_PREFETCH_NEXT) { + next = READ_ONCE(node->next); + if (next) + prefetchw(next); + } + } + + /* We're at the head of the waitqueue, wait for the lock. */ +again: + spin_begin(); + for (;;) { + bool preempted; + + val = READ_ONCE(lock->val); + if (!(val & _Q_LOCKED_VAL)) + break; + spec_barrier(); + + if (paravirt && pv_sleepy_lock && maybe_stealers) { + if (!sleepy) { + if (val & _Q_SLEEPY_VAL) { + seen_sleepy_lock(); + sleepy = true; + } else if (recently_sleepy()) { + sleepy = true; + } + } + if (pv_sleepy_lock_sticky && seen_preempted && + !(val & _Q_SLEEPY_VAL)) { + if (try_set_sleepy(lock, val)) + val |= _Q_SLEEPY_VAL; + } + } + + propagate_sleepy(node, val, paravirt); + preempted = yield_head_to_locked_owner(lock, val, paravirt); + if (!maybe_stealers) + continue; + + if (preempted) + seen_preempted = true; + + if (paravirt && preempted) { + sleepy = true; + + if (!pv_spin_on_preempted_owner) + iters++; + } else { + iters++; + } + + if (!mustq && iters >= get_head_spins(paravirt, sleepy)) { + mustq = true; + set_mustq(lock); + val |= _Q_MUST_Q_VAL; + } + } + spec_barrier(); + spin_end(); + + /* If we're the last queued, must clean up the tail. */ + old = trylock_clean_tail(lock, tail); + if (unlikely(old & _Q_LOCKED_VAL)) { + BUG_ON(!maybe_stealers); + goto again; /* Can only be true if maybe_stealers. */ + } + + if ((old & _Q_TAIL_CPU_MASK) == tail) + goto release; /* We were the tail, no next. */ + + /* There is a next, must wait for node->next != NULL (MCS protocol) */ + next = READ_ONCE(node->next); + if (!next) { + spin_begin(); + while (!(next = READ_ONCE(node->next))) + cpu_relax(); + spin_end(); + } + spec_barrier(); + + /* + * Unlock the next mcs waiter node. Release barrier is not required + * here because the acquirer is only accessing the lock word, and + * the acquire barrier we took the lock with orders that update vs + * this store to locked. The corresponding barrier is the smp_rmb() + * acquire barrier for mcs lock, above. 
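queued_spin_lock_mcs_queue() above follows the classic MCS shape: publish yourself as the new tail, link behind the previous waiter if there was one, spin on your own node->locked flag, and on exit either clear the tail (if you were last) or hand the lock to node->next. The following self-contained C11 model is a sketch of that shape under the assumption of a plain pointer tail; the PowerPC code instead packs a CPU number into the 32-bit lock word and layers stealing, must-queue and paravirt yielding on top:

        #include <stdatomic.h>
        #include <stdbool.h>
        #include <stddef.h>

        struct mcs_node {
                _Atomic(struct mcs_node *) next;
                atomic_bool locked;             /* set when the lock is handed to us */
        };

        struct mcs_lock {
                _Atomic(struct mcs_node *) tail;
        };

        static void mcs_acquire(struct mcs_lock *lock, struct mcs_node *node)
        {
                struct mcs_node *prev;

                atomic_store_explicit(&node->next, NULL, memory_order_relaxed);
                atomic_store_explicit(&node->locked, false, memory_order_relaxed);

                /* publish_tail_cpu(): release so the next waiter sees our init */
                prev = atomic_exchange_explicit(&lock->tail, node, memory_order_acq_rel);
                if (prev) {
                        /* link behind the previous waiter, then spin on our flag */
                        atomic_store_explicit(&prev->next, node, memory_order_release);
                        while (!atomic_load_explicit(&node->locked, memory_order_acquire))
                                ;       /* yield_to_prev() would go here */
                }
                /* head of the queue; the kernel now competes for the lock word */
        }

        static void mcs_release(struct mcs_lock *lock, struct mcs_node *node)
        {
                struct mcs_node *next = atomic_load_explicit(&node->next, memory_order_acquire);

                if (!next) {
                        /* no visible successor: try to clear the tail, as
                         * trylock_clean_tail() does for the packed lock word */
                        struct mcs_node *expected = node;

                        if (atomic_compare_exchange_strong_explicit(&lock->tail, &expected, NULL,
                                                                    memory_order_release,
                                                                    memory_order_relaxed))
                                return;
                        /* a successor is linking itself; wait for node->next */
                        while (!(next = atomic_load_explicit(&node->next, memory_order_acquire)))
                                ;
                }
                atomic_store_explicit(&next->locked, true, memory_order_release);
        }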
+ */ + if (paravirt && pv_prod_head) { + int next_cpu = next->cpu; + WRITE_ONCE(next->locked, 1); + if (_Q_SPIN_MISO) + asm volatile("miso" ::: "memory"); + if (vcpu_is_preempted(next_cpu)) + prod_cpu(next_cpu); + } else { + WRITE_ONCE(next->locked, 1); + if (_Q_SPIN_MISO) + asm volatile("miso" ::: "memory"); + } + +release: + /* + * Clear the lock before releasing the node, as another CPU might see stale + * values if an interrupt occurs after we increment qnodesp->count + * but before node->lock is initialized. The barrier ensures that + * there are no further stores to the node after it has been released. + */ + node->lock = NULL; + barrier(); + qnodesp->count--; +} + +void __lockfunc queued_spin_lock_slowpath(struct qspinlock *lock) +{ + trace_contention_begin(lock, LCB_F_SPIN); + /* + * This looks funny, but it induces the compiler to inline both + * sides of the branch rather than share code as when the condition + * is passed as the paravirt argument to the functions. + */ + if (IS_ENABLED(CONFIG_PARAVIRT_SPINLOCKS) && is_shared_processor()) { + if (try_to_steal_lock(lock, true)) + spec_barrier(); + else + queued_spin_lock_mcs_queue(lock, true); + } else { + if (try_to_steal_lock(lock, false)) + spec_barrier(); + else + queued_spin_lock_mcs_queue(lock, false); + } + trace_contention_end(lock, 0); +} +EXPORT_SYMBOL(queued_spin_lock_slowpath); + +#ifdef CONFIG_PARAVIRT_SPINLOCKS +void pv_spinlocks_init(void) +{ +} +#endif + +#include <linux/debugfs.h> +static int steal_spins_set(void *data, u64 val) +{ +#if _Q_SPIN_TRY_LOCK_STEAL == 1 + /* MAYBE_STEAL remains true */ + steal_spins = val; +#else + static DEFINE_MUTEX(lock); + + /* + * The lock slow path has a !maybe_stealers case that can assume + * the head of queue will not see concurrent waiters. That waiter + * is unsafe in the presence of stealers, so must keep them away + * from one another. 
+ */ + + mutex_lock(&lock); + if (val && !steal_spins) { + maybe_stealers = true; + /* wait for queue head waiter to go away */ + synchronize_rcu(); + steal_spins = val; + } else if (!val && steal_spins) { + steal_spins = val; + /* wait for all possible stealers to go away */ + synchronize_rcu(); + maybe_stealers = false; + } else { + steal_spins = val; + } + mutex_unlock(&lock); +#endif + + return 0; +} + +static int steal_spins_get(void *data, u64 *val) +{ + *val = steal_spins; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_steal_spins, steal_spins_get, steal_spins_set, "%llu\n"); + +static int remote_steal_spins_set(void *data, u64 val) +{ + remote_steal_spins = val; + + return 0; +} + +static int remote_steal_spins_get(void *data, u64 *val) +{ + *val = remote_steal_spins; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_remote_steal_spins, remote_steal_spins_get, remote_steal_spins_set, "%llu\n"); + +static int head_spins_set(void *data, u64 val) +{ + head_spins = val; + + return 0; +} + +static int head_spins_get(void *data, u64 *val) +{ + *val = head_spins; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_head_spins, head_spins_get, head_spins_set, "%llu\n"); + +static int pv_yield_owner_set(void *data, u64 val) +{ + pv_yield_owner = !!val; + + return 0; +} + +static int pv_yield_owner_get(void *data, u64 *val) +{ + *val = pv_yield_owner; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_owner, pv_yield_owner_get, pv_yield_owner_set, "%llu\n"); + +static int pv_yield_allow_steal_set(void *data, u64 val) +{ + pv_yield_allow_steal = !!val; + + return 0; +} + +static int pv_yield_allow_steal_get(void *data, u64 *val) +{ + *val = pv_yield_allow_steal; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_allow_steal, pv_yield_allow_steal_get, pv_yield_allow_steal_set, "%llu\n"); + +static int pv_spin_on_preempted_owner_set(void *data, u64 val) +{ + pv_spin_on_preempted_owner = !!val; + + return 0; +} + +static int pv_spin_on_preempted_owner_get(void *data, u64 *val) +{ + *val = pv_spin_on_preempted_owner; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_spin_on_preempted_owner, pv_spin_on_preempted_owner_get, pv_spin_on_preempted_owner_set, "%llu\n"); + +static int pv_sleepy_lock_set(void *data, u64 val) +{ + pv_sleepy_lock = !!val; + + return 0; +} + +static int pv_sleepy_lock_get(void *data, u64 *val) +{ + *val = pv_sleepy_lock; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock, pv_sleepy_lock_get, pv_sleepy_lock_set, "%llu\n"); + +static int pv_sleepy_lock_sticky_set(void *data, u64 val) +{ + pv_sleepy_lock_sticky = !!val; + + return 0; +} + +static int pv_sleepy_lock_sticky_get(void *data, u64 *val) +{ + *val = pv_sleepy_lock_sticky; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_sticky, pv_sleepy_lock_sticky_get, pv_sleepy_lock_sticky_set, "%llu\n"); + +static int pv_sleepy_lock_interval_ns_set(void *data, u64 val) +{ + pv_sleepy_lock_interval_ns = val; + + return 0; +} + +static int pv_sleepy_lock_interval_ns_get(void *data, u64 *val) +{ + *val = pv_sleepy_lock_interval_ns; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_interval_ns, pv_sleepy_lock_interval_ns_get, pv_sleepy_lock_interval_ns_set, "%llu\n"); + +static int pv_sleepy_lock_factor_set(void *data, u64 val) +{ + pv_sleepy_lock_factor = val; + + return 0; +} + +static int pv_sleepy_lock_factor_get(void *data, u64 *val) +{ + *val = pv_sleepy_lock_factor; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_factor, pv_sleepy_lock_factor_get, 
pv_sleepy_lock_factor_set, "%llu\n"); + +static int pv_yield_prev_set(void *data, u64 val) +{ + pv_yield_prev = !!val; + + return 0; +} + +static int pv_yield_prev_get(void *data, u64 *val) +{ + *val = pv_yield_prev; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_prev, pv_yield_prev_get, pv_yield_prev_set, "%llu\n"); + +static int pv_yield_sleepy_owner_set(void *data, u64 val) +{ + pv_yield_sleepy_owner = !!val; + + return 0; +} + +static int pv_yield_sleepy_owner_get(void *data, u64 *val) +{ + *val = pv_yield_sleepy_owner; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_sleepy_owner, pv_yield_sleepy_owner_get, pv_yield_sleepy_owner_set, "%llu\n"); + +static int pv_prod_head_set(void *data, u64 val) +{ + pv_prod_head = !!val; + + return 0; +} + +static int pv_prod_head_get(void *data, u64 *val) +{ + *val = pv_prod_head; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_prod_head, pv_prod_head_get, pv_prod_head_set, "%llu\n"); + +static __init int spinlock_debugfs_init(void) +{ + debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins); + debugfs_create_file("qspl_remote_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_remote_steal_spins); + debugfs_create_file("qspl_head_spins", 0600, arch_debugfs_dir, NULL, &fops_head_spins); + if (is_shared_processor()) { + debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner); + debugfs_create_file("qspl_pv_yield_allow_steal", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_allow_steal); + debugfs_create_file("qspl_pv_spin_on_preempted_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_spin_on_preempted_owner); + debugfs_create_file("qspl_pv_sleepy_lock", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock); + debugfs_create_file("qspl_pv_sleepy_lock_sticky", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_sticky); + debugfs_create_file("qspl_pv_sleepy_lock_interval_ns", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_interval_ns); + debugfs_create_file("qspl_pv_sleepy_lock_factor", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_factor); + debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev); + debugfs_create_file("qspl_pv_yield_sleepy_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_sleepy_owner); + debugfs_create_file("qspl_pv_prod_head", 0600, arch_debugfs_dir, NULL, &fops_pv_prod_head); + } + + return 0; +} +device_initcall(spinlock_debugfs_init); diff --git a/arch/powerpc/lib/quad.S b/arch/powerpc/lib/quad.S new file mode 100644 index 000000000000..da71760e50b5 --- /dev/null +++ b/arch/powerpc/lib/quad.S @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Quadword loads and stores + * for use in instruction emulation. + * + * Copyright 2017 Paul Mackerras, IBM Corp. 
<paulus@au1.ibm.com> + */ + +#include <asm/processor.h> +#include <asm/ppc_asm.h> +#include <asm/ppc-opcode.h> +#include <asm/reg.h> +#include <asm/asm-offsets.h> +#include <linux/errno.h> + +/* do_lq(unsigned long ea, unsigned long *regs) */ +_GLOBAL(do_lq) +1: lq r6, 0(r3) + std r6, 0(r4) + std r7, 8(r4) + li r3, 0 + blr +2: li r3, -EFAULT + blr + EX_TABLE(1b, 2b) + +/* do_stq(unsigned long ea, unsigned long val0, unsigned long val1) */ +_GLOBAL(do_stq) +1: stq r4, 0(r3) + li r3, 0 + blr +2: li r3, -EFAULT + blr + EX_TABLE(1b, 2b) + +/* do_lqarx(unsigned long ea, unsigned long *regs) */ +_GLOBAL(do_lqarx) +1: PPC_LQARX(6, 0, 3, 0) + std r6, 0(r4) + std r7, 8(r4) + li r3, 0 + blr +2: li r3, -EFAULT + blr + EX_TABLE(1b, 2b) + +/* do_stqcx(unsigned long ea, unsigned long val0, unsigned long val1, + unsigned int *crp) */ + +_GLOBAL(do_stqcx) +1: PPC_STQCX(4, 0, 3) + mfcr r5 + stw r5, 0(r6) + li r3, 0 + blr +2: li r3, -EFAULT + blr + EX_TABLE(1b, 2b) diff --git a/arch/powerpc/lib/restart_table.c b/arch/powerpc/lib/restart_table.c new file mode 100644 index 000000000000..bccb662c1b7b --- /dev/null +++ b/arch/powerpc/lib/restart_table.c @@ -0,0 +1,56 @@ +#include <asm/interrupt.h> +#include <asm/kprobes.h> + +struct soft_mask_table_entry { + unsigned long start; + unsigned long end; +}; + +struct restart_table_entry { + unsigned long start; + unsigned long end; + unsigned long fixup; +}; + +extern struct soft_mask_table_entry __start___soft_mask_table[]; +extern struct soft_mask_table_entry __stop___soft_mask_table[]; + +extern struct restart_table_entry __start___restart_table[]; +extern struct restart_table_entry __stop___restart_table[]; + +/* Given an address, look for it in the soft mask table */ +bool search_kernel_soft_mask_table(unsigned long addr) +{ + struct soft_mask_table_entry *smte = __start___soft_mask_table; + + while (smte < __stop___soft_mask_table) { + unsigned long start = smte->start; + unsigned long end = smte->end; + + if (addr >= start && addr < end) + return true; + + smte++; + } + return false; +} +NOKPROBE_SYMBOL(search_kernel_soft_mask_table); + +/* Given an address, look for it in the kernel exception table */ +unsigned long search_kernel_restart_table(unsigned long addr) +{ + struct restart_table_entry *rte = __start___restart_table; + + while (rte < __stop___restart_table) { + unsigned long start = rte->start; + unsigned long end = rte->end; + unsigned long fixup = rte->fixup; + + if (addr >= start && addr < end) + return fixup; + + rte++; + } + return 0; +} +NOKPROBE_SYMBOL(search_kernel_restart_table); diff --git a/arch/powerpc/lib/rheap.c b/arch/powerpc/lib/rheap.c index a1060a868e69..6aa774aa5b16 100644 --- a/arch/powerpc/lib/rheap.c +++ b/arch/powerpc/lib/rheap.c @@ -54,7 +54,7 @@ static int grow(rh_info_t * info, int max_blocks) new_blocks = max_blocks - info->max_blocks; - block = kmalloc(sizeof(rh_block_t) * max_blocks, GFP_ATOMIC); + block = kmalloc_array(max_blocks, sizeof(rh_block_t), GFP_ATOMIC); if (block == NULL) return -ENOMEM; @@ -284,7 +284,7 @@ EXPORT_SYMBOL_GPL(rh_create); */ void rh_destroy(rh_info_t * info) { - if ((info->flags & RHIF_STATIC_BLOCK) == 0 && info->block != NULL) + if ((info->flags & RHIF_STATIC_BLOCK) == 0) kfree(info->block); if ((info->flags & RHIF_STATIC_INFO) == 0) @@ -325,7 +325,7 @@ void rh_init(rh_info_t * info, unsigned int alignment, int max_blocks, } EXPORT_SYMBOL_GPL(rh_init); -/* Attach a free memory region, coalesces regions if adjuscent */ +/* Attach a free memory region, coalesces regions if adjacent */ int 
rh_attach_region(rh_info_t * info, unsigned long start, int size) { rh_block_t *blk; diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 99c7fc16dc0d..ac3ee19531d8 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Single-step support. * * Copyright (C) 2004 Paul Mackerras <paulus@au.ibm.com>, IBM - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/kernel.h> #include <linux/kprobes.h> @@ -14,10 +10,10 @@ #include <linux/prefetch.h> #include <asm/sstep.h> #include <asm/processor.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> +#include <asm/cpu_has_feature.h> #include <asm/cputable.h> - -extern char system_call_common[]; +#include <asm/disassemble.h> #ifdef CONFIG_PPC64 /* Bits in SRR1 that are copied from MSR */ @@ -30,45 +26,71 @@ extern char system_call_common[]; #define XER_SO 0x80000000U #define XER_OV 0x40000000U #define XER_CA 0x20000000U +#define XER_OV32 0x00080000U +#define XER_CA32 0x00040000U + +#ifdef CONFIG_VSX +#define VSX_REGISTER_XTP(rd) ((((rd) & 1) << 5) | ((rd) & 0xfe)) +#endif #ifdef CONFIG_PPC_FPU /* * Functions in ldstfp.S */ -extern int do_lfs(int rn, unsigned long ea); -extern int do_lfd(int rn, unsigned long ea); -extern int do_stfs(int rn, unsigned long ea); -extern int do_stfd(int rn, unsigned long ea); -extern int do_lvx(int rn, unsigned long ea); -extern int do_stvx(int rn, unsigned long ea); -extern int do_lxvd2x(int rn, unsigned long ea); -extern int do_stxvd2x(int rn, unsigned long ea); +extern void get_fpr(int rn, double *p); +extern void put_fpr(int rn, const double *p); +extern void get_vr(int rn, __vector128 *p); +extern void put_vr(int rn, __vector128 *p); +extern void load_vsrn(int vsr, const void *p); +extern void store_vsrn(int vsr, void *p); +extern void conv_sp_to_dp(const float *sp, double *dp); +extern void conv_dp_to_sp(const double *dp, float *sp); +#endif + +#ifdef __powerpc64__ +/* + * Functions in quad.S + */ +extern int do_lq(unsigned long ea, unsigned long *regs); +extern int do_stq(unsigned long ea, unsigned long val0, unsigned long val1); +extern int do_lqarx(unsigned long ea, unsigned long *regs); +extern int do_stqcx(unsigned long ea, unsigned long val0, unsigned long val1, + unsigned int *crp); +#endif + +#ifdef __LITTLE_ENDIAN__ +#define IS_LE 1 +#define IS_BE 0 +#else +#define IS_LE 0 +#define IS_BE 1 #endif /* * Emulate the truncation of 64 bit values in 32-bit mode. */ -static unsigned long truncate_if_32bit(unsigned long msr, unsigned long val) +static nokprobe_inline unsigned long truncate_if_32bit(unsigned long msr, + unsigned long val) { -#ifdef __powerpc64__ if ((msr & MSR_64BIT) == 0) val &= 0xffffffffUL; -#endif return val; } /* * Determine whether a conditional branch instruction would branch. 
*/ -static int __kprobes branch_taken(unsigned int instr, struct pt_regs *regs) +static nokprobe_inline int branch_taken(unsigned int instr, + const struct pt_regs *regs, + struct instruction_op *op) { unsigned int bo = (instr >> 21) & 0x1f; unsigned int bi; if ((bo & 4) == 0) { /* decrement counter */ - --regs->ctr; - if (((bo >> 1) & 1) ^ (regs->ctr == 0)) + op->type |= DECCTR; + if (((bo >> 1) & 1) ^ (regs->ctr == 1)) return 0; } if ((bo & 0x10) == 0) { @@ -80,59 +102,79 @@ static int __kprobes branch_taken(unsigned int instr, struct pt_regs *regs) return 1; } - -static long __kprobes address_ok(struct pt_regs *regs, unsigned long ea, int nb) +static nokprobe_inline long address_ok(struct pt_regs *regs, + unsigned long ea, int nb) { if (!user_mode(regs)) return 1; - return __access_ok(ea, nb, USER_DS); + if (access_ok((void __user *)ea, nb)) + return 1; + if (access_ok((void __user *)ea, 1)) + /* Access overlaps the end of the user region */ + regs->dar = TASK_SIZE_MAX - 1; + else + regs->dar = ea; + return 0; } /* * Calculate effective address for a D-form instruction */ -static unsigned long __kprobes dform_ea(unsigned int instr, struct pt_regs *regs) +static nokprobe_inline unsigned long dform_ea(unsigned int instr, + const struct pt_regs *regs) { int ra; unsigned long ea; ra = (instr >> 16) & 0x1f; ea = (signed short) instr; /* sign-extend */ - if (ra) { + if (ra) ea += regs->gpr[ra]; - if (instr & 0x04000000) /* update forms */ - regs->gpr[ra] = ea; - } - return truncate_if_32bit(regs->msr, ea); + return ea; } #ifdef __powerpc64__ /* * Calculate effective address for a DS-form instruction */ -static unsigned long __kprobes dsform_ea(unsigned int instr, struct pt_regs *regs) +static nokprobe_inline unsigned long dsform_ea(unsigned int instr, + const struct pt_regs *regs) { int ra; unsigned long ea; ra = (instr >> 16) & 0x1f; ea = (signed short) (instr & ~3); /* sign-extend */ - if (ra) { + if (ra) + ea += regs->gpr[ra]; + + return ea; +} + +/* + * Calculate effective address for a DQ-form instruction + */ +static nokprobe_inline unsigned long dqform_ea(unsigned int instr, + const struct pt_regs *regs) +{ + int ra; + unsigned long ea; + + ra = (instr >> 16) & 0x1f; + ea = (signed short) (instr & ~0xf); /* sign-extend */ + if (ra) ea += regs->gpr[ra]; - if ((instr & 3) == 1) /* update forms */ - regs->gpr[ra] = ea; - } - return truncate_if_32bit(regs->msr, ea); + return ea; } #endif /* __powerpc64 */ /* * Calculate effective address for an X-form instruction */ -static unsigned long __kprobes xform_ea(unsigned int instr, struct pt_regs *regs, - int do_update) +static nokprobe_inline unsigned long xform_ea(unsigned int instr, + const struct pt_regs *regs) { int ra, rb; unsigned long ea; @@ -140,314 +182,942 @@ static unsigned long __kprobes xform_ea(unsigned int instr, struct pt_regs *regs ra = (instr >> 16) & 0x1f; rb = (instr >> 11) & 0x1f; ea = regs->gpr[rb]; - if (ra) { + if (ra) ea += regs->gpr[ra]; - if (do_update) /* update forms */ - regs->gpr[ra] = ea; - } - return truncate_if_32bit(regs->msr, ea); + return ea; +} + +/* + * Calculate effective address for a MLS:D-form / 8LS:D-form + * prefixed instruction + */ +static nokprobe_inline unsigned long mlsd_8lsd_ea(unsigned int instr, + unsigned int suffix, + const struct pt_regs *regs) +{ + int ra, prefix_r; + unsigned int dd; + unsigned long ea, d0, d1, d; + + prefix_r = GET_PREFIX_R(instr); + ra = GET_PREFIX_RA(suffix); + + d0 = instr & 0x3ffff; + d1 = suffix & 0xffff; + d = (d0 << 16) | d1; + + /* + * sign extend a 34 bit 
number + */ + dd = (unsigned int)(d >> 2); + ea = (signed int)dd; + ea = (ea << 2) | (d & 0x3); + + if (!prefix_r && ra) + ea += regs->gpr[ra]; + else if (!prefix_r && !ra) + ; /* Leave ea as is */ + else if (prefix_r) + ea += regs->nip; + + /* + * (prefix_r && ra) is an invalid form. Should already be + * checked for by caller! + */ + + return ea; } /* * Return the largest power of 2, not greater than sizeof(unsigned long), * such that x is a multiple of it. */ -static inline unsigned long max_align(unsigned long x) +static nokprobe_inline unsigned long max_align(unsigned long x) { x |= sizeof(unsigned long); return x & -x; /* isolates rightmost bit */ } - -static inline unsigned long byterev_2(unsigned long x) +static nokprobe_inline unsigned long byterev_2(unsigned long x) { return ((x >> 8) & 0xff) | ((x & 0xff) << 8); } -static inline unsigned long byterev_4(unsigned long x) +static nokprobe_inline unsigned long byterev_4(unsigned long x) { return ((x >> 24) & 0xff) | ((x >> 8) & 0xff00) | ((x & 0xff00) << 8) | ((x & 0xff) << 24); } #ifdef __powerpc64__ -static inline unsigned long byterev_8(unsigned long x) +static nokprobe_inline unsigned long byterev_8(unsigned long x) { return (byterev_4(x) << 32) | byterev_4(x >> 32); } #endif -static int __kprobes read_mem_aligned(unsigned long *dest, unsigned long ea, - int nb) +static nokprobe_inline void do_byte_reverse(void *ptr, int nb) +{ + switch (nb) { + case 2: + *(u16 *)ptr = byterev_2(*(u16 *)ptr); + break; + case 4: + *(u32 *)ptr = byterev_4(*(u32 *)ptr); + break; +#ifdef __powerpc64__ + case 8: + *(unsigned long *)ptr = byterev_8(*(unsigned long *)ptr); + break; + case 16: { + unsigned long *up = (unsigned long *)ptr; + unsigned long tmp; + tmp = byterev_8(up[0]); + up[0] = byterev_8(up[1]); + up[1] = tmp; + break; + } + case 32: { + unsigned long *up = (unsigned long *)ptr; + unsigned long tmp; + + tmp = byterev_8(up[0]); + up[0] = byterev_8(up[3]); + up[3] = tmp; + tmp = byterev_8(up[2]); + up[2] = byterev_8(up[1]); + up[1] = tmp; + break; + } + +#endif + default: + WARN_ON_ONCE(1); + } +} + +static __always_inline int +__read_mem_aligned(unsigned long *dest, unsigned long ea, int nb, struct pt_regs *regs) { - int err = 0; unsigned long x = 0; switch (nb) { case 1: - err = __get_user(x, (unsigned char __user *) ea); + unsafe_get_user(x, (unsigned char __user *)ea, Efault); break; case 2: - err = __get_user(x, (unsigned short __user *) ea); + unsafe_get_user(x, (unsigned short __user *)ea, Efault); break; case 4: - err = __get_user(x, (unsigned int __user *) ea); + unsafe_get_user(x, (unsigned int __user *)ea, Efault); break; #ifdef __powerpc64__ case 8: - err = __get_user(x, (unsigned long __user *) ea); + unsafe_get_user(x, (unsigned long __user *)ea, Efault); break; #endif } - if (!err) - *dest = x; - return err; + *dest = x; + return 0; + +Efault: + regs->dar = ea; + return -EFAULT; } -static int __kprobes read_mem_unaligned(unsigned long *dest, unsigned long ea, - int nb, struct pt_regs *regs) +static nokprobe_inline int +read_mem_aligned(unsigned long *dest, unsigned long ea, int nb, struct pt_regs *regs) { int err; - unsigned long x, b, c; - /* unaligned, do this in pieces */ - x = 0; + if (is_kernel_addr(ea)) + return __read_mem_aligned(dest, ea, nb, regs); + + if (user_read_access_begin((void __user *)ea, nb)) { + err = __read_mem_aligned(dest, ea, nb, regs); + user_read_access_end(); + } else { + err = -EFAULT; + regs->dar = ea; + } + + return err; +} + +/* + * Copy from userspace to a buffer, using the largest possible + 
* aligned accesses, up to sizeof(long). + */ +static __always_inline int __copy_mem_in(u8 *dest, unsigned long ea, int nb, struct pt_regs *regs) +{ + int c; + for (; nb > 0; nb -= c) { c = max_align(ea); if (c > nb) c = max_align(nb); - err = read_mem_aligned(&b, ea, c); - if (err) - return err; - x = (x << (8 * c)) + b; + switch (c) { + case 1: + unsafe_get_user(*dest, (u8 __user *)ea, Efault); + break; + case 2: + unsafe_get_user(*(u16 *)dest, (u16 __user *)ea, Efault); + break; + case 4: + unsafe_get_user(*(u32 *)dest, (u32 __user *)ea, Efault); + break; +#ifdef __powerpc64__ + case 8: + unsafe_get_user(*(u64 *)dest, (u64 __user *)ea, Efault); + break; +#endif + } + dest += c; ea += c; } - *dest = x; return 0; + +Efault: + regs->dar = ea; + return -EFAULT; +} + +static nokprobe_inline int copy_mem_in(u8 *dest, unsigned long ea, int nb, struct pt_regs *regs) +{ + int err; + + if (is_kernel_addr(ea)) + return __copy_mem_in(dest, ea, nb, regs); + + if (user_read_access_begin((void __user *)ea, nb)) { + err = __copy_mem_in(dest, ea, nb, regs); + user_read_access_end(); + } else { + err = -EFAULT; + regs->dar = ea; + } + + return err; +} + +static nokprobe_inline int read_mem_unaligned(unsigned long *dest, + unsigned long ea, int nb, + struct pt_regs *regs) +{ + union { + unsigned long ul; + u8 b[sizeof(unsigned long)]; + } u; + int i; + int err; + + u.ul = 0; + i = IS_BE ? sizeof(unsigned long) - nb : 0; + err = copy_mem_in(&u.b[i], ea, nb, regs); + if (!err) + *dest = u.ul; + return err; } /* * Read memory at address ea for nb bytes, return 0 for success - * or -EFAULT if an error occurred. + * or -EFAULT if an error occurred. N.B. nb must be 1, 2, 4 or 8. + * If nb < sizeof(long), the result is right-justified on BE systems. */ -static int __kprobes read_mem(unsigned long *dest, unsigned long ea, int nb, +static int read_mem(unsigned long *dest, unsigned long ea, int nb, struct pt_regs *regs) { if (!address_ok(regs, ea, nb)) return -EFAULT; if ((ea & (nb - 1)) == 0) - return read_mem_aligned(dest, ea, nb); + return read_mem_aligned(dest, ea, nb, regs); return read_mem_unaligned(dest, ea, nb, regs); } +NOKPROBE_SYMBOL(read_mem); -static int __kprobes write_mem_aligned(unsigned long val, unsigned long ea, - int nb) +static __always_inline int +__write_mem_aligned(unsigned long val, unsigned long ea, int nb, struct pt_regs *regs) { - int err = 0; - switch (nb) { case 1: - err = __put_user(val, (unsigned char __user *) ea); + unsafe_put_user(val, (unsigned char __user *)ea, Efault); break; case 2: - err = __put_user(val, (unsigned short __user *) ea); + unsafe_put_user(val, (unsigned short __user *)ea, Efault); break; case 4: - err = __put_user(val, (unsigned int __user *) ea); + unsafe_put_user(val, (unsigned int __user *)ea, Efault); break; #ifdef __powerpc64__ case 8: - err = __put_user(val, (unsigned long __user *) ea); + unsafe_put_user(val, (unsigned long __user *)ea, Efault); break; #endif } - return err; + return 0; + +Efault: + regs->dar = ea; + return -EFAULT; } -static int __kprobes write_mem_unaligned(unsigned long val, unsigned long ea, - int nb, struct pt_regs *regs) +static nokprobe_inline int +write_mem_aligned(unsigned long val, unsigned long ea, int nb, struct pt_regs *regs) { int err; - unsigned long c; - /* unaligned or little-endian, do this in pieces */ + if (is_kernel_addr(ea)) + return __write_mem_aligned(val, ea, nb, regs); + + if (user_write_access_begin((void __user *)ea, nb)) { + err = __write_mem_aligned(val, ea, nb, regs); + user_write_access_end(); + } else { + 
err = -EFAULT; + regs->dar = ea; + } + + return err; +} + +/* + * Copy from a buffer to userspace, using the largest possible + * aligned accesses, up to sizeof(long). + */ +static __always_inline int __copy_mem_out(u8 *dest, unsigned long ea, int nb, struct pt_regs *regs) +{ + int c; + for (; nb > 0; nb -= c) { c = max_align(ea); if (c > nb) c = max_align(nb); - err = write_mem_aligned(val >> (nb - c) * 8, ea, c); - if (err) - return err; - ++ea; + switch (c) { + case 1: + unsafe_put_user(*dest, (u8 __user *)ea, Efault); + break; + case 2: + unsafe_put_user(*(u16 *)dest, (u16 __user *)ea, Efault); + break; + case 4: + unsafe_put_user(*(u32 *)dest, (u32 __user *)ea, Efault); + break; +#ifdef __powerpc64__ + case 8: + unsafe_put_user(*(u64 *)dest, (u64 __user *)ea, Efault); + break; +#endif + } + dest += c; + ea += c; } return 0; + +Efault: + regs->dar = ea; + return -EFAULT; +} + +static nokprobe_inline int copy_mem_out(u8 *dest, unsigned long ea, int nb, struct pt_regs *regs) +{ + int err; + + if (is_kernel_addr(ea)) + return __copy_mem_out(dest, ea, nb, regs); + + if (user_write_access_begin((void __user *)ea, nb)) { + err = __copy_mem_out(dest, ea, nb, regs); + user_write_access_end(); + } else { + err = -EFAULT; + regs->dar = ea; + } + + return err; +} + +static nokprobe_inline int write_mem_unaligned(unsigned long val, + unsigned long ea, int nb, + struct pt_regs *regs) +{ + union { + unsigned long ul; + u8 b[sizeof(unsigned long)]; + } u; + int i; + + u.ul = val; + i = IS_BE ? sizeof(unsigned long) - nb : 0; + return copy_mem_out(&u.b[i], ea, nb, regs); } /* * Write memory at address ea for nb bytes, return 0 for success - * or -EFAULT if an error occurred. + * or -EFAULT if an error occurred. N.B. nb must be 1, 2, 4 or 8. */ -static int __kprobes write_mem(unsigned long val, unsigned long ea, int nb, +static int write_mem(unsigned long val, unsigned long ea, int nb, struct pt_regs *regs) { if (!address_ok(regs, ea, nb)) return -EFAULT; if ((ea & (nb - 1)) == 0) - return write_mem_aligned(val, ea, nb); + return write_mem_aligned(val, ea, nb, regs); return write_mem_unaligned(val, ea, nb, regs); } +NOKPROBE_SYMBOL(write_mem); #ifdef CONFIG_PPC_FPU /* - * Check the address and alignment, and call func to do the actual - * load or store. + * These access either the real FP register or the image in the + * thread_struct, depending on regs->msr & MSR_FP. 
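read_mem_unaligned() and write_mem_unaligned() now funnel through the copy helpers by overlaying an unsigned long with a byte array, so a 1/2/4-byte value ends up right-justified on big-endian and left-justified on little-endian. A small userspace illustration of that layout follows; IS_BE is approximated here with the compiler's byte-order macro, which is an assumption of this sketch rather than the kernel definition.

#include <stdio.h>
#include <string.h>

#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#define IS_BE 1
#else
#define IS_BE 0
#endif

/* Lay out an nb-byte value (nb = 1, 2, 4 or 8) the way
 * write_mem_unaligned() does before calling copy_mem_out(). */
static void layout_value(unsigned long val, int nb,
			 unsigned char out[sizeof(unsigned long)])
{
	union {
		unsigned long ul;
		unsigned char b[sizeof(unsigned long)];
	} u;
	int i = IS_BE ? (int)sizeof(unsigned long) - nb : 0;

	u.ul = val;
	memcpy(out, &u.b[i], nb);		/* only these bytes reach memory */
}

int main(void)
{
	unsigned char buf[sizeof(unsigned long)] = { 0 };

	layout_value(0x1234, 2, buf);
	printf("%02x %02x\n", buf[0], buf[1]);	/* "34 12" on LE, "12 34" on BE */
	return 0;
}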
*/ -static int __kprobes do_fp_load(int rn, int (*func)(int, unsigned long), - unsigned long ea, int nb, - struct pt_regs *regs) +static int do_fp_load(struct instruction_op *op, unsigned long ea, + struct pt_regs *regs, bool cross_endian) { - int err; - unsigned long val[sizeof(double) / sizeof(long)]; - unsigned long ptr; - + int err, rn, nb; + union { + int i; + unsigned int u; + float f; + double d[2]; + unsigned long l[2]; + u8 b[2 * sizeof(double)]; + } u; + + nb = GETSIZE(op->type); + if (nb > sizeof(u)) + return -EINVAL; if (!address_ok(regs, ea, nb)) return -EFAULT; - if ((ea & 3) == 0) - return (*func)(rn, ea); - ptr = (unsigned long) &val[0]; - if (sizeof(unsigned long) == 8 || nb == 4) { - err = read_mem_unaligned(&val[0], ea, nb, regs); - ptr += sizeof(unsigned long) - nb; - } else { - /* reading a double on 32-bit */ - err = read_mem_unaligned(&val[0], ea, 4, regs); - if (!err) - err = read_mem_unaligned(&val[1], ea + 4, 4, regs); - } + rn = op->reg; + err = copy_mem_in(u.b, ea, nb, regs); if (err) return err; - return (*func)(rn, ptr); + if (unlikely(cross_endian)) { + do_byte_reverse(u.b, min(nb, 8)); + if (nb == 16) + do_byte_reverse(&u.b[8], 8); + } + preempt_disable(); + if (nb == 4) { + if (op->type & FPCONV) + conv_sp_to_dp(&u.f, &u.d[0]); + else if (op->type & SIGNEXT) + u.l[0] = u.i; + else + u.l[0] = u.u; + } + if (regs->msr & MSR_FP) + put_fpr(rn, &u.d[0]); + else + current->thread.TS_FPR(rn) = u.l[0]; + if (nb == 16) { + /* lfdp */ + rn |= 1; + if (regs->msr & MSR_FP) + put_fpr(rn, &u.d[1]); + else + current->thread.TS_FPR(rn) = u.l[1]; + } + preempt_enable(); + return 0; } +NOKPROBE_SYMBOL(do_fp_load); -static int __kprobes do_fp_store(int rn, int (*func)(int, unsigned long), - unsigned long ea, int nb, - struct pt_regs *regs) +static int do_fp_store(struct instruction_op *op, unsigned long ea, + struct pt_regs *regs, bool cross_endian) { - int err; - unsigned long val[sizeof(double) / sizeof(long)]; - unsigned long ptr; - + int rn, nb; + union { + unsigned int u; + float f; + double d[2]; + unsigned long l[2]; + u8 b[2 * sizeof(double)]; + } u; + + nb = GETSIZE(op->type); + if (nb > sizeof(u)) + return -EINVAL; if (!address_ok(regs, ea, nb)) return -EFAULT; - if ((ea & 3) == 0) - return (*func)(rn, ea); - ptr = (unsigned long) &val[0]; - if (sizeof(unsigned long) == 8 || nb == 4) { - ptr += sizeof(unsigned long) - nb; - err = (*func)(rn, ptr); - if (err) - return err; - err = write_mem_unaligned(val[0], ea, nb, regs); - } else { - /* writing a double on 32-bit */ - err = (*func)(rn, ptr); - if (err) - return err; - err = write_mem_unaligned(val[0], ea, 4, regs); - if (!err) - err = write_mem_unaligned(val[1], ea + 4, 4, regs); + rn = op->reg; + preempt_disable(); + if (regs->msr & MSR_FP) + get_fpr(rn, &u.d[0]); + else + u.l[0] = current->thread.TS_FPR(rn); + if (nb == 4) { + if (op->type & FPCONV) + conv_dp_to_sp(&u.d[0], &u.f); + else + u.u = u.l[0]; } - return err; + if (nb == 16) { + rn |= 1; + if (regs->msr & MSR_FP) + get_fpr(rn, &u.d[1]); + else + u.l[1] = current->thread.TS_FPR(rn); + } + preempt_enable(); + if (unlikely(cross_endian)) { + do_byte_reverse(u.b, min(nb, 8)); + if (nb == 16) + do_byte_reverse(&u.b[8], 8); + } + return copy_mem_out(u.b, ea, nb, regs); } +NOKPROBE_SYMBOL(do_fp_store); #endif #ifdef CONFIG_ALTIVEC /* For Altivec/VMX, no need to worry about alignment */ -static int __kprobes do_vec_load(int rn, int (*func)(int, unsigned long), - unsigned long ea, struct pt_regs *regs) +static nokprobe_inline int do_vec_load(int rn, unsigned 
long ea, + int size, struct pt_regs *regs, + bool cross_endian) { + int err; + union { + __vector128 v; + u8 b[sizeof(__vector128)]; + } u = {}; + + if (size > sizeof(u)) + return -EINVAL; + if (!address_ok(regs, ea & ~0xfUL, 16)) return -EFAULT; - return (*func)(rn, ea); + /* align to multiple of size */ + ea &= ~(size - 1); + err = copy_mem_in(&u.b[ea & 0xf], ea, size, regs); + if (err) + return err; + if (unlikely(cross_endian)) + do_byte_reverse(&u.b[ea & 0xf], min_t(size_t, size, sizeof(u))); + preempt_disable(); + if (regs->msr & MSR_VEC) + put_vr(rn, &u.v); + else + current->thread.vr_state.vr[rn] = u.v; + preempt_enable(); + return 0; } -static int __kprobes do_vec_store(int rn, int (*func)(int, unsigned long), - unsigned long ea, struct pt_regs *regs) +static nokprobe_inline int do_vec_store(int rn, unsigned long ea, + int size, struct pt_regs *regs, + bool cross_endian) { + union { + __vector128 v; + u8 b[sizeof(__vector128)]; + } u; + + if (size > sizeof(u)) + return -EINVAL; + if (!address_ok(regs, ea & ~0xfUL, 16)) return -EFAULT; - return (*func)(rn, ea); + /* align to multiple of size */ + ea &= ~(size - 1); + + preempt_disable(); + if (regs->msr & MSR_VEC) + get_vr(rn, &u.v); + else + u.v = current->thread.vr_state.vr[rn]; + preempt_enable(); + if (unlikely(cross_endian)) + do_byte_reverse(&u.b[ea & 0xf], min_t(size_t, size, sizeof(u))); + return copy_mem_out(&u.b[ea & 0xf], ea, size, regs); } #endif /* CONFIG_ALTIVEC */ -#ifdef CONFIG_VSX -static int __kprobes do_vsx_load(int rn, int (*func)(int, unsigned long), - unsigned long ea, struct pt_regs *regs) +#ifdef __powerpc64__ +static nokprobe_inline int emulate_lq(struct pt_regs *regs, unsigned long ea, + int reg, bool cross_endian) { int err; - unsigned long val[2]; if (!address_ok(regs, ea, 16)) return -EFAULT; - if ((ea & 3) == 0) - return (*func)(rn, ea); - err = read_mem_unaligned(&val[0], ea, 8, regs); - if (!err) - err = read_mem_unaligned(&val[1], ea + 8, 8, regs); - if (!err) - err = (*func)(rn, (unsigned long) &val[0]); + /* if aligned, should be atomic */ + if ((ea & 0xf) == 0) { + err = do_lq(ea, ®s->gpr[reg]); + } else { + err = read_mem(®s->gpr[reg + IS_LE], ea, 8, regs); + if (!err) + err = read_mem(®s->gpr[reg + IS_BE], ea + 8, 8, regs); + } + if (!err && unlikely(cross_endian)) + do_byte_reverse(®s->gpr[reg], 16); return err; } -static int __kprobes do_vsx_store(int rn, int (*func)(int, unsigned long), - unsigned long ea, struct pt_regs *regs) +static nokprobe_inline int emulate_stq(struct pt_regs *regs, unsigned long ea, + int reg, bool cross_endian) { int err; - unsigned long val[2]; + unsigned long vals[2]; if (!address_ok(regs, ea, 16)) return -EFAULT; - if ((ea & 3) == 0) - return (*func)(rn, ea); - err = (*func)(rn, (unsigned long) &val[0]); - if (err) - return err; - err = write_mem_unaligned(val[0], ea, 8, regs); + vals[0] = regs->gpr[reg]; + vals[1] = regs->gpr[reg + 1]; + if (unlikely(cross_endian)) + do_byte_reverse(vals, 16); + + /* if aligned, should be atomic */ + if ((ea & 0xf) == 0) + return do_stq(ea, vals[0], vals[1]); + + err = write_mem(vals[IS_LE], ea, 8, regs); if (!err) - err = write_mem_unaligned(val[1], ea + 8, 8, regs); + err = write_mem(vals[IS_BE], ea + 8, 8, regs); return err; } +#endif /* __powerpc64 */ + +#ifdef CONFIG_VSX +static nokprobe_inline void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg, + const void *mem, bool rev) +{ + int size, read_size; + int i, j; + const unsigned int *wp; + const unsigned short *hp; + const unsigned char *bp; + + size = 
GETSIZE(op->type); + reg->d[0] = reg->d[1] = 0; + + switch (op->element_size) { + case 32: + /* [p]lxvp[x] */ + case 16: + /* whole vector; lxv[x] or lxvl[l] */ + if (size == 0) + break; + memcpy(reg, mem, size); + if (IS_LE && (op->vsx_flags & VSX_LDLEFT)) + rev = !rev; + if (rev) + do_byte_reverse(reg, size); + break; + case 8: + /* scalar loads, lxvd2x, lxvdsx */ + read_size = (size >= 8) ? 8 : size; + i = IS_LE ? 8 : 8 - read_size; + memcpy(®->b[i], mem, read_size); + if (rev) + do_byte_reverse(®->b[i], 8); + if (size < 8) { + if (op->type & SIGNEXT) { + /* size == 4 is the only case here */ + reg->d[IS_LE] = (signed int) reg->d[IS_LE]; + } else if (op->vsx_flags & VSX_FPCONV) { + preempt_disable(); + conv_sp_to_dp(®->fp[1 + IS_LE], + ®->dp[IS_LE]); + preempt_enable(); + } + } else { + if (size == 16) { + unsigned long v = *(unsigned long *)(mem + 8); + reg->d[IS_BE] = !rev ? v : byterev_8(v); + } else if (op->vsx_flags & VSX_SPLAT) + reg->d[IS_BE] = reg->d[IS_LE]; + } + break; + case 4: + /* lxvw4x, lxvwsx */ + wp = mem; + for (j = 0; j < size / 4; ++j) { + i = IS_LE ? 3 - j : j; + reg->w[i] = !rev ? *wp++ : byterev_4(*wp++); + } + if (op->vsx_flags & VSX_SPLAT) { + u32 val = reg->w[IS_LE ? 3 : 0]; + for (; j < 4; ++j) { + i = IS_LE ? 3 - j : j; + reg->w[i] = val; + } + } + break; + case 2: + /* lxvh8x */ + hp = mem; + for (j = 0; j < size / 2; ++j) { + i = IS_LE ? 7 - j : j; + reg->h[i] = !rev ? *hp++ : byterev_2(*hp++); + } + break; + case 1: + /* lxvb16x */ + bp = mem; + for (j = 0; j < size; ++j) { + i = IS_LE ? 15 - j : j; + reg->b[i] = *bp++; + } + break; + } +} + +static nokprobe_inline void emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg, + void *mem, bool rev) +{ + int size, write_size; + int i, j; + union vsx_reg buf; + unsigned int *wp; + unsigned short *hp; + unsigned char *bp; + + size = GETSIZE(op->type); + + switch (op->element_size) { + case 32: + /* [p]stxvp[x] */ + if (size == 0) + break; + if (rev) { + /* reverse 32 bytes */ + union vsx_reg buf32[2]; + buf32[0].d[0] = byterev_8(reg[1].d[1]); + buf32[0].d[1] = byterev_8(reg[1].d[0]); + buf32[1].d[0] = byterev_8(reg[0].d[1]); + buf32[1].d[1] = byterev_8(reg[0].d[0]); + memcpy(mem, buf32, size); + } else { + memcpy(mem, reg, size); + } + break; + case 16: + /* stxv, stxvx, stxvl, stxvll */ + if (size == 0) + break; + if (IS_LE && (op->vsx_flags & VSX_LDLEFT)) + rev = !rev; + if (rev) { + /* reverse 16 bytes */ + buf.d[0] = byterev_8(reg->d[1]); + buf.d[1] = byterev_8(reg->d[0]); + reg = &buf; + } + memcpy(mem, reg, size); + break; + case 8: + /* scalar stores, stxvd2x */ + write_size = (size >= 8) ? 8 : size; + i = IS_LE ? 8 : 8 - write_size; + if (size < 8 && op->vsx_flags & VSX_FPCONV) { + buf.d[0] = buf.d[1] = 0; + preempt_disable(); + conv_dp_to_sp(®->dp[IS_LE], &buf.fp[1 + IS_LE]); + preempt_enable(); + reg = &buf; + } + memcpy(mem, ®->b[i], write_size); + if (size == 16) + memcpy(mem + 8, ®->d[IS_BE], 8); + if (unlikely(rev)) { + do_byte_reverse(mem, write_size); + if (size == 16) + do_byte_reverse(mem + 8, 8); + } + break; + case 4: + /* stxvw4x */ + wp = mem; + for (j = 0; j < size / 4; ++j) { + i = IS_LE ? 3 - j : j; + *wp++ = !rev ? reg->w[i] : byterev_4(reg->w[i]); + } + break; + case 2: + /* stxvh8x */ + hp = mem; + for (j = 0; j < size / 2; ++j) { + i = IS_LE ? 7 - j : j; + *hp++ = !rev ? reg->h[i] : byterev_2(reg->h[i]); + } + break; + case 1: + /* stvxb16x */ + bp = mem; + for (j = 0; j < size; ++j) { + i = IS_LE ? 
15 - j : j; + *bp++ = reg->b[i]; + } + break; + } +} + +static nokprobe_inline int do_vsx_load(struct instruction_op *op, + unsigned long ea, struct pt_regs *regs, + bool cross_endian) +{ + int reg = op->reg; + int i, j, nr_vsx_regs; + u8 mem[32]; + union vsx_reg buf[2]; + int size = GETSIZE(op->type); + + if (!address_ok(regs, ea, size) || copy_mem_in(mem, ea, size, regs)) + return -EFAULT; + + nr_vsx_regs = max(1ul, size / sizeof(__vector128)); + emulate_vsx_load(op, buf, mem, cross_endian); + preempt_disable(); + if (reg < 32) { + /* FP regs + extensions */ + if (regs->msr & MSR_FP) { + for (i = 0; i < nr_vsx_regs; i++) { + j = IS_LE ? nr_vsx_regs - i - 1 : i; + load_vsrn(reg + i, &buf[j].v); + } + } else { + for (i = 0; i < nr_vsx_regs; i++) { + j = IS_LE ? nr_vsx_regs - i - 1 : i; + current->thread.fp_state.fpr[reg + i][0] = buf[j].d[0]; + current->thread.fp_state.fpr[reg + i][1] = buf[j].d[1]; + } + } + } else { + if (regs->msr & MSR_VEC) { + for (i = 0; i < nr_vsx_regs; i++) { + j = IS_LE ? nr_vsx_regs - i - 1 : i; + load_vsrn(reg + i, &buf[j].v); + } + } else { + for (i = 0; i < nr_vsx_regs; i++) { + j = IS_LE ? nr_vsx_regs - i - 1 : i; + current->thread.vr_state.vr[reg - 32 + i] = buf[j].v; + } + } + } + preempt_enable(); + return 0; +} + +static nokprobe_inline int do_vsx_store(struct instruction_op *op, + unsigned long ea, struct pt_regs *regs, + bool cross_endian) +{ + int reg = op->reg; + int i, j, nr_vsx_regs; + u8 mem[32]; + union vsx_reg buf[2]; + int size = GETSIZE(op->type); + + if (!address_ok(regs, ea, size)) + return -EFAULT; + + nr_vsx_regs = max(1ul, size / sizeof(__vector128)); + preempt_disable(); + if (reg < 32) { + /* FP regs + extensions */ + if (regs->msr & MSR_FP) { + for (i = 0; i < nr_vsx_regs; i++) { + j = IS_LE ? nr_vsx_regs - i - 1 : i; + store_vsrn(reg + i, &buf[j].v); + } + } else { + for (i = 0; i < nr_vsx_regs; i++) { + j = IS_LE ? nr_vsx_regs - i - 1 : i; + buf[j].d[0] = current->thread.fp_state.fpr[reg + i][0]; + buf[j].d[1] = current->thread.fp_state.fpr[reg + i][1]; + } + } + } else { + if (regs->msr & MSR_VEC) { + for (i = 0; i < nr_vsx_regs; i++) { + j = IS_LE ? nr_vsx_regs - i - 1 : i; + store_vsrn(reg + i, &buf[j].v); + } + } else { + for (i = 0; i < nr_vsx_regs; i++) { + j = IS_LE ? 
nr_vsx_regs - i - 1 : i; + buf[j].v = current->thread.vr_state.vr[reg - 32 + i]; + } + } + } + preempt_enable(); + emulate_vsx_store(op, buf, mem, cross_endian); + return copy_mem_out(mem, ea, size, regs); +} #endif /* CONFIG_VSX */ +static __always_inline int __emulate_dcbz(unsigned long ea) +{ + unsigned long i; + unsigned long size = l1_dcache_bytes(); + + for (i = 0; i < size; i += sizeof(long)) + unsafe_put_user(0, (unsigned long __user *)(ea + i), Efault); + + return 0; + +Efault: + return -EFAULT; +} + +int emulate_dcbz(unsigned long ea, struct pt_regs *regs) +{ + int err; + unsigned long size = l1_dcache_bytes(); + + ea = truncate_if_32bit(regs->msr, ea); + ea &= ~(size - 1); + if (!address_ok(regs, ea, size)) + return -EFAULT; + + if (is_kernel_addr(ea)) { + err = __emulate_dcbz(ea); + } else if (user_write_access_begin((void __user *)ea, size)) { + err = __emulate_dcbz(ea); + user_write_access_end(); + } else { + err = -EFAULT; + } + + if (err) + regs->dar = ea; + + + return err; +} +NOKPROBE_SYMBOL(emulate_dcbz); + #define __put_user_asmx(x, addr, err, op, cr) \ __asm__ __volatile__( \ + ".machine push\n" \ + ".machine power8\n" \ "1: " op " %2,0,%3\n" \ + ".machine pop\n" \ " mfcr %1\n" \ "2:\n" \ ".section .fixup,\"ax\"\n" \ "3: li %0,%4\n" \ " b 2b\n" \ ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - PPC_LONG_ALIGN "\n" \ - PPC_LONG "1b,3b\n" \ - ".previous" \ + EX_TABLE(1b, 3b) \ : "=r" (err), "=r" (cr) \ : "r" (x), "r" (addr), "i" (-EFAULT), "0" (err)) #define __get_user_asmx(x, addr, err, op) \ __asm__ __volatile__( \ + ".machine push\n" \ + ".machine power8\n" \ "1: "op" %1,0,%2\n" \ + ".machine pop\n" \ "2:\n" \ ".section .fixup,\"ax\"\n" \ "3: li %0,%3\n" \ " b 2b\n" \ ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - PPC_LONG_ALIGN "\n" \ - PPC_LONG "1b,3b\n" \ - ".previous" \ + EX_TABLE(1b, 3b) \ : "=r" (err), "=r" (x) \ : "r" (addr), "i" (-EFAULT), "0" (err)) @@ -459,31 +1129,39 @@ static int __kprobes do_vsx_store(int rn, int (*func)(int, unsigned long), "3: li %0,%3\n" \ " b 2b\n" \ ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - PPC_LONG_ALIGN "\n" \ - PPC_LONG "1b,3b\n" \ - ".previous" \ + EX_TABLE(1b, 3b) \ : "=r" (err) \ : "r" (addr), "i" (-EFAULT), "0" (err)) -static void __kprobes set_cr0(struct pt_regs *regs, int rd) +static nokprobe_inline void set_cr0(const struct pt_regs *regs, + struct instruction_op *op) { - long val = regs->gpr[rd]; + long val = op->val; - regs->ccr = (regs->ccr & 0x0fffffff) | ((regs->xer >> 3) & 0x10000000); -#ifdef __powerpc64__ + op->type |= SETCC; + op->ccval = (regs->ccr & 0x0fffffff) | ((regs->xer >> 3) & 0x10000000); if (!(regs->msr & MSR_64BIT)) val = (int) val; -#endif if (val < 0) - regs->ccr |= 0x80000000; + op->ccval |= 0x80000000; else if (val > 0) - regs->ccr |= 0x40000000; + op->ccval |= 0x40000000; else - regs->ccr |= 0x20000000; + op->ccval |= 0x20000000; +} + +static nokprobe_inline void set_ca32(struct instruction_op *op, bool val) +{ + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + if (val) + op->xerval |= XER_CA32; + else + op->xerval &= ~XER_CA32; + } } -static void __kprobes add_with_carry(struct pt_regs *regs, int rd, +static nokprobe_inline void add_with_carry(const struct pt_regs *regs, + struct instruction_op *op, int rd, unsigned long val1, unsigned long val2, unsigned long carry_in) { @@ -491,24 +1169,28 @@ static void __kprobes add_with_carry(struct pt_regs *regs, int rd, if (carry_in) ++val; - regs->gpr[rd] = val; -#ifdef __powerpc64__ - if (!(regs->msr & MSR_64BIT)) { - val = (unsigned int) val; - 
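set_cr0() no longer writes regs->ccr directly; it records the would-be CR image in op->ccval and lets the caller apply it. The CR0 computation itself is unchanged: LT/GT/EQ come from the signed (possibly 32-bit-truncated) result and SO is copied from XER[SO]. A standalone version of that computation, for reference; the function name and the msr_64bit flag are illustrative only.

#include <stdint.h>

/* Compute the new CR value the way set_cr0() does: clear CR0, copy
 * XER[SO] into CR0[SO], then set exactly one of LT/GT/EQ. */
static uint32_t cr0_for_result(uint32_t ccr, uint32_t xer, long val, int msr_64bit)
{
	uint32_t cr = (ccr & 0x0fffffff) | ((xer >> 3) & 0x10000000);

	if (!msr_64bit)
		val = (int)val;			/* compare the 32-bit view */
	if (val < 0)
		cr |= 0x80000000;		/* LT */
	else if (val > 0)
		cr |= 0x40000000;		/* GT */
	else
		cr |= 0x20000000;		/* EQ */
	return cr;
}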
val1 = (unsigned int) val1; - } -#endif + op->type = COMPUTE | SETREG | SETXER; + op->reg = rd; + op->val = val; + val = truncate_if_32bit(regs->msr, val); + val1 = truncate_if_32bit(regs->msr, val1); + op->xerval = regs->xer; if (val < val1 || (carry_in && val == val1)) - regs->xer |= XER_CA; + op->xerval |= XER_CA; else - regs->xer &= ~XER_CA; + op->xerval &= ~XER_CA; + + set_ca32(op, (unsigned int)val < (unsigned int)val1 || + (carry_in && (unsigned int)val == (unsigned int)val1)); } -static void __kprobes do_cmp_signed(struct pt_regs *regs, long v1, long v2, - int crfld) +static nokprobe_inline void do_cmp_signed(const struct pt_regs *regs, + struct instruction_op *op, + long v1, long v2, int crfld) { unsigned int crval, shift; + op->type = COMPUTE | SETCC; crval = (regs->xer >> 31) & 1; /* get SO bit */ if (v1 < v2) crval |= 8; @@ -517,14 +1199,17 @@ static void __kprobes do_cmp_signed(struct pt_regs *regs, long v1, long v2, else crval |= 2; shift = (7 - crfld) * 4; - regs->ccr = (regs->ccr & ~(0xf << shift)) | (crval << shift); + op->ccval = (regs->ccr & ~(0xf << shift)) | (crval << shift); } -static void __kprobes do_cmp_unsigned(struct pt_regs *regs, unsigned long v1, - unsigned long v2, int crfld) +static nokprobe_inline void do_cmp_unsigned(const struct pt_regs *regs, + struct instruction_op *op, + unsigned long v1, + unsigned long v2, int crfld) { unsigned int crval, shift; + op->type = COMPUTE | SETCC; crval = (regs->xer >> 31) & 1; /* get SO bit */ if (v1 < v2) crval |= 8; @@ -533,7 +1218,108 @@ static void __kprobes do_cmp_unsigned(struct pt_regs *regs, unsigned long v1, else crval |= 2; shift = (7 - crfld) * 4; - regs->ccr = (regs->ccr & ~(0xf << shift)) | (crval << shift); + op->ccval = (regs->ccr & ~(0xf << shift)) | (crval << shift); +} + +static nokprobe_inline void do_cmpb(const struct pt_regs *regs, + struct instruction_op *op, + unsigned long v1, unsigned long v2) +{ + unsigned long long out_val, mask; + int i; + + out_val = 0; + for (i = 0; i < 8; i++) { + mask = 0xffUL << (i * 8); + if ((v1 & mask) == (v2 & mask)) + out_val |= mask; + } + op->val = out_val; +} + +/* + * The size parameter is used to adjust the equivalent popcnt instruction. + * popcntb = 8, popcntw = 32, popcntd = 64 + */ +static nokprobe_inline void do_popcnt(const struct pt_regs *regs, + struct instruction_op *op, + unsigned long v1, int size) +{ + unsigned long long out = v1; + + out -= (out >> 1) & 0x5555555555555555ULL; + out = (0x3333333333333333ULL & out) + + (0x3333333333333333ULL & (out >> 2)); + out = (out + (out >> 4)) & 0x0f0f0f0f0f0f0f0fULL; + + if (size == 8) { /* popcntb */ + op->val = out; + return; + } + out += out >> 8; + out += out >> 16; + if (size == 32) { /* popcntw */ + op->val = out & 0x0000003f0000003fULL; + return; + } + + out = (out + (out >> 32)) & 0x7f; + op->val = out; /* popcntd */ +} + +#ifdef CONFIG_PPC64 +static nokprobe_inline void do_bpermd(const struct pt_regs *regs, + struct instruction_op *op, + unsigned long v1, unsigned long v2) +{ + unsigned char perm, idx; + unsigned int i; + + perm = 0; + for (i = 0; i < 8; i++) { + idx = (v1 >> (i * 8)) & 0xff; + if (idx < 64) + if (v2 & PPC_BIT(idx)) + perm |= 1 << i; + } + op->val = perm; +} +#endif /* CONFIG_PPC64 */ +/* + * The size parameter adjusts the equivalent prty instruction. 
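add_with_carry() keeps the existing carry test (the truncated sum is below the first addend, or equal to it when a carry came in) and, via set_ca32(), repeats the same test on the low 32 bits to maintain the ISA 3.0 CA32 bit. A minimal standalone version of both tests; it assumes 64-bit mode, whereas the kernel first truncates both values when MSR_64BIT is clear.

#include <stdbool.h>
#include <stdint.h>

/* Carry out of a full-width a + b (+ carry_in), as tested in
 * add_with_carry(): sum < a, or sum == a with an incoming carry. */
static bool carry_out(unsigned long a, unsigned long b, bool carry_in)
{
	unsigned long sum = a + b + (carry_in ? 1 : 0);

	return sum < a || (carry_in && sum == a);
}

/* Same test on the low 32 bits; this is what feeds XER[CA32] through
 * set_ca32() on CPU_FTR_ARCH_300 parts. */
static bool carry_out32(unsigned long a, unsigned long b, bool carry_in)
{
	unsigned long sum = a + b + (carry_in ? 1 : 0);

	return (uint32_t)sum < (uint32_t)a ||
	       (carry_in && (uint32_t)sum == (uint32_t)a);
}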
+ * prtyw = 32, prtyd = 64 + */ +static nokprobe_inline void do_prty(const struct pt_regs *regs, + struct instruction_op *op, + unsigned long v, int size) +{ + unsigned long long res = v ^ (v >> 8); + + res ^= res >> 16; + if (size == 32) { /* prtyw */ + op->val = res & 0x0000000100000001ULL; + return; + } + + res ^= res >> 32; + op->val = res & 1; /*prtyd */ +} + +static nokprobe_inline int trap_compare(long v1, long v2) +{ + int ret = 0; + + if (v1 < v2) + ret |= 0x10; + else if (v1 > v2) + ret |= 0x08; + else + ret |= 0x04; + if ((unsigned long)v1 < (unsigned long)v2) + ret |= 0x02; + else if ((unsigned long)v1 > (unsigned long)v2) + ret |= 0x01; + return ret; } /* @@ -552,87 +1338,101 @@ static void __kprobes do_cmp_unsigned(struct pt_regs *regs, unsigned long v1, #define ROTATE(x, n) ((n) ? (((x) << (n)) | ((x) >> (8 * sizeof(long) - (n)))) : (x)) /* - * Emulate instructions that cause a transfer of control, - * loads and stores, and a few other instructions. - * Returns 1 if the step was emulated, 0 if not, - * or -1 if the instruction is one that should not be stepped, - * such as an rfid, or a mtmsrd that would clear MSR_RI. + * Decode an instruction, and return information about it in *op + * without changing *regs. + * Integer arithmetic and logical instructions, branches, and barrier + * instructions can be emulated just using the information in *op. + * + * Return value is 1 if the instruction can be emulated just by + * updating *regs with the information in *op, -1 if we need the + * GPRs but *regs doesn't contain the full register set, or 0 + * otherwise. */ -int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) +int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, + ppc_inst_t instr) { - unsigned int opcode, ra, rb, rd, spr, u; +#ifdef CONFIG_PPC64 + unsigned int suffixopcode, prefixtype, prefix_r; +#endif + unsigned int opcode, ra, rb, rc, rd, spr, u; unsigned long int imm; unsigned long int val, val2; - unsigned long int ea; - unsigned int cr, mb, me, sh; - int err; - unsigned long old_ra, val3; + unsigned int mb, me, sh; + unsigned int word, suffix; long ival; - opcode = instr >> 26; + word = ppc_inst_val(instr); + suffix = ppc_inst_suffix(instr); + + op->type = COMPUTE; + + opcode = ppc_inst_primary_opcode(instr); switch (opcode) { case 16: /* bc */ - imm = (signed short)(instr & 0xfffc); - if ((instr & 2) == 0) + op->type = BRANCH; + imm = (signed short)(word & 0xfffc); + if ((word & 2) == 0) imm += regs->nip; - regs->nip += 4; - regs->nip = truncate_if_32bit(regs->msr, regs->nip); - if (instr & 1) - regs->link = regs->nip; - if (branch_taken(instr, regs)) - regs->nip = truncate_if_32bit(regs->msr, imm); + op->val = truncate_if_32bit(regs->msr, imm); + if (word & 1) + op->type |= SETLK; + if (branch_taken(word, regs, op)) + op->type |= BRTAKEN; return 1; -#ifdef CONFIG_PPC64 case 17: /* sc */ - /* - * N.B. this uses knowledge about how the syscall - * entry code works. If that is changed, this will - * need to be changed also. 
- */ - if (regs->gpr[0] == 0x1ebe && - cpu_has_feature(CPU_FTR_REAL_LE)) { - regs->msr ^= MSR_LE; - goto instr_done; - } - regs->gpr[9] = regs->gpr[13]; - regs->gpr[10] = MSR_KERNEL; - regs->gpr[11] = regs->nip + 4; - regs->gpr[12] = regs->msr & MSR_MASK; - regs->gpr[13] = (unsigned long) get_paca(); - regs->nip = (unsigned long) &system_call_common; - regs->msr = MSR_KERNEL; - return 1; -#endif + if ((word & 0xfe2) == 2) + op->type = SYSCALL; + else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && + (word & 0xfe3) == 1) { /* scv */ + op->type = SYSCALL_VECTORED_0; + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + } else + op->type = UNKNOWN; + return 0; case 18: /* b */ - imm = instr & 0x03fffffc; + op->type = BRANCH | BRTAKEN; + imm = word & 0x03fffffc; if (imm & 0x02000000) imm -= 0x04000000; - if ((instr & 2) == 0) + if ((word & 2) == 0) imm += regs->nip; - if (instr & 1) - regs->link = truncate_if_32bit(regs->msr, regs->nip + 4); - imm = truncate_if_32bit(regs->msr, imm); - regs->nip = imm; + op->val = truncate_if_32bit(regs->msr, imm); + if (word & 1) + op->type |= SETLK; return 1; case 19: - switch ((instr >> 1) & 0x3ff) { + switch ((word >> 1) & 0x3ff) { + case 0: /* mcrf */ + op->type = COMPUTE + SETCC; + rd = 7 - ((word >> 23) & 0x7); + ra = 7 - ((word >> 18) & 0x7); + rd *= 4; + ra *= 4; + val = (regs->ccr >> ra) & 0xf; + op->ccval = (regs->ccr & ~(0xfUL << rd)) | (val << rd); + return 1; + case 16: /* bclr */ case 528: /* bcctr */ - imm = (instr & 0x400)? regs->ctr: regs->link; - regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4); - imm = truncate_if_32bit(regs->msr, imm); - if (instr & 1) - regs->link = regs->nip; - if (branch_taken(instr, regs)) - regs->nip = imm; + op->type = BRANCH; + imm = (word & 0x400)? regs->ctr: regs->link; + op->val = truncate_if_32bit(regs->msr, imm); + if (word & 1) + op->type |= SETLK; + if (branch_taken(word, regs, op)) + op->type |= BRTAKEN; return 1; case 18: /* rfid, scary */ - return -1; + if (user_mode(regs)) + goto priv; + op->type = RFI; + return 0; case 150: /* isync */ - isync(); - goto instr_done; + op->type = BARRIER | BARRIER_ISYNC; + return 1; case 33: /* crnor */ case 129: /* crandc */ @@ -642,272 +1442,394 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) case 289: /* creqv */ case 417: /* crorc */ case 449: /* cror */ - ra = (instr >> 16) & 0x1f; - rb = (instr >> 11) & 0x1f; - rd = (instr >> 21) & 0x1f; + op->type = COMPUTE + SETCC; + ra = (word >> 16) & 0x1f; + rb = (word >> 11) & 0x1f; + rd = (word >> 21) & 0x1f; ra = (regs->ccr >> (31 - ra)) & 1; rb = (regs->ccr >> (31 - rb)) & 1; - val = (instr >> (6 + ra * 2 + rb)) & 1; - regs->ccr = (regs->ccr & ~(1UL << (31 - rd))) | + val = (word >> (6 + ra * 2 + rb)) & 1; + op->ccval = (regs->ccr & ~(1UL << (31 - rd))) | (val << (31 - rd)); - goto instr_done; + return 1; } break; case 31: - switch ((instr >> 1) & 0x3ff) { + switch ((word >> 1) & 0x3ff) { case 598: /* sync */ + op->type = BARRIER + BARRIER_SYNC; #ifdef __powerpc64__ - switch ((instr >> 21) & 3) { + switch ((word >> 21) & 3) { case 1: /* lwsync */ - asm volatile("lwsync" : : : "memory"); - goto instr_done; + op->type = BARRIER + BARRIER_LWSYNC; + break; case 2: /* ptesync */ - asm volatile("ptesync" : : : "memory"); - goto instr_done; + op->type = BARRIER + BARRIER_PTESYNC; + break; } #endif - mb(); - goto instr_done; + return 1; case 854: /* eieio */ - eieio(); - goto instr_done; + op->type = BARRIER + BARRIER_EIEIO; + return 1; } break; } - /* Following cases refer to regs->gpr[], so we 
need all regs */ - if (!FULL_REGS(regs)) - return 0; - - rd = (instr >> 21) & 0x1f; - ra = (instr >> 16) & 0x1f; - rb = (instr >> 11) & 0x1f; + rd = (word >> 21) & 0x1f; + ra = (word >> 16) & 0x1f; + rb = (word >> 11) & 0x1f; + rc = (word >> 6) & 0x1f; switch (opcode) { +#ifdef __powerpc64__ + case 1: + if (!cpu_has_feature(CPU_FTR_ARCH_31)) + goto unknown_opcode; + + prefix_r = GET_PREFIX_R(word); + ra = GET_PREFIX_RA(suffix); + rd = (suffix >> 21) & 0x1f; + op->reg = rd; + op->val = regs->gpr[rd]; + suffixopcode = get_op(suffix); + prefixtype = (word >> 24) & 0x3; + switch (prefixtype) { + case 2: + if (prefix_r && ra) + return 0; + switch (suffixopcode) { + case 14: /* paddi */ + op->type = COMPUTE | PREFIXED; + op->val = mlsd_8lsd_ea(word, suffix, regs); + goto compute_done; + } + } + break; + case 2: /* tdi */ + if (rd & trap_compare(regs->gpr[ra], (short) word)) + goto trap; + return 1; +#endif + case 3: /* twi */ + if (rd & trap_compare((int)regs->gpr[ra], (short) word)) + goto trap; + return 1; + +#ifdef __powerpc64__ + case 4: + /* + * There are very many instructions with this primary opcode + * introduced in the ISA as early as v2.03. However, the ones + * we currently emulate were all introduced with ISA 3.0 + */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + + switch (word & 0x3f) { + case 48: /* maddhd */ + asm volatile(PPC_MADDHD(%0, %1, %2, %3) : + "=r" (op->val) : "r" (regs->gpr[ra]), + "r" (regs->gpr[rb]), "r" (regs->gpr[rc])); + goto compute_done; + + case 49: /* maddhdu */ + asm volatile(PPC_MADDHDU(%0, %1, %2, %3) : + "=r" (op->val) : "r" (regs->gpr[ra]), + "r" (regs->gpr[rb]), "r" (regs->gpr[rc])); + goto compute_done; + + case 51: /* maddld */ + asm volatile(PPC_MADDLD(%0, %1, %2, %3) : + "=r" (op->val) : "r" (regs->gpr[ra]), + "r" (regs->gpr[rb]), "r" (regs->gpr[rc])); + goto compute_done; + } + + /* + * There are other instructions from ISA 3.0 with the same + * primary opcode which do not have emulation support yet. + */ + goto unknown_opcode; +#endif + case 7: /* mulli */ - regs->gpr[rd] = regs->gpr[ra] * (short) instr; - goto instr_done; + op->val = regs->gpr[ra] * (short) word; + goto compute_done; case 8: /* subfic */ - imm = (short) instr; - add_with_carry(regs, rd, ~regs->gpr[ra], imm, 1); - goto instr_done; + imm = (short) word; + add_with_carry(regs, op, rd, ~regs->gpr[ra], imm, 1); + return 1; case 10: /* cmpli */ - imm = (unsigned short) instr; + imm = (unsigned short) word; val = regs->gpr[ra]; #ifdef __powerpc64__ if ((rd & 1) == 0) val = (unsigned int) val; #endif - do_cmp_unsigned(regs, val, imm, rd >> 2); - goto instr_done; + do_cmp_unsigned(regs, op, val, imm, rd >> 2); + return 1; case 11: /* cmpi */ - imm = (short) instr; + imm = (short) word; val = regs->gpr[ra]; #ifdef __powerpc64__ if ((rd & 1) == 0) val = (int) val; #endif - do_cmp_signed(regs, val, imm, rd >> 2); - goto instr_done; + do_cmp_signed(regs, op, val, imm, rd >> 2); + return 1; case 12: /* addic */ - imm = (short) instr; - add_with_carry(regs, rd, regs->gpr[ra], imm, 0); - goto instr_done; + imm = (short) word; + add_with_carry(regs, op, rd, regs->gpr[ra], imm, 0); + return 1; case 13: /* addic. 
*/ - imm = (short) instr; - add_with_carry(regs, rd, regs->gpr[ra], imm, 0); - set_cr0(regs, rd); - goto instr_done; + imm = (short) word; + add_with_carry(regs, op, rd, regs->gpr[ra], imm, 0); + set_cr0(regs, op); + return 1; case 14: /* addi */ - imm = (short) instr; + imm = (short) word; if (ra) imm += regs->gpr[ra]; - regs->gpr[rd] = imm; - goto instr_done; + op->val = imm; + goto compute_done; case 15: /* addis */ - imm = ((short) instr) << 16; + imm = ((short) word) << 16; if (ra) imm += regs->gpr[ra]; - regs->gpr[rd] = imm; - goto instr_done; + op->val = imm; + goto compute_done; + + case 19: + if (((word >> 1) & 0x1f) == 2) { + /* addpcis */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + imm = (short) (word & 0xffc1); /* d0 + d2 fields */ + imm |= (word >> 15) & 0x3e; /* d1 field */ + op->val = regs->nip + (imm << 16) + 4; + goto compute_done; + } + op->type = UNKNOWN; + return 0; case 20: /* rlwimi */ - mb = (instr >> 6) & 0x1f; - me = (instr >> 1) & 0x1f; + mb = (word >> 6) & 0x1f; + me = (word >> 1) & 0x1f; val = DATA32(regs->gpr[rd]); imm = MASK32(mb, me); - regs->gpr[ra] = (regs->gpr[ra] & ~imm) | (ROTATE(val, rb) & imm); + op->val = (regs->gpr[ra] & ~imm) | (ROTATE(val, rb) & imm); goto logical_done; case 21: /* rlwinm */ - mb = (instr >> 6) & 0x1f; - me = (instr >> 1) & 0x1f; + mb = (word >> 6) & 0x1f; + me = (word >> 1) & 0x1f; val = DATA32(regs->gpr[rd]); - regs->gpr[ra] = ROTATE(val, rb) & MASK32(mb, me); + op->val = ROTATE(val, rb) & MASK32(mb, me); goto logical_done; case 23: /* rlwnm */ - mb = (instr >> 6) & 0x1f; - me = (instr >> 1) & 0x1f; + mb = (word >> 6) & 0x1f; + me = (word >> 1) & 0x1f; rb = regs->gpr[rb] & 0x1f; val = DATA32(regs->gpr[rd]); - regs->gpr[ra] = ROTATE(val, rb) & MASK32(mb, me); + op->val = ROTATE(val, rb) & MASK32(mb, me); goto logical_done; case 24: /* ori */ - imm = (unsigned short) instr; - regs->gpr[ra] = regs->gpr[rd] | imm; - goto instr_done; + op->val = regs->gpr[rd] | (unsigned short) word; + goto logical_done_nocc; case 25: /* oris */ - imm = (unsigned short) instr; - regs->gpr[ra] = regs->gpr[rd] | (imm << 16); - goto instr_done; + imm = (unsigned short) word; + op->val = regs->gpr[rd] | (imm << 16); + goto logical_done_nocc; case 26: /* xori */ - imm = (unsigned short) instr; - regs->gpr[ra] = regs->gpr[rd] ^ imm; - goto instr_done; + op->val = regs->gpr[rd] ^ (unsigned short) word; + goto logical_done_nocc; case 27: /* xoris */ - imm = (unsigned short) instr; - regs->gpr[ra] = regs->gpr[rd] ^ (imm << 16); - goto instr_done; + imm = (unsigned short) word; + op->val = regs->gpr[rd] ^ (imm << 16); + goto logical_done_nocc; case 28: /* andi. */ - imm = (unsigned short) instr; - regs->gpr[ra] = regs->gpr[rd] & imm; - set_cr0(regs, ra); - goto instr_done; + op->val = regs->gpr[rd] & (unsigned short) word; + set_cr0(regs, op); + goto logical_done_nocc; case 29: /* andis. 
*/ - imm = (unsigned short) instr; - regs->gpr[ra] = regs->gpr[rd] & (imm << 16); - set_cr0(regs, ra); - goto instr_done; + imm = (unsigned short) word; + op->val = regs->gpr[rd] & (imm << 16); + set_cr0(regs, op); + goto logical_done_nocc; #ifdef __powerpc64__ case 30: /* rld* */ - mb = ((instr >> 6) & 0x1f) | (instr & 0x20); + mb = ((word >> 6) & 0x1f) | (word & 0x20); val = regs->gpr[rd]; - if ((instr & 0x10) == 0) { - sh = rb | ((instr & 2) << 4); + if ((word & 0x10) == 0) { + sh = rb | ((word & 2) << 4); val = ROTATE(val, sh); - switch ((instr >> 2) & 3) { + switch ((word >> 2) & 3) { case 0: /* rldicl */ - regs->gpr[ra] = val & MASK64_L(mb); - goto logical_done; + val &= MASK64_L(mb); + break; case 1: /* rldicr */ - regs->gpr[ra] = val & MASK64_R(mb); - goto logical_done; + val &= MASK64_R(mb); + break; case 2: /* rldic */ - regs->gpr[ra] = val & MASK64(mb, 63 - sh); - goto logical_done; + val &= MASK64(mb, 63 - sh); + break; case 3: /* rldimi */ imm = MASK64(mb, 63 - sh); - regs->gpr[ra] = (regs->gpr[ra] & ~imm) | + val = (regs->gpr[ra] & ~imm) | (val & imm); - goto logical_done; } + op->val = val; + goto logical_done; } else { sh = regs->gpr[rb] & 0x3f; val = ROTATE(val, sh); - switch ((instr >> 1) & 7) { + switch ((word >> 1) & 7) { case 0: /* rldcl */ - regs->gpr[ra] = val & MASK64_L(mb); + op->val = val & MASK64_L(mb); goto logical_done; case 1: /* rldcr */ - regs->gpr[ra] = val & MASK64_R(mb); + op->val = val & MASK64_R(mb); goto logical_done; } } #endif + op->type = UNKNOWN; /* illegal instruction */ + return 0; case 31: - switch ((instr >> 1) & 0x3ff) { + /* isel occupies 32 minor opcodes */ + if (((word >> 1) & 0x1f) == 15) { + mb = (word >> 6) & 0x1f; /* bc field */ + val = (regs->ccr >> (31 - mb)) & 1; + val2 = (ra) ? regs->gpr[ra] : 0; + + op->val = (val) ? val2 : regs->gpr[rb]; + goto compute_done; + } + + switch ((word >> 1) & 0x3ff) { + case 4: /* tw */ + if (rd == 0x1f || + (rd & trap_compare((int)regs->gpr[ra], + (int)regs->gpr[rb]))) + goto trap; + return 1; +#ifdef __powerpc64__ + case 68: /* td */ + if (rd & trap_compare(regs->gpr[ra], regs->gpr[rb])) + goto trap; + return 1; +#endif case 83: /* mfmsr */ - if (regs->msr & MSR_PR) - break; - regs->gpr[rd] = regs->msr & MSR_MASK; - goto instr_done; + if (user_mode(regs)) + goto priv; + op->type = MFMSR; + op->reg = rd; + return 0; case 146: /* mtmsr */ - if (regs->msr & MSR_PR) - break; - imm = regs->gpr[rd]; - if ((imm & MSR_RI) == 0) - /* can't step mtmsr that would clear MSR_RI */ - return -1; - regs->msr = imm; - goto instr_done; + if (user_mode(regs)) + goto priv; + op->type = MTMSR; + op->reg = rd; + op->val = 0xffffffff & ~(MSR_ME | MSR_LE); + return 0; #ifdef CONFIG_PPC64 case 178: /* mtmsrd */ + if (user_mode(regs)) + goto priv; + op->type = MTMSR; + op->reg = rd; /* only MSR_EE and MSR_RI get changed if bit 15 set */ - /* mtmsrd doesn't change MSR_HV and MSR_ME */ - if (regs->msr & MSR_PR) - break; - imm = (instr & 0x10000)? 0x8002: 0xefffffffffffefffUL; - imm = (regs->msr & MSR_MASK & ~imm) - | (regs->gpr[rd] & imm); - if ((imm & MSR_RI) == 0) - /* can't step mtmsrd that would clear MSR_RI */ - return -1; - regs->msr = imm; - goto instr_done; + /* mtmsrd doesn't change MSR_HV, MSR_ME or MSR_LE */ + imm = (word & 0x10000)? 
0x8002: 0xefffffffffffeffeUL; + op->val = imm; + return 0; #endif + case 19: /* mfcr */ - regs->gpr[rd] = regs->ccr; - regs->gpr[rd] &= 0xffffffffUL; - goto instr_done; + imm = 0xffffffffUL; + if ((word >> 20) & 1) { + imm = 0xf0000000UL; + for (sh = 0; sh < 8; ++sh) { + if (word & (0x80000 >> sh)) + break; + imm >>= 4; + } + } + op->val = regs->ccr & imm; + goto compute_done; + + case 128: /* setb */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + /* + * 'ra' encodes the CR field number (bfa) in the top 3 bits. + * Since each CR field is 4 bits, + * we can simply mask off the bottom two bits (bfa * 4) + * to yield the first bit in the CR field. + */ + ra = ra & ~0x3; + /* 'val' stores bits of the CR field (bfa) */ + val = regs->ccr >> (CR0_SHIFT - ra); + /* checks if the LT bit of CR field (bfa) is set */ + if (val & 8) + op->val = -1; + /* checks if the GT bit of CR field (bfa) is set */ + else if (val & 4) + op->val = 1; + else + op->val = 0; + goto compute_done; case 144: /* mtcrf */ + op->type = COMPUTE + SETCC; imm = 0xf0000000UL; val = regs->gpr[rd]; + op->ccval = regs->ccr; for (sh = 0; sh < 8; ++sh) { - if (instr & (0x80000 >> sh)) - regs->ccr = (regs->ccr & ~imm) | + if (word & (0x80000 >> sh)) + op->ccval = (op->ccval & ~imm) | (val & imm); imm >>= 4; } - goto instr_done; + return 1; case 339: /* mfspr */ - spr = (instr >> 11) & 0x3ff; - switch (spr) { - case 0x20: /* mfxer */ - regs->gpr[rd] = regs->xer; - regs->gpr[rd] &= 0xffffffffUL; - goto instr_done; - case 0x100: /* mflr */ - regs->gpr[rd] = regs->link; - goto instr_done; - case 0x120: /* mfctr */ - regs->gpr[rd] = regs->ctr; - goto instr_done; - } - break; + spr = ((word >> 16) & 0x1f) | ((word >> 6) & 0x3e0); + op->type = MFSPR; + op->reg = rd; + op->spr = spr; + if (spr == SPRN_XER || spr == SPRN_LR || + spr == SPRN_CTR) + return 1; + return 0; case 467: /* mtspr */ - spr = (instr >> 11) & 0x3ff; - switch (spr) { - case 0x20: /* mtxer */ - regs->xer = (regs->gpr[rd] & 0xffffffffUL); - goto instr_done; - case 0x100: /* mtlr */ - regs->link = regs->gpr[rd]; - goto instr_done; - case 0x120: /* mtctr */ - regs->ctr = regs->gpr[rd]; - goto instr_done; - } - break; + spr = ((word >> 16) & 0x1f) | ((word >> 6) & 0x3e0); + op->type = MTSPR; + op->val = regs->gpr[rd]; + op->spr = spr; + if (spr == SPRN_XER || spr == SPRN_LR || + spr == SPRN_CTR) + return 1; + return 0; /* * Compare instructions @@ -922,8 +1844,8 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) val2 = (int) val2; } #endif - do_cmp_signed(regs, val, val2, rd >> 2); - goto instr_done; + do_cmp_signed(regs, op, val, val2, rd >> 2); + return 1; case 32: /* cmpl */ val = regs->gpr[ra]; @@ -935,168 +1857,271 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) val2 = (unsigned int) val2; } #endif - do_cmp_unsigned(regs, val, val2, rd >> 2); - goto instr_done; + do_cmp_unsigned(regs, op, val, val2, rd >> 2); + return 1; + + case 508: /* cmpb */ + do_cmpb(regs, op, regs->gpr[rd], regs->gpr[rb]); + goto logical_done_nocc; /* * Arithmetic instructions */ case 8: /* subfc */ - add_with_carry(regs, rd, ~regs->gpr[ra], + add_with_carry(regs, op, rd, ~regs->gpr[ra], regs->gpr[rb], 1); goto arith_done; #ifdef __powerpc64__ case 9: /* mulhdu */ - asm("mulhdu %0,%1,%2" : "=r" (regs->gpr[rd]) : + asm("mulhdu %0,%1,%2" : "=r" (op->val) : "r" (regs->gpr[ra]), "r" (regs->gpr[rb])); goto arith_done; #endif case 10: /* addc */ - add_with_carry(regs, rd, regs->gpr[ra], + add_with_carry(regs, op, rd, regs->gpr[ra], 
regs->gpr[rb], 0); goto arith_done; case 11: /* mulhwu */ - asm("mulhwu %0,%1,%2" : "=r" (regs->gpr[rd]) : + asm("mulhwu %0,%1,%2" : "=r" (op->val) : "r" (regs->gpr[ra]), "r" (regs->gpr[rb])); goto arith_done; case 40: /* subf */ - regs->gpr[rd] = regs->gpr[rb] - regs->gpr[ra]; + op->val = regs->gpr[rb] - regs->gpr[ra]; goto arith_done; #ifdef __powerpc64__ case 73: /* mulhd */ - asm("mulhd %0,%1,%2" : "=r" (regs->gpr[rd]) : + asm("mulhd %0,%1,%2" : "=r" (op->val) : "r" (regs->gpr[ra]), "r" (regs->gpr[rb])); goto arith_done; #endif case 75: /* mulhw */ - asm("mulhw %0,%1,%2" : "=r" (regs->gpr[rd]) : + asm("mulhw %0,%1,%2" : "=r" (op->val) : "r" (regs->gpr[ra]), "r" (regs->gpr[rb])); goto arith_done; case 104: /* neg */ - regs->gpr[rd] = -regs->gpr[ra]; + op->val = -regs->gpr[ra]; goto arith_done; case 136: /* subfe */ - add_with_carry(regs, rd, ~regs->gpr[ra], regs->gpr[rb], - regs->xer & XER_CA); + add_with_carry(regs, op, rd, ~regs->gpr[ra], + regs->gpr[rb], regs->xer & XER_CA); goto arith_done; case 138: /* adde */ - add_with_carry(regs, rd, regs->gpr[ra], regs->gpr[rb], - regs->xer & XER_CA); + add_with_carry(regs, op, rd, regs->gpr[ra], + regs->gpr[rb], regs->xer & XER_CA); goto arith_done; case 200: /* subfze */ - add_with_carry(regs, rd, ~regs->gpr[ra], 0L, + add_with_carry(regs, op, rd, ~regs->gpr[ra], 0L, regs->xer & XER_CA); goto arith_done; case 202: /* addze */ - add_with_carry(regs, rd, regs->gpr[ra], 0L, + add_with_carry(regs, op, rd, regs->gpr[ra], 0L, regs->xer & XER_CA); goto arith_done; case 232: /* subfme */ - add_with_carry(regs, rd, ~regs->gpr[ra], -1L, + add_with_carry(regs, op, rd, ~regs->gpr[ra], -1L, regs->xer & XER_CA); goto arith_done; #ifdef __powerpc64__ case 233: /* mulld */ - regs->gpr[rd] = regs->gpr[ra] * regs->gpr[rb]; + op->val = regs->gpr[ra] * regs->gpr[rb]; goto arith_done; #endif case 234: /* addme */ - add_with_carry(regs, rd, regs->gpr[ra], -1L, + add_with_carry(regs, op, rd, regs->gpr[ra], -1L, regs->xer & XER_CA); goto arith_done; case 235: /* mullw */ - regs->gpr[rd] = (unsigned int) regs->gpr[ra] * - (unsigned int) regs->gpr[rb]; - goto arith_done; + op->val = (long)(int) regs->gpr[ra] * + (int) regs->gpr[rb]; + goto arith_done; +#ifdef __powerpc64__ + case 265: /* modud */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->val = regs->gpr[ra] % regs->gpr[rb]; + goto compute_done; +#endif case 266: /* add */ - regs->gpr[rd] = regs->gpr[ra] + regs->gpr[rb]; + op->val = regs->gpr[ra] + regs->gpr[rb]; goto arith_done; + + case 267: /* moduw */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->val = (unsigned int) regs->gpr[ra] % + (unsigned int) regs->gpr[rb]; + goto compute_done; #ifdef __powerpc64__ case 457: /* divdu */ - regs->gpr[rd] = regs->gpr[ra] / regs->gpr[rb]; + op->val = regs->gpr[ra] / regs->gpr[rb]; goto arith_done; #endif case 459: /* divwu */ - regs->gpr[rd] = (unsigned int) regs->gpr[ra] / + op->val = (unsigned int) regs->gpr[ra] / (unsigned int) regs->gpr[rb]; goto arith_done; #ifdef __powerpc64__ case 489: /* divd */ - regs->gpr[rd] = (long int) regs->gpr[ra] / + op->val = (long int) regs->gpr[ra] / (long int) regs->gpr[rb]; goto arith_done; #endif case 491: /* divw */ - regs->gpr[rd] = (int) regs->gpr[ra] / + op->val = (int) regs->gpr[ra] / (int) regs->gpr[rb]; goto arith_done; +#ifdef __powerpc64__ + case 425: /* divde[.] */ + asm volatile(PPC_DIVDE(%0, %1, %2) : + "=r" (op->val) : "r" (regs->gpr[ra]), + "r" (regs->gpr[rb])); + goto arith_done; + case 393: /* divdeu[.] 
*/ + asm volatile(PPC_DIVDEU(%0, %1, %2) : + "=r" (op->val) : "r" (regs->gpr[ra]), + "r" (regs->gpr[rb])); + goto arith_done; +#endif + case 755: /* darn */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + switch (ra & 0x3) { + case 0: + /* 32-bit conditioned */ + asm volatile(PPC_DARN(%0, 0) : "=r" (op->val)); + goto compute_done; + + case 1: + /* 64-bit conditioned */ + asm volatile(PPC_DARN(%0, 1) : "=r" (op->val)); + goto compute_done; + + case 2: + /* 64-bit raw */ + asm volatile(PPC_DARN(%0, 2) : "=r" (op->val)); + goto compute_done; + } + + goto unknown_opcode; +#ifdef __powerpc64__ + case 777: /* modsd */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->val = (long int) regs->gpr[ra] % + (long int) regs->gpr[rb]; + goto compute_done; +#endif + case 779: /* modsw */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->val = (int) regs->gpr[ra] % + (int) regs->gpr[rb]; + goto compute_done; /* * Logical instructions */ case 26: /* cntlzw */ - asm("cntlzw %0,%1" : "=r" (regs->gpr[ra]) : - "r" (regs->gpr[rd])); + val = (unsigned int) regs->gpr[rd]; + op->val = ( val ? __builtin_clz(val) : 32 ); goto logical_done; #ifdef __powerpc64__ case 58: /* cntlzd */ - asm("cntlzd %0,%1" : "=r" (regs->gpr[ra]) : - "r" (regs->gpr[rd])); + val = regs->gpr[rd]; + op->val = ( val ? __builtin_clzl(val) : 64 ); goto logical_done; #endif case 28: /* and */ - regs->gpr[ra] = regs->gpr[rd] & regs->gpr[rb]; + op->val = regs->gpr[rd] & regs->gpr[rb]; goto logical_done; case 60: /* andc */ - regs->gpr[ra] = regs->gpr[rd] & ~regs->gpr[rb]; + op->val = regs->gpr[rd] & ~regs->gpr[rb]; goto logical_done; + case 122: /* popcntb */ + do_popcnt(regs, op, regs->gpr[rd], 8); + goto logical_done_nocc; + case 124: /* nor */ - regs->gpr[ra] = ~(regs->gpr[rd] | regs->gpr[rb]); + op->val = ~(regs->gpr[rd] | regs->gpr[rb]); goto logical_done; + case 154: /* prtyw */ + do_prty(regs, op, regs->gpr[rd], 32); + goto logical_done_nocc; + + case 186: /* prtyd */ + do_prty(regs, op, regs->gpr[rd], 64); + goto logical_done_nocc; +#ifdef CONFIG_PPC64 + case 252: /* bpermd */ + do_bpermd(regs, op, regs->gpr[rd], regs->gpr[rb]); + goto logical_done_nocc; +#endif case 284: /* xor */ - regs->gpr[ra] = ~(regs->gpr[rd] ^ regs->gpr[rb]); + op->val = ~(regs->gpr[rd] ^ regs->gpr[rb]); goto logical_done; case 316: /* xor */ - regs->gpr[ra] = regs->gpr[rd] ^ regs->gpr[rb]; + op->val = regs->gpr[rd] ^ regs->gpr[rb]; goto logical_done; + case 378: /* popcntw */ + do_popcnt(regs, op, regs->gpr[rd], 32); + goto logical_done_nocc; + case 412: /* orc */ - regs->gpr[ra] = regs->gpr[rd] | ~regs->gpr[rb]; + op->val = regs->gpr[rd] | ~regs->gpr[rb]; goto logical_done; case 444: /* or */ - regs->gpr[ra] = regs->gpr[rd] | regs->gpr[rb]; + op->val = regs->gpr[rd] | regs->gpr[rb]; goto logical_done; case 476: /* nand */ - regs->gpr[ra] = ~(regs->gpr[rd] & regs->gpr[rb]); + op->val = ~(regs->gpr[rd] & regs->gpr[rb]); goto logical_done; - +#ifdef CONFIG_PPC64 + case 506: /* popcntd */ + do_popcnt(regs, op, regs->gpr[rd], 64); + goto logical_done_nocc; +#endif + case 538: /* cnttzw */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + val = (unsigned int) regs->gpr[rd]; + op->val = (val ? __builtin_ctz(val) : 32); + goto logical_done; +#ifdef __powerpc64__ + case 570: /* cnttzd */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + val = regs->gpr[rd]; + op->val = (val ? 
__builtin_ctzl(val) : 64); + goto logical_done; +#endif case 922: /* extsh */ - regs->gpr[ra] = (signed short) regs->gpr[rd]; + op->val = (signed short) regs->gpr[rd]; goto logical_done; case 954: /* extsb */ - regs->gpr[ra] = (signed char) regs->gpr[rd]; + op->val = (signed char) regs->gpr[rd]; goto logical_done; #ifdef __powerpc64__ case 986: /* extsw */ - regs->gpr[ra] = (signed int) regs->gpr[rd]; + op->val = (signed int) regs->gpr[rd]; goto logical_done; #endif @@ -1106,370 +2131,621 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) case 24: /* slw */ sh = regs->gpr[rb] & 0x3f; if (sh < 32) - regs->gpr[ra] = (regs->gpr[rd] << sh) & 0xffffffffUL; + op->val = (regs->gpr[rd] << sh) & 0xffffffffUL; else - regs->gpr[ra] = 0; + op->val = 0; goto logical_done; case 536: /* srw */ sh = regs->gpr[rb] & 0x3f; if (sh < 32) - regs->gpr[ra] = (regs->gpr[rd] & 0xffffffffUL) >> sh; + op->val = (regs->gpr[rd] & 0xffffffffUL) >> sh; else - regs->gpr[ra] = 0; + op->val = 0; goto logical_done; case 792: /* sraw */ + op->type = COMPUTE + SETREG + SETXER; sh = regs->gpr[rb] & 0x3f; ival = (signed int) regs->gpr[rd]; - regs->gpr[ra] = ival >> (sh < 32 ? sh : 31); - if (ival < 0 && (sh >= 32 || (ival & ((1 << sh) - 1)) != 0)) - regs->xer |= XER_CA; + op->val = ival >> (sh < 32 ? sh : 31); + op->xerval = regs->xer; + if (ival < 0 && (sh >= 32 || (ival & ((1ul << sh) - 1)) != 0)) + op->xerval |= XER_CA; else - regs->xer &= ~XER_CA; + op->xerval &= ~XER_CA; + set_ca32(op, op->xerval & XER_CA); goto logical_done; case 824: /* srawi */ + op->type = COMPUTE + SETREG + SETXER; sh = rb; ival = (signed int) regs->gpr[rd]; - regs->gpr[ra] = ival >> sh; - if (ival < 0 && (ival & ((1 << sh) - 1)) != 0) - regs->xer |= XER_CA; + op->val = ival >> sh; + op->xerval = regs->xer; + if (ival < 0 && (ival & ((1ul << sh) - 1)) != 0) + op->xerval |= XER_CA; else - regs->xer &= ~XER_CA; + op->xerval &= ~XER_CA; + set_ca32(op, op->xerval & XER_CA); goto logical_done; #ifdef __powerpc64__ case 27: /* sld */ - sh = regs->gpr[rd] & 0x7f; + sh = regs->gpr[rb] & 0x7f; if (sh < 64) - regs->gpr[ra] = regs->gpr[rd] << sh; + op->val = regs->gpr[rd] << sh; else - regs->gpr[ra] = 0; + op->val = 0; goto logical_done; case 539: /* srd */ sh = regs->gpr[rb] & 0x7f; if (sh < 64) - regs->gpr[ra] = regs->gpr[rd] >> sh; + op->val = regs->gpr[rd] >> sh; else - regs->gpr[ra] = 0; + op->val = 0; goto logical_done; case 794: /* srad */ + op->type = COMPUTE + SETREG + SETXER; sh = regs->gpr[rb] & 0x7f; ival = (signed long int) regs->gpr[rd]; - regs->gpr[ra] = ival >> (sh < 64 ? sh : 63); - if (ival < 0 && (sh >= 64 || (ival & ((1 << sh) - 1)) != 0)) - regs->xer |= XER_CA; + op->val = ival >> (sh < 64 ? 
sh : 63); + op->xerval = regs->xer; + if (ival < 0 && (sh >= 64 || (ival & ((1ul << sh) - 1)) != 0)) + op->xerval |= XER_CA; else - regs->xer &= ~XER_CA; + op->xerval &= ~XER_CA; + set_ca32(op, op->xerval & XER_CA); goto logical_done; case 826: /* sradi with sh_5 = 0 */ case 827: /* sradi with sh_5 = 1 */ - sh = rb | ((instr & 2) << 4); + op->type = COMPUTE + SETREG + SETXER; + sh = rb | ((word & 2) << 4); ival = (signed long int) regs->gpr[rd]; - regs->gpr[ra] = ival >> sh; - if (ival < 0 && (ival & ((1 << sh) - 1)) != 0) - regs->xer |= XER_CA; + op->val = ival >> sh; + op->xerval = regs->xer; + if (ival < 0 && (ival & ((1ul << sh) - 1)) != 0) + op->xerval |= XER_CA; else - regs->xer &= ~XER_CA; + op->xerval &= ~XER_CA; + set_ca32(op, op->xerval & XER_CA); goto logical_done; + + case 890: /* extswsli with sh_5 = 0 */ + case 891: /* extswsli with sh_5 = 1 */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->type = COMPUTE + SETREG; + sh = rb | ((word & 2) << 4); + val = (signed int) regs->gpr[rd]; + if (sh) + op->val = ROTATE(val, sh) & MASK64(0, 63 - sh); + else + op->val = val; + goto logical_done; + #endif /* __powerpc64__ */ /* * Cache instructions */ case 54: /* dcbst */ - ea = xform_ea(instr, regs, 0); - if (!address_ok(regs, ea, 8)) - return 0; - err = 0; - __cacheop_user_asmx(ea, err, "dcbst"); - if (err) - return 0; - goto instr_done; + op->type = MKOP(CACHEOP, DCBST, 0); + op->ea = xform_ea(word, regs); + return 0; case 86: /* dcbf */ - ea = xform_ea(instr, regs, 0); - if (!address_ok(regs, ea, 8)) - return 0; - err = 0; - __cacheop_user_asmx(ea, err, "dcbf"); - if (err) - return 0; - goto instr_done; + op->type = MKOP(CACHEOP, DCBF, 0); + op->ea = xform_ea(word, regs); + return 0; case 246: /* dcbtst */ - if (rd == 0) { - ea = xform_ea(instr, regs, 0); - prefetchw((void *) ea); - } - goto instr_done; + op->type = MKOP(CACHEOP, DCBTST, 0); + op->ea = xform_ea(word, regs); + op->reg = rd; + return 0; case 278: /* dcbt */ - if (rd == 0) { - ea = xform_ea(instr, regs, 0); - prefetch((void *) ea); - } - goto instr_done; + op->type = MKOP(CACHEOP, DCBTST, 0); + op->ea = xform_ea(word, regs); + op->reg = rd; + return 0; + + case 982: /* icbi */ + op->type = MKOP(CACHEOP, ICBI, 0); + op->ea = xform_ea(word, regs); + return 0; + case 1014: /* dcbz */ + op->type = MKOP(CACHEOP, DCBZ, 0); + op->ea = xform_ea(word, regs); + return 0; } break; } - /* - * Following cases are for loads and stores, so bail out - * if we're in little-endian mode. - */ - if (regs->msr & MSR_LE) - return 0; - - /* - * Save register RA in case it's an update form load or store - * and the access faults. - */ - old_ra = regs->gpr[ra]; +/* + * Loads and stores. + */ + op->type = UNKNOWN; + op->update_reg = ra; + op->reg = rd; + op->val = regs->gpr[rd]; + u = (word >> 20) & UPDATE; + op->vsx_flags = 0; switch (opcode) { case 31: - u = instr & 0x40; - switch ((instr >> 1) & 0x3ff) { + u = word & UPDATE; + op->ea = xform_ea(word, regs); + switch ((word >> 1) & 0x3ff) { case 20: /* lwarx */ - ea = xform_ea(instr, regs, 0); - if (ea & 3) - break; /* can't handle misaligned */ - err = -EFAULT; - if (!address_ok(regs, ea, 4)) - goto ldst_done; - err = 0; - __get_user_asmx(val, ea, err, "lwarx"); - if (!err) - regs->gpr[rd] = val; - goto ldst_done; + op->type = MKOP(LARX, 0, 4); + break; case 150: /* stwcx. 
*/ - ea = xform_ea(instr, regs, 0); - if (ea & 3) - break; /* can't handle misaligned */ - err = -EFAULT; - if (!address_ok(regs, ea, 4)) - goto ldst_done; - err = 0; - __put_user_asmx(regs->gpr[rd], ea, err, "stwcx.", cr); - if (!err) - regs->ccr = (regs->ccr & 0x0fffffff) | - (cr & 0xe0000000) | - ((regs->xer >> 3) & 0x10000000); - goto ldst_done; + op->type = MKOP(STCX, 0, 4); + break; + +#ifdef CONFIG_PPC_HAS_LBARX_LHARX + case 52: /* lbarx */ + op->type = MKOP(LARX, 0, 1); + break; + + case 694: /* stbcx. */ + op->type = MKOP(STCX, 0, 1); + break; + + case 116: /* lharx */ + op->type = MKOP(LARX, 0, 2); + break; + case 726: /* sthcx. */ + op->type = MKOP(STCX, 0, 2); + break; +#endif #ifdef __powerpc64__ case 84: /* ldarx */ - ea = xform_ea(instr, regs, 0); - if (ea & 7) - break; /* can't handle misaligned */ - err = -EFAULT; - if (!address_ok(regs, ea, 8)) - goto ldst_done; - err = 0; - __get_user_asmx(val, ea, err, "ldarx"); - if (!err) - regs->gpr[rd] = val; - goto ldst_done; + op->type = MKOP(LARX, 0, 8); + break; case 214: /* stdcx. */ - ea = xform_ea(instr, regs, 0); - if (ea & 7) - break; /* can't handle misaligned */ - err = -EFAULT; - if (!address_ok(regs, ea, 8)) - goto ldst_done; - err = 0; - __put_user_asmx(regs->gpr[rd], ea, err, "stdcx.", cr); - if (!err) - regs->ccr = (regs->ccr & 0x0fffffff) | - (cr & 0xe0000000) | - ((regs->xer >> 3) & 0x10000000); - goto ldst_done; + op->type = MKOP(STCX, 0, 8); + break; - case 21: /* ldx */ - case 53: /* ldux */ - err = read_mem(®s->gpr[rd], xform_ea(instr, regs, u), - 8, regs); - goto ldst_done; + case 276: /* lqarx */ + if (!((rd & 1) || rd == ra || rd == rb)) + op->type = MKOP(LARX, 0, 16); + break; + + case 182: /* stqcx. */ + if (!(rd & 1)) + op->type = MKOP(STCX, 0, 16); + break; #endif case 23: /* lwzx */ case 55: /* lwzux */ - err = read_mem(®s->gpr[rd], xform_ea(instr, regs, u), - 4, regs); - goto ldst_done; + op->type = MKOP(LOAD, u, 4); + break; case 87: /* lbzx */ case 119: /* lbzux */ - err = read_mem(®s->gpr[rd], xform_ea(instr, regs, u), - 1, regs); - goto ldst_done; + op->type = MKOP(LOAD, u, 1); + break; #ifdef CONFIG_ALTIVEC + /* + * Note: for the load/store vector element instructions, + * bits of the EA say which field of the VMX register to use. 
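From here on the load/store cases only describe the access: the extended opcode selects an operation type and width that are packed into op->type, and the memory access itself is performed later by the code that consumes the descriptor. A rough standalone illustration of that decode step for a few of the opcode-31 forms above; the descriptor struct and constants are simplified stand-ins for instruction_op and MKOP(), not the kernel's exact encoding.

#include <stdint.h>

enum demo_type { DEMO_LOAD, DEMO_LARX };

/* Simplified stand-in for instruction_op; the real descriptor and the
 * MKOP()/LARX/LOAD encodings live in asm/sstep.h. */
struct demo_op {
	enum demo_type type;
	int size;	/* access width in bytes */
	int update;	/* non-zero for the update (u) forms */
	int reg;	/* target GPR */
};

/* Decode a few of the opcode-31 extended opcodes handled above. */
static int demo_decode_31(uint32_t word, struct demo_op *op)
{
	unsigned int xop = (word >> 1) & 0x3ff;

	op->reg = (word >> 21) & 0x1f;
	op->update = word & 0x40;	/* distinguishes e.g. lwzx from lwzux */

	switch (xop) {
	case 20:			/* lwarx */
		op->type = DEMO_LARX;  op->size = 4; return 1;
	case 84:			/* ldarx */
		op->type = DEMO_LARX;  op->size = 8; return 1;
	case 23: case 55:		/* lwzx, lwzux */
		op->type = DEMO_LOAD;  op->size = 4; return 1;
	case 87: case 119:		/* lbzx, lbzux */
		op->type = DEMO_LOAD;  op->size = 1; return 1;
	}
	return 0;			/* not covered by this sketch */
}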
+ */ + case 7: /* lvebx */ + op->type = MKOP(LOAD_VMX, 0, 1); + op->element_size = 1; + break; + + case 39: /* lvehx */ + op->type = MKOP(LOAD_VMX, 0, 2); + op->element_size = 2; + break; + + case 71: /* lvewx */ + op->type = MKOP(LOAD_VMX, 0, 4); + op->element_size = 4; + break; + case 103: /* lvx */ case 359: /* lvxl */ - if (!(regs->msr & MSR_VEC)) - break; - ea = xform_ea(instr, regs, 0); - err = do_vec_load(rd, do_lvx, ea, regs); - goto ldst_done; + op->type = MKOP(LOAD_VMX, 0, 16); + op->element_size = 16; + break; + + case 135: /* stvebx */ + op->type = MKOP(STORE_VMX, 0, 1); + op->element_size = 1; + break; + + case 167: /* stvehx */ + op->type = MKOP(STORE_VMX, 0, 2); + op->element_size = 2; + break; + + case 199: /* stvewx */ + op->type = MKOP(STORE_VMX, 0, 4); + op->element_size = 4; + break; case 231: /* stvx */ case 487: /* stvxl */ - if (!(regs->msr & MSR_VEC)) - break; - ea = xform_ea(instr, regs, 0); - err = do_vec_store(rd, do_stvx, ea, regs); - goto ldst_done; + op->type = MKOP(STORE_VMX, 0, 16); + break; #endif /* CONFIG_ALTIVEC */ #ifdef __powerpc64__ + case 21: /* ldx */ + case 53: /* ldux */ + op->type = MKOP(LOAD, u, 8); + break; + case 149: /* stdx */ case 181: /* stdux */ - val = regs->gpr[rd]; - err = write_mem(val, xform_ea(instr, regs, u), 8, regs); - goto ldst_done; + op->type = MKOP(STORE, u, 8); + break; #endif case 151: /* stwx */ case 183: /* stwux */ - val = regs->gpr[rd]; - err = write_mem(val, xform_ea(instr, regs, u), 4, regs); - goto ldst_done; + op->type = MKOP(STORE, u, 4); + break; case 215: /* stbx */ case 247: /* stbux */ - val = regs->gpr[rd]; - err = write_mem(val, xform_ea(instr, regs, u), 1, regs); - goto ldst_done; + op->type = MKOP(STORE, u, 1); + break; case 279: /* lhzx */ case 311: /* lhzux */ - err = read_mem(®s->gpr[rd], xform_ea(instr, regs, u), - 2, regs); - goto ldst_done; + op->type = MKOP(LOAD, u, 2); + break; #ifdef __powerpc64__ case 341: /* lwax */ case 373: /* lwaux */ - err = read_mem(®s->gpr[rd], xform_ea(instr, regs, u), - 4, regs); - if (!err) - regs->gpr[rd] = (signed int) regs->gpr[rd]; - goto ldst_done; + op->type = MKOP(LOAD, SIGNEXT | u, 4); + break; #endif case 343: /* lhax */ case 375: /* lhaux */ - err = read_mem(®s->gpr[rd], xform_ea(instr, regs, u), - 2, regs); - if (!err) - regs->gpr[rd] = (signed short) regs->gpr[rd]; - goto ldst_done; + op->type = MKOP(LOAD, SIGNEXT | u, 2); + break; case 407: /* sthx */ case 439: /* sthux */ - val = regs->gpr[rd]; - err = write_mem(val, xform_ea(instr, regs, u), 2, regs); - goto ldst_done; + op->type = MKOP(STORE, u, 2); + break; #ifdef __powerpc64__ case 532: /* ldbrx */ - err = read_mem(&val, xform_ea(instr, regs, 0), 8, regs); - if (!err) - regs->gpr[rd] = byterev_8(val); - goto ldst_done; + op->type = MKOP(LOAD, BYTEREV, 8); + break; #endif + case 533: /* lswx */ + op->type = MKOP(LOAD_MULTI, 0, regs->xer & 0x7f); + break; case 534: /* lwbrx */ - err = read_mem(&val, xform_ea(instr, regs, 0), 4, regs); - if (!err) - regs->gpr[rd] = byterev_4(val); - goto ldst_done; + op->type = MKOP(LOAD, BYTEREV, 4); + break; + + case 597: /* lswi */ + if (rb == 0) + rb = 32; /* # bytes to load */ + op->type = MKOP(LOAD_MULTI, 0, rb); + op->ea = ra ? 
regs->gpr[ra] : 0; + break; -#ifdef CONFIG_PPC_CPU +#ifdef CONFIG_PPC_FPU case 535: /* lfsx */ case 567: /* lfsux */ - if (!(regs->msr & MSR_FP)) - break; - ea = xform_ea(instr, regs, u); - err = do_fp_load(rd, do_lfs, ea, 4, regs); - goto ldst_done; + op->type = MKOP(LOAD_FP, u | FPCONV, 4); + break; case 599: /* lfdx */ case 631: /* lfdux */ - if (!(regs->msr & MSR_FP)) - break; - ea = xform_ea(instr, regs, u); - err = do_fp_load(rd, do_lfd, ea, 8, regs); - goto ldst_done; + op->type = MKOP(LOAD_FP, u, 8); + break; case 663: /* stfsx */ case 695: /* stfsux */ - if (!(regs->msr & MSR_FP)) - break; - ea = xform_ea(instr, regs, u); - err = do_fp_store(rd, do_stfs, ea, 4, regs); - goto ldst_done; + op->type = MKOP(STORE_FP, u | FPCONV, 4); + break; case 727: /* stfdx */ case 759: /* stfdux */ - if (!(regs->msr & MSR_FP)) - break; - ea = xform_ea(instr, regs, u); - err = do_fp_store(rd, do_stfd, ea, 8, regs); - goto ldst_done; -#endif + op->type = MKOP(STORE_FP, u, 8); + break; + +#ifdef __powerpc64__ + case 791: /* lfdpx */ + op->type = MKOP(LOAD_FP, 0, 16); + break; + + case 855: /* lfiwax */ + op->type = MKOP(LOAD_FP, SIGNEXT, 4); + break; + + case 887: /* lfiwzx */ + op->type = MKOP(LOAD_FP, 0, 4); + break; + + case 919: /* stfdpx */ + op->type = MKOP(STORE_FP, 0, 16); + break; + + case 983: /* stfiwx */ + op->type = MKOP(STORE_FP, 0, 4); + break; +#endif /* __powerpc64 */ +#endif /* CONFIG_PPC_FPU */ #ifdef __powerpc64__ case 660: /* stdbrx */ - val = byterev_8(regs->gpr[rd]); - err = write_mem(val, xform_ea(instr, regs, 0), 8, regs); - goto ldst_done; + op->type = MKOP(STORE, BYTEREV, 8); + op->val = byterev_8(regs->gpr[rd]); + break; #endif + case 661: /* stswx */ + op->type = MKOP(STORE_MULTI, 0, regs->xer & 0x7f); + break; + case 662: /* stwbrx */ - val = byterev_4(regs->gpr[rd]); - err = write_mem(val, xform_ea(instr, regs, 0), 4, regs); - goto ldst_done; + op->type = MKOP(STORE, BYTEREV, 4); + op->val = byterev_4(regs->gpr[rd]); + break; + + case 725: /* stswi */ + if (rb == 0) + rb = 32; /* # bytes to store */ + op->type = MKOP(STORE_MULTI, 0, rb); + op->ea = ra ? regs->gpr[ra] : 0; + break; case 790: /* lhbrx */ - err = read_mem(&val, xform_ea(instr, regs, 0), 2, regs); - if (!err) - regs->gpr[rd] = byterev_2(val); - goto ldst_done; + op->type = MKOP(LOAD, BYTEREV, 2); + break; case 918: /* sthbrx */ - val = byterev_2(regs->gpr[rd]); - err = write_mem(val, xform_ea(instr, regs, 0), 2, regs); - goto ldst_done; + op->type = MKOP(STORE, BYTEREV, 2); + op->val = byterev_2(regs->gpr[rd]); + break; #ifdef CONFIG_VSX + case 12: /* lxsiwzx */ + op->reg = rd | ((word & 1) << 5); + op->type = MKOP(LOAD_VSX, 0, 4); + op->element_size = 8; + break; + + case 76: /* lxsiwax */ + op->reg = rd | ((word & 1) << 5); + op->type = MKOP(LOAD_VSX, SIGNEXT, 4); + op->element_size = 8; + break; + + case 140: /* stxsiwx */ + op->reg = rd | ((word & 1) << 5); + op->type = MKOP(STORE_VSX, 0, 4); + op->element_size = 8; + break; + + case 268: /* lxvx */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->reg = rd | ((word & 1) << 5); + op->type = MKOP(LOAD_VSX, 0, 16); + op->element_size = 16; + op->vsx_flags = VSX_CHECK_VEC; + break; + + case 269: /* lxvl */ + case 301: { /* lxvll */ + int nb; + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->reg = rd | ((word & 1) << 5); + op->ea = ra ? regs->gpr[ra] : 0; + nb = regs->gpr[rb] & 0xff; + if (nb > 16) + nb = 16; + op->type = MKOP(LOAD_VSX, 0, nb); + op->element_size = 16; + op->vsx_flags = ((word & 0x20) ? 
VSX_LDLEFT : 0) | + VSX_CHECK_VEC; + break; + } + case 332: /* lxvdsx */ + op->reg = rd | ((word & 1) << 5); + op->type = MKOP(LOAD_VSX, 0, 8); + op->element_size = 8; + op->vsx_flags = VSX_SPLAT; + break; + + case 333: /* lxvpx */ + if (!cpu_has_feature(CPU_FTR_ARCH_31)) + goto unknown_opcode; + op->reg = VSX_REGISTER_XTP(rd); + op->type = MKOP(LOAD_VSX, 0, 32); + op->element_size = 32; + break; + + case 364: /* lxvwsx */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->reg = rd | ((word & 1) << 5); + op->type = MKOP(LOAD_VSX, 0, 4); + op->element_size = 4; + op->vsx_flags = VSX_SPLAT | VSX_CHECK_VEC; + break; + + case 396: /* stxvx */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->reg = rd | ((word & 1) << 5); + op->type = MKOP(STORE_VSX, 0, 16); + op->element_size = 16; + op->vsx_flags = VSX_CHECK_VEC; + break; + + case 397: /* stxvl */ + case 429: { /* stxvll */ + int nb; + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->reg = rd | ((word & 1) << 5); + op->ea = ra ? regs->gpr[ra] : 0; + nb = regs->gpr[rb] & 0xff; + if (nb > 16) + nb = 16; + op->type = MKOP(STORE_VSX, 0, nb); + op->element_size = 16; + op->vsx_flags = ((word & 0x20) ? VSX_LDLEFT : 0) | + VSX_CHECK_VEC; + break; + } + case 461: /* stxvpx */ + if (!cpu_has_feature(CPU_FTR_ARCH_31)) + goto unknown_opcode; + op->reg = VSX_REGISTER_XTP(rd); + op->type = MKOP(STORE_VSX, 0, 32); + op->element_size = 32; + break; + case 524: /* lxsspx */ + op->reg = rd | ((word & 1) << 5); + op->type = MKOP(LOAD_VSX, 0, 4); + op->element_size = 8; + op->vsx_flags = VSX_FPCONV; + break; + + case 588: /* lxsdx */ + op->reg = rd | ((word & 1) << 5); + op->type = MKOP(LOAD_VSX, 0, 8); + op->element_size = 8; + break; + + case 652: /* stxsspx */ + op->reg = rd | ((word & 1) << 5); + op->type = MKOP(STORE_VSX, 0, 4); + op->element_size = 8; + op->vsx_flags = VSX_FPCONV; + break; + + case 716: /* stxsdx */ + op->reg = rd | ((word & 1) << 5); + op->type = MKOP(STORE_VSX, 0, 8); + op->element_size = 8; + break; + + case 780: /* lxvw4x */ + op->reg = rd | ((word & 1) << 5); + op->type = MKOP(LOAD_VSX, 0, 16); + op->element_size = 4; + break; + + case 781: /* lxsibzx */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->reg = rd | ((word & 1) << 5); + op->type = MKOP(LOAD_VSX, 0, 1); + op->element_size = 8; + op->vsx_flags = VSX_CHECK_VEC; + break; + + case 812: /* lxvh8x */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->reg = rd | ((word & 1) << 5); + op->type = MKOP(LOAD_VSX, 0, 16); + op->element_size = 2; + op->vsx_flags = VSX_CHECK_VEC; + break; + + case 813: /* lxsihzx */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->reg = rd | ((word & 1) << 5); + op->type = MKOP(LOAD_VSX, 0, 2); + op->element_size = 8; + op->vsx_flags = VSX_CHECK_VEC; + break; + case 844: /* lxvd2x */ - case 876: /* lxvd2ux */ - if (!(regs->msr & MSR_VSX)) - break; - rd |= (instr & 1) << 5; - ea = xform_ea(instr, regs, u); - err = do_vsx_load(rd, do_lxvd2x, ea, regs); - goto ldst_done; + op->reg = rd | ((word & 1) << 5); + op->type = MKOP(LOAD_VSX, 0, 16); + op->element_size = 8; + break; + + case 876: /* lxvb16x */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->reg = rd | ((word & 1) << 5); + op->type = MKOP(LOAD_VSX, 0, 16); + op->element_size = 1; + op->vsx_flags = VSX_CHECK_VEC; + break; + + case 908: /* stxvw4x */ + op->reg = rd | ((word & 1) << 5); + op->type = MKOP(STORE_VSX, 0, 16); + op->element_size = 4; + break; 
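
The VSX cases above all rebuild the 6-bit VSX register number from the instruction image: the low five bits come from the RT/RS field and the sixth bit from the TX/SX bit in bit 0 of the word, which is why each case sets op->reg = rd | ((word & 1) << 5) before filling in op->type, op->element_size and op->vsx_flags. A minimal stand-alone sketch of that decoding step follows, for illustration only: the helper name and the example operands are assumptions rather than anything taken from this patch, but the field layout is the same one the switch above keys on, i.e. the extended opcode in (word >> 1) & 0x3ff.

/*
 * Hypothetical stand-alone decoder for the XT/XS field of an X-form VSX
 * load/store.  RT/RS occupies bits 21-25 of the 32-bit instruction word
 * and the TX/SX bit is bit 0, so rd | (tx << 5) selects one of
 * VSR0-VSR63, matching op->reg = rd | ((word & 1) << 5) above.
 */
#include <stdio.h>
#include <stdint.h>

static unsigned int vsx_xt(uint32_t word)
{
	unsigned int rd = (word >> 21) & 0x1f;	/* RT/RS field */
	unsigned int tx = word & 1;		/* TX/SX bit */

	return rd | (tx << 5);
}

int main(void)
{
	/* Assumed example: lxvd2x vs35,r4,r5 - primary opcode 31, XO 844, TX = 1 */
	uint32_t word = (31u << 26) | (3u << 21) | (4u << 16) | (5u << 11) |
			(844u << 1) | 1u;

	/* prints "XO = 844, XT = vs35" */
	printf("XO = %u, XT = vs%u\n", (word >> 1) & 0x3ff, vsx_xt(word));
	return 0;
}
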
+ + case 909: /* stxsibx */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->reg = rd | ((word & 1) << 5); + op->type = MKOP(STORE_VSX, 0, 1); + op->element_size = 8; + op->vsx_flags = VSX_CHECK_VEC; + break; + + case 940: /* stxvh8x */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->reg = rd | ((word & 1) << 5); + op->type = MKOP(STORE_VSX, 0, 16); + op->element_size = 2; + op->vsx_flags = VSX_CHECK_VEC; + break; + + case 941: /* stxsihx */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->reg = rd | ((word & 1) << 5); + op->type = MKOP(STORE_VSX, 0, 2); + op->element_size = 8; + op->vsx_flags = VSX_CHECK_VEC; + break; case 972: /* stxvd2x */ - case 1004: /* stxvd2ux */ - if (!(regs->msr & MSR_VSX)) - break; - rd |= (instr & 1) << 5; - ea = xform_ea(instr, regs, u); - err = do_vsx_store(rd, do_stxvd2x, ea, regs); - goto ldst_done; + op->reg = rd | ((word & 1) << 5); + op->type = MKOP(STORE_VSX, 0, 16); + op->element_size = 8; + break; + + case 1004: /* stxvb16x */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->reg = rd | ((word & 1) << 5); + op->type = MKOP(STORE_VSX, 0, 16); + op->element_size = 1; + op->vsx_flags = VSX_CHECK_VEC; + break; #endif /* CONFIG_VSX */ } @@ -1477,185 +2753,919 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) case 32: /* lwz */ case 33: /* lwzu */ - err = read_mem(®s->gpr[rd], dform_ea(instr, regs), 4, regs); - goto ldst_done; + op->type = MKOP(LOAD, u, 4); + op->ea = dform_ea(word, regs); + break; case 34: /* lbz */ case 35: /* lbzu */ - err = read_mem(®s->gpr[rd], dform_ea(instr, regs), 1, regs); - goto ldst_done; + op->type = MKOP(LOAD, u, 1); + op->ea = dform_ea(word, regs); + break; case 36: /* stw */ - val = regs->gpr[rd]; - err = write_mem(val, dform_ea(instr, regs), 4, regs); - goto ldst_done; - case 37: /* stwu */ - val = regs->gpr[rd]; - val3 = dform_ea(instr, regs); - /* - * For PPC32 we always use stwu to change stack point with r1. So - * this emulated store may corrupt the exception frame, now we - * have to provide the exception frame trampoline, which is pushed - * below the kprobed function stack. So we only update gpr[1] but - * don't emulate the real store operation. We will do real store - * operation safely in exception return code by checking this flag. - */ - if ((ra == 1) && !(regs->msr & MSR_PR) \ - && (val3 >= (regs->gpr[1] - STACK_INT_FRAME_SIZE))) { - /* - * Check if we will touch kernel sack overflow - */ - if (val3 - STACK_INT_FRAME_SIZE <= current->thread.ksp_limit) { - printk(KERN_CRIT "Can't kprobe this since Kernel stack overflow.\n"); - err = -EINVAL; - break; - } - - /* - * Check if we already set since that means we'll - * lose the previous value. 
- */ - WARN_ON(test_thread_flag(TIF_EMULATE_STACK_STORE)); - set_thread_flag(TIF_EMULATE_STACK_STORE); - err = 0; - } else - err = write_mem(val, val3, 4, regs); - goto ldst_done; + op->type = MKOP(STORE, u, 4); + op->ea = dform_ea(word, regs); + break; case 38: /* stb */ case 39: /* stbu */ - val = regs->gpr[rd]; - err = write_mem(val, dform_ea(instr, regs), 1, regs); - goto ldst_done; + op->type = MKOP(STORE, u, 1); + op->ea = dform_ea(word, regs); + break; case 40: /* lhz */ case 41: /* lhzu */ - err = read_mem(®s->gpr[rd], dform_ea(instr, regs), 2, regs); - goto ldst_done; + op->type = MKOP(LOAD, u, 2); + op->ea = dform_ea(word, regs); + break; case 42: /* lha */ case 43: /* lhau */ - err = read_mem(®s->gpr[rd], dform_ea(instr, regs), 2, regs); - if (!err) - regs->gpr[rd] = (signed short) regs->gpr[rd]; - goto ldst_done; + op->type = MKOP(LOAD, SIGNEXT | u, 2); + op->ea = dform_ea(word, regs); + break; case 44: /* sth */ case 45: /* sthu */ - val = regs->gpr[rd]; - err = write_mem(val, dform_ea(instr, regs), 2, regs); - goto ldst_done; + op->type = MKOP(STORE, u, 2); + op->ea = dform_ea(word, regs); + break; case 46: /* lmw */ - ra = (instr >> 16) & 0x1f; if (ra >= rd) break; /* invalid form, ra in range to load */ - ea = dform_ea(instr, regs); - do { - err = read_mem(®s->gpr[rd], ea, 4, regs); - if (err) - return 0; - ea += 4; - } while (++rd < 32); - goto instr_done; + op->type = MKOP(LOAD_MULTI, 0, 4 * (32 - rd)); + op->ea = dform_ea(word, regs); + break; case 47: /* stmw */ - ea = dform_ea(instr, regs); - do { - err = write_mem(regs->gpr[rd], ea, 4, regs); - if (err) - return 0; - ea += 4; - } while (++rd < 32); - goto instr_done; + op->type = MKOP(STORE_MULTI, 0, 4 * (32 - rd)); + op->ea = dform_ea(word, regs); + break; #ifdef CONFIG_PPC_FPU case 48: /* lfs */ case 49: /* lfsu */ - if (!(regs->msr & MSR_FP)) - break; - ea = dform_ea(instr, regs); - err = do_fp_load(rd, do_lfs, ea, 4, regs); - goto ldst_done; + op->type = MKOP(LOAD_FP, u | FPCONV, 4); + op->ea = dform_ea(word, regs); + break; case 50: /* lfd */ case 51: /* lfdu */ - if (!(regs->msr & MSR_FP)) - break; - ea = dform_ea(instr, regs); - err = do_fp_load(rd, do_lfd, ea, 8, regs); - goto ldst_done; + op->type = MKOP(LOAD_FP, u, 8); + op->ea = dform_ea(word, regs); + break; case 52: /* stfs */ case 53: /* stfsu */ - if (!(regs->msr & MSR_FP)) - break; - ea = dform_ea(instr, regs); - err = do_fp_store(rd, do_stfs, ea, 4, regs); - goto ldst_done; + op->type = MKOP(STORE_FP, u | FPCONV, 4); + op->ea = dform_ea(word, regs); + break; case 54: /* stfd */ case 55: /* stfdu */ - if (!(regs->msr & MSR_FP)) - break; - ea = dform_ea(instr, regs); - err = do_fp_store(rd, do_stfd, ea, 8, regs); - goto ldst_done; + op->type = MKOP(STORE_FP, u, 8); + op->ea = dform_ea(word, regs); + break; +#endif + +#ifdef __powerpc64__ + case 56: /* lq */ + if (!((rd & 1) || (rd == ra))) + op->type = MKOP(LOAD, 0, 16); + op->ea = dqform_ea(word, regs); + break; #endif +#ifdef CONFIG_VSX + case 57: /* lfdp, lxsd, lxssp */ + op->ea = dsform_ea(word, regs); + switch (word & 3) { + case 0: /* lfdp */ + if (rd & 1) + break; /* reg must be even */ + op->type = MKOP(LOAD_FP, 0, 16); + break; + case 2: /* lxsd */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->reg = rd + 32; + op->type = MKOP(LOAD_VSX, 0, 8); + op->element_size = 8; + op->vsx_flags = VSX_CHECK_VEC; + break; + case 3: /* lxssp */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->reg = rd + 32; + op->type = MKOP(LOAD_VSX, 0, 4); + op->element_size = 
8; + op->vsx_flags = VSX_FPCONV | VSX_CHECK_VEC; + break; + } + break; +#endif /* CONFIG_VSX */ + #ifdef __powerpc64__ case 58: /* ld[u], lwa */ - switch (instr & 3) { + op->ea = dsform_ea(word, regs); + switch (word & 3) { case 0: /* ld */ - err = read_mem(®s->gpr[rd], dsform_ea(instr, regs), - 8, regs); - goto ldst_done; + op->type = MKOP(LOAD, 0, 8); + break; case 1: /* ldu */ - err = read_mem(®s->gpr[rd], dsform_ea(instr, regs), - 8, regs); - goto ldst_done; + op->type = MKOP(LOAD, UPDATE, 8); + break; case 2: /* lwa */ - err = read_mem(®s->gpr[rd], dsform_ea(instr, regs), - 4, regs); - if (!err) - regs->gpr[rd] = (signed int) regs->gpr[rd]; - goto ldst_done; + op->type = MKOP(LOAD, SIGNEXT, 4); + break; + } + break; +#endif + +#ifdef CONFIG_VSX + case 6: + if (!cpu_has_feature(CPU_FTR_ARCH_31)) + goto unknown_opcode; + op->ea = dqform_ea(word, regs); + op->reg = VSX_REGISTER_XTP(rd); + op->element_size = 32; + switch (word & 0xf) { + case 0: /* lxvp */ + op->type = MKOP(LOAD_VSX, 0, 32); + break; + case 1: /* stxvp */ + op->type = MKOP(STORE_VSX, 0, 32); + break; } break; + case 61: /* stfdp, lxv, stxsd, stxssp, stxv */ + switch (word & 7) { + case 0: /* stfdp with LSB of DS field = 0 */ + case 4: /* stfdp with LSB of DS field = 1 */ + op->ea = dsform_ea(word, regs); + op->type = MKOP(STORE_FP, 0, 16); + break; + + case 1: /* lxv */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->ea = dqform_ea(word, regs); + if (word & 8) + op->reg = rd + 32; + op->type = MKOP(LOAD_VSX, 0, 16); + op->element_size = 16; + op->vsx_flags = VSX_CHECK_VEC; + break; + + case 2: /* stxsd with LSB of DS field = 0 */ + case 6: /* stxsd with LSB of DS field = 1 */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->ea = dsform_ea(word, regs); + op->reg = rd + 32; + op->type = MKOP(STORE_VSX, 0, 8); + op->element_size = 8; + op->vsx_flags = VSX_CHECK_VEC; + break; + + case 3: /* stxssp with LSB of DS field = 0 */ + case 7: /* stxssp with LSB of DS field = 1 */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->ea = dsform_ea(word, regs); + op->reg = rd + 32; + op->type = MKOP(STORE_VSX, 0, 4); + op->element_size = 8; + op->vsx_flags = VSX_FPCONV | VSX_CHECK_VEC; + break; + + case 5: /* stxv */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + op->ea = dqform_ea(word, regs); + if (word & 8) + op->reg = rd + 32; + op->type = MKOP(STORE_VSX, 0, 16); + op->element_size = 16; + op->vsx_flags = VSX_CHECK_VEC; + break; + } + break; +#endif /* CONFIG_VSX */ + +#ifdef __powerpc64__ case 62: /* std[u] */ - val = regs->gpr[rd]; - switch (instr & 3) { + op->ea = dsform_ea(word, regs); + switch (word & 3) { case 0: /* std */ - err = write_mem(val, dsform_ea(instr, regs), 8, regs); - goto ldst_done; + op->type = MKOP(STORE, 0, 8); + break; case 1: /* stdu */ - err = write_mem(val, dsform_ea(instr, regs), 8, regs); - goto ldst_done; + op->type = MKOP(STORE, UPDATE, 8); + break; + case 2: /* stq */ + if (!(rd & 1)) + op->type = MKOP(STORE, 0, 16); + break; } break; + case 1: /* Prefixed instructions */ + if (!cpu_has_feature(CPU_FTR_ARCH_31)) + goto unknown_opcode; + + prefix_r = GET_PREFIX_R(word); + ra = GET_PREFIX_RA(suffix); + op->update_reg = ra; + rd = (suffix >> 21) & 0x1f; + op->reg = rd; + op->val = regs->gpr[rd]; + + suffixopcode = get_op(suffix); + prefixtype = (word >> 24) & 0x3; + switch (prefixtype) { + case 0: /* Type 00 Eight-Byte Load/Store */ + if (prefix_r && ra) + break; + op->ea = mlsd_8lsd_ea(word, suffix, regs); + switch 
(suffixopcode) { + case 41: /* plwa */ + op->type = MKOP(LOAD, PREFIXED | SIGNEXT, 4); + break; +#ifdef CONFIG_VSX + case 42: /* plxsd */ + op->reg = rd + 32; + op->type = MKOP(LOAD_VSX, PREFIXED, 8); + op->element_size = 8; + op->vsx_flags = VSX_CHECK_VEC; + break; + case 43: /* plxssp */ + op->reg = rd + 32; + op->type = MKOP(LOAD_VSX, PREFIXED, 4); + op->element_size = 8; + op->vsx_flags = VSX_FPCONV | VSX_CHECK_VEC; + break; + case 46: /* pstxsd */ + op->reg = rd + 32; + op->type = MKOP(STORE_VSX, PREFIXED, 8); + op->element_size = 8; + op->vsx_flags = VSX_CHECK_VEC; + break; + case 47: /* pstxssp */ + op->reg = rd + 32; + op->type = MKOP(STORE_VSX, PREFIXED, 4); + op->element_size = 8; + op->vsx_flags = VSX_FPCONV | VSX_CHECK_VEC; + break; + case 51: /* plxv1 */ + op->reg += 32; + fallthrough; + case 50: /* plxv0 */ + op->type = MKOP(LOAD_VSX, PREFIXED, 16); + op->element_size = 16; + op->vsx_flags = VSX_CHECK_VEC; + break; + case 55: /* pstxv1 */ + op->reg = rd + 32; + fallthrough; + case 54: /* pstxv0 */ + op->type = MKOP(STORE_VSX, PREFIXED, 16); + op->element_size = 16; + op->vsx_flags = VSX_CHECK_VEC; + break; +#endif /* CONFIG_VSX */ + case 56: /* plq */ + op->type = MKOP(LOAD, PREFIXED, 16); + break; + case 57: /* pld */ + op->type = MKOP(LOAD, PREFIXED, 8); + break; +#ifdef CONFIG_VSX + case 58: /* plxvp */ + op->reg = VSX_REGISTER_XTP(rd); + op->type = MKOP(LOAD_VSX, PREFIXED, 32); + op->element_size = 32; + break; +#endif /* CONFIG_VSX */ + case 60: /* pstq */ + op->type = MKOP(STORE, PREFIXED, 16); + break; + case 61: /* pstd */ + op->type = MKOP(STORE, PREFIXED, 8); + break; +#ifdef CONFIG_VSX + case 62: /* pstxvp */ + op->reg = VSX_REGISTER_XTP(rd); + op->type = MKOP(STORE_VSX, PREFIXED, 32); + op->element_size = 32; + break; +#endif /* CONFIG_VSX */ + } + break; + case 1: /* Type 01 Eight-Byte Register-to-Register */ + break; + case 2: /* Type 10 Modified Load/Store */ + if (prefix_r && ra) + break; + op->ea = mlsd_8lsd_ea(word, suffix, regs); + switch (suffixopcode) { + case 32: /* plwz */ + op->type = MKOP(LOAD, PREFIXED, 4); + break; + case 34: /* plbz */ + op->type = MKOP(LOAD, PREFIXED, 1); + break; + case 36: /* pstw */ + op->type = MKOP(STORE, PREFIXED, 4); + break; + case 38: /* pstb */ + op->type = MKOP(STORE, PREFIXED, 1); + break; + case 40: /* plhz */ + op->type = MKOP(LOAD, PREFIXED, 2); + break; + case 42: /* plha */ + op->type = MKOP(LOAD, PREFIXED | SIGNEXT, 2); + break; + case 44: /* psth */ + op->type = MKOP(STORE, PREFIXED, 2); + break; + case 48: /* plfs */ + op->type = MKOP(LOAD_FP, PREFIXED | FPCONV, 4); + break; + case 50: /* plfd */ + op->type = MKOP(LOAD_FP, PREFIXED, 8); + break; + case 52: /* pstfs */ + op->type = MKOP(STORE_FP, PREFIXED | FPCONV, 4); + break; + case 54: /* pstfd */ + op->type = MKOP(STORE_FP, PREFIXED, 8); + break; + } + break; + case 3: /* Type 11 Modified Register-to-Register */ + break; + } #endif /* __powerpc64__ */ } - err = -EINVAL; - ldst_done: - if (err) { - regs->gpr[ra] = old_ra; - return 0; /* invoke DSI if -EFAULT? 
*/ + if (OP_IS_LOAD_STORE(op->type) && (op->type & UPDATE)) { + switch (GETTYPE(op->type)) { + case LOAD: + if (ra == rd) + goto unknown_opcode; + fallthrough; + case STORE: + case LOAD_FP: + case STORE_FP: + if (ra == 0) + goto unknown_opcode; + } } - instr_done: - regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4); - return 1; + +#ifdef CONFIG_VSX + if ((GETTYPE(op->type) == LOAD_VSX || + GETTYPE(op->type) == STORE_VSX) && + !cpu_has_feature(CPU_FTR_VSX)) { + return -1; + } +#endif /* CONFIG_VSX */ + + return 0; + + unknown_opcode: + op->type = UNKNOWN; + return 0; logical_done: - if (instr & 1) - set_cr0(regs, ra); - goto instr_done; + if (word & 1) + set_cr0(regs, op); + logical_done_nocc: + op->reg = ra; + op->type |= SETREG; + return 1; arith_done: - if (instr & 1) - set_cr0(regs, rd); - goto instr_done; + if (word & 1) + set_cr0(regs, op); + compute_done: + op->reg = rd; + op->type |= SETREG; + return 1; + + priv: + op->type = INTERRUPT | 0x700; + op->val = SRR1_PROGPRIV; + return 0; + + trap: + op->type = INTERRUPT | 0x700; + op->val = SRR1_PROGTRAP; + return 0; +} +EXPORT_SYMBOL_GPL(analyse_instr); +NOKPROBE_SYMBOL(analyse_instr); + +/* + * For PPC32 we always use stwu with r1 to change the stack pointer. + * So this emulated store may corrupt the exception frame, now we + * have to provide the exception frame trampoline, which is pushed + * below the kprobed function stack. So we only update gpr[1] but + * don't emulate the real store operation. We will do real store + * operation safely in exception return code by checking this flag. + */ +static nokprobe_inline int handle_stack_update(unsigned long ea, struct pt_regs *regs) +{ + /* + * Check if we already set since that means we'll + * lose the previous value. + */ + WARN_ON(test_thread_flag(TIF_EMULATE_STACK_STORE)); + set_thread_flag(TIF_EMULATE_STACK_STORE); + return 0; +} + +static nokprobe_inline void do_signext(unsigned long *valp, int size) +{ + switch (size) { + case 2: + *valp = (signed short) *valp; + break; + case 4: + *valp = (signed int) *valp; + break; + } +} + +static nokprobe_inline void do_byterev(unsigned long *valp, int size) +{ + switch (size) { + case 2: + *valp = byterev_2(*valp); + break; + case 4: + *valp = byterev_4(*valp); + break; +#ifdef __powerpc64__ + case 8: + *valp = byterev_8(*valp); + break; +#endif + } +} + +/* + * Emulate an instruction that can be executed just by updating + * fields in *regs. 
+ */ +void emulate_update_regs(struct pt_regs *regs, struct instruction_op *op) +{ + unsigned long next_pc; + + next_pc = truncate_if_32bit(regs->msr, regs->nip + GETLENGTH(op->type)); + switch (GETTYPE(op->type)) { + case COMPUTE: + if (op->type & SETREG) + regs->gpr[op->reg] = op->val; + if (op->type & SETCC) + regs->ccr = op->ccval; + if (op->type & SETXER) + regs->xer = op->xerval; + break; + + case BRANCH: + if (op->type & SETLK) + regs->link = next_pc; + if (op->type & BRTAKEN) + next_pc = op->val; + if (op->type & DECCTR) + --regs->ctr; + break; + + case BARRIER: + switch (op->type & BARRIER_MASK) { + case BARRIER_SYNC: + mb(); + break; + case BARRIER_ISYNC: + isync(); + break; + case BARRIER_EIEIO: + eieio(); + break; +#ifdef CONFIG_PPC64 + case BARRIER_LWSYNC: + asm volatile("lwsync" : : : "memory"); + break; + case BARRIER_PTESYNC: + asm volatile("ptesync" : : : "memory"); + break; +#endif + } + break; + + case MFSPR: + switch (op->spr) { + case SPRN_XER: + regs->gpr[op->reg] = regs->xer & 0xffffffffUL; + break; + case SPRN_LR: + regs->gpr[op->reg] = regs->link; + break; + case SPRN_CTR: + regs->gpr[op->reg] = regs->ctr; + break; + default: + WARN_ON_ONCE(1); + } + break; + + case MTSPR: + switch (op->spr) { + case SPRN_XER: + regs->xer = op->val & 0xffffffffUL; + break; + case SPRN_LR: + regs->link = op->val; + break; + case SPRN_CTR: + regs->ctr = op->val; + break; + default: + WARN_ON_ONCE(1); + } + break; + + default: + WARN_ON_ONCE(1); + } + regs_set_return_ip(regs, next_pc); +} +NOKPROBE_SYMBOL(emulate_update_regs); + +/* + * Emulate a previously-analysed load or store instruction. + * Return values are: + * 0 = instruction emulated successfully + * -EFAULT = address out of range or access faulted (regs->dar + * contains the faulting address) + * -EACCES = misaligned access, instruction requires alignment + * -EINVAL = unknown operation in *op + */ +int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op) +{ + int err, size, type; + int i, rd, nb; + unsigned int cr; + unsigned long val; + unsigned long ea; + bool cross_endian; + + err = 0; + size = GETSIZE(op->type); + type = GETTYPE(op->type); + cross_endian = (regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE); + ea = truncate_if_32bit(regs->msr, op->ea); + + switch (type) { + case LARX: + if (ea & (size - 1)) + return -EACCES; /* can't handle misaligned */ + if (!address_ok(regs, ea, size)) + return -EFAULT; + err = 0; + val = 0; + switch (size) { +#ifdef CONFIG_PPC_HAS_LBARX_LHARX + case 1: + __get_user_asmx(val, ea, err, "lbarx"); + break; + case 2: + __get_user_asmx(val, ea, err, "lharx"); + break; +#endif + case 4: + __get_user_asmx(val, ea, err, "lwarx"); + break; +#ifdef __powerpc64__ + case 8: + __get_user_asmx(val, ea, err, "ldarx"); + break; + case 16: + err = do_lqarx(ea, ®s->gpr[op->reg]); + break; +#endif + default: + return -EINVAL; + } + if (err) { + regs->dar = ea; + break; + } + if (size < 16) + regs->gpr[op->reg] = val; + break; + + case STCX: + if (ea & (size - 1)) + return -EACCES; /* can't handle misaligned */ + if (!address_ok(regs, ea, size)) + return -EFAULT; + err = 0; + switch (size) { +#ifdef __powerpc64__ + case 1: + __put_user_asmx(op->val, ea, err, "stbcx.", cr); + break; + case 2: + __put_user_asmx(op->val, ea, err, "sthcx.", cr); + break; +#endif + case 4: + __put_user_asmx(op->val, ea, err, "stwcx.", cr); + break; +#ifdef __powerpc64__ + case 8: + __put_user_asmx(op->val, ea, err, "stdcx.", cr); + break; + case 16: + err = do_stqcx(ea, regs->gpr[op->reg], + regs->gpr[op->reg + 1], 
&cr); + break; +#endif + default: + return -EINVAL; + } + if (!err) + regs->ccr = (regs->ccr & 0x0fffffff) | + (cr & 0xe0000000) | + ((regs->xer >> 3) & 0x10000000); + else + regs->dar = ea; + break; + + case LOAD: +#ifdef __powerpc64__ + if (size == 16) { + err = emulate_lq(regs, ea, op->reg, cross_endian); + break; + } +#endif + err = read_mem(®s->gpr[op->reg], ea, size, regs); + if (!err) { + if (op->type & SIGNEXT) + do_signext(®s->gpr[op->reg], size); + if ((op->type & BYTEREV) == (cross_endian ? 0 : BYTEREV)) + do_byterev(®s->gpr[op->reg], size); + } + break; + +#ifdef CONFIG_PPC_FPU + case LOAD_FP: + /* + * If the instruction is in userspace, we can emulate it even + * if the VMX state is not live, because we have the state + * stored in the thread_struct. If the instruction is in + * the kernel, we must not touch the state in the thread_struct. + */ + if (!user_mode(regs) && !(regs->msr & MSR_FP)) + return 0; + err = do_fp_load(op, ea, regs, cross_endian); + break; +#endif +#ifdef CONFIG_ALTIVEC + case LOAD_VMX: + if (!user_mode(regs) && !(regs->msr & MSR_VEC)) + return 0; + err = do_vec_load(op->reg, ea, size, regs, cross_endian); + break; +#endif +#ifdef CONFIG_VSX + case LOAD_VSX: { + unsigned long msrbit = MSR_VSX; + + /* + * Some VSX instructions check the MSR_VEC bit rather than MSR_VSX + * when the target of the instruction is a vector register. + */ + if (op->reg >= 32 && (op->vsx_flags & VSX_CHECK_VEC)) + msrbit = MSR_VEC; + if (!user_mode(regs) && !(regs->msr & msrbit)) + return 0; + err = do_vsx_load(op, ea, regs, cross_endian); + break; + } +#endif + case LOAD_MULTI: + if (!address_ok(regs, ea, size)) + return -EFAULT; + rd = op->reg; + for (i = 0; i < size; i += 4) { + unsigned int v32 = 0; + + nb = size - i; + if (nb > 4) + nb = 4; + err = copy_mem_in((u8 *) &v32, ea, nb, regs); + if (err) + break; + if (unlikely(cross_endian)) + v32 = byterev_4(v32); + regs->gpr[rd] = v32; + ea += 4; + /* reg number wraps from 31 to 0 for lsw[ix] */ + rd = (rd + 1) & 0x1f; + } + break; + + case STORE: +#ifdef __powerpc64__ + if (size == 16) { + err = emulate_stq(regs, ea, op->reg, cross_endian); + break; + } +#endif + if ((op->type & UPDATE) && size == sizeof(long) && + op->reg == 1 && op->update_reg == 1 && !user_mode(regs) && + ea >= regs->gpr[1] - STACK_INT_FRAME_SIZE) { + err = handle_stack_update(ea, regs); + break; + } + if (unlikely(cross_endian)) + do_byterev(&op->val, size); + err = write_mem(op->val, ea, size, regs); + break; + +#ifdef CONFIG_PPC_FPU + case STORE_FP: + if (!user_mode(regs) && !(regs->msr & MSR_FP)) + return 0; + err = do_fp_store(op, ea, regs, cross_endian); + break; +#endif +#ifdef CONFIG_ALTIVEC + case STORE_VMX: + if (!user_mode(regs) && !(regs->msr & MSR_VEC)) + return 0; + err = do_vec_store(op->reg, ea, size, regs, cross_endian); + break; +#endif +#ifdef CONFIG_VSX + case STORE_VSX: { + unsigned long msrbit = MSR_VSX; + + /* + * Some VSX instructions check the MSR_VEC bit rather than MSR_VSX + * when the target of the instruction is a vector register. 
+ */ + if (op->reg >= 32 && (op->vsx_flags & VSX_CHECK_VEC)) + msrbit = MSR_VEC; + if (!user_mode(regs) && !(regs->msr & msrbit)) + return 0; + err = do_vsx_store(op, ea, regs, cross_endian); + break; + } +#endif + case STORE_MULTI: + if (!address_ok(regs, ea, size)) + return -EFAULT; + rd = op->reg; + for (i = 0; i < size; i += 4) { + unsigned int v32 = regs->gpr[rd]; + + nb = size - i; + if (nb > 4) + nb = 4; + if (unlikely(cross_endian)) + v32 = byterev_4(v32); + err = copy_mem_out((u8 *) &v32, ea, nb, regs); + if (err) + break; + ea += 4; + /* reg number wraps from 31 to 0 for stsw[ix] */ + rd = (rd + 1) & 0x1f; + } + break; + + default: + return -EINVAL; + } + + if (err) + return err; + + if (op->type & UPDATE) + regs->gpr[op->update_reg] = op->ea; + + return 0; +} +NOKPROBE_SYMBOL(emulate_loadstore); + +/* + * Emulate instructions that cause a transfer of control, + * loads and stores, and a few other instructions. + * Returns 1 if the step was emulated, 0 if not, + * or -1 if the instruction is one that should not be stepped, + * such as an rfid, or a mtmsrd that would clear MSR_RI. + */ +int emulate_step(struct pt_regs *regs, ppc_inst_t instr) +{ + struct instruction_op op; + int r, err, type; + unsigned long val; + unsigned long ea; + + r = analyse_instr(&op, regs, instr); + if (r < 0) + return r; + if (r > 0) { + emulate_update_regs(regs, &op); + return 1; + } + + err = 0; + type = GETTYPE(op.type); + + if (OP_IS_LOAD_STORE(type)) { + err = emulate_loadstore(regs, &op); + if (err) + return 0; + goto instr_done; + } + + switch (type) { + case CACHEOP: + ea = truncate_if_32bit(regs->msr, op.ea); + if (!address_ok(regs, ea, 8)) + return 0; + switch (op.type & CACHEOP_MASK) { + case DCBST: + __cacheop_user_asmx(ea, err, "dcbst"); + break; + case DCBF: + __cacheop_user_asmx(ea, err, "dcbf"); + break; + case DCBTST: + if (op.reg == 0) + prefetchw((void *) ea); + break; + case DCBT: + if (op.reg == 0) + prefetch((void *) ea); + break; + case ICBI: + __cacheop_user_asmx(ea, err, "icbi"); + break; + case DCBZ: + err = emulate_dcbz(ea, regs); + break; + } + if (err) { + regs->dar = ea; + return 0; + } + goto instr_done; + + case MFMSR: + regs->gpr[op.reg] = regs->msr & MSR_MASK; + goto instr_done; + + case MTMSR: + val = regs->gpr[op.reg]; + if ((val & MSR_RI) == 0) + /* can't step mtmsr[d] that would clear MSR_RI */ + return -1; + /* here op.val is the mask of bits to change */ + regs_set_return_msr(regs, (regs->msr & ~op.val) | (val & op.val)); + goto instr_done; + + case SYSCALL: /* sc */ + /* + * Per ISA v3.1, section 7.5.15 'Trace Interrupt', we can't + * single step a system call instruction: + * + * Successful completion for an instruction means that the + * instruction caused no other interrupt. Thus a Trace + * interrupt never occurs for a System Call or System Call + * Vectored instruction, or for a Trap instruction that + * traps. + */ + return -1; + case SYSCALL_VECTORED_0: /* scv 0 */ + return -1; + case RFI: + return -1; + } + return 0; + + instr_done: + regs_set_return_ip(regs, + truncate_if_32bit(regs->msr, regs->nip + GETLENGTH(op.type))); + return 1; } +NOKPROBE_SYMBOL(emulate_step); diff --git a/arch/powerpc/lib/string.S b/arch/powerpc/lib/string.S index 1b5a0a09d609..daa72061dc0c 100644 --- a/arch/powerpc/lib/string.S +++ b/arch/powerpc/lib/string.S @@ -1,30 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * String handling functions for PowerPC. * * Copyright (C) 1996 Paul Mackerras. 
- * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ -#include <asm/processor.h> -#include <asm/errno.h> +#include <linux/export.h> #include <asm/ppc_asm.h> +#include <asm/cache.h> - .section __ex_table,"a" - PPC_LONG_ALIGN .text -_GLOBAL(strcpy) - addi r5,r3,-1 - addi r4,r4,-1 -1: lbzu r0,1(r4) - cmpwi 0,r0,0 - stbu r0,1(r5) - bne 1b - blr - /* This clears out any unused part of the destination buffer, just as the libc version does. -- paulus */ _GLOBAL(strncpy) @@ -33,6 +18,7 @@ _GLOBAL(strncpy) mtctr r5 addi r6,r3,-1 addi r4,r4,-1 + .balign IFETCH_ALIGN_BYTES 1: lbzu r0,1(r4) cmpwi 0,r0,0 stbu r0,1(r6) @@ -44,30 +30,7 @@ _GLOBAL(strncpy) 2: stbu r0,1(r6) /* clear it out if so */ bdnz 2b blr - -_GLOBAL(strcat) - addi r5,r3,-1 - addi r4,r4,-1 -1: lbzu r0,1(r5) - cmpwi 0,r0,0 - bne 1b - addi r5,r5,-1 -1: lbzu r0,1(r4) - cmpwi 0,r0,0 - stbu r0,1(r5) - bne 1b - blr - -_GLOBAL(strcmp) - addi r5,r3,-1 - addi r4,r4,-1 -1: lbzu r3,1(r5) - cmpwi 1,r3,0 - lbzu r0,1(r4) - subf. r3,r0,r3 - beqlr 1 - beq 1b - blr +EXPORT_SYMBOL(strncpy) _GLOBAL(strncmp) PPC_LCMPI 0,r5,0 @@ -75,6 +38,7 @@ _GLOBAL(strncmp) mtctr r5 addi r5,r3,-1 addi r4,r4,-1 + .balign IFETCH_ALIGN_BYTES 1: lbzu r3,1(r5) cmpwi 1,r3,0 lbzu r0,1(r4) @@ -84,81 +48,18 @@ _GLOBAL(strncmp) blr 2: li r3,0 blr - -_GLOBAL(strlen) - addi r4,r3,-1 -1: lbzu r0,1(r4) - cmpwi 0,r0,0 - bne 1b - subf r3,r3,r4 - blr - -_GLOBAL(memcmp) - PPC_LCMPI 0,r5,0 - beq- 2f - mtctr r5 - addi r6,r3,-1 - addi r4,r4,-1 -1: lbzu r3,1(r6) - lbzu r0,1(r4) - subf. r3,r0,r3 - bdnzt 2,1b - blr -2: li r3,0 - blr +EXPORT_SYMBOL(strncmp) _GLOBAL(memchr) PPC_LCMPI 0,r5,0 beq- 2f mtctr r5 addi r3,r3,-1 + .balign IFETCH_ALIGN_BYTES 1: lbzu r0,1(r3) cmpw 0,r0,r4 bdnzf 2,1b beqlr 2: li r3,0 blr - -#ifdef CONFIG_PPC32 -_GLOBAL(__clear_user) - addi r6,r3,-4 - li r3,0 - li r5,0 - cmplwi 0,r4,4 - blt 7f - /* clear a single word */ -11: stwu r5,4(r6) - beqlr - /* clear word sized chunks */ - andi. r0,r6,3 - add r4,r0,r4 - subf r6,r0,r6 - srwi r0,r4,2 - andi. r4,r4,3 - mtctr r0 - bdz 7f -1: stwu r5,4(r6) - bdnz 1b - /* clear byte sized chunks */ -7: cmpwi 0,r4,0 - beqlr - mtctr r4 - addi r6,r6,3 -8: stbu r5,1(r6) - bdnz 8b - blr -90: mr r3,r4 - blr -91: mfctr r3 - slwi r3,r3,2 - add r3,r3,r4 - blr -92: mfctr r3 - blr - - .section __ex_table,"a" - PPC_LONG 11b,90b - PPC_LONG 1b,91b - PPC_LONG 8b,92b - .text -#endif +EXPORT_SYMBOL(memchr) diff --git a/arch/powerpc/lib/string_32.S b/arch/powerpc/lib/string_32.S new file mode 100644 index 000000000000..3ee45619a3f8 --- /dev/null +++ b/arch/powerpc/lib/string_32.S @@ -0,0 +1,90 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * String handling functions for PowerPC32 + * + * Copyright (C) 1996 Paul Mackerras. + * + */ + +#include <linux/export.h> +#include <asm/ppc_asm.h> +#include <asm/cache.h> + + .text + +CACHELINE_BYTES = L1_CACHE_BYTES +LG_CACHELINE_BYTES = L1_CACHE_SHIFT +CACHELINE_MASK = (L1_CACHE_BYTES-1) + +_GLOBAL(__arch_clear_user) +/* + * Use dcbz on the complete cache lines in the destination + * to set them to zero. This requires that the destination + * area is cacheable. + */ + cmplwi cr0, r4, 4 + mr r10, r3 + li r3, 0 + blt 7f + +11: stw r3, 0(r10) + beqlr + andi. r0, r10, 3 + add r11, r0, r4 + subf r6, r0, r10 + + clrlwi r7, r6, 32 - LG_CACHELINE_BYTES + add r8, r7, r11 + srwi r9, r8, LG_CACHELINE_BYTES + addic. 
r9, r9, -1 /* total number of complete cachelines */ + ble 2f + xori r0, r7, CACHELINE_MASK & ~3 + srwi. r0, r0, 2 + beq 3f + mtctr r0 +4: stwu r3, 4(r6) + bdnz 4b +3: mtctr r9 + li r7, 4 +10: dcbz r7, r6 + addi r6, r6, CACHELINE_BYTES + bdnz 10b + clrlwi r11, r8, 32 - LG_CACHELINE_BYTES + addi r11, r11, 4 + +2: srwi r0 ,r11 ,2 + mtctr r0 + bdz 6f +1: stwu r3, 4(r6) + bdnz 1b +6: andi. r11, r11, 3 + beqlr + mtctr r11 + addi r6, r6, 3 +8: stbu r3, 1(r6) + bdnz 8b + blr + +7: cmpwi cr0, r4, 0 + beqlr + mtctr r4 + addi r6, r10, -1 +9: stbu r3, 1(r6) + bdnz 9b + blr + +90: mr r3, r4 + blr +91: add r3, r10, r4 + subf r3, r6, r3 + blr + + EX_TABLE(11b, 90b) + EX_TABLE(4b, 91b) + EX_TABLE(10b, 91b) + EX_TABLE(1b, 91b) + EX_TABLE(8b, 91b) + EX_TABLE(9b, 91b) + +EXPORT_SYMBOL(__arch_clear_user) diff --git a/arch/powerpc/lib/string_64.S b/arch/powerpc/lib/string_64.S index 3b1e48049faf..a25eb8588434 100644 --- a/arch/powerpc/lib/string_64.S +++ b/arch/powerpc/lib/string_64.S @@ -1,33 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * * Copyright (C) IBM Corporation, 2012 * * Author: Anton Blanchard <anton@au.ibm.com> */ +#include <linux/export.h> #include <asm/ppc_asm.h> +#include <asm/linkage.h> #include <asm/asm-offsets.h> - .section ".toc","aw" -PPC64_CACHES: - .tc ppc64_caches[TC],ppc64_caches - .section ".text" - /** - * __clear_user: - Zero a block of memory in user space, with less checking. + * __arch_clear_user: - Zero a block of memory in user space, with less checking. * @to: Destination address, in user space. * @n: Number of bytes to zero. 
* @@ -40,26 +25,17 @@ PPC64_CACHES: .macro err1 100: - .section __ex_table,"a" - .align 3 - .llong 100b,.Ldo_err1 - .previous + EX_TABLE(100b,.Ldo_err1) .endm .macro err2 200: - .section __ex_table,"a" - .align 3 - .llong 200b,.Ldo_err2 - .previous + EX_TABLE(200b,.Ldo_err2) .endm .macro err3 300: - .section __ex_table,"a" - .align 3 - .llong 300b,.Ldo_err3 - .previous + EX_TABLE(300b,.Ldo_err3) .endm .Ldo_err1: @@ -77,7 +53,7 @@ err3; stb r0,0(r3) mr r3,r4 blr -_GLOBAL(__clear_user) +_GLOBAL_TOC(__arch_clear_user) cmpdi r4,32 neg r6,r3 li r0,0 @@ -152,16 +128,16 @@ err1; stb r0,0(r3) blr .Llong_clear: - ld r5,PPC64_CACHES@toc(r2) + LOAD_REG_ADDR(r5, ppc64_caches) bf cr7*4+0,11f err2; std r0,0(r3) addi r3,r3,8 addi r4,r4,-8 - /* Destination is 16 byte aligned, need to get it cacheline aligned */ -11: lwz r7,DCACHEL1LOGLINESIZE(r5) - lwz r9,DCACHEL1LINESIZE(r5) + /* Destination is 16 byte aligned, need to get it cache block aligned */ +11: lwz r7,DCACHEL1LOGBLOCKSIZE(r5) + lwz r9,DCACHEL1BLOCKSIZE(r5) /* * With worst case alignment the long clear loop takes a minimum @@ -191,7 +167,7 @@ err1; std r0,8(r3) mtctr r6 mr r8,r3 14: -err1; dcbz r0,r3 +err1; dcbz 0,r3 add r3,r3,r9 bdnz 14b @@ -200,3 +176,4 @@ err1; dcbz r0,r3 cmpdi r4,32 blt .Lshort_clear b .Lmedium_clear +EXPORT_SYMBOL(__arch_clear_user) diff --git a/arch/powerpc/lib/strlen_32.S b/arch/powerpc/lib/strlen_32.S new file mode 100644 index 000000000000..bbd24feb233f --- /dev/null +++ b/arch/powerpc/lib/strlen_32.S @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * strlen() for PPC32 + * + * Copyright (C) 2018 Christophe Leroy CS Systemes d'Information. + * + * Inspired from glibc implementation + */ +#include <linux/export.h> +#include <asm/ppc_asm.h> +#include <asm/cache.h> + + .text + +/* + * Algorithm: + * + * 1) Given a word 'x', we can test to see if it contains any 0 bytes + * by subtracting 0x01010101, and seeing if any of the high bits of each + * byte changed from 0 to 1. This works because the least significant + * 0 byte must have had no incoming carry (otherwise it's not the least + * significant), so it is 0x00 - 0x01 == 0xff. For all other + * byte values, either they have the high bit set initially, or when + * 1 is subtracted you get a value in the range 0x00-0x7f, none of which + * have their high bit set. The expression here is + * (x - 0x01010101) & ~x & 0x80808080), which gives 0x00000000 when + * there were no 0x00 bytes in the word. You get 0x80 in bytes that + * match, but possibly false 0x80 matches in the next more significant + * byte to a true match due to carries. For little-endian this is + * of no consequence since the least significant match is the one + * we're interested in, but big-endian needs method 2 to find which + * byte matches. + * 2) Given a word 'x', we can test to see _which_ byte was zero by + * calculating ~(((x & ~0x80808080) - 0x80808080 - 1) | x | ~0x80808080). + * This produces 0x80 in each byte that was zero, and 0x00 in all + * the other bytes. The '| ~0x80808080' clears the low 7 bits in each + * byte, and the '| x' part ensures that bytes with the high bit set + * produce 0x00. The addition will carry into the high bit of each byte + * iff that byte had one of its low 7 bits set. We can then just see + * which was the most significant bit set and divide by 8 to find how + * many to add to the index. + * This is from the book 'The PowerPC Compiler Writer's Guide', + * by Steve Hoxey, Faraydon Karim, Bill Hay and Hank Warren. + */ + +_GLOBAL(strlen) + andi. 
r0, r3, 3 + lis r7, 0x0101 + addi r10, r3, -4 + addic r7, r7, 0x0101 /* r7 = 0x01010101 (lomagic) & clear XER[CA] */ + rotlwi r6, r7, 31 /* r6 = 0x80808080 (himagic) */ + bne- 3f + .balign IFETCH_ALIGN_BYTES +1: lwzu r9, 4(r10) +2: subf r8, r7, r9 + and. r8, r8, r6 + beq+ 1b + andc. r8, r8, r9 + beq+ 1b + andc r8, r9, r6 + orc r9, r9, r6 + subfe r8, r6, r8 + nor r8, r8, r9 + cntlzw r8, r8 + subf r3, r3, r10 + srwi r8, r8, 3 + add r3, r3, r8 + blr + + /* Missaligned string: make sure bytes before string are seen not 0 */ +3: xor r10, r10, r0 + orc r8, r8, r8 + lwzu r9, 4(r10) + slwi r0, r0, 3 + srw r8, r8, r0 + orc r9, r9, r8 + b 2b +EXPORT_SYMBOL(strlen) diff --git a/arch/powerpc/lib/test-code-patching.c b/arch/powerpc/lib/test-code-patching.c new file mode 100644 index 000000000000..1440d99630b3 --- /dev/null +++ b/arch/powerpc/lib/test-code-patching.c @@ -0,0 +1,495 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2008 Michael Ellerman, IBM Corporation. + */ + +#include <linux/vmalloc.h> +#include <linux/init.h> + +#include <asm/text-patching.h> + +static int __init instr_is_branch_to_addr(const u32 *instr, unsigned long addr) +{ + if (instr_is_branch_iform(ppc_inst_read(instr)) || + instr_is_branch_bform(ppc_inst_read(instr))) + return branch_target(instr) == addr; + + return 0; +} + +static void __init test_trampoline(void) +{ + asm ("nop;nop;\n"); +} + +#define check(x) do { \ + if (!(x)) \ + pr_err("code-patching: test failed at line %d\n", __LINE__); \ +} while (0) + +static void __init test_branch_iform(void) +{ + int err; + ppc_inst_t instr; + u32 tmp[2]; + u32 *iptr = tmp; + unsigned long addr = (unsigned long)tmp; + + /* The simplest case, branch to self, no flags */ + check(instr_is_branch_iform(ppc_inst(0x48000000))); + /* All bits of target set, and flags */ + check(instr_is_branch_iform(ppc_inst(0x4bffffff))); + /* High bit of opcode set, which is wrong */ + check(!instr_is_branch_iform(ppc_inst(0xcbffffff))); + /* Middle bits of opcode set, which is wrong */ + check(!instr_is_branch_iform(ppc_inst(0x7bffffff))); + + /* Simplest case, branch to self with link */ + check(instr_is_branch_iform(ppc_inst(0x48000001))); + /* All bits of targets set */ + check(instr_is_branch_iform(ppc_inst(0x4bfffffd))); + /* Some bits of targets set */ + check(instr_is_branch_iform(ppc_inst(0x4bff00fd))); + /* Must be a valid branch to start with */ + check(!instr_is_branch_iform(ppc_inst(0x7bfffffd))); + + /* Absolute branch to 0x100 */ + ppc_inst_write(iptr, ppc_inst(0x48000103)); + check(instr_is_branch_to_addr(iptr, 0x100)); + /* Absolute branch to 0x420fc */ + ppc_inst_write(iptr, ppc_inst(0x480420ff)); + check(instr_is_branch_to_addr(iptr, 0x420fc)); + /* Maximum positive relative branch, + 20MB - 4B */ + ppc_inst_write(iptr, ppc_inst(0x49fffffc)); + check(instr_is_branch_to_addr(iptr, addr + 0x1FFFFFC)); + /* Smallest negative relative branch, - 4B */ + ppc_inst_write(iptr, ppc_inst(0x4bfffffc)); + check(instr_is_branch_to_addr(iptr, addr - 4)); + /* Largest negative relative branch, - 32 MB */ + ppc_inst_write(iptr, ppc_inst(0x4a000000)); + check(instr_is_branch_to_addr(iptr, addr - 0x2000000)); + + /* Branch to self, with link */ + err = create_branch(&instr, iptr, addr, BRANCH_SET_LINK); + ppc_inst_write(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr)); + + /* Branch to self - 0x100, with link */ + err = create_branch(&instr, iptr, addr - 0x100, BRANCH_SET_LINK); + ppc_inst_write(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr - 0x100)); + + /* 
Branch to self + 0x100, no link */ + err = create_branch(&instr, iptr, addr + 0x100, 0); + ppc_inst_write(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr + 0x100)); + + /* Maximum relative negative offset, - 32 MB */ + err = create_branch(&instr, iptr, addr - 0x2000000, BRANCH_SET_LINK); + ppc_inst_write(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr - 0x2000000)); + + /* Out of range relative negative offset, - 32 MB + 4*/ + err = create_branch(&instr, iptr, addr - 0x2000004, BRANCH_SET_LINK); + check(err); + + /* Out of range relative positive offset, + 32 MB */ + err = create_branch(&instr, iptr, addr + 0x2000000, BRANCH_SET_LINK); + check(err); + + /* Unaligned target */ + err = create_branch(&instr, iptr, addr + 3, BRANCH_SET_LINK); + check(err); + + /* Check flags are masked correctly */ + err = create_branch(&instr, iptr, addr, 0xFFFFFFFC); + ppc_inst_write(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr)); + check(ppc_inst_equal(instr, ppc_inst(0x48000000))); +} + +static void __init test_create_function_call(void) +{ + u32 *iptr; + unsigned long dest; + ppc_inst_t instr; + + /* Check we can create a function call */ + iptr = (u32 *)ppc_function_entry(test_trampoline); + dest = ppc_function_entry(test_create_function_call); + create_branch(&instr, iptr, dest, BRANCH_SET_LINK); + patch_instruction(iptr, instr); + check(instr_is_branch_to_addr(iptr, dest)); +} + +static void __init test_branch_bform(void) +{ + int err; + unsigned long addr; + ppc_inst_t instr; + u32 tmp[2]; + u32 *iptr = tmp; + unsigned int flags; + + addr = (unsigned long)iptr; + + /* The simplest case, branch to self, no flags */ + check(instr_is_branch_bform(ppc_inst(0x40000000))); + /* All bits of target set, and flags */ + check(instr_is_branch_bform(ppc_inst(0x43ffffff))); + /* High bit of opcode set, which is wrong */ + check(!instr_is_branch_bform(ppc_inst(0xc3ffffff))); + /* Middle bits of opcode set, which is wrong */ + check(!instr_is_branch_bform(ppc_inst(0x7bffffff))); + + /* Absolute conditional branch to 0x100 */ + ppc_inst_write(iptr, ppc_inst(0x43ff0103)); + check(instr_is_branch_to_addr(iptr, 0x100)); + /* Absolute conditional branch to 0x20fc */ + ppc_inst_write(iptr, ppc_inst(0x43ff20ff)); + check(instr_is_branch_to_addr(iptr, 0x20fc)); + /* Maximum positive relative conditional branch, + 32 KB - 4B */ + ppc_inst_write(iptr, ppc_inst(0x43ff7ffc)); + check(instr_is_branch_to_addr(iptr, addr + 0x7FFC)); + /* Smallest negative relative conditional branch, - 4B */ + ppc_inst_write(iptr, ppc_inst(0x43fffffc)); + check(instr_is_branch_to_addr(iptr, addr - 4)); + /* Largest negative relative conditional branch, - 32 KB */ + ppc_inst_write(iptr, ppc_inst(0x43ff8000)); + check(instr_is_branch_to_addr(iptr, addr - 0x8000)); + + /* All condition code bits set & link */ + flags = 0x3ff000 | BRANCH_SET_LINK; + + /* Branch to self */ + err = create_cond_branch(&instr, iptr, addr, flags); + ppc_inst_write(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr)); + + /* Branch to self - 0x100 */ + err = create_cond_branch(&instr, iptr, addr - 0x100, flags); + ppc_inst_write(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr - 0x100)); + + /* Branch to self + 0x100 */ + err = create_cond_branch(&instr, iptr, addr + 0x100, flags); + ppc_inst_write(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr + 0x100)); + + /* Maximum relative negative offset, - 32 KB */ + err = create_cond_branch(&instr, iptr, addr - 0x8000, flags); + ppc_inst_write(iptr, instr); + 
check(instr_is_branch_to_addr(iptr, addr - 0x8000)); + + /* Out of range relative negative offset, - 32 KB + 4*/ + err = create_cond_branch(&instr, iptr, addr - 0x8004, flags); + check(err); + + /* Out of range relative positive offset, + 32 KB */ + err = create_cond_branch(&instr, iptr, addr + 0x8000, flags); + check(err); + + /* Unaligned target */ + err = create_cond_branch(&instr, iptr, addr + 3, flags); + check(err); + + /* Check flags are masked correctly */ + err = create_cond_branch(&instr, iptr, addr, 0xFFFFFFFC); + ppc_inst_write(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr)); + check(ppc_inst_equal(instr, ppc_inst(0x43FF0000))); +} + +static void __init test_translate_branch(void) +{ + unsigned long addr; + void *p, *q; + ppc_inst_t instr; + void *buf; + + buf = vmalloc(PAGE_ALIGN(0x2000000 + 1)); + check(buf); + if (!buf) + return; + + /* Simple case, branch to self moved a little */ + p = buf; + addr = (unsigned long)p; + create_branch(&instr, p, addr, 0); + ppc_inst_write(p, instr); + check(instr_is_branch_to_addr(p, addr)); + q = p + 4; + translate_branch(&instr, q, p); + ppc_inst_write(q, instr); + check(instr_is_branch_to_addr(q, addr)); + + /* Maximum negative case, move b . to addr + 32 MB */ + p = buf; + addr = (unsigned long)p; + create_branch(&instr, p, addr, 0); + ppc_inst_write(p, instr); + q = buf + 0x2000000; + translate_branch(&instr, q, p); + ppc_inst_write(q, instr); + check(instr_is_branch_to_addr(p, addr)); + check(instr_is_branch_to_addr(q, addr)); + check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x4a000000))); + + /* Maximum positive case, move x to x - 32 MB + 4 */ + p = buf + 0x2000000; + addr = (unsigned long)p; + create_branch(&instr, p, addr, 0); + ppc_inst_write(p, instr); + q = buf + 4; + translate_branch(&instr, q, p); + ppc_inst_write(q, instr); + check(instr_is_branch_to_addr(p, addr)); + check(instr_is_branch_to_addr(q, addr)); + check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x49fffffc))); + + /* Jump to x + 16 MB moved to x + 20 MB */ + p = buf; + addr = 0x1000000 + (unsigned long)buf; + create_branch(&instr, p, addr, BRANCH_SET_LINK); + ppc_inst_write(p, instr); + q = buf + 0x1400000; + translate_branch(&instr, q, p); + ppc_inst_write(q, instr); + check(instr_is_branch_to_addr(p, addr)); + check(instr_is_branch_to_addr(q, addr)); + + /* Jump to x + 16 MB moved to x - 16 MB + 4 */ + p = buf + 0x1000000; + addr = 0x2000000 + (unsigned long)buf; + create_branch(&instr, p, addr, 0); + ppc_inst_write(p, instr); + q = buf + 4; + translate_branch(&instr, q, p); + ppc_inst_write(q, instr); + check(instr_is_branch_to_addr(p, addr)); + check(instr_is_branch_to_addr(q, addr)); + + + /* Conditional branch tests */ + + /* Simple case, branch to self moved a little */ + p = buf; + addr = (unsigned long)p; + create_cond_branch(&instr, p, addr, 0); + ppc_inst_write(p, instr); + check(instr_is_branch_to_addr(p, addr)); + q = buf + 4; + translate_branch(&instr, q, p); + ppc_inst_write(q, instr); + check(instr_is_branch_to_addr(q, addr)); + + /* Maximum negative case, move b . 
to addr + 32 KB */ + p = buf; + addr = (unsigned long)p; + create_cond_branch(&instr, p, addr, 0xFFFFFFFC); + ppc_inst_write(p, instr); + q = buf + 0x8000; + translate_branch(&instr, q, p); + ppc_inst_write(q, instr); + check(instr_is_branch_to_addr(p, addr)); + check(instr_is_branch_to_addr(q, addr)); + check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x43ff8000))); + + /* Maximum positive case, move x to x - 32 KB + 4 */ + p = buf + 0x8000; + addr = (unsigned long)p; + create_cond_branch(&instr, p, addr, 0xFFFFFFFC); + ppc_inst_write(p, instr); + q = buf + 4; + translate_branch(&instr, q, p); + ppc_inst_write(q, instr); + check(instr_is_branch_to_addr(p, addr)); + check(instr_is_branch_to_addr(q, addr)); + check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x43ff7ffc))); + + /* Jump to x + 12 KB moved to x + 20 KB */ + p = buf; + addr = 0x3000 + (unsigned long)buf; + create_cond_branch(&instr, p, addr, BRANCH_SET_LINK); + ppc_inst_write(p, instr); + q = buf + 0x5000; + translate_branch(&instr, q, p); + ppc_inst_write(q, instr); + check(instr_is_branch_to_addr(p, addr)); + check(instr_is_branch_to_addr(q, addr)); + + /* Jump to x + 8 KB moved to x - 8 KB + 4 */ + p = buf + 0x2000; + addr = 0x4000 + (unsigned long)buf; + create_cond_branch(&instr, p, addr, 0); + ppc_inst_write(p, instr); + q = buf + 4; + translate_branch(&instr, q, p); + ppc_inst_write(q, instr); + check(instr_is_branch_to_addr(p, addr)); + check(instr_is_branch_to_addr(q, addr)); + + /* Free the buffer we were using */ + vfree(buf); +} + +static void __init test_prefixed_patching(void) +{ + u32 *iptr = (u32 *)ppc_function_entry(test_trampoline); + u32 expected[2] = {OP_PREFIX << 26, 0}; + ppc_inst_t inst = ppc_inst_prefix(OP_PREFIX << 26, 0); + + if (!IS_ENABLED(CONFIG_PPC64)) + return; + + patch_instruction(iptr, inst); + + check(!memcmp(iptr, expected, sizeof(expected))); +} + +static void __init test_multi_instruction_patching(void) +{ + u32 code[32]; + void *buf; + u32 *addr32; + u64 *addr64; + ppc_inst_t inst64 = ppc_inst_prefix(OP_PREFIX << 26 | 3UL << 24, PPC_RAW_TRAP()); + u32 inst32 = PPC_RAW_NOP(); + + buf = vzalloc(PAGE_SIZE * 8); + check(buf); + if (!buf) + return; + + /* Test single page 32-bit repeated instruction */ + addr32 = buf + PAGE_SIZE; + check(!patch_instructions(addr32 + 1, &inst32, 12, true)); + + check(addr32[0] == 0); + check(addr32[1] == inst32); + check(addr32[2] == inst32); + check(addr32[3] == inst32); + check(addr32[4] == 0); + + /* Test single page 64-bit repeated instruction */ + if (IS_ENABLED(CONFIG_PPC64)) { + check(ppc_inst_prefixed(inst64)); + + addr64 = buf + PAGE_SIZE * 2; + ppc_inst_write(code, inst64); + check(!patch_instructions((u32 *)(addr64 + 1), code, 24, true)); + + check(addr64[0] == 0); + check(ppc_inst_equal(ppc_inst_read((u32 *)&addr64[1]), inst64)); + check(ppc_inst_equal(ppc_inst_read((u32 *)&addr64[2]), inst64)); + check(ppc_inst_equal(ppc_inst_read((u32 *)&addr64[3]), inst64)); + check(addr64[4] == 0); + } + + /* Test single page memcpy */ + addr32 = buf + PAGE_SIZE * 3; + + for (int i = 0; i < ARRAY_SIZE(code); i++) + code[i] = i + 1; + + check(!patch_instructions(addr32 + 1, code, sizeof(code), false)); + + check(addr32[0] == 0); + check(!memcmp(&addr32[1], code, sizeof(code))); + check(addr32[ARRAY_SIZE(code) + 1] == 0); + + /* Test multipage 32-bit repeated instruction */ + addr32 = buf + PAGE_SIZE * 4 - 8; + check(!patch_instructions(addr32 + 1, &inst32, 12, true)); + + check(addr32[0] == 0); + check(addr32[1] == inst32); + check(addr32[2] == inst32); + 
check(addr32[3] == inst32); + check(addr32[4] == 0); + + /* Test multipage 64-bit repeated instruction */ + if (IS_ENABLED(CONFIG_PPC64)) { + check(ppc_inst_prefixed(inst64)); + + addr64 = buf + PAGE_SIZE * 5 - 8; + ppc_inst_write(code, inst64); + check(!patch_instructions((u32 *)(addr64 + 1), code, 24, true)); + + check(addr64[0] == 0); + check(ppc_inst_equal(ppc_inst_read((u32 *)&addr64[1]), inst64)); + check(ppc_inst_equal(ppc_inst_read((u32 *)&addr64[2]), inst64)); + check(ppc_inst_equal(ppc_inst_read((u32 *)&addr64[3]), inst64)); + check(addr64[4] == 0); + } + + /* Test multipage memcpy */ + addr32 = buf + PAGE_SIZE * 6 - 12; + + for (int i = 0; i < ARRAY_SIZE(code); i++) + code[i] = i + 1; + + check(!patch_instructions(addr32 + 1, code, sizeof(code), false)); + + check(addr32[0] == 0); + check(!memcmp(&addr32[1], code, sizeof(code))); + check(addr32[ARRAY_SIZE(code) + 1] == 0); + + vfree(buf); +} + +static void __init test_data_patching(void) +{ + void *buf; + u32 *addr32; + + buf = vzalloc(PAGE_SIZE); + check(buf); + if (!buf) + return; + + addr32 = buf + 128; + + addr32[1] = 0xA0A1A2A3; + addr32[2] = 0xB0B1B2B3; + + check(!patch_uint(&addr32[1], 0xC0C1C2C3)); + + check(addr32[0] == 0); + check(addr32[1] == 0xC0C1C2C3); + check(addr32[2] == 0xB0B1B2B3); + check(addr32[3] == 0); + + /* Unaligned patch_ulong() should fail */ + if (IS_ENABLED(CONFIG_PPC64)) + check(patch_ulong(&addr32[1], 0xD0D1D2D3) == -EINVAL); + + check(!patch_ulong(&addr32[2], 0xD0D1D2D3)); + + check(addr32[0] == 0); + check(addr32[1] == 0xC0C1C2C3); + check(*(unsigned long *)(&addr32[2]) == 0xD0D1D2D3); + + if (!IS_ENABLED(CONFIG_PPC64)) + check(addr32[3] == 0); + + check(addr32[4] == 0); + + vfree(buf); +} + +static int __init test_code_patching(void) +{ + pr_info("Running code patching self-tests ...\n"); + + test_branch_iform(); + test_branch_bform(); + test_create_function_call(); + test_translate_branch(); + test_prefixed_patching(); + test_multi_instruction_patching(); + test_data_patching(); + + return 0; +} +late_initcall(test_code_patching); diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c new file mode 100644 index 000000000000..66b5b4fa1686 --- /dev/null +++ b/arch/powerpc/lib/test_emulate_step.c @@ -0,0 +1,1741 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Simple sanity tests for instruction emulation infrastructure. + * + * Copyright IBM Corp. 
2016 + */ + +#define pr_fmt(fmt) "emulate_step_test: " fmt + +#include <linux/ptrace.h> +#include <asm/cpu_has_feature.h> +#include <asm/sstep.h> +#include <asm/ppc-opcode.h> +#include <asm/text-patching.h> +#include <asm/inst.h> + +#define MAX_SUBTESTS 16 + +#define IGNORE_GPR(n) (0x1UL << (n)) +#define IGNORE_XER (0x1UL << 32) +#define IGNORE_CCR (0x1UL << 33) +#define NEGATIVE_TEST (0x1UL << 63) + +#define TEST_PLD(r, base, i, pr) \ + ppc_inst_prefix(PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_H(i), \ + PPC_INST_PLD | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) + +#define TEST_PLWZ(r, base, i, pr) \ + ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \ + PPC_RAW_LWZ(r, base, i)) + +#define TEST_PSTD(r, base, i, pr) \ + ppc_inst_prefix(PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_H(i), \ + PPC_INST_PSTD | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) + +#define TEST_PLFS(r, base, i, pr) \ + ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \ + PPC_INST_LFS | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) + +#define TEST_PSTFS(r, base, i, pr) \ + ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \ + PPC_INST_STFS | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) + +#define TEST_PLFD(r, base, i, pr) \ + ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \ + PPC_INST_LFD | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) + +#define TEST_PSTFD(r, base, i, pr) \ + ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \ + PPC_INST_STFD | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) + +#define TEST_PADDI(t, a, i, pr) \ + ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \ + PPC_RAW_ADDI(t, a, i)) + +static void __init init_pt_regs(struct pt_regs *regs) +{ + static unsigned long msr; + static bool msr_cached; + + memset(regs, 0, sizeof(struct pt_regs)); + + if (likely(msr_cached)) { + regs->msr = msr; + return; + } + + asm volatile("mfmsr %0" : "=r"(regs->msr)); + + regs->msr |= MSR_FP; + regs->msr |= MSR_VEC; + regs->msr |= MSR_VSX; + + msr = regs->msr; + msr_cached = true; +} + +static void __init show_result(char *mnemonic, char *result) +{ + pr_info("%-14s : %s\n", mnemonic, result); +} + +static void __init show_result_with_descr(char *mnemonic, char *descr, + char *result) +{ + pr_info("%-14s : %-50s %s\n", mnemonic, descr, result); +} + +static void __init test_ld(void) +{ + struct pt_regs regs; + unsigned long a = 0x23; + int stepped = -1; + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long) &a; + + /* ld r5, 0(r3) */ + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LD(5, 3, 0))); + + if (stepped == 1 && regs.gpr[5] == a) + show_result("ld", "PASS"); + else + show_result("ld", "FAIL"); +} + +static void __init test_pld(void) +{ + struct pt_regs regs; + unsigned long a = 0x23; + int stepped = -1; + + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + show_result("pld", "SKIP (!CPU_FTR_ARCH_31)"); + return; + } + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long)&a; + + /* pld r5, 0(r3), 0 */ + stepped = emulate_step(®s, TEST_PLD(5, 3, 0, 0)); + + if (stepped == 1 && regs.gpr[5] == a) + show_result("pld", "PASS"); + else + show_result("pld", "FAIL"); +} + +static void __init test_lwz(void) +{ + struct pt_regs regs; + unsigned int a = 0x4545; + int stepped = -1; + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long) &a; + + /* lwz r5, 0(r3) */ + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LWZ(5, 3, 0))); + + if (stepped == 1 && regs.gpr[5] == a) + show_result("lwz", "PASS"); + else + show_result("lwz", "FAIL"); +} + +static void __init test_plwz(void) +{ + struct 
pt_regs regs; + unsigned int a = 0x4545; + int stepped = -1; + + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + show_result("plwz", "SKIP (!CPU_FTR_ARCH_31)"); + return; + } + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long)&a; + + /* plwz r5, 0(r3), 0 */ + + stepped = emulate_step(®s, TEST_PLWZ(5, 3, 0, 0)); + + if (stepped == 1 && regs.gpr[5] == a) + show_result("plwz", "PASS"); + else + show_result("plwz", "FAIL"); +} + +static void __init test_lwzx(void) +{ + struct pt_regs regs; + unsigned int a[3] = {0x0, 0x0, 0x1234}; + int stepped = -1; + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long) a; + regs.gpr[4] = 8; + regs.gpr[5] = 0x8765; + + /* lwzx r5, r3, r4 */ + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LWZX(5, 3, 4))); + if (stepped == 1 && regs.gpr[5] == a[2]) + show_result("lwzx", "PASS"); + else + show_result("lwzx", "FAIL"); +} + +static void __init test_std(void) +{ + struct pt_regs regs; + unsigned long a = 0x1234; + int stepped = -1; + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long) &a; + regs.gpr[5] = 0x5678; + + /* std r5, 0(r3) */ + stepped = emulate_step(®s, ppc_inst(PPC_RAW_STD(5, 3, 0))); + if (stepped == 1 && regs.gpr[5] == a) + show_result("std", "PASS"); + else + show_result("std", "FAIL"); +} + +static void __init test_pstd(void) +{ + struct pt_regs regs; + unsigned long a = 0x1234; + int stepped = -1; + + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + show_result("pstd", "SKIP (!CPU_FTR_ARCH_31)"); + return; + } + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long)&a; + regs.gpr[5] = 0x5678; + + /* pstd r5, 0(r3), 0 */ + stepped = emulate_step(®s, TEST_PSTD(5, 3, 0, 0)); + if (stepped == 1 || regs.gpr[5] == a) + show_result("pstd", "PASS"); + else + show_result("pstd", "FAIL"); +} + +static void __init test_ldarx_stdcx(void) +{ + struct pt_regs regs; + unsigned long a = 0x1234; + int stepped = -1; + unsigned long cr0_eq = 0x1 << 29; /* eq bit of CR0 */ + + init_pt_regs(®s); + asm volatile("mfcr %0" : "=r"(regs.ccr)); + + + /*** ldarx ***/ + + regs.gpr[3] = (unsigned long) &a; + regs.gpr[4] = 0; + regs.gpr[5] = 0x5678; + + /* ldarx r5, r3, r4, 0 */ + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LDARX(5, 3, 4, 0))); + + /* + * Don't touch 'a' here. Touching 'a' can do Load/store + * of 'a' which result in failure of subsequent stdcx. + * Instead, use hardcoded value for comparison. + */ + if (stepped <= 0 || regs.gpr[5] != 0x1234) { + show_result("ldarx / stdcx.", "FAIL (ldarx)"); + return; + } + + + /*** stdcx. ***/ + + regs.gpr[5] = 0x9ABC; + + /* stdcx. r5, r3, r4 */ + stepped = emulate_step(®s, ppc_inst(PPC_RAW_STDCX(5, 3, 4))); + + /* + * Two possible scenarios that indicates successful emulation + * of stdcx. : + * 1. Reservation is active and store is performed. In this + * case cr0.eq bit will be set to 1. + * 2. Reservation is not active and store is not performed. + * In this case cr0.eq bit will be set to 0. 
+ */ + if (stepped == 1 && ((regs.gpr[5] == a && (regs.ccr & cr0_eq)) + || (regs.gpr[5] != a && !(regs.ccr & cr0_eq)))) + show_result("ldarx / stdcx.", "PASS"); + else + show_result("ldarx / stdcx.", "FAIL (stdcx.)"); +} + +#ifdef CONFIG_PPC_FPU +static void __init test_lfsx_stfsx(void) +{ + struct pt_regs regs; + union { + float a; + int b; + } c; + int cached_b; + int stepped = -1; + + init_pt_regs(®s); + + + /*** lfsx ***/ + + c.a = 123.45; + cached_b = c.b; + + regs.gpr[3] = (unsigned long) &c.a; + regs.gpr[4] = 0; + + /* lfsx frt10, r3, r4 */ + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LFSX(10, 3, 4))); + + if (stepped == 1) + show_result("lfsx", "PASS"); + else + show_result("lfsx", "FAIL"); + + + /*** stfsx ***/ + + c.a = 678.91; + + /* stfsx frs10, r3, r4 */ + stepped = emulate_step(®s, ppc_inst(PPC_RAW_STFSX(10, 3, 4))); + + if (stepped == 1 && c.b == cached_b) + show_result("stfsx", "PASS"); + else + show_result("stfsx", "FAIL"); +} + +static void __init test_plfs_pstfs(void) +{ + struct pt_regs regs; + union { + float a; + int b; + } c; + int cached_b; + int stepped = -1; + + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + show_result("pld", "SKIP (!CPU_FTR_ARCH_31)"); + return; + } + + init_pt_regs(®s); + + + /*** plfs ***/ + + c.a = 123.45; + cached_b = c.b; + + regs.gpr[3] = (unsigned long)&c.a; + + /* plfs frt10, 0(r3), 0 */ + stepped = emulate_step(®s, TEST_PLFS(10, 3, 0, 0)); + + if (stepped == 1) + show_result("plfs", "PASS"); + else + show_result("plfs", "FAIL"); + + + /*** pstfs ***/ + + c.a = 678.91; + + /* pstfs frs10, 0(r3), 0 */ + stepped = emulate_step(®s, TEST_PSTFS(10, 3, 0, 0)); + + if (stepped == 1 && c.b == cached_b) + show_result("pstfs", "PASS"); + else + show_result("pstfs", "FAIL"); +} + +static void __init test_lfdx_stfdx(void) +{ + struct pt_regs regs; + union { + double a; + long b; + } c; + long cached_b; + int stepped = -1; + + init_pt_regs(®s); + + + /*** lfdx ***/ + + c.a = 123456.78; + cached_b = c.b; + + regs.gpr[3] = (unsigned long) &c.a; + regs.gpr[4] = 0; + + /* lfdx frt10, r3, r4 */ + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LFDX(10, 3, 4))); + + if (stepped == 1) + show_result("lfdx", "PASS"); + else + show_result("lfdx", "FAIL"); + + + /*** stfdx ***/ + + c.a = 987654.32; + + /* stfdx frs10, r3, r4 */ + stepped = emulate_step(®s, ppc_inst(PPC_RAW_STFDX(10, 3, 4))); + + if (stepped == 1 && c.b == cached_b) + show_result("stfdx", "PASS"); + else + show_result("stfdx", "FAIL"); +} + +static void __init test_plfd_pstfd(void) +{ + struct pt_regs regs; + union { + double a; + long b; + } c; + long cached_b; + int stepped = -1; + + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + show_result("pld", "SKIP (!CPU_FTR_ARCH_31)"); + return; + } + + init_pt_regs(®s); + + + /*** plfd ***/ + + c.a = 123456.78; + cached_b = c.b; + + regs.gpr[3] = (unsigned long)&c.a; + + /* plfd frt10, 0(r3), 0 */ + stepped = emulate_step(®s, TEST_PLFD(10, 3, 0, 0)); + + if (stepped == 1) + show_result("plfd", "PASS"); + else + show_result("plfd", "FAIL"); + + + /*** pstfd ***/ + + c.a = 987654.32; + + /* pstfd frs10, 0(r3), 0 */ + stepped = emulate_step(®s, TEST_PSTFD(10, 3, 0, 0)); + + if (stepped == 1 && c.b == cached_b) + show_result("pstfd", "PASS"); + else + show_result("pstfd", "FAIL"); +} +#else +static void __init test_lfsx_stfsx(void) +{ + show_result("lfsx", "SKIP (CONFIG_PPC_FPU is not set)"); + show_result("stfsx", "SKIP (CONFIG_PPC_FPU is not set)"); +} + +static void __init test_plfs_pstfs(void) +{ + show_result("plfs", "SKIP (CONFIG_PPC_FPU is not set)"); + 
show_result("pstfs", "SKIP (CONFIG_PPC_FPU is not set)"); +} + +static void __init test_lfdx_stfdx(void) +{ + show_result("lfdx", "SKIP (CONFIG_PPC_FPU is not set)"); + show_result("stfdx", "SKIP (CONFIG_PPC_FPU is not set)"); +} + +static void __init test_plfd_pstfd(void) +{ + show_result("plfd", "SKIP (CONFIG_PPC_FPU is not set)"); + show_result("pstfd", "SKIP (CONFIG_PPC_FPU is not set)"); +} +#endif /* CONFIG_PPC_FPU */ + +#ifdef CONFIG_ALTIVEC +static void __init test_lvx_stvx(void) +{ + struct pt_regs regs; + union { + vector128 a; + u32 b[4]; + } c; + u32 cached_b[4]; + int stepped = -1; + + init_pt_regs(®s); + + + /*** lvx ***/ + + cached_b[0] = c.b[0] = 923745; + cached_b[1] = c.b[1] = 2139478; + cached_b[2] = c.b[2] = 9012; + cached_b[3] = c.b[3] = 982134; + + regs.gpr[3] = (unsigned long) &c.a; + regs.gpr[4] = 0; + + /* lvx vrt10, r3, r4 */ + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LVX(10, 3, 4))); + + if (stepped == 1) + show_result("lvx", "PASS"); + else + show_result("lvx", "FAIL"); + + + /*** stvx ***/ + + c.b[0] = 4987513; + c.b[1] = 84313948; + c.b[2] = 71; + c.b[3] = 498532; + + /* stvx vrs10, r3, r4 */ + stepped = emulate_step(®s, ppc_inst(PPC_RAW_STVX(10, 3, 4))); + + if (stepped == 1 && cached_b[0] == c.b[0] && cached_b[1] == c.b[1] && + cached_b[2] == c.b[2] && cached_b[3] == c.b[3]) + show_result("stvx", "PASS"); + else + show_result("stvx", "FAIL"); +} +#else +static void __init test_lvx_stvx(void) +{ + show_result("lvx", "SKIP (CONFIG_ALTIVEC is not set)"); + show_result("stvx", "SKIP (CONFIG_ALTIVEC is not set)"); +} +#endif /* CONFIG_ALTIVEC */ + +#ifdef CONFIG_VSX +static void __init test_lxvd2x_stxvd2x(void) +{ + struct pt_regs regs; + union { + vector128 a; + u32 b[4]; + } c; + u32 cached_b[4]; + int stepped = -1; + + init_pt_regs(®s); + + + /*** lxvd2x ***/ + + cached_b[0] = c.b[0] = 18233; + cached_b[1] = c.b[1] = 34863571; + cached_b[2] = c.b[2] = 834; + cached_b[3] = c.b[3] = 6138911; + + regs.gpr[3] = (unsigned long) &c.a; + regs.gpr[4] = 0; + + /* lxvd2x vsr39, r3, r4 */ + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LXVD2X(39, R3, R4))); + + if (stepped == 1 && cpu_has_feature(CPU_FTR_VSX)) { + show_result("lxvd2x", "PASS"); + } else { + if (!cpu_has_feature(CPU_FTR_VSX)) + show_result("lxvd2x", "PASS (!CPU_FTR_VSX)"); + else + show_result("lxvd2x", "FAIL"); + } + + + /*** stxvd2x ***/ + + c.b[0] = 21379463; + c.b[1] = 87; + c.b[2] = 374234; + c.b[3] = 4; + + /* stxvd2x vsr39, r3, r4 */ + stepped = emulate_step(®s, ppc_inst(PPC_RAW_STXVD2X(39, R3, R4))); + + if (stepped == 1 && cached_b[0] == c.b[0] && cached_b[1] == c.b[1] && + cached_b[2] == c.b[2] && cached_b[3] == c.b[3] && + cpu_has_feature(CPU_FTR_VSX)) { + show_result("stxvd2x", "PASS"); + } else { + if (!cpu_has_feature(CPU_FTR_VSX)) + show_result("stxvd2x", "PASS (!CPU_FTR_VSX)"); + else + show_result("stxvd2x", "FAIL"); + } +} +#else +static void __init test_lxvd2x_stxvd2x(void) +{ + show_result("lxvd2x", "SKIP (CONFIG_VSX is not set)"); + show_result("stxvd2x", "SKIP (CONFIG_VSX is not set)"); +} +#endif /* CONFIG_VSX */ + +#ifdef CONFIG_VSX +static void __init test_lxvp_stxvp(void) +{ + struct pt_regs regs; + union { + vector128 a; + u32 b[4]; + } c[2]; + u32 cached_b[8]; + int stepped = -1; + + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + show_result("lxvp", "SKIP (!CPU_FTR_ARCH_31)"); + show_result("stxvp", "SKIP (!CPU_FTR_ARCH_31)"); + return; + } + + init_pt_regs(®s); + + /*** lxvp ***/ + + cached_b[0] = c[0].b[0] = 18233; + cached_b[1] = c[0].b[1] = 34863571; + cached_b[2] = c[0].b[2] 
= 834; + cached_b[3] = c[0].b[3] = 6138911; + cached_b[4] = c[1].b[0] = 1234; + cached_b[5] = c[1].b[1] = 5678; + cached_b[6] = c[1].b[2] = 91011; + cached_b[7] = c[1].b[3] = 121314; + + regs.gpr[4] = (unsigned long)&c[0].a; + + /* + * lxvp XTp,DQ(RA) + * XTp = 32xTX + 2xTp + * let TX=1 Tp=1 RA=4 DQ=0 + */ + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LXVP(34, 4, 0))); + + if (stepped == 1 && cpu_has_feature(CPU_FTR_VSX)) { + show_result("lxvp", "PASS"); + } else { + if (!cpu_has_feature(CPU_FTR_VSX)) + show_result("lxvp", "PASS (!CPU_FTR_VSX)"); + else + show_result("lxvp", "FAIL"); + } + + /*** stxvp ***/ + + c[0].b[0] = 21379463; + c[0].b[1] = 87; + c[0].b[2] = 374234; + c[0].b[3] = 4; + c[1].b[0] = 90; + c[1].b[1] = 122; + c[1].b[2] = 555; + c[1].b[3] = 32144; + + /* + * stxvp XSp,DQ(RA) + * XSp = 32xSX + 2xSp + * let SX=1 Sp=1 RA=4 DQ=0 + */ + stepped = emulate_step(®s, ppc_inst(PPC_RAW_STXVP(34, 4, 0))); + + if (stepped == 1 && cached_b[0] == c[0].b[0] && cached_b[1] == c[0].b[1] && + cached_b[2] == c[0].b[2] && cached_b[3] == c[0].b[3] && + cached_b[4] == c[1].b[0] && cached_b[5] == c[1].b[1] && + cached_b[6] == c[1].b[2] && cached_b[7] == c[1].b[3] && + cpu_has_feature(CPU_FTR_VSX)) { + show_result("stxvp", "PASS"); + } else { + if (!cpu_has_feature(CPU_FTR_VSX)) + show_result("stxvp", "PASS (!CPU_FTR_VSX)"); + else + show_result("stxvp", "FAIL"); + } +} +#else +static void __init test_lxvp_stxvp(void) +{ + show_result("lxvp", "SKIP (CONFIG_VSX is not set)"); + show_result("stxvp", "SKIP (CONFIG_VSX is not set)"); +} +#endif /* CONFIG_VSX */ + +#ifdef CONFIG_VSX +static void __init test_lxvpx_stxvpx(void) +{ + struct pt_regs regs; + union { + vector128 a; + u32 b[4]; + } c[2]; + u32 cached_b[8]; + int stepped = -1; + + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + show_result("lxvpx", "SKIP (!CPU_FTR_ARCH_31)"); + show_result("stxvpx", "SKIP (!CPU_FTR_ARCH_31)"); + return; + } + + init_pt_regs(®s); + + /*** lxvpx ***/ + + cached_b[0] = c[0].b[0] = 18233; + cached_b[1] = c[0].b[1] = 34863571; + cached_b[2] = c[0].b[2] = 834; + cached_b[3] = c[0].b[3] = 6138911; + cached_b[4] = c[1].b[0] = 1234; + cached_b[5] = c[1].b[1] = 5678; + cached_b[6] = c[1].b[2] = 91011; + cached_b[7] = c[1].b[3] = 121314; + + regs.gpr[3] = (unsigned long)&c[0].a; + regs.gpr[4] = 0; + + /* + * lxvpx XTp,RA,RB + * XTp = 32xTX + 2xTp + * let TX=1 Tp=1 RA=3 RB=4 + */ + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LXVPX(34, 3, 4))); + + if (stepped == 1 && cpu_has_feature(CPU_FTR_VSX)) { + show_result("lxvpx", "PASS"); + } else { + if (!cpu_has_feature(CPU_FTR_VSX)) + show_result("lxvpx", "PASS (!CPU_FTR_VSX)"); + else + show_result("lxvpx", "FAIL"); + } + + /*** stxvpx ***/ + + c[0].b[0] = 21379463; + c[0].b[1] = 87; + c[0].b[2] = 374234; + c[0].b[3] = 4; + c[1].b[0] = 90; + c[1].b[1] = 122; + c[1].b[2] = 555; + c[1].b[3] = 32144; + + /* + * stxvpx XSp,RA,RB + * XSp = 32xSX + 2xSp + * let SX=1 Sp=1 RA=3 RB=4 + */ + stepped = emulate_step(®s, ppc_inst(PPC_RAW_STXVPX(34, 3, 4))); + + if (stepped == 1 && cached_b[0] == c[0].b[0] && cached_b[1] == c[0].b[1] && + cached_b[2] == c[0].b[2] && cached_b[3] == c[0].b[3] && + cached_b[4] == c[1].b[0] && cached_b[5] == c[1].b[1] && + cached_b[6] == c[1].b[2] && cached_b[7] == c[1].b[3] && + cpu_has_feature(CPU_FTR_VSX)) { + show_result("stxvpx", "PASS"); + } else { + if (!cpu_has_feature(CPU_FTR_VSX)) + show_result("stxvpx", "PASS (!CPU_FTR_VSX)"); + else + show_result("stxvpx", "FAIL"); + } +} +#else +static void __init test_lxvpx_stxvpx(void) +{ + show_result("lxvpx", "SKIP 
(CONFIG_VSX is not set)"); + show_result("stxvpx", "SKIP (CONFIG_VSX is not set)"); +} +#endif /* CONFIG_VSX */ + +#ifdef CONFIG_VSX +static void __init test_plxvp_pstxvp(void) +{ + ppc_inst_t instr; + struct pt_regs regs; + union { + vector128 a; + u32 b[4]; + } c[2]; + u32 cached_b[8]; + int stepped = -1; + + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + show_result("plxvp", "SKIP (!CPU_FTR_ARCH_31)"); + show_result("pstxvp", "SKIP (!CPU_FTR_ARCH_31)"); + return; + } + + /*** plxvp ***/ + + cached_b[0] = c[0].b[0] = 18233; + cached_b[1] = c[0].b[1] = 34863571; + cached_b[2] = c[0].b[2] = 834; + cached_b[3] = c[0].b[3] = 6138911; + cached_b[4] = c[1].b[0] = 1234; + cached_b[5] = c[1].b[1] = 5678; + cached_b[6] = c[1].b[2] = 91011; + cached_b[7] = c[1].b[3] = 121314; + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long)&c[0].a; + + /* + * plxvp XTp,D(RA),R + * XTp = 32xTX + 2xTp + * let RA=3 R=0 D=d0||d1=0 R=0 Tp=1 TX=1 + */ + instr = ppc_inst_prefix(PPC_RAW_PLXVP_P(34, 0, 3, 0), PPC_RAW_PLXVP_S(34, 0, 3, 0)); + + stepped = emulate_step(®s, instr); + if (stepped == 1 && cpu_has_feature(CPU_FTR_VSX)) { + show_result("plxvp", "PASS"); + } else { + if (!cpu_has_feature(CPU_FTR_VSX)) + show_result("plxvp", "PASS (!CPU_FTR_VSX)"); + else + show_result("plxvp", "FAIL"); + } + + /*** pstxvp ***/ + + c[0].b[0] = 21379463; + c[0].b[1] = 87; + c[0].b[2] = 374234; + c[0].b[3] = 4; + c[1].b[0] = 90; + c[1].b[1] = 122; + c[1].b[2] = 555; + c[1].b[3] = 32144; + + /* + * pstxvp XSp,D(RA),R + * XSp = 32xSX + 2xSp + * let RA=3 D=d0||d1=0 R=0 Sp=1 SX=1 + */ + instr = ppc_inst_prefix(PPC_RAW_PSTXVP_P(34, 0, 3, 0), PPC_RAW_PSTXVP_S(34, 0, 3, 0)); + + stepped = emulate_step(®s, instr); + + if (stepped == 1 && cached_b[0] == c[0].b[0] && cached_b[1] == c[0].b[1] && + cached_b[2] == c[0].b[2] && cached_b[3] == c[0].b[3] && + cached_b[4] == c[1].b[0] && cached_b[5] == c[1].b[1] && + cached_b[6] == c[1].b[2] && cached_b[7] == c[1].b[3] && + cpu_has_feature(CPU_FTR_VSX)) { + show_result("pstxvp", "PASS"); + } else { + if (!cpu_has_feature(CPU_FTR_VSX)) + show_result("pstxvp", "PASS (!CPU_FTR_VSX)"); + else + show_result("pstxvp", "FAIL"); + } +} +#else +static void __init test_plxvp_pstxvp(void) +{ + show_result("plxvp", "SKIP (CONFIG_VSX is not set)"); + show_result("pstxvp", "SKIP (CONFIG_VSX is not set)"); +} +#endif /* CONFIG_VSX */ + +static void __init run_tests_load_store(void) +{ + test_ld(); + test_pld(); + test_lwz(); + test_plwz(); + test_lwzx(); + test_std(); + test_pstd(); + test_ldarx_stdcx(); + test_lfsx_stfsx(); + test_plfs_pstfs(); + test_lfdx_stfdx(); + test_plfd_pstfd(); + test_lvx_stvx(); + test_lxvd2x_stxvd2x(); + test_lxvp_stxvp(); + test_lxvpx_stxvpx(); + test_plxvp_pstxvp(); +} + +struct compute_test { + char *mnemonic; + unsigned long cpu_feature; + struct { + char *descr; + unsigned long flags; + ppc_inst_t instr; + struct pt_regs regs; + } subtests[MAX_SUBTESTS + 1]; +}; + +/* Extreme values for si0||si1 (the MLS:D-form 34 bit immediate field) */ +#define SI_MIN BIT(33) +#define SI_MAX (BIT(33) - 1) +#define SI_UMAX (BIT(34) - 1) + +static struct compute_test compute_tests[] = { + { + .mnemonic = "nop", + .subtests = { + { + .descr = "R0 = LONG_MAX", + .instr = ppc_inst(PPC_RAW_NOP()), + .regs = { + .gpr[0] = LONG_MAX, + } + } + } + }, + { + .mnemonic = "setb", + .cpu_feature = CPU_FTR_ARCH_300, + .subtests = { + { + .descr = "BFA = 1, CR = GT", + .instr = ppc_inst(PPC_RAW_SETB(20, 1)), + .regs = { + .ccr = 0x4000000, + } + }, + { + .descr = "BFA = 4, CR = LT", + .instr = 
ppc_inst(PPC_RAW_SETB(20, 4)), + .regs = { + .ccr = 0x8000, + } + }, + { + .descr = "BFA = 5, CR = EQ", + .instr = ppc_inst(PPC_RAW_SETB(20, 5)), + .regs = { + .ccr = 0x200, + } + } + } + }, + { + .mnemonic = "add", + .subtests = { + { + .descr = "RA = LONG_MIN, RB = LONG_MIN", + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MIN, + } + }, + { + .descr = "RA = LONG_MIN, RB = LONG_MAX", + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MAX, + } + }, + { + .descr = "RA = LONG_MAX, RB = LONG_MAX", + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MAX, + .gpr[22] = LONG_MAX, + } + }, + { + .descr = "RA = ULONG_MAX, RB = ULONG_MAX", + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), + .regs = { + .gpr[21] = ULONG_MAX, + .gpr[22] = ULONG_MAX, + } + }, + { + .descr = "RA = ULONG_MAX, RB = 0x1", + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), + .regs = { + .gpr[21] = ULONG_MAX, + .gpr[22] = 0x1, + } + }, + { + .descr = "RA = INT_MIN, RB = INT_MIN", + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), + .regs = { + .gpr[21] = INT_MIN, + .gpr[22] = INT_MIN, + } + }, + { + .descr = "RA = INT_MIN, RB = INT_MAX", + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), + .regs = { + .gpr[21] = INT_MIN, + .gpr[22] = INT_MAX, + } + }, + { + .descr = "RA = INT_MAX, RB = INT_MAX", + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), + .regs = { + .gpr[21] = INT_MAX, + .gpr[22] = INT_MAX, + } + }, + { + .descr = "RA = UINT_MAX, RB = UINT_MAX", + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), + .regs = { + .gpr[21] = UINT_MAX, + .gpr[22] = UINT_MAX, + } + }, + { + .descr = "RA = UINT_MAX, RB = 0x1", + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), + .regs = { + .gpr[21] = UINT_MAX, + .gpr[22] = 0x1, + } + } + } + }, + { + .mnemonic = "add.", + .subtests = { + { + .descr = "RA = LONG_MIN, RB = LONG_MIN", + .flags = IGNORE_CCR, + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MIN, + } + }, + { + .descr = "RA = LONG_MIN, RB = LONG_MAX", + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MAX, + } + }, + { + .descr = "RA = LONG_MAX, RB = LONG_MAX", + .flags = IGNORE_CCR, + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MAX, + .gpr[22] = LONG_MAX, + } + }, + { + .descr = "RA = ULONG_MAX, RB = ULONG_MAX", + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), + .regs = { + .gpr[21] = ULONG_MAX, + .gpr[22] = ULONG_MAX, + } + }, + { + .descr = "RA = ULONG_MAX, RB = 0x1", + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), + .regs = { + .gpr[21] = ULONG_MAX, + .gpr[22] = 0x1, + } + }, + { + .descr = "RA = INT_MIN, RB = INT_MIN", + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), + .regs = { + .gpr[21] = INT_MIN, + .gpr[22] = INT_MIN, + } + }, + { + .descr = "RA = INT_MIN, RB = INT_MAX", + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), + .regs = { + .gpr[21] = INT_MIN, + .gpr[22] = INT_MAX, + } + }, + { + .descr = "RA = INT_MAX, RB = INT_MAX", + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), + .regs = { + .gpr[21] = INT_MAX, + .gpr[22] = INT_MAX, + } + }, + { + .descr = "RA = UINT_MAX, RB = UINT_MAX", + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), + .regs = { + .gpr[21] = UINT_MAX, + .gpr[22] = UINT_MAX, + } + }, + { + .descr = "RA = UINT_MAX, RB = 0x1", + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), + .regs = { + .gpr[21] = UINT_MAX, + .gpr[22] = 0x1, + } + } + } + }, + { + .mnemonic = "addc", + 
.subtests = { + { + .descr = "RA = LONG_MIN, RB = LONG_MIN", + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MIN, + } + }, + { + .descr = "RA = LONG_MIN, RB = LONG_MAX", + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MAX, + } + }, + { + .descr = "RA = LONG_MAX, RB = LONG_MAX", + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MAX, + .gpr[22] = LONG_MAX, + } + }, + { + .descr = "RA = ULONG_MAX, RB = ULONG_MAX", + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), + .regs = { + .gpr[21] = ULONG_MAX, + .gpr[22] = ULONG_MAX, + } + }, + { + .descr = "RA = ULONG_MAX, RB = 0x1", + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), + .regs = { + .gpr[21] = ULONG_MAX, + .gpr[22] = 0x1, + } + }, + { + .descr = "RA = INT_MIN, RB = INT_MIN", + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), + .regs = { + .gpr[21] = INT_MIN, + .gpr[22] = INT_MIN, + } + }, + { + .descr = "RA = INT_MIN, RB = INT_MAX", + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), + .regs = { + .gpr[21] = INT_MIN, + .gpr[22] = INT_MAX, + } + }, + { + .descr = "RA = INT_MAX, RB = INT_MAX", + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), + .regs = { + .gpr[21] = INT_MAX, + .gpr[22] = INT_MAX, + } + }, + { + .descr = "RA = UINT_MAX, RB = UINT_MAX", + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), + .regs = { + .gpr[21] = UINT_MAX, + .gpr[22] = UINT_MAX, + } + }, + { + .descr = "RA = UINT_MAX, RB = 0x1", + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), + .regs = { + .gpr[21] = UINT_MAX, + .gpr[22] = 0x1, + } + }, + { + .descr = "RA = LONG_MIN | INT_MIN, RB = LONG_MIN | INT_MIN", + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MIN | (uint)INT_MIN, + .gpr[22] = LONG_MIN | (uint)INT_MIN, + } + } + } + }, + { + .mnemonic = "addc.", + .subtests = { + { + .descr = "RA = LONG_MIN, RB = LONG_MIN", + .flags = IGNORE_CCR, + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MIN, + } + }, + { + .descr = "RA = LONG_MIN, RB = LONG_MAX", + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MAX, + } + }, + { + .descr = "RA = LONG_MAX, RB = LONG_MAX", + .flags = IGNORE_CCR, + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MAX, + .gpr[22] = LONG_MAX, + } + }, + { + .descr = "RA = ULONG_MAX, RB = ULONG_MAX", + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), + .regs = { + .gpr[21] = ULONG_MAX, + .gpr[22] = ULONG_MAX, + } + }, + { + .descr = "RA = ULONG_MAX, RB = 0x1", + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), + .regs = { + .gpr[21] = ULONG_MAX, + .gpr[22] = 0x1, + } + }, + { + .descr = "RA = INT_MIN, RB = INT_MIN", + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), + .regs = { + .gpr[21] = INT_MIN, + .gpr[22] = INT_MIN, + } + }, + { + .descr = "RA = INT_MIN, RB = INT_MAX", + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), + .regs = { + .gpr[21] = INT_MIN, + .gpr[22] = INT_MAX, + } + }, + { + .descr = "RA = INT_MAX, RB = INT_MAX", + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), + .regs = { + .gpr[21] = INT_MAX, + .gpr[22] = INT_MAX, + } + }, + { + .descr = "RA = UINT_MAX, RB = UINT_MAX", + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), + .regs = { + .gpr[21] = UINT_MAX, + .gpr[22] = UINT_MAX, + } + }, + { + .descr = "RA = UINT_MAX, RB = 0x1", + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), + .regs = { + .gpr[21] = UINT_MAX, + .gpr[22] = 0x1, + } + }, + { + .descr = "RA = 
LONG_MIN | INT_MIN, RB = LONG_MIN | INT_MIN", + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MIN | (uint)INT_MIN, + .gpr[22] = LONG_MIN | (uint)INT_MIN, + } + } + } + }, + { + .mnemonic = "divde", + .subtests = { + { + .descr = "RA = LONG_MIN, RB = LONG_MIN", + .instr = ppc_inst(PPC_RAW_DIVDE(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MIN, + } + }, + { + .descr = "RA = 1L, RB = 0", + .instr = ppc_inst(PPC_RAW_DIVDE(20, 21, 22)), + .flags = IGNORE_GPR(20), + .regs = { + .gpr[21] = 1L, + .gpr[22] = 0, + } + }, + { + .descr = "RA = LONG_MIN, RB = LONG_MAX", + .instr = ppc_inst(PPC_RAW_DIVDE(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MAX, + } + } + } + }, + { + .mnemonic = "divde.", + .subtests = { + { + .descr = "RA = LONG_MIN, RB = LONG_MIN", + .instr = ppc_inst(PPC_RAW_DIVDE_DOT(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MIN, + } + }, + { + .descr = "RA = 1L, RB = 0", + .instr = ppc_inst(PPC_RAW_DIVDE_DOT(20, 21, 22)), + .flags = IGNORE_GPR(20), + .regs = { + .gpr[21] = 1L, + .gpr[22] = 0, + } + }, + { + .descr = "RA = LONG_MIN, RB = LONG_MAX", + .instr = ppc_inst(PPC_RAW_DIVDE_DOT(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MAX, + } + } + } + }, + { + .mnemonic = "divdeu", + .subtests = { + { + .descr = "RA = LONG_MIN, RB = LONG_MIN", + .instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)), + .flags = IGNORE_GPR(20), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MIN, + } + }, + { + .descr = "RA = 1L, RB = 0", + .instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)), + .flags = IGNORE_GPR(20), + .regs = { + .gpr[21] = 1L, + .gpr[22] = 0, + } + }, + { + .descr = "RA = LONG_MIN, RB = LONG_MAX", + .instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MAX, + } + }, + { + .descr = "RA = LONG_MAX - 1, RB = LONG_MAX", + .instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MAX - 1, + .gpr[22] = LONG_MAX, + } + }, + { + .descr = "RA = LONG_MIN + 1, RB = LONG_MIN", + .instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)), + .flags = IGNORE_GPR(20), + .regs = { + .gpr[21] = LONG_MIN + 1, + .gpr[22] = LONG_MIN, + } + } + } + }, + { + .mnemonic = "divdeu.", + .subtests = { + { + .descr = "RA = LONG_MIN, RB = LONG_MIN", + .instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)), + .flags = IGNORE_GPR(20), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MIN, + } + }, + { + .descr = "RA = 1L, RB = 0", + .instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)), + .flags = IGNORE_GPR(20), + .regs = { + .gpr[21] = 1L, + .gpr[22] = 0, + } + }, + { + .descr = "RA = LONG_MIN, RB = LONG_MAX", + .instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MAX, + } + }, + { + .descr = "RA = LONG_MAX - 1, RB = LONG_MAX", + .instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MAX - 1, + .gpr[22] = LONG_MAX, + } + }, + { + .descr = "RA = LONG_MIN + 1, RB = LONG_MIN", + .instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)), + .flags = IGNORE_GPR(20), + .regs = { + .gpr[21] = LONG_MIN + 1, + .gpr[22] = LONG_MIN, + } + } + } + }, + { + .mnemonic = "paddi", + .cpu_feature = CPU_FTR_ARCH_31, + .subtests = { + { + .descr = "RA = LONG_MIN, SI = SI_MIN, R = 0", + .instr = TEST_PADDI(21, 22, SI_MIN, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = LONG_MIN, + } + }, + { + .descr = "RA = LONG_MIN, SI = SI_MAX, R = 0", + .instr = TEST_PADDI(21, 22, SI_MAX, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] 
= LONG_MIN, + } + }, + { + .descr = "RA = LONG_MAX, SI = SI_MAX, R = 0", + .instr = TEST_PADDI(21, 22, SI_MAX, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = LONG_MAX, + } + }, + { + .descr = "RA = ULONG_MAX, SI = SI_UMAX, R = 0", + .instr = TEST_PADDI(21, 22, SI_UMAX, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = ULONG_MAX, + } + }, + { + .descr = "RA = ULONG_MAX, SI = 0x1, R = 0", + .instr = TEST_PADDI(21, 22, 0x1, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = ULONG_MAX, + } + }, + { + .descr = "RA = INT_MIN, SI = SI_MIN, R = 0", + .instr = TEST_PADDI(21, 22, SI_MIN, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = INT_MIN, + } + }, + { + .descr = "RA = INT_MIN, SI = SI_MAX, R = 0", + .instr = TEST_PADDI(21, 22, SI_MAX, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = INT_MIN, + } + }, + { + .descr = "RA = INT_MAX, SI = SI_MAX, R = 0", + .instr = TEST_PADDI(21, 22, SI_MAX, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = INT_MAX, + } + }, + { + .descr = "RA = UINT_MAX, SI = 0x1, R = 0", + .instr = TEST_PADDI(21, 22, 0x1, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = UINT_MAX, + } + }, + { + .descr = "RA = UINT_MAX, SI = SI_MAX, R = 0", + .instr = TEST_PADDI(21, 22, SI_MAX, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = UINT_MAX, + } + }, + { + .descr = "RA is r0, SI = SI_MIN, R = 0", + .instr = TEST_PADDI(21, 0, SI_MIN, 0), + .regs = { + .gpr[21] = 0x0, + } + }, + { + .descr = "RA = 0, SI = SI_MIN, R = 0", + .instr = TEST_PADDI(21, 22, SI_MIN, 0), + .regs = { + .gpr[21] = 0x0, + .gpr[22] = 0x0, + } + }, + { + .descr = "RA is r0, SI = 0, R = 1", + .instr = TEST_PADDI(21, 0, 0, 1), + .regs = { + .gpr[21] = 0, + } + }, + { + .descr = "RA is r0, SI = SI_MIN, R = 1", + .instr = TEST_PADDI(21, 0, SI_MIN, 1), + .regs = { + .gpr[21] = 0, + } + }, + /* Invalid instruction form with R = 1 and RA != 0 */ + { + .descr = "RA = R22(0), SI = 0, R = 1", + .instr = TEST_PADDI(21, 22, 0, 1), + .flags = NEGATIVE_TEST, + .regs = { + .gpr[21] = 0, + .gpr[22] = 0, + } + } + } + } +}; + +static int __init emulate_compute_instr(struct pt_regs *regs, + ppc_inst_t instr, + bool negative) +{ + int analysed; + struct instruction_op op; + + if (!regs || !ppc_inst_val(instr)) + return -EINVAL; + + /* This is not a return frame regs */ + regs->nip = patch_site_addr(&patch__exec_instr); + + analysed = analyse_instr(&op, regs, instr); + if (analysed != 1 || GETTYPE(op.type) != COMPUTE) { + if (negative) + return -EFAULT; + pr_info("emulation failed, instruction = %08lx\n", ppc_inst_as_ulong(instr)); + return -EFAULT; + } + if (analysed == 1 && negative) + pr_info("negative test failed, instruction = %08lx\n", ppc_inst_as_ulong(instr)); + if (!negative) + emulate_update_regs(regs, &op); + return 0; +} + +static int __init execute_compute_instr(struct pt_regs *regs, + ppc_inst_t instr) +{ + extern int exec_instr(struct pt_regs *regs); + + if (!regs || !ppc_inst_val(instr)) + return -EINVAL; + + /* Patch the NOP with the actual instruction */ + patch_instruction_site(&patch__exec_instr, instr); + if (exec_instr(regs)) { + pr_info("execution failed, instruction = %08lx\n", ppc_inst_as_ulong(instr)); + return -EFAULT; + } + + return 0; +} + +#define gpr_mismatch(gprn, exp, got) \ + pr_info("GPR%u mismatch, exp = 0x%016lx, got = 0x%016lx\n", \ + gprn, exp, got) + +#define reg_mismatch(name, exp, got) \ + pr_info("%s mismatch, exp = 0x%016lx, got = 0x%016lx\n", \ + name, exp, got) + +static void __init run_tests_compute(void) +{ + unsigned long flags; + struct compute_test *test; + struct pt_regs *regs, exp, got; + unsigned int i, j, k; + 
ppc_inst_t instr; + bool ignore_gpr, ignore_xer, ignore_ccr, passed, rc, negative; + + for (i = 0; i < ARRAY_SIZE(compute_tests); i++) { + test = &compute_tests[i]; + + if (test->cpu_feature && !early_cpu_has_feature(test->cpu_feature)) { + show_result(test->mnemonic, "SKIP (!CPU_FTR)"); + continue; + } + + for (j = 0; j < MAX_SUBTESTS && test->subtests[j].descr; j++) { + instr = test->subtests[j].instr; + flags = test->subtests[j].flags; + regs = &test->subtests[j].regs; + negative = flags & NEGATIVE_TEST; + ignore_xer = flags & IGNORE_XER; + ignore_ccr = flags & IGNORE_CCR; + passed = true; + + memcpy(&exp, regs, sizeof(struct pt_regs)); + memcpy(&got, regs, sizeof(struct pt_regs)); + + /* + * Set a compatible MSR value explicitly to ensure + * that XER and CR bits are updated appropriately + */ + exp.msr = MSR_KERNEL; + got.msr = MSR_KERNEL; + + rc = emulate_compute_instr(&got, instr, negative) != 0; + if (negative) { + /* skip executing instruction */ + passed = rc; + goto print; + } else if (rc || execute_compute_instr(&exp, instr)) { + passed = false; + goto print; + } + + /* Verify GPR values */ + for (k = 0; k < 32; k++) { + ignore_gpr = flags & IGNORE_GPR(k); + if (!ignore_gpr && exp.gpr[k] != got.gpr[k]) { + passed = false; + gpr_mismatch(k, exp.gpr[k], got.gpr[k]); + } + } + + /* Verify LR value */ + if (exp.link != got.link) { + passed = false; + reg_mismatch("LR", exp.link, got.link); + } + + /* Verify XER value */ + if (!ignore_xer && exp.xer != got.xer) { + passed = false; + reg_mismatch("XER", exp.xer, got.xer); + } + + /* Verify CR value */ + if (!ignore_ccr && exp.ccr != got.ccr) { + passed = false; + reg_mismatch("CR", exp.ccr, got.ccr); + } + +print: + show_result_with_descr(test->mnemonic, + test->subtests[j].descr, + passed ? "PASS" : "FAIL"); + } + } +} + +static int __init test_emulate_step(void) +{ + printk(KERN_INFO "Running instruction emulation self-tests ...\n"); + run_tests_load_store(); + run_tests_compute(); + + return 0; +} +late_initcall(test_emulate_step); diff --git a/arch/powerpc/lib/test_emulate_step_exec_instr.S b/arch/powerpc/lib/test_emulate_step_exec_instr.S new file mode 100644 index 000000000000..e2b646a4f7fa --- /dev/null +++ b/arch/powerpc/lib/test_emulate_step_exec_instr.S @@ -0,0 +1,150 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Non-emulated single-stepping support (currently limited to basic integer + * computations) used to validate the instruction emulation infrastructure. + * + * Copyright (C) 2019 IBM Corporation + */ + +#include <asm/asm-offsets.h> +#include <asm/ppc_asm.h> +#include <asm/code-patching-asm.h> +#include <linux/errno.h> + +/* int exec_instr(struct pt_regs *regs) */ +_GLOBAL(exec_instr) + + /* + * Stack frame layout (INT_FRAME_SIZE bytes) + * In-memory pt_regs (SP + STACK_INT_FRAME_REGS) + * Scratch space (SP + 8) + * Back chain (SP + 0) + */ + + /* + * Allocate a new stack frame with enough space to hold the register + * states in an in-memory pt_regs and also create the back chain to + * the caller's stack frame. + */ + stdu r1, -INT_FRAME_SIZE(r1) + + /* + * Save non-volatile GPRs on stack. This includes TOC pointer (GPR2) + * and local variables (GPR14 to GPR31). The register for the pt_regs + * parameter (GPR3) is saved additionally to ensure that the resulting + * register state can still be saved even if GPR3 gets overwritten + * when loading the initial register state for the test instruction. 
+ * The stack pointer (GPR1) and the thread pointer (GPR13) are not + * saved as these should not be modified anyway. + */ + SAVE_GPRS(2, 3, r1) + SAVE_NVGPRS(r1) + + /* + * Save LR on stack to ensure that the return address is available + * even if it gets overwritten by the test instruction. + */ + mflr r0 + std r0, _LINK(r1) + + /* + * Save CR on stack. For simplicity, the entire register is saved + * even though only fields 2 to 4 are non-volatile. + */ + mfcr r0 + std r0, _CCR(r1) + + /* + * Load register state for the test instruction without touching the + * critical non-volatile registers. The register state is passed as a + * pointer to a pt_regs instance. + */ + subi r31, r3, GPR0 + + /* Load LR from pt_regs */ + ld r0, _LINK(r31) + mtlr r0 + + /* Load CR from pt_regs */ + ld r0, _CCR(r31) + mtcr r0 + + /* Load XER from pt_regs */ + ld r0, _XER(r31) + mtxer r0 + + /* Load GPRs from pt_regs */ + REST_GPR(0, r31) + REST_GPRS(2, 12, r31) + REST_NVGPRS(r31) + + /* Placeholder for the test instruction */ + .balign 64 +1: nop + nop + patch_site 1b patch__exec_instr + + /* + * Since GPR3 is overwritten, temporarily restore it back to its + * original state, i.e. the pointer to pt_regs, to ensure that the + * resulting register state can be saved. Before doing this, a copy + * of it is created in the scratch space which is used later on to + * save it to pt_regs. + */ + std r3, 8(r1) + REST_GPR(3, r1) + + /* Save resulting GPR state to pt_regs */ + subi r3, r3, GPR0 + SAVE_GPR(0, r3) + SAVE_GPR(2, r3) + SAVE_GPRS(4, 12, r3) + SAVE_NVGPRS(r3) + + /* Save resulting LR to pt_regs */ + mflr r0 + std r0, _LINK(r3) + + /* Save resulting CR to pt_regs */ + mfcr r0 + std r0, _CCR(r3) + + /* Save resulting XER to pt_regs */ + mfxer r0 + std r0, _XER(r3) + + /* Restore resulting GPR3 from scratch space and save it to pt_regs */ + ld r0, 8(r1) + std r0, GPR3(r3) + + /* Set return value to denote execution success */ + li r3, 0 + + /* Continue */ + b 3f + + /* Set return value to denote execution failure */ +2: li r3, -EFAULT + + /* Restore the non-volatile GPRs from stack */ +3: REST_GPR(2, r1) + REST_NVGPRS(r1) + + /* Restore LR from stack to be able to return */ + ld r0, _LINK(r1) + mtlr r0 + + /* Restore CR from stack */ + ld r0, _CCR(r1) + mtcr r0 + + /* Tear down stack frame */ + addi r1, r1, INT_FRAME_SIZE + + /* Return */ + blr + + /* Setup exception table */ + EX_TABLE(1b, 2b) + +_ASM_NOKPROBE_SYMBOL(exec_instr) diff --git a/arch/powerpc/lib/usercopy_64.c b/arch/powerpc/lib/usercopy_64.c deleted file mode 100644 index 5eea6f3c1e03..000000000000 --- a/arch/powerpc/lib/usercopy_64.c +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Functions which are too large to be inlined. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ -#include <linux/module.h> -#include <asm/uaccess.h> - -unsigned long copy_from_user(void *to, const void __user *from, unsigned long n) -{ - if (likely(access_ok(VERIFY_READ, from, n))) - n = __copy_from_user(to, from, n); - else - memset(to, 0, n); - return n; -} - -unsigned long copy_to_user(void __user *to, const void *from, unsigned long n) -{ - if (likely(access_ok(VERIFY_WRITE, to, n))) - n = __copy_to_user(to, from, n); - return n; -} - -unsigned long copy_in_user(void __user *to, const void __user *from, - unsigned long n) -{ - might_sleep(); - if (likely(access_ok(VERIFY_READ, from, n) && - access_ok(VERIFY_WRITE, to, n))) - n =__copy_tofrom_user(to, from, n); - return n; -} - -EXPORT_SYMBOL(copy_from_user); -EXPORT_SYMBOL(copy_to_user); -EXPORT_SYMBOL(copy_in_user); - diff --git a/arch/powerpc/lib/vmx-helper.c b/arch/powerpc/lib/vmx-helper.c index 3cf529ceec5b..54340912398f 100644 --- a/arch/powerpc/lib/vmx-helper.c +++ b/arch/powerpc/lib/vmx-helper.c @@ -1,17 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * * Copyright (C) IBM Corporation, 2011 * @@ -27,11 +15,11 @@ int enter_vmx_usercopy(void) if (in_interrupt()) return 0; - /* This acts as preempt_disable() as well and will make - * enable_kernel_altivec(). We need to disable page faults - * as they can call schedule and thus make us lose the VMX - * context. So on page faults, we just fail which will cause - * a fallback to the normal non-vmx copy. + preempt_disable(); + /* + * We need to disable page faults as they can call schedule and + * thus make us lose the VMX context. So on page faults, we just + * fail which will cause a fallback to the normal non-vmx copy. */ pagefault_disable(); @@ -46,11 +34,23 @@ int enter_vmx_usercopy(void) */ int exit_vmx_usercopy(void) { + disable_kernel_altivec(); pagefault_enable(); + preempt_enable_no_resched(); + /* + * Must never explicitly call schedule (including preempt_enable()) + * while in a kuap-unlocked user copy, because the AMR register will + * not be saved and restored across context switch. However preempt + * kernels need to be preempted as soon as possible if need_resched is + * set and we are preemptible. The hack here is to schedule a + * decrementer to fire here and reschedule for us if necessary. + */ + if (need_irq_preemption() && need_resched()) + set_dec(1); return 0; } -int enter_vmx_copy(void) +int enter_vmx_ops(void) { if (in_interrupt()) return 0; @@ -67,8 +67,9 @@ int enter_vmx_copy(void) * passed a pointer to the destination which we return as required by a * memcpy implementation. 
*/ -void *exit_vmx_copy(void *dest) +void *exit_vmx_ops(void *dest) { + disable_kernel_altivec(); preempt_enable(); return dest; } diff --git a/arch/powerpc/lib/xor_vmx.c b/arch/powerpc/lib/xor_vmx.c new file mode 100644 index 000000000000..aab49d056d18 --- /dev/null +++ b/arch/powerpc/lib/xor_vmx.c @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * + * Copyright (C) IBM Corporation, 2012 + * + * Author: Anton Blanchard <anton@au.ibm.com> + */ + +/* + * Sparse (as at v0.5.0) gets very, very confused by this file. + * Make it a bit simpler for it. + */ +#if !defined(__CHECKER__) +#include <altivec.h> +#else +#define vec_xor(a, b) a ^ b +#define vector __attribute__((vector_size(16))) +#endif + +#include "xor_vmx.h" + +typedef vector signed char unative_t; + +#define DEFINE(V) \ + unative_t *V = (unative_t *)V##_in; \ + unative_t V##_0, V##_1, V##_2, V##_3 + +#define LOAD(V) \ + do { \ + V##_0 = V[0]; \ + V##_1 = V[1]; \ + V##_2 = V[2]; \ + V##_3 = V[3]; \ + } while (0) + +#define STORE(V) \ + do { \ + V[0] = V##_0; \ + V[1] = V##_1; \ + V[2] = V##_2; \ + V[3] = V##_3; \ + } while (0) + +#define XOR(V1, V2) \ + do { \ + V1##_0 = vec_xor(V1##_0, V2##_0); \ + V1##_1 = vec_xor(V1##_1, V2##_1); \ + V1##_2 = vec_xor(V1##_2, V2##_2); \ + V1##_3 = vec_xor(V1##_3, V2##_3); \ + } while (0) + +void __xor_altivec_2(unsigned long bytes, + unsigned long * __restrict v1_in, + const unsigned long * __restrict v2_in) +{ + DEFINE(v1); + DEFINE(v2); + unsigned long lines = bytes / (sizeof(unative_t)) / 4; + + do { + LOAD(v1); + LOAD(v2); + XOR(v1, v2); + STORE(v1); + + v1 += 4; + v2 += 4; + } while (--lines > 0); +} + +void __xor_altivec_3(unsigned long bytes, + unsigned long * __restrict v1_in, + const unsigned long * __restrict v2_in, + const unsigned long * __restrict v3_in) +{ + DEFINE(v1); + DEFINE(v2); + DEFINE(v3); + unsigned long lines = bytes / (sizeof(unative_t)) / 4; + + do { + LOAD(v1); + LOAD(v2); + LOAD(v3); + XOR(v1, v2); + XOR(v1, v3); + STORE(v1); + + v1 += 4; + v2 += 4; + v3 += 4; + } while (--lines > 0); +} + +void __xor_altivec_4(unsigned long bytes, + unsigned long * __restrict v1_in, + const unsigned long * __restrict v2_in, + const unsigned long * __restrict v3_in, + const unsigned long * __restrict v4_in) +{ + DEFINE(v1); + DEFINE(v2); + DEFINE(v3); + DEFINE(v4); + unsigned long lines = bytes / (sizeof(unative_t)) / 4; + + do { + LOAD(v1); + LOAD(v2); + LOAD(v3); + LOAD(v4); + XOR(v1, v2); + XOR(v3, v4); + XOR(v1, v3); + STORE(v1); + + v1 += 4; + v2 += 4; + v3 += 4; + v4 += 4; + } while (--lines > 0); +} + +void __xor_altivec_5(unsigned long bytes, + unsigned long * __restrict v1_in, + const unsigned long * __restrict v2_in, + const unsigned long * __restrict v3_in, + const unsigned long * __restrict v4_in, + const unsigned long * __restrict v5_in) +{ + DEFINE(v1); + DEFINE(v2); + DEFINE(v3); + DEFINE(v4); + DEFINE(v5); + unsigned long lines = bytes / (sizeof(unative_t)) / 4; + + do { + LOAD(v1); + LOAD(v2); + LOAD(v3); + LOAD(v4); + LOAD(v5); + XOR(v1, v2); + XOR(v3, v4); + XOR(v1, v5); + XOR(v1, v3); + STORE(v1); + + v1 += 4; + v2 += 4; + v3 += 4; + v4 += 4; + v5 += 4; + } while (--lines > 0); +} diff --git a/arch/powerpc/lib/xor_vmx.h b/arch/powerpc/lib/xor_vmx.h new file mode 100644 index 000000000000..573c41d90dac --- /dev/null +++ b/arch/powerpc/lib/xor_vmx.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Simple interface to link xor_vmx.c and xor_vmx_glue.c + * + * Separating these file ensures that no altivec instructions are run + * 
outside of the enable/disable altivec block. + */ + +void __xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2); +void __xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3); +void __xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4); +void __xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5); diff --git a/arch/powerpc/lib/xor_vmx_glue.c b/arch/powerpc/lib/xor_vmx_glue.c new file mode 100644 index 000000000000..35d917ece4d1 --- /dev/null +++ b/arch/powerpc/lib/xor_vmx_glue.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Altivec XOR operations + * + * Copyright 2017 IBM Corp. + */ + +#include <linux/preempt.h> +#include <linux/export.h> +#include <linux/sched.h> +#include <asm/switch_to.h> +#include <asm/xor_altivec.h> +#include "xor_vmx.h" + +void xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) +{ + preempt_disable(); + enable_kernel_altivec(); + __xor_altivec_2(bytes, p1, p2); + disable_kernel_altivec(); + preempt_enable(); +} +EXPORT_SYMBOL(xor_altivec_2); + +void xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) +{ + preempt_disable(); + enable_kernel_altivec(); + __xor_altivec_3(bytes, p1, p2, p3); + disable_kernel_altivec(); + preempt_enable(); +} +EXPORT_SYMBOL(xor_altivec_3); + +void xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) +{ + preempt_disable(); + enable_kernel_altivec(); + __xor_altivec_4(bytes, p1, p2, p3, p4); + disable_kernel_altivec(); + preempt_enable(); +} +EXPORT_SYMBOL(xor_altivec_4); + +void xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) +{ + preempt_disable(); + enable_kernel_altivec(); + __xor_altivec_5(bytes, p1, p2, p3, p4, p5); + disable_kernel_altivec(); + preempt_enable(); +} +EXPORT_SYMBOL(xor_altivec_5); |
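The magic offsets in the branch tests above come straight from the branch encodings: a conditional (B-form) branch carries a 16-bit, word-aligned, sign-extended displacement, so its reach is -32768 to +32764 bytes, which is why addr - 0x8000 is accepted while addr - 0x8004, addr + 0x8000 and any unaligned target must fail; an unconditional (I-form) branch carries a 26-bit displacement, giving the +/- 32 MB extremes used by the translate_branch() cases. A minimal standalone sketch of that reachability rule (plain C for illustration, not the kernel's create_branch()/create_cond_branch() helpers):

#include <stdint.h>
#include <stdio.h>

/*
 * Illustrative only: reachability rule for PowerPC direct branches.
 * Both forms encode a sign-extended, word-aligned byte displacement:
 * 26 bits (LI) for the unconditional I-form, 16 bits (BD) for the
 * conditional B-form.
 */
static int branch_reachable(uint64_t from, uint64_t to, int bits)
{
	int64_t disp = (int64_t)(to - from);
	int64_t min = -(1LL << (bits - 1));
	int64_t max = (1LL << (bits - 1)) - 4;

	if (disp & 0x3)			/* target must be word aligned */
		return 0;
	return disp >= min && disp <= max;
}

int main(void)
{
	uint64_t addr = 0x10000000;

	/* B-form: +/- 32 KB, mirroring the create_cond_branch() tests */
	printf("%d\n", branch_reachable(addr, addr - 0x8000, 16));	/* 1 */
	printf("%d\n", branch_reachable(addr, addr - 0x8004, 16));	/* 0 */
	printf("%d\n", branch_reachable(addr, addr + 0x8000, 16));	/* 0 */
	printf("%d\n", branch_reachable(addr, addr + 3, 16));		/* 0 */

	/* I-form: +/- 32 MB, mirroring the translate_branch() extremes */
	printf("%d\n", branch_reachable(addr, addr - 0x2000000, 26));	/* 1 */
	printf("%d\n", branch_reachable(addr, addr + 0x2000000, 26));	/* 0 */
	return 0;
}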
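The "multipage" cases in test_multi_instruction_patching() deliberately start each write a few bytes before a page boundary (PAGE_SIZE * n - 8 or - 12) so that patch_instructions() has to map and write two pages, while the guard words checked before and after the patched range (addr32[0], addr32[4], and so on) confirm that nothing outside the requested bytes is touched. A small standalone sketch of the boundary condition being exercised (assumes 4 KB pages; not kernel code):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SZ 4096UL	/* assumed page size for the illustration */

/* Does a patch of len bytes starting at addr cross a page boundary? */
static bool crosses_page(uintptr_t addr, size_t len)
{
	return (addr / PAGE_SZ) != ((addr + len - 1) / PAGE_SZ);
}

int main(void)
{
	/*
	 * Mirrors the multipage 32-bit case: addr32 sits 8 bytes before a
	 * page end and the 12-byte patch starts at addr32 + 1 (+4 bytes),
	 * so 4 bytes land in one page and 8 in the next.
	 */
	uintptr_t start = 4 * PAGE_SZ - 8 + 4;

	printf("%d\n", crosses_page(start, 12));	/* 1: straddles the boundary */
	printf("%d\n", crosses_page(64, 12));		/* 0: stays in one page     */
	return 0;
}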
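The SI_MIN/SI_MAX/SI_UMAX limits and the TEST_P*() macros reflect how prefixed MLS:D and 8LS:D instructions carry a 34-bit immediate: the high 18 bits travel in the prefix word (IMM_H) and the low 16 bits in the suffix (IMM_L), and the re-assembled field is sign-extended from bit 33, which is why SI_MIN is written as BIT(33). A hedged round-trip sketch of that split (helper names are illustrative, not the kernel macros):

#include <stdint.h>
#include <stdio.h>

/* Illustrative split of a 34-bit immediate across prefix/suffix words. */
static void split_si34(int64_t si, uint32_t *hi18, uint32_t *lo16)
{
	*hi18 = ((uint64_t)si >> 16) & 0x3ffff;	/* carried in the prefix */
	*lo16 = (uint64_t)si & 0xffff;		/* carried in the suffix */
}

/* Re-assemble and sign-extend from bit 33, as the emulator must. */
static int64_t join_si34(uint32_t hi18, uint32_t lo16)
{
	uint64_t v = ((uint64_t)hi18 << 16) | lo16;

	/* assumes the usual arithmetic right shift on signed values */
	return ((int64_t)(v << 30)) >> 30;
}

int main(void)
{
	const int64_t si_min = -(1LL << 33);	/* value encoded by SI_MIN */
	const int64_t si_max = (1LL << 33) - 1;	/* value encoded by SI_MAX */
	uint32_t hi, lo;

	split_si34(si_min, &hi, &lo);
	printf("min round-trips: %d\n", join_si34(hi, lo) == si_min);
	split_si34(si_max, &hi, &lo);
	printf("max round-trips: %d\n", join_si34(hi, lo) == si_max);
	return 0;
}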
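For readers without the Power vector ISA at hand, the __xor_altivec_*() routines are unrolled XOR loops operating on four 16-byte vectors per iteration, and the xor_vmx_glue.c wrappers exist solely so that no VMX instruction runs outside the preempt_disable()/enable_kernel_altivec() bracket. A scalar, standalone illustration of the same loop structure (not the kernel implementation; like the VMX code, the do/while assumes a non-zero byte count that is a multiple of the per-iteration group):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Scalar sketch of the __xor_altivec_4() loop shape: four words per
 * iteration, where the real code uses four 16-byte VMX registers and
 * vec_xor inside an enable_kernel_altivec() bracket.
 */
static void xor_4_scalar(size_t bytes, uint64_t *v1, const uint64_t *v2,
			 const uint64_t *v3, const uint64_t *v4)
{
	size_t lines = bytes / sizeof(uint64_t) / 4;

	do {
		for (int i = 0; i < 4; i++)
			v1[i] ^= v2[i] ^ v3[i] ^ v4[i];
		v1 += 4;
		v2 += 4;
		v3 += 4;
		v4 += 4;
	} while (--lines > 0);
}

int main(void)
{
	uint64_t a[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
	uint64_t b[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
	uint64_t c[8] = { 0 };
	uint64_t d[8] = { 0 };

	/* a ^= b ^ c ^ d, leaving a all-zero since b mirrors a */
	xor_4_scalar(sizeof(a), a, b, c, d);
	printf("a cleared: %d\n", a[0] == 0 && a[7] == 0);
	return 0;
}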