summaryrefslogtreecommitdiff
path: root/arch/powerpc/lib
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/lib')
-rw-r--r--arch/powerpc/lib/Makefile32
-rw-r--r--arch/powerpc/lib/alloc.c23
-rw-r--r--arch/powerpc/lib/checksum_32.S80
-rw-r--r--arch/powerpc/lib/checksum_64.S39
-rw-r--r--arch/powerpc/lib/checksum_wrappers.c83
-rw-r--r--arch/powerpc/lib/code-patching.c930
-rw-r--r--arch/powerpc/lib/copy_32.S5
-rw-r--r--arch/powerpc/lib/copy_mc_64.S (renamed from arch/powerpc/lib/memcpy_mcsafe_64.S)6
-rw-r--r--arch/powerpc/lib/copypage_64.S19
-rw-r--r--arch/powerpc/lib/copypage_power7.S16
-rw-r--r--arch/powerpc/lib/copyuser_64.S2
-rw-r--r--arch/powerpc/lib/copyuser_power7.S20
-rw-r--r--arch/powerpc/lib/error-inject.c2
-rw-r--r--arch/powerpc/lib/feature-fixups-test.S69
-rw-r--r--arch/powerpc/lib/feature-fixups.c598
-rw-r--r--arch/powerpc/lib/hweight_64.S10
-rw-r--r--arch/powerpc/lib/locks.c12
-rw-r--r--arch/powerpc/lib/mem_64.S2
-rw-r--r--arch/powerpc/lib/memcmp_32.S2
-rw-r--r--arch/powerpc/lib/memcmp_64.S6
-rw-r--r--arch/powerpc/lib/memcpy_64.S2
-rw-r--r--arch/powerpc/lib/memcpy_power7.S16
-rw-r--r--arch/powerpc/lib/pmem.c56
-rw-r--r--arch/powerpc/lib/qspinlock.c989
-rw-r--r--arch/powerpc/lib/restart_table.c56
-rw-r--r--arch/powerpc/lib/sstep.c1099
-rw-r--r--arch/powerpc/lib/string.S2
-rw-r--r--arch/powerpc/lib/string_32.S6
-rw-r--r--arch/powerpc/lib/string_64.S15
-rw-r--r--arch/powerpc/lib/strlen_32.S2
-rw-r--r--arch/powerpc/lib/test-code-patching.c362
-rw-r--r--arch/powerpc/lib/test_emulate_step.c1007
-rw-r--r--arch/powerpc/lib/test_emulate_step_exec_instr.S12
-rw-r--r--arch/powerpc/lib/vmx-helper.c13
-rw-r--r--arch/powerpc/lib/xor_vmx.c28
-rw-r--r--arch/powerpc/lib/xor_vmx.h27
-rw-r--r--arch/powerpc/lib/xor_vmx_glue.c32
37 files changed, 4287 insertions, 1393 deletions
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index b8de3be10eb4..0ab65eeb93ee 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -5,18 +5,31 @@
ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
+CFLAGS_code-patching.o += -fno-stack-protector
+CFLAGS_feature-fixups.o += -fno-stack-protector
+
CFLAGS_REMOVE_code-patching.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_feature-fixups.o = $(CC_FLAGS_FTRACE)
KASAN_SANITIZE_code-patching.o := n
KASAN_SANITIZE_feature-fixups.o := n
+# restart_table.o contains functions called in the NMI interrupt path
+# which can be in real mode. Disable KASAN.
+KASAN_SANITIZE_restart_table.o := n
+KCSAN_SANITIZE_code-patching.o := n
+KCSAN_SANITIZE_feature-fixups.o := n
ifdef CONFIG_KASAN
CFLAGS_code-patching.o += -DDISABLE_BRANCH_PROFILING
CFLAGS_feature-fixups.o += -DDISABLE_BRANCH_PROFILING
endif
-obj-y += alloc.o code-patching.o feature-fixups.o pmem.o
+CFLAGS_code-patching.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
+CFLAGS_feature-fixups.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
+
+obj-y += code-patching.o feature-fixups.o pmem.o
+
+obj-$(CONFIG_CODE_PATCHING_SELFTEST) += test-code-patching.o
ifndef CONFIG_KASAN
obj-y += string.o memcmp_$(BITS).o
@@ -31,17 +44,22 @@ obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
# 64-bit linker creates .sfpr on demand for final link (vmlinux),
# so it is only needed for modules, and only for older linkers which
# do not support --save-restore-funcs
-ifeq ($(call ld-ifversion, -lt, 225000000, y),y)
-extra-$(CONFIG_PPC64) += crtsavres.o
+ifndef CONFIG_LD_IS_BFD
+always-$(CONFIG_PPC64) += crtsavres.o
endif
obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \
- memcpy_power7.o
+ memcpy_power7.o restart_table.o
obj64-y += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \
- memcpy_64.o memcpy_mcsafe_64.o
+ memcpy_64.o copy_mc_64.o
+ifdef CONFIG_PPC_QUEUED_SPINLOCKS
+obj-$(CONFIG_SMP) += qspinlock.o
+else
obj64-$(CONFIG_SMP) += locks.o
+endif
+
obj64-$(CONFIG_ALTIVEC) += vmx-helper.o
obj64-$(CONFIG_KPROBES_SANITY_TEST) += test_emulate_step.o \
test_emulate_step_exec_instr.o
@@ -58,6 +76,8 @@ obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o
obj-$(CONFIG_FTR_FIXUP_SELFTEST) += feature-fixups-test.o
obj-$(CONFIG_ALTIVEC) += xor_vmx.o xor_vmx_glue.o
-CFLAGS_xor_vmx.o += -maltivec $(call cc-option,-mabi=altivec)
+CFLAGS_xor_vmx.o += -mhard-float -maltivec $(call cc-option,-mabi=altivec)
+# Enable <altivec.h>
+CFLAGS_xor_vmx.o += -isystem $(shell $(CC) -print-file-name=include)
obj-$(CONFIG_PPC64) += $(obj64-y)
diff --git a/arch/powerpc/lib/alloc.c b/arch/powerpc/lib/alloc.c
deleted file mode 100644
index ce180870bd52..000000000000
--- a/arch/powerpc/lib/alloc.c
+++ /dev/null
@@ -1,23 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/memblock.h>
-#include <linux/string.h>
-#include <asm/setup.h>
-
-
-void * __ref zalloc_maybe_bootmem(size_t size, gfp_t mask)
-{
- void *p;
-
- if (slab_is_available())
- p = kzalloc(size, mask);
- else {
- p = memblock_alloc(size, SMP_CACHE_BYTES);
- if (!p)
- panic("%s: Failed to allocate %zu bytes\n", __func__,
- size);
- }
- return p;
-}
diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S
index ecd150dc3ed9..cd00b9bdd772 100644
--- a/arch/powerpc/lib/checksum_32.S
+++ b/arch/powerpc/lib/checksum_32.S
@@ -8,12 +8,12 @@
* Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
*/
+#include <linux/export.h>
#include <linux/sys.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>
-#include <asm/export.h>
.text
@@ -78,12 +78,10 @@ EXPORT_SYMBOL(__csum_partial)
/*
* Computes the checksum of a memory block at src, length len,
- * and adds in "sum" (32-bit), while copying the block to dst.
- * If an access exception occurs on src or dst, it stores -EFAULT
- * to *src_err or *dst_err respectively, and (for an error on
- * src) zeroes the rest of dst.
+ * and adds in 0xffffffff, while copying the block to dst.
+ * If an access exception occurs it returns zero.
*
- * csum_partial_copy_generic(src, dst, len, sum, src_err, dst_err)
+ * csum_partial_copy_generic(src, dst, len)
*/
#define CSUM_COPY_16_BYTES_WITHEX(n) \
8 ## n ## 0: \
@@ -108,30 +106,24 @@ EXPORT_SYMBOL(__csum_partial)
adde r12,r12,r10
#define CSUM_COPY_16_BYTES_EXCODE(n) \
- EX_TABLE(8 ## n ## 0b, src_error); \
- EX_TABLE(8 ## n ## 1b, src_error); \
- EX_TABLE(8 ## n ## 2b, src_error); \
- EX_TABLE(8 ## n ## 3b, src_error); \
- EX_TABLE(8 ## n ## 4b, dst_error); \
- EX_TABLE(8 ## n ## 5b, dst_error); \
- EX_TABLE(8 ## n ## 6b, dst_error); \
- EX_TABLE(8 ## n ## 7b, dst_error);
+ EX_TABLE(8 ## n ## 0b, fault); \
+ EX_TABLE(8 ## n ## 1b, fault); \
+ EX_TABLE(8 ## n ## 2b, fault); \
+ EX_TABLE(8 ## n ## 3b, fault); \
+ EX_TABLE(8 ## n ## 4b, fault); \
+ EX_TABLE(8 ## n ## 5b, fault); \
+ EX_TABLE(8 ## n ## 6b, fault); \
+ EX_TABLE(8 ## n ## 7b, fault);
.text
- .stabs "arch/powerpc/lib/",N_SO,0,0,0f
- .stabs "checksum_32.S",N_SO,0,0,0f
-0:
CACHELINE_BYTES = L1_CACHE_BYTES
LG_CACHELINE_BYTES = L1_CACHE_SHIFT
CACHELINE_MASK = (L1_CACHE_BYTES-1)
_GLOBAL(csum_partial_copy_generic)
- stwu r1,-16(r1)
- stw r7,12(r1)
- stw r8,8(r1)
-
- addic r12,r6,0
+ li r12,-1
+ addic r0,r0,0 /* clear carry */
addi r6,r4,-4
neg r0,r4
addi r4,r3,-4
@@ -241,39 +233,23 @@ _GLOBAL(csum_partial_copy_generic)
slwi r0,r0,8
adde r12,r12,r0
66: addze r3,r12
- addi r1,r1,16
beqlr+ cr7
rlwinm r3,r3,8,0,31 /* odd destination address: rotate one byte */
blr
-/* read fault */
-src_error:
- lwz r7,12(r1)
- addi r1,r1,16
- cmpwi cr0,r7,0
- beqlr
- li r0,-EFAULT
- stw r0,0(r7)
- blr
-/* write fault */
-dst_error:
- lwz r8,8(r1)
- addi r1,r1,16
- cmpwi cr0,r8,0
- beqlr
- li r0,-EFAULT
- stw r0,0(r8)
+fault:
+ li r3,0
blr
- EX_TABLE(70b, src_error);
- EX_TABLE(71b, dst_error);
- EX_TABLE(72b, src_error);
- EX_TABLE(73b, dst_error);
- EX_TABLE(54b, dst_error);
+ EX_TABLE(70b, fault);
+ EX_TABLE(71b, fault);
+ EX_TABLE(72b, fault);
+ EX_TABLE(73b, fault);
+ EX_TABLE(54b, fault);
/*
* this stuff handles faults in the cacheline loop and branches to either
- * src_error (if in read part) or dst_error (if in write part)
+ * fault (if in read part) or fault (if in write part): one shared label
*/
CSUM_COPY_16_BYTES_EXCODE(0)
#if L1_CACHE_BYTES >= 32
@@ -290,12 +266,12 @@ dst_error:
#endif
#endif
- EX_TABLE(30b, src_error);
- EX_TABLE(31b, dst_error);
- EX_TABLE(40b, src_error);
- EX_TABLE(41b, dst_error);
- EX_TABLE(50b, src_error);
- EX_TABLE(51b, dst_error);
+ EX_TABLE(30b, fault);
+ EX_TABLE(31b, fault);
+ EX_TABLE(40b, fault);
+ EX_TABLE(41b, fault);
+ EX_TABLE(50b, fault);
+ EX_TABLE(51b, fault);
EXPORT_SYMBOL(csum_partial_copy_generic)
diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S
index 514978f908d4..d53d8f09a2c2 100644
--- a/arch/powerpc/lib/checksum_64.S
+++ b/arch/powerpc/lib/checksum_64.S
@@ -8,11 +8,11 @@
* Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
*/
+#include <linux/export.h>
#include <linux/sys.h>
#include <asm/processor.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>
-#include <asm/export.h>
/*
* Computes the checksum of a memory block at buff, length len,
@@ -182,34 +182,33 @@ EXPORT_SYMBOL(__csum_partial)
.macro srcnr
100:
- EX_TABLE(100b,.Lsrc_error_nr)
+ EX_TABLE(100b,.Lerror_nr)
.endm
.macro source
150:
- EX_TABLE(150b,.Lsrc_error)
+ EX_TABLE(150b,.Lerror)
.endm
.macro dstnr
200:
- EX_TABLE(200b,.Ldest_error_nr)
+ EX_TABLE(200b,.Lerror_nr)
.endm
.macro dest
250:
- EX_TABLE(250b,.Ldest_error)
+ EX_TABLE(250b,.Lerror)
.endm
/*
* Computes the checksum of a memory block at src, length len,
- * and adds in "sum" (32-bit), while copying the block to dst.
- * If an access exception occurs on src or dst, it stores -EFAULT
- * to *src_err or *dst_err respectively. The caller must take any action
- * required in this case (zeroing memory, recalculating partial checksum etc).
+ * and adds in 0xffffffff (32-bit), while copying the block to dst.
+ * If an access exception occurs, it returns 0.
*
- * csum_partial_copy_generic(r3=src, r4=dst, r5=len, r6=sum, r7=src_err, r8=dst_err)
+ * csum_partial_copy_generic(r3=src, r4=dst, r5=len)
*/
_GLOBAL(csum_partial_copy_generic)
+ li r6,-1
addic r0,r6,0 /* clear carry */
srdi. r6,r5,3 /* less than 8 bytes? */
@@ -401,29 +400,15 @@ dstnr; stb r6,0(r4)
srdi r3,r3,32
blr
-.Lsrc_error:
+.Lerror:
ld r14,STK_REG(R14)(r1)
ld r15,STK_REG(R15)(r1)
ld r16,STK_REG(R16)(r1)
addi r1,r1,STACKFRAMESIZE
-.Lsrc_error_nr:
- cmpdi 0,r7,0
- beqlr
- li r6,-EFAULT
- stw r6,0(r7)
+.Lerror_nr:
+ li r3,0
blr
-.Ldest_error:
- ld r14,STK_REG(R14)(r1)
- ld r15,STK_REG(R15)(r1)
- ld r16,STK_REG(R16)(r1)
- addi r1,r1,STACKFRAMESIZE
-.Ldest_error_nr:
- cmpdi 0,r8,0
- beqlr
- li r6,-EFAULT
- stw r6,0(r8)
- blr
EXPORT_SYMBOL(csum_partial_copy_generic)
/*
diff --git a/arch/powerpc/lib/checksum_wrappers.c b/arch/powerpc/lib/checksum_wrappers.c
index fabe4db28726..1a14c8780278 100644
--- a/arch/powerpc/lib/checksum_wrappers.c
+++ b/arch/powerpc/lib/checksum_wrappers.c
@@ -12,83 +12,28 @@
#include <linux/uaccess.h>
__wsum csum_and_copy_from_user(const void __user *src, void *dst,
- int len, __wsum sum, int *err_ptr)
+ int len)
{
- unsigned int csum;
+ __wsum csum;
- might_sleep();
- allow_read_from_user(src, len);
+ if (unlikely(!user_read_access_begin(src, len)))
+ return 0;
- *err_ptr = 0;
+ csum = csum_partial_copy_generic((void __force *)src, dst, len);
- if (!len) {
- csum = 0;
- goto out;
- }
-
- if (unlikely((len < 0) || !access_ok(src, len))) {
- *err_ptr = -EFAULT;
- csum = (__force unsigned int)sum;
- goto out;
- }
-
- csum = csum_partial_copy_generic((void __force *)src, dst,
- len, sum, err_ptr, NULL);
-
- if (unlikely(*err_ptr)) {
- int missing = __copy_from_user(dst, src, len);
-
- if (missing) {
- memset(dst + len - missing, 0, missing);
- *err_ptr = -EFAULT;
- } else {
- *err_ptr = 0;
- }
-
- csum = csum_partial(dst, len, sum);
- }
-
-out:
- prevent_read_from_user(src, len);
- return (__force __wsum)csum;
+ user_read_access_end();
+ return csum;
}
-EXPORT_SYMBOL(csum_and_copy_from_user);
-__wsum csum_and_copy_to_user(const void *src, void __user *dst, int len,
- __wsum sum, int *err_ptr)
+__wsum csum_and_copy_to_user(const void *src, void __user *dst, int len)
{
- unsigned int csum;
-
- might_sleep();
- allow_write_to_user(dst, len);
-
- *err_ptr = 0;
-
- if (!len) {
- csum = 0;
- goto out;
- }
-
- if (unlikely((len < 0) || !access_ok(dst, len))) {
- *err_ptr = -EFAULT;
- csum = -1; /* invalid checksum */
- goto out;
- }
-
- csum = csum_partial_copy_generic(src, (void __force *)dst,
- len, sum, NULL, err_ptr);
+ __wsum csum;
- if (unlikely(*err_ptr)) {
- csum = csum_partial(src, len, sum);
+ if (unlikely(!user_write_access_begin(dst, len)))
+ return 0;
- if (copy_to_user(dst, src, len)) {
- *err_ptr = -EFAULT;
- csum = -1; /* invalid checksum */
- }
- }
+ csum = csum_partial_copy_generic(src, (void __force *)dst, len);
-out:
- prevent_write_to_user(dst, len);
- return (__force __wsum)csum;
+ user_write_access_end();
+ return csum;
}
-EXPORT_SYMBOL(csum_and_copy_to_user);
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index 3345f039a876..c6ab46156cda 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -3,47 +3,108 @@
* Copyright 2008 Michael Ellerman, IBM Corporation.
*/
-#include <linux/kernel.h>
#include <linux/kprobes.h>
+#include <linux/mmu_context.h>
+#include <linux/random.h>
#include <linux/vmalloc.h>
#include <linux/init.h>
-#include <linux/mm.h>
#include <linux/cpuhotplug.h>
-#include <linux/slab.h>
#include <linux/uaccess.h>
+#include <linux/jump_label.h>
-#include <asm/pgtable.h>
+#include <asm/debug.h>
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/page.h>
#include <asm/code-patching.h>
-#include <asm/setup.h>
+#include <asm/inst.h>
-static int __patch_instruction(unsigned int *exec_addr, unsigned int instr,
- unsigned int *patch_addr)
+static int __patch_instruction(u32 *exec_addr, ppc_inst_t instr, u32 *patch_addr)
{
- int err = 0;
+ if (!ppc_inst_prefixed(instr)) {
+ u32 val = ppc_inst_val(instr);
- __put_user_asm(instr, patch_addr, err, "stw");
- if (err)
- return err;
+ __put_kernel_nofault(patch_addr, &val, u32, failed);
+ } else {
+ u64 val = ppc_inst_as_ulong(instr);
+
+ __put_kernel_nofault(patch_addr, &val, u64, failed);
+ }
asm ("dcbst 0, %0; sync; icbi 0,%1; sync; isync" :: "r" (patch_addr),
"r" (exec_addr));
return 0;
+
+failed:
+ mb(); /* sync */
+ return -EPERM;
}
-int raw_patch_instruction(unsigned int *addr, unsigned int instr)
+int raw_patch_instruction(u32 *addr, ppc_inst_t instr)
{
return __patch_instruction(addr, instr, addr);
}
-#ifdef CONFIG_STRICT_KERNEL_RWX
-static DEFINE_PER_CPU(struct vm_struct *, text_poke_area);
+struct patch_context {
+ union {
+ struct vm_struct *area;
+ struct mm_struct *mm;
+ };
+ unsigned long addr;
+ pte_t *pte;
+};
+
+static DEFINE_PER_CPU(struct patch_context, cpu_patching_context);
+
+static int map_patch_area(void *addr, unsigned long text_poke_addr);
+static void unmap_patch_area(unsigned long addr);
+
+static bool mm_patch_enabled(void)
+{
+ return IS_ENABLED(CONFIG_SMP) && radix_enabled();
+}
+
+/*
+ * The following applies for Radix MMU. Hash MMU has different requirements,
+ * and so is not supported.
+ *
+ * Changing mm requires context synchronising instructions on both sides of
+ * the context switch, as well as a hwsync between the last instruction for
+ * which the address of an associated storage access was translated using
+ * the current context and the context switch itself.
+ *
+ * switch_mm_irqs_off() performs an isync after the context switch. It is
+ * the responsibility of the caller to perform the CSI and hwsync before
+ * starting/stopping the temp mm.
+ */
+static struct mm_struct *start_using_temp_mm(struct mm_struct *temp_mm)
+{
+ struct mm_struct *orig_mm = current->active_mm;
+
+ lockdep_assert_irqs_disabled();
+ switch_mm_irqs_off(orig_mm, temp_mm, current);
+
+ WARN_ON(!mm_is_thread_local(temp_mm));
+
+ suspend_breakpoints();
+ return orig_mm;
+}
+
+static void stop_using_temp_mm(struct mm_struct *temp_mm,
+ struct mm_struct *orig_mm)
+{
+ lockdep_assert_irqs_disabled();
+ switch_mm_irqs_off(temp_mm, orig_mm, current);
+ restore_breakpoints();
+}
static int text_area_cpu_up(unsigned int cpu)
{
struct vm_struct *area;
+ unsigned long addr;
+ int err;
area = get_vm_area(PAGE_SIZE, VM_ALLOC);
if (!area) {
@@ -51,188 +112,426 @@ static int text_area_cpu_up(unsigned int cpu)
cpu);
return -1;
}
- this_cpu_write(text_poke_area, area);
+
+ // Map/unmap the area to ensure all page tables are pre-allocated
+ addr = (unsigned long)area->addr;
+ err = map_patch_area(empty_zero_page, addr);
+ if (err)
+ return err;
+
+ unmap_patch_area(addr);
+
+ this_cpu_write(cpu_patching_context.area, area);
+ this_cpu_write(cpu_patching_context.addr, addr);
+ this_cpu_write(cpu_patching_context.pte, virt_to_kpte(addr));
return 0;
}
static int text_area_cpu_down(unsigned int cpu)
{
- free_vm_area(this_cpu_read(text_poke_area));
+ free_vm_area(this_cpu_read(cpu_patching_context.area));
+ this_cpu_write(cpu_patching_context.area, NULL);
+ this_cpu_write(cpu_patching_context.addr, 0);
+ this_cpu_write(cpu_patching_context.pte, NULL);
return 0;
}
-/*
- * Run as a late init call. This allows all the boot time patching to be done
- * simply by patching the code, and then we're called here prior to
- * mark_rodata_ro(), which happens after all init calls are run. Although
- * BUG_ON() is rude, in this case it should only happen if ENOMEM, and we judge
- * it as being preferable to a kernel that will crash later when someone tries
- * to use patch_instruction().
- */
-static int __init setup_text_poke_area(void)
+static void put_patching_mm(struct mm_struct *mm, unsigned long patching_addr)
+{
+ struct mmu_gather tlb;
+
+ tlb_gather_mmu(&tlb, mm);
+ free_pgd_range(&tlb, patching_addr, patching_addr + PAGE_SIZE, 0, 0);
+ mmput(mm);
+}
+
+static int text_area_cpu_up_mm(unsigned int cpu)
+{
+ struct mm_struct *mm;
+ unsigned long addr;
+ pte_t *pte;
+ spinlock_t *ptl;
+
+ mm = mm_alloc();
+ if (WARN_ON(!mm))
+ goto fail_no_mm;
+
+ /*
+ * Choose a random page-aligned address from the interval
+ * [PAGE_SIZE .. DEFAULT_MAP_WINDOW - PAGE_SIZE].
+ * The lower address bound is PAGE_SIZE to avoid the zero-page.
+ */
+ addr = (1 + (get_random_long() % (DEFAULT_MAP_WINDOW / PAGE_SIZE - 2))) << PAGE_SHIFT;
+
+ /*
+ * PTE allocation uses GFP_KERNEL which means we need to
+ * pre-allocate the PTE here because we cannot do the
+ * allocation during patching when IRQs are disabled.
+ *
+ * Using get_locked_pte() to avoid open coding, the lock
+ * is unnecessary.
+ */
+ pte = get_locked_pte(mm, addr, &ptl);
+ if (!pte)
+ goto fail_no_pte;
+ pte_unmap_unlock(pte, ptl);
+
+ this_cpu_write(cpu_patching_context.mm, mm);
+ this_cpu_write(cpu_patching_context.addr, addr);
+
+ return 0;
+
+fail_no_pte:
+ put_patching_mm(mm, addr);
+fail_no_mm:
+ return -ENOMEM;
+}
+
+static int text_area_cpu_down_mm(unsigned int cpu)
{
- BUG_ON(!cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
- "powerpc/text_poke:online", text_area_cpu_up,
- text_area_cpu_down));
+ put_patching_mm(this_cpu_read(cpu_patching_context.mm),
+ this_cpu_read(cpu_patching_context.addr));
+
+ this_cpu_write(cpu_patching_context.mm, NULL);
+ this_cpu_write(cpu_patching_context.addr, 0);
return 0;
}
-late_initcall(setup_text_poke_area);
-/*
- * This can be called for kernel text or a module.
- */
-static int map_patch_area(void *addr, unsigned long text_poke_addr)
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(poking_init_done);
+
+void __init poking_init(void)
{
- unsigned long pfn;
- int err;
+ int ret;
- if (is_vmalloc_addr(addr))
- pfn = vmalloc_to_pfn(addr);
+ if (mm_patch_enabled())
+ ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
+ "powerpc/text_poke_mm:online",
+ text_area_cpu_up_mm,
+ text_area_cpu_down_mm);
else
- pfn = __pa_symbol(addr) >> PAGE_SHIFT;
+ ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
+ "powerpc/text_poke:online",
+ text_area_cpu_up,
+ text_area_cpu_down);
- err = map_kernel_page(text_poke_addr, (pfn << PAGE_SHIFT), PAGE_KERNEL);
+ /* cpuhp_setup_state returns >= 0 on success */
+ if (WARN_ON(ret < 0))
+ return;
- pr_devel("Mapped addr %lx with pfn %lx:%d\n", text_poke_addr, pfn, err);
- if (err)
- return -1;
+ static_branch_enable(&poking_init_done);
+}
- return 0;
+static unsigned long get_patch_pfn(void *addr)
+{
+ if (IS_ENABLED(CONFIG_MODULES) && is_vmalloc_or_module_addr(addr))
+ return vmalloc_to_pfn(addr);
+ else
+ return __pa_symbol(addr) >> PAGE_SHIFT;
}
-static inline int unmap_patch_area(unsigned long addr)
+/*
+ * This can be called for kernel text or a module.
+ */
+static int map_patch_area(void *addr, unsigned long text_poke_addr)
+{
+ unsigned long pfn = get_patch_pfn(addr);
+
+ return map_kernel_page(text_poke_addr, (pfn << PAGE_SHIFT), PAGE_KERNEL);
+}
+
+static void unmap_patch_area(unsigned long addr)
{
pte_t *ptep;
pmd_t *pmdp;
pud_t *pudp;
+ p4d_t *p4dp;
pgd_t *pgdp;
pgdp = pgd_offset_k(addr);
- if (unlikely(!pgdp))
- return -EINVAL;
+ if (WARN_ON(pgd_none(*pgdp)))
+ return;
+
+ p4dp = p4d_offset(pgdp, addr);
+ if (WARN_ON(p4d_none(*p4dp)))
+ return;
- pudp = pud_offset(pgdp, addr);
- if (unlikely(!pudp))
- return -EINVAL;
+ pudp = pud_offset(p4dp, addr);
+ if (WARN_ON(pud_none(*pudp)))
+ return;
pmdp = pmd_offset(pudp, addr);
- if (unlikely(!pmdp))
- return -EINVAL;
+ if (WARN_ON(pmd_none(*pmdp)))
+ return;
ptep = pte_offset_kernel(pmdp, addr);
- if (unlikely(!ptep))
- return -EINVAL;
-
- pr_devel("clearing mm %p, pte %p, addr %lx\n", &init_mm, ptep, addr);
+ if (WARN_ON(pte_none(*ptep)))
+ return;
/*
* In hash, pte_clear flushes the tlb; in radix, we have to flush it explicitly
*/
pte_clear(&init_mm, addr, ptep);
flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
+}
- return 0;
+static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr)
+{
+ int err;
+ u32 *patch_addr;
+ unsigned long text_poke_addr;
+ pte_t *pte;
+ unsigned long pfn = get_patch_pfn(addr);
+ struct mm_struct *patching_mm;
+ struct mm_struct *orig_mm;
+ spinlock_t *ptl;
+
+ patching_mm = __this_cpu_read(cpu_patching_context.mm);
+ text_poke_addr = __this_cpu_read(cpu_patching_context.addr);
+ patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
+
+ pte = get_locked_pte(patching_mm, text_poke_addr, &ptl);
+ if (!pte)
+ return -ENOMEM;
+
+ __set_pte_at(patching_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
+
+ /* order PTE update before use, also serves as the hwsync */
+ asm volatile("ptesync": : :"memory");
+
+ /* order context switch after arbitrary prior code */
+ isync();
+
+ orig_mm = start_using_temp_mm(patching_mm);
+
+ err = __patch_instruction(addr, instr, patch_addr);
+
+ /* context synchronisation performed by __patch_instruction (isync or exception) */
+ stop_using_temp_mm(patching_mm, orig_mm);
+
+ pte_clear(patching_mm, text_poke_addr, pte);
+ /*
+ * ptesync to order PTE update before TLB invalidation done
+ * by radix__local_flush_tlb_page_psize (in _tlbiel_va)
+ */
+ local_flush_tlb_page_psize(patching_mm, text_poke_addr, mmu_virtual_psize);
+
+ pte_unmap_unlock(pte, ptl);
+
+ return err;
}
-static int do_patch_instruction(unsigned int *addr, unsigned int instr)
+static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
{
int err;
- unsigned int *patch_addr = NULL;
- unsigned long flags;
+ u32 *patch_addr;
unsigned long text_poke_addr;
- unsigned long kaddr = (unsigned long)addr;
+ pte_t *pte;
+ unsigned long pfn = get_patch_pfn(addr);
+
+ text_poke_addr = (unsigned long)__this_cpu_read(cpu_patching_context.addr) & PAGE_MASK;
+ patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
+
+ pte = __this_cpu_read(cpu_patching_context.pte);
+ __set_pte_at(&init_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
+ /* See ptesync comment in radix__set_pte_at() */
+ if (radix_enabled())
+ asm volatile("ptesync": : :"memory");
+
+ err = __patch_instruction(addr, instr, patch_addr);
+
+ pte_clear(&init_mm, text_poke_addr, pte);
+ flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE);
+
+ return err;
+}
+
+int patch_instruction(u32 *addr, ppc_inst_t instr)
+{
+ int err;
+ unsigned long flags;
/*
* During early boot patch_instruction is called
* when text_poke_area is not ready, but we still need
* to allow patching. We just do the plain old patching
*/
- if (!this_cpu_read(text_poke_area))
+ if (!IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) ||
+ !static_branch_likely(&poking_init_done))
return raw_patch_instruction(addr, instr);
local_irq_save(flags);
+ if (mm_patch_enabled())
+ err = __do_patch_instruction_mm(addr, instr);
+ else
+ err = __do_patch_instruction(addr, instr);
+ local_irq_restore(flags);
+
+ return err;
+}
+NOKPROBE_SYMBOL(patch_instruction);
+
+static int __patch_instructions(u32 *patch_addr, u32 *code, size_t len, bool repeat_instr)
+{
+ unsigned long start = (unsigned long)patch_addr;
- text_poke_addr = (unsigned long)__this_cpu_read(text_poke_area)->addr;
- if (map_patch_area(addr, text_poke_addr)) {
- err = -1;
- goto out;
+ /* Repeat instruction */
+ if (repeat_instr) {
+ ppc_inst_t instr = ppc_inst_read(code);
+
+ if (ppc_inst_prefixed(instr)) {
+ u64 val = ppc_inst_as_ulong(instr);
+
+ memset64((u64 *)patch_addr, val, len / 8);
+ } else {
+ u32 val = ppc_inst_val(instr);
+
+ memset32(patch_addr, val, len / 4);
+ }
+ } else {
+ memcpy(patch_addr, code, len);
}
- patch_addr = (unsigned int *)(text_poke_addr) +
- ((kaddr & ~PAGE_MASK) / sizeof(unsigned int));
+ smp_wmb(); /* smp write barrier */
+ flush_icache_range(start, start + len);
+ return 0;
+}
- __patch_instruction(addr, instr, patch_addr);
+/*
+ * A page is mapped and instructions that fit the page are patched.
+ * Assumes 'len' to be (PAGE_SIZE - offset_in_page(addr)) or below.
+ */
+static int __do_patch_instructions_mm(u32 *addr, u32 *code, size_t len, bool repeat_instr)
+{
+ struct mm_struct *patching_mm, *orig_mm;
+ unsigned long pfn = get_patch_pfn(addr);
+ unsigned long text_poke_addr;
+ spinlock_t *ptl;
+ u32 *patch_addr;
+ pte_t *pte;
+ int err;
- err = unmap_patch_area(text_poke_addr);
- if (err)
- pr_warn("failed to unmap %lx\n", text_poke_addr);
+ patching_mm = __this_cpu_read(cpu_patching_context.mm);
+ text_poke_addr = __this_cpu_read(cpu_patching_context.addr);
+ patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
-out:
- local_irq_restore(flags);
+ pte = get_locked_pte(patching_mm, text_poke_addr, &ptl);
+ if (!pte)
+ return -ENOMEM;
+
+ __set_pte_at(patching_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
+
+ /* order PTE update before use, also serves as the hwsync */
+ asm volatile("ptesync" ::: "memory");
+
+ /* order context switch after arbitrary prior code */
+ isync();
+
+ orig_mm = start_using_temp_mm(patching_mm);
+
+ err = __patch_instructions(patch_addr, code, len, repeat_instr);
+
+ /* context synchronisation performed by __patch_instructions */
+ stop_using_temp_mm(patching_mm, orig_mm);
+
+ pte_clear(patching_mm, text_poke_addr, pte);
+ /*
+ * ptesync to order PTE update before TLB invalidation done
+ * by radix__local_flush_tlb_page_psize (in _tlbiel_va)
+ */
+ local_flush_tlb_page_psize(patching_mm, text_poke_addr, mmu_virtual_psize);
+
+ pte_unmap_unlock(pte, ptl);
return err;
}
-#else /* !CONFIG_STRICT_KERNEL_RWX */
-static int do_patch_instruction(unsigned int *addr, unsigned int instr)
+/*
+ * A page is mapped and instructions that fit the page are patched.
+ * Assumes 'len' to be (PAGE_SIZE - offset_in_page(addr)) or below.
+ */
+static int __do_patch_instructions(u32 *addr, u32 *code, size_t len, bool repeat_instr)
{
- return raw_patch_instruction(addr, instr);
-}
+ unsigned long pfn = get_patch_pfn(addr);
+ unsigned long text_poke_addr;
+ u32 *patch_addr;
+ pte_t *pte;
+ int err;
-#endif /* CONFIG_STRICT_KERNEL_RWX */
+ text_poke_addr = (unsigned long)__this_cpu_read(cpu_patching_context.addr) & PAGE_MASK;
+ patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
-int patch_instruction(unsigned int *addr, unsigned int instr)
-{
- /* Make sure we aren't patching a freed init section */
- if (init_mem_is_free && init_section_contains(addr, 4)) {
- pr_debug("Skipping init section patching addr: 0x%px\n", addr);
- return 0;
- }
- return do_patch_instruction(addr, instr);
+ pte = __this_cpu_read(cpu_patching_context.pte);
+ __set_pte_at(&init_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
+ /* See ptesync comment in radix__set_pte_at() */
+ if (radix_enabled())
+ asm volatile("ptesync" ::: "memory");
+
+ err = __patch_instructions(patch_addr, code, len, repeat_instr);
+
+ pte_clear(&init_mm, text_poke_addr, pte);
+ flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE);
+
+ return err;
}
-NOKPROBE_SYMBOL(patch_instruction);
-int patch_branch(unsigned int *addr, unsigned long target, int flags)
-{
- return patch_instruction(addr, create_branch(addr, target, flags));
+/*
+ * Patch 'addr' with 'len' bytes of instructions from 'code'.
+ *
+ * If repeat_instr is true, the same instruction is filled for
+ * 'len' bytes.
+ */
+int patch_instructions(u32 *addr, u32 *code, size_t len, bool repeat_instr)
+{
+ while (len > 0) {
+ unsigned long flags;
+ size_t plen;
+ int err;
+
+ plen = min_t(size_t, PAGE_SIZE - offset_in_page(addr), len);
+
+ local_irq_save(flags);
+ if (mm_patch_enabled())
+ err = __do_patch_instructions_mm(addr, code, plen, repeat_instr);
+ else
+ err = __do_patch_instructions(addr, code, plen, repeat_instr);
+ local_irq_restore(flags);
+ if (err)
+ return err;
+
+ len -= plen;
+ addr = (u32 *)((unsigned long)addr + plen);
+ if (!repeat_instr)
+ code = (u32 *)((unsigned long)code + plen);
+ }
+
+ return 0;
}
+NOKPROBE_SYMBOL(patch_instructions);
-bool is_offset_in_branch_range(long offset)
+int patch_branch(u32 *addr, unsigned long target, int flags)
{
- /*
- * Powerpc branch instruction is :
- *
- * 0 6 30 31
- * +---------+----------------+---+---+
- * | opcode | LI |AA |LK |
- * +---------+----------------+---+---+
- * Where AA = 0 and LK = 0
- *
- * LI is a signed 24 bits integer. The real branch offset is computed
- * by: imm32 = SignExtend(LI:'0b00', 32);
- *
- * So the maximum forward branch should be:
- * (0x007fffff << 2) = 0x01fffffc = 0x1fffffc
- * The maximum backward branch should be:
- * (0xff800000 << 2) = 0xfe000000 = -0x2000000
- */
- return (offset >= -0x2000000 && offset <= 0x1fffffc && !(offset & 0x3));
+ ppc_inst_t instr;
+
+ if (create_branch(&instr, addr, target, flags))
+ return -ERANGE;
+
+ return patch_instruction(addr, instr);
}
/*
* Helper to check if a given instruction is a conditional branch
* Derived from the conditional checks in analyse_instr()
*/
-bool is_conditional_branch(unsigned int instr)
+bool is_conditional_branch(ppc_inst_t instr)
{
- unsigned int opcode = instr >> 26;
+ unsigned int opcode = ppc_inst_primary_opcode(instr);
if (opcode == 16) /* bc, bca, bcl, bcla */
return true;
if (opcode == 19) {
- switch ((instr >> 1) & 0x3ff) {
+ switch ((ppc_inst_val(instr) >> 1) & 0x3ff) {
case 16: /* bclr, bclrl */
case 528: /* bcctr, bcctrl */
case 560: /* bctar, bctarl */
@@ -243,30 +542,9 @@ bool is_conditional_branch(unsigned int instr)
}
NOKPROBE_SYMBOL(is_conditional_branch);
-unsigned int create_branch(const unsigned int *addr,
- unsigned long target, int flags)
-{
- unsigned int instruction;
- long offset;
-
- offset = target;
- if (! (flags & BRANCH_ABSOLUTE))
- offset = offset - (unsigned long)addr;
-
- /* Check we can represent the target in the instruction format */
- if (!is_offset_in_branch_range(offset))
- return 0;
-
- /* Mask out the flags and target, so they don't step on each other. */
- instruction = 0x48000000 | (flags & 0x3) | (offset & 0x03FFFFFC);
-
- return instruction;
-}
-
-unsigned int create_cond_branch(const unsigned int *addr,
- unsigned long target, int flags)
+int create_cond_branch(ppc_inst_t *instr, const u32 *addr,
+ unsigned long target, int flags)
{
- unsigned int instruction;
long offset;
offset = target;
@@ -274,413 +552,81 @@ unsigned int create_cond_branch(const unsigned int *addr,
offset = offset - (unsigned long)addr;
/* Check we can represent the target in the instruction format */
- if (offset < -0x8000 || offset > 0x7FFF || offset & 0x3)
- return 0;
+ if (!is_offset_in_cond_branch_range(offset))
+ return 1;
/* Mask out the flags and target, so they don't step on each other. */
- instruction = 0x40000000 | (flags & 0x3FF0003) | (offset & 0xFFFC);
-
- return instruction;
-}
+ *instr = ppc_inst(0x40000000 | (flags & 0x3FF0003) | (offset & 0xFFFC));
-static unsigned int branch_opcode(unsigned int instr)
-{
- return (instr >> 26) & 0x3F;
-}
-
-static int instr_is_branch_iform(unsigned int instr)
-{
- return branch_opcode(instr) == 18;
-}
-
-static int instr_is_branch_bform(unsigned int instr)
-{
- return branch_opcode(instr) == 16;
+ return 0;
}
-int instr_is_relative_branch(unsigned int instr)
+int instr_is_relative_branch(ppc_inst_t instr)
{
- if (instr & BRANCH_ABSOLUTE)
+ if (ppc_inst_val(instr) & BRANCH_ABSOLUTE)
return 0;
return instr_is_branch_iform(instr) || instr_is_branch_bform(instr);
}
-int instr_is_relative_link_branch(unsigned int instr)
+int instr_is_relative_link_branch(ppc_inst_t instr)
{
- return instr_is_relative_branch(instr) && (instr & BRANCH_SET_LINK);
+ return instr_is_relative_branch(instr) && (ppc_inst_val(instr) & BRANCH_SET_LINK);
}
-static unsigned long branch_iform_target(const unsigned int *instr)
+static unsigned long branch_iform_target(const u32 *instr)
{
signed long imm;
- imm = *instr & 0x3FFFFFC;
+ imm = ppc_inst_val(ppc_inst_read(instr)) & 0x3FFFFFC;
/* If the top bit of the immediate value is set this is negative */
if (imm & 0x2000000)
imm -= 0x4000000;
- if ((*instr & BRANCH_ABSOLUTE) == 0)
+ if ((ppc_inst_val(ppc_inst_read(instr)) & BRANCH_ABSOLUTE) == 0)
imm += (unsigned long)instr;
return (unsigned long)imm;
}
-static unsigned long branch_bform_target(const unsigned int *instr)
+static unsigned long branch_bform_target(const u32 *instr)
{
signed long imm;
- imm = *instr & 0xFFFC;
+ imm = ppc_inst_val(ppc_inst_read(instr)) & 0xFFFC;
/* If the top bit of the immediate value is set this is negative */
if (imm & 0x8000)
imm -= 0x10000;
- if ((*instr & BRANCH_ABSOLUTE) == 0)
+ if ((ppc_inst_val(ppc_inst_read(instr)) & BRANCH_ABSOLUTE) == 0)
imm += (unsigned long)instr;
return (unsigned long)imm;
}
-unsigned long branch_target(const unsigned int *instr)
+unsigned long branch_target(const u32 *instr)
{
- if (instr_is_branch_iform(*instr))
+ if (instr_is_branch_iform(ppc_inst_read(instr)))
return branch_iform_target(instr);
- else if (instr_is_branch_bform(*instr))
+ else if (instr_is_branch_bform(ppc_inst_read(instr)))
return branch_bform_target(instr);
return 0;
}
-int instr_is_branch_to_addr(const unsigned int *instr, unsigned long addr)
-{
- if (instr_is_branch_iform(*instr) || instr_is_branch_bform(*instr))
- return branch_target(instr) == addr;
-
- return 0;
-}
-
-unsigned int translate_branch(const unsigned int *dest, const unsigned int *src)
+int translate_branch(ppc_inst_t *instr, const u32 *dest, const u32 *src)
{
unsigned long target;
-
target = branch_target(src);
- if (instr_is_branch_iform(*src))
- return create_branch(dest, target, *src);
- else if (instr_is_branch_bform(*src))
- return create_cond_branch(dest, target, *src);
+ if (instr_is_branch_iform(ppc_inst_read(src)))
+ return create_branch(instr, dest, target,
+ ppc_inst_val(ppc_inst_read(src)));
+ else if (instr_is_branch_bform(ppc_inst_read(src)))
+ return create_cond_branch(instr, dest, target,
+ ppc_inst_val(ppc_inst_read(src)));
- return 0;
+ return 1;
}
-
-#ifdef CONFIG_PPC_BOOK3E_64
-void __patch_exception(int exc, unsigned long addr)
-{
- extern unsigned int interrupt_base_book3e;
- unsigned int *ibase = &interrupt_base_book3e;
-
- /* Our exceptions vectors start with a NOP and -then- a branch
- * to deal with single stepping from userspace which stops on
- * the second instruction. Thus we need to patch the second
- * instruction of the exception, not the first one
- */
-
- patch_branch(ibase + (exc / 4) + 1, addr, 0);
-}
-#endif
-
-#ifdef CONFIG_CODE_PATCHING_SELFTEST
-
-static void __init test_trampoline(void)
-{
- asm ("nop;\n");
-}
-
-#define check(x) \
- if (!(x)) printk("code-patching: test failed at line %d\n", __LINE__);
-
-static void __init test_branch_iform(void)
-{
- unsigned int instr;
- unsigned long addr;
-
- addr = (unsigned long)&instr;
-
- /* The simplest case, branch to self, no flags */
- check(instr_is_branch_iform(0x48000000));
- /* All bits of target set, and flags */
- check(instr_is_branch_iform(0x4bffffff));
- /* High bit of opcode set, which is wrong */
- check(!instr_is_branch_iform(0xcbffffff));
- /* Middle bits of opcode set, which is wrong */
- check(!instr_is_branch_iform(0x7bffffff));
-
- /* Simplest case, branch to self with link */
- check(instr_is_branch_iform(0x48000001));
- /* All bits of targets set */
- check(instr_is_branch_iform(0x4bfffffd));
- /* Some bits of targets set */
- check(instr_is_branch_iform(0x4bff00fd));
- /* Must be a valid branch to start with */
- check(!instr_is_branch_iform(0x7bfffffd));
-
- /* Absolute branch to 0x100 */
- instr = 0x48000103;
- check(instr_is_branch_to_addr(&instr, 0x100));
- /* Absolute branch to 0x420fc */
- instr = 0x480420ff;
- check(instr_is_branch_to_addr(&instr, 0x420fc));
- /* Maximum positive relative branch, + 20MB - 4B */
- instr = 0x49fffffc;
- check(instr_is_branch_to_addr(&instr, addr + 0x1FFFFFC));
- /* Smallest negative relative branch, - 4B */
- instr = 0x4bfffffc;
- check(instr_is_branch_to_addr(&instr, addr - 4));
- /* Largest negative relative branch, - 32 MB */
- instr = 0x4a000000;
- check(instr_is_branch_to_addr(&instr, addr - 0x2000000));
-
- /* Branch to self, with link */
- instr = create_branch(&instr, addr, BRANCH_SET_LINK);
- check(instr_is_branch_to_addr(&instr, addr));
-
- /* Branch to self - 0x100, with link */
- instr = create_branch(&instr, addr - 0x100, BRANCH_SET_LINK);
- check(instr_is_branch_to_addr(&instr, addr - 0x100));
-
- /* Branch to self + 0x100, no link */
- instr = create_branch(&instr, addr + 0x100, 0);
- check(instr_is_branch_to_addr(&instr, addr + 0x100));
-
- /* Maximum relative negative offset, - 32 MB */
- instr = create_branch(&instr, addr - 0x2000000, BRANCH_SET_LINK);
- check(instr_is_branch_to_addr(&instr, addr - 0x2000000));
-
- /* Out of range relative negative offset, - 32 MB + 4*/
- instr = create_branch(&instr, addr - 0x2000004, BRANCH_SET_LINK);
- check(instr == 0);
-
- /* Out of range relative positive offset, + 32 MB */
- instr = create_branch(&instr, addr + 0x2000000, BRANCH_SET_LINK);
- check(instr == 0);
-
- /* Unaligned target */
- instr = create_branch(&instr, addr + 3, BRANCH_SET_LINK);
- check(instr == 0);
-
- /* Check flags are masked correctly */
- instr = create_branch(&instr, addr, 0xFFFFFFFC);
- check(instr_is_branch_to_addr(&instr, addr));
- check(instr == 0x48000000);
-}
-
-static void __init test_create_function_call(void)
-{
- unsigned int *iptr;
- unsigned long dest;
-
- /* Check we can create a function call */
- iptr = (unsigned int *)ppc_function_entry(test_trampoline);
- dest = ppc_function_entry(test_create_function_call);
- patch_instruction(iptr, create_branch(iptr, dest, BRANCH_SET_LINK));
- check(instr_is_branch_to_addr(iptr, dest));
-}
-
-static void __init test_branch_bform(void)
-{
- unsigned long addr;
- unsigned int *iptr, instr, flags;
-
- iptr = &instr;
- addr = (unsigned long)iptr;
-
- /* The simplest case, branch to self, no flags */
- check(instr_is_branch_bform(0x40000000));
- /* All bits of target set, and flags */
- check(instr_is_branch_bform(0x43ffffff));
- /* High bit of opcode set, which is wrong */
- check(!instr_is_branch_bform(0xc3ffffff));
- /* Middle bits of opcode set, which is wrong */
- check(!instr_is_branch_bform(0x7bffffff));
-
- /* Absolute conditional branch to 0x100 */
- instr = 0x43ff0103;
- check(instr_is_branch_to_addr(&instr, 0x100));
- /* Absolute conditional branch to 0x20fc */
- instr = 0x43ff20ff;
- check(instr_is_branch_to_addr(&instr, 0x20fc));
- /* Maximum positive relative conditional branch, + 32 KB - 4B */
- instr = 0x43ff7ffc;
- check(instr_is_branch_to_addr(&instr, addr + 0x7FFC));
- /* Smallest negative relative conditional branch, - 4B */
- instr = 0x43fffffc;
- check(instr_is_branch_to_addr(&instr, addr - 4));
- /* Largest negative relative conditional branch, - 32 KB */
- instr = 0x43ff8000;
- check(instr_is_branch_to_addr(&instr, addr - 0x8000));
-
- /* All condition code bits set & link */
- flags = 0x3ff000 | BRANCH_SET_LINK;
-
- /* Branch to self */
- instr = create_cond_branch(iptr, addr, flags);
- check(instr_is_branch_to_addr(&instr, addr));
-
- /* Branch to self - 0x100 */
- instr = create_cond_branch(iptr, addr - 0x100, flags);
- check(instr_is_branch_to_addr(&instr, addr - 0x100));
-
- /* Branch to self + 0x100 */
- instr = create_cond_branch(iptr, addr + 0x100, flags);
- check(instr_is_branch_to_addr(&instr, addr + 0x100));
-
- /* Maximum relative negative offset, - 32 KB */
- instr = create_cond_branch(iptr, addr - 0x8000, flags);
- check(instr_is_branch_to_addr(&instr, addr - 0x8000));
-
- /* Out of range relative negative offset, - 32 KB + 4*/
- instr = create_cond_branch(iptr, addr - 0x8004, flags);
- check(instr == 0);
-
- /* Out of range relative positive offset, + 32 KB */
- instr = create_cond_branch(iptr, addr + 0x8000, flags);
- check(instr == 0);
-
- /* Unaligned target */
- instr = create_cond_branch(iptr, addr + 3, flags);
- check(instr == 0);
-
- /* Check flags are masked correctly */
- instr = create_cond_branch(iptr, addr, 0xFFFFFFFC);
- check(instr_is_branch_to_addr(&instr, addr));
- check(instr == 0x43FF0000);
-}
-
-static void __init test_translate_branch(void)
-{
- unsigned long addr;
- unsigned int *p, *q;
- void *buf;
-
- buf = vmalloc(PAGE_ALIGN(0x2000000 + 1));
- check(buf);
- if (!buf)
- return;
-
- /* Simple case, branch to self moved a little */
- p = buf;
- addr = (unsigned long)p;
- patch_branch(p, addr, 0);
- check(instr_is_branch_to_addr(p, addr));
- q = p + 1;
- patch_instruction(q, translate_branch(q, p));
- check(instr_is_branch_to_addr(q, addr));
-
- /* Maximum negative case, move b . to addr + 32 MB */
- p = buf;
- addr = (unsigned long)p;
- patch_branch(p, addr, 0);
- q = buf + 0x2000000;
- patch_instruction(q, translate_branch(q, p));
- check(instr_is_branch_to_addr(p, addr));
- check(instr_is_branch_to_addr(q, addr));
- check(*q == 0x4a000000);
-
- /* Maximum positive case, move x to x - 32 MB + 4 */
- p = buf + 0x2000000;
- addr = (unsigned long)p;
- patch_branch(p, addr, 0);
- q = buf + 4;
- patch_instruction(q, translate_branch(q, p));
- check(instr_is_branch_to_addr(p, addr));
- check(instr_is_branch_to_addr(q, addr));
- check(*q == 0x49fffffc);
-
- /* Jump to x + 16 MB moved to x + 20 MB */
- p = buf;
- addr = 0x1000000 + (unsigned long)buf;
- patch_branch(p, addr, BRANCH_SET_LINK);
- q = buf + 0x1400000;
- patch_instruction(q, translate_branch(q, p));
- check(instr_is_branch_to_addr(p, addr));
- check(instr_is_branch_to_addr(q, addr));
-
- /* Jump to x + 16 MB moved to x - 16 MB + 4 */
- p = buf + 0x1000000;
- addr = 0x2000000 + (unsigned long)buf;
- patch_branch(p, addr, 0);
- q = buf + 4;
- patch_instruction(q, translate_branch(q, p));
- check(instr_is_branch_to_addr(p, addr));
- check(instr_is_branch_to_addr(q, addr));
-
-
- /* Conditional branch tests */
-
- /* Simple case, branch to self moved a little */
- p = buf;
- addr = (unsigned long)p;
- patch_instruction(p, create_cond_branch(p, addr, 0));
- check(instr_is_branch_to_addr(p, addr));
- q = p + 1;
- patch_instruction(q, translate_branch(q, p));
- check(instr_is_branch_to_addr(q, addr));
-
- /* Maximum negative case, move b . to addr + 32 KB */
- p = buf;
- addr = (unsigned long)p;
- patch_instruction(p, create_cond_branch(p, addr, 0xFFFFFFFC));
- q = buf + 0x8000;
- patch_instruction(q, translate_branch(q, p));
- check(instr_is_branch_to_addr(p, addr));
- check(instr_is_branch_to_addr(q, addr));
- check(*q == 0x43ff8000);
-
- /* Maximum positive case, move x to x - 32 KB + 4 */
- p = buf + 0x8000;
- addr = (unsigned long)p;
- patch_instruction(p, create_cond_branch(p, addr, 0xFFFFFFFC));
- q = buf + 4;
- patch_instruction(q, translate_branch(q, p));
- check(instr_is_branch_to_addr(p, addr));
- check(instr_is_branch_to_addr(q, addr));
- check(*q == 0x43ff7ffc);
-
- /* Jump to x + 12 KB moved to x + 20 KB */
- p = buf;
- addr = 0x3000 + (unsigned long)buf;
- patch_instruction(p, create_cond_branch(p, addr, BRANCH_SET_LINK));
- q = buf + 0x5000;
- patch_instruction(q, translate_branch(q, p));
- check(instr_is_branch_to_addr(p, addr));
- check(instr_is_branch_to_addr(q, addr));
-
- /* Jump to x + 8 KB moved to x - 8 KB + 4 */
- p = buf + 0x2000;
- addr = 0x4000 + (unsigned long)buf;
- patch_instruction(p, create_cond_branch(p, addr, 0));
- q = buf + 4;
- patch_instruction(q, translate_branch(q, p));
- check(instr_is_branch_to_addr(p, addr));
- check(instr_is_branch_to_addr(q, addr));
-
- /* Free the buffer we were using */
- vfree(buf);
-}
-
-static int __init test_code_patching(void)
-{
- printk(KERN_DEBUG "Running code patching self-tests ...\n");
-
- test_branch_iform();
- test_branch_bform();
- test_create_function_call();
- test_translate_branch();
-
- return 0;
-}
-late_initcall(test_code_patching);
-
-#endif /* CONFIG_CODE_PATCHING_SELFTEST */
diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S
index a3bcf4786e4a..933b685e7ab6 100644
--- a/arch/powerpc/lib/copy_32.S
+++ b/arch/powerpc/lib/copy_32.S
@@ -4,11 +4,11 @@
*
* Copyright (C) 1996-2005 Paul Mackerras.
*/
+#include <linux/export.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>
-#include <asm/export.h>
#include <asm/code-patching-asm.h>
#include <asm/kasan.h>
@@ -57,9 +57,6 @@
EX_TABLE(8 ## n ## 7b,9 ## n ## 1b)
.text
- .stabs "arch/powerpc/lib/",N_SO,0,0,0f
- .stabs "copy_32.S",N_SO,0,0,0f
-0:
CACHELINE_BYTES = L1_CACHE_BYTES
LG_CACHELINE_BYTES = L1_CACHE_SHIFT
diff --git a/arch/powerpc/lib/memcpy_mcsafe_64.S b/arch/powerpc/lib/copy_mc_64.S
index cb882d9a6d8a..bf1014b28fe8 100644
--- a/arch/powerpc/lib/memcpy_mcsafe_64.S
+++ b/arch/powerpc/lib/copy_mc_64.S
@@ -4,9 +4,9 @@
* Derived from copyuser_power7.s by Anton Blanchard <anton@au.ibm.com>
* Author - Balbir Singh <bsingharora@gmail.com>
*/
+#include <linux/export.h>
#include <asm/ppc_asm.h>
#include <asm/errno.h>
-#include <asm/export.h>
.macro err1
100:
@@ -50,7 +50,7 @@ err3; stb r0,0(r3)
blr
-_GLOBAL(memcpy_mcsafe)
+_GLOBAL(copy_mc_generic)
mr r7,r5
cmpldi r5,16
blt .Lshort_copy
@@ -239,4 +239,4 @@ err1; stb r0,0(r3)
15: li r3,0
blr
-EXPORT_SYMBOL_GPL(memcpy_mcsafe);
+EXPORT_SYMBOL_GPL(copy_mc_generic);
diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S
index d1091b5ee5da..f33a2e6088e5 100644
--- a/arch/powerpc/lib/copypage_64.S
+++ b/arch/powerpc/lib/copypage_64.S
@@ -2,18 +2,13 @@
/*
* Copyright (C) 2008 Mark Nelson, IBM Corp.
*/
+#include <linux/export.h>
#include <asm/page.h>
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
-#include <asm/export.h>
#include <asm/feature-fixups.h>
- .section ".toc","aw"
-PPC64_CACHES:
- .tc ppc64_caches[TC],ppc64_caches
- .section ".text"
-
_GLOBAL_TOC(copy_page)
BEGIN_FTR_SECTION
lis r5,PAGE_SIZE@h
@@ -23,8 +18,18 @@ FTR_SECTION_ELSE
#endif
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
ori r5,r5,PAGE_SIZE@l
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ /*
+ * Hack for toolchain - prefixed instructions cause label difference to
+ * be non-constant even if 8 byte alignment is known, so they can not
+ * be put in FTR sections.
+ */
+ LOAD_REG_ADDR(r10, ppc64_caches)
+BEGIN_FTR_SECTION
+#else
BEGIN_FTR_SECTION
- ld r10,PPC64_CACHES@toc(r2)
+ LOAD_REG_ADDR(r10, ppc64_caches)
+#endif
lwz r11,DCACHEL1LOGBLOCKSIZE(r10) /* log2 of cache block size */
lwz r12,DCACHEL1BLOCKSIZE(r10) /* get cache block size */
li r9,0
diff --git a/arch/powerpc/lib/copypage_power7.S b/arch/powerpc/lib/copypage_power7.S
index a9844c6353cf..07e7cec4d135 100644
--- a/arch/powerpc/lib/copypage_power7.S
+++ b/arch/powerpc/lib/copypage_power7.S
@@ -27,17 +27,7 @@ _GLOBAL(copypage_power7)
#endif
ori r10,r7,1 /* stream=1 */
- lis r8,0x8000 /* GO=1 */
- clrldi r8,r8,32
-
- /* setup read stream 0 */
- dcbt 0,r4,0b01000 /* addr from */
- dcbt 0,r7,0b01010 /* length and depth from */
- /* setup write stream 1 */
- dcbtst 0,r9,0b01000 /* addr to */
- dcbtst 0,r10,0b01010 /* length and depth to */
- eieio
- dcbt 0,r8,0b01010 /* all streams GO */
+ DCBT_SETUP_STREAMS(r4, r7, r9, r10, r8)
#ifdef CONFIG_ALTIVEC
mflr r0
@@ -45,7 +35,7 @@ _GLOBAL(copypage_power7)
std r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
std r0,16(r1)
stdu r1,-STACKFRAMESIZE(r1)
- bl enter_vmx_ops
+ bl CFUNC(enter_vmx_ops)
cmpwi r3,0
ld r0,STACKFRAMESIZE+16(r1)
ld r3,STK_REG(R31)(r1)
@@ -88,7 +78,7 @@ _GLOBAL(copypage_power7)
addi r3,r3,128
bdnz 1b
- b exit_vmx_ops /* tail call optimise */
+ b CFUNC(exit_vmx_ops) /* tail call optimise */
#else
li r0,(PAGE_SIZE/128)
diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S
index db8719a14846..9af969d2cc0c 100644
--- a/arch/powerpc/lib/copyuser_64.S
+++ b/arch/powerpc/lib/copyuser_64.S
@@ -2,9 +2,9 @@
/*
* Copyright (C) 2002 Paul Mackerras, IBM Corp.
*/
+#include <linux/export.h>
#include <asm/processor.h>
#include <asm/ppc_asm.h>
-#include <asm/export.h>
#include <asm/asm-compat.h>
#include <asm/feature-fixups.h>
diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S
index 28f0be523c06..8474c682a178 100644
--- a/arch/powerpc/lib/copyuser_power7.S
+++ b/arch/powerpc/lib/copyuser_power7.S
@@ -47,7 +47,7 @@
ld r15,STK_REG(R15)(r1)
ld r14,STK_REG(R14)(r1)
.Ldo_err3:
- bl exit_vmx_usercopy
+ bl CFUNC(exit_vmx_usercopy)
ld r0,STACKFRAMESIZE+16(r1)
mtlr r0
b .Lexit
@@ -272,7 +272,7 @@ err1; stb r0,0(r3)
mflr r0
std r0,16(r1)
stdu r1,-STACKFRAMESIZE(r1)
- bl enter_vmx_usercopy
+ bl CFUNC(enter_vmx_usercopy)
cmpwi cr1,r3,0
ld r0,STACKFRAMESIZE+16(r1)
ld r3,STK_REG(R31)(r1)
@@ -298,17 +298,7 @@ err1; stb r0,0(r3)
or r7,r7,r0
ori r10,r7,1 /* stream=1 */
- lis r8,0x8000 /* GO=1 */
- clrldi r8,r8,32
-
- /* setup read stream 0 */
- dcbt 0,r6,0b01000 /* addr from */
- dcbt 0,r7,0b01010 /* length and depth from */
- /* setup write stream 1 */
- dcbtst 0,r9,0b01000 /* addr to */
- dcbtst 0,r10,0b01010 /* length and depth to */
- eieio
- dcbt 0,r8,0b01010 /* all streams GO */
+ DCBT_SETUP_STREAMS(r6, r7, r9, r10, r8)
beq cr1,.Lunwind_stack_nonvmx_copy
@@ -488,7 +478,7 @@ err3; lbz r0,0(r4)
err3; stb r0,0(r3)
15: addi r1,r1,STACKFRAMESIZE
- b exit_vmx_usercopy /* tail call optimise */
+ b CFUNC(exit_vmx_usercopy) /* tail call optimise */
.Lvmx_unaligned_copy:
/* Get the destination 16B aligned */
@@ -691,5 +681,5 @@ err3; lbz r0,0(r4)
err3; stb r0,0(r3)
15: addi r1,r1,STACKFRAMESIZE
- b exit_vmx_usercopy /* tail call optimise */
+ b CFUNC(exit_vmx_usercopy) /* tail call optimise */
#endif /* CONFIG_ALTIVEC */
diff --git a/arch/powerpc/lib/error-inject.c b/arch/powerpc/lib/error-inject.c
index 407b992fb02f..e834079d2b5c 100644
--- a/arch/powerpc/lib/error-inject.c
+++ b/arch/powerpc/lib/error-inject.c
@@ -11,6 +11,6 @@ void override_function_with_return(struct pt_regs *regs)
* function in the kernel/module, captured on a kprobe. We don't need
* to worry about 32-bit userspace on a 64-bit kernel.
*/
- regs->nip = regs->link;
+ regs_set_return_ip(regs, regs->link);
}
NOKPROBE_SYMBOL(override_function_with_return);
diff --git a/arch/powerpc/lib/feature-fixups-test.S b/arch/powerpc/lib/feature-fixups-test.S
index b12168c2447a..480172fbd024 100644
--- a/arch/powerpc/lib/feature-fixups-test.S
+++ b/arch/powerpc/lib/feature-fixups-test.S
@@ -7,6 +7,7 @@
#include <asm/ppc_asm.h>
#include <asm/synch.h>
#include <asm/asm-compat.h>
+#include <asm/ppc-opcode.h>
.text
@@ -791,3 +792,71 @@ globl(lwsync_fixup_test_expected_SYNC)
1: or 1,1,1
sync
+globl(ftr_fixup_prefix1)
+ or 1,1,1
+ .long OP_PREFIX << 26
+ .long 0x0000000
+ or 2,2,2
+globl(end_ftr_fixup_prefix1)
+
+globl(ftr_fixup_prefix1_orig)
+ or 1,1,1
+ .long OP_PREFIX << 26
+ .long 0x0000000
+ or 2,2,2
+
+globl(ftr_fixup_prefix1_expected)
+ or 1,1,1
+ nop
+ nop
+ or 2,2,2
+
+globl(ftr_fixup_prefix2)
+ or 1,1,1
+ .long OP_PREFIX << 26
+ .long 0x0000000
+ or 2,2,2
+globl(end_ftr_fixup_prefix2)
+
+globl(ftr_fixup_prefix2_orig)
+ or 1,1,1
+ .long OP_PREFIX << 26
+ .long 0x0000000
+ or 2,2,2
+
+globl(ftr_fixup_prefix2_alt)
+ .long OP_PREFIX << 26
+ .long 0x0000001
+
+globl(ftr_fixup_prefix2_expected)
+ or 1,1,1
+ .long OP_PREFIX << 26
+ .long 0x0000001
+ or 2,2,2
+
+globl(ftr_fixup_prefix3)
+ or 1,1,1
+ .long OP_PREFIX << 26
+ .long 0x0000000
+ or 2,2,2
+ or 3,3,3
+globl(end_ftr_fixup_prefix3)
+
+globl(ftr_fixup_prefix3_orig)
+ or 1,1,1
+ .long OP_PREFIX << 26
+ .long 0x0000000
+ or 2,2,2
+ or 3,3,3
+
+globl(ftr_fixup_prefix3_alt)
+ .long OP_PREFIX << 26
+ .long 0x0000001
+ nop
+
+globl(ftr_fixup_prefix3_expected)
+ or 1,1,1
+ .long OP_PREFIX << 26
+ .long 0x0000001
+ nop
+ or 3,3,3
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index 4ba634b89ce5..4f82581ca203 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -14,13 +14,16 @@
#include <linux/string.h>
#include <linux/init.h>
#include <linux/sched/mm.h>
+#include <linux/stop_machine.h>
#include <asm/cputable.h>
#include <asm/code-patching.h>
+#include <asm/interrupt.h>
#include <asm/page.h>
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/security_features.h>
#include <asm/firmware.h>
+#include <asm/inst.h>
struct fixup_entry {
unsigned long mask;
@@ -31,30 +34,30 @@ struct fixup_entry {
long alt_end_off;
};
-static unsigned int *calc_addr(struct fixup_entry *fcur, long offset)
+static u32 *calc_addr(struct fixup_entry *fcur, long offset)
{
/*
* We store the offset to the code as a negative offset from
* the start of the alt_entry, to support the VDSO. This
* routine converts that back into an actual address.
*/
- return (unsigned int *)((unsigned long)fcur + offset);
+ return (u32 *)((unsigned long)fcur + offset);
}
-static int patch_alt_instruction(unsigned int *src, unsigned int *dest,
- unsigned int *alt_start, unsigned int *alt_end)
+static int patch_alt_instruction(u32 *src, u32 *dest, u32 *alt_start, u32 *alt_end)
{
- unsigned int instr;
+ int err;
+ ppc_inst_t instr;
- instr = *src;
+ instr = ppc_inst_read(src);
- if (instr_is_relative_branch(*src)) {
- unsigned int *target = (unsigned int *)branch_target(src);
+ if (instr_is_relative_branch(ppc_inst_read(src))) {
+ u32 *target = (u32 *)branch_target(src);
/* Branch within the section doesn't need translating */
if (target < alt_start || target > alt_end) {
- instr = translate_branch(dest, src);
- if (!instr)
+ err = translate_branch(&instr, dest, src);
+ if (err)
return 1;
}
}
@@ -64,9 +67,10 @@ static int patch_alt_instruction(unsigned int *src, unsigned int *dest,
return 0;
}
-static int patch_feature_section(unsigned long value, struct fixup_entry *fcur)
+static int patch_feature_section_mask(unsigned long value, unsigned long mask,
+ struct fixup_entry *fcur)
{
- unsigned int *start, *end, *alt_start, *alt_end, *src, *dest;
+ u32 *start, *end, *alt_start, *alt_end, *src, *dest;
start = calc_addr(fcur, fcur->start_off);
end = calc_addr(fcur, fcur->end_off);
@@ -76,24 +80,26 @@ static int patch_feature_section(unsigned long value, struct fixup_entry *fcur)
if ((alt_end - alt_start) > (end - start))
return 1;
- if ((value & fcur->mask) == fcur->value)
+ if ((value & fcur->mask & mask) == (fcur->value & mask))
return 0;
src = alt_start;
dest = start;
- for (; src < alt_end; src++, dest++) {
+ for (; src < alt_end; src = ppc_inst_next(src, src),
+ dest = ppc_inst_next(dest, dest)) {
if (patch_alt_instruction(src, dest, alt_start, alt_end))
return 1;
}
for (; dest < end; dest++)
- raw_patch_instruction(dest, PPC_INST_NOP);
+ raw_patch_instruction(dest, ppc_inst(PPC_RAW_NOP()));
return 0;
}
-void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end)
+static void do_feature_fixups_mask(unsigned long value, unsigned long mask,
+ void *fixup_start, void *fixup_end)
{
struct fixup_entry *fcur, *fend;
@@ -101,7 +107,7 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end)
fend = fixup_end;
for (; fcur < fend; fcur++) {
- if (patch_feature_section(value, fcur)) {
+ if (patch_feature_section_mask(value, mask, fcur)) {
WARN_ON(1);
printk("Unable to patch feature section at %p - %p" \
" with %p - %p\n",
@@ -113,48 +119,94 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end)
}
}
+void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end)
+{
+ do_feature_fixups_mask(value, ~0, fixup_start, fixup_end);
+}
+
+#ifdef CONFIG_PPC_BARRIER_NOSPEC
+static bool is_fixup_addr_valid(void *dest, size_t size)
+{
+ return system_state < SYSTEM_FREEING_INITMEM ||
+ !init_section_contains(dest, size);
+}
+
+static int do_patch_fixups(long *start, long *end, unsigned int *instrs, int num)
+{
+ int i;
+
+ for (i = 0; start < end; start++, i++) {
+ int j;
+ unsigned int *dest = (void *)start + *start;
+
+ if (!is_fixup_addr_valid(dest, sizeof(*instrs) * num))
+ continue;
+
+ pr_devel("patching dest %lx\n", (unsigned long)dest);
+
+ for (j = 0; j < num; j++)
+ patch_instruction(dest + j, ppc_inst(instrs[j]));
+ }
+ return i;
+}
+#endif
+
#ifdef CONFIG_PPC_BOOK3S_64
+static int do_patch_entry_fixups(long *start, long *end, unsigned int *instrs,
+ bool do_fallback, void *fallback)
+{
+ int i;
+
+ for (i = 0; start < end; start++, i++) {
+ unsigned int *dest = (void *)start + *start;
+
+ if (!is_fixup_addr_valid(dest, sizeof(*instrs) * 3))
+ continue;
+
+ pr_devel("patching dest %lx\n", (unsigned long)dest);
+
+ // See comment in do_entry_flush_fixups() RE order of patching
+ if (do_fallback) {
+ patch_instruction(dest, ppc_inst(instrs[0]));
+ patch_instruction(dest + 2, ppc_inst(instrs[2]));
+ patch_branch(dest + 1, (unsigned long)fallback, BRANCH_SET_LINK);
+ } else {
+ patch_instruction(dest + 1, ppc_inst(instrs[1]));
+ patch_instruction(dest + 2, ppc_inst(instrs[2]));
+ patch_instruction(dest, ppc_inst(instrs[0]));
+ }
+ }
+ return i;
+}
+
static void do_stf_entry_barrier_fixups(enum stf_barrier_type types)
{
- unsigned int instrs[3], *dest;
+ unsigned int instrs[3];
long *start, *end;
int i;
- start = PTRRELOC(&__start___stf_entry_barrier_fixup),
+ start = PTRRELOC(&__start___stf_entry_barrier_fixup);
end = PTRRELOC(&__stop___stf_entry_barrier_fixup);
- instrs[0] = 0x60000000; /* nop */
- instrs[1] = 0x60000000; /* nop */
- instrs[2] = 0x60000000; /* nop */
+ instrs[0] = PPC_RAW_NOP();
+ instrs[1] = PPC_RAW_NOP();
+ instrs[2] = PPC_RAW_NOP();
i = 0;
if (types & STF_BARRIER_FALLBACK) {
- instrs[i++] = 0x7d4802a6; /* mflr r10 */
- instrs[i++] = 0x60000000; /* branch patched below */
- instrs[i++] = 0x7d4803a6; /* mtlr r10 */
+ instrs[i++] = PPC_RAW_MFLR(_R10);
+ instrs[i++] = PPC_RAW_NOP(); /* branch patched below */
+ instrs[i++] = PPC_RAW_MTLR(_R10);
} else if (types & STF_BARRIER_EIEIO) {
- instrs[i++] = 0x7e0006ac; /* eieio + bit 6 hint */
+ instrs[i++] = PPC_RAW_EIEIO() | 0x02000000; /* eieio + bit 6 hint */
} else if (types & STF_BARRIER_SYNC_ORI) {
- instrs[i++] = 0x7c0004ac; /* hwsync */
- instrs[i++] = 0xe94d0000; /* ld r10,0(r13) */
- instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */
+ instrs[i++] = PPC_RAW_SYNC();
+ instrs[i++] = PPC_RAW_LD(_R10, _R13, 0);
+ instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */
}
- for (i = 0; start < end; start++, i++) {
- dest = (void *)start + *start;
-
- pr_devel("patching dest %lx\n", (unsigned long)dest);
-
- patch_instruction(dest, instrs[0]);
-
- if (types & STF_BARRIER_FALLBACK)
- patch_branch(dest + 1, (unsigned long)&stf_barrier_fallback,
- BRANCH_SET_LINK);
- else
- patch_instruction(dest + 1, instrs[1]);
-
- patch_instruction(dest + 2, instrs[2]);
- }
+ i = do_patch_entry_fixups(start, end, instrs, types & STF_BARRIER_FALLBACK,
+ &stf_barrier_fallback);
printk(KERN_DEBUG "stf-barrier: patched %d entry locations (%s barrier)\n", i,
(types == STF_BARRIER_NONE) ? "no" :
@@ -166,53 +218,42 @@ static void do_stf_entry_barrier_fixups(enum stf_barrier_type types)
static void do_stf_exit_barrier_fixups(enum stf_barrier_type types)
{
- unsigned int instrs[6], *dest;
+ unsigned int instrs[6];
long *start, *end;
int i;
- start = PTRRELOC(&__start___stf_exit_barrier_fixup),
+ start = PTRRELOC(&__start___stf_exit_barrier_fixup);
end = PTRRELOC(&__stop___stf_exit_barrier_fixup);
- instrs[0] = 0x60000000; /* nop */
- instrs[1] = 0x60000000; /* nop */
- instrs[2] = 0x60000000; /* nop */
- instrs[3] = 0x60000000; /* nop */
- instrs[4] = 0x60000000; /* nop */
- instrs[5] = 0x60000000; /* nop */
+ instrs[0] = PPC_RAW_NOP();
+ instrs[1] = PPC_RAW_NOP();
+ instrs[2] = PPC_RAW_NOP();
+ instrs[3] = PPC_RAW_NOP();
+ instrs[4] = PPC_RAW_NOP();
+ instrs[5] = PPC_RAW_NOP();
i = 0;
if (types & STF_BARRIER_FALLBACK || types & STF_BARRIER_SYNC_ORI) {
if (cpu_has_feature(CPU_FTR_HVMODE)) {
- instrs[i++] = 0x7db14ba6; /* mtspr 0x131, r13 (HSPRG1) */
- instrs[i++] = 0x7db04aa6; /* mfspr r13, 0x130 (HSPRG0) */
+ instrs[i++] = PPC_RAW_MTSPR(SPRN_HSPRG1, _R13);
+ instrs[i++] = PPC_RAW_MFSPR(_R13, SPRN_HSPRG0);
} else {
- instrs[i++] = 0x7db243a6; /* mtsprg 2,r13 */
- instrs[i++] = 0x7db142a6; /* mfsprg r13,1 */
+ instrs[i++] = PPC_RAW_MTSPR(SPRN_SPRG2, _R13);
+ instrs[i++] = PPC_RAW_MFSPR(_R13, SPRN_SPRG1);
}
- instrs[i++] = 0x7c0004ac; /* hwsync */
- instrs[i++] = 0xe9ad0000; /* ld r13,0(r13) */
- instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */
- if (cpu_has_feature(CPU_FTR_HVMODE)) {
- instrs[i++] = 0x7db14aa6; /* mfspr r13, 0x131 (HSPRG1) */
- } else {
- instrs[i++] = 0x7db242a6; /* mfsprg r13,2 */
- }
+ instrs[i++] = PPC_RAW_SYNC();
+ instrs[i++] = PPC_RAW_LD(_R13, _R13, 0);
+ instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */
+ if (cpu_has_feature(CPU_FTR_HVMODE))
+ instrs[i++] = PPC_RAW_MFSPR(_R13, SPRN_HSPRG1);
+ else
+ instrs[i++] = PPC_RAW_MFSPR(_R13, SPRN_SPRG2);
} else if (types & STF_BARRIER_EIEIO) {
- instrs[i++] = 0x7e0006ac; /* eieio + bit 6 hint */
+ instrs[i++] = PPC_RAW_EIEIO() | 0x02000000; /* eieio + bit 6 hint */
}
- for (i = 0; start < end; start++, i++) {
- dest = (void *)start + *start;
-
- pr_devel("patching dest %lx\n", (unsigned long)dest);
+ i = do_patch_fixups(start, end, instrs, ARRAY_SIZE(instrs));
- patch_instruction(dest, instrs[0]);
- patch_instruction(dest + 1, instrs[1]);
- patch_instruction(dest + 2, instrs[2]);
- patch_instruction(dest + 3, instrs[3]);
- patch_instruction(dest + 4, instrs[4]);
- patch_instruction(dest + 5, instrs[5]);
- }
printk(KERN_DEBUG "stf-barrier: patched %d exit locations (%s barrier)\n", i,
(types == STF_BARRIER_NONE) ? "no" :
(types == STF_BARRIER_FALLBACK) ? "fallback" :
@@ -221,49 +262,205 @@ static void do_stf_exit_barrier_fixups(enum stf_barrier_type types)
: "unknown");
}
+static bool stf_exit_reentrant = false;
+static bool rfi_exit_reentrant = false;
+static DEFINE_MUTEX(exit_flush_lock);
+
+static int __do_stf_barrier_fixups(void *data)
+{
+ enum stf_barrier_type *types = data;
+
+ do_stf_entry_barrier_fixups(*types);
+ do_stf_exit_barrier_fixups(*types);
+
+ return 0;
+}
void do_stf_barrier_fixups(enum stf_barrier_type types)
{
- do_stf_entry_barrier_fixups(types);
- do_stf_exit_barrier_fixups(types);
+ /*
+ * The call to the fallback entry flush, and the fallback/sync-ori exit
+ * flush can not be safely patched in/out while other CPUs are
+ * executing them. So call __do_stf_barrier_fixups() on one CPU while
+ * all other CPUs spin in the stop machine core with interrupts hard
+ * disabled.
+ *
+ * The branch to mark interrupt exits non-reentrant is enabled first,
+ * then stop_machine runs which will ensure all CPUs are out of the
+ * low level interrupt exit code before patching. After the patching,
+ * if allowed, then flip the branch to allow fast exits.
+ */
+
+ // Prevent static key update races with do_rfi_flush_fixups()
+ mutex_lock(&exit_flush_lock);
+ static_branch_enable(&interrupt_exit_not_reentrant);
+
+ stop_machine(__do_stf_barrier_fixups, &types, NULL);
+
+ if ((types & STF_BARRIER_FALLBACK) || (types & STF_BARRIER_SYNC_ORI))
+ stf_exit_reentrant = false;
+ else
+ stf_exit_reentrant = true;
+
+ if (stf_exit_reentrant && rfi_exit_reentrant)
+ static_branch_disable(&interrupt_exit_not_reentrant);
+
+ mutex_unlock(&exit_flush_lock);
}
-void do_rfi_flush_fixups(enum l1d_flush_type types)
+void do_uaccess_flush_fixups(enum l1d_flush_type types)
{
- unsigned int instrs[3], *dest;
+ unsigned int instrs[4];
long *start, *end;
int i;
- start = PTRRELOC(&__start___rfi_flush_fixup),
- end = PTRRELOC(&__stop___rfi_flush_fixup);
+ start = PTRRELOC(&__start___uaccess_flush_fixup);
+ end = PTRRELOC(&__stop___uaccess_flush_fixup);
- instrs[0] = 0x60000000; /* nop */
- instrs[1] = 0x60000000; /* nop */
- instrs[2] = 0x60000000; /* nop */
+ instrs[0] = PPC_RAW_NOP();
+ instrs[1] = PPC_RAW_NOP();
+ instrs[2] = PPC_RAW_NOP();
+ instrs[3] = PPC_RAW_BLR();
- if (types & L1D_FLUSH_FALLBACK)
- /* b .+16 to fallback flush */
- instrs[0] = 0x48000010;
+ i = 0;
+ if (types == L1D_FLUSH_FALLBACK) {
+ instrs[3] = PPC_RAW_NOP();
+ /* fallthrough to fallback flush */
+ }
+
+ if (types & L1D_FLUSH_ORI) {
+ instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */
+ instrs[i++] = PPC_RAW_ORI(_R30, _R30, 0); /* L1d flush */
+ }
+
+ if (types & L1D_FLUSH_MTTRIG)
+ instrs[i++] = PPC_RAW_MTSPR(SPRN_TRIG2, _R0);
+
+ i = do_patch_fixups(start, end, instrs, ARRAY_SIZE(instrs));
+
+ printk(KERN_DEBUG "uaccess-flush: patched %d locations (%s flush)\n", i,
+ (types == L1D_FLUSH_NONE) ? "no" :
+ (types == L1D_FLUSH_FALLBACK) ? "fallback displacement" :
+ (types & L1D_FLUSH_ORI) ? (types & L1D_FLUSH_MTTRIG)
+ ? "ori+mttrig type"
+ : "ori type" :
+ (types & L1D_FLUSH_MTTRIG) ? "mttrig type"
+ : "unknown");
+}
+
+static int __do_entry_flush_fixups(void *data)
+{
+ enum l1d_flush_type types = *(enum l1d_flush_type *)data;
+ unsigned int instrs[3];
+ long *start, *end;
+ int i;
+
+ instrs[0] = PPC_RAW_NOP();
+ instrs[1] = PPC_RAW_NOP();
+ instrs[2] = PPC_RAW_NOP();
i = 0;
+ if (types == L1D_FLUSH_FALLBACK) {
+ instrs[i++] = PPC_RAW_MFLR(_R10);
+ instrs[i++] = PPC_RAW_NOP(); /* branch patched below */
+ instrs[i++] = PPC_RAW_MTLR(_R10);
+ }
+
if (types & L1D_FLUSH_ORI) {
- instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */
- instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/
+ instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */
+ instrs[i++] = PPC_RAW_ORI(_R30, _R30, 0); /* L1d flush */
}
if (types & L1D_FLUSH_MTTRIG)
- instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */
+ instrs[i++] = PPC_RAW_MTSPR(SPRN_TRIG2, _R0);
- for (i = 0; start < end; start++, i++) {
- dest = (void *)start + *start;
+ /*
+ * If we're patching in or out the fallback flush we need to be careful about the
+ * order in which we patch instructions. That's because it's possible we could
+ * take a page fault after patching one instruction, so the sequence of
+ * instructions must be safe even in a half patched state.
+ *
+ * To make that work, when patching in the fallback flush we patch in this order:
+ * - the mflr (dest)
+ * - the mtlr (dest + 2)
+ * - the branch (dest + 1)
+ *
+ * That ensures the sequence is safe to execute at any point. In contrast if we
+ * patch the mtlr last, it's possible we could return from the branch and not
+ * restore LR, leading to a crash later.
+ *
+ * When patching out the fallback flush (either with nops or another flush type),
+ * we patch in this order:
+ * - the branch (dest + 1)
+ * - the mtlr (dest + 2)
+ * - the mflr (dest)
+ *
+ * Note we are protected by stop_machine() from other CPUs executing the code in a
+ * semi-patched state.
+ */
- pr_devel("patching dest %lx\n", (unsigned long)dest);
+ start = PTRRELOC(&__start___entry_flush_fixup);
+ end = PTRRELOC(&__stop___entry_flush_fixup);
+ i = do_patch_entry_fixups(start, end, instrs, types == L1D_FLUSH_FALLBACK,
+ &entry_flush_fallback);
+
+ start = PTRRELOC(&__start___scv_entry_flush_fixup);
+ end = PTRRELOC(&__stop___scv_entry_flush_fixup);
+ i += do_patch_entry_fixups(start, end, instrs, types == L1D_FLUSH_FALLBACK,
+ &scv_entry_flush_fallback);
- patch_instruction(dest, instrs[0]);
- patch_instruction(dest + 1, instrs[1]);
- patch_instruction(dest + 2, instrs[2]);
+ printk(KERN_DEBUG "entry-flush: patched %d locations (%s flush)\n", i,
+ (types == L1D_FLUSH_NONE) ? "no" :
+ (types == L1D_FLUSH_FALLBACK) ? "fallback displacement" :
+ (types & L1D_FLUSH_ORI) ? (types & L1D_FLUSH_MTTRIG)
+ ? "ori+mttrig type"
+ : "ori type" :
+ (types & L1D_FLUSH_MTTRIG) ? "mttrig type"
+ : "unknown");
+
+ return 0;
+}
+
+void do_entry_flush_fixups(enum l1d_flush_type types)
+{
+ /*
+ * The call to the fallback flush can not be safely patched in/out while
+ * other CPUs are executing it. So call __do_entry_flush_fixups() on one
+ * CPU while all other CPUs spin in the stop machine core with interrupts
+ * hard disabled.
+ */
+ stop_machine(__do_entry_flush_fixups, &types, NULL);
+}
+
+static int __do_rfi_flush_fixups(void *data)
+{
+ enum l1d_flush_type types = *(enum l1d_flush_type *)data;
+ unsigned int instrs[3];
+ long *start, *end;
+ int i;
+
+ start = PTRRELOC(&__start___rfi_flush_fixup);
+ end = PTRRELOC(&__stop___rfi_flush_fixup);
+
+ instrs[0] = PPC_RAW_NOP();
+ instrs[1] = PPC_RAW_NOP();
+ instrs[2] = PPC_RAW_NOP();
+
+ if (types & L1D_FLUSH_FALLBACK)
+ /* b .+16 to fallback flush */
+ instrs[0] = PPC_RAW_BRANCH(16);
+
+ i = 0;
+ if (types & L1D_FLUSH_ORI) {
+ instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */
+ instrs[i++] = PPC_RAW_ORI(_R30, _R30, 0); /* L1d flush */
}
+ if (types & L1D_FLUSH_MTTRIG)
+ instrs[i++] = PPC_RAW_MTSPR(SPRN_TRIG2, _R0);
+
+ i = do_patch_fixups(start, end, instrs, ARRAY_SIZE(instrs));
+
printk(KERN_DEBUG "rfi-flush: patched %d locations (%s flush)\n", i,
(types == L1D_FLUSH_NONE) ? "no" :
(types == L1D_FLUSH_FALLBACK) ? "fallback displacement" :
@@ -272,30 +469,53 @@ void do_rfi_flush_fixups(enum l1d_flush_type types)
: "ori type" :
(types & L1D_FLUSH_MTTRIG) ? "mttrig type"
: "unknown");
+
+ return 0;
+}
+
+void do_rfi_flush_fixups(enum l1d_flush_type types)
+{
+ /*
+ * stop_machine gets all CPUs out of the interrupt exit handler same
+ * as do_stf_barrier_fixups. do_rfi_flush_fixups patching can run
+ * without stop_machine, so this could be achieved with a broadcast
+ * IPI instead, but this matches the stf sequence.
+ */
+
+ // Prevent static key update races with do_stf_barrier_fixups()
+ mutex_lock(&exit_flush_lock);
+ static_branch_enable(&interrupt_exit_not_reentrant);
+
+ stop_machine(__do_rfi_flush_fixups, &types, NULL);
+
+ if (types & L1D_FLUSH_FALLBACK)
+ rfi_exit_reentrant = false;
+ else
+ rfi_exit_reentrant = true;
+
+ if (stf_exit_reentrant && rfi_exit_reentrant)
+ static_branch_disable(&interrupt_exit_not_reentrant);
+
+ mutex_unlock(&exit_flush_lock);
}
void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_end)
{
- unsigned int instr, *dest;
+ unsigned int instr;
long *start, *end;
int i;
start = fixup_start;
end = fixup_end;
- instr = 0x60000000; /* nop */
+ instr = PPC_RAW_NOP();
if (enable) {
pr_info("barrier-nospec: using ORI speculation barrier\n");
- instr = 0x63ff0000; /* ori 31,31,0 speculation barrier */
+ instr = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */
}
- for (i = 0; start < end; start++, i++) {
- dest = (void *)start + *start;
-
- pr_devel("patching dest %lx\n", (unsigned long)dest);
- patch_instruction(dest, instr);
- }
+ i = do_patch_fixups(start, end, &instr, 1);
printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i);
}
@@ -307,44 +527,38 @@ void do_barrier_nospec_fixups(bool enable)
{
void *start, *end;
- start = PTRRELOC(&__start___barrier_nospec_fixup),
+ start = PTRRELOC(&__start___barrier_nospec_fixup);
end = PTRRELOC(&__stop___barrier_nospec_fixup);
do_barrier_nospec_fixups_range(enable, start, end);
}
#endif /* CONFIG_PPC_BARRIER_NOSPEC */
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#ifdef CONFIG_PPC_E500
void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_end)
{
- unsigned int instr[2], *dest;
+ unsigned int instr[2];
long *start, *end;
int i;
start = fixup_start;
end = fixup_end;
- instr[0] = PPC_INST_NOP;
- instr[1] = PPC_INST_NOP;
+ instr[0] = PPC_RAW_NOP();
+ instr[1] = PPC_RAW_NOP();
if (enable) {
pr_info("barrier-nospec: using isync; sync as speculation barrier\n");
- instr[0] = PPC_INST_ISYNC;
- instr[1] = PPC_INST_SYNC;
+ instr[0] = PPC_RAW_ISYNC();
+ instr[1] = PPC_RAW_SYNC();
}
- for (i = 0; start < end; start++, i++) {
- dest = (void *)start + *start;
-
- pr_devel("patching dest %lx\n", (unsigned long)dest);
- patch_instruction(dest, instr[0]);
- patch_instruction(dest + 1, instr[1]);
- }
+ i = do_patch_fixups(start, end, instr, ARRAY_SIZE(instr));
printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i);
}
-static void patch_btb_flush_section(long *curr)
+static void __init patch_btb_flush_section(long *curr)
{
unsigned int *start, *end;
@@ -352,11 +566,11 @@ static void patch_btb_flush_section(long *curr)
end = (void *)curr + *(curr + 1);
for (; start < end; start++) {
pr_devel("patching dest %lx\n", (unsigned long)start);
- patch_instruction(start, PPC_INST_NOP);
+ patch_instruction(start, ppc_inst(PPC_RAW_NOP()));
}
}
-void do_btb_flush_fixups(void)
+void __init do_btb_flush_fixups(void)
{
long *start, *end;
@@ -366,12 +580,12 @@ void do_btb_flush_fixups(void)
for (; start < end; start += 2)
patch_btb_flush_section(start);
}
-#endif /* CONFIG_PPC_FSL_BOOK3E */
+#endif /* CONFIG_PPC_E500 */
void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end)
{
long *start, *end;
- unsigned int *dest;
+ u32 *dest;
if (!(value & CPU_FTR_LWSYNC))
return ;
@@ -381,27 +595,28 @@ void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end)
for (; start < end; start++) {
dest = (void *)start + *start;
- raw_patch_instruction(dest, PPC_INST_LWSYNC);
+ raw_patch_instruction(dest, ppc_inst(PPC_INST_LWSYNC));
}
}
-static void do_final_fixups(void)
+static void __init do_final_fixups(void)
{
#if defined(CONFIG_PPC64) && defined(CONFIG_RELOCATABLE)
- int *src, *dest;
- unsigned long length;
+ ppc_inst_t inst;
+ u32 *src, *dest, *end;
if (PHYSICAL_START == 0)
return;
- src = (int *)(KERNELBASE + PHYSICAL_START);
- dest = (int *)KERNELBASE;
- length = (__end_interrupts - _stext) / sizeof(int);
+ src = (u32 *)(KERNELBASE + PHYSICAL_START);
+ dest = (u32 *)KERNELBASE;
+ end = (void *)src + (__end_interrupts - _stext);
- while (length--) {
- raw_patch_instruction(dest, *src);
- src++;
- dest++;
+ while (src < end) {
+ inst = ppc_inst_read(src);
+ raw_patch_instruction(dest, inst);
+ src = ppc_inst_next(src, src);
+ dest = ppc_inst_next(dest, dest);
}
#endif
}
@@ -443,6 +658,17 @@ void __init apply_feature_fixups(void)
do_final_fixups();
}
+void __init update_mmu_feature_fixups(unsigned long mask)
+{
+ saved_mmu_features &= ~mask;
+ saved_mmu_features |= cur_cpu_spec->mmu_features & mask;
+
+ do_feature_fixups_mask(cur_cpu_spec->mmu_features, mask,
+ PTRRELOC(&__start___mmu_ftr_fixup),
+ PTRRELOC(&__stop___mmu_ftr_fixup));
+ mmu_feature_keys_init();
+}
+
void __init setup_feature_keys(void)
{
/*
@@ -475,15 +701,20 @@ late_initcall(check_features);
#define check(x) \
if (!(x)) printk("feature-fixups: test failed at line %d\n", __LINE__);
+static int patch_feature_section(unsigned long value, struct fixup_entry *fcur)
+{
+ return patch_feature_section_mask(value, ~0, fcur);
+}
+
/* This must be after the text it fixes up, vmlinux.lds.S enforces that atm */
static struct fixup_entry fixup;
-static long calc_offset(struct fixup_entry *entry, unsigned int *p)
+static long __init calc_offset(struct fixup_entry *entry, unsigned int *p)
{
return (unsigned long)p - (unsigned long)entry;
}
-static void test_basic_patching(void)
+static void __init test_basic_patching(void)
{
extern unsigned int ftr_fixup_test1[];
extern unsigned int end_ftr_fixup_test1[];
@@ -514,7 +745,7 @@ static void test_basic_patching(void)
check(memcmp(ftr_fixup_test1, ftr_fixup_test1_expected, size) == 0);
}
-static void test_alternative_patching(void)
+static void __init test_alternative_patching(void)
{
extern unsigned int ftr_fixup_test2[];
extern unsigned int end_ftr_fixup_test2[];
@@ -547,7 +778,7 @@ static void test_alternative_patching(void)
check(memcmp(ftr_fixup_test2, ftr_fixup_test2_expected, size) == 0);
}
-static void test_alternative_case_too_big(void)
+static void __init test_alternative_case_too_big(void)
{
extern unsigned int ftr_fixup_test3[];
extern unsigned int end_ftr_fixup_test3[];
@@ -573,7 +804,7 @@ static void test_alternative_case_too_big(void)
check(memcmp(ftr_fixup_test3, ftr_fixup_test3_orig, size) == 0);
}
-static void test_alternative_case_too_small(void)
+static void __init test_alternative_case_too_small(void)
{
extern unsigned int ftr_fixup_test4[];
extern unsigned int end_ftr_fixup_test4[];
@@ -619,7 +850,7 @@ static void test_alternative_case_with_branch(void)
check(memcmp(ftr_fixup_test5, ftr_fixup_test5_expected, size) == 0);
}
-static void test_alternative_case_with_external_branch(void)
+static void __init test_alternative_case_with_external_branch(void)
{
extern unsigned int ftr_fixup_test6[];
extern unsigned int end_ftr_fixup_test6[];
@@ -629,7 +860,7 @@ static void test_alternative_case_with_external_branch(void)
check(memcmp(ftr_fixup_test6, ftr_fixup_test6_expected, size) == 0);
}
-static void test_alternative_case_with_branch_to_end(void)
+static void __init test_alternative_case_with_branch_to_end(void)
{
extern unsigned int ftr_fixup_test7[];
extern unsigned int end_ftr_fixup_test7[];
@@ -639,7 +870,7 @@ static void test_alternative_case_with_branch_to_end(void)
check(memcmp(ftr_fixup_test7, ftr_fixup_test7_expected, size) == 0);
}
-static void test_cpu_macros(void)
+static void __init test_cpu_macros(void)
{
extern u8 ftr_fixup_test_FTR_macros[];
extern u8 ftr_fixup_test_FTR_macros_expected[];
@@ -651,7 +882,7 @@ static void test_cpu_macros(void)
ftr_fixup_test_FTR_macros_expected, size) == 0);
}
-static void test_fw_macros(void)
+static void __init test_fw_macros(void)
{
#ifdef CONFIG_PPC64
extern u8 ftr_fixup_test_FW_FTR_macros[];
@@ -665,7 +896,7 @@ static void test_fw_macros(void)
#endif
}
-static void test_lwsync_macros(void)
+static void __init test_lwsync_macros(void)
{
extern u8 lwsync_fixup_test[];
extern u8 end_lwsync_fixup_test[];
@@ -684,6 +915,78 @@ static void test_lwsync_macros(void)
}
}
+#ifdef CONFIG_PPC64
+static void __init test_prefix_patching(void)
+{
+ extern unsigned int ftr_fixup_prefix1[];
+ extern unsigned int end_ftr_fixup_prefix1[];
+ extern unsigned int ftr_fixup_prefix1_orig[];
+ extern unsigned int ftr_fixup_prefix1_expected[];
+ int size = sizeof(unsigned int) * (end_ftr_fixup_prefix1 - ftr_fixup_prefix1);
+
+ fixup.value = fixup.mask = 8;
+ fixup.start_off = calc_offset(&fixup, ftr_fixup_prefix1 + 1);
+ fixup.end_off = calc_offset(&fixup, ftr_fixup_prefix1 + 3);
+ fixup.alt_start_off = fixup.alt_end_off = 0;
+
+ /* Sanity check */
+ check(memcmp(ftr_fixup_prefix1, ftr_fixup_prefix1_orig, size) == 0);
+
+ patch_feature_section(0, &fixup);
+ check(memcmp(ftr_fixup_prefix1, ftr_fixup_prefix1_expected, size) == 0);
+ check(memcmp(ftr_fixup_prefix1, ftr_fixup_prefix1_orig, size) != 0);
+}
+
+static void __init test_prefix_alt_patching(void)
+{
+ extern unsigned int ftr_fixup_prefix2[];
+ extern unsigned int end_ftr_fixup_prefix2[];
+ extern unsigned int ftr_fixup_prefix2_orig[];
+ extern unsigned int ftr_fixup_prefix2_expected[];
+ extern unsigned int ftr_fixup_prefix2_alt[];
+ int size = sizeof(unsigned int) * (end_ftr_fixup_prefix2 - ftr_fixup_prefix2);
+
+ fixup.value = fixup.mask = 8;
+ fixup.start_off = calc_offset(&fixup, ftr_fixup_prefix2 + 1);
+ fixup.end_off = calc_offset(&fixup, ftr_fixup_prefix2 + 3);
+ fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_prefix2_alt);
+ fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_prefix2_alt + 2);
+ /* Sanity check */
+ check(memcmp(ftr_fixup_prefix2, ftr_fixup_prefix2_orig, size) == 0);
+
+ patch_feature_section(0, &fixup);
+ check(memcmp(ftr_fixup_prefix2, ftr_fixup_prefix2_expected, size) == 0);
+ check(memcmp(ftr_fixup_prefix2, ftr_fixup_prefix2_orig, size) != 0);
+}
+
+static void __init test_prefix_word_alt_patching(void)
+{
+ extern unsigned int ftr_fixup_prefix3[];
+ extern unsigned int end_ftr_fixup_prefix3[];
+ extern unsigned int ftr_fixup_prefix3_orig[];
+ extern unsigned int ftr_fixup_prefix3_expected[];
+ extern unsigned int ftr_fixup_prefix3_alt[];
+ int size = sizeof(unsigned int) * (end_ftr_fixup_prefix3 - ftr_fixup_prefix3);
+
+ fixup.value = fixup.mask = 8;
+ fixup.start_off = calc_offset(&fixup, ftr_fixup_prefix3 + 1);
+ fixup.end_off = calc_offset(&fixup, ftr_fixup_prefix3 + 4);
+ fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_prefix3_alt);
+ fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_prefix3_alt + 3);
+ /* Sanity check */
+ check(memcmp(ftr_fixup_prefix3, ftr_fixup_prefix3_orig, size) == 0);
+
+ patch_feature_section(0, &fixup);
+ check(memcmp(ftr_fixup_prefix3, ftr_fixup_prefix3_expected, size) == 0);
+ patch_feature_section(0, &fixup);
+ check(memcmp(ftr_fixup_prefix3, ftr_fixup_prefix3_orig, size) != 0);
+}
+#else
+static inline void test_prefix_patching(void) {}
+static inline void test_prefix_alt_patching(void) {}
+static inline void test_prefix_word_alt_patching(void) {}
+#endif /* CONFIG_PPC64 */
+
static int __init test_feature_fixups(void)
{
printk(KERN_DEBUG "Running feature fixup self-tests ...\n");
@@ -698,6 +1001,9 @@ static int __init test_feature_fixups(void)
test_cpu_macros();
test_fw_macros();
test_lwsync_macros();
+ test_prefix_patching();
+ test_prefix_alt_patching();
+ test_prefix_word_alt_patching();
return 0;
}
diff --git a/arch/powerpc/lib/hweight_64.S b/arch/powerpc/lib/hweight_64.S
index 6effad901ef7..151875050da9 100644
--- a/arch/powerpc/lib/hweight_64.S
+++ b/arch/powerpc/lib/hweight_64.S
@@ -5,16 +5,16 @@
*
* Author: Anton Blanchard <anton@au.ibm.com>
*/
+#include <linux/export.h>
#include <asm/processor.h>
#include <asm/ppc_asm.h>
-#include <asm/export.h>
#include <asm/feature-fixups.h>
/* Note: This code relies on -mminimal-toc */
_GLOBAL(__arch_hweight8)
BEGIN_FTR_SECTION
- b __sw_hweight8
+ b CFUNC(__sw_hweight8)
nop
nop
FTR_SECTION_ELSE
@@ -26,7 +26,7 @@ EXPORT_SYMBOL(__arch_hweight8)
_GLOBAL(__arch_hweight16)
BEGIN_FTR_SECTION
- b __sw_hweight16
+ b CFUNC(__sw_hweight16)
nop
nop
nop
@@ -49,7 +49,7 @@ EXPORT_SYMBOL(__arch_hweight16)
_GLOBAL(__arch_hweight32)
BEGIN_FTR_SECTION
- b __sw_hweight32
+ b CFUNC(__sw_hweight32)
nop
nop
nop
@@ -75,7 +75,7 @@ EXPORT_SYMBOL(__arch_hweight32)
_GLOBAL(__arch_hweight64)
BEGIN_FTR_SECTION
- b __sw_hweight64
+ b CFUNC(__sw_hweight64)
nop
nop
nop
diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c
index 6440d5943c00..04165b7a163f 100644
--- a/arch/powerpc/lib/locks.c
+++ b/arch/powerpc/lib/locks.c
@@ -27,14 +27,14 @@ void splpar_spin_yield(arch_spinlock_t *lock)
return;
holder_cpu = lock_value & 0xffff;
BUG_ON(holder_cpu >= NR_CPUS);
- yield_count = be32_to_cpu(lppaca_of(holder_cpu).yield_count);
+
+ yield_count = yield_count_of(holder_cpu);
if ((yield_count & 1) == 0)
return; /* virtual cpu is currently running */
rmb();
if (lock->slock != lock_value)
return; /* something has changed */
- plpar_hcall_norets(H_CONFER,
- get_hard_smp_processor_id(holder_cpu), yield_count);
+ yield_to_preempted(holder_cpu, yield_count);
}
EXPORT_SYMBOL_GPL(splpar_spin_yield);
@@ -53,13 +53,13 @@ void splpar_rw_yield(arch_rwlock_t *rw)
return; /* no write lock at present */
holder_cpu = lock_value & 0xffff;
BUG_ON(holder_cpu >= NR_CPUS);
- yield_count = be32_to_cpu(lppaca_of(holder_cpu).yield_count);
+
+ yield_count = yield_count_of(holder_cpu);
if ((yield_count & 1) == 0)
return; /* virtual cpu is currently running */
rmb();
if (rw->lock != lock_value)
return; /* something has changed */
- plpar_hcall_norets(H_CONFER,
- get_hard_smp_processor_id(holder_cpu), yield_count);
+ yield_to_preempted(holder_cpu, yield_count);
}
#endif
diff --git a/arch/powerpc/lib/mem_64.S b/arch/powerpc/lib/mem_64.S
index 9351ffab409c..6fd06cd20faa 100644
--- a/arch/powerpc/lib/mem_64.S
+++ b/arch/powerpc/lib/mem_64.S
@@ -4,10 +4,10 @@
*
* Copyright (C) 1996 Paul Mackerras.
*/
+#include <linux/export.h>
#include <asm/processor.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>
-#include <asm/export.h>
#include <asm/kasan.h>
#ifndef CONFIG_KASAN
diff --git a/arch/powerpc/lib/memcmp_32.S b/arch/powerpc/lib/memcmp_32.S
index 5010e376f7b8..f6fca5664e91 100644
--- a/arch/powerpc/lib/memcmp_32.S
+++ b/arch/powerpc/lib/memcmp_32.S
@@ -7,8 +7,8 @@
*
*/
+#include <linux/export.h>
#include <asm/ppc_asm.h>
-#include <asm/export.h>
.text
diff --git a/arch/powerpc/lib/memcmp_64.S b/arch/powerpc/lib/memcmp_64.S
index 384218df71ba..142c666d3897 100644
--- a/arch/powerpc/lib/memcmp_64.S
+++ b/arch/powerpc/lib/memcmp_64.S
@@ -3,8 +3,8 @@
* Author: Anton Blanchard <anton@au.ibm.com>
* Copyright 2015 IBM Corporation.
*/
+#include <linux/export.h>
#include <asm/ppc_asm.h>
-#include <asm/export.h>
#include <asm/ppc-opcode.h>
#define off8 r6
@@ -44,7 +44,7 @@
std r5,-STACKFRAMESIZE+STK_REG(R29)(r1); \
std r0,16(r1); \
stdu r1,-STACKFRAMESIZE(r1); \
- bl enter_vmx_ops; \
+ bl CFUNC(enter_vmx_ops); \
cmpwi cr1,r3,0; \
ld r0,STACKFRAMESIZE+16(r1); \
ld r3,STK_REG(R31)(r1); \
@@ -60,7 +60,7 @@
std r5,-STACKFRAMESIZE+STK_REG(R29)(r1); \
std r0,16(r1); \
stdu r1,-STACKFRAMESIZE(r1); \
- bl exit_vmx_ops; \
+ bl CFUNC(exit_vmx_ops); \
ld r0,STACKFRAMESIZE+16(r1); \
ld r3,STK_REG(R31)(r1); \
ld r4,STK_REG(R30)(r1); \
diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S
index 016c91e958d8..b5a67e20143f 100644
--- a/arch/powerpc/lib/memcpy_64.S
+++ b/arch/powerpc/lib/memcpy_64.S
@@ -2,9 +2,9 @@
/*
* Copyright (C) 2002 Paul Mackerras, IBM Corp.
*/
+#include <linux/export.h>
#include <asm/processor.h>
#include <asm/ppc_asm.h>
-#include <asm/export.h>
#include <asm/asm-compat.h>
#include <asm/feature-fixups.h>
#include <asm/kasan.h>
diff --git a/arch/powerpc/lib/memcpy_power7.S b/arch/powerpc/lib/memcpy_power7.S
index 54f226333c94..b7c5e7fca8b9 100644
--- a/arch/powerpc/lib/memcpy_power7.S
+++ b/arch/powerpc/lib/memcpy_power7.S
@@ -218,7 +218,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
std r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
std r0,16(r1)
stdu r1,-STACKFRAMESIZE(r1)
- bl enter_vmx_ops
+ bl CFUNC(enter_vmx_ops)
cmpwi cr1,r3,0
ld r0,STACKFRAMESIZE+16(r1)
ld r3,STK_REG(R31)(r1)
@@ -244,15 +244,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
or r7,r7,r0
ori r10,r7,1 /* stream=1 */
- lis r8,0x8000 /* GO=1 */
- clrldi r8,r8,32
-
- dcbt 0,r6,0b01000
- dcbt 0,r7,0b01010
- dcbtst 0,r9,0b01000
- dcbtst 0,r10,0b01010
- eieio
- dcbt 0,r8,0b01010 /* GO */
+ DCBT_SETUP_STREAMS(r6, r7, r9, r10, r8)
beq cr1,.Lunwind_stack_nonvmx_copy
@@ -433,7 +425,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
15: addi r1,r1,STACKFRAMESIZE
ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
- b exit_vmx_ops /* tail call optimise */
+ b CFUNC(exit_vmx_ops) /* tail call optimise */
.Lvmx_unaligned_copy:
/* Get the destination 16B aligned */
@@ -637,5 +629,5 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
15: addi r1,r1,STACKFRAMESIZE
ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
- b exit_vmx_ops /* tail call optimise */
+ b CFUNC(exit_vmx_ops) /* tail call optimise */
#endif /* CONFIG_ALTIVEC */
diff --git a/arch/powerpc/lib/pmem.c b/arch/powerpc/lib/pmem.c
index 0666a8d29596..4e724c4c01ad 100644
--- a/arch/powerpc/lib/pmem.c
+++ b/arch/powerpc/lib/pmem.c
@@ -6,23 +6,60 @@
#include <linux/string.h>
#include <linux/export.h>
#include <linux/uaccess.h>
+#include <linux/libnvdimm.h>
#include <asm/cacheflush.h>
+static inline void __clean_pmem_range(unsigned long start, unsigned long stop)
+{
+ unsigned long shift = l1_dcache_shift();
+ unsigned long bytes = l1_dcache_bytes();
+ void *addr = (void *)(start & ~(bytes - 1));
+ unsigned long size = stop - (unsigned long)addr + (bytes - 1);
+ unsigned long i;
+
+ for (i = 0; i < size >> shift; i++, addr += bytes)
+ asm volatile(PPC_DCBSTPS(%0, %1): :"i"(0), "r"(addr): "memory");
+}
+
+static inline void __flush_pmem_range(unsigned long start, unsigned long stop)
+{
+ unsigned long shift = l1_dcache_shift();
+ unsigned long bytes = l1_dcache_bytes();
+ void *addr = (void *)(start & ~(bytes - 1));
+ unsigned long size = stop - (unsigned long)addr + (bytes - 1);
+ unsigned long i;
+
+ for (i = 0; i < size >> shift; i++, addr += bytes)
+ asm volatile(PPC_DCBFPS(%0, %1): :"i"(0), "r"(addr): "memory");
+}
+
+static inline void clean_pmem_range(unsigned long start, unsigned long stop)
+{
+ if (cpu_has_feature(CPU_FTR_ARCH_207S))
+ return __clean_pmem_range(start, stop);
+}
+
+static inline void flush_pmem_range(unsigned long start, unsigned long stop)
+{
+ if (cpu_has_feature(CPU_FTR_ARCH_207S))
+ return __flush_pmem_range(start, stop);
+}
+
/*
* CONFIG_ARCH_HAS_PMEM_API symbols
*/
void arch_wb_cache_pmem(void *addr, size_t size)
{
unsigned long start = (unsigned long) addr;
- flush_dcache_range(start, start + size);
+ clean_pmem_range(start, start + size);
}
EXPORT_SYMBOL_GPL(arch_wb_cache_pmem);
void arch_invalidate_pmem(void *addr, size_t size)
{
unsigned long start = (unsigned long) addr;
- flush_dcache_range(start, start + size);
+ flush_pmem_range(start, start + size);
}
EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
@@ -35,25 +72,16 @@ long __copy_from_user_flushcache(void *dest, const void __user *src,
unsigned long copied, start = (unsigned long) dest;
copied = __copy_from_user(dest, src, size);
- flush_dcache_range(start, start + size);
+ clean_pmem_range(start, start + size);
return copied;
}
-void *memcpy_flushcache(void *dest, const void *src, size_t size)
+void memcpy_flushcache(void *dest, const void *src, size_t size)
{
unsigned long start = (unsigned long) dest;
memcpy(dest, src, size);
- flush_dcache_range(start, start + size);
-
- return dest;
+ clean_pmem_range(start, start + size);
}
EXPORT_SYMBOL(memcpy_flushcache);
-
-void memcpy_page_flushcache(char *to, struct page *page, size_t offset,
- size_t len)
-{
- memcpy_flushcache(to, page_to_virt(page) + offset, len);
-}
-EXPORT_SYMBOL(memcpy_page_flushcache);
diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
new file mode 100644
index 000000000000..5de4dd549f6e
--- /dev/null
+++ b/arch/powerpc/lib/qspinlock.c
@@ -0,0 +1,989 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <linux/bug.h>
+#include <linux/compiler.h>
+#include <linux/export.h>
+#include <linux/percpu.h>
+#include <linux/processor.h>
+#include <linux/smp.h>
+#include <linux/topology.h>
+#include <linux/sched/clock.h>
+#include <asm/qspinlock.h>
+#include <asm/paravirt.h>
+
+#define MAX_NODES 4
+
+struct qnode {
+ struct qnode *next;
+ struct qspinlock *lock;
+ int cpu;
+ u8 sleepy; /* 1 if the previous vCPU was preempted or
+ * if the previous node was sleepy */
+ u8 locked; /* 1 if lock acquired */
+};
+
+struct qnodes {
+ int count;
+ struct qnode nodes[MAX_NODES];
+};
+
+/* Tuning parameters */
+static int steal_spins __read_mostly = (1 << 5);
+static int remote_steal_spins __read_mostly = (1 << 2);
+#if _Q_SPIN_TRY_LOCK_STEAL == 1
+static const bool maybe_stealers = true;
+#else
+static bool maybe_stealers __read_mostly = true;
+#endif
+static int head_spins __read_mostly = (1 << 8);
+
+static bool pv_yield_owner __read_mostly = true;
+static bool pv_yield_allow_steal __read_mostly = false;
+static bool pv_spin_on_preempted_owner __read_mostly = false;
+static bool pv_sleepy_lock __read_mostly = true;
+static bool pv_sleepy_lock_sticky __read_mostly = false;
+static u64 pv_sleepy_lock_interval_ns __read_mostly = 0;
+static int pv_sleepy_lock_factor __read_mostly = 256;
+static bool pv_yield_prev __read_mostly = true;
+static bool pv_yield_sleepy_owner __read_mostly = true;
+static bool pv_prod_head __read_mostly = false;
+
+static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
+static DEFINE_PER_CPU_ALIGNED(u64, sleepy_lock_seen_clock);
+
+#if _Q_SPIN_SPEC_BARRIER == 1
+#define spec_barrier() do { asm volatile("ori 31,31,0" ::: "memory"); } while (0)
+#else
+#define spec_barrier() do { } while (0)
+#endif
+
+static __always_inline bool recently_sleepy(void)
+{
+ /* pv_sleepy_lock is true when this is called */
+ if (pv_sleepy_lock_interval_ns) {
+ u64 seen = this_cpu_read(sleepy_lock_seen_clock);
+
+ if (seen) {
+ u64 delta = sched_clock() - seen;
+ if (delta < pv_sleepy_lock_interval_ns)
+ return true;
+ this_cpu_write(sleepy_lock_seen_clock, 0);
+ }
+ }
+
+ return false;
+}
+
+static __always_inline int get_steal_spins(bool paravirt, bool sleepy)
+{
+ if (paravirt && sleepy)
+ return steal_spins * pv_sleepy_lock_factor;
+ else
+ return steal_spins;
+}
+
+static __always_inline int get_remote_steal_spins(bool paravirt, bool sleepy)
+{
+ if (paravirt && sleepy)
+ return remote_steal_spins * pv_sleepy_lock_factor;
+ else
+ return remote_steal_spins;
+}
+
+static __always_inline int get_head_spins(bool paravirt, bool sleepy)
+{
+ if (paravirt && sleepy)
+ return head_spins * pv_sleepy_lock_factor;
+ else
+ return head_spins;
+}
+
+static inline u32 encode_tail_cpu(int cpu)
+{
+ return (cpu + 1) << _Q_TAIL_CPU_OFFSET;
+}
+
+static inline int decode_tail_cpu(u32 val)
+{
+ return (val >> _Q_TAIL_CPU_OFFSET) - 1;
+}
+
+static inline int get_owner_cpu(u32 val)
+{
+ return (val & _Q_OWNER_CPU_MASK) >> _Q_OWNER_CPU_OFFSET;
+}
+
+/*
+ * Try to acquire the lock if it was not already locked. If the tail matches
+ * mytail then clear it, otherwise leave it unchanged. Return previous value.
+ *
+ * This is used by the head of the queue to acquire the lock and clean up
+ * its tail if it was the last one queued.
+ */
+static __always_inline u32 trylock_clean_tail(struct qspinlock *lock, u32 tail)
+{
+ u32 newval = queued_spin_encode_locked_val();
+ u32 prev, tmp;
+
+ asm volatile(
+"1: lwarx %0,0,%2,%7 # trylock_clean_tail \n"
+ /* This test is necessary if there could be stealers */
+" andi. %1,%0,%5 \n"
+" bne 3f \n"
+ /* Test whether the lock tail == mytail */
+" and %1,%0,%6 \n"
+" cmpw 0,%1,%3 \n"
+ /* Merge the new locked value */
+" or %1,%1,%4 \n"
+" bne 2f \n"
+ /* If the lock tail matched, then clear it, otherwise leave it. */
+" andc %1,%1,%6 \n"
+"2: stwcx. %1,0,%2 \n"
+" bne- 1b \n"
+"\t" PPC_ACQUIRE_BARRIER " \n"
+"3: \n"
+ : "=&r" (prev), "=&r" (tmp)
+ : "r" (&lock->val), "r"(tail), "r" (newval),
+ "i" (_Q_LOCKED_VAL),
+ "r" (_Q_TAIL_CPU_MASK),
+ "i" (_Q_SPIN_EH_HINT)
+ : "cr0", "memory");
+
+ return prev;
+}
+
+/*
+ * Publish our tail, replacing previous tail. Return previous value.
+ *
+ * This provides a release barrier for publishing node, this pairs with the
+ * acquire barrier in get_tail_qnode() when the next CPU finds this tail
+ * value.
+ */
+static __always_inline u32 publish_tail_cpu(struct qspinlock *lock, u32 tail)
+{
+ u32 prev, tmp;
+
+ kcsan_release();
+
+ asm volatile(
+"\t" PPC_RELEASE_BARRIER " \n"
+"1: lwarx %0,0,%2 # publish_tail_cpu \n"
+" andc %1,%0,%4 \n"
+" or %1,%1,%3 \n"
+" stwcx. %1,0,%2 \n"
+" bne- 1b \n"
+ : "=&r" (prev), "=&r"(tmp)
+ : "r" (&lock->val), "r" (tail), "r"(_Q_TAIL_CPU_MASK)
+ : "cr0", "memory");
+
+ return prev;
+}
+
+static __always_inline u32 set_mustq(struct qspinlock *lock)
+{
+ u32 prev;
+
+ asm volatile(
+"1: lwarx %0,0,%1 # set_mustq \n"
+" or %0,%0,%2 \n"
+" stwcx. %0,0,%1 \n"
+" bne- 1b \n"
+ : "=&r" (prev)
+ : "r" (&lock->val), "r" (_Q_MUST_Q_VAL)
+ : "cr0", "memory");
+
+ return prev;
+}
+
+static __always_inline u32 clear_mustq(struct qspinlock *lock)
+{
+ u32 prev;
+
+ asm volatile(
+"1: lwarx %0,0,%1 # clear_mustq \n"
+" andc %0,%0,%2 \n"
+" stwcx. %0,0,%1 \n"
+" bne- 1b \n"
+ : "=&r" (prev)
+ : "r" (&lock->val), "r" (_Q_MUST_Q_VAL)
+ : "cr0", "memory");
+
+ return prev;
+}
+
+static __always_inline bool try_set_sleepy(struct qspinlock *lock, u32 old)
+{
+ u32 prev;
+ u32 new = old | _Q_SLEEPY_VAL;
+
+ BUG_ON(!(old & _Q_LOCKED_VAL));
+ BUG_ON(old & _Q_SLEEPY_VAL);
+
+ asm volatile(
+"1: lwarx %0,0,%1 # try_set_sleepy \n"
+" cmpw 0,%0,%2 \n"
+" bne- 2f \n"
+" stwcx. %3,0,%1 \n"
+" bne- 1b \n"
+"2: \n"
+ : "=&r" (prev)
+ : "r" (&lock->val), "r"(old), "r" (new)
+ : "cr0", "memory");
+
+ return likely(prev == old);
+}
+
+static __always_inline void seen_sleepy_owner(struct qspinlock *lock, u32 val)
+{
+ if (pv_sleepy_lock) {
+ if (pv_sleepy_lock_interval_ns)
+ this_cpu_write(sleepy_lock_seen_clock, sched_clock());
+ if (!(val & _Q_SLEEPY_VAL))
+ try_set_sleepy(lock, val);
+ }
+}
+
+static __always_inline void seen_sleepy_lock(void)
+{
+ if (pv_sleepy_lock && pv_sleepy_lock_interval_ns)
+ this_cpu_write(sleepy_lock_seen_clock, sched_clock());
+}
+
+static __always_inline void seen_sleepy_node(void)
+{
+ if (pv_sleepy_lock) {
+ if (pv_sleepy_lock_interval_ns)
+ this_cpu_write(sleepy_lock_seen_clock, sched_clock());
+ /* Don't set sleepy because we likely have a stale val */
+ }
+}
+
+static struct qnode *get_tail_qnode(struct qspinlock *lock, int prev_cpu)
+{
+ struct qnodes *qnodesp = per_cpu_ptr(&qnodes, prev_cpu);
+ int idx;
+
+ /*
+ * After publishing the new tail and finding a previous tail in the
+ * previous val (which is the control dependency), this barrier
+ * orders the release barrier in publish_tail_cpu performed by the
+ * last CPU, with subsequently looking at its qnode structures
+ * after the barrier.
+ */
+ smp_acquire__after_ctrl_dep();
+
+ for (idx = 0; idx < MAX_NODES; idx++) {
+ struct qnode *qnode = &qnodesp->nodes[idx];
+ if (qnode->lock == lock)
+ return qnode;
+ }
+
+ BUG();
+}
+
+/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
+static __always_inline bool __yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool mustq)
+{
+ int owner;
+ u32 yield_count;
+ bool preempted = false;
+
+ BUG_ON(!(val & _Q_LOCKED_VAL));
+
+ if (!paravirt)
+ goto relax;
+
+ if (!pv_yield_owner)
+ goto relax;
+
+ owner = get_owner_cpu(val);
+ yield_count = yield_count_of(owner);
+
+ if ((yield_count & 1) == 0)
+ goto relax; /* owner vcpu is running */
+
+ spin_end();
+
+ seen_sleepy_owner(lock, val);
+ preempted = true;
+
+ /*
+ * Read the lock word after sampling the yield count. On the other side
+ * there may not be a wmb because the yield count update is done by the
+ * hypervisor preemption and the value update by the OS, however this
+ * ordering might reduce the chance of out of order accesses and
+ * improve the heuristic.
+ */
+ smp_rmb();
+
+ if (READ_ONCE(lock->val) == val) {
+ if (mustq)
+ clear_mustq(lock);
+ yield_to_preempted(owner, yield_count);
+ if (mustq)
+ set_mustq(lock);
+ spin_begin();
+
+ /* Don't relax if we yielded. Maybe we should? */
+ return preempted;
+ }
+ spin_begin();
+relax:
+ spin_cpu_relax();
+
+ return preempted;
+}
+
+/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
+static __always_inline bool yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
+{
+ return __yield_to_locked_owner(lock, val, paravirt, false);
+}
+
+/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
+static __always_inline bool yield_head_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
+{
+ bool mustq = false;
+
+ if ((val & _Q_MUST_Q_VAL) && pv_yield_allow_steal)
+ mustq = true;
+
+ return __yield_to_locked_owner(lock, val, paravirt, mustq);
+}
+
+static __always_inline void propagate_sleepy(struct qnode *node, u32 val, bool paravirt)
+{
+ struct qnode *next;
+ int owner;
+
+ if (!paravirt)
+ return;
+ if (!pv_yield_sleepy_owner)
+ return;
+
+ next = READ_ONCE(node->next);
+ if (!next)
+ return;
+
+ if (next->sleepy)
+ return;
+
+ owner = get_owner_cpu(val);
+ if (vcpu_is_preempted(owner))
+ next->sleepy = 1;
+}
+
+/* Called inside spin_begin() */
+static __always_inline bool yield_to_prev(struct qspinlock *lock, struct qnode *node, int prev_cpu, bool paravirt)
+{
+ u32 yield_count;
+ bool preempted = false;
+
+ if (!paravirt)
+ goto relax;
+
+ if (!pv_yield_sleepy_owner)
+ goto yield_prev;
+
+ /*
+ * If the previous waiter was preempted it might not be able to
+ * propagate sleepy to us, so check the lock in that case too.
+ */
+ if (node->sleepy || vcpu_is_preempted(prev_cpu)) {
+ u32 val = READ_ONCE(lock->val);
+
+ if (val & _Q_LOCKED_VAL) {
+ if (node->next && !node->next->sleepy) {
+ /*
+ * Propagate sleepy to next waiter. Only if
+ * owner is preempted, which allows the queue
+ * to become "non-sleepy" if vCPU preemption
+ * ceases to occur, even if the lock remains
+ * highly contended.
+ */
+ if (vcpu_is_preempted(get_owner_cpu(val)))
+ node->next->sleepy = 1;
+ }
+
+ preempted = yield_to_locked_owner(lock, val, paravirt);
+ if (preempted)
+ return preempted;
+ }
+ node->sleepy = false;
+ }
+
+yield_prev:
+ if (!pv_yield_prev)
+ goto relax;
+
+ yield_count = yield_count_of(prev_cpu);
+ if ((yield_count & 1) == 0)
+ goto relax; /* prev vcpu is running */
+
+ spin_end();
+
+ preempted = true;
+ seen_sleepy_node();
+
+ smp_rmb(); /* See __yield_to_locked_owner comment */
+
+ if (!READ_ONCE(node->locked)) {
+ yield_to_preempted(prev_cpu, yield_count);
+ spin_begin();
+ return preempted;
+ }
+ spin_begin();
+
+relax:
+ spin_cpu_relax();
+
+ return preempted;
+}
+
+static __always_inline bool steal_break(u32 val, int iters, bool paravirt, bool sleepy)
+{
+ if (iters >= get_steal_spins(paravirt, sleepy))
+ return true;
+
+ if (IS_ENABLED(CONFIG_NUMA) &&
+ (iters >= get_remote_steal_spins(paravirt, sleepy))) {
+ int cpu = get_owner_cpu(val);
+ if (numa_node_id() != cpu_to_node(cpu))
+ return true;
+ }
+ return false;
+}
+
+static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt)
+{
+ bool seen_preempted = false;
+ bool sleepy = false;
+ int iters = 0;
+ u32 val;
+
+ if (!steal_spins) {
+ /* XXX: should spin_on_preempted_owner do anything here? */
+ return false;
+ }
+
+ /* Attempt to steal the lock */
+ spin_begin();
+ do {
+ bool preempted = false;
+
+ val = READ_ONCE(lock->val);
+ if (val & _Q_MUST_Q_VAL)
+ break;
+ spec_barrier();
+
+ if (unlikely(!(val & _Q_LOCKED_VAL))) {
+ spin_end();
+ if (__queued_spin_trylock_steal(lock))
+ return true;
+ spin_begin();
+ } else {
+ preempted = yield_to_locked_owner(lock, val, paravirt);
+ }
+
+ if (paravirt && pv_sleepy_lock) {
+ if (!sleepy) {
+ if (val & _Q_SLEEPY_VAL) {
+ seen_sleepy_lock();
+ sleepy = true;
+ } else if (recently_sleepy()) {
+ sleepy = true;
+ }
+ }
+ if (pv_sleepy_lock_sticky && seen_preempted &&
+ !(val & _Q_SLEEPY_VAL)) {
+ if (try_set_sleepy(lock, val))
+ val |= _Q_SLEEPY_VAL;
+ }
+ }
+
+ if (preempted) {
+ seen_preempted = true;
+ sleepy = true;
+ if (!pv_spin_on_preempted_owner)
+ iters++;
+ /*
+ * With pv_spin_on_preempted_owner, don't increase iters
+ * while the owner is preempted -- we won't interfere
+ * with it by definition. This could introduce some
+ * latency issue if we continually observe preempted
+ * owners, but hopefully that's a rare corner case of
+ * a badly oversubscribed system.
+ */
+ } else {
+ iters++;
+ }
+ } while (!steal_break(val, iters, paravirt, sleepy));
+
+ spin_end();
+
+ return false;
+}
+
+static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, bool paravirt)
+{
+ struct qnodes *qnodesp;
+ struct qnode *next, *node;
+ u32 val, old, tail;
+ bool seen_preempted = false;
+ bool sleepy = false;
+ bool mustq = false;
+ int idx;
+ int iters = 0;
+
+ BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
+
+ qnodesp = this_cpu_ptr(&qnodes);
+ if (unlikely(qnodesp->count >= MAX_NODES)) {
+ spec_barrier();
+ while (!queued_spin_trylock(lock))
+ cpu_relax();
+ return;
+ }
+
+ idx = qnodesp->count++;
+ /*
+ * Ensure that we increment qnodesp->count before initialising
+ * the actual node. If the compiler is kind enough to reorder these
+ * stores, then an IRQ could overwrite our assignments.
+ */
+ barrier();
+ node = &qnodesp->nodes[idx];
+ node->next = NULL;
+ node->lock = lock;
+ node->cpu = smp_processor_id();
+ node->sleepy = 0;
+ node->locked = 0;
+
+ tail = encode_tail_cpu(node->cpu);
+
+ /*
+ * Assign all attributes of a node before it can be published.
+ * Issues an lwsync, serving as a release barrier, as well as a
+ * compiler barrier.
+ */
+ old = publish_tail_cpu(lock, tail);
+
+ /*
+ * If there was a previous node; link it and wait until reaching the
+ * head of the waitqueue.
+ */
+ if (old & _Q_TAIL_CPU_MASK) {
+ int prev_cpu = decode_tail_cpu(old);
+ struct qnode *prev = get_tail_qnode(lock, prev_cpu);
+
+ /* Link @node into the waitqueue. */
+ WRITE_ONCE(prev->next, node);
+
+ /* Wait for mcs node lock to be released */
+ spin_begin();
+ while (!READ_ONCE(node->locked)) {
+ spec_barrier();
+
+ if (yield_to_prev(lock, node, prev_cpu, paravirt))
+ seen_preempted = true;
+ }
+ spec_barrier();
+ spin_end();
+
+ smp_rmb(); /* acquire barrier for the mcs lock */
+
+ /*
+ * Generic qspinlocks have this prefetch here, but it seems
+ * like it could cause additional line transitions because
+ * the waiter will keep loading from it.
+ */
+ if (_Q_SPIN_PREFETCH_NEXT) {
+ next = READ_ONCE(node->next);
+ if (next)
+ prefetchw(next);
+ }
+ }
+
+ /* We're at the head of the waitqueue, wait for the lock. */
+again:
+ spin_begin();
+ for (;;) {
+ bool preempted;
+
+ val = READ_ONCE(lock->val);
+ if (!(val & _Q_LOCKED_VAL))
+ break;
+ spec_barrier();
+
+ if (paravirt && pv_sleepy_lock && maybe_stealers) {
+ if (!sleepy) {
+ if (val & _Q_SLEEPY_VAL) {
+ seen_sleepy_lock();
+ sleepy = true;
+ } else if (recently_sleepy()) {
+ sleepy = true;
+ }
+ }
+ if (pv_sleepy_lock_sticky && seen_preempted &&
+ !(val & _Q_SLEEPY_VAL)) {
+ if (try_set_sleepy(lock, val))
+ val |= _Q_SLEEPY_VAL;
+ }
+ }
+
+ propagate_sleepy(node, val, paravirt);
+ preempted = yield_head_to_locked_owner(lock, val, paravirt);
+ if (!maybe_stealers)
+ continue;
+
+ if (preempted)
+ seen_preempted = true;
+
+ if (paravirt && preempted) {
+ sleepy = true;
+
+ if (!pv_spin_on_preempted_owner)
+ iters++;
+ } else {
+ iters++;
+ }
+
+ if (!mustq && iters >= get_head_spins(paravirt, sleepy)) {
+ mustq = true;
+ set_mustq(lock);
+ val |= _Q_MUST_Q_VAL;
+ }
+ }
+ spec_barrier();
+ spin_end();
+
+ /* If we're the last queued, must clean up the tail. */
+ old = trylock_clean_tail(lock, tail);
+ if (unlikely(old & _Q_LOCKED_VAL)) {
+ BUG_ON(!maybe_stealers);
+ goto again; /* Can only be true if maybe_stealers. */
+ }
+
+ if ((old & _Q_TAIL_CPU_MASK) == tail)
+ goto release; /* We were the tail, no next. */
+
+ /* There is a next, must wait for node->next != NULL (MCS protocol) */
+ next = READ_ONCE(node->next);
+ if (!next) {
+ spin_begin();
+ while (!(next = READ_ONCE(node->next)))
+ cpu_relax();
+ spin_end();
+ }
+ spec_barrier();
+
+ /*
+ * Unlock the next mcs waiter node. Release barrier is not required
+ * here because the acquirer is only accessing the lock word, and
+ * the acquire barrier we took the lock with orders that update vs
+ * this store to locked. The corresponding barrier is the smp_rmb()
+ * acquire barrier for mcs lock, above.
+ */
+ if (paravirt && pv_prod_head) {
+ int next_cpu = next->cpu;
+ WRITE_ONCE(next->locked, 1);
+ if (_Q_SPIN_MISO)
+ asm volatile("miso" ::: "memory");
+ if (vcpu_is_preempted(next_cpu))
+ prod_cpu(next_cpu);
+ } else {
+ WRITE_ONCE(next->locked, 1);
+ if (_Q_SPIN_MISO)
+ asm volatile("miso" ::: "memory");
+ }
+
+release:
+ qnodesp->count--; /* release the node */
+}
+
+void queued_spin_lock_slowpath(struct qspinlock *lock)
+{
+ /*
+ * This looks funny, but it induces the compiler to inline both
+ * sides of the branch rather than share code as when the condition
+ * is passed as the paravirt argument to the functions.
+ */
+ if (IS_ENABLED(CONFIG_PARAVIRT_SPINLOCKS) && is_shared_processor()) {
+ if (try_to_steal_lock(lock, true)) {
+ spec_barrier();
+ return;
+ }
+ queued_spin_lock_mcs_queue(lock, true);
+ } else {
+ if (try_to_steal_lock(lock, false)) {
+ spec_barrier();
+ return;
+ }
+ queued_spin_lock_mcs_queue(lock, false);
+ }
+}
+EXPORT_SYMBOL(queued_spin_lock_slowpath);
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+void pv_spinlocks_init(void)
+{
+}
+#endif
+
+#include <linux/debugfs.h>
+static int steal_spins_set(void *data, u64 val)
+{
+#if _Q_SPIN_TRY_LOCK_STEAL == 1
+ /* maybe_stealers remains true */
+ steal_spins = val;
+#else
+ static DEFINE_MUTEX(lock);
+
+ /*
+ * The lock slow path has a !maybe_stealers case that can assume
+ * the head of queue will not see concurrent waiters. That waiter
+ * is unsafe in the presence of stealers, so must keep them away
+ * from one another.
+ */
+
+ mutex_lock(&lock);
+ if (val && !steal_spins) {
+ maybe_stealers = true;
+ /* wait for queue head waiter to go away */
+ synchronize_rcu();
+ steal_spins = val;
+ } else if (!val && steal_spins) {
+ steal_spins = val;
+ /* wait for all possible stealers to go away */
+ synchronize_rcu();
+ maybe_stealers = false;
+ } else {
+ steal_spins = val;
+ }
+ mutex_unlock(&lock);
+#endif
+
+ return 0;
+}
+
+static int steal_spins_get(void *data, u64 *val)
+{
+ *val = steal_spins;
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_steal_spins, steal_spins_get, steal_spins_set, "%llu\n");
+
+static int remote_steal_spins_set(void *data, u64 val)
+{
+ remote_steal_spins = val;
+
+ return 0;
+}
+
+static int remote_steal_spins_get(void *data, u64 *val)
+{
+ *val = remote_steal_spins;
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_remote_steal_spins, remote_steal_spins_get, remote_steal_spins_set, "%llu\n");
+
+static int head_spins_set(void *data, u64 val)
+{
+ head_spins = val;
+
+ return 0;
+}
+
+static int head_spins_get(void *data, u64 *val)
+{
+ *val = head_spins;
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_head_spins, head_spins_get, head_spins_set, "%llu\n");
+
+static int pv_yield_owner_set(void *data, u64 val)
+{
+ pv_yield_owner = !!val;
+
+ return 0;
+}
+
+static int pv_yield_owner_get(void *data, u64 *val)
+{
+ *val = pv_yield_owner;
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_owner, pv_yield_owner_get, pv_yield_owner_set, "%llu\n");
+
+static int pv_yield_allow_steal_set(void *data, u64 val)
+{
+ pv_yield_allow_steal = !!val;
+
+ return 0;
+}
+
+static int pv_yield_allow_steal_get(void *data, u64 *val)
+{
+ *val = pv_yield_allow_steal;
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_allow_steal, pv_yield_allow_steal_get, pv_yield_allow_steal_set, "%llu\n");
+
+static int pv_spin_on_preempted_owner_set(void *data, u64 val)
+{
+ pv_spin_on_preempted_owner = !!val;
+
+ return 0;
+}
+
+static int pv_spin_on_preempted_owner_get(void *data, u64 *val)
+{
+ *val = pv_spin_on_preempted_owner;
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_spin_on_preempted_owner, pv_spin_on_preempted_owner_get, pv_spin_on_preempted_owner_set, "%llu\n");
+
+static int pv_sleepy_lock_set(void *data, u64 val)
+{
+ pv_sleepy_lock = !!val;
+
+ return 0;
+}
+
+static int pv_sleepy_lock_get(void *data, u64 *val)
+{
+ *val = pv_sleepy_lock;
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock, pv_sleepy_lock_get, pv_sleepy_lock_set, "%llu\n");
+
+static int pv_sleepy_lock_sticky_set(void *data, u64 val)
+{
+ pv_sleepy_lock_sticky = !!val;
+
+ return 0;
+}
+
+static int pv_sleepy_lock_sticky_get(void *data, u64 *val)
+{
+ *val = pv_sleepy_lock_sticky;
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_sticky, pv_sleepy_lock_sticky_get, pv_sleepy_lock_sticky_set, "%llu\n");
+
+static int pv_sleepy_lock_interval_ns_set(void *data, u64 val)
+{
+ pv_sleepy_lock_interval_ns = val;
+
+ return 0;
+}
+
+static int pv_sleepy_lock_interval_ns_get(void *data, u64 *val)
+{
+ *val = pv_sleepy_lock_interval_ns;
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_interval_ns, pv_sleepy_lock_interval_ns_get, pv_sleepy_lock_interval_ns_set, "%llu\n");
+
+static int pv_sleepy_lock_factor_set(void *data, u64 val)
+{
+ pv_sleepy_lock_factor = val;
+
+ return 0;
+}
+
+static int pv_sleepy_lock_factor_get(void *data, u64 *val)
+{
+ *val = pv_sleepy_lock_factor;
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_factor, pv_sleepy_lock_factor_get, pv_sleepy_lock_factor_set, "%llu\n");
+
+static int pv_yield_prev_set(void *data, u64 val)
+{
+ pv_yield_prev = !!val;
+
+ return 0;
+}
+
+static int pv_yield_prev_get(void *data, u64 *val)
+{
+ *val = pv_yield_prev;
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_prev, pv_yield_prev_get, pv_yield_prev_set, "%llu\n");
+
+static int pv_yield_sleepy_owner_set(void *data, u64 val)
+{
+ pv_yield_sleepy_owner = !!val;
+
+ return 0;
+}
+
+static int pv_yield_sleepy_owner_get(void *data, u64 *val)
+{
+ *val = pv_yield_sleepy_owner;
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_sleepy_owner, pv_yield_sleepy_owner_get, pv_yield_sleepy_owner_set, "%llu\n");
+
+static int pv_prod_head_set(void *data, u64 val)
+{
+ pv_prod_head = !!val;
+
+ return 0;
+}
+
+static int pv_prod_head_get(void *data, u64 *val)
+{
+ *val = pv_prod_head;
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_prod_head, pv_prod_head_get, pv_prod_head_set, "%llu\n");
+
+static __init int spinlock_debugfs_init(void)
+{
+ debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins);
+ debugfs_create_file("qspl_remote_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_remote_steal_spins);
+ debugfs_create_file("qspl_head_spins", 0600, arch_debugfs_dir, NULL, &fops_head_spins);
+ if (is_shared_processor()) {
+ debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner);
+ debugfs_create_file("qspl_pv_yield_allow_steal", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_allow_steal);
+ debugfs_create_file("qspl_pv_spin_on_preempted_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_spin_on_preempted_owner);
+ debugfs_create_file("qspl_pv_sleepy_lock", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock);
+ debugfs_create_file("qspl_pv_sleepy_lock_sticky", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_sticky);
+ debugfs_create_file("qspl_pv_sleepy_lock_interval_ns", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_interval_ns);
+ debugfs_create_file("qspl_pv_sleepy_lock_factor", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_factor);
+ debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev);
+ debugfs_create_file("qspl_pv_yield_sleepy_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_sleepy_owner);
+ debugfs_create_file("qspl_pv_prod_head", 0600, arch_debugfs_dir, NULL, &fops_pv_prod_head);
+ }
+
+ return 0;
+}
+device_initcall(spinlock_debugfs_init);
diff --git a/arch/powerpc/lib/restart_table.c b/arch/powerpc/lib/restart_table.c
new file mode 100644
index 000000000000..bccb662c1b7b
--- /dev/null
+++ b/arch/powerpc/lib/restart_table.c
@@ -0,0 +1,56 @@
+#include <asm/interrupt.h>
+#include <asm/kprobes.h>
+
+struct soft_mask_table_entry {
+ unsigned long start;
+ unsigned long end;
+};
+
+struct restart_table_entry {
+ unsigned long start;
+ unsigned long end;
+ unsigned long fixup;
+};
+
+extern struct soft_mask_table_entry __start___soft_mask_table[];
+extern struct soft_mask_table_entry __stop___soft_mask_table[];
+
+extern struct restart_table_entry __start___restart_table[];
+extern struct restart_table_entry __stop___restart_table[];
+
+/* Given an address, look for it in the soft mask table */
+bool search_kernel_soft_mask_table(unsigned long addr)
+{
+ struct soft_mask_table_entry *smte = __start___soft_mask_table;
+
+ while (smte < __stop___soft_mask_table) {
+ unsigned long start = smte->start;
+ unsigned long end = smte->end;
+
+ if (addr >= start && addr < end)
+ return true;
+
+ smte++;
+ }
+ return false;
+}
+NOKPROBE_SYMBOL(search_kernel_soft_mask_table);
+
+/* Given an address, look for it in the kernel restart table */
+unsigned long search_kernel_restart_table(unsigned long addr)
+{
+ struct restart_table_entry *rte = __start___restart_table;
+
+ while (rte < __stop___restart_table) {
+ unsigned long start = rte->start;
+ unsigned long end = rte->end;
+ unsigned long fixup = rte->fixup;
+
+ if (addr >= start && addr < end)
+ return fixup;
+
+ rte++;
+ }
+ return 0;
+}
+NOKPROBE_SYMBOL(search_kernel_restart_table);
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index c077acb983a1..e65f3fb68d06 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -13,8 +13,7 @@
#include <linux/uaccess.h>
#include <asm/cpu_has_feature.h>
#include <asm/cputable.h>
-
-extern char system_call_common[];
+#include <asm/disassemble.h>
#ifdef CONFIG_PPC64
/* Bits in SRR1 that are copied from MSR */
@@ -30,6 +29,10 @@ extern char system_call_common[];
#define XER_OV32 0x00080000U
#define XER_CA32 0x00040000U
+#ifdef CONFIG_VSX
+#define VSX_REGISTER_XTP(rd) ((((rd) & 1) << 5) | ((rd) & 0xfe))
+#endif
+
#ifdef CONFIG_PPC_FPU
/*
* Functions in ldstfp.S
@@ -69,10 +72,8 @@ extern int do_stqcx(unsigned long ea, unsigned long val0, unsigned long val1,
static nokprobe_inline unsigned long truncate_if_32bit(unsigned long msr,
unsigned long val)
{
-#ifdef __powerpc64__
if ((msr & MSR_64BIT) == 0)
val &= 0xffffffffUL;
-#endif
return val;
}
@@ -106,11 +107,11 @@ static nokprobe_inline long address_ok(struct pt_regs *regs,
{
if (!user_mode(regs))
return 1;
- if (__access_ok(ea, nb, USER_DS))
+ if (access_ok((void __user *)ea, nb))
return 1;
- if (__access_ok(ea, 1, USER_DS))
+ if (access_ok((void __user *)ea, 1))
/* Access overlaps the end of the user region */
- regs->dar = USER_DS.seg;
+ regs->dar = TASK_SIZE_MAX - 1;
else
regs->dar = ea;
return 0;
@@ -188,6 +189,47 @@ static nokprobe_inline unsigned long xform_ea(unsigned int instr,
}
/*
+ * Calculate effective address for a MLS:D-form / 8LS:D-form
+ * prefixed instruction
+ */
+static nokprobe_inline unsigned long mlsd_8lsd_ea(unsigned int instr,
+ unsigned int suffix,
+ const struct pt_regs *regs)
+{
+ int ra, prefix_r;
+ unsigned int dd;
+ unsigned long ea, d0, d1, d;
+
+ prefix_r = GET_PREFIX_R(instr);
+ ra = GET_PREFIX_RA(suffix);
+
+ d0 = instr & 0x3ffff;
+ d1 = suffix & 0xffff;
+ d = (d0 << 16) | d1;
+
+ /*
+ * sign extend a 34 bit number
+ */
+ dd = (unsigned int)(d >> 2);
+ ea = (signed int)dd;
+ ea = (ea << 2) | (d & 0x3);
+
+ if (!prefix_r && ra)
+ ea += regs->gpr[ra];
+ else if (!prefix_r && !ra)
+ ; /* Leave ea as is */
+ else if (prefix_r)
+ ea += regs->nip;
+
+ /*
+ * (prefix_r && ra) is an invalid form. Should already be
+ * checked for by caller!
+ */
+
+ return ea;
+}
+
+/*
* Return the largest power of 2, not greater than sizeof(unsigned long),
* such that x is a multiple of it.
*/
@@ -236,39 +278,70 @@ static nokprobe_inline void do_byte_reverse(void *ptr, int nb)
up[1] = tmp;
break;
}
+ case 32: {
+ unsigned long *up = (unsigned long *)ptr;
+ unsigned long tmp;
+
+ tmp = byterev_8(up[0]);
+ up[0] = byterev_8(up[3]);
+ up[3] = tmp;
+ tmp = byterev_8(up[2]);
+ up[2] = byterev_8(up[1]);
+ up[1] = tmp;
+ break;
+ }
+
#endif
default:
WARN_ON_ONCE(1);
}
}
-static nokprobe_inline int read_mem_aligned(unsigned long *dest,
- unsigned long ea, int nb,
- struct pt_regs *regs)
+static __always_inline int
+__read_mem_aligned(unsigned long *dest, unsigned long ea, int nb, struct pt_regs *regs)
{
- int err = 0;
unsigned long x = 0;
switch (nb) {
case 1:
- err = __get_user(x, (unsigned char __user *) ea);
+ unsafe_get_user(x, (unsigned char __user *)ea, Efault);
break;
case 2:
- err = __get_user(x, (unsigned short __user *) ea);
+ unsafe_get_user(x, (unsigned short __user *)ea, Efault);
break;
case 4:
- err = __get_user(x, (unsigned int __user *) ea);
+ unsafe_get_user(x, (unsigned int __user *)ea, Efault);
break;
#ifdef __powerpc64__
case 8:
- err = __get_user(x, (unsigned long __user *) ea);
+ unsafe_get_user(x, (unsigned long __user *)ea, Efault);
break;
#endif
}
- if (!err)
- *dest = x;
- else
+ *dest = x;
+ return 0;
+
+Efault:
+ regs->dar = ea;
+ return -EFAULT;
+}
+
+static nokprobe_inline int
+read_mem_aligned(unsigned long *dest, unsigned long ea, int nb, struct pt_regs *regs)
+{
+ int err;
+
+ if (is_kernel_addr(ea))
+ return __read_mem_aligned(dest, ea, nb, regs);
+
+ if (user_read_access_begin((void __user *)ea, nb)) {
+ err = __read_mem_aligned(dest, ea, nb, regs);
+ user_read_access_end();
+ } else {
+ err = -EFAULT;
regs->dar = ea;
+ }
+
return err;
}
@@ -276,10 +349,8 @@ static nokprobe_inline int read_mem_aligned(unsigned long *dest,
* Copy from userspace to a buffer, using the largest possible
* aligned accesses, up to sizeof(long).
*/
-static nokprobe_inline int copy_mem_in(u8 *dest, unsigned long ea, int nb,
- struct pt_regs *regs)
+static __always_inline int __copy_mem_in(u8 *dest, unsigned long ea, int nb, struct pt_regs *regs)
{
- int err = 0;
int c;
for (; nb > 0; nb -= c) {
@@ -288,31 +359,46 @@ static nokprobe_inline int copy_mem_in(u8 *dest, unsigned long ea, int nb,
c = max_align(nb);
switch (c) {
case 1:
- err = __get_user(*dest, (unsigned char __user *) ea);
+ unsafe_get_user(*dest, (u8 __user *)ea, Efault);
break;
case 2:
- err = __get_user(*(u16 *)dest,
- (unsigned short __user *) ea);
+ unsafe_get_user(*(u16 *)dest, (u16 __user *)ea, Efault);
break;
case 4:
- err = __get_user(*(u32 *)dest,
- (unsigned int __user *) ea);
+ unsafe_get_user(*(u32 *)dest, (u32 __user *)ea, Efault);
break;
#ifdef __powerpc64__
case 8:
- err = __get_user(*(unsigned long *)dest,
- (unsigned long __user *) ea);
+ unsafe_get_user(*(u64 *)dest, (u64 __user *)ea, Efault);
break;
#endif
}
- if (err) {
- regs->dar = ea;
- return err;
- }
dest += c;
ea += c;
}
return 0;
+
+Efault:
+ regs->dar = ea;
+ return -EFAULT;
+}
+
+static nokprobe_inline int copy_mem_in(u8 *dest, unsigned long ea, int nb, struct pt_regs *regs)
+{
+ int err;
+
+ if (is_kernel_addr(ea))
+ return __copy_mem_in(dest, ea, nb, regs);
+
+ if (user_read_access_begin((void __user *)ea, nb)) {
+ err = __copy_mem_in(dest, ea, nb, regs);
+ user_read_access_end();
+ } else {
+ err = -EFAULT;
+ regs->dar = ea;
+ }
+
+ return err;
}
static nokprobe_inline int read_mem_unaligned(unsigned long *dest,
@@ -350,30 +436,48 @@ static int read_mem(unsigned long *dest, unsigned long ea, int nb,
}
NOKPROBE_SYMBOL(read_mem);
-static nokprobe_inline int write_mem_aligned(unsigned long val,
- unsigned long ea, int nb,
- struct pt_regs *regs)
+/*
+ * Store val at address ea with a single unsafe_put_user() whose width is
+ * selected by nb: 1, 2 or 4 bytes, plus 8 bytes when __powerpc64__ is
+ * defined.  Any other nb matches no case, so nothing is stored and 0 is
+ * returned.
+ *
+ * Returns 0 on success.  unsafe_put_user() is given the Efault label,
+ * where regs->dar is set to ea and -EFAULT is returned.
+ *
+ * This helper does not open a user access window itself;
+ * write_mem_aligned() does that around the call for non-kernel addresses.
+ */
+static __always_inline int
+__write_mem_aligned(unsigned long val, unsigned long ea, int nb, struct pt_regs *regs)
{
- int err = 0;
-
switch (nb) {
case 1:
- err = __put_user(val, (unsigned char __user *) ea);
+ unsafe_put_user(val, (unsigned char __user *)ea, Efault);
break;
case 2:
- err = __put_user(val, (unsigned short __user *) ea);
+ unsafe_put_user(val, (unsigned short __user *)ea, Efault);
break;
case 4:
- err = __put_user(val, (unsigned int __user *) ea);
+ unsafe_put_user(val, (unsigned int __user *)ea, Efault);
break;
#ifdef __powerpc64__
case 8:
- err = __put_user(val, (unsigned long __user *) ea);
+ unsafe_put_user(val, (unsigned long __user *)ea, Efault);
break;
#endif
}
- if (err)
+ return 0;
+
+/* Error exit used by the unsafe_put_user() calls above. */
+Efault:
+ regs->dar = ea;
+ return -EFAULT;
+}
+
+/*
+ * Store val at address ea as one nb-byte access by calling
+ * __write_mem_aligned().
+ *
+ * A kernel address (is_kernel_addr()) is written directly.  Any other
+ * address is written between user_write_access_begin() and
+ * user_write_access_end().
+ *
+ * Returns the result of __write_mem_aligned(), or -EFAULT with regs->dar
+ * set to ea when user_write_access_begin() refuses the range.
+ */
+static nokprobe_inline int
+write_mem_aligned(unsigned long val, unsigned long ea, int nb, struct pt_regs *regs)
+{
+ int err;
+
+ /* Kernel addresses skip the user access begin/end bracket. */
+ if (is_kernel_addr(ea))
+ return __write_mem_aligned(val, ea, nb, regs);
+
+ if (user_write_access_begin((void __user *)ea, nb)) {
+ err = __write_mem_aligned(val, ea, nb, regs);
+ user_write_access_end();
+ } else {
+ err = -EFAULT;
regs->dar = ea;
+ }
+
return err;
}
@@ -381,10 +485,8 @@ static nokprobe_inline int write_mem_aligned(unsigned long val,
* Copy from a buffer to userspace, using the largest possible
* aligned accesses, up to sizeof(long).
*/
-static nokprobe_inline int copy_mem_out(u8 *dest, unsigned long ea, int nb,
- struct pt_regs *regs)
+static __always_inline int __copy_mem_out(u8 *dest, unsigned long ea, int nb, struct pt_regs *regs)
{
- int err = 0;
int c;
for (; nb > 0; nb -= c) {
@@ -393,31 +495,46 @@ static nokprobe_inline int copy_mem_out(u8 *dest, unsigned long ea, int nb,
c = max_align(nb);
switch (c) {
case 1:
- err = __put_user(*dest, (unsigned char __user *) ea);
+ unsafe_put_user(*dest, (u8 __user *)ea, Efault);
break;
case 2:
- err = __put_user(*(u16 *)dest,
- (unsigned short __user *) ea);
+ unsafe_put_user(*(u16 *)dest, (u16 __user *)ea, Efault);
break;
case 4:
- err = __put_user(*(u32 *)dest,
- (unsigned int __user *) ea);
+ unsafe_put_user(*(u32 *)dest, (u32 __user *)ea, Efault);
break;
#ifdef __powerpc64__
case 8:
- err = __put_user(*(unsigned long *)dest,
- (unsigned long __user *) ea);
+ unsafe_put_user(*(u64 *)dest, (u64 __user *)ea, Efault);
break;
#endif
}
- if (err) {
- regs->dar = ea;
- return err;
- }
dest += c;
ea += c;
}
return 0;
+
+Efault:
+ regs->dar = ea;
+ return -EFAULT;
+}
+
+/*
+ * Copy nb bytes from the buffer dest out to address ea by calling
+ * __copy_mem_out().  Despite its name, dest is the source buffer here.
+ *
+ * A kernel address (is_kernel_addr()) is written directly.  Any other
+ * address is written between user_write_access_begin() and
+ * user_write_access_end().
+ *
+ * Returns the result of __copy_mem_out(), or -EFAULT with regs->dar set
+ * to ea when user_write_access_begin() refuses the range.
+ */
+static nokprobe_inline int copy_mem_out(u8 *dest, unsigned long ea, int nb, struct pt_regs *regs)
+{
+ int err;
+
+ /* Kernel addresses skip the user access begin/end bracket. */
+ if (is_kernel_addr(ea))
+ return __copy_mem_out(dest, ea, nb, regs);
+
+ if (user_write_access_begin((void __user *)ea, nb)) {
+ err = __copy_mem_out(dest, ea, nb, regs);
+ user_write_access_end();
+ } else {
+ /* Range rejected before any byte was written: report ea itself. */
+ err = -EFAULT;
+ regs->dar = ea;
+ }
+
+ return err;
}
static nokprobe_inline int write_mem_unaligned(unsigned long val,
@@ -469,6 +586,8 @@ static int do_fp_load(struct instruction_op *op, unsigned long ea,
} u;
nb = GETSIZE(op->type);
+ if (nb > sizeof(u))
+ return -EINVAL;
if (!address_ok(regs, ea, nb))
return -EFAULT;
rn = op->reg;
@@ -519,6 +638,8 @@ static int do_fp_store(struct instruction_op *op, unsigned long ea,
} u;
nb = GETSIZE(op->type);
+ if (nb > sizeof(u))
+ return -EINVAL;
if (!address_ok(regs, ea, nb))
return -EFAULT;
rn = op->reg;
@@ -563,6 +684,9 @@ static nokprobe_inline int do_vec_load(int rn, unsigned long ea,
u8 b[sizeof(__vector128)];
} u = {};
+ if (size > sizeof(u))
+ return -EINVAL;
+
if (!address_ok(regs, ea & ~0xfUL, 16))
return -EFAULT;
/* align to multiple of size */
@@ -571,7 +695,7 @@ static nokprobe_inline int do_vec_load(int rn, unsigned long ea,
if (err)
return err;
if (unlikely(cross_endian))
- do_byte_reverse(&u.b[ea & 0xf], size);
+ do_byte_reverse(&u.b[ea & 0xf], min_t(size_t, size, sizeof(u)));
preempt_disable();
if (regs->msr & MSR_VEC)
put_vr(rn, &u.v);
@@ -590,6 +714,9 @@ static nokprobe_inline int do_vec_store(int rn, unsigned long ea,
u8 b[sizeof(__vector128)];
} u;
+ if (size > sizeof(u))
+ return -EINVAL;
+
if (!address_ok(regs, ea & ~0xfUL, 16))
return -EFAULT;
/* align to multiple of size */
@@ -602,7 +729,7 @@ static nokprobe_inline int do_vec_store(int rn, unsigned long ea,
u.v = current->thread.vr_state.vr[rn];
preempt_enable();
if (unlikely(cross_endian))
- do_byte_reverse(&u.b[ea & 0xf], size);
+ do_byte_reverse(&u.b[ea & 0xf], min_t(size_t, size, sizeof(u)));
return copy_mem_out(&u.b[ea & 0xf], ea, size, regs);
}
#endif /* CONFIG_ALTIVEC */
@@ -666,6 +793,8 @@ void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg,
reg->d[0] = reg->d[1] = 0;
switch (op->element_size) {
+ case 32:
+ /* [p]lxvp[x] */
case 16:
/* whole vector; lxv[x] or lxvl[l] */
if (size == 0)
@@ -674,7 +803,7 @@ void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg,
if (IS_LE && (op->vsx_flags & VSX_LDLEFT))
rev = !rev;
if (rev)
- do_byte_reverse(reg, 16);
+ do_byte_reverse(reg, size);
break;
case 8:
/* scalar loads, lxvd2x, lxvdsx */
@@ -750,6 +879,22 @@ void emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg,
size = GETSIZE(op->type);
switch (op->element_size) {
+ case 32:
+ /* [p]stxvp[x] */
+ if (size == 0)
+ break;
+ if (rev) {
+ /* reverse 32 bytes */
+ union vsx_reg buf32[2];
+ buf32[0].d[0] = byterev_8(reg[1].d[1]);
+ buf32[0].d[1] = byterev_8(reg[1].d[0]);
+ buf32[1].d[0] = byterev_8(reg[0].d[1]);
+ buf32[1].d[1] = byterev_8(reg[0].d[0]);
+ memcpy(mem, buf32, size);
+ } else {
+ memcpy(mem, reg, size);
+ }
+ break;
case 16:
/* stxv, stxvx, stxvl, stxvll */
if (size == 0)
@@ -818,28 +963,43 @@ static nokprobe_inline int do_vsx_load(struct instruction_op *op,
bool cross_endian)
{
int reg = op->reg;
- u8 mem[16];
- union vsx_reg buf;
+ int i, j, nr_vsx_regs;
+ u8 mem[32];
+ union vsx_reg buf[2];
int size = GETSIZE(op->type);
if (!address_ok(regs, ea, size) || copy_mem_in(mem, ea, size, regs))
return -EFAULT;
- emulate_vsx_load(op, &buf, mem, cross_endian);
+ nr_vsx_regs = max(1ul, size / sizeof(__vector128));
+ emulate_vsx_load(op, buf, mem, cross_endian);
preempt_disable();
if (reg < 32) {
/* FP regs + extensions */
if (regs->msr & MSR_FP) {
- load_vsrn(reg, &buf);
+ for (i = 0; i < nr_vsx_regs; i++) {
+ j = IS_LE ? nr_vsx_regs - i - 1 : i;
+ load_vsrn(reg + i, &buf[j].v);
+ }
} else {
- current->thread.fp_state.fpr[reg][0] = buf.d[0];
- current->thread.fp_state.fpr[reg][1] = buf.d[1];
+ for (i = 0; i < nr_vsx_regs; i++) {
+ j = IS_LE ? nr_vsx_regs - i - 1 : i;
+ current->thread.fp_state.fpr[reg + i][0] = buf[j].d[0];
+ current->thread.fp_state.fpr[reg + i][1] = buf[j].d[1];
+ }
}
} else {
- if (regs->msr & MSR_VEC)
- load_vsrn(reg, &buf);
- else
- current->thread.vr_state.vr[reg - 32] = buf.v;
+ if (regs->msr & MSR_VEC) {
+ for (i = 0; i < nr_vsx_regs; i++) {
+ j = IS_LE ? nr_vsx_regs - i - 1 : i;
+ load_vsrn(reg + i, &buf[j].v);
+ }
+ } else {
+ for (i = 0; i < nr_vsx_regs; i++) {
+ j = IS_LE ? nr_vsx_regs - i - 1 : i;
+ current->thread.vr_state.vr[reg - 32 + i] = buf[j].v;
+ }
+ }
}
preempt_enable();
return 0;
@@ -850,63 +1010,96 @@ static nokprobe_inline int do_vsx_store(struct instruction_op *op,
bool cross_endian)
{
int reg = op->reg;
- u8 mem[16];
- union vsx_reg buf;
+ int i, j, nr_vsx_regs;
+ u8 mem[32];
+ union vsx_reg buf[2];
int size = GETSIZE(op->type);
if (!address_ok(regs, ea, size))
return -EFAULT;
+ nr_vsx_regs = max(1ul, size / sizeof(__vector128));
preempt_disable();
if (reg < 32) {
/* FP regs + extensions */
if (regs->msr & MSR_FP) {
- store_vsrn(reg, &buf);
+ for (i = 0; i < nr_vsx_regs; i++) {
+ j = IS_LE ? nr_vsx_regs - i - 1 : i;
+ store_vsrn(reg + i, &buf[j].v);
+ }
} else {
- buf.d[0] = current->thread.fp_state.fpr[reg][0];
- buf.d[1] = current->thread.fp_state.fpr[reg][1];
+ for (i = 0; i < nr_vsx_regs; i++) {
+ j = IS_LE ? nr_vsx_regs - i - 1 : i;
+ buf[j].d[0] = current->thread.fp_state.fpr[reg + i][0];
+ buf[j].d[1] = current->thread.fp_state.fpr[reg + i][1];
+ }
}
} else {
- if (regs->msr & MSR_VEC)
- store_vsrn(reg, &buf);
- else
- buf.v = current->thread.vr_state.vr[reg - 32];
+ if (regs->msr & MSR_VEC) {
+ for (i = 0; i < nr_vsx_regs; i++) {
+ j = IS_LE ? nr_vsx_regs - i - 1 : i;
+ store_vsrn(reg + i, &buf[j].v);
+ }
+ } else {
+ for (i = 0; i < nr_vsx_regs; i++) {
+ j = IS_LE ? nr_vsx_regs - i - 1 : i;
+ buf[j].v = current->thread.vr_state.vr[reg - 32 + i];
+ }
+ }
}
preempt_enable();
- emulate_vsx_store(op, &buf, mem, cross_endian);
+ emulate_vsx_store(op, buf, mem, cross_endian);
return copy_mem_out(mem, ea, size, regs);
}
#endif /* CONFIG_VSX */
+/*
+ * Write zeroes over l1_dcache_bytes() bytes starting at ea, one
+ * unsigned long per unsafe_put_user() call.
+ *
+ * Returns 0 on success.  unsafe_put_user() is given the Efault label,
+ * which returns -EFAULT.  This helper takes no pt_regs, so regs->dar is
+ * left for the caller to set.
+ *
+ * This helper does not open a user access window itself; emulate_dcbz()
+ * does that around the call for non-kernel addresses.
+ */
+static __always_inline int __emulate_dcbz(unsigned long ea)
+{
+ unsigned long i;
+ unsigned long size = l1_dcache_bytes();
+
+ for (i = 0; i < size; i += sizeof(long))
+ unsafe_put_user(0, (unsigned long __user *)(ea + i), Efault);
+
+ return 0;
+
+Efault:
+ return -EFAULT;
+}
+
+/*
+ * Emulate dcbz: zero the l1_dcache_bytes()-sized block that contains ea.
+ *
+ * ea is first passed through truncate_if_32bit() with regs->msr, then its
+ * low bits are cleared with ~(size - 1).  That rounds it down to the
+ * start of the block when size is a power of two.
+ *
+ * Returns 0 on success or -EFAULT on failure.  When the store loop or
+ * user_write_access_begin() fails, regs->dar is set to the rounded-down
+ * ea.  The early address_ok() failure returns -EFAULT without this
+ * function writing regs->dar.
+ */
int emulate_dcbz(unsigned long ea, struct pt_regs *regs)
{
int err;
- unsigned long i, size;
+ unsigned long size = l1_dcache_bytes();
-#ifdef __powerpc64__
- size = ppc64_caches.l1d.block_size;
- if (!(regs->msr & MSR_64BIT))
- ea &= 0xffffffffUL;
-#else
- size = L1_CACHE_BYTES;
-#endif
+ ea = truncate_if_32bit(regs->msr, ea);
ea &= ~(size - 1);
if (!address_ok(regs, ea, size))
return -EFAULT;
- for (i = 0; i < size; i += sizeof(long)) {
- err = __put_user(0, (unsigned long __user *) (ea + i));
- if (err) {
- regs->dar = ea;
- return err;
- }
+
+ /* Only non-kernel addresses get the user write access bracket. */
+ if (is_kernel_addr(ea)) {
+ err = __emulate_dcbz(ea);
+ } else if (user_write_access_begin((void __user *)ea, size)) {
+ err = __emulate_dcbz(ea);
+ user_write_access_end();
+ } else {
+ err = -EFAULT;
}
- return 0;
+
+ if (err)
+ regs->dar = ea;
+
+
+ return err;
}
NOKPROBE_SYMBOL(emulate_dcbz);
#define __put_user_asmx(x, addr, err, op, cr) \
__asm__ __volatile__( \
+ ".machine push\n" \
+ ".machine power8\n" \
"1: " op " %2,0,%3\n" \
+ ".machine pop\n" \
" mfcr %1\n" \
"2:\n" \
".section .fixup,\"ax\"\n" \
@@ -919,7 +1112,10 @@ NOKPROBE_SYMBOL(emulate_dcbz);
#define __get_user_asmx(x, addr, err, op) \
__asm__ __volatile__( \
+ ".machine push\n" \
+ ".machine power8\n" \
"1: "op" %1,0,%2\n" \
+ ".machine pop\n" \
"2:\n" \
".section .fixup,\"ax\"\n" \
"3: li %0,%3\n" \
@@ -948,10 +1144,8 @@ static nokprobe_inline void set_cr0(const struct pt_regs *regs,
op->type |= SETCC;
op->ccval = (regs->ccr & 0x0fffffff) | ((regs->xer >> 3) & 0x10000000);
-#ifdef __powerpc64__
if (!(regs->msr & MSR_64BIT))
val = (int) val;
-#endif
if (val < 0)
op->ccval |= 0x80000000;
else if (val > 0)
@@ -979,15 +1173,11 @@ static nokprobe_inline void add_with_carry(const struct pt_regs *regs,
if (carry_in)
++val;
- op->type = COMPUTE + SETREG + SETXER;
+ op->type = COMPUTE | SETREG | SETXER;
op->reg = rd;
op->val = val;
-#ifdef __powerpc64__
- if (!(regs->msr & MSR_64BIT)) {
- val = (unsigned int) val;
- val1 = (unsigned int) val1;
- }
-#endif
+ val = truncate_if_32bit(regs->msr, val);
+ val1 = truncate_if_32bit(regs->msr, val1);
op->xerval = regs->xer;
if (val < val1 || (carry_in && val == val1))
op->xerval |= XER_CA;
@@ -1004,7 +1194,7 @@ static nokprobe_inline void do_cmp_signed(const struct pt_regs *regs,
{
unsigned int crval, shift;
- op->type = COMPUTE + SETCC;
+ op->type = COMPUTE | SETCC;
crval = (regs->xer >> 31) & 1; /* get SO bit */
if (v1 < v2)
crval |= 8;
@@ -1023,7 +1213,7 @@ static nokprobe_inline void do_cmp_unsigned(const struct pt_regs *regs,
{
unsigned int crval, shift;
- op->type = COMPUTE + SETCC;
+ op->type = COMPUTE | SETCC;
crval = (regs->xer >> 31) & 1; /* get SO bit */
if (v1 < v2)
crval |= 8;
@@ -1163,54 +1353,64 @@ static nokprobe_inline int trap_compare(long v1, long v2)
* otherwise.
*/
int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
- unsigned int instr)
+ ppc_inst_t instr)
{
+#ifdef CONFIG_PPC64
+ unsigned int suffixopcode, prefixtype, prefix_r;
+#endif
unsigned int opcode, ra, rb, rc, rd, spr, u;
unsigned long int imm;
unsigned long int val, val2;
unsigned int mb, me, sh;
+ unsigned int word, suffix;
long ival;
+ word = ppc_inst_val(instr);
+ suffix = ppc_inst_suffix(instr);
+
op->type = COMPUTE;
- opcode = instr >> 26;
+ opcode = ppc_inst_primary_opcode(instr);
switch (opcode) {
case 16: /* bc */
op->type = BRANCH;
- imm = (signed short)(instr & 0xfffc);
- if ((instr & 2) == 0)
+ imm = (signed short)(word & 0xfffc);
+ if ((word & 2) == 0)
imm += regs->nip;
op->val = truncate_if_32bit(regs->msr, imm);
- if (instr & 1)
+ if (word & 1)
op->type |= SETLK;
- if (branch_taken(instr, regs, op))
+ if (branch_taken(word, regs, op))
op->type |= BRTAKEN;
return 1;
-#ifdef CONFIG_PPC64
case 17: /* sc */
- if ((instr & 0xfe2) == 2)
+ if ((word & 0xfe2) == 2)
op->type = SYSCALL;
- else
+ else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) &&
+ (word & 0xfe3) == 1) { /* scv */
+ op->type = SYSCALL_VECTORED_0;
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ } else
op->type = UNKNOWN;
return 0;
-#endif
case 18: /* b */
op->type = BRANCH | BRTAKEN;
- imm = instr & 0x03fffffc;
+ imm = word & 0x03fffffc;
if (imm & 0x02000000)
imm -= 0x04000000;
- if ((instr & 2) == 0)
+ if ((word & 2) == 0)
imm += regs->nip;
op->val = truncate_if_32bit(regs->msr, imm);
- if (instr & 1)
+ if (word & 1)
op->type |= SETLK;
return 1;
case 19:
- switch ((instr >> 1) & 0x3ff) {
+ switch ((word >> 1) & 0x3ff) {
case 0: /* mcrf */
op->type = COMPUTE + SETCC;
- rd = 7 - ((instr >> 23) & 0x7);
- ra = 7 - ((instr >> 18) & 0x7);
+ rd = 7 - ((word >> 23) & 0x7);
+ ra = 7 - ((word >> 18) & 0x7);
rd *= 4;
ra *= 4;
val = (regs->ccr >> ra) & 0xf;
@@ -1220,16 +1420,16 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
case 16: /* bclr */
case 528: /* bcctr */
op->type = BRANCH;
- imm = (instr & 0x400)? regs->ctr: regs->link;
+ imm = (word & 0x400)? regs->ctr: regs->link;
op->val = truncate_if_32bit(regs->msr, imm);
- if (instr & 1)
+ if (word & 1)
op->type |= SETLK;
- if (branch_taken(instr, regs, op))
+ if (branch_taken(word, regs, op))
op->type |= BRTAKEN;
return 1;
case 18: /* rfid, scary */
- if (regs->msr & MSR_PR)
+ if (user_mode(regs))
goto priv;
op->type = RFI;
return 0;
@@ -1247,23 +1447,23 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
case 417: /* crorc */
case 449: /* cror */
op->type = COMPUTE + SETCC;
- ra = (instr >> 16) & 0x1f;
- rb = (instr >> 11) & 0x1f;
- rd = (instr >> 21) & 0x1f;
+ ra = (word >> 16) & 0x1f;
+ rb = (word >> 11) & 0x1f;
+ rd = (word >> 21) & 0x1f;
ra = (regs->ccr >> (31 - ra)) & 1;
rb = (regs->ccr >> (31 - rb)) & 1;
- val = (instr >> (6 + ra * 2 + rb)) & 1;
+ val = (word >> (6 + ra * 2 + rb)) & 1;
op->ccval = (regs->ccr & ~(1UL << (31 - rd))) |
(val << (31 - rd));
return 1;
}
break;
case 31:
- switch ((instr >> 1) & 0x3ff) {
+ switch ((word >> 1) & 0x3ff) {
case 598: /* sync */
op->type = BARRIER + BARRIER_SYNC;
#ifdef __powerpc64__
- switch ((instr >> 21) & 3) {
+ switch ((word >> 21) & 3) {
case 1: /* lwsync */
op->type = BARRIER + BARRIER_LWSYNC;
break;
@@ -1281,33 +1481,57 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
break;
}
- /* Following cases refer to regs->gpr[], so we need all regs */
- if (!FULL_REGS(regs))
- return -1;
-
- rd = (instr >> 21) & 0x1f;
- ra = (instr >> 16) & 0x1f;
- rb = (instr >> 11) & 0x1f;
- rc = (instr >> 6) & 0x1f;
+ rd = (word >> 21) & 0x1f;
+ ra = (word >> 16) & 0x1f;
+ rb = (word >> 11) & 0x1f;
+ rc = (word >> 6) & 0x1f;
switch (opcode) {
#ifdef __powerpc64__
+ case 1:
+ if (!cpu_has_feature(CPU_FTR_ARCH_31))
+ goto unknown_opcode;
+
+ prefix_r = GET_PREFIX_R(word);
+ ra = GET_PREFIX_RA(suffix);
+ rd = (suffix >> 21) & 0x1f;
+ op->reg = rd;
+ op->val = regs->gpr[rd];
+ suffixopcode = get_op(suffix);
+ prefixtype = (word >> 24) & 0x3;
+ switch (prefixtype) {
+ case 2:
+ if (prefix_r && ra)
+ return 0;
+ switch (suffixopcode) {
+ case 14: /* paddi */
+ op->type = COMPUTE | PREFIXED;
+ op->val = mlsd_8lsd_ea(word, suffix, regs);
+ goto compute_done;
+ }
+ }
+ break;
case 2: /* tdi */
- if (rd & trap_compare(regs->gpr[ra], (short) instr))
+ if (rd & trap_compare(regs->gpr[ra], (short) word))
goto trap;
return 1;
#endif
case 3: /* twi */
- if (rd & trap_compare((int)regs->gpr[ra], (short) instr))
+ if (rd & trap_compare((int)regs->gpr[ra], (short) word))
goto trap;
return 1;
#ifdef __powerpc64__
case 4:
+ /*
+ * There are very many instructions with this primary opcode
+ * introduced in the ISA as early as v2.03. However, the ones
+ * we currently emulate were all introduced with ISA 3.0
+ */
if (!cpu_has_feature(CPU_FTR_ARCH_300))
- return -1;
+ goto unknown_opcode;
- switch (instr & 0x3f) {
+ switch (word & 0x3f) {
case 48: /* maddhd */
asm volatile(PPC_MADDHD(%0, %1, %2, %3) :
"=r" (op->val) : "r" (regs->gpr[ra]),
@@ -1331,20 +1555,20 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
* There are other instructions from ISA 3.0 with the same
* primary opcode which do not have emulation support yet.
*/
- return -1;
+ goto unknown_opcode;
#endif
case 7: /* mulli */
- op->val = regs->gpr[ra] * (short) instr;
+ op->val = regs->gpr[ra] * (short) word;
goto compute_done;
case 8: /* subfic */
- imm = (short) instr;
+ imm = (short) word;
add_with_carry(regs, op, rd, ~regs->gpr[ra], imm, 1);
return 1;
case 10: /* cmpli */
- imm = (unsigned short) instr;
+ imm = (unsigned short) word;
val = regs->gpr[ra];
#ifdef __powerpc64__
if ((rd & 1) == 0)
@@ -1354,7 +1578,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
return 1;
case 11: /* cmpi */
- imm = (short) instr;
+ imm = (short) word;
val = regs->gpr[ra];
#ifdef __powerpc64__
if ((rd & 1) == 0)
@@ -1364,35 +1588,37 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
return 1;
case 12: /* addic */
- imm = (short) instr;
+ imm = (short) word;
add_with_carry(regs, op, rd, regs->gpr[ra], imm, 0);
return 1;
case 13: /* addic. */
- imm = (short) instr;
+ imm = (short) word;
add_with_carry(regs, op, rd, regs->gpr[ra], imm, 0);
set_cr0(regs, op);
return 1;
case 14: /* addi */
- imm = (short) instr;
+ imm = (short) word;
if (ra)
imm += regs->gpr[ra];
op->val = imm;
goto compute_done;
case 15: /* addis */
- imm = ((short) instr) << 16;
+ imm = ((short) word) << 16;
if (ra)
imm += regs->gpr[ra];
op->val = imm;
goto compute_done;
case 19:
- if (((instr >> 1) & 0x1f) == 2) {
+ if (((word >> 1) & 0x1f) == 2) {
/* addpcis */
- imm = (short) (instr & 0xffc1); /* d0 + d2 fields */
- imm |= (instr >> 15) & 0x3e; /* d1 field */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ imm = (short) (word & 0xffc1); /* d0 + d2 fields */
+ imm |= (word >> 15) & 0x3e; /* d1 field */
op->val = regs->nip + (imm << 16) + 4;
goto compute_done;
}
@@ -1400,65 +1626,65 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
return 0;
case 20: /* rlwimi */
- mb = (instr >> 6) & 0x1f;
- me = (instr >> 1) & 0x1f;
+ mb = (word >> 6) & 0x1f;
+ me = (word >> 1) & 0x1f;
val = DATA32(regs->gpr[rd]);
imm = MASK32(mb, me);
op->val = (regs->gpr[ra] & ~imm) | (ROTATE(val, rb) & imm);
goto logical_done;
case 21: /* rlwinm */
- mb = (instr >> 6) & 0x1f;
- me = (instr >> 1) & 0x1f;
+ mb = (word >> 6) & 0x1f;
+ me = (word >> 1) & 0x1f;
val = DATA32(regs->gpr[rd]);
op->val = ROTATE(val, rb) & MASK32(mb, me);
goto logical_done;
case 23: /* rlwnm */
- mb = (instr >> 6) & 0x1f;
- me = (instr >> 1) & 0x1f;
+ mb = (word >> 6) & 0x1f;
+ me = (word >> 1) & 0x1f;
rb = regs->gpr[rb] & 0x1f;
val = DATA32(regs->gpr[rd]);
op->val = ROTATE(val, rb) & MASK32(mb, me);
goto logical_done;
case 24: /* ori */
- op->val = regs->gpr[rd] | (unsigned short) instr;
+ op->val = regs->gpr[rd] | (unsigned short) word;
goto logical_done_nocc;
case 25: /* oris */
- imm = (unsigned short) instr;
+ imm = (unsigned short) word;
op->val = regs->gpr[rd] | (imm << 16);
goto logical_done_nocc;
case 26: /* xori */
- op->val = regs->gpr[rd] ^ (unsigned short) instr;
+ op->val = regs->gpr[rd] ^ (unsigned short) word;
goto logical_done_nocc;
case 27: /* xoris */
- imm = (unsigned short) instr;
+ imm = (unsigned short) word;
op->val = regs->gpr[rd] ^ (imm << 16);
goto logical_done_nocc;
case 28: /* andi. */
- op->val = regs->gpr[rd] & (unsigned short) instr;
+ op->val = regs->gpr[rd] & (unsigned short) word;
set_cr0(regs, op);
goto logical_done_nocc;
case 29: /* andis. */
- imm = (unsigned short) instr;
+ imm = (unsigned short) word;
op->val = regs->gpr[rd] & (imm << 16);
set_cr0(regs, op);
goto logical_done_nocc;
#ifdef __powerpc64__
case 30: /* rld* */
- mb = ((instr >> 6) & 0x1f) | (instr & 0x20);
+ mb = ((word >> 6) & 0x1f) | (word & 0x20);
val = regs->gpr[rd];
- if ((instr & 0x10) == 0) {
- sh = rb | ((instr & 2) << 4);
+ if ((word & 0x10) == 0) {
+ sh = rb | ((word & 2) << 4);
val = ROTATE(val, sh);
- switch ((instr >> 2) & 3) {
+ switch ((word >> 2) & 3) {
case 0: /* rldicl */
val &= MASK64_L(mb);
break;
@@ -1478,7 +1704,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
} else {
sh = regs->gpr[rb] & 0x3f;
val = ROTATE(val, sh);
- switch ((instr >> 1) & 7) {
+ switch ((word >> 1) & 7) {
case 0: /* rldcl */
op->val = val & MASK64_L(mb);
goto logical_done;
@@ -1493,8 +1719,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
case 31:
/* isel occupies 32 minor opcodes */
- if (((instr >> 1) & 0x1f) == 15) {
- mb = (instr >> 6) & 0x1f; /* bc field */
+ if (((word >> 1) & 0x1f) == 15) {
+ mb = (word >> 6) & 0x1f; /* bc field */
val = (regs->ccr >> (31 - mb)) & 1;
val2 = (ra) ? regs->gpr[ra] : 0;
@@ -1502,7 +1728,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
goto compute_done;
}
- switch ((instr >> 1) & 0x3ff) {
+ switch ((word >> 1) & 0x3ff) {
case 4: /* tw */
if (rd == 0x1f ||
(rd & trap_compare((int)regs->gpr[ra],
@@ -1516,13 +1742,13 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
return 1;
#endif
case 83: /* mfmsr */
- if (regs->msr & MSR_PR)
+ if (user_mode(regs))
goto priv;
op->type = MFMSR;
op->reg = rd;
return 0;
case 146: /* mtmsr */
- if (regs->msr & MSR_PR)
+ if (user_mode(regs))
goto priv;
op->type = MTMSR;
op->reg = rd;
@@ -1530,23 +1756,23 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
return 0;
#ifdef CONFIG_PPC64
case 178: /* mtmsrd */
- if (regs->msr & MSR_PR)
+ if (user_mode(regs))
goto priv;
op->type = MTMSR;
op->reg = rd;
/* only MSR_EE and MSR_RI get changed if bit 15 set */
/* mtmsrd doesn't change MSR_HV, MSR_ME or MSR_LE */
- imm = (instr & 0x10000)? 0x8002: 0xefffffffffffeffeUL;
+ imm = (word & 0x10000)? 0x8002: 0xefffffffffffeffeUL;
op->val = imm;
return 0;
#endif
case 19: /* mfcr */
imm = 0xffffffffUL;
- if ((instr >> 20) & 1) {
+ if ((word >> 20) & 1) {
imm = 0xf0000000UL;
for (sh = 0; sh < 8; ++sh) {
- if (instr & (0x80000 >> sh))
+ if (word & (0x80000 >> sh))
break;
imm >>= 4;
}
@@ -1554,13 +1780,35 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
op->val = regs->ccr & imm;
goto compute_done;
+ case 128: /* setb */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ /*
+ * 'ra' encodes the CR field number (bfa) in the top 3 bits.
+ * Since each CR field is 4 bits,
+ * we can simply mask off the bottom two bits (bfa * 4)
+ * to yield the first bit in the CR field.
+ */
+ ra = ra & ~0x3;
+ /* 'val' stores bits of the CR field (bfa) */
+ val = regs->ccr >> (CR0_SHIFT - ra);
+ /* checks if the LT bit of CR field (bfa) is set */
+ if (val & 8)
+ op->val = -1;
+ /* checks if the GT bit of CR field (bfa) is set */
+ else if (val & 4)
+ op->val = 1;
+ else
+ op->val = 0;
+ goto compute_done;
+
case 144: /* mtcrf */
op->type = COMPUTE + SETCC;
imm = 0xf0000000UL;
val = regs->gpr[rd];
op->ccval = regs->ccr;
for (sh = 0; sh < 8; ++sh) {
- if (instr & (0x80000 >> sh))
+ if (word & (0x80000 >> sh))
op->ccval = (op->ccval & ~imm) |
(val & imm);
imm >>= 4;
@@ -1568,7 +1816,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
return 1;
case 339: /* mfspr */
- spr = ((instr >> 16) & 0x1f) | ((instr >> 6) & 0x3e0);
+ spr = ((word >> 16) & 0x1f) | ((word >> 6) & 0x3e0);
op->type = MFSPR;
op->reg = rd;
op->spr = spr;
@@ -1578,7 +1826,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
return 0;
case 467: /* mtspr */
- spr = ((instr >> 16) & 0x1f) | ((instr >> 6) & 0x3e0);
+ spr = ((word >> 16) & 0x1f) | ((word >> 6) & 0x3e0);
op->type = MTSPR;
op->val = regs->gpr[rd];
op->spr = spr;
@@ -1703,7 +1951,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
#ifdef __powerpc64__
case 265: /* modud */
if (!cpu_has_feature(CPU_FTR_ARCH_300))
- return -1;
+ goto unknown_opcode;
op->val = regs->gpr[ra] % regs->gpr[rb];
goto compute_done;
#endif
@@ -1713,7 +1961,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
case 267: /* moduw */
if (!cpu_has_feature(CPU_FTR_ARCH_300))
- return -1;
+ goto unknown_opcode;
op->val = (unsigned int) regs->gpr[ra] %
(unsigned int) regs->gpr[rb];
goto compute_done;
@@ -1736,10 +1984,21 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
op->val = (int) regs->gpr[ra] /
(int) regs->gpr[rb];
goto arith_done;
-
+#ifdef __powerpc64__
+ case 425: /* divde[.] */
+ asm volatile(PPC_DIVDE(%0, %1, %2) :
+ "=r" (op->val) : "r" (regs->gpr[ra]),
+ "r" (regs->gpr[rb]));
+ goto arith_done;
+ case 393: /* divdeu[.] */
+ asm volatile(PPC_DIVDEU(%0, %1, %2) :
+ "=r" (op->val) : "r" (regs->gpr[ra]),
+ "r" (regs->gpr[rb]));
+ goto arith_done;
+#endif
case 755: /* darn */
if (!cpu_has_feature(CPU_FTR_ARCH_300))
- return -1;
+ goto unknown_opcode;
switch (ra & 0x3) {
case 0:
/* 32-bit conditioned */
@@ -1757,18 +2016,18 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
goto compute_done;
}
- return -1;
+ goto unknown_opcode;
#ifdef __powerpc64__
case 777: /* modsd */
if (!cpu_has_feature(CPU_FTR_ARCH_300))
- return -1;
+ goto unknown_opcode;
op->val = (long int) regs->gpr[ra] %
(long int) regs->gpr[rb];
goto compute_done;
#endif
case 779: /* modsw */
if (!cpu_has_feature(CPU_FTR_ARCH_300))
- return -1;
+ goto unknown_opcode;
op->val = (int) regs->gpr[ra] %
(int) regs->gpr[rb];
goto compute_done;
@@ -1845,14 +2104,14 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
#endif
case 538: /* cnttzw */
if (!cpu_has_feature(CPU_FTR_ARCH_300))
- return -1;
+ goto unknown_opcode;
val = (unsigned int) regs->gpr[rd];
op->val = (val ? __builtin_ctz(val) : 32);
goto logical_done;
#ifdef __powerpc64__
case 570: /* cnttzd */
if (!cpu_has_feature(CPU_FTR_ARCH_300))
- return -1;
+ goto unknown_opcode;
val = regs->gpr[rd];
op->val = (val ? __builtin_ctzl(val) : 64);
goto logical_done;
@@ -1948,7 +2207,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
case 826: /* sradi with sh_5 = 0 */
case 827: /* sradi with sh_5 = 1 */
op->type = COMPUTE + SETREG + SETXER;
- sh = rb | ((instr & 2) << 4);
+ sh = rb | ((word & 2) << 4);
ival = (signed long int) regs->gpr[rd];
op->val = ival >> sh;
op->xerval = regs->xer;
@@ -1962,9 +2221,9 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
case 890: /* extswsli with sh_5 = 0 */
case 891: /* extswsli with sh_5 = 1 */
if (!cpu_has_feature(CPU_FTR_ARCH_300))
- return -1;
+ goto unknown_opcode;
op->type = COMPUTE + SETREG;
- sh = rb | ((instr & 2) << 4);
+ sh = rb | ((word & 2) << 4);
val = (signed int) regs->gpr[rd];
if (sh)
op->val = ROTATE(val, sh) & MASK64(0, 63 - sh);
@@ -1979,34 +2238,34 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
*/
case 54: /* dcbst */
op->type = MKOP(CACHEOP, DCBST, 0);
- op->ea = xform_ea(instr, regs);
+ op->ea = xform_ea(word, regs);
return 0;
case 86: /* dcbf */
op->type = MKOP(CACHEOP, DCBF, 0);
- op->ea = xform_ea(instr, regs);
+ op->ea = xform_ea(word, regs);
return 0;
case 246: /* dcbtst */
op->type = MKOP(CACHEOP, DCBTST, 0);
- op->ea = xform_ea(instr, regs);
+ op->ea = xform_ea(word, regs);
op->reg = rd;
return 0;
case 278: /* dcbt */
op->type = MKOP(CACHEOP, DCBTST, 0);
- op->ea = xform_ea(instr, regs);
+ op->ea = xform_ea(word, regs);
op->reg = rd;
return 0;
case 982: /* icbi */
op->type = MKOP(CACHEOP, ICBI, 0);
- op->ea = xform_ea(instr, regs);
+ op->ea = xform_ea(word, regs);
return 0;
case 1014: /* dcbz */
op->type = MKOP(CACHEOP, DCBZ, 0);
- op->ea = xform_ea(instr, regs);
+ op->ea = xform_ea(word, regs);
return 0;
}
break;
@@ -2019,14 +2278,14 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
op->update_reg = ra;
op->reg = rd;
op->val = regs->gpr[rd];
- u = (instr >> 20) & UPDATE;
+ u = (word >> 20) & UPDATE;
op->vsx_flags = 0;
switch (opcode) {
case 31:
- u = instr & UPDATE;
- op->ea = xform_ea(instr, regs);
- switch ((instr >> 1) & 0x3ff) {
+ u = word & UPDATE;
+ op->ea = xform_ea(word, regs);
+ switch ((word >> 1) & 0x3ff) {
case 20: /* lwarx */
op->type = MKOP(LARX, 0, 4);
break;
@@ -2035,15 +2294,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
op->type = MKOP(STCX, 0, 4);
break;
-#ifdef __powerpc64__
- case 84: /* ldarx */
- op->type = MKOP(LARX, 0, 8);
- break;
-
- case 214: /* stdcx. */
- op->type = MKOP(STCX, 0, 8);
- break;
-
+#ifdef CONFIG_PPC_HAS_LBARX_LHARX
case 52: /* lbarx */
op->type = MKOP(LARX, 0, 1);
break;
@@ -2059,6 +2310,15 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
case 726: /* sthcx. */
op->type = MKOP(STCX, 0, 2);
break;
+#endif
+#ifdef __powerpc64__
+ case 84: /* ldarx */
+ op->type = MKOP(LARX, 0, 8);
+ break;
+
+ case 214: /* stdcx. */
+ op->type = MKOP(STCX, 0, 8);
+ break;
case 276: /* lqarx */
if (!((rd & 1) || rd == ra || rd == rb))
@@ -2271,25 +2531,27 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
#ifdef CONFIG_VSX
case 12: /* lxsiwzx */
- op->reg = rd | ((instr & 1) << 5);
+ op->reg = rd | ((word & 1) << 5);
op->type = MKOP(LOAD_VSX, 0, 4);
op->element_size = 8;
break;
case 76: /* lxsiwax */
- op->reg = rd | ((instr & 1) << 5);
+ op->reg = rd | ((word & 1) << 5);
op->type = MKOP(LOAD_VSX, SIGNEXT, 4);
op->element_size = 8;
break;
case 140: /* stxsiwx */
- op->reg = rd | ((instr & 1) << 5);
+ op->reg = rd | ((word & 1) << 5);
op->type = MKOP(STORE_VSX, 0, 4);
op->element_size = 8;
break;
case 268: /* lxvx */
- op->reg = rd | ((instr & 1) << 5);
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->reg = rd | ((word & 1) << 5);
op->type = MKOP(LOAD_VSX, 0, 16);
op->element_size = 16;
op->vsx_flags = VSX_CHECK_VEC;
@@ -2298,33 +2560,47 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
case 269: /* lxvl */
case 301: { /* lxvll */
int nb;
- op->reg = rd | ((instr & 1) << 5);
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->reg = rd | ((word & 1) << 5);
op->ea = ra ? regs->gpr[ra] : 0;
nb = regs->gpr[rb] & 0xff;
if (nb > 16)
nb = 16;
op->type = MKOP(LOAD_VSX, 0, nb);
op->element_size = 16;
- op->vsx_flags = ((instr & 0x20) ? VSX_LDLEFT : 0) |
+ op->vsx_flags = ((word & 0x20) ? VSX_LDLEFT : 0) |
VSX_CHECK_VEC;
break;
}
case 332: /* lxvdsx */
- op->reg = rd | ((instr & 1) << 5);
+ op->reg = rd | ((word & 1) << 5);
op->type = MKOP(LOAD_VSX, 0, 8);
op->element_size = 8;
op->vsx_flags = VSX_SPLAT;
break;
+ case 333: /* lxvpx */
+ if (!cpu_has_feature(CPU_FTR_ARCH_31))
+ goto unknown_opcode;
+ op->reg = VSX_REGISTER_XTP(rd);
+ op->type = MKOP(LOAD_VSX, 0, 32);
+ op->element_size = 32;
+ break;
+
case 364: /* lxvwsx */
- op->reg = rd | ((instr & 1) << 5);
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->reg = rd | ((word & 1) << 5);
op->type = MKOP(LOAD_VSX, 0, 4);
op->element_size = 4;
op->vsx_flags = VSX_SPLAT | VSX_CHECK_VEC;
break;
case 396: /* stxvx */
- op->reg = rd | ((instr & 1) << 5);
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->reg = rd | ((word & 1) << 5);
op->type = MKOP(STORE_VSX, 0, 16);
op->element_size = 16;
op->vsx_flags = VSX_CHECK_VEC;
@@ -2333,118 +2609,143 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
case 397: /* stxvl */
case 429: { /* stxvll */
int nb;
- op->reg = rd | ((instr & 1) << 5);
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->reg = rd | ((word & 1) << 5);
op->ea = ra ? regs->gpr[ra] : 0;
nb = regs->gpr[rb] & 0xff;
if (nb > 16)
nb = 16;
op->type = MKOP(STORE_VSX, 0, nb);
op->element_size = 16;
- op->vsx_flags = ((instr & 0x20) ? VSX_LDLEFT : 0) |
+ op->vsx_flags = ((word & 0x20) ? VSX_LDLEFT : 0) |
VSX_CHECK_VEC;
break;
}
+ case 461: /* stxvpx */
+ if (!cpu_has_feature(CPU_FTR_ARCH_31))
+ goto unknown_opcode;
+ op->reg = VSX_REGISTER_XTP(rd);
+ op->type = MKOP(STORE_VSX, 0, 32);
+ op->element_size = 32;
+ break;
case 524: /* lxsspx */
- op->reg = rd | ((instr & 1) << 5);
+ op->reg = rd | ((word & 1) << 5);
op->type = MKOP(LOAD_VSX, 0, 4);
op->element_size = 8;
op->vsx_flags = VSX_FPCONV;
break;
case 588: /* lxsdx */
- op->reg = rd | ((instr & 1) << 5);
+ op->reg = rd | ((word & 1) << 5);
op->type = MKOP(LOAD_VSX, 0, 8);
op->element_size = 8;
break;
case 652: /* stxsspx */
- op->reg = rd | ((instr & 1) << 5);
+ op->reg = rd | ((word & 1) << 5);
op->type = MKOP(STORE_VSX, 0, 4);
op->element_size = 8;
op->vsx_flags = VSX_FPCONV;
break;
case 716: /* stxsdx */
- op->reg = rd | ((instr & 1) << 5);
+ op->reg = rd | ((word & 1) << 5);
op->type = MKOP(STORE_VSX, 0, 8);
op->element_size = 8;
break;
case 780: /* lxvw4x */
- op->reg = rd | ((instr & 1) << 5);
+ op->reg = rd | ((word & 1) << 5);
op->type = MKOP(LOAD_VSX, 0, 16);
op->element_size = 4;
break;
case 781: /* lxsibzx */
- op->reg = rd | ((instr & 1) << 5);
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->reg = rd | ((word & 1) << 5);
op->type = MKOP(LOAD_VSX, 0, 1);
op->element_size = 8;
op->vsx_flags = VSX_CHECK_VEC;
break;
case 812: /* lxvh8x */
- op->reg = rd | ((instr & 1) << 5);
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->reg = rd | ((word & 1) << 5);
op->type = MKOP(LOAD_VSX, 0, 16);
op->element_size = 2;
op->vsx_flags = VSX_CHECK_VEC;
break;
case 813: /* lxsihzx */
- op->reg = rd | ((instr & 1) << 5);
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->reg = rd | ((word & 1) << 5);
op->type = MKOP(LOAD_VSX, 0, 2);
op->element_size = 8;
op->vsx_flags = VSX_CHECK_VEC;
break;
case 844: /* lxvd2x */
- op->reg = rd | ((instr & 1) << 5);
+ op->reg = rd | ((word & 1) << 5);
op->type = MKOP(LOAD_VSX, 0, 16);
op->element_size = 8;
break;
case 876: /* lxvb16x */
- op->reg = rd | ((instr & 1) << 5);
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->reg = rd | ((word & 1) << 5);
op->type = MKOP(LOAD_VSX, 0, 16);
op->element_size = 1;
op->vsx_flags = VSX_CHECK_VEC;
break;
case 908: /* stxvw4x */
- op->reg = rd | ((instr & 1) << 5);
+ op->reg = rd | ((word & 1) << 5);
op->type = MKOP(STORE_VSX, 0, 16);
op->element_size = 4;
break;
case 909: /* stxsibx */
- op->reg = rd | ((instr & 1) << 5);
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->reg = rd | ((word & 1) << 5);
op->type = MKOP(STORE_VSX, 0, 1);
op->element_size = 8;
op->vsx_flags = VSX_CHECK_VEC;
break;
case 940: /* stxvh8x */
- op->reg = rd | ((instr & 1) << 5);
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->reg = rd | ((word & 1) << 5);
op->type = MKOP(STORE_VSX, 0, 16);
op->element_size = 2;
op->vsx_flags = VSX_CHECK_VEC;
break;
case 941: /* stxsihx */
- op->reg = rd | ((instr & 1) << 5);
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->reg = rd | ((word & 1) << 5);
op->type = MKOP(STORE_VSX, 0, 2);
op->element_size = 8;
op->vsx_flags = VSX_CHECK_VEC;
break;
case 972: /* stxvd2x */
- op->reg = rd | ((instr & 1) << 5);
+ op->reg = rd | ((word & 1) << 5);
op->type = MKOP(STORE_VSX, 0, 16);
op->element_size = 8;
break;
case 1004: /* stxvb16x */
- op->reg = rd | ((instr & 1) << 5);
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->reg = rd | ((word & 1) << 5);
op->type = MKOP(STORE_VSX, 0, 16);
op->element_size = 1;
op->vsx_flags = VSX_CHECK_VEC;
@@ -2457,80 +2758,80 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
case 32: /* lwz */
case 33: /* lwzu */
op->type = MKOP(LOAD, u, 4);
- op->ea = dform_ea(instr, regs);
+ op->ea = dform_ea(word, regs);
break;
case 34: /* lbz */
case 35: /* lbzu */
op->type = MKOP(LOAD, u, 1);
- op->ea = dform_ea(instr, regs);
+ op->ea = dform_ea(word, regs);
break;
case 36: /* stw */
case 37: /* stwu */
op->type = MKOP(STORE, u, 4);
- op->ea = dform_ea(instr, regs);
+ op->ea = dform_ea(word, regs);
break;
case 38: /* stb */
case 39: /* stbu */
op->type = MKOP(STORE, u, 1);
- op->ea = dform_ea(instr, regs);
+ op->ea = dform_ea(word, regs);
break;
case 40: /* lhz */
case 41: /* lhzu */
op->type = MKOP(LOAD, u, 2);
- op->ea = dform_ea(instr, regs);
+ op->ea = dform_ea(word, regs);
break;
case 42: /* lha */
case 43: /* lhau */
op->type = MKOP(LOAD, SIGNEXT | u, 2);
- op->ea = dform_ea(instr, regs);
+ op->ea = dform_ea(word, regs);
break;
case 44: /* sth */
case 45: /* sthu */
op->type = MKOP(STORE, u, 2);
- op->ea = dform_ea(instr, regs);
+ op->ea = dform_ea(word, regs);
break;
case 46: /* lmw */
if (ra >= rd)
break; /* invalid form, ra in range to load */
op->type = MKOP(LOAD_MULTI, 0, 4 * (32 - rd));
- op->ea = dform_ea(instr, regs);
+ op->ea = dform_ea(word, regs);
break;
case 47: /* stmw */
op->type = MKOP(STORE_MULTI, 0, 4 * (32 - rd));
- op->ea = dform_ea(instr, regs);
+ op->ea = dform_ea(word, regs);
break;
#ifdef CONFIG_PPC_FPU
case 48: /* lfs */
case 49: /* lfsu */
op->type = MKOP(LOAD_FP, u | FPCONV, 4);
- op->ea = dform_ea(instr, regs);
+ op->ea = dform_ea(word, regs);
break;
case 50: /* lfd */
case 51: /* lfdu */
op->type = MKOP(LOAD_FP, u, 8);
- op->ea = dform_ea(instr, regs);
+ op->ea = dform_ea(word, regs);
break;
case 52: /* stfs */
case 53: /* stfsu */
op->type = MKOP(STORE_FP, u | FPCONV, 4);
- op->ea = dform_ea(instr, regs);
+ op->ea = dform_ea(word, regs);
break;
case 54: /* stfd */
case 55: /* stfdu */
op->type = MKOP(STORE_FP, u, 8);
- op->ea = dform_ea(instr, regs);
+ op->ea = dform_ea(word, regs);
break;
#endif
@@ -2538,26 +2839,30 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
case 56: /* lq */
if (!((rd & 1) || (rd == ra)))
op->type = MKOP(LOAD, 0, 16);
- op->ea = dqform_ea(instr, regs);
+ op->ea = dqform_ea(word, regs);
break;
#endif
#ifdef CONFIG_VSX
case 57: /* lfdp, lxsd, lxssp */
- op->ea = dsform_ea(instr, regs);
- switch (instr & 3) {
+ op->ea = dsform_ea(word, regs);
+ switch (word & 3) {
case 0: /* lfdp */
if (rd & 1)
break; /* reg must be even */
op->type = MKOP(LOAD_FP, 0, 16);
break;
case 2: /* lxsd */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
op->reg = rd + 32;
op->type = MKOP(LOAD_VSX, 0, 8);
op->element_size = 8;
op->vsx_flags = VSX_CHECK_VEC;
break;
case 3: /* lxssp */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
op->reg = rd + 32;
op->type = MKOP(LOAD_VSX, 0, 4);
op->element_size = 8;
@@ -2569,8 +2874,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
#ifdef __powerpc64__
case 58: /* ld[u], lwa */
- op->ea = dsform_ea(instr, regs);
- switch (instr & 3) {
+ op->ea = dsform_ea(word, regs);
+ switch (word & 3) {
case 0: /* ld */
op->type = MKOP(LOAD, 0, 8);
break;
@@ -2585,17 +2890,35 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
#endif
#ifdef CONFIG_VSX
+ case 6:
+ if (!cpu_has_feature(CPU_FTR_ARCH_31))
+ goto unknown_opcode;
+ op->ea = dqform_ea(word, regs);
+ op->reg = VSX_REGISTER_XTP(rd);
+ op->element_size = 32;
+ switch (word & 0xf) {
+ case 0: /* lxvp */
+ op->type = MKOP(LOAD_VSX, 0, 32);
+ break;
+ case 1: /* stxvp */
+ op->type = MKOP(STORE_VSX, 0, 32);
+ break;
+ }
+ break;
+
case 61: /* stfdp, lxv, stxsd, stxssp, stxv */
- switch (instr & 7) {
+ switch (word & 7) {
case 0: /* stfdp with LSB of DS field = 0 */
case 4: /* stfdp with LSB of DS field = 1 */
- op->ea = dsform_ea(instr, regs);
+ op->ea = dsform_ea(word, regs);
op->type = MKOP(STORE_FP, 0, 16);
break;
case 1: /* lxv */
- op->ea = dqform_ea(instr, regs);
- if (instr & 8)
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->ea = dqform_ea(word, regs);
+ if (word & 8)
op->reg = rd + 32;
op->type = MKOP(LOAD_VSX, 0, 16);
op->element_size = 16;
@@ -2604,7 +2927,9 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
case 2: /* stxsd with LSB of DS field = 0 */
case 6: /* stxsd with LSB of DS field = 1 */
- op->ea = dsform_ea(instr, regs);
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->ea = dsform_ea(word, regs);
op->reg = rd + 32;
op->type = MKOP(STORE_VSX, 0, 8);
op->element_size = 8;
@@ -2613,7 +2938,9 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
case 3: /* stxssp with LSB of DS field = 0 */
case 7: /* stxssp with LSB of DS field = 1 */
- op->ea = dsform_ea(instr, regs);
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->ea = dsform_ea(word, regs);
op->reg = rd + 32;
op->type = MKOP(STORE_VSX, 0, 4);
op->element_size = 8;
@@ -2621,8 +2948,10 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
break;
case 5: /* stxv */
- op->ea = dqform_ea(instr, regs);
- if (instr & 8)
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->ea = dqform_ea(word, regs);
+ if (word & 8)
op->reg = rd + 32;
op->type = MKOP(STORE_VSX, 0, 16);
op->element_size = 16;
@@ -2634,8 +2963,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
#ifdef __powerpc64__
case 62: /* std[u] */
- op->ea = dsform_ea(instr, regs);
- switch (instr & 3) {
+ op->ea = dsform_ea(word, regs);
+ switch (word & 3) {
case 0: /* std */
op->type = MKOP(STORE, 0, 8);
break;
@@ -2648,10 +2977,161 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
break;
}
break;
+ case 1: /* Prefixed instructions */
+ if (!cpu_has_feature(CPU_FTR_ARCH_31))
+ goto unknown_opcode;
+
+ prefix_r = GET_PREFIX_R(word);
+ ra = GET_PREFIX_RA(suffix);
+ op->update_reg = ra;
+ rd = (suffix >> 21) & 0x1f;
+ op->reg = rd;
+ op->val = regs->gpr[rd];
+
+ suffixopcode = get_op(suffix);
+ prefixtype = (word >> 24) & 0x3;
+ switch (prefixtype) {
+ case 0: /* Type 00 Eight-Byte Load/Store */
+ if (prefix_r && ra)
+ break;
+ op->ea = mlsd_8lsd_ea(word, suffix, regs);
+ switch (suffixopcode) {
+ case 41: /* plwa */
+ op->type = MKOP(LOAD, PREFIXED | SIGNEXT, 4);
+ break;
+#ifdef CONFIG_VSX
+ case 42: /* plxsd */
+ op->reg = rd + 32;
+ op->type = MKOP(LOAD_VSX, PREFIXED, 8);
+ op->element_size = 8;
+ op->vsx_flags = VSX_CHECK_VEC;
+ break;
+ case 43: /* plxssp */
+ op->reg = rd + 32;
+ op->type = MKOP(LOAD_VSX, PREFIXED, 4);
+ op->element_size = 8;
+ op->vsx_flags = VSX_FPCONV | VSX_CHECK_VEC;
+ break;
+ case 46: /* pstxsd */
+ op->reg = rd + 32;
+ op->type = MKOP(STORE_VSX, PREFIXED, 8);
+ op->element_size = 8;
+ op->vsx_flags = VSX_CHECK_VEC;
+ break;
+ case 47: /* pstxssp */
+ op->reg = rd + 32;
+ op->type = MKOP(STORE_VSX, PREFIXED, 4);
+ op->element_size = 8;
+ op->vsx_flags = VSX_FPCONV | VSX_CHECK_VEC;
+ break;
+ case 51: /* plxv1 */
+ op->reg += 32;
+ fallthrough;
+ case 50: /* plxv0 */
+ op->type = MKOP(LOAD_VSX, PREFIXED, 16);
+ op->element_size = 16;
+ op->vsx_flags = VSX_CHECK_VEC;
+ break;
+ case 55: /* pstxv1 */
+ op->reg = rd + 32;
+ fallthrough;
+ case 54: /* pstxv0 */
+ op->type = MKOP(STORE_VSX, PREFIXED, 16);
+ op->element_size = 16;
+ op->vsx_flags = VSX_CHECK_VEC;
+ break;
+#endif /* CONFIG_VSX */
+ case 56: /* plq */
+ op->type = MKOP(LOAD, PREFIXED, 16);
+ break;
+ case 57: /* pld */
+ op->type = MKOP(LOAD, PREFIXED, 8);
+ break;
+#ifdef CONFIG_VSX
+ case 58: /* plxvp */
+ op->reg = VSX_REGISTER_XTP(rd);
+ op->type = MKOP(LOAD_VSX, PREFIXED, 32);
+ op->element_size = 32;
+ break;
+#endif /* CONFIG_VSX */
+ case 60: /* pstq */
+ op->type = MKOP(STORE, PREFIXED, 16);
+ break;
+ case 61: /* pstd */
+ op->type = MKOP(STORE, PREFIXED, 8);
+ break;
+#ifdef CONFIG_VSX
+ case 62: /* pstxvp */
+ op->reg = VSX_REGISTER_XTP(rd);
+ op->type = MKOP(STORE_VSX, PREFIXED, 32);
+ op->element_size = 32;
+ break;
+#endif /* CONFIG_VSX */
+ }
+ break;
+ case 1: /* Type 01 Eight-Byte Register-to-Register */
+ break;
+ case 2: /* Type 10 Modified Load/Store */
+ if (prefix_r && ra)
+ break;
+ op->ea = mlsd_8lsd_ea(word, suffix, regs);
+ switch (suffixopcode) {
+ case 32: /* plwz */
+ op->type = MKOP(LOAD, PREFIXED, 4);
+ break;
+ case 34: /* plbz */
+ op->type = MKOP(LOAD, PREFIXED, 1);
+ break;
+ case 36: /* pstw */
+ op->type = MKOP(STORE, PREFIXED, 4);
+ break;
+ case 38: /* pstb */
+ op->type = MKOP(STORE, PREFIXED, 1);
+ break;
+ case 40: /* plhz */
+ op->type = MKOP(LOAD, PREFIXED, 2);
+ break;
+ case 42: /* plha */
+ op->type = MKOP(LOAD, PREFIXED | SIGNEXT, 2);
+ break;
+ case 44: /* psth */
+ op->type = MKOP(STORE, PREFIXED, 2);
+ break;
+ case 48: /* plfs */
+ op->type = MKOP(LOAD_FP, PREFIXED | FPCONV, 4);
+ break;
+ case 50: /* plfd */
+ op->type = MKOP(LOAD_FP, PREFIXED, 8);
+ break;
+ case 52: /* pstfs */
+ op->type = MKOP(STORE_FP, PREFIXED | FPCONV, 4);
+ break;
+ case 54: /* pstfd */
+ op->type = MKOP(STORE_FP, PREFIXED, 8);
+ break;
+ }
+ break;
+ case 3: /* Type 11 Modified Register-to-Register */
+ break;
+ }
#endif /* __powerpc64__ */
}
+ if (OP_IS_LOAD_STORE(op->type) && (op->type & UPDATE)) {
+ switch (GETTYPE(op->type)) {
+ case LOAD:
+ if (ra == rd)
+ goto unknown_opcode;
+ fallthrough;
+ case STORE:
+ case LOAD_FP:
+ case STORE_FP:
+ if (ra == 0)
+ goto unknown_opcode;
+ }
+ }
+
#ifdef CONFIG_VSX
if ((GETTYPE(op->type) == LOAD_VSX ||
GETTYPE(op->type) == STORE_VSX) &&
@@ -2662,8 +3142,12 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
return 0;
+ unknown_opcode:
+ op->type = UNKNOWN;
+ return 0;
+
logical_done:
- if (instr & 1)
+ if (word & 1)
set_cr0(regs, op);
logical_done_nocc:
op->reg = ra;
@@ -2671,7 +3155,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
return 1;
arith_done:
- if (instr & 1)
+ if (word & 1)
set_cr0(regs, op);
compute_done:
op->reg = rd;
@@ -2701,15 +3185,6 @@ NOKPROBE_SYMBOL(analyse_instr);
*/
static nokprobe_inline int handle_stack_update(unsigned long ea, struct pt_regs *regs)
{
-#ifdef CONFIG_PPC32
- /*
- * Check if we will touch kernel stack overflow
- */
- if (ea - STACK_INT_FRAME_SIZE <= current->thread.ksp_limit) {
- printk(KERN_CRIT "Can't kprobe this since kernel stack would overflow.\n");
- return -EINVAL;
- }
-#endif /* CONFIG_PPC32 */
/*
* Check if we already set since that means we'll
* lose the previous value.
@@ -2756,7 +3231,7 @@ void emulate_update_regs(struct pt_regs *regs, struct instruction_op *op)
{
unsigned long next_pc;
- next_pc = truncate_if_32bit(regs->msr, regs->nip + 4);
+ next_pc = truncate_if_32bit(regs->msr, regs->nip + GETLENGTH(op->type));
switch (GETTYPE(op->type)) {
case COMPUTE:
if (op->type & SETREG)
@@ -2787,12 +3262,14 @@ void emulate_update_regs(struct pt_regs *regs, struct instruction_op *op)
case BARRIER_EIEIO:
eieio();
break;
+#ifdef CONFIG_PPC64
case BARRIER_LWSYNC:
asm volatile("lwsync" : : : "memory");
break;
case BARRIER_PTESYNC:
asm volatile("ptesync" : : : "memory");
break;
+#endif
}
break;
@@ -2831,7 +3308,7 @@ void emulate_update_regs(struct pt_regs *regs, struct instruction_op *op)
default:
WARN_ON_ONCE(1);
}
- regs->nip = next_pc;
+ regs_set_return_ip(regs, next_pc);
}
NOKPROBE_SYMBOL(emulate_update_regs);
@@ -2868,7 +3345,7 @@ int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op)
err = 0;
val = 0;
switch (size) {
-#ifdef __powerpc64__
+#ifdef CONFIG_PPC_HAS_LBARX_LHARX
case 1:
__get_user_asmx(val, ea, err, "lbarx");
break;
@@ -2910,7 +3387,7 @@ int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op)
__put_user_asmx(op->val, ea, err, "stbcx.", cr);
break;
case 2:
- __put_user_asmx(op->val, ea, err, "stbcx.", cr);
+ __put_user_asmx(op->val, ea, err, "sthcx.", cr);
break;
#endif
case 4:
@@ -2960,14 +3437,14 @@ int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op)
* stored in the thread_struct. If the instruction is in
* the kernel, we must not touch the state in the thread_struct.
*/
- if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_FP))
+ if (!user_mode(regs) && !(regs->msr & MSR_FP))
return 0;
err = do_fp_load(op, ea, regs, cross_endian);
break;
#endif
#ifdef CONFIG_ALTIVEC
case LOAD_VMX:
- if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_VEC))
+ if (!user_mode(regs) && !(regs->msr & MSR_VEC))
return 0;
err = do_vec_load(op->reg, ea, size, regs, cross_endian);
break;
@@ -2982,7 +3459,7 @@ int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op)
*/
if (op->reg >= 32 && (op->vsx_flags & VSX_CHECK_VEC))
msrbit = MSR_VEC;
- if (!(regs->msr & MSR_PR) && !(regs->msr & msrbit))
+ if (!user_mode(regs) && !(regs->msr & msrbit))
return 0;
err = do_vsx_load(op, ea, regs, cross_endian);
break;
@@ -3018,8 +3495,7 @@ int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op)
}
#endif
if ((op->type & UPDATE) && size == sizeof(long) &&
- op->reg == 1 && op->update_reg == 1 &&
- !(regs->msr & MSR_PR) &&
+ op->reg == 1 && op->update_reg == 1 && !user_mode(regs) &&
ea >= regs->gpr[1] - STACK_INT_FRAME_SIZE) {
err = handle_stack_update(ea, regs);
break;
@@ -3031,14 +3507,14 @@ int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op)
#ifdef CONFIG_PPC_FPU
case STORE_FP:
- if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_FP))
+ if (!user_mode(regs) && !(regs->msr & MSR_FP))
return 0;
err = do_fp_store(op, ea, regs, cross_endian);
break;
#endif
#ifdef CONFIG_ALTIVEC
case STORE_VMX:
- if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_VEC))
+ if (!user_mode(regs) && !(regs->msr & MSR_VEC))
return 0;
err = do_vec_store(op->reg, ea, size, regs, cross_endian);
break;
@@ -3053,7 +3529,7 @@ int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op)
*/
if (op->reg >= 32 && (op->vsx_flags & VSX_CHECK_VEC))
msrbit = MSR_VEC;
- if (!(regs->msr & MSR_PR) && !(regs->msr & msrbit))
+ if (!user_mode(regs) && !(regs->msr & msrbit))
return 0;
err = do_vsx_store(op, ea, regs, cross_endian);
break;
@@ -3101,7 +3577,7 @@ NOKPROBE_SYMBOL(emulate_loadstore);
* or -1 if the instruction is one that should not be stepped,
* such as an rfid, or a mtmsrd that would clear MSR_RI.
*/
-int emulate_step(struct pt_regs *regs, unsigned int instr)
+int emulate_step(struct pt_regs *regs, ppc_inst_t instr)
{
struct instruction_op op;
int r, err, type;
@@ -3169,38 +3645,31 @@ int emulate_step(struct pt_regs *regs, unsigned int instr)
/* can't step mtmsr[d] that would clear MSR_RI */
return -1;
/* here op.val is the mask of bits to change */
- regs->msr = (regs->msr & ~op.val) | (val & op.val);
+ regs_set_return_msr(regs, (regs->msr & ~op.val) | (val & op.val));
goto instr_done;
-#ifdef CONFIG_PPC64
case SYSCALL: /* sc */
/*
- * N.B. this uses knowledge about how the syscall
- * entry code works. If that is changed, this will
- * need to be changed also.
+ * Per ISA v3.1, section 7.5.15 'Trace Interrupt', we can't
+ * single step a system call instruction:
+ *
+ * Successful completion for an instruction means that the
+ * instruction caused no other interrupt. Thus a Trace
+ * interrupt never occurs for a System Call or System Call
+ * Vectored instruction, or for a Trap instruction that
+ * traps.
*/
- if (regs->gpr[0] == 0x1ebe &&
- cpu_has_feature(CPU_FTR_REAL_LE)) {
- regs->msr ^= MSR_LE;
- goto instr_done;
- }
- regs->gpr[9] = regs->gpr[13];
- regs->gpr[10] = MSR_KERNEL;
- regs->gpr[11] = regs->nip + 4;
- regs->gpr[12] = regs->msr & MSR_MASK;
- regs->gpr[13] = (unsigned long) get_paca();
- regs->nip = (unsigned long) &system_call_common;
- regs->msr = MSR_KERNEL;
- return 1;
-
+ return -1;
+ case SYSCALL_VECTORED_0: /* scv 0 */
+ return -1;
case RFI:
return -1;
-#endif
}
return 0;
instr_done:
- regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4);
+ regs_set_return_ip(regs,
+ truncate_if_32bit(regs->msr, regs->nip + GETLENGTH(op.type)));
return 1;
}
NOKPROBE_SYMBOL(emulate_step);
diff --git a/arch/powerpc/lib/string.S b/arch/powerpc/lib/string.S
index 2752b1cc1d45..daa72061dc0c 100644
--- a/arch/powerpc/lib/string.S
+++ b/arch/powerpc/lib/string.S
@@ -4,8 +4,8 @@
*
* Copyright (C) 1996 Paul Mackerras.
*/
+#include <linux/export.h>
#include <asm/ppc_asm.h>
-#include <asm/export.h>
#include <asm/cache.h>
.text
diff --git a/arch/powerpc/lib/string_32.S b/arch/powerpc/lib/string_32.S
index f69a6aab7bfb..3ee45619a3f8 100644
--- a/arch/powerpc/lib/string_32.S
+++ b/arch/powerpc/lib/string_32.S
@@ -7,8 +7,8 @@
*
*/
+#include <linux/export.h>
#include <asm/ppc_asm.h>
-#include <asm/export.h>
#include <asm/cache.h>
.text
@@ -17,7 +17,7 @@ CACHELINE_BYTES = L1_CACHE_BYTES
LG_CACHELINE_BYTES = L1_CACHE_SHIFT
CACHELINE_MASK = (L1_CACHE_BYTES-1)
-_GLOBAL(__clear_user)
+_GLOBAL(__arch_clear_user)
/*
* Use dcbz on the complete cache lines in the destination
* to set them to zero. This requires that the destination
@@ -87,4 +87,4 @@ _GLOBAL(__clear_user)
EX_TABLE(8b, 91b)
EX_TABLE(9b, 91b)
-EXPORT_SYMBOL(__clear_user)
+EXPORT_SYMBOL(__arch_clear_user)
diff --git a/arch/powerpc/lib/string_64.S b/arch/powerpc/lib/string_64.S
index 507b18b1660e..a25eb8588434 100644
--- a/arch/powerpc/lib/string_64.S
+++ b/arch/powerpc/lib/string_64.S
@@ -6,18 +6,13 @@
* Author: Anton Blanchard <anton@au.ibm.com>
*/
+#include <linux/export.h>
#include <asm/ppc_asm.h>
#include <asm/linkage.h>
#include <asm/asm-offsets.h>
-#include <asm/export.h>
-
- .section ".toc","aw"
-PPC64_CACHES:
- .tc ppc64_caches[TC],ppc64_caches
- .section ".text"
/**
- * __clear_user: - Zero a block of memory in user space, with less checking.
+ * __arch_clear_user: - Zero a block of memory in user space, with less checking.
* @to: Destination address, in user space.
* @n: Number of bytes to zero.
*
@@ -58,7 +53,7 @@ err3; stb r0,0(r3)
mr r3,r4
blr
-_GLOBAL_TOC(__clear_user)
+_GLOBAL_TOC(__arch_clear_user)
cmpdi r4,32
neg r6,r3
li r0,0
@@ -133,7 +128,7 @@ err1; stb r0,0(r3)
blr
.Llong_clear:
- ld r5,PPC64_CACHES@toc(r2)
+ LOAD_REG_ADDR(r5, ppc64_caches)
bf cr7*4+0,11f
err2; std r0,0(r3)
@@ -181,4 +176,4 @@ err1; dcbz 0,r3
cmpdi r4,32
blt .Lshort_clear
b .Lmedium_clear
-EXPORT_SYMBOL(__clear_user)
+EXPORT_SYMBOL(__arch_clear_user)
diff --git a/arch/powerpc/lib/strlen_32.S b/arch/powerpc/lib/strlen_32.S
index 0a8d3f64d493..bbd24feb233f 100644
--- a/arch/powerpc/lib/strlen_32.S
+++ b/arch/powerpc/lib/strlen_32.S
@@ -6,8 +6,8 @@
*
* Inspired from glibc implementation
*/
+#include <linux/export.h>
#include <asm/ppc_asm.h>
-#include <asm/export.h>
#include <asm/cache.h>
.text
diff --git a/arch/powerpc/lib/test-code-patching.c b/arch/powerpc/lib/test-code-patching.c
new file mode 100644
index 000000000000..c44823292f73
--- /dev/null
+++ b/arch/powerpc/lib/test-code-patching.c
@@ -0,0 +1,362 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2008 Michael Ellerman, IBM Corporation.
+ */
+
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+
+#include <asm/code-patching.h>
+
+static int __init instr_is_branch_to_addr(const u32 *instr, unsigned long addr)
+{
+ if (instr_is_branch_iform(ppc_inst_read(instr)) ||
+ instr_is_branch_bform(ppc_inst_read(instr)))
+ return branch_target(instr) == addr;
+
+ return 0;
+}
+
+static void __init test_trampoline(void)
+{
+ asm ("nop;nop;\n");
+}
+
+#define check(x) do { \
+ if (!(x)) \
+ pr_err("code-patching: test failed at line %d\n", __LINE__); \
+} while (0)
+
+static void __init test_branch_iform(void)
+{
+ int err;
+ ppc_inst_t instr;
+ u32 tmp[2];
+ u32 *iptr = tmp;
+ unsigned long addr = (unsigned long)tmp;
+
+ /* The simplest case, branch to self, no flags */
+ check(instr_is_branch_iform(ppc_inst(0x48000000)));
+ /* All bits of target set, and flags */
+ check(instr_is_branch_iform(ppc_inst(0x4bffffff)));
+ /* High bit of opcode set, which is wrong */
+ check(!instr_is_branch_iform(ppc_inst(0xcbffffff)));
+ /* Middle bits of opcode set, which is wrong */
+ check(!instr_is_branch_iform(ppc_inst(0x7bffffff)));
+
+ /* Simplest case, branch to self with link */
+ check(instr_is_branch_iform(ppc_inst(0x48000001)));
+ /* All bits of targets set */
+ check(instr_is_branch_iform(ppc_inst(0x4bfffffd)));
+ /* Some bits of targets set */
+ check(instr_is_branch_iform(ppc_inst(0x4bff00fd)));
+ /* Must be a valid branch to start with */
+ check(!instr_is_branch_iform(ppc_inst(0x7bfffffd)));
+
+ /* Absolute branch to 0x100 */
+ ppc_inst_write(iptr, ppc_inst(0x48000103));
+ check(instr_is_branch_to_addr(iptr, 0x100));
+ /* Absolute branch to 0x420fc */
+ ppc_inst_write(iptr, ppc_inst(0x480420ff));
+ check(instr_is_branch_to_addr(iptr, 0x420fc));
+ /* Maximum positive relative branch, + 32 MB - 4B */
+ ppc_inst_write(iptr, ppc_inst(0x49fffffc));
+ check(instr_is_branch_to_addr(iptr, addr + 0x1FFFFFC));
+ /* Smallest negative relative branch, - 4B */
+ ppc_inst_write(iptr, ppc_inst(0x4bfffffc));
+ check(instr_is_branch_to_addr(iptr, addr - 4));
+ /* Largest negative relative branch, - 32 MB */
+ ppc_inst_write(iptr, ppc_inst(0x4a000000));
+ check(instr_is_branch_to_addr(iptr, addr - 0x2000000));
+
+ /* Branch to self, with link */
+ err = create_branch(&instr, iptr, addr, BRANCH_SET_LINK);
+ ppc_inst_write(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, addr));
+
+ /* Branch to self - 0x100, with link */
+ err = create_branch(&instr, iptr, addr - 0x100, BRANCH_SET_LINK);
+ ppc_inst_write(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, addr - 0x100));
+
+ /* Branch to self + 0x100, no link */
+ err = create_branch(&instr, iptr, addr + 0x100, 0);
+ ppc_inst_write(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, addr + 0x100));
+
+ /* Maximum relative negative offset, - 32 MB */
+ err = create_branch(&instr, iptr, addr - 0x2000000, BRANCH_SET_LINK);
+ ppc_inst_write(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, addr - 0x2000000));
+
+ /* Out of range relative negative offset, -(32 MB + 4 B) */
+ err = create_branch(&instr, iptr, addr - 0x2000004, BRANCH_SET_LINK);
+ check(err);
+
+ /* Out of range relative positive offset, + 32 MB */
+ err = create_branch(&instr, iptr, addr + 0x2000000, BRANCH_SET_LINK);
+ check(err);
+
+ /* Unaligned target */
+ err = create_branch(&instr, iptr, addr + 3, BRANCH_SET_LINK);
+ check(err);
+
+ /* Check flags are masked correctly */
+ err = create_branch(&instr, iptr, addr, 0xFFFFFFFC);
+ ppc_inst_write(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, addr));
+ check(ppc_inst_equal(instr, ppc_inst(0x48000000)));
+}
+
+static void __init test_create_function_call(void)
+{
+ u32 *iptr;
+ unsigned long dest;
+ ppc_inst_t instr;
+
+ /* Check we can create a function call */
+ iptr = (u32 *)ppc_function_entry(test_trampoline);
+ dest = ppc_function_entry(test_create_function_call);
+ create_branch(&instr, iptr, dest, BRANCH_SET_LINK);
+ patch_instruction(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, dest));
+}
+
+static void __init test_branch_bform(void)
+{
+ int err;
+ unsigned long addr;
+ ppc_inst_t instr;
+ u32 tmp[2];
+ u32 *iptr = tmp;
+ unsigned int flags;
+
+ addr = (unsigned long)iptr;
+
+ /* The simplest case, branch to self, no flags */
+ check(instr_is_branch_bform(ppc_inst(0x40000000)));
+ /* All bits of target set, and flags */
+ check(instr_is_branch_bform(ppc_inst(0x43ffffff)));
+ /* High bit of opcode set, which is wrong */
+ check(!instr_is_branch_bform(ppc_inst(0xc3ffffff)));
+ /* Middle bits of opcode set, which is wrong */
+ check(!instr_is_branch_bform(ppc_inst(0x7bffffff)));
+
+ /* Absolute conditional branch to 0x100 */
+ ppc_inst_write(iptr, ppc_inst(0x43ff0103));
+ check(instr_is_branch_to_addr(iptr, 0x100));
+ /* Absolute conditional branch to 0x20fc */
+ ppc_inst_write(iptr, ppc_inst(0x43ff20ff));
+ check(instr_is_branch_to_addr(iptr, 0x20fc));
+ /* Maximum positive relative conditional branch, + 32 KB - 4B */
+ ppc_inst_write(iptr, ppc_inst(0x43ff7ffc));
+ check(instr_is_branch_to_addr(iptr, addr + 0x7FFC));
+ /* Smallest negative relative conditional branch, - 4B */
+ ppc_inst_write(iptr, ppc_inst(0x43fffffc));
+ check(instr_is_branch_to_addr(iptr, addr - 4));
+ /* Largest negative relative conditional branch, - 32 KB */
+ ppc_inst_write(iptr, ppc_inst(0x43ff8000));
+ check(instr_is_branch_to_addr(iptr, addr - 0x8000));
+
+ /* All condition code bits set & link */
+ flags = 0x3ff000 | BRANCH_SET_LINK;
+
+ /* Branch to self */
+ err = create_cond_branch(&instr, iptr, addr, flags);
+ ppc_inst_write(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, addr));
+
+ /* Branch to self - 0x100 */
+ err = create_cond_branch(&instr, iptr, addr - 0x100, flags);
+ ppc_inst_write(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, addr - 0x100));
+
+ /* Branch to self + 0x100 */
+ err = create_cond_branch(&instr, iptr, addr + 0x100, flags);
+ ppc_inst_write(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, addr + 0x100));
+
+ /* Maximum relative negative offset, - 32 KB */
+ err = create_cond_branch(&instr, iptr, addr - 0x8000, flags);
+ ppc_inst_write(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, addr - 0x8000));
+
+ /* Out of range relative negative offset, -(32 KB + 4 B) */
+ err = create_cond_branch(&instr, iptr, addr - 0x8004, flags);
+ check(err);
+
+ /* Out of range relative positive offset, + 32 KB */
+ err = create_cond_branch(&instr, iptr, addr + 0x8000, flags);
+ check(err);
+
+ /* Unaligned target */
+ err = create_cond_branch(&instr, iptr, addr + 3, flags);
+ check(err);
+
+ /* Check flags are masked correctly */
+ err = create_cond_branch(&instr, iptr, addr, 0xFFFFFFFC);
+ ppc_inst_write(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, addr));
+ check(ppc_inst_equal(instr, ppc_inst(0x43FF0000)));
+}
+
+static void __init test_translate_branch(void)
+{
+ unsigned long addr;
+ void *p, *q;
+ ppc_inst_t instr;
+ void *buf;
+
+ buf = vmalloc(PAGE_ALIGN(0x2000000 + 1));
+ check(buf);
+ if (!buf)
+ return;
+
+ /* Simple case, branch to self moved a little */
+ p = buf;
+ addr = (unsigned long)p;
+ create_branch(&instr, p, addr, 0);
+ ppc_inst_write(p, instr);
+ check(instr_is_branch_to_addr(p, addr));
+ q = p + 4;
+ translate_branch(&instr, q, p);
+ ppc_inst_write(q, instr);
+ check(instr_is_branch_to_addr(q, addr));
+
+ /* Maximum negative case, move b . to addr + 32 MB */
+ p = buf;
+ addr = (unsigned long)p;
+ create_branch(&instr, p, addr, 0);
+ ppc_inst_write(p, instr);
+ q = buf + 0x2000000;
+ translate_branch(&instr, q, p);
+ ppc_inst_write(q, instr);
+ check(instr_is_branch_to_addr(p, addr));
+ check(instr_is_branch_to_addr(q, addr));
+ check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x4a000000)));
+
+ /* Maximum positive case, move x to x - 32 MB + 4 */
+ p = buf + 0x2000000;
+ addr = (unsigned long)p;
+ create_branch(&instr, p, addr, 0);
+ ppc_inst_write(p, instr);
+ q = buf + 4;
+ translate_branch(&instr, q, p);
+ ppc_inst_write(q, instr);
+ check(instr_is_branch_to_addr(p, addr));
+ check(instr_is_branch_to_addr(q, addr));
+ check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x49fffffc)));
+
+ /* Jump to x + 16 MB moved to x + 20 MB */
+ p = buf;
+ addr = 0x1000000 + (unsigned long)buf;
+ create_branch(&instr, p, addr, BRANCH_SET_LINK);
+ ppc_inst_write(p, instr);
+ q = buf + 0x1400000;
+ translate_branch(&instr, q, p);
+ ppc_inst_write(q, instr);
+ check(instr_is_branch_to_addr(p, addr));
+ check(instr_is_branch_to_addr(q, addr));
+
+ /* Jump to x + 16 MB moved to x - 16 MB + 4 */
+ p = buf + 0x1000000;
+ addr = 0x2000000 + (unsigned long)buf;
+ create_branch(&instr, p, addr, 0);
+ ppc_inst_write(p, instr);
+ q = buf + 4;
+ translate_branch(&instr, q, p);
+ ppc_inst_write(q, instr);
+ check(instr_is_branch_to_addr(p, addr));
+ check(instr_is_branch_to_addr(q, addr));
+
+
+ /* Conditional branch tests */
+
+ /* Simple case, branch to self moved a little */
+ p = buf;
+ addr = (unsigned long)p;
+ create_cond_branch(&instr, p, addr, 0);
+ ppc_inst_write(p, instr);
+ check(instr_is_branch_to_addr(p, addr));
+ q = buf + 4;
+ translate_branch(&instr, q, p);
+ ppc_inst_write(q, instr);
+ check(instr_is_branch_to_addr(q, addr));
+
+ /* Maximum negative case, move b . to addr + 32 KB */
+ p = buf;
+ addr = (unsigned long)p;
+ create_cond_branch(&instr, p, addr, 0xFFFFFFFC);
+ ppc_inst_write(p, instr);
+ q = buf + 0x8000;
+ translate_branch(&instr, q, p);
+ ppc_inst_write(q, instr);
+ check(instr_is_branch_to_addr(p, addr));
+ check(instr_is_branch_to_addr(q, addr));
+ check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x43ff8000)));
+
+ /* Maximum positive case, move x to x - 32 KB + 4 */
+ p = buf + 0x8000;
+ addr = (unsigned long)p;
+ create_cond_branch(&instr, p, addr, 0xFFFFFFFC);
+ ppc_inst_write(p, instr);
+ q = buf + 4;
+ translate_branch(&instr, q, p);
+ ppc_inst_write(q, instr);
+ check(instr_is_branch_to_addr(p, addr));
+ check(instr_is_branch_to_addr(q, addr));
+ check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x43ff7ffc)));
+
+ /* Jump to x + 12 KB moved to x + 20 KB */
+ p = buf;
+ addr = 0x3000 + (unsigned long)buf;
+ create_cond_branch(&instr, p, addr, BRANCH_SET_LINK);
+ ppc_inst_write(p, instr);
+ q = buf + 0x5000;
+ translate_branch(&instr, q, p);
+ ppc_inst_write(q, instr);
+ check(instr_is_branch_to_addr(p, addr));
+ check(instr_is_branch_to_addr(q, addr));
+
+ /* Jump to x + 8 KB moved to x - 8 KB + 4 */
+ p = buf + 0x2000;
+ addr = 0x4000 + (unsigned long)buf;
+ create_cond_branch(&instr, p, addr, 0);
+ ppc_inst_write(p, instr);
+ q = buf + 4;
+ translate_branch(&instr, q, p);
+ ppc_inst_write(q, instr);
+ check(instr_is_branch_to_addr(p, addr));
+ check(instr_is_branch_to_addr(q, addr));
+
+ /* Free the buffer we were using */
+ vfree(buf);
+}
+
+static void __init test_prefixed_patching(void)
+{
+ u32 *iptr = (u32 *)ppc_function_entry(test_trampoline);
+ u32 expected[2] = {OP_PREFIX << 26, 0};
+ ppc_inst_t inst = ppc_inst_prefix(OP_PREFIX << 26, 0);
+
+ if (!IS_ENABLED(CONFIG_PPC64))
+ return;
+
+ patch_instruction(iptr, inst);
+
+ check(!memcmp(iptr, expected, sizeof(expected)));
+}
+
+static int __init test_code_patching(void)
+{
+ pr_info("Running code patching self-tests ...\n");
+
+ test_branch_iform();
+ test_branch_bform();
+ test_create_function_call();
+ test_translate_branch();
+ test_prefixed_patching();
+
+ return 0;
+}
+late_initcall(test_code_patching);
diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c
index 42347067739c..23c7805fb7b3 100644
--- a/arch/powerpc/lib/test_emulate_step.c
+++ b/arch/powerpc/lib/test_emulate_step.c
@@ -8,57 +8,50 @@
#define pr_fmt(fmt) "emulate_step_test: " fmt
#include <linux/ptrace.h>
+#include <asm/cpu_has_feature.h>
#include <asm/sstep.h>
#include <asm/ppc-opcode.h>
#include <asm/code-patching.h>
-
-#define IMM_L(i) ((uintptr_t)(i) & 0xffff)
-
-/*
- * Defined with TEST_ prefix so it does not conflict with other
- * definitions.
- */
-#define TEST_LD(r, base, i) (PPC_INST_LD | ___PPC_RT(r) | \
- ___PPC_RA(base) | IMM_L(i))
-#define TEST_LWZ(r, base, i) (PPC_INST_LWZ | ___PPC_RT(r) | \
- ___PPC_RA(base) | IMM_L(i))
-#define TEST_LWZX(t, a, b) (PPC_INST_LWZX | ___PPC_RT(t) | \
- ___PPC_RA(a) | ___PPC_RB(b))
-#define TEST_STD(r, base, i) (PPC_INST_STD | ___PPC_RS(r) | \
- ___PPC_RA(base) | ((i) & 0xfffc))
-#define TEST_LDARX(t, a, b, eh) (PPC_INST_LDARX | ___PPC_RT(t) | \
- ___PPC_RA(a) | ___PPC_RB(b) | \
- __PPC_EH(eh))
-#define TEST_STDCX(s, a, b) (PPC_INST_STDCX | ___PPC_RS(s) | \
- ___PPC_RA(a) | ___PPC_RB(b))
-#define TEST_LFSX(t, a, b) (PPC_INST_LFSX | ___PPC_RT(t) | \
- ___PPC_RA(a) | ___PPC_RB(b))
-#define TEST_STFSX(s, a, b) (PPC_INST_STFSX | ___PPC_RS(s) | \
- ___PPC_RA(a) | ___PPC_RB(b))
-#define TEST_LFDX(t, a, b) (PPC_INST_LFDX | ___PPC_RT(t) | \
- ___PPC_RA(a) | ___PPC_RB(b))
-#define TEST_STFDX(s, a, b) (PPC_INST_STFDX | ___PPC_RS(s) | \
- ___PPC_RA(a) | ___PPC_RB(b))
-#define TEST_LVX(t, a, b) (PPC_INST_LVX | ___PPC_RT(t) | \
- ___PPC_RA(a) | ___PPC_RB(b))
-#define TEST_STVX(s, a, b) (PPC_INST_STVX | ___PPC_RS(s) | \
- ___PPC_RA(a) | ___PPC_RB(b))
-#define TEST_LXVD2X(s, a, b) (PPC_INST_LXVD2X | VSX_XX1((s), R##a, R##b))
-#define TEST_STXVD2X(s, a, b) (PPC_INST_STXVD2X | VSX_XX1((s), R##a, R##b))
-#define TEST_ADD(t, a, b) (PPC_INST_ADD | ___PPC_RT(t) | \
- ___PPC_RA(a) | ___PPC_RB(b))
-#define TEST_ADD_DOT(t, a, b) (PPC_INST_ADD | ___PPC_RT(t) | \
- ___PPC_RA(a) | ___PPC_RB(b) | 0x1)
-#define TEST_ADDC(t, a, b) (PPC_INST_ADDC | ___PPC_RT(t) | \
- ___PPC_RA(a) | ___PPC_RB(b))
-#define TEST_ADDC_DOT(t, a, b) (PPC_INST_ADDC | ___PPC_RT(t) | \
- ___PPC_RA(a) | ___PPC_RB(b) | 0x1)
+#include <asm/inst.h>
#define MAX_SUBTESTS 16
#define IGNORE_GPR(n) (0x1UL << (n))
#define IGNORE_XER (0x1UL << 32)
#define IGNORE_CCR (0x1UL << 33)
+#define NEGATIVE_TEST (0x1UL << 63)
+
+/*
+ * Builders for the prefixed instructions exercised by the tests below.
+ *
+ * Each macro expands to ppc_inst_prefix(prefix word, suffix word):
+ *   - the prefix word ORs PPC_PREFIX_8LS or PPC_PREFIX_MLS with
+ *     __PPC_PRFX_R(pr) and IMM_H(i);
+ *   - the suffix word is either an opcode constant ORed with the register
+ *     fields and IMM_L(i), or the matching PPC_RAW_*() macro.
+ *
+ * r/t is the register operand, base/a is RA and i is the immediate.
+ * NOTE(review): pr is placed in the prefix R field, presumably selecting
+ * PC-relative addressing - confirm against the ISA.
+ */
+#define TEST_PLD(r, base, i, pr) \
+ ppc_inst_prefix(PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_H(i), \
+ PPC_INST_PLD | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i))
+
+#define TEST_PLWZ(r, base, i, pr) \
+ ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \
+ PPC_RAW_LWZ(r, base, i))
+
+#define TEST_PSTD(r, base, i, pr) \
+ ppc_inst_prefix(PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_H(i), \
+ PPC_INST_PSTD | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i))
+
+#define TEST_PLFS(r, base, i, pr) \
+ ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \
+ PPC_INST_LFS | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i))
+
+#define TEST_PSTFS(r, base, i, pr) \
+ ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \
+ PPC_INST_STFS | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i))
+
+#define TEST_PLFD(r, base, i, pr) \
+ ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \
+ PPC_INST_LFD | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i))
+
+#define TEST_PSTFD(r, base, i, pr) \
+ ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \
+ PPC_INST_STFD | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i))
+
+#define TEST_PADDI(t, a, i, pr) \
+ ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \
+ PPC_RAW_ADDI(t, a, i))
static void __init init_pt_regs(struct pt_regs *regs)
{
@@ -103,7 +96,7 @@ static void __init test_ld(void)
regs.gpr[3] = (unsigned long) &a;
/* ld r5, 0(r3) */
- stepped = emulate_step(&regs, TEST_LD(5, 3, 0));
+ stepped = emulate_step(&regs, ppc_inst(PPC_RAW_LD(5, 3, 0)));
if (stepped == 1 && regs.gpr[5] == a)
show_result("ld", "PASS");
@@ -111,6 +104,29 @@ static void __init test_ld(void)
show_result("ld", "FAIL");
}
+/*
+ * Emulate "pld r5, 0(r3), 0" with r3 pointing at a local variable.
+ * PASS requires that the step was emulated and that r5 received the value
+ * of the local. Skipped when the CPU lacks CPU_FTR_ARCH_31.
+ */
+static void __init test_pld(void)
+{
+	unsigned long mem = 0x23;
+	struct pt_regs regs;
+	int rc;
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
+		show_result("pld", "SKIP (!CPU_FTR_ARCH_31)");
+		return;
+	}
+
+	init_pt_regs(&regs);
+	regs.gpr[3] = (unsigned long)&mem;
+
+	/* pld r5, 0(r3), 0 */
+	rc = emulate_step(&regs, TEST_PLD(5, 3, 0, 0));
+
+	show_result("pld", (rc == 1 && regs.gpr[5] == mem) ? "PASS" : "FAIL");
+}
+
static void __init test_lwz(void)
{
struct pt_regs regs;
@@ -121,7 +137,7 @@ static void __init test_lwz(void)
regs.gpr[3] = (unsigned long) &a;
/* lwz r5, 0(r3) */
- stepped = emulate_step(&regs, TEST_LWZ(5, 3, 0));
+ stepped = emulate_step(&regs, ppc_inst(PPC_RAW_LWZ(5, 3, 0)));
if (stepped == 1 && regs.gpr[5] == a)
show_result("lwz", "PASS");
@@ -129,6 +145,30 @@ static void __init test_lwz(void)
show_result("lwz", "FAIL");
}
+/*
+ * Emulate "plwz r5, 0(r3), 0" with r3 pointing at a local 32-bit variable.
+ * PASS requires that the step was emulated and that r5 received the value
+ * of the local. Skipped when the CPU lacks CPU_FTR_ARCH_31.
+ */
+static void __init test_plwz(void)
+{
+	unsigned int mem = 0x4545;
+	struct pt_regs regs;
+	int rc;
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
+		show_result("plwz", "SKIP (!CPU_FTR_ARCH_31)");
+		return;
+	}
+
+	init_pt_regs(&regs);
+	regs.gpr[3] = (unsigned long)&mem;
+
+	/* plwz r5, 0(r3), 0 */
+	rc = emulate_step(&regs, TEST_PLWZ(5, 3, 0, 0));
+
+	show_result("plwz", (rc == 1 && regs.gpr[5] == mem) ? "PASS" : "FAIL");
+}
+
static void __init test_lwzx(void)
{
struct pt_regs regs;
@@ -141,7 +181,7 @@ static void __init test_lwzx(void)
regs.gpr[5] = 0x8765;
/* lwzx r5, r3, r4 */
- stepped = emulate_step(&regs, TEST_LWZX(5, 3, 4));
+ stepped = emulate_step(&regs, ppc_inst(PPC_RAW_LWZX(5, 3, 4)));
if (stepped == 1 && regs.gpr[5] == a[2])
show_result("lwzx", "PASS");
else
@@ -159,13 +199,36 @@ static void __init test_std(void)
regs.gpr[5] = 0x5678;
/* std r5, 0(r3) */
- stepped = emulate_step(&regs, TEST_STD(5, 3, 0));
- if (stepped == 1 || regs.gpr[5] == a)
+ stepped = emulate_step(&regs, ppc_inst(PPC_RAW_STD(5, 3, 0)));
+ if (stepped == 1 && regs.gpr[5] == a)
show_result("std", "PASS");
else
show_result("std", "FAIL");
}
+/*
+ * Emulate "pstd r5, 0(r3), 0" with r3 pointing at a local variable.
+ *
+ * PASS requires that the instruction was emulated (emulate_step() returned
+ * 1) and that the local now holds the value that was in r5. Skipped when
+ * the CPU lacks CPU_FTR_ARCH_31.
+ */
+static void __init test_pstd(void)
+{
+	struct pt_regs regs;
+	unsigned long a = 0x1234;
+	int stepped = -1;
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
+		show_result("pstd", "SKIP (!CPU_FTR_ARCH_31)");
+		return;
+	}
+
+	init_pt_regs(&regs);
+	regs.gpr[3] = (unsigned long)&a;
+	regs.gpr[5] = 0x5678;
+
+	/* pstd r5, 0(r3), 0 */
+	stepped = emulate_step(&regs, TEST_PSTD(5, 3, 0, 0));
+
+	/*
+	 * Both conditions are required: with "||" a successful step would
+	 * report PASS even when the stored value is wrong.
+	 */
+	if (stepped == 1 && regs.gpr[5] == a)
+		show_result("pstd", "PASS");
+	else
+		show_result("pstd", "FAIL");
+}
+
static void __init test_ldarx_stdcx(void)
{
struct pt_regs regs;
@@ -184,7 +247,7 @@ static void __init test_ldarx_stdcx(void)
regs.gpr[5] = 0x5678;
/* ldarx r5, r3, r4, 0 */
- stepped = emulate_step(&regs, TEST_LDARX(5, 3, 4, 0));
+ stepped = emulate_step(&regs, ppc_inst(PPC_RAW_LDARX(5, 3, 4, 0)));
/*
* Don't touch 'a' here. Touching 'a' can do Load/store
@@ -202,7 +265,7 @@ static void __init test_ldarx_stdcx(void)
regs.gpr[5] = 0x9ABC;
/* stdcx. r5, r3, r4 */
- stepped = emulate_step(&regs, TEST_STDCX(5, 3, 4));
+ stepped = emulate_step(&regs, ppc_inst(PPC_RAW_STDCX(5, 3, 4)));
/*
* Two possible scenarios that indicates successful emulation
@@ -242,7 +305,7 @@ static void __init test_lfsx_stfsx(void)
regs.gpr[4] = 0;
/* lfsx frt10, r3, r4 */
- stepped = emulate_step(&regs, TEST_LFSX(10, 3, 4));
+ stepped = emulate_step(&regs, ppc_inst(PPC_RAW_LFSX(10, 3, 4)));
if (stepped == 1)
show_result("lfsx", "PASS");
@@ -255,7 +318,7 @@ static void __init test_lfsx_stfsx(void)
c.a = 678.91;
/* stfsx frs10, r3, r4 */
- stepped = emulate_step(&regs, TEST_STFSX(10, 3, 4));
+ stepped = emulate_step(&regs, ppc_inst(PPC_RAW_STFSX(10, 3, 4)));
if (stepped == 1 && c.b == cached_b)
show_result("stfsx", "PASS");
@@ -263,6 +326,53 @@ static void __init test_lfsx_stfsx(void)
show_result("stfsx", "FAIL");
}
+/*
+ * Exercise emulation of plfs and pstfs through a float/int union.
+ *
+ * plfs passes when the step is emulated. The memory is then overwritten
+ * with a different float before pstfs runs, so pstfs passes only when the
+ * step is emulated and the word in memory again equals the bit pattern
+ * cached before the load. Both subtests are reported as skipped when the
+ * CPU lacks CPU_FTR_ARCH_31.
+ */
+static void __init test_plfs_pstfs(void)
+{
+	struct pt_regs regs;
+	union {
+		float a;
+		int b;
+	} c;
+	int cached_b;
+	int stepped = -1;
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
+		/* Report under the mnemonics this function actually tests */
+		show_result("plfs", "SKIP (!CPU_FTR_ARCH_31)");
+		show_result("pstfs", "SKIP (!CPU_FTR_ARCH_31)");
+		return;
+	}
+
+	init_pt_regs(&regs);
+
+
+	/*** plfs ***/
+
+	c.a = 123.45;
+	cached_b = c.b;
+
+	regs.gpr[3] = (unsigned long)&c.a;
+
+	/* plfs frt10, 0(r3), 0 */
+	stepped = emulate_step(&regs, TEST_PLFS(10, 3, 0, 0));
+
+	if (stepped == 1)
+		show_result("plfs", "PASS");
+	else
+		show_result("plfs", "FAIL");
+
+
+	/*** pstfs ***/
+
+	/* Clobber memory so that only a real store can restore cached_b */
+	c.a = 678.91;
+
+	/* pstfs frs10, 0(r3), 0 */
+	stepped = emulate_step(&regs, TEST_PSTFS(10, 3, 0, 0));
+
+	if (stepped == 1 && c.b == cached_b)
+		show_result("pstfs", "PASS");
+	else
+		show_result("pstfs", "FAIL");
+}
+
static void __init test_lfdx_stfdx(void)
{
struct pt_regs regs;
@@ -285,7 +395,7 @@ static void __init test_lfdx_stfdx(void)
regs.gpr[4] = 0;
/* lfdx frt10, r3, r4 */
- stepped = emulate_step(&regs, TEST_LFDX(10, 3, 4));
+ stepped = emulate_step(&regs, ppc_inst(PPC_RAW_LFDX(10, 3, 4)));
if (stepped == 1)
show_result("lfdx", "PASS");
@@ -298,13 +408,60 @@ static void __init test_lfdx_stfdx(void)
c.a = 987654.32;
/* stfdx frs10, r3, r4 */
- stepped = emulate_step(&regs, TEST_STFDX(10, 3, 4));
+ stepped = emulate_step(&regs, ppc_inst(PPC_RAW_STFDX(10, 3, 4)));
if (stepped == 1 && c.b == cached_b)
show_result("stfdx", "PASS");
else
show_result("stfdx", "FAIL");
}
+
+/*
+ * Exercise emulation of plfd and pstfd through a double/long union.
+ *
+ * plfd passes when the step is emulated. The memory is then overwritten
+ * with a different double before pstfd runs, so pstfd passes only when the
+ * step is emulated and the doubleword in memory again equals the bit
+ * pattern cached before the load. Both subtests are reported as skipped
+ * when the CPU lacks CPU_FTR_ARCH_31.
+ */
+static void __init test_plfd_pstfd(void)
+{
+	struct pt_regs regs;
+	union {
+		double a;
+		long b;
+	} c;
+	long cached_b;
+	int stepped = -1;
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
+		/* Report under the mnemonics this function actually tests */
+		show_result("plfd", "SKIP (!CPU_FTR_ARCH_31)");
+		show_result("pstfd", "SKIP (!CPU_FTR_ARCH_31)");
+		return;
+	}
+
+	init_pt_regs(&regs);
+
+
+	/*** plfd ***/
+
+	c.a = 123456.78;
+	cached_b = c.b;
+
+	regs.gpr[3] = (unsigned long)&c.a;
+
+	/* plfd frt10, 0(r3), 0 */
+	stepped = emulate_step(&regs, TEST_PLFD(10, 3, 0, 0));
+
+	if (stepped == 1)
+		show_result("plfd", "PASS");
+	else
+		show_result("plfd", "FAIL");
+
+
+	/*** pstfd ***/
+
+	/* Clobber memory so that only a real store can restore cached_b */
+	c.a = 987654.32;
+
+	/* pstfd frs10, 0(r3), 0 */
+	stepped = emulate_step(&regs, TEST_PSTFD(10, 3, 0, 0));
+
+	if (stepped == 1 && c.b == cached_b)
+		show_result("pstfd", "PASS");
+	else
+		show_result("pstfd", "FAIL");
+}
#else
static void __init test_lfsx_stfsx(void)
{
@@ -312,11 +469,23 @@ static void __init test_lfsx_stfsx(void)
show_result("stfsx", "SKIP (CONFIG_PPC_FPU is not set)");
}
+/* Built without CONFIG_PPC_FPU: report both subtests as skipped */
+static void __init test_plfs_pstfs(void)
+{
+	char *verdict = "SKIP (CONFIG_PPC_FPU is not set)";
+
+	show_result("plfs", verdict);
+	show_result("pstfs", verdict);
+}
+
static void __init test_lfdx_stfdx(void)
{
show_result("lfdx", "SKIP (CONFIG_PPC_FPU is not set)");
show_result("stfdx", "SKIP (CONFIG_PPC_FPU is not set)");
}
+
+/* Built without CONFIG_PPC_FPU: report both subtests as skipped */
+static void __init test_plfd_pstfd(void)
+{
+	char *verdict = "SKIP (CONFIG_PPC_FPU is not set)";
+
+	show_result("plfd", verdict);
+	show_result("pstfd", verdict);
+}
#endif /* CONFIG_PPC_FPU */
#ifdef CONFIG_ALTIVEC
@@ -344,7 +513,7 @@ static void __init test_lvx_stvx(void)
regs.gpr[4] = 0;
/* lvx vrt10, r3, r4 */
- stepped = emulate_step(&regs, TEST_LVX(10, 3, 4));
+ stepped = emulate_step(&regs, ppc_inst(PPC_RAW_LVX(10, 3, 4)));
if (stepped == 1)
show_result("lvx", "PASS");
@@ -360,7 +529,7 @@ static void __init test_lvx_stvx(void)
c.b[3] = 498532;
/* stvx vrs10, r3, r4 */
- stepped = emulate_step(&regs, TEST_STVX(10, 3, 4));
+ stepped = emulate_step(&regs, ppc_inst(PPC_RAW_STVX(10, 3, 4)));
if (stepped == 1 && cached_b[0] == c.b[0] && cached_b[1] == c.b[1] &&
cached_b[2] == c.b[2] && cached_b[3] == c.b[3])
@@ -401,7 +570,7 @@ static void __init test_lxvd2x_stxvd2x(void)
regs.gpr[4] = 0;
/* lxvd2x vsr39, r3, r4 */
- stepped = emulate_step(&regs, TEST_LXVD2X(39, 3, 4));
+ stepped = emulate_step(&regs, ppc_inst(PPC_RAW_LXVD2X(39, R3, R4)));
if (stepped == 1 && cpu_has_feature(CPU_FTR_VSX)) {
show_result("lxvd2x", "PASS");
@@ -421,7 +590,7 @@ static void __init test_lxvd2x_stxvd2x(void)
c.b[3] = 4;
/* stxvd2x vsr39, r3, r4 */
- stepped = emulate_step(&regs, TEST_STXVD2X(39, 3, 4));
+ stepped = emulate_step(&regs, ppc_inst(PPC_RAW_STXVD2X(39, R3, R4)));
if (stepped == 1 && cached_b[0] == c.b[0] && cached_b[1] == c.b[1] &&
cached_b[2] == c.b[2] && cached_b[3] == c.b[3] &&
@@ -442,36 +611,315 @@ static void __init test_lxvd2x_stxvd2x(void)
}
#endif /* CONFIG_VSX */
+#ifdef CONFIG_VSX
+/*
+ * Emulate lxvp followed by stxvp on a two-vector (eight word) buffer.
+ *
+ * The buffer is filled with a known pattern and loaded with lxvp, then
+ * overwritten with different words and stored back with stxvp. stxvp passes
+ * only when the buffer again holds the original pattern. Without
+ * CPU_FTR_VSX both subtests report "PASS (!CPU_FTR_VSX)"; without
+ * CPU_FTR_ARCH_31 both are skipped.
+ */
+static void __init test_lxvp_stxvp(void)
+{
+	const u32 pattern[8] = {
+		18233, 34863571, 834, 6138911,
+		1234, 5678, 91011, 121314,
+	};
+	const u32 scratch[8] = {
+		21379463, 87, 374234, 4,
+		90, 122, 555, 32144,
+	};
+	union {
+		vector128 a;
+		u32 b[4];
+	} c[2];
+	u32 cached_b[8];
+	struct pt_regs regs;
+	int restored = 1;
+	int stepped;
+	int i;
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
+		show_result("lxvp", "SKIP (!CPU_FTR_ARCH_31)");
+		show_result("stxvp", "SKIP (!CPU_FTR_ARCH_31)");
+		return;
+	}
+
+	init_pt_regs(&regs);
+
+	/*** lxvp ***/
+
+	for (i = 0; i < 8; i++)
+		cached_b[i] = c[i / 4].b[i % 4] = pattern[i];
+
+	regs.gpr[4] = (unsigned long)&c[0].a;
+
+	/*
+	 * lxvp XTp,DQ(RA)
+	 * XTp = 32xTX + 2xTp
+	 * let TX=1 Tp=1 RA=4 DQ=0
+	 */
+	stepped = emulate_step(&regs, ppc_inst(PPC_RAW_LXVP(34, 4, 0)));
+
+	if (!cpu_has_feature(CPU_FTR_VSX))
+		show_result("lxvp", "PASS (!CPU_FTR_VSX)");
+	else if (stepped == 1)
+		show_result("lxvp", "PASS");
+	else
+		show_result("lxvp", "FAIL");
+
+	/*** stxvp ***/
+
+	/* Overwrite the buffer so only a real store can bring the pattern back */
+	for (i = 0; i < 8; i++)
+		c[i / 4].b[i % 4] = scratch[i];
+
+	/*
+	 * stxvp XSp,DQ(RA)
+	 * XSp = 32xSX + 2xSp
+	 * let SX=1 Sp=1 RA=4 DQ=0
+	 */
+	stepped = emulate_step(&regs, ppc_inst(PPC_RAW_STXVP(34, 4, 0)));
+
+	for (i = 0; i < 8; i++) {
+		if (cached_b[i] != c[i / 4].b[i % 4])
+			restored = 0;
+	}
+
+	if (!cpu_has_feature(CPU_FTR_VSX))
+		show_result("stxvp", "PASS (!CPU_FTR_VSX)");
+	else if (stepped == 1 && restored)
+		show_result("stxvp", "PASS");
+	else
+		show_result("stxvp", "FAIL");
+}
+#else
+/* Built without CONFIG_VSX: report both subtests as skipped */
+static void __init test_lxvp_stxvp(void)
+{
+	show_result("lxvp", "SKIP (CONFIG_VSX is not set)");
+	show_result("stxvp", "SKIP (CONFIG_VSX is not set)");
+}
+#endif /* CONFIG_VSX */
+
+#ifdef CONFIG_VSX
+/*
+ * Emulate lxvpx followed by stxvpx on a two-vector (eight word) buffer.
+ *
+ * The buffer is filled with a known pattern and loaded with lxvpx, then
+ * overwritten with different words and stored back with stxvpx. stxvpx
+ * passes only when the buffer again holds the original pattern. Without
+ * CPU_FTR_VSX both subtests report "PASS (!CPU_FTR_VSX)"; without
+ * CPU_FTR_ARCH_31 both are skipped.
+ */
+static void __init test_lxvpx_stxvpx(void)
+{
+	const u32 pattern[8] = {
+		18233, 34863571, 834, 6138911,
+		1234, 5678, 91011, 121314,
+	};
+	const u32 scratch[8] = {
+		21379463, 87, 374234, 4,
+		90, 122, 555, 32144,
+	};
+	union {
+		vector128 a;
+		u32 b[4];
+	} c[2];
+	u32 cached_b[8];
+	struct pt_regs regs;
+	int restored = 1;
+	int stepped;
+	int i;
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
+		show_result("lxvpx", "SKIP (!CPU_FTR_ARCH_31)");
+		show_result("stxvpx", "SKIP (!CPU_FTR_ARCH_31)");
+		return;
+	}
+
+	init_pt_regs(&regs);
+
+	/*** lxvpx ***/
+
+	for (i = 0; i < 8; i++)
+		cached_b[i] = c[i / 4].b[i % 4] = pattern[i];
+
+	regs.gpr[3] = (unsigned long)&c[0].a;
+	regs.gpr[4] = 0;
+
+	/*
+	 * lxvpx XTp,RA,RB
+	 * XTp = 32xTX + 2xTp
+	 * let TX=1 Tp=1 RA=3 RB=4
+	 */
+	stepped = emulate_step(&regs, ppc_inst(PPC_RAW_LXVPX(34, 3, 4)));
+
+	if (!cpu_has_feature(CPU_FTR_VSX))
+		show_result("lxvpx", "PASS (!CPU_FTR_VSX)");
+	else if (stepped == 1)
+		show_result("lxvpx", "PASS");
+	else
+		show_result("lxvpx", "FAIL");
+
+	/*** stxvpx ***/
+
+	/* Overwrite the buffer so only a real store can bring the pattern back */
+	for (i = 0; i < 8; i++)
+		c[i / 4].b[i % 4] = scratch[i];
+
+	/*
+	 * stxvpx XSp,RA,RB
+	 * XSp = 32xSX + 2xSp
+	 * let SX=1 Sp=1 RA=3 RB=4
+	 */
+	stepped = emulate_step(&regs, ppc_inst(PPC_RAW_STXVPX(34, 3, 4)));
+
+	for (i = 0; i < 8; i++) {
+		if (cached_b[i] != c[i / 4].b[i % 4])
+			restored = 0;
+	}
+
+	if (!cpu_has_feature(CPU_FTR_VSX))
+		show_result("stxvpx", "PASS (!CPU_FTR_VSX)");
+	else if (stepped == 1 && restored)
+		show_result("stxvpx", "PASS");
+	else
+		show_result("stxvpx", "FAIL");
+}
+#else
+/* Built without CONFIG_VSX: report both subtests as skipped */
+static void __init test_lxvpx_stxvpx(void)
+{
+	show_result("lxvpx", "SKIP (CONFIG_VSX is not set)");
+	show_result("stxvpx", "SKIP (CONFIG_VSX is not set)");
+}
+#endif /* CONFIG_VSX */
+
+#ifdef CONFIG_VSX
+/*
+ * Emulate plxvp followed by pstxvp on a two-vector (eight word) buffer.
+ *
+ * The buffer is filled with a known pattern and loaded with plxvp, then
+ * overwritten with different words and stored back with pstxvp. pstxvp
+ * passes only when the buffer again holds the original pattern. Without
+ * CPU_FTR_VSX both subtests report "PASS (!CPU_FTR_VSX)"; without
+ * CPU_FTR_ARCH_31 both are skipped.
+ */
+static void __init test_plxvp_pstxvp(void)
+{
+	const u32 pattern[8] = {
+		18233, 34863571, 834, 6138911,
+		1234, 5678, 91011, 121314,
+	};
+	const u32 scratch[8] = {
+		21379463, 87, 374234, 4,
+		90, 122, 555, 32144,
+	};
+	union {
+		vector128 a;
+		u32 b[4];
+	} c[2];
+	u32 cached_b[8];
+	struct pt_regs regs;
+	ppc_inst_t instr;
+	int restored = 1;
+	int stepped;
+	int i;
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
+		show_result("plxvp", "SKIP (!CPU_FTR_ARCH_31)");
+		show_result("pstxvp", "SKIP (!CPU_FTR_ARCH_31)");
+		return;
+	}
+
+	/*** plxvp ***/
+
+	for (i = 0; i < 8; i++)
+		cached_b[i] = c[i / 4].b[i % 4] = pattern[i];
+
+	init_pt_regs(&regs);
+	regs.gpr[3] = (unsigned long)&c[0].a;
+
+	/*
+	 * plxvp XTp,D(RA),R
+	 * XTp = 32xTX + 2xTp
+	 * let RA=3 D=d0||d1=0 R=0 Tp=1 TX=1
+	 */
+	instr = ppc_inst_prefix(PPC_RAW_PLXVP_P(34, 0, 3, 0), PPC_RAW_PLXVP_S(34, 0, 3, 0));
+	stepped = emulate_step(&regs, instr);
+
+	if (!cpu_has_feature(CPU_FTR_VSX))
+		show_result("plxvp", "PASS (!CPU_FTR_VSX)");
+	else if (stepped == 1)
+		show_result("plxvp", "PASS");
+	else
+		show_result("plxvp", "FAIL");
+
+	/*** pstxvp ***/
+
+	/* Overwrite the buffer so only a real store can bring the pattern back */
+	for (i = 0; i < 8; i++)
+		c[i / 4].b[i % 4] = scratch[i];
+
+	/*
+	 * pstxvp XSp,D(RA),R
+	 * XSp = 32xSX + 2xSp
+	 * let RA=3 D=d0||d1=0 R=0 Sp=1 SX=1
+	 */
+	instr = ppc_inst_prefix(PPC_RAW_PSTXVP_P(34, 0, 3, 0), PPC_RAW_PSTXVP_S(34, 0, 3, 0));
+	stepped = emulate_step(&regs, instr);
+
+	for (i = 0; i < 8; i++) {
+		if (cached_b[i] != c[i / 4].b[i % 4])
+			restored = 0;
+	}
+
+	if (!cpu_has_feature(CPU_FTR_VSX))
+		show_result("pstxvp", "PASS (!CPU_FTR_VSX)");
+	else if (stepped == 1 && restored)
+		show_result("pstxvp", "PASS");
+	else
+		show_result("pstxvp", "FAIL");
+}
+#else
+/* Built without CONFIG_VSX: report both subtests as skipped */
+static void __init test_plxvp_pstxvp(void)
+{
+	show_result("plxvp", "SKIP (CONFIG_VSX is not set)");
+	show_result("pstxvp", "SKIP (CONFIG_VSX is not set)");
+}
+#endif /* CONFIG_VSX */
+
static void __init run_tests_load_store(void)
{
test_ld();
+ test_pld();
test_lwz();
+ test_plwz();
test_lwzx();
test_std();
+ test_pstd();
test_ldarx_stdcx();
test_lfsx_stfsx();
+ test_plfs_pstfs();
test_lfdx_stfdx();
+ test_plfd_pstfd();
test_lvx_stvx();
test_lxvd2x_stxvd2x();
+ test_lxvp_stxvp();
+ test_lxvpx_stxvpx();
+ test_plxvp_pstxvp();
}
struct compute_test {
char *mnemonic;
+ unsigned long cpu_feature;
struct {
char *descr;
unsigned long flags;
- unsigned int instr;
+ ppc_inst_t instr;
struct pt_regs regs;
} subtests[MAX_SUBTESTS + 1];
};
+/*
+ * Extreme values for si0||si1 (the MLS:D-form 34 bit immediate field):
+ *   SI_MIN  - BIT(33) alone
+ *   SI_MAX  - every bit below bit 33
+ *   SI_UMAX - every bit below bit 34, i.e. the whole field
+ * NOTE(review): SI_MIN and SI_MAX read as the signed extremes of the field,
+ * assuming it is interpreted as two's complement - confirm.
+ */
+#define SI_MIN BIT(33)
+#define SI_MAX (BIT(33) - 1)
+#define SI_UMAX (BIT(34) - 1)
+
static struct compute_test compute_tests[] = {
{
.mnemonic = "nop",
.subtests = {
{
.descr = "R0 = LONG_MAX",
- .instr = PPC_INST_NOP,
+ .instr = ppc_inst(PPC_RAW_NOP()),
.regs = {
.gpr[0] = LONG_MAX,
}
@@ -479,11 +927,38 @@ static struct compute_test compute_tests[] = {
}
},
{
+ .mnemonic = "setb",
+ .cpu_feature = CPU_FTR_ARCH_300,
+ .subtests = {
+ {
+ .descr = "BFA = 1, CR = GT",
+ .instr = ppc_inst(PPC_RAW_SETB(20, 1)),
+ .regs = {
+ .ccr = 0x4000000,
+ }
+ },
+ {
+ .descr = "BFA = 4, CR = LT",
+ .instr = ppc_inst(PPC_RAW_SETB(20, 4)),
+ .regs = {
+ .ccr = 0x8000,
+ }
+ },
+ {
+ .descr = "BFA = 5, CR = EQ",
+ .instr = ppc_inst(PPC_RAW_SETB(20, 5)),
+ .regs = {
+ .ccr = 0x200,
+ }
+ }
+ }
+ },
+ {
.mnemonic = "add",
.subtests = {
{
.descr = "RA = LONG_MIN, RB = LONG_MIN",
- .instr = TEST_ADD(20, 21, 22),
+ .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)),
.regs = {
.gpr[21] = LONG_MIN,
.gpr[22] = LONG_MIN,
@@ -491,7 +966,7 @@ static struct compute_test compute_tests[] = {
},
{
.descr = "RA = LONG_MIN, RB = LONG_MAX",
- .instr = TEST_ADD(20, 21, 22),
+ .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)),
.regs = {
.gpr[21] = LONG_MIN,
.gpr[22] = LONG_MAX,
@@ -499,7 +974,7 @@ static struct compute_test compute_tests[] = {
},
{
.descr = "RA = LONG_MAX, RB = LONG_MAX",
- .instr = TEST_ADD(20, 21, 22),
+ .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)),
.regs = {
.gpr[21] = LONG_MAX,
.gpr[22] = LONG_MAX,
@@ -507,7 +982,7 @@ static struct compute_test compute_tests[] = {
},
{
.descr = "RA = ULONG_MAX, RB = ULONG_MAX",
- .instr = TEST_ADD(20, 21, 22),
+ .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)),
.regs = {
.gpr[21] = ULONG_MAX,
.gpr[22] = ULONG_MAX,
@@ -515,7 +990,7 @@ static struct compute_test compute_tests[] = {
},
{
.descr = "RA = ULONG_MAX, RB = 0x1",
- .instr = TEST_ADD(20, 21, 22),
+ .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)),
.regs = {
.gpr[21] = ULONG_MAX,
.gpr[22] = 0x1,
@@ -523,7 +998,7 @@ static struct compute_test compute_tests[] = {
},
{
.descr = "RA = INT_MIN, RB = INT_MIN",
- .instr = TEST_ADD(20, 21, 22),
+ .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)),
.regs = {
.gpr[21] = INT_MIN,
.gpr[22] = INT_MIN,
@@ -531,7 +1006,7 @@ static struct compute_test compute_tests[] = {
},
{
.descr = "RA = INT_MIN, RB = INT_MAX",
- .instr = TEST_ADD(20, 21, 22),
+ .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)),
.regs = {
.gpr[21] = INT_MIN,
.gpr[22] = INT_MAX,
@@ -539,7 +1014,7 @@ static struct compute_test compute_tests[] = {
},
{
.descr = "RA = INT_MAX, RB = INT_MAX",
- .instr = TEST_ADD(20, 21, 22),
+ .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)),
.regs = {
.gpr[21] = INT_MAX,
.gpr[22] = INT_MAX,
@@ -547,7 +1022,7 @@ static struct compute_test compute_tests[] = {
},
{
.descr = "RA = UINT_MAX, RB = UINT_MAX",
- .instr = TEST_ADD(20, 21, 22),
+ .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)),
.regs = {
.gpr[21] = UINT_MAX,
.gpr[22] = UINT_MAX,
@@ -555,7 +1030,7 @@ static struct compute_test compute_tests[] = {
},
{
.descr = "RA = UINT_MAX, RB = 0x1",
- .instr = TEST_ADD(20, 21, 22),
+ .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)),
.regs = {
.gpr[21] = UINT_MAX,
.gpr[22] = 0x1,
@@ -569,7 +1044,7 @@ static struct compute_test compute_tests[] = {
{
.descr = "RA = LONG_MIN, RB = LONG_MIN",
.flags = IGNORE_CCR,
- .instr = TEST_ADD_DOT(20, 21, 22),
+ .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)),
.regs = {
.gpr[21] = LONG_MIN,
.gpr[22] = LONG_MIN,
@@ -577,7 +1052,7 @@ static struct compute_test compute_tests[] = {
},
{
.descr = "RA = LONG_MIN, RB = LONG_MAX",
- .instr = TEST_ADD_DOT(20, 21, 22),
+ .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)),
.regs = {
.gpr[21] = LONG_MIN,
.gpr[22] = LONG_MAX,
@@ -586,7 +1061,7 @@ static struct compute_test compute_tests[] = {
{
.descr = "RA = LONG_MAX, RB = LONG_MAX",
.flags = IGNORE_CCR,
- .instr = TEST_ADD_DOT(20, 21, 22),
+ .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)),
.regs = {
.gpr[21] = LONG_MAX,
.gpr[22] = LONG_MAX,
@@ -594,7 +1069,7 @@ static struct compute_test compute_tests[] = {
},
{
.descr = "RA = ULONG_MAX, RB = ULONG_MAX",
- .instr = TEST_ADD_DOT(20, 21, 22),
+ .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)),
.regs = {
.gpr[21] = ULONG_MAX,
.gpr[22] = ULONG_MAX,
@@ -602,7 +1077,7 @@ static struct compute_test compute_tests[] = {
},
{
.descr = "RA = ULONG_MAX, RB = 0x1",
- .instr = TEST_ADD_DOT(20, 21, 22),
+ .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)),
.regs = {
.gpr[21] = ULONG_MAX,
.gpr[22] = 0x1,
@@ -610,7 +1085,7 @@ static struct compute_test compute_tests[] = {
},
{
.descr = "RA = INT_MIN, RB = INT_MIN",
- .instr = TEST_ADD_DOT(20, 21, 22),
+ .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)),
.regs = {
.gpr[21] = INT_MIN,
.gpr[22] = INT_MIN,
@@ -618,7 +1093,7 @@ static struct compute_test compute_tests[] = {
},
{
.descr = "RA = INT_MIN, RB = INT_MAX",
- .instr = TEST_ADD_DOT(20, 21, 22),
+ .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)),
.regs = {
.gpr[21] = INT_MIN,
.gpr[22] = INT_MAX,
@@ -626,7 +1101,7 @@ static struct compute_test compute_tests[] = {
},
{
.descr = "RA = INT_MAX, RB = INT_MAX",
- .instr = TEST_ADD_DOT(20, 21, 22),
+ .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)),
.regs = {
.gpr[21] = INT_MAX,
.gpr[22] = INT_MAX,
@@ -634,7 +1109,7 @@ static struct compute_test compute_tests[] = {
},
{
.descr = "RA = UINT_MAX, RB = UINT_MAX",
- .instr = TEST_ADD_DOT(20, 21, 22),
+ .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)),
.regs = {
.gpr[21] = UINT_MAX,
.gpr[22] = UINT_MAX,
@@ -642,7 +1117,7 @@ static struct compute_test compute_tests[] = {
},
{
.descr = "RA = UINT_MAX, RB = 0x1",
- .instr = TEST_ADD_DOT(20, 21, 22),
+ .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)),
.regs = {
.gpr[21] = UINT_MAX,
.gpr[22] = 0x1,
@@ -655,7 +1130,7 @@ static struct compute_test compute_tests[] = {
.subtests = {
{
.descr = "RA = LONG_MIN, RB = LONG_MIN",
- .instr = TEST_ADDC(20, 21, 22),
+ .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
.regs = {
.gpr[21] = LONG_MIN,
.gpr[22] = LONG_MIN,
@@ -663,7 +1138,7 @@ static struct compute_test compute_tests[] = {
},
{
.descr = "RA = LONG_MIN, RB = LONG_MAX",
- .instr = TEST_ADDC(20, 21, 22),
+ .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
.regs = {
.gpr[21] = LONG_MIN,
.gpr[22] = LONG_MAX,
@@ -671,7 +1146,7 @@ static struct compute_test compute_tests[] = {
},
{
.descr = "RA = LONG_MAX, RB = LONG_MAX",
- .instr = TEST_ADDC(20, 21, 22),
+ .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
.regs = {
.gpr[21] = LONG_MAX,
.gpr[22] = LONG_MAX,
@@ -679,7 +1154,7 @@ static struct compute_test compute_tests[] = {
},
{
.descr = "RA = ULONG_MAX, RB = ULONG_MAX",
- .instr = TEST_ADDC(20, 21, 22),
+ .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
.regs = {
.gpr[21] = ULONG_MAX,
.gpr[22] = ULONG_MAX,
@@ -687,7 +1162,7 @@ static struct compute_test compute_tests[] = {
},
{
.descr = "RA = ULONG_MAX, RB = 0x1",
- .instr = TEST_ADDC(20, 21, 22),
+ .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
.regs = {
.gpr[21] = ULONG_MAX,
.gpr[22] = 0x1,
@@ -695,7 +1170,7 @@ static struct compute_test compute_tests[] = {
},
{
.descr = "RA = INT_MIN, RB = INT_MIN",
- .instr = TEST_ADDC(20, 21, 22),
+ .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
.regs = {
.gpr[21] = INT_MIN,
.gpr[22] = INT_MIN,
@@ -703,7 +1178,7 @@ static struct compute_test compute_tests[] = {
},
{
.descr = "RA = INT_MIN, RB = INT_MAX",
- .instr = TEST_ADDC(20, 21, 22),
+ .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
.regs = {
.gpr[21] = INT_MIN,
.gpr[22] = INT_MAX,
@@ -711,7 +1186,7 @@ static struct compute_test compute_tests[] = {
},
{
.descr = "RA = INT_MAX, RB = INT_MAX",
- .instr = TEST_ADDC(20, 21, 22),
+ .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
.regs = {
.gpr[21] = INT_MAX,
.gpr[22] = INT_MAX,
@@ -719,7 +1194,7 @@ static struct compute_test compute_tests[] = {
},
{
.descr = "RA = UINT_MAX, RB = UINT_MAX",
- .instr = TEST_ADDC(20, 21, 22),
+ .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
.regs = {
.gpr[21] = UINT_MAX,
.gpr[22] = UINT_MAX,
@@ -727,7 +1202,7 @@ static struct compute_test compute_tests[] = {
},
{
.descr = "RA = UINT_MAX, RB = 0x1",
- .instr = TEST_ADDC(20, 21, 22),
+ .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
.regs = {
.gpr[21] = UINT_MAX,
.gpr[22] = 0x1,
@@ -735,7 +1210,7 @@ static struct compute_test compute_tests[] = {
},
{
.descr = "RA = LONG_MIN | INT_MIN, RB = LONG_MIN | INT_MIN",
- .instr = TEST_ADDC(20, 21, 22),
+ .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
.regs = {
.gpr[21] = LONG_MIN | (uint)INT_MIN,
.gpr[22] = LONG_MIN | (uint)INT_MIN,
@@ -749,7 +1224,7 @@ static struct compute_test compute_tests[] = {
{
.descr = "RA = LONG_MIN, RB = LONG_MIN",
.flags = IGNORE_CCR,
- .instr = TEST_ADDC_DOT(20, 21, 22),
+ .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
.regs = {
.gpr[21] = LONG_MIN,
.gpr[22] = LONG_MIN,
@@ -757,7 +1232,7 @@ static struct compute_test compute_tests[] = {
},
{
.descr = "RA = LONG_MIN, RB = LONG_MAX",
- .instr = TEST_ADDC_DOT(20, 21, 22),
+ .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
.regs = {
.gpr[21] = LONG_MIN,
.gpr[22] = LONG_MAX,
@@ -766,7 +1241,7 @@ static struct compute_test compute_tests[] = {
{
.descr = "RA = LONG_MAX, RB = LONG_MAX",
.flags = IGNORE_CCR,
- .instr = TEST_ADDC_DOT(20, 21, 22),
+ .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
.regs = {
.gpr[21] = LONG_MAX,
.gpr[22] = LONG_MAX,
@@ -774,7 +1249,7 @@ static struct compute_test compute_tests[] = {
},
{
.descr = "RA = ULONG_MAX, RB = ULONG_MAX",
- .instr = TEST_ADDC_DOT(20, 21, 22),
+ .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
.regs = {
.gpr[21] = ULONG_MAX,
.gpr[22] = ULONG_MAX,
@@ -782,7 +1257,7 @@ static struct compute_test compute_tests[] = {
},
{
.descr = "RA = ULONG_MAX, RB = 0x1",
- .instr = TEST_ADDC_DOT(20, 21, 22),
+ .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
.regs = {
.gpr[21] = ULONG_MAX,
.gpr[22] = 0x1,
@@ -790,7 +1265,7 @@ static struct compute_test compute_tests[] = {
},
{
.descr = "RA = INT_MIN, RB = INT_MIN",
- .instr = TEST_ADDC_DOT(20, 21, 22),
+ .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
.regs = {
.gpr[21] = INT_MIN,
.gpr[22] = INT_MIN,
@@ -798,7 +1273,7 @@ static struct compute_test compute_tests[] = {
},
{
.descr = "RA = INT_MIN, RB = INT_MAX",
- .instr = TEST_ADDC_DOT(20, 21, 22),
+ .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
.regs = {
.gpr[21] = INT_MIN,
.gpr[22] = INT_MAX,
@@ -806,7 +1281,7 @@ static struct compute_test compute_tests[] = {
},
{
.descr = "RA = INT_MAX, RB = INT_MAX",
- .instr = TEST_ADDC_DOT(20, 21, 22),
+ .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
.regs = {
.gpr[21] = INT_MAX,
.gpr[22] = INT_MAX,
@@ -814,7 +1289,7 @@ static struct compute_test compute_tests[] = {
},
{
.descr = "RA = UINT_MAX, RB = UINT_MAX",
- .instr = TEST_ADDC_DOT(20, 21, 22),
+ .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
.regs = {
.gpr[21] = UINT_MAX,
.gpr[22] = UINT_MAX,
@@ -822,7 +1297,7 @@ static struct compute_test compute_tests[] = {
},
{
.descr = "RA = UINT_MAX, RB = 0x1",
- .instr = TEST_ADDC_DOT(20, 21, 22),
+ .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
.regs = {
.gpr[21] = UINT_MAX,
.gpr[22] = 0x1,
@@ -830,47 +1305,336 @@ static struct compute_test compute_tests[] = {
},
{
.descr = "RA = LONG_MIN | INT_MIN, RB = LONG_MIN | INT_MIN",
- .instr = TEST_ADDC_DOT(20, 21, 22),
+ .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
.regs = {
.gpr[21] = LONG_MIN | (uint)INT_MIN,
.gpr[22] = LONG_MIN | (uint)INT_MIN,
}
}
}
+ },
+ {
+ .mnemonic = "divde",
+ .subtests = {
+ {
+ .descr = "RA = LONG_MIN, RB = LONG_MIN",
+ .instr = ppc_inst(PPC_RAW_DIVDE(20, 21, 22)),
+ .regs = {
+ .gpr[21] = LONG_MIN,
+ .gpr[22] = LONG_MIN,
+ }
+ },
+ {
+ .descr = "RA = 1L, RB = 0",
+ .instr = ppc_inst(PPC_RAW_DIVDE(20, 21, 22)),
+ .flags = IGNORE_GPR(20),
+ .regs = {
+ .gpr[21] = 1L,
+ .gpr[22] = 0,
+ }
+ },
+ {
+ .descr = "RA = LONG_MIN, RB = LONG_MAX",
+ .instr = ppc_inst(PPC_RAW_DIVDE(20, 21, 22)),
+ .regs = {
+ .gpr[21] = LONG_MIN,
+ .gpr[22] = LONG_MAX,
+ }
+ }
+ }
+ },
+ {
+ .mnemonic = "divde.",
+ .subtests = {
+ {
+ .descr = "RA = LONG_MIN, RB = LONG_MIN",
+ .instr = ppc_inst(PPC_RAW_DIVDE_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = LONG_MIN,
+ .gpr[22] = LONG_MIN,
+ }
+ },
+ {
+ .descr = "RA = 1L, RB = 0",
+ .instr = ppc_inst(PPC_RAW_DIVDE_DOT(20, 21, 22)),
+ .flags = IGNORE_GPR(20),
+ .regs = {
+ .gpr[21] = 1L,
+ .gpr[22] = 0,
+ }
+ },
+ {
+ .descr = "RA = LONG_MIN, RB = LONG_MAX",
+ .instr = ppc_inst(PPC_RAW_DIVDE_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = LONG_MIN,
+ .gpr[22] = LONG_MAX,
+ }
+ }
+ }
+ },
+ {
+ .mnemonic = "divdeu",
+ .subtests = {
+ {
+ .descr = "RA = LONG_MIN, RB = LONG_MIN",
+ .instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)),
+ .flags = IGNORE_GPR(20),
+ .regs = {
+ .gpr[21] = LONG_MIN,
+ .gpr[22] = LONG_MIN,
+ }
+ },
+ {
+ .descr = "RA = 1L, RB = 0",
+ .instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)),
+ .flags = IGNORE_GPR(20),
+ .regs = {
+ .gpr[21] = 1L,
+ .gpr[22] = 0,
+ }
+ },
+ {
+ .descr = "RA = LONG_MIN, RB = LONG_MAX",
+ .instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)),
+ .regs = {
+ .gpr[21] = LONG_MIN,
+ .gpr[22] = LONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = LONG_MAX - 1, RB = LONG_MAX",
+ .instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)),
+ .regs = {
+ .gpr[21] = LONG_MAX - 1,
+ .gpr[22] = LONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = LONG_MIN + 1, RB = LONG_MIN",
+ .instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)),
+ .flags = IGNORE_GPR(20),
+ .regs = {
+ .gpr[21] = LONG_MIN + 1,
+ .gpr[22] = LONG_MIN,
+ }
+ }
+ }
+ },
+ {
+ .mnemonic = "divdeu.",
+ .subtests = {
+ {
+ .descr = "RA = LONG_MIN, RB = LONG_MIN",
+ .instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)),
+ .flags = IGNORE_GPR(20),
+ .regs = {
+ .gpr[21] = LONG_MIN,
+ .gpr[22] = LONG_MIN,
+ }
+ },
+ {
+ .descr = "RA = 1L, RB = 0",
+ .instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)),
+ .flags = IGNORE_GPR(20),
+ .regs = {
+ .gpr[21] = 1L,
+ .gpr[22] = 0,
+ }
+ },
+ {
+ .descr = "RA = LONG_MIN, RB = LONG_MAX",
+ .instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = LONG_MIN,
+ .gpr[22] = LONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = LONG_MAX - 1, RB = LONG_MAX",
+ .instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = LONG_MAX - 1,
+ .gpr[22] = LONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = LONG_MIN + 1, RB = LONG_MIN",
+ .instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)),
+ .flags = IGNORE_GPR(20),
+ .regs = {
+ .gpr[21] = LONG_MIN + 1,
+ .gpr[22] = LONG_MIN,
+ }
+ }
+ }
+ },
+ {
+ .mnemonic = "paddi",
+ .cpu_feature = CPU_FTR_ARCH_31,
+ .subtests = {
+ {
+ .descr = "RA = LONG_MIN, SI = SI_MIN, R = 0",
+ .instr = TEST_PADDI(21, 22, SI_MIN, 0),
+ .regs = {
+ .gpr[21] = 0,
+ .gpr[22] = LONG_MIN,
+ }
+ },
+ {
+ .descr = "RA = LONG_MIN, SI = SI_MAX, R = 0",
+ .instr = TEST_PADDI(21, 22, SI_MAX, 0),
+ .regs = {
+ .gpr[21] = 0,
+ .gpr[22] = LONG_MIN,
+ }
+ },
+ {
+ .descr = "RA = LONG_MAX, SI = SI_MAX, R = 0",
+ .instr = TEST_PADDI(21, 22, SI_MAX, 0),
+ .regs = {
+ .gpr[21] = 0,
+ .gpr[22] = LONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = ULONG_MAX, SI = SI_UMAX, R = 0",
+ .instr = TEST_PADDI(21, 22, SI_UMAX, 0),
+ .regs = {
+ .gpr[21] = 0,
+ .gpr[22] = ULONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = ULONG_MAX, SI = 0x1, R = 0",
+ .instr = TEST_PADDI(21, 22, 0x1, 0),
+ .regs = {
+ .gpr[21] = 0,
+ .gpr[22] = ULONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = INT_MIN, SI = SI_MIN, R = 0",
+ .instr = TEST_PADDI(21, 22, SI_MIN, 0),
+ .regs = {
+ .gpr[21] = 0,
+ .gpr[22] = INT_MIN,
+ }
+ },
+ {
+ .descr = "RA = INT_MIN, SI = SI_MAX, R = 0",
+ .instr = TEST_PADDI(21, 22, SI_MAX, 0),
+ .regs = {
+ .gpr[21] = 0,
+ .gpr[22] = INT_MIN,
+ }
+ },
+ {
+ .descr = "RA = INT_MAX, SI = SI_MAX, R = 0",
+ .instr = TEST_PADDI(21, 22, SI_MAX, 0),
+ .regs = {
+ .gpr[21] = 0,
+ .gpr[22] = INT_MAX,
+ }
+ },
+ {
+ .descr = "RA = UINT_MAX, SI = 0x1, R = 0",
+ .instr = TEST_PADDI(21, 22, 0x1, 0),
+ .regs = {
+ .gpr[21] = 0,
+ .gpr[22] = UINT_MAX,
+ }
+ },
+ {
+ .descr = "RA = UINT_MAX, SI = SI_MAX, R = 0",
+ .instr = TEST_PADDI(21, 22, SI_MAX, 0),
+ .regs = {
+ .gpr[21] = 0,
+ .gpr[22] = UINT_MAX,
+ }
+ },
+ {
+ .descr = "RA is r0, SI = SI_MIN, R = 0",
+ .instr = TEST_PADDI(21, 0, SI_MIN, 0),
+ .regs = {
+ .gpr[21] = 0x0,
+ }
+ },
+ {
+ .descr = "RA = 0, SI = SI_MIN, R = 0",
+ .instr = TEST_PADDI(21, 22, SI_MIN, 0),
+ .regs = {
+ .gpr[21] = 0x0,
+ .gpr[22] = 0x0,
+ }
+ },
+ {
+ .descr = "RA is r0, SI = 0, R = 1",
+ .instr = TEST_PADDI(21, 0, 0, 1),
+ .regs = {
+ .gpr[21] = 0,
+ }
+ },
+ {
+ .descr = "RA is r0, SI = SI_MIN, R = 1",
+ .instr = TEST_PADDI(21, 0, SI_MIN, 1),
+ .regs = {
+ .gpr[21] = 0,
+ }
+ },
+ /* Invalid instruction form with R = 1 and RA != 0 */
+ {
+ .descr = "RA = R22(0), SI = 0, R = 1",
+ .instr = TEST_PADDI(21, 22, 0, 1),
+ .flags = NEGATIVE_TEST,
+ .regs = {
+ .gpr[21] = 0,
+ .gpr[22] = 0,
+ }
+ }
+ }
}
};
static int __init emulate_compute_instr(struct pt_regs *regs,
- unsigned int instr)
+ ppc_inst_t instr,
+ bool negative) /* true when analysis of instr is expected to fail */
{
+ int analysed; /* analyse_instr() result; 1 is the only value accepted below */
struct instruction_op op;
- if (!regs || !instr)
+ if (!regs || !ppc_inst_val(instr)) /* NULL regs or ppc_inst_val() == 0 -> -EINVAL */
return -EINVAL;
- if (analyse_instr(&op, regs, instr) != 1 ||
- GETTYPE(op.type) != COMPUTE) {
- pr_info("emulation failed, instruction = 0x%08x\n", instr);
+ /* regs is not a real exception return frame, so set NIP here: use the exec_instr patch site address */
+ regs->nip = patch_site_addr(&patch__exec_instr);
+
+ analysed = analyse_instr(&op, regs, instr);
+ if (analysed != 1 || GETTYPE(op.type) != COMPUTE) {
+ if (negative) /* failure was expected: return the error without logging */
+ return -EFAULT;
+ pr_info("emulation failed, instruction = %08lx\n", ppc_inst_as_ulong(instr));
return -EFAULT;
}
-
- emulate_update_regs(regs, &op);
+ if (analysed == 1 && negative) /* analysed is always 1 here; unexpected success is only logged, 0 is still returned */
+ pr_info("negative test failed, instruction = %08lx\n", ppc_inst_as_ulong(instr));
+ if (!negative) /* regs are updated only for instructions that were expected to analyse */
+ emulate_update_regs(regs, &op);
return 0;
}
static int __init execute_compute_instr(struct pt_regs *regs,
- unsigned int instr)
+ ppc_inst_t instr)
{
extern int exec_instr(struct pt_regs *regs);
- extern s32 patch__exec_instr;
- if (!regs || !instr)
+ if (!regs || !ppc_inst_val(instr))
return -EINVAL;
/* Patch the NOP with the actual instruction */
patch_instruction_site(&patch__exec_instr, instr);
if (exec_instr(regs)) {
- pr_info("execution failed, instruction = 0x%08x\n", instr);
+ pr_info("execution failed, instruction = %08lx\n", ppc_inst_as_ulong(instr));
return -EFAULT;
}
@@ -890,16 +1654,23 @@ static void __init run_tests_compute(void)
unsigned long flags;
struct compute_test *test;
struct pt_regs *regs, exp, got;
- unsigned int i, j, k, instr;
- bool ignore_gpr, ignore_xer, ignore_ccr, passed;
+ unsigned int i, j, k;
+ ppc_inst_t instr;
+ bool ignore_gpr, ignore_xer, ignore_ccr, passed, rc, negative;
for (i = 0; i < ARRAY_SIZE(compute_tests); i++) {
test = &compute_tests[i];
+ if (test->cpu_feature && !early_cpu_has_feature(test->cpu_feature)) {
+ show_result(test->mnemonic, "SKIP (!CPU_FTR)");
+ continue;
+ }
+
for (j = 0; j < MAX_SUBTESTS && test->subtests[j].descr; j++) {
instr = test->subtests[j].instr;
flags = test->subtests[j].flags;
regs = &test->subtests[j].regs;
+ negative = flags & NEGATIVE_TEST;
ignore_xer = flags & IGNORE_XER;
ignore_ccr = flags & IGNORE_CCR;
passed = true;
@@ -914,8 +1685,12 @@ static void __init run_tests_compute(void)
exp.msr = MSR_KERNEL;
got.msr = MSR_KERNEL;
- if (emulate_compute_instr(&got, instr) ||
- execute_compute_instr(&exp, instr)) {
+ rc = emulate_compute_instr(&got, instr, negative) != 0;
+ if (negative) {
+ /* skip executing instruction */
+ passed = rc;
+ goto print;
+ } else if (rc || execute_compute_instr(&exp, instr)) {
passed = false;
goto print;
}
diff --git a/arch/powerpc/lib/test_emulate_step_exec_instr.S b/arch/powerpc/lib/test_emulate_step_exec_instr.S
index 1580f34f4f4f..e2b646a4f7fa 100644
--- a/arch/powerpc/lib/test_emulate_step_exec_instr.S
+++ b/arch/powerpc/lib/test_emulate_step_exec_instr.S
@@ -16,7 +16,7 @@ _GLOBAL(exec_instr)
/*
* Stack frame layout (INT_FRAME_SIZE bytes)
- * In-memory pt_regs (SP + STACK_FRAME_OVERHEAD)
+ * In-memory pt_regs (SP + STACK_INT_FRAME_REGS)
* Scratch space (SP + 8)
* Back chain (SP + 0)
*/
@@ -37,7 +37,7 @@ _GLOBAL(exec_instr)
* The stack pointer (GPR1) and the thread pointer (GPR13) are not
* saved as these should not be modified anyway.
*/
- SAVE_2GPRS(2, r1)
+ SAVE_GPRS(2, 3, r1)
SAVE_NVGPRS(r1)
/*
@@ -75,12 +75,13 @@ _GLOBAL(exec_instr)
/* Load GPRs from pt_regs */
REST_GPR(0, r31)
- REST_10GPRS(2, r31)
- REST_GPR(12, r31)
+ REST_GPRS(2, 12, r31)
REST_NVGPRS(r31)
/* Placeholder for the test instruction */
+ .balign 64
1: nop
+ nop
patch_site 1b patch__exec_instr
/*
@@ -97,8 +98,7 @@ _GLOBAL(exec_instr)
subi r3, r3, GPR0
SAVE_GPR(0, r3)
SAVE_GPR(2, r3)
- SAVE_8GPRS(4, r3)
- SAVE_GPR(12, r3)
+ SAVE_GPRS(4, 12, r3)
SAVE_NVGPRS(r3)
/* Save resulting LR to pt_regs */
diff --git a/arch/powerpc/lib/vmx-helper.c b/arch/powerpc/lib/vmx-helper.c
index 62e6c3045252..d491da8d1838 100644
--- a/arch/powerpc/lib/vmx-helper.c
+++ b/arch/powerpc/lib/vmx-helper.c
@@ -9,7 +9,6 @@
#include <linux/uaccess.h>
#include <linux/hardirq.h>
#include <asm/switch_to.h>
-#include <asm/asm-prototypes.h>
int enter_vmx_usercopy(void)
{
@@ -37,7 +36,17 @@ int exit_vmx_usercopy(void)
{
disable_kernel_altivec();
pagefault_enable();
- preempt_enable();
+ preempt_enable_no_resched();
+ /*
+ * Must never explicitly call schedule (including preempt_enable())
+ * while in a kuap-unlocked user copy, because the AMR register will
+ * not be saved and restored across context switch. However preempt
+ * kernels need to be preempted as soon as possible if need_resched is
+ * set and we are preemptible. The hack here is to schedule a
+ * decrementer to fire here and reschedule for us if necessary.
+ */
+ if (IS_ENABLED(CONFIG_PREEMPT) && need_resched())
+ set_dec(1);
return 0;
}
diff --git a/arch/powerpc/lib/xor_vmx.c b/arch/powerpc/lib/xor_vmx.c
index 54e61979e80e..aab49d056d18 100644
--- a/arch/powerpc/lib/xor_vmx.c
+++ b/arch/powerpc/lib/xor_vmx.c
@@ -49,8 +49,9 @@ typedef vector signed char unative_t;
V1##_3 = vec_xor(V1##_3, V2##_3); \
} while (0)
-void __xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
- unsigned long *v2_in)
+void __xor_altivec_2(unsigned long bytes,
+ unsigned long * __restrict v1_in,
+ const unsigned long * __restrict v2_in)
{
DEFINE(v1);
DEFINE(v2);
@@ -67,8 +68,10 @@ void __xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
} while (--lines > 0);
}
-void __xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
- unsigned long *v2_in, unsigned long *v3_in)
+void __xor_altivec_3(unsigned long bytes,
+ unsigned long * __restrict v1_in,
+ const unsigned long * __restrict v2_in,
+ const unsigned long * __restrict v3_in)
{
DEFINE(v1);
DEFINE(v2);
@@ -89,9 +92,11 @@ void __xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
} while (--lines > 0);
}
-void __xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
- unsigned long *v2_in, unsigned long *v3_in,
- unsigned long *v4_in)
+void __xor_altivec_4(unsigned long bytes,
+ unsigned long * __restrict v1_in,
+ const unsigned long * __restrict v2_in,
+ const unsigned long * __restrict v3_in,
+ const unsigned long * __restrict v4_in)
{
DEFINE(v1);
DEFINE(v2);
@@ -116,9 +121,12 @@ void __xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
} while (--lines > 0);
}
-void __xor_altivec_5(unsigned long bytes, unsigned long *v1_in,
- unsigned long *v2_in, unsigned long *v3_in,
- unsigned long *v4_in, unsigned long *v5_in)
+void __xor_altivec_5(unsigned long bytes,
+ unsigned long * __restrict v1_in,
+ const unsigned long * __restrict v2_in,
+ const unsigned long * __restrict v3_in,
+ const unsigned long * __restrict v4_in,
+ const unsigned long * __restrict v5_in)
{
DEFINE(v1);
DEFINE(v2);
diff --git a/arch/powerpc/lib/xor_vmx.h b/arch/powerpc/lib/xor_vmx.h
index 5c2b0839b179..573c41d90dac 100644
--- a/arch/powerpc/lib/xor_vmx.h
+++ b/arch/powerpc/lib/xor_vmx.h
@@ -6,16 +6,17 @@
* outside of the enable/disable altivec block.
*/
-void __xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
- unsigned long *v2_in);
-
-void __xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
- unsigned long *v2_in, unsigned long *v3_in);
-
-void __xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
- unsigned long *v2_in, unsigned long *v3_in,
- unsigned long *v4_in);
-
-void __xor_altivec_5(unsigned long bytes, unsigned long *v1_in,
- unsigned long *v2_in, unsigned long *v3_in,
- unsigned long *v4_in, unsigned long *v5_in);
+void __xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1, /* p1..pN are named v1_in..vN_in in the xor_vmx.c definitions */
+ const unsigned long * __restrict p2); /* p1 is the only non-const buffer in every variant */
+void __xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3);
+void __xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4);
+void __xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4,
+ const unsigned long * __restrict p5);
diff --git a/arch/powerpc/lib/xor_vmx_glue.c b/arch/powerpc/lib/xor_vmx_glue.c
index 80dba916c367..35d917ece4d1 100644
--- a/arch/powerpc/lib/xor_vmx_glue.c
+++ b/arch/powerpc/lib/xor_vmx_glue.c
@@ -12,47 +12,51 @@
#include <asm/xor_altivec.h>
#include "xor_vmx.h"
-void xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
- unsigned long *v2_in)
+void xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1, /* p1: the only non-const buffer */
+ const unsigned long * __restrict p2)
{
preempt_disable();
enable_kernel_altivec();
- __xor_altivec_2(bytes, v1_in, v2_in);
+ __xor_altivec_2(bytes, p1, p2); /* runs with preemption disabled and kernel AltiVec enabled */
disable_kernel_altivec();
preempt_enable();
}
EXPORT_SYMBOL(xor_altivec_2);
-void xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
- unsigned long *v2_in, unsigned long *v3_in)
+void xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1, /* p1: the only non-const buffer */
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3)
{
preempt_disable();
enable_kernel_altivec();
- __xor_altivec_3(bytes, v1_in, v2_in, v3_in);
+ __xor_altivec_3(bytes, p1, p2, p3); /* runs with preemption disabled and kernel AltiVec enabled */
disable_kernel_altivec();
preempt_enable();
}
EXPORT_SYMBOL(xor_altivec_3);
-void xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
- unsigned long *v2_in, unsigned long *v3_in,
- unsigned long *v4_in)
+void xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1, /* p1: the only non-const buffer */
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4)
{
preempt_disable();
enable_kernel_altivec();
- __xor_altivec_4(bytes, v1_in, v2_in, v3_in, v4_in);
+ __xor_altivec_4(bytes, p1, p2, p3, p4); /* runs with preemption disabled and kernel AltiVec enabled */
disable_kernel_altivec();
preempt_enable();
}
EXPORT_SYMBOL(xor_altivec_4);
-void xor_altivec_5(unsigned long bytes, unsigned long *v1_in,
- unsigned long *v2_in, unsigned long *v3_in,
- unsigned long *v4_in, unsigned long *v5_in)
+void xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1, /* p1: the only non-const buffer */
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4,
+ const unsigned long * __restrict p5)
{
preempt_disable();
enable_kernel_altivec();
- __xor_altivec_5(bytes, v1_in, v2_in, v3_in, v4_in, v5_in);
+ __xor_altivec_5(bytes, p1, p2, p3, p4, p5); /* runs with preemption disabled and kernel AltiVec enabled */
disable_kernel_altivec();
preempt_enable();
}