summaryrefslogtreecommitdiff
path: root/arch/powerpc/lib/memcmp_64.S
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/lib/memcmp_64.S')
-rw-r--r--arch/powerpc/lib/memcmp_64.S29
1 files changed, 17 insertions, 12 deletions
diff --git a/arch/powerpc/lib/memcmp_64.S b/arch/powerpc/lib/memcmp_64.S
index 844d8e774492..142c666d3897 100644
--- a/arch/powerpc/lib/memcmp_64.S
+++ b/arch/powerpc/lib/memcmp_64.S
@@ -1,14 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Author: Anton Blanchard <anton@au.ibm.com>
* Copyright 2015 IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
+#include <linux/export.h>
#include <asm/ppc_asm.h>
-#include <asm/export.h>
#include <asm/ppc-opcode.h>
#define off8 r6
@@ -48,7 +44,7 @@
std r5,-STACKFRAMESIZE+STK_REG(R29)(r1); \
std r0,16(r1); \
stdu r1,-STACKFRAMESIZE(r1); \
- bl enter_vmx_ops; \
+ bl CFUNC(enter_vmx_ops); \
cmpwi cr1,r3,0; \
ld r0,STACKFRAMESIZE+16(r1); \
ld r3,STK_REG(R31)(r1); \
@@ -64,7 +60,7 @@
std r5,-STACKFRAMESIZE+STK_REG(R29)(r1); \
std r0,16(r1); \
stdu r1,-STACKFRAMESIZE(r1); \
- bl exit_vmx_ops; \
+ bl CFUNC(exit_vmx_ops); \
ld r0,STACKFRAMESIZE+16(r1); \
ld r3,STK_REG(R31)(r1); \
ld r4,STK_REG(R30)(r1); \
@@ -215,11 +211,20 @@ _GLOBAL_TOC(memcmp)
beq .Lzero
.Lcmp_rest_lt8bytes:
- /* Here we have only less than 8 bytes to compare with. at least s1
- * Address is aligned with 8 bytes.
- * The next double words are load and shift right with appropriate
- * bits.
+ /*
+ * Here we have less than 8 bytes to compare. At least s1 is aligned to
+ * 8 bytes, but s2 may not be. We must make sure s2 + 7 doesn't cross a
+ * page boundary, otherwise we might read past the end of the buffer and
+ * trigger a page fault. We use 4K as the conservative minimum page
+ * size. If we detect that case we go to the byte-by-byte loop.
+ *
+ * Otherwise the next double word is loaded from s1 and s2, and shifted
+ * right to compare the appropriate bits.
*/
+ clrldi r6,r4,(64-12) // r6 = r4 & 0xfff
+ cmpdi r6,0xff8
+ bgt .Lshort
+
subfic r6,r5,8
slwi r6,r6,3
LD rA,0,r3