From ac85227f7637cfb0d811519b8253c454d0d0a159 Mon Sep 17 00:00:00 2001
From: Markos Chandras <markos.chandras@imgtec.com>
Date: Thu, 12 Dec 2013 16:21:00 +0000
Subject: MIPS: checksum: Split the 'copy_user' symbol

The 'copy_user' symbol can be used to copy either from or to
userland, so we introduce two different symbols for these
operations. This makes no difference to the existing code, but
when the core is operating in EVA mode, different instructions
need to be used to read from and write to the userland address
space. The old function is also renamed to 'copy_kernel' to denote
that it is suitable for copying data to and from kernel space.
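
For illustration, a minimal sketch of the aliasing this patch sets
up. LEAF() opens a function and emits its primary global symbol;
FEXPORT() emits an additional global function label at the current
address, so all three names below enter the same shared body:

	LEAF(__csum_partial_copy_kernel)	 # kernel <-> kernel copies
	FEXPORT(__csum_partial_copy_to_user)	 # alias: kernel -> user
	FEXPORT(__csum_partial_copy_from_user)	 # alias: user -> kernel
		/* ... shared copy-and-checksum body ... */
		END(__csum_partial_copy_kernel)

Once EVA support is added, the to_user/from_user entry points can be
given their own bodies without changing any callers.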

Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
---
 arch/mips/lib/csum_partial.S | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S
index a6adffbb4e5f..5d73d0d704a3 100644
--- a/arch/mips/lib/csum_partial.S
+++ b/arch/mips/lib/csum_partial.S
@@ -8,6 +8,7 @@
  * Copyright (C) 1998, 1999 Ralf Baechle
  * Copyright (C) 1999 Silicon Graphics, Inc.
  * Copyright (C) 2007  Maciej W. Rozycki
+ * Copyright (C) 2014 Imagination Technologies Ltd.
  */
 #include <linux/errno.h>
 #include <asm/asm.h>
@@ -296,7 +297,7 @@ LEAF(csum_partial)
  * checksum and copy routines based on memcpy.S
  *
  *	csum_partial_copy_nocheck(src, dst, len, sum)
- *	__csum_partial_copy_user(src, dst, len, sum, errp)
+ *	__csum_partial_copy_kernel(src, dst, len, sum, errp)
  *
  * See "Spec" in memcpy.S for details.	Unlike __copy_user, all
  * function in this file use the standard calling convention.
@@ -396,7 +397,9 @@ LEAF(csum_partial)
 	.set	at=v1
 #endif
 
-LEAF(__csum_partial_copy_user)
+LEAF(__csum_partial_copy_kernel)
+FEXPORT(__csum_partial_copy_to_user)
+FEXPORT(__csum_partial_copy_from_user)
 	PTR_ADDU	AT, src, len	/* See (1) above. */
 #ifdef CONFIG_64BIT
 	move	errptr, a4
@@ -757,4 +760,4 @@ EXC(	lbu	t1, 0(src),	.Ll_exc)
 	jr	ra
 	 sw	v1, (errptr)
 	.set	pop
-	END(__csum_partial_copy_user)
+	END(__csum_partial_copy_kernel)
-- 


From 2ab82e66483798670e129c48c05d7fc8a39ea996 Mon Sep 17 00:00:00 2001
From: Markos Chandras <markos.chandras@imgtec.com>
Date: Thu, 16 Jan 2014 17:02:13 +0000
Subject: MIPS: lib: csum_partial: Merge EXC and load/store macros

Each load/store macro always adds an entry to the __ex_table
using the EXC macro. There are cases where a load instruction may
never fail, such as when we know the load happens in the kernel
address space. Therefore, we merge the EXC and LOADX/STOREX
macros into a single one. We also expand the argument list of the
EXC macro to make it more flexible. The extra 'type' argument is
not used by this commit, but it will be used when EVA support is
added to csum_partial later in this series.
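
As a sketch, a post-patch use such as LOAD(t0, 0(src), .Ll_exc) now
expands (in the 32-bit, non-USE_DOUBLE case) to roughly:

	9:	lw	t0, 0(src)		# the access that may fault
		.section __ex_table,"a"		# exception fixup table
		PTR	9b, .Ll_exc		# on a fault at 9b, resume at .Ll_exc
		.previous

(PTR emits a .word or .dword entry depending on the ABI.) LOADK, by
contrast, stays a bare lw/ld with no __ex_table entry, for accesses
that are known to target kernel memory and therefore cannot fail.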

Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
---
 arch/mips/lib/csum_partial.S | 160 ++++++++++++++++++++++++-------------------
 1 file changed, 91 insertions(+), 69 deletions(-)

diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S
index 5d73d0d704a3..bff5167b59a6 100644
--- a/arch/mips/lib/csum_partial.S
+++ b/arch/mips/lib/csum_partial.S
@@ -328,20 +328,39 @@ LEAF(csum_partial)
  * These handlers do not need to overwrite any data.
  */
 
-#define EXC(inst_reg,addr,handler)		\
-9:	inst_reg, addr;				\
+/* Instruction type */
+#define LD_INSN 1
+#define ST_INSN 2
+
+/*
+ * Wrapper to add an entry in the exception table
+ * in case the insn causes a memory exception.
+ * Arguments:
+ * insn    : Load/store instruction
+ * type    : Instruction type
+ * reg     : Register
+ * addr    : Address
+ * handler : Exception handler
+ */
+#define EXC(insn, type, reg, addr, handler)	\
+9:	insn reg, addr;				\
 	.section __ex_table,"a";		\
 	PTR	9b, handler;			\
 	.previous
 
+#undef LOAD
+
 #ifdef USE_DOUBLE
 
-#define LOAD   ld
-#define LOADL  ldl
-#define LOADR  ldr
-#define STOREL sdl
-#define STORER sdr
-#define STORE  sd
+#define LOADK	ld /* No exception */
+#define LOAD(reg, addr, handler)	EXC(ld, LD_INSN, reg, addr, handler)
+#define LOADBU(reg, addr, handler)	EXC(lbu, LD_INSN, reg, addr, handler)
+#define LOADL(reg, addr, handler)	EXC(ldl, LD_INSN, reg, addr, handler)
+#define LOADR(reg, addr, handler)	EXC(ldr, LD_INSN, reg, addr, handler)
+#define STOREB(reg, addr, handler)	EXC(sb, ST_INSN, reg, addr, handler)
+#define STOREL(reg, addr, handler)	EXC(sdl, ST_INSN, reg, addr, handler)
+#define STORER(reg, addr, handler)	EXC(sdr, ST_INSN, reg, addr, handler)
+#define STORE(reg, addr, handler)	EXC(sd, ST_INSN, reg, addr, handler)
 #define ADD    daddu
 #define SUB    dsubu
 #define SRL    dsrl
@@ -353,12 +372,15 @@ LEAF(csum_partial)
 
 #else
 
-#define LOAD   lw
-#define LOADL  lwl
-#define LOADR  lwr
-#define STOREL swl
-#define STORER swr
-#define STORE  sw
+#define LOADK	lw /* No exception */
+#define LOAD(reg, addr, handler)	EXC(lw, LD_INSN, reg, addr, handler)
+#define LOADBU(reg, addr, handler)	EXC(lbu, LD_INSN, reg, addr, handler)
+#define LOADL(reg, addr, handler)	EXC(lwl, LD_INSN, reg, addr, handler)
+#define LOADR(reg, addr, handler)	EXC(lwr, LD_INSN, reg, addr, handler)
+#define STOREB(reg, addr, handler)	EXC(sb, ST_INSN, reg, addr, handler)
+#define STOREL(reg, addr, handler)	EXC(swl, ST_INSN, reg, addr, handler)
+#define STORER(reg, addr, handler)	EXC(swr, ST_INSN, reg, addr, handler)
+#define STORE(reg, addr, handler)	EXC(sw, ST_INSN, reg, addr, handler)
 #define ADD    addu
 #define SUB    subu
 #define SRL    srl
@@ -439,31 +461,31 @@ FEXPORT(csum_partial_copy_nocheck)
 	SUB	len, 8*NBYTES		# subtract here for bgez loop
 	.align	4
 1:
-EXC(	LOAD	t0, UNIT(0)(src),	.Ll_exc)
-EXC(	LOAD	t1, UNIT(1)(src),	.Ll_exc_copy)
-EXC(	LOAD	t2, UNIT(2)(src),	.Ll_exc_copy)
-EXC(	LOAD	t3, UNIT(3)(src),	.Ll_exc_copy)
-EXC(	LOAD	t4, UNIT(4)(src),	.Ll_exc_copy)
-EXC(	LOAD	t5, UNIT(5)(src),	.Ll_exc_copy)
-EXC(	LOAD	t6, UNIT(6)(src),	.Ll_exc_copy)
-EXC(	LOAD	t7, UNIT(7)(src),	.Ll_exc_copy)
+	LOAD(t0, UNIT(0)(src), .Ll_exc)
+	LOAD(t1, UNIT(1)(src), .Ll_exc_copy)
+	LOAD(t2, UNIT(2)(src), .Ll_exc_copy)
+	LOAD(t3, UNIT(3)(src), .Ll_exc_copy)
+	LOAD(t4, UNIT(4)(src), .Ll_exc_copy)
+	LOAD(t5, UNIT(5)(src), .Ll_exc_copy)
+	LOAD(t6, UNIT(6)(src), .Ll_exc_copy)
+	LOAD(t7, UNIT(7)(src), .Ll_exc_copy)
 	SUB	len, len, 8*NBYTES
 	ADD	src, src, 8*NBYTES
-EXC(	STORE	t0, UNIT(0)(dst),	.Ls_exc)
+	STORE(t0, UNIT(0)(dst),	.Ls_exc)
 	ADDC(sum, t0)
-EXC(	STORE	t1, UNIT(1)(dst),	.Ls_exc)
+	STORE(t1, UNIT(1)(dst),	.Ls_exc)
 	ADDC(sum, t1)
-EXC(	STORE	t2, UNIT(2)(dst),	.Ls_exc)
+	STORE(t2, UNIT(2)(dst),	.Ls_exc)
 	ADDC(sum, t2)
-EXC(	STORE	t3, UNIT(3)(dst),	.Ls_exc)
+	STORE(t3, UNIT(3)(dst),	.Ls_exc)
 	ADDC(sum, t3)
-EXC(	STORE	t4, UNIT(4)(dst),	.Ls_exc)
+	STORE(t4, UNIT(4)(dst),	.Ls_exc)
 	ADDC(sum, t4)
-EXC(	STORE	t5, UNIT(5)(dst),	.Ls_exc)
+	STORE(t5, UNIT(5)(dst),	.Ls_exc)
 	ADDC(sum, t5)
-EXC(	STORE	t6, UNIT(6)(dst),	.Ls_exc)
+	STORE(t6, UNIT(6)(dst),	.Ls_exc)
 	ADDC(sum, t6)
-EXC(	STORE	t7, UNIT(7)(dst),	.Ls_exc)
+	STORE(t7, UNIT(7)(dst),	.Ls_exc)
 	ADDC(sum, t7)
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, 8*NBYTES
@@ -483,19 +505,19 @@ EXC(	STORE	t7, UNIT(7)(dst),	.Ls_exc)
 	/*
 	 * len >= 4*NBYTES
 	 */
-EXC(	LOAD	t0, UNIT(0)(src),	.Ll_exc)
-EXC(	LOAD	t1, UNIT(1)(src),	.Ll_exc_copy)
-EXC(	LOAD	t2, UNIT(2)(src),	.Ll_exc_copy)
-EXC(	LOAD	t3, UNIT(3)(src),	.Ll_exc_copy)
+	LOAD(t0, UNIT(0)(src), .Ll_exc)
+	LOAD(t1, UNIT(1)(src), .Ll_exc_copy)
+	LOAD(t2, UNIT(2)(src), .Ll_exc_copy)
+	LOAD(t3, UNIT(3)(src), .Ll_exc_copy)
 	SUB	len, len, 4*NBYTES
 	ADD	src, src, 4*NBYTES
-EXC(	STORE	t0, UNIT(0)(dst),	.Ls_exc)
+	STORE(t0, UNIT(0)(dst),	.Ls_exc)
 	ADDC(sum, t0)
-EXC(	STORE	t1, UNIT(1)(dst),	.Ls_exc)
+	STORE(t1, UNIT(1)(dst),	.Ls_exc)
 	ADDC(sum, t1)
-EXC(	STORE	t2, UNIT(2)(dst),	.Ls_exc)
+	STORE(t2, UNIT(2)(dst),	.Ls_exc)
 	ADDC(sum, t2)
-EXC(	STORE	t3, UNIT(3)(dst),	.Ls_exc)
+	STORE(t3, UNIT(3)(dst),	.Ls_exc)
 	ADDC(sum, t3)
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, 4*NBYTES
@@ -508,10 +530,10 @@ EXC(	STORE	t3, UNIT(3)(dst),	.Ls_exc)
 	beq	rem, len, .Lcopy_bytes
 	 nop
 1:
-EXC(	LOAD	t0, 0(src),		.Ll_exc)
+	LOAD(t0, 0(src), .Ll_exc)
 	ADD	src, src, NBYTES
 	SUB	len, len, NBYTES
-EXC(	STORE	t0, 0(dst),		.Ls_exc)
+	STORE(t0, 0(dst), .Ls_exc)
 	ADDC(sum, t0)
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, NBYTES
@@ -534,10 +556,10 @@ EXC(	STORE	t0, 0(dst),		.Ls_exc)
 	 ADD	t1, dst, len	# t1 is just past last byte of dst
 	li	bits, 8*NBYTES
 	SLL	rem, len, 3	# rem = number of bits to keep
-EXC(	LOAD	t0, 0(src),		.Ll_exc)
+	LOAD(t0, 0(src), .Ll_exc)
 	SUB	bits, bits, rem # bits = number of bits to discard
 	SHIFT_DISCARD t0, t0, bits
-EXC(	STREST	t0, -1(t1),		.Ls_exc)
+	STREST(t0, -1(t1), .Ls_exc)
 	SHIFT_DISCARD_REVERT t0, t0, bits
 	.set reorder
 	ADDC(sum, t0)
@@ -554,12 +576,12 @@ EXC(	STREST	t0, -1(t1),		.Ls_exc)
 	 * Set match = (src and dst have same alignment)
 	 */
 #define match rem
-EXC(	LDFIRST t3, FIRST(0)(src),	.Ll_exc)
+	LDFIRST(t3, FIRST(0)(src), .Ll_exc)
 	ADD	t2, zero, NBYTES
-EXC(	LDREST	t3, REST(0)(src),	.Ll_exc_copy)
+	LDREST(t3, REST(0)(src), .Ll_exc_copy)
 	SUB	t2, t2, t1	# t2 = number of bytes copied
 	xor	match, t0, t1
-EXC(	STFIRST t3, FIRST(0)(dst),	.Ls_exc)
+	STFIRST(t3, FIRST(0)(dst), .Ls_exc)
 	SLL	t4, t1, 3		# t4 = number of bits to discard
 	SHIFT_DISCARD t3, t3, t4
 	/* no SHIFT_DISCARD_REVERT to handle odd buffer properly */
@@ -581,26 +603,26 @@ EXC(	STFIRST t3, FIRST(0)(dst),	.Ls_exc)
  * It's OK to load FIRST(N+1) before REST(N) because the two addresses
  * are to the same unit (unless src is aligned, but it's not).
  */
-EXC(	LDFIRST t0, FIRST(0)(src),	.Ll_exc)
-EXC(	LDFIRST t1, FIRST(1)(src),	.Ll_exc_copy)
+	LDFIRST(t0, FIRST(0)(src), .Ll_exc)
+	LDFIRST(t1, FIRST(1)(src), .Ll_exc_copy)
 	SUB	len, len, 4*NBYTES
-EXC(	LDREST	t0, REST(0)(src),	.Ll_exc_copy)
-EXC(	LDREST	t1, REST(1)(src),	.Ll_exc_copy)
-EXC(	LDFIRST t2, FIRST(2)(src),	.Ll_exc_copy)
-EXC(	LDFIRST t3, FIRST(3)(src),	.Ll_exc_copy)
-EXC(	LDREST	t2, REST(2)(src),	.Ll_exc_copy)
-EXC(	LDREST	t3, REST(3)(src),	.Ll_exc_copy)
+	LDREST(t0, REST(0)(src), .Ll_exc_copy)
+	LDREST(t1, REST(1)(src), .Ll_exc_copy)
+	LDFIRST(t2, FIRST(2)(src), .Ll_exc_copy)
+	LDFIRST(t3, FIRST(3)(src), .Ll_exc_copy)
+	LDREST(t2, REST(2)(src), .Ll_exc_copy)
+	LDREST(t3, REST(3)(src), .Ll_exc_copy)
 	ADD	src, src, 4*NBYTES
 #ifdef CONFIG_CPU_SB1
 	nop				# improves slotting
 #endif
-EXC(	STORE	t0, UNIT(0)(dst),	.Ls_exc)
+	STORE(t0, UNIT(0)(dst),	.Ls_exc)
 	ADDC(sum, t0)
-EXC(	STORE	t1, UNIT(1)(dst),	.Ls_exc)
+	STORE(t1, UNIT(1)(dst),	.Ls_exc)
 	ADDC(sum, t1)
-EXC(	STORE	t2, UNIT(2)(dst),	.Ls_exc)
+	STORE(t2, UNIT(2)(dst),	.Ls_exc)
 	ADDC(sum, t2)
-EXC(	STORE	t3, UNIT(3)(dst),	.Ls_exc)
+	STORE(t3, UNIT(3)(dst),	.Ls_exc)
 	ADDC(sum, t3)
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, 4*NBYTES
@@ -613,11 +635,11 @@ EXC(	STORE	t3, UNIT(3)(dst),	.Ls_exc)
 	beq	rem, len, .Lcopy_bytes
 	 nop
 1:
-EXC(	LDFIRST t0, FIRST(0)(src),	.Ll_exc)
-EXC(	LDREST	t0, REST(0)(src),	.Ll_exc_copy)
+	LDFIRST(t0, FIRST(0)(src), .Ll_exc)
+	LDREST(t0, REST(0)(src), .Ll_exc_copy)
 	ADD	src, src, NBYTES
 	SUB	len, len, NBYTES
-EXC(	STORE	t0, 0(dst),		.Ls_exc)
+	STORE(t0, 0(dst), .Ls_exc)
 	ADDC(sum, t0)
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, NBYTES
@@ -640,9 +662,9 @@ EXC(	STORE	t0, 0(dst),		.Ls_exc)
 	li	t3, SHIFT_START # shift
 /* use .Ll_exc_copy here to return correct sum on fault */
 #define COPY_BYTE(N)			\
-EXC(	lbu	t0, N(src), .Ll_exc_copy);	\
+	LOADBU(t0, N(src), .Ll_exc_copy);	\
 	SUB	len, len, 1;		\
-EXC(	sb	t0, N(dst), .Ls_exc);	\
+	STOREB(t0, N(dst), .Ls_exc);	\
 	SLLV	t0, t0, t3;		\
 	addu	t3, SHIFT_INC;		\
 	beqz	len, .Lcopy_bytes_done; \
@@ -656,9 +678,9 @@ EXC(	sb	t0, N(dst), .Ls_exc);	\
 	COPY_BYTE(4)
 	COPY_BYTE(5)
 #endif
-EXC(	lbu	t0, NBYTES-2(src), .Ll_exc_copy)
+	LOADBU(t0, NBYTES-2(src), .Ll_exc_copy)
 	SUB	len, len, 1
-EXC(	sb	t0, NBYTES-2(dst), .Ls_exc)
+	STOREB(t0, NBYTES-2(dst), .Ls_exc)
 	SLLV	t0, t0, t3
 	or	t2, t0
 .Lcopy_bytes_done:
@@ -703,11 +725,11 @@ EXC(	sb	t0, NBYTES-2(dst), .Ls_exc)
 	 *
 	 * Assumes src < THREAD_BUADDR($28)
 	 */
-	LOAD	t0, TI_TASK($28)
+	LOADK	t0, TI_TASK($28)
 	 li	t2, SHIFT_START
-	LOAD	t0, THREAD_BUADDR(t0)
+	LOADK	t0, THREAD_BUADDR(t0)
 1:
-EXC(	lbu	t1, 0(src),	.Ll_exc)
+	LOADBU(t1, 0(src), .Ll_exc)
 	ADD	src, src, 1
 	sb	t1, 0(dst)	# can't fault -- we're copy_from_user
 	SLLV	t1, t1, t2
@@ -718,9 +740,9 @@ EXC(	lbu	t1, 0(src),	.Ll_exc)
 	bne	src, t0, 1b
 	.set	noreorder
 .Ll_exc:
-	LOAD	t0, TI_TASK($28)
+	LOADK	t0, TI_TASK($28)
 	 nop
-	LOAD	t0, THREAD_BUADDR(t0)	# t0 is just past last good address
+	LOADK	t0, THREAD_BUADDR(t0)	# t0 is just past last good address
 	 nop
 	SUB	len, AT, t0		# len number of uncopied bytes
 	/*
-- 


From e89fb56c8bcf5514cfe7abd7a3dda9e6007b7238 Mon Sep 17 00:00:00 2001
From: Markos Chandras <markos.chandras@imgtec.com>
Date: Fri, 17 Jan 2014 10:48:46 +0000
Subject: MIPS: lib: csum_partial: Add macro to build csum_partial symbols

In preparation for EVA support, we use an assembler macro to build
the main __csum_partial_copy_user code so it can be shared across
multiple implementations. EVA uses the same code, but replaces the
load/store instructions with the EVA-specific ones; using a macro
therefore avoids unnecessary code duplication.
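
A minimal sketch of the label handling this relies on (standard GAS
behaviour: inside a .macro body, \@ expands to a counter that is
unique per expansion, so branch targets do not collide when the body
is instantiated more than once):

	.macro	__BUILD_CSUM_PARTIAL_COPY_USER mode, from, to, __nocheck
	/* ... */
	bnez	t1, .Ldst_unaligned\@	# suffixed per expansion
	/* ... */
	.Ldst_unaligned\@:
	/* ... */
	.endm

	/* Two expansions yield two distinct sets of labels, e.g.  */
	/* .Ldst_unaligned0 and .Ldst_unaligned1, with no clashes. */
	__BUILD_CSUM_PARTIAL_COPY_USER LEGACY_MODE USEROP USEROP 1
	__BUILD_CSUM_PARTIAL_COPY_USER EVA_MODE KERNELOP USEROP 0

The second expansion shown is the shape the EVA patch later in this
series uses; this patch only instantiates the LEGACY_MODE variant.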

Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
---
 arch/mips/lib/csum_partial.S | 200 +++++++++++++++++++++++--------------------
 1 file changed, 108 insertions(+), 92 deletions(-)

diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S
index bff5167b59a6..62c8768a59ce 100644
--- a/arch/mips/lib/csum_partial.S
+++ b/arch/mips/lib/csum_partial.S
@@ -331,6 +331,10 @@ LEAF(csum_partial)
 /* Instruction type */
 #define LD_INSN 1
 #define ST_INSN 2
+#define LEGACY_MODE 1
+#define EVA_MODE    2
+#define USEROP   1
+#define KERNELOP 2
 
 /*
  * Wrapper to add an entry in the exception table
@@ -343,10 +347,12 @@ LEAF(csum_partial)
  * handler : Exception handler
  */
 #define EXC(insn, type, reg, addr, handler)	\
-9:	insn reg, addr;				\
-	.section __ex_table,"a";		\
-	PTR	9b, handler;			\
-	.previous
+	.if \mode == LEGACY_MODE;		\
+9:		insn reg, addr;			\
+		.section __ex_table,"a";	\
+		PTR	9b, handler;		\
+		.previous;			\
+	.endif
 
 #undef LOAD
 
@@ -419,16 +425,20 @@ LEAF(csum_partial)
 	.set	at=v1
 #endif
 
-LEAF(__csum_partial_copy_kernel)
-FEXPORT(__csum_partial_copy_to_user)
-FEXPORT(__csum_partial_copy_from_user)
+	.macro __BUILD_CSUM_PARTIAL_COPY_USER mode, from, to, __nocheck
+
 	PTR_ADDU	AT, src, len	/* See (1) above. */
+	/* initialize __nocheck if this is the first time we execute
+	 * this macro
+	 */
 #ifdef CONFIG_64BIT
 	move	errptr, a4
 #else
 	lw	errptr, 16(sp)
 #endif
-FEXPORT(csum_partial_copy_nocheck)
+	.if \__nocheck == 1
+	FEXPORT(csum_partial_copy_nocheck)
+	.endif
 	move	sum, zero
 	move	odd, zero
 	/*
@@ -444,48 +454,48 @@ FEXPORT(csum_partial_copy_nocheck)
 	 */
 	sltu	t2, len, NBYTES
 	and	t1, dst, ADDRMASK
-	bnez	t2, .Lcopy_bytes_checklen
+	bnez	t2, .Lcopy_bytes_checklen\@
 	 and	t0, src, ADDRMASK
 	andi	odd, dst, 0x1			/* odd buffer? */
-	bnez	t1, .Ldst_unaligned
+	bnez	t1, .Ldst_unaligned\@
 	 nop
-	bnez	t0, .Lsrc_unaligned_dst_aligned
+	bnez	t0, .Lsrc_unaligned_dst_aligned\@
 	/*
 	 * use delay slot for fall-through
 	 * src and dst are aligned; need to compute rem
 	 */
-.Lboth_aligned:
+.Lboth_aligned\@:
 	 SRL	t0, len, LOG_NBYTES+3	 # +3 for 8 units/iter
-	beqz	t0, .Lcleanup_both_aligned # len < 8*NBYTES
+	beqz	t0, .Lcleanup_both_aligned\@ # len < 8*NBYTES
 	 nop
 	SUB	len, 8*NBYTES		# subtract here for bgez loop
 	.align	4
 1:
-	LOAD(t0, UNIT(0)(src), .Ll_exc)
-	LOAD(t1, UNIT(1)(src), .Ll_exc_copy)
-	LOAD(t2, UNIT(2)(src), .Ll_exc_copy)
-	LOAD(t3, UNIT(3)(src), .Ll_exc_copy)
-	LOAD(t4, UNIT(4)(src), .Ll_exc_copy)
-	LOAD(t5, UNIT(5)(src), .Ll_exc_copy)
-	LOAD(t6, UNIT(6)(src), .Ll_exc_copy)
-	LOAD(t7, UNIT(7)(src), .Ll_exc_copy)
+	LOAD(t0, UNIT(0)(src), .Ll_exc\@)
+	LOAD(t1, UNIT(1)(src), .Ll_exc_copy\@)
+	LOAD(t2, UNIT(2)(src), .Ll_exc_copy\@)
+	LOAD(t3, UNIT(3)(src), .Ll_exc_copy\@)
+	LOAD(t4, UNIT(4)(src), .Ll_exc_copy\@)
+	LOAD(t5, UNIT(5)(src), .Ll_exc_copy\@)
+	LOAD(t6, UNIT(6)(src), .Ll_exc_copy\@)
+	LOAD(t7, UNIT(7)(src), .Ll_exc_copy\@)
 	SUB	len, len, 8*NBYTES
 	ADD	src, src, 8*NBYTES
-	STORE(t0, UNIT(0)(dst),	.Ls_exc)
+	STORE(t0, UNIT(0)(dst),	.Ls_exc\@)
 	ADDC(sum, t0)
-	STORE(t1, UNIT(1)(dst),	.Ls_exc)
+	STORE(t1, UNIT(1)(dst),	.Ls_exc\@)
 	ADDC(sum, t1)
-	STORE(t2, UNIT(2)(dst),	.Ls_exc)
+	STORE(t2, UNIT(2)(dst),	.Ls_exc\@)
 	ADDC(sum, t2)
-	STORE(t3, UNIT(3)(dst),	.Ls_exc)
+	STORE(t3, UNIT(3)(dst),	.Ls_exc\@)
 	ADDC(sum, t3)
-	STORE(t4, UNIT(4)(dst),	.Ls_exc)
+	STORE(t4, UNIT(4)(dst),	.Ls_exc\@)
 	ADDC(sum, t4)
-	STORE(t5, UNIT(5)(dst),	.Ls_exc)
+	STORE(t5, UNIT(5)(dst),	.Ls_exc\@)
 	ADDC(sum, t5)
-	STORE(t6, UNIT(6)(dst),	.Ls_exc)
+	STORE(t6, UNIT(6)(dst),	.Ls_exc\@)
 	ADDC(sum, t6)
-	STORE(t7, UNIT(7)(dst),	.Ls_exc)
+	STORE(t7, UNIT(7)(dst),	.Ls_exc\@)
 	ADDC(sum, t7)
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, 8*NBYTES
@@ -496,44 +506,44 @@ FEXPORT(csum_partial_copy_nocheck)
 	/*
 	 * len == the number of bytes left to copy < 8*NBYTES
 	 */
-.Lcleanup_both_aligned:
+.Lcleanup_both_aligned\@:
 #define rem t7
-	beqz	len, .Ldone
+	beqz	len, .Ldone\@
 	 sltu	t0, len, 4*NBYTES
-	bnez	t0, .Lless_than_4units
+	bnez	t0, .Lless_than_4units\@
 	 and	rem, len, (NBYTES-1)	# rem = len % NBYTES
 	/*
 	 * len >= 4*NBYTES
 	 */
-	LOAD(t0, UNIT(0)(src), .Ll_exc)
-	LOAD(t1, UNIT(1)(src), .Ll_exc_copy)
-	LOAD(t2, UNIT(2)(src), .Ll_exc_copy)
-	LOAD(t3, UNIT(3)(src), .Ll_exc_copy)
+	LOAD(t0, UNIT(0)(src), .Ll_exc\@)
+	LOAD(t1, UNIT(1)(src), .Ll_exc_copy\@)
+	LOAD(t2, UNIT(2)(src), .Ll_exc_copy\@)
+	LOAD(t3, UNIT(3)(src), .Ll_exc_copy\@)
 	SUB	len, len, 4*NBYTES
 	ADD	src, src, 4*NBYTES
-	STORE(t0, UNIT(0)(dst),	.Ls_exc)
+	STORE(t0, UNIT(0)(dst),	.Ls_exc\@)
 	ADDC(sum, t0)
-	STORE(t1, UNIT(1)(dst),	.Ls_exc)
+	STORE(t1, UNIT(1)(dst),	.Ls_exc\@)
 	ADDC(sum, t1)
-	STORE(t2, UNIT(2)(dst),	.Ls_exc)
+	STORE(t2, UNIT(2)(dst),	.Ls_exc\@)
 	ADDC(sum, t2)
-	STORE(t3, UNIT(3)(dst),	.Ls_exc)
+	STORE(t3, UNIT(3)(dst),	.Ls_exc\@)
 	ADDC(sum, t3)
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, 4*NBYTES
-	beqz	len, .Ldone
+	beqz	len, .Ldone\@
 	.set	noreorder
-.Lless_than_4units:
+.Lless_than_4units\@:
 	/*
 	 * rem = len % NBYTES
 	 */
-	beq	rem, len, .Lcopy_bytes
+	beq	rem, len, .Lcopy_bytes\@
 	 nop
 1:
-	LOAD(t0, 0(src), .Ll_exc)
+	LOAD(t0, 0(src), .Ll_exc\@)
 	ADD	src, src, NBYTES
 	SUB	len, len, NBYTES
-	STORE(t0, 0(dst), .Ls_exc)
+	STORE(t0, 0(dst), .Ls_exc\@)
 	ADDC(sum, t0)
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, NBYTES
@@ -552,20 +562,20 @@ FEXPORT(csum_partial_copy_nocheck)
 	 * more instruction-level parallelism.
 	 */
 #define bits t2
-	beqz	len, .Ldone
+	beqz	len, .Ldone\@
 	 ADD	t1, dst, len	# t1 is just past last byte of dst
 	li	bits, 8*NBYTES
 	SLL	rem, len, 3	# rem = number of bits to keep
-	LOAD(t0, 0(src), .Ll_exc)
+	LOAD(t0, 0(src), .Ll_exc\@)
 	SUB	bits, bits, rem # bits = number of bits to discard
 	SHIFT_DISCARD t0, t0, bits
-	STREST(t0, -1(t1), .Ls_exc)
+	STREST(t0, -1(t1), .Ls_exc\@)
 	SHIFT_DISCARD_REVERT t0, t0, bits
 	.set reorder
 	ADDC(sum, t0)
-	b	.Ldone
+	b	.Ldone\@
 	.set noreorder
-.Ldst_unaligned:
+.Ldst_unaligned\@:
 	/*
 	 * dst is unaligned
 	 * t0 = src & ADDRMASK
@@ -576,25 +586,25 @@ FEXPORT(csum_partial_copy_nocheck)
 	 * Set match = (src and dst have same alignment)
 	 */
 #define match rem
-	LDFIRST(t3, FIRST(0)(src), .Ll_exc)
+	LDFIRST(t3, FIRST(0)(src), .Ll_exc\@)
 	ADD	t2, zero, NBYTES
-	LDREST(t3, REST(0)(src), .Ll_exc_copy)
+	LDREST(t3, REST(0)(src), .Ll_exc_copy\@)
 	SUB	t2, t2, t1	# t2 = number of bytes copied
 	xor	match, t0, t1
-	STFIRST(t3, FIRST(0)(dst), .Ls_exc)
+	STFIRST(t3, FIRST(0)(dst), .Ls_exc\@)
 	SLL	t4, t1, 3		# t4 = number of bits to discard
 	SHIFT_DISCARD t3, t3, t4
 	/* no SHIFT_DISCARD_REVERT to handle odd buffer properly */
 	ADDC(sum, t3)
-	beq	len, t2, .Ldone
+	beq	len, t2, .Ldone\@
 	 SUB	len, len, t2
 	ADD	dst, dst, t2
-	beqz	match, .Lboth_aligned
+	beqz	match, .Lboth_aligned\@
 	 ADD	src, src, t2
 
-.Lsrc_unaligned_dst_aligned:
+.Lsrc_unaligned_dst_aligned\@:
 	SRL	t0, len, LOG_NBYTES+2	 # +2 for 4 units/iter
-	beqz	t0, .Lcleanup_src_unaligned
+	beqz	t0, .Lcleanup_src_unaligned\@
 	 and	rem, len, (4*NBYTES-1)	 # rem = len % 4*NBYTES
 1:
 /*
@@ -603,53 +613,53 @@ FEXPORT(csum_partial_copy_nocheck)
  * It's OK to load FIRST(N+1) before REST(N) because the two addresses
  * are to the same unit (unless src is aligned, but it's not).
  */
-	LDFIRST(t0, FIRST(0)(src), .Ll_exc)
-	LDFIRST(t1, FIRST(1)(src), .Ll_exc_copy)
+	LDFIRST(t0, FIRST(0)(src), .Ll_exc\@)
+	LDFIRST(t1, FIRST(1)(src), .Ll_exc_copy\@)
 	SUB	len, len, 4*NBYTES
-	LDREST(t0, REST(0)(src), .Ll_exc_copy)
-	LDREST(t1, REST(1)(src), .Ll_exc_copy)
-	LDFIRST(t2, FIRST(2)(src), .Ll_exc_copy)
-	LDFIRST(t3, FIRST(3)(src), .Ll_exc_copy)
-	LDREST(t2, REST(2)(src), .Ll_exc_copy)
-	LDREST(t3, REST(3)(src), .Ll_exc_copy)
+	LDREST(t0, REST(0)(src), .Ll_exc_copy\@)
+	LDREST(t1, REST(1)(src), .Ll_exc_copy\@)
+	LDFIRST(t2, FIRST(2)(src), .Ll_exc_copy\@)
+	LDFIRST(t3, FIRST(3)(src), .Ll_exc_copy\@)
+	LDREST(t2, REST(2)(src), .Ll_exc_copy\@)
+	LDREST(t3, REST(3)(src), .Ll_exc_copy\@)
 	ADD	src, src, 4*NBYTES
 #ifdef CONFIG_CPU_SB1
 	nop				# improves slotting
 #endif
-	STORE(t0, UNIT(0)(dst),	.Ls_exc)
+	STORE(t0, UNIT(0)(dst),	.Ls_exc\@)
 	ADDC(sum, t0)
-	STORE(t1, UNIT(1)(dst),	.Ls_exc)
+	STORE(t1, UNIT(1)(dst),	.Ls_exc\@)
 	ADDC(sum, t1)
-	STORE(t2, UNIT(2)(dst),	.Ls_exc)
+	STORE(t2, UNIT(2)(dst),	.Ls_exc\@)
 	ADDC(sum, t2)
-	STORE(t3, UNIT(3)(dst),	.Ls_exc)
+	STORE(t3, UNIT(3)(dst),	.Ls_exc\@)
 	ADDC(sum, t3)
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, 4*NBYTES
 	bne	len, rem, 1b
 	.set	noreorder
 
-.Lcleanup_src_unaligned:
-	beqz	len, .Ldone
+.Lcleanup_src_unaligned\@:
+	beqz	len, .Ldone\@
 	 and	rem, len, NBYTES-1  # rem = len % NBYTES
-	beq	rem, len, .Lcopy_bytes
+	beq	rem, len, .Lcopy_bytes\@
 	 nop
 1:
-	LDFIRST(t0, FIRST(0)(src), .Ll_exc)
-	LDREST(t0, REST(0)(src), .Ll_exc_copy)
+	LDFIRST(t0, FIRST(0)(src), .Ll_exc\@)
+	LDREST(t0, REST(0)(src), .Ll_exc_copy\@)
 	ADD	src, src, NBYTES
 	SUB	len, len, NBYTES
-	STORE(t0, 0(dst), .Ls_exc)
+	STORE(t0, 0(dst), .Ls_exc\@)
 	ADDC(sum, t0)
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, NBYTES
 	bne	len, rem, 1b
 	.set	noreorder
 
-.Lcopy_bytes_checklen:
-	beqz	len, .Ldone
+.Lcopy_bytes_checklen\@:
+	beqz	len, .Ldone\@
 	 nop
-.Lcopy_bytes:
+.Lcopy_bytes\@:
 	/* 0 < len < NBYTES  */
 #ifdef CONFIG_CPU_LITTLE_ENDIAN
 #define SHIFT_START 0
@@ -662,12 +672,12 @@ FEXPORT(csum_partial_copy_nocheck)
 	li	t3, SHIFT_START # shift
 /* use .Ll_exc_copy here to return correct sum on fault */
 #define COPY_BYTE(N)			\
-	LOADBU(t0, N(src), .Ll_exc_copy);	\
+	LOADBU(t0, N(src), .Ll_exc_copy\@);	\
 	SUB	len, len, 1;		\
-	STOREB(t0, N(dst), .Ls_exc);	\
+	STOREB(t0, N(dst), .Ls_exc\@);	\
 	SLLV	t0, t0, t3;		\
 	addu	t3, SHIFT_INC;		\
-	beqz	len, .Lcopy_bytes_done; \
+	beqz	len, .Lcopy_bytes_done\@; \
 	 or	t2, t0
 
 	COPY_BYTE(0)
@@ -678,14 +688,14 @@ FEXPORT(csum_partial_copy_nocheck)
 	COPY_BYTE(4)
 	COPY_BYTE(5)
 #endif
-	LOADBU(t0, NBYTES-2(src), .Ll_exc_copy)
+	LOADBU(t0, NBYTES-2(src), .Ll_exc_copy\@)
 	SUB	len, len, 1
-	STOREB(t0, NBYTES-2(dst), .Ls_exc)
+	STOREB(t0, NBYTES-2(dst), .Ls_exc\@)
 	SLLV	t0, t0, t3
 	or	t2, t0
-.Lcopy_bytes_done:
+.Lcopy_bytes_done\@:
 	ADDC(sum, t2)
-.Ldone:
+.Ldone\@:
 	/* fold checksum */
 #ifdef USE_DOUBLE
 	dsll32	v1, sum, 0
@@ -714,7 +724,7 @@ FEXPORT(csum_partial_copy_nocheck)
 	jr	ra
 	.set noreorder
 
-.Ll_exc_copy:
+.Ll_exc_copy\@:
 	/*
 	 * Copy bytes from src until faulting load address (or until a
 	 * lb faults)
@@ -729,7 +739,7 @@ FEXPORT(csum_partial_copy_nocheck)
 	 li	t2, SHIFT_START
 	LOADK	t0, THREAD_BUADDR(t0)
 1:
-	LOADBU(t1, 0(src), .Ll_exc)
+	LOADBU(t1, 0(src), .Ll_exc\@)
 	ADD	src, src, 1
 	sb	t1, 0(dst)	# can't fault -- we're copy_from_user
 	SLLV	t1, t1, t2
@@ -739,7 +749,7 @@ FEXPORT(csum_partial_copy_nocheck)
 	ADD	dst, dst, 1
 	bne	src, t0, 1b
 	.set	noreorder
-.Ll_exc:
+.Ll_exc\@:
 	LOADK	t0, TI_TASK($28)
 	 nop
 	LOADK	t0, THREAD_BUADDR(t0)	# t0 is just past last good address
@@ -758,7 +768,7 @@ FEXPORT(csum_partial_copy_nocheck)
 	 */
 	.set	reorder				/* DADDI_WAR */
 	SUB	src, len, 1
-	beqz	len, .Ldone
+	beqz	len, .Ldone\@
 	.set	noreorder
 1:	sb	zero, 0(dst)
 	ADD	dst, dst, 1
@@ -773,13 +783,19 @@ FEXPORT(csum_partial_copy_nocheck)
 	 SUB	src, src, v1
 #endif
 	li	v1, -EFAULT
-	b	.Ldone
+	b	.Ldone\@
 	 sw	v1, (errptr)
 
-.Ls_exc:
+.Ls_exc\@:
 	li	v0, -1 /* invalid checksum */
 	li	v1, -EFAULT
 	jr	ra
 	 sw	v1, (errptr)
 	.set	pop
-	END(__csum_partial_copy_kernel)
+	.endm
+
+LEAF(__csum_partial_copy_kernel)
+FEXPORT(__csum_partial_copy_to_user)
+FEXPORT(__csum_partial_copy_from_user)
+__BUILD_CSUM_PARTIAL_COPY_USER LEGACY_MODE USEROP USEROP 1
+END(__csum_partial_copy_kernel)
-- 


From 6f85cebe49a4cd25a381f356ad51ccc376d00a7c Mon Sep 17 00:00:00 2001
From: Markos Chandras <markos.chandras@imgtec.com>
Date: Fri, 17 Jan 2014 11:36:16 +0000
Subject: MIPS: lib: csum_partial: Add EVA support

Use the EVA-specific load/store instructions to read data from and
write data to the user address space.
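
For a user-side access, the EVA branch of the EXC wrapper pastes an
'e' suffix onto the instruction (insn##e), so LOAD()'s lw becomes the
EVA user load lwe. A sketch of the resulting expansion, assuming the
__BUILD_EVA_INSN helper simply emits the e-variant instruction:

	9:	lwe	t0, 0(src)	# load through the user address mapping,
					# even though the CPU is in kernel mode
		.section __ex_table,"a"
		PTR	9b, handler	# fault fixup entry
		.previous

Kernel-side accesses in the same expansion take the .else branch and
remain ordinary lw/sw, with no __ex_table entry since they are not
expected to fault.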

Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
---
 arch/mips/lib/csum_partial.S | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S
index 62c8768a59ce..2e4825e48388 100644
--- a/arch/mips/lib/csum_partial.S
+++ b/arch/mips/lib/csum_partial.S
@@ -352,6 +352,19 @@ LEAF(csum_partial)
 		.section __ex_table,"a";	\
 		PTR	9b, handler;		\
 		.previous;			\
+	/* This is enabled in EVA mode */	\
+	.else;					\
+		/* If loading from user or storing to user */	\
+		.if ((\from == USEROP) && (type == LD_INSN)) || \
+		    ((\to == USEROP) && (type == ST_INSN));	\
+9:			__BUILD_EVA_INSN(insn##e, reg, addr);	\
+			.section __ex_table,"a";		\
+			PTR	9b, handler;			\
+			.previous;				\
+		.else;						\
+			/* EVA without exception */		\
+			insn reg, addr;				\
+		.endif;						\
 	.endif
 
 #undef LOAD
@@ -795,7 +808,19 @@ LEAF(csum_partial)
 	.endm
 
 LEAF(__csum_partial_copy_kernel)
+#ifndef CONFIG_EVA
 FEXPORT(__csum_partial_copy_to_user)
 FEXPORT(__csum_partial_copy_from_user)
+#endif
 __BUILD_CSUM_PARTIAL_COPY_USER LEGACY_MODE USEROP USEROP 1
 END(__csum_partial_copy_kernel)
+
+#ifdef CONFIG_EVA
+LEAF(__csum_partial_copy_to_user)
+__BUILD_CSUM_PARTIAL_COPY_USER EVA_MODE KERNELOP USEROP 0
+END(__csum_partial_copy_to_user)
+
+LEAF(__csum_partial_copy_from_user)
+__BUILD_CSUM_PARTIAL_COPY_USER EVA_MODE USEROP KERNELOP 0
+END(__csum_partial_copy_from_user)
+#endif
-- 