From ac85227f7637cfb0d811519b8253c454d0d0a159 Mon Sep 17 00:00:00 2001
From: Markos Chandras <markos.chandras@imgtec.com>
Date: Thu, 12 Dec 2013 16:21:00 +0000
Subject: MIPS: checksum: Split the 'copy_user' symbol

The 'copy_user' symbol can be used to copy from or to userland, so we
will use two different symbols for these operations. This makes no
difference in the existing code, but when the core is operating in EVA
mode, different instructions need to be used to read from and write to
the userland address space. The old function has also been renamed to
'copy_kernel' to denote that it is suitable for copying data to and
from kernel space.

Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
---
 arch/mips/lib/csum_partial.S | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'arch/mips/lib/csum_partial.S')

diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S
index a6adffbb4e5f..5d73d0d704a3 100644
--- a/arch/mips/lib/csum_partial.S
+++ b/arch/mips/lib/csum_partial.S
@@ -8,6 +8,7 @@
  * Copyright (C) 1998, 1999 Ralf Baechle
  * Copyright (C) 1999 Silicon Graphics, Inc.
  * Copyright (C) 2007 Maciej W. Rozycki
+ * Copyright (C) 2014 Imagination Technologies Ltd.
  */
 #include <linux/errno.h>
 #include <asm/asm.h>
@@ -296,7 +297,7 @@ LEAF(csum_partial)
  * checksum and copy routines based on memcpy.S
  *
  *	csum_partial_copy_nocheck(src, dst, len, sum)
- *	__csum_partial_copy_user(src, dst, len, sum, errp)
+ *	__csum_partial_copy_kernel(src, dst, len, sum, errp)
  *
  * See "Spec" in memcpy.S for details.	Unlike __copy_user, all
  * function in this file use the standard calling convention.
@@ -396,7 +397,9 @@ LEAF(csum_partial)
 	.set	at=v1
 #endif

-LEAF(__csum_partial_copy_user)
+LEAF(__csum_partial_copy_kernel)
+FEXPORT(__csum_partial_copy_to_user)
+FEXPORT(__csum_partial_copy_from_user)
 	PTR_ADDU	AT, src, len	/* See (1) above. */
 #ifdef CONFIG_64BIT
 	move	errptr, a4
@@ -757,4 +760,4 @@ EXC(	lbu	t1, 0(src), .Ll_exc)
 	jr	ra
 	 sw	v1, (errptr)
 	.set	pop
-	END(__csum_partial_copy_user)
+	END(__csum_partial_copy_kernel)
--
cgit

From 2ab82e66483798670e129c48c05d7fc8a39ea996 Mon Sep 17 00:00:00 2001
From: Markos Chandras <markos.chandras@imgtec.com>
Date: Thu, 16 Jan 2014 17:02:13 +0000
Subject: MIPS: lib: csum_partial: Merge EXC and load/store macros

Each load/store macro always adds an entry to the __ex_table using the
EXC macro. There are cases where a load instruction may never fail,
such as when we are sure the load happens in the kernel address space.
Therefore, we merge the EXC and LOADX/STOREX macros into a single one.
We also expand the argument list in the EXC macro to make it more
flexible. The extra 'type' argument is not used by this commit, but it
will be used when EVA support is added to memcpy.

Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
---
 arch/mips/lib/csum_partial.S | 160 ++++++++++++++++++++++++-------------------
 1 file changed, 91 insertions(+), 69 deletions(-)

(limited to 'arch/mips/lib/csum_partial.S')

diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S
index 5d73d0d704a3..bff5167b59a6 100644
--- a/arch/mips/lib/csum_partial.S
+++ b/arch/mips/lib/csum_partial.S
@@ -328,20 +328,39 @@ LEAF(csum_partial)
  * These handlers do not need to overwrite any data.
  */

-#define EXC(inst_reg,addr,handler)		\
-9:	inst_reg, addr;				\
+/* Instruction type */
+#define LD_INSN 1
+#define ST_INSN 2
+
+/*
+ * Wrapper to add an entry in the exception table
+ * in case the insn causes a memory exception.
+ * Arguments:
+ * insn    : Load/store instruction
+ * type    : Instruction type
+ * reg     : Register
+ * addr    : Address
+ * handler : Exception handler
+ */
+#define EXC(insn, type, reg, addr, handler)	\
+9:	insn reg, addr;				\
 	.section __ex_table,"a";		\
 	PTR	9b, handler;			\
 	.previous

+#undef LOAD
+
 #ifdef USE_DOUBLE

-#define LOAD   ld
-#define LOADL  ldl
-#define LOADR  ldr
-#define STOREL sdl
-#define STORER sdr
-#define STORE  sd
+#define LOADK	ld /* No exception */
+#define LOAD(reg, addr, handler)	EXC(ld, LD_INSN, reg, addr, handler)
+#define LOADBU(reg, addr, handler)	EXC(lbu, LD_INSN, reg, addr, handler)
+#define LOADL(reg, addr, handler)	EXC(ldl, LD_INSN, reg, addr, handler)
+#define LOADR(reg, addr, handler)	EXC(ldr, LD_INSN, reg, addr, handler)
+#define STOREB(reg, addr, handler)	EXC(sb, ST_INSN, reg, addr, handler)
+#define STOREL(reg, addr, handler)	EXC(sdl, ST_INSN, reg, addr, handler)
+#define STORER(reg, addr, handler)	EXC(sdr, ST_INSN, reg, addr, handler)
+#define STORE(reg, addr, handler)	EXC(sd, ST_INSN, reg, addr, handler)
 #define ADD    daddu
 #define SUB    dsubu
 #define SRL    dsrl
@@ -353,12 +372,15 @@ LEAF(csum_partial)

 #else

-#define LOAD   lw
-#define LOADL  lwl
-#define LOADR  lwr
-#define STOREL swl
-#define STORER swr
-#define STORE  sw
+#define LOADK	lw /* No exception */
+#define LOAD(reg, addr, handler)	EXC(lw, LD_INSN, reg, addr, handler)
+#define LOADBU(reg, addr, handler)	EXC(lbu, LD_INSN, reg, addr, handler)
+#define LOADL(reg, addr, handler)	EXC(lwl, LD_INSN, reg, addr, handler)
+#define LOADR(reg, addr, handler)	EXC(lwr, LD_INSN, reg, addr, handler)
+#define STOREB(reg, addr, handler)	EXC(sb, ST_INSN, reg, addr, handler)
+#define STOREL(reg, addr, handler)	EXC(swl, ST_INSN, reg, addr, handler)
+#define STORER(reg, addr, handler)	EXC(swr, ST_INSN, reg, addr, handler)
+#define STORE(reg, addr, handler)	EXC(sw, ST_INSN, reg, addr, handler)
 #define ADD    addu
 #define SUB    subu
 #define SRL    srl
@@ -439,31 +461,31 @@ FEXPORT(csum_partial_copy_nocheck)
 	SUB	len, 8*NBYTES		# subtract here for bgez loop
 	.align	4
1:
-EXC(	LOAD	t0, UNIT(0)(src), .Ll_exc)
-EXC(	LOAD	t1, UNIT(1)(src), .Ll_exc_copy)
-EXC(	LOAD	t2, UNIT(2)(src), .Ll_exc_copy)
-EXC(	LOAD	t3, UNIT(3)(src), .Ll_exc_copy)
-EXC(	LOAD	t4, UNIT(4)(src), .Ll_exc_copy)
-EXC(	LOAD	t5, UNIT(5)(src), .Ll_exc_copy)
-EXC(	LOAD	t6, UNIT(6)(src), .Ll_exc_copy)
-EXC(	LOAD	t7, UNIT(7)(src), .Ll_exc_copy)
+	LOAD(t0, UNIT(0)(src), .Ll_exc)
+	LOAD(t1, UNIT(1)(src), .Ll_exc_copy)
+	LOAD(t2, UNIT(2)(src), .Ll_exc_copy)
+	LOAD(t3, UNIT(3)(src), .Ll_exc_copy)
+	LOAD(t4, UNIT(4)(src), .Ll_exc_copy)
+	LOAD(t5, UNIT(5)(src), .Ll_exc_copy)
+	LOAD(t6, UNIT(6)(src), .Ll_exc_copy)
+	LOAD(t7, UNIT(7)(src), .Ll_exc_copy)
 	SUB	len, len, 8*NBYTES
 	ADD	src, src, 8*NBYTES
-EXC(	STORE	t0, UNIT(0)(dst), .Ls_exc)
+	STORE(t0, UNIT(0)(dst), .Ls_exc)
 	ADDC(sum, t0)
-EXC(	STORE	t1, UNIT(1)(dst), .Ls_exc)
+	STORE(t1, UNIT(1)(dst), .Ls_exc)
 	ADDC(sum, t1)
-EXC(	STORE	t2, UNIT(2)(dst), .Ls_exc)
+	STORE(t2, UNIT(2)(dst), .Ls_exc)
 	ADDC(sum, t2)
-EXC(	STORE	t3, UNIT(3)(dst), .Ls_exc)
+	STORE(t3, UNIT(3)(dst), .Ls_exc)
 	ADDC(sum, t3)
-EXC(	STORE	t4, UNIT(4)(dst), .Ls_exc)
+	STORE(t4, UNIT(4)(dst), .Ls_exc)
 	ADDC(sum, t4)
-EXC(	STORE	t5, UNIT(5)(dst), .Ls_exc)
+	STORE(t5, UNIT(5)(dst), .Ls_exc)
 	ADDC(sum, t5)
-EXC(	STORE	t6, UNIT(6)(dst), .Ls_exc)
+	STORE(t6, UNIT(6)(dst), .Ls_exc)
 	ADDC(sum, t6)
-EXC(	STORE	t7, UNIT(7)(dst), .Ls_exc)
+	STORE(t7, UNIT(7)(dst), .Ls_exc)
 	ADDC(sum, t7)
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, 8*NBYTES
@@ -483,19 +505,19 @@ EXC(	STORE	t7, UNIT(7)(dst), .Ls_exc)
 	/*
 	 * len >= 4*NBYTES
 	 */
-EXC(	LOAD	t0, UNIT(0)(src), .Ll_exc)
-EXC(	LOAD	t1, UNIT(1)(src), .Ll_exc_copy)
-EXC(	LOAD	t2, UNIT(2)(src), .Ll_exc_copy)
-EXC(	LOAD	t3, UNIT(3)(src), .Ll_exc_copy)
+	LOAD(t0, UNIT(0)(src), .Ll_exc)
+	LOAD(t1, UNIT(1)(src), .Ll_exc_copy)
+	LOAD(t2, UNIT(2)(src), .Ll_exc_copy)
+	LOAD(t3, UNIT(3)(src), .Ll_exc_copy)
 	SUB	len, len, 4*NBYTES
 	ADD	src, src, 4*NBYTES
-EXC(	STORE	t0, UNIT(0)(dst), .Ls_exc)
+	STORE(t0, UNIT(0)(dst), .Ls_exc)
 	ADDC(sum, t0)
-EXC(	STORE	t1, UNIT(1)(dst), .Ls_exc)
+	STORE(t1, UNIT(1)(dst), .Ls_exc)
 	ADDC(sum, t1)
-EXC(	STORE	t2, UNIT(2)(dst), .Ls_exc)
+	STORE(t2, UNIT(2)(dst), .Ls_exc)
 	ADDC(sum, t2)
-EXC(	STORE	t3, UNIT(3)(dst), .Ls_exc)
+	STORE(t3, UNIT(3)(dst), .Ls_exc)
 	ADDC(sum, t3)
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, 4*NBYTES
@@ -508,10 +530,10 @@ EXC(	STORE	t3, UNIT(3)(dst), .Ls_exc)
 	beq	rem, len, .Lcopy_bytes
 	 nop
1:
-EXC(	LOAD	t0, 0(src), .Ll_exc)
+	LOAD(t0, 0(src), .Ll_exc)
 	ADD	src, src, NBYTES
 	SUB	len, len, NBYTES
-EXC(	STORE	t0, 0(dst), .Ls_exc)
+	STORE(t0, 0(dst), .Ls_exc)
 	ADDC(sum, t0)
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, NBYTES
@@ -534,10 +556,10 @@ EXC(	STORE	t0, 0(dst), .Ls_exc)
 	ADD	t1, dst, len	# t1 is just past last byte of dst
 	li	bits, 8*NBYTES
 	SLL	rem, len, 3	# rem = number of bits to keep
-EXC(	LOAD	t0, 0(src), .Ll_exc)
+	LOAD(t0, 0(src), .Ll_exc)
 	SUB	bits, bits, rem	# bits = number of bits to discard
 	SHIFT_DISCARD t0, t0, bits
-EXC(	STREST	t0, -1(t1), .Ls_exc)
+	STREST(t0, -1(t1), .Ls_exc)
 	SHIFT_DISCARD_REVERT t0, t0, bits
 	.set	reorder
 	ADDC(sum, t0)
@@ -554,12 +576,12 @@ EXC(	STREST	t0, -1(t1), .Ls_exc)
 	 * Set match = (src and dst have same alignment)
 	 */
#define match rem
-EXC(	LDFIRST	t3, FIRST(0)(src), .Ll_exc)
+	LDFIRST(t3, FIRST(0)(src), .Ll_exc)
 	ADD	t2, zero, NBYTES
-EXC(	LDREST	t3, REST(0)(src), .Ll_exc_copy)
+	LDREST(t3, REST(0)(src), .Ll_exc_copy)
 	SUB	t2, t2, t1	# t2 = number of bytes copied
 	xor	match, t0, t1
-EXC(	STFIRST t3, FIRST(0)(dst), .Ls_exc)
+	STFIRST(t3, FIRST(0)(dst), .Ls_exc)
 	SLL	t4, t1, 3		# t4 = number of bits to discard
 	SHIFT_DISCARD t3, t3, t4
 	/* no SHIFT_DISCARD_REVERT to handle odd buffer properly */
 	ADDC(sum, t3)
@@ -581,26 +603,26 @@ EXC(	STFIRST t3, FIRST(0)(dst), .Ls_exc)
  * It's OK to load FIRST(N+1) before REST(N) because the two addresses
  * are to the same unit (unless src is aligned, but it's not).
  */
-EXC(	LDFIRST	t0, FIRST(0)(src), .Ll_exc)
-EXC(	LDFIRST	t1, FIRST(1)(src), .Ll_exc_copy)
+	LDFIRST(t0, FIRST(0)(src), .Ll_exc)
+	LDFIRST(t1, FIRST(1)(src), .Ll_exc_copy)
 	SUB	len, len, 4*NBYTES
-EXC(	LDREST	t0, REST(0)(src), .Ll_exc_copy)
-EXC(	LDREST	t1, REST(1)(src), .Ll_exc_copy)
-EXC(	LDFIRST	t2, FIRST(2)(src), .Ll_exc_copy)
-EXC(	LDFIRST	t3, FIRST(3)(src), .Ll_exc_copy)
-EXC(	LDREST	t2, REST(2)(src), .Ll_exc_copy)
-EXC(	LDREST	t3, REST(3)(src), .Ll_exc_copy)
+	LDREST(t0, REST(0)(src), .Ll_exc_copy)
+	LDREST(t1, REST(1)(src), .Ll_exc_copy)
+	LDFIRST(t2, FIRST(2)(src), .Ll_exc_copy)
+	LDFIRST(t3, FIRST(3)(src), .Ll_exc_copy)
+	LDREST(t2, REST(2)(src), .Ll_exc_copy)
+	LDREST(t3, REST(3)(src), .Ll_exc_copy)
 	ADD	src, src, 4*NBYTES
 #ifdef CONFIG_CPU_SB1
 	nop				# improves slotting
 #endif
-EXC(	STORE	t0, UNIT(0)(dst), .Ls_exc)
+	STORE(t0, UNIT(0)(dst), .Ls_exc)
 	ADDC(sum, t0)
-EXC(	STORE	t1, UNIT(1)(dst), .Ls_exc)
+	STORE(t1, UNIT(1)(dst), .Ls_exc)
 	ADDC(sum, t1)
-EXC(	STORE	t2, UNIT(2)(dst), .Ls_exc)
+	STORE(t2, UNIT(2)(dst), .Ls_exc)
 	ADDC(sum, t2)
-EXC(	STORE	t3, UNIT(3)(dst), .Ls_exc)
+	STORE(t3, UNIT(3)(dst), .Ls_exc)
 	ADDC(sum, t3)
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, 4*NBYTES
@@ -613,11 +635,11 @@ EXC(	STORE	t3, UNIT(3)(dst), .Ls_exc)
 	beq	rem, len, .Lcopy_bytes
 	 nop
1:
-EXC(	LDFIRST	t0, FIRST(0)(src), .Ll_exc)
-EXC(	LDREST	t0, REST(0)(src), .Ll_exc_copy)
+	LDFIRST(t0, FIRST(0)(src), .Ll_exc)
+	LDREST(t0, REST(0)(src), .Ll_exc_copy)
 	ADD	src, src, NBYTES
 	SUB	len, len, NBYTES
-EXC(	STORE	t0, 0(dst), .Ls_exc)
+	STORE(t0, 0(dst), .Ls_exc)
 	ADDC(sum, t0)
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, NBYTES
@@ -640,9 +662,9 @@ EXC(	STORE	t0, 0(dst), .Ls_exc)
 	li	t3, SHIFT_START	# shift
/* use .Ll_exc_copy here to return correct sum on fault */
#define COPY_BYTE(N)			\
-EXC(	lbu	t0, N(src), .Ll_exc_copy);	\
+	LOADBU(t0, N(src), .Ll_exc_copy);	\
 	SUB	len, len, 1;		\
-EXC(	sb	t0, N(dst), .Ls_exc);	\
+	STOREB(t0, N(dst), .Ls_exc);	\
 	SLLV	t0, t0, t3;		\
 	addu	t3, SHIFT_INC;		\
 	beqz	len, .Lcopy_bytes_done;	\
 	 or	t2, t0

 	COPY_BYTE(0)
 	COPY_BYTE(1)
#ifdef USE_DOUBLE
 	COPY_BYTE(2)
 	COPY_BYTE(3)
 	COPY_BYTE(4)
 	COPY_BYTE(5)
#endif
-EXC(	lbu	t0, NBYTES-2(src), .Ll_exc_copy)
+	LOADBU(t0, NBYTES-2(src), .Ll_exc_copy)
 	SUB	len, len, 1
-EXC(	sb	t0, NBYTES-2(dst), .Ls_exc)
+	STOREB(t0, NBYTES-2(dst), .Ls_exc)
 	SLLV	t0, t0, t3
 	or	t2, t0
.Lcopy_bytes_done:
@@ -703,11 +725,11 @@ EXC(	sb	t0, NBYTES-2(dst), .Ls_exc)
 	 *
 	 * Assumes src < THREAD_BUADDR($28)
 	 */
-	LOAD	t0, TI_TASK($28)
+	LOADK	t0, TI_TASK($28)
 	li	t2, SHIFT_START
-	LOAD	t0, THREAD_BUADDR(t0)
+	LOADK	t0, THREAD_BUADDR(t0)
1:
-EXC(	lbu	t1, 0(src), .Ll_exc)
+	LOADBU(t1, 0(src), .Ll_exc)
 	ADD	src, src, 1
 	sb	t1, 0(dst)	# can't fault -- we're copy_from_user
 	SLLV	t1, t1, t2
@@ -718,9 +740,9 @@ EXC(	lbu	t1, 0(src), .Ll_exc)
 	bne	src, t0, 1b
 	.set	noreorder
.Ll_exc:
-	LOAD	t0, TI_TASK($28)
+	LOADK	t0, TI_TASK($28)
 	 nop
-	LOAD	t0, THREAD_BUADDR(t0)	# t0 is just past last good address
+	LOADK	t0, THREAD_BUADDR(t0)	# t0 is just past last good address
 	 nop
 	SUB	len, AT, t0		# len number of uncopied bytes
 	/*
--
cgit

From e89fb56c8bcf5514cfe7abd7a3dda9e6007b7238 Mon Sep 17 00:00:00 2001
From: Markos Chandras <markos.chandras@imgtec.com>
Date: Fri, 17 Jan 2014 10:48:46 +0000
Subject: MIPS: lib: csum_partial: Add macro to build csum_partial symbols

In preparation for EVA support, we use a macro to build the
__csum_partial_copy_user main code so it can be shared across multiple
implementations.
EVA uses the same code, but it replaces the load/store/prefetch
instructions with the EVA-specific ones; using a macro therefore avoids
unnecessary code duplication.

Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
---
 arch/mips/lib/csum_partial.S | 200 +++++++++++++++++++++++--------------------
 1 file changed, 108 insertions(+), 92 deletions(-)

(limited to 'arch/mips/lib/csum_partial.S')

diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S
index bff5167b59a6..62c8768a59ce 100644
--- a/arch/mips/lib/csum_partial.S
+++ b/arch/mips/lib/csum_partial.S
@@ -331,6 +331,10 @@ LEAF(csum_partial)
 /* Instruction type */
 #define LD_INSN 1
 #define ST_INSN 2
+#define LEGACY_MODE 1
+#define EVA_MODE    2
+#define USEROP   1
+#define KERNELOP 2

 /*
  * Wrapper to add an entry in the exception table
@@ -343,10 +347,12 @@ LEAF(csum_partial)
  * handler : Exception handler
  */
 #define EXC(insn, type, reg, addr, handler)	\
-9:	insn reg, addr;				\
-	.section __ex_table,"a";		\
-	PTR	9b, handler;			\
-	.previous
+	.if \mode == LEGACY_MODE;		\
+9:	insn reg, addr;				\
+	.section __ex_table,"a";		\
+	PTR	9b, handler;			\
+	.previous;				\
+	.endif

 #undef LOAD

@@ -419,16 +425,20 @@ LEAF(csum_partial)
 	.set	at=v1
 #endif

-LEAF(__csum_partial_copy_kernel)
-FEXPORT(__csum_partial_copy_to_user)
-FEXPORT(__csum_partial_copy_from_user)
+	.macro __BUILD_CSUM_PARTIAL_COPY_USER mode, from, to, __nocheck
+
 	PTR_ADDU	AT, src, len	/* See (1) above. */
+	/* initialize __nocheck if this is the first time we execute
+	 * this macro
+	 */
 #ifdef CONFIG_64BIT
 	move	errptr, a4
 #else
 	lw	errptr, 16(sp)
 #endif
-FEXPORT(csum_partial_copy_nocheck)
+	.if \__nocheck == 1
+	FEXPORT(csum_partial_copy_nocheck)
+	.endif
 	move	sum, zero
 	move	odd, zero
 	/*
@@ -444,48 +454,48 @@ FEXPORT(csum_partial_copy_nocheck)
 	 */
 	sltu	t2, len, NBYTES
 	and	t1, dst, ADDRMASK
-	bnez	t2, .Lcopy_bytes_checklen
+	bnez	t2, .Lcopy_bytes_checklen\@
 	 and	t0, src, ADDRMASK
 	andi	odd, dst, 0x1			/* odd buffer? */
-	bnez	t1, .Ldst_unaligned
+	bnez	t1, .Ldst_unaligned\@
 	 nop
-	bnez	t0, .Lsrc_unaligned_dst_aligned
+	bnez	t0, .Lsrc_unaligned_dst_aligned\@
 	/*
 	 * use delay slot for fall-through
 	 * src and dst are aligned; need to compute rem
 	 */
-.Lboth_aligned:
+.Lboth_aligned\@:
 	SRL	t0, len, LOG_NBYTES+3		# +3 for 8 units/iter
-	beqz	t0, .Lcleanup_both_aligned	# len < 8*NBYTES
+	beqz	t0, .Lcleanup_both_aligned\@	# len < 8*NBYTES
 	 nop
 	SUB	len, 8*NBYTES		# subtract here for bgez loop
 	.align	4
1:
-	LOAD(t0, UNIT(0)(src), .Ll_exc)
-	LOAD(t1, UNIT(1)(src), .Ll_exc_copy)
-	LOAD(t2, UNIT(2)(src), .Ll_exc_copy)
-	LOAD(t3, UNIT(3)(src), .Ll_exc_copy)
-	LOAD(t4, UNIT(4)(src), .Ll_exc_copy)
-	LOAD(t5, UNIT(5)(src), .Ll_exc_copy)
-	LOAD(t6, UNIT(6)(src), .Ll_exc_copy)
-	LOAD(t7, UNIT(7)(src), .Ll_exc_copy)
+	LOAD(t0, UNIT(0)(src), .Ll_exc\@)
+	LOAD(t1, UNIT(1)(src), .Ll_exc_copy\@)
+	LOAD(t2, UNIT(2)(src), .Ll_exc_copy\@)
+	LOAD(t3, UNIT(3)(src), .Ll_exc_copy\@)
+	LOAD(t4, UNIT(4)(src), .Ll_exc_copy\@)
+	LOAD(t5, UNIT(5)(src), .Ll_exc_copy\@)
+	LOAD(t6, UNIT(6)(src), .Ll_exc_copy\@)
+	LOAD(t7, UNIT(7)(src), .Ll_exc_copy\@)
 	SUB	len, len, 8*NBYTES
 	ADD	src, src, 8*NBYTES
-	STORE(t0, UNIT(0)(dst), .Ls_exc)
+	STORE(t0, UNIT(0)(dst), .Ls_exc\@)
 	ADDC(sum, t0)
-	STORE(t1, UNIT(1)(dst), .Ls_exc)
+	STORE(t1, UNIT(1)(dst), .Ls_exc\@)
 	ADDC(sum, t1)
-	STORE(t2, UNIT(2)(dst), .Ls_exc)
+	STORE(t2, UNIT(2)(dst), .Ls_exc\@)
 	ADDC(sum, t2)
-	STORE(t3, UNIT(3)(dst), .Ls_exc)
+	STORE(t3, UNIT(3)(dst), .Ls_exc\@)
 	ADDC(sum, t3)
-	STORE(t4, UNIT(4)(dst), .Ls_exc)
+	STORE(t4, UNIT(4)(dst), .Ls_exc\@)
 	ADDC(sum, t4)
-	STORE(t5, UNIT(5)(dst), .Ls_exc)
+	STORE(t5, UNIT(5)(dst), .Ls_exc\@)
 	ADDC(sum, t5)
-	STORE(t6, UNIT(6)(dst), .Ls_exc)
+	STORE(t6, UNIT(6)(dst), .Ls_exc\@)
 	ADDC(sum, t6)
-	STORE(t7, UNIT(7)(dst), .Ls_exc)
+	STORE(t7, UNIT(7)(dst), .Ls_exc\@)
 	ADDC(sum, t7)
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, 8*NBYTES
@@ -496,44 +506,44 @@ FEXPORT(csum_partial_copy_nocheck)
 	/*
 	 * len == the number of bytes left to copy < 8*NBYTES
 	 */
-.Lcleanup_both_aligned:
+.Lcleanup_both_aligned\@:
#define rem t7
-	beqz	len, .Ldone
+	beqz	len, .Ldone\@
 	 sltu	t0, len, 4*NBYTES
-	bnez	t0, .Lless_than_4units
+	bnez	t0, .Lless_than_4units\@
 	 and	rem, len, (NBYTES-1)	# rem = len % NBYTES
 	/*
 	 * len >= 4*NBYTES
 	 */
-	LOAD(t0, UNIT(0)(src), .Ll_exc)
-	LOAD(t1, UNIT(1)(src), .Ll_exc_copy)
-	LOAD(t2, UNIT(2)(src), .Ll_exc_copy)
-	LOAD(t3, UNIT(3)(src), .Ll_exc_copy)
+	LOAD(t0, UNIT(0)(src), .Ll_exc\@)
+	LOAD(t1, UNIT(1)(src), .Ll_exc_copy\@)
+	LOAD(t2, UNIT(2)(src), .Ll_exc_copy\@)
+	LOAD(t3, UNIT(3)(src), .Ll_exc_copy\@)
 	SUB	len, len, 4*NBYTES
 	ADD	src, src, 4*NBYTES
-	STORE(t0, UNIT(0)(dst), .Ls_exc)
+	STORE(t0, UNIT(0)(dst), .Ls_exc\@)
 	ADDC(sum, t0)
-	STORE(t1, UNIT(1)(dst), .Ls_exc)
+	STORE(t1, UNIT(1)(dst), .Ls_exc\@)
 	ADDC(sum, t1)
-	STORE(t2, UNIT(2)(dst), .Ls_exc)
+	STORE(t2, UNIT(2)(dst), .Ls_exc\@)
 	ADDC(sum, t2)
-	STORE(t3, UNIT(3)(dst), .Ls_exc)
+	STORE(t3, UNIT(3)(dst), .Ls_exc\@)
 	ADDC(sum, t3)
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, 4*NBYTES
-	beqz	len, .Ldone
+	beqz	len, .Ldone\@
 	.set	noreorder
-.Lless_than_4units:
+.Lless_than_4units\@:
 	/*
 	 * rem = len % NBYTES
 	 */
-	beq	rem, len, .Lcopy_bytes
+	beq	rem, len, .Lcopy_bytes\@
 	 nop
1:
-	LOAD(t0, 0(src), .Ll_exc)
+	LOAD(t0, 0(src), .Ll_exc\@)
 	ADD	src, src, NBYTES
 	SUB	len, len, NBYTES
-	STORE(t0, 0(dst), .Ls_exc)
+	STORE(t0, 0(dst), .Ls_exc\@)
 	ADDC(sum, t0)
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, NBYTES
@@ -552,20 +562,20 @@ FEXPORT(csum_partial_copy_nocheck)
  * more instruction-level parallelism.
  */
#define bits t2
-	beqz	len, .Ldone
+	beqz	len, .Ldone\@
 	 ADD	t1, dst, len	# t1 is just past last byte of dst
 	li	bits, 8*NBYTES
 	SLL	rem, len, 3	# rem = number of bits to keep
-	LOAD(t0, 0(src), .Ll_exc)
+	LOAD(t0, 0(src), .Ll_exc\@)
 	SUB	bits, bits, rem	# bits = number of bits to discard
 	SHIFT_DISCARD t0, t0, bits
-	STREST(t0, -1(t1), .Ls_exc)
+	STREST(t0, -1(t1), .Ls_exc\@)
 	SHIFT_DISCARD_REVERT t0, t0, bits
 	.set	reorder
 	ADDC(sum, t0)
-	b	.Ldone
+	b	.Ldone\@
 	.set	noreorder
-.Ldst_unaligned:
+.Ldst_unaligned\@:
 	/*
 	 * dst is unaligned
 	 * t0 = src & ADDRMASK
 	 * t1 = dst & ADDRMASK; T1 > 0
 	 * len >= NBYTES
 	 *
 	 * Copy enough bytes to align dst
 	 * Set match = (src and dst have same alignment)
 	 */
#define match rem
-	LDFIRST(t3, FIRST(0)(src), .Ll_exc)
+	LDFIRST(t3, FIRST(0)(src), .Ll_exc\@)
 	ADD	t2, zero, NBYTES
-	LDREST(t3, REST(0)(src), .Ll_exc_copy)
+	LDREST(t3, REST(0)(src), .Ll_exc_copy\@)
 	SUB	t2, t2, t1	# t2 = number of bytes copied
 	xor	match, t0, t1
-	STFIRST(t3, FIRST(0)(dst), .Ls_exc)
+	STFIRST(t3, FIRST(0)(dst), .Ls_exc\@)
 	SLL	t4, t1, 3		# t4 = number of bits to discard
 	SHIFT_DISCARD t3, t3, t4
 	/* no SHIFT_DISCARD_REVERT to handle odd buffer properly */
 	ADDC(sum, t3)
-	beq	len, t2, .Ldone
+	beq	len, t2, .Ldone\@
 	 SUB	len, len, t2
 	ADD	dst, dst, t2
-	beqz	match, .Lboth_aligned
+	beqz	match, .Lboth_aligned\@
 	 ADD	src, src, t2
-.Lsrc_unaligned_dst_aligned:
+.Lsrc_unaligned_dst_aligned\@:
 	SRL	t0, len, LOG_NBYTES+2    # +2 for 4 units/iter
-	beqz	t0, .Lcleanup_src_unaligned
+	beqz	t0, .Lcleanup_src_unaligned\@
 	 and	rem, len, (4*NBYTES-1)   # rem = len % 4*NBYTES
1:
/*
@@ -603,53 +613,53 @@ FEXPORT(csum_partial_copy_nocheck)
  * It's OK to load FIRST(N+1) before REST(N) because the two addresses
  * are to the same unit (unless src is aligned, but it's not).
  */
-	LDFIRST(t0, FIRST(0)(src), .Ll_exc)
-	LDFIRST(t1, FIRST(1)(src), .Ll_exc_copy)
+	LDFIRST(t0, FIRST(0)(src), .Ll_exc\@)
+	LDFIRST(t1, FIRST(1)(src), .Ll_exc_copy\@)
 	SUB	len, len, 4*NBYTES
-	LDREST(t0, REST(0)(src), .Ll_exc_copy)
-	LDREST(t1, REST(1)(src), .Ll_exc_copy)
-	LDFIRST(t2, FIRST(2)(src), .Ll_exc_copy)
-	LDFIRST(t3, FIRST(3)(src), .Ll_exc_copy)
-	LDREST(t2, REST(2)(src), .Ll_exc_copy)
-	LDREST(t3, REST(3)(src), .Ll_exc_copy)
+	LDREST(t0, REST(0)(src), .Ll_exc_copy\@)
+	LDREST(t1, REST(1)(src), .Ll_exc_copy\@)
+	LDFIRST(t2, FIRST(2)(src), .Ll_exc_copy\@)
+	LDFIRST(t3, FIRST(3)(src), .Ll_exc_copy\@)
+	LDREST(t2, REST(2)(src), .Ll_exc_copy\@)
+	LDREST(t3, REST(3)(src), .Ll_exc_copy\@)
 	ADD	src, src, 4*NBYTES
 #ifdef CONFIG_CPU_SB1
 	nop				# improves slotting
 #endif
-	STORE(t0, UNIT(0)(dst), .Ls_exc)
+	STORE(t0, UNIT(0)(dst), .Ls_exc\@)
 	ADDC(sum, t0)
-	STORE(t1, UNIT(1)(dst), .Ls_exc)
+	STORE(t1, UNIT(1)(dst), .Ls_exc\@)
 	ADDC(sum, t1)
-	STORE(t2, UNIT(2)(dst), .Ls_exc)
+	STORE(t2, UNIT(2)(dst), .Ls_exc\@)
 	ADDC(sum, t2)
-	STORE(t3, UNIT(3)(dst), .Ls_exc)
+	STORE(t3, UNIT(3)(dst), .Ls_exc\@)
 	ADDC(sum, t3)
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, 4*NBYTES
 	bne	len, rem, 1b
 	.set	noreorder
-.Lcleanup_src_unaligned:
-	beqz	len, .Ldone
+.Lcleanup_src_unaligned\@:
+	beqz	len, .Ldone\@
 	 and	rem, len, NBYTES-1  # rem = len % NBYTES
-	beq	rem, len, .Lcopy_bytes
+	beq	rem, len, .Lcopy_bytes\@
 	 nop
1:
-	LDFIRST(t0, FIRST(0)(src), .Ll_exc)
-	LDREST(t0, REST(0)(src), .Ll_exc_copy)
+	LDFIRST(t0, FIRST(0)(src), .Ll_exc\@)
+	LDREST(t0, REST(0)(src), .Ll_exc_copy\@)
 	ADD	src, src, NBYTES
 	SUB	len, len, NBYTES
-	STORE(t0, 0(dst), .Ls_exc)
+	STORE(t0, 0(dst), .Ls_exc\@)
 	ADDC(sum, t0)
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, NBYTES
 	bne	len, rem, 1b
 	.set	noreorder
-.Lcopy_bytes_checklen:
-	beqz	len, .Ldone
+.Lcopy_bytes_checklen\@:
+	beqz	len, .Ldone\@
 	 nop
-.Lcopy_bytes:
+.Lcopy_bytes\@:
 	/* 0 < len < NBYTES */
 #ifdef CONFIG_CPU_LITTLE_ENDIAN
 #define SHIFT_START 0
@@ -662,12 +672,12 @@ FEXPORT(csum_partial_copy_nocheck)
 	li	t3, SHIFT_START	# shift
/* use .Ll_exc_copy here to return correct sum on fault */
#define COPY_BYTE(N)			\
-	LOADBU(t0, N(src), .Ll_exc_copy);	\
+	LOADBU(t0, N(src), .Ll_exc_copy\@);	\
 	SUB	len, len, 1;		\
-	STOREB(t0, N(dst), .Ls_exc);	\
+	STOREB(t0, N(dst), .Ls_exc\@);	\
 	SLLV	t0, t0, t3;		\
 	addu	t3, SHIFT_INC;		\
-	beqz	len, .Lcopy_bytes_done;	\
+	beqz	len, .Lcopy_bytes_done\@;	\
 	 or	t2, t0

 	COPY_BYTE(0)
@@ -678,14 +688,14 @@ FEXPORT(csum_partial_copy_nocheck)
 	COPY_BYTE(4)
 	COPY_BYTE(5)
 #endif
-	LOADBU(t0, NBYTES-2(src), .Ll_exc_copy)
+	LOADBU(t0, NBYTES-2(src), .Ll_exc_copy\@)
 	SUB	len, len, 1
-	STOREB(t0, NBYTES-2(dst), .Ls_exc)
+	STOREB(t0, NBYTES-2(dst), .Ls_exc\@)
 	SLLV	t0, t0, t3
 	or	t2, t0
-.Lcopy_bytes_done:
+.Lcopy_bytes_done\@:
 	ADDC(sum, t2)
-.Ldone:
+.Ldone\@:
 	/* fold checksum */
 #ifdef USE_DOUBLE
 	dsll32	v1, sum, 0
@@ -714,7 +724,7 @@ FEXPORT(csum_partial_copy_nocheck)
 	jr	ra
 	.set	noreorder
-.Ll_exc_copy:
+.Ll_exc_copy\@:
 	/*
 	 * Copy bytes from src until faulting load address (or until a
 	 * lb faults)
@@ -729,7 +739,7 @@ FEXPORT(csum_partial_copy_nocheck)
 	 *
 	 * Assumes src < THREAD_BUADDR($28)
 	 */
 	LOADK	t0, TI_TASK($28)
 	li	t2, SHIFT_START
 	LOADK	t0, THREAD_BUADDR(t0)
1:
-	LOADBU(t1, 0(src), .Ll_exc)
+	LOADBU(t1, 0(src), .Ll_exc\@)
 	ADD	src, src, 1
 	sb	t1, 0(dst)	# can't fault -- we're copy_from_user
 	SLLV	t1, t1, t2
@@ -739,7 +749,7 @@ FEXPORT(csum_partial_copy_nocheck)
 	ADD	dst, dst, 1
 	bne	src, t0, 1b
 	.set	noreorder
-.Ll_exc:
+.Ll_exc\@:
 	LOADK	t0, TI_TASK($28)
 	 nop
 	LOADK	t0, THREAD_BUADDR(t0)	# t0 is just past last good address
@@ -758,7 +768,7 @@ FEXPORT(csum_partial_copy_nocheck)
 	 */
 	.set	reorder				/* DADDI_WAR */
 	SUB	src, len, 1
-	beqz	len, .Ldone
+	beqz	len, .Ldone\@
 	.set	noreorder
1:	sb	zero, 0(dst)
 	ADD	dst, dst, 1
@@ -773,13 +783,19 @@ FEXPORT(csum_partial_copy_nocheck)
 	SUB	src, src, v1
 #endif
 	li	v1, -EFAULT
-	b	.Ldone
+	b	.Ldone\@
 	 sw	v1, (errptr)

-.Ls_exc:
+.Ls_exc\@:
 	li	v0, -1 /* invalid checksum */
 	li	v1, -EFAULT
 	jr	ra
 	 sw	v1, (errptr)
 	.set	pop
-	END(__csum_partial_copy_kernel)
+	.endm
+
+LEAF(__csum_partial_copy_kernel)
+FEXPORT(__csum_partial_copy_to_user)
+FEXPORT(__csum_partial_copy_from_user)
+__BUILD_CSUM_PARTIAL_COPY_USER LEGACY_MODE USEROP USEROP 1
+END(__csum_partial_copy_kernel)
--
cgit

From 6f85cebe49a4cd25a381f356ad51ccc376d00a7c Mon Sep 17 00:00:00 2001
From: Markos Chandras <markos.chandras@imgtec.com>
Date: Fri, 17 Jan 2014 11:36:16 +0000
Subject: MIPS: lib: csum_partial: Add EVA support

Use EVA-specific functions to read and write data to the user address
space.
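
As an illustration of what the new mode arguments produce (a
hand-expanded sketch, not text from the patch itself): inside
__csum_partial_copy_from_user, built with EVA_MODE below, a user-side
load such as LOAD(t0, 0(src), .Ll_exc\@) expands to roughly

	9:	__BUILD_EVA_INSN(lwe, t0, 0(src))
		.section __ex_table,"a"
		PTR	9b, .Ll_exc\@
		.previous

i.e. the 'e'-suffixed EVA opcode plus an exception-table entry, while
the stores of that routine target kernel memory and are emitted as
plain sw instructions with no __ex_table entry. (This assumes the
32-bit lw/sw flavour, i.e. !USE_DOUBLE.)
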
Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
---
 arch/mips/lib/csum_partial.S | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'arch/mips/lib/csum_partial.S')

diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S
index 62c8768a59ce..2e4825e48388 100644
--- a/arch/mips/lib/csum_partial.S
+++ b/arch/mips/lib/csum_partial.S
@@ -352,6 +352,19 @@ LEAF(csum_partial)
 	.section __ex_table,"a";		\
 	PTR	9b, handler;			\
 	.previous;				\
+	/* This is enabled in EVA mode */	\
+	.else;					\
+	/* If loading from user or storing to user */	\
+	.if ((\from == USEROP) && (type == LD_INSN)) ||	\
+	    ((\to == USEROP) && (type == ST_INSN));	\
+9:	__BUILD_EVA_INSN(insn##e, reg, addr);	\
+	.section __ex_table,"a";		\
+	PTR	9b, handler;			\
+	.previous;				\
+	.else;					\
+	/* EVA without exception */		\
+	insn reg, addr;				\
+	.endif;					\
 	.endif

 #undef LOAD

@@ -795,7 +808,19 @@ LEAF(csum_partial)
 	.endm

 LEAF(__csum_partial_copy_kernel)
+#ifndef CONFIG_EVA
 FEXPORT(__csum_partial_copy_to_user)
 FEXPORT(__csum_partial_copy_from_user)
+#endif
 __BUILD_CSUM_PARTIAL_COPY_USER LEGACY_MODE USEROP USEROP 1
 END(__csum_partial_copy_kernel)
+
+#ifdef CONFIG_EVA
+LEAF(__csum_partial_copy_to_user)
+__BUILD_CSUM_PARTIAL_COPY_USER EVA_MODE KERNELOP USEROP 0
+END(__csum_partial_copy_to_user)
+
+LEAF(__csum_partial_copy_from_user)
+__BUILD_CSUM_PARTIAL_COPY_USER EVA_MODE USEROP KERNELOP 0
+END(__csum_partial_copy_from_user)
+#endif
--
cgit
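
Note: for context, the C-side contract of the three symbols can be
sketched as follows. This is a sketch modelled on
arch/mips/include/asm/checksum.h of the same era; the wrapper shown is
illustrative rather than a quote of the header. All three routines
return the updated checksum; on a faulting access they store -EFAULT
through err_ptr, and the checksum they return is then invalid (on a
store fault the assembly above returns -1 as the checksum).

	#include <linux/types.h>

	__wsum __csum_partial_copy_kernel(const void *src, void *dst,
					  int len, __wsum sum, int *err_ptr);
	__wsum __csum_partial_copy_to_user(const void *src, void *dst,
					   int len, __wsum sum, int *err_ptr);
	__wsum __csum_partial_copy_from_user(const void *src, void *dst,
					     int len, __wsum sum, int *err_ptr);

	/* Illustrative caller: checksum while copying in from userland. */
	static inline __wsum
	csum_and_copy_from_user(const void __user *src, void *dst,
				int len, __wsum sum, int *err_ptr)
	{
		return __csum_partial_copy_from_user((__force void *)src, dst,
						     len, sum, err_ptr);
	}

On kernels without CONFIG_EVA all three names resolve to the single
LEGACY_MODE body, so existing callers see no behavioural change; with
CONFIG_EVA the to/from variants are generated from the same macro body
with the EVA load/store instructions.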