summary | refs | log | tree | commit | diff
path: root/arch/xtensa/lib/memcopy.S
diff options
context:
space:
mode:
Diffstat (limited to 'arch/xtensa/lib/memcopy.S')
-rw-r--r-- arch/xtensa/lib/memcopy.S | 148
1 file changed, 57 insertions, 91 deletions
diff --git a/arch/xtensa/lib/memcopy.S b/arch/xtensa/lib/memcopy.S
index b1c219acabe7..f60760396cee 100644
--- a/arch/xtensa/lib/memcopy.S
+++ b/arch/xtensa/lib/memcopy.S
@@ -9,23 +9,9 @@
* Copyright (C) 2002 - 2012 Tensilica Inc.
*/
-#include <variant/core.h>
-
- .macro src_b r, w0, w1
-#ifdef __XTENSA_EB__
- src \r, \w0, \w1
-#else
- src \r, \w1, \w0
-#endif
- .endm
-
- .macro ssa8 r
-#ifdef __XTENSA_EB__
- ssa8b \r
-#else
- ssa8l \r
-#endif
- .endm
+#include <linux/linkage.h>
+#include <asm/asmmacro.h>
+#include <asm/core.h>
/*
* void *memcpy(void *dst, const void *src, size_t len);
@@ -93,7 +79,7 @@
bne a3, a7, .Lnextbyte # continue loop if $a3:src != $a7:src_end
#endif /* !XCHAL_HAVE_LOOPS */
.Lbytecopydone:
- retw
+ abi_ret_default
/*
* Destination is unaligned
@@ -123,12 +109,10 @@
addi a5, a5, 2
j .Ldstaligned # dst is now aligned, return to main algorithm
- .align 4
- .global memcpy
- .type memcpy,@function
-memcpy:
+ENTRY(__memcpy)
+WEAK(memcpy)
- entry sp, 16 # minimal stack frame
+ abi_entry_default
# a2/ dst, a3/ src, a4/ len
mov a5, a2 # copy dst so that a2 is return value
.Lcommon:
@@ -177,7 +161,7 @@ memcpy:
bbsi.l a4, 2, .L3
bbsi.l a4, 1, .L4
bbsi.l a4, 0, .L5
- retw
+ abi_ret_default
.L3:
# copy 4 bytes
l32i a6, a3, 0
@@ -186,7 +170,7 @@ memcpy:
addi a5, a5, 4
bbsi.l a4, 1, .L4
bbsi.l a4, 0, .L5
- retw
+ abi_ret_default
.L4:
# copy 2 bytes
l16ui a6, a3, 0
@@ -194,12 +178,12 @@ memcpy:
s16i a6, a5, 0
addi a5, a5, 2
bbsi.l a4, 0, .L5
- retw
+ abi_ret_default
.L5:
# copy 1 byte
l8ui a6, a3, 0
s8i a6, a5, 0
- retw
+ abi_ret_default
/*
* Destination is aligned, Source is unaligned
@@ -209,7 +193,7 @@ memcpy:
.Lsrcunaligned:
_beqz a4, .Ldone # avoid loading anything for zero-length copies
# copy 16 bytes per iteration for word-aligned dst and unaligned src
- ssa8 a3 # set shift amount from byte offset
+ __ssa8 a3 # set shift amount from byte offset
/* set to 1 when running on ISS (simulator) with the
lint or ferret client, or 0 to save a few cycles */
@@ -229,16 +213,16 @@ memcpy:
.Loop2:
l32i a7, a3, 4
l32i a8, a3, 8
- src_b a6, a6, a7
+ __src_b a6, a6, a7
s32i a6, a5, 0
l32i a9, a3, 12
- src_b a7, a7, a8
+ __src_b a7, a7, a8
s32i a7, a5, 4
l32i a6, a3, 16
- src_b a8, a8, a9
+ __src_b a8, a8, a9
s32i a8, a5, 8
addi a3, a3, 16
- src_b a9, a9, a6
+ __src_b a9, a9, a6
s32i a9, a5, 12
addi a5, a5, 16
#if !XCHAL_HAVE_LOOPS
@@ -249,10 +233,10 @@ memcpy:
# copy 8 bytes
l32i a7, a3, 4
l32i a8, a3, 8
- src_b a6, a6, a7
+ __src_b a6, a6, a7
s32i a6, a5, 0
addi a3, a3, 8
- src_b a7, a7, a8
+ __src_b a7, a7, a8
s32i a7, a5, 4
addi a5, a5, 8
mov a6, a8
@@ -261,7 +245,7 @@ memcpy:
# copy 4 bytes
l32i a7, a3, 4
addi a3, a3, 4
- src_b a6, a6, a7
+ __src_b a6, a6, a7
s32i a6, a5, 0
addi a5, a5, 4
mov a6, a7
@@ -271,7 +255,7 @@ memcpy:
#endif
bbsi.l a4, 1, .L14
bbsi.l a4, 0, .L15
-.Ldone: retw
+.Ldone: abi_ret_default
.L14:
# copy 2 bytes
l8ui a6, a3, 0
@@ -281,27 +265,16 @@ memcpy:
s8i a7, a5, 1
addi a5, a5, 2
bbsi.l a4, 0, .L15
- retw
+ abi_ret_default
.L15:
# copy 1 byte
l8ui a6, a3, 0
s8i a6, a5, 0
- retw
-
+ abi_ret_default
-/*
- * void bcopy(const void *src, void *dest, size_t n);
- */
- .align 4
- .global bcopy
- .type bcopy,@function
-bcopy:
- entry sp, 16 # minimal stack frame
- # a2=src, a3=dst, a4=len
- mov a5, a3
- mov a3, a2
- mov a2, a5
- j .Lmovecommon # go to common code for memmove+bcopy
+ENDPROC(__memcpy)
+EXPORT_SYMBOL(__memcpy)
+EXPORT_SYMBOL(memcpy)
/*
* void *memmove(void *dst, const void *src, size_t len);
@@ -360,7 +333,7 @@ bcopy:
# $a3:src != $a7:src_start
#endif /* !XCHAL_HAVE_LOOPS */
.Lbackbytecopydone:
- retw
+ abi_ret_default
/*
* Destination is unaligned
@@ -391,12 +364,10 @@ bcopy:
j .Lbackdstaligned # dst is now aligned,
# return to main algorithm
- .align 4
- .global memmove
- .type memmove,@function
-memmove:
+ENTRY(__memmove)
+WEAK(memmove)
- entry sp, 16 # minimal stack frame
+ abi_entry_default
# a2/ dst, a3/ src, a4/ len
mov a5, a2 # copy dst so that a2 is return value
.Lmovecommon:
@@ -418,13 +389,13 @@ memmove:
*/
# copy 16 bytes per iteration for word-aligned dst and word-aligned src
#if XCHAL_HAVE_LOOPS
- loopnez a7, .backLoop1done
+ loopnez a7, .LbackLoop1done
#else /* !XCHAL_HAVE_LOOPS */
- beqz a7, .backLoop1done
+ beqz a7, .LbackLoop1done
slli a8, a7, 4
sub a8, a3, a8 # a8 = start of first 16B source chunk
#endif /* !XCHAL_HAVE_LOOPS */
-.backLoop1:
+.LbackLoop1:
addi a3, a3, -16
l32i a7, a3, 12
l32i a6, a3, 8
@@ -436,9 +407,9 @@ memmove:
s32i a7, a5, 4
s32i a6, a5, 0
#if !XCHAL_HAVE_LOOPS
- bne a3, a8, .backLoop1 # continue loop if a3:src != a8:src_start
+ bne a3, a8, .LbackLoop1 # continue loop if a3:src != a8:src_start
#endif /* !XCHAL_HAVE_LOOPS */
-.backLoop1done:
+.LbackLoop1done:
bbci.l a4, 3, .Lback2
# copy 8 bytes
addi a3, a3, -8
@@ -451,7 +422,7 @@ memmove:
bbsi.l a4, 2, .Lback3
bbsi.l a4, 1, .Lback4
bbsi.l a4, 0, .Lback5
- retw
+ abi_ret_default
.Lback3:
# copy 4 bytes
addi a3, a3, -4
@@ -460,7 +431,7 @@ memmove:
s32i a6, a5, 0
bbsi.l a4, 1, .Lback4
bbsi.l a4, 0, .Lback5
- retw
+ abi_ret_default
.Lback4:
# copy 2 bytes
addi a3, a3, -2
@@ -468,14 +439,14 @@ memmove:
addi a5, a5, -2
s16i a6, a5, 0
bbsi.l a4, 0, .Lback5
- retw
+ abi_ret_default
.Lback5:
# copy 1 byte
addi a3, a3, -1
l8ui a6, a3, 0
addi a5, a5, -1
s8i a6, a5, 0
- retw
+ abi_ret_default
/*
* Destination is aligned, Source is unaligned
@@ -485,7 +456,7 @@ memmove:
.Lbacksrcunaligned:
_beqz a4, .Lbackdone # avoid loading anything for zero-length copies
# copy 16 bytes per iteration for word-aligned dst and unaligned src
- ssa8 a3 # set shift amount from byte offset
+ __ssa8 a3 # set shift amount from byte offset
#define SIM_CHECKS_ALIGNMENT 1 /* set to 1 when running on ISS with
* the lint or ferret client, or 0
* to save a few cycles */
@@ -495,40 +466,40 @@ memmove:
#endif
l32i a6, a3, 0 # load first word
#if XCHAL_HAVE_LOOPS
- loopnez a7, .backLoop2done
+ loopnez a7, .LbackLoop2done
#else /* !XCHAL_HAVE_LOOPS */
- beqz a7, .backLoop2done
+ beqz a7, .LbackLoop2done
slli a10, a7, 4
sub a10, a3, a10 # a10 = start of first 16B source chunk
#endif /* !XCHAL_HAVE_LOOPS */
-.backLoop2:
+.LbackLoop2:
addi a3, a3, -16
l32i a7, a3, 12
l32i a8, a3, 8
addi a5, a5, -16
- src_b a6, a7, a6
+ __src_b a6, a7, a6
s32i a6, a5, 12
l32i a9, a3, 4
- src_b a7, a8, a7
+ __src_b a7, a8, a7
s32i a7, a5, 8
l32i a6, a3, 0
- src_b a8, a9, a8
+ __src_b a8, a9, a8
s32i a8, a5, 4
- src_b a9, a6, a9
+ __src_b a9, a6, a9
s32i a9, a5, 0
#if !XCHAL_HAVE_LOOPS
- bne a3, a10, .backLoop2 # continue loop if a3:src != a10:src_start
+ bne a3, a10, .LbackLoop2 # continue loop if a3:src != a10:src_start
#endif /* !XCHAL_HAVE_LOOPS */
-.backLoop2done:
+.LbackLoop2done:
bbci.l a4, 3, .Lback12
# copy 8 bytes
addi a3, a3, -8
l32i a7, a3, 4
l32i a8, a3, 0
addi a5, a5, -8
- src_b a6, a7, a6
+ __src_b a6, a7, a6
s32i a6, a5, 4
- src_b a7, a8, a7
+ __src_b a7, a8, a7
s32i a7, a5, 0
mov a6, a8
.Lback12:
@@ -537,7 +508,7 @@ memmove:
addi a3, a3, -4
l32i a7, a3, 0
addi a5, a5, -4
- src_b a6, a7, a6
+ __src_b a6, a7, a6
s32i a6, a5, 0
mov a6, a7
.Lback13:
@@ -547,7 +518,7 @@ memmove:
bbsi.l a4, 1, .Lback14
bbsi.l a4, 0, .Lback15
.Lbackdone:
- retw
+ abi_ret_default
.Lback14:
# copy 2 bytes
addi a3, a3, -2
@@ -557,20 +528,15 @@ memmove:
s8i a6, a5, 0
s8i a7, a5, 1
bbsi.l a4, 0, .Lback15
- retw
+ abi_ret_default
.Lback15:
# copy 1 byte
addi a3, a3, -1
addi a5, a5, -1
l8ui a6, a3, 0
s8i a6, a5, 0
- retw
+ abi_ret_default
-
-/*
- * Local Variables:
- * mode:fundamental
- * comment-start: "# "
- * comment-start-skip: "# *"
- * End:
- */
+ENDPROC(__memmove)
+EXPORT_SYMBOL(__memmove)
+EXPORT_SYMBOL(memmove)