From 5210d1e6889c8183ecad269e86e2d9c524015b5f Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 18 Jan 2013 15:12:18 +0530 Subject: ARC: String library Hand optimised asm code for ARC700 pipeline. Originally written/optimized by Joern Rennecke Signed-off-by: Vineet Gupta Cc: Joern Rennecke --- arch/arc/lib/strcpy-700.S | 70 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 arch/arc/lib/strcpy-700.S (limited to 'arch/arc/lib/strcpy-700.S') diff --git a/arch/arc/lib/strcpy-700.S b/arch/arc/lib/strcpy-700.S new file mode 100644 index 000000000000..b7ca4ae81d88 --- /dev/null +++ b/arch/arc/lib/strcpy-700.S @@ -0,0 +1,70 @@ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* If dst and src are 4 byte aligned, copy 8 bytes at a time. + If the src is 4, but not 8 byte aligned, we first read 4 bytes to get + it 8 byte aligned. Thus, we can do a little read-ahead, without + dereferencing a cache line that we should not touch. + Note that short and long instructions have been scheduled to avoid + branch stalls. + The beq_s to r3z could be made unaligned & long to avoid a stall + there, but the it is not likely to be taken often, and it + would also be likey to cost an unaligned mispredict at the next call. */ + +#include + +ARC_ENTRY strcpy + or r2,r0,r1 + bmsk_s r2,r2,1 + brne.d r2,0,charloop + mov_s r10,r0 + ld_s r3,[r1,0] + mov r8,0x01010101 + bbit0.d r1,2,loop_start + ror r12,r8 + sub r2,r3,r8 + bic_s r2,r2,r3 + tst_s r2,r12 + bne r3z + mov_s r4,r3 + .balign 4 +loop: + ld.a r3,[r1,4] + st.ab r4,[r10,4] +loop_start: + ld.a r4,[r1,4] + sub r2,r3,r8 + bic_s r2,r2,r3 + tst_s r2,r12 + bne_s r3z + st.ab r3,[r10,4] + sub r2,r4,r8 + bic r2,r2,r4 + tst r2,r12 + beq loop + mov_s r3,r4 +#ifdef __LITTLE_ENDIAN__ +r3z: bmsk.f r1,r3,7 + lsr_s r3,r3,8 +#else +r3z: lsr.f r1,r3,24 + asl_s r3,r3,8 +#endif + bne.d r3z + stb.ab r1,[r10,1] + j_s [blink] + + .balign 4 +charloop: + ldb.ab r3,[r1,1] + + + brne.d r3,0,charloop + stb.ab r3,[r10,1] + j [blink] +ARC_EXIT strcpy -- cgit