Diffstat (limited to 'arch/mips/lib/memset.S')
-rw-r--r--  arch/mips/lib/memset.S | 245
1 file changed, 183 insertions(+), 62 deletions(-)
diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S
index 0580194e7402..79405c32cc85 100644
--- a/arch/mips/lib/memset.S
+++ b/arch/mips/lib/memset.S
@@ -8,6 +8,7 @@
  * Copyright (C) 2007 by Maciej W. Rozycki
  * Copyright (C) 2011, 2012 MIPS Technologies, Inc.
  */
+#include <linux/export.h>
 #include <asm/asm.h>
 #include <asm/asm-offsets.h>
 #include <asm/regdef.h>
@@ -34,13 +35,27 @@
 #define FILLPTRG t0
 #endif
 
+#define LEGACY_MODE 1
+#define EVA_MODE    2
+
+/*
+ * No need to protect it with EVA #ifdefery. The generated block of code
+ * will never be assembled if EVA is not enabled.
+ */
+#define __EVAFY(insn, reg, addr) __BUILD_EVA_INSN(insn##e, reg, addr)
+#define ___BUILD_EVA_INSN(insn, reg, addr) __EVAFY(insn, reg, addr)
+
 #define EX(insn,reg,addr,handler)			\
-9:	insn	reg, addr;				\
+	.if \mode == LEGACY_MODE;			\
+9:		insn reg, addr;				\
+	.else;						\
+9:		___BUILD_EVA_INSN(insn, reg, addr);	\
+	.endif;						\
 	.section __ex_table,"a";			\
-	PTR	9b, handler;				\
+	PTR_WD	9b, handler;				\
 	.previous
 
-	.macro	f_fill64 dst, offset, val, fixup
+	.macro	f_fill64 dst, offset, val, fixup, mode
 	EX(LONG_S, \val, (\offset +  0 * STORSIZE)(\dst), \fixup)
 	EX(LONG_S, \val, (\offset +  1 * STORSIZE)(\dst), \fixup)
 	EX(LONG_S, \val, (\offset +  2 * STORSIZE)(\dst), \fixup)
@@ -63,39 +78,31 @@
 #endif
 	.endm
 
-/*
- * memset(void *s, int c, size_t n)
- *
- * a0: start of area to clear
- * a1: char to fill with
- * a2: size of area to clear
- */
-	.set	noreorder
 	.align	5
-LEAF(memset)
-	beqz		a1, 1f
-	 move		v0, a0			/* result */
 
-	andi		a1, 0xff		/* spread fillword */
-	LONG_SLL	t1, a1, 8
-	or		a1, t1
-	LONG_SLL	t1, a1, 16
-#if LONGSIZE == 8
-	or		a1, t1
-	LONG_SLL	t1, a1, 32
-#endif
-	or		a1, t1
-1:
+	/*
+	 * Macro to generate the __bzero{,_user} symbol
+	 * Arguments:
+	 * mode: LEGACY_MODE or EVA_MODE
+	 */
+	.macro __BUILD_BZERO mode
+	/* Initialize __memset if this is the first time we call this macro */
+	.ifnotdef __memset
+	.set __memset, 1
+	.hidden __memset /* Make sure it does not leak */
+	.endif
 
-FEXPORT(__bzero)
 	sltiu		t0, a2, STORSIZE	/* very small region? */
-	bnez		t0, .Lsmall_memset
+	.set		noreorder
+	bnez		t0, .Lsmall_memset\@
 	 andi		t0, a0, STORMASK	/* aligned? */
+	.set		reorder
 
 #ifdef CONFIG_CPU_MICROMIPS
 	move		t8, a1			/* used by 'swp' instruction */
 	move		t9, a1
 #endif
+	.set		noreorder
 #ifndef CONFIG_CPU_DADDI_WORKAROUNDS
 	beqz		t0, 1f
 	 PTR_SUBU	t0, STORSIZE		/* alignment in bytes */
@@ -106,31 +113,56 @@ FEXPORT(__bzero)
 	 PTR_SUBU	t0, AT			/* alignment in bytes */
 	.set		at
 #endif
+	.set		reorder
 
+#ifndef CONFIG_CPU_NO_LOAD_STORE_LR
 	R10KCBARRIER(0(ra))
 #ifdef __MIPSEB__
-	EX(LONG_S_L, a1, (a0), .Lfirst_fixup)	/* make word/dword aligned */
-#endif
-#ifdef __MIPSEL__
-	EX(LONG_S_R, a1, (a0), .Lfirst_fixup)	/* make word/dword aligned */
+	EX(LONG_S_L, a1, (a0), .Lfirst_fixup\@)	/* make word/dword aligned */
+#else
+	EX(LONG_S_R, a1, (a0), .Lfirst_fixup\@)	/* make word/dword aligned */
 #endif
 
 	PTR_SUBU	a0, t0			/* long align ptr */
 	PTR_ADDU	a2, t0			/* correct size */
 
+#else /* CONFIG_CPU_NO_LOAD_STORE_LR */
+#define STORE_BYTE(N)				\
+	EX(sb, a1, N(a0), .Lbyte_fixup\@);	\
+	.set		noreorder;		\
+	beqz		t0, 0f;			\
+	 PTR_ADDU	t0, 1;			\
+	.set		reorder;
+
+	PTR_ADDU	a2, t0			/* correct size */
+	PTR_ADDU	t0, 1
+	STORE_BYTE(0)
+	STORE_BYTE(1)
+#if LONGSIZE == 4
+	EX(sb, a1, 2(a0), .Lbyte_fixup\@)
+#else
+	STORE_BYTE(2)
+	STORE_BYTE(3)
+	STORE_BYTE(4)
+	STORE_BYTE(5)
+	EX(sb, a1, 6(a0), .Lbyte_fixup\@)
+#endif
+0:
+	ori		a0, STORMASK
+	xori		a0, STORMASK
+	PTR_ADDIU	a0, STORSIZE
+#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */
 1:	ori		t1, a2, 0x3f		/* # of full blocks */
 	xori		t1, 0x3f
-	beqz		t1, .Lmemset_partial	/* no block to fill */
-	 andi		t0, a2, 0x40-STORSIZE
+	andi		t0, a2, 0x40-STORSIZE
+	beqz		t1, .Lmemset_partial\@	/* no block to fill */
 
 	PTR_ADDU	t1, a0			/* end address */
-	.set		reorder
 1:	PTR_ADDIU	a0, 64
 	R10KCBARRIER(0(ra))
-	f_fill64 a0, -64, FILL64RG, .Lfwd_fixup
+	f_fill64 a0, -64, FILL64RG, .Lfwd_fixup\@, \mode
 	bne		t1, a0, 1b
-	.set		noreorder
 
-.Lmemset_partial:
+.Lmemset_partial\@:
 	R10KCBARRIER(0(ra))
 	PTR_LA		t1, 2f			/* where to start */
 #ifdef CONFIG_CPU_MICROMIPS
@@ -144,61 +176,150 @@ FEXPORT(__bzero)
 	 PTR_SUBU	t1, AT
 	.set		at
 #endif
+	PTR_ADDU	a0, t0			/* dest ptr */
 	jr		t1
-	 PTR_ADDU	a0, t0			/* dest ptr */
 
-	.set		push
-	.set		noreorder
-	.set		nomacro
-	f_fill64 a0, -64, FILL64RG, .Lpartial_fixup	/* ... but first do longs ... */
-2:	.set		pop
-	andi		a2, STORMASK		/* At most one long to go */
+	/* ... but first do longs ... */
+	f_fill64 a0, -64, FILL64RG, .Lpartial_fixup\@, \mode
+2:	andi		a2, STORMASK		/* At most one long to go */
 
+	.set		noreorder
 	beqz		a2, 1f
+#ifndef CONFIG_CPU_NO_LOAD_STORE_LR
 	 PTR_ADDU	a0, a2			/* What's left */
+	.set		reorder
 	R10KCBARRIER(0(ra))
 #ifdef __MIPSEB__
-	EX(LONG_S_R, a1, -1(a0), .Llast_fixup)
+	EX(LONG_S_R, a1, -1(a0), .Llast_fixup\@)
+#else
+	EX(LONG_S_L, a1, -1(a0), .Llast_fixup\@)
 #endif
-#ifdef __MIPSEL__
-	EX(LONG_S_L, a1, -1(a0), .Llast_fixup)
+#else /* CONFIG_CPU_NO_LOAD_STORE_LR */
+	 PTR_SUBU	t0, $0, a2
+	.set		reorder
+	move		a2, zero		/* No remaining longs */
+	PTR_ADDIU	t0, 1
+	STORE_BYTE(0)
+	STORE_BYTE(1)
+#if LONGSIZE == 4
+	EX(sb, a1, 2(a0), .Lbyte_fixup\@)
+#else
+	STORE_BYTE(2)
+	STORE_BYTE(3)
+	STORE_BYTE(4)
+	STORE_BYTE(5)
+	EX(sb, a1, 6(a0), .Lbyte_fixup\@)
 #endif
-1:	jr		ra
-	 move		a2, zero
+0:
+#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */
+1:	move		a2, zero
+	jr		ra
 
-.Lsmall_memset:
+.Lsmall_memset\@:
+	PTR_ADDU	t1, a0, a2
 	beqz		a2, 2f
-	 PTR_ADDU	t1, a0, a2
 
 1:	PTR_ADDIU	a0, 1			/* fill bytewise */
 	R10KCBARRIER(0(ra))
+	.set		noreorder
 	bne		t1, a0, 1b
-	 sb		a1, -1(a0)
+	 EX(sb, a1, -1(a0), .Lsmall_fixup\@)
+	.set		reorder
 
-2:	jr		ra			/* done */
-	 move		a2, zero
+2:	move		a2, zero
+	jr		ra			/* done */
+	.if __memset == 1
 	END(memset)
+	.set __memset, 0
+	.hidden __memset
+	.endif
 
-.Lfirst_fixup:
+#ifdef CONFIG_CPU_NO_LOAD_STORE_LR
+.Lbyte_fixup\@:
+	/*
+	 * unset_bytes = (#bytes - (#unaligned bytes)) - (-#unaligned bytes remaining + 1) + 1
+	 *      a2     =             a2                -              t0                   + 1
+	 */
+	PTR_SUBU	a2, t0
+	PTR_ADDIU	a2, 1
+	jr		ra
+#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */
+
+.Lfirst_fixup\@:
+	/* unset_bytes already in a2 */
 	jr	ra
-	 nop
 
-.Lfwd_fixup:
+.Lfwd_fixup\@:
+	/*
+	 * unset_bytes = partial_start_addr +  #bytes   -     fault_addr
+	 *      a2     =         t1         + (a2 & 3f) - $28->task->BUADDR
+	 */
 	PTR_L		t0, TI_TASK($28)
 	andi		a2, 0x3f
 	LONG_L		t0, THREAD_BUADDR(t0)
 	LONG_ADDU	a2, t1
+	LONG_SUBU	a2, t0
 	jr		ra
-	 LONG_SUBU	a2, t0
 
-.Lpartial_fixup:
+.Lpartial_fixup\@:
+	/*
+	 * unset_bytes = partial_end_addr +      #bytes     -      fault_addr
+	 *      a2     =       a0         + (a2 & STORMASK) - $28->task->BUADDR
+	 */
 	PTR_L		t0, TI_TASK($28)
 	andi		a2, STORMASK
 	LONG_L		t0, THREAD_BUADDR(t0)
-	LONG_ADDU	a2, t1
+	LONG_ADDU	a2, a0
+	LONG_SUBU	a2, t0
 	jr		ra
-	 LONG_SUBU	a2, t0
 
-.Llast_fixup:
+.Llast_fixup\@:
+	/* unset_bytes already in a2 */
 	jr		ra
-	 andi		v1, a2, STORMASK
+
+.Lsmall_fixup\@:
+	/*
+	 * unset_bytes = end_addr - current_addr + 1
+	 *      a2     =    t1    -      a0      + 1
+	 */
+	PTR_SUBU	a2, t1, a0
+	PTR_ADDIU	a2, 1
+	jr		ra
+
+	.endm
+
+/*
+ * memset(void *s, int c, size_t n)
+ *
+ * a0: start of area to clear
+ * a1: char to fill with
+ * a2: size of area to clear
+ */
+
+LEAF(memset)
+EXPORT_SYMBOL(memset)
+	move		v0, a0			/* result */
+	beqz		a1, 1f
+
+	andi		a1, 0xff		/* spread fillword */
+	LONG_SLL	t1, a1, 8
+	or		a1, t1
+	LONG_SLL	t1, a1, 16
+#if LONGSIZE == 8
+	or		a1, t1
+	LONG_SLL	t1, a1, 32
+#endif
+	or		a1, t1
1:
+#ifndef CONFIG_EVA
+FEXPORT(__bzero)
+EXPORT_SYMBOL(__bzero)
+#endif
+	__BUILD_BZERO LEGACY_MODE
+
+#ifdef CONFIG_EVA
+LEAF(__bzero)
+EXPORT_SYMBOL(__bzero)
+	__BUILD_BZERO EVA_MODE
+END(__bzero)
+#endif
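
A note on the "spread fillword" sequence at the memset entry point above: the andi/shift/or chain replicates the fill byte into every byte lane of a register before the word-sized store loops run. A minimal C sketch of the same step (the function name spread_fillword is ours, not the kernel's):

#include <limits.h>

/* Replicate the low byte of c across every byte of an unsigned long,
 * mirroring the LONG_SLL/or sequence in memset.S. */
static unsigned long spread_fillword(int c)
{
        unsigned long fill = (unsigned long)c & 0xff;

        fill |= fill << 8;      /* 0x000000ab -> 0x0000abab */
        fill |= fill << 16;     /* 0x0000abab -> 0xabababab */
#if ULONG_MAX > 0xffffffffUL
        fill |= fill << 32;     /* the LONGSIZE == 8 case */
#endif
        return fill;
}

On 64-bit builds one extra 32-bit shift doubles the pattern again, which is exactly the #if LONGSIZE == 8 branch in the assembly.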
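The CONFIG_CPU_NO_LOAD_STORE_LR paths added by this diff cover cores that lack the unaligned swl/swr partial-word stores (for example MIPS R6), so the destination is aligned with individual sb stores through the unrolled STORE_BYTE() macro instead of a single unaligned store. A rough C equivalent of that alignment prologue, under the assumption that STORSIZE is sizeof(long) (helper name hypothetical):

#include <stddef.h>
#include <stdint.h>

#define STORSIZE sizeof(long)          /* word size, as in memset.S */
#define STORMASK (STORSIZE - 1)

/* Advance dst to the next word boundary with single-byte stores,
 * shrinking *left accordingly, as the STORE_BYTE() unrolling does
 * when swl/swr are unavailable. */
static unsigned char *align_dst_by_bytes(unsigned char *dst, size_t *left,
                                         unsigned char fill)
{
        while (*left && ((uintptr_t)dst & STORMASK)) {
                *dst++ = fill;
                (*left)--;
        }
        return dst;
}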
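Each fixup label now documents the unset_bytes value it leaves in a2, i.e. how many bytes were still unset when a store faulted; __bzero callers use that to report how much of a user buffer could not be cleared. As a sketch, the .Lfwd_fixup and .Lpartial_fixup arithmetic in C, with hypothetical parameter names standing in for the registers:

/* .Lfwd_fixup: a store faulted inside the 64-byte block loop.
 * block_end plays the role of t1, count of a2, and fault_addr of the
 * bad address saved in THREAD_BUADDR. */
static unsigned long fwd_fixup_unset_bytes(unsigned long block_end,
                                           unsigned long count,
                                           unsigned long fault_addr)
{
        return block_end + (count & 0x3f) - fault_addr;
}

/* .Lpartial_fixup: same idea for the trailing partial block, with the
 * partial end address (a0) as the base and STORMASK as the mask; the
 * diff above also switches the base register from t1 to a0 here. */
static unsigned long partial_fixup_unset_bytes(unsigned long end_addr,
                                               unsigned long count,
                                               unsigned long fault_addr,
                                               unsigned long stormask)
{
        return end_addr + (count & stormask) - fault_addr;
}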
