| author | Fenghua Yu <fenghua.yu@intel.com> | 2011-05-17 15:29:18 -0700 |
|---|---|---|
| committer | H. Peter Anvin <hpa@linux.intel.com> | 2011-05-17 15:40:31 -0700 |
| commit | 2f19e06ac30771c7cb96fd61d8aeacfa74dac21c | |
| tree | 00a13ef37681e1f983a40680b17c850ce035fb87 | |
| parent | 057e05c1d6440117875f455e59da8691e08f65d5 | |
x86, mem: memset_64.S: Optimize memset by enhanced REP MOVSB/STOSB
Support memset() with enhanced REP STOSB. On processors supporting enhanced
REP MOVSB/STOSB (ERMS), the alternative memset_c_e function, which uses
enhanced rep stosb, overrides both the fast-string alternative memset_c and
the original function.
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Link: http://lkml.kernel.org/r/1305671358-14478-10-git-send-email-fenghua.yu@intel.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
| -rw-r--r-- | arch/x86/lib/memset_64.S | 54 | 
1 file changed, 42 insertions(+), 12 deletions(-)
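The "enhanced rep stosb" in the commit message boils down to a single
instruction. Below is a minimal user-space sketch in C of what the new
.Lmemset_c_e replacement body in the diff further down does; the function
name memset_erms and the test harness are illustrative additions, not part
of the patch. (Note the kernel body loads the count with movl %edx,%ecx,
i.e. only the low 32 bits of the count.)

```c
#include <stddef.h>
#include <stdio.h>

/*
 * User-space analogue of the .Lmemset_c_e replacement body:
 * the fill byte goes in AL, the count in RCX, the destination in
 * RDI, and "rep stosb" stores AL, RCX times. The name memset_erms
 * is illustrative; the kernel code is anonymous replacement text.
 */
static void *memset_erms(void *dest, int c, size_t n)
{
	void *ret = dest;		/* memset returns the destination */

	asm volatile("rep stosb"
		     : "+D" (dest), "+c" (n)	/* RDI, RCX are consumed */
		     : "a" (c)			/* AL holds the fill byte */
		     : "memory");
	return ret;
}

int main(void)
{
	unsigned char buf[64];

	memset_erms(buf, 0xab, sizeof(buf));
	printf("buf[0] = 0x%02x\n", buf[0]);	/* prints 0xab */
	return 0;
}
```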
```diff
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index 09d344269652..79bd454b78a3 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -2,9 +2,13 @@
 
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
+#include <asm/cpufeature.h>
+#include <asm/alternative-asm.h>
 
 /*
- * ISO C memset - set a memory block to a byte value.
+ * ISO C memset - set a memory block to a byte value. This function uses fast
+ * string to get better performance than the original function. The code is
+ * simpler and shorter than the original function as well.
  *
  * rdi   destination
  * rsi   value (char)
@@ -31,6 +35,28 @@
 .Lmemset_e:
 	.previous
 
+/*
+ * ISO C memset - set a memory block to a byte value. This function uses
+ * enhanced rep stosb to override the fast string function.
+ * The code is simpler and shorter than the fast string function as well.
+ *
+ * rdi   destination
+ * rsi   value (char)
+ * rdx   count (bytes)
+ *
+ * rax   original destination
+ */
+	.section .altinstr_replacement, "ax", @progbits
+.Lmemset_c_e:
+	movq %rdi,%r9
+	movb %sil,%al
+	movl %edx,%ecx
+	rep stosb
+	movq %r9,%rax
+	ret
+.Lmemset_e_e:
+	.previous
+
 ENTRY(memset)
 ENTRY(__memset)
 	CFI_STARTPROC
@@ -112,16 +138,20 @@ ENTRY(__memset)
 ENDPROC(memset)
 ENDPROC(__memset)
 
-	/* Some CPUs run faster using the string instructions.
-	   It is also a lot simpler. Use this when possible */
-
-#include <asm/cpufeature.h>
-
+	/* Some CPUs support enhanced REP MOVSB/STOSB feature.
+	 * It is recommended to use this when possible.
+	 *
+	 * If enhanced REP MOVSB/STOSB feature is not available, use fast string
+	 * instructions.
+	 *
+	 * Otherwise, use original memset function.
+	 *
+	 * In .altinstructions section, the ERMS feature is placed after the
+	 * REP_GOOD feature to implement the right patch order.
+	 */
 	.section .altinstructions,"a"
-	.align 8
-	.quad memset
-	.quad .Lmemset_c
-	.word X86_FEATURE_REP_GOOD
-	.byte .Lfinal - memset
-	.byte .Lmemset_e - .Lmemset_c
+	altinstruction_entry memset,.Lmemset_c,X86_FEATURE_REP_GOOD,\
+			     .Lfinal-memset,.Lmemset_e-.Lmemset_c
+	altinstruction_entry memset,.Lmemset_c_e,X86_FEATURE_ERMS, \
+			     .Lfinal-memset,.Lmemset_e_e-.Lmemset_c_e
 	.previous
```
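The second altinstruction_entry keys the patch on X86_FEATURE_ERMS, which the
kernel derives from CPUID. As a sketch of the same check in user space
(cpu_has_erms is a hypothetical name, and this assumes GCC/Clang's <cpuid.h>):
CPUID leaf 7, subleaf 0 reports ERMS in bit 9 of EBX.

```c
#include <cpuid.h>
#include <stdio.h>

/*
 * ERMS ("enhanced REP MOVSB/STOSB") is reported in
 * CPUID.(EAX=7,ECX=0):EBX bit 9 -- the same CPUID bit that
 * X86_FEATURE_ERMS reflects inside the kernel.
 */
static int cpu_has_erms(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
		return 0;
	return (ebx >> 9) & 1;
}

int main(void)
{
	printf("ERMS supported: %s\n", cpu_has_erms() ? "yes" : "no");
	return 0;
}
```

The ordering of the two entries in .altinstructions is what the patch's
comment calls "the right patch order": alternatives are applied in section
order, so the ERMS entry, listed second, is applied last and wins on CPUs
that set both REP_GOOD and ERMS.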
