author     Fenghua Yu <fenghua.yu@intel.com>        2011-05-17 18:29:17 -0400
committer  H. Peter Anvin <hpa@linux.intel.com>     2011-05-17 18:40:30 -0400
commit     057e05c1d6440117875f455e59da8691e08f65d5 (patch)
tree       79c8306de2ebe31252b730170e33bd62ed18ccdb /arch/x86/lib/memmove_64.S
parent     101068c1f4a947ffa08f2782c78e40097300754d (diff)
x86, mem: memmove_64.S: Optimize memmove by enhanced REP MOVSB/STOSB
Support memmove() with enhanced REP MOVSB. On processors that support enhanced
REP MOVSB/STOSB, an alternative memmove() implementation using enhanced REP
MOVSB replaces the original forward-copy path.

The patch does not change the backward-copy case to use enhanced REP MOVSB.
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Link: http://lkml.kernel.org/r/1305671358-14478-9-git-send-email-fenghua.yu@intel.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
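
The replacement sequence itself is tiny: on an ERMS-capable CPU the whole
forward-copy path collapses into loading the byte count into %rcx and running
a single "rep movsb" (see the .altinstr_replacement section in the diff below).
A minimal user-space sketch of that behaviour, assuming GNU inline asm; the
function name and constraints are illustrative and not part of the patch:

#include <stddef.h>

/*
 * Sketch of what the ERMS replacement sequence
 * ("movq %rdx, %rcx; rep movsb; retq") does for the forward-copy case.
 * Illustrative only -- not the kernel implementation.
 */
static void *forward_copy_erms(void *dst, const void *src, size_t len)
{
	void *ret = dst;

	/* %rdi = dst, %rsi = src, %rcx = len, as in the patched path */
	asm volatile("rep movsb"
		     : "+D" (dst), "+S" (src), "+c" (len)
		     :
		     : "memory");
	return ret;
}

The backward-copy case (overlapping buffers that must be copied from the end)
stays on the existing code path, as noted above.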
Diffstat (limited to 'arch/x86/lib/memmove_64.S')
-rw-r--r--  arch/x86/lib/memmove_64.S  29
1 file changed, 28 insertions(+), 1 deletion(-)
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S
index 0ecb8433e5a8..d0ec9c2936d7 100644
--- a/arch/x86/lib/memmove_64.S
+++ b/arch/x86/lib/memmove_64.S
@@ -8,6 +8,7 @@
 #define _STRING_C
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
+#include <asm/cpufeature.h>
 
 #undef memmove
 
@@ -24,6 +25,7 @@
  */
 ENTRY(memmove)
 	CFI_STARTPROC
+
 	/* Handle more 32bytes in loop */
 	mov %rdi, %rax
 	cmp $0x20, %rdx
@@ -31,8 +33,13 @@ ENTRY(memmove)
 
 	/* Decide forward/backward copy mode */
 	cmp %rdi, %rsi
-	jb 2f
+	jge .Lmemmove_begin_forward
+	mov %rsi, %r8
+	add %rdx, %r8
+	cmp %rdi, %r8
+	jg 2f
 
+.Lmemmove_begin_forward:
 	/*
 	 * movsq instruction have many startup latency
 	 * so we handle small size by general register.
@@ -78,6 +85,8 @@ ENTRY(memmove)
 	rep movsq
 	movq %r11, (%r10)
 	jmp 13f
+.Lmemmove_end_forward:
+
 	/*
 	 * Handle data backward by movsq.
 	 */
@@ -194,4 +203,22 @@ ENTRY(memmove)
 13:
 	retq
 	CFI_ENDPROC
+
+	.section .altinstr_replacement,"ax"
+.Lmemmove_begin_forward_efs:
+	/* Forward moving data. */
+	movq %rdx, %rcx
+	rep movsb
+	retq
+.Lmemmove_end_forward_efs:
+	.previous
+
+	.section .altinstructions,"a"
+	.align 8
+	.quad .Lmemmove_begin_forward
+	.quad .Lmemmove_begin_forward_efs
+	.word X86_FEATURE_ERMS
+	.byte .Lmemmove_end_forward-.Lmemmove_begin_forward
+	.byte .Lmemmove_end_forward_efs-.Lmemmove_begin_forward_efs
+	.previous
 ENDPROC(memmove)
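
For reference, the five data directives emitted into .altinstructions above
describe one patch record for the boot-time alternatives code. A rough C view
of that record, assuming the alt_instr layout of this era with absolute 8-byte
pointers; the struct and field names below are illustrative:

/*
 * Rough, illustrative view of the record built by the .quad/.word/.byte
 * directives above (assumed layout; not copied from the kernel headers).
 */
struct alt_instr_sketch {
	void          *instr;          /* .quad .Lmemmove_begin_forward: code to patch     */
	void          *replacement;    /* .quad .Lmemmove_begin_forward_efs: ERMS sequence */
	unsigned short cpuid;          /* .word X86_FEATURE_ERMS: required CPU feature bit */
	unsigned char  instrlen;       /* .byte length of the original forward-copy code   */
	unsigned char  replacementlen; /* .byte length of the replacement sequence         */
};

At boot, if the CPU advertises X86_FEATURE_ERMS, the alternatives code copies
the replacement over the start of the forward path and pads the remainder with
NOPs; otherwise the original movsq-based code runs unchanged.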