Diffstat (limited to 'arch/x86/lib/memcpy_64.S')

 arch/x86/lib/memcpy_64.S | 47 +++++++++++++++++++++++++++++++++--------------
 1 file changed, 33 insertions(+), 14 deletions(-)
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 75ef61e35e38..efbf2a0ecdea 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -4,6 +4,7 @@
 
 #include <asm/cpufeature.h>
 #include <asm/dwarf2.h>
+#include <asm/alternative-asm.h>
 
 /*
  * memcpy - Copy a memory block.
@@ -37,6 +38,23 @@
 .Lmemcpy_e:
 	.previous
 
+/*
+ * memcpy_c_e() - enhanced fast string memcpy. This is faster and simpler than
+ * memcpy_c. Use memcpy_c_e when possible.
+ *
+ * This gets patched over the unrolled variant (below) via the
+ * alternative instructions framework:
+ */
+	.section .altinstr_replacement, "ax", @progbits
+.Lmemcpy_c_e:
+	movq %rdi, %rax
+
+	movl %edx, %ecx
+	rep movsb
+	ret
+.Lmemcpy_e_e:
+	.previous
+
 ENTRY(__memcpy)
 ENTRY(memcpy)
 	CFI_STARTPROC
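Note: the replacement body added above is small enough to restate in C. A minimal sketch of its semantics, with a hypothetical helper name (an illustration, not kernel code):

#include <stddef.h>

/*
 * Hypothetical sketch of what .Lmemcpy_c_e does on an ERMS CPU:
 * "rep movsb" copies %rcx bytes forward from (%rsi) to (%rdi), and
 * the leading "movq %rdi, %rax" preserves memcpy()'s return value,
 * the original destination pointer.
 */
static inline void *memcpy_erms_sketch(void *dest, const void *src, size_t len)
{
	void *ret = dest;	/* movq %rdi, %rax */

	asm volatile("rep movsb"
		     : "+D" (dest), "+S" (src), "+c" (len)
		     : : "memory");
	return ret;
}

One detail worth noticing in the patched body: it loads only %edx into %ecx, i.e. it passes the low 32 bits of the length, the same convention the existing memcpy_c variant uses.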
@@ -49,7 +67,7 @@ ENTRY(memcpy)
 	jb .Lhandle_tail
 
 	/*
-	 * We check whether memory false dependece could occur,
+	 * We check whether memory false dependence could occur,
 	 * then jump to corresponding copy mode.
 	 */
 	cmp %dil, %sil
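For context on the comment this hunk fixes: "cmp %dil, %sil" compares the low bytes of the destination and source pointers, and the surrounding code (not shown in this hunk) then branches to a backward-copy path when a forward copy could stall. A rough C sketch of that dispatch, with hypothetical names:

#include <stdint.h>

enum copy_mode { COPY_FORWARD, COPY_BACKWARD };

/*
 * Rough sketch, hypothetical names. The signed compare of the low
 * address bytes mirrors "cmp %dil, %sil" followed by a signed jump:
 * when the source sits just below the destination, a forward copy
 * would reload bytes stored a few iterations earlier (the false
 * store-to-load dependence the comment refers to), so the backward
 * path is taken instead.
 */
static enum copy_mode choose_copy_mode(const void *src, void *dest)
{
	int8_t s = (int8_t)(uintptr_t)src;	/* %sil */
	int8_t d = (int8_t)(uintptr_t)dest;	/* %dil */

	return (s < d) ? COPY_BACKWARD : COPY_FORWARD;
}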
@@ -171,21 +189,22 @@ ENDPROC(memcpy)
 ENDPROC(__memcpy)
 
 	/*
-	 * Some CPUs run faster using the string copy instructions.
-	 * It is also a lot simpler. Use this when possible:
-	 */
-
-	.section .altinstructions, "a"
-	.align 8
-	.quad memcpy
-	.quad .Lmemcpy_c
-	.word X86_FEATURE_REP_GOOD
-
-	/*
+	 * Some CPUs support the enhanced REP MOVSB/STOSB (ERMS) feature.
+	 * If the feature is supported, memcpy_c_e() is the first choice.
+	 * If enhanced rep movsb is not available, use the fast string copy
+	 * memcpy_c() when possible; it is faster and its code is simpler
+	 * than the original memcpy().
+	 * Otherwise, the original memcpy() is used.
+	 * In the .altinstructions section, the ERMS entry is placed after
+	 * the REP_GOOD entry to get the right patch order.
+	 *
 	 * Replace only beginning, memcpy is used to apply alternatives,
 	 * so it is silly to overwrite itself with nops - reboot is the
 	 * only outcome...
 	 */
-	.byte .Lmemcpy_e - .Lmemcpy_c
-	.byte .Lmemcpy_e - .Lmemcpy_c
+	.section .altinstructions, "a"
+	altinstruction_entry memcpy,.Lmemcpy_c,X86_FEATURE_REP_GOOD,\
+			     .Lmemcpy_e-.Lmemcpy_c,.Lmemcpy_e-.Lmemcpy_c
+	altinstruction_entry memcpy,.Lmemcpy_c_e,X86_FEATURE_ERMS, \
+			     .Lmemcpy_e_e-.Lmemcpy_c_e,.Lmemcpy_e_e-.Lmemcpy_c_e
 	.previous
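The hand-rolled record removed above (.quad, .quad, .word, .byte, .byte) spells out exactly what the altinstruction_entry macro now emits into .altinstructions. A sketch of that record layout; the field names follow struct alt_instr in <asm/alternative.h> of this era and should be treated as an assumption rather than a quote:

#include <linux/types.h>

/*
 * Sketch of one .altinstructions record; field names are assumed
 * from struct alt_instr and are for illustration only.
 */
struct alt_instr_sketch {
	u8 *instr;		/* .quad memcpy: code to be patched        */
	u8 *replacement;	/* .quad .Lmemcpy_c: replacement code      */
	u16 cpuid;		/* .word X86_FEATURE_...: required feature */
	u8  instrlen;		/* .byte: length of the original code      */
	u8  replacementlen;	/* .byte: length of the replacement        */
};

Since the patcher walks the section in order, listing the ERMS entry after the REP_GOOD entry means that on a CPU advertising both features the rep-movsb body is patched in last and wins, which is the patch order the new comment calls out.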