aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/lib/memcpy_64.S
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/lib/memcpy_64.S')
-rw-r--r--arch/x86/lib/memcpy_64.S47
1 files changed, 33 insertions, 14 deletions
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 75ef61e35e38..efbf2a0ecdea 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -4,6 +4,7 @@
4 4
5#include <asm/cpufeature.h> 5#include <asm/cpufeature.h>
6#include <asm/dwarf2.h> 6#include <asm/dwarf2.h>
7#include <asm/alternative-asm.h>
7 8
8/* 9/*
9 * memcpy - Copy a memory block. 10 * memcpy - Copy a memory block.
@@ -37,6 +38,23 @@
37.Lmemcpy_e: 38.Lmemcpy_e:
38 .previous 39 .previous
39 40
41/*
42 * memcpy_c_e() - enhanced fast string memcpy. This is faster and simpler than
43 * memcpy_c. Use memcpy_c_e when possible.
44 *
45 * This gets patched over the unrolled variant (below) via the
46 * alternative instructions framework:
47 */
48 .section .altinstr_replacement, "ax", @progbits
49.Lmemcpy_c_e:
50 movq %rdi, %rax
51
52 movl %edx, %ecx
53 rep movsb
54 ret
55.Lmemcpy_e_e:
56 .previous
57
40ENTRY(__memcpy) 58ENTRY(__memcpy)
41ENTRY(memcpy) 59ENTRY(memcpy)
42 CFI_STARTPROC 60 CFI_STARTPROC
@@ -49,7 +67,7 @@ ENTRY(memcpy)
49 jb .Lhandle_tail 67 jb .Lhandle_tail
50 68
51 /* 69 /*
52 * We check whether memory false dependece could occur, 70 * We check whether memory false dependence could occur,
53 * then jump to corresponding copy mode. 71 * then jump to corresponding copy mode.
54 */ 72 */
55 cmp %dil, %sil 73 cmp %dil, %sil
@@ -171,21 +189,22 @@ ENDPROC(memcpy)
171ENDPROC(__memcpy) 189ENDPROC(__memcpy)
172 190
173 /* 191 /*
174 * Some CPUs run faster using the string copy instructions. 192 * Some CPUs are adding enhanced REP MOVSB/STOSB feature
175 * It is also a lot simpler. Use this when possible: 193 * If the feature is supported, memcpy_c_e() is the first choice.
176 */ 194 * If enhanced rep movsb copy is not available, use fast string copy
177 195 * memcpy_c() when possible. This is faster and code is simpler than
178 .section .altinstructions, "a" 196 * original memcpy().
179 .align 8 197 * Otherwise, original memcpy() is used.
180 .quad memcpy 198 * In .altinstructions section, ERMS feature is placed after REG_GOOD
181 .quad .Lmemcpy_c 199 * feature to implement the right patch order.
182 .word X86_FEATURE_REP_GOOD 200 *
183
184 /*
185 * Replace only beginning, memcpy is used to apply alternatives, 201 * Replace only beginning, memcpy is used to apply alternatives,
186 * so it is silly to overwrite itself with nops - reboot is the 202 * so it is silly to overwrite itself with nops - reboot is the
187 * only outcome... 203 * only outcome...
188 */ 204 */
189 .byte .Lmemcpy_e - .Lmemcpy_c 205 .section .altinstructions, "a"
190 .byte .Lmemcpy_e - .Lmemcpy_c 206 altinstruction_entry memcpy,.Lmemcpy_c,X86_FEATURE_REP_GOOD,\
207 .Lmemcpy_e-.Lmemcpy_c,.Lmemcpy_e-.Lmemcpy_c
208 altinstruction_entry memcpy,.Lmemcpy_c_e,X86_FEATURE_ERMS, \
209 .Lmemcpy_e_e-.Lmemcpy_c_e,.Lmemcpy_e_e-.Lmemcpy_c_e
191 .previous 210 .previous