diff options
| -rw-r--r-- | arch/x86/lib/memcpy_64.S | 45 |
1 file changed, 32 insertions(+), 13 deletions(-)
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 75ef61e35e38..daab21dae2d1 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #include <asm/cpufeature.h> | 5 | #include <asm/cpufeature.h> |
| 6 | #include <asm/dwarf2.h> | 6 | #include <asm/dwarf2.h> |
| 7 | #include <asm/alternative-asm.h> | ||
| 7 | 8 | ||
| 8 | /* | 9 | /* |
| 9 | * memcpy - Copy a memory block. | 10 | * memcpy - Copy a memory block. |
| @@ -37,6 +38,23 @@ | |||
| 37 | .Lmemcpy_e: | 38 | .Lmemcpy_e: |
| 38 | .previous | 39 | .previous |
| 39 | 40 | ||
| 41 | /* | ||
| 42 | * memcpy_c_e() - enhanced fast string memcpy. This is faster and simpler than | ||
| 43 | * memcpy_c. Use memcpy_c_e when possible. | ||
| 44 | * | ||
| 45 | * This gets patched over the unrolled variant (below) via the | ||
| 46 | * alternative instructions framework: | ||
| 47 | */ | ||
| 48 | .section .altinstr_replacement, "ax", @progbits | ||
| 49 | .Lmemcpy_c_e: | ||
| 50 | movq %rdi, %rax | ||
| 51 | |||
| 52 | movl %edx, %ecx | ||
| 53 | rep movsb | ||
| 54 | ret | ||
| 55 | .Lmemcpy_e_e: | ||
| 56 | .previous | ||
| 57 | |||
| 40 | ENTRY(__memcpy) | 58 | ENTRY(__memcpy) |
| 41 | ENTRY(memcpy) | 59 | ENTRY(memcpy) |
| 42 | CFI_STARTPROC | 60 | CFI_STARTPROC |
| @@ -171,21 +189,22 @@ ENDPROC(memcpy) | |||
| 171 | ENDPROC(__memcpy) | 189 | ENDPROC(__memcpy) |
| 172 | 190 | ||
| 173 | /* | 191 | /* |
| 174 | * Some CPUs run faster using the string copy instructions. | 192 | * Some CPUs are adding enhanced REP MOVSB/STOSB feature |
| 175 | * It is also a lot simpler. Use this when possible: | 193 | * If the feature is supported, memcpy_c_e() is the first choice. |
| 176 | */ | 194 | * If enhanced rep movsb copy is not available, use fast string copy |
| 177 | 195 | * memcpy_c() when possible. This is faster and code is simpler than | |
| 178 | .section .altinstructions, "a" | 196 | * original memcpy(). |
| 179 | .align 8 | 197 | * Otherwise, original memcpy() is used. |
| 180 | .quad memcpy | 198 | * In .altinstructions section, ERMS feature is placed after REP_GOOD |
| 181 | .quad .Lmemcpy_c | 199 | * feature to implement the right patch order. |
| 182 | .word X86_FEATURE_REP_GOOD | 200 | * |
| 183 | |||
| 184 | /* | ||
| 185 | * Replace only beginning, memcpy is used to apply alternatives, | 201 | * Replace only beginning, memcpy is used to apply alternatives, |
| 186 | * so it is silly to overwrite itself with nops - reboot is the | 202 | * so it is silly to overwrite itself with nops - reboot is the |
| 187 | * only outcome... | 203 | * only outcome... |
| 188 | */ | 204 | */ |
| 189 | .byte .Lmemcpy_e - .Lmemcpy_c | 205 | .section .altinstructions, "a" |
| 190 | .byte .Lmemcpy_e - .Lmemcpy_c | 206 | altinstruction_entry memcpy,.Lmemcpy_c,X86_FEATURE_REP_GOOD,\ |
| 207 | .Lmemcpy_e-.Lmemcpy_c,.Lmemcpy_e-.Lmemcpy_c | ||
| 208 | altinstruction_entry memcpy,.Lmemcpy_c_e,X86_FEATURE_ERMS, \ | ||
| 209 | .Lmemcpy_e_e-.Lmemcpy_c_e,.Lmemcpy_e_e-.Lmemcpy_c_e | ||
| 191 | .previous | 210 | .previous |
