Diffstat (limited to 'arch/x86_64/lib/memcpy.S')
-rw-r--r--	arch/x86_64/lib/memcpy.S	69
1 file changed, 39 insertions(+), 30 deletions(-)
diff --git a/arch/x86_64/lib/memcpy.S b/arch/x86_64/lib/memcpy.S
index 5554948b5554..967b22fa7d07 100644
--- a/arch/x86_64/lib/memcpy.S
+++ b/arch/x86_64/lib/memcpy.S
@@ -1,6 +1,10 @@
 /* Copyright 2002 Andi Kleen */
 
-#include <asm/cpufeature.h>
+#include <linux/config.h>
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+#include <asm/cpufeature.h>
+
 /*
  * memcpy - Copy a memory block.
  *
@@ -13,12 +17,26 @@
  * rax original destination
  */
 
-	.globl __memcpy
-	.globl memcpy
-	.p2align 4
-__memcpy:
-memcpy:
+	ALIGN
+memcpy_c:
+	CFI_STARTPROC
+	movq %rdi,%rax
+	movl %edx,%ecx
+	shrl $3,%ecx
+	andl $7,%edx
+	rep movsq
+	movl %edx,%ecx
+	rep movsb
+	ret
+	CFI_ENDPROC
+ENDPROC(memcpy_c)
+
+ENTRY(__memcpy)
+ENTRY(memcpy)
+	CFI_STARTPROC
 	pushq %rbx
+	CFI_ADJUST_CFA_OFFSET 8
+	CFI_REL_OFFSET rbx, 0
 	movq %rdi,%rax
 
 	movl %edx,%ecx
@@ -86,36 +104,27 @@ memcpy:
 
 .Lende:
 	popq %rbx
+	CFI_ADJUST_CFA_OFFSET -8
+	CFI_RESTORE rbx
 	ret
 .Lfinal:
+	CFI_ENDPROC
+ENDPROC(memcpy)
+ENDPROC(__memcpy)
 
 	/* Some CPUs run faster using the string copy instructions.
 	   It is also a lot simpler. Use this when possible */
 
+	.section .altinstr_replacement,"ax"
+1:	.byte 0xeb				/* jmp <disp8> */
+	.byte (memcpy_c - memcpy) - (2f - 1b)	/* offset */
+2:
+	.previous
 	.section .altinstructions,"a"
 	.align 8
 	.quad memcpy
-	.quad memcpy_c
+	.quad 1b
 	.byte X86_FEATURE_REP_GOOD
-	.byte .Lfinal-memcpy
-	.byte memcpy_c_end-memcpy_c
-	.previous
-
-	.section .altinstr_replacement,"ax"
-/* rdi	destination
- * rsi	source
- * rdx	count
- */
-memcpy_c:
-	movq %rdi,%rax
-	movl %edx,%ecx
-	shrl $3,%ecx
-	andl $7,%edx
-	rep
-	movsq
-	movl %edx,%ecx
-	rep
-	movsb
-	ret
-memcpy_c_end:
 	.previous
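
With this change memcpy_c moves out of .altinstr_replacement into ordinary text, bracketed by CFI_STARTPROC/CFI_ENDPROC like the main routine, so both variants carry DWARF unwind annotations (the CFI_ADJUST_CFA_OFFSET 8 / CFI_REL_OFFSET rbx, 0 pair records the frame change from pushq %rbx for unwinders). The rep-string variant itself is the classic quadword-then-byte split: copy count/8 quadwords with rep movsq, then the count%8 remainder with rep movsb, and return the original destination in rax. A minimal C model of that control flow, assuming 8-byte unsigned long as on x86-64 (memcpy_c_model is an illustrative name, not a kernel symbol):

	#include <stddef.h>

	void *memcpy_c_model(void *dst, const void *src, size_t n)
	{
		unsigned long *dq = dst;
		const unsigned long *sq = src;
		unsigned char *db;
		const unsigned char *sb;
		size_t quads = n >> 3;		/* shrl $3,%ecx */
		size_t rest  = n & 7;		/* andl $7,%edx */

		while (quads--)			/* rep movsq */
			*dq++ = *sq++;
		db = (unsigned char *)dq;
		sb = (const unsigned char *)sq;
		while (rest--)			/* rep movsb */
			*db++ = *sb++;
		return dst;			/* rax = original destination */
	}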
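
The .altinstructions record still names the original site (memcpy), the required CPUID feature bit (X86_FEATURE_REP_GOOD), and the lengths of the original and replacement code, but the replacement bytes are now just a two-byte short jmp to memcpy_c (1: and 2: are local labels; 1b and 2b are backward references to them). Since a short jmp's displacement is relative to the end of the instruction, and (2f - 1b) is exactly the instruction's two bytes, the encoded offset works out to (memcpy_c - memcpy) - 2: once the bytes are patched in at memcpy, the jump lands on memcpy_c. Schematically, boot-time patching consumes records shaped like the .quad/.byte fields above; the sketch below is illustrative only, and its names and single-byte NOP padding are assumptions, not the kernel's exact struct alt_instr or apply_alternatives() implementation:

	/* Record layout implied by the .quad/.byte fields emitted above. */
	struct alt_instr_sketch {
		unsigned char *instr;		/* .quad memcpy */
		unsigned char *replacement;	/* .quad 1b */
		unsigned char  cpuid;		/* .byte X86_FEATURE_REP_GOOD */
		unsigned char  instrlen;	/* .byte .Lfinal - memcpy */
		unsigned char  replacementlen;	/* .byte 2b - 1b */
	};

	/* If the CPU has the feature, copy the replacement bytes over the
	   original code and pad the remainder with NOPs. */
	void apply_alternatives_sketch(struct alt_instr_sketch *a,
				       struct alt_instr_sketch *end,
				       int (*cpu_has)(unsigned char feature))
	{
		for (; a < end; a++) {
			int i;

			if (!cpu_has(a->cpuid))
				continue;
			for (i = 0; i < a->replacementlen; i++)
				a->instr[i] = a->replacement[i];
			for (; i < a->instrlen; i++)
				a->instr[i] = 0x90;	/* NOP */
		}
	}

Patching a short jmp instead of the whole body keeps the replacement to two bytes and lets memcpy_c live in normal, annotated text rather than being copied over the open-coded routine.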