aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/lib
diff options
context:
space:
mode:
author Jan Beulich <JBeulich@novell.com> 2009-12-18 11:16:03 -0500
committer Ingo Molnar <mingo@elte.hu> 2009-12-30 05:57:32 -0500
commit 7269e8812a59f74fb1ce134465d0bcf5683b93a1 (patch)
tree cfa0022a98907232edc8ffbe145926a272da14f1 /arch/x86/lib
parent 1b1d9258181bae199dc940f4bd0298126b9a73d9 (diff)
x86-64: Modify memcpy()/memset() alternatives mechanism
In order to avoid unnecessary chains of branches, rather than implementing memcpy()/memset()'s access to their alternative implementations via a jump, patch the (larger) original function directly.

The memcpy() part of this is slightly subtle: while alternative instruction patching does itself use memcpy(), with the replacement block being less than 64 bytes in size the main loop of the original function doesn't get used for copying memcpy_c() over memcpy(), and hence we can safely write over its beginning.

Also note that the CFI annotations are fine for both variants of each of the functions.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
LKML-Reference: <4B2BB8D30200007800026AF2@vpn.id2.novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/lib')
-rw-r--r-- arch/x86/lib/memcpy_64.S 23
-rw-r--r-- arch/x86/lib/memset_64.S 18
2 files changed, 14 insertions, 27 deletions
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index ad5441ed1b57..f82e884928af 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -20,12 +20,11 @@
20/* 20/*
21 * memcpy_c() - fast string ops (REP MOVSQ) based variant. 21 * memcpy_c() - fast string ops (REP MOVSQ) based variant.
22 * 22 *
23 * Calls to this get patched into the kernel image via the 23 * This gets patched over the unrolled variant (below) via the
24 * alternative instructions framework: 24 * alternative instructions framework:
25 */ 25 */
26 ALIGN 26 .section .altinstr_replacement, "ax", @progbits
27memcpy_c: 27.Lmemcpy_c:
28 CFI_STARTPROC
29 movq %rdi, %rax 28 movq %rdi, %rax
30 29
31 movl %edx, %ecx 30 movl %edx, %ecx
@@ -35,8 +34,8 @@ memcpy_c:
35 movl %edx, %ecx 34 movl %edx, %ecx
36 rep movsb 35 rep movsb
37 ret 36 ret
38 CFI_ENDPROC 37.Lmemcpy_e:
39ENDPROC(memcpy_c) 38 .previous
40 39
41ENTRY(__memcpy) 40ENTRY(__memcpy)
42ENTRY(memcpy) 41ENTRY(memcpy)
@@ -128,16 +127,10 @@ ENDPROC(__memcpy)
128 * It is also a lot simpler. Use this when possible: 127 * It is also a lot simpler. Use this when possible:
129 */ 128 */
130 129
131 .section .altinstr_replacement, "ax"
1321: .byte 0xeb /* jmp <disp8> */
133 .byte (memcpy_c - memcpy) - (2f - 1b) /* offset */
1342:
135 .previous
136
137 .section .altinstructions, "a" 130 .section .altinstructions, "a"
138 .align 8 131 .align 8
139 .quad memcpy 132 .quad memcpy
140 .quad 1b 133 .quad .Lmemcpy_c
141 .byte X86_FEATURE_REP_GOOD 134 .byte X86_FEATURE_REP_GOOD
142 135
143 /* 136 /*
@@ -145,6 +138,6 @@ ENDPROC(__memcpy)
145 * so it is silly to overwrite itself with nops - reboot is the 138 * so it is silly to overwrite itself with nops - reboot is the
146 * only outcome... 139 * only outcome...
147 */ 140 */
148 .byte 2b - 1b 141 .byte .Lmemcpy_e - .Lmemcpy_c
149 .byte 2b - 1b 142 .byte .Lmemcpy_e - .Lmemcpy_c
150 .previous 143 .previous
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index 2c5948116bd2..e88d3b81644a 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -12,9 +12,8 @@
12 * 12 *
13 * rax original destination 13 * rax original destination
14 */ 14 */
15 ALIGN 15 .section .altinstr_replacement, "ax", @progbits
16memset_c: 16.Lmemset_c:
17 CFI_STARTPROC
18 movq %rdi,%r9 17 movq %rdi,%r9
19 movl %edx,%r8d 18 movl %edx,%r8d
20 andl $7,%r8d 19 andl $7,%r8d
@@ -29,8 +28,8 @@ memset_c:
29 rep stosb 28 rep stosb
30 movq %r9,%rax 29 movq %r9,%rax
31 ret 30 ret
32 CFI_ENDPROC 31.Lmemset_e:
33ENDPROC(memset_c) 32 .previous
34 33
35ENTRY(memset) 34ENTRY(memset)
36ENTRY(__memset) 35ENTRY(__memset)
@@ -118,16 +117,11 @@ ENDPROC(__memset)
118 117
119#include <asm/cpufeature.h> 118#include <asm/cpufeature.h>
120 119
121 .section .altinstr_replacement,"ax"
1221: .byte 0xeb /* jmp <disp8> */
123 .byte (memset_c - memset) - (2f - 1b) /* offset */
1242:
125 .previous
126 .section .altinstructions,"a" 120 .section .altinstructions,"a"
127 .align 8 121 .align 8
128 .quad memset 122 .quad memset
129 .quad 1b 123 .quad .Lmemset_c
130 .byte X86_FEATURE_REP_GOOD 124 .byte X86_FEATURE_REP_GOOD
131 .byte .Lfinal - memset 125 .byte .Lfinal - memset
132 .byte 2b - 1b 126 .byte .Lmemset_e - .Lmemset_c
133 .previous 127 .previous