author     Jan Beulich <JBeulich@novell.com>        2009-12-18 11:16:03 -0500
committer  Ingo Molnar <mingo@elte.hu>              2009-12-30 05:57:32 -0500
commit     7269e8812a59f74fb1ce134465d0bcf5683b93a1
tree       cfa0022a98907232edc8ffbe145926a272da14f1  /arch/x86
parent     1b1d9258181bae199dc940f4bd0298126b9a73d9
x86-64: Modify memcpy()/memset() alternatives mechanism
In order to avoid unnecessary chains of branches, rather than
implementing memcpy()/memset()'s access to their alternative
implementations via a jump, patch the (larger) original function
directly.
The memcpy() part of this is slightly subtle: while alternative
instruction patching does itself use memcpy(), with the
replacement block being less than 64 bytes in size, the main loop
of the original function doesn't get used for copying memcpy_c()
over memcpy(), and hence we can safely write over its beginning.
Also note that the CFI annotations are fine for both variants of
each of the functions.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
LKML-Reference: <4B2BB8D30200007800026AF2@vpn.id2.novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
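
As background for the .altinstructions records in the diffs below: each record
describes one site for the boot-time alternatives pass to patch. A rough C
sketch of the record layout in kernels of this era, with field names following
arch/x86/include/asm/alternative.h (padding fields omitted, so treat offsets
as approximate rather than authoritative):

	#include <linux/types.h>

	/*
	 * Sketch of the record emitted by the .quad/.byte directives in the
	 * .altinstructions sections below (illustrative only).
	 */
	struct alt_instr {
		u8 *instr;          /* .quad memcpy      - code to be patched    */
		u8 *replacement;    /* .quad .Lmemcpy_c  - bytes to patch in     */
		u8  cpuid;          /* .byte X86_FEATURE_REP_GOOD - feature gate */
		u8  instrlen;       /* room available at the patch site          */
		u8  replacementlen; /* size of the replacement, <= instrlen      */
	};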
Diffstat (limited to 'arch/x86')
 -rw-r--r--  arch/x86/lib/memcpy_64.S | 23
 -rw-r--r--  arch/x86/lib/memset_64.S | 18
 2 files changed, 14 insertions(+), 27 deletions(-)
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index ad5441ed1b57..f82e884928af 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -20,12 +20,11 @@
 /*
  * memcpy_c() - fast string ops (REP MOVSQ) based variant.
  *
- * Calls to this get patched into the kernel image via the
+ * This gets patched over the unrolled variant (below) via the
  * alternative instructions framework:
  */
-	ALIGN
-memcpy_c:
-	CFI_STARTPROC
+	.section .altinstr_replacement, "ax", @progbits
+.Lmemcpy_c:
 	movq %rdi, %rax
 
 	movl %edx, %ecx
@@ -35,8 +34,8 @@ memcpy_c:
 	movl %edx, %ecx
 	rep movsb
 	ret
-	CFI_ENDPROC
-ENDPROC(memcpy_c)
+.Lmemcpy_e:
+	.previous
 
 ENTRY(__memcpy)
 ENTRY(memcpy)
@@ -128,16 +127,10 @@ ENDPROC(__memcpy)
  * It is also a lot simpler. Use this when possible:
  */
 
-	.section .altinstr_replacement, "ax"
-1:	.byte 0xeb				/* jmp <disp8> */
-	.byte (memcpy_c - memcpy) - (2f - 1b)	/* offset */
-2:
-	.previous
-
 	.section .altinstructions, "a"
 	.align 8
 	.quad memcpy
-	.quad 1b
+	.quad .Lmemcpy_c
 	.byte X86_FEATURE_REP_GOOD
 
 /*
@@ -145,6 +138,6 @@ ENDPROC(__memcpy)
  * so it is silly to overwrite itself with nops - reboot is the
  * only outcome...
  */
-	.byte 2b - 1b
-	.byte 2b - 1b
+	.byte .Lmemcpy_e - .Lmemcpy_c
+	.byte .Lmemcpy_e - .Lmemcpy_c
 	.previous
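
The subtlety called out in the commit message is easiest to see in the
consumer of these records. Below is a simplified sketch of the boot-time
patching loop, modeled on apply_alternatives() in
arch/x86/kernel/alternative.c of this era; error checks and 64-bit specifics
are elided, so treat it as an approximation rather than the kernel's exact
code:

	/* Simplified sketch of the alternatives patching loop. */
	void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
	{
		struct alt_instr *a;
		u8 insnbuf[MAX_PATCH_LEN];

		for (a = start; a < end; a++) {
			if (!boot_cpu_has(a->cpuid))	/* e.g. X86_FEATURE_REP_GOOD */
				continue;
			/*
			 * This memcpy() is the subtle part: the replacement
			 * is well under 64 bytes, so the unrolled memcpy()'s
			 * main copy loop is never entered while memcpy()
			 * writes the rep-string body over its own beginning.
			 */
			memcpy(insnbuf, a->replacement, a->replacementlen);
			/* Nop-pad whatever the replacement leaves uncovered. */
			add_nops(insnbuf + a->replacementlen,
				 a->instrlen - a->replacementlen);
			text_poke_early(a->instr, insnbuf, a->instrlen);
		}
	}

Note that the memcpy() record above sets both length bytes to
.Lmemcpy_e - .Lmemcpy_c, i.e. instrlen == replacementlen, so add_nops() has
nothing to pad; as the retained comment says, letting memcpy() nop out the
rest of its own body while it is executing could only end in a reboot.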
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index 2c5948116bd2..e88d3b81644a 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -12,9 +12,8 @@
  *
  * rax   original destination
  */
-	ALIGN
-memset_c:
-	CFI_STARTPROC
+	.section .altinstr_replacement, "ax", @progbits
+.Lmemset_c:
 	movq %rdi,%r9
 	movl %edx,%r8d
 	andl $7,%r8d
@@ -29,8 +28,8 @@ memset_c:
 	rep stosb
 	movq %r9,%rax
 	ret
-	CFI_ENDPROC
-ENDPROC(memset_c)
+.Lmemset_e:
+	.previous
 
 ENTRY(memset)
 ENTRY(__memset)
@@ -118,16 +117,11 @@ ENDPROC(__memset)
 
 #include <asm/cpufeature.h>
 
-	.section .altinstr_replacement,"ax"
-1:	.byte 0xeb				/* jmp <disp8> */
-	.byte (memset_c - memset) - (2f - 1b)	/* offset */
-2:
-	.previous
 	.section .altinstructions,"a"
 	.align 8
 	.quad memset
-	.quad 1b
+	.quad .Lmemset_c
 	.byte X86_FEATURE_REP_GOOD
 	.byte .Lfinal - memset
-	.byte 2b - 1b
+	.byte .Lmemset_e - .Lmemset_c
 	.previous
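
The memset() record, by contrast, keeps instrlen as .Lfinal - memset (the
whole unrolled function) while replacementlen is only
.Lmemset_e - .Lmemset_c, so the tail of the original memset() does get
nop-padded once the rep-string body is copied in; unlike memcpy(), memset()
plays no part in applying the patch, so padding it is safe.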