| author | Jan Beulich <JBeulich@novell.com> | 2009-12-18 11:16:03 -0500 |
|---|---|---|
| committer | Ingo Molnar <mingo@elte.hu> | 2009-12-30 05:57:32 -0500 |
| commit | 7269e8812a59f74fb1ce134465d0bcf5683b93a1 | |
| tree | cfa0022a98907232edc8ffbe145926a272da14f1 | |
| parent | 1b1d9258181bae199dc940f4bd0298126b9a73d9 | |
x86-64: Modify memcpy()/memset() alternatives mechanism
To avoid unnecessary chains of branches, patch the (larger) original
functions directly rather than routing memcpy()/memset() to their
alternative implementations through a patched-in jump.
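
For context, each entry in the `.altinstructions` section (emitted by hand in the two files below) fills in one record that `apply_alternatives()` walks at boot. A minimal sketch of that record, assuming the 2.6.33-era layout from `arch/x86/include/asm/alternative.h` (padding fields approximate):

```c
/*
 * Sketch of the 2.6.33-era alternatives record; field names follow
 * arch/x86/include/asm/alternative.h of the time, padding approximate.
 */
struct alt_instr {
        u8 *instr;          /* original code to patch, e.g. memcpy        */
        u8 *replacement;    /* replacement code, e.g. .Lmemcpy_c          */
        u8  cpuid;          /* feature bit, e.g. X86_FEATURE_REP_GOOD     */
        u8  instrlen;       /* bytes of the original open to patching     */
        u8  replacementlen; /* bytes of replacement, must be <= instrlen  */
        u8  pad1;           /* explicit padding                           */
#ifdef CONFIG_X86_64
        u32 pad2;
#endif
};
```

The `.quad`/`.byte` directives in the hunks below populate exactly these fields; what this commit changes is only where `replacement` points: previously at a two-byte `jmp` to the string-ops variant, now at the string-ops code itself.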
The memcpy() part of this is slightly subtle: alternative-instruction
patching itself uses memcpy(), but because the replacement block is
less than 64 bytes in size, the main loop of the original function is
never entered while memcpy_c() is being copied over memcpy(), and
hence its beginning can safely be written over.
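
A condensed sketch of the boot-time patching loop makes the recursion visible (loosely after `apply_alternatives()` in `arch/x86/kernel/alternative.c` of that era; sanity checks, locking, and the SMP variants are omitted, details approximate):

```c
/*
 * Condensed sketch of apply_alternatives(); trimmed and approximate,
 * see arch/x86/kernel/alternative.c of the era for the real thing.
 */
static void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
{
        struct alt_instr *a;
        char insnbuf[MAX_PATCH_LEN];

        for (a = start; a < end; a++) {
                if (!boot_cpu_has(a->cpuid))
                        continue;   /* CPU lacks the feature: keep original */

                /*
                 * Both this memcpy() and the one inside text_poke_early()
                 * are the very function being patched.  With
                 * a->replacementlen < 64 the unrolled 64-byte main loop is
                 * never entered, so the instructions actually executed lie
                 * past the bytes that text_poke_early() overwrites.
                 */
                memcpy(insnbuf, a->replacement, a->replacementlen);
                add_nops(insnbuf + a->replacementlen,
                         a->instrlen - a->replacementlen);
                text_poke_early(a->instr, insnbuf, a->instrlen);
        }
}
```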
Also note that the CFI annotations are fine for both variants of
each of the functions.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
LKML-Reference: <4B2BB8D30200007800026AF2@vpn.id2.novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
 -rw-r--r--  arch/x86/lib/memcpy_64.S | 23 ++++++++---------------
 -rw-r--r--  arch/x86/lib/memset_64.S | 18 ++++++------------
 2 files changed, 14 insertions(+), 27 deletions(-)
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index ad5441ed1b57..f82e884928af 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -20,12 +20,11 @@
 /*
  * memcpy_c() - fast string ops (REP MOVSQ) based variant.
  *
- * Calls to this get patched into the kernel image via the
+ * This gets patched over the unrolled variant (below) via the
  * alternative instructions framework:
  */
-        ALIGN
-memcpy_c:
-        CFI_STARTPROC
+        .section .altinstr_replacement, "ax", @progbits
+.Lmemcpy_c:
         movq %rdi, %rax
 
         movl %edx, %ecx
@@ -35,8 +34,8 @@ memcpy_c:
         movl %edx, %ecx
         rep movsb
         ret
-        CFI_ENDPROC
-ENDPROC(memcpy_c)
+.Lmemcpy_e:
+        .previous
 
 ENTRY(__memcpy)
 ENTRY(memcpy)
@@ -128,16 +127,10 @@ ENDPROC(__memcpy)
          * It is also a lot simpler. Use this when possible:
          */
 
-        .section .altinstr_replacement, "ax"
-1:      .byte 0xeb                              /* jmp <disp8> */
-        .byte (memcpy_c - memcpy) - (2f - 1b)   /* offset */
-2:
-        .previous
-
         .section .altinstructions, "a"
         .align 8
         .quad memcpy
-        .quad 1b
+        .quad .Lmemcpy_c
         .byte X86_FEATURE_REP_GOOD
 
         /*
@@ -145,6 +138,6 @@ ENDPROC(__memcpy)
          * so it is silly to overwrite itself with nops - reboot is the
          * only outcome...
          */
-        .byte 2b - 1b
-        .byte 2b - 1b
+        .byte .Lmemcpy_e - .Lmemcpy_c
+        .byte .Lmemcpy_e - .Lmemcpy_c
         .previous
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index 2c5948116bd2..e88d3b81644a 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -12,9 +12,8 @@
  *
  * rax   original destination
  */
-        ALIGN
-memset_c:
-        CFI_STARTPROC
+        .section .altinstr_replacement, "ax", @progbits
+.Lmemset_c:
         movq %rdi,%r9
         movl %edx,%r8d
         andl $7,%r8d
@@ -29,8 +28,8 @@ memset_c:
         rep stosb
         movq %r9,%rax
         ret
-        CFI_ENDPROC
-ENDPROC(memset_c)
+.Lmemset_e:
+        .previous
 
 ENTRY(memset)
 ENTRY(__memset)
@@ -118,16 +117,11 @@ ENDPROC(__memset)
 
 #include <asm/cpufeature.h>
 
-        .section .altinstr_replacement,"ax"
-1:      .byte 0xeb                              /* jmp <disp8> */
-        .byte (memset_c - memset) - (2f - 1b)   /* offset */
-2:
-        .previous
         .section .altinstructions,"a"
         .align 8
         .quad memset
-        .quad 1b
+        .quad .Lmemset_c
         .byte X86_FEATURE_REP_GOOD
         .byte .Lfinal - memset
-        .byte 2b - 1b
+        .byte .Lmemset_e - .Lmemset_c
         .previous
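
For comparison, C call sites get the same in-place patching from the `alternative()`/`alternative_input()` macros in `arch/x86/include/asm/alternative.h`, which emit matching `.altinstructions` and `.altinstr_replacement` records behind the scenes. A hedged sketch loosely adapted from the era's `prefetch()` helper (`prefetch_sketch`, the NOP choice, and the operand numbering are illustrative, not verbatim from the tree):

```c
#include <asm/alternative.h>
#include <asm/cpufeature.h>
#include <asm/nops.h>

/*
 * Loosely adapted from the era's prefetch(); treat details as
 * approximate.  Boot-time patching replaces the NOPs with PREFETCHNTA
 * on CPUs that set X86_FEATURE_XMM, just as X86_FEATURE_REP_GOOD
 * selects the rep-string variants above.
 */
static inline void prefetch_sketch(const void *x)
{
        alternative_input(ASM_NOP4,             /* original: 4-byte nop    */
                          "prefetchnta (%1)",   /* replacement if feature  */
                          X86_FEATURE_XMM,      /* SSE implies PREFETCHNTA */
                          "r" (x));             /* input operand (%1)      */
}
```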
