author     Jan Beulich <JBeulich@suse.com>    2012-01-26 10:55:32 -0500
committer  Ingo Molnar <mingo@elte.hu>        2012-01-26 15:19:20 -0500
commit     9d8e22777e66f420e46490e9fc6f8cb7e0e2222b (patch)
tree       dd0ec6122dda1409206dda70f6ae4fd3c9a2cd35 /arch/x86/lib
parent     2ab560911a427fdc73bfd3a7d2944d8ee0ca6db8 (diff)
x86-64: Handle byte-wise tail copying in memcpy() without a loop
While hard to measure, reducing the number of possibly/likely
mis-predicted branches can generally be expected to be slightly
better. Contrary to what might appear at first glance, this also
doesn't grow the function size (the alignment gap to the next
function just gets smaller).

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/4F218584020000780006F422@nat28.tlf.novell.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/lib')
-rw-r--r--  arch/x86/lib/memcpy_64.S | 19 ++++++++++---------
1 file changed, 10 insertions(+), 9 deletions(-)
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 1235b04a9a60..1c273be7c97e 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -164,18 +164,19 @@ ENTRY(memcpy)
 	retq
 	.p2align 4
 .Lless_3bytes:
-	cmpl $0, %edx
-	je .Lend
+	subl $1, %edx
+	jb .Lend
 	/*
 	 * Move data from 1 bytes to 3 bytes.
 	 */
-.Lloop_1:
-	movb (%rsi), %r8b
-	movb %r8b, (%rdi)
-	incq %rdi
-	incq %rsi
-	decl %edx
-	jnz .Lloop_1
+	movzbl (%rsi), %ecx
+	jz .Lstore_1byte
+	movzbq 1(%rsi), %r8
+	movzbq (%rsi, %rdx), %r9
+	movb %r8b, 1(%rdi)
+	movb %r9b, (%rdi, %rdx)
+.Lstore_1byte:
+	movb %cl, (%rdi)
 
 .Lend:
 	retq
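
For readers who find the AT&T syntax above hard to follow, the new tail path
corresponds roughly to the C sketch below. This is illustrative only and not
part of the patch; the function name copy_tail_upto3 and its signature are
made up for this example.

#include <stddef.h>

/*
 * Rough C rendering of the new 1..3-byte tail copy: instead of looping
 * once per byte, fetch the first, second and last byte up front and
 * store them with possibly overlapping writes.
 */
void copy_tail_upto3(unsigned char *dst, const unsigned char *src,
		     size_t len)
{
	unsigned char first;

	if (len == 0)			/* subl $1, %edx ; jb .Lend */
		return;

	first = src[0];			/* movzbl (%rsi), %ecx */
	if (len > 1) {			/* jz .Lstore_1byte when len == 1 */
		unsigned char second = src[1];		/* movzbq 1(%rsi), %r8 */
		unsigned char last = src[len - 1];	/* movzbq (%rsi, %rdx), %r9 */

		dst[1] = second;	/* movb %r8b, 1(%rdi) */
		dst[len - 1] = last;	/* movb %r9b, (%rdi, %rdx) */
	}
	dst[0] = first;			/* movb %cl, (%rdi) */
}

The point of the rewrite is visible in the sketch: for a 2- or 3-byte tail the
second and last stores are allowed to overlap (for len == 2 they hit the same
byte), so the whole remainder is copied with straight-line code instead of the
old one-byte-per-iteration loop and its data-dependent branch.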