author     Jan Beulich <JBeulich@suse.com>  2012-01-26 10:50:55 -0500
committer  Ingo Molnar <mingo@elte.hu>      2012-01-26 15:19:18 -0500
commit     2ab560911a427fdc73bfd3a7d2944d8ee0ca6db8 (patch)
tree       1e7d65a8a0589347da02629feaa315adff6aea8e /arch/x86/lib
parent     5d7244e7c984cecead412bde6395ce18618a4a37 (diff)
x86-64: Fix memcpy() to support sizes of 4Gb and above
While currently there doesn't appear to be any reachable in-tree case where such large memory blocks may be passed to memcpy(), we already had hit the problem in our Xen kernels. Just like done recently for memset(), rather than working around it, prevent others from falling into the same trap by fixing this long-standing limitation.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/4F21846F020000780006F3FA@nat28.tlf.novell.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
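For readers unfamiliar with the failure mode: the pre-patch fast paths load the byte count with "movl %edx, %ecx", so only the low 32 bits of the 64-bit length survive, and any length of 4 GiB or more is copied modulo 2^32 bytes. A minimal user-space sketch of that truncation (illustrative only, not kernel code):

	/* Demonstrates what the old "movl %edx, %ecx" keeps of a large count. */
	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		size_t len = (4ULL << 30) + 123;	/* 4 GiB + 123 bytes        */
		uint32_t kept = (uint32_t)len;		/* low 32 bits only         */

		printf("requested: %zu bytes\n", len);
		printf("copied:    %u bytes\n", kept);	/* prints 123, not 4 GiB+123 */
		return 0;
	}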
Diffstat (limited to 'arch/x86/lib')
-rw-r--r--  arch/x86/lib/memcpy_64.S | 25
1 file changed, 10 insertions(+), 15 deletions(-)
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index efbf2a0ecdea..1235b04a9a60 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -27,9 +27,8 @@
 	.section .altinstr_replacement, "ax", @progbits
 .Lmemcpy_c:
 	movq %rdi, %rax
-
-	movl %edx, %ecx
-	shrl $3, %ecx
+	movq %rdx, %rcx
+	shrq $3, %rcx
 	andl $7, %edx
 	rep movsq
 	movl %edx, %ecx
@@ -48,8 +47,7 @@
 	.section .altinstr_replacement, "ax", @progbits
 .Lmemcpy_c_e:
 	movq %rdi, %rax
-
-	movl %edx, %ecx
+	movq %rdx, %rcx
 	rep movsb
 	ret
 .Lmemcpy_e_e:
@@ -60,10 +58,7 @@ ENTRY(memcpy)
 	CFI_STARTPROC
 	movq %rdi, %rax
 
-	/*
-	 * Use 32bit CMP here to avoid long NOP padding.
-	 */
-	cmp $0x20, %edx
+	cmpq $0x20, %rdx
 	jb .Lhandle_tail
 
 	/*
@@ -72,7 +67,7 @@ ENTRY(memcpy)
 	 */
 	cmp %dil, %sil
 	jl .Lcopy_backward
-	subl $0x20, %edx
+	subq $0x20, %rdx
 .Lcopy_forward_loop:
 	subq $0x20, %rdx
 
@@ -91,7 +86,7 @@ ENTRY(memcpy)
 	movq %r11, 3*8(%rdi)
 	leaq 4*8(%rdi), %rdi
 	jae .Lcopy_forward_loop
-	addq $0x20, %rdx
+	addl $0x20, %edx
 	jmp .Lhandle_tail
 
 .Lcopy_backward:
@@ -123,11 +118,11 @@ ENTRY(memcpy)
 	/*
 	 * Calculate copy position to head.
 	 */
-	addq $0x20, %rdx
+	addl $0x20, %edx
 	subq %rdx, %rsi
 	subq %rdx, %rdi
 .Lhandle_tail:
-	cmpq $16, %rdx
+	cmpl $16, %edx
 	jb .Lless_16bytes
 
 	/*
@@ -144,7 +139,7 @@ ENTRY(memcpy)
 	retq
 	.p2align 4
 .Lless_16bytes:
-	cmpq $8, %rdx
+	cmpl $8, %edx
 	jb .Lless_8bytes
 	/*
 	 * Move data from 8 bytes to 15 bytes.
@@ -156,7 +151,7 @@ ENTRY(memcpy)
 	retq
 	.p2align 4
 .Lless_8bytes:
-	cmpq $4, %rdx
+	cmpl $4, %edx
 	jb .Lless_3bytes
 
 	/*
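Taken together, the hunks above move the count to 64-bit registers (%rdx/%rcx) wherever the full length is still in play, and deliberately use 32-bit forms (addl/cmpl, with their shorter non-REX encodings) once the remainder is known to be below 0x20 bytes, where the upper half of the register no longer matters. A rough C analogue of that width split, as an illustrative sketch only (memcpy_sketch is a made-up name, not the kernel routine):

	#include <stddef.h>
	#include <stdint.h>
	#include <string.h>

	static void *memcpy_sketch(void *dst, const void *src, size_t len)
	{
		unsigned char *d = dst;
		const unsigned char *s = src;

		/* 64-bit count: lengths of 4 GiB and above must survive here. */
		while (len >= 0x20) {
			memcpy(d, s, 0x20);	/* stands in for the 4x movq block */
			d += 0x20;
			s += 0x20;
			len -= 0x20;
		}

		/* len < 0x20 from here on, so a 32-bit counter is sufficient. */
		for (uint32_t tail = (uint32_t)len; tail; tail--)
			*d++ = *s++;

		return dst;
	}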