diff options
author | Jan Beulich <JBeulich@suse.com> | 2012-01-26 10:50:55 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2012-01-26 15:19:18 -0500 |
commit | 2ab560911a427fdc73bfd3a7d2944d8ee0ca6db8 (patch) | |
tree | 1e7d65a8a0589347da02629feaa315adff6aea8e /arch/x86/lib | |
parent | 5d7244e7c984cecead412bde6395ce18618a4a37 (diff) |
x86-64: Fix memcpy() to support sizes of 4Gb and above
While currently there doesn't appear to be any reachable in-tree
case where such large memory blocks may be passed to memcpy(),
we already had hit the problem in our Xen kernels. Just like
done recently for memset(), rather than working around it,
prevent others from falling into the same trap by fixing this
long standing limitation.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/4F21846F020000780006F3FA@nat28.tlf.novell.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/lib')
-rw-r--r-- | arch/x86/lib/memcpy_64.S | 25 |
1 file changed, 10 insertions, 15 deletions
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index efbf2a0ecdea..1235b04a9a60 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S | |||
@@ -27,9 +27,8 @@ | |||
27 | .section .altinstr_replacement, "ax", @progbits | 27 | .section .altinstr_replacement, "ax", @progbits |
28 | .Lmemcpy_c: | 28 | .Lmemcpy_c: |
29 | movq %rdi, %rax | 29 | movq %rdi, %rax |
30 | 30 | movq %rdx, %rcx | |
31 | movl %edx, %ecx | 31 | shrq $3, %rcx |
32 | shrl $3, %ecx | ||
33 | andl $7, %edx | 32 | andl $7, %edx |
34 | rep movsq | 33 | rep movsq |
35 | movl %edx, %ecx | 34 | movl %edx, %ecx |
@@ -48,8 +47,7 @@ | |||
48 | .section .altinstr_replacement, "ax", @progbits | 47 | .section .altinstr_replacement, "ax", @progbits |
49 | .Lmemcpy_c_e: | 48 | .Lmemcpy_c_e: |
50 | movq %rdi, %rax | 49 | movq %rdi, %rax |
51 | 50 | movq %rdx, %rcx | |
52 | movl %edx, %ecx | ||
53 | rep movsb | 51 | rep movsb |
54 | ret | 52 | ret |
55 | .Lmemcpy_e_e: | 53 | .Lmemcpy_e_e: |
@@ -60,10 +58,7 @@ ENTRY(memcpy) | |||
60 | CFI_STARTPROC | 58 | CFI_STARTPROC |
61 | movq %rdi, %rax | 59 | movq %rdi, %rax |
62 | 60 | ||
63 | /* | 61 | cmpq $0x20, %rdx |
64 | * Use 32bit CMP here to avoid long NOP padding. | ||
65 | */ | ||
66 | cmp $0x20, %edx | ||
67 | jb .Lhandle_tail | 62 | jb .Lhandle_tail |
68 | 63 | ||
69 | /* | 64 | /* |
@@ -72,7 +67,7 @@ ENTRY(memcpy) | |||
72 | */ | 67 | */ |
73 | cmp %dil, %sil | 68 | cmp %dil, %sil |
74 | jl .Lcopy_backward | 69 | jl .Lcopy_backward |
75 | subl $0x20, %edx | 70 | subq $0x20, %rdx |
76 | .Lcopy_forward_loop: | 71 | .Lcopy_forward_loop: |
77 | subq $0x20, %rdx | 72 | subq $0x20, %rdx |
78 | 73 | ||
@@ -91,7 +86,7 @@ ENTRY(memcpy) | |||
91 | movq %r11, 3*8(%rdi) | 86 | movq %r11, 3*8(%rdi) |
92 | leaq 4*8(%rdi), %rdi | 87 | leaq 4*8(%rdi), %rdi |
93 | jae .Lcopy_forward_loop | 88 | jae .Lcopy_forward_loop |
94 | addq $0x20, %rdx | 89 | addl $0x20, %edx |
95 | jmp .Lhandle_tail | 90 | jmp .Lhandle_tail |
96 | 91 | ||
97 | .Lcopy_backward: | 92 | .Lcopy_backward: |
@@ -123,11 +118,11 @@ ENTRY(memcpy) | |||
123 | /* | 118 | /* |
124 | * Calculate copy position to head. | 119 | * Calculate copy position to head. |
125 | */ | 120 | */ |
126 | addq $0x20, %rdx | 121 | addl $0x20, %edx |
127 | subq %rdx, %rsi | 122 | subq %rdx, %rsi |
128 | subq %rdx, %rdi | 123 | subq %rdx, %rdi |
129 | .Lhandle_tail: | 124 | .Lhandle_tail: |
130 | cmpq $16, %rdx | 125 | cmpl $16, %edx |
131 | jb .Lless_16bytes | 126 | jb .Lless_16bytes |
132 | 127 | ||
133 | /* | 128 | /* |
@@ -144,7 +139,7 @@ ENTRY(memcpy) | |||
144 | retq | 139 | retq |
145 | .p2align 4 | 140 | .p2align 4 |
146 | .Lless_16bytes: | 141 | .Lless_16bytes: |
147 | cmpq $8, %rdx | 142 | cmpl $8, %edx |
148 | jb .Lless_8bytes | 143 | jb .Lless_8bytes |
149 | /* | 144 | /* |
150 | * Move data from 8 bytes to 15 bytes. | 145 | * Move data from 8 bytes to 15 bytes. |
@@ -156,7 +151,7 @@ ENTRY(memcpy) | |||
156 | retq | 151 | retq |
157 | .p2align 4 | 152 | .p2align 4 |
158 | .Lless_8bytes: | 153 | .Lless_8bytes: |
159 | cmpq $4, %rdx | 154 | cmpl $4, %edx |
160 | jb .Lless_3bytes | 155 | jb .Lless_3bytes |
161 | 156 | ||
162 | /* | 157 | /* |