author     Jan Beulich <JBeulich@suse.com>   2012-01-05 11:10:42 -0500
committer  Ingo Molnar <mingo@elte.hu>       2012-01-26 05:50:04 -0500
commit     5d7244e7c984cecead412bde6395ce18618a4a37
tree       d1b468fe47733ab3770f127d44d04ac2937965ee /arch/x86/lib
parent     426932909093e4e7729777a0e2beed4b54911361
x86-64: Fix memset() to support sizes of 4Gb and above
While currently there doesn't appear to be any reachable in-tree
case where such large memory blocks may be passed to memset()
(alloc_bootmem() being the primary non-reachable one, as it gets
called with suitably large sizes in FLATMEM configurations), we
have recently hit the problem a second time in our Xen kernels.
Rather than working around it a second time, prevent others from
falling into the same trap by fixing this long-standing
limitation.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/r/4F05D992020000780006AA09@nat28.tlf.novell.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
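
To make the failure mode concrete, here is a minimal user-space sketch (illustration only, not kernel code; the helper name effective_len is made up) of what the pre-fix code did: the 64-bit length was funnelled through a 32-bit count register, so any request of 4 GiB or more was silently truncated to its low 32 bits.

#include <stdint.h>
#include <stdio.h>

/*
 * Illustration only (not kernel code): model the old behaviour where
 * the 64-bit length was moved through a 32-bit count register
 * (movl %edx,%ecx), keeping only the low 32 bits of the size.
 */
static uint64_t effective_len(uint64_t requested)
{
	return (uint32_t)requested;	/* what a 32-bit count register retains */
}

int main(void)
{
	uint64_t req = 5ULL << 30;	/* a 5 GiB memset() request */

	printf("requested          : %llu bytes\n", (unsigned long long)req);
	printf("set by the old code: %llu bytes\n",
	       (unsigned long long)effective_len(req));
	return 0;
}

A 5 GiB request (0x140000000 bytes), for example, would have cleared only 1 GiB (0x40000000 bytes) once the size was truncated to 32 bits.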
Diffstat (limited to 'arch/x86/lib')
 -rw-r--r--  arch/x86/lib/memset_64.S | 33
 1 file changed, 15 insertions(+), 18 deletions(-)
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index 79bd454b78a3..2dcb3808cbda 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -19,16 +19,15 @@
 	.section .altinstr_replacement, "ax", @progbits
 .Lmemset_c:
 	movq %rdi,%r9
-	movl %edx,%r8d
-	andl $7,%r8d
-	movl %edx,%ecx
-	shrl $3,%ecx
+	movq %rdx,%rcx
+	andl $7,%edx
+	shrq $3,%rcx
 	/* expand byte value */
 	movzbl %sil,%esi
 	movabs $0x0101010101010101,%rax
-	mulq %rsi	/* with rax, clobbers rdx */
+	imulq %rsi,%rax
 	rep stosq
-	movl %r8d,%ecx
+	movl %edx,%ecx
 	rep stosb
 	movq %r9,%rax
 	ret
@@ -50,7 +49,7 @@
 .Lmemset_c_e:
 	movq %rdi,%r9
 	movb %sil,%al
-	movl %edx,%ecx
+	movq %rdx,%rcx
 	rep stosb
 	movq %r9,%rax
 	ret
@@ -61,12 +60,11 @@ ENTRY(memset)
 ENTRY(__memset)
 	CFI_STARTPROC
 	movq %rdi,%r10
-	movq %rdx,%r11
 
 	/* expand byte value */
 	movzbl %sil,%ecx
 	movabs $0x0101010101010101,%rax
-	mul %rcx	/* with rax, clobbers rdx */
+	imulq %rcx,%rax
 
 	/* align dst */
 	movl %edi,%r9d
@@ -75,13 +73,13 @@ ENTRY(__memset)
 	CFI_REMEMBER_STATE
 .Lafter_bad_alignment:
 
-	movl %r11d,%ecx
-	shrl $6,%ecx
+	movq %rdx,%rcx
+	shrq $6,%rcx
 	jz .Lhandle_tail
 
 	.p2align 4
 .Lloop_64:
-	decl %ecx
+	decq %rcx
 	movq %rax,(%rdi)
 	movq %rax,8(%rdi)
 	movq %rax,16(%rdi)
@@ -97,7 +95,7 @@ ENTRY(__memset)
 	 to predict jump tables. */
 	.p2align 4
 .Lhandle_tail:
-	movl %r11d,%ecx
+	movl %edx,%ecx
 	andl $63&(~7),%ecx
 	jz .Lhandle_7
 	shrl $3,%ecx
@@ -109,12 +107,11 @@ ENTRY(__memset)
 	jnz .Lloop_8
 
 .Lhandle_7:
-	movl %r11d,%ecx
-	andl $7,%ecx
+	andl $7,%edx
 	jz .Lende
 	.p2align 4
 .Lloop_1:
-	decl %ecx
+	decl %edx
 	movb %al,(%rdi)
 	leaq 1(%rdi),%rdi
 	jnz .Lloop_1
@@ -125,13 +122,13 @@ ENTRY(__memset)
 
 	CFI_RESTORE_STATE
 .Lbad_alignment:
-	cmpq $7,%r11
+	cmpq $7,%rdx
 	jbe .Lhandle_7
 	movq %rax,(%rdi)	/* unaligned store */
 	movq $8,%r8
 	subq %r9,%r8
 	addq %r8,%rdi
-	subq %r8,%r11
+	subq %r8,%rdx
 	jmp .Lafter_bad_alignment
 .Lfinal:
 	CFI_ENDPROC
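
For readers who do not follow AT&T assembly, the following rough C model (a sketch only, not the kernel implementation; memset_model is a hypothetical name) captures the structure of the fixed routine: the fill byte is replicated into an 8-byte pattern by multiplying with 0x0101010101010101, the bulk is stored a quadword at a time using the full 64-bit count, and the remaining 0..7 bytes are written individually.

#include <stdint.h>
#include <string.h>

/*
 * Rough C model of the fixed memset path (illustration only):
 * replicate the fill byte across a quadword, store quadwords using
 * the full 64-bit count, then finish the 0..7 byte tail.
 */
static void *memset_model(void *dst, int c, size_t len)
{
	uint64_t pattern = (uint8_t)c * 0x0101010101010101ULL;
	unsigned char *p = dst;
	size_t qwords = len >> 3;	/* 64-bit count: no 4 GiB truncation */
	size_t tail = len & 7;

	while (qwords--) {
		memcpy(p, &pattern, 8);	/* unaligned-safe 8-byte store */
		p += 8;
	}
	while (tail--)
		*p++ = (unsigned char)c;
	return dst;
}

The real assembly additionally aligns the destination first and unrolls the main loop to 64 bytes per iteration (.Lloop_64), which the sketch omits; the essential change in the patch is that the quadword and tail counts now live in full 64-bit registers (%rcx/%rdx) rather than 32-bit ones, so lengths of 4 GiB and above are no longer truncated.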