author		Jan Beulich <JBeulich@suse.com>		2012-01-05 11:10:42 -0500
committer	Ingo Molnar <mingo@elte.hu>		2012-01-26 05:50:04 -0500
commit		5d7244e7c984cecead412bde6395ce18618a4a37
tree		d1b468fe47733ab3770f127d44d04ac2937965ee /arch/x86/lib/memset_64.S
parent		426932909093e4e7729777a0e2beed4b54911361
x86-64: Fix memset() to support sizes of 4Gb and above
While currently there doesn't appear to be any reachable in-tree case where such large memory blocks may be passed to memset() (alloc_bootmem() being the primary non-reachable one, as it gets called with suitably large sizes in FLATMEM configurations), we have recently hit the problem a second time in our Xen kernels. Rather than working around it a second time, prevent others from falling into the same trap by fixing this long-standing limitation.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/r/4F05D992020000780006AA09@nat28.tlf.novell.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
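The limitation being fixed: the old code kept the memset() length in 32-bit registers, so any size of 4Gb or above was silently truncated. A minimal user-space C sketch of that failure mode (the values here are purely illustrative, not taken from the kernel):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t len = (1ULL << 32) + 16;	/* a little over 4Gb */
	uint32_t trunc = (uint32_t)len;		/* what a 32-bit count register keeps */

	printf("requested: %llu bytes\n", (unsigned long long)len);
	printf("actually used: %u bytes\n", trunc);	/* only 16 bytes would be set */
	return 0;
}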
Diffstat (limited to 'arch/x86/lib/memset_64.S')
-rw-r--r--	arch/x86/lib/memset_64.S	33
1 file changed, 15 insertions, 18 deletions
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index 79bd454b78a..2dcb3808cbd 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -19,16 +19,15 @@
 	.section .altinstr_replacement, "ax", @progbits
 .Lmemset_c:
 	movq %rdi,%r9
-	movl %edx,%r8d
-	andl $7,%r8d
-	movl %edx,%ecx
-	shrl $3,%ecx
+	movq %rdx,%rcx
+	andl $7,%edx
+	shrq $3,%rcx
 	/* expand byte value */
 	movzbl %sil,%esi
 	movabs $0x0101010101010101,%rax
-	mulq %rsi	/* with rax, clobbers rdx */
+	imulq %rsi,%rax
 	rep stosq
-	movl %r8d,%ecx
+	movl %edx,%ecx
 	rep stosb
 	movq %r9,%rax
 	ret
@@ -50,7 +49,7 @@
 .Lmemset_c_e:
 	movq %rdi,%r9
 	movb %sil,%al
-	movl %edx,%ecx
+	movq %rdx,%rcx
 	rep stosb
 	movq %r9,%rax
 	ret
@@ -61,12 +60,11 @@ ENTRY(memset)
 ENTRY(__memset)
 	CFI_STARTPROC
 	movq %rdi,%r10
-	movq %rdx,%r11
 
 	/* expand byte value */
 	movzbl %sil,%ecx
 	movabs $0x0101010101010101,%rax
-	mul %rcx	/* with rax, clobbers rdx */
+	imulq %rcx,%rax
 
 	/* align dst */
 	movl %edi,%r9d
@@ -75,13 +73,13 @@ ENTRY(__memset)
 	CFI_REMEMBER_STATE
 .Lafter_bad_alignment:
 
-	movl %r11d,%ecx
-	shrl $6,%ecx
+	movq %rdx,%rcx
+	shrq $6,%rcx
 	jz .Lhandle_tail
 
 	.p2align 4
 .Lloop_64:
-	decl %ecx
+	decq %rcx
 	movq %rax,(%rdi)
 	movq %rax,8(%rdi)
 	movq %rax,16(%rdi)
@@ -97,7 +95,7 @@ ENTRY(__memset)
 	   to predict jump tables. */
 	.p2align 4
 .Lhandle_tail:
-	movl %r11d,%ecx
+	movl %edx,%ecx
 	andl $63&(~7),%ecx
 	jz .Lhandle_7
 	shrl $3,%ecx
@@ -109,12 +107,11 @@ ENTRY(__memset)
 	jnz .Lloop_8
 
 .Lhandle_7:
-	movl %r11d,%ecx
-	andl $7,%ecx
+	andl $7,%edx
 	jz .Lende
 	.p2align 4
 .Lloop_1:
-	decl %ecx
+	decl %edx
 	movb %al,(%rdi)
 	leaq 1(%rdi),%rdi
 	jnz .Lloop_1
@@ -125,13 +122,13 @@ ENTRY(__memset)
 
 	CFI_RESTORE_STATE
 .Lbad_alignment:
-	cmpq $7,%r11
+	cmpq $7,%rdx
 	jbe .Lhandle_7
 	movq %rax,(%rdi)	/* unaligned store */
 	movq $8,%r8
 	subq %r9,%r8
 	addq %r8,%rdi
-	subq %r8,%r11
+	subq %r8,%rdx
 	jmp .Lafter_bad_alignment
 .Lfinal:
 	CFI_ENDPROC
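As an aside, the movabs/imulq pair kept by the patch is the usual byte-expansion trick: multiplying the fill byte by 0x0101010101010101 replicates it into all eight bytes of a 64-bit word, which "rep stosq" then stores. A minimal C sketch of the same idea (the helper name is made up for illustration):

#include <stdint.h>
#include <stdio.h>

/* Replicate one byte across a 64-bit word, as the movabs/imulq pair does. */
static uint64_t expand_byte(uint8_t c)
{
	return (uint64_t)c * 0x0101010101010101ULL;
}

int main(void)
{
	/* Prints abababababababab. */
	printf("%016llx\n", (unsigned long long)expand_byte(0xab));
	return 0;
}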