author		Linus Torvalds <torvalds@linux-foundation.org>	2012-03-22 12:13:24 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-03-22 12:13:24 -0400
commit		e17fdf5c6778ff77d93dd769910992e4073b9348
tree		d1a7ca2b1faf4301b39300fbd82f9b91e605a77e	/arch/x86/lib
parent		95211279c5ad00a317c98221d7e4365e02f20836
parent		a240ada241dafe290e7532d1ddeb98fdf1419068
Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86/asm changes from Ingo Molnar

* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86: Include probe_roms.h in probe_roms.c
  x86/32: Print control and debug registers for kerenel context
  x86: Tighten dependencies of CPU_SUP_*_32
  x86/numa: Improve internode cache alignment
  x86: Fix the NMI nesting comments
  x86-64: Improve insn scheduling in SAVE_ARGS_IRQ
  x86-64: Fix CFI annotations for NMI nesting code
  bitops: Add missing parentheses to new get_order macro
  bitops: Optimise get_order()
  bitops: Adjust the comment on get_order() to describe the size==0 case
  x86/spinlocks: Eliminate TICKET_MASK
  x86-64: Handle byte-wise tail copying in memcpy() without a loop
  x86-64: Fix memcpy() to support sizes of 4Gb and above
  x86-64: Fix memset() to support sizes of 4Gb and above
  x86-64: Slightly shorten copy_page()
Diffstat (limited to 'arch/x86/lib')
-rw-r--r--	arch/x86/lib/copy_page_64.S	| 12
-rw-r--r--	arch/x86/lib/memcpy_64.S	| 44
-rw-r--r--	arch/x86/lib/memset_64.S	| 33
3 files changed, 39 insertions(+), 50 deletions(-)
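The memcpy()/memset() hunks below widen the length handling from 32-bit to 64-bit registers. As a quick illustration of the problem the "sizes of 4Gb and above" patches address (a user-space sketch, not part of the patch), truncating a size_t count to 32 bits silently drops everything above 4 GiB:

#include <stdio.h>
#include <stddef.h>

/*
 * Stand-alone demo (not kernel code): what happens when a 64-bit
 * size_t length is moved through a 32-bit register, as the old
 * "movl %edx, %ecx" sequences in memcpy_64.S/memset_64.S did.
 */
int main(void)
{
	size_t len = (1ULL << 32) + 16;		/* request: 4 GiB + 16 bytes */
	unsigned int ecx = (unsigned int)len;	/* 32-bit truncation keeps only 16 */

	printf("requested %zu bytes, 32-bit count sees %u bytes\n", len, ecx);
	return 0;
}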
diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S
index 01c805ba5359..6b34d04d096a 100644
--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -20,14 +20,12 @@ ENDPROC(copy_page_c)
 
 ENTRY(copy_page)
 	CFI_STARTPROC
-	subq $3*8,%rsp
-	CFI_ADJUST_CFA_OFFSET 3*8
+	subq $2*8,%rsp
+	CFI_ADJUST_CFA_OFFSET 2*8
 	movq %rbx,(%rsp)
 	CFI_REL_OFFSET rbx, 0
 	movq %r12,1*8(%rsp)
 	CFI_REL_OFFSET r12, 1*8
-	movq %r13,2*8(%rsp)
-	CFI_REL_OFFSET r13, 2*8
 
 	movl $(4096/64)-5,%ecx
 	.p2align 4
@@ -91,10 +89,8 @@ ENTRY(copy_page)
 	CFI_RESTORE rbx
 	movq 1*8(%rsp),%r12
 	CFI_RESTORE r12
-	movq 2*8(%rsp),%r13
-	CFI_RESTORE r13
-	addq $3*8,%rsp
-	CFI_ADJUST_CFA_OFFSET -3*8
+	addq $2*8,%rsp
+	CFI_ADJUST_CFA_OFFSET -2*8
 	ret
 .Lcopy_page_end:
 	CFI_ENDPROC
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index efbf2a0ecdea..1c273be7c97e 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -27,9 +27,8 @@
 	.section .altinstr_replacement, "ax", @progbits
 .Lmemcpy_c:
 	movq %rdi, %rax
-
-	movl %edx, %ecx
-	shrl $3, %ecx
+	movq %rdx, %rcx
+	shrq $3, %rcx
 	andl $7, %edx
 	rep movsq
 	movl %edx, %ecx
@@ -48,8 +47,7 @@
 	.section .altinstr_replacement, "ax", @progbits
 .Lmemcpy_c_e:
 	movq %rdi, %rax
-
-	movl %edx, %ecx
+	movq %rdx, %rcx
 	rep movsb
 	ret
 .Lmemcpy_e_e:
@@ -60,10 +58,7 @@ ENTRY(memcpy)
 	CFI_STARTPROC
 	movq %rdi, %rax
 
-	/*
-	 * Use 32bit CMP here to avoid long NOP padding.
-	 */
-	cmp $0x20, %edx
+	cmpq $0x20, %rdx
 	jb .Lhandle_tail
 
 	/*
@@ -72,7 +67,7 @@ ENTRY(memcpy)
 	 */
 	cmp %dil, %sil
 	jl .Lcopy_backward
-	subl $0x20, %edx
+	subq $0x20, %rdx
 .Lcopy_forward_loop:
 	subq $0x20, %rdx
 
@@ -91,7 +86,7 @@ ENTRY(memcpy)
 	movq %r11, 3*8(%rdi)
 	leaq 4*8(%rdi), %rdi
 	jae .Lcopy_forward_loop
-	addq $0x20, %rdx
+	addl $0x20, %edx
 	jmp .Lhandle_tail
 
 .Lcopy_backward:
@@ -123,11 +118,11 @@ ENTRY(memcpy)
 	/*
 	 * Calculate copy position to head.
 	 */
-	addq $0x20, %rdx
+	addl $0x20, %edx
 	subq %rdx, %rsi
 	subq %rdx, %rdi
 .Lhandle_tail:
-	cmpq $16, %rdx
+	cmpl $16, %edx
 	jb .Lless_16bytes
 
 	/*
@@ -144,7 +139,7 @@ ENTRY(memcpy)
 	retq
 	.p2align 4
 .Lless_16bytes:
-	cmpq $8, %rdx
+	cmpl $8, %edx
 	jb .Lless_8bytes
 	/*
 	 * Move data from 8 bytes to 15 bytes.
@@ -156,7 +151,7 @@ ENTRY(memcpy)
 	retq
 	.p2align 4
 .Lless_8bytes:
-	cmpq $4, %rdx
+	cmpl $4, %edx
 	jb .Lless_3bytes
 
 	/*
@@ -169,18 +164,19 @@ ENTRY(memcpy)
 	retq
 	.p2align 4
 .Lless_3bytes:
-	cmpl $0, %edx
-	je .Lend
+	subl $1, %edx
+	jb .Lend
 	/*
 	 * Move data from 1 bytes to 3 bytes.
 	 */
-.Lloop_1:
-	movb (%rsi), %r8b
-	movb %r8b, (%rdi)
-	incq %rdi
-	incq %rsi
-	decl %edx
-	jnz .Lloop_1
+	movzbl (%rsi), %ecx
+	jz .Lstore_1byte
+	movzbq 1(%rsi), %r8
+	movzbq (%rsi, %rdx), %r9
+	movb %r8b, 1(%rdi)
+	movb %r9b, (%rdi, %rdx)
+.Lstore_1byte:
+	movb %cl, (%rdi)
 
 .Lend:
 	retq
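For reference, the new .Lless_3bytes/.Lstore_1byte tail path above replaces the old one-byte-at-a-time loop with a branch-free pattern: read the first, second and last byte up front, then store them, letting the 2- and 3-byte cases share one sequence of overlapping stores. A rough C equivalent (helper name is ours, not from the patch):

#include <stddef.h>

/*
 * C sketch of the rewritten 1..3 byte tail copy in memcpy_64.S.
 * In the assembly, %edx has already been decremented by 1, so
 * (%rsi,%rdx) addresses src[len - 1].
 */
static void copy_tail_upto_3(unsigned char *dst, const unsigned char *src,
			     size_t len)
{
	if (len == 0)			/* subl $1,%edx; jb .Lend */
		return;

	unsigned char first = src[0];	/* movzbl (%rsi),%ecx */

	if (len > 1) {			/* jz .Lstore_1byte skips this for len == 1 */
		unsigned char second = src[1];		/* movzbq 1(%rsi),%r8 */
		unsigned char last = src[len - 1];	/* movzbq (%rsi,%rdx),%r9 */
		dst[1] = second;			/* for len == 2 this store and  */
		dst[len - 1] = last;			/* the next hit the same byte   */
	}
	dst[0] = first;			/* movb %cl,(%rdi) */
}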
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index 79bd454b78a3..2dcb3808cbda 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -19,16 +19,15 @@
 	.section .altinstr_replacement, "ax", @progbits
 .Lmemset_c:
 	movq %rdi,%r9
-	movl %edx,%r8d
-	andl $7,%r8d
-	movl %edx,%ecx
-	shrl $3,%ecx
+	movq %rdx,%rcx
+	andl $7,%edx
+	shrq $3,%rcx
 	/* expand byte value */
 	movzbl %sil,%esi
 	movabs $0x0101010101010101,%rax
-	mulq %rsi /* with rax, clobbers rdx */
+	imulq %rsi,%rax
 	rep stosq
-	movl %r8d,%ecx
+	movl %edx,%ecx
 	rep stosb
 	movq %r9,%rax
 	ret
@@ -50,7 +49,7 @@
 .Lmemset_c_e:
 	movq %rdi,%r9
 	movb %sil,%al
-	movl %edx,%ecx
+	movq %rdx,%rcx
 	rep stosb
 	movq %r9,%rax
 	ret
@@ -61,12 +60,11 @@ ENTRY(memset)
 ENTRY(__memset)
 	CFI_STARTPROC
 	movq %rdi,%r10
-	movq %rdx,%r11
 
 	/* expand byte value */
 	movzbl %sil,%ecx
 	movabs $0x0101010101010101,%rax
-	mul %rcx /* with rax, clobbers rdx */
+	imulq %rcx,%rax
 
 	/* align dst */
 	movl %edi,%r9d
@@ -75,13 +73,13 @@ ENTRY(__memset)
 	CFI_REMEMBER_STATE
 .Lafter_bad_alignment:
 
-	movl %r11d,%ecx
-	shrl $6,%ecx
+	movq %rdx,%rcx
+	shrq $6,%rcx
 	jz .Lhandle_tail
 
 	.p2align 4
 .Lloop_64:
-	decl %ecx
+	decq %rcx
 	movq %rax,(%rdi)
 	movq %rax,8(%rdi)
 	movq %rax,16(%rdi)
@@ -97,7 +95,7 @@ ENTRY(__memset)
 	   to predict jump tables. */
 	.p2align 4
 .Lhandle_tail:
-	movl %r11d,%ecx
+	movl %edx,%ecx
 	andl $63&(~7),%ecx
 	jz .Lhandle_7
 	shrl $3,%ecx
@@ -109,12 +107,11 @@ ENTRY(__memset)
 	jnz .Lloop_8
 
 .Lhandle_7:
-	movl %r11d,%ecx
-	andl $7,%ecx
+	andl $7,%edx
 	jz .Lende
 	.p2align 4
 .Lloop_1:
-	decl %ecx
+	decl %edx
 	movb %al,(%rdi)
 	leaq 1(%rdi),%rdi
 	jnz .Lloop_1
@@ -125,13 +122,13 @@ ENTRY(__memset)
 
 	CFI_RESTORE_STATE
 .Lbad_alignment:
-	cmpq $7,%r11
+	cmpq $7,%rdx
 	jbe .Lhandle_7
 	movq %rax,(%rdi) /* unaligned store */
 	movq $8,%r8
 	subq %r9,%r8
 	addq %r8,%rdi
-	subq %r8,%r11
+	subq %r8,%rdx
 	jmp .Lafter_bad_alignment
 .Lfinal:
 	CFI_ENDPROC
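One more note on the memset() hunks: the switch from mulq to imulq is what lets the length stay in %rdx, since mulq writes its high half into %rdx (the old code's "clobbers rdx" comment) and therefore needed the count stashed in %r8/%r11. The byte-broadcast trick itself is unchanged; a small C sketch of it (helper name is ours):

#include <stdint.h>
#include <assert.h>

/*
 * C sketch of the fill-byte expansion in memset_64.S: multiplying the
 * zero-extended byte by 0x0101010101010101 replicates it into all eight
 * byte lanes of a 64-bit word, which rep stosq then stores repeatedly.
 */
static uint64_t broadcast_byte(unsigned char c)
{
	return (uint64_t)c * 0x0101010101010101ULL;
}

int main(void)
{
	assert(broadcast_byte(0xab) == 0xababababababababULL);
	return 0;
}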