author     Linus Torvalds <torvalds@linux-foundation.org>  2012-03-22 12:13:24 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2012-03-22 12:13:24 -0400
commit     e17fdf5c6778ff77d93dd769910992e4073b9348 (patch)
tree       d1a7ca2b1faf4301b39300fbd82f9b91e605a77e /arch/x86/lib
parent     95211279c5ad00a317c98221d7e4365e02f20836 (diff)
parent     a240ada241dafe290e7532d1ddeb98fdf1419068 (diff)
Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86/asm changes from Ingo Molnar
* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86: Include probe_roms.h in probe_roms.c
x86/32: Print control and debug registers for kernel context
x86: Tighten dependencies of CPU_SUP_*_32
x86/numa: Improve internode cache alignment
x86: Fix the NMI nesting comments
x86-64: Improve insn scheduling in SAVE_ARGS_IRQ
x86-64: Fix CFI annotations for NMI nesting code
bitops: Add missing parentheses to new get_order macro
bitops: Optimise get_order()
bitops: Adjust the comment on get_order() to describe the size==0 case
x86/spinlocks: Eliminate TICKET_MASK
x86-64: Handle byte-wise tail copying in memcpy() without a loop
x86-64: Fix memcpy() to support sizes of 4Gb and above
x86-64: Fix memset() to support sizes of 4Gb and above
x86-64: Slightly shorten copy_page()
Diffstat (limited to 'arch/x86/lib')
-rw-r--r--  arch/x86/lib/copy_page_64.S | 12
-rw-r--r--  arch/x86/lib/memcpy_64.S    | 44
-rw-r--r--  arch/x86/lib/memset_64.S    | 33
3 files changed, 39 insertions(+), 50 deletions(-)
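The common thread in the memcpy()/memset() "sizes of 4Gb and above" changes below is that the old fast paths moved the 64-bit length into a 32-bit register (e.g. movl %edx,%ecx), which silently truncates any count of 4 GiB or more. A minimal user-space C sketch of that truncation (illustrative only, not kernel code):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t len = (1ULL << 32) + 16;   /* a length just over 4 GiB */
        uint32_t ecx = (uint32_t)len;       /* what `movl %edx,%ecx` keeps */

        /* Prints "4294967312 truncates to 16": a rep movsq/stosq driven by
         * the 32-bit count would touch only 16 bytes of the buffer. */
        printf("%llu truncates to %u\n", (unsigned long long)len, ecx);
        return 0;
    }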
diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S
index 01c805ba5359..6b34d04d096a 100644
--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -20,14 +20,12 @@ ENDPROC(copy_page_c)
 
 ENTRY(copy_page)
 	CFI_STARTPROC
-	subq $3*8,%rsp
-	CFI_ADJUST_CFA_OFFSET 3*8
+	subq $2*8,%rsp
+	CFI_ADJUST_CFA_OFFSET 2*8
 	movq %rbx,(%rsp)
 	CFI_REL_OFFSET rbx, 0
 	movq %r12,1*8(%rsp)
 	CFI_REL_OFFSET r12, 1*8
-	movq %r13,2*8(%rsp)
-	CFI_REL_OFFSET r13, 2*8
 
 	movl $(4096/64)-5,%ecx
 	.p2align 4
@@ -91,10 +89,8 @@ ENTRY(copy_page)
 	CFI_RESTORE rbx
 	movq 1*8(%rsp),%r12
 	CFI_RESTORE r12
-	movq 2*8(%rsp),%r13
-	CFI_RESTORE r13
-	addq $3*8,%rsp
-	CFI_ADJUST_CFA_OFFSET -3*8
+	addq $2*8,%rsp
+	CFI_ADJUST_CFA_OFFSET -2*8
 	ret
 .Lcopy_page_end:
 	CFI_ENDPROC
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index efbf2a0ecdea..1c273be7c97e 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -27,9 +27,8 @@
 .section .altinstr_replacement, "ax", @progbits
 .Lmemcpy_c:
 	movq %rdi, %rax
-
-	movl %edx, %ecx
-	shrl $3, %ecx
+	movq %rdx, %rcx
+	shrq $3, %rcx
 	andl $7, %edx
 	rep movsq
 	movl %edx, %ecx
@@ -48,8 +47,7 @@
 .section .altinstr_replacement, "ax", @progbits
 .Lmemcpy_c_e:
 	movq %rdi, %rax
-
-	movl %edx, %ecx
+	movq %rdx, %rcx
 	rep movsb
 	ret
 .Lmemcpy_e_e:
@@ -60,10 +58,7 @@ ENTRY(memcpy)
 	CFI_STARTPROC
 	movq %rdi, %rax
 
-	/*
-	 * Use 32bit CMP here to avoid long NOP padding.
-	 */
-	cmp $0x20, %edx
+	cmpq $0x20, %rdx
 	jb .Lhandle_tail
 
 	/*
@@ -72,7 +67,7 @@ ENTRY(memcpy)
 	 */
 	cmp %dil, %sil
 	jl .Lcopy_backward
-	subl $0x20, %edx
+	subq $0x20, %rdx
 .Lcopy_forward_loop:
 	subq $0x20, %rdx
 
@@ -91,7 +86,7 @@ ENTRY(memcpy)
 	movq %r11, 3*8(%rdi)
 	leaq 4*8(%rdi), %rdi
 	jae .Lcopy_forward_loop
-	addq $0x20, %rdx
+	addl $0x20, %edx
 	jmp .Lhandle_tail
 
 .Lcopy_backward:
@@ -123,11 +118,11 @@ ENTRY(memcpy)
 	/*
 	 * Calculate copy position to head.
 	 */
-	addq $0x20, %rdx
+	addl $0x20, %edx
 	subq %rdx, %rsi
 	subq %rdx, %rdi
 .Lhandle_tail:
-	cmpq $16, %rdx
+	cmpl $16, %edx
 	jb .Lless_16bytes
 
 	/*
@@ -144,7 +139,7 @@ ENTRY(memcpy)
 	retq
 	.p2align 4
 .Lless_16bytes:
-	cmpq $8, %rdx
+	cmpl $8, %edx
 	jb .Lless_8bytes
 	/*
 	 * Move data from 8 bytes to 15 bytes.
@@ -156,7 +151,7 @@ ENTRY(memcpy)
 	retq
 	.p2align 4
 .Lless_8bytes:
-	cmpq $4, %rdx
+	cmpl $4, %edx
 	jb .Lless_3bytes
 
 	/*
@@ -169,18 +164,19 @@ ENTRY(memcpy)
 	retq
 	.p2align 4
 .Lless_3bytes:
-	cmpl $0, %edx
-	je .Lend
+	subl $1, %edx
+	jb .Lend
 	/*
 	 * Move data from 1 bytes to 3 bytes.
 	 */
-.Lloop_1:
-	movb (%rsi), %r8b
-	movb %r8b, (%rdi)
-	incq %rdi
-	incq %rsi
-	decl %edx
-	jnz .Lloop_1
+	movzbl (%rsi), %ecx
+	jz .Lstore_1byte
+	movzbq 1(%rsi), %r8
+	movzbq (%rsi, %rdx), %r9
+	movb %r8b, 1(%rdi)
+	movb %r9b, (%rdi, %rdx)
+.Lstore_1byte:
+	movb %cl, (%rdi)
 
 .Lend:
 	retq
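The .Lless_3bytes rewrite above is the "byte-wise tail copying without a loop" change: for a remaining length of 1 to 3 bytes it loads the first, second and last byte up front and stores them back once, instead of looping byte by byte. A rough user-space C equivalent (an illustrative sketch with a made-up helper name, not the kernel implementation):

    #include <stddef.h>

    /* Copy the final 1..3 bytes without a loop: read the first byte, and for
     * n > 1 also the second and last byte (which are the same byte when
     * n == 2), then store them back in the same pattern.  Mirrors the
     * movzbl/movzbq load-then-store sequence in the assembly. */
    static void copy_tail_1_to_3(unsigned char *dst, const unsigned char *src, size_t n)
    {
        unsigned char first = src[0];

        if (n > 1) {
            unsigned char second = src[1];
            unsigned char last   = src[n - 1];

            dst[1]     = second;
            dst[n - 1] = last;
        }
        dst[0] = first;
    }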
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index 79bd454b78a3..2dcb3808cbda 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -19,16 +19,15 @@
 .section .altinstr_replacement, "ax", @progbits
 .Lmemset_c:
 	movq %rdi,%r9
-	movl %edx,%r8d
-	andl $7,%r8d
-	movl %edx,%ecx
-	shrl $3,%ecx
+	movq %rdx,%rcx
+	andl $7,%edx
+	shrq $3,%rcx
 	/* expand byte value */
 	movzbl %sil,%esi
 	movabs $0x0101010101010101,%rax
-	mulq %rsi		/* with rax, clobbers rdx */
+	imulq %rsi,%rax
 	rep stosq
-	movl %r8d,%ecx
+	movl %edx,%ecx
 	rep stosb
 	movq %r9,%rax
 	ret
@@ -50,7 +49,7 @@
 .Lmemset_c_e:
 	movq %rdi,%r9
 	movb %sil,%al
-	movl %edx,%ecx
+	movq %rdx,%rcx
 	rep stosb
 	movq %r9,%rax
 	ret
@@ -61,12 +60,11 @@ ENTRY(memset)
 ENTRY(__memset)
 	CFI_STARTPROC
 	movq %rdi,%r10
-	movq %rdx,%r11
 
 	/* expand byte value */
 	movzbl %sil,%ecx
 	movabs $0x0101010101010101,%rax
-	mul %rcx		/* with rax, clobbers rdx */
+	imulq %rcx,%rax
 
 	/* align dst */
 	movl %edi,%r9d
@@ -75,13 +73,13 @@ ENTRY(__memset)
 	CFI_REMEMBER_STATE
 .Lafter_bad_alignment:
 
-	movl %r11d,%ecx
-	shrl $6,%ecx
+	movq %rdx,%rcx
+	shrq $6,%rcx
 	jz .Lhandle_tail
 
 	.p2align 4
 .Lloop_64:
-	decl %ecx
+	decq %rcx
 	movq %rax,(%rdi)
 	movq %rax,8(%rdi)
 	movq %rax,16(%rdi)
@@ -97,7 +95,7 @@ ENTRY(__memset)
 	   to predict jump tables. */
 	.p2align 4
 .Lhandle_tail:
-	movl %r11d,%ecx
+	movl %edx,%ecx
 	andl $63&(~7),%ecx
 	jz .Lhandle_7
 	shrl $3,%ecx
@@ -109,12 +107,11 @@ ENTRY(__memset)
 	jnz .Lloop_8
 
 .Lhandle_7:
-	movl %r11d,%ecx
-	andl $7,%ecx
+	andl $7,%edx
 	jz .Lende
 	.p2align 4
 .Lloop_1:
-	decl %ecx
+	decl %edx
 	movb %al,(%rdi)
 	leaq 1(%rdi),%rdi
 	jnz .Lloop_1
@@ -125,13 +122,13 @@ ENTRY(__memset)
 
 	CFI_RESTORE_STATE
 .Lbad_alignment:
-	cmpq $7,%r11
+	cmpq $7,%rdx
 	jbe .Lhandle_7
 	movq %rax,(%rdi)	/* unaligned store */
 	movq $8,%r8
 	subq %r9,%r8
 	addq %r8,%rdi
-	subq %r8,%r11
+	subq %r8,%rdx
 	jmp .Lafter_bad_alignment
 .Lfinal:
 	CFI_ENDPROC
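In the memset() changes above, the length now stays in %rdx/%rcx as a full 64-bit value, and the byte-pattern expansion switches from mulq (which clobbers %rdx) to a two-operand imulq, so the extra copies of the length in %r8/%r11 can be dropped. A stand-alone C sketch of the same byte-replication trick (illustrative only, not kernel code):

    #include <stdint.h>

    /* Replicate a fill byte into all eight bytes of a 64-bit word, as the
     * movzbl + movabs + imulq sequence does: 0xab -> 0xabababababababab. */
    static inline uint64_t expand_fill_byte(uint8_t c)
    {
        return (uint64_t)c * 0x0101010101010101ULL;
    }

Only the low 64 bits of the product are needed, which is why the widening mulq and its implicit %rdx clobber can be avoided.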