aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/lib
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/lib')
-rw-r--r--arch/x86/lib/Makefile1
-rw-r--r--arch/x86/lib/atomic64_386_32.S6
-rw-r--r--arch/x86/lib/atomic64_cx8_32.S6
-rw-r--r--arch/x86/lib/checksum_32.S63
-rw-r--r--arch/x86/lib/clear_page_64.S33
-rw-r--r--arch/x86/lib/cmpxchg16b_emu.S65
-rw-r--r--arch/x86/lib/copy_user_64.S71
-rw-r--r--arch/x86/lib/csum-copy_64.S242
-rw-r--r--arch/x86/lib/csum-partial_64.c2
-rw-r--r--arch/x86/lib/delay.c2
-rw-r--r--arch/x86/lib/memcpy_32.c199
-rw-r--r--arch/x86/lib/memcpy_64.S203
-rw-r--r--arch/x86/lib/memmove_64.S224
-rw-r--r--arch/x86/lib/memmove_64.c21
-rw-r--r--arch/x86/lib/memset_64.S54
-rw-r--r--arch/x86/lib/rwsem_64.S56
-rw-r--r--arch/x86/lib/semaphore_32.S38
-rw-r--r--arch/x86/lib/thunk_32.S18
-rw-r--r--arch/x86/lib/thunk_64.S27
19 files changed, 926 insertions, 405 deletions
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index e10cf070ede0..f2479f19ddde 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -42,4 +42,5 @@ else
42 lib-y += memmove_64.o memset_64.o 42 lib-y += memmove_64.o memset_64.o
43 lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o 43 lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o
44 lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem_64.o 44 lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem_64.o
45 lib-y += cmpxchg16b_emu.o
45endif 46endif
diff --git a/arch/x86/lib/atomic64_386_32.S b/arch/x86/lib/atomic64_386_32.S
index 2cda60a06e65..e8e7e0d06f42 100644
--- a/arch/x86/lib/atomic64_386_32.S
+++ b/arch/x86/lib/atomic64_386_32.S
@@ -15,14 +15,12 @@
15 15
16/* if you want SMP support, implement these with real spinlocks */ 16/* if you want SMP support, implement these with real spinlocks */
17.macro LOCK reg 17.macro LOCK reg
18 pushfl 18 pushfl_cfi
19 CFI_ADJUST_CFA_OFFSET 4
20 cli 19 cli
21.endm 20.endm
22 21
23.macro UNLOCK reg 22.macro UNLOCK reg
24 popfl 23 popfl_cfi
25 CFI_ADJUST_CFA_OFFSET -4
26.endm 24.endm
27 25
28#define BEGIN(op) \ 26#define BEGIN(op) \
diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S
index 71e080de3352..391a083674b4 100644
--- a/arch/x86/lib/atomic64_cx8_32.S
+++ b/arch/x86/lib/atomic64_cx8_32.S
@@ -14,14 +14,12 @@
14#include <asm/dwarf2.h> 14#include <asm/dwarf2.h>
15 15
16.macro SAVE reg 16.macro SAVE reg
17 pushl %\reg 17 pushl_cfi %\reg
18 CFI_ADJUST_CFA_OFFSET 4
19 CFI_REL_OFFSET \reg, 0 18 CFI_REL_OFFSET \reg, 0
20.endm 19.endm
21 20
22.macro RESTORE reg 21.macro RESTORE reg
23 popl %\reg 22 popl_cfi %\reg
24 CFI_ADJUST_CFA_OFFSET -4
25 CFI_RESTORE \reg 23 CFI_RESTORE \reg
26.endm 24.endm
27 25
diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S
index adbccd0bbb78..78d16a554db0 100644
--- a/arch/x86/lib/checksum_32.S
+++ b/arch/x86/lib/checksum_32.S
@@ -50,11 +50,9 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
50 */ 50 */
51ENTRY(csum_partial) 51ENTRY(csum_partial)
52 CFI_STARTPROC 52 CFI_STARTPROC
53 pushl %esi 53 pushl_cfi %esi
54 CFI_ADJUST_CFA_OFFSET 4
55 CFI_REL_OFFSET esi, 0 54 CFI_REL_OFFSET esi, 0
56 pushl %ebx 55 pushl_cfi %ebx
57 CFI_ADJUST_CFA_OFFSET 4
58 CFI_REL_OFFSET ebx, 0 56 CFI_REL_OFFSET ebx, 0
59 movl 20(%esp),%eax # Function arg: unsigned int sum 57 movl 20(%esp),%eax # Function arg: unsigned int sum
60 movl 16(%esp),%ecx # Function arg: int len 58 movl 16(%esp),%ecx # Function arg: int len
@@ -132,11 +130,9 @@ ENTRY(csum_partial)
132 jz 8f 130 jz 8f
133 roll $8, %eax 131 roll $8, %eax
1348: 1328:
135 popl %ebx 133 popl_cfi %ebx
136 CFI_ADJUST_CFA_OFFSET -4
137 CFI_RESTORE ebx 134 CFI_RESTORE ebx
138 popl %esi 135 popl_cfi %esi
139 CFI_ADJUST_CFA_OFFSET -4
140 CFI_RESTORE esi 136 CFI_RESTORE esi
141 ret 137 ret
142 CFI_ENDPROC 138 CFI_ENDPROC
@@ -148,11 +144,9 @@ ENDPROC(csum_partial)
148 144
149ENTRY(csum_partial) 145ENTRY(csum_partial)
150 CFI_STARTPROC 146 CFI_STARTPROC
151 pushl %esi 147 pushl_cfi %esi
152 CFI_ADJUST_CFA_OFFSET 4
153 CFI_REL_OFFSET esi, 0 148 CFI_REL_OFFSET esi, 0
154 pushl %ebx 149 pushl_cfi %ebx
155 CFI_ADJUST_CFA_OFFSET 4
156 CFI_REL_OFFSET ebx, 0 150 CFI_REL_OFFSET ebx, 0
157 movl 20(%esp),%eax # Function arg: unsigned int sum 151 movl 20(%esp),%eax # Function arg: unsigned int sum
158 movl 16(%esp),%ecx # Function arg: int len 152 movl 16(%esp),%ecx # Function arg: int len
@@ -260,11 +254,9 @@ ENTRY(csum_partial)
260 jz 90f 254 jz 90f
261 roll $8, %eax 255 roll $8, %eax
26290: 25690:
263 popl %ebx 257 popl_cfi %ebx
264 CFI_ADJUST_CFA_OFFSET -4
265 CFI_RESTORE ebx 258 CFI_RESTORE ebx
266 popl %esi 259 popl_cfi %esi
267 CFI_ADJUST_CFA_OFFSET -4
268 CFI_RESTORE esi 260 CFI_RESTORE esi
269 ret 261 ret
270 CFI_ENDPROC 262 CFI_ENDPROC
@@ -309,14 +301,11 @@ ENTRY(csum_partial_copy_generic)
309 CFI_STARTPROC 301 CFI_STARTPROC
310 subl $4,%esp 302 subl $4,%esp
311 CFI_ADJUST_CFA_OFFSET 4 303 CFI_ADJUST_CFA_OFFSET 4
312 pushl %edi 304 pushl_cfi %edi
313 CFI_ADJUST_CFA_OFFSET 4
314 CFI_REL_OFFSET edi, 0 305 CFI_REL_OFFSET edi, 0
315 pushl %esi 306 pushl_cfi %esi
316 CFI_ADJUST_CFA_OFFSET 4
317 CFI_REL_OFFSET esi, 0 307 CFI_REL_OFFSET esi, 0
318 pushl %ebx 308 pushl_cfi %ebx
319 CFI_ADJUST_CFA_OFFSET 4
320 CFI_REL_OFFSET ebx, 0 309 CFI_REL_OFFSET ebx, 0
321 movl ARGBASE+16(%esp),%eax # sum 310 movl ARGBASE+16(%esp),%eax # sum
322 movl ARGBASE+12(%esp),%ecx # len 311 movl ARGBASE+12(%esp),%ecx # len
@@ -426,17 +415,13 @@ DST( movb %cl, (%edi) )
426 415
427.previous 416.previous
428 417
429 popl %ebx 418 popl_cfi %ebx
430 CFI_ADJUST_CFA_OFFSET -4
431 CFI_RESTORE ebx 419 CFI_RESTORE ebx
432 popl %esi 420 popl_cfi %esi
433 CFI_ADJUST_CFA_OFFSET -4
434 CFI_RESTORE esi 421 CFI_RESTORE esi
435 popl %edi 422 popl_cfi %edi
436 CFI_ADJUST_CFA_OFFSET -4
437 CFI_RESTORE edi 423 CFI_RESTORE edi
438 popl %ecx # equivalent to addl $4,%esp 424 popl_cfi %ecx # equivalent to addl $4,%esp
439 CFI_ADJUST_CFA_OFFSET -4
440 ret 425 ret
441 CFI_ENDPROC 426 CFI_ENDPROC
442ENDPROC(csum_partial_copy_generic) 427ENDPROC(csum_partial_copy_generic)
@@ -459,14 +444,11 @@ ENDPROC(csum_partial_copy_generic)
459 444
460ENTRY(csum_partial_copy_generic) 445ENTRY(csum_partial_copy_generic)
461 CFI_STARTPROC 446 CFI_STARTPROC
462 pushl %ebx 447 pushl_cfi %ebx
463 CFI_ADJUST_CFA_OFFSET 4
464 CFI_REL_OFFSET ebx, 0 448 CFI_REL_OFFSET ebx, 0
465 pushl %edi 449 pushl_cfi %edi
466 CFI_ADJUST_CFA_OFFSET 4
467 CFI_REL_OFFSET edi, 0 450 CFI_REL_OFFSET edi, 0
468 pushl %esi 451 pushl_cfi %esi
469 CFI_ADJUST_CFA_OFFSET 4
470 CFI_REL_OFFSET esi, 0 452 CFI_REL_OFFSET esi, 0
471 movl ARGBASE+4(%esp),%esi #src 453 movl ARGBASE+4(%esp),%esi #src
472 movl ARGBASE+8(%esp),%edi #dst 454 movl ARGBASE+8(%esp),%edi #dst
@@ -527,14 +509,11 @@ DST( movb %dl, (%edi) )
527 jmp 7b 509 jmp 7b
528.previous 510.previous
529 511
530 popl %esi 512 popl_cfi %esi
531 CFI_ADJUST_CFA_OFFSET -4
532 CFI_RESTORE esi 513 CFI_RESTORE esi
533 popl %edi 514 popl_cfi %edi
534 CFI_ADJUST_CFA_OFFSET -4
535 CFI_RESTORE edi 515 CFI_RESTORE edi
536 popl %ebx 516 popl_cfi %ebx
537 CFI_ADJUST_CFA_OFFSET -4
538 CFI_RESTORE ebx 517 CFI_RESTORE ebx
539 ret 518 ret
540 CFI_ENDPROC 519 CFI_ENDPROC
diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S
index aa4326bfb24a..f2145cfa12a6 100644
--- a/arch/x86/lib/clear_page_64.S
+++ b/arch/x86/lib/clear_page_64.S
@@ -1,5 +1,6 @@
1#include <linux/linkage.h> 1#include <linux/linkage.h>
2#include <asm/dwarf2.h> 2#include <asm/dwarf2.h>
3#include <asm/alternative-asm.h>
3 4
4/* 5/*
5 * Zero a page. 6 * Zero a page.
@@ -14,6 +15,15 @@ ENTRY(clear_page_c)
14 CFI_ENDPROC 15 CFI_ENDPROC
15ENDPROC(clear_page_c) 16ENDPROC(clear_page_c)
16 17
18ENTRY(clear_page_c_e)
19 CFI_STARTPROC
20 movl $4096,%ecx
21 xorl %eax,%eax
22 rep stosb
23 ret
24 CFI_ENDPROC
25ENDPROC(clear_page_c_e)
26
17ENTRY(clear_page) 27ENTRY(clear_page)
18 CFI_STARTPROC 28 CFI_STARTPROC
19 xorl %eax,%eax 29 xorl %eax,%eax
@@ -38,21 +48,26 @@ ENTRY(clear_page)
38.Lclear_page_end: 48.Lclear_page_end:
39ENDPROC(clear_page) 49ENDPROC(clear_page)
40 50
41 /* Some CPUs run faster using the string instructions. 51 /*
42 It is also a lot simpler. Use this when possible */ 52 * Some CPUs support enhanced REP MOVSB/STOSB instructions.
53 * It is recommended to use this when possible.
54 * If enhanced REP MOVSB/STOSB is not available, try to use fast string.
55 * Otherwise, use original function.
56 *
57 */
43 58
44#include <asm/cpufeature.h> 59#include <asm/cpufeature.h>
45 60
46 .section .altinstr_replacement,"ax" 61 .section .altinstr_replacement,"ax"
471: .byte 0xeb /* jmp <disp8> */ 621: .byte 0xeb /* jmp <disp8> */
48 .byte (clear_page_c - clear_page) - (2f - 1b) /* offset */ 63 .byte (clear_page_c - clear_page) - (2f - 1b) /* offset */
492: 642: .byte 0xeb /* jmp <disp8> */
65 .byte (clear_page_c_e - clear_page) - (3f - 2b) /* offset */
663:
50 .previous 67 .previous
51 .section .altinstructions,"a" 68 .section .altinstructions,"a"
52 .align 8 69 altinstruction_entry clear_page,1b,X86_FEATURE_REP_GOOD,\
53 .quad clear_page 70 .Lclear_page_end-clear_page, 2b-1b
54 .quad 1b 71 altinstruction_entry clear_page,2b,X86_FEATURE_ERMS, \
55 .word X86_FEATURE_REP_GOOD 72 .Lclear_page_end-clear_page,3b-2b
56 .byte .Lclear_page_end - clear_page
57 .byte 2b - 1b
58 .previous 73 .previous
diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S
new file mode 100644
index 000000000000..1e572c507d06
--- /dev/null
+++ b/arch/x86/lib/cmpxchg16b_emu.S
@@ -0,0 +1,65 @@
1/*
2 * This program is free software; you can redistribute it and/or
3 * modify it under the terms of the GNU General Public License
4 * as published by the Free Software Foundation; version 2
5 * of the License.
6 *
7 */
8#include <linux/linkage.h>
9#include <asm/alternative-asm.h>
10#include <asm/frame.h>
11#include <asm/dwarf2.h>
12
13#ifdef CONFIG_SMP
14#define SEG_PREFIX %gs:
15#else
16#define SEG_PREFIX
17#endif
18
19.text
20
21/*
22 * Inputs:
23 * %rsi : memory location to compare
24 * %rax : low 64 bits of old value
25 * %rdx : high 64 bits of old value
26 * %rbx : low 64 bits of new value
27 * %rcx : high 64 bits of new value
28 * %al : Operation successful
29 */
30ENTRY(this_cpu_cmpxchg16b_emu)
31CFI_STARTPROC
32
33#
34# Emulate 'cmpxchg16b %gs:(%rsi)' except we return the result in %al not
35# via the ZF. Caller will access %al to get result.
36#
37# Note that this is only useful for a cpuops operation. Meaning that we
38# do *not* have a fully atomic operation but just an operation that is
39# *atomic* on a single cpu (as provided by the this_cpu_xx class of
40# macros).
41#
42this_cpu_cmpxchg16b_emu:
43 pushf
44 cli
45
46 cmpq SEG_PREFIX(%rsi), %rax
47 jne not_same
48 cmpq SEG_PREFIX 8(%rsi), %rdx
49 jne not_same
50
51 movq %rbx, SEG_PREFIX(%rsi)
52 movq %rcx, SEG_PREFIX 8(%rsi)
53
54 popf
55 mov $1, %al
56 ret
57
58 not_same:
59 popf
60 xor %al,%al
61 ret
62
63CFI_ENDPROC
64
65ENDPROC(this_cpu_cmpxchg16b_emu)
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index a460158b5ac5..024840266ba0 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -15,23 +15,30 @@
15#include <asm/asm-offsets.h> 15#include <asm/asm-offsets.h>
16#include <asm/thread_info.h> 16#include <asm/thread_info.h>
17#include <asm/cpufeature.h> 17#include <asm/cpufeature.h>
18#include <asm/alternative-asm.h>
18 19
19 .macro ALTERNATIVE_JUMP feature,orig,alt 20/*
21 * By placing feature2 after feature1 in altinstructions section, we logically
22 * implement:
23 * If CPU has feature2, jmp to alt2 is used
24 * else if CPU has feature1, jmp to alt1 is used
25 * else jmp to orig is used.
26 */
27 .macro ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2
200: 280:
21 .byte 0xe9 /* 32bit jump */ 29 .byte 0xe9 /* 32bit jump */
22 .long \orig-1f /* by default jump to orig */ 30 .long \orig-1f /* by default jump to orig */
231: 311:
24 .section .altinstr_replacement,"ax" 32 .section .altinstr_replacement,"ax"
252: .byte 0xe9 /* near jump with 32bit immediate */ 332: .byte 0xe9 /* near jump with 32bit immediate */
26 .long \alt-1b /* offset */ /* or alternatively to alt */ 34 .long \alt1-1b /* offset */ /* or alternatively to alt1 */
353: .byte 0xe9 /* near jump with 32bit immediate */
36 .long \alt2-1b /* offset */ /* or alternatively to alt2 */
27 .previous 37 .previous
38
28 .section .altinstructions,"a" 39 .section .altinstructions,"a"
29 .align 8 40 altinstruction_entry 0b,2b,\feature1,5,5
30 .quad 0b 41 altinstruction_entry 0b,3b,\feature2,5,5
31 .quad 2b
32 .word \feature /* when feature is set */
33 .byte 5
34 .byte 5
35 .previous 42 .previous
36 .endm 43 .endm
37 44
@@ -72,8 +79,10 @@ ENTRY(_copy_to_user)
72 addq %rdx,%rcx 79 addq %rdx,%rcx
73 jc bad_to_user 80 jc bad_to_user
74 cmpq TI_addr_limit(%rax),%rcx 81 cmpq TI_addr_limit(%rax),%rcx
75 jae bad_to_user 82 ja bad_to_user
76 ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string 83 ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \
84 copy_user_generic_unrolled,copy_user_generic_string, \
85 copy_user_enhanced_fast_string
77 CFI_ENDPROC 86 CFI_ENDPROC
78ENDPROC(_copy_to_user) 87ENDPROC(_copy_to_user)
79 88
@@ -85,8 +94,10 @@ ENTRY(_copy_from_user)
85 addq %rdx,%rcx 94 addq %rdx,%rcx
86 jc bad_from_user 95 jc bad_from_user
87 cmpq TI_addr_limit(%rax),%rcx 96 cmpq TI_addr_limit(%rax),%rcx
88 jae bad_from_user 97 ja bad_from_user
89 ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string 98 ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \
99 copy_user_generic_unrolled,copy_user_generic_string, \
100 copy_user_enhanced_fast_string
90 CFI_ENDPROC 101 CFI_ENDPROC
91ENDPROC(_copy_from_user) 102ENDPROC(_copy_from_user)
92 103
@@ -117,7 +128,7 @@ ENDPROC(bad_from_user)
117 * rdx count 128 * rdx count
118 * 129 *
119 * Output: 130 * Output:
120 * eax uncopied bytes or 0 if successfull. 131 * eax uncopied bytes or 0 if successful.
121 */ 132 */
122ENTRY(copy_user_generic_unrolled) 133ENTRY(copy_user_generic_unrolled)
123 CFI_STARTPROC 134 CFI_STARTPROC
@@ -255,3 +266,37 @@ ENTRY(copy_user_generic_string)
255 .previous 266 .previous
256 CFI_ENDPROC 267 CFI_ENDPROC
257ENDPROC(copy_user_generic_string) 268ENDPROC(copy_user_generic_string)
269
270/*
271 * Some CPUs are adding enhanced REP MOVSB/STOSB instructions.
272 * It's recommended to use enhanced REP MOVSB/STOSB if it's enabled.
273 *
274 * Input:
275 * rdi destination
276 * rsi source
277 * rdx count
278 *
279 * Output:
280 * eax uncopied bytes or 0 if successful.
281 */
282ENTRY(copy_user_enhanced_fast_string)
283 CFI_STARTPROC
284 andl %edx,%edx
285 jz 2f
286 movl %edx,%ecx
2871: rep
288 movsb
2892: xorl %eax,%eax
290 ret
291
292 .section .fixup,"ax"
29312: movl %ecx,%edx /* ecx is zerorest also */
294 jmp copy_user_handle_tail
295 .previous
296
297 .section __ex_table,"a"
298 .align 8
299 .quad 1b,12b
300 .previous
301 CFI_ENDPROC
302ENDPROC(copy_user_enhanced_fast_string)
diff --git a/arch/x86/lib/csum-copy_64.S b/arch/x86/lib/csum-copy_64.S
index f0dba36578ea..fb903b758da8 100644
--- a/arch/x86/lib/csum-copy_64.S
+++ b/arch/x86/lib/csum-copy_64.S
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright 2002,2003 Andi Kleen, SuSE Labs. 2 * Copyright 2002, 2003 Andi Kleen, SuSE Labs.
3 * 3 *
4 * This file is subject to the terms and conditions of the GNU General Public 4 * This file is subject to the terms and conditions of the GNU General Public
5 * License. See the file COPYING in the main directory of this archive 5 * License. See the file COPYING in the main directory of this archive
6 * for more details. No warranty for anything given at all. 6 * for more details. No warranty for anything given at all.
@@ -11,82 +11,82 @@
11 11
12/* 12/*
13 * Checksum copy with exception handling. 13 * Checksum copy with exception handling.
14 * On exceptions src_err_ptr or dst_err_ptr is set to -EFAULT and the 14 * On exceptions src_err_ptr or dst_err_ptr is set to -EFAULT and the
15 * destination is zeroed. 15 * destination is zeroed.
16 * 16 *
17 * Input 17 * Input
18 * rdi source 18 * rdi source
19 * rsi destination 19 * rsi destination
20 * edx len (32bit) 20 * edx len (32bit)
21 * ecx sum (32bit) 21 * ecx sum (32bit)
22 * r8 src_err_ptr (int) 22 * r8 src_err_ptr (int)
23 * r9 dst_err_ptr (int) 23 * r9 dst_err_ptr (int)
24 * 24 *
25 * Output 25 * Output
26 * eax 64bit sum. undefined in case of exception. 26 * eax 64bit sum. undefined in case of exception.
27 * 27 *
28 * Wrappers need to take care of valid exception sum and zeroing. 28 * Wrappers need to take care of valid exception sum and zeroing.
29 * They also should align source or destination to 8 bytes. 29 * They also should align source or destination to 8 bytes.
30 */ 30 */
31 31
32 .macro source 32 .macro source
3310: 3310:
34 .section __ex_table,"a" 34 .section __ex_table, "a"
35 .align 8 35 .align 8
36 .quad 10b,.Lbad_source 36 .quad 10b, .Lbad_source
37 .previous 37 .previous
38 .endm 38 .endm
39 39
40 .macro dest 40 .macro dest
4120: 4120:
42 .section __ex_table,"a" 42 .section __ex_table, "a"
43 .align 8 43 .align 8
44 .quad 20b,.Lbad_dest 44 .quad 20b, .Lbad_dest
45 .previous 45 .previous
46 .endm 46 .endm
47 47
48 .macro ignore L=.Lignore 48 .macro ignore L=.Lignore
4930: 4930:
50 .section __ex_table,"a" 50 .section __ex_table, "a"
51 .align 8 51 .align 8
52 .quad 30b,\L 52 .quad 30b, \L
53 .previous 53 .previous
54 .endm 54 .endm
55 55
56 56
57ENTRY(csum_partial_copy_generic) 57ENTRY(csum_partial_copy_generic)
58 CFI_STARTPROC 58 CFI_STARTPROC
59 cmpl $3*64,%edx 59 cmpl $3*64, %edx
60 jle .Lignore 60 jle .Lignore
61 61
62.Lignore: 62.Lignore:
63 subq $7*8,%rsp 63 subq $7*8, %rsp
64 CFI_ADJUST_CFA_OFFSET 7*8 64 CFI_ADJUST_CFA_OFFSET 7*8
65 movq %rbx,2*8(%rsp) 65 movq %rbx, 2*8(%rsp)
66 CFI_REL_OFFSET rbx, 2*8 66 CFI_REL_OFFSET rbx, 2*8
67 movq %r12,3*8(%rsp) 67 movq %r12, 3*8(%rsp)
68 CFI_REL_OFFSET r12, 3*8 68 CFI_REL_OFFSET r12, 3*8
69 movq %r14,4*8(%rsp) 69 movq %r14, 4*8(%rsp)
70 CFI_REL_OFFSET r14, 4*8 70 CFI_REL_OFFSET r14, 4*8
71 movq %r13,5*8(%rsp) 71 movq %r13, 5*8(%rsp)
72 CFI_REL_OFFSET r13, 5*8 72 CFI_REL_OFFSET r13, 5*8
73 movq %rbp,6*8(%rsp) 73 movq %rbp, 6*8(%rsp)
74 CFI_REL_OFFSET rbp, 6*8 74 CFI_REL_OFFSET rbp, 6*8
75 75
76 movq %r8,(%rsp) 76 movq %r8, (%rsp)
77 movq %r9,1*8(%rsp) 77 movq %r9, 1*8(%rsp)
78
79 movl %ecx,%eax
80 movl %edx,%ecx
81 78
82 xorl %r9d,%r9d 79 movl %ecx, %eax
83 movq %rcx,%r12 80 movl %edx, %ecx
84 81
85 shrq $6,%r12 82 xorl %r9d, %r9d
86 jz .Lhandle_tail /* < 64 */ 83 movq %rcx, %r12
84
85 shrq $6, %r12
86 jz .Lhandle_tail /* < 64 */
87 87
88 clc 88 clc
89 89
90 /* main loop. clear in 64 byte blocks */ 90 /* main loop. clear in 64 byte blocks */
91 /* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */ 91 /* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
92 /* r11: temp3, rdx: temp4, r12 loopcnt */ 92 /* r11: temp3, rdx: temp4, r12 loopcnt */
@@ -94,156 +94,156 @@ ENTRY(csum_partial_copy_generic)
94 .p2align 4 94 .p2align 4
95.Lloop: 95.Lloop:
96 source 96 source
97 movq (%rdi),%rbx 97 movq (%rdi), %rbx
98 source 98 source
99 movq 8(%rdi),%r8 99 movq 8(%rdi), %r8
100 source 100 source
101 movq 16(%rdi),%r11 101 movq 16(%rdi), %r11
102 source 102 source
103 movq 24(%rdi),%rdx 103 movq 24(%rdi), %rdx
104 104
105 source 105 source
106 movq 32(%rdi),%r10 106 movq 32(%rdi), %r10
107 source 107 source
108 movq 40(%rdi),%rbp 108 movq 40(%rdi), %rbp
109 source 109 source
110 movq 48(%rdi),%r14 110 movq 48(%rdi), %r14
111 source 111 source
112 movq 56(%rdi),%r13 112 movq 56(%rdi), %r13
113 113
114 ignore 2f 114 ignore 2f
115 prefetcht0 5*64(%rdi) 115 prefetcht0 5*64(%rdi)
1162: 1162:
117 adcq %rbx,%rax 117 adcq %rbx, %rax
118 adcq %r8,%rax 118 adcq %r8, %rax
119 adcq %r11,%rax 119 adcq %r11, %rax
120 adcq %rdx,%rax 120 adcq %rdx, %rax
121 adcq %r10,%rax 121 adcq %r10, %rax
122 adcq %rbp,%rax 122 adcq %rbp, %rax
123 adcq %r14,%rax 123 adcq %r14, %rax
124 adcq %r13,%rax 124 adcq %r13, %rax
125 125
126 decl %r12d 126 decl %r12d
127 127
128 dest 128 dest
129 movq %rbx,(%rsi) 129 movq %rbx, (%rsi)
130 dest 130 dest
131 movq %r8,8(%rsi) 131 movq %r8, 8(%rsi)
132 dest 132 dest
133 movq %r11,16(%rsi) 133 movq %r11, 16(%rsi)
134 dest 134 dest
135 movq %rdx,24(%rsi) 135 movq %rdx, 24(%rsi)
136 136
137 dest 137 dest
138 movq %r10,32(%rsi) 138 movq %r10, 32(%rsi)
139 dest 139 dest
140 movq %rbp,40(%rsi) 140 movq %rbp, 40(%rsi)
141 dest 141 dest
142 movq %r14,48(%rsi) 142 movq %r14, 48(%rsi)
143 dest 143 dest
144 movq %r13,56(%rsi) 144 movq %r13, 56(%rsi)
145 145
1463: 1463:
147
148 leaq 64(%rdi),%rdi
149 leaq 64(%rsi),%rsi
150 147
151 jnz .Lloop 148 leaq 64(%rdi), %rdi
149 leaq 64(%rsi), %rsi
152 150
153 adcq %r9,%rax 151 jnz .Lloop
154 152
155 /* do last upto 56 bytes */ 153 adcq %r9, %rax
154
155 /* do last up to 56 bytes */
156.Lhandle_tail: 156.Lhandle_tail:
157 /* ecx: count */ 157 /* ecx: count */
158 movl %ecx,%r10d 158 movl %ecx, %r10d
159 andl $63,%ecx 159 andl $63, %ecx
160 shrl $3,%ecx 160 shrl $3, %ecx
161 jz .Lfold 161 jz .Lfold
162 clc 162 clc
163 .p2align 4 163 .p2align 4
164.Lloop_8: 164.Lloop_8:
165 source 165 source
166 movq (%rdi),%rbx 166 movq (%rdi), %rbx
167 adcq %rbx,%rax 167 adcq %rbx, %rax
168 decl %ecx 168 decl %ecx
169 dest 169 dest
170 movq %rbx,(%rsi) 170 movq %rbx, (%rsi)
171 leaq 8(%rsi),%rsi /* preserve carry */ 171 leaq 8(%rsi), %rsi /* preserve carry */
172 leaq 8(%rdi),%rdi 172 leaq 8(%rdi), %rdi
173 jnz .Lloop_8 173 jnz .Lloop_8
174 adcq %r9,%rax /* add in carry */ 174 adcq %r9, %rax /* add in carry */
175 175
176.Lfold: 176.Lfold:
177 /* reduce checksum to 32bits */ 177 /* reduce checksum to 32bits */
178 movl %eax,%ebx 178 movl %eax, %ebx
179 shrq $32,%rax 179 shrq $32, %rax
180 addl %ebx,%eax 180 addl %ebx, %eax
181 adcl %r9d,%eax 181 adcl %r9d, %eax
182 182
183 /* do last upto 6 bytes */ 183 /* do last up to 6 bytes */
184.Lhandle_7: 184.Lhandle_7:
185 movl %r10d,%ecx 185 movl %r10d, %ecx
186 andl $7,%ecx 186 andl $7, %ecx
187 shrl $1,%ecx 187 shrl $1, %ecx
188 jz .Lhandle_1 188 jz .Lhandle_1
189 movl $2,%edx 189 movl $2, %edx
190 xorl %ebx,%ebx 190 xorl %ebx, %ebx
191 clc 191 clc
192 .p2align 4 192 .p2align 4
193.Lloop_1: 193.Lloop_1:
194 source 194 source
195 movw (%rdi),%bx 195 movw (%rdi), %bx
196 adcl %ebx,%eax 196 adcl %ebx, %eax
197 decl %ecx 197 decl %ecx
198 dest 198 dest
199 movw %bx,(%rsi) 199 movw %bx, (%rsi)
200 leaq 2(%rdi),%rdi 200 leaq 2(%rdi), %rdi
201 leaq 2(%rsi),%rsi 201 leaq 2(%rsi), %rsi
202 jnz .Lloop_1 202 jnz .Lloop_1
203 adcl %r9d,%eax /* add in carry */ 203 adcl %r9d, %eax /* add in carry */
204 204
205 /* handle last odd byte */ 205 /* handle last odd byte */
206.Lhandle_1: 206.Lhandle_1:
207 testl $1,%r10d 207 testl $1, %r10d
208 jz .Lende 208 jz .Lende
209 xorl %ebx,%ebx 209 xorl %ebx, %ebx
210 source 210 source
211 movb (%rdi),%bl 211 movb (%rdi), %bl
212 dest 212 dest
213 movb %bl,(%rsi) 213 movb %bl, (%rsi)
214 addl %ebx,%eax 214 addl %ebx, %eax
215 adcl %r9d,%eax /* carry */ 215 adcl %r9d, %eax /* carry */
216 216
217 CFI_REMEMBER_STATE 217 CFI_REMEMBER_STATE
218.Lende: 218.Lende:
219 movq 2*8(%rsp),%rbx 219 movq 2*8(%rsp), %rbx
220 CFI_RESTORE rbx 220 CFI_RESTORE rbx
221 movq 3*8(%rsp),%r12 221 movq 3*8(%rsp), %r12
222 CFI_RESTORE r12 222 CFI_RESTORE r12
223 movq 4*8(%rsp),%r14 223 movq 4*8(%rsp), %r14
224 CFI_RESTORE r14 224 CFI_RESTORE r14
225 movq 5*8(%rsp),%r13 225 movq 5*8(%rsp), %r13
226 CFI_RESTORE r13 226 CFI_RESTORE r13
227 movq 6*8(%rsp),%rbp 227 movq 6*8(%rsp), %rbp
228 CFI_RESTORE rbp 228 CFI_RESTORE rbp
229 addq $7*8,%rsp 229 addq $7*8, %rsp
230 CFI_ADJUST_CFA_OFFSET -7*8 230 CFI_ADJUST_CFA_OFFSET -7*8
231 ret 231 ret
232 CFI_RESTORE_STATE 232 CFI_RESTORE_STATE
233 233
234 /* Exception handlers. Very simple, zeroing is done in the wrappers */ 234 /* Exception handlers. Very simple, zeroing is done in the wrappers */
235.Lbad_source: 235.Lbad_source:
236 movq (%rsp),%rax 236 movq (%rsp), %rax
237 testq %rax,%rax 237 testq %rax, %rax
238 jz .Lende 238 jz .Lende
239 movl $-EFAULT,(%rax) 239 movl $-EFAULT, (%rax)
240 jmp .Lende 240 jmp .Lende
241 241
242.Lbad_dest: 242.Lbad_dest:
243 movq 8(%rsp),%rax 243 movq 8(%rsp), %rax
244 testq %rax,%rax 244 testq %rax, %rax
245 jz .Lende 245 jz .Lende
246 movl $-EFAULT,(%rax) 246 movl $-EFAULT, (%rax)
247 jmp .Lende 247 jmp .Lende
248 CFI_ENDPROC 248 CFI_ENDPROC
249ENDPROC(csum_partial_copy_generic) 249ENDPROC(csum_partial_copy_generic)
diff --git a/arch/x86/lib/csum-partial_64.c b/arch/x86/lib/csum-partial_64.c
index bf51144d97e1..9845371c5c36 100644
--- a/arch/x86/lib/csum-partial_64.c
+++ b/arch/x86/lib/csum-partial_64.c
@@ -84,7 +84,7 @@ static unsigned do_csum(const unsigned char *buff, unsigned len)
84 count64--; 84 count64--;
85 } 85 }
86 86
87 /* last upto 7 8byte blocks */ 87 /* last up to 7 8byte blocks */
88 count %= 8; 88 count %= 8;
89 while (count) { 89 while (count) {
90 asm("addq %1,%0\n\t" 90 asm("addq %1,%0\n\t"
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index ff485d361182..fc45ba887d05 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -121,7 +121,7 @@ inline void __const_udelay(unsigned long xloops)
121 asm("mull %%edx" 121 asm("mull %%edx"
122 :"=d" (xloops), "=&a" (d0) 122 :"=d" (xloops), "=&a" (d0)
123 :"1" (xloops), "0" 123 :"1" (xloops), "0"
124 (cpu_data(raw_smp_processor_id()).loops_per_jiffy * (HZ/4))); 124 (this_cpu_read(cpu_info.loops_per_jiffy) * (HZ/4)));
125 125
126 __delay(++xloops); 126 __delay(++xloops);
127} 127}
diff --git a/arch/x86/lib/memcpy_32.c b/arch/x86/lib/memcpy_32.c
index 5415a9d06f53..b908a59eccf5 100644
--- a/arch/x86/lib/memcpy_32.c
+++ b/arch/x86/lib/memcpy_32.c
@@ -22,22 +22,187 @@ EXPORT_SYMBOL(memset);
22 22
23void *memmove(void *dest, const void *src, size_t n) 23void *memmove(void *dest, const void *src, size_t n)
24{ 24{
25 int d0, d1, d2; 25 int d0,d1,d2,d3,d4,d5;
26 26 char *ret = dest;
27 if (dest < src) { 27
28 memcpy(dest, src, n); 28 __asm__ __volatile__(
29 } else { 29 /* Handle more 16bytes in loop */
30 __asm__ __volatile__( 30 "cmp $0x10, %0\n\t"
31 "std\n\t" 31 "jb 1f\n\t"
32 "rep\n\t" 32
33 "movsb\n\t" 33 /* Decide forward/backward copy mode */
34 "cld" 34 "cmp %2, %1\n\t"
35 : "=&c" (d0), "=&S" (d1), "=&D" (d2) 35 "jb 2f\n\t"
36 :"0" (n), 36
37 "1" (n-1+src), 37 /*
38 "2" (n-1+dest) 38 * movs instruction have many startup latency
39 :"memory"); 39 * so we handle small size by general register.
40 } 40 */
41 return dest; 41 "cmp $680, %0\n\t"
42 "jb 3f\n\t"
43 /*
44 * movs instruction is only good for aligned case.
45 */
46 "mov %1, %3\n\t"
47 "xor %2, %3\n\t"
48 "and $0xff, %3\n\t"
49 "jz 4f\n\t"
50 "3:\n\t"
51 "sub $0x10, %0\n\t"
52
53 /*
54 * We gobble 16byts forward in each loop.
55 */
56 "3:\n\t"
57 "sub $0x10, %0\n\t"
58 "mov 0*4(%1), %3\n\t"
59 "mov 1*4(%1), %4\n\t"
60 "mov %3, 0*4(%2)\n\t"
61 "mov %4, 1*4(%2)\n\t"
62 "mov 2*4(%1), %3\n\t"
63 "mov 3*4(%1), %4\n\t"
64 "mov %3, 2*4(%2)\n\t"
65 "mov %4, 3*4(%2)\n\t"
66 "lea 0x10(%1), %1\n\t"
67 "lea 0x10(%2), %2\n\t"
68 "jae 3b\n\t"
69 "add $0x10, %0\n\t"
70 "jmp 1f\n\t"
71
72 /*
73 * Handle data forward by movs.
74 */
75 ".p2align 4\n\t"
76 "4:\n\t"
77 "mov -4(%1, %0), %3\n\t"
78 "lea -4(%2, %0), %4\n\t"
79 "shr $2, %0\n\t"
80 "rep movsl\n\t"
81 "mov %3, (%4)\n\t"
82 "jmp 11f\n\t"
83 /*
84 * Handle data backward by movs.
85 */
86 ".p2align 4\n\t"
87 "6:\n\t"
88 "mov (%1), %3\n\t"
89 "mov %2, %4\n\t"
90 "lea -4(%1, %0), %1\n\t"
91 "lea -4(%2, %0), %2\n\t"
92 "shr $2, %0\n\t"
93 "std\n\t"
94 "rep movsl\n\t"
95 "mov %3,(%4)\n\t"
96 "cld\n\t"
97 "jmp 11f\n\t"
98
99 /*
100 * Start to prepare for backward copy.
101 */
102 ".p2align 4\n\t"
103 "2:\n\t"
104 "cmp $680, %0\n\t"
105 "jb 5f\n\t"
106 "mov %1, %3\n\t"
107 "xor %2, %3\n\t"
108 "and $0xff, %3\n\t"
109 "jz 6b\n\t"
110
111 /*
112 * Calculate copy position to tail.
113 */
114 "5:\n\t"
115 "add %0, %1\n\t"
116 "add %0, %2\n\t"
117 "sub $0x10, %0\n\t"
118
119 /*
120 * We gobble 16byts backward in each loop.
121 */
122 "7:\n\t"
123 "sub $0x10, %0\n\t"
124
125 "mov -1*4(%1), %3\n\t"
126 "mov -2*4(%1), %4\n\t"
127 "mov %3, -1*4(%2)\n\t"
128 "mov %4, -2*4(%2)\n\t"
129 "mov -3*4(%1), %3\n\t"
130 "mov -4*4(%1), %4\n\t"
131 "mov %3, -3*4(%2)\n\t"
132 "mov %4, -4*4(%2)\n\t"
133 "lea -0x10(%1), %1\n\t"
134 "lea -0x10(%2), %2\n\t"
135 "jae 7b\n\t"
136 /*
137 * Calculate copy position to head.
138 */
139 "add $0x10, %0\n\t"
140 "sub %0, %1\n\t"
141 "sub %0, %2\n\t"
142
143 /*
144 * Move data from 8 bytes to 15 bytes.
145 */
146 ".p2align 4\n\t"
147 "1:\n\t"
148 "cmp $8, %0\n\t"
149 "jb 8f\n\t"
150 "mov 0*4(%1), %3\n\t"
151 "mov 1*4(%1), %4\n\t"
152 "mov -2*4(%1, %0), %5\n\t"
153 "mov -1*4(%1, %0), %1\n\t"
154
155 "mov %3, 0*4(%2)\n\t"
156 "mov %4, 1*4(%2)\n\t"
157 "mov %5, -2*4(%2, %0)\n\t"
158 "mov %1, -1*4(%2, %0)\n\t"
159 "jmp 11f\n\t"
160
161 /*
162 * Move data from 4 bytes to 7 bytes.
163 */
164 ".p2align 4\n\t"
165 "8:\n\t"
166 "cmp $4, %0\n\t"
167 "jb 9f\n\t"
168 "mov 0*4(%1), %3\n\t"
169 "mov -1*4(%1, %0), %4\n\t"
170 "mov %3, 0*4(%2)\n\t"
171 "mov %4, -1*4(%2, %0)\n\t"
172 "jmp 11f\n\t"
173
174 /*
175 * Move data from 2 bytes to 3 bytes.
176 */
177 ".p2align 4\n\t"
178 "9:\n\t"
179 "cmp $2, %0\n\t"
180 "jb 10f\n\t"
181 "movw 0*2(%1), %%dx\n\t"
182 "movw -1*2(%1, %0), %%bx\n\t"
183 "movw %%dx, 0*2(%2)\n\t"
184 "movw %%bx, -1*2(%2, %0)\n\t"
185 "jmp 11f\n\t"
186
187 /*
188 * Move data for 1 byte.
189 */
190 ".p2align 4\n\t"
191 "10:\n\t"
192 "cmp $1, %0\n\t"
193 "jb 11f\n\t"
194 "movb (%1), %%cl\n\t"
195 "movb %%cl, (%2)\n\t"
196 ".p2align 4\n\t"
197 "11:"
198 : "=&c" (d0), "=&S" (d1), "=&D" (d2),
199 "=r" (d3),"=r" (d4), "=r"(d5)
200 :"0" (n),
201 "1" (src),
202 "2" (dest)
203 :"memory");
204
205 return ret;
206
42} 207}
43EXPORT_SYMBOL(memmove); 208EXPORT_SYMBOL(memmove);
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index bcbcd1e0f7d5..efbf2a0ecdea 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -4,6 +4,7 @@
4 4
5#include <asm/cpufeature.h> 5#include <asm/cpufeature.h>
6#include <asm/dwarf2.h> 6#include <asm/dwarf2.h>
7#include <asm/alternative-asm.h>
7 8
8/* 9/*
9 * memcpy - Copy a memory block. 10 * memcpy - Copy a memory block.
@@ -37,107 +38,173 @@
37.Lmemcpy_e: 38.Lmemcpy_e:
38 .previous 39 .previous
39 40
41/*
42 * memcpy_c_e() - enhanced fast string memcpy. This is faster and simpler than
43 * memcpy_c. Use memcpy_c_e when possible.
44 *
45 * This gets patched over the unrolled variant (below) via the
46 * alternative instructions framework:
47 */
48 .section .altinstr_replacement, "ax", @progbits
49.Lmemcpy_c_e:
50 movq %rdi, %rax
51
52 movl %edx, %ecx
53 rep movsb
54 ret
55.Lmemcpy_e_e:
56 .previous
57
40ENTRY(__memcpy) 58ENTRY(__memcpy)
41ENTRY(memcpy) 59ENTRY(memcpy)
42 CFI_STARTPROC 60 CFI_STARTPROC
61 movq %rdi, %rax
43 62
44 /* 63 /*
45 * Put the number of full 64-byte blocks into %ecx. 64 * Use 32bit CMP here to avoid long NOP padding.
46 * Tail portion is handled at the end:
47 */ 65 */
48 movq %rdi, %rax 66 cmp $0x20, %edx
49 movl %edx, %ecx 67 jb .Lhandle_tail
50 shrl $6, %ecx
51 jz .Lhandle_tail
52 68
53 .p2align 4
54.Lloop_64:
55 /* 69 /*
56 * We decrement the loop index here - and the zero-flag is 70 * We check whether memory false dependence could occur,
 57 * checked at the end of the loop (instructions in between do
58 * not change the zero flag):
59 */ 72 */
60 decl %ecx 73 cmp %dil, %sil
74 jl .Lcopy_backward
75 subl $0x20, %edx
76.Lcopy_forward_loop:
77 subq $0x20, %rdx
61 78
62 /* 79 /*
63 * Move in blocks of 4x16 bytes: 80 * Move in blocks of 4x8 bytes:
64 */ 81 */
65 movq 0*8(%rsi), %r11 82 movq 0*8(%rsi), %r8
66 movq 1*8(%rsi), %r8 83 movq 1*8(%rsi), %r9
67 movq %r11, 0*8(%rdi) 84 movq 2*8(%rsi), %r10
68 movq %r8, 1*8(%rdi) 85 movq 3*8(%rsi), %r11
69 86 leaq 4*8(%rsi), %rsi
70 movq 2*8(%rsi), %r9 87
71 movq 3*8(%rsi), %r10 88 movq %r8, 0*8(%rdi)
72 movq %r9, 2*8(%rdi) 89 movq %r9, 1*8(%rdi)
73 movq %r10, 3*8(%rdi) 90 movq %r10, 2*8(%rdi)
74 91 movq %r11, 3*8(%rdi)
75 movq 4*8(%rsi), %r11 92 leaq 4*8(%rdi), %rdi
76 movq 5*8(%rsi), %r8 93 jae .Lcopy_forward_loop
77 movq %r11, 4*8(%rdi) 94 addq $0x20, %rdx
78 movq %r8, 5*8(%rdi) 95 jmp .Lhandle_tail
79 96
80 movq 6*8(%rsi), %r9 97.Lcopy_backward:
81 movq 7*8(%rsi), %r10 98 /*
82 movq %r9, 6*8(%rdi) 99 * Calculate copy position to tail.
83 movq %r10, 7*8(%rdi) 100 */
84 101 addq %rdx, %rsi
85 leaq 64(%rsi), %rsi 102 addq %rdx, %rdi
86 leaq 64(%rdi), %rdi 103 subq $0x20, %rdx
87 104 /*
88 jnz .Lloop_64 105 * At most 3 ALU operations in one cycle,
106 * so append NOPS in the same 16bytes trunk.
107 */
108 .p2align 4
109.Lcopy_backward_loop:
110 subq $0x20, %rdx
111 movq -1*8(%rsi), %r8
112 movq -2*8(%rsi), %r9
113 movq -3*8(%rsi), %r10
114 movq -4*8(%rsi), %r11
115 leaq -4*8(%rsi), %rsi
116 movq %r8, -1*8(%rdi)
117 movq %r9, -2*8(%rdi)
118 movq %r10, -3*8(%rdi)
119 movq %r11, -4*8(%rdi)
120 leaq -4*8(%rdi), %rdi
121 jae .Lcopy_backward_loop
89 122
123 /*
124 * Calculate copy position to head.
125 */
126 addq $0x20, %rdx
127 subq %rdx, %rsi
128 subq %rdx, %rdi
90.Lhandle_tail: 129.Lhandle_tail:
91 movl %edx, %ecx 130 cmpq $16, %rdx
92 andl $63, %ecx 131 jb .Lless_16bytes
93 shrl $3, %ecx
94 jz .Lhandle_7
95 132
133 /*
134 * Move data from 16 bytes to 31 bytes.
135 */
136 movq 0*8(%rsi), %r8
137 movq 1*8(%rsi), %r9
138 movq -2*8(%rsi, %rdx), %r10
139 movq -1*8(%rsi, %rdx), %r11
140 movq %r8, 0*8(%rdi)
141 movq %r9, 1*8(%rdi)
142 movq %r10, -2*8(%rdi, %rdx)
143 movq %r11, -1*8(%rdi, %rdx)
144 retq
96 .p2align 4 145 .p2align 4
97.Lloop_8: 146.Lless_16bytes:
98 decl %ecx 147 cmpq $8, %rdx
99 movq (%rsi), %r8 148 jb .Lless_8bytes
100 movq %r8, (%rdi) 149 /*
101 leaq 8(%rdi), %rdi 150 * Move data from 8 bytes to 15 bytes.
102 leaq 8(%rsi), %rsi 151 */
103 jnz .Lloop_8 152 movq 0*8(%rsi), %r8
104 153 movq -1*8(%rsi, %rdx), %r9
105.Lhandle_7: 154 movq %r8, 0*8(%rdi)
106 movl %edx, %ecx 155 movq %r9, -1*8(%rdi, %rdx)
107 andl $7, %ecx 156 retq
108 jz .Lend 157 .p2align 4
158.Lless_8bytes:
159 cmpq $4, %rdx
160 jb .Lless_3bytes
109 161
162 /*
163 * Move data from 4 bytes to 7 bytes.
164 */
165 movl (%rsi), %ecx
166 movl -4(%rsi, %rdx), %r8d
167 movl %ecx, (%rdi)
168 movl %r8d, -4(%rdi, %rdx)
169 retq
110 .p2align 4 170 .p2align 4
171.Lless_3bytes:
172 cmpl $0, %edx
173 je .Lend
174 /*
175 * Move data from 1 bytes to 3 bytes.
176 */
111.Lloop_1: 177.Lloop_1:
112 movb (%rsi), %r8b 178 movb (%rsi), %r8b
113 movb %r8b, (%rdi) 179 movb %r8b, (%rdi)
114 incq %rdi 180 incq %rdi
115 incq %rsi 181 incq %rsi
116 decl %ecx 182 decl %edx
117 jnz .Lloop_1 183 jnz .Lloop_1
118 184
119.Lend: 185.Lend:
120 ret 186 retq
121 CFI_ENDPROC 187 CFI_ENDPROC
122ENDPROC(memcpy) 188ENDPROC(memcpy)
123ENDPROC(__memcpy) 189ENDPROC(__memcpy)
124 190
125 /* 191 /*
126 * Some CPUs run faster using the string copy instructions. 192 * Some CPUs are adding enhanced REP MOVSB/STOSB feature
127 * It is also a lot simpler. Use this when possible: 193 * If the feature is supported, memcpy_c_e() is the first choice.
128 */ 194 * If enhanced rep movsb copy is not available, use fast string copy
129 195 * memcpy_c() when possible. This is faster and code is simpler than
130 .section .altinstructions, "a" 196 * original memcpy().
131 .align 8 197 * Otherwise, original memcpy() is used.
132 .quad memcpy 198 * In .altinstructions section, ERMS feature is placed after REG_GOOD
133 .quad .Lmemcpy_c 199 * feature to implement the right patch order.
134 .word X86_FEATURE_REP_GOOD 200 *
135
136 /*
137 * Replace only beginning, memcpy is used to apply alternatives, 201 * Replace only beginning, memcpy is used to apply alternatives,
138 * so it is silly to overwrite itself with nops - reboot is the 202 * so it is silly to overwrite itself with nops - reboot is the
139 * only outcome... 203 * only outcome...
140 */ 204 */
141 .byte .Lmemcpy_e - .Lmemcpy_c 205 .section .altinstructions, "a"
142 .byte .Lmemcpy_e - .Lmemcpy_c 206 altinstruction_entry memcpy,.Lmemcpy_c,X86_FEATURE_REP_GOOD,\
207 .Lmemcpy_e-.Lmemcpy_c,.Lmemcpy_e-.Lmemcpy_c
208 altinstruction_entry memcpy,.Lmemcpy_c_e,X86_FEATURE_ERMS, \
209 .Lmemcpy_e_e-.Lmemcpy_c_e,.Lmemcpy_e_e-.Lmemcpy_c_e
143 .previous 210 .previous
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S
new file mode 100644
index 000000000000..d0ec9c2936d7
--- /dev/null
+++ b/arch/x86/lib/memmove_64.S
@@ -0,0 +1,224 @@
1/*
2 * Normally compiler builtins are used, but sometimes the compiler calls out
3 * of line code. Based on asm-i386/string.h.
4 *
5 * This assembly file is re-written from memmove_64.c file.
6 * - Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
7 */
8#define _STRING_C
9#include <linux/linkage.h>
10#include <asm/dwarf2.h>
11#include <asm/cpufeature.h>
12
13#undef memmove
14
15/*
16 * Implement memmove(). This can handle overlap between src and dst.
17 *
18 * Input:
19 * rdi: dest
20 * rsi: src
21 * rdx: count
22 *
23 * Output:
24 * rax: dest
25 */
26ENTRY(memmove)
27 CFI_STARTPROC
28
 29 /* Handle sizes of 32 bytes or more in the loop below */
30 mov %rdi, %rax
31 cmp $0x20, %rdx
32 jb 1f
33
34 /* Decide forward/backward copy mode */
35 cmp %rdi, %rsi
36 jge .Lmemmove_begin_forward
37 mov %rsi, %r8
38 add %rdx, %r8
39 cmp %rdi, %r8
40 jg 2f
41
42.Lmemmove_begin_forward:
43 /*
 44 * The movsq instruction has high startup latency,
 45 * so we handle small sizes with general-purpose registers.
46 */
47 cmp $680, %rdx
48 jb 3f
49 /*
50 * movsq instruction is only good for aligned case.
51 */
52
53 cmpb %dil, %sil
54 je 4f
553:
56 sub $0x20, %rdx
57 /*
 58 * We gobble 32 bytes forward in each loop.
59 */
605:
61 sub $0x20, %rdx
62 movq 0*8(%rsi), %r11
63 movq 1*8(%rsi), %r10
64 movq 2*8(%rsi), %r9
65 movq 3*8(%rsi), %r8
66 leaq 4*8(%rsi), %rsi
67
68 movq %r11, 0*8(%rdi)
69 movq %r10, 1*8(%rdi)
70 movq %r9, 2*8(%rdi)
71 movq %r8, 3*8(%rdi)
72 leaq 4*8(%rdi), %rdi
73 jae 5b
74 addq $0x20, %rdx
75 jmp 1f
76 /*
77 * Handle data forward by movsq.
78 */
79 .p2align 4
804:
81 movq %rdx, %rcx
82 movq -8(%rsi, %rdx), %r11
83 lea -8(%rdi, %rdx), %r10
84 shrq $3, %rcx
85 rep movsq
86 movq %r11, (%r10)
87 jmp 13f
88.Lmemmove_end_forward:
89
90 /*
91 * Handle data backward by movsq.
92 */
93 .p2align 4
947:
95 movq %rdx, %rcx
96 movq (%rsi), %r11
97 movq %rdi, %r10
98 leaq -8(%rsi, %rdx), %rsi
99 leaq -8(%rdi, %rdx), %rdi
100 shrq $3, %rcx
101 std
102 rep movsq
103 cld
104 movq %r11, (%r10)
105 jmp 13f
106
107 /*
108 * Start to prepare for backward copy.
109 */
110 .p2align 4
1112:
112 cmp $680, %rdx
113 jb 6f
114 cmp %dil, %sil
115 je 7b
1166:
117 /*
118 * Calculate copy position to tail.
119 */
120 addq %rdx, %rsi
121 addq %rdx, %rdi
122 subq $0x20, %rdx
123 /*
 124 * We gobble 32 bytes backward in each loop.
125 */
1268:
127 subq $0x20, %rdx
128 movq -1*8(%rsi), %r11
129 movq -2*8(%rsi), %r10
130 movq -3*8(%rsi), %r9
131 movq -4*8(%rsi), %r8
132 leaq -4*8(%rsi), %rsi
133
134 movq %r11, -1*8(%rdi)
135 movq %r10, -2*8(%rdi)
136 movq %r9, -3*8(%rdi)
137 movq %r8, -4*8(%rdi)
138 leaq -4*8(%rdi), %rdi
139 jae 8b
140 /*
141 * Calculate copy position to head.
142 */
143 addq $0x20, %rdx
144 subq %rdx, %rsi
145 subq %rdx, %rdi
1461:
147 cmpq $16, %rdx
148 jb 9f
149 /*
150 * Move data from 16 bytes to 31 bytes.
151 */
152 movq 0*8(%rsi), %r11
153 movq 1*8(%rsi), %r10
154 movq -2*8(%rsi, %rdx), %r9
155 movq -1*8(%rsi, %rdx), %r8
156 movq %r11, 0*8(%rdi)
157 movq %r10, 1*8(%rdi)
158 movq %r9, -2*8(%rdi, %rdx)
159 movq %r8, -1*8(%rdi, %rdx)
160 jmp 13f
161 .p2align 4
1629:
163 cmpq $8, %rdx
164 jb 10f
165 /*
166 * Move data from 8 bytes to 15 bytes.
167 */
168 movq 0*8(%rsi), %r11
169 movq -1*8(%rsi, %rdx), %r10
170 movq %r11, 0*8(%rdi)
171 movq %r10, -1*8(%rdi, %rdx)
172 jmp 13f
17310:
174 cmpq $4, %rdx
175 jb 11f
176 /*
177 * Move data from 4 bytes to 7 bytes.
178 */
179 movl (%rsi), %r11d
180 movl -4(%rsi, %rdx), %r10d
181 movl %r11d, (%rdi)
182 movl %r10d, -4(%rdi, %rdx)
183 jmp 13f
18411:
185 cmp $2, %rdx
186 jb 12f
187 /*
188 * Move data from 2 bytes to 3 bytes.
189 */
190 movw (%rsi), %r11w
191 movw -2(%rsi, %rdx), %r10w
192 movw %r11w, (%rdi)
193 movw %r10w, -2(%rdi, %rdx)
194 jmp 13f
19512:
196 cmp $1, %rdx
197 jb 13f
198 /*
199 * Move data for 1 byte.
200 */
201 movb (%rsi), %r11b
202 movb %r11b, (%rdi)
20313:
204 retq
205 CFI_ENDPROC
206
207 .section .altinstr_replacement,"ax"
208.Lmemmove_begin_forward_efs:
209 /* Forward moving data. */
210 movq %rdx, %rcx
211 rep movsb
212 retq
213.Lmemmove_end_forward_efs:
214 .previous
215
216 .section .altinstructions,"a"
217 .align 8
218 .quad .Lmemmove_begin_forward
219 .quad .Lmemmove_begin_forward_efs
220 .word X86_FEATURE_ERMS
221 .byte .Lmemmove_end_forward-.Lmemmove_begin_forward
222 .byte .Lmemmove_end_forward_efs-.Lmemmove_begin_forward_efs
223 .previous
224ENDPROC(memmove)
diff --git a/arch/x86/lib/memmove_64.c b/arch/x86/lib/memmove_64.c
deleted file mode 100644
index 0a33909bf122..000000000000
--- a/arch/x86/lib/memmove_64.c
+++ /dev/null
@@ -1,21 +0,0 @@
1/* Normally compiler builtins are used, but sometimes the compiler calls out
2 of line code. Based on asm-i386/string.h.
3 */
4#define _STRING_C
5#include <linux/string.h>
6#include <linux/module.h>
7
8#undef memmove
9void *memmove(void *dest, const void *src, size_t count)
10{
11 if (dest < src) {
12 return memcpy(dest, src, count);
13 } else {
14 char *p = dest + count;
15 const char *s = src + count;
16 while (count--)
17 *--p = *--s;
18 }
19 return dest;
20}
21EXPORT_SYMBOL(memmove);
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index 09d344269652..79bd454b78a3 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -2,9 +2,13 @@
2 2
3#include <linux/linkage.h> 3#include <linux/linkage.h>
4#include <asm/dwarf2.h> 4#include <asm/dwarf2.h>
5#include <asm/cpufeature.h>
6#include <asm/alternative-asm.h>
5 7
6/* 8/*
7 * ISO C memset - set a memory block to a byte value. 9 * ISO C memset - set a memory block to a byte value. This function uses fast
10 * string to get better performance than the original function. The code is
8 * 12 * simpler and shorter than the original function as well.
8 * 12 *
9 * rdi destination 13 * rdi destination
10 * rsi value (char) 14 * rsi value (char)
@@ -31,6 +35,28 @@
31.Lmemset_e: 35.Lmemset_e:
32 .previous 36 .previous
33 37
38/*
39 * ISO C memset - set a memory block to a byte value. This function uses
40 * enhanced rep stosb to override the fast string function.
41 * The code is simpler and shorter than the fast string function as well.
42 *
43 * rdi destination
44 * rsi value (char)
45 * rdx count (bytes)
46 *
47 * rax original destination
48 */
49 .section .altinstr_replacement, "ax", @progbits
50.Lmemset_c_e:
51 movq %rdi,%r9
52 movb %sil,%al
53 movl %edx,%ecx
54 rep stosb
55 movq %r9,%rax
56 ret
57.Lmemset_e_e:
58 .previous
59
34ENTRY(memset) 60ENTRY(memset)
35ENTRY(__memset) 61ENTRY(__memset)
36 CFI_STARTPROC 62 CFI_STARTPROC
@@ -112,16 +138,20 @@ ENTRY(__memset)
112ENDPROC(memset) 138ENDPROC(memset)
113ENDPROC(__memset) 139ENDPROC(__memset)
114 140
115 /* Some CPUs run faster using the string instructions. 141 /* Some CPUs support enhanced REP MOVSB/STOSB feature.
116 It is also a lot simpler. Use this when possible */ 142 * It is recommended to use this when possible.
117 143 *
118#include <asm/cpufeature.h> 144 * If enhanced REP MOVSB/STOSB feature is not available, use fast string
119 145 * instructions.
146 *
147 * Otherwise, use original memset function.
148 *
149 * In .altinstructions section, ERMS feature is placed after REG_GOOD
150 * feature to implement the right patch order.
151 */
120 .section .altinstructions,"a" 152 .section .altinstructions,"a"
121 .align 8 153 altinstruction_entry memset,.Lmemset_c,X86_FEATURE_REP_GOOD,\
122 .quad memset 154 .Lfinal-memset,.Lmemset_e-.Lmemset_c
123 .quad .Lmemset_c 155 altinstruction_entry memset,.Lmemset_c_e,X86_FEATURE_ERMS, \
124 .word X86_FEATURE_REP_GOOD 156 .Lfinal-memset,.Lmemset_e_e-.Lmemset_c_e
125 .byte .Lfinal - memset
126 .byte .Lmemset_e - .Lmemset_c
127 .previous 157 .previous
diff --git a/arch/x86/lib/rwsem_64.S b/arch/x86/lib/rwsem_64.S
index 41fcf00e49df..67743977398b 100644
--- a/arch/x86/lib/rwsem_64.S
+++ b/arch/x86/lib/rwsem_64.S
@@ -23,43 +23,50 @@
23#include <asm/dwarf2.h> 23#include <asm/dwarf2.h>
24 24
25#define save_common_regs \ 25#define save_common_regs \
26 pushq %rdi; \ 26 pushq_cfi %rdi; CFI_REL_OFFSET rdi, 0; \
27 pushq %rsi; \ 27 pushq_cfi %rsi; CFI_REL_OFFSET rsi, 0; \
28 pushq %rcx; \ 28 pushq_cfi %rcx; CFI_REL_OFFSET rcx, 0; \
29 pushq %r8; \ 29 pushq_cfi %r8; CFI_REL_OFFSET r8, 0; \
30 pushq %r9; \ 30 pushq_cfi %r9; CFI_REL_OFFSET r9, 0; \
31 pushq %r10; \ 31 pushq_cfi %r10; CFI_REL_OFFSET r10, 0; \
32 pushq %r11 32 pushq_cfi %r11; CFI_REL_OFFSET r11, 0
33 33
34#define restore_common_regs \ 34#define restore_common_regs \
35 popq %r11; \ 35 popq_cfi %r11; CFI_RESTORE r11; \
36 popq %r10; \ 36 popq_cfi %r10; CFI_RESTORE r10; \
37 popq %r9; \ 37 popq_cfi %r9; CFI_RESTORE r9; \
38 popq %r8; \ 38 popq_cfi %r8; CFI_RESTORE r8; \
39 popq %rcx; \ 39 popq_cfi %rcx; CFI_RESTORE rcx; \
40 popq %rsi; \ 40 popq_cfi %rsi; CFI_RESTORE rsi; \
41 popq %rdi 41 popq_cfi %rdi; CFI_RESTORE rdi
42 42
43/* Fix up special calling conventions */ 43/* Fix up special calling conventions */
44ENTRY(call_rwsem_down_read_failed) 44ENTRY(call_rwsem_down_read_failed)
45 CFI_STARTPROC
45 save_common_regs 46 save_common_regs
46 pushq %rdx 47 pushq_cfi %rdx
48 CFI_REL_OFFSET rdx, 0
47 movq %rax,%rdi 49 movq %rax,%rdi
48 call rwsem_down_read_failed 50 call rwsem_down_read_failed
49 popq %rdx 51 popq_cfi %rdx
52 CFI_RESTORE rdx
50 restore_common_regs 53 restore_common_regs
51 ret 54 ret
52 ENDPROC(call_rwsem_down_read_failed) 55 CFI_ENDPROC
56ENDPROC(call_rwsem_down_read_failed)
53 57
54ENTRY(call_rwsem_down_write_failed) 58ENTRY(call_rwsem_down_write_failed)
59 CFI_STARTPROC
55 save_common_regs 60 save_common_regs
56 movq %rax,%rdi 61 movq %rax,%rdi
57 call rwsem_down_write_failed 62 call rwsem_down_write_failed
58 restore_common_regs 63 restore_common_regs
59 ret 64 ret
60 ENDPROC(call_rwsem_down_write_failed) 65 CFI_ENDPROC
66ENDPROC(call_rwsem_down_write_failed)
61 67
62ENTRY(call_rwsem_wake) 68ENTRY(call_rwsem_wake)
69 CFI_STARTPROC
63 decl %edx /* do nothing if still outstanding active readers */ 70 decl %edx /* do nothing if still outstanding active readers */
64 jnz 1f 71 jnz 1f
65 save_common_regs 72 save_common_regs
@@ -67,15 +74,20 @@ ENTRY(call_rwsem_wake)
67 call rwsem_wake 74 call rwsem_wake
68 restore_common_regs 75 restore_common_regs
691: ret 761: ret
70 ENDPROC(call_rwsem_wake) 77 CFI_ENDPROC
78ENDPROC(call_rwsem_wake)
71 79
72/* Fix up special calling conventions */ 80/* Fix up special calling conventions */
73ENTRY(call_rwsem_downgrade_wake) 81ENTRY(call_rwsem_downgrade_wake)
82 CFI_STARTPROC
74 save_common_regs 83 save_common_regs
75 pushq %rdx 84 pushq_cfi %rdx
85 CFI_REL_OFFSET rdx, 0
76 movq %rax,%rdi 86 movq %rax,%rdi
77 call rwsem_downgrade_wake 87 call rwsem_downgrade_wake
78 popq %rdx 88 popq_cfi %rdx
89 CFI_RESTORE rdx
79 restore_common_regs 90 restore_common_regs
80 ret 91 ret
81 ENDPROC(call_rwsem_downgrade_wake) 92 CFI_ENDPROC
93ENDPROC(call_rwsem_downgrade_wake)
diff --git a/arch/x86/lib/semaphore_32.S b/arch/x86/lib/semaphore_32.S
index 648fe4741782..06691daa4108 100644
--- a/arch/x86/lib/semaphore_32.S
+++ b/arch/x86/lib/semaphore_32.S
@@ -36,7 +36,7 @@
36 */ 36 */
37#ifdef CONFIG_SMP 37#ifdef CONFIG_SMP
38ENTRY(__write_lock_failed) 38ENTRY(__write_lock_failed)
39 CFI_STARTPROC simple 39 CFI_STARTPROC
40 FRAME 40 FRAME
412: LOCK_PREFIX 412: LOCK_PREFIX
42 addl $ RW_LOCK_BIAS,(%eax) 42 addl $ RW_LOCK_BIAS,(%eax)
@@ -74,29 +74,23 @@ ENTRY(__read_lock_failed)
74/* Fix up special calling conventions */ 74/* Fix up special calling conventions */
75ENTRY(call_rwsem_down_read_failed) 75ENTRY(call_rwsem_down_read_failed)
76 CFI_STARTPROC 76 CFI_STARTPROC
77 push %ecx 77 pushl_cfi %ecx
78 CFI_ADJUST_CFA_OFFSET 4
79 CFI_REL_OFFSET ecx,0 78 CFI_REL_OFFSET ecx,0
80 push %edx 79 pushl_cfi %edx
81 CFI_ADJUST_CFA_OFFSET 4
82 CFI_REL_OFFSET edx,0 80 CFI_REL_OFFSET edx,0
83 call rwsem_down_read_failed 81 call rwsem_down_read_failed
84 pop %edx 82 popl_cfi %edx
85 CFI_ADJUST_CFA_OFFSET -4 83 popl_cfi %ecx
86 pop %ecx
87 CFI_ADJUST_CFA_OFFSET -4
88 ret 84 ret
89 CFI_ENDPROC 85 CFI_ENDPROC
90 ENDPROC(call_rwsem_down_read_failed) 86 ENDPROC(call_rwsem_down_read_failed)
91 87
92ENTRY(call_rwsem_down_write_failed) 88ENTRY(call_rwsem_down_write_failed)
93 CFI_STARTPROC 89 CFI_STARTPROC
94 push %ecx 90 pushl_cfi %ecx
95 CFI_ADJUST_CFA_OFFSET 4
96 CFI_REL_OFFSET ecx,0 91 CFI_REL_OFFSET ecx,0
97 calll rwsem_down_write_failed 92 calll rwsem_down_write_failed
98 pop %ecx 93 popl_cfi %ecx
99 CFI_ADJUST_CFA_OFFSET -4
100 ret 94 ret
101 CFI_ENDPROC 95 CFI_ENDPROC
102 ENDPROC(call_rwsem_down_write_failed) 96 ENDPROC(call_rwsem_down_write_failed)
@@ -105,12 +99,10 @@ ENTRY(call_rwsem_wake)
105 CFI_STARTPROC 99 CFI_STARTPROC
106 decw %dx /* do nothing if still outstanding active readers */ 100 decw %dx /* do nothing if still outstanding active readers */
107 jnz 1f 101 jnz 1f
108 push %ecx 102 pushl_cfi %ecx
109 CFI_ADJUST_CFA_OFFSET 4
110 CFI_REL_OFFSET ecx,0 103 CFI_REL_OFFSET ecx,0
111 call rwsem_wake 104 call rwsem_wake
112 pop %ecx 105 popl_cfi %ecx
113 CFI_ADJUST_CFA_OFFSET -4
1141: ret 1061: ret
115 CFI_ENDPROC 107 CFI_ENDPROC
116 ENDPROC(call_rwsem_wake) 108 ENDPROC(call_rwsem_wake)
@@ -118,17 +110,13 @@ ENTRY(call_rwsem_wake)
118/* Fix up special calling conventions */ 110/* Fix up special calling conventions */
119ENTRY(call_rwsem_downgrade_wake) 111ENTRY(call_rwsem_downgrade_wake)
120 CFI_STARTPROC 112 CFI_STARTPROC
121 push %ecx 113 pushl_cfi %ecx
122 CFI_ADJUST_CFA_OFFSET 4
123 CFI_REL_OFFSET ecx,0 114 CFI_REL_OFFSET ecx,0
124 push %edx 115 pushl_cfi %edx
125 CFI_ADJUST_CFA_OFFSET 4
126 CFI_REL_OFFSET edx,0 116 CFI_REL_OFFSET edx,0
127 call rwsem_downgrade_wake 117 call rwsem_downgrade_wake
128 pop %edx 118 popl_cfi %edx
129 CFI_ADJUST_CFA_OFFSET -4 119 popl_cfi %ecx
130 pop %ecx
131 CFI_ADJUST_CFA_OFFSET -4
132 ret 120 ret
133 CFI_ENDPROC 121 CFI_ENDPROC
134 ENDPROC(call_rwsem_downgrade_wake) 122 ENDPROC(call_rwsem_downgrade_wake)
diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/lib/thunk_32.S
index 650b11e00ecc..2930ae05d773 100644
--- a/arch/x86/lib/thunk_32.S
+++ b/arch/x86/lib/thunk_32.S
@@ -7,24 +7,6 @@
7 7
8 #include <linux/linkage.h> 8 #include <linux/linkage.h>
9 9
10#define ARCH_TRACE_IRQS_ON \
11 pushl %eax; \
12 pushl %ecx; \
13 pushl %edx; \
14 call trace_hardirqs_on; \
15 popl %edx; \
16 popl %ecx; \
17 popl %eax;
18
19#define ARCH_TRACE_IRQS_OFF \
20 pushl %eax; \
21 pushl %ecx; \
22 pushl %edx; \
23 call trace_hardirqs_off; \
24 popl %edx; \
25 popl %ecx; \
26 popl %eax;
27
28#ifdef CONFIG_TRACE_IRQFLAGS 10#ifdef CONFIG_TRACE_IRQFLAGS
29 /* put return address in eax (arg1) */ 11 /* put return address in eax (arg1) */
30 .macro thunk_ra name,func 12 .macro thunk_ra name,func
diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S
index bf9a7d5a5428..782b082c9ff7 100644
--- a/arch/x86/lib/thunk_64.S
+++ b/arch/x86/lib/thunk_64.S
@@ -22,26 +22,6 @@
22 CFI_ENDPROC 22 CFI_ENDPROC
23 .endm 23 .endm
24 24
25 /* rdi: arg1 ... normal C conventions. rax is passed from C. */
26 .macro thunk_retrax name,func
27 .globl \name
28\name:
29 CFI_STARTPROC
30 SAVE_ARGS
31 call \func
32 jmp restore_norax
33 CFI_ENDPROC
34 .endm
35
36
37 .section .sched.text, "ax"
38#ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM
39 thunk rwsem_down_read_failed_thunk,rwsem_down_read_failed
40 thunk rwsem_down_write_failed_thunk,rwsem_down_write_failed
41 thunk rwsem_wake_thunk,rwsem_wake
42 thunk rwsem_downgrade_thunk,rwsem_downgrade_wake
43#endif
44
45#ifdef CONFIG_TRACE_IRQFLAGS 25#ifdef CONFIG_TRACE_IRQFLAGS
46 /* put return address in rdi (arg1) */ 26 /* put return address in rdi (arg1) */
47 .macro thunk_ra name,func 27 .macro thunk_ra name,func
@@ -72,10 +52,3 @@ restore:
72 RESTORE_ARGS 52 RESTORE_ARGS
73 ret 53 ret
74 CFI_ENDPROC 54 CFI_ENDPROC
75
76 CFI_STARTPROC
77 SAVE_ARGS
78restore_norax:
79 RESTORE_ARGS 1
80 ret
81 CFI_ENDPROC