diff options
author | Len Brown <len.brown@intel.com> | 2011-03-23 02:34:54 -0400 |
---|---|---|
committer | Len Brown <len.brown@intel.com> | 2011-03-23 02:34:54 -0400 |
commit | 02e2407858fd62053bf60349c0e72cd1c7a4a60e (patch) | |
tree | 0ebdbddc97d3abbc675916010e7771065b70c137 /arch/x86/lib | |
parent | 96e1c408ea8a556c5b51e0e7d56bd2afbfbf5fe9 (diff) | |
parent | 6447f55da90b77faec1697d499ed7986bb4f6de6 (diff) |
Merge branch 'linus' into release
Conflicts:
arch/x86/kernel/acpi/sleep.c
Signed-off-by: Len Brown <len.brown@intel.com>
Diffstat (limited to 'arch/x86/lib')
-rw-r--r-- | arch/x86/lib/Makefile | 1 | ||||
-rw-r--r-- | arch/x86/lib/atomic64_386_32.S | 6 | ||||
-rw-r--r-- | arch/x86/lib/atomic64_cx8_32.S | 6 | ||||
-rw-r--r-- | arch/x86/lib/checksum_32.S | 63 | ||||
-rw-r--r-- | arch/x86/lib/cmpxchg16b_emu.S | 59 | ||||
-rw-r--r-- | arch/x86/lib/copy_user_64.S | 2 | ||||
-rw-r--r-- | arch/x86/lib/csum-copy_64.S | 242 | ||||
-rw-r--r-- | arch/x86/lib/csum-partial_64.c | 2 | ||||
-rw-r--r-- | arch/x86/lib/memmove_64.S | 197 | ||||
-rw-r--r-- | arch/x86/lib/memmove_64.c | 192 | ||||
-rw-r--r-- | arch/x86/lib/rwsem_64.S | 56 | ||||
-rw-r--r-- | arch/x86/lib/semaphore_32.S | 38 | ||||
-rw-r--r-- | arch/x86/lib/thunk_32.S | 18 | ||||
-rw-r--r-- | arch/x86/lib/thunk_64.S | 27 |
14 files changed, 452 insertions, 457 deletions
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index e10cf070ede0..f2479f19ddde 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile | |||
@@ -42,4 +42,5 @@ else | |||
42 | lib-y += memmove_64.o memset_64.o | 42 | lib-y += memmove_64.o memset_64.o |
43 | lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o | 43 | lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o |
44 | lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem_64.o | 44 | lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem_64.o |
45 | lib-y += cmpxchg16b_emu.o | ||
45 | endif | 46 | endif |
diff --git a/arch/x86/lib/atomic64_386_32.S b/arch/x86/lib/atomic64_386_32.S index 2cda60a06e65..e8e7e0d06f42 100644 --- a/arch/x86/lib/atomic64_386_32.S +++ b/arch/x86/lib/atomic64_386_32.S | |||
@@ -15,14 +15,12 @@ | |||
15 | 15 | ||
16 | /* if you want SMP support, implement these with real spinlocks */ | 16 | /* if you want SMP support, implement these with real spinlocks */ |
17 | .macro LOCK reg | 17 | .macro LOCK reg |
18 | pushfl | 18 | pushfl_cfi |
19 | CFI_ADJUST_CFA_OFFSET 4 | ||
20 | cli | 19 | cli |
21 | .endm | 20 | .endm |
22 | 21 | ||
23 | .macro UNLOCK reg | 22 | .macro UNLOCK reg |
24 | popfl | 23 | popfl_cfi |
25 | CFI_ADJUST_CFA_OFFSET -4 | ||
26 | .endm | 24 | .endm |
27 | 25 | ||
28 | #define BEGIN(op) \ | 26 | #define BEGIN(op) \ |
diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S index 71e080de3352..391a083674b4 100644 --- a/arch/x86/lib/atomic64_cx8_32.S +++ b/arch/x86/lib/atomic64_cx8_32.S | |||
@@ -14,14 +14,12 @@ | |||
14 | #include <asm/dwarf2.h> | 14 | #include <asm/dwarf2.h> |
15 | 15 | ||
16 | .macro SAVE reg | 16 | .macro SAVE reg |
17 | pushl %\reg | 17 | pushl_cfi %\reg |
18 | CFI_ADJUST_CFA_OFFSET 4 | ||
19 | CFI_REL_OFFSET \reg, 0 | 18 | CFI_REL_OFFSET \reg, 0 |
20 | .endm | 19 | .endm |
21 | 20 | ||
22 | .macro RESTORE reg | 21 | .macro RESTORE reg |
23 | popl %\reg | 22 | popl_cfi %\reg |
24 | CFI_ADJUST_CFA_OFFSET -4 | ||
25 | CFI_RESTORE \reg | 23 | CFI_RESTORE \reg |
26 | .endm | 24 | .endm |
27 | 25 | ||
diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S index adbccd0bbb78..78d16a554db0 100644 --- a/arch/x86/lib/checksum_32.S +++ b/arch/x86/lib/checksum_32.S | |||
@@ -50,11 +50,9 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum) | |||
50 | */ | 50 | */ |
51 | ENTRY(csum_partial) | 51 | ENTRY(csum_partial) |
52 | CFI_STARTPROC | 52 | CFI_STARTPROC |
53 | pushl %esi | 53 | pushl_cfi %esi |
54 | CFI_ADJUST_CFA_OFFSET 4 | ||
55 | CFI_REL_OFFSET esi, 0 | 54 | CFI_REL_OFFSET esi, 0 |
56 | pushl %ebx | 55 | pushl_cfi %ebx |
57 | CFI_ADJUST_CFA_OFFSET 4 | ||
58 | CFI_REL_OFFSET ebx, 0 | 56 | CFI_REL_OFFSET ebx, 0 |
59 | movl 20(%esp),%eax # Function arg: unsigned int sum | 57 | movl 20(%esp),%eax # Function arg: unsigned int sum |
60 | movl 16(%esp),%ecx # Function arg: int len | 58 | movl 16(%esp),%ecx # Function arg: int len |
@@ -132,11 +130,9 @@ ENTRY(csum_partial) | |||
132 | jz 8f | 130 | jz 8f |
133 | roll $8, %eax | 131 | roll $8, %eax |
134 | 8: | 132 | 8: |
135 | popl %ebx | 133 | popl_cfi %ebx |
136 | CFI_ADJUST_CFA_OFFSET -4 | ||
137 | CFI_RESTORE ebx | 134 | CFI_RESTORE ebx |
138 | popl %esi | 135 | popl_cfi %esi |
139 | CFI_ADJUST_CFA_OFFSET -4 | ||
140 | CFI_RESTORE esi | 136 | CFI_RESTORE esi |
141 | ret | 137 | ret |
142 | CFI_ENDPROC | 138 | CFI_ENDPROC |
@@ -148,11 +144,9 @@ ENDPROC(csum_partial) | |||
148 | 144 | ||
149 | ENTRY(csum_partial) | 145 | ENTRY(csum_partial) |
150 | CFI_STARTPROC | 146 | CFI_STARTPROC |
151 | pushl %esi | 147 | pushl_cfi %esi |
152 | CFI_ADJUST_CFA_OFFSET 4 | ||
153 | CFI_REL_OFFSET esi, 0 | 148 | CFI_REL_OFFSET esi, 0 |
154 | pushl %ebx | 149 | pushl_cfi %ebx |
155 | CFI_ADJUST_CFA_OFFSET 4 | ||
156 | CFI_REL_OFFSET ebx, 0 | 150 | CFI_REL_OFFSET ebx, 0 |
157 | movl 20(%esp),%eax # Function arg: unsigned int sum | 151 | movl 20(%esp),%eax # Function arg: unsigned int sum |
158 | movl 16(%esp),%ecx # Function arg: int len | 152 | movl 16(%esp),%ecx # Function arg: int len |
@@ -260,11 +254,9 @@ ENTRY(csum_partial) | |||
260 | jz 90f | 254 | jz 90f |
261 | roll $8, %eax | 255 | roll $8, %eax |
262 | 90: | 256 | 90: |
263 | popl %ebx | 257 | popl_cfi %ebx |
264 | CFI_ADJUST_CFA_OFFSET -4 | ||
265 | CFI_RESTORE ebx | 258 | CFI_RESTORE ebx |
266 | popl %esi | 259 | popl_cfi %esi |
267 | CFI_ADJUST_CFA_OFFSET -4 | ||
268 | CFI_RESTORE esi | 260 | CFI_RESTORE esi |
269 | ret | 261 | ret |
270 | CFI_ENDPROC | 262 | CFI_ENDPROC |
@@ -309,14 +301,11 @@ ENTRY(csum_partial_copy_generic) | |||
309 | CFI_STARTPROC | 301 | CFI_STARTPROC |
310 | subl $4,%esp | 302 | subl $4,%esp |
311 | CFI_ADJUST_CFA_OFFSET 4 | 303 | CFI_ADJUST_CFA_OFFSET 4 |
312 | pushl %edi | 304 | pushl_cfi %edi |
313 | CFI_ADJUST_CFA_OFFSET 4 | ||
314 | CFI_REL_OFFSET edi, 0 | 305 | CFI_REL_OFFSET edi, 0 |
315 | pushl %esi | 306 | pushl_cfi %esi |
316 | CFI_ADJUST_CFA_OFFSET 4 | ||
317 | CFI_REL_OFFSET esi, 0 | 307 | CFI_REL_OFFSET esi, 0 |
318 | pushl %ebx | 308 | pushl_cfi %ebx |
319 | CFI_ADJUST_CFA_OFFSET 4 | ||
320 | CFI_REL_OFFSET ebx, 0 | 309 | CFI_REL_OFFSET ebx, 0 |
321 | movl ARGBASE+16(%esp),%eax # sum | 310 | movl ARGBASE+16(%esp),%eax # sum |
322 | movl ARGBASE+12(%esp),%ecx # len | 311 | movl ARGBASE+12(%esp),%ecx # len |
@@ -426,17 +415,13 @@ DST( movb %cl, (%edi) ) | |||
426 | 415 | ||
427 | .previous | 416 | .previous |
428 | 417 | ||
429 | popl %ebx | 418 | popl_cfi %ebx |
430 | CFI_ADJUST_CFA_OFFSET -4 | ||
431 | CFI_RESTORE ebx | 419 | CFI_RESTORE ebx |
432 | popl %esi | 420 | popl_cfi %esi |
433 | CFI_ADJUST_CFA_OFFSET -4 | ||
434 | CFI_RESTORE esi | 421 | CFI_RESTORE esi |
435 | popl %edi | 422 | popl_cfi %edi |
436 | CFI_ADJUST_CFA_OFFSET -4 | ||
437 | CFI_RESTORE edi | 423 | CFI_RESTORE edi |
438 | popl %ecx # equivalent to addl $4,%esp | 424 | popl_cfi %ecx # equivalent to addl $4,%esp |
439 | CFI_ADJUST_CFA_OFFSET -4 | ||
440 | ret | 425 | ret |
441 | CFI_ENDPROC | 426 | CFI_ENDPROC |
442 | ENDPROC(csum_partial_copy_generic) | 427 | ENDPROC(csum_partial_copy_generic) |
@@ -459,14 +444,11 @@ ENDPROC(csum_partial_copy_generic) | |||
459 | 444 | ||
460 | ENTRY(csum_partial_copy_generic) | 445 | ENTRY(csum_partial_copy_generic) |
461 | CFI_STARTPROC | 446 | CFI_STARTPROC |
462 | pushl %ebx | 447 | pushl_cfi %ebx |
463 | CFI_ADJUST_CFA_OFFSET 4 | ||
464 | CFI_REL_OFFSET ebx, 0 | 448 | CFI_REL_OFFSET ebx, 0 |
465 | pushl %edi | 449 | pushl_cfi %edi |
466 | CFI_ADJUST_CFA_OFFSET 4 | ||
467 | CFI_REL_OFFSET edi, 0 | 450 | CFI_REL_OFFSET edi, 0 |
468 | pushl %esi | 451 | pushl_cfi %esi |
469 | CFI_ADJUST_CFA_OFFSET 4 | ||
470 | CFI_REL_OFFSET esi, 0 | 452 | CFI_REL_OFFSET esi, 0 |
471 | movl ARGBASE+4(%esp),%esi #src | 453 | movl ARGBASE+4(%esp),%esi #src |
472 | movl ARGBASE+8(%esp),%edi #dst | 454 | movl ARGBASE+8(%esp),%edi #dst |
@@ -527,14 +509,11 @@ DST( movb %dl, (%edi) ) | |||
527 | jmp 7b | 509 | jmp 7b |
528 | .previous | 510 | .previous |
529 | 511 | ||
530 | popl %esi | 512 | popl_cfi %esi |
531 | CFI_ADJUST_CFA_OFFSET -4 | ||
532 | CFI_RESTORE esi | 513 | CFI_RESTORE esi |
533 | popl %edi | 514 | popl_cfi %edi |
534 | CFI_ADJUST_CFA_OFFSET -4 | ||
535 | CFI_RESTORE edi | 515 | CFI_RESTORE edi |
536 | popl %ebx | 516 | popl_cfi %ebx |
537 | CFI_ADJUST_CFA_OFFSET -4 | ||
538 | CFI_RESTORE ebx | 517 | CFI_RESTORE ebx |
539 | ret | 518 | ret |
540 | CFI_ENDPROC | 519 | CFI_ENDPROC |
diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S new file mode 100644 index 000000000000..3e8b08a6de2b --- /dev/null +++ b/arch/x86/lib/cmpxchg16b_emu.S | |||
@@ -0,0 +1,59 @@ | |||
1 | /* | ||
2 | * This program is free software; you can redistribute it and/or | ||
3 | * modify it under the terms of the GNU General Public License | ||
4 | * as published by the Free Software Foundation; version 2 | ||
5 | * of the License. | ||
6 | * | ||
7 | */ | ||
8 | #include <linux/linkage.h> | ||
9 | #include <asm/alternative-asm.h> | ||
10 | #include <asm/frame.h> | ||
11 | #include <asm/dwarf2.h> | ||
12 | |||
13 | .text | ||
14 | |||
15 | /* | ||
16 | * Inputs: | ||
17 | * %rsi : memory location to compare | ||
18 | * %rax : low 64 bits of old value | ||
19 | * %rdx : high 64 bits of old value | ||
20 | * %rbx : low 64 bits of new value | ||
21 | * %rcx : high 64 bits of new value | ||
22 | * %al : Operation successful | ||
23 | */ | ||
24 | ENTRY(this_cpu_cmpxchg16b_emu) | ||
25 | CFI_STARTPROC | ||
26 | |||
27 | # | ||
28 | # Emulate 'cmpxchg16b %gs:(%rsi)' except we return the result in %al not | ||
29 | # via the ZF. Caller will access %al to get result. | ||
30 | # | ||
31 | # Note that this is only useful for a cpuops operation. Meaning that we | ||
32 | # do *not* have a fully atomic operation but just an operation that is | ||
33 | # *atomic* on a single cpu (as provided by the this_cpu_xx class of | ||
34 | # macros). | ||
35 | # | ||
36 | this_cpu_cmpxchg16b_emu: | ||
37 | pushf | ||
38 | cli | ||
39 | |||
40 | cmpq %gs:(%rsi), %rax | ||
41 | jne not_same | ||
42 | cmpq %gs:8(%rsi), %rdx | ||
43 | jne not_same | ||
44 | |||
45 | movq %rbx, %gs:(%rsi) | ||
46 | movq %rcx, %gs:8(%rsi) | ||
47 | |||
48 | popf | ||
49 | mov $1, %al | ||
50 | ret | ||
51 | |||
52 | not_same: | ||
53 | popf | ||
54 | xor %al,%al | ||
55 | ret | ||
56 | |||
57 | CFI_ENDPROC | ||
58 | |||
59 | ENDPROC(this_cpu_cmpxchg16b_emu) | ||
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index a460158b5ac5..99e482615195 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S | |||
@@ -117,7 +117,7 @@ ENDPROC(bad_from_user) | |||
117 | * rdx count | 117 | * rdx count |
118 | * | 118 | * |
119 | * Output: | 119 | * Output: |
120 | * eax uncopied bytes or 0 if successfull. | 120 | * eax uncopied bytes or 0 if successful. |
121 | */ | 121 | */ |
122 | ENTRY(copy_user_generic_unrolled) | 122 | ENTRY(copy_user_generic_unrolled) |
123 | CFI_STARTPROC | 123 | CFI_STARTPROC |
diff --git a/arch/x86/lib/csum-copy_64.S b/arch/x86/lib/csum-copy_64.S index f0dba36578ea..fb903b758da8 100644 --- a/arch/x86/lib/csum-copy_64.S +++ b/arch/x86/lib/csum-copy_64.S | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright 2002,2003 Andi Kleen, SuSE Labs. | 2 | * Copyright 2002, 2003 Andi Kleen, SuSE Labs. |
3 | * | 3 | * |
4 | * This file is subject to the terms and conditions of the GNU General Public | 4 | * This file is subject to the terms and conditions of the GNU General Public |
5 | * License. See the file COPYING in the main directory of this archive | 5 | * License. See the file COPYING in the main directory of this archive |
6 | * for more details. No warranty for anything given at all. | 6 | * for more details. No warranty for anything given at all. |
@@ -11,82 +11,82 @@ | |||
11 | 11 | ||
12 | /* | 12 | /* |
13 | * Checksum copy with exception handling. | 13 | * Checksum copy with exception handling. |
14 | * On exceptions src_err_ptr or dst_err_ptr is set to -EFAULT and the | 14 | * On exceptions src_err_ptr or dst_err_ptr is set to -EFAULT and the |
15 | * destination is zeroed. | 15 | * destination is zeroed. |
16 | * | 16 | * |
17 | * Input | 17 | * Input |
18 | * rdi source | 18 | * rdi source |
19 | * rsi destination | 19 | * rsi destination |
20 | * edx len (32bit) | 20 | * edx len (32bit) |
21 | * ecx sum (32bit) | 21 | * ecx sum (32bit) |
22 | * r8 src_err_ptr (int) | 22 | * r8 src_err_ptr (int) |
23 | * r9 dst_err_ptr (int) | 23 | * r9 dst_err_ptr (int) |
24 | * | 24 | * |
25 | * Output | 25 | * Output |
26 | * eax 64bit sum. undefined in case of exception. | 26 | * eax 64bit sum. undefined in case of exception. |
27 | * | 27 | * |
28 | * Wrappers need to take care of valid exception sum and zeroing. | 28 | * Wrappers need to take care of valid exception sum and zeroing. |
29 | * They also should align source or destination to 8 bytes. | 29 | * They also should align source or destination to 8 bytes. |
30 | */ | 30 | */ |
31 | 31 | ||
32 | .macro source | 32 | .macro source |
33 | 10: | 33 | 10: |
34 | .section __ex_table,"a" | 34 | .section __ex_table, "a" |
35 | .align 8 | 35 | .align 8 |
36 | .quad 10b,.Lbad_source | 36 | .quad 10b, .Lbad_source |
37 | .previous | 37 | .previous |
38 | .endm | 38 | .endm |
39 | 39 | ||
40 | .macro dest | 40 | .macro dest |
41 | 20: | 41 | 20: |
42 | .section __ex_table,"a" | 42 | .section __ex_table, "a" |
43 | .align 8 | 43 | .align 8 |
44 | .quad 20b,.Lbad_dest | 44 | .quad 20b, .Lbad_dest |
45 | .previous | 45 | .previous |
46 | .endm | 46 | .endm |
47 | 47 | ||
48 | .macro ignore L=.Lignore | 48 | .macro ignore L=.Lignore |
49 | 30: | 49 | 30: |
50 | .section __ex_table,"a" | 50 | .section __ex_table, "a" |
51 | .align 8 | 51 | .align 8 |
52 | .quad 30b,\L | 52 | .quad 30b, \L |
53 | .previous | 53 | .previous |
54 | .endm | 54 | .endm |
55 | 55 | ||
56 | 56 | ||
57 | ENTRY(csum_partial_copy_generic) | 57 | ENTRY(csum_partial_copy_generic) |
58 | CFI_STARTPROC | 58 | CFI_STARTPROC |
59 | cmpl $3*64,%edx | 59 | cmpl $3*64, %edx |
60 | jle .Lignore | 60 | jle .Lignore |
61 | 61 | ||
62 | .Lignore: | 62 | .Lignore: |
63 | subq $7*8,%rsp | 63 | subq $7*8, %rsp |
64 | CFI_ADJUST_CFA_OFFSET 7*8 | 64 | CFI_ADJUST_CFA_OFFSET 7*8 |
65 | movq %rbx,2*8(%rsp) | 65 | movq %rbx, 2*8(%rsp) |
66 | CFI_REL_OFFSET rbx, 2*8 | 66 | CFI_REL_OFFSET rbx, 2*8 |
67 | movq %r12,3*8(%rsp) | 67 | movq %r12, 3*8(%rsp) |
68 | CFI_REL_OFFSET r12, 3*8 | 68 | CFI_REL_OFFSET r12, 3*8 |
69 | movq %r14,4*8(%rsp) | 69 | movq %r14, 4*8(%rsp) |
70 | CFI_REL_OFFSET r14, 4*8 | 70 | CFI_REL_OFFSET r14, 4*8 |
71 | movq %r13,5*8(%rsp) | 71 | movq %r13, 5*8(%rsp) |
72 | CFI_REL_OFFSET r13, 5*8 | 72 | CFI_REL_OFFSET r13, 5*8 |
73 | movq %rbp,6*8(%rsp) | 73 | movq %rbp, 6*8(%rsp) |
74 | CFI_REL_OFFSET rbp, 6*8 | 74 | CFI_REL_OFFSET rbp, 6*8 |
75 | 75 | ||
76 | movq %r8,(%rsp) | 76 | movq %r8, (%rsp) |
77 | movq %r9,1*8(%rsp) | 77 | movq %r9, 1*8(%rsp) |
78 | |||
79 | movl %ecx,%eax | ||
80 | movl %edx,%ecx | ||
81 | 78 | ||
82 | xorl %r9d,%r9d | 79 | movl %ecx, %eax |
83 | movq %rcx,%r12 | 80 | movl %edx, %ecx |
84 | 81 | ||
85 | shrq $6,%r12 | 82 | xorl %r9d, %r9d |
86 | jz .Lhandle_tail /* < 64 */ | 83 | movq %rcx, %r12 |
84 | |||
85 | shrq $6, %r12 | ||
86 | jz .Lhandle_tail /* < 64 */ | ||
87 | 87 | ||
88 | clc | 88 | clc |
89 | 89 | ||
90 | /* main loop. clear in 64 byte blocks */ | 90 | /* main loop. clear in 64 byte blocks */ |
91 | /* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */ | 91 | /* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */ |
92 | /* r11: temp3, rdx: temp4, r12 loopcnt */ | 92 | /* r11: temp3, rdx: temp4, r12 loopcnt */ |
@@ -94,156 +94,156 @@ ENTRY(csum_partial_copy_generic) | |||
94 | .p2align 4 | 94 | .p2align 4 |
95 | .Lloop: | 95 | .Lloop: |
96 | source | 96 | source |
97 | movq (%rdi),%rbx | 97 | movq (%rdi), %rbx |
98 | source | 98 | source |
99 | movq 8(%rdi),%r8 | 99 | movq 8(%rdi), %r8 |
100 | source | 100 | source |
101 | movq 16(%rdi),%r11 | 101 | movq 16(%rdi), %r11 |
102 | source | 102 | source |
103 | movq 24(%rdi),%rdx | 103 | movq 24(%rdi), %rdx |
104 | 104 | ||
105 | source | 105 | source |
106 | movq 32(%rdi),%r10 | 106 | movq 32(%rdi), %r10 |
107 | source | 107 | source |
108 | movq 40(%rdi),%rbp | 108 | movq 40(%rdi), %rbp |
109 | source | 109 | source |
110 | movq 48(%rdi),%r14 | 110 | movq 48(%rdi), %r14 |
111 | source | 111 | source |
112 | movq 56(%rdi),%r13 | 112 | movq 56(%rdi), %r13 |
113 | 113 | ||
114 | ignore 2f | 114 | ignore 2f |
115 | prefetcht0 5*64(%rdi) | 115 | prefetcht0 5*64(%rdi) |
116 | 2: | 116 | 2: |
117 | adcq %rbx,%rax | 117 | adcq %rbx, %rax |
118 | adcq %r8,%rax | 118 | adcq %r8, %rax |
119 | adcq %r11,%rax | 119 | adcq %r11, %rax |
120 | adcq %rdx,%rax | 120 | adcq %rdx, %rax |
121 | adcq %r10,%rax | 121 | adcq %r10, %rax |
122 | adcq %rbp,%rax | 122 | adcq %rbp, %rax |
123 | adcq %r14,%rax | 123 | adcq %r14, %rax |
124 | adcq %r13,%rax | 124 | adcq %r13, %rax |
125 | 125 | ||
126 | decl %r12d | 126 | decl %r12d |
127 | 127 | ||
128 | dest | 128 | dest |
129 | movq %rbx,(%rsi) | 129 | movq %rbx, (%rsi) |
130 | dest | 130 | dest |
131 | movq %r8,8(%rsi) | 131 | movq %r8, 8(%rsi) |
132 | dest | 132 | dest |
133 | movq %r11,16(%rsi) | 133 | movq %r11, 16(%rsi) |
134 | dest | 134 | dest |
135 | movq %rdx,24(%rsi) | 135 | movq %rdx, 24(%rsi) |
136 | 136 | ||
137 | dest | 137 | dest |
138 | movq %r10,32(%rsi) | 138 | movq %r10, 32(%rsi) |
139 | dest | 139 | dest |
140 | movq %rbp,40(%rsi) | 140 | movq %rbp, 40(%rsi) |
141 | dest | 141 | dest |
142 | movq %r14,48(%rsi) | 142 | movq %r14, 48(%rsi) |
143 | dest | 143 | dest |
144 | movq %r13,56(%rsi) | 144 | movq %r13, 56(%rsi) |
145 | 145 | ||
146 | 3: | 146 | 3: |
147 | |||
148 | leaq 64(%rdi),%rdi | ||
149 | leaq 64(%rsi),%rsi | ||
150 | 147 | ||
151 | jnz .Lloop | 148 | leaq 64(%rdi), %rdi |
149 | leaq 64(%rsi), %rsi | ||
152 | 150 | ||
153 | adcq %r9,%rax | 151 | jnz .Lloop |
154 | 152 | ||
155 | /* do last upto 56 bytes */ | 153 | adcq %r9, %rax |
154 | |||
155 | /* do last up to 56 bytes */ | ||
156 | .Lhandle_tail: | 156 | .Lhandle_tail: |
157 | /* ecx: count */ | 157 | /* ecx: count */ |
158 | movl %ecx,%r10d | 158 | movl %ecx, %r10d |
159 | andl $63,%ecx | 159 | andl $63, %ecx |
160 | shrl $3,%ecx | 160 | shrl $3, %ecx |
161 | jz .Lfold | 161 | jz .Lfold |
162 | clc | 162 | clc |
163 | .p2align 4 | 163 | .p2align 4 |
164 | .Lloop_8: | 164 | .Lloop_8: |
165 | source | 165 | source |
166 | movq (%rdi),%rbx | 166 | movq (%rdi), %rbx |
167 | adcq %rbx,%rax | 167 | adcq %rbx, %rax |
168 | decl %ecx | 168 | decl %ecx |
169 | dest | 169 | dest |
170 | movq %rbx,(%rsi) | 170 | movq %rbx, (%rsi) |
171 | leaq 8(%rsi),%rsi /* preserve carry */ | 171 | leaq 8(%rsi), %rsi /* preserve carry */ |
172 | leaq 8(%rdi),%rdi | 172 | leaq 8(%rdi), %rdi |
173 | jnz .Lloop_8 | 173 | jnz .Lloop_8 |
174 | adcq %r9,%rax /* add in carry */ | 174 | adcq %r9, %rax /* add in carry */ |
175 | 175 | ||
176 | .Lfold: | 176 | .Lfold: |
177 | /* reduce checksum to 32bits */ | 177 | /* reduce checksum to 32bits */ |
178 | movl %eax,%ebx | 178 | movl %eax, %ebx |
179 | shrq $32,%rax | 179 | shrq $32, %rax |
180 | addl %ebx,%eax | 180 | addl %ebx, %eax |
181 | adcl %r9d,%eax | 181 | adcl %r9d, %eax |
182 | 182 | ||
183 | /* do last upto 6 bytes */ | 183 | /* do last up to 6 bytes */ |
184 | .Lhandle_7: | 184 | .Lhandle_7: |
185 | movl %r10d,%ecx | 185 | movl %r10d, %ecx |
186 | andl $7,%ecx | 186 | andl $7, %ecx |
187 | shrl $1,%ecx | 187 | shrl $1, %ecx |
188 | jz .Lhandle_1 | 188 | jz .Lhandle_1 |
189 | movl $2,%edx | 189 | movl $2, %edx |
190 | xorl %ebx,%ebx | 190 | xorl %ebx, %ebx |
191 | clc | 191 | clc |
192 | .p2align 4 | 192 | .p2align 4 |
193 | .Lloop_1: | 193 | .Lloop_1: |
194 | source | 194 | source |
195 | movw (%rdi),%bx | 195 | movw (%rdi), %bx |
196 | adcl %ebx,%eax | 196 | adcl %ebx, %eax |
197 | decl %ecx | 197 | decl %ecx |
198 | dest | 198 | dest |
199 | movw %bx,(%rsi) | 199 | movw %bx, (%rsi) |
200 | leaq 2(%rdi),%rdi | 200 | leaq 2(%rdi), %rdi |
201 | leaq 2(%rsi),%rsi | 201 | leaq 2(%rsi), %rsi |
202 | jnz .Lloop_1 | 202 | jnz .Lloop_1 |
203 | adcl %r9d,%eax /* add in carry */ | 203 | adcl %r9d, %eax /* add in carry */ |
204 | 204 | ||
205 | /* handle last odd byte */ | 205 | /* handle last odd byte */ |
206 | .Lhandle_1: | 206 | .Lhandle_1: |
207 | testl $1,%r10d | 207 | testl $1, %r10d |
208 | jz .Lende | 208 | jz .Lende |
209 | xorl %ebx,%ebx | 209 | xorl %ebx, %ebx |
210 | source | 210 | source |
211 | movb (%rdi),%bl | 211 | movb (%rdi), %bl |
212 | dest | 212 | dest |
213 | movb %bl,(%rsi) | 213 | movb %bl, (%rsi) |
214 | addl %ebx,%eax | 214 | addl %ebx, %eax |
215 | adcl %r9d,%eax /* carry */ | 215 | adcl %r9d, %eax /* carry */ |
216 | 216 | ||
217 | CFI_REMEMBER_STATE | 217 | CFI_REMEMBER_STATE |
218 | .Lende: | 218 | .Lende: |
219 | movq 2*8(%rsp),%rbx | 219 | movq 2*8(%rsp), %rbx |
220 | CFI_RESTORE rbx | 220 | CFI_RESTORE rbx |
221 | movq 3*8(%rsp),%r12 | 221 | movq 3*8(%rsp), %r12 |
222 | CFI_RESTORE r12 | 222 | CFI_RESTORE r12 |
223 | movq 4*8(%rsp),%r14 | 223 | movq 4*8(%rsp), %r14 |
224 | CFI_RESTORE r14 | 224 | CFI_RESTORE r14 |
225 | movq 5*8(%rsp),%r13 | 225 | movq 5*8(%rsp), %r13 |
226 | CFI_RESTORE r13 | 226 | CFI_RESTORE r13 |
227 | movq 6*8(%rsp),%rbp | 227 | movq 6*8(%rsp), %rbp |
228 | CFI_RESTORE rbp | 228 | CFI_RESTORE rbp |
229 | addq $7*8,%rsp | 229 | addq $7*8, %rsp |
230 | CFI_ADJUST_CFA_OFFSET -7*8 | 230 | CFI_ADJUST_CFA_OFFSET -7*8 |
231 | ret | 231 | ret |
232 | CFI_RESTORE_STATE | 232 | CFI_RESTORE_STATE |
233 | 233 | ||
234 | /* Exception handlers. Very simple, zeroing is done in the wrappers */ | 234 | /* Exception handlers. Very simple, zeroing is done in the wrappers */ |
235 | .Lbad_source: | 235 | .Lbad_source: |
236 | movq (%rsp),%rax | 236 | movq (%rsp), %rax |
237 | testq %rax,%rax | 237 | testq %rax, %rax |
238 | jz .Lende | 238 | jz .Lende |
239 | movl $-EFAULT,(%rax) | 239 | movl $-EFAULT, (%rax) |
240 | jmp .Lende | 240 | jmp .Lende |
241 | 241 | ||
242 | .Lbad_dest: | 242 | .Lbad_dest: |
243 | movq 8(%rsp),%rax | 243 | movq 8(%rsp), %rax |
244 | testq %rax,%rax | 244 | testq %rax, %rax |
245 | jz .Lende | 245 | jz .Lende |
246 | movl $-EFAULT,(%rax) | 246 | movl $-EFAULT, (%rax) |
247 | jmp .Lende | 247 | jmp .Lende |
248 | CFI_ENDPROC | 248 | CFI_ENDPROC |
249 | ENDPROC(csum_partial_copy_generic) | 249 | ENDPROC(csum_partial_copy_generic) |
diff --git a/arch/x86/lib/csum-partial_64.c b/arch/x86/lib/csum-partial_64.c index bf51144d97e1..9845371c5c36 100644 --- a/arch/x86/lib/csum-partial_64.c +++ b/arch/x86/lib/csum-partial_64.c | |||
@@ -84,7 +84,7 @@ static unsigned do_csum(const unsigned char *buff, unsigned len) | |||
84 | count64--; | 84 | count64--; |
85 | } | 85 | } |
86 | 86 | ||
87 | /* last upto 7 8byte blocks */ | 87 | /* last up to 7 8byte blocks */ |
88 | count %= 8; | 88 | count %= 8; |
89 | while (count) { | 89 | while (count) { |
90 | asm("addq %1,%0\n\t" | 90 | asm("addq %1,%0\n\t" |
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S new file mode 100644 index 000000000000..0ecb8433e5a8 --- /dev/null +++ b/arch/x86/lib/memmove_64.S | |||
@@ -0,0 +1,197 @@ | |||
1 | /* | ||
2 | * Normally compiler builtins are used, but sometimes the compiler calls out | ||
3 | * of line code. Based on asm-i386/string.h. | ||
4 | * | ||
5 | * This assembly file is re-written from memmove_64.c file. | ||
6 | * - Copyright 2011 Fenghua Yu <fenghua.yu@intel.com> | ||
7 | */ | ||
8 | #define _STRING_C | ||
9 | #include <linux/linkage.h> | ||
10 | #include <asm/dwarf2.h> | ||
11 | |||
12 | #undef memmove | ||
13 | |||
14 | /* | ||
15 | * Implement memmove(). This can handle overlap between src and dst. | ||
16 | * | ||
17 | * Input: | ||
18 | * rdi: dest | ||
19 | * rsi: src | ||
20 | * rdx: count | ||
21 | * | ||
22 | * Output: | ||
23 | * rax: dest | ||
24 | */ | ||
25 | ENTRY(memmove) | ||
26 | CFI_STARTPROC | ||
27 | /* Handle 32 bytes or more in the loop */ | ||
28 | mov %rdi, %rax | ||
29 | cmp $0x20, %rdx | ||
30 | jb 1f | ||
31 | |||
32 | /* Decide forward/backward copy mode */ | ||
33 | cmp %rdi, %rsi | ||
34 | jb 2f | ||
35 | |||
36 | /* | ||
37 | * The movsq instruction has a high startup latency, | ||
38 | * so we handle small sizes with general-purpose registers. | ||
39 | */ | ||
40 | cmp $680, %rdx | ||
41 | jb 3f | ||
42 | /* | ||
43 | * movsq instruction is only good for aligned case. | ||
44 | */ | ||
45 | |||
46 | cmpb %dil, %sil | ||
47 | je 4f | ||
48 | 3: | ||
49 | sub $0x20, %rdx | ||
50 | /* | ||
51 | * We gobble 32 bytes forward in each loop. | ||
52 | */ | ||
53 | 5: | ||
54 | sub $0x20, %rdx | ||
55 | movq 0*8(%rsi), %r11 | ||
56 | movq 1*8(%rsi), %r10 | ||
57 | movq 2*8(%rsi), %r9 | ||
58 | movq 3*8(%rsi), %r8 | ||
59 | leaq 4*8(%rsi), %rsi | ||
60 | |||
61 | movq %r11, 0*8(%rdi) | ||
62 | movq %r10, 1*8(%rdi) | ||
63 | movq %r9, 2*8(%rdi) | ||
64 | movq %r8, 3*8(%rdi) | ||
65 | leaq 4*8(%rdi), %rdi | ||
66 | jae 5b | ||
67 | addq $0x20, %rdx | ||
68 | jmp 1f | ||
69 | /* | ||
70 | * Handle data forward by movsq. | ||
71 | */ | ||
72 | .p2align 4 | ||
73 | 4: | ||
74 | movq %rdx, %rcx | ||
75 | movq -8(%rsi, %rdx), %r11 | ||
76 | lea -8(%rdi, %rdx), %r10 | ||
77 | shrq $3, %rcx | ||
78 | rep movsq | ||
79 | movq %r11, (%r10) | ||
80 | jmp 13f | ||
81 | /* | ||
82 | * Handle data backward by movsq. | ||
83 | */ | ||
84 | .p2align 4 | ||
85 | 7: | ||
86 | movq %rdx, %rcx | ||
87 | movq (%rsi), %r11 | ||
88 | movq %rdi, %r10 | ||
89 | leaq -8(%rsi, %rdx), %rsi | ||
90 | leaq -8(%rdi, %rdx), %rdi | ||
91 | shrq $3, %rcx | ||
92 | std | ||
93 | rep movsq | ||
94 | cld | ||
95 | movq %r11, (%r10) | ||
96 | jmp 13f | ||
97 | |||
98 | /* | ||
99 | * Start to prepare for backward copy. | ||
100 | */ | ||
101 | .p2align 4 | ||
102 | 2: | ||
103 | cmp $680, %rdx | ||
104 | jb 6f | ||
105 | cmp %dil, %sil | ||
106 | je 7b | ||
107 | 6: | ||
108 | /* | ||
109 | * Calculate copy position to tail. | ||
110 | */ | ||
111 | addq %rdx, %rsi | ||
112 | addq %rdx, %rdi | ||
113 | subq $0x20, %rdx | ||
114 | /* | ||
115 | * We gobble 32 bytes backward in each loop. | ||
116 | */ | ||
117 | 8: | ||
118 | subq $0x20, %rdx | ||
119 | movq -1*8(%rsi), %r11 | ||
120 | movq -2*8(%rsi), %r10 | ||
121 | movq -3*8(%rsi), %r9 | ||
122 | movq -4*8(%rsi), %r8 | ||
123 | leaq -4*8(%rsi), %rsi | ||
124 | |||
125 | movq %r11, -1*8(%rdi) | ||
126 | movq %r10, -2*8(%rdi) | ||
127 | movq %r9, -3*8(%rdi) | ||
128 | movq %r8, -4*8(%rdi) | ||
129 | leaq -4*8(%rdi), %rdi | ||
130 | jae 8b | ||
131 | /* | ||
132 | * Calculate copy position to head. | ||
133 | */ | ||
134 | addq $0x20, %rdx | ||
135 | subq %rdx, %rsi | ||
136 | subq %rdx, %rdi | ||
137 | 1: | ||
138 | cmpq $16, %rdx | ||
139 | jb 9f | ||
140 | /* | ||
141 | * Move data from 16 bytes to 31 bytes. | ||
142 | */ | ||
143 | movq 0*8(%rsi), %r11 | ||
144 | movq 1*8(%rsi), %r10 | ||
145 | movq -2*8(%rsi, %rdx), %r9 | ||
146 | movq -1*8(%rsi, %rdx), %r8 | ||
147 | movq %r11, 0*8(%rdi) | ||
148 | movq %r10, 1*8(%rdi) | ||
149 | movq %r9, -2*8(%rdi, %rdx) | ||
150 | movq %r8, -1*8(%rdi, %rdx) | ||
151 | jmp 13f | ||
152 | .p2align 4 | ||
153 | 9: | ||
154 | cmpq $8, %rdx | ||
155 | jb 10f | ||
156 | /* | ||
157 | * Move data from 8 bytes to 15 bytes. | ||
158 | */ | ||
159 | movq 0*8(%rsi), %r11 | ||
160 | movq -1*8(%rsi, %rdx), %r10 | ||
161 | movq %r11, 0*8(%rdi) | ||
162 | movq %r10, -1*8(%rdi, %rdx) | ||
163 | jmp 13f | ||
164 | 10: | ||
165 | cmpq $4, %rdx | ||
166 | jb 11f | ||
167 | /* | ||
168 | * Move data from 4 bytes to 7 bytes. | ||
169 | */ | ||
170 | movl (%rsi), %r11d | ||
171 | movl -4(%rsi, %rdx), %r10d | ||
172 | movl %r11d, (%rdi) | ||
173 | movl %r10d, -4(%rdi, %rdx) | ||
174 | jmp 13f | ||
175 | 11: | ||
176 | cmp $2, %rdx | ||
177 | jb 12f | ||
178 | /* | ||
179 | * Move data from 2 bytes to 3 bytes. | ||
180 | */ | ||
181 | movw (%rsi), %r11w | ||
182 | movw -2(%rsi, %rdx), %r10w | ||
183 | movw %r11w, (%rdi) | ||
184 | movw %r10w, -2(%rdi, %rdx) | ||
185 | jmp 13f | ||
186 | 12: | ||
187 | cmp $1, %rdx | ||
188 | jb 13f | ||
189 | /* | ||
190 | * Move data for 1 byte. | ||
191 | */ | ||
192 | movb (%rsi), %r11b | ||
193 | movb %r11b, (%rdi) | ||
194 | 13: | ||
195 | retq | ||
196 | CFI_ENDPROC | ||
197 | ENDPROC(memmove) | ||
diff --git a/arch/x86/lib/memmove_64.c b/arch/x86/lib/memmove_64.c deleted file mode 100644 index 6d0f0ec41b34..000000000000 --- a/arch/x86/lib/memmove_64.c +++ /dev/null | |||
@@ -1,192 +0,0 @@ | |||
1 | /* Normally compiler builtins are used, but sometimes the compiler calls out | ||
2 | of line code. Based on asm-i386/string.h. | ||
3 | */ | ||
4 | #define _STRING_C | ||
5 | #include <linux/string.h> | ||
6 | #include <linux/module.h> | ||
7 | |||
8 | #undef memmove | ||
9 | void *memmove(void *dest, const void *src, size_t count) | ||
10 | { | ||
11 | unsigned long d0,d1,d2,d3,d4,d5,d6,d7; | ||
12 | char *ret; | ||
13 | |||
14 | __asm__ __volatile__( | ||
15 | /* Handle more 32bytes in loop */ | ||
16 | "mov %2, %3\n\t" | ||
17 | "cmp $0x20, %0\n\t" | ||
18 | "jb 1f\n\t" | ||
19 | |||
20 | /* Decide forward/backward copy mode */ | ||
21 | "cmp %2, %1\n\t" | ||
22 | "jb 2f\n\t" | ||
23 | |||
24 | /* | ||
25 | * movsq instruction have many startup latency | ||
26 | * so we handle small size by general register. | ||
27 | */ | ||
28 | "cmp $680, %0\n\t" | ||
29 | "jb 3f\n\t" | ||
30 | /* | ||
31 | * movsq instruction is only good for aligned case. | ||
32 | */ | ||
33 | "cmpb %%dil, %%sil\n\t" | ||
34 | "je 4f\n\t" | ||
35 | "3:\n\t" | ||
36 | "sub $0x20, %0\n\t" | ||
37 | /* | ||
38 | * We gobble 32byts forward in each loop. | ||
39 | */ | ||
40 | "5:\n\t" | ||
41 | "sub $0x20, %0\n\t" | ||
42 | "movq 0*8(%1), %4\n\t" | ||
43 | "movq 1*8(%1), %5\n\t" | ||
44 | "movq 2*8(%1), %6\n\t" | ||
45 | "movq 3*8(%1), %7\n\t" | ||
46 | "leaq 4*8(%1), %1\n\t" | ||
47 | |||
48 | "movq %4, 0*8(%2)\n\t" | ||
49 | "movq %5, 1*8(%2)\n\t" | ||
50 | "movq %6, 2*8(%2)\n\t" | ||
51 | "movq %7, 3*8(%2)\n\t" | ||
52 | "leaq 4*8(%2), %2\n\t" | ||
53 | "jae 5b\n\t" | ||
54 | "addq $0x20, %0\n\t" | ||
55 | "jmp 1f\n\t" | ||
56 | /* | ||
57 | * Handle data forward by movsq. | ||
58 | */ | ||
59 | ".p2align 4\n\t" | ||
60 | "4:\n\t" | ||
61 | "movq %0, %8\n\t" | ||
62 | "movq -8(%1, %0), %4\n\t" | ||
63 | "lea -8(%2, %0), %5\n\t" | ||
64 | "shrq $3, %8\n\t" | ||
65 | "rep movsq\n\t" | ||
66 | "movq %4, (%5)\n\t" | ||
67 | "jmp 13f\n\t" | ||
68 | /* | ||
69 | * Handle data backward by movsq. | ||
70 | */ | ||
71 | ".p2align 4\n\t" | ||
72 | "7:\n\t" | ||
73 | "movq %0, %8\n\t" | ||
74 | "movq (%1), %4\n\t" | ||
75 | "movq %2, %5\n\t" | ||
76 | "leaq -8(%1, %0), %1\n\t" | ||
77 | "leaq -8(%2, %0), %2\n\t" | ||
78 | "shrq $3, %8\n\t" | ||
79 | "std\n\t" | ||
80 | "rep movsq\n\t" | ||
81 | "cld\n\t" | ||
82 | "movq %4, (%5)\n\t" | ||
83 | "jmp 13f\n\t" | ||
84 | |||
85 | /* | ||
86 | * Start to prepare for backward copy. | ||
87 | */ | ||
88 | ".p2align 4\n\t" | ||
89 | "2:\n\t" | ||
90 | "cmp $680, %0\n\t" | ||
91 | "jb 6f \n\t" | ||
92 | "cmp %%dil, %%sil\n\t" | ||
93 | "je 7b \n\t" | ||
94 | "6:\n\t" | ||
95 | /* | ||
96 | * Calculate copy position to tail. | ||
97 | */ | ||
98 | "addq %0, %1\n\t" | ||
99 | "addq %0, %2\n\t" | ||
100 | "subq $0x20, %0\n\t" | ||
101 | /* | ||
102 | * We gobble 32byts backward in each loop. | ||
103 | */ | ||
104 | "8:\n\t" | ||
105 | "subq $0x20, %0\n\t" | ||
106 | "movq -1*8(%1), %4\n\t" | ||
107 | "movq -2*8(%1), %5\n\t" | ||
108 | "movq -3*8(%1), %6\n\t" | ||
109 | "movq -4*8(%1), %7\n\t" | ||
110 | "leaq -4*8(%1), %1\n\t" | ||
111 | |||
112 | "movq %4, -1*8(%2)\n\t" | ||
113 | "movq %5, -2*8(%2)\n\t" | ||
114 | "movq %6, -3*8(%2)\n\t" | ||
115 | "movq %7, -4*8(%2)\n\t" | ||
116 | "leaq -4*8(%2), %2\n\t" | ||
117 | "jae 8b\n\t" | ||
118 | /* | ||
119 | * Calculate copy position to head. | ||
120 | */ | ||
121 | "addq $0x20, %0\n\t" | ||
122 | "subq %0, %1\n\t" | ||
123 | "subq %0, %2\n\t" | ||
124 | "1:\n\t" | ||
125 | "cmpq $16, %0\n\t" | ||
126 | "jb 9f\n\t" | ||
127 | /* | ||
128 | * Move data from 16 bytes to 31 bytes. | ||
129 | */ | ||
130 | "movq 0*8(%1), %4\n\t" | ||
131 | "movq 1*8(%1), %5\n\t" | ||
132 | "movq -2*8(%1, %0), %6\n\t" | ||
133 | "movq -1*8(%1, %0), %7\n\t" | ||
134 | "movq %4, 0*8(%2)\n\t" | ||
135 | "movq %5, 1*8(%2)\n\t" | ||
136 | "movq %6, -2*8(%2, %0)\n\t" | ||
137 | "movq %7, -1*8(%2, %0)\n\t" | ||
138 | "jmp 13f\n\t" | ||
139 | ".p2align 4\n\t" | ||
140 | "9:\n\t" | ||
141 | "cmpq $8, %0\n\t" | ||
142 | "jb 10f\n\t" | ||
143 | /* | ||
144 | * Move data from 8 bytes to 15 bytes. | ||
145 | */ | ||
146 | "movq 0*8(%1), %4\n\t" | ||
147 | "movq -1*8(%1, %0), %5\n\t" | ||
148 | "movq %4, 0*8(%2)\n\t" | ||
149 | "movq %5, -1*8(%2, %0)\n\t" | ||
150 | "jmp 13f\n\t" | ||
151 | "10:\n\t" | ||
152 | "cmpq $4, %0\n\t" | ||
153 | "jb 11f\n\t" | ||
154 | /* | ||
155 | * Move data from 4 bytes to 7 bytes. | ||
156 | */ | ||
157 | "movl (%1), %4d\n\t" | ||
158 | "movl -4(%1, %0), %5d\n\t" | ||
159 | "movl %4d, (%2)\n\t" | ||
160 | "movl %5d, -4(%2, %0)\n\t" | ||
161 | "jmp 13f\n\t" | ||
162 | "11:\n\t" | ||
163 | "cmp $2, %0\n\t" | ||
164 | "jb 12f\n\t" | ||
165 | /* | ||
166 | * Move data from 2 bytes to 3 bytes. | ||
167 | */ | ||
168 | "movw (%1), %4w\n\t" | ||
169 | "movw -2(%1, %0), %5w\n\t" | ||
170 | "movw %4w, (%2)\n\t" | ||
171 | "movw %5w, -2(%2, %0)\n\t" | ||
172 | "jmp 13f\n\t" | ||
173 | "12:\n\t" | ||
174 | "cmp $1, %0\n\t" | ||
175 | "jb 13f\n\t" | ||
176 | /* | ||
177 | * Move data for 1 byte. | ||
178 | */ | ||
179 | "movb (%1), %4b\n\t" | ||
180 | "movb %4b, (%2)\n\t" | ||
181 | "13:\n\t" | ||
182 | : "=&d" (d0), "=&S" (d1), "=&D" (d2), "=&a" (ret) , | ||
183 | "=r"(d3), "=r"(d4), "=r"(d5), "=r"(d6), "=&c" (d7) | ||
184 | :"0" (count), | ||
185 | "1" (src), | ||
186 | "2" (dest) | ||
187 | :"memory"); | ||
188 | |||
189 | return ret; | ||
190 | |||
191 | } | ||
192 | EXPORT_SYMBOL(memmove); | ||
diff --git a/arch/x86/lib/rwsem_64.S b/arch/x86/lib/rwsem_64.S index 41fcf00e49df..67743977398b 100644 --- a/arch/x86/lib/rwsem_64.S +++ b/arch/x86/lib/rwsem_64.S | |||
@@ -23,43 +23,50 @@ | |||
23 | #include <asm/dwarf2.h> | 23 | #include <asm/dwarf2.h> |
24 | 24 | ||
25 | #define save_common_regs \ | 25 | #define save_common_regs \ |
26 | pushq %rdi; \ | 26 | pushq_cfi %rdi; CFI_REL_OFFSET rdi, 0; \ |
27 | pushq %rsi; \ | 27 | pushq_cfi %rsi; CFI_REL_OFFSET rsi, 0; \ |
28 | pushq %rcx; \ | 28 | pushq_cfi %rcx; CFI_REL_OFFSET rcx, 0; \ |
29 | pushq %r8; \ | 29 | pushq_cfi %r8; CFI_REL_OFFSET r8, 0; \ |
30 | pushq %r9; \ | 30 | pushq_cfi %r9; CFI_REL_OFFSET r9, 0; \ |
31 | pushq %r10; \ | 31 | pushq_cfi %r10; CFI_REL_OFFSET r10, 0; \ |
32 | pushq %r11 | 32 | pushq_cfi %r11; CFI_REL_OFFSET r11, 0 |
33 | 33 | ||
34 | #define restore_common_regs \ | 34 | #define restore_common_regs \ |
35 | popq %r11; \ | 35 | popq_cfi %r11; CFI_RESTORE r11; \ |
36 | popq %r10; \ | 36 | popq_cfi %r10; CFI_RESTORE r10; \ |
37 | popq %r9; \ | 37 | popq_cfi %r9; CFI_RESTORE r9; \ |
38 | popq %r8; \ | 38 | popq_cfi %r8; CFI_RESTORE r8; \ |
39 | popq %rcx; \ | 39 | popq_cfi %rcx; CFI_RESTORE rcx; \ |
40 | popq %rsi; \ | 40 | popq_cfi %rsi; CFI_RESTORE rsi; \ |
41 | popq %rdi | 41 | popq_cfi %rdi; CFI_RESTORE rdi |
42 | 42 | ||
43 | /* Fix up special calling conventions */ | 43 | /* Fix up special calling conventions */ |
44 | ENTRY(call_rwsem_down_read_failed) | 44 | ENTRY(call_rwsem_down_read_failed) |
45 | CFI_STARTPROC | ||
45 | save_common_regs | 46 | save_common_regs |
46 | pushq %rdx | 47 | pushq_cfi %rdx |
48 | CFI_REL_OFFSET rdx, 0 | ||
47 | movq %rax,%rdi | 49 | movq %rax,%rdi |
48 | call rwsem_down_read_failed | 50 | call rwsem_down_read_failed |
49 | popq %rdx | 51 | popq_cfi %rdx |
52 | CFI_RESTORE rdx | ||
50 | restore_common_regs | 53 | restore_common_regs |
51 | ret | 54 | ret |
52 | ENDPROC(call_rwsem_down_read_failed) | 55 | CFI_ENDPROC |
56 | ENDPROC(call_rwsem_down_read_failed) | ||
53 | 57 | ||
54 | ENTRY(call_rwsem_down_write_failed) | 58 | ENTRY(call_rwsem_down_write_failed) |
59 | CFI_STARTPROC | ||
55 | save_common_regs | 60 | save_common_regs |
56 | movq %rax,%rdi | 61 | movq %rax,%rdi |
57 | call rwsem_down_write_failed | 62 | call rwsem_down_write_failed |
58 | restore_common_regs | 63 | restore_common_regs |
59 | ret | 64 | ret |
60 | ENDPROC(call_rwsem_down_write_failed) | 65 | CFI_ENDPROC |
66 | ENDPROC(call_rwsem_down_write_failed) | ||
61 | 67 | ||
62 | ENTRY(call_rwsem_wake) | 68 | ENTRY(call_rwsem_wake) |
69 | CFI_STARTPROC | ||
63 | decl %edx /* do nothing if still outstanding active readers */ | 70 | decl %edx /* do nothing if still outstanding active readers */ |
64 | jnz 1f | 71 | jnz 1f |
65 | save_common_regs | 72 | save_common_regs |
@@ -67,15 +74,20 @@ ENTRY(call_rwsem_wake) | |||
67 | call rwsem_wake | 74 | call rwsem_wake |
68 | restore_common_regs | 75 | restore_common_regs |
69 | 1: ret | 76 | 1: ret |
70 | ENDPROC(call_rwsem_wake) | 77 | CFI_ENDPROC |
78 | ENDPROC(call_rwsem_wake) | ||
71 | 79 | ||
72 | /* Fix up special calling conventions */ | 80 | /* Fix up special calling conventions */ |
73 | ENTRY(call_rwsem_downgrade_wake) | 81 | ENTRY(call_rwsem_downgrade_wake) |
82 | CFI_STARTPROC | ||
74 | save_common_regs | 83 | save_common_regs |
75 | pushq %rdx | 84 | pushq_cfi %rdx |
85 | CFI_REL_OFFSET rdx, 0 | ||
76 | movq %rax,%rdi | 86 | movq %rax,%rdi |
77 | call rwsem_downgrade_wake | 87 | call rwsem_downgrade_wake |
78 | popq %rdx | 88 | popq_cfi %rdx |
89 | CFI_RESTORE rdx | ||
79 | restore_common_regs | 90 | restore_common_regs |
80 | ret | 91 | ret |
81 | ENDPROC(call_rwsem_downgrade_wake) | 92 | CFI_ENDPROC |
93 | ENDPROC(call_rwsem_downgrade_wake) | ||
diff --git a/arch/x86/lib/semaphore_32.S b/arch/x86/lib/semaphore_32.S index 648fe4741782..06691daa4108 100644 --- a/arch/x86/lib/semaphore_32.S +++ b/arch/x86/lib/semaphore_32.S | |||
@@ -36,7 +36,7 @@ | |||
36 | */ | 36 | */ |
37 | #ifdef CONFIG_SMP | 37 | #ifdef CONFIG_SMP |
38 | ENTRY(__write_lock_failed) | 38 | ENTRY(__write_lock_failed) |
39 | CFI_STARTPROC simple | 39 | CFI_STARTPROC |
40 | FRAME | 40 | FRAME |
41 | 2: LOCK_PREFIX | 41 | 2: LOCK_PREFIX |
42 | addl $ RW_LOCK_BIAS,(%eax) | 42 | addl $ RW_LOCK_BIAS,(%eax) |
@@ -74,29 +74,23 @@ ENTRY(__read_lock_failed) | |||
74 | /* Fix up special calling conventions */ | 74 | /* Fix up special calling conventions */ |
75 | ENTRY(call_rwsem_down_read_failed) | 75 | ENTRY(call_rwsem_down_read_failed) |
76 | CFI_STARTPROC | 76 | CFI_STARTPROC |
77 | push %ecx | 77 | pushl_cfi %ecx |
78 | CFI_ADJUST_CFA_OFFSET 4 | ||
79 | CFI_REL_OFFSET ecx,0 | 78 | CFI_REL_OFFSET ecx,0 |
80 | push %edx | 79 | pushl_cfi %edx |
81 | CFI_ADJUST_CFA_OFFSET 4 | ||
82 | CFI_REL_OFFSET edx,0 | 80 | CFI_REL_OFFSET edx,0 |
83 | call rwsem_down_read_failed | 81 | call rwsem_down_read_failed |
84 | pop %edx | 82 | popl_cfi %edx |
85 | CFI_ADJUST_CFA_OFFSET -4 | 83 | popl_cfi %ecx |
86 | pop %ecx | ||
87 | CFI_ADJUST_CFA_OFFSET -4 | ||
88 | ret | 84 | ret |
89 | CFI_ENDPROC | 85 | CFI_ENDPROC |
90 | ENDPROC(call_rwsem_down_read_failed) | 86 | ENDPROC(call_rwsem_down_read_failed) |
91 | 87 | ||
92 | ENTRY(call_rwsem_down_write_failed) | 88 | ENTRY(call_rwsem_down_write_failed) |
93 | CFI_STARTPROC | 89 | CFI_STARTPROC |
94 | push %ecx | 90 | pushl_cfi %ecx |
95 | CFI_ADJUST_CFA_OFFSET 4 | ||
96 | CFI_REL_OFFSET ecx,0 | 91 | CFI_REL_OFFSET ecx,0 |
97 | calll rwsem_down_write_failed | 92 | calll rwsem_down_write_failed |
98 | pop %ecx | 93 | popl_cfi %ecx |
99 | CFI_ADJUST_CFA_OFFSET -4 | ||
100 | ret | 94 | ret |
101 | CFI_ENDPROC | 95 | CFI_ENDPROC |
102 | ENDPROC(call_rwsem_down_write_failed) | 96 | ENDPROC(call_rwsem_down_write_failed) |
@@ -105,12 +99,10 @@ ENTRY(call_rwsem_wake) | |||
105 | CFI_STARTPROC | 99 | CFI_STARTPROC |
106 | decw %dx /* do nothing if still outstanding active readers */ | 100 | decw %dx /* do nothing if still outstanding active readers */ |
107 | jnz 1f | 101 | jnz 1f |
108 | push %ecx | 102 | pushl_cfi %ecx |
109 | CFI_ADJUST_CFA_OFFSET 4 | ||
110 | CFI_REL_OFFSET ecx,0 | 103 | CFI_REL_OFFSET ecx,0 |
111 | call rwsem_wake | 104 | call rwsem_wake |
112 | pop %ecx | 105 | popl_cfi %ecx |
113 | CFI_ADJUST_CFA_OFFSET -4 | ||
114 | 1: ret | 106 | 1: ret |
115 | CFI_ENDPROC | 107 | CFI_ENDPROC |
116 | ENDPROC(call_rwsem_wake) | 108 | ENDPROC(call_rwsem_wake) |
@@ -118,17 +110,13 @@ ENTRY(call_rwsem_wake) | |||
118 | /* Fix up special calling conventions */ | 110 | /* Fix up special calling conventions */ |
119 | ENTRY(call_rwsem_downgrade_wake) | 111 | ENTRY(call_rwsem_downgrade_wake) |
120 | CFI_STARTPROC | 112 | CFI_STARTPROC |
121 | push %ecx | 113 | pushl_cfi %ecx |
122 | CFI_ADJUST_CFA_OFFSET 4 | ||
123 | CFI_REL_OFFSET ecx,0 | 114 | CFI_REL_OFFSET ecx,0 |
124 | push %edx | 115 | pushl_cfi %edx |
125 | CFI_ADJUST_CFA_OFFSET 4 | ||
126 | CFI_REL_OFFSET edx,0 | 116 | CFI_REL_OFFSET edx,0 |
127 | call rwsem_downgrade_wake | 117 | call rwsem_downgrade_wake |
128 | pop %edx | 118 | popl_cfi %edx |
129 | CFI_ADJUST_CFA_OFFSET -4 | 119 | popl_cfi %ecx |
130 | pop %ecx | ||
131 | CFI_ADJUST_CFA_OFFSET -4 | ||
132 | ret | 120 | ret |
133 | CFI_ENDPROC | 121 | CFI_ENDPROC |
134 | ENDPROC(call_rwsem_downgrade_wake) | 122 | ENDPROC(call_rwsem_downgrade_wake) |
diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/lib/thunk_32.S index 650b11e00ecc..2930ae05d773 100644 --- a/arch/x86/lib/thunk_32.S +++ b/arch/x86/lib/thunk_32.S | |||
@@ -7,24 +7,6 @@ | |||
7 | 7 | ||
8 | #include <linux/linkage.h> | 8 | #include <linux/linkage.h> |
9 | 9 | ||
10 | #define ARCH_TRACE_IRQS_ON \ | ||
11 | pushl %eax; \ | ||
12 | pushl %ecx; \ | ||
13 | pushl %edx; \ | ||
14 | call trace_hardirqs_on; \ | ||
15 | popl %edx; \ | ||
16 | popl %ecx; \ | ||
17 | popl %eax; | ||
18 | |||
19 | #define ARCH_TRACE_IRQS_OFF \ | ||
20 | pushl %eax; \ | ||
21 | pushl %ecx; \ | ||
22 | pushl %edx; \ | ||
23 | call trace_hardirqs_off; \ | ||
24 | popl %edx; \ | ||
25 | popl %ecx; \ | ||
26 | popl %eax; | ||
27 | |||
28 | #ifdef CONFIG_TRACE_IRQFLAGS | 10 | #ifdef CONFIG_TRACE_IRQFLAGS |
29 | /* put return address in eax (arg1) */ | 11 | /* put return address in eax (arg1) */ |
30 | .macro thunk_ra name,func | 12 | .macro thunk_ra name,func |
diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S index bf9a7d5a5428..782b082c9ff7 100644 --- a/arch/x86/lib/thunk_64.S +++ b/arch/x86/lib/thunk_64.S | |||
@@ -22,26 +22,6 @@ | |||
22 | CFI_ENDPROC | 22 | CFI_ENDPROC |
23 | .endm | 23 | .endm |
24 | 24 | ||
25 | /* rdi: arg1 ... normal C conventions. rax is passed from C. */ | ||
26 | .macro thunk_retrax name,func | ||
27 | .globl \name | ||
28 | \name: | ||
29 | CFI_STARTPROC | ||
30 | SAVE_ARGS | ||
31 | call \func | ||
32 | jmp restore_norax | ||
33 | CFI_ENDPROC | ||
34 | .endm | ||
35 | |||
36 | |||
37 | .section .sched.text, "ax" | ||
38 | #ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM | ||
39 | thunk rwsem_down_read_failed_thunk,rwsem_down_read_failed | ||
40 | thunk rwsem_down_write_failed_thunk,rwsem_down_write_failed | ||
41 | thunk rwsem_wake_thunk,rwsem_wake | ||
42 | thunk rwsem_downgrade_thunk,rwsem_downgrade_wake | ||
43 | #endif | ||
44 | |||
45 | #ifdef CONFIG_TRACE_IRQFLAGS | 25 | #ifdef CONFIG_TRACE_IRQFLAGS |
46 | /* put return address in rdi (arg1) */ | 26 | /* put return address in rdi (arg1) */ |
47 | .macro thunk_ra name,func | 27 | .macro thunk_ra name,func |
@@ -72,10 +52,3 @@ restore: | |||
72 | RESTORE_ARGS | 52 | RESTORE_ARGS |
73 | ret | 53 | ret |
74 | CFI_ENDPROC | 54 | CFI_ENDPROC |
75 | |||
76 | CFI_STARTPROC | ||
77 | SAVE_ARGS | ||
78 | restore_norax: | ||
79 | RESTORE_ARGS 1 | ||
80 | ret | ||
81 | CFI_ENDPROC | ||