author     Linus Torvalds <torvalds@linux-foundation.org>   2015-04-13 16:16:36 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2015-04-13 16:16:36 -0400
commit     60f898eeaaa1c5d0162a4240bacf33a6c87ecef6 (patch)
tree       23eeac4b1e9a616779d22c104dbc8bd45dfeefd1 /arch/x86/lib
parent     977e1ba50893c15121557b39de586901fe3f75cf (diff)
parent     3b75232d55680ca166dffa274d0587d5faf0a016 (diff)
Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 asm changes from Ingo Molnar:
 "There were lots of changes in this development cycle:

   - over 100 separate cleanups, restructuring changes, speedups and
     fixes in the x86 system call, irq, trap and other entry code, part
     of a heroic effort to deobfuscate a decade old spaghetti asm code
     and its C code dependencies (Denys Vlasenko, Andy Lutomirski)

   - alternatives code fixes and enhancements (Borislav Petkov)

   - simplifications and cleanups to the compat code (Brian Gerst)

   - signal handling fixes and new x86 testcases (Andy Lutomirski)

   - various other fixes and cleanups

  By their nature many of these changes are risky - we tried to test
  them well on many different x86 systems (there are no known
  regressions), and they are split up finely to help bisection - but
  there's still a fair bit of residual risk left so caveat emptor"

* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (148 commits)
  perf/x86/64: Report regs_user->ax too in get_regs_user()
  perf/x86/64: Simplify regs_user->abi setting code in get_regs_user()
  perf/x86/64: Do report user_regs->cx while we are in syscall, in get_regs_user()
  perf/x86/64: Do not guess user_regs->cs, ss, sp in get_regs_user()
  x86/asm/entry/32: Tidy up JNZ instructions after TESTs
  x86/asm/entry/64: Reduce padding in execve stubs
  x86/asm/entry/64: Remove GET_THREAD_INFO() in ret_from_fork
  x86/asm/entry/64: Simplify jumps in ret_from_fork
  x86/asm/entry/64: Remove a redundant jump
  x86/asm/entry/64: Optimize [v]fork/clone stubs
  x86/asm/entry: Zero EXTRA_REGS for stub32_execve() too
  x86/asm/entry/64: Move stub_x32_execvecloser() to stub_execveat()
  x86/asm/entry/64: Use common code for rt_sigreturn() epilogue
  x86/asm/entry/64: Add forgotten CFI annotation
  x86/asm/entry/irq: Simplify interrupt dispatch table (IDT) layout
  x86/asm/entry/64: Move opportunistic sysret code to syscall code path
  x86, selftests: Add sigreturn selftest
  x86/alternatives: Guard NOPs optimization
  x86/asm/entry: Clear EXTRA_REGS for all executable formats
  x86/signal: Remove pax argument from restore_sigcontext
  ...
Diffstat (limited to 'arch/x86/lib')
-rw-r--r--   arch/x86/lib/atomic64_cx8_32.S     50
-rw-r--r--   arch/x86/lib/checksum_32.S         64
-rw-r--r--   arch/x86/lib/clear_page_64.S       66
-rw-r--r--   arch/x86/lib/copy_page_64.S        37
-rw-r--r--   arch/x86/lib/copy_user_64.S        46
-rw-r--r--   arch/x86/lib/csum-copy_64.S         2
-rw-r--r--   arch/x86/lib/insn.c                13
-rw-r--r--   arch/x86/lib/memcpy_64.S           68
-rw-r--r--   arch/x86/lib/memmove_64.S          19
-rw-r--r--   arch/x86/lib/memset_64.S           61
-rw-r--r--   arch/x86/lib/msr-reg.S             24
-rw-r--r--   arch/x86/lib/rwsem.S               44
-rw-r--r--   arch/x86/lib/thunk_32.S            18
-rw-r--r--   arch/x86/lib/thunk_64.S            28
-rw-r--r--   arch/x86/lib/x86-opcode-map.txt     9
15 files changed, 223 insertions, 326 deletions
diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S
index f5cc9eb1d51b..082a85167a5b 100644
--- a/arch/x86/lib/atomic64_cx8_32.S
+++ b/arch/x86/lib/atomic64_cx8_32.S
@@ -13,16 +13,6 @@
13#include <asm/alternative-asm.h> 13#include <asm/alternative-asm.h>
14#include <asm/dwarf2.h> 14#include <asm/dwarf2.h>
15 15
16.macro SAVE reg
17 pushl_cfi %\reg
18 CFI_REL_OFFSET \reg, 0
19.endm
20
21.macro RESTORE reg
22 popl_cfi %\reg
23 CFI_RESTORE \reg
24.endm
25
26.macro read64 reg 16.macro read64 reg
27 movl %ebx, %eax 17 movl %ebx, %eax
28 movl %ecx, %edx 18 movl %ecx, %edx
@@ -67,10 +57,10 @@ ENDPROC(atomic64_xchg_cx8)
67.macro addsub_return func ins insc 57.macro addsub_return func ins insc
68ENTRY(atomic64_\func\()_return_cx8) 58ENTRY(atomic64_\func\()_return_cx8)
69 CFI_STARTPROC 59 CFI_STARTPROC
70 SAVE ebp 60 pushl_cfi_reg ebp
71 SAVE ebx 61 pushl_cfi_reg ebx
72 SAVE esi 62 pushl_cfi_reg esi
73 SAVE edi 63 pushl_cfi_reg edi
74 64
75 movl %eax, %esi 65 movl %eax, %esi
76 movl %edx, %edi 66 movl %edx, %edi
@@ -89,10 +79,10 @@ ENTRY(atomic64_\func\()_return_cx8)
8910: 7910:
90 movl %ebx, %eax 80 movl %ebx, %eax
91 movl %ecx, %edx 81 movl %ecx, %edx
92 RESTORE edi 82 popl_cfi_reg edi
93 RESTORE esi 83 popl_cfi_reg esi
94 RESTORE ebx 84 popl_cfi_reg ebx
95 RESTORE ebp 85 popl_cfi_reg ebp
96 ret 86 ret
97 CFI_ENDPROC 87 CFI_ENDPROC
98ENDPROC(atomic64_\func\()_return_cx8) 88ENDPROC(atomic64_\func\()_return_cx8)
@@ -104,7 +94,7 @@ addsub_return sub sub sbb
104.macro incdec_return func ins insc 94.macro incdec_return func ins insc
105ENTRY(atomic64_\func\()_return_cx8) 95ENTRY(atomic64_\func\()_return_cx8)
106 CFI_STARTPROC 96 CFI_STARTPROC
107 SAVE ebx 97 pushl_cfi_reg ebx
108 98
109 read64 %esi 99 read64 %esi
1101: 1001:
@@ -119,7 +109,7 @@ ENTRY(atomic64_\func\()_return_cx8)
11910: 10910:
120 movl %ebx, %eax 110 movl %ebx, %eax
121 movl %ecx, %edx 111 movl %ecx, %edx
122 RESTORE ebx 112 popl_cfi_reg ebx
123 ret 113 ret
124 CFI_ENDPROC 114 CFI_ENDPROC
125ENDPROC(atomic64_\func\()_return_cx8) 115ENDPROC(atomic64_\func\()_return_cx8)
@@ -130,7 +120,7 @@ incdec_return dec sub sbb
130 120
131ENTRY(atomic64_dec_if_positive_cx8) 121ENTRY(atomic64_dec_if_positive_cx8)
132 CFI_STARTPROC 122 CFI_STARTPROC
133 SAVE ebx 123 pushl_cfi_reg ebx
134 124
135 read64 %esi 125 read64 %esi
1361: 1261:
@@ -146,18 +136,18 @@ ENTRY(atomic64_dec_if_positive_cx8)
1462: 1362:
147 movl %ebx, %eax 137 movl %ebx, %eax
148 movl %ecx, %edx 138 movl %ecx, %edx
149 RESTORE ebx 139 popl_cfi_reg ebx
150 ret 140 ret
151 CFI_ENDPROC 141 CFI_ENDPROC
152ENDPROC(atomic64_dec_if_positive_cx8) 142ENDPROC(atomic64_dec_if_positive_cx8)
153 143
154ENTRY(atomic64_add_unless_cx8) 144ENTRY(atomic64_add_unless_cx8)
155 CFI_STARTPROC 145 CFI_STARTPROC
156 SAVE ebp 146 pushl_cfi_reg ebp
157 SAVE ebx 147 pushl_cfi_reg ebx
158/* these just push these two parameters on the stack */ 148/* these just push these two parameters on the stack */
159 SAVE edi 149 pushl_cfi_reg edi
160 SAVE ecx 150 pushl_cfi_reg ecx
161 151
162 movl %eax, %ebp 152 movl %eax, %ebp
163 movl %edx, %edi 153 movl %edx, %edi
@@ -179,8 +169,8 @@ ENTRY(atomic64_add_unless_cx8)
1793: 1693:
180 addl $8, %esp 170 addl $8, %esp
181 CFI_ADJUST_CFA_OFFSET -8 171 CFI_ADJUST_CFA_OFFSET -8
182 RESTORE ebx 172 popl_cfi_reg ebx
183 RESTORE ebp 173 popl_cfi_reg ebp
184 ret 174 ret
1854: 1754:
186 cmpl %edx, 4(%esp) 176 cmpl %edx, 4(%esp)
@@ -192,7 +182,7 @@ ENDPROC(atomic64_add_unless_cx8)
192 182
193ENTRY(atomic64_inc_not_zero_cx8) 183ENTRY(atomic64_inc_not_zero_cx8)
194 CFI_STARTPROC 184 CFI_STARTPROC
195 SAVE ebx 185 pushl_cfi_reg ebx
196 186
197 read64 %esi 187 read64 %esi
1981: 1881:
@@ -209,7 +199,7 @@ ENTRY(atomic64_inc_not_zero_cx8)
209 199
210 movl $1, %eax 200 movl $1, %eax
2113: 2013:
212 RESTORE ebx 202 popl_cfi_reg ebx
213 ret 203 ret
214 CFI_ENDPROC 204 CFI_ENDPROC
215ENDPROC(atomic64_inc_not_zero_cx8) 205ENDPROC(atomic64_inc_not_zero_cx8)
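
The atomic64_*_cx8 routines touched above provide 64-bit atomics on 32-bit x86 by looping on cmpxchg8b (the "_cx8" CPUID feature); the hunks only change how the callee-saved registers are pushed and popped. For orientation only, the loop that the addsub_return/incdec_return macros generate corresponds roughly to the following compare-and-exchange retry loop - a C sketch, not the kernel code, which stays in assembly because cmpxchg8b pins its operands to EDX:EAX and ECX:EBX:

#include <stdint.h>

/*
 * Sketch of the retry loop in atomic64_add_return_cx8 and friends:
 * read the 64-bit value, compute the new one, and retry the
 * compare-and-exchange until no other CPU changed it in between.
 */
static uint64_t atomic64_add_return_sketch(uint64_t delta, uint64_t *v)
{
	uint64_t old = __atomic_load_n(v, __ATOMIC_RELAXED);
	uint64_t new;

	do {
		new = old + delta;	/* the "ins"/"insc" step of the macro */
	} while (!__atomic_compare_exchange_n(v, &old, new, 0,
					      __ATOMIC_SEQ_CST, __ATOMIC_RELAXED));
	return new;
}
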
diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S
index e78b8eee6615..9bc944a91274 100644
--- a/arch/x86/lib/checksum_32.S
+++ b/arch/x86/lib/checksum_32.S
@@ -51,10 +51,8 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
51 */ 51 */
52ENTRY(csum_partial) 52ENTRY(csum_partial)
53 CFI_STARTPROC 53 CFI_STARTPROC
54 pushl_cfi %esi 54 pushl_cfi_reg esi
55 CFI_REL_OFFSET esi, 0 55 pushl_cfi_reg ebx
56 pushl_cfi %ebx
57 CFI_REL_OFFSET ebx, 0
58 movl 20(%esp),%eax # Function arg: unsigned int sum 56 movl 20(%esp),%eax # Function arg: unsigned int sum
59 movl 16(%esp),%ecx # Function arg: int len 57 movl 16(%esp),%ecx # Function arg: int len
60 movl 12(%esp),%esi # Function arg: unsigned char *buff 58 movl 12(%esp),%esi # Function arg: unsigned char *buff
@@ -127,14 +125,12 @@ ENTRY(csum_partial)
1276: addl %ecx,%eax 1256: addl %ecx,%eax
128 adcl $0, %eax 126 adcl $0, %eax
1297: 1277:
130 testl $1, 12(%esp) 128 testb $1, 12(%esp)
131 jz 8f 129 jz 8f
132 roll $8, %eax 130 roll $8, %eax
1338: 1318:
134 popl_cfi %ebx 132 popl_cfi_reg ebx
135 CFI_RESTORE ebx 133 popl_cfi_reg esi
136 popl_cfi %esi
137 CFI_RESTORE esi
138 ret 134 ret
139 CFI_ENDPROC 135 CFI_ENDPROC
140ENDPROC(csum_partial) 136ENDPROC(csum_partial)
@@ -145,10 +141,8 @@ ENDPROC(csum_partial)
145 141
146ENTRY(csum_partial) 142ENTRY(csum_partial)
147 CFI_STARTPROC 143 CFI_STARTPROC
148 pushl_cfi %esi 144 pushl_cfi_reg esi
149 CFI_REL_OFFSET esi, 0 145 pushl_cfi_reg ebx
150 pushl_cfi %ebx
151 CFI_REL_OFFSET ebx, 0
152 movl 20(%esp),%eax # Function arg: unsigned int sum 146 movl 20(%esp),%eax # Function arg: unsigned int sum
153 movl 16(%esp),%ecx # Function arg: int len 147 movl 16(%esp),%ecx # Function arg: int len
154 movl 12(%esp),%esi # Function arg: const unsigned char *buf 148 movl 12(%esp),%esi # Function arg: const unsigned char *buf
@@ -251,14 +245,12 @@ ENTRY(csum_partial)
251 addl %ebx,%eax 245 addl %ebx,%eax
252 adcl $0,%eax 246 adcl $0,%eax
25380: 24780:
254 testl $1, 12(%esp) 248 testb $1, 12(%esp)
255 jz 90f 249 jz 90f
256 roll $8, %eax 250 roll $8, %eax
25790: 25190:
258 popl_cfi %ebx 252 popl_cfi_reg ebx
259 CFI_RESTORE ebx 253 popl_cfi_reg esi
260 popl_cfi %esi
261 CFI_RESTORE esi
262 ret 254 ret
263 CFI_ENDPROC 255 CFI_ENDPROC
264ENDPROC(csum_partial) 256ENDPROC(csum_partial)
@@ -298,12 +290,9 @@ ENTRY(csum_partial_copy_generic)
298 CFI_STARTPROC 290 CFI_STARTPROC
299 subl $4,%esp 291 subl $4,%esp
300 CFI_ADJUST_CFA_OFFSET 4 292 CFI_ADJUST_CFA_OFFSET 4
301 pushl_cfi %edi 293 pushl_cfi_reg edi
302 CFI_REL_OFFSET edi, 0 294 pushl_cfi_reg esi
303 pushl_cfi %esi 295 pushl_cfi_reg ebx
304 CFI_REL_OFFSET esi, 0
305 pushl_cfi %ebx
306 CFI_REL_OFFSET ebx, 0
307 movl ARGBASE+16(%esp),%eax # sum 296 movl ARGBASE+16(%esp),%eax # sum
308 movl ARGBASE+12(%esp),%ecx # len 297 movl ARGBASE+12(%esp),%ecx # len
309 movl ARGBASE+4(%esp),%esi # src 298 movl ARGBASE+4(%esp),%esi # src
@@ -412,12 +401,9 @@ DST( movb %cl, (%edi) )
412 401
413.previous 402.previous
414 403
415 popl_cfi %ebx 404 popl_cfi_reg ebx
416 CFI_RESTORE ebx 405 popl_cfi_reg esi
417 popl_cfi %esi 406 popl_cfi_reg edi
418 CFI_RESTORE esi
419 popl_cfi %edi
420 CFI_RESTORE edi
421 popl_cfi %ecx # equivalent to addl $4,%esp 407 popl_cfi %ecx # equivalent to addl $4,%esp
422 ret 408 ret
423 CFI_ENDPROC 409 CFI_ENDPROC
@@ -441,12 +427,9 @@ ENDPROC(csum_partial_copy_generic)
441 427
442ENTRY(csum_partial_copy_generic) 428ENTRY(csum_partial_copy_generic)
443 CFI_STARTPROC 429 CFI_STARTPROC
444 pushl_cfi %ebx 430 pushl_cfi_reg ebx
445 CFI_REL_OFFSET ebx, 0 431 pushl_cfi_reg edi
446 pushl_cfi %edi 432 pushl_cfi_reg esi
447 CFI_REL_OFFSET edi, 0
448 pushl_cfi %esi
449 CFI_REL_OFFSET esi, 0
450 movl ARGBASE+4(%esp),%esi #src 433 movl ARGBASE+4(%esp),%esi #src
451 movl ARGBASE+8(%esp),%edi #dst 434 movl ARGBASE+8(%esp),%edi #dst
452 movl ARGBASE+12(%esp),%ecx #len 435 movl ARGBASE+12(%esp),%ecx #len
@@ -506,12 +489,9 @@ DST( movb %dl, (%edi) )
506 jmp 7b 489 jmp 7b
507.previous 490.previous
508 491
509 popl_cfi %esi 492 popl_cfi_reg esi
510 CFI_RESTORE esi 493 popl_cfi_reg edi
511 popl_cfi %edi 494 popl_cfi_reg ebx
512 CFI_RESTORE edi
513 popl_cfi %ebx
514 CFI_RESTORE ebx
515 ret 495 ret
516 CFI_ENDPROC 496 CFI_ENDPROC
517ENDPROC(csum_partial_copy_generic) 497ENDPROC(csum_partial_copy_generic)
diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S
index f2145cfa12a6..e67e579c93bd 100644
--- a/arch/x86/lib/clear_page_64.S
+++ b/arch/x86/lib/clear_page_64.S
@@ -1,31 +1,35 @@
1#include <linux/linkage.h> 1#include <linux/linkage.h>
2#include <asm/dwarf2.h> 2#include <asm/dwarf2.h>
3#include <asm/cpufeature.h>
3#include <asm/alternative-asm.h> 4#include <asm/alternative-asm.h>
4 5
5/* 6/*
6 * Zero a page. 7 * Most CPUs support enhanced REP MOVSB/STOSB instructions. It is
7 * rdi page 8 * recommended to use this when possible and we do use them by default.
8 */ 9 * If enhanced REP MOVSB/STOSB is not available, try to use fast string.
9ENTRY(clear_page_c) 10 * Otherwise, use original.
11 */
12
13/*
14 * Zero a page.
15 * %rdi - page
16 */
17ENTRY(clear_page)
10 CFI_STARTPROC 18 CFI_STARTPROC
19
20 ALTERNATIVE_2 "jmp clear_page_orig", "", X86_FEATURE_REP_GOOD, \
21 "jmp clear_page_c_e", X86_FEATURE_ERMS
22
11 movl $4096/8,%ecx 23 movl $4096/8,%ecx
12 xorl %eax,%eax 24 xorl %eax,%eax
13 rep stosq 25 rep stosq
14 ret 26 ret
15 CFI_ENDPROC 27 CFI_ENDPROC
16ENDPROC(clear_page_c) 28ENDPROC(clear_page)
17 29
18ENTRY(clear_page_c_e) 30ENTRY(clear_page_orig)
19 CFI_STARTPROC 31 CFI_STARTPROC
20 movl $4096,%ecx
21 xorl %eax,%eax
22 rep stosb
23 ret
24 CFI_ENDPROC
25ENDPROC(clear_page_c_e)
26 32
27ENTRY(clear_page)
28 CFI_STARTPROC
29 xorl %eax,%eax 33 xorl %eax,%eax
30 movl $4096/64,%ecx 34 movl $4096/64,%ecx
31 .p2align 4 35 .p2align 4
@@ -45,29 +49,13 @@ ENTRY(clear_page)
45 nop 49 nop
46 ret 50 ret
47 CFI_ENDPROC 51 CFI_ENDPROC
48.Lclear_page_end: 52ENDPROC(clear_page_orig)
49ENDPROC(clear_page)
50
51 /*
52 * Some CPUs support enhanced REP MOVSB/STOSB instructions.
53 * It is recommended to use this when possible.
54 * If enhanced REP MOVSB/STOSB is not available, try to use fast string.
55 * Otherwise, use original function.
56 *
57 */
58 53
59#include <asm/cpufeature.h> 54ENTRY(clear_page_c_e)
60 55 CFI_STARTPROC
61 .section .altinstr_replacement,"ax" 56 movl $4096,%ecx
621: .byte 0xeb /* jmp <disp8> */ 57 xorl %eax,%eax
63 .byte (clear_page_c - clear_page) - (2f - 1b) /* offset */ 58 rep stosb
642: .byte 0xeb /* jmp <disp8> */ 59 ret
65 .byte (clear_page_c_e - clear_page) - (3f - 2b) /* offset */ 60 CFI_ENDPROC
663: 61ENDPROC(clear_page_c_e)
67 .previous
68 .section .altinstructions,"a"
69 altinstruction_entry clear_page,1b,X86_FEATURE_REP_GOOD,\
70 .Lclear_page_end-clear_page, 2b-1b
71 altinstruction_entry clear_page,2b,X86_FEATURE_ERMS, \
72 .Lclear_page_end-clear_page,3b-2b
73 .previous
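
Net effect of the ALTERNATIVE_2 line introduced above: at patch time the kernel keeps the inline rep stosq body on REP_GOOD CPUs, redirects to clear_page_c_e (rep stosb) on ERMS CPUs, and falls back to the unrolled clear_page_orig otherwise. In plain C the three code paths look roughly like this - a sketch with made-up feature-flag parameters; the kernel decides once at boot by patching the jmp, not per call:

#include <stddef.h>
#include <stdint.h>

#define PAGE_SIZE 4096

/* has_erms/has_rep_good stand in for X86_FEATURE_ERMS / X86_FEATURE_REP_GOOD. */
static void clear_page_sketch(void *page, int has_erms, int has_rep_good)
{
	if (has_erms) {
		/* clear_page_c_e: one byte at a time, i.e. rep stosb */
		unsigned char *p = page;
		for (size_t i = 0; i < PAGE_SIZE; i++)
			p[i] = 0;
	} else if (has_rep_good) {
		/* clear_page: eight bytes at a time, i.e. rep stosq */
		uint64_t *q = page;
		for (size_t i = 0; i < PAGE_SIZE / 8; i++)
			q[i] = 0;
	} else {
		/* clear_page_orig: unrolled 64-byte inner blocks */
		uint64_t *q = page;
		for (size_t i = 0; i < PAGE_SIZE / 8; i += 8) {
			q[i]     = 0; q[i + 1] = 0; q[i + 2] = 0; q[i + 3] = 0;
			q[i + 4] = 0; q[i + 5] = 0; q[i + 6] = 0; q[i + 7] = 0;
		}
	}
}
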
diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S
index 176cca67212b..8239dbcbf984 100644
--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -2,23 +2,26 @@
2 2
3#include <linux/linkage.h> 3#include <linux/linkage.h>
4#include <asm/dwarf2.h> 4#include <asm/dwarf2.h>
5#include <asm/cpufeature.h>
5#include <asm/alternative-asm.h> 6#include <asm/alternative-asm.h>
6 7
8/*
9 * Some CPUs run faster using the string copy instructions (sane microcode).
10 * It is also a lot simpler. Use this when possible. But, don't use streaming
11 * copy unless the CPU indicates X86_FEATURE_REP_GOOD. Could vary the
12 * prefetch distance based on SMP/UP.
13 */
7 ALIGN 14 ALIGN
8copy_page_rep: 15ENTRY(copy_page)
9 CFI_STARTPROC 16 CFI_STARTPROC
17 ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD
10 movl $4096/8, %ecx 18 movl $4096/8, %ecx
11 rep movsq 19 rep movsq
12 ret 20 ret
13 CFI_ENDPROC 21 CFI_ENDPROC
14ENDPROC(copy_page_rep) 22ENDPROC(copy_page)
15
16/*
17 * Don't use streaming copy unless the CPU indicates X86_FEATURE_REP_GOOD.
18 * Could vary the prefetch distance based on SMP/UP.
19*/
20 23
21ENTRY(copy_page) 24ENTRY(copy_page_regs)
22 CFI_STARTPROC 25 CFI_STARTPROC
23 subq $2*8, %rsp 26 subq $2*8, %rsp
24 CFI_ADJUST_CFA_OFFSET 2*8 27 CFI_ADJUST_CFA_OFFSET 2*8
@@ -90,21 +93,5 @@ ENTRY(copy_page)
90 addq $2*8, %rsp 93 addq $2*8, %rsp
91 CFI_ADJUST_CFA_OFFSET -2*8 94 CFI_ADJUST_CFA_OFFSET -2*8
92 ret 95 ret
93.Lcopy_page_end:
94 CFI_ENDPROC 96 CFI_ENDPROC
95ENDPROC(copy_page) 97ENDPROC(copy_page_regs)
96
97 /* Some CPUs run faster using the string copy instructions.
98 It is also a lot simpler. Use this when possible */
99
100#include <asm/cpufeature.h>
101
102 .section .altinstr_replacement,"ax"
1031: .byte 0xeb /* jmp <disp8> */
104 .byte (copy_page_rep - copy_page) - (2f - 1b) /* offset */
1052:
106 .previous
107 .section .altinstructions,"a"
108 altinstruction_entry copy_page, 1b, X86_FEATURE_REP_GOOD, \
109 .Lcopy_page_end-copy_page, 2b-1b
110 .previous
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index dee945d55594..fa997dfaef24 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -8,9 +8,6 @@
8 8
9#include <linux/linkage.h> 9#include <linux/linkage.h>
10#include <asm/dwarf2.h> 10#include <asm/dwarf2.h>
11
12#define FIX_ALIGNMENT 1
13
14#include <asm/current.h> 11#include <asm/current.h>
15#include <asm/asm-offsets.h> 12#include <asm/asm-offsets.h>
16#include <asm/thread_info.h> 13#include <asm/thread_info.h>
@@ -19,33 +16,7 @@
19#include <asm/asm.h> 16#include <asm/asm.h>
20#include <asm/smap.h> 17#include <asm/smap.h>
21 18
22/*
23 * By placing feature2 after feature1 in altinstructions section, we logically
24 * implement:
25 * If CPU has feature2, jmp to alt2 is used
26 * else if CPU has feature1, jmp to alt1 is used
27 * else jmp to orig is used.
28 */
29 .macro ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2
300:
31 .byte 0xe9 /* 32bit jump */
32 .long \orig-1f /* by default jump to orig */
331:
34 .section .altinstr_replacement,"ax"
352: .byte 0xe9 /* near jump with 32bit immediate */
36 .long \alt1-1b /* offset */ /* or alternatively to alt1 */
373: .byte 0xe9 /* near jump with 32bit immediate */
38 .long \alt2-1b /* offset */ /* or alternatively to alt2 */
39 .previous
40
41 .section .altinstructions,"a"
42 altinstruction_entry 0b,2b,\feature1,5,5
43 altinstruction_entry 0b,3b,\feature2,5,5
44 .previous
45 .endm
46
47 .macro ALIGN_DESTINATION 19 .macro ALIGN_DESTINATION
48#ifdef FIX_ALIGNMENT
49 /* check for bad alignment of destination */ 20 /* check for bad alignment of destination */
50 movl %edi,%ecx 21 movl %edi,%ecx
51 andl $7,%ecx 22 andl $7,%ecx
@@ -67,7 +38,6 @@
67 38
68 _ASM_EXTABLE(100b,103b) 39 _ASM_EXTABLE(100b,103b)
69 _ASM_EXTABLE(101b,103b) 40 _ASM_EXTABLE(101b,103b)
70#endif
71 .endm 41 .endm
72 42
73/* Standard copy_to_user with segment limit checking */ 43/* Standard copy_to_user with segment limit checking */
@@ -79,9 +49,11 @@ ENTRY(_copy_to_user)
79 jc bad_to_user 49 jc bad_to_user
80 cmpq TI_addr_limit(%rax),%rcx 50 cmpq TI_addr_limit(%rax),%rcx
81 ja bad_to_user 51 ja bad_to_user
82 ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \ 52 ALTERNATIVE_2 "jmp copy_user_generic_unrolled", \
83 copy_user_generic_unrolled,copy_user_generic_string, \ 53 "jmp copy_user_generic_string", \
84 copy_user_enhanced_fast_string 54 X86_FEATURE_REP_GOOD, \
55 "jmp copy_user_enhanced_fast_string", \
56 X86_FEATURE_ERMS
85 CFI_ENDPROC 57 CFI_ENDPROC
86ENDPROC(_copy_to_user) 58ENDPROC(_copy_to_user)
87 59
@@ -94,9 +66,11 @@ ENTRY(_copy_from_user)
94 jc bad_from_user 66 jc bad_from_user
95 cmpq TI_addr_limit(%rax),%rcx 67 cmpq TI_addr_limit(%rax),%rcx
96 ja bad_from_user 68 ja bad_from_user
97 ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \ 69 ALTERNATIVE_2 "jmp copy_user_generic_unrolled", \
98 copy_user_generic_unrolled,copy_user_generic_string, \ 70 "jmp copy_user_generic_string", \
99 copy_user_enhanced_fast_string 71 X86_FEATURE_REP_GOOD, \
72 "jmp copy_user_enhanced_fast_string", \
73 X86_FEATURE_ERMS
100 CFI_ENDPROC 74 CFI_ENDPROC
101ENDPROC(_copy_from_user) 75ENDPROC(_copy_from_user)
102 76
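
The deleted ALTERNATIVE_JUMP macro comment and the new ALTERNATIVE_2 users encode the same precedence rule: alternative entries are applied in order, so the ERMS entry, listed after REP_GOOD, wins on CPUs that have both. A minimal C rendering of that rule (the function-pointer parameters are illustrative only):

/*
 * Later entries override earlier ones, mirroring the order of the
 * altinstruction entries: default -> REP_GOOD -> ERMS.
 */
typedef void (*copy_user_fn)(void *dst, const void *src, unsigned long len);

static copy_user_fn pick_copy_user(int has_rep_good, int has_erms,
				   copy_user_fn unrolled,
				   copy_user_fn string,
				   copy_user_fn erms)
{
	copy_user_fn fn = unrolled;	/* "jmp copy_user_generic_unrolled" */

	if (has_rep_good)
		fn = string;		/* "jmp copy_user_generic_string" */
	if (has_erms)
		fn = erms;		/* "jmp copy_user_enhanced_fast_string" */
	return fn;
}
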
diff --git a/arch/x86/lib/csum-copy_64.S b/arch/x86/lib/csum-copy_64.S
index 2419d5fefae3..9734182966f3 100644
--- a/arch/x86/lib/csum-copy_64.S
+++ b/arch/x86/lib/csum-copy_64.S
@@ -196,7 +196,7 @@ ENTRY(csum_partial_copy_generic)
196 196
197 /* handle last odd byte */ 197 /* handle last odd byte */
198.Lhandle_1: 198.Lhandle_1:
199 testl $1, %r10d 199 testb $1, %r10b
200 jz .Lende 200 jz .Lende
201 xorl %ebx, %ebx 201 xorl %ebx, %ebx
202 source 202 source
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c
index 1313ae6b478b..8f72b334aea0 100644
--- a/arch/x86/lib/insn.c
+++ b/arch/x86/lib/insn.c
@@ -52,6 +52,13 @@
52 */ 52 */
53void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64) 53void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64)
54{ 54{
55 /*
56 * Instructions longer than MAX_INSN_SIZE (15 bytes) are invalid
57 * even if the input buffer is long enough to hold them.
58 */
59 if (buf_len > MAX_INSN_SIZE)
60 buf_len = MAX_INSN_SIZE;
61
55 memset(insn, 0, sizeof(*insn)); 62 memset(insn, 0, sizeof(*insn));
56 insn->kaddr = kaddr; 63 insn->kaddr = kaddr;
57 insn->end_kaddr = kaddr + buf_len; 64 insn->end_kaddr = kaddr + buf_len;
@@ -164,6 +171,12 @@ found:
164 /* VEX.W overrides opnd_size */ 171 /* VEX.W overrides opnd_size */
165 insn->opnd_bytes = 8; 172 insn->opnd_bytes = 8;
166 } else { 173 } else {
174 /*
175 * For VEX2, fake VEX3-like byte#2.
176 * Makes it easier to decode vex.W, vex.vvvv,
177 * vex.L and vex.pp. Masking with 0x7f sets vex.W == 0.
178 */
179 insn->vex_prefix.bytes[2] = b2 & 0x7f;
167 insn->vex_prefix.nbytes = 2; 180 insn->vex_prefix.nbytes = 2;
168 insn->next_byte += 2; 181 insn->next_byte += 2;
169 } 182 }
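
The VEX comment added above relies on the prefix layouts: byte 2 of a three-byte VEX prefix holds W, inverted vvvv, L and pp, while the payload byte of a two-byte VEX prefix carries inverted R in the W position and the same vvvv/L/pp fields below it. Clearing bit 7 therefore turns the two-byte form into a usable "byte 2" with vex.W == 0. A small, self-contained C illustration (the sample encoding is just an example, not taken from the patch):

#include <stdint.h>
#include <stdio.h>

/* Decode the fields of a VEX3 "byte 2": W(7) vvvv~(6:3) L(2) pp(1:0). */
static void decode_vex_byte2(uint8_t b2)
{
	printf("W=%u vvvv=%u L=%u pp=%u\n",
	       (b2 >> 7) & 1,		/* W                       */
	       (~b2 >> 3) & 0xf,	/* vvvv is stored inverted */
	       (b2 >> 2) & 1,		/* L: 0 = 128-bit          */
	       b2 & 3);			/* pp: implied prefix      */
}

int main(void)
{
	uint8_t vex2_payload = 0xc9;	/* e.g. "c5 c9 ef d2" = vpxor %xmm2,%xmm6,%xmm2 */

	/* What insn.c stores in vex_prefix.bytes[2]: bit 7 masked off. */
	decode_vex_byte2(vex2_payload & 0x7f);
	return 0;
}
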
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 89b53c9968e7..b046664f5a1c 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -1,12 +1,20 @@
1/* Copyright 2002 Andi Kleen */ 1/* Copyright 2002 Andi Kleen */
2 2
3#include <linux/linkage.h> 3#include <linux/linkage.h>
4
5#include <asm/cpufeature.h> 4#include <asm/cpufeature.h>
6#include <asm/dwarf2.h> 5#include <asm/dwarf2.h>
7#include <asm/alternative-asm.h> 6#include <asm/alternative-asm.h>
8 7
9/* 8/*
9 * We build a jump to memcpy_orig by default which gets NOPped out on
10 * the majority of x86 CPUs which set REP_GOOD. In addition, CPUs which
11 * have the enhanced REP MOVSB/STOSB feature (ERMS), change those NOPs
12 * to a jmp to memcpy_erms which does the REP; MOVSB mem copy.
13 */
14
15.weak memcpy
16
17/*
10 * memcpy - Copy a memory block. 18 * memcpy - Copy a memory block.
11 * 19 *
12 * Input: 20 * Input:
@@ -17,15 +25,11 @@
17 * Output: 25 * Output:
18 * rax original destination 26 * rax original destination
19 */ 27 */
28ENTRY(__memcpy)
29ENTRY(memcpy)
30 ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
31 "jmp memcpy_erms", X86_FEATURE_ERMS
20 32
21/*
22 * memcpy_c() - fast string ops (REP MOVSQ) based variant.
23 *
24 * This gets patched over the unrolled variant (below) via the
25 * alternative instructions framework:
26 */
27 .section .altinstr_replacement, "ax", @progbits
28.Lmemcpy_c:
29 movq %rdi, %rax 33 movq %rdi, %rax
30 movq %rdx, %rcx 34 movq %rdx, %rcx
31 shrq $3, %rcx 35 shrq $3, %rcx
@@ -34,29 +38,21 @@
34 movl %edx, %ecx 38 movl %edx, %ecx
35 rep movsb 39 rep movsb
36 ret 40 ret
37.Lmemcpy_e: 41ENDPROC(memcpy)
38 .previous 42ENDPROC(__memcpy)
39 43
40/* 44/*
41 * memcpy_c_e() - enhanced fast string memcpy. This is faster and simpler than 45 * memcpy_erms() - enhanced fast string memcpy. This is faster and
42 * memcpy_c. Use memcpy_c_e when possible. 46 * simpler than memcpy. Use memcpy_erms when possible.
43 *
44 * This gets patched over the unrolled variant (below) via the
45 * alternative instructions framework:
46 */ 47 */
47 .section .altinstr_replacement, "ax", @progbits 48ENTRY(memcpy_erms)
48.Lmemcpy_c_e:
49 movq %rdi, %rax 49 movq %rdi, %rax
50 movq %rdx, %rcx 50 movq %rdx, %rcx
51 rep movsb 51 rep movsb
52 ret 52 ret
53.Lmemcpy_e_e: 53ENDPROC(memcpy_erms)
54 .previous
55
56.weak memcpy
57 54
58ENTRY(__memcpy) 55ENTRY(memcpy_orig)
59ENTRY(memcpy)
60 CFI_STARTPROC 56 CFI_STARTPROC
61 movq %rdi, %rax 57 movq %rdi, %rax
62 58
@@ -183,26 +179,4 @@ ENTRY(memcpy)
183.Lend: 179.Lend:
184 retq 180 retq
185 CFI_ENDPROC 181 CFI_ENDPROC
186ENDPROC(memcpy) 182ENDPROC(memcpy_orig)
187ENDPROC(__memcpy)
188
189 /*
190 * Some CPUs are adding enhanced REP MOVSB/STOSB feature
191 * If the feature is supported, memcpy_c_e() is the first choice.
192 * If enhanced rep movsb copy is not available, use fast string copy
193 * memcpy_c() when possible. This is faster and code is simpler than
194 * original memcpy().
195 * Otherwise, original memcpy() is used.
196 * In .altinstructions section, ERMS feature is placed after REG_GOOD
197 * feature to implement the right patch order.
198 *
199 * Replace only beginning, memcpy is used to apply alternatives,
200 * so it is silly to overwrite itself with nops - reboot is the
201 * only outcome...
202 */
203 .section .altinstructions, "a"
204 altinstruction_entry __memcpy,.Lmemcpy_c,X86_FEATURE_REP_GOOD,\
205 .Lmemcpy_e-.Lmemcpy_c,.Lmemcpy_e-.Lmemcpy_c
206 altinstruction_entry __memcpy,.Lmemcpy_c_e,X86_FEATURE_ERMS, \
207 .Lmemcpy_e_e-.Lmemcpy_c_e,.Lmemcpy_e_e-.Lmemcpy_c_e
208 .previous
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S
index 9c4b530575da..0f8a0d0331b9 100644
--- a/arch/x86/lib/memmove_64.S
+++ b/arch/x86/lib/memmove_64.S
@@ -5,7 +5,6 @@
5 * This assembly file is re-written from memmove_64.c file. 5 * This assembly file is re-written from memmove_64.c file.
6 * - Copyright 2011 Fenghua Yu <fenghua.yu@intel.com> 6 * - Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
7 */ 7 */
8#define _STRING_C
9#include <linux/linkage.h> 8#include <linux/linkage.h>
10#include <asm/dwarf2.h> 9#include <asm/dwarf2.h>
11#include <asm/cpufeature.h> 10#include <asm/cpufeature.h>
@@ -44,6 +43,8 @@ ENTRY(__memmove)
44 jg 2f 43 jg 2f
45 44
46.Lmemmove_begin_forward: 45.Lmemmove_begin_forward:
46 ALTERNATIVE "", "movq %rdx, %rcx; rep movsb; retq", X86_FEATURE_ERMS
47
47 /* 48 /*
48 * movsq instruction have many startup latency 49 * movsq instruction have many startup latency
49 * so we handle small size by general register. 50 * so we handle small size by general register.
@@ -207,21 +208,5 @@ ENTRY(__memmove)
20713: 20813:
208 retq 209 retq
209 CFI_ENDPROC 210 CFI_ENDPROC
210
211 .section .altinstr_replacement,"ax"
212.Lmemmove_begin_forward_efs:
213 /* Forward moving data. */
214 movq %rdx, %rcx
215 rep movsb
216 retq
217.Lmemmove_end_forward_efs:
218 .previous
219
220 .section .altinstructions,"a"
221 altinstruction_entry .Lmemmove_begin_forward, \
222 .Lmemmove_begin_forward_efs,X86_FEATURE_ERMS, \
223 .Lmemmove_end_forward-.Lmemmove_begin_forward, \
224 .Lmemmove_end_forward_efs-.Lmemmove_begin_forward_efs
225 .previous
226ENDPROC(__memmove) 211ENDPROC(__memmove)
227ENDPROC(memmove) 212ENDPROC(memmove)
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index 6f44935c6a60..93118fb23976 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -5,19 +5,30 @@
5#include <asm/cpufeature.h> 5#include <asm/cpufeature.h>
6#include <asm/alternative-asm.h> 6#include <asm/alternative-asm.h>
7 7
8.weak memset
9
8/* 10/*
9 * ISO C memset - set a memory block to a byte value. This function uses fast 11 * ISO C memset - set a memory block to a byte value. This function uses fast
10 * string to get better performance than the original function. The code is 12 * string to get better performance than the original function. The code is
11 * simpler and shorter than the orignal function as well. 13 * simpler and shorter than the orignal function as well.
12 * 14 *
13 * rdi destination 15 * rdi destination
14 * rsi value (char) 16 * rsi value (char)
15 * rdx count (bytes) 17 * rdx count (bytes)
16 * 18 *
17 * rax original destination 19 * rax original destination
18 */ 20 */
19 .section .altinstr_replacement, "ax", @progbits 21ENTRY(memset)
20.Lmemset_c: 22ENTRY(__memset)
23 /*
24 * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended
25 * to use it when possible. If not available, use fast string instructions.
26 *
27 * Otherwise, use original memset function.
28 */
29 ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \
30 "jmp memset_erms", X86_FEATURE_ERMS
31
21 movq %rdi,%r9 32 movq %rdi,%r9
22 movq %rdx,%rcx 33 movq %rdx,%rcx
23 andl $7,%edx 34 andl $7,%edx
@@ -31,8 +42,8 @@
31 rep stosb 42 rep stosb
32 movq %r9,%rax 43 movq %r9,%rax
33 ret 44 ret
34.Lmemset_e: 45ENDPROC(memset)
35 .previous 46ENDPROC(__memset)
36 47
37/* 48/*
38 * ISO C memset - set a memory block to a byte value. This function uses 49 * ISO C memset - set a memory block to a byte value. This function uses
@@ -45,21 +56,16 @@
45 * 56 *
46 * rax original destination 57 * rax original destination
47 */ 58 */
48 .section .altinstr_replacement, "ax", @progbits 59ENTRY(memset_erms)
49.Lmemset_c_e:
50 movq %rdi,%r9 60 movq %rdi,%r9
51 movb %sil,%al 61 movb %sil,%al
52 movq %rdx,%rcx 62 movq %rdx,%rcx
53 rep stosb 63 rep stosb
54 movq %r9,%rax 64 movq %r9,%rax
55 ret 65 ret
56.Lmemset_e_e: 66ENDPROC(memset_erms)
57 .previous
58
59.weak memset
60 67
61ENTRY(memset) 68ENTRY(memset_orig)
62ENTRY(__memset)
63 CFI_STARTPROC 69 CFI_STARTPROC
64 movq %rdi,%r10 70 movq %rdi,%r10
65 71
@@ -134,23 +140,4 @@ ENTRY(__memset)
134 jmp .Lafter_bad_alignment 140 jmp .Lafter_bad_alignment
135.Lfinal: 141.Lfinal:
136 CFI_ENDPROC 142 CFI_ENDPROC
137ENDPROC(memset) 143ENDPROC(memset_orig)
138ENDPROC(__memset)
139
140 /* Some CPUs support enhanced REP MOVSB/STOSB feature.
141 * It is recommended to use this when possible.
142 *
143 * If enhanced REP MOVSB/STOSB feature is not available, use fast string
144 * instructions.
145 *
146 * Otherwise, use original memset function.
147 *
148 * In .altinstructions section, ERMS feature is placed after REG_GOOD
149 * feature to implement the right patch order.
150 */
151 .section .altinstructions,"a"
152 altinstruction_entry __memset,.Lmemset_c,X86_FEATURE_REP_GOOD,\
153 .Lfinal-__memset,.Lmemset_e-.Lmemset_c
154 altinstruction_entry __memset,.Lmemset_c_e,X86_FEATURE_ERMS, \
155 .Lfinal-__memset,.Lmemset_e_e-.Lmemset_c_e
156 .previous
diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S
index f6d13eefad10..3ca5218fbece 100644
--- a/arch/x86/lib/msr-reg.S
+++ b/arch/x86/lib/msr-reg.S
@@ -14,8 +14,8 @@
14.macro op_safe_regs op 14.macro op_safe_regs op
15ENTRY(\op\()_safe_regs) 15ENTRY(\op\()_safe_regs)
16 CFI_STARTPROC 16 CFI_STARTPROC
17 pushq_cfi %rbx 17 pushq_cfi_reg rbx
18 pushq_cfi %rbp 18 pushq_cfi_reg rbp
19 movq %rdi, %r10 /* Save pointer */ 19 movq %rdi, %r10 /* Save pointer */
20 xorl %r11d, %r11d /* Return value */ 20 xorl %r11d, %r11d /* Return value */
21 movl (%rdi), %eax 21 movl (%rdi), %eax
@@ -35,8 +35,8 @@ ENTRY(\op\()_safe_regs)
35 movl %ebp, 20(%r10) 35 movl %ebp, 20(%r10)
36 movl %esi, 24(%r10) 36 movl %esi, 24(%r10)
37 movl %edi, 28(%r10) 37 movl %edi, 28(%r10)
38 popq_cfi %rbp 38 popq_cfi_reg rbp
39 popq_cfi %rbx 39 popq_cfi_reg rbx
40 ret 40 ret
413: 413:
42 CFI_RESTORE_STATE 42 CFI_RESTORE_STATE
@@ -53,10 +53,10 @@ ENDPROC(\op\()_safe_regs)
53.macro op_safe_regs op 53.macro op_safe_regs op
54ENTRY(\op\()_safe_regs) 54ENTRY(\op\()_safe_regs)
55 CFI_STARTPROC 55 CFI_STARTPROC
56 pushl_cfi %ebx 56 pushl_cfi_reg ebx
57 pushl_cfi %ebp 57 pushl_cfi_reg ebp
58 pushl_cfi %esi 58 pushl_cfi_reg esi
59 pushl_cfi %edi 59 pushl_cfi_reg edi
60 pushl_cfi $0 /* Return value */ 60 pushl_cfi $0 /* Return value */
61 pushl_cfi %eax 61 pushl_cfi %eax
62 movl 4(%eax), %ecx 62 movl 4(%eax), %ecx
@@ -80,10 +80,10 @@ ENTRY(\op\()_safe_regs)
80 movl %esi, 24(%eax) 80 movl %esi, 24(%eax)
81 movl %edi, 28(%eax) 81 movl %edi, 28(%eax)
82 popl_cfi %eax 82 popl_cfi %eax
83 popl_cfi %edi 83 popl_cfi_reg edi
84 popl_cfi %esi 84 popl_cfi_reg esi
85 popl_cfi %ebp 85 popl_cfi_reg ebp
86 popl_cfi %ebx 86 popl_cfi_reg ebx
87 ret 87 ret
883: 883:
89 CFI_RESTORE_STATE 89 CFI_RESTORE_STATE
diff --git a/arch/x86/lib/rwsem.S b/arch/x86/lib/rwsem.S
index 5dff5f042468..2322abe4da3b 100644
--- a/arch/x86/lib/rwsem.S
+++ b/arch/x86/lib/rwsem.S
@@ -34,10 +34,10 @@
34 */ 34 */
35 35
36#define save_common_regs \ 36#define save_common_regs \
37 pushl_cfi %ecx; CFI_REL_OFFSET ecx, 0 37 pushl_cfi_reg ecx
38 38
39#define restore_common_regs \ 39#define restore_common_regs \
40 popl_cfi %ecx; CFI_RESTORE ecx 40 popl_cfi_reg ecx
41 41
42 /* Avoid uglifying the argument copying x86-64 needs to do. */ 42 /* Avoid uglifying the argument copying x86-64 needs to do. */
43 .macro movq src, dst 43 .macro movq src, dst
@@ -64,22 +64,22 @@
64 */ 64 */
65 65
66#define save_common_regs \ 66#define save_common_regs \
67 pushq_cfi %rdi; CFI_REL_OFFSET rdi, 0; \ 67 pushq_cfi_reg rdi; \
68 pushq_cfi %rsi; CFI_REL_OFFSET rsi, 0; \ 68 pushq_cfi_reg rsi; \
69 pushq_cfi %rcx; CFI_REL_OFFSET rcx, 0; \ 69 pushq_cfi_reg rcx; \
70 pushq_cfi %r8; CFI_REL_OFFSET r8, 0; \ 70 pushq_cfi_reg r8; \
71 pushq_cfi %r9; CFI_REL_OFFSET r9, 0; \ 71 pushq_cfi_reg r9; \
72 pushq_cfi %r10; CFI_REL_OFFSET r10, 0; \ 72 pushq_cfi_reg r10; \
73 pushq_cfi %r11; CFI_REL_OFFSET r11, 0 73 pushq_cfi_reg r11
74 74
75#define restore_common_regs \ 75#define restore_common_regs \
76 popq_cfi %r11; CFI_RESTORE r11; \ 76 popq_cfi_reg r11; \
77 popq_cfi %r10; CFI_RESTORE r10; \ 77 popq_cfi_reg r10; \
78 popq_cfi %r9; CFI_RESTORE r9; \ 78 popq_cfi_reg r9; \
79 popq_cfi %r8; CFI_RESTORE r8; \ 79 popq_cfi_reg r8; \
80 popq_cfi %rcx; CFI_RESTORE rcx; \ 80 popq_cfi_reg rcx; \
81 popq_cfi %rsi; CFI_RESTORE rsi; \ 81 popq_cfi_reg rsi; \
82 popq_cfi %rdi; CFI_RESTORE rdi 82 popq_cfi_reg rdi
83 83
84#endif 84#endif
85 85
@@ -87,12 +87,10 @@
87ENTRY(call_rwsem_down_read_failed) 87ENTRY(call_rwsem_down_read_failed)
88 CFI_STARTPROC 88 CFI_STARTPROC
89 save_common_regs 89 save_common_regs
90 __ASM_SIZE(push,_cfi) %__ASM_REG(dx) 90 __ASM_SIZE(push,_cfi_reg) __ASM_REG(dx)
91 CFI_REL_OFFSET __ASM_REG(dx), 0
92 movq %rax,%rdi 91 movq %rax,%rdi
93 call rwsem_down_read_failed 92 call rwsem_down_read_failed
94 __ASM_SIZE(pop,_cfi) %__ASM_REG(dx) 93 __ASM_SIZE(pop,_cfi_reg) __ASM_REG(dx)
95 CFI_RESTORE __ASM_REG(dx)
96 restore_common_regs 94 restore_common_regs
97 ret 95 ret
98 CFI_ENDPROC 96 CFI_ENDPROC
@@ -124,12 +122,10 @@ ENDPROC(call_rwsem_wake)
124ENTRY(call_rwsem_downgrade_wake) 122ENTRY(call_rwsem_downgrade_wake)
125 CFI_STARTPROC 123 CFI_STARTPROC
126 save_common_regs 124 save_common_regs
127 __ASM_SIZE(push,_cfi) %__ASM_REG(dx) 125 __ASM_SIZE(push,_cfi_reg) __ASM_REG(dx)
128 CFI_REL_OFFSET __ASM_REG(dx), 0
129 movq %rax,%rdi 126 movq %rax,%rdi
130 call rwsem_downgrade_wake 127 call rwsem_downgrade_wake
131 __ASM_SIZE(pop,_cfi) %__ASM_REG(dx) 128 __ASM_SIZE(pop,_cfi_reg) __ASM_REG(dx)
132 CFI_RESTORE __ASM_REG(dx)
133 restore_common_regs 129 restore_common_regs
134 ret 130 ret
135 CFI_ENDPROC 131 CFI_ENDPROC
diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/lib/thunk_32.S
index e28cdaf5ac2c..5eb715087b80 100644
--- a/arch/x86/lib/thunk_32.S
+++ b/arch/x86/lib/thunk_32.S
@@ -13,12 +13,9 @@
13 .globl \name 13 .globl \name
14\name: 14\name:
15 CFI_STARTPROC 15 CFI_STARTPROC
16 pushl_cfi %eax 16 pushl_cfi_reg eax
17 CFI_REL_OFFSET eax, 0 17 pushl_cfi_reg ecx
18 pushl_cfi %ecx 18 pushl_cfi_reg edx
19 CFI_REL_OFFSET ecx, 0
20 pushl_cfi %edx
21 CFI_REL_OFFSET edx, 0
22 19
23 .if \put_ret_addr_in_eax 20 .if \put_ret_addr_in_eax
24 /* Place EIP in the arg1 */ 21 /* Place EIP in the arg1 */
@@ -26,12 +23,9 @@
26 .endif 23 .endif
27 24
28 call \func 25 call \func
29 popl_cfi %edx 26 popl_cfi_reg edx
30 CFI_RESTORE edx 27 popl_cfi_reg ecx
31 popl_cfi %ecx 28 popl_cfi_reg eax
32 CFI_RESTORE ecx
33 popl_cfi %eax
34 CFI_RESTORE eax
35 ret 29 ret
36 CFI_ENDPROC 30 CFI_ENDPROC
37 _ASM_NOKPROBE(\name) 31 _ASM_NOKPROBE(\name)
diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S
index b30b5ebd614a..f89ba4e93025 100644
--- a/arch/x86/lib/thunk_64.S
+++ b/arch/x86/lib/thunk_64.S
@@ -17,9 +17,18 @@
17 CFI_STARTPROC 17 CFI_STARTPROC
18 18
19 /* this one pushes 9 elems, the next one would be %rIP */ 19 /* this one pushes 9 elems, the next one would be %rIP */
20 SAVE_ARGS 20 pushq_cfi_reg rdi
21 pushq_cfi_reg rsi
22 pushq_cfi_reg rdx
23 pushq_cfi_reg rcx
24 pushq_cfi_reg rax
25 pushq_cfi_reg r8
26 pushq_cfi_reg r9
27 pushq_cfi_reg r10
28 pushq_cfi_reg r11
21 29
22 .if \put_ret_addr_in_rdi 30 .if \put_ret_addr_in_rdi
31 /* 9*8(%rsp) is return addr on stack */
23 movq_cfi_restore 9*8, rdi 32 movq_cfi_restore 9*8, rdi
24 .endif 33 .endif
25 34
@@ -45,11 +54,22 @@
45#endif 54#endif
46#endif 55#endif
47 56
48 /* SAVE_ARGS below is used only for the .cfi directives it contains. */ 57#if defined(CONFIG_TRACE_IRQFLAGS) \
58 || defined(CONFIG_DEBUG_LOCK_ALLOC) \
59 || defined(CONFIG_PREEMPT)
49 CFI_STARTPROC 60 CFI_STARTPROC
50 SAVE_ARGS 61 CFI_ADJUST_CFA_OFFSET 9*8
51restore: 62restore:
52 RESTORE_ARGS 63 popq_cfi_reg r11
64 popq_cfi_reg r10
65 popq_cfi_reg r9
66 popq_cfi_reg r8
67 popq_cfi_reg rax
68 popq_cfi_reg rcx
69 popq_cfi_reg rdx
70 popq_cfi_reg rsi
71 popq_cfi_reg rdi
53 ret 72 ret
54 CFI_ENDPROC 73 CFI_ENDPROC
55 _ASM_NOKPROBE(restore) 74 _ASM_NOKPROBE(restore)
75#endif
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index 1a2be7c6895d..816488c0b97e 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -273,6 +273,9 @@ dd: ESC
273de: ESC 273de: ESC
274df: ESC 274df: ESC
275# 0xe0 - 0xef 275# 0xe0 - 0xef
276# Note: "forced64" is Intel CPU behavior: they ignore 0x66 prefix
277# in 64-bit mode. AMD CPUs accept 0x66 prefix, it causes RIP truncation
278# to 16 bits. In 32-bit mode, 0x66 is accepted by both Intel and AMD.
276e0: LOOPNE/LOOPNZ Jb (f64) 279e0: LOOPNE/LOOPNZ Jb (f64)
277e1: LOOPE/LOOPZ Jb (f64) 280e1: LOOPE/LOOPZ Jb (f64)
278e2: LOOP Jb (f64) 281e2: LOOP Jb (f64)
@@ -281,6 +284,10 @@ e4: IN AL,Ib
281e5: IN eAX,Ib 284e5: IN eAX,Ib
282e6: OUT Ib,AL 285e6: OUT Ib,AL
283e7: OUT Ib,eAX 286e7: OUT Ib,eAX
287# With 0x66 prefix in 64-bit mode, for AMD CPUs immediate offset
288# in "near" jumps and calls is 16-bit. For CALL,
289# push of return address is 16-bit wide, RSP is decremented by 2
290# but is not truncated to 16 bits, unlike RIP.
284e8: CALL Jz (f64) 291e8: CALL Jz (f64)
285e9: JMP-near Jz (f64) 292e9: JMP-near Jz (f64)
286ea: JMP-far Ap (i64) 293ea: JMP-far Ap (i64)
@@ -456,6 +463,7 @@ AVXcode: 1
4567e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1) 4637e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1)
4577f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqu Wx,Vx (F3) 4647f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqu Wx,Vx (F3)
458# 0x0f 0x80-0x8f 465# 0x0f 0x80-0x8f
466# Note: "forced64" is Intel CPU behavior (see comment about CALL insn).
45980: JO Jz (f64) 46780: JO Jz (f64)
46081: JNO Jz (f64) 46881: JNO Jz (f64)
46182: JB/JC/JNAE Jz (f64) 46982: JB/JC/JNAE Jz (f64)
@@ -842,6 +850,7 @@ EndTable
842GrpTable: Grp5 850GrpTable: Grp5
8430: INC Ev 8510: INC Ev
8441: DEC Ev 8521: DEC Ev
853# Note: "forced64" is Intel CPU behavior (see comment about CALL insn).
8452: CALLN Ev (f64) 8542: CALLN Ev (f64)
8463: CALLF Ep 8553: CALLF Ep
8474: JMPN Ev (f64) 8564: JMPN Ev (f64)