author     Linus Torvalds <torvalds@linux-foundation.org>   2015-04-13 16:16:36 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2015-04-13 16:16:36 -0400
commit     60f898eeaaa1c5d0162a4240bacf33a6c87ecef6 (patch)
tree       23eeac4b1e9a616779d22c104dbc8bd45dfeefd1 /arch/x86/lib
parent     977e1ba50893c15121557b39de586901fe3f75cf (diff)
parent     3b75232d55680ca166dffa274d0587d5faf0a016 (diff)
Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 asm changes from Ingo Molnar:
"There were lots of changes in this development cycle:
- over 100 separate cleanups, restructuring changes, speedups and
fixes in the x86 system call, irq, trap and other entry code, part
of a heroic effort to deobfuscate a decade old spaghetti asm code
and its C code dependencies (Denys Vlasenko, Andy Lutomirski)
- alternatives code fixes and enhancements (Borislav Petkov)
- simplifications and cleanups to the compat code (Brian Gerst)
- signal handling fixes and new x86 testcases (Andy Lutomirski)
- various other fixes and cleanups
By their nature many of these changes are risky - we tried to test
them well on many different x86 systems (there are no known
regressions), and they are split up finely to help bisection - but
there's still a fair bit of residual risk left so caveat emptor"
* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (148 commits)
perf/x86/64: Report regs_user->ax too in get_regs_user()
perf/x86/64: Simplify regs_user->abi setting code in get_regs_user()
perf/x86/64: Do report user_regs->cx while we are in syscall, in get_regs_user()
perf/x86/64: Do not guess user_regs->cs, ss, sp in get_regs_user()
x86/asm/entry/32: Tidy up JNZ instructions after TESTs
x86/asm/entry/64: Reduce padding in execve stubs
x86/asm/entry/64: Remove GET_THREAD_INFO() in ret_from_fork
x86/asm/entry/64: Simplify jumps in ret_from_fork
x86/asm/entry/64: Remove a redundant jump
x86/asm/entry/64: Optimize [v]fork/clone stubs
x86/asm/entry: Zero EXTRA_REGS for stub32_execve() too
x86/asm/entry/64: Move stub_x32_execve() closer to stub_execveat()
x86/asm/entry/64: Use common code for rt_sigreturn() epilogue
x86/asm/entry/64: Add forgotten CFI annotation
x86/asm/entry/irq: Simplify interrupt dispatch table (IDT) layout
x86/asm/entry/64: Move opportunistic sysret code to syscall code path
x86, selftests: Add sigreturn selftest
x86/alternatives: Guard NOPs optimization
x86/asm/entry: Clear EXTRA_REGS for all executable formats
x86/signal: Remove pax argument from restore_sigcontext
...
Diffstat (limited to 'arch/x86/lib')
 -rw-r--r--  arch/x86/lib/atomic64_cx8_32.S   |  50
 -rw-r--r--  arch/x86/lib/checksum_32.S       |  64
 -rw-r--r--  arch/x86/lib/clear_page_64.S     |  66
 -rw-r--r--  arch/x86/lib/copy_page_64.S      |  37
 -rw-r--r--  arch/x86/lib/copy_user_64.S      |  46
 -rw-r--r--  arch/x86/lib/csum-copy_64.S      |   2
 -rw-r--r--  arch/x86/lib/insn.c              |  13
 -rw-r--r--  arch/x86/lib/memcpy_64.S         |  68
 -rw-r--r--  arch/x86/lib/memmove_64.S        |  19
 -rw-r--r--  arch/x86/lib/memset_64.S         |  61
 -rw-r--r--  arch/x86/lib/msr-reg.S           |  24
 -rw-r--r--  arch/x86/lib/rwsem.S             |  44
 -rw-r--r--  arch/x86/lib/thunk_32.S          |  18
 -rw-r--r--  arch/x86/lib/thunk_64.S          |  28
 -rw-r--r--  arch/x86/lib/x86-opcode-map.txt  |   9
 15 files changed, 223 insertions, 326 deletions
diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S
index f5cc9eb1d51b..082a85167a5b 100644
--- a/arch/x86/lib/atomic64_cx8_32.S
+++ b/arch/x86/lib/atomic64_cx8_32.S
@@ -13,16 +13,6 @@
 #include <asm/alternative-asm.h>
 #include <asm/dwarf2.h>
 
-.macro SAVE reg
-	pushl_cfi %\reg
-	CFI_REL_OFFSET \reg, 0
-.endm
-
-.macro RESTORE reg
-	popl_cfi %\reg
-	CFI_RESTORE \reg
-.endm
-
 .macro read64 reg
 	movl %ebx, %eax
 	movl %ecx, %edx
@@ -67,10 +57,10 @@ ENDPROC(atomic64_xchg_cx8)
 .macro addsub_return func ins insc
 ENTRY(atomic64_\func\()_return_cx8)
 	CFI_STARTPROC
-	SAVE ebp
-	SAVE ebx
-	SAVE esi
-	SAVE edi
+	pushl_cfi_reg ebp
+	pushl_cfi_reg ebx
+	pushl_cfi_reg esi
+	pushl_cfi_reg edi
 
 	movl %eax, %esi
 	movl %edx, %edi
@@ -89,10 +79,10 @@ ENTRY(atomic64_\func\()_return_cx8)
 10:
 	movl %ebx, %eax
 	movl %ecx, %edx
-	RESTORE edi
-	RESTORE esi
-	RESTORE ebx
-	RESTORE ebp
+	popl_cfi_reg edi
+	popl_cfi_reg esi
+	popl_cfi_reg ebx
+	popl_cfi_reg ebp
 	ret
 	CFI_ENDPROC
 ENDPROC(atomic64_\func\()_return_cx8)
@@ -104,7 +94,7 @@ addsub_return sub sub sbb
 .macro incdec_return func ins insc
 ENTRY(atomic64_\func\()_return_cx8)
 	CFI_STARTPROC
-	SAVE ebx
+	pushl_cfi_reg ebx
 
 	read64 %esi
 1:
@@ -119,7 +109,7 @@ ENTRY(atomic64_\func\()_return_cx8)
 10:
 	movl %ebx, %eax
 	movl %ecx, %edx
-	RESTORE ebx
+	popl_cfi_reg ebx
 	ret
 	CFI_ENDPROC
 ENDPROC(atomic64_\func\()_return_cx8)
@@ -130,7 +120,7 @@ incdec_return dec sub sbb
 
 ENTRY(atomic64_dec_if_positive_cx8)
 	CFI_STARTPROC
-	SAVE ebx
+	pushl_cfi_reg ebx
 
 	read64 %esi
 1:
@@ -146,18 +136,18 @@ ENTRY(atomic64_dec_if_positive_cx8)
 2:
 	movl %ebx, %eax
 	movl %ecx, %edx
-	RESTORE ebx
+	popl_cfi_reg ebx
 	ret
 	CFI_ENDPROC
 ENDPROC(atomic64_dec_if_positive_cx8)
 
 ENTRY(atomic64_add_unless_cx8)
 	CFI_STARTPROC
-	SAVE ebp
-	SAVE ebx
+	pushl_cfi_reg ebp
+	pushl_cfi_reg ebx
 	/* these just push these two parameters on the stack */
-	SAVE edi
-	SAVE ecx
+	pushl_cfi_reg edi
+	pushl_cfi_reg ecx
 
 	movl %eax, %ebp
 	movl %edx, %edi
@@ -179,8 +169,8 @@ ENTRY(atomic64_add_unless_cx8)
 3:
 	addl $8, %esp
 	CFI_ADJUST_CFA_OFFSET -8
-	RESTORE ebx
-	RESTORE ebp
+	popl_cfi_reg ebx
+	popl_cfi_reg ebp
 	ret
 4:
 	cmpl %edx, 4(%esp)
@@ -192,7 +182,7 @@ ENDPROC(atomic64_add_unless_cx8)
 
 ENTRY(atomic64_inc_not_zero_cx8)
 	CFI_STARTPROC
-	SAVE ebx
+	pushl_cfi_reg ebx
 
 	read64 %esi
 1:
@@ -209,7 +199,7 @@ ENTRY(atomic64_inc_not_zero_cx8)
 
 	movl $1, %eax
 3:
-	RESTORE ebx
+	popl_cfi_reg ebx
 	ret
 	CFI_ENDPROC
 ENDPROC(atomic64_inc_not_zero_cx8)
diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S
index e78b8eee6615..9bc944a91274 100644
--- a/arch/x86/lib/checksum_32.S
+++ b/arch/x86/lib/checksum_32.S
@@ -51,10 +51,8 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
  */
 ENTRY(csum_partial)
 	CFI_STARTPROC
-	pushl_cfi %esi
-	CFI_REL_OFFSET esi, 0
-	pushl_cfi %ebx
-	CFI_REL_OFFSET ebx, 0
+	pushl_cfi_reg esi
+	pushl_cfi_reg ebx
 	movl 20(%esp),%eax	# Function arg: unsigned int sum
 	movl 16(%esp),%ecx	# Function arg: int len
 	movl 12(%esp),%esi	# Function arg: unsigned char *buff
@@ -127,14 +125,12 @@ ENTRY(csum_partial)
 6:	addl %ecx,%eax
 	adcl $0, %eax
 7:
-	testl $1, 12(%esp)
+	testb $1, 12(%esp)
 	jz 8f
 	roll $8, %eax
 8:
-	popl_cfi %ebx
-	CFI_RESTORE ebx
-	popl_cfi %esi
-	CFI_RESTORE esi
+	popl_cfi_reg ebx
+	popl_cfi_reg esi
 	ret
 	CFI_ENDPROC
 ENDPROC(csum_partial)
@@ -145,10 +141,8 @@ ENDPROC(csum_partial)
 
 ENTRY(csum_partial)
 	CFI_STARTPROC
-	pushl_cfi %esi
-	CFI_REL_OFFSET esi, 0
-	pushl_cfi %ebx
-	CFI_REL_OFFSET ebx, 0
+	pushl_cfi_reg esi
+	pushl_cfi_reg ebx
 	movl 20(%esp),%eax	# Function arg: unsigned int sum
 	movl 16(%esp),%ecx	# Function arg: int len
 	movl 12(%esp),%esi	# Function arg: const unsigned char *buf
@@ -251,14 +245,12 @@ ENTRY(csum_partial)
 	addl %ebx,%eax
 	adcl $0,%eax
 80:
-	testl $1, 12(%esp)
+	testb $1, 12(%esp)
 	jz 90f
 	roll $8, %eax
 90:
-	popl_cfi %ebx
-	CFI_RESTORE ebx
-	popl_cfi %esi
-	CFI_RESTORE esi
+	popl_cfi_reg ebx
+	popl_cfi_reg esi
 	ret
 	CFI_ENDPROC
 ENDPROC(csum_partial)
@@ -298,12 +290,9 @@ ENTRY(csum_partial_copy_generic)
 	CFI_STARTPROC
 	subl $4,%esp
 	CFI_ADJUST_CFA_OFFSET 4
-	pushl_cfi %edi
-	CFI_REL_OFFSET edi, 0
-	pushl_cfi %esi
-	CFI_REL_OFFSET esi, 0
-	pushl_cfi %ebx
-	CFI_REL_OFFSET ebx, 0
+	pushl_cfi_reg edi
+	pushl_cfi_reg esi
+	pushl_cfi_reg ebx
 	movl ARGBASE+16(%esp),%eax	# sum
 	movl ARGBASE+12(%esp),%ecx	# len
 	movl ARGBASE+4(%esp),%esi	# src
@@ -412,12 +401,9 @@ DST( movb %cl, (%edi) )
 
 .previous
 
-	popl_cfi %ebx
-	CFI_RESTORE ebx
-	popl_cfi %esi
-	CFI_RESTORE esi
-	popl_cfi %edi
-	CFI_RESTORE edi
+	popl_cfi_reg ebx
+	popl_cfi_reg esi
+	popl_cfi_reg edi
 	popl_cfi %ecx			# equivalent to addl $4,%esp
 	ret
 	CFI_ENDPROC
@@ -441,12 +427,9 @@ ENDPROC(csum_partial_copy_generic)
 
 ENTRY(csum_partial_copy_generic)
 	CFI_STARTPROC
-	pushl_cfi %ebx
-	CFI_REL_OFFSET ebx, 0
-	pushl_cfi %edi
-	CFI_REL_OFFSET edi, 0
-	pushl_cfi %esi
-	CFI_REL_OFFSET esi, 0
+	pushl_cfi_reg ebx
+	pushl_cfi_reg edi
+	pushl_cfi_reg esi
 	movl ARGBASE+4(%esp),%esi	#src
 	movl ARGBASE+8(%esp),%edi	#dst
 	movl ARGBASE+12(%esp),%ecx	#len
@@ -506,12 +489,9 @@ DST( movb %dl, (%edi) )
 	jmp 7b
 .previous
 
-	popl_cfi %esi
-	CFI_RESTORE esi
-	popl_cfi %edi
-	CFI_RESTORE edi
-	popl_cfi %ebx
-	CFI_RESTORE ebx
+	popl_cfi_reg esi
+	popl_cfi_reg edi
+	popl_cfi_reg ebx
 	ret
 	CFI_ENDPROC
 ENDPROC(csum_partial_copy_generic)
diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S
index f2145cfa12a6..e67e579c93bd 100644
--- a/arch/x86/lib/clear_page_64.S
+++ b/arch/x86/lib/clear_page_64.S
@@ -1,31 +1,35 @@
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
+#include <asm/cpufeature.h>
 #include <asm/alternative-asm.h>
 
 /*
- * Zero a page.
- * rdi	page
- */
-ENTRY(clear_page_c)
+ * Most CPUs support enhanced REP MOVSB/STOSB instructions. It is
+ * recommended to use this when possible and we do use them by default.
+ * If enhanced REP MOVSB/STOSB is not available, try to use fast string.
+ * Otherwise, use original.
+ */
+
+/*
+ * Zero a page.
+ * %rdi	- page
+ */
+ENTRY(clear_page)
 	CFI_STARTPROC
+
+	ALTERNATIVE_2 "jmp clear_page_orig", "", X86_FEATURE_REP_GOOD, \
+		"jmp clear_page_c_e", X86_FEATURE_ERMS
+
 	movl $4096/8,%ecx
 	xorl %eax,%eax
 	rep stosq
 	ret
 	CFI_ENDPROC
-ENDPROC(clear_page_c)
+ENDPROC(clear_page)
 
-ENTRY(clear_page_c_e)
+ENTRY(clear_page_orig)
 	CFI_STARTPROC
-	movl $4096,%ecx
-	xorl %eax,%eax
-	rep stosb
-	ret
-	CFI_ENDPROC
-ENDPROC(clear_page_c_e)
 
-ENTRY(clear_page)
-	CFI_STARTPROC
 	xorl %eax,%eax
 	movl $4096/64,%ecx
 	.p2align 4
@@ -45,29 +49,13 @@ ENTRY(clear_page)
 	nop
 	ret
 	CFI_ENDPROC
-.Lclear_page_end:
-ENDPROC(clear_page)
-
-/*
- * Some CPUs support enhanced REP MOVSB/STOSB instructions.
- * It is recommended to use this when possible.
- * If enhanced REP MOVSB/STOSB is not available, try to use fast string.
- * Otherwise, use original function.
- *
- */
+ENDPROC(clear_page_orig)
 
-#include <asm/cpufeature.h>
-
-.section .altinstr_replacement,"ax"
-1: .byte 0xeb /* jmp <disp8> */
-   .byte (clear_page_c - clear_page) - (2f - 1b) /* offset */
-2: .byte 0xeb /* jmp <disp8> */
-   .byte (clear_page_c_e - clear_page) - (3f - 2b) /* offset */
-3:
-.previous
-.section .altinstructions,"a"
-	altinstruction_entry clear_page,1b,X86_FEATURE_REP_GOOD,\
-		.Lclear_page_end-clear_page, 2b-1b
-	altinstruction_entry clear_page,2b,X86_FEATURE_ERMS, \
-		.Lclear_page_end-clear_page,3b-2b
-.previous
+ENTRY(clear_page_c_e)
+	CFI_STARTPROC
+	movl $4096,%ecx
+	xorl %eax,%eax
+	rep stosb
+	ret
+	CFI_ENDPROC
+ENDPROC(clear_page_c_e)
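A note on the new dispatch above: ALTERNATIVE_2 patches the start of clear_page() once at boot, so the preference order ERMS > REP_GOOD > unrolled loop is baked in with no per-call branch (a later altinstructions entry overrides an earlier one, which is why the ERMS alternative is listed second). The following user-space C sketch only models that priority order; the clear_page_* helpers and cpu_has_rep_good() are hypothetical stand-ins, and only the ERMS CPUID bit (leaf 7, subleaf 0, EBX bit 9) is an architectural fact.

/*
 * Illustrative sketch, not kernel code: the selection order encoded by
 * the ALTERNATIVE_2 line above, expressed as plain C.
 */
#include <cpuid.h>
#include <stdbool.h>
#include <string.h>

#define PAGE_SIZE 4096

static bool cpu_has_erms(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
		return false;
	return ebx & (1u << 9);		/* CPUID.(EAX=7,ECX=0):EBX[9] = ERMS */
}

static bool cpu_has_rep_good(void)
{
	return true;	/* kernel-synthesized flag; simply assumed here */
}

/* Hypothetical stand-ins for the three real routines in clear_page_64.S. */
static void clear_page_c_e(void *page)  { memset(page, 0, PAGE_SIZE); }	/* REP STOSB path */
static void clear_page_rep(void *page)  { memset(page, 0, PAGE_SIZE); }	/* REP STOSQ path */
static void clear_page_orig(void *page) { memset(page, 0, PAGE_SIZE); }	/* unrolled loop  */

void clear_page_dispatch(void *page)
{
	if (cpu_has_erms())
		clear_page_c_e(page);
	else if (cpu_has_rep_good())
		clear_page_rep(page);
	else
		clear_page_orig(page);
}

In the kernel there is no such runtime branch at all: the jmp/NOPs are rewritten in place during boot, so the chosen variant is reached directly on every call.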
diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S
index 176cca67212b..8239dbcbf984 100644
--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -2,23 +2,26 @@
 
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
+#include <asm/cpufeature.h>
 #include <asm/alternative-asm.h>
 
+/*
+ * Some CPUs run faster using the string copy instructions (sane microcode).
+ * It is also a lot simpler. Use this when possible. But, don't use streaming
+ * copy unless the CPU indicates X86_FEATURE_REP_GOOD. Could vary the
+ * prefetch distance based on SMP/UP.
+ */
 	ALIGN
-copy_page_rep:
+ENTRY(copy_page)
 	CFI_STARTPROC
+	ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD
 	movl $4096/8, %ecx
 	rep movsq
 	ret
 	CFI_ENDPROC
-ENDPROC(copy_page_rep)
-
-/*
- * Don't use streaming copy unless the CPU indicates X86_FEATURE_REP_GOOD.
- * Could vary the prefetch distance based on SMP/UP.
- */
+ENDPROC(copy_page)
 
-ENTRY(copy_page)
+ENTRY(copy_page_regs)
 	CFI_STARTPROC
 	subq $2*8, %rsp
 	CFI_ADJUST_CFA_OFFSET 2*8
@@ -90,21 +93,5 @@ ENTRY(copy_page)
 	addq $2*8, %rsp
 	CFI_ADJUST_CFA_OFFSET -2*8
 	ret
-.Lcopy_page_end:
 	CFI_ENDPROC
-ENDPROC(copy_page)
-
-/* Some CPUs run faster using the string copy instructions.
-   It is also a lot simpler. Use this when possible */
-
-#include <asm/cpufeature.h>
-
-.section .altinstr_replacement,"ax"
-1: .byte 0xeb /* jmp <disp8> */
-   .byte (copy_page_rep - copy_page) - (2f - 1b) /* offset */
-2:
-.previous
-.section .altinstructions,"a"
-	altinstruction_entry copy_page, 1b, X86_FEATURE_REP_GOOD, \
-		.Lcopy_page_end-copy_page, 2b-1b
-.previous
+ENDPROC(copy_page_regs)
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index dee945d55594..fa997dfaef24 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -8,9 +8,6 @@
 
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
-
-#define FIX_ALIGNMENT 1
-
 #include <asm/current.h>
 #include <asm/asm-offsets.h>
 #include <asm/thread_info.h>
@@ -19,33 +16,7 @@
 #include <asm/asm.h>
 #include <asm/smap.h>
 
-/*
- * By placing feature2 after feature1 in altinstructions section, we logically
- * implement:
- * If CPU has feature2, jmp to alt2 is used
- * else if CPU has feature1, jmp to alt1 is used
- * else jmp to orig is used.
- */
-.macro ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2
-0:
-	.byte 0xe9	/* 32bit jump */
-	.long \orig-1f	/* by default jump to orig */
-1:
-	.section .altinstr_replacement,"ax"
-2:	.byte 0xe9	/* near jump with 32bit immediate */
-	.long \alt1-1b	/* offset */   /* or alternatively to alt1 */
-3:	.byte 0xe9	/* near jump with 32bit immediate */
-	.long \alt2-1b	/* offset */   /* or alternatively to alt2 */
-	.previous
-
-	.section .altinstructions,"a"
-	altinstruction_entry 0b,2b,\feature1,5,5
-	altinstruction_entry 0b,3b,\feature2,5,5
-	.previous
-.endm
-
 .macro ALIGN_DESTINATION
-#ifdef FIX_ALIGNMENT
 	/* check for bad alignment of destination */
 	movl %edi,%ecx
 	andl $7,%ecx
@@ -67,7 +38,6 @@
 
 	_ASM_EXTABLE(100b,103b)
 	_ASM_EXTABLE(101b,103b)
-#endif
 	.endm
 
 /* Standard copy_to_user with segment limit checking */
@@ -79,9 +49,11 @@ ENTRY(_copy_to_user)
 	jc bad_to_user
 	cmpq TI_addr_limit(%rax),%rcx
 	ja bad_to_user
-	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS,	\
-		copy_user_generic_unrolled,copy_user_generic_string,	\
-		copy_user_enhanced_fast_string
+	ALTERNATIVE_2 "jmp copy_user_generic_unrolled",		\
+		"jmp copy_user_generic_string",			\
+		X86_FEATURE_REP_GOOD,				\
+		"jmp copy_user_enhanced_fast_string",		\
+		X86_FEATURE_ERMS
 	CFI_ENDPROC
 ENDPROC(_copy_to_user)
 
@@ -94,9 +66,11 @@ ENTRY(_copy_from_user)
 	jc bad_from_user
 	cmpq TI_addr_limit(%rax),%rcx
 	ja bad_from_user
-	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS,	\
-		copy_user_generic_unrolled,copy_user_generic_string,	\
-		copy_user_enhanced_fast_string
+	ALTERNATIVE_2 "jmp copy_user_generic_unrolled",		\
+		"jmp copy_user_generic_string",			\
+		X86_FEATURE_REP_GOOD,				\
+		"jmp copy_user_enhanced_fast_string",		\
+		X86_FEATURE_ERMS
 	CFI_ENDPROC
 ENDPROC(_copy_from_user)
 
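The ALIGN_DESTINATION macro kept above (now unconditional, since the FIX_ALIGNMENT #ifdef is gone) copies single bytes until the destination reaches an 8-byte boundary, after which the wide copy takes over. Below is a plain-C sketch of that idea; the helper name is hypothetical, and the real macro additionally wires each byte access into the exception table because user addresses can fault.

/*
 * Sketch only: align the destination to 8 bytes by copying a small
 * byte-wise "head", and return the remaining length for the caller's
 * 8-byte-at-a-time loop.
 */
#include <stddef.h>
#include <stdint.h>

static size_t align_destination(unsigned char **dst, const unsigned char **src,
				size_t len)
{
	size_t head = (uintptr_t)*dst & 7;	/* bytes past the last 8-byte boundary */

	if (head == 0)
		return len;			/* already aligned */

	head = 8 - head;			/* bytes needed to reach alignment */
	if (head > len)
		head = len;
	len -= head;

	while (head--)
		*(*dst)++ = *(*src)++;

	return len;				/* remaining length for the aligned loop */
}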
diff --git a/arch/x86/lib/csum-copy_64.S b/arch/x86/lib/csum-copy_64.S
index 2419d5fefae3..9734182966f3 100644
--- a/arch/x86/lib/csum-copy_64.S
+++ b/arch/x86/lib/csum-copy_64.S
@@ -196,7 +196,7 @@ ENTRY(csum_partial_copy_generic)
 
 	/* handle last odd byte */
 .Lhandle_1:
-	testl $1, %r10d
+	testb $1, %r10b
 	jz .Lende
 	xorl %ebx, %ebx
 	source
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c
index 1313ae6b478b..8f72b334aea0 100644
--- a/arch/x86/lib/insn.c
+++ b/arch/x86/lib/insn.c
@@ -52,6 +52,13 @@
  */
 void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64)
 {
+	/*
+	 * Instructions longer than MAX_INSN_SIZE (15 bytes) are invalid
+	 * even if the input buffer is long enough to hold them.
+	 */
+	if (buf_len > MAX_INSN_SIZE)
+		buf_len = MAX_INSN_SIZE;
+
 	memset(insn, 0, sizeof(*insn));
 	insn->kaddr = kaddr;
 	insn->end_kaddr = kaddr + buf_len;
@@ -164,6 +171,12 @@ found:
 			/* VEX.W overrides opnd_size */
 			insn->opnd_bytes = 8;
 		} else {
+			/*
+			 * For VEX2, fake VEX3-like byte#2.
+			 * Makes it easier to decode vex.W, vex.vvvv,
+			 * vex.L and vex.pp. Masking with 0x7f sets vex.W == 0.
+			 */
+			insn->vex_prefix.bytes[2] = b2 & 0x7f;
 			insn->vex_prefix.nbytes = 2;
 			insn->next_byte += 2;
 		}
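Why "b2 & 0x7f" yields a usable fake VEX3 byte 2: in a 3-byte VEX prefix that byte is laid out as W[7] | ~vvvv[6:3] | L[2] | pp[1:0], and the 2-byte form carries vvvv/L/pp in the same bit positions, so clearing bit 7 simply reads back as vex.W == 0, exactly as the new comment says. A standalone C snippet (not the kernel decoder; the example byte value is arbitrary) that decodes such a faked byte:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint8_t b2   = 0xc9;		/* second byte of some 2-byte VEX prefix */
	uint8_t fake = b2 & 0x7f;	/* same masking as the hunk above */

	unsigned w    = (fake >> 7) & 0x1;	/* forced to 0 by the mask */
	unsigned vvvv = (~fake >> 3) & 0xf;	/* register specifier (stored inverted) */
	unsigned l    = (fake >> 2) & 0x1;	/* vector length: 128 vs 256 bit */
	unsigned pp   = fake & 0x3;		/* implied prefix: none/66/F3/F2 */

	printf("vex.W=%u vex.vvvv=%u vex.L=%u vex.pp=%u\n", w, vvvv, l, pp);
	return 0;
}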
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 89b53c9968e7..b046664f5a1c 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -1,12 +1,20 @@
 /* Copyright 2002 Andi Kleen */
 
 #include <linux/linkage.h>
-
 #include <asm/cpufeature.h>
 #include <asm/dwarf2.h>
 #include <asm/alternative-asm.h>
 
 /*
+ * We build a jump to memcpy_orig by default which gets NOPped out on
+ * the majority of x86 CPUs which set REP_GOOD. In addition, CPUs which
+ * have the enhanced REP MOVSB/STOSB feature (ERMS), change those NOPs
+ * to a jmp to memcpy_erms which does the REP; MOVSB mem copy.
+ */
+
+.weak memcpy
+
+/*
  * memcpy - Copy a memory block.
  *
  * Input:
@@ -17,15 +25,11 @@
  * Output:
  * rax original destination
  */
+ENTRY(__memcpy)
+ENTRY(memcpy)
+	ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
+		"jmp memcpy_erms", X86_FEATURE_ERMS
 
-/*
- * memcpy_c() - fast string ops (REP MOVSQ) based variant.
- *
- * This gets patched over the unrolled variant (below) via the
- * alternative instructions framework:
- */
-	.section .altinstr_replacement, "ax", @progbits
-.Lmemcpy_c:
 	movq %rdi, %rax
 	movq %rdx, %rcx
 	shrq $3, %rcx
@@ -34,29 +38,21 @@
 	movl %edx, %ecx
 	rep movsb
 	ret
-.Lmemcpy_e:
-	.previous
+ENDPROC(memcpy)
+ENDPROC(__memcpy)
 
 /*
- * memcpy_c_e() - enhanced fast string memcpy. This is faster and simpler than
- * memcpy_c. Use memcpy_c_e when possible.
- *
- * This gets patched over the unrolled variant (below) via the
- * alternative instructions framework:
+ * memcpy_erms() - enhanced fast string memcpy. This is faster and
+ * simpler than memcpy. Use memcpy_erms when possible.
  */
-	.section .altinstr_replacement, "ax", @progbits
-.Lmemcpy_c_e:
+ENTRY(memcpy_erms)
 	movq %rdi, %rax
 	movq %rdx, %rcx
 	rep movsb
 	ret
-.Lmemcpy_e_e:
-	.previous
-
-.weak memcpy
+ENDPROC(memcpy_erms)
 
-ENTRY(__memcpy)
-ENTRY(memcpy)
+ENTRY(memcpy_orig)
 	CFI_STARTPROC
 	movq %rdi, %rax
 
@@ -183,26 +179,4 @@ ENTRY(memcpy)
 .Lend:
 	retq
 	CFI_ENDPROC
-ENDPROC(memcpy)
-ENDPROC(__memcpy)
-
-/*
- * Some CPUs are adding enhanced REP MOVSB/STOSB feature
- * If the feature is supported, memcpy_c_e() is the first choice.
- * If enhanced rep movsb copy is not available, use fast string copy
- * memcpy_c() when possible. This is faster and code is simpler than
- * original memcpy().
- * Otherwise, original memcpy() is used.
- * In .altinstructions section, ERMS feature is placed after REG_GOOD
- * feature to implement the right patch order.
- *
- * Replace only beginning, memcpy is used to apply alternatives,
- * so it is silly to overwrite itself with nops - reboot is the
- * only outcome...
- */
-.section .altinstructions, "a"
-altinstruction_entry __memcpy,.Lmemcpy_c,X86_FEATURE_REP_GOOD,\
-	.Lmemcpy_e-.Lmemcpy_c,.Lmemcpy_e-.Lmemcpy_c
-altinstruction_entry __memcpy,.Lmemcpy_c_e,X86_FEATURE_ERMS, \
-	.Lmemcpy_e_e-.Lmemcpy_c_e,.Lmemcpy_e_e-.Lmemcpy_c_e
-.previous
+ENDPROC(memcpy_orig)
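The new comment in memcpy_64.S stresses that the "jmp memcpy_orig" is rewritten (NOPped out or retargeted) at patch time, so after boot there is no per-call selection cost. A portable user-space approximation of that decide-once behaviour is a self-resolving function pointer (glibc reaches a similar goal for its memcpy with ifunc resolvers); everything named memcpy_*_c or choose_memcpy_variant() below is a hypothetical stand-in, not the kernel's mechanism.

#include <stddef.h>
#include <string.h>

/* Stand-ins for the real variants; both just defer to libc memcpy here. */
static void *memcpy_orig_c(void *d, const void *s, size_t n) { return memcpy(d, s, n); }
static void *memcpy_erms_c(void *d, const void *s, size_t n) { return memcpy(d, s, n); }

/* Placeholder chooser: a real probe would test CPUID for ERMS/REP_GOOD. */
static void *(*choose_memcpy_variant(void))(void *, const void *, size_t)
{
	int have_erms = 1;	/* assumption for the sketch */
	return have_erms ? memcpy_erms_c : memcpy_orig_c;
}

static void *memcpy_resolve(void *d, const void *s, size_t n);

/* Starts out pointing at the resolver; after the first call it points
 * directly at the chosen variant, so later calls pay no selection cost. */
static void *(*memcpy_ptr)(void *, const void *, size_t) = memcpy_resolve;

static void *memcpy_resolve(void *d, const void *s, size_t n)
{
	memcpy_ptr = choose_memcpy_variant();
	return memcpy_ptr(d, s, n);
}

void *my_memcpy(void *d, const void *s, size_t n)
{
	return memcpy_ptr(d, s, n);
}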
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S
index 9c4b530575da..0f8a0d0331b9 100644
--- a/arch/x86/lib/memmove_64.S
+++ b/arch/x86/lib/memmove_64.S
@@ -5,7 +5,6 @@
  * This assembly file is re-written from memmove_64.c file.
  *	- Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
  */
-#define _STRING_C
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
 #include <asm/cpufeature.h>
@@ -44,6 +43,8 @@ ENTRY(__memmove)
 	jg 2f
 
 .Lmemmove_begin_forward:
+	ALTERNATIVE "", "movq %rdx, %rcx; rep movsb; retq", X86_FEATURE_ERMS
+
 	/*
 	 * movsq instruction have many startup latency
 	 * so we handle small size by general register.
@@ -207,21 +208,5 @@ ENTRY(__memmove)
 13:
 	retq
 	CFI_ENDPROC
-
-	.section .altinstr_replacement,"ax"
-.Lmemmove_begin_forward_efs:
-	/* Forward moving data. */
-	movq %rdx, %rcx
-	rep movsb
-	retq
-.Lmemmove_end_forward_efs:
-	.previous
-
-	.section .altinstructions,"a"
-	altinstruction_entry .Lmemmove_begin_forward,		\
-		.Lmemmove_begin_forward_efs,X86_FEATURE_ERMS,	\
-		.Lmemmove_end_forward-.Lmemmove_begin_forward,	\
-		.Lmemmove_end_forward_efs-.Lmemmove_begin_forward_efs
-	.previous
 ENDPROC(__memmove)
 ENDPROC(memmove)
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index 6f44935c6a60..93118fb23976 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -5,19 +5,30 @@
 #include <asm/cpufeature.h>
 #include <asm/alternative-asm.h>
 
+.weak memset
+
 /*
  * ISO C memset - set a memory block to a byte value. This function uses fast
  * string to get better performance than the original function. The code is
  * simpler and shorter than the orignal function as well.
  *
  * rdi   destination
  * rsi   value (char)
  * rdx   count (bytes)
  *
  * rax   original destination
  */
-	.section .altinstr_replacement, "ax", @progbits
-.Lmemset_c:
+ENTRY(memset)
+ENTRY(__memset)
+	/*
+	 * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended
+	 * to use it when possible. If not available, use fast string instructions.
+	 *
+	 * Otherwise, use original memset function.
+	 */
+	ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \
+		"jmp memset_erms", X86_FEATURE_ERMS
+
 	movq %rdi,%r9
 	movq %rdx,%rcx
 	andl $7,%edx
@@ -31,8 +42,8 @@
 	rep stosb
 	movq %r9,%rax
 	ret
-.Lmemset_e:
-	.previous
+ENDPROC(memset)
+ENDPROC(__memset)
 
 /*
  * ISO C memset - set a memory block to a byte value. This function uses
@@ -45,21 +56,16 @@
 *
 * rax   original destination
  */
-	.section .altinstr_replacement, "ax", @progbits
-.Lmemset_c_e:
+ENTRY(memset_erms)
 	movq %rdi,%r9
 	movb %sil,%al
 	movq %rdx,%rcx
 	rep stosb
 	movq %r9,%rax
 	ret
-.Lmemset_e_e:
-	.previous
-
-.weak memset
+ENDPROC(memset_erms)
 
-ENTRY(memset)
-ENTRY(__memset)
+ENTRY(memset_orig)
 	CFI_STARTPROC
 	movq %rdi,%r10
 
@@ -134,23 +140,4 @@ ENTRY(__memset)
 	jmp .Lafter_bad_alignment
 .Lfinal:
 	CFI_ENDPROC
-ENDPROC(memset)
-ENDPROC(__memset)
-
-/* Some CPUs support enhanced REP MOVSB/STOSB feature.
- * It is recommended to use this when possible.
- *
- * If enhanced REP MOVSB/STOSB feature is not available, use fast string
- * instructions.
- *
- * Otherwise, use original memset function.
- *
- * In .altinstructions section, ERMS feature is placed after REG_GOOD
- * feature to implement the right patch order.
- */
-.section .altinstructions,"a"
-altinstruction_entry __memset,.Lmemset_c,X86_FEATURE_REP_GOOD,\
-	.Lfinal-__memset,.Lmemset_e-.Lmemset_c
-altinstruction_entry __memset,.Lmemset_c_e,X86_FEATURE_ERMS, \
-	.Lfinal-__memset,.Lmemset_e_e-.Lmemset_c_e
-.previous
+ENDPROC(memset_orig)
diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S
index f6d13eefad10..3ca5218fbece 100644
--- a/arch/x86/lib/msr-reg.S
+++ b/arch/x86/lib/msr-reg.S
@@ -14,8 +14,8 @@
 .macro op_safe_regs op
 ENTRY(\op\()_safe_regs)
 	CFI_STARTPROC
-	pushq_cfi %rbx
-	pushq_cfi %rbp
+	pushq_cfi_reg rbx
+	pushq_cfi_reg rbp
 	movq %rdi, %r10	/* Save pointer */
 	xorl %r11d, %r11d	/* Return value */
 	movl    (%rdi), %eax
@@ -35,8 +35,8 @@ ENTRY(\op\()_safe_regs)
 	movl    %ebp, 20(%r10)
 	movl    %esi, 24(%r10)
 	movl    %edi, 28(%r10)
-	popq_cfi %rbp
-	popq_cfi %rbx
+	popq_cfi_reg rbp
+	popq_cfi_reg rbx
 	ret
 3:
 	CFI_RESTORE_STATE
@@ -53,10 +53,10 @@ ENDPROC(\op\()_safe_regs)
 .macro op_safe_regs op
 ENTRY(\op\()_safe_regs)
 	CFI_STARTPROC
-	pushl_cfi %ebx
-	pushl_cfi %ebp
-	pushl_cfi %esi
-	pushl_cfi %edi
+	pushl_cfi_reg ebx
+	pushl_cfi_reg ebp
+	pushl_cfi_reg esi
+	pushl_cfi_reg edi
 	pushl_cfi $0              /* Return value */
 	pushl_cfi %eax
 	movl    4(%eax), %ecx
@@ -80,10 +80,10 @@ ENTRY(\op\()_safe_regs)
 	movl    %esi, 24(%eax)
 	movl    %edi, 28(%eax)
 	popl_cfi %eax
-	popl_cfi %edi
-	popl_cfi %esi
-	popl_cfi %ebp
-	popl_cfi %ebx
+	popl_cfi_reg edi
+	popl_cfi_reg esi
+	popl_cfi_reg ebp
+	popl_cfi_reg ebx
 	ret
 3:
 	CFI_RESTORE_STATE
diff --git a/arch/x86/lib/rwsem.S b/arch/x86/lib/rwsem.S
index 5dff5f042468..2322abe4da3b 100644
--- a/arch/x86/lib/rwsem.S
+++ b/arch/x86/lib/rwsem.S
@@ -34,10 +34,10 @@
  */
 
 #define save_common_regs \
-	pushl_cfi %ecx; CFI_REL_OFFSET ecx, 0
+	pushl_cfi_reg ecx
 
 #define restore_common_regs \
-	popl_cfi %ecx; CFI_RESTORE ecx
+	popl_cfi_reg ecx
 
 	/* Avoid uglifying the argument copying x86-64 needs to do. */
 	.macro movq src, dst
@@ -64,22 +64,22 @@
  */
 
 #define save_common_regs \
-	pushq_cfi %rdi; CFI_REL_OFFSET rdi, 0; \
-	pushq_cfi %rsi; CFI_REL_OFFSET rsi, 0; \
-	pushq_cfi %rcx; CFI_REL_OFFSET rcx, 0; \
-	pushq_cfi %r8; CFI_REL_OFFSET r8, 0; \
-	pushq_cfi %r9; CFI_REL_OFFSET r9, 0; \
-	pushq_cfi %r10; CFI_REL_OFFSET r10, 0; \
-	pushq_cfi %r11; CFI_REL_OFFSET r11, 0
+	pushq_cfi_reg rdi; \
+	pushq_cfi_reg rsi; \
+	pushq_cfi_reg rcx; \
+	pushq_cfi_reg r8; \
+	pushq_cfi_reg r9; \
+	pushq_cfi_reg r10; \
+	pushq_cfi_reg r11
 
 #define restore_common_regs \
-	popq_cfi %r11; CFI_RESTORE r11; \
-	popq_cfi %r10; CFI_RESTORE r10; \
-	popq_cfi %r9; CFI_RESTORE r9; \
-	popq_cfi %r8; CFI_RESTORE r8; \
-	popq_cfi %rcx; CFI_RESTORE rcx; \
-	popq_cfi %rsi; CFI_RESTORE rsi; \
-	popq_cfi %rdi; CFI_RESTORE rdi
+	popq_cfi_reg r11; \
+	popq_cfi_reg r10; \
+	popq_cfi_reg r9; \
+	popq_cfi_reg r8; \
+	popq_cfi_reg rcx; \
+	popq_cfi_reg rsi; \
+	popq_cfi_reg rdi
 
 #endif
 
@@ -87,12 +87,10 @@
 ENTRY(call_rwsem_down_read_failed)
 	CFI_STARTPROC
 	save_common_regs
-	__ASM_SIZE(push,_cfi) %__ASM_REG(dx)
-	CFI_REL_OFFSET __ASM_REG(dx), 0
+	__ASM_SIZE(push,_cfi_reg) __ASM_REG(dx)
 	movq %rax,%rdi
 	call rwsem_down_read_failed
-	__ASM_SIZE(pop,_cfi) %__ASM_REG(dx)
-	CFI_RESTORE __ASM_REG(dx)
+	__ASM_SIZE(pop,_cfi_reg) __ASM_REG(dx)
 	restore_common_regs
 	ret
 	CFI_ENDPROC
@@ -124,12 +122,10 @@ ENDPROC(call_rwsem_wake)
 ENTRY(call_rwsem_downgrade_wake)
 	CFI_STARTPROC
 	save_common_regs
-	__ASM_SIZE(push,_cfi) %__ASM_REG(dx)
-	CFI_REL_OFFSET __ASM_REG(dx), 0
+	__ASM_SIZE(push,_cfi_reg) __ASM_REG(dx)
 	movq %rax,%rdi
 	call rwsem_downgrade_wake
-	__ASM_SIZE(pop,_cfi) %__ASM_REG(dx)
-	CFI_RESTORE __ASM_REG(dx)
+	__ASM_SIZE(pop,_cfi_reg) __ASM_REG(dx)
 	restore_common_regs
 	ret
 	CFI_ENDPROC
diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/lib/thunk_32.S
index e28cdaf5ac2c..5eb715087b80 100644
--- a/arch/x86/lib/thunk_32.S
+++ b/arch/x86/lib/thunk_32.S
@@ -13,12 +13,9 @@
 	.globl \name
 \name:
 	CFI_STARTPROC
-	pushl_cfi %eax
-	CFI_REL_OFFSET eax, 0
-	pushl_cfi %ecx
-	CFI_REL_OFFSET ecx, 0
-	pushl_cfi %edx
-	CFI_REL_OFFSET edx, 0
+	pushl_cfi_reg eax
+	pushl_cfi_reg ecx
+	pushl_cfi_reg edx
 
 	.if \put_ret_addr_in_eax
 	/* Place EIP in the arg1 */
@@ -26,12 +23,9 @@
 	.endif
 
 	call \func
-	popl_cfi %edx
-	CFI_RESTORE edx
-	popl_cfi %ecx
-	CFI_RESTORE ecx
-	popl_cfi %eax
-	CFI_RESTORE eax
+	popl_cfi_reg edx
+	popl_cfi_reg ecx
+	popl_cfi_reg eax
 	ret
 	CFI_ENDPROC
 	_ASM_NOKPROBE(\name)
diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S
index b30b5ebd614a..f89ba4e93025 100644
--- a/arch/x86/lib/thunk_64.S
+++ b/arch/x86/lib/thunk_64.S
@@ -17,9 +17,18 @@
 	CFI_STARTPROC
 
 	/* this one pushes 9 elems, the next one would be %rIP */
-	SAVE_ARGS
+	pushq_cfi_reg rdi
+	pushq_cfi_reg rsi
+	pushq_cfi_reg rdx
+	pushq_cfi_reg rcx
+	pushq_cfi_reg rax
+	pushq_cfi_reg r8
+	pushq_cfi_reg r9
+	pushq_cfi_reg r10
+	pushq_cfi_reg r11
 
 	.if \put_ret_addr_in_rdi
+	/* 9*8(%rsp) is return addr on stack */
 	movq_cfi_restore 9*8, rdi
 	.endif
 
@@ -45,11 +54,22 @@
 #endif
 #endif
 
-	/* SAVE_ARGS below is used only for the .cfi directives it contains. */
+#if defined(CONFIG_TRACE_IRQFLAGS) \
+ || defined(CONFIG_DEBUG_LOCK_ALLOC) \
+ || defined(CONFIG_PREEMPT)
 	CFI_STARTPROC
-	SAVE_ARGS
+	CFI_ADJUST_CFA_OFFSET 9*8
 restore:
-	RESTORE_ARGS
+	popq_cfi_reg r11
+	popq_cfi_reg r10
+	popq_cfi_reg r9
+	popq_cfi_reg r8
+	popq_cfi_reg rax
+	popq_cfi_reg rcx
+	popq_cfi_reg rdx
+	popq_cfi_reg rsi
+	popq_cfi_reg rdi
 	ret
 	CFI_ENDPROC
 	_ASM_NOKPROBE(restore)
+#endif
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index 1a2be7c6895d..816488c0b97e 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -273,6 +273,9 @@ dd: ESC
 de: ESC
 df: ESC
 # 0xe0 - 0xef
+# Note: "forced64" is Intel CPU behavior: they ignore 0x66 prefix
+# in 64-bit mode. AMD CPUs accept 0x66 prefix, it causes RIP truncation
+# to 16 bits. In 32-bit mode, 0x66 is accepted by both Intel and AMD.
 e0: LOOPNE/LOOPNZ Jb (f64)
 e1: LOOPE/LOOPZ Jb (f64)
 e2: LOOP Jb (f64)
@@ -281,6 +284,10 @@ e4: IN AL,Ib
 e5: IN eAX,Ib
 e6: OUT Ib,AL
 e7: OUT Ib,eAX
+# With 0x66 prefix in 64-bit mode, for AMD CPUs immediate offset
+# in "near" jumps and calls is 16-bit. For CALL,
+# push of return address is 16-bit wide, RSP is decremented by 2
+# but is not truncated to 16 bits, unlike RIP.
 e8: CALL Jz (f64)
 e9: JMP-near Jz (f64)
 ea: JMP-far Ap (i64)
@@ -456,6 +463,7 @@ AVXcode: 1
 7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1)
 7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqu Wx,Vx (F3)
 # 0x0f 0x80-0x8f
+# Note: "forced64" is Intel CPU behavior (see comment about CALL insn).
 80: JO Jz (f64)
 81: JNO Jz (f64)
 82: JB/JC/JNAE Jz (f64)
@@ -842,6 +850,7 @@ EndTable
 GrpTable: Grp5
 0: INC Ev
 1: DEC Ev
+# Note: "forced64" is Intel CPU behavior (see comment about CALL insn).
 2: CALLN Ev (f64)
 3: CALLF Ep
 4: JMPN Ev (f64)