diff options
author | Dominik Brodowski <linux@dominikbrodowski.net> | 2018-02-14 12:59:24 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2018-02-17 05:14:33 -0500 |
commit | ced5d0bf603fa0baee8ea889e1d70971fd210894 (patch) | |
tree | 6f9a705b89fc522ed4301049bdc58af4bcb7f21e | |
parent | 9e809d15d6b692fa061d74be7aaab1c79f6784b8 (diff) |
x86/entry/64: Use 'xorl' for faster register clearing
On some x86 CPU microarchitectures, using 'xorq' to clear general-purpose
registers is slower than 'xorl'. As 'xorl' is sufficient to clear all
64 bits of these registers due to zero-extension [*], switch the x86
64-bit entry code to use 'xorl'.
No change in functionality and no change in code size.
[*] According to the Intel 64 and IA-32 Architectures Software Developer's
Manual, section 3.4.1.1, results of operations on 32-bit operands are "zero-
extended to a 64-bit result in the destination general-purpose
register." The AMD64 Architecture Programmer’s Manual Volume 3,
Appendix B.1, describes the same behaviour.
Suggested-by: Denys Vlasenko <dvlasenk@redhat.com>
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20180214175924.23065-3-linux@dominikbrodowski.net
[ Improved on the changelog a bit. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r-- | arch/x86/entry/calling.h | 16 | ||||
-rw-r--r-- | arch/x86/entry/entry_64_compat.S | 54 |
2 files changed, 35 insertions, 35 deletions
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 196b6103edf6..5d10b7a85cad 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h | |||
@@ -117,25 +117,25 @@ For 32-bit we have the following conventions - kernel is built with | |||
117 | pushq %rcx /* pt_regs->cx */ | 117 | pushq %rcx /* pt_regs->cx */ |
118 | pushq \rax /* pt_regs->ax */ | 118 | pushq \rax /* pt_regs->ax */ |
119 | pushq %r8 /* pt_regs->r8 */ | 119 | pushq %r8 /* pt_regs->r8 */ |
120 | xorq %r8, %r8 /* nospec r8 */ | 120 | xorl %r8d, %r8d /* nospec r8 */ |
121 | pushq %r9 /* pt_regs->r9 */ | 121 | pushq %r9 /* pt_regs->r9 */ |
122 | xorq %r9, %r9 /* nospec r9 */ | 122 | xorl %r9d, %r9d /* nospec r9 */ |
123 | pushq %r10 /* pt_regs->r10 */ | 123 | pushq %r10 /* pt_regs->r10 */ |
124 | xorq %r10, %r10 /* nospec r10 */ | 124 | xorl %r10d, %r10d /* nospec r10 */ |
125 | pushq %r11 /* pt_regs->r11 */ | 125 | pushq %r11 /* pt_regs->r11 */ |
126 | xorq %r11, %r11 /* nospec r11*/ | 126 | xorl %r11d, %r11d /* nospec r11*/ |
127 | pushq %rbx /* pt_regs->rbx */ | 127 | pushq %rbx /* pt_regs->rbx */ |
128 | xorl %ebx, %ebx /* nospec rbx*/ | 128 | xorl %ebx, %ebx /* nospec rbx*/ |
129 | pushq %rbp /* pt_regs->rbp */ | 129 | pushq %rbp /* pt_regs->rbp */ |
130 | xorl %ebp, %ebp /* nospec rbp*/ | 130 | xorl %ebp, %ebp /* nospec rbp*/ |
131 | pushq %r12 /* pt_regs->r12 */ | 131 | pushq %r12 /* pt_regs->r12 */ |
132 | xorq %r12, %r12 /* nospec r12*/ | 132 | xorl %r12d, %r12d /* nospec r12*/ |
133 | pushq %r13 /* pt_regs->r13 */ | 133 | pushq %r13 /* pt_regs->r13 */ |
134 | xorq %r13, %r13 /* nospec r13*/ | 134 | xorl %r13d, %r13d /* nospec r13*/ |
135 | pushq %r14 /* pt_regs->r14 */ | 135 | pushq %r14 /* pt_regs->r14 */ |
136 | xorq %r14, %r14 /* nospec r14*/ | 136 | xorl %r14d, %r14d /* nospec r14*/ |
137 | pushq %r15 /* pt_regs->r15 */ | 137 | pushq %r15 /* pt_regs->r15 */ |
138 | xorq %r15, %r15 /* nospec r15*/ | 138 | xorl %r15d, %r15d /* nospec r15*/ |
139 | UNWIND_HINT_REGS | 139 | UNWIND_HINT_REGS |
140 | .if \save_ret | 140 | .if \save_ret |
141 | pushq %rsi /* return address on top of stack */ | 141 | pushq %rsi /* return address on top of stack */ |
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index fd65e016e413..364ea4a207be 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S | |||
@@ -85,25 +85,25 @@ ENTRY(entry_SYSENTER_compat) | |||
85 | pushq %rcx /* pt_regs->cx */ | 85 | pushq %rcx /* pt_regs->cx */ |
86 | pushq $-ENOSYS /* pt_regs->ax */ | 86 | pushq $-ENOSYS /* pt_regs->ax */ |
87 | pushq $0 /* pt_regs->r8 = 0 */ | 87 | pushq $0 /* pt_regs->r8 = 0 */ |
88 | xorq %r8, %r8 /* nospec r8 */ | 88 | xorl %r8d, %r8d /* nospec r8 */ |
89 | pushq $0 /* pt_regs->r9 = 0 */ | 89 | pushq $0 /* pt_regs->r9 = 0 */ |
90 | xorq %r9, %r9 /* nospec r9 */ | 90 | xorl %r9d, %r9d /* nospec r9 */ |
91 | pushq $0 /* pt_regs->r10 = 0 */ | 91 | pushq $0 /* pt_regs->r10 = 0 */ |
92 | xorq %r10, %r10 /* nospec r10 */ | 92 | xorl %r10d, %r10d /* nospec r10 */ |
93 | pushq $0 /* pt_regs->r11 = 0 */ | 93 | pushq $0 /* pt_regs->r11 = 0 */ |
94 | xorq %r11, %r11 /* nospec r11 */ | 94 | xorl %r11d, %r11d /* nospec r11 */ |
95 | pushq %rbx /* pt_regs->rbx */ | 95 | pushq %rbx /* pt_regs->rbx */ |
96 | xorl %ebx, %ebx /* nospec rbx */ | 96 | xorl %ebx, %ebx /* nospec rbx */ |
97 | pushq %rbp /* pt_regs->rbp (will be overwritten) */ | 97 | pushq %rbp /* pt_regs->rbp (will be overwritten) */ |
98 | xorl %ebp, %ebp /* nospec rbp */ | 98 | xorl %ebp, %ebp /* nospec rbp */ |
99 | pushq $0 /* pt_regs->r12 = 0 */ | 99 | pushq $0 /* pt_regs->r12 = 0 */ |
100 | xorq %r12, %r12 /* nospec r12 */ | 100 | xorl %r12d, %r12d /* nospec r12 */ |
101 | pushq $0 /* pt_regs->r13 = 0 */ | 101 | pushq $0 /* pt_regs->r13 = 0 */ |
102 | xorq %r13, %r13 /* nospec r13 */ | 102 | xorl %r13d, %r13d /* nospec r13 */ |
103 | pushq $0 /* pt_regs->r14 = 0 */ | 103 | pushq $0 /* pt_regs->r14 = 0 */ |
104 | xorq %r14, %r14 /* nospec r14 */ | 104 | xorl %r14d, %r14d /* nospec r14 */ |
105 | pushq $0 /* pt_regs->r15 = 0 */ | 105 | pushq $0 /* pt_regs->r15 = 0 */ |
106 | xorq %r15, %r15 /* nospec r15 */ | 106 | xorl %r15d, %r15d /* nospec r15 */ |
107 | cld | 107 | cld |
108 | 108 | ||
109 | /* | 109 | /* |
@@ -224,25 +224,25 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe) | |||
224 | pushq %rbp /* pt_regs->cx (stashed in bp) */ | 224 | pushq %rbp /* pt_regs->cx (stashed in bp) */ |
225 | pushq $-ENOSYS /* pt_regs->ax */ | 225 | pushq $-ENOSYS /* pt_regs->ax */ |
226 | pushq $0 /* pt_regs->r8 = 0 */ | 226 | pushq $0 /* pt_regs->r8 = 0 */ |
227 | xorq %r8, %r8 /* nospec r8 */ | 227 | xorl %r8d, %r8d /* nospec r8 */ |
228 | pushq $0 /* pt_regs->r9 = 0 */ | 228 | pushq $0 /* pt_regs->r9 = 0 */ |
229 | xorq %r9, %r9 /* nospec r9 */ | 229 | xorl %r9d, %r9d /* nospec r9 */ |
230 | pushq $0 /* pt_regs->r10 = 0 */ | 230 | pushq $0 /* pt_regs->r10 = 0 */ |
231 | xorq %r10, %r10 /* nospec r10 */ | 231 | xorl %r10d, %r10d /* nospec r10 */ |
232 | pushq $0 /* pt_regs->r11 = 0 */ | 232 | pushq $0 /* pt_regs->r11 = 0 */ |
233 | xorq %r11, %r11 /* nospec r11 */ | 233 | xorl %r11d, %r11d /* nospec r11 */ |
234 | pushq %rbx /* pt_regs->rbx */ | 234 | pushq %rbx /* pt_regs->rbx */ |
235 | xorl %ebx, %ebx /* nospec rbx */ | 235 | xorl %ebx, %ebx /* nospec rbx */ |
236 | pushq %rbp /* pt_regs->rbp (will be overwritten) */ | 236 | pushq %rbp /* pt_regs->rbp (will be overwritten) */ |
237 | xorl %ebp, %ebp /* nospec rbp */ | 237 | xorl %ebp, %ebp /* nospec rbp */ |
238 | pushq $0 /* pt_regs->r12 = 0 */ | 238 | pushq $0 /* pt_regs->r12 = 0 */ |
239 | xorq %r12, %r12 /* nospec r12 */ | 239 | xorl %r12d, %r12d /* nospec r12 */ |
240 | pushq $0 /* pt_regs->r13 = 0 */ | 240 | pushq $0 /* pt_regs->r13 = 0 */ |
241 | xorq %r13, %r13 /* nospec r13 */ | 241 | xorl %r13d, %r13d /* nospec r13 */ |
242 | pushq $0 /* pt_regs->r14 = 0 */ | 242 | pushq $0 /* pt_regs->r14 = 0 */ |
243 | xorq %r14, %r14 /* nospec r14 */ | 243 | xorl %r14d, %r14d /* nospec r14 */ |
244 | pushq $0 /* pt_regs->r15 = 0 */ | 244 | pushq $0 /* pt_regs->r15 = 0 */ |
245 | xorq %r15, %r15 /* nospec r15 */ | 245 | xorl %r15d, %r15d /* nospec r15 */ |
246 | 246 | ||
247 | /* | 247 | /* |
248 | * User mode is traced as though IRQs are on, and SYSENTER | 248 | * User mode is traced as though IRQs are on, and SYSENTER |
@@ -298,9 +298,9 @@ sysret32_from_system_call: | |||
298 | */ | 298 | */ |
299 | SWITCH_TO_USER_CR3_NOSTACK scratch_reg=%r8 scratch_reg2=%r9 | 299 | SWITCH_TO_USER_CR3_NOSTACK scratch_reg=%r8 scratch_reg2=%r9 |
300 | 300 | ||
301 | xorq %r8, %r8 | 301 | xorl %r8d, %r8d |
302 | xorq %r9, %r9 | 302 | xorl %r9d, %r9d |
303 | xorq %r10, %r10 | 303 | xorl %r10d, %r10d |
304 | swapgs | 304 | swapgs |
305 | sysretl | 305 | sysretl |
306 | END(entry_SYSCALL_compat) | 306 | END(entry_SYSCALL_compat) |
@@ -358,25 +358,25 @@ ENTRY(entry_INT80_compat) | |||
358 | pushq %rcx /* pt_regs->cx */ | 358 | pushq %rcx /* pt_regs->cx */ |
359 | pushq $-ENOSYS /* pt_regs->ax */ | 359 | pushq $-ENOSYS /* pt_regs->ax */ |
360 | pushq $0 /* pt_regs->r8 = 0 */ | 360 | pushq $0 /* pt_regs->r8 = 0 */ |
361 | xorq %r8, %r8 /* nospec r8 */ | 361 | xorl %r8d, %r8d /* nospec r8 */ |
362 | pushq $0 /* pt_regs->r9 = 0 */ | 362 | pushq $0 /* pt_regs->r9 = 0 */ |
363 | xorq %r9, %r9 /* nospec r9 */ | 363 | xorl %r9d, %r9d /* nospec r9 */ |
364 | pushq $0 /* pt_regs->r10 = 0 */ | 364 | pushq $0 /* pt_regs->r10 = 0 */ |
365 | xorq %r10, %r10 /* nospec r10 */ | 365 | xorl %r10d, %r10d /* nospec r10 */ |
366 | pushq $0 /* pt_regs->r11 = 0 */ | 366 | pushq $0 /* pt_regs->r11 = 0 */ |
367 | xorq %r11, %r11 /* nospec r11 */ | 367 | xorl %r11d, %r11d /* nospec r11 */ |
368 | pushq %rbx /* pt_regs->rbx */ | 368 | pushq %rbx /* pt_regs->rbx */ |
369 | xorl %ebx, %ebx /* nospec rbx */ | 369 | xorl %ebx, %ebx /* nospec rbx */ |
370 | pushq %rbp /* pt_regs->rbp */ | 370 | pushq %rbp /* pt_regs->rbp */ |
371 | xorl %ebp, %ebp /* nospec rbp */ | 371 | xorl %ebp, %ebp /* nospec rbp */ |
372 | pushq %r12 /* pt_regs->r12 */ | 372 | pushq %r12 /* pt_regs->r12 */ |
373 | xorq %r12, %r12 /* nospec r12 */ | 373 | xorl %r12d, %r12d /* nospec r12 */ |
374 | pushq %r13 /* pt_regs->r13 */ | 374 | pushq %r13 /* pt_regs->r13 */ |
375 | xorq %r13, %r13 /* nospec r13 */ | 375 | xorl %r13d, %r13d /* nospec r13 */ |
376 | pushq %r14 /* pt_regs->r14 */ | 376 | pushq %r14 /* pt_regs->r14 */ |
377 | xorq %r14, %r14 /* nospec r14 */ | 377 | xorl %r14d, %r14d /* nospec r14 */ |
378 | pushq %r15 /* pt_regs->r15 */ | 378 | pushq %r15 /* pt_regs->r15 */ |
379 | xorq %r15, %r15 /* nospec r15 */ | 379 | xorl %r15d, %r15d /* nospec r15 */ |
380 | cld | 380 | cld |
381 | 381 | ||
382 | /* | 382 | /* |