aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Dominik Brodowski <linux@dominikbrodowski.net> 2018-02-14 12:59:24 -0500
committer: Ingo Molnar <mingo@kernel.org> 2018-02-17 05:14:33 -0500
commit: ced5d0bf603fa0baee8ea889e1d70971fd210894 (patch)
tree: 6f9a705b89fc522ed4301049bdc58af4bcb7f21e
parent: 9e809d15d6b692fa061d74be7aaab1c79f6784b8 (diff)
x86/entry/64: Use 'xorl' for faster register clearing
On some x86 CPU microarchitectures using 'xorq' to clear general-purpose registers is slower than 'xorl'. As 'xorl' is sufficient to clear all 64 bits of these registers due to zero-extension [*], switch the x86 64-bit entry code to use 'xorl'.

No change in functionality and no change in code size.

[*] According to Intel 64 and IA-32 Architecture Software Developer's Manual, section 3.4.1.1, the result of an operation on 32-bit operands is "zero-extended to a 64-bit result in the destination general-purpose register." The AMD64 Architecture Programmer’s Manual Volume 3, Appendix B.1, describes the same behaviour.

Suggested-by: Denys Vlasenko <dvlasenk@redhat.com>
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20180214175924.23065-3-linux@dominikbrodowski.net
[ Improved on the changelog a bit. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r-- arch/x86/entry/calling.h 16
-rw-r--r-- arch/x86/entry/entry_64_compat.S 54
2 files changed, 35 insertions, 35 deletions
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 196b6103edf6..5d10b7a85cad 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -117,25 +117,25 @@ For 32-bit we have the following conventions - kernel is built with
117 pushq %rcx /* pt_regs->cx */ 117 pushq %rcx /* pt_regs->cx */
118 pushq \rax /* pt_regs->ax */ 118 pushq \rax /* pt_regs->ax */
119 pushq %r8 /* pt_regs->r8 */ 119 pushq %r8 /* pt_regs->r8 */
120 xorq %r8, %r8 /* nospec r8 */ 120 xorl %r8d, %r8d /* nospec r8 */
121 pushq %r9 /* pt_regs->r9 */ 121 pushq %r9 /* pt_regs->r9 */
122 xorq %r9, %r9 /* nospec r9 */ 122 xorl %r9d, %r9d /* nospec r9 */
123 pushq %r10 /* pt_regs->r10 */ 123 pushq %r10 /* pt_regs->r10 */
124 xorq %r10, %r10 /* nospec r10 */ 124 xorl %r10d, %r10d /* nospec r10 */
125 pushq %r11 /* pt_regs->r11 */ 125 pushq %r11 /* pt_regs->r11 */
126 xorq %r11, %r11 /* nospec r11*/ 126 xorl %r11d, %r11d /* nospec r11*/
127 pushq %rbx /* pt_regs->rbx */ 127 pushq %rbx /* pt_regs->rbx */
128 xorl %ebx, %ebx /* nospec rbx*/ 128 xorl %ebx, %ebx /* nospec rbx*/
129 pushq %rbp /* pt_regs->rbp */ 129 pushq %rbp /* pt_regs->rbp */
130 xorl %ebp, %ebp /* nospec rbp*/ 130 xorl %ebp, %ebp /* nospec rbp*/
131 pushq %r12 /* pt_regs->r12 */ 131 pushq %r12 /* pt_regs->r12 */
132 xorq %r12, %r12 /* nospec r12*/ 132 xorl %r12d, %r12d /* nospec r12*/
133 pushq %r13 /* pt_regs->r13 */ 133 pushq %r13 /* pt_regs->r13 */
134 xorq %r13, %r13 /* nospec r13*/ 134 xorl %r13d, %r13d /* nospec r13*/
135 pushq %r14 /* pt_regs->r14 */ 135 pushq %r14 /* pt_regs->r14 */
136 xorq %r14, %r14 /* nospec r14*/ 136 xorl %r14d, %r14d /* nospec r14*/
137 pushq %r15 /* pt_regs->r15 */ 137 pushq %r15 /* pt_regs->r15 */
138 xorq %r15, %r15 /* nospec r15*/ 138 xorl %r15d, %r15d /* nospec r15*/
139 UNWIND_HINT_REGS 139 UNWIND_HINT_REGS
140 .if \save_ret 140 .if \save_ret
141 pushq %rsi /* return address on top of stack */ 141 pushq %rsi /* return address on top of stack */
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index fd65e016e413..364ea4a207be 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -85,25 +85,25 @@ ENTRY(entry_SYSENTER_compat)
85 pushq %rcx /* pt_regs->cx */ 85 pushq %rcx /* pt_regs->cx */
86 pushq $-ENOSYS /* pt_regs->ax */ 86 pushq $-ENOSYS /* pt_regs->ax */
87 pushq $0 /* pt_regs->r8 = 0 */ 87 pushq $0 /* pt_regs->r8 = 0 */
88 xorq %r8, %r8 /* nospec r8 */ 88 xorl %r8d, %r8d /* nospec r8 */
89 pushq $0 /* pt_regs->r9 = 0 */ 89 pushq $0 /* pt_regs->r9 = 0 */
90 xorq %r9, %r9 /* nospec r9 */ 90 xorl %r9d, %r9d /* nospec r9 */
91 pushq $0 /* pt_regs->r10 = 0 */ 91 pushq $0 /* pt_regs->r10 = 0 */
92 xorq %r10, %r10 /* nospec r10 */ 92 xorl %r10d, %r10d /* nospec r10 */
93 pushq $0 /* pt_regs->r11 = 0 */ 93 pushq $0 /* pt_regs->r11 = 0 */
94 xorq %r11, %r11 /* nospec r11 */ 94 xorl %r11d, %r11d /* nospec r11 */
95 pushq %rbx /* pt_regs->rbx */ 95 pushq %rbx /* pt_regs->rbx */
96 xorl %ebx, %ebx /* nospec rbx */ 96 xorl %ebx, %ebx /* nospec rbx */
97 pushq %rbp /* pt_regs->rbp (will be overwritten) */ 97 pushq %rbp /* pt_regs->rbp (will be overwritten) */
98 xorl %ebp, %ebp /* nospec rbp */ 98 xorl %ebp, %ebp /* nospec rbp */
99 pushq $0 /* pt_regs->r12 = 0 */ 99 pushq $0 /* pt_regs->r12 = 0 */
100 xorq %r12, %r12 /* nospec r12 */ 100 xorl %r12d, %r12d /* nospec r12 */
101 pushq $0 /* pt_regs->r13 = 0 */ 101 pushq $0 /* pt_regs->r13 = 0 */
102 xorq %r13, %r13 /* nospec r13 */ 102 xorl %r13d, %r13d /* nospec r13 */
103 pushq $0 /* pt_regs->r14 = 0 */ 103 pushq $0 /* pt_regs->r14 = 0 */
104 xorq %r14, %r14 /* nospec r14 */ 104 xorl %r14d, %r14d /* nospec r14 */
105 pushq $0 /* pt_regs->r15 = 0 */ 105 pushq $0 /* pt_regs->r15 = 0 */
106 xorq %r15, %r15 /* nospec r15 */ 106 xorl %r15d, %r15d /* nospec r15 */
107 cld 107 cld
108 108
109 /* 109 /*
@@ -224,25 +224,25 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
224 pushq %rbp /* pt_regs->cx (stashed in bp) */ 224 pushq %rbp /* pt_regs->cx (stashed in bp) */
225 pushq $-ENOSYS /* pt_regs->ax */ 225 pushq $-ENOSYS /* pt_regs->ax */
226 pushq $0 /* pt_regs->r8 = 0 */ 226 pushq $0 /* pt_regs->r8 = 0 */
227 xorq %r8, %r8 /* nospec r8 */ 227 xorl %r8d, %r8d /* nospec r8 */
228 pushq $0 /* pt_regs->r9 = 0 */ 228 pushq $0 /* pt_regs->r9 = 0 */
229 xorq %r9, %r9 /* nospec r9 */ 229 xorl %r9d, %r9d /* nospec r9 */
230 pushq $0 /* pt_regs->r10 = 0 */ 230 pushq $0 /* pt_regs->r10 = 0 */
231 xorq %r10, %r10 /* nospec r10 */ 231 xorl %r10d, %r10d /* nospec r10 */
232 pushq $0 /* pt_regs->r11 = 0 */ 232 pushq $0 /* pt_regs->r11 = 0 */
233 xorq %r11, %r11 /* nospec r11 */ 233 xorl %r11d, %r11d /* nospec r11 */
234 pushq %rbx /* pt_regs->rbx */ 234 pushq %rbx /* pt_regs->rbx */
235 xorl %ebx, %ebx /* nospec rbx */ 235 xorl %ebx, %ebx /* nospec rbx */
236 pushq %rbp /* pt_regs->rbp (will be overwritten) */ 236 pushq %rbp /* pt_regs->rbp (will be overwritten) */
237 xorl %ebp, %ebp /* nospec rbp */ 237 xorl %ebp, %ebp /* nospec rbp */
238 pushq $0 /* pt_regs->r12 = 0 */ 238 pushq $0 /* pt_regs->r12 = 0 */
239 xorq %r12, %r12 /* nospec r12 */ 239 xorl %r12d, %r12d /* nospec r12 */
240 pushq $0 /* pt_regs->r13 = 0 */ 240 pushq $0 /* pt_regs->r13 = 0 */
241 xorq %r13, %r13 /* nospec r13 */ 241 xorl %r13d, %r13d /* nospec r13 */
242 pushq $0 /* pt_regs->r14 = 0 */ 242 pushq $0 /* pt_regs->r14 = 0 */
243 xorq %r14, %r14 /* nospec r14 */ 243 xorl %r14d, %r14d /* nospec r14 */
244 pushq $0 /* pt_regs->r15 = 0 */ 244 pushq $0 /* pt_regs->r15 = 0 */
245 xorq %r15, %r15 /* nospec r15 */ 245 xorl %r15d, %r15d /* nospec r15 */
246 246
247 /* 247 /*
248 * User mode is traced as though IRQs are on, and SYSENTER 248 * User mode is traced as though IRQs are on, and SYSENTER
@@ -298,9 +298,9 @@ sysret32_from_system_call:
298 */ 298 */
299 SWITCH_TO_USER_CR3_NOSTACK scratch_reg=%r8 scratch_reg2=%r9 299 SWITCH_TO_USER_CR3_NOSTACK scratch_reg=%r8 scratch_reg2=%r9
300 300
301 xorq %r8, %r8 301 xorl %r8d, %r8d
302 xorq %r9, %r9 302 xorl %r9d, %r9d
303 xorq %r10, %r10 303 xorl %r10d, %r10d
304 swapgs 304 swapgs
305 sysretl 305 sysretl
306 END(entry_SYSCALL_compat) 306 END(entry_SYSCALL_compat)
@@ -358,25 +358,25 @@ ENTRY(entry_INT80_compat)
358 pushq %rcx /* pt_regs->cx */ 358 pushq %rcx /* pt_regs->cx */
359 pushq $-ENOSYS /* pt_regs->ax */ 359 pushq $-ENOSYS /* pt_regs->ax */
360 pushq $0 /* pt_regs->r8 = 0 */ 360 pushq $0 /* pt_regs->r8 = 0 */
361 xorq %r8, %r8 /* nospec r8 */ 361 xorl %r8d, %r8d /* nospec r8 */
362 pushq $0 /* pt_regs->r9 = 0 */ 362 pushq $0 /* pt_regs->r9 = 0 */
363 xorq %r9, %r9 /* nospec r9 */ 363 xorl %r9d, %r9d /* nospec r9 */
364 pushq $0 /* pt_regs->r10 = 0 */ 364 pushq $0 /* pt_regs->r10 = 0 */
365 xorq %r10, %r10 /* nospec r10 */ 365 xorl %r10d, %r10d /* nospec r10 */
366 pushq $0 /* pt_regs->r11 = 0 */ 366 pushq $0 /* pt_regs->r11 = 0 */
367 xorq %r11, %r11 /* nospec r11 */ 367 xorl %r11d, %r11d /* nospec r11 */
368 pushq %rbx /* pt_regs->rbx */ 368 pushq %rbx /* pt_regs->rbx */
369 xorl %ebx, %ebx /* nospec rbx */ 369 xorl %ebx, %ebx /* nospec rbx */
370 pushq %rbp /* pt_regs->rbp */ 370 pushq %rbp /* pt_regs->rbp */
371 xorl %ebp, %ebp /* nospec rbp */ 371 xorl %ebp, %ebp /* nospec rbp */
372 pushq %r12 /* pt_regs->r12 */ 372 pushq %r12 /* pt_regs->r12 */
373 xorq %r12, %r12 /* nospec r12 */ 373 xorl %r12d, %r12d /* nospec r12 */
374 pushq %r13 /* pt_regs->r13 */ 374 pushq %r13 /* pt_regs->r13 */
375 xorq %r13, %r13 /* nospec r13 */ 375 xorl %r13d, %r13d /* nospec r13 */
376 pushq %r14 /* pt_regs->r14 */ 376 pushq %r14 /* pt_regs->r14 */
377 xorq %r14, %r14 /* nospec r14 */ 377 xorl %r14d, %r14d /* nospec r14 */
378 pushq %r15 /* pt_regs->r15 */ 378 pushq %r15 /* pt_regs->r15 */
379 xorq %r15, %r15 /* nospec r15 */ 379 xorl %r15d, %r15d /* nospec r15 */
380 cld 380 cld
381 381
382 /* 382 /*