aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Woodhouse <dwmw@amazon.co.uk>2018-02-19 05:50:56 -0500
committerIngo Molnar <mingo@kernel.org>2018-02-20 03:38:26 -0500
commitd1c99108af3c5992640aa2afa7d2e88c3775c06e (patch)
tree603d37297f7655ff02522f9fbee940d22ac83236
parent8554004a0231dedf44d4d62147fb3d6a6db489aa (diff)
Revert "x86/retpoline: Simplify vmexit_fill_RSB()"
This reverts commit 1dde7415e99933bb7293d6b2843752cbdb43ec11. By putting the RSB filling out of line and calling it, we waste one RSB slot for returning from the function itself, which means one fewer actual function call we can make if we're doing the Skylake abomination of call-depth counting. It also changed the number of RSB stuffings we do on vmexit from 32, which was correct, to 16. Let's just stop with the bikeshedding; it didn't actually *fix* anything anyway. Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> Acked-by: Thomas Gleixner <tglx@linutronix.de> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: arjan.van.de.ven@intel.com Cc: bp@alien8.de Cc: dave.hansen@intel.com Cc: jmattson@google.com Cc: karahmed@amazon.de Cc: kvm@vger.kernel.org Cc: pbonzini@redhat.com Cc: rkrcmar@redhat.com Link: http://lkml.kernel.org/r/1519037457-7643-4-git-send-email-dwmw@amazon.co.uk Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--arch/x86/entry/entry_32.S3
-rw-r--r--arch/x86/entry/entry_64.S3
-rw-r--r--arch/x86/include/asm/asm-prototypes.h3
-rw-r--r--arch/x86/include/asm/nospec-branch.h70
-rw-r--r--arch/x86/lib/Makefile1
-rw-r--r--arch/x86/lib/retpoline.S56
6 files changed, 65 insertions(+), 71 deletions(-)
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 16c2c022540d..6ad064c8cf35 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -252,8 +252,7 @@ ENTRY(__switch_to_asm)
252 * exist, overwrite the RSB with entries which capture 252 * exist, overwrite the RSB with entries which capture
253 * speculative execution to prevent attack. 253 * speculative execution to prevent attack.
254 */ 254 */
255 /* Clobbers %ebx */ 255 FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
256 FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
257#endif 256#endif
258 257
259 /* restore callee-saved registers */ 258 /* restore callee-saved registers */
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 77edc2390868..7a53879ec689 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -364,8 +364,7 @@ ENTRY(__switch_to_asm)
364 * exist, overwrite the RSB with entries which capture 364 * exist, overwrite the RSB with entries which capture
365 * speculative execution to prevent attack. 365 * speculative execution to prevent attack.
366 */ 366 */
367 /* Clobbers %rbx */ 367 FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
368 FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
369#endif 368#endif
370 369
371 /* restore callee-saved registers */ 370 /* restore callee-saved registers */
diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
index 4d111616524b..1908214b9125 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -38,7 +38,4 @@ INDIRECT_THUNK(dx)
38INDIRECT_THUNK(si) 38INDIRECT_THUNK(si)
39INDIRECT_THUNK(di) 39INDIRECT_THUNK(di)
40INDIRECT_THUNK(bp) 40INDIRECT_THUNK(bp)
41asmlinkage void __fill_rsb(void);
42asmlinkage void __clear_rsb(void);
43
44#endif /* CONFIG_RETPOLINE */ 41#endif /* CONFIG_RETPOLINE */
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 76b058533e47..af34b1e8069a 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -8,6 +8,50 @@
8#include <asm/cpufeatures.h> 8#include <asm/cpufeatures.h>
9#include <asm/msr-index.h> 9#include <asm/msr-index.h>
10 10
11/*
12 * Fill the CPU return stack buffer.
13 *
14 * Each entry in the RSB, if used for a speculative 'ret', contains an
15 * infinite 'pause; lfence; jmp' loop to capture speculative execution.
16 *
17 * This is required in various cases for retpoline and IBRS-based
18 * mitigations for the Spectre variant 2 vulnerability. Sometimes to
19 * eliminate potentially bogus entries from the RSB, and sometimes
20 * purely to ensure that it doesn't get empty, which on some CPUs would
21 * allow predictions from other (unwanted!) sources to be used.
22 *
23 * We define a CPP macro such that it can be used from both .S files and
24 * inline assembly. It's possible to do a .macro and then include that
25 * from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
26 */
27
28#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
29#define RSB_FILL_LOOPS 16 /* To avoid underflow */
30
31/*
32 * Google experimented with loop-unrolling and this turned out to be
33 * the optimal version — two calls, each with their own speculation
34 * trap should their return address end up getting used, in a loop.
35 */
36#define __FILL_RETURN_BUFFER(reg, nr, sp) \
37 mov $(nr/2), reg; \
38771: \
39 call 772f; \
40773: /* speculation trap */ \
41 pause; \
42 lfence; \
43 jmp 773b; \
44772: \
45 call 774f; \
46775: /* speculation trap */ \
47 pause; \
48 lfence; \
49 jmp 775b; \
50774: \
51 dec reg; \
52 jnz 771b; \
53 add $(BITS_PER_LONG/8) * nr, sp;
54
11#ifdef __ASSEMBLY__ 55#ifdef __ASSEMBLY__
12 56
13/* 57/*
@@ -78,10 +122,17 @@
78#endif 122#endif
79.endm 123.endm
80 124
81/* This clobbers the BX register */ 125 /*
82.macro FILL_RETURN_BUFFER nr:req ftr:req 126 * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
127 * monstrosity above, manually.
128 */
129.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
83#ifdef CONFIG_RETPOLINE 130#ifdef CONFIG_RETPOLINE
84 ALTERNATIVE "", "call __clear_rsb", \ftr 131 ANNOTATE_NOSPEC_ALTERNATIVE
132 ALTERNATIVE "jmp .Lskip_rsb_\@", \
133 __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \
134 \ftr
135.Lskip_rsb_\@:
85#endif 136#endif
86.endm 137.endm
87 138
@@ -156,10 +207,15 @@ extern char __indirect_thunk_end[];
156static inline void vmexit_fill_RSB(void) 207static inline void vmexit_fill_RSB(void)
157{ 208{
158#ifdef CONFIG_RETPOLINE 209#ifdef CONFIG_RETPOLINE
159 alternative_input("", 210 unsigned long loops;
160 "call __fill_rsb", 211
161 X86_FEATURE_RETPOLINE, 212 asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
162 ASM_NO_INPUT_CLOBBER(_ASM_BX, "memory")); 213 ALTERNATIVE("jmp 910f",
214 __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
215 X86_FEATURE_RETPOLINE)
216 "910:"
217 : "=r" (loops), ASM_CALL_CONSTRAINT
218 : : "memory" );
163#endif 219#endif
164} 220}
165 221
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 91e9700cc6dc..25a972c61b0a 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -28,7 +28,6 @@ lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
28lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o 28lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
29lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o 29lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
30lib-$(CONFIG_RETPOLINE) += retpoline.o 30lib-$(CONFIG_RETPOLINE) += retpoline.o
31OBJECT_FILES_NON_STANDARD_retpoline.o :=y
32 31
33obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o 32obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
34 33
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index 480edc3a5e03..c909961e678a 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -7,7 +7,6 @@
7#include <asm/alternative-asm.h> 7#include <asm/alternative-asm.h>
8#include <asm/export.h> 8#include <asm/export.h>
9#include <asm/nospec-branch.h> 9#include <asm/nospec-branch.h>
10#include <asm/bitsperlong.h>
11 10
12.macro THUNK reg 11.macro THUNK reg
13 .section .text.__x86.indirect_thunk 12 .section .text.__x86.indirect_thunk
@@ -47,58 +46,3 @@ GENERATE_THUNK(r13)
47GENERATE_THUNK(r14) 46GENERATE_THUNK(r14)
48GENERATE_THUNK(r15) 47GENERATE_THUNK(r15)
49#endif 48#endif
50
51/*
52 * Fill the CPU return stack buffer.
53 *
54 * Each entry in the RSB, if used for a speculative 'ret', contains an
55 * infinite 'pause; lfence; jmp' loop to capture speculative execution.
56 *
57 * This is required in various cases for retpoline and IBRS-based
58 * mitigations for the Spectre variant 2 vulnerability. Sometimes to
59 * eliminate potentially bogus entries from the RSB, and sometimes
60 * purely to ensure that it doesn't get empty, which on some CPUs would
61 * allow predictions from other (unwanted!) sources to be used.
62 *
63 * Google experimented with loop-unrolling and this turned out to be
64 * the optimal version - two calls, each with their own speculation
65 * trap should their return address end up getting used, in a loop.
66 */
67.macro STUFF_RSB nr:req sp:req
68 mov $(\nr / 2), %_ASM_BX
69 .align 16
70771:
71 call 772f
72773: /* speculation trap */
73 pause
74 lfence
75 jmp 773b
76 .align 16
77772:
78 call 774f
79775: /* speculation trap */
80 pause
81 lfence
82 jmp 775b
83 .align 16
84774:
85 dec %_ASM_BX
86 jnz 771b
87 add $((BITS_PER_LONG/8) * \nr), \sp
88.endm
89
90#define RSB_FILL_LOOPS 16 /* To avoid underflow */
91
92ENTRY(__fill_rsb)
93 STUFF_RSB RSB_FILL_LOOPS, %_ASM_SP
94 ret
95END(__fill_rsb)
96EXPORT_SYMBOL_GPL(__fill_rsb)
97
98#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
99
100ENTRY(__clear_rsb)
101 STUFF_RSB RSB_CLEAR_LOOPS, %_ASM_SP
102 ret
103END(__clear_rsb)
104EXPORT_SYMBOL_GPL(__clear_rsb)