author     Borislav Petkov <bp@suse.de>    2016-01-26 16:12:05 -0500
committer  Ingo Molnar <mingo@kernel.org>  2016-01-30 05:22:18 -0500
commit     bc696ca05f5a8927329ec276a892341e006b00ba (patch)
tree       a8200bfe9677aa30a11b53e8fd6215aaeb3a4a96
parent     cd4d09ec6f6c12a2cc3db5b7d8876a325a53545b (diff)
x86/cpufeature: Replace the old static_cpu_has() with safe variant
So the old one didn't work properly before alternatives had run.
And it was supposed to provide an optimized JMP because the
assumption was that the offset it is jumping to is within a signed
byte and thus a two-byte JMP.

So I did an x86_64 allyesconfig build and dumped all possible sites
where static_cpu_has() was used. The optimization amounted to all in
all 12(!) places where static_cpu_has() had generated a 2-byte JMP.
Which has saved us a whopping 36 bytes!

This clearly is not worth the trouble so we can remove it. The only
place where the optimization might count - in __switch_to() - we
will handle differently. But that's not the subject of this patch.

Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1453842730-28463-6-git-send-email-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
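Background on the mechanism (illustrative aside, not part of the patch):
static_cpu_has() is built on asm goto plus the alternatives machinery.
The compiler emits a JMP to the out-of-line label and records the site
in a special section; at boot, apply_alternatives() rewrites that JMP
once the CPU's feature bits are known. The 36 bytes above follow from
the x86 JMP encodings: a short JMP (opcode 0xEB, rel8) is two bytes and
a near JMP (0xE9, rel32) is five, so each of the 12 sites that got the
short form saved three bytes, i.e. 12 * 3 = 36.

A minimal, self-contained userspace sketch of that pattern follows. The
section name (.alt_table), record layout and function names here are
simplified stand-ins for the kernel's real .altinstructions format, and
it needs gcc >= 4.5 for asm goto:

        #include <stdbool.h>
        #include <stdio.h>

        static inline bool feature_enabled(void)
        {
                /*
                 * Emit a JMP to the fallback label and record the site
                 * in a separate section. In the kernel, a boot-time
                 * patcher (apply_alternatives()) would rewrite the JMP;
                 * with no patcher running, the JMP stays in place and
                 * the fallback is always taken, which is exactly the
                 * pre-alternatives behaviour the message above is about.
                 */
                asm goto("1: jmp %l[t_no]\n"
                         "2:\n"
                         ".pushsection .alt_table,\"a\"\n"
                         " .long 1b - .\n"      /* patch site */
                         " .byte 2b - 1b\n"     /* patchable length */
                         ".popsection\n"
                         : : : : t_no);
                return true;    /* fast path, enabled by patching */
        t_no:
                return false;   /* dynamic fallback path */
        }

        int main(void)
        {
                /* Prints "off", since nothing patched the JMP away. */
                printf("feature: %s\n", feature_enabled() ? "on" : "off");
                return 0;
        }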
-rw-r--r--  arch/x86/Kconfig.debug                 10
-rw-r--r--  arch/x86/include/asm/cpufeature.h     100
-rw-r--r--  arch/x86/include/asm/fpu/internal.h    14
-rw-r--r--  arch/x86/kernel/apic/apic_numachip.c    4
-rw-r--r--  arch/x86/kernel/cpu/common.c           12
-rw-r--r--  arch/x86/kernel/vm86_32.c               2
-rw-r--r--  drivers/cpufreq/intel_pstate.c          2
-rw-r--r--  fs/btrfs/disk-io.c                      2
8 files changed, 21 insertions(+), 125 deletions(-)
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 9b18ed97a8a2..68a2d1f0a683 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -350,16 +350,6 @@ config DEBUG_IMR_SELFTEST
 
          If unsure say N here.
 
-config X86_DEBUG_STATIC_CPU_HAS
-       bool "Debug alternatives"
-       depends on DEBUG_KERNEL
-       ---help---
-         This option causes additional code to be generated which
-         fails if static_cpu_has() is used before alternatives have
-         run.
-
-         If unsure, say N.
-
 config X86_DEBUG_FPU
        bool "Debug the x86 FPU code"
        depends on DEBUG_KERNEL
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 3cce9f3c5cb1..a261cf2e7907 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -125,103 +125,19 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
 #define cpu_has_osxsave         boot_cpu_has(X86_FEATURE_OSXSAVE)
 #define cpu_has_hypervisor      boot_cpu_has(X86_FEATURE_HYPERVISOR)
 /*
- * Do not add any more of those clumsy macros - use static_cpu_has_safe() for
+ * Do not add any more of those clumsy macros - use static_cpu_has() for
  * fast paths and boot_cpu_has() otherwise!
  */
 
 #if __GNUC__ >= 4 && defined(CONFIG_X86_FAST_FEATURE_TESTS)
-extern void warn_pre_alternatives(void);
-extern bool __static_cpu_has_safe(u16 bit);
+extern bool __static_cpu_has(u16 bit);
 
 /*
  * Static testing of CPU features. Used the same as boot_cpu_has().
  * These are only valid after alternatives have run, but will statically
  * patch the target code for additional performance.
  */
-static __always_inline __pure bool __static_cpu_has(u16 bit)
-{
-#ifdef CC_HAVE_ASM_GOTO
-
-#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
-
-        /*
-         * Catch too early usage of this before alternatives
-         * have run.
-         */
-        asm_volatile_goto("1: jmp %l[t_warn]\n"
-                 "2:\n"
-                 ".section .altinstructions,\"a\"\n"
-                 " .long 1b - .\n"
-                 " .long 0\n"                   /* no replacement */
-                 " .word %P0\n"                 /* 1: do replace */
-                 " .byte 2b - 1b\n"             /* source len */
-                 " .byte 0\n"                   /* replacement len */
-                 " .byte 0\n"                   /* pad len */
-                 ".previous\n"
-                 /* skipping size check since replacement size = 0 */
-                 : : "i" (X86_FEATURE_ALWAYS) : : t_warn);
-
-#endif
-
-        asm_volatile_goto("1: jmp %l[t_no]\n"
-                 "2:\n"
-                 ".section .altinstructions,\"a\"\n"
-                 " .long 1b - .\n"
-                 " .long 0\n"                   /* no replacement */
-                 " .word %P0\n"                 /* feature bit */
-                 " .byte 2b - 1b\n"             /* source len */
-                 " .byte 0\n"                   /* replacement len */
-                 " .byte 0\n"                   /* pad len */
-                 ".previous\n"
-                 /* skipping size check since replacement size = 0 */
-                 : : "i" (bit) : : t_no);
-        return true;
-        t_no:
-        return false;
-
-#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
-        t_warn:
-        warn_pre_alternatives();
-        return false;
-#endif
-
-#else /* CC_HAVE_ASM_GOTO */
-
-        u8 flag;
-        /* Open-coded due to __stringify() in ALTERNATIVE() */
-        asm volatile("1: movb $0,%0\n"
-                     "2:\n"
-                     ".section .altinstructions,\"a\"\n"
-                     " .long 1b - .\n"
-                     " .long 3f - .\n"
-                     " .word %P1\n"             /* feature bit */
-                     " .byte 2b - 1b\n"         /* source len */
-                     " .byte 4f - 3f\n"         /* replacement len */
-                     " .byte 0\n"               /* pad len */
-                     ".previous\n"
-                     ".section .discard,\"aw\",@progbits\n"
-                     " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
-                     ".previous\n"
-                     ".section .altinstr_replacement,\"ax\"\n"
-                     "3: movb $1,%0\n"
-                     "4:\n"
-                     ".previous\n"
-                     : "=qm" (flag) : "i" (bit));
-        return flag;
-
-#endif /* CC_HAVE_ASM_GOTO */
-}
-
-#define static_cpu_has(bit)                                     \
-(                                                               \
-        __builtin_constant_p(boot_cpu_has(bit)) ?               \
-                boot_cpu_has(bit) :                             \
-        __builtin_constant_p(bit) ?                             \
-                __static_cpu_has(bit) :                         \
-                boot_cpu_has(bit)                               \
-)
-
-static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
+static __always_inline __pure bool _static_cpu_has(u16 bit)
 {
 #ifdef CC_HAVE_ASM_GOTO
         asm_volatile_goto("1: jmp %l[t_dynamic]\n"
@@ -255,7 +171,7 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
         t_no:
         return false;
         t_dynamic:
-        return __static_cpu_has_safe(bit);
+        return __static_cpu_has(bit);
 #else
         u8 flag;
         /* Open-coded due to __stringify() in ALTERNATIVE() */
@@ -293,22 +209,21 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
                      ".previous\n"
                      : "=qm" (flag)
                      : "i" (bit), "i" (X86_FEATURE_ALWAYS));
-        return (flag == 2 ? __static_cpu_has_safe(bit) : flag);
+        return (flag == 2 ? __static_cpu_has(bit) : flag);
 #endif /* CC_HAVE_ASM_GOTO */
 }
 
-#define static_cpu_has_safe(bit)                                \
+#define static_cpu_has(bit)                                     \
 (                                                               \
         __builtin_constant_p(boot_cpu_has(bit)) ?               \
                 boot_cpu_has(bit) :                             \
-                _static_cpu_has_safe(bit)                       \
+                _static_cpu_has(bit)                            \
 )
 #else
 /*
  * gcc 3.x is too stupid to do the static test; fall back to dynamic.
  */
 #define static_cpu_has(bit)             boot_cpu_has(bit)
-#define static_cpu_has_safe(bit)        boot_cpu_has(bit)
 #endif
 
 #define cpu_has_bug(c, bit)             cpu_has(c, (bit))
@@ -316,7 +231,6 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
 #define clear_cpu_bug(c, bit)           clear_cpu_cap(c, (bit))
 
 #define static_cpu_has_bug(bit)         static_cpu_has((bit))
-#define static_cpu_has_bug_safe(bit)    static_cpu_has_safe((bit))
 #define boot_cpu_has_bug(bit)           cpu_has_bug(&boot_cpu_data, (bit))
 
 #define MAX_CPU_FEATURES                (NCAPINTS * 32)
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index d01199def781..c2e46eb96b6d 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -59,22 +59,22 @@ extern u64 fpu__get_supported_xfeatures_mask(void);
  */
 static __always_inline __pure bool use_eager_fpu(void)
 {
-        return static_cpu_has_safe(X86_FEATURE_EAGER_FPU);
+        return static_cpu_has(X86_FEATURE_EAGER_FPU);
 }
 
 static __always_inline __pure bool use_xsaveopt(void)
 {
-        return static_cpu_has_safe(X86_FEATURE_XSAVEOPT);
+        return static_cpu_has(X86_FEATURE_XSAVEOPT);
 }
 
 static __always_inline __pure bool use_xsave(void)
 {
-        return static_cpu_has_safe(X86_FEATURE_XSAVE);
+        return static_cpu_has(X86_FEATURE_XSAVE);
 }
 
 static __always_inline __pure bool use_fxsr(void)
 {
-        return static_cpu_has_safe(X86_FEATURE_FXSR);
+        return static_cpu_has(X86_FEATURE_FXSR);
 }
 
 /*
@@ -301,7 +301,7 @@ static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate)
 
         WARN_ON(system_state != SYSTEM_BOOTING);
 
-        if (static_cpu_has_safe(X86_FEATURE_XSAVES))
+        if (static_cpu_has(X86_FEATURE_XSAVES))
                 XSTATE_OP(XSAVES, xstate, lmask, hmask, err);
         else
                 XSTATE_OP(XSAVE, xstate, lmask, hmask, err);
@@ -323,7 +323,7 @@ static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate)
 
         WARN_ON(system_state != SYSTEM_BOOTING);
 
-        if (static_cpu_has_safe(X86_FEATURE_XSAVES))
+        if (static_cpu_has(X86_FEATURE_XSAVES))
                 XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
         else
                 XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
@@ -461,7 +461,7 @@ static inline void copy_kernel_to_fpregs(union fpregs_state *fpstate)
          * pending. Clear the x87 state here by setting it to fixed values.
          * "m" is a random variable that should be in L1.
          */
-        if (unlikely(static_cpu_has_bug_safe(X86_BUG_FXSAVE_LEAK))) {
+        if (unlikely(static_cpu_has_bug(X86_BUG_FXSAVE_LEAK))) {
                 asm volatile(
                         "fnclex\n\t"
                         "emms\n\t"
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index c80c02c6ec49..ab5c2c685a3c 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -30,7 +30,7 @@ static unsigned int numachip1_get_apic_id(unsigned long x)
         unsigned long value;
         unsigned int id = (x >> 24) & 0xff;
 
-        if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) {
+        if (static_cpu_has(X86_FEATURE_NODEID_MSR)) {
                 rdmsrl(MSR_FAM10H_NODE_ID, value);
                 id |= (value << 2) & 0xff00;
         }
@@ -178,7 +178,7 @@ static void fixup_cpu_id(struct cpuinfo_x86 *c, int node)
         this_cpu_write(cpu_llc_id, node);
 
         /* Account for nodes per socket in multi-core-module processors */
-        if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) {
+        if (static_cpu_has(X86_FEATURE_NODEID_MSR)) {
                 rdmsrl(MSR_FAM10H_NODE_ID, val);
                 nodes = ((val >> 3) & 7) + 1;
         }
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 37830de8f60a..ee499817f3f5 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1475,19 +1475,11 @@ void cpu_init(void)
 }
 #endif
 
-#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
-void warn_pre_alternatives(void)
-{
-        WARN(1, "You're using static_cpu_has before alternatives have run!\n");
-}
-EXPORT_SYMBOL_GPL(warn_pre_alternatives);
-#endif
-
-inline bool __static_cpu_has_safe(u16 bit)
+inline bool __static_cpu_has(u16 bit)
 {
         return boot_cpu_has(bit);
 }
-EXPORT_SYMBOL_GPL(__static_cpu_has_safe);
+EXPORT_SYMBOL_GPL(__static_cpu_has);
 
 static void bsp_resume(void)
 {
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index e574b8546518..3dce1ca0a653 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -362,7 +362,7 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
         /* make room for real-mode segments */
         tsk->thread.sp0 += 16;
 
-        if (static_cpu_has_safe(X86_FEATURE_SEP))
+        if (static_cpu_has(X86_FEATURE_SEP))
                 tsk->thread.sysenter_cs = 0;
 
         load_sp0(tss, &tsk->thread);
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index cd83d477e32d..3a4b39afc0ab 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1431,7 +1431,7 @@ static int __init intel_pstate_init(void)
         if (!all_cpu_data)
                 return -ENOMEM;
 
-        if (static_cpu_has_safe(X86_FEATURE_HWP) && !no_hwp) {
+        if (static_cpu_has(X86_FEATURE_HWP) && !no_hwp) {
                 pr_info("intel_pstate: HWP enabled\n");
                 hwp_active++;
         }
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index dd08e29f5117..d9286497924f 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -930,7 +930,7 @@ static int check_async_write(struct inode *inode, unsigned long bio_flags)
         if (bio_flags & EXTENT_BIO_TREE_LOG)
                 return 0;
 #ifdef CONFIG_X86
-        if (static_cpu_has_safe(X86_FEATURE_XMM4_2))
+        if (static_cpu_has(X86_FEATURE_XMM4_2))
                 return 0;
 #endif
         return 1;