author	Suresh Siddha <suresh.b.siddha@intel.com>	2012-08-24 17:13:02 -0400
committer	H. Peter Anvin <hpa@linux.intel.com>	2012-09-18 18:52:11 -0400
commit	304bceda6a18ae0b0240b8aac9a6bdf8ce2d2469 (patch)
tree	9ffae43391d69aa4765590b942b907da4a189041 /arch/x86/kernel
parent	9c6ff8bbb69a4e7b47ac40bfa44509296e89c5c0 (diff)
x86, fpu: use non-lazy fpu restore for processors supporting xsave
The fundamental model of the current Linux kernel is to lazily init and restore the FPU instead of restoring the task's FPU state during context switch. This changes that fundamental lazy model to a non-lazy model for processors supporting the xsave feature.

Reasons driving this model change are:

i. Newer processors support optimized state save/restore using xsaveopt and xrstor by tracking the INIT state and MODIFIED state during context switch. This is faster than modifying the cr0.TS bit, which has serializing semantics.

ii. Newer glibc versions use SSE for some of the optimized copy/clear routines. With certain workloads (like boot, kernel compilation, etc.), the application completes its work within the first 5 task switches, thus taking up to 5 #DNA traps without the kernel getting a chance to apply the above-mentioned pre-load heuristic.

iii. Some xstate features (like AMD's LWP feature) don't honor the cr0.TS bit and thus will not work correctly in the presence of lazy restore. Non-lazy state restore is needed for enabling such features.

Some data on a two-socket SNB system:
 * Saved 20K DNA exceptions during boot.
 * Saved 50K DNA exceptions during a kernel-compilation workload.
 * Improved throughput of the AVX-based checksumming function inside the kernel by ~15%, as xsave/xrstor is faster than the serializing clts/stts pair.

kernel_fpu_begin/end() now also relies on the patched alternative instructions, so move check_fpu(), which uses kernel_fpu_begin/end(), after alternative_instructions().

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Link: http://lkml.kernel.org/r/1345842782-24175-7-git-send-email-suresh.b.siddha@intel.com
Merge 32-bit boot fix from:
Link: http://lkml.kernel.org/r/1347300665-6209-4-git-send-email-suresh.b.siddha@intel.com
Cc: Jim Kukunas <james.t.kukunas@linux.intel.com>
Cc: NeilBrown <neilb@suse.de>
Cc: Avi Kivity <avi@redhat.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
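For readers skimming the change, the net effect on the kernel-FPU helpers can be summarized by the following condensed sketch. It is distilled from the i387.c hunks below (plus the usual preempt_disable/enable bracketing of the real functions) and is an illustration, not a drop-in copy of the kernel code:

/* Condensed sketch of the eager (non-lazy) behaviour after this patch. */
void kernel_fpu_begin(void)
{
	struct task_struct *me = current;

	preempt_disable();
	if (__thread_has_fpu(me)) {
		/* Save the task's live FPU state before the kernel uses the FPU. */
		__save_init_fpu(me);
		__thread_clear_has_fpu(me);
	} else if (!use_xsave()) {
		/* Lazy (non-xsave) case: clear TS so kernel FPU use doesn't trap. */
		this_cpu_write(fpu_owner_task, NULL);
		clts();
	}
}

void kernel_fpu_end(void)
{
	if (use_xsave())
		math_state_restore();	/* eager: restore the task state right away */
	else
		stts();			/* lazy: set TS and let #NM reload it later */
	preempt_enable();
}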
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--  arch/x86/kernel/cpu/bugs.c      7
-rw-r--r--  arch/x86/kernel/i387.c         20
-rw-r--r--  arch/x86/kernel/process.c      12
-rw-r--r--  arch/x86/kernel/process_32.c    4
-rw-r--r--  arch/x86/kernel/process_64.c    4
-rw-r--r--  arch/x86/kernel/traps.c         5
-rw-r--r--  arch/x86/kernel/xsave.c        57
7 files changed, 81 insertions, 28 deletions
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index c97bb7b5a9f8..d0e910da16c5 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -165,10 +165,15 @@ void __init check_bugs(void)
 	print_cpu_info(&boot_cpu_data);
 #endif
 	check_config();
-	check_fpu();
 	check_hlt();
 	check_popad();
 	init_utsname()->machine[1] =
 		'0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
 	alternative_instructions();
+
+	/*
+	 * kernel_fpu_begin/end() in check_fpu() relies on the patched
+	 * alternative instructions.
+	 */
+	check_fpu();
 }
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index ab6a2e8028ae..528557470ddb 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -22,7 +22,15 @@
 /*
  * Were we in an interrupt that interrupted kernel mode?
  *
- * We can do a kernel_fpu_begin/end() pair *ONLY* if that
+ * For now, on xsave platforms we will return interrupted
+ * kernel FPU as not-idle. TBD: As we use non-lazy FPU restore
+ * for xsave platforms, ideally we can change the return value
+ * to something like __thread_has_fpu(current). But we need to
+ * be careful of doing __thread_clear_has_fpu() before saving
+ * the FPU etc for supporting nested uses etc. For now, take
+ * the simple route!
+ *
+ * On others, we can do a kernel_fpu_begin/end() pair *ONLY* if that
  * pair does nothing at all: the thread must not have fpu (so
  * that we don't try to save the FPU state), and TS must
  * be set (so that the clts/stts pair does nothing that is
@@ -30,6 +38,9 @@
  */
 static inline bool interrupted_kernel_fpu_idle(void)
 {
+	if (use_xsave())
+		return 0;
+
 	return !__thread_has_fpu(current) &&
 		(read_cr0() & X86_CR0_TS);
 }
@@ -73,7 +84,7 @@ void kernel_fpu_begin(void)
 		__save_init_fpu(me);
 		__thread_clear_has_fpu(me);
 		/* We do 'stts()' in kernel_fpu_end() */
-	} else {
+	} else if (!use_xsave()) {
 		this_cpu_write(fpu_owner_task, NULL);
 		clts();
 	}
@@ -82,7 +93,10 @@ EXPORT_SYMBOL(kernel_fpu_begin);
 
 void kernel_fpu_end(void)
 {
-	stts();
+	if (use_xsave())
+		math_state_restore();
+	else
+		stts();
 	preempt_enable();
 }
 EXPORT_SYMBOL(kernel_fpu_end);
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 30069d1a6a4d..c21e30f8923b 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -66,15 +66,13 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 {
 	int ret;
 
-	unlazy_fpu(src);
-
 	*dst = *src;
 	if (fpu_allocated(&src->thread.fpu)) {
 		memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu));
 		ret = fpu_alloc(&dst->thread.fpu);
 		if (ret)
 			return ret;
-		fpu_copy(&dst->thread.fpu, &src->thread.fpu);
+		fpu_copy(dst, src);
 	}
 	return 0;
 }
@@ -153,7 +151,13 @@ void flush_thread(void)
 
 	flush_ptrace_hw_breakpoint(tsk);
 	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
-	drop_fpu(tsk);
+	drop_init_fpu(tsk);
+	/*
+	 * Free the FPU state for non xsave platforms. They get reallocated
+	 * lazily at the first use.
+	 */
+	if (!use_xsave())
+		free_thread_xstate(tsk);
 }
 
 static void hard_disable_TSC(void)
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 516fa186121b..b9ff83c7135b 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -190,10 +190,6 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
 	regs->cs = __USER_CS;
 	regs->ip = new_ip;
 	regs->sp = new_sp;
-	/*
-	 * Free the old FP and other extended state
-	 */
-	free_thread_xstate(current);
 }
 EXPORT_SYMBOL_GPL(start_thread);
 
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 0a980c9d7cb8..8a6d20ce1978 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -232,10 +232,6 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip,
 	regs->cs = _cs;
 	regs->ss = _ss;
 	regs->flags = X86_EFLAGS_IF;
-	/*
-	 * Free the old FP and other extended state
-	 */
-	free_thread_xstate(current);
 }
 
 void
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index b481341c9369..ac7d5275f6e8 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -613,11 +613,12 @@ void math_state_restore(void)
 	}
 
 	__thread_fpu_begin(tsk);
+
 	/*
 	 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
 	 */
 	if (unlikely(restore_fpu_checking(tsk))) {
-		__thread_fpu_end(tsk);
+		drop_init_fpu(tsk);
 		force_sig(SIGSEGV, tsk);
 		return;
 	}
@@ -629,6 +630,8 @@ EXPORT_SYMBOL_GPL(math_state_restore);
 dotraplinkage void __kprobes
 do_device_not_available(struct pt_regs *regs, long error_code)
 {
+	BUG_ON(use_xsave());
+
 #ifdef CONFIG_MATH_EMULATION
 	if (read_cr0() & X86_CR0_EM) {
 		struct math_emu_info info = { };
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 4ac5f2e135b4..e7752bd7cac8 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -21,7 +21,7 @@ u64 pcntxt_mask;
 /*
  * Represents init state for the supported extended state.
  */
-static struct xsave_struct *init_xstate_buf;
+struct xsave_struct *init_xstate_buf;
 
 static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32;
 static unsigned int *xstate_offsets, *xstate_sizes, xstate_features;
@@ -268,7 +268,7 @@ int save_xstate_sig(void __user *buf, void __user *buf_fx, int size)
 	if (use_fxsr() && save_xstate_epilog(buf_fx, ia32_fxstate))
 		return -1;
 
-	drop_fpu(tsk);	/* trigger finit */
+	drop_init_fpu(tsk);	/* trigger finit */
 
 	return 0;
 }
@@ -340,7 +340,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
 			config_enabled(CONFIG_IA32_EMULATION));
 
 	if (!buf) {
-		drop_fpu(tsk);
+		drop_init_fpu(tsk);
 		return 0;
 	}
 
@@ -380,15 +380,30 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
 		 */
 		struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave;
 		struct user_i387_ia32_struct env;
+		int err = 0;
 
+		/*
+		 * Drop the current fpu which clears used_math(). This ensures
+		 * that any context-switch during the copy of the new state,
+		 * avoids the intermediate state from getting restored/saved.
+		 * Thus avoiding the new restored state from getting corrupted.
+		 * We will be ready to restore/save the state only after
+		 * set_used_math() is again set.
+		 */
 		drop_fpu(tsk);
 
 		if (__copy_from_user(xsave, buf_fx, state_size) ||
-		    __copy_from_user(&env, buf, sizeof(env)))
-			return -1;
+		    __copy_from_user(&env, buf, sizeof(env))) {
+			err = -1;
+		} else {
+			sanitize_restored_xstate(tsk, &env, xstate_bv, fx_only);
+			set_used_math();
+		}
 
-		sanitize_restored_xstate(tsk, &env, xstate_bv, fx_only);
-		set_used_math();
+		if (use_xsave())
+			math_state_restore();
+
+		return err;
 	} else {
 		/*
 		 * For 64-bit frames and 32-bit fsave frames, restore the user
@@ -396,7 +411,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
 		 */
 		user_fpu_begin();
 		if (restore_user_xstate(buf_fx, xstate_bv, fx_only)) {
-			drop_fpu(tsk);
+			drop_init_fpu(tsk);
 			return -1;
 		}
 	}
@@ -435,11 +450,29 @@ static void prepare_fx_sw_frame(void)
  */
 static inline void xstate_enable(void)
 {
+	clts();
 	set_in_cr4(X86_CR4_OSXSAVE);
 	xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask);
 }
 
 /*
+ * This is same as math_state_restore(). But use_xsave() is not yet
+ * patched to use math_state_restore().
+ */
+static inline void init_restore_xstate(void)
+{
+	init_fpu(current);
+	__thread_fpu_begin(current);
+	xrstor_state(init_xstate_buf, -1);
+}
+
+static inline void xstate_enable_ap(void)
+{
+	xstate_enable();
+	init_restore_xstate();
+}
+
+/*
  * Record the offsets and sizes of different state managed by the xsave
  * memory layout.
  */
@@ -479,7 +512,6 @@ static void __init setup_xstate_init(void)
 					      __alignof__(struct xsave_struct));
 	init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT;
 
-	clts();
 	/*
 	 * Init all the features state with header_bv being 0x0
 	 */
@@ -489,7 +521,6 @@ static void __init setup_xstate_init(void)
 	 * of any feature which is not represented by all zero's.
 	 */
 	xsave_state(init_xstate_buf, -1);
-	stts();
 }
494 525
495/* 526/*
@@ -533,6 +564,10 @@ static void __init xstate_enable_boot_cpu(void)
 
 	pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n",
 		pcntxt_mask, xstate_size);
+
+	current->thread.fpu.state =
+		alloc_bootmem_align(xstate_size, __alignof__(struct xsave_struct));
+	init_restore_xstate();
 }
537 572
538/* 573/*
@@ -551,6 +586,6 @@ void __cpuinit xsave_init(void)
 		return;
 
 	this_func = next_func;
-	next_func = xstate_enable;
+	next_func = xstate_enable_ap;
 	this_func();
 }