diff options
author | Suresh Siddha <suresh.b.siddha@intel.com> | 2012-08-24 17:13:02 -0400 |
---|---|---|
committer | H. Peter Anvin <hpa@linux.intel.com> | 2012-09-18 18:52:11 -0400 |
commit | 304bceda6a18ae0b0240b8aac9a6bdf8ce2d2469 (patch) | |
tree | 9ffae43391d69aa4765590b942b907da4a189041 /arch/x86/kernel/xsave.c | |
parent | 9c6ff8bbb69a4e7b47ac40bfa44509296e89c5c0 (diff) |
x86, fpu: use non-lazy fpu restore for processors supporting xsave
The fundamental model of the current Linux kernel is to lazily init and
restore the FPU instead of restoring the task state during context switch.
This changes that fundamental lazy model to the non-lazy model for
processors supporting the xsave feature.
Reasons driving this model change are:
i. Newer processors support optimized state save/restore using xsaveopt and
xrstor by tracking the INIT state and MODIFIED state during context-switch.
This is faster than modifying the cr0.TS bit which has serializing semantics.
ii. Newer glibc versions use SSE for some of the optimized copy/clear routines.
With certain workloads (like boot, kernel-compilation etc.), the application
completes its work within the first 5 task switches, thus taking up to 5 #DNA
traps with the kernel not getting a chance to apply the above-mentioned
pre-load heuristic.
iii. Some xstate features (like AMD's LWP feature) don't honor the cr0.TS bit
and thus will not work correctly in the presence of lazy restore. Non-lazy
state restore is needed for enabling such features.
Some data on a two socket SNB system:
* Saved 20K DNA exceptions during boot on a two socket SNB system.
* Saved 50K DNA exceptions during kernel-compilation workload.
* Improved throughput of the AVX based checksumming function inside the
kernel by ~15% as xsave/xrstor is faster than the serializing clts/stts
pair.
Also now kernel_fpu_begin/end() relies on the patched
alternative instructions. So move check_fpu() which uses the
kernel_fpu_begin/end() after alternative_instructions().
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Link: http://lkml.kernel.org/r/1345842782-24175-7-git-send-email-suresh.b.siddha@intel.com
Merge 32-bit boot fix from:
Link: http://lkml.kernel.org/r/1347300665-6209-4-git-send-email-suresh.b.siddha@intel.com
Cc: Jim Kukunas <james.t.kukunas@linux.intel.com>
Cc: NeilBrown <neilb@suse.de>
Cc: Avi Kivity <avi@redhat.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Diffstat (limited to 'arch/x86/kernel/xsave.c')
-rw-r--r-- | arch/x86/kernel/xsave.c | 57 |
1 files changed, 46 insertions, 11 deletions
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 4ac5f2e135b4..e7752bd7cac8 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c | |||
@@ -21,7 +21,7 @@ u64 pcntxt_mask; | |||
21 | /* | 21 | /* |
22 | * Represents init state for the supported extended state. | 22 | * Represents init state for the supported extended state. |
23 | */ | 23 | */ |
24 | static struct xsave_struct *init_xstate_buf; | 24 | struct xsave_struct *init_xstate_buf; |
25 | 25 | ||
26 | static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32; | 26 | static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32; |
27 | static unsigned int *xstate_offsets, *xstate_sizes, xstate_features; | 27 | static unsigned int *xstate_offsets, *xstate_sizes, xstate_features; |
@@ -268,7 +268,7 @@ int save_xstate_sig(void __user *buf, void __user *buf_fx, int size) | |||
268 | if (use_fxsr() && save_xstate_epilog(buf_fx, ia32_fxstate)) | 268 | if (use_fxsr() && save_xstate_epilog(buf_fx, ia32_fxstate)) |
269 | return -1; | 269 | return -1; |
270 | 270 | ||
271 | drop_fpu(tsk); /* trigger finit */ | 271 | drop_init_fpu(tsk); /* trigger finit */ |
272 | 272 | ||
273 | return 0; | 273 | return 0; |
274 | } | 274 | } |
@@ -340,7 +340,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) | |||
340 | config_enabled(CONFIG_IA32_EMULATION)); | 340 | config_enabled(CONFIG_IA32_EMULATION)); |
341 | 341 | ||
342 | if (!buf) { | 342 | if (!buf) { |
343 | drop_fpu(tsk); | 343 | drop_init_fpu(tsk); |
344 | return 0; | 344 | return 0; |
345 | } | 345 | } |
346 | 346 | ||
@@ -380,15 +380,30 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) | |||
380 | */ | 380 | */ |
381 | struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave; | 381 | struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave; |
382 | struct user_i387_ia32_struct env; | 382 | struct user_i387_ia32_struct env; |
383 | int err = 0; | ||
383 | 384 | ||
385 | /* | ||
386 | * Drop the current fpu which clears used_math(). This ensures | ||
387 | * that any context-switch during the copy of the new state, | ||
388 | * avoids the intermediate state from getting restored/saved. | ||
389 | * Thus avoiding the new restored state from getting corrupted. | ||
390 | * We will be ready to restore/save the state only after | ||
391 | * set_used_math() is again set. | ||
392 | */ | ||
384 | drop_fpu(tsk); | 393 | drop_fpu(tsk); |
385 | 394 | ||
386 | if (__copy_from_user(xsave, buf_fx, state_size) || | 395 | if (__copy_from_user(xsave, buf_fx, state_size) || |
387 | __copy_from_user(&env, buf, sizeof(env))) | 396 | __copy_from_user(&env, buf, sizeof(env))) { |
388 | return -1; | 397 | err = -1; |
398 | } else { | ||
399 | sanitize_restored_xstate(tsk, &env, xstate_bv, fx_only); | ||
400 | set_used_math(); | ||
401 | } | ||
389 | 402 | ||
390 | sanitize_restored_xstate(tsk, &env, xstate_bv, fx_only); | 403 | if (use_xsave()) |
391 | set_used_math(); | 404 | math_state_restore(); |
405 | |||
406 | return err; | ||
392 | } else { | 407 | } else { |
393 | /* | 408 | /* |
394 | * For 64-bit frames and 32-bit fsave frames, restore the user | 409 | * For 64-bit frames and 32-bit fsave frames, restore the user |
@@ -396,7 +411,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) | |||
396 | */ | 411 | */ |
397 | user_fpu_begin(); | 412 | user_fpu_begin(); |
398 | if (restore_user_xstate(buf_fx, xstate_bv, fx_only)) { | 413 | if (restore_user_xstate(buf_fx, xstate_bv, fx_only)) { |
399 | drop_fpu(tsk); | 414 | drop_init_fpu(tsk); |
400 | return -1; | 415 | return -1; |
401 | } | 416 | } |
402 | } | 417 | } |
@@ -435,11 +450,29 @@ static void prepare_fx_sw_frame(void) | |||
435 | */ | 450 | */ |
436 | static inline void xstate_enable(void) | 451 | static inline void xstate_enable(void) |
437 | { | 452 | { |
453 | clts(); | ||
438 | set_in_cr4(X86_CR4_OSXSAVE); | 454 | set_in_cr4(X86_CR4_OSXSAVE); |
439 | xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask); | 455 | xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask); |
440 | } | 456 | } |
441 | 457 | ||
442 | /* | 458 | /* |
459 | * This is same as math_state_restore(). But use_xsave() is not yet | ||
460 | * patched to use math_state_restore(). | ||
461 | */ | ||
462 | static inline void init_restore_xstate(void) | ||
463 | { | ||
464 | init_fpu(current); | ||
465 | __thread_fpu_begin(current); | ||
466 | xrstor_state(init_xstate_buf, -1); | ||
467 | } | ||
468 | |||
469 | static inline void xstate_enable_ap(void) | ||
470 | { | ||
471 | xstate_enable(); | ||
472 | init_restore_xstate(); | ||
473 | } | ||
474 | |||
475 | /* | ||
443 | * Record the offsets and sizes of different state managed by the xsave | 476 | * Record the offsets and sizes of different state managed by the xsave |
444 | * memory layout. | 477 | * memory layout. |
445 | */ | 478 | */ |
@@ -479,7 +512,6 @@ static void __init setup_xstate_init(void) | |||
479 | __alignof__(struct xsave_struct)); | 512 | __alignof__(struct xsave_struct)); |
480 | init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT; | 513 | init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT; |
481 | 514 | ||
482 | clts(); | ||
483 | /* | 515 | /* |
484 | * Init all the features state with header_bv being 0x0 | 516 | * Init all the features state with header_bv being 0x0 |
485 | */ | 517 | */ |
@@ -489,7 +521,6 @@ static void __init setup_xstate_init(void) | |||
489 | * of any feature which is not represented by all zero's. | 521 | * of any feature which is not represented by all zero's. |
490 | */ | 522 | */ |
491 | xsave_state(init_xstate_buf, -1); | 523 | xsave_state(init_xstate_buf, -1); |
492 | stts(); | ||
493 | } | 524 | } |
494 | 525 | ||
495 | /* | 526 | /* |
@@ -533,6 +564,10 @@ static void __init xstate_enable_boot_cpu(void) | |||
533 | 564 | ||
534 | pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n", | 565 | pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n", |
535 | pcntxt_mask, xstate_size); | 566 | pcntxt_mask, xstate_size); |
567 | |||
568 | current->thread.fpu.state = | ||
569 | alloc_bootmem_align(xstate_size, __alignof__(struct xsave_struct)); | ||
570 | init_restore_xstate(); | ||
536 | } | 571 | } |
537 | 572 | ||
538 | /* | 573 | /* |
@@ -551,6 +586,6 @@ void __cpuinit xsave_init(void) | |||
551 | return; | 586 | return; |
552 | 587 | ||
553 | this_func = next_func; | 588 | this_func = next_func; |
554 | next_func = xstate_enable; | 589 | next_func = xstate_enable_ap; |
555 | this_func(); | 590 | this_func(); |
556 | } | 591 | } |