author     Linus Torvalds <torvalds@linux-foundation.org>   2016-12-12 17:27:49 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>   2016-12-12 17:27:49 -0500
commit     518bacf5a569d111e256d58b9fbc8d7b80ec42ea (patch)
tree       53aa3297fbd3cf98caa592dec5b3be4e01646ff4
parent     535b2f73f6f60fb227b700136c134c5d7c8f8ad3 (diff)
parent     064e6a8ba61a751625478f656c6f76a6f37a009e (diff)
Merge branch 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 FPU updates from Ingo Molnar:
"The main changes in this cycle were:
- do a large round of simplifications after all CPUs do 'eager' FPU
context switching in v4.9: remove CR0 twiddling, remove leftover
eager/lazy bits, etc (Andy Lutomirski)
- more FPU code simplifications: remove struct fpu::counter, clarify
nomenclature, remove unnecessary arguments/functions and better
structure the code (Rik van Riel)"
* 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/fpu: Remove clts()
x86/fpu: Remove stts()
x86/fpu: Handle #NM without FPU emulation as an error
x86/fpu, lguest: Remove CR0.TS support
x86/fpu, kvm: Remove host CR0.TS manipulation
x86/fpu: Remove irq_ts_save() and irq_ts_restore()
x86/fpu: Stop saving and restoring CR0.TS in fpu__init_check_bugs()
x86/fpu: Get rid of two redundant clts() calls
x86/fpu: Finish excising 'eagerfpu'
x86/fpu: Split old_fpu & new_fpu handling into separate functions
x86/fpu: Remove 'cpu' argument from __cpu_invalidate_fpregs_state()
x86/fpu: Split old & new FPU code paths
x86/fpu: Remove __fpregs_(de)activate()
x86/fpu: Rename lazy restore functions to "register state valid"
x86/fpu, kvm: Remove KVM vcpu->fpu_counter
x86/fpu: Remove struct fpu::counter
x86/fpu: Remove use_eager_fpu()
x86/fpu: Remove the XFEATURE_MASK_EAGER/LAZY distinction
x86/fpu: Hard-disable lazy FPU mode
x86/crypto, x86/fpu: Remove X86_FEATURE_EAGER_FPU #ifdef from the crc32c code
38 files changed, 105 insertions, 547 deletions
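The net effect on the context-switch path is easiest to see at the call sites. The sketch below condenses the reworked API from the asm/fpu/internal.h and arch/x86/kernel/process_{32,64}.c hunks further down; sketch_switch_to() is an illustrative stand-in for __switch_to(), not code from the series, and the flow is a summary rather than a verbatim copy.

/*
 * Illustrative sketch of the post-series FPU switch; condensed from the
 * internal.h and process_{32,64}.c hunks below.
 */
static void sketch_switch_to(struct fpu *prev_fpu, struct fpu *next_fpu, int cpu)
{
	/*
	 * Old task: save its registers into prev_fpu->state and record
	 * whether the registers still match that memory image.
	 */
	switch_fpu_prepare(prev_fpu, cpu);

	/* ... segment registers, TLS and stack switching happen here ... */

	/*
	 * New task: activate its FPU context and restore from memory, but
	 * only if fpregs_state_valid() says the registers do not already
	 * hold this task's state on this CPU. This check replaces the old
	 * CR0.TS-based lazy restore.
	 */
	switch_fpu_finish(next_fpu, cpu);
}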
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index c57316f230de..4e2373e0c0cb 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1079,12 +1079,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
1079 | nopku [X86] Disable Memory Protection Keys CPU feature found | 1079 | nopku [X86] Disable Memory Protection Keys CPU feature found |
1080 | in some Intel CPUs. | 1080 | in some Intel CPUs. |
1081 | 1081 | ||
1082 | eagerfpu= [X86] | ||
1083 | on enable eager fpu restore | ||
1084 | off disable eager fpu restore | ||
1085 | auto selects the default scheme, which automatically | ||
1086 | enables eagerfpu restore for xsaveopt. | ||
1087 | |||
1088 | module.async_probe [KNL] | 1082 | module.async_probe [KNL] |
1089 | Enable asynchronous probe on this module. | 1083 | Enable asynchronous probe on this module. |
1090 | 1084 | ||
diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c
index 0857b1a1de3b..c194d5717ae5 100644
--- a/arch/x86/crypto/crc32c-intel_glue.c
+++ b/arch/x86/crypto/crc32c-intel_glue.c
@@ -48,26 +48,13 @@ | |||
48 | #ifdef CONFIG_X86_64 | 48 | #ifdef CONFIG_X86_64 |
49 | /* | 49 | /* |
50 | * use carryless multiply version of crc32c when buffer | 50 | * use carryless multiply version of crc32c when buffer |
51 | * size is >= 512 (when eager fpu is enabled) or | 51 | * size is >= 512 to account |
52 | * >= 1024 (when eager fpu is disabled) to account | ||
53 | * for fpu state save/restore overhead. | 52 | * for fpu state save/restore overhead. |
54 | */ | 53 | */ |
55 | #define CRC32C_PCL_BREAKEVEN_EAGERFPU 512 | 54 | #define CRC32C_PCL_BREAKEVEN 512 |
56 | #define CRC32C_PCL_BREAKEVEN_NOEAGERFPU 1024 | ||
57 | 55 | ||
58 | asmlinkage unsigned int crc_pcl(const u8 *buffer, int len, | 56 | asmlinkage unsigned int crc_pcl(const u8 *buffer, int len, |
59 | unsigned int crc_init); | 57 | unsigned int crc_init); |
60 | static int crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_EAGERFPU; | ||
61 | #if defined(X86_FEATURE_EAGER_FPU) | ||
62 | #define set_pcl_breakeven_point() \ | ||
63 | do { \ | ||
64 | if (!use_eager_fpu()) \ | ||
65 | crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_NOEAGERFPU; \ | ||
66 | } while (0) | ||
67 | #else | ||
68 | #define set_pcl_breakeven_point() \ | ||
69 | (crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_NOEAGERFPU) | ||
70 | #endif | ||
71 | #endif /* CONFIG_X86_64 */ | 58 | #endif /* CONFIG_X86_64 */ |
72 | 59 | ||
73 | static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, size_t length) | 60 | static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, size_t length) |
@@ -190,7 +177,7 @@ static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data, | |||
190 | * use faster PCL version if datasize is large enough to | 177 | * use faster PCL version if datasize is large enough to |
191 | * overcome kernel fpu state save/restore overhead | 178 | * overcome kernel fpu state save/restore overhead |
192 | */ | 179 | */ |
193 | if (len >= crc32c_pcl_breakeven && irq_fpu_usable()) { | 180 | if (len >= CRC32C_PCL_BREAKEVEN && irq_fpu_usable()) { |
194 | kernel_fpu_begin(); | 181 | kernel_fpu_begin(); |
195 | *crcp = crc_pcl(data, len, *crcp); | 182 | *crcp = crc_pcl(data, len, *crcp); |
196 | kernel_fpu_end(); | 183 | kernel_fpu_end(); |
@@ -202,7 +189,7 @@ static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data, | |||
202 | static int __crc32c_pcl_intel_finup(u32 *crcp, const u8 *data, unsigned int len, | 189 | static int __crc32c_pcl_intel_finup(u32 *crcp, const u8 *data, unsigned int len, |
203 | u8 *out) | 190 | u8 *out) |
204 | { | 191 | { |
205 | if (len >= crc32c_pcl_breakeven && irq_fpu_usable()) { | 192 | if (len >= CRC32C_PCL_BREAKEVEN && irq_fpu_usable()) { |
206 | kernel_fpu_begin(); | 193 | kernel_fpu_begin(); |
207 | *(__le32 *)out = ~cpu_to_le32(crc_pcl(data, len, *crcp)); | 194 | *(__le32 *)out = ~cpu_to_le32(crc_pcl(data, len, *crcp)); |
208 | kernel_fpu_end(); | 195 | kernel_fpu_end(); |
@@ -261,7 +248,6 @@ static int __init crc32c_intel_mod_init(void) | |||
261 | alg.update = crc32c_pcl_intel_update; | 248 | alg.update = crc32c_pcl_intel_update; |
262 | alg.finup = crc32c_pcl_intel_finup; | 249 | alg.finup = crc32c_pcl_intel_finup; |
263 | alg.digest = crc32c_pcl_intel_digest; | 250 | alg.digest = crc32c_pcl_intel_digest; |
264 | set_pcl_breakeven_point(); | ||
265 | } | 251 | } |
266 | #endif | 252 | #endif |
267 | return crypto_register_shash(&alg); | 253 | return crypto_register_shash(&alg); |
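With the eager/lazy split gone, the glue code keeps a single break-even constant. A minimal sketch of the resulting call pattern follows; crc32c_update_sketch() is an illustrative name, and the crc32c_intel_le_hw() fallback is assumed from the unchanged portion of this file.

/*
 * Sketch: use the PCLMULQDQ-based crc_pcl() only when the buffer is large
 * enough to amortize the kernel FPU save/restore and the FPU is usable in
 * this context; otherwise fall back to the plain CRC32 instruction path.
 */
static int crc32c_update_sketch(u32 *crcp, const u8 *data, unsigned int len)
{
	if (len >= CRC32C_PCL_BREAKEVEN && irq_fpu_usable()) {
		kernel_fpu_begin();
		*crcp = crc_pcl(data, len, *crcp);
		kernel_fpu_end();
	} else {
		/* Fallback assumed from the unchanged part of the file. */
		*crcp = crc32c_intel_le_hw(*crcp, data, len);
	}
	return 0;
}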
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 4dba597c5807..e83f972b0a14 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -104,7 +104,6 @@ | |||
104 | #define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */ | 104 | #define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */ |
105 | #define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */ | 105 | #define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */ |
106 | #define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */ | 106 | #define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */ |
107 | #define X86_FEATURE_EAGER_FPU ( 3*32+29) /* "eagerfpu" Non lazy FPU restore */ | ||
108 | #define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ | 107 | #define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ |
109 | 108 | ||
110 | /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ | 109 | /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ |
diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
index 1429a7c736db..0877ae018fc9 100644
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -27,16 +27,6 @@ extern void kernel_fpu_end(void); | |||
27 | extern bool irq_fpu_usable(void); | 27 | extern bool irq_fpu_usable(void); |
28 | 28 | ||
29 | /* | 29 | /* |
30 | * Some instructions like VIA's padlock instructions generate a spurious | ||
31 | * DNA fault but don't modify SSE registers. And these instructions | ||
32 | * get used from interrupt context as well. To prevent these kernel instructions | ||
33 | * in interrupt context interacting wrongly with other user/kernel fpu usage, we | ||
34 | * should use them only in the context of irq_ts_save/restore() | ||
35 | */ | ||
36 | extern int irq_ts_save(void); | ||
37 | extern void irq_ts_restore(int TS_state); | ||
38 | |||
39 | /* | ||
40 | * Query the presence of one or more xfeatures. Works on any legacy CPU as well. | 30 | * Query the presence of one or more xfeatures. Works on any legacy CPU as well. |
41 | * | 31 | * |
42 | * If 'feature_name' is set then put a human-readable description of | 32 | * If 'feature_name' is set then put a human-readable description of |
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 2737366ea583..d4a684997497 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -60,11 +60,6 @@ extern u64 fpu__get_supported_xfeatures_mask(void); | |||
60 | /* | 60 | /* |
61 | * FPU related CPU feature flag helper routines: | 61 | * FPU related CPU feature flag helper routines: |
62 | */ | 62 | */ |
63 | static __always_inline __pure bool use_eager_fpu(void) | ||
64 | { | ||
65 | return static_cpu_has(X86_FEATURE_EAGER_FPU); | ||
66 | } | ||
67 | |||
68 | static __always_inline __pure bool use_xsaveopt(void) | 63 | static __always_inline __pure bool use_xsaveopt(void) |
69 | { | 64 | { |
70 | return static_cpu_has(X86_FEATURE_XSAVEOPT); | 65 | return static_cpu_has(X86_FEATURE_XSAVEOPT); |
@@ -484,42 +479,42 @@ extern int copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size) | |||
484 | DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); | 479 | DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); |
485 | 480 | ||
486 | /* | 481 | /* |
487 | * Must be run with preemption disabled: this clears the fpu_fpregs_owner_ctx, | 482 | * The in-register FPU state for an FPU context on a CPU is assumed to be |
488 | * on this CPU. | 483 | * valid if the fpu->last_cpu matches the CPU, and the fpu_fpregs_owner_ctx |
484 | * matches the FPU. | ||
489 | * | 485 | * |
490 | * This will disable any lazy FPU state restore of the current FPU state, | 486 | * If the FPU register state is valid, the kernel can skip restoring the |
491 | * but if the current thread owns the FPU, it will still be saved by. | 487 | * FPU state from memory. |
488 | * | ||
489 | * Any code that clobbers the FPU registers or updates the in-memory | ||
490 | * FPU state for a task MUST let the rest of the kernel know that the | ||
491 | * FPU registers are no longer valid for this task. | ||
492 | * | ||
493 | * Either one of these invalidation functions is enough. Invalidate | ||
494 | * a resource you control: CPU if using the CPU for something else | ||
495 | * (with preemption disabled), FPU for the current task, or a task that | ||
496 | * is prevented from running by the current task. | ||
492 | */ | 497 | */ |
493 | static inline void __cpu_disable_lazy_restore(unsigned int cpu) | 498 | static inline void __cpu_invalidate_fpregs_state(void) |
494 | { | 499 | { |
495 | per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL; | 500 | __this_cpu_write(fpu_fpregs_owner_ctx, NULL); |
496 | } | 501 | } |
497 | 502 | ||
498 | static inline int fpu_want_lazy_restore(struct fpu *fpu, unsigned int cpu) | 503 | static inline void __fpu_invalidate_fpregs_state(struct fpu *fpu) |
499 | { | ||
500 | return fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu; | ||
501 | } | ||
502 | |||
503 | |||
504 | /* | ||
505 | * Wrap lazy FPU TS handling in a 'hw fpregs activation/deactivation' | ||
506 | * idiom, which is then paired with the sw-flag (fpregs_active) later on: | ||
507 | */ | ||
508 | |||
509 | static inline void __fpregs_activate_hw(void) | ||
510 | { | 504 | { |
511 | if (!use_eager_fpu()) | 505 | fpu->last_cpu = -1; |
512 | clts(); | ||
513 | } | 506 | } |
514 | 507 | ||
515 | static inline void __fpregs_deactivate_hw(void) | 508 | static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu) |
516 | { | 509 | { |
517 | if (!use_eager_fpu()) | 510 | return fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu; |
518 | stts(); | ||
519 | } | 511 | } |
520 | 512 | ||
521 | /* Must be paired with an 'stts' (fpregs_deactivate_hw()) after! */ | 513 | /* |
522 | static inline void __fpregs_deactivate(struct fpu *fpu) | 514 | * These generally need preemption protection to work, |
515 | * do try to avoid using these on their own: | ||
516 | */ | ||
517 | static inline void fpregs_deactivate(struct fpu *fpu) | ||
523 | { | 518 | { |
524 | WARN_ON_FPU(!fpu->fpregs_active); | 519 | WARN_ON_FPU(!fpu->fpregs_active); |
525 | 520 | ||
@@ -528,8 +523,7 @@ static inline void __fpregs_deactivate(struct fpu *fpu) | |||
528 | trace_x86_fpu_regs_deactivated(fpu); | 523 | trace_x86_fpu_regs_deactivated(fpu); |
529 | } | 524 | } |
530 | 525 | ||
531 | /* Must be paired with a 'clts' (fpregs_activate_hw()) before! */ | 526 | static inline void fpregs_activate(struct fpu *fpu) |
532 | static inline void __fpregs_activate(struct fpu *fpu) | ||
533 | { | 527 | { |
534 | WARN_ON_FPU(fpu->fpregs_active); | 528 | WARN_ON_FPU(fpu->fpregs_active); |
535 | 529 | ||
@@ -554,51 +548,19 @@ static inline int fpregs_active(void) | |||
554 | } | 548 | } |
555 | 549 | ||
556 | /* | 550 | /* |
557 | * Encapsulate the CR0.TS handling together with the | ||
558 | * software flag. | ||
559 | * | ||
560 | * These generally need preemption protection to work, | ||
561 | * do try to avoid using these on their own. | ||
562 | */ | ||
563 | static inline void fpregs_activate(struct fpu *fpu) | ||
564 | { | ||
565 | __fpregs_activate_hw(); | ||
566 | __fpregs_activate(fpu); | ||
567 | } | ||
568 | |||
569 | static inline void fpregs_deactivate(struct fpu *fpu) | ||
570 | { | ||
571 | __fpregs_deactivate(fpu); | ||
572 | __fpregs_deactivate_hw(); | ||
573 | } | ||
574 | |||
575 | /* | ||
576 | * FPU state switching for scheduling. | 551 | * FPU state switching for scheduling. |
577 | * | 552 | * |
578 | * This is a two-stage process: | 553 | * This is a two-stage process: |
579 | * | 554 | * |
580 | * - switch_fpu_prepare() saves the old state and | 555 | * - switch_fpu_prepare() saves the old state. |
581 | * sets the new state of the CR0.TS bit. This is | 556 | * This is done within the context of the old process. |
582 | * done within the context of the old process. | ||
583 | * | 557 | * |
584 | * - switch_fpu_finish() restores the new state as | 558 | * - switch_fpu_finish() restores the new state as |
585 | * necessary. | 559 | * necessary. |
586 | */ | 560 | */ |
587 | typedef struct { int preload; } fpu_switch_t; | 561 | static inline void |
588 | 562 | switch_fpu_prepare(struct fpu *old_fpu, int cpu) | |
589 | static inline fpu_switch_t | ||
590 | switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu) | ||
591 | { | 563 | { |
592 | fpu_switch_t fpu; | ||
593 | |||
594 | /* | ||
595 | * If the task has used the math, pre-load the FPU on xsave processors | ||
596 | * or if the past 5 consecutive context-switches used math. | ||
597 | */ | ||
598 | fpu.preload = static_cpu_has(X86_FEATURE_FPU) && | ||
599 | new_fpu->fpstate_active && | ||
600 | (use_eager_fpu() || new_fpu->counter > 5); | ||
601 | |||
602 | if (old_fpu->fpregs_active) { | 564 | if (old_fpu->fpregs_active) { |
603 | if (!copy_fpregs_to_fpstate(old_fpu)) | 565 | if (!copy_fpregs_to_fpstate(old_fpu)) |
604 | old_fpu->last_cpu = -1; | 566 | old_fpu->last_cpu = -1; |
@@ -608,29 +570,8 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu) | |||
608 | /* But leave fpu_fpregs_owner_ctx! */ | 570 | /* But leave fpu_fpregs_owner_ctx! */ |
609 | old_fpu->fpregs_active = 0; | 571 | old_fpu->fpregs_active = 0; |
610 | trace_x86_fpu_regs_deactivated(old_fpu); | 572 | trace_x86_fpu_regs_deactivated(old_fpu); |
611 | 573 | } else | |
612 | /* Don't change CR0.TS if we just switch! */ | ||
613 | if (fpu.preload) { | ||
614 | new_fpu->counter++; | ||
615 | __fpregs_activate(new_fpu); | ||
616 | trace_x86_fpu_regs_activated(new_fpu); | ||
617 | prefetch(&new_fpu->state); | ||
618 | } else { | ||
619 | __fpregs_deactivate_hw(); | ||
620 | } | ||
621 | } else { | ||
622 | old_fpu->counter = 0; | ||
623 | old_fpu->last_cpu = -1; | 574 | old_fpu->last_cpu = -1; |
624 | if (fpu.preload) { | ||
625 | new_fpu->counter++; | ||
626 | if (fpu_want_lazy_restore(new_fpu, cpu)) | ||
627 | fpu.preload = 0; | ||
628 | else | ||
629 | prefetch(&new_fpu->state); | ||
630 | fpregs_activate(new_fpu); | ||
631 | } | ||
632 | } | ||
633 | return fpu; | ||
634 | } | 575 | } |
635 | 576 | ||
636 | /* | 577 | /* |
@@ -638,15 +579,19 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu) | |||
638 | */ | 579 | */ |
639 | 580 | ||
640 | /* | 581 | /* |
641 | * By the time this gets called, we've already cleared CR0.TS and | 582 | * Set up the userspace FPU context for the new task, if the task |
642 | * given the process the FPU if we are going to preload the FPU | 583 | * has used the FPU. |
643 | * state - all we need to do is to conditionally restore the register | ||
644 | * state itself. | ||
645 | */ | 584 | */ |
646 | static inline void switch_fpu_finish(struct fpu *new_fpu, fpu_switch_t fpu_switch) | 585 | static inline void switch_fpu_finish(struct fpu *new_fpu, int cpu) |
647 | { | 586 | { |
648 | if (fpu_switch.preload) | 587 | bool preload = static_cpu_has(X86_FEATURE_FPU) && |
649 | copy_kernel_to_fpregs(&new_fpu->state); | 588 | new_fpu->fpstate_active; |
589 | |||
590 | if (preload) { | ||
591 | if (!fpregs_state_valid(new_fpu, cpu)) | ||
592 | copy_kernel_to_fpregs(&new_fpu->state); | ||
593 | fpregs_activate(new_fpu); | ||
594 | } | ||
650 | } | 595 | } |
651 | 596 | ||
652 | /* | 597 | /* |
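The new comment block in the hunk above describes the "register state valid" bookkeeping that replaces lazy restore: the registers are trusted only while the per-CPU owner pointer and fpu->last_cpu still point at each other, and either side can invalidate independently. The toy model below is plain, self-contained C with illustrative names mirroring the kernel's; it is not kernel code.

#include <stdio.h>
#include <stdbool.h>

#define NR_CPUS 4

struct toy_fpu { int last_cpu; };

/* Stand-in for the per-CPU fpu_fpregs_owner_ctx pointer. */
static struct toy_fpu *fpregs_owner[NR_CPUS];

static bool fpregs_state_valid(struct toy_fpu *fpu, int cpu)
{
	/* Valid only if both sides still point at each other. */
	return fpregs_owner[cpu] == fpu && fpu->last_cpu == cpu;
}

/* Invalidate from the CPU side: this CPU's registers no longer belong to anyone. */
static void cpu_invalidate_fpregs_state(int cpu)
{
	fpregs_owner[cpu] = NULL;
}

/* Invalidate from the task side: the task's in-memory state is newer than any registers. */
static void fpu_invalidate_fpregs_state(struct toy_fpu *fpu)
{
	fpu->last_cpu = -1;
}

int main(void)
{
	struct toy_fpu task = { .last_cpu = 1 };

	fpregs_owner[1] = &task;
	printf("valid on cpu1: %d\n", fpregs_state_valid(&task, 1));	/* 1 */

	fpu_invalidate_fpregs_state(&task);	/* e.g. the fpstate was rewritten in memory */
	printf("valid on cpu1: %d\n", fpregs_state_valid(&task, 1));	/* 0 */

	task.last_cpu = 1;
	cpu_invalidate_fpregs_state(1);		/* e.g. the CPU's registers were clobbered */
	printf("valid on cpu1: %d\n", fpregs_state_valid(&task, 1));	/* 0 */
	return 0;
}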
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index 48df486b02f9..3c80f5b9c09d 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -322,17 +322,6 @@ struct fpu { | |||
322 | unsigned char fpregs_active; | 322 | unsigned char fpregs_active; |
323 | 323 | ||
324 | /* | 324 | /* |
325 | * @counter: | ||
326 | * | ||
327 | * This counter contains the number of consecutive context switches | ||
328 | * during which the FPU stays used. If this is over a threshold, the | ||
329 | * lazy FPU restore logic becomes eager, to save the trap overhead. | ||
330 | * This is an unsigned char so that after 256 iterations the counter | ||
331 | * wraps and the context switch behavior turns lazy again; this is to | ||
332 | * deal with bursty apps that only use the FPU for a short time: | ||
333 | */ | ||
334 | unsigned char counter; | ||
335 | /* | ||
336 | * @state: | 325 | * @state: |
337 | * | 326 | * |
338 | * In-memory copy of all FPU registers that we save/restore | 327 | * In-memory copy of all FPU registers that we save/restore |
@@ -340,29 +329,6 @@ struct fpu { | |||
340 | * the registers in the FPU are more recent than this state | 329 | * the registers in the FPU are more recent than this state |
341 | * copy. If the task context-switches away then they get | 330 | * copy. If the task context-switches away then they get |
342 | * saved here and represent the FPU state. | 331 | * saved here and represent the FPU state. |
343 | * | ||
344 | * After context switches there may be a (short) time period | ||
345 | * during which the in-FPU hardware registers are unchanged | ||
346 | * and still perfectly match this state, if the tasks | ||
347 | * scheduled afterwards are not using the FPU. | ||
348 | * | ||
349 | * This is the 'lazy restore' window of optimization, which | ||
350 | * we track though 'fpu_fpregs_owner_ctx' and 'fpu->last_cpu'. | ||
351 | * | ||
352 | * We detect whether a subsequent task uses the FPU via setting | ||
353 | * CR0::TS to 1, which causes any FPU use to raise a #NM fault. | ||
354 | * | ||
355 | * During this window, if the task gets scheduled again, we | ||
356 | * might be able to skip having to do a restore from this | ||
357 | * memory buffer to the hardware registers - at the cost of | ||
358 | * incurring the overhead of #NM fault traps. | ||
359 | * | ||
360 | * Note that on modern CPUs that support the XSAVEOPT (or other | ||
361 | * optimized XSAVE instructions), we don't use #NM traps anymore, | ||
362 | * as the hardware can track whether FPU registers need saving | ||
363 | * or not. On such CPUs we activate the non-lazy ('eagerfpu') | ||
364 | * logic, which unconditionally saves/restores all FPU state | ||
365 | * across context switches. (if FPU state exists.) | ||
366 | */ | 332 | */ |
367 | union fpregs_state state; | 333 | union fpregs_state state; |
368 | /* | 334 | /* |
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 430bacf73074..1b2799e0699a 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -21,21 +21,16 @@ | |||
21 | /* Supervisor features */ | 21 | /* Supervisor features */ |
22 | #define XFEATURE_MASK_SUPERVISOR (XFEATURE_MASK_PT) | 22 | #define XFEATURE_MASK_SUPERVISOR (XFEATURE_MASK_PT) |
23 | 23 | ||
24 | /* Supported features which support lazy state saving */ | 24 | /* All currently supported features */ |
25 | #define XFEATURE_MASK_LAZY (XFEATURE_MASK_FP | \ | 25 | #define XCNTXT_MASK (XFEATURE_MASK_FP | \ |
26 | XFEATURE_MASK_SSE | \ | 26 | XFEATURE_MASK_SSE | \ |
27 | XFEATURE_MASK_YMM | \ | 27 | XFEATURE_MASK_YMM | \ |
28 | XFEATURE_MASK_OPMASK | \ | 28 | XFEATURE_MASK_OPMASK | \ |
29 | XFEATURE_MASK_ZMM_Hi256 | \ | 29 | XFEATURE_MASK_ZMM_Hi256 | \ |
30 | XFEATURE_MASK_Hi16_ZMM) | 30 | XFEATURE_MASK_Hi16_ZMM | \ |
31 | 31 | XFEATURE_MASK_PKRU | \ | |
32 | /* Supported features which require eager state saving */ | 32 | XFEATURE_MASK_BNDREGS | \ |
33 | #define XFEATURE_MASK_EAGER (XFEATURE_MASK_BNDREGS | \ | 33 | XFEATURE_MASK_BNDCSR) |
34 | XFEATURE_MASK_BNDCSR | \ | ||
35 | XFEATURE_MASK_PKRU) | ||
36 | |||
37 | /* All currently supported features */ | ||
38 | #define XCNTXT_MASK (XFEATURE_MASK_LAZY | XFEATURE_MASK_EAGER) | ||
39 | 34 | ||
40 | #ifdef CONFIG_X86_64 | 35 | #ifdef CONFIG_X86_64 |
41 | #define REX_PREFIX "0x48, " | 36 | #define REX_PREFIX "0x48, " |
diff --git a/arch/x86/include/asm/lguest_hcall.h b/arch/x86/include/asm/lguest_hcall.h
index ef01fef3eebc..6c119cfae218 100644
--- a/arch/x86/include/asm/lguest_hcall.h
+++ b/arch/x86/include/asm/lguest_hcall.h
@@ -9,7 +9,6 @@ | |||
9 | #define LHCALL_FLUSH_TLB 5 | 9 | #define LHCALL_FLUSH_TLB 5 |
10 | #define LHCALL_LOAD_IDT_ENTRY 6 | 10 | #define LHCALL_LOAD_IDT_ENTRY 6 |
11 | #define LHCALL_SET_STACK 7 | 11 | #define LHCALL_SET_STACK 7 |
12 | #define LHCALL_TS 8 | ||
13 | #define LHCALL_SET_CLOCKEVENT 9 | 12 | #define LHCALL_SET_CLOCKEVENT 9 |
14 | #define LHCALL_HALT 10 | 13 | #define LHCALL_HALT 10 |
15 | #define LHCALL_SET_PMD 13 | 14 | #define LHCALL_SET_PMD 13 |
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 6108b1fada2b..1eea6ca40694 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -41,11 +41,6 @@ static inline void set_debugreg(unsigned long val, int reg) | |||
41 | PVOP_VCALL2(pv_cpu_ops.set_debugreg, reg, val); | 41 | PVOP_VCALL2(pv_cpu_ops.set_debugreg, reg, val); |
42 | } | 42 | } |
43 | 43 | ||
44 | static inline void clts(void) | ||
45 | { | ||
46 | PVOP_VCALL0(pv_cpu_ops.clts); | ||
47 | } | ||
48 | |||
49 | static inline unsigned long read_cr0(void) | 44 | static inline unsigned long read_cr0(void) |
50 | { | 45 | { |
51 | return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr0); | 46 | return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr0); |
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 3f2bc0f0d3e8..bb2de45a60f2 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -103,8 +103,6 @@ struct pv_cpu_ops { | |||
103 | unsigned long (*get_debugreg)(int regno); | 103 | unsigned long (*get_debugreg)(int regno); |
104 | void (*set_debugreg)(int regno, unsigned long value); | 104 | void (*set_debugreg)(int regno, unsigned long value); |
105 | 105 | ||
106 | void (*clts)(void); | ||
107 | |||
108 | unsigned long (*read_cr0)(void); | 106 | unsigned long (*read_cr0)(void); |
109 | void (*write_cr0)(unsigned long); | 107 | void (*write_cr0)(unsigned long); |
110 | 108 | ||
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 19a2224f9e16..12af3e35edfa 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -6,11 +6,6 @@ | |||
6 | 6 | ||
7 | #include <asm/nops.h> | 7 | #include <asm/nops.h> |
8 | 8 | ||
9 | static inline void native_clts(void) | ||
10 | { | ||
11 | asm volatile("clts"); | ||
12 | } | ||
13 | |||
14 | /* | 9 | /* |
15 | * Volatile isn't enough to prevent the compiler from reordering the | 10 | * Volatile isn't enough to prevent the compiler from reordering the |
16 | * read/write functions for the control registers and messing everything up. | 11 | * read/write functions for the control registers and messing everything up. |
@@ -208,16 +203,8 @@ static inline void load_gs_index(unsigned selector) | |||
208 | 203 | ||
209 | #endif | 204 | #endif |
210 | 205 | ||
211 | /* Clear the 'TS' bit */ | ||
212 | static inline void clts(void) | ||
213 | { | ||
214 | native_clts(); | ||
215 | } | ||
216 | |||
217 | #endif/* CONFIG_PARAVIRT */ | 206 | #endif/* CONFIG_PARAVIRT */ |
218 | 207 | ||
219 | #define stts() write_cr0(read_cr0() | X86_CR0_TS) | ||
220 | |||
221 | static inline void clflush(volatile void *__p) | 208 | static inline void clflush(volatile void *__p) |
222 | { | 209 | { |
223 | asm volatile("clflush %0" : "+m" (*(volatile char __force *)__p)); | 210 | asm volatile("clflush %0" : "+m" (*(volatile char __force *)__p)); |
diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h
index 9217ab1f5bf6..342e59789fcd 100644
--- a/arch/x86/include/asm/trace/fpu.h
+++ b/arch/x86/include/asm/trace/fpu.h
@@ -14,7 +14,6 @@ DECLARE_EVENT_CLASS(x86_fpu, | |||
14 | __field(struct fpu *, fpu) | 14 | __field(struct fpu *, fpu) |
15 | __field(bool, fpregs_active) | 15 | __field(bool, fpregs_active) |
16 | __field(bool, fpstate_active) | 16 | __field(bool, fpstate_active) |
17 | __field(int, counter) | ||
18 | __field(u64, xfeatures) | 17 | __field(u64, xfeatures) |
19 | __field(u64, xcomp_bv) | 18 | __field(u64, xcomp_bv) |
20 | ), | 19 | ), |
@@ -23,17 +22,15 @@ DECLARE_EVENT_CLASS(x86_fpu, | |||
23 | __entry->fpu = fpu; | 22 | __entry->fpu = fpu; |
24 | __entry->fpregs_active = fpu->fpregs_active; | 23 | __entry->fpregs_active = fpu->fpregs_active; |
25 | __entry->fpstate_active = fpu->fpstate_active; | 24 | __entry->fpstate_active = fpu->fpstate_active; |
26 | __entry->counter = fpu->counter; | ||
27 | if (boot_cpu_has(X86_FEATURE_OSXSAVE)) { | 25 | if (boot_cpu_has(X86_FEATURE_OSXSAVE)) { |
28 | __entry->xfeatures = fpu->state.xsave.header.xfeatures; | 26 | __entry->xfeatures = fpu->state.xsave.header.xfeatures; |
29 | __entry->xcomp_bv = fpu->state.xsave.header.xcomp_bv; | 27 | __entry->xcomp_bv = fpu->state.xsave.header.xcomp_bv; |
30 | } | 28 | } |
31 | ), | 29 | ), |
32 | TP_printk("x86/fpu: %p fpregs_active: %d fpstate_active: %d counter: %d xfeatures: %llx xcomp_bv: %llx", | 30 | TP_printk("x86/fpu: %p fpregs_active: %d fpstate_active: %d xfeatures: %llx xcomp_bv: %llx", |
33 | __entry->fpu, | 31 | __entry->fpu, |
34 | __entry->fpregs_active, | 32 | __entry->fpregs_active, |
35 | __entry->fpstate_active, | 33 | __entry->fpstate_active, |
36 | __entry->counter, | ||
37 | __entry->xfeatures, | 34 | __entry->xfeatures, |
38 | __entry->xcomp_bv | 35 | __entry->xcomp_bv |
39 | ) | 36 | ) |
diff --git a/arch/x86/kernel/fpu/bugs.c b/arch/x86/kernel/fpu/bugs.c
index aad34aafc0e0..d913047f832c 100644
--- a/arch/x86/kernel/fpu/bugs.c
+++ b/arch/x86/kernel/fpu/bugs.c
@@ -23,17 +23,12 @@ static double __initdata y = 3145727.0; | |||
23 | */ | 23 | */ |
24 | void __init fpu__init_check_bugs(void) | 24 | void __init fpu__init_check_bugs(void) |
25 | { | 25 | { |
26 | u32 cr0_saved; | ||
27 | s32 fdiv_bug; | 26 | s32 fdiv_bug; |
28 | 27 | ||
29 | /* kernel_fpu_begin/end() relies on patched alternative instructions. */ | 28 | /* kernel_fpu_begin/end() relies on patched alternative instructions. */ |
30 | if (!boot_cpu_has(X86_FEATURE_FPU)) | 29 | if (!boot_cpu_has(X86_FEATURE_FPU)) |
31 | return; | 30 | return; |
32 | 31 | ||
33 | /* We might have CR0::TS set already, clear it: */ | ||
34 | cr0_saved = read_cr0(); | ||
35 | write_cr0(cr0_saved & ~X86_CR0_TS); | ||
36 | |||
37 | kernel_fpu_begin(); | 32 | kernel_fpu_begin(); |
38 | 33 | ||
39 | /* | 34 | /* |
@@ -56,8 +51,6 @@ void __init fpu__init_check_bugs(void) | |||
56 | 51 | ||
57 | kernel_fpu_end(); | 52 | kernel_fpu_end(); |
58 | 53 | ||
59 | write_cr0(cr0_saved); | ||
60 | |||
61 | if (fdiv_bug) { | 54 | if (fdiv_bug) { |
62 | set_cpu_bug(&boot_cpu_data, X86_BUG_FDIV); | 55 | set_cpu_bug(&boot_cpu_data, X86_BUG_FDIV); |
63 | pr_warn("Hmm, FPU with FDIV bug\n"); | 56 | pr_warn("Hmm, FPU with FDIV bug\n"); |
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index ebb4e95fbd74..e4e97a5355ce 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -58,27 +58,9 @@ static bool kernel_fpu_disabled(void) | |||
58 | return this_cpu_read(in_kernel_fpu); | 58 | return this_cpu_read(in_kernel_fpu); |
59 | } | 59 | } |
60 | 60 | ||
61 | /* | ||
62 | * Were we in an interrupt that interrupted kernel mode? | ||
63 | * | ||
64 | * On others, we can do a kernel_fpu_begin/end() pair *ONLY* if that | ||
65 | * pair does nothing at all: the thread must not have fpu (so | ||
66 | * that we don't try to save the FPU state), and TS must | ||
67 | * be set (so that the clts/stts pair does nothing that is | ||
68 | * visible in the interrupted kernel thread). | ||
69 | * | ||
70 | * Except for the eagerfpu case when we return true; in the likely case | ||
71 | * the thread has FPU but we are not going to set/clear TS. | ||
72 | */ | ||
73 | static bool interrupted_kernel_fpu_idle(void) | 61 | static bool interrupted_kernel_fpu_idle(void) |
74 | { | 62 | { |
75 | if (kernel_fpu_disabled()) | 63 | return !kernel_fpu_disabled(); |
76 | return false; | ||
77 | |||
78 | if (use_eager_fpu()) | ||
79 | return true; | ||
80 | |||
81 | return !current->thread.fpu.fpregs_active && (read_cr0() & X86_CR0_TS); | ||
82 | } | 64 | } |
83 | 65 | ||
84 | /* | 66 | /* |
@@ -125,8 +107,7 @@ void __kernel_fpu_begin(void) | |||
125 | */ | 107 | */ |
126 | copy_fpregs_to_fpstate(fpu); | 108 | copy_fpregs_to_fpstate(fpu); |
127 | } else { | 109 | } else { |
128 | this_cpu_write(fpu_fpregs_owner_ctx, NULL); | 110 | __cpu_invalidate_fpregs_state(); |
129 | __fpregs_activate_hw(); | ||
130 | } | 111 | } |
131 | } | 112 | } |
132 | EXPORT_SYMBOL(__kernel_fpu_begin); | 113 | EXPORT_SYMBOL(__kernel_fpu_begin); |
@@ -137,8 +118,6 @@ void __kernel_fpu_end(void) | |||
137 | 118 | ||
138 | if (fpu->fpregs_active) | 119 | if (fpu->fpregs_active) |
139 | copy_kernel_to_fpregs(&fpu->state); | 120 | copy_kernel_to_fpregs(&fpu->state); |
140 | else | ||
141 | __fpregs_deactivate_hw(); | ||
142 | 121 | ||
143 | kernel_fpu_enable(); | 122 | kernel_fpu_enable(); |
144 | } | 123 | } |
@@ -159,35 +138,6 @@ void kernel_fpu_end(void) | |||
159 | EXPORT_SYMBOL_GPL(kernel_fpu_end); | 138 | EXPORT_SYMBOL_GPL(kernel_fpu_end); |
160 | 139 | ||
161 | /* | 140 | /* |
162 | * CR0::TS save/restore functions: | ||
163 | */ | ||
164 | int irq_ts_save(void) | ||
165 | { | ||
166 | /* | ||
167 | * If in process context and not atomic, we can take a spurious DNA fault. | ||
168 | * Otherwise, doing clts() in process context requires disabling preemption | ||
169 | * or some heavy lifting like kernel_fpu_begin() | ||
170 | */ | ||
171 | if (!in_atomic()) | ||
172 | return 0; | ||
173 | |||
174 | if (read_cr0() & X86_CR0_TS) { | ||
175 | clts(); | ||
176 | return 1; | ||
177 | } | ||
178 | |||
179 | return 0; | ||
180 | } | ||
181 | EXPORT_SYMBOL_GPL(irq_ts_save); | ||
182 | |||
183 | void irq_ts_restore(int TS_state) | ||
184 | { | ||
185 | if (TS_state) | ||
186 | stts(); | ||
187 | } | ||
188 | EXPORT_SYMBOL_GPL(irq_ts_restore); | ||
189 | |||
190 | /* | ||
191 | * Save the FPU state (mark it for reload if necessary): | 141 | * Save the FPU state (mark it for reload if necessary): |
192 | * | 142 | * |
193 | * This only ever gets called for the current task. | 143 | * This only ever gets called for the current task. |
@@ -200,10 +150,7 @@ void fpu__save(struct fpu *fpu) | |||
200 | trace_x86_fpu_before_save(fpu); | 150 | trace_x86_fpu_before_save(fpu); |
201 | if (fpu->fpregs_active) { | 151 | if (fpu->fpregs_active) { |
202 | if (!copy_fpregs_to_fpstate(fpu)) { | 152 | if (!copy_fpregs_to_fpstate(fpu)) { |
203 | if (use_eager_fpu()) | 153 | copy_kernel_to_fpregs(&fpu->state); |
204 | copy_kernel_to_fpregs(&fpu->state); | ||
205 | else | ||
206 | fpregs_deactivate(fpu); | ||
207 | } | 154 | } |
208 | } | 155 | } |
209 | trace_x86_fpu_after_save(fpu); | 156 | trace_x86_fpu_after_save(fpu); |
@@ -247,7 +194,6 @@ EXPORT_SYMBOL_GPL(fpstate_init); | |||
247 | 194 | ||
248 | int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) | 195 | int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) |
249 | { | 196 | { |
250 | dst_fpu->counter = 0; | ||
251 | dst_fpu->fpregs_active = 0; | 197 | dst_fpu->fpregs_active = 0; |
252 | dst_fpu->last_cpu = -1; | 198 | dst_fpu->last_cpu = -1; |
253 | 199 | ||
@@ -260,8 +206,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) | |||
260 | * Don't let 'init optimized' areas of the XSAVE area | 206 | * Don't let 'init optimized' areas of the XSAVE area |
261 | * leak into the child task: | 207 | * leak into the child task: |
262 | */ | 208 | */ |
263 | if (use_eager_fpu()) | 209 | memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size); |
264 | memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size); | ||
265 | 210 | ||
266 | /* | 211 | /* |
267 | * Save current FPU registers directly into the child | 212 | * Save current FPU registers directly into the child |
@@ -283,10 +228,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) | |||
283 | memcpy(&src_fpu->state, &dst_fpu->state, | 228 | memcpy(&src_fpu->state, &dst_fpu->state, |
284 | fpu_kernel_xstate_size); | 229 | fpu_kernel_xstate_size); |
285 | 230 | ||
286 | if (use_eager_fpu()) | 231 | copy_kernel_to_fpregs(&src_fpu->state); |
287 | copy_kernel_to_fpregs(&src_fpu->state); | ||
288 | else | ||
289 | fpregs_deactivate(src_fpu); | ||
290 | } | 232 | } |
291 | preempt_enable(); | 233 | preempt_enable(); |
292 | 234 | ||
@@ -366,7 +308,7 @@ void fpu__activate_fpstate_write(struct fpu *fpu) | |||
366 | 308 | ||
367 | if (fpu->fpstate_active) { | 309 | if (fpu->fpstate_active) { |
368 | /* Invalidate any lazy state: */ | 310 | /* Invalidate any lazy state: */ |
369 | fpu->last_cpu = -1; | 311 | __fpu_invalidate_fpregs_state(fpu); |
370 | } else { | 312 | } else { |
371 | fpstate_init(&fpu->state); | 313 | fpstate_init(&fpu->state); |
372 | trace_x86_fpu_init_state(fpu); | 314 | trace_x86_fpu_init_state(fpu); |
@@ -409,7 +351,7 @@ void fpu__current_fpstate_write_begin(void) | |||
409 | * ensures we will not be lazy and skip a XRSTOR in the | 351 | * ensures we will not be lazy and skip a XRSTOR in the |
410 | * future. | 352 | * future. |
411 | */ | 353 | */ |
412 | fpu->last_cpu = -1; | 354 | __fpu_invalidate_fpregs_state(fpu); |
413 | } | 355 | } |
414 | 356 | ||
415 | /* | 357 | /* |
@@ -459,7 +401,6 @@ void fpu__restore(struct fpu *fpu) | |||
459 | trace_x86_fpu_before_restore(fpu); | 401 | trace_x86_fpu_before_restore(fpu); |
460 | fpregs_activate(fpu); | 402 | fpregs_activate(fpu); |
461 | copy_kernel_to_fpregs(&fpu->state); | 403 | copy_kernel_to_fpregs(&fpu->state); |
462 | fpu->counter++; | ||
463 | trace_x86_fpu_after_restore(fpu); | 404 | trace_x86_fpu_after_restore(fpu); |
464 | kernel_fpu_enable(); | 405 | kernel_fpu_enable(); |
465 | } | 406 | } |
@@ -477,7 +418,6 @@ EXPORT_SYMBOL_GPL(fpu__restore); | |||
477 | void fpu__drop(struct fpu *fpu) | 418 | void fpu__drop(struct fpu *fpu) |
478 | { | 419 | { |
479 | preempt_disable(); | 420 | preempt_disable(); |
480 | fpu->counter = 0; | ||
481 | 421 | ||
482 | if (fpu->fpregs_active) { | 422 | if (fpu->fpregs_active) { |
483 | /* Ignore delayed exceptions from user space */ | 423 | /* Ignore delayed exceptions from user space */ |
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 2f2b8c7ccb85..60dece392b3a 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -10,18 +10,6 @@ | |||
10 | #include <linux/init.h> | 10 | #include <linux/init.h> |
11 | 11 | ||
12 | /* | 12 | /* |
13 | * Initialize the TS bit in CR0 according to the style of context-switches | ||
14 | * we are using: | ||
15 | */ | ||
16 | static void fpu__init_cpu_ctx_switch(void) | ||
17 | { | ||
18 | if (!boot_cpu_has(X86_FEATURE_EAGER_FPU)) | ||
19 | stts(); | ||
20 | else | ||
21 | clts(); | ||
22 | } | ||
23 | |||
24 | /* | ||
25 | * Initialize the registers found in all CPUs, CR0 and CR4: | 13 | * Initialize the registers found in all CPUs, CR0 and CR4: |
26 | */ | 14 | */ |
27 | static void fpu__init_cpu_generic(void) | 15 | static void fpu__init_cpu_generic(void) |
@@ -58,7 +46,6 @@ void fpu__init_cpu(void) | |||
58 | { | 46 | { |
59 | fpu__init_cpu_generic(); | 47 | fpu__init_cpu_generic(); |
60 | fpu__init_cpu_xstate(); | 48 | fpu__init_cpu_xstate(); |
61 | fpu__init_cpu_ctx_switch(); | ||
62 | } | 49 | } |
63 | 50 | ||
64 | /* | 51 | /* |
@@ -233,82 +220,16 @@ static void __init fpu__init_system_xstate_size_legacy(void) | |||
233 | } | 220 | } |
234 | 221 | ||
235 | /* | 222 | /* |
236 | * FPU context switching strategies: | ||
237 | * | ||
238 | * Against popular belief, we don't do lazy FPU saves, due to the | ||
239 | * task migration complications it brings on SMP - we only do | ||
240 | * lazy FPU restores. | ||
241 | * | ||
242 | * 'lazy' is the traditional strategy, which is based on setting | ||
243 | * CR0::TS to 1 during context-switch (instead of doing a full | ||
244 | * restore of the FPU state), which causes the first FPU instruction | ||
245 | * after the context switch (whenever it is executed) to fault - at | ||
246 | * which point we lazily restore the FPU state into FPU registers. | ||
247 | * | ||
248 | * Tasks are of course under no obligation to execute FPU instructions, | ||
249 | * so it can easily happen that another context-switch occurs without | ||
250 | * a single FPU instruction being executed. If we eventually switch | ||
251 | * back to the original task (that still owns the FPU) then we have | ||
252 | * not only saved the restores along the way, but we also have the | ||
253 | * FPU ready to be used for the original task. | ||
254 | * | ||
255 | * 'lazy' is deprecated because it's almost never a performance win | ||
256 | * and it's much more complicated than 'eager'. | ||
257 | * | ||
258 | * 'eager' switching is by default on all CPUs, there we switch the FPU | ||
259 | * state during every context switch, regardless of whether the task | ||
260 | * has used FPU instructions in that time slice or not. This is done | ||
261 | * because modern FPU context saving instructions are able to optimize | ||
262 | * state saving and restoration in hardware: they can detect both | ||
263 | * unused and untouched FPU state and optimize accordingly. | ||
264 | * | ||
265 | * [ Note that even in 'lazy' mode we might optimize context switches | ||
266 | * to use 'eager' restores, if we detect that a task is using the FPU | ||
267 | * frequently. See the fpu->counter logic in fpu/internal.h for that. ] | ||
268 | */ | ||
269 | static enum { ENABLE, DISABLE } eagerfpu = ENABLE; | ||
270 | |||
271 | /* | ||
272 | * Find supported xfeatures based on cpu features and command-line input. | 223 | * Find supported xfeatures based on cpu features and command-line input. |
273 | * This must be called after fpu__init_parse_early_param() is called and | 224 | * This must be called after fpu__init_parse_early_param() is called and |
274 | * xfeatures_mask is enumerated. | 225 | * xfeatures_mask is enumerated. |
275 | */ | 226 | */ |
276 | u64 __init fpu__get_supported_xfeatures_mask(void) | 227 | u64 __init fpu__get_supported_xfeatures_mask(void) |
277 | { | 228 | { |
278 | /* Support all xfeatures known to us */ | 229 | return XCNTXT_MASK; |
279 | if (eagerfpu != DISABLE) | ||
280 | return XCNTXT_MASK; | ||
281 | |||
282 | /* Warning of xfeatures being disabled for no eagerfpu mode */ | ||
283 | if (xfeatures_mask & XFEATURE_MASK_EAGER) { | ||
284 | pr_err("x86/fpu: eagerfpu switching disabled, disabling the following xstate features: 0x%llx.\n", | ||
285 | xfeatures_mask & XFEATURE_MASK_EAGER); | ||
286 | } | ||
287 | |||
288 | /* Return a mask that masks out all features requiring eagerfpu mode */ | ||
289 | return ~XFEATURE_MASK_EAGER; | ||
290 | } | 230 | } |
291 | 231 | ||
292 | /* | 232 | /* Legacy code to initialize eager fpu mode. */ |
293 | * Disable features dependent on eagerfpu. | ||
294 | */ | ||
295 | static void __init fpu__clear_eager_fpu_features(void) | ||
296 | { | ||
297 | setup_clear_cpu_cap(X86_FEATURE_MPX); | ||
298 | } | ||
299 | |||
300 | /* | ||
301 | * Pick the FPU context switching strategy: | ||
302 | * | ||
303 | * When eagerfpu is AUTO or ENABLE, we ensure it is ENABLE if either of | ||
304 | * the following is true: | ||
305 | * | ||
306 | * (1) the cpu has xsaveopt, as it has the optimization and doing eager | ||
307 | * FPU switching has a relatively low cost compared to a plain xsave; | ||
308 | * (2) the cpu has xsave features (e.g. MPX) that depend on eager FPU | ||
309 | * switching. Should the kernel boot with noxsaveopt, we support MPX | ||
310 | * with eager FPU switching at a higher cost. | ||
311 | */ | ||
312 | static void __init fpu__init_system_ctx_switch(void) | 233 | static void __init fpu__init_system_ctx_switch(void) |
313 | { | 234 | { |
314 | static bool on_boot_cpu __initdata = 1; | 235 | static bool on_boot_cpu __initdata = 1; |
@@ -317,17 +238,6 @@ static void __init fpu__init_system_ctx_switch(void) | |||
317 | on_boot_cpu = 0; | 238 | on_boot_cpu = 0; |
318 | 239 | ||
319 | WARN_ON_FPU(current->thread.fpu.fpstate_active); | 240 | WARN_ON_FPU(current->thread.fpu.fpstate_active); |
320 | |||
321 | if (boot_cpu_has(X86_FEATURE_XSAVEOPT) && eagerfpu != DISABLE) | ||
322 | eagerfpu = ENABLE; | ||
323 | |||
324 | if (xfeatures_mask & XFEATURE_MASK_EAGER) | ||
325 | eagerfpu = ENABLE; | ||
326 | |||
327 | if (eagerfpu == ENABLE) | ||
328 | setup_force_cpu_cap(X86_FEATURE_EAGER_FPU); | ||
329 | |||
330 | printk(KERN_INFO "x86/fpu: Using '%s' FPU context switches.\n", eagerfpu == ENABLE ? "eager" : "lazy"); | ||
331 | } | 241 | } |
332 | 242 | ||
333 | /* | 243 | /* |
@@ -336,11 +246,6 @@ static void __init fpu__init_system_ctx_switch(void) | |||
336 | */ | 246 | */ |
337 | static void __init fpu__init_parse_early_param(void) | 247 | static void __init fpu__init_parse_early_param(void) |
338 | { | 248 | { |
339 | if (cmdline_find_option_bool(boot_command_line, "eagerfpu=off")) { | ||
340 | eagerfpu = DISABLE; | ||
341 | fpu__clear_eager_fpu_features(); | ||
342 | } | ||
343 | |||
344 | if (cmdline_find_option_bool(boot_command_line, "no387")) | 249 | if (cmdline_find_option_bool(boot_command_line, "no387")) |
345 | setup_clear_cpu_cap(X86_FEATURE_FPU); | 250 | setup_clear_cpu_cap(X86_FEATURE_FPU); |
346 | 251 | ||
@@ -375,14 +280,6 @@ void __init fpu__init_system(struct cpuinfo_x86 *c) | |||
375 | */ | 280 | */ |
376 | fpu__init_cpu(); | 281 | fpu__init_cpu(); |
377 | 282 | ||
378 | /* | ||
379 | * But don't leave CR0::TS set yet, as some of the FPU setup | ||
380 | * methods depend on being able to execute FPU instructions | ||
381 | * that will fault on a set TS, such as the FXSAVE in | ||
382 | * fpu__init_system_mxcsr(). | ||
383 | */ | ||
384 | clts(); | ||
385 | |||
386 | fpu__init_system_generic(); | 283 | fpu__init_system_generic(); |
387 | fpu__init_system_xstate_size_legacy(); | 284 | fpu__init_system_xstate_size_legacy(); |
388 | fpu__init_system_xstate(); | 285 | fpu__init_system_xstate(); |
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index a184c210efba..83c23c230b4c 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -340,11 +340,9 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) | |||
340 | } | 340 | } |
341 | 341 | ||
342 | fpu->fpstate_active = 1; | 342 | fpu->fpstate_active = 1; |
343 | if (use_eager_fpu()) { | 343 | preempt_disable(); |
344 | preempt_disable(); | 344 | fpu__restore(fpu); |
345 | fpu__restore(fpu); | 345 | preempt_enable(); |
346 | preempt_enable(); | ||
347 | } | ||
348 | 346 | ||
349 | return err; | 347 | return err; |
350 | } else { | 348 | } else { |
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index ce47452879fd..1d7770447b3e 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -892,15 +892,6 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, | |||
892 | */ | 892 | */ |
893 | if (!boot_cpu_has(X86_FEATURE_OSPKE)) | 893 | if (!boot_cpu_has(X86_FEATURE_OSPKE)) |
894 | return -EINVAL; | 894 | return -EINVAL; |
895 | /* | ||
896 | * For most XSAVE components, this would be an arduous task: | ||
897 | * brining fpstate up to date with fpregs, updating fpstate, | ||
898 | * then re-populating fpregs. But, for components that are | ||
899 | * never lazily managed, we can just access the fpregs | ||
900 | * directly. PKRU is never managed lazily, so we can just | ||
901 | * manipulate it directly. Make sure it stays that way. | ||
902 | */ | ||
903 | WARN_ON_ONCE(!use_eager_fpu()); | ||
904 | 895 | ||
905 | /* Set the bits we need in PKRU: */ | 896 | /* Set the bits we need in PKRU: */ |
906 | if (init_val & PKEY_DISABLE_ACCESS) | 897 | if (init_val & PKEY_DISABLE_ACCESS) |
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index bbf3d5933eaa..a1bfba0f7234 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -328,7 +328,6 @@ __visible struct pv_cpu_ops pv_cpu_ops = { | |||
328 | .cpuid = native_cpuid, | 328 | .cpuid = native_cpuid, |
329 | .get_debugreg = native_get_debugreg, | 329 | .get_debugreg = native_get_debugreg, |
330 | .set_debugreg = native_set_debugreg, | 330 | .set_debugreg = native_set_debugreg, |
331 | .clts = native_clts, | ||
332 | .read_cr0 = native_read_cr0, | 331 | .read_cr0 = native_read_cr0, |
333 | .write_cr0 = native_write_cr0, | 332 | .write_cr0 = native_write_cr0, |
334 | .read_cr4 = native_read_cr4, | 333 | .read_cr4 = native_read_cr4, |
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
index 33cdec221f3d..d33ef165b1f8 100644
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -8,7 +8,6 @@ DEF_NATIVE(pv_cpu_ops, iret, "iret"); | |||
8 | DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax"); | 8 | DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax"); |
9 | DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3"); | 9 | DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3"); |
10 | DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax"); | 10 | DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax"); |
11 | DEF_NATIVE(pv_cpu_ops, clts, "clts"); | ||
12 | 11 | ||
13 | #if defined(CONFIG_PARAVIRT_SPINLOCKS) | 12 | #if defined(CONFIG_PARAVIRT_SPINLOCKS) |
14 | DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%eax)"); | 13 | DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%eax)"); |
@@ -50,7 +49,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, | |||
50 | PATCH_SITE(pv_mmu_ops, read_cr2); | 49 | PATCH_SITE(pv_mmu_ops, read_cr2); |
51 | PATCH_SITE(pv_mmu_ops, read_cr3); | 50 | PATCH_SITE(pv_mmu_ops, read_cr3); |
52 | PATCH_SITE(pv_mmu_ops, write_cr3); | 51 | PATCH_SITE(pv_mmu_ops, write_cr3); |
53 | PATCH_SITE(pv_cpu_ops, clts); | ||
54 | #if defined(CONFIG_PARAVIRT_SPINLOCKS) | 52 | #if defined(CONFIG_PARAVIRT_SPINLOCKS) |
55 | case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock): | 53 | case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock): |
56 | if (pv_is_native_spin_unlock()) { | 54 | if (pv_is_native_spin_unlock()) { |
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index b0fceff502b3..f4fcf26c9fce 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -10,7 +10,6 @@ DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax"); | |||
10 | DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax"); | 10 | DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax"); |
11 | DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3"); | 11 | DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3"); |
12 | DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)"); | 12 | DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)"); |
13 | DEF_NATIVE(pv_cpu_ops, clts, "clts"); | ||
14 | DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd"); | 13 | DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd"); |
15 | 14 | ||
16 | DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq"); | 15 | DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq"); |
@@ -60,7 +59,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, | |||
60 | PATCH_SITE(pv_mmu_ops, read_cr2); | 59 | PATCH_SITE(pv_mmu_ops, read_cr2); |
61 | PATCH_SITE(pv_mmu_ops, read_cr3); | 60 | PATCH_SITE(pv_mmu_ops, read_cr3); |
62 | PATCH_SITE(pv_mmu_ops, write_cr3); | 61 | PATCH_SITE(pv_mmu_ops, write_cr3); |
63 | PATCH_SITE(pv_cpu_ops, clts); | ||
64 | PATCH_SITE(pv_mmu_ops, flush_tlb_single); | 62 | PATCH_SITE(pv_mmu_ops, flush_tlb_single); |
65 | PATCH_SITE(pv_cpu_ops, wbinvd); | 63 | PATCH_SITE(pv_cpu_ops, wbinvd); |
66 | #if defined(CONFIG_PARAVIRT_SPINLOCKS) | 64 | #if defined(CONFIG_PARAVIRT_SPINLOCKS) |
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index e3223bc78cb6..f854404be1c6 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -231,11 +231,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
231 | struct fpu *next_fpu = &next->fpu; | 231 | struct fpu *next_fpu = &next->fpu; |
232 | int cpu = smp_processor_id(); | 232 | int cpu = smp_processor_id(); |
233 | struct tss_struct *tss = &per_cpu(cpu_tss, cpu); | 233 | struct tss_struct *tss = &per_cpu(cpu_tss, cpu); |
234 | fpu_switch_t fpu_switch; | ||
235 | 234 | ||
236 | /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ | 235 | /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ |
237 | 236 | ||
238 | fpu_switch = switch_fpu_prepare(prev_fpu, next_fpu, cpu); | 237 | switch_fpu_prepare(prev_fpu, cpu); |
239 | 238 | ||
240 | /* | 239 | /* |
241 | * Save away %gs. No need to save %fs, as it was saved on the | 240 | * Save away %gs. No need to save %fs, as it was saved on the |
@@ -294,7 +293,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
294 | if (prev->gs | next->gs) | 293 | if (prev->gs | next->gs) |
295 | lazy_load_gs(next->gs); | 294 | lazy_load_gs(next->gs); |
296 | 295 | ||
297 | switch_fpu_finish(next_fpu, fpu_switch); | 296 | switch_fpu_finish(next_fpu, cpu); |
298 | 297 | ||
299 | this_cpu_write(current_task, next_p); | 298 | this_cpu_write(current_task, next_p); |
300 | 299 | ||
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index c99f1ca35eb5..6c1b43eab80c 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -270,9 +270,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
270 | int cpu = smp_processor_id(); | 270 | int cpu = smp_processor_id(); |
271 | struct tss_struct *tss = &per_cpu(cpu_tss, cpu); | 271 | struct tss_struct *tss = &per_cpu(cpu_tss, cpu); |
272 | unsigned prev_fsindex, prev_gsindex; | 272 | unsigned prev_fsindex, prev_gsindex; |
273 | fpu_switch_t fpu_switch; | ||
274 | 273 | ||
275 | fpu_switch = switch_fpu_prepare(prev_fpu, next_fpu, cpu); | 274 | switch_fpu_prepare(prev_fpu, cpu); |
276 | 275 | ||
277 | /* We must save %fs and %gs before load_TLS() because | 276 | /* We must save %fs and %gs before load_TLS() because |
278 | * %fs and %gs may be cleared by load_TLS(). | 277 | * %fs and %gs may be cleared by load_TLS(). |
@@ -422,7 +421,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
422 | prev->gsbase = 0; | 421 | prev->gsbase = 0; |
423 | prev->gsindex = prev_gsindex; | 422 | prev->gsindex = prev_gsindex; |
424 | 423 | ||
425 | switch_fpu_finish(next_fpu, fpu_switch); | 424 | switch_fpu_finish(next_fpu, cpu); |
426 | 425 | ||
427 | /* | 426 | /* |
428 | * Switch the PDA and FPU contexts. | 427 | * Switch the PDA and FPU contexts. |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index f084a24c2c0f..2a501abe5000 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1132,7 +1132,7 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle) | |||
1132 | return err; | 1132 | return err; |
1133 | 1133 | ||
1134 | /* the FPU context is blank, nobody can own it */ | 1134 | /* the FPU context is blank, nobody can own it */ |
1135 | __cpu_disable_lazy_restore(cpu); | 1135 | per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL; |
1136 | 1136 | ||
1137 | common_cpu_up(cpu, tidle); | 1137 | common_cpu_up(cpu, tidle); |
1138 | 1138 | ||
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index bd4e3d4d3625..bf0c6d049080 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -853,6 +853,8 @@ do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) | |||
853 | dotraplinkage void | 853 | dotraplinkage void |
854 | do_device_not_available(struct pt_regs *regs, long error_code) | 854 | do_device_not_available(struct pt_regs *regs, long error_code) |
855 | { | 855 | { |
856 | unsigned long cr0; | ||
857 | |||
856 | RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); | 858 | RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); |
857 | 859 | ||
858 | #ifdef CONFIG_MATH_EMULATION | 860 | #ifdef CONFIG_MATH_EMULATION |
@@ -866,10 +868,20 @@ do_device_not_available(struct pt_regs *regs, long error_code) | |||
866 | return; | 868 | return; |
867 | } | 869 | } |
868 | #endif | 870 | #endif |
869 | fpu__restore(¤t->thread.fpu); /* interrupts still off */ | 871 | |
870 | #ifdef CONFIG_X86_32 | 872 | /* This should not happen. */ |
871 | cond_local_irq_enable(regs); | 873 | cr0 = read_cr0(); |
872 | #endif | 874 | if (WARN(cr0 & X86_CR0_TS, "CR0.TS was set")) { |
875 | /* Try to fix it up and carry on. */ | ||
876 | write_cr0(cr0 & ~X86_CR0_TS); | ||
877 | } else { | ||
878 | /* | ||
879 | * Something terrible happened, and we're better off trying | ||
880 | * to kill the task than getting stuck in a never-ending | ||
881 | * loop of #NM faults. | ||
882 | */ | ||
883 | die("unexpected #NM exception", regs, error_code); | ||
884 | } | ||
873 | } | 885 | } |
874 | NOKPROBE_SYMBOL(do_device_not_available); | 886 | NOKPROBE_SYMBOL(do_device_not_available); |
875 | 887 | ||
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index afa7bbb596cd..0aefb626fa8f 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -16,7 +16,6 @@ | |||
16 | #include <linux/export.h> | 16 | #include <linux/export.h> |
17 | #include <linux/vmalloc.h> | 17 | #include <linux/vmalloc.h> |
18 | #include <linux/uaccess.h> | 18 | #include <linux/uaccess.h> |
19 | #include <asm/fpu/internal.h> /* For use_eager_fpu. Ugh! */ | ||
20 | #include <asm/user.h> | 19 | #include <asm/user.h> |
21 | #include <asm/fpu/xstate.h> | 20 | #include <asm/fpu/xstate.h> |
22 | #include "cpuid.h" | 21 | #include "cpuid.h" |
@@ -114,8 +113,7 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu) | |||
114 | if (best && (best->eax & (F(XSAVES) | F(XSAVEC)))) | 113 | if (best && (best->eax & (F(XSAVES) | F(XSAVEC)))) |
115 | best->ebx = xstate_required_size(vcpu->arch.xcr0, true); | 114 | best->ebx = xstate_required_size(vcpu->arch.xcr0, true); |
116 | 115 | ||
117 | if (use_eager_fpu()) | 116 | kvm_x86_ops->fpu_activate(vcpu); |
118 | kvm_x86_ops->fpu_activate(vcpu); | ||
119 | 117 | ||
120 | /* | 118 | /* |
121 | * The existing code assumes virtual address is 48-bit in the canonical | 119 | * The existing code assumes virtual address is 48-bit in the canonical |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 5382b82462fc..3980da515fd0 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -2145,12 +2145,6 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx) | |||
2145 | #endif | 2145 | #endif |
2146 | if (vmx->host_state.msr_host_bndcfgs) | 2146 | if (vmx->host_state.msr_host_bndcfgs) |
2147 | wrmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs); | 2147 | wrmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs); |
2148 | /* | ||
2149 | * If the FPU is not active (through the host task or | ||
2150 | * the guest vcpu), then restore the cr0.TS bit. | ||
2151 | */ | ||
2152 | if (!fpregs_active() && !vmx->vcpu.guest_fpu_loaded) | ||
2153 | stts(); | ||
2154 | load_gdt(this_cpu_ptr(&host_gdt)); | 2148 | load_gdt(this_cpu_ptr(&host_gdt)); |
2155 | } | 2149 | } |
2156 | 2150 | ||
@@ -4845,9 +4839,11 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx) | |||
4845 | u32 low32, high32; | 4839 | u32 low32, high32; |
4846 | unsigned long tmpl; | 4840 | unsigned long tmpl; |
4847 | struct desc_ptr dt; | 4841 | struct desc_ptr dt; |
4848 | unsigned long cr4; | 4842 | unsigned long cr0, cr4; |
4849 | 4843 | ||
4850 | vmcs_writel(HOST_CR0, read_cr0() & ~X86_CR0_TS); /* 22.2.3 */ | 4844 | cr0 = read_cr0(); |
4845 | WARN_ON(cr0 & X86_CR0_TS); | ||
4846 | vmcs_writel(HOST_CR0, cr0); /* 22.2.3 */ | ||
4851 | vmcs_writel(HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */ | 4847 | vmcs_writel(HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */ |
4852 | 4848 | ||
4853 | /* Save the most likely value for this task's CR4 in the VMCS. */ | 4849 | /* Save the most likely value for this task's CR4 in the VMCS. */ |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 59c2d6f1b131..6f5f465fdb6b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -5097,11 +5097,6 @@ static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt) | |||
5097 | { | 5097 | { |
5098 | preempt_disable(); | 5098 | preempt_disable(); |
5099 | kvm_load_guest_fpu(emul_to_vcpu(ctxt)); | 5099 | kvm_load_guest_fpu(emul_to_vcpu(ctxt)); |
5100 | /* | ||
5101 | * CR0.TS may reference the host fpu state, not the guest fpu state, | ||
5102 | * so it may be clear at this point. | ||
5103 | */ | ||
5104 | clts(); | ||
5105 | } | 5100 | } |
5106 | 5101 | ||
5107 | static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt) | 5102 | static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt) |
@@ -7423,25 +7418,13 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) | |||
7423 | 7418 | ||
7424 | void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) | 7419 | void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) |
7425 | { | 7420 | { |
7426 | if (!vcpu->guest_fpu_loaded) { | 7421 | if (!vcpu->guest_fpu_loaded) |
7427 | vcpu->fpu_counter = 0; | ||
7428 | return; | 7422 | return; |
7429 | } | ||
7430 | 7423 | ||
7431 | vcpu->guest_fpu_loaded = 0; | 7424 | vcpu->guest_fpu_loaded = 0; |
7432 | copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu); | 7425 | copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu); |
7433 | __kernel_fpu_end(); | 7426 | __kernel_fpu_end(); |
7434 | ++vcpu->stat.fpu_reload; | 7427 | ++vcpu->stat.fpu_reload; |
7435 | /* | ||
7436 | * If using eager FPU mode, or if the guest is a frequent user | ||
7437 | * of the FPU, just leave the FPU active for next time. | ||
7438 | * Every 255 times fpu_counter rolls over to 0; a guest that uses | ||
7439 | * the FPU in bursts will revert to loading it on demand. | ||
7440 | */ | ||
7441 | if (!use_eager_fpu()) { | ||
7442 | if (++vcpu->fpu_counter < 5) | ||
7443 | kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu); | ||
7444 | } | ||
7445 | trace_kvm_fpu(0); | 7428 | trace_kvm_fpu(0); |
7446 | } | 7429 | } |
7447 | 7430 | ||
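The x86.c hunk above also drops the burst-detection heuristic that kvm_put_guest_fpu() used in lazy mode: fpu_counter was bumped on every put, and while it stayed below 5 the FPU was deactivated again, so only guests that touched the FPU frequently kept it resident. A small model of the removed logic versus the new no-op path follows; the struct, the boolean standing in for KVM_REQ_DEACTIVATE_FPU, and the function names are stand-ins, not KVM code.

#include <stdio.h>
#include <stdbool.h>

struct vcpu_model {
        unsigned char fpu_counter;   /* the removed struct kvm_vcpu field */
        bool deactivate_requested;   /* models KVM_REQ_DEACTIVATE_FPU */
};

/* old lazy-mode behaviour: deactivate the FPU for infrequent users */
static void put_guest_fpu_old(struct vcpu_model *v)
{
        if (++v->fpu_counter < 5)
                v->deactivate_requested = true;
}

/* new behaviour: nothing to decide, the guest FPU simply stays loaded */
static void put_guest_fpu_new(struct vcpu_model *v)
{
        (void)v;
}

int main(void)
{
        struct vcpu_model v = { 0, false };

        put_guest_fpu_old(&v);
        printf("old: counter=%u deactivate=%d\n", v.fpu_counter, v.deactivate_requested);

        v = (struct vcpu_model){ 0, false };
        put_guest_fpu_new(&v);
        printf("new: counter=%u deactivate=%d\n", v.fpu_counter, v.deactivate_requested);
        return 0;
}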
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 25da5bc8d83d..4ca0d78adcf0 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c | |||
@@ -497,38 +497,24 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx, | |||
497 | * a whole series of functions like read_cr0() and write_cr0(). | 497 | * a whole series of functions like read_cr0() and write_cr0(). |
498 | * | 498 | * |
499 | * We start with cr0. cr0 allows you to turn on and off all kinds of basic | 499 | * We start with cr0. cr0 allows you to turn on and off all kinds of basic |
500 | * features, but Linux only really cares about one: the horrifically-named Task | 500 | * features, but the only cr0 bit that Linux ever used at runtime was the |
501 | * Switched (TS) bit at bit 3 (ie. 8) | 501 | * horrifically-named Task Switched (TS) bit at bit 3 (ie. 8) |
502 | * | 502 | * |
503 | * What does the TS bit do? Well, it causes the CPU to trap (interrupt 7) if | 503 | * What does the TS bit do? Well, it causes the CPU to trap (interrupt 7) if |
504 | * the floating point unit is used. Which allows us to restore FPU state | 504 | * the floating point unit is used. Which allows us to restore FPU state |
505 | * lazily after a task switch, and Linux uses that gratefully, but wouldn't a | 505 | * lazily after a task switch if we wanted to, but wouldn't a name like |
506 | * name like "FPUTRAP bit" be a little less cryptic? | 506 | * "FPUTRAP bit" be a little less cryptic? |
507 | * | 507 | * |
508 | * We store cr0 locally because the Host never changes it. The Guest sometimes | 508 | * Fortunately, Linux keeps it simple and doesn't use TS, so we can ignore |
509 | * wants to read it and we'd prefer not to bother the Host unnecessarily. | 509 | * cr0. |
510 | */ | 510 | */ |
511 | static unsigned long current_cr0; | ||
512 | static void lguest_write_cr0(unsigned long val) | 511 | static void lguest_write_cr0(unsigned long val) |
513 | { | 512 | { |
514 | lazy_hcall1(LHCALL_TS, val & X86_CR0_TS); | ||
515 | current_cr0 = val; | ||
516 | } | 513 | } |
517 | 514 | ||
518 | static unsigned long lguest_read_cr0(void) | 515 | static unsigned long lguest_read_cr0(void) |
519 | { | 516 | { |
520 | return current_cr0; | 517 | return 0; |
521 | } | ||
522 | |||
523 | /* | ||
524 | * Intel provided a special instruction to clear the TS bit for people too cool | ||
525 | * to use write_cr0() to do it. This "clts" instruction is faster, because all | ||
526 | * the vowels have been optimized out. | ||
527 | */ | ||
528 | static void lguest_clts(void) | ||
529 | { | ||
530 | lazy_hcall1(LHCALL_TS, 0); | ||
531 | current_cr0 &= ~X86_CR0_TS; | ||
532 | } | 518 | } |
533 | 519 | ||
534 | /* | 520 | /* |
@@ -1432,7 +1418,6 @@ __init void lguest_init(void) | |||
1432 | pv_cpu_ops.load_tls = lguest_load_tls; | 1418 | pv_cpu_ops.load_tls = lguest_load_tls; |
1433 | pv_cpu_ops.get_debugreg = lguest_get_debugreg; | 1419 | pv_cpu_ops.get_debugreg = lguest_get_debugreg; |
1434 | pv_cpu_ops.set_debugreg = lguest_set_debugreg; | 1420 | pv_cpu_ops.set_debugreg = lguest_set_debugreg; |
1435 | pv_cpu_ops.clts = lguest_clts; | ||
1436 | pv_cpu_ops.read_cr0 = lguest_read_cr0; | 1421 | pv_cpu_ops.read_cr0 = lguest_read_cr0; |
1437 | pv_cpu_ops.write_cr0 = lguest_write_cr0; | 1422 | pv_cpu_ops.write_cr0 = lguest_write_cr0; |
1438 | pv_cpu_ops.read_cr4 = lguest_read_cr4; | 1423 | pv_cpu_ops.read_cr4 = lguest_read_cr4; |
diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c index f88ce0e5efd9..2dab69a706ec 100644 --- a/arch/x86/mm/pkeys.c +++ b/arch/x86/mm/pkeys.c | |||
@@ -141,8 +141,7 @@ u32 init_pkru_value = PKRU_AD_KEY( 1) | PKRU_AD_KEY( 2) | PKRU_AD_KEY( 3) | | |||
141 | * Called from the FPU code when creating a fresh set of FPU | 141 | * Called from the FPU code when creating a fresh set of FPU |
142 | * registers. This is called from a very specific context where | 142 | * registers. This is called from a very specific context where |
143 | * we know the FPU registers are safe for use and we can use PKRU | 143 | * we know the FPU registers are safe for use and we can use PKRU |
144 | * directly. The fact that PKRU is only available when we are | 144 | * directly. |
145 | * using eagerfpu mode makes this possible. | ||
146 | */ | 145 | */ |
147 | void copy_init_pkru_to_fpregs(void) | 146 | void copy_init_pkru_to_fpregs(void) |
148 | { | 147 | { |
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index bdd855685403..ced7027b3fbc 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -980,17 +980,6 @@ static void xen_io_delay(void) | |||
980 | { | 980 | { |
981 | } | 981 | } |
982 | 982 | ||
983 | static void xen_clts(void) | ||
984 | { | ||
985 | struct multicall_space mcs; | ||
986 | |||
987 | mcs = xen_mc_entry(0); | ||
988 | |||
989 | MULTI_fpu_taskswitch(mcs.mc, 0); | ||
990 | |||
991 | xen_mc_issue(PARAVIRT_LAZY_CPU); | ||
992 | } | ||
993 | |||
994 | static DEFINE_PER_CPU(unsigned long, xen_cr0_value); | 983 | static DEFINE_PER_CPU(unsigned long, xen_cr0_value); |
995 | 984 | ||
996 | static unsigned long xen_read_cr0(void) | 985 | static unsigned long xen_read_cr0(void) |
@@ -1233,8 +1222,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { | |||
1233 | .set_debugreg = xen_set_debugreg, | 1222 | .set_debugreg = xen_set_debugreg, |
1234 | .get_debugreg = xen_get_debugreg, | 1223 | .get_debugreg = xen_get_debugreg, |
1235 | 1224 | ||
1236 | .clts = xen_clts, | ||
1237 | |||
1238 | .read_cr0 = xen_read_cr0, | 1225 | .read_cr0 = xen_read_cr0, |
1239 | .write_cr0 = xen_write_cr0, | 1226 | .write_cr0 = xen_write_cr0, |
1240 | 1227 | ||
diff --git a/drivers/char/hw_random/via-rng.c b/drivers/char/hw_random/via-rng.c index 44ce80606944..d1f5bb534e0e 100644 --- a/drivers/char/hw_random/via-rng.c +++ b/drivers/char/hw_random/via-rng.c | |||
@@ -70,21 +70,17 @@ enum { | |||
70 | * until we have 4 bytes, thus returning a u32 at a time, | 70 | * until we have 4 bytes, thus returning a u32 at a time, |
71 | * instead of the current u8-at-a-time. | 71 | * instead of the current u8-at-a-time. |
72 | * | 72 | * |
73 | * Padlock instructions can generate a spurious DNA fault, so | 73 | * Padlock instructions can generate a spurious DNA fault, but the |
74 | * we have to call them in the context of irq_ts_save/restore() | 74 | * kernel doesn't use CR0.TS, so this doesn't matter. |
75 | */ | 75 | */ |
76 | 76 | ||
77 | static inline u32 xstore(u32 *addr, u32 edx_in) | 77 | static inline u32 xstore(u32 *addr, u32 edx_in) |
78 | { | 78 | { |
79 | u32 eax_out; | 79 | u32 eax_out; |
80 | int ts_state; | ||
81 | |||
82 | ts_state = irq_ts_save(); | ||
83 | 80 | ||
84 | asm(".byte 0x0F,0xA7,0xC0 /* xstore %%edi (addr=%0) */" | 81 | asm(".byte 0x0F,0xA7,0xC0 /* xstore %%edi (addr=%0) */" |
85 | : "=m" (*addr), "=a" (eax_out), "+d" (edx_in), "+D" (addr)); | 82 | : "=m" (*addr), "=a" (eax_out), "+d" (edx_in), "+D" (addr)); |
86 | 83 | ||
87 | irq_ts_restore(ts_state); | ||
88 | return eax_out; | 84 | return eax_out; |
89 | } | 85 | } |
90 | 86 | ||
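The same reasoning carries through the padlock-aes, padlock-sha and via-rng changes that follow: the irq_ts_save()/irq_ts_restore() bracket only existed to keep a spurious DNA fault from firing while CR0.TS happened to be set, and since the kernel no longer sets CR0.TS the bracket is dead weight around every xstore/xcrypt/xsha call. A before-and-after sketch of that wrapping pattern, with padlock_op() and the two helpers as stand-in stubs rather than the real routines:

#include <stdio.h>

static int irq_ts_save(void)          { return 0; }   /* used to clear CR0.TS */
static void irq_ts_restore(int ts)    { (void)ts; }   /* ...and put it back   */
static void padlock_op(const char *w) { printf("padlock %s\n", w); }

/* old pattern: guard the instruction against a spurious DNA fault */
static void old_style(void)
{
        int ts_state = irq_ts_save();
        padlock_op("xstore (guarded)");
        irq_ts_restore(ts_state);
}

/* new pattern: CR0.TS is never set, so call the instruction directly */
static void new_style(void)
{
        padlock_op("xstore (direct)");
}

int main(void)
{
        old_style();
        new_style();
        return 0;
}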
diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c index 441e86b23571..b3869748cc6b 100644 --- a/drivers/crypto/padlock-aes.c +++ b/drivers/crypto/padlock-aes.c | |||
@@ -183,8 +183,8 @@ static inline void padlock_store_cword(struct cword *cword) | |||
183 | 183 | ||
184 | /* | 184 | /* |
185 | * While the padlock instructions don't use FP/SSE registers, they | 185 | * While the padlock instructions don't use FP/SSE registers, they |
186 | * generate a spurious DNA fault when cr0.ts is '1'. These instructions | 186 | * generate a spurious DNA fault when CR0.TS is '1'. Fortunately, |
187 | * should be used only inside the irq_ts_save/restore() context | 187 | * the kernel doesn't use CR0.TS. |
188 | */ | 188 | */ |
189 | 189 | ||
190 | static inline void rep_xcrypt_ecb(const u8 *input, u8 *output, void *key, | 190 | static inline void rep_xcrypt_ecb(const u8 *input, u8 *output, void *key, |
@@ -298,24 +298,18 @@ static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key, | |||
298 | static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) | 298 | static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) |
299 | { | 299 | { |
300 | struct aes_ctx *ctx = aes_ctx(tfm); | 300 | struct aes_ctx *ctx = aes_ctx(tfm); |
301 | int ts_state; | ||
302 | 301 | ||
303 | padlock_reset_key(&ctx->cword.encrypt); | 302 | padlock_reset_key(&ctx->cword.encrypt); |
304 | ts_state = irq_ts_save(); | ||
305 | ecb_crypt(in, out, ctx->E, &ctx->cword.encrypt, 1); | 303 | ecb_crypt(in, out, ctx->E, &ctx->cword.encrypt, 1); |
306 | irq_ts_restore(ts_state); | ||
307 | padlock_store_cword(&ctx->cword.encrypt); | 304 | padlock_store_cword(&ctx->cword.encrypt); |
308 | } | 305 | } |
309 | 306 | ||
310 | static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) | 307 | static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) |
311 | { | 308 | { |
312 | struct aes_ctx *ctx = aes_ctx(tfm); | 309 | struct aes_ctx *ctx = aes_ctx(tfm); |
313 | int ts_state; | ||
314 | 310 | ||
315 | padlock_reset_key(&ctx->cword.encrypt); | 311 | padlock_reset_key(&ctx->cword.encrypt); |
316 | ts_state = irq_ts_save(); | ||
317 | ecb_crypt(in, out, ctx->D, &ctx->cword.decrypt, 1); | 312 | ecb_crypt(in, out, ctx->D, &ctx->cword.decrypt, 1); |
318 | irq_ts_restore(ts_state); | ||
319 | padlock_store_cword(&ctx->cword.encrypt); | 313 | padlock_store_cword(&ctx->cword.encrypt); |
320 | } | 314 | } |
321 | 315 | ||
@@ -346,14 +340,12 @@ static int ecb_aes_encrypt(struct blkcipher_desc *desc, | |||
346 | struct aes_ctx *ctx = blk_aes_ctx(desc->tfm); | 340 | struct aes_ctx *ctx = blk_aes_ctx(desc->tfm); |
347 | struct blkcipher_walk walk; | 341 | struct blkcipher_walk walk; |
348 | int err; | 342 | int err; |
349 | int ts_state; | ||
350 | 343 | ||
351 | padlock_reset_key(&ctx->cword.encrypt); | 344 | padlock_reset_key(&ctx->cword.encrypt); |
352 | 345 | ||
353 | blkcipher_walk_init(&walk, dst, src, nbytes); | 346 | blkcipher_walk_init(&walk, dst, src, nbytes); |
354 | err = blkcipher_walk_virt(desc, &walk); | 347 | err = blkcipher_walk_virt(desc, &walk); |
355 | 348 | ||
356 | ts_state = irq_ts_save(); | ||
357 | while ((nbytes = walk.nbytes)) { | 349 | while ((nbytes = walk.nbytes)) { |
358 | padlock_xcrypt_ecb(walk.src.virt.addr, walk.dst.virt.addr, | 350 | padlock_xcrypt_ecb(walk.src.virt.addr, walk.dst.virt.addr, |
359 | ctx->E, &ctx->cword.encrypt, | 351 | ctx->E, &ctx->cword.encrypt, |
@@ -361,7 +353,6 @@ static int ecb_aes_encrypt(struct blkcipher_desc *desc, | |||
361 | nbytes &= AES_BLOCK_SIZE - 1; | 353 | nbytes &= AES_BLOCK_SIZE - 1; |
362 | err = blkcipher_walk_done(desc, &walk, nbytes); | 354 | err = blkcipher_walk_done(desc, &walk, nbytes); |
363 | } | 355 | } |
364 | irq_ts_restore(ts_state); | ||
365 | 356 | ||
366 | padlock_store_cword(&ctx->cword.encrypt); | 357 | padlock_store_cword(&ctx->cword.encrypt); |
367 | 358 | ||
@@ -375,14 +366,12 @@ static int ecb_aes_decrypt(struct blkcipher_desc *desc, | |||
375 | struct aes_ctx *ctx = blk_aes_ctx(desc->tfm); | 366 | struct aes_ctx *ctx = blk_aes_ctx(desc->tfm); |
376 | struct blkcipher_walk walk; | 367 | struct blkcipher_walk walk; |
377 | int err; | 368 | int err; |
378 | int ts_state; | ||
379 | 369 | ||
380 | padlock_reset_key(&ctx->cword.decrypt); | 370 | padlock_reset_key(&ctx->cword.decrypt); |
381 | 371 | ||
382 | blkcipher_walk_init(&walk, dst, src, nbytes); | 372 | blkcipher_walk_init(&walk, dst, src, nbytes); |
383 | err = blkcipher_walk_virt(desc, &walk); | 373 | err = blkcipher_walk_virt(desc, &walk); |
384 | 374 | ||
385 | ts_state = irq_ts_save(); | ||
386 | while ((nbytes = walk.nbytes)) { | 375 | while ((nbytes = walk.nbytes)) { |
387 | padlock_xcrypt_ecb(walk.src.virt.addr, walk.dst.virt.addr, | 376 | padlock_xcrypt_ecb(walk.src.virt.addr, walk.dst.virt.addr, |
388 | ctx->D, &ctx->cword.decrypt, | 377 | ctx->D, &ctx->cword.decrypt, |
@@ -390,7 +379,6 @@ static int ecb_aes_decrypt(struct blkcipher_desc *desc, | |||
390 | nbytes &= AES_BLOCK_SIZE - 1; | 379 | nbytes &= AES_BLOCK_SIZE - 1; |
391 | err = blkcipher_walk_done(desc, &walk, nbytes); | 380 | err = blkcipher_walk_done(desc, &walk, nbytes); |
392 | } | 381 | } |
393 | irq_ts_restore(ts_state); | ||
394 | 382 | ||
395 | padlock_store_cword(&ctx->cword.encrypt); | 383 | padlock_store_cword(&ctx->cword.encrypt); |
396 | 384 | ||
@@ -425,14 +413,12 @@ static int cbc_aes_encrypt(struct blkcipher_desc *desc, | |||
425 | struct aes_ctx *ctx = blk_aes_ctx(desc->tfm); | 413 | struct aes_ctx *ctx = blk_aes_ctx(desc->tfm); |
426 | struct blkcipher_walk walk; | 414 | struct blkcipher_walk walk; |
427 | int err; | 415 | int err; |
428 | int ts_state; | ||
429 | 416 | ||
430 | padlock_reset_key(&ctx->cword.encrypt); | 417 | padlock_reset_key(&ctx->cword.encrypt); |
431 | 418 | ||
432 | blkcipher_walk_init(&walk, dst, src, nbytes); | 419 | blkcipher_walk_init(&walk, dst, src, nbytes); |
433 | err = blkcipher_walk_virt(desc, &walk); | 420 | err = blkcipher_walk_virt(desc, &walk); |
434 | 421 | ||
435 | ts_state = irq_ts_save(); | ||
436 | while ((nbytes = walk.nbytes)) { | 422 | while ((nbytes = walk.nbytes)) { |
437 | u8 *iv = padlock_xcrypt_cbc(walk.src.virt.addr, | 423 | u8 *iv = padlock_xcrypt_cbc(walk.src.virt.addr, |
438 | walk.dst.virt.addr, ctx->E, | 424 | walk.dst.virt.addr, ctx->E, |
@@ -442,7 +428,6 @@ static int cbc_aes_encrypt(struct blkcipher_desc *desc, | |||
442 | nbytes &= AES_BLOCK_SIZE - 1; | 428 | nbytes &= AES_BLOCK_SIZE - 1; |
443 | err = blkcipher_walk_done(desc, &walk, nbytes); | 429 | err = blkcipher_walk_done(desc, &walk, nbytes); |
444 | } | 430 | } |
445 | irq_ts_restore(ts_state); | ||
446 | 431 | ||
447 | padlock_store_cword(&ctx->cword.decrypt); | 432 | padlock_store_cword(&ctx->cword.decrypt); |
448 | 433 | ||
@@ -456,14 +441,12 @@ static int cbc_aes_decrypt(struct blkcipher_desc *desc, | |||
456 | struct aes_ctx *ctx = blk_aes_ctx(desc->tfm); | 441 | struct aes_ctx *ctx = blk_aes_ctx(desc->tfm); |
457 | struct blkcipher_walk walk; | 442 | struct blkcipher_walk walk; |
458 | int err; | 443 | int err; |
459 | int ts_state; | ||
460 | 444 | ||
461 | padlock_reset_key(&ctx->cword.encrypt); | 445 | padlock_reset_key(&ctx->cword.encrypt); |
462 | 446 | ||
463 | blkcipher_walk_init(&walk, dst, src, nbytes); | 447 | blkcipher_walk_init(&walk, dst, src, nbytes); |
464 | err = blkcipher_walk_virt(desc, &walk); | 448 | err = blkcipher_walk_virt(desc, &walk); |
465 | 449 | ||
466 | ts_state = irq_ts_save(); | ||
467 | while ((nbytes = walk.nbytes)) { | 450 | while ((nbytes = walk.nbytes)) { |
468 | padlock_xcrypt_cbc(walk.src.virt.addr, walk.dst.virt.addr, | 451 | padlock_xcrypt_cbc(walk.src.virt.addr, walk.dst.virt.addr, |
469 | ctx->D, walk.iv, &ctx->cword.decrypt, | 452 | ctx->D, walk.iv, &ctx->cword.decrypt, |
@@ -472,8 +455,6 @@ static int cbc_aes_decrypt(struct blkcipher_desc *desc, | |||
472 | err = blkcipher_walk_done(desc, &walk, nbytes); | 455 | err = blkcipher_walk_done(desc, &walk, nbytes); |
473 | } | 456 | } |
474 | 457 | ||
475 | irq_ts_restore(ts_state); | ||
476 | |||
477 | padlock_store_cword(&ctx->cword.encrypt); | 458 | padlock_store_cword(&ctx->cword.encrypt); |
478 | 459 | ||
479 | return err; | 460 | return err; |
diff --git a/drivers/crypto/padlock-sha.c b/drivers/crypto/padlock-sha.c index 8c5f90647b7a..bc72d20c32c3 100644 --- a/drivers/crypto/padlock-sha.c +++ b/drivers/crypto/padlock-sha.c | |||
@@ -89,7 +89,6 @@ static int padlock_sha1_finup(struct shash_desc *desc, const u8 *in, | |||
89 | struct sha1_state state; | 89 | struct sha1_state state; |
90 | unsigned int space; | 90 | unsigned int space; |
91 | unsigned int leftover; | 91 | unsigned int leftover; |
92 | int ts_state; | ||
93 | int err; | 92 | int err; |
94 | 93 | ||
95 | dctx->fallback.flags = desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP; | 94 | dctx->fallback.flags = desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP; |
@@ -120,14 +119,11 @@ static int padlock_sha1_finup(struct shash_desc *desc, const u8 *in, | |||
120 | 119 | ||
121 | memcpy(result, &state.state, SHA1_DIGEST_SIZE); | 120 | memcpy(result, &state.state, SHA1_DIGEST_SIZE); |
122 | 121 | ||
123 | /* prevent taking the spurious DNA fault with padlock. */ | ||
124 | ts_state = irq_ts_save(); | ||
125 | asm volatile (".byte 0xf3,0x0f,0xa6,0xc8" /* rep xsha1 */ | 122 | asm volatile (".byte 0xf3,0x0f,0xa6,0xc8" /* rep xsha1 */ |
126 | : \ | 123 | : \ |
127 | : "c"((unsigned long)state.count + count), \ | 124 | : "c"((unsigned long)state.count + count), \ |
128 | "a"((unsigned long)state.count), \ | 125 | "a"((unsigned long)state.count), \ |
129 | "S"(in), "D"(result)); | 126 | "S"(in), "D"(result)); |
130 | irq_ts_restore(ts_state); | ||
131 | 127 | ||
132 | padlock_output_block((uint32_t *)result, (uint32_t *)out, 5); | 128 | padlock_output_block((uint32_t *)result, (uint32_t *)out, 5); |
133 | 129 | ||
@@ -155,7 +151,6 @@ static int padlock_sha256_finup(struct shash_desc *desc, const u8 *in, | |||
155 | struct sha256_state state; | 151 | struct sha256_state state; |
156 | unsigned int space; | 152 | unsigned int space; |
157 | unsigned int leftover; | 153 | unsigned int leftover; |
158 | int ts_state; | ||
159 | int err; | 154 | int err; |
160 | 155 | ||
161 | dctx->fallback.flags = desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP; | 156 | dctx->fallback.flags = desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP; |
@@ -186,14 +181,11 @@ static int padlock_sha256_finup(struct shash_desc *desc, const u8 *in, | |||
186 | 181 | ||
187 | memcpy(result, &state.state, SHA256_DIGEST_SIZE); | 182 | memcpy(result, &state.state, SHA256_DIGEST_SIZE); |
188 | 183 | ||
189 | /* prevent taking the spurious DNA fault with padlock. */ | ||
190 | ts_state = irq_ts_save(); | ||
191 | asm volatile (".byte 0xf3,0x0f,0xa6,0xd0" /* rep xsha256 */ | 184 | asm volatile (".byte 0xf3,0x0f,0xa6,0xd0" /* rep xsha256 */ |
192 | : \ | 185 | : \ |
193 | : "c"((unsigned long)state.count + count), \ | 186 | : "c"((unsigned long)state.count + count), \ |
194 | "a"((unsigned long)state.count), \ | 187 | "a"((unsigned long)state.count), \ |
195 | "S"(in), "D"(result)); | 188 | "S"(in), "D"(result)); |
196 | irq_ts_restore(ts_state); | ||
197 | 189 | ||
198 | padlock_output_block((uint32_t *)result, (uint32_t *)out, 8); | 190 | padlock_output_block((uint32_t *)result, (uint32_t *)out, 8); |
199 | 191 | ||
@@ -312,7 +304,6 @@ static int padlock_sha1_update_nano(struct shash_desc *desc, | |||
312 | u8 buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__ | 304 | u8 buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__ |
313 | ((aligned(STACK_ALIGN))); | 305 | ((aligned(STACK_ALIGN))); |
314 | u8 *dst = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT); | 306 | u8 *dst = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT); |
315 | int ts_state; | ||
316 | 307 | ||
317 | partial = sctx->count & 0x3f; | 308 | partial = sctx->count & 0x3f; |
318 | sctx->count += len; | 309 | sctx->count += len; |
@@ -328,23 +319,19 @@ static int padlock_sha1_update_nano(struct shash_desc *desc, | |||
328 | memcpy(sctx->buffer + partial, data, | 319 | memcpy(sctx->buffer + partial, data, |
329 | done + SHA1_BLOCK_SIZE); | 320 | done + SHA1_BLOCK_SIZE); |
330 | src = sctx->buffer; | 321 | src = sctx->buffer; |
331 | ts_state = irq_ts_save(); | ||
332 | asm volatile (".byte 0xf3,0x0f,0xa6,0xc8" | 322 | asm volatile (".byte 0xf3,0x0f,0xa6,0xc8" |
333 | : "+S"(src), "+D"(dst) \ | 323 | : "+S"(src), "+D"(dst) \ |
334 | : "a"((long)-1), "c"((unsigned long)1)); | 324 | : "a"((long)-1), "c"((unsigned long)1)); |
335 | irq_ts_restore(ts_state); | ||
336 | done += SHA1_BLOCK_SIZE; | 325 | done += SHA1_BLOCK_SIZE; |
337 | src = data + done; | 326 | src = data + done; |
338 | } | 327 | } |
339 | 328 | ||
340 | /* Process the leftover bytes from the input data */ | 329 | /* Process the leftover bytes from the input data */ |
341 | if (len - done >= SHA1_BLOCK_SIZE) { | 330 | if (len - done >= SHA1_BLOCK_SIZE) { |
342 | ts_state = irq_ts_save(); | ||
343 | asm volatile (".byte 0xf3,0x0f,0xa6,0xc8" | 331 | asm volatile (".byte 0xf3,0x0f,0xa6,0xc8" |
344 | : "+S"(src), "+D"(dst) | 332 | : "+S"(src), "+D"(dst) |
345 | : "a"((long)-1), | 333 | : "a"((long)-1), |
346 | "c"((unsigned long)((len - done) / SHA1_BLOCK_SIZE))); | 334 | "c"((unsigned long)((len - done) / SHA1_BLOCK_SIZE))); |
347 | irq_ts_restore(ts_state); | ||
348 | done += ((len - done) - (len - done) % SHA1_BLOCK_SIZE); | 335 | done += ((len - done) - (len - done) % SHA1_BLOCK_SIZE); |
349 | src = data + done; | 336 | src = data + done; |
350 | } | 337 | } |
@@ -401,7 +388,6 @@ static int padlock_sha256_update_nano(struct shash_desc *desc, const u8 *data, | |||
401 | u8 buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__ | 388 | u8 buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__ |
402 | ((aligned(STACK_ALIGN))); | 389 | ((aligned(STACK_ALIGN))); |
403 | u8 *dst = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT); | 390 | u8 *dst = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT); |
404 | int ts_state; | ||
405 | 391 | ||
406 | partial = sctx->count & 0x3f; | 392 | partial = sctx->count & 0x3f; |
407 | sctx->count += len; | 393 | sctx->count += len; |
@@ -417,23 +403,19 @@ static int padlock_sha256_update_nano(struct shash_desc *desc, const u8 *data, | |||
417 | memcpy(sctx->buf + partial, data, | 403 | memcpy(sctx->buf + partial, data, |
418 | done + SHA256_BLOCK_SIZE); | 404 | done + SHA256_BLOCK_SIZE); |
419 | src = sctx->buf; | 405 | src = sctx->buf; |
420 | ts_state = irq_ts_save(); | ||
421 | asm volatile (".byte 0xf3,0x0f,0xa6,0xd0" | 406 | asm volatile (".byte 0xf3,0x0f,0xa6,0xd0" |
422 | : "+S"(src), "+D"(dst) | 407 | : "+S"(src), "+D"(dst) |
423 | : "a"((long)-1), "c"((unsigned long)1)); | 408 | : "a"((long)-1), "c"((unsigned long)1)); |
424 | irq_ts_restore(ts_state); | ||
425 | done += SHA256_BLOCK_SIZE; | 409 | done += SHA256_BLOCK_SIZE; |
426 | src = data + done; | 410 | src = data + done; |
427 | } | 411 | } |
428 | 412 | ||
429 | /* Process the leftover bytes from the input data */ | 413 | /* Process the leftover bytes from the input data */ |
430 | if (len - done >= SHA256_BLOCK_SIZE) { | 414 | if (len - done >= SHA256_BLOCK_SIZE) { |
431 | ts_state = irq_ts_save(); | ||
432 | asm volatile (".byte 0xf3,0x0f,0xa6,0xd0" | 415 | asm volatile (".byte 0xf3,0x0f,0xa6,0xd0" |
433 | : "+S"(src), "+D"(dst) | 416 | : "+S"(src), "+D"(dst) |
434 | : "a"((long)-1), | 417 | : "a"((long)-1), |
435 | "c"((unsigned long)((len - done) / 64))); | 418 | "c"((unsigned long)((len - done) / 64))); |
436 | irq_ts_restore(ts_state); | ||
437 | done += ((len - done) - (len - done) % 64); | 419 | done += ((len - done) - (len - done) % 64); |
438 | src = data + done; | 420 | src = data + done; |
439 | } | 421 | } |
diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c index 19a32280731d..601f81c04873 100644 --- a/drivers/lguest/hypercalls.c +++ b/drivers/lguest/hypercalls.c | |||
@@ -109,10 +109,6 @@ static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args) | |||
109 | case LHCALL_SET_CLOCKEVENT: | 109 | case LHCALL_SET_CLOCKEVENT: |
110 | guest_set_clockevent(cpu, args->arg1); | 110 | guest_set_clockevent(cpu, args->arg1); |
111 | break; | 111 | break; |
112 | case LHCALL_TS: | ||
113 | /* This sets the TS flag, as we saw used in run_guest(). */ | ||
114 | cpu->ts = args->arg1; | ||
115 | break; | ||
116 | case LHCALL_HALT: | 112 | case LHCALL_HALT: |
117 | /* Similarly, this sets the halted flag for run_guest(). */ | 113 | /* Similarly, this sets the halted flag for run_guest(). */ |
118 | cpu->halted = 1; | 114 | cpu->halted = 1; |
diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index 69b3814afd2f..2356a2318034 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h | |||
@@ -43,7 +43,6 @@ struct lg_cpu { | |||
43 | struct mm_struct *mm; /* == tsk->mm, but that becomes NULL on exit */ | 43 | struct mm_struct *mm; /* == tsk->mm, but that becomes NULL on exit */ |
44 | 44 | ||
45 | u32 cr2; | 45 | u32 cr2; |
46 | int ts; | ||
47 | u32 esp1; | 46 | u32 esp1; |
48 | u16 ss1; | 47 | u16 ss1; |
49 | 48 | ||
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index 6e9042e3d2a9..743253fc638f 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c | |||
@@ -247,14 +247,6 @@ unsigned long *lguest_arch_regptr(struct lg_cpu *cpu, size_t reg_off, bool any) | |||
247 | void lguest_arch_run_guest(struct lg_cpu *cpu) | 247 | void lguest_arch_run_guest(struct lg_cpu *cpu) |
248 | { | 248 | { |
249 | /* | 249 | /* |
250 | * Remember the awfully-named TS bit? If the Guest has asked to set it | ||
251 | * we set it now, so we can trap and pass that trap to the Guest if it | ||
252 | * uses the FPU. | ||
253 | */ | ||
254 | if (cpu->ts && fpregs_active()) | ||
255 | stts(); | ||
256 | |||
257 | /* | ||
258 | * SYSENTER is an optimized way of doing system calls. We can't allow | 250 | * SYSENTER is an optimized way of doing system calls. We can't allow |
259 | * it because it always jumps to privilege level 0. A normal Guest | 251 | * it because it always jumps to privilege level 0. A normal Guest |
260 | * won't try it because we don't advertise it in CPUID, but a malicious | 252 | * won't try it because we don't advertise it in CPUID, but a malicious |
@@ -282,10 +274,6 @@ void lguest_arch_run_guest(struct lg_cpu *cpu) | |||
282 | if (boot_cpu_has(X86_FEATURE_SEP)) | 274 | if (boot_cpu_has(X86_FEATURE_SEP)) |
283 | wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); | 275 | wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); |
284 | 276 | ||
285 | /* Clear the host TS bit if it was set above. */ | ||
286 | if (cpu->ts && fpregs_active()) | ||
287 | clts(); | ||
288 | |||
289 | /* | 277 | /* |
290 | * If the Guest page faulted, then the cr2 register will tell us the | 278 | * If the Guest page faulted, then the cr2 register will tell us the |
291 | * bad virtual address. We have to grab this now, because once we | 279 | * bad virtual address. We have to grab this now, because once we |
@@ -421,12 +409,7 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu) | |||
421 | kill_guest(cpu, "Writing cr2"); | 409 | kill_guest(cpu, "Writing cr2"); |
422 | break; | 410 | break; |
423 | case 7: /* We've intercepted a Device Not Available fault. */ | 411 | case 7: /* We've intercepted a Device Not Available fault. */ |
424 | /* | 412 | /* No special handling is needed here. */ |
425 | * If the Guest doesn't want to know, we already restored the | ||
426 | * Floating Point Unit, so we just continue without telling it. | ||
427 | */ | ||
428 | if (!cpu->ts) | ||
429 | return; | ||
430 | break; | 413 | break; |
431 | case 32 ... 255: | 414 | case 32 ... 255: |
432 | /* This might be a syscall. */ | 415 | /* This might be a syscall. */ |
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 6f0023797b33..81ba3ba641ba 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h | |||
@@ -224,7 +224,6 @@ struct kvm_vcpu { | |||
224 | 224 | ||
225 | int fpu_active; | 225 | int fpu_active; |
226 | int guest_fpu_loaded, guest_xcr0_loaded; | 226 | int guest_fpu_loaded, guest_xcr0_loaded; |
227 | unsigned char fpu_counter; | ||
228 | struct swait_queue_head wq; | 227 | struct swait_queue_head wq; |
229 | struct pid *pid; | 228 | struct pid *pid; |
230 | int sigset_active; | 229 | int sigset_active; |
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index a39629206864..cddd5d06e1cb 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h | |||
@@ -104,7 +104,6 @@ | |||
104 | #define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */ | 104 | #define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */ |
105 | #define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */ | 105 | #define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */ |
106 | #define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */ | 106 | #define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */ |
107 | #define X86_FEATURE_EAGER_FPU ( 3*32+29) /* "eagerfpu" Non lazy FPU restore */ | ||
108 | #define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ | 107 | #define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ |
109 | 108 | ||
110 | /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ | 109 | /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ |