| field | value | date |
|---|---|---|
| author | Suresh Siddha <suresh.b.siddha@intel.com> | 2012-09-06 17:58:52 -0400 |
| committer | H. Peter Anvin <hpa@linux.intel.com> | 2012-09-18 18:52:22 -0400 |
| commit | 5d2bd7009f306c82afddd1ca4d9763ad8473c216 (patch) | |
| tree | 772bc888c48766b892e216c19e938c82657e2b0e | |
| parent | 304bceda6a18ae0b0240b8aac9a6bdf8ce2d2469 (diff) | |
x86, fpu: decouple non-lazy/eager fpu restore from xsave
Decouple the non-lazy/eager FPU restore policy from the existence of the xsave
feature. Introduce a synthetic CPUID flag to represent the eagerfpu
policy. The "eagerfpu=on" boot parameter enables this policy.
Requested-by: H. Peter Anvin <hpa@zytor.com>
Requested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Link: http://lkml.kernel.org/r/1347300665-6209-2-git-send-email-suresh.b.siddha@intel.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
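As context for the change, here is a minimal standalone sketch of the policy split the patch describes: the eager-restore decision is carried by its own flag, set from a boot-parameter-style string, rather than being inferred from xsave support. This is illustrative only; `struct fpu_features`, `parse_eagerfpu()` and the `main()` driver are invented for the example and are not kernel code.

```c
/*
 * Standalone model (not kernel code) of the policy split in this patch:
 * eager FPU restore is tracked by its own flag instead of being implied
 * by xsave support.
 */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct fpu_features {
	bool has_xsave;		/* hardware capability */
	bool eager_fpu;		/* restore policy, set independently */
};

/* Mirrors the spirit of eager_fpu_setup(): "on"/"off" drive the policy flag. */
static void parse_eagerfpu(struct fpu_features *f, const char *arg)
{
	if (!strcmp(arg, "on"))
		f->eager_fpu = true;
	else if (!strcmp(arg, "off"))
		f->eager_fpu = false;
}

/* Stand-in for use_eager_fpu(): only the policy flag is consulted. */
static bool use_eager_fpu(const struct fpu_features *f)
{
	return f->eager_fpu;
}

int main(void)
{
	struct fpu_features f = { .has_xsave = true, .eager_fpu = false };

	parse_eagerfpu(&f, "on");
	printf("xsave=%d eagerfpu=%d -> eager restore: %s\n",
	       f.has_xsave, f.eager_fpu, use_eager_fpu(&f) ? "yes" : "no");
	return 0;
}
```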
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | Documentation/kernel-parameters.txt | 4 |
| -rw-r--r-- | arch/x86/include/asm/cpufeature.h | 2 |
| -rw-r--r-- | arch/x86/include/asm/fpu-internal.h | 54 |
| -rw-r--r-- | arch/x86/kernel/cpu/common.c | 2 |
| -rw-r--r-- | arch/x86/kernel/i387.c | 25 |
| -rw-r--r-- | arch/x86/kernel/process.c | 2 |
| -rw-r--r-- | arch/x86/kernel/traps.c | 2 |
| -rw-r--r-- | arch/x86/kernel/xsave.c | 87 |
8 files changed, 112 insertions, 66 deletions
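A practical note, offered as an assumption rather than something stated in the patch: because the new cpufeature entry carries the quoted name "eagerfpu", the synthetic flag should show up in the `flags` line of `/proc/cpuinfo` on kernels that include this change and have the policy enabled. A small illustrative check:

```c
/*
 * Illustrative check (not part of the patch): scan /proc/cpuinfo for the
 * "eagerfpu" flag exposed by the new synthetic cpufeature bit.
 */
#include <stdio.h>
#include <string.h>

int main(void)
{
	char line[8192];
	FILE *f = fopen("/proc/cpuinfo", "r");

	if (!f) {
		perror("/proc/cpuinfo");
		return 1;
	}
	while (fgets(line, sizeof(line), f)) {
		if (!strncmp(line, "flags", 5) && strstr(line, "eagerfpu")) {
			puts("eager FPU restore policy is active");
			fclose(f);
			return 0;
		}
	}
	fclose(f);
	puts("eagerfpu flag not present");
	return 0;
}
```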
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index ad7e2e5088c1..741d064fdc6a 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1833,6 +1833,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			and restore using xsave. The kernel will fallback to
 			enabling legacy floating-point and sse state.
 
+	eagerfpu=	[X86]
+			on	enable eager fpu restore
+			off	disable eager fpu restore
+
 	nohlt		[BUGS=ARM,SH] Tells the kernel that the sleep(SH) or
 			wfi(ARM) instruction doesn't work correctly and not to
 			use it. This is also useful when using JTAG debugger.
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 6b7ee5ff6820..5dd2b473ccff 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -97,6 +97,7 @@
 #define X86_FEATURE_EXTD_APICID	(3*32+26) /* has extended APICID (8 bits) */
 #define X86_FEATURE_AMD_DCM	(3*32+27) /* multi-node processor */
 #define X86_FEATURE_APERFMPERF	(3*32+28) /* APERFMPERF */
+#define X86_FEATURE_EAGER_FPU	(3*32+29) /* "eagerfpu" Non lazy FPU restore */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
 #define X86_FEATURE_XMM3	(4*32+ 0) /* "pni" SSE-3 */
@@ -305,6 +306,7 @@ extern const char * const x86_power_flags[32];
 #define cpu_has_perfctr_core	boot_cpu_has(X86_FEATURE_PERFCTR_CORE)
 #define cpu_has_cx8		boot_cpu_has(X86_FEATURE_CX8)
 #define cpu_has_cx16		boot_cpu_has(X86_FEATURE_CX16)
+#define cpu_has_eager_fpu	boot_cpu_has(X86_FEATURE_EAGER_FPU)
 
 #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64)
 # define cpu_has_invlpg		1
diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index 8ca0f9f45ac4..0ca72f0d4b41 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -38,6 +38,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
 
 extern unsigned int mxcsr_feature_mask;
 extern void fpu_init(void);
+extern void eager_fpu_init(void);
 
 DECLARE_PER_CPU(struct task_struct *, fpu_owner_task);
 
@@ -84,6 +85,11 @@ static inline int is_x32_frame(void)
 
 #define X87_FSW_ES		(1 << 7)	/* Exception Summary */
 
+static __always_inline __pure bool use_eager_fpu(void)
+{
+	return static_cpu_has(X86_FEATURE_EAGER_FPU);
+}
+
 static __always_inline __pure bool use_xsaveopt(void)
 {
 	return static_cpu_has(X86_FEATURE_XSAVEOPT);
@@ -99,6 +105,14 @@ static __always_inline __pure bool use_fxsr(void)
 	return static_cpu_has(X86_FEATURE_FXSR);
 }
 
+static inline void fx_finit(struct i387_fxsave_struct *fx)
+{
+	memset(fx, 0, xstate_size);
+	fx->cwd = 0x37f;
+	if (cpu_has_xmm)
+		fx->mxcsr = MXCSR_DEFAULT;
+}
+
 extern void __sanitize_i387_state(struct task_struct *);
 
 static inline void sanitize_i387_state(struct task_struct *tsk)
@@ -291,13 +305,13 @@ static inline void __thread_set_has_fpu(struct task_struct *tsk)
 static inline void __thread_fpu_end(struct task_struct *tsk)
 {
 	__thread_clear_has_fpu(tsk);
-	if (!use_xsave())
+	if (!use_eager_fpu())
 		stts();
 }
 
 static inline void __thread_fpu_begin(struct task_struct *tsk)
 {
-	if (!use_xsave())
+	if (!use_eager_fpu())
 		clts();
 	__thread_set_has_fpu(tsk);
 }
@@ -327,10 +341,14 @@ static inline void drop_fpu(struct task_struct *tsk)
 
 static inline void drop_init_fpu(struct task_struct *tsk)
 {
-	if (!use_xsave())
+	if (!use_eager_fpu())
 		drop_fpu(tsk);
-	else
-		xrstor_state(init_xstate_buf, -1);
+	else {
+		if (use_xsave())
+			xrstor_state(init_xstate_buf, -1);
+		else
+			fxrstor_checking(&init_xstate_buf->i387);
+	}
 }
 
 /*
@@ -370,7 +388,7 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
 	 * If the task has used the math, pre-load the FPU on xsave processors
	 * or if the past 5 consecutive context-switches used math.
	 */
-	fpu.preload = tsk_used_math(new) && (use_xsave() ||
+	fpu.preload = tsk_used_math(new) && (use_eager_fpu() ||
					     new->fpu_counter > 5);
 	if (__thread_has_fpu(old)) {
 		if (!__save_init_fpu(old))
@@ -383,14 +401,14 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
 			new->fpu_counter++;
 			__thread_set_has_fpu(new);
 			prefetch(new->thread.fpu.state);
-		} else if (!use_xsave())
+		} else if (!use_eager_fpu())
 			stts();
 	} else {
 		old->fpu_counter = 0;
 		old->thread.fpu.last_cpu = ~0;
 		if (fpu.preload) {
 			new->fpu_counter++;
-			if (!use_xsave() && fpu_lazy_restore(new, cpu))
+			if (!use_eager_fpu() && fpu_lazy_restore(new, cpu))
 				fpu.preload = 0;
 			else
 				prefetch(new->thread.fpu.state);
@@ -452,6 +470,14 @@ static inline void user_fpu_begin(void)
 	preempt_enable();
 }
 
+static inline void __save_fpu(struct task_struct *tsk)
+{
+	if (use_xsave())
+		xsave_state(&tsk->thread.fpu.state->xsave, -1);
+	else
+		fpu_fxsave(&tsk->thread.fpu);
+}
+
 /*
  * These disable preemption on their own and are safe
  */
@@ -459,8 +485,8 @@ static inline void save_init_fpu(struct task_struct *tsk)
 {
 	WARN_ON_ONCE(!__thread_has_fpu(tsk));
 
-	if (use_xsave()) {
-		xsave_state(&tsk->thread.fpu.state->xsave, -1);
+	if (use_eager_fpu()) {
+		__save_fpu(tsk);
 		return;
 	}
 
@@ -526,11 +552,9 @@ static inline void fpu_free(struct fpu *fpu)
 
 static inline void fpu_copy(struct task_struct *dst, struct task_struct *src)
 {
-	if (use_xsave()) {
-		struct xsave_struct *xsave = &dst->thread.fpu.state->xsave;
-
-		memset(&xsave->xsave_hdr, 0, sizeof(struct xsave_hdr_struct));
-		xsave_state(xsave, -1);
+	if (use_eager_fpu()) {
+		memset(&dst->thread.fpu.state->xsave, 0, xstate_size);
+		__save_fpu(dst);
 	} else {
 		struct fpu *dfpu = &dst->thread.fpu;
 		struct fpu *sfpu = &src->thread.fpu;
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index a5fbc3c5fccc..b0fe078614d8 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1297,7 +1297,6 @@ void __cpuinit cpu_init(void)
 	dbg_restore_debug_regs();
 
 	fpu_init();
-	xsave_init();
 
 	raw_local_save_flags(kernel_eflags);
 
@@ -1352,6 +1351,5 @@ void __cpuinit cpu_init(void)
 	dbg_restore_debug_regs();
 
 	fpu_init();
-	xsave_init();
 }
 #endif
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 528557470ddb..6782e3983865 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -22,9 +22,8 @@
 /*
  * Were we in an interrupt that interrupted kernel mode?
  *
- * For now, on xsave platforms we will return interrupted
- * kernel FPU as not-idle. TBD: As we use non-lazy FPU restore
- * for xsave platforms, ideally we can change the return value
+ * For now, with eagerfpu we will return interrupted kernel FPU
+ * state as not-idle. TBD: Ideally we can change the return value
  * to something like __thread_has_fpu(current). But we need to
  * be careful of doing __thread_clear_has_fpu() before saving
  * the FPU etc for supporting nested uses etc. For now, take
@@ -38,7 +37,7 @@
  */
 static inline bool interrupted_kernel_fpu_idle(void)
 {
-	if (use_xsave())
+	if (use_eager_fpu())
 		return 0;
 
 	return !__thread_has_fpu(current) &&
@@ -84,7 +83,7 @@ void kernel_fpu_begin(void)
 		__save_init_fpu(me);
 		__thread_clear_has_fpu(me);
 		/* We do 'stts()' in kernel_fpu_end() */
-	} else if (!use_xsave()) {
+	} else if (!use_eager_fpu()) {
 		this_cpu_write(fpu_owner_task, NULL);
 		clts();
 	}
@@ -93,7 +92,7 @@ EXPORT_SYMBOL(kernel_fpu_begin);
 
 void kernel_fpu_end(void)
 {
-	if (use_xsave())
+	if (use_eager_fpu())
 		math_state_restore();
 	else
 		stts();
@@ -122,7 +121,6 @@ static void __cpuinit mxcsr_feature_mask_init(void)
 {
 	unsigned long mask = 0;
 
-	clts();
 	if (cpu_has_fxsr) {
 		memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct));
 		asm volatile("fxsave %0" : : "m" (fx_scratch));
@@ -131,7 +129,6 @@ static void __cpuinit mxcsr_feature_mask_init(void)
 		mask = 0x0000ffbf;
 	}
 	mxcsr_feature_mask &= mask;
-	stts();
 }
 
 static void __cpuinit init_thread_xstate(void)
@@ -185,9 +182,8 @@ void __cpuinit fpu_init(void)
 	init_thread_xstate();
 
 	mxcsr_feature_mask_init();
-	/* clean state in init */
-	current_thread_info()->status = 0;
-	clear_used_math();
+	xsave_init();
+	eager_fpu_init();
 }
 
 void fpu_finit(struct fpu *fpu)
@@ -198,12 +194,7 @@ void fpu_finit(struct fpu *fpu)
 	}
 
 	if (cpu_has_fxsr) {
-		struct i387_fxsave_struct *fx = &fpu->state->fxsave;
-
-		memset(fx, 0, xstate_size);
-		fx->cwd = 0x37f;
-		if (cpu_has_xmm)
-			fx->mxcsr = MXCSR_DEFAULT;
+		fx_finit(&fpu->state->fxsave);
 	} else {
 		struct i387_fsave_struct *fp = &fpu->state->fsave;
 		memset(fp, 0, xstate_size);
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index c21e30f8923b..dc3567e083f9 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -156,7 +156,7 @@ void flush_thread(void)
 	 * Free the FPU state for non xsave platforms. They get reallocated
	 * lazily at the first use.
	 */
-	if (!use_xsave())
+	if (!use_eager_fpu())
 		free_thread_xstate(tsk);
 }
 
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index ac7d5275f6e8..4f4aba0551b0 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -630,7 +630,7 @@ EXPORT_SYMBOL_GPL(math_state_restore);
 dotraplinkage void __kprobes
 do_device_not_available(struct pt_regs *regs, long error_code)
 {
-	BUG_ON(use_xsave());
+	BUG_ON(use_eager_fpu());
 
 #ifdef CONFIG_MATH_EMULATION
 	if (read_cr0() & X86_CR0_EM) {
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index e7752bd7cac8..c0afd2c43761 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -400,7 +400,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
 		set_used_math();
 	}
 
-	if (use_xsave())
+	if (use_eager_fpu())
 		math_state_restore();
 
 	return err;
@@ -450,29 +450,11 @@ static void prepare_fx_sw_frame(void)
  */
 static inline void xstate_enable(void)
 {
-	clts();
 	set_in_cr4(X86_CR4_OSXSAVE);
 	xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask);
 }
 
 /*
- * This is same as math_state_restore(). But use_xsave() is not yet
- * patched to use math_state_restore().
- */
-static inline void init_restore_xstate(void)
-{
-	init_fpu(current);
-	__thread_fpu_begin(current);
-	xrstor_state(init_xstate_buf, -1);
-}
-
-static inline void xstate_enable_ap(void)
-{
-	xstate_enable();
-	init_restore_xstate();
-}
-
-/*
  * Record the offsets and sizes of different state managed by the xsave
  * memory layout.
  */
@@ -500,17 +482,20 @@ static void __init setup_xstate_features(void)
 /*
  * setup the xstate image representing the init state
  */
-static void __init setup_xstate_init(void)
+static void __init setup_init_fpu_buf(void)
 {
-	setup_xstate_features();
-
 	/*
	 * Setup init_xstate_buf to represent the init state of
	 * all the features managed by the xsave
	 */
 	init_xstate_buf = alloc_bootmem_align(xstate_size,
					      __alignof__(struct xsave_struct));
-	init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT;
+	fx_finit(&init_xstate_buf->i387);
+
+	if (!cpu_has_xsave)
+		return;
+
+	setup_xstate_features();
 
 	/*
	 * Init all the features state with header_bv being 0x0
@@ -523,6 +508,17 @@ static void __init setup_xstate_init(void)
 	xsave_state(init_xstate_buf, -1);
 }
 
+static int disable_eagerfpu;
+static int __init eager_fpu_setup(char *s)
+{
+	if (!strcmp(s, "on"))
+		setup_force_cpu_cap(X86_FEATURE_EAGER_FPU);
+	else if (!strcmp(s, "off"))
+		disable_eagerfpu = 1;
+	return 1;
+}
+__setup("eagerfpu=", eager_fpu_setup);
+
 /*
  * Enable and initialize the xsave feature.
  */
@@ -559,15 +555,10 @@ static void __init xstate_enable_boot_cpu(void)
 
 	update_regset_xstate_info(xstate_size, pcntxt_mask);
 	prepare_fx_sw_frame();
-
-	setup_xstate_init();
+	setup_init_fpu_buf();
 
 	pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n",
		pcntxt_mask, xstate_size);
-
-	current->thread.fpu.state =
-	    alloc_bootmem_align(xstate_size, __alignof__(struct xsave_struct));
-	init_restore_xstate();
 }
 
 /*
@@ -586,6 +577,42 @@ void __cpuinit xsave_init(void)
 		return;
 
 	this_func = next_func;
-	next_func = xstate_enable_ap;
+	next_func = xstate_enable;
 	this_func();
 }
+
+static inline void __init eager_fpu_init_bp(void)
+{
+	current->thread.fpu.state =
+		alloc_bootmem_align(xstate_size, __alignof__(struct xsave_struct));
+	if (!init_xstate_buf)
+		setup_init_fpu_buf();
+}
+
+void __cpuinit eager_fpu_init(void)
+{
+	static __refdata void (*boot_func)(void) = eager_fpu_init_bp;
+
+	clear_used_math();
+	current_thread_info()->status = 0;
+	if (!cpu_has_eager_fpu) {
+		stts();
+		return;
+	}
+
+	if (boot_func) {
+		boot_func();
+		boot_func = NULL;
+	}
+
+	/*
+	 * This is same as math_state_restore(). But use_xsave() is
+	 * not yet patched to use math_state_restore().
+	 */
+	init_fpu(current);
+	__thread_fpu_begin(current);
+	if (cpu_has_xsave)
+		xrstor_state(init_xstate_buf, -1);
+	else
+		fxrstor_checking(&init_xstate_buf->i387);
+}
