diff options
author | Suresh Siddha <suresh.b.siddha@intel.com> | 2012-09-06 17:58:52 -0400 |
---|---|---|
committer | H. Peter Anvin <hpa@linux.intel.com> | 2012-09-18 18:52:22 -0400 |
commit | 5d2bd7009f306c82afddd1ca4d9763ad8473c216 (patch) | |
tree | 772bc888c48766b892e216c19e938c82657e2b0e | |
parent | 304bceda6a18ae0b0240b8aac9a6bdf8ce2d2469 (diff) |
x86, fpu: decouple non-lazy/eager fpu restore from xsave
Decouple non-lazy/eager fpu restore policy from the existence of the xsave
feature. Introduce a synthetic CPUID flag to represent the eagerfpu
policy. The "eagerfpu=on" boot parameter will enable the policy.
Requested-by: H. Peter Anvin <hpa@zytor.com>
Requested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Link: http://lkml.kernel.org/r/1347300665-6209-2-git-send-email-suresh.b.siddha@intel.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
-rw-r--r-- | Documentation/kernel-parameters.txt | 4 | ||||
-rw-r--r-- | arch/x86/include/asm/cpufeature.h | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/fpu-internal.h | 54 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/common.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/i387.c | 25 | ||||
-rw-r--r-- | arch/x86/kernel/process.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/traps.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/xsave.c | 87 |
8 files changed, 112 insertions, 66 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index ad7e2e5088c1..741d064fdc6a 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
@@ -1833,6 +1833,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
1833 | and restore using xsave. The kernel will fallback to | 1833 | and restore using xsave. The kernel will fallback to |
1834 | enabling legacy floating-point and sse state. | 1834 | enabling legacy floating-point and sse state. |
1835 | 1835 | ||
1836 | eagerfpu= [X86] | ||
1837 | on enable eager fpu restore | ||
1838 | off disable eager fpu restore | ||
1839 | |||
1836 | nohlt [BUGS=ARM,SH] Tells the kernel that the sleep(SH) or | 1840 | nohlt [BUGS=ARM,SH] Tells the kernel that the sleep(SH) or |
1837 | wfi(ARM) instruction doesn't work correctly and not to | 1841 | wfi(ARM) instruction doesn't work correctly and not to |
1838 | use it. This is also useful when using JTAG debugger. | 1842 | use it. This is also useful when using JTAG debugger. |
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 6b7ee5ff6820..5dd2b473ccff 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h | |||
@@ -97,6 +97,7 @@ | |||
97 | #define X86_FEATURE_EXTD_APICID (3*32+26) /* has extended APICID (8 bits) */ | 97 | #define X86_FEATURE_EXTD_APICID (3*32+26) /* has extended APICID (8 bits) */ |
98 | #define X86_FEATURE_AMD_DCM (3*32+27) /* multi-node processor */ | 98 | #define X86_FEATURE_AMD_DCM (3*32+27) /* multi-node processor */ |
99 | #define X86_FEATURE_APERFMPERF (3*32+28) /* APERFMPERF */ | 99 | #define X86_FEATURE_APERFMPERF (3*32+28) /* APERFMPERF */ |
100 | #define X86_FEATURE_EAGER_FPU (3*32+29) /* "eagerfpu" Non lazy FPU restore */ | ||
100 | 101 | ||
101 | /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ | 102 | /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ |
102 | #define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ | 103 | #define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ |
@@ -305,6 +306,7 @@ extern const char * const x86_power_flags[32]; | |||
305 | #define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE) | 306 | #define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE) |
306 | #define cpu_has_cx8 boot_cpu_has(X86_FEATURE_CX8) | 307 | #define cpu_has_cx8 boot_cpu_has(X86_FEATURE_CX8) |
307 | #define cpu_has_cx16 boot_cpu_has(X86_FEATURE_CX16) | 308 | #define cpu_has_cx16 boot_cpu_has(X86_FEATURE_CX16) |
309 | #define cpu_has_eager_fpu boot_cpu_has(X86_FEATURE_EAGER_FPU) | ||
308 | 310 | ||
309 | #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) | 311 | #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) |
310 | # define cpu_has_invlpg 1 | 312 | # define cpu_has_invlpg 1 |
diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index 8ca0f9f45ac4..0ca72f0d4b41 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h | |||
@@ -38,6 +38,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka, | |||
38 | 38 | ||
39 | extern unsigned int mxcsr_feature_mask; | 39 | extern unsigned int mxcsr_feature_mask; |
40 | extern void fpu_init(void); | 40 | extern void fpu_init(void); |
41 | extern void eager_fpu_init(void); | ||
41 | 42 | ||
42 | DECLARE_PER_CPU(struct task_struct *, fpu_owner_task); | 43 | DECLARE_PER_CPU(struct task_struct *, fpu_owner_task); |
43 | 44 | ||
@@ -84,6 +85,11 @@ static inline int is_x32_frame(void) | |||
84 | 85 | ||
85 | #define X87_FSW_ES (1 << 7) /* Exception Summary */ | 86 | #define X87_FSW_ES (1 << 7) /* Exception Summary */ |
86 | 87 | ||
88 | static __always_inline __pure bool use_eager_fpu(void) | ||
89 | { | ||
90 | return static_cpu_has(X86_FEATURE_EAGER_FPU); | ||
91 | } | ||
92 | |||
87 | static __always_inline __pure bool use_xsaveopt(void) | 93 | static __always_inline __pure bool use_xsaveopt(void) |
88 | { | 94 | { |
89 | return static_cpu_has(X86_FEATURE_XSAVEOPT); | 95 | return static_cpu_has(X86_FEATURE_XSAVEOPT); |
@@ -99,6 +105,14 @@ static __always_inline __pure bool use_fxsr(void) | |||
99 | return static_cpu_has(X86_FEATURE_FXSR); | 105 | return static_cpu_has(X86_FEATURE_FXSR); |
100 | } | 106 | } |
101 | 107 | ||
108 | static inline void fx_finit(struct i387_fxsave_struct *fx) | ||
109 | { | ||
110 | memset(fx, 0, xstate_size); | ||
111 | fx->cwd = 0x37f; | ||
112 | if (cpu_has_xmm) | ||
113 | fx->mxcsr = MXCSR_DEFAULT; | ||
114 | } | ||
115 | |||
102 | extern void __sanitize_i387_state(struct task_struct *); | 116 | extern void __sanitize_i387_state(struct task_struct *); |
103 | 117 | ||
104 | static inline void sanitize_i387_state(struct task_struct *tsk) | 118 | static inline void sanitize_i387_state(struct task_struct *tsk) |
@@ -291,13 +305,13 @@ static inline void __thread_set_has_fpu(struct task_struct *tsk) | |||
291 | static inline void __thread_fpu_end(struct task_struct *tsk) | 305 | static inline void __thread_fpu_end(struct task_struct *tsk) |
292 | { | 306 | { |
293 | __thread_clear_has_fpu(tsk); | 307 | __thread_clear_has_fpu(tsk); |
294 | if (!use_xsave()) | 308 | if (!use_eager_fpu()) |
295 | stts(); | 309 | stts(); |
296 | } | 310 | } |
297 | 311 | ||
298 | static inline void __thread_fpu_begin(struct task_struct *tsk) | 312 | static inline void __thread_fpu_begin(struct task_struct *tsk) |
299 | { | 313 | { |
300 | if (!use_xsave()) | 314 | if (!use_eager_fpu()) |
301 | clts(); | 315 | clts(); |
302 | __thread_set_has_fpu(tsk); | 316 | __thread_set_has_fpu(tsk); |
303 | } | 317 | } |
@@ -327,10 +341,14 @@ static inline void drop_fpu(struct task_struct *tsk) | |||
327 | 341 | ||
328 | static inline void drop_init_fpu(struct task_struct *tsk) | 342 | static inline void drop_init_fpu(struct task_struct *tsk) |
329 | { | 343 | { |
330 | if (!use_xsave()) | 344 | if (!use_eager_fpu()) |
331 | drop_fpu(tsk); | 345 | drop_fpu(tsk); |
332 | else | 346 | else { |
333 | xrstor_state(init_xstate_buf, -1); | 347 | if (use_xsave()) |
348 | xrstor_state(init_xstate_buf, -1); | ||
349 | else | ||
350 | fxrstor_checking(&init_xstate_buf->i387); | ||
351 | } | ||
334 | } | 352 | } |
335 | 353 | ||
336 | /* | 354 | /* |
@@ -370,7 +388,7 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta | |||
370 | * If the task has used the math, pre-load the FPU on xsave processors | 388 | * If the task has used the math, pre-load the FPU on xsave processors |
371 | * or if the past 5 consecutive context-switches used math. | 389 | * or if the past 5 consecutive context-switches used math. |
372 | */ | 390 | */ |
373 | fpu.preload = tsk_used_math(new) && (use_xsave() || | 391 | fpu.preload = tsk_used_math(new) && (use_eager_fpu() || |
374 | new->fpu_counter > 5); | 392 | new->fpu_counter > 5); |
375 | if (__thread_has_fpu(old)) { | 393 | if (__thread_has_fpu(old)) { |
376 | if (!__save_init_fpu(old)) | 394 | if (!__save_init_fpu(old)) |
@@ -383,14 +401,14 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta | |||
383 | new->fpu_counter++; | 401 | new->fpu_counter++; |
384 | __thread_set_has_fpu(new); | 402 | __thread_set_has_fpu(new); |
385 | prefetch(new->thread.fpu.state); | 403 | prefetch(new->thread.fpu.state); |
386 | } else if (!use_xsave()) | 404 | } else if (!use_eager_fpu()) |
387 | stts(); | 405 | stts(); |
388 | } else { | 406 | } else { |
389 | old->fpu_counter = 0; | 407 | old->fpu_counter = 0; |
390 | old->thread.fpu.last_cpu = ~0; | 408 | old->thread.fpu.last_cpu = ~0; |
391 | if (fpu.preload) { | 409 | if (fpu.preload) { |
392 | new->fpu_counter++; | 410 | new->fpu_counter++; |
393 | if (!use_xsave() && fpu_lazy_restore(new, cpu)) | 411 | if (!use_eager_fpu() && fpu_lazy_restore(new, cpu)) |
394 | fpu.preload = 0; | 412 | fpu.preload = 0; |
395 | else | 413 | else |
396 | prefetch(new->thread.fpu.state); | 414 | prefetch(new->thread.fpu.state); |
@@ -452,6 +470,14 @@ static inline void user_fpu_begin(void) | |||
452 | preempt_enable(); | 470 | preempt_enable(); |
453 | } | 471 | } |
454 | 472 | ||
473 | static inline void __save_fpu(struct task_struct *tsk) | ||
474 | { | ||
475 | if (use_xsave()) | ||
476 | xsave_state(&tsk->thread.fpu.state->xsave, -1); | ||
477 | else | ||
478 | fpu_fxsave(&tsk->thread.fpu); | ||
479 | } | ||
480 | |||
455 | /* | 481 | /* |
456 | * These disable preemption on their own and are safe | 482 | * These disable preemption on their own and are safe |
457 | */ | 483 | */ |
@@ -459,8 +485,8 @@ static inline void save_init_fpu(struct task_struct *tsk) | |||
459 | { | 485 | { |
460 | WARN_ON_ONCE(!__thread_has_fpu(tsk)); | 486 | WARN_ON_ONCE(!__thread_has_fpu(tsk)); |
461 | 487 | ||
462 | if (use_xsave()) { | 488 | if (use_eager_fpu()) { |
463 | xsave_state(&tsk->thread.fpu.state->xsave, -1); | 489 | __save_fpu(tsk); |
464 | return; | 490 | return; |
465 | } | 491 | } |
466 | 492 | ||
@@ -526,11 +552,9 @@ static inline void fpu_free(struct fpu *fpu) | |||
526 | 552 | ||
527 | static inline void fpu_copy(struct task_struct *dst, struct task_struct *src) | 553 | static inline void fpu_copy(struct task_struct *dst, struct task_struct *src) |
528 | { | 554 | { |
529 | if (use_xsave()) { | 555 | if (use_eager_fpu()) { |
530 | struct xsave_struct *xsave = &dst->thread.fpu.state->xsave; | 556 | memset(&dst->thread.fpu.state->xsave, 0, xstate_size); |
531 | 557 | __save_fpu(dst); | |
532 | memset(&xsave->xsave_hdr, 0, sizeof(struct xsave_hdr_struct)); | ||
533 | xsave_state(xsave, -1); | ||
534 | } else { | 558 | } else { |
535 | struct fpu *dfpu = &dst->thread.fpu; | 559 | struct fpu *dfpu = &dst->thread.fpu; |
536 | struct fpu *sfpu = &src->thread.fpu; | 560 | struct fpu *sfpu = &src->thread.fpu; |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a5fbc3c5fccc..b0fe078614d8 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -1297,7 +1297,6 @@ void __cpuinit cpu_init(void) | |||
1297 | dbg_restore_debug_regs(); | 1297 | dbg_restore_debug_regs(); |
1298 | 1298 | ||
1299 | fpu_init(); | 1299 | fpu_init(); |
1300 | xsave_init(); | ||
1301 | 1300 | ||
1302 | raw_local_save_flags(kernel_eflags); | 1301 | raw_local_save_flags(kernel_eflags); |
1303 | 1302 | ||
@@ -1352,6 +1351,5 @@ void __cpuinit cpu_init(void) | |||
1352 | dbg_restore_debug_regs(); | 1351 | dbg_restore_debug_regs(); |
1353 | 1352 | ||
1354 | fpu_init(); | 1353 | fpu_init(); |
1355 | xsave_init(); | ||
1356 | } | 1354 | } |
1357 | #endif | 1355 | #endif |
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 528557470ddb..6782e3983865 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c | |||
@@ -22,9 +22,8 @@ | |||
22 | /* | 22 | /* |
23 | * Were we in an interrupt that interrupted kernel mode? | 23 | * Were we in an interrupt that interrupted kernel mode? |
24 | * | 24 | * |
25 | * For now, on xsave platforms we will return interrupted | 25 | * For now, with eagerfpu we will return interrupted kernel FPU |
26 | * kernel FPU as not-idle. TBD: As we use non-lazy FPU restore | 26 | * state as not-idle. TBD: Ideally we can change the return value |
27 | * for xsave platforms, ideally we can change the return value | ||
28 | * to something like __thread_has_fpu(current). But we need to | 27 | * to something like __thread_has_fpu(current). But we need to |
29 | * be careful of doing __thread_clear_has_fpu() before saving | 28 | * be careful of doing __thread_clear_has_fpu() before saving |
30 | * the FPU etc for supporting nested uses etc. For now, take | 29 | * the FPU etc for supporting nested uses etc. For now, take |
@@ -38,7 +37,7 @@ | |||
38 | */ | 37 | */ |
39 | static inline bool interrupted_kernel_fpu_idle(void) | 38 | static inline bool interrupted_kernel_fpu_idle(void) |
40 | { | 39 | { |
41 | if (use_xsave()) | 40 | if (use_eager_fpu()) |
42 | return 0; | 41 | return 0; |
43 | 42 | ||
44 | return !__thread_has_fpu(current) && | 43 | return !__thread_has_fpu(current) && |
@@ -84,7 +83,7 @@ void kernel_fpu_begin(void) | |||
84 | __save_init_fpu(me); | 83 | __save_init_fpu(me); |
85 | __thread_clear_has_fpu(me); | 84 | __thread_clear_has_fpu(me); |
86 | /* We do 'stts()' in kernel_fpu_end() */ | 85 | /* We do 'stts()' in kernel_fpu_end() */ |
87 | } else if (!use_xsave()) { | 86 | } else if (!use_eager_fpu()) { |
88 | this_cpu_write(fpu_owner_task, NULL); | 87 | this_cpu_write(fpu_owner_task, NULL); |
89 | clts(); | 88 | clts(); |
90 | } | 89 | } |
@@ -93,7 +92,7 @@ EXPORT_SYMBOL(kernel_fpu_begin); | |||
93 | 92 | ||
94 | void kernel_fpu_end(void) | 93 | void kernel_fpu_end(void) |
95 | { | 94 | { |
96 | if (use_xsave()) | 95 | if (use_eager_fpu()) |
97 | math_state_restore(); | 96 | math_state_restore(); |
98 | else | 97 | else |
99 | stts(); | 98 | stts(); |
@@ -122,7 +121,6 @@ static void __cpuinit mxcsr_feature_mask_init(void) | |||
122 | { | 121 | { |
123 | unsigned long mask = 0; | 122 | unsigned long mask = 0; |
124 | 123 | ||
125 | clts(); | ||
126 | if (cpu_has_fxsr) { | 124 | if (cpu_has_fxsr) { |
127 | memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct)); | 125 | memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct)); |
128 | asm volatile("fxsave %0" : : "m" (fx_scratch)); | 126 | asm volatile("fxsave %0" : : "m" (fx_scratch)); |
@@ -131,7 +129,6 @@ static void __cpuinit mxcsr_feature_mask_init(void) | |||
131 | mask = 0x0000ffbf; | 129 | mask = 0x0000ffbf; |
132 | } | 130 | } |
133 | mxcsr_feature_mask &= mask; | 131 | mxcsr_feature_mask &= mask; |
134 | stts(); | ||
135 | } | 132 | } |
136 | 133 | ||
137 | static void __cpuinit init_thread_xstate(void) | 134 | static void __cpuinit init_thread_xstate(void) |
@@ -185,9 +182,8 @@ void __cpuinit fpu_init(void) | |||
185 | init_thread_xstate(); | 182 | init_thread_xstate(); |
186 | 183 | ||
187 | mxcsr_feature_mask_init(); | 184 | mxcsr_feature_mask_init(); |
188 | /* clean state in init */ | 185 | xsave_init(); |
189 | current_thread_info()->status = 0; | 186 | eager_fpu_init(); |
190 | clear_used_math(); | ||
191 | } | 187 | } |
192 | 188 | ||
193 | void fpu_finit(struct fpu *fpu) | 189 | void fpu_finit(struct fpu *fpu) |
@@ -198,12 +194,7 @@ void fpu_finit(struct fpu *fpu) | |||
198 | } | 194 | } |
199 | 195 | ||
200 | if (cpu_has_fxsr) { | 196 | if (cpu_has_fxsr) { |
201 | struct i387_fxsave_struct *fx = &fpu->state->fxsave; | 197 | fx_finit(&fpu->state->fxsave); |
202 | |||
203 | memset(fx, 0, xstate_size); | ||
204 | fx->cwd = 0x37f; | ||
205 | if (cpu_has_xmm) | ||
206 | fx->mxcsr = MXCSR_DEFAULT; | ||
207 | } else { | 198 | } else { |
208 | struct i387_fsave_struct *fp = &fpu->state->fsave; | 199 | struct i387_fsave_struct *fp = &fpu->state->fsave; |
209 | memset(fp, 0, xstate_size); | 200 | memset(fp, 0, xstate_size); |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index c21e30f8923b..dc3567e083f9 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -156,7 +156,7 @@ void flush_thread(void) | |||
156 | * Free the FPU state for non xsave platforms. They get reallocated | 156 | * Free the FPU state for non xsave platforms. They get reallocated |
157 | * lazily at the first use. | 157 | * lazily at the first use. |
158 | */ | 158 | */ |
159 | if (!use_xsave()) | 159 | if (!use_eager_fpu()) |
160 | free_thread_xstate(tsk); | 160 | free_thread_xstate(tsk); |
161 | } | 161 | } |
162 | 162 | ||
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index ac7d5275f6e8..4f4aba0551b0 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -630,7 +630,7 @@ EXPORT_SYMBOL_GPL(math_state_restore); | |||
630 | dotraplinkage void __kprobes | 630 | dotraplinkage void __kprobes |
631 | do_device_not_available(struct pt_regs *regs, long error_code) | 631 | do_device_not_available(struct pt_regs *regs, long error_code) |
632 | { | 632 | { |
633 | BUG_ON(use_xsave()); | 633 | BUG_ON(use_eager_fpu()); |
634 | 634 | ||
635 | #ifdef CONFIG_MATH_EMULATION | 635 | #ifdef CONFIG_MATH_EMULATION |
636 | if (read_cr0() & X86_CR0_EM) { | 636 | if (read_cr0() & X86_CR0_EM) { |
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index e7752bd7cac8..c0afd2c43761 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c | |||
@@ -400,7 +400,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) | |||
400 | set_used_math(); | 400 | set_used_math(); |
401 | } | 401 | } |
402 | 402 | ||
403 | if (use_xsave()) | 403 | if (use_eager_fpu()) |
404 | math_state_restore(); | 404 | math_state_restore(); |
405 | 405 | ||
406 | return err; | 406 | return err; |
@@ -450,29 +450,11 @@ static void prepare_fx_sw_frame(void) | |||
450 | */ | 450 | */ |
451 | static inline void xstate_enable(void) | 451 | static inline void xstate_enable(void) |
452 | { | 452 | { |
453 | clts(); | ||
454 | set_in_cr4(X86_CR4_OSXSAVE); | 453 | set_in_cr4(X86_CR4_OSXSAVE); |
455 | xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask); | 454 | xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask); |
456 | } | 455 | } |
457 | 456 | ||
458 | /* | 457 | /* |
459 | * This is same as math_state_restore(). But use_xsave() is not yet | ||
460 | * patched to use math_state_restore(). | ||
461 | */ | ||
462 | static inline void init_restore_xstate(void) | ||
463 | { | ||
464 | init_fpu(current); | ||
465 | __thread_fpu_begin(current); | ||
466 | xrstor_state(init_xstate_buf, -1); | ||
467 | } | ||
468 | |||
469 | static inline void xstate_enable_ap(void) | ||
470 | { | ||
471 | xstate_enable(); | ||
472 | init_restore_xstate(); | ||
473 | } | ||
474 | |||
475 | /* | ||
476 | * Record the offsets and sizes of different state managed by the xsave | 458 | * Record the offsets and sizes of different state managed by the xsave |
477 | * memory layout. | 459 | * memory layout. |
478 | */ | 460 | */ |
@@ -500,17 +482,20 @@ static void __init setup_xstate_features(void) | |||
500 | /* | 482 | /* |
501 | * setup the xstate image representing the init state | 483 | * setup the xstate image representing the init state |
502 | */ | 484 | */ |
503 | static void __init setup_xstate_init(void) | 485 | static void __init setup_init_fpu_buf(void) |
504 | { | 486 | { |
505 | setup_xstate_features(); | ||
506 | |||
507 | /* | 487 | /* |
508 | * Setup init_xstate_buf to represent the init state of | 488 | * Setup init_xstate_buf to represent the init state of |
509 | * all the features managed by the xsave | 489 | * all the features managed by the xsave |
510 | */ | 490 | */ |
511 | init_xstate_buf = alloc_bootmem_align(xstate_size, | 491 | init_xstate_buf = alloc_bootmem_align(xstate_size, |
512 | __alignof__(struct xsave_struct)); | 492 | __alignof__(struct xsave_struct)); |
513 | init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT; | 493 | fx_finit(&init_xstate_buf->i387); |
494 | |||
495 | if (!cpu_has_xsave) | ||
496 | return; | ||
497 | |||
498 | setup_xstate_features(); | ||
514 | 499 | ||
515 | /* | 500 | /* |
516 | * Init all the features state with header_bv being 0x0 | 501 | * Init all the features state with header_bv being 0x0 |
@@ -523,6 +508,17 @@ static void __init setup_xstate_init(void) | |||
523 | xsave_state(init_xstate_buf, -1); | 508 | xsave_state(init_xstate_buf, -1); |
524 | } | 509 | } |
525 | 510 | ||
511 | static int disable_eagerfpu; | ||
512 | static int __init eager_fpu_setup(char *s) | ||
513 | { | ||
514 | if (!strcmp(s, "on")) | ||
515 | setup_force_cpu_cap(X86_FEATURE_EAGER_FPU); | ||
516 | else if (!strcmp(s, "off")) | ||
517 | disable_eagerfpu = 1; | ||
518 | return 1; | ||
519 | } | ||
520 | __setup("eagerfpu=", eager_fpu_setup); | ||
521 | |||
526 | /* | 522 | /* |
527 | * Enable and initialize the xsave feature. | 523 | * Enable and initialize the xsave feature. |
528 | */ | 524 | */ |
@@ -559,15 +555,10 @@ static void __init xstate_enable_boot_cpu(void) | |||
559 | 555 | ||
560 | update_regset_xstate_info(xstate_size, pcntxt_mask); | 556 | update_regset_xstate_info(xstate_size, pcntxt_mask); |
561 | prepare_fx_sw_frame(); | 557 | prepare_fx_sw_frame(); |
562 | 558 | setup_init_fpu_buf(); | |
563 | setup_xstate_init(); | ||
564 | 559 | ||
565 | pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n", | 560 | pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n", |
566 | pcntxt_mask, xstate_size); | 561 | pcntxt_mask, xstate_size); |
567 | |||
568 | current->thread.fpu.state = | ||
569 | alloc_bootmem_align(xstate_size, __alignof__(struct xsave_struct)); | ||
570 | init_restore_xstate(); | ||
571 | } | 562 | } |
572 | 563 | ||
573 | /* | 564 | /* |
@@ -586,6 +577,42 @@ void __cpuinit xsave_init(void) | |||
586 | return; | 577 | return; |
587 | 578 | ||
588 | this_func = next_func; | 579 | this_func = next_func; |
589 | next_func = xstate_enable_ap; | 580 | next_func = xstate_enable; |
590 | this_func(); | 581 | this_func(); |
591 | } | 582 | } |
583 | |||
584 | static inline void __init eager_fpu_init_bp(void) | ||
585 | { | ||
586 | current->thread.fpu.state = | ||
587 | alloc_bootmem_align(xstate_size, __alignof__(struct xsave_struct)); | ||
588 | if (!init_xstate_buf) | ||
589 | setup_init_fpu_buf(); | ||
590 | } | ||
591 | |||
592 | void __cpuinit eager_fpu_init(void) | ||
593 | { | ||
594 | static __refdata void (*boot_func)(void) = eager_fpu_init_bp; | ||
595 | |||
596 | clear_used_math(); | ||
597 | current_thread_info()->status = 0; | ||
598 | if (!cpu_has_eager_fpu) { | ||
599 | stts(); | ||
600 | return; | ||
601 | } | ||
602 | |||
603 | if (boot_func) { | ||
604 | boot_func(); | ||
605 | boot_func = NULL; | ||
606 | } | ||
607 | |||
608 | /* | ||
609 | * This is same as math_state_restore(). But use_xsave() is | ||
610 | * not yet patched to use math_state_restore(). | ||
611 | */ | ||
612 | init_fpu(current); | ||
613 | __thread_fpu_begin(current); | ||
614 | if (cpu_has_xsave) | ||
615 | xrstor_state(init_xstate_buf, -1); | ||
616 | else | ||
617 | fxrstor_checking(&init_xstate_buf->i387); | ||
618 | } | ||