diff options
author | Suresh Siddha <suresh.b.siddha@intel.com> | 2008-03-10 18:28:04 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-04-19 13:19:55 -0400 |
commit | 61c4628b538608c1a85211ed8438136adfeb9a95 (patch) | |
tree | 290a695299a363153bc692e6d705ac680d64359e /arch/x86/kernel/i387.c | |
parent | fa5c4639419668cbb18ca3d20c1253559a3b43ae (diff) |
x86, fpu: split FPU state from task struct - v5
Split the FPU save area from the task struct. This allows easy migration
of FPU context, and it's generally cleaner. It also allows the following
two optimizations:
1) only allocate when the application actually uses the FPU, i.e. in the first
lazy FPU trap. This could save memory for apps that do not use the FPU. The next
patch does this lazy allocation.
2) allocate the right size for the actual cpu rather than 512 bytes always.
Patches enabling xsave/xrstor support (coming shortly) will take advantage
of this.
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'arch/x86/kernel/i387.c')
-rw-r--r-- | arch/x86/kernel/i387.c | 80 |
1 files changed, 46 insertions, 34 deletions
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 8f8102d967b3..baf632b221d4 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c | |||
@@ -8,6 +8,7 @@ | |||
8 | #include <linux/module.h> | 8 | #include <linux/module.h> |
9 | #include <linux/regset.h> | 9 | #include <linux/regset.h> |
10 | #include <linux/sched.h> | 10 | #include <linux/sched.h> |
11 | #include <linux/bootmem.h> | ||
11 | 12 | ||
12 | #include <asm/sigcontext.h> | 13 | #include <asm/sigcontext.h> |
13 | #include <asm/processor.h> | 14 | #include <asm/processor.h> |
@@ -35,17 +36,18 @@ | |||
35 | #endif | 36 | #endif |
36 | 37 | ||
37 | static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; | 38 | static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; |
39 | unsigned int xstate_size; | ||
40 | static struct i387_fxsave_struct fx_scratch __cpuinitdata; | ||
38 | 41 | ||
39 | void mxcsr_feature_mask_init(void) | 42 | void __cpuinit mxcsr_feature_mask_init(void) |
40 | { | 43 | { |
41 | unsigned long mask = 0; | 44 | unsigned long mask = 0; |
42 | 45 | ||
43 | clts(); | 46 | clts(); |
44 | if (cpu_has_fxsr) { | 47 | if (cpu_has_fxsr) { |
45 | memset(&current->thread.i387.fxsave, 0, | 48 | memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct)); |
46 | sizeof(struct i387_fxsave_struct)); | 49 | asm volatile("fxsave %0" : : "m" (fx_scratch)); |
47 | asm volatile("fxsave %0" : : "m" (current->thread.i387.fxsave)); | 50 | mask = fx_scratch.mxcsr_mask; |
48 | mask = current->thread.i387.fxsave.mxcsr_mask; | ||
49 | if (mask == 0) | 51 | if (mask == 0) |
50 | mask = 0x0000ffbf; | 52 | mask = 0x0000ffbf; |
51 | } | 53 | } |
@@ -53,6 +55,17 @@ void mxcsr_feature_mask_init(void) | |||
53 | stts(); | 55 | stts(); |
54 | } | 56 | } |
55 | 57 | ||
58 | void __init init_thread_xstate(void) | ||
59 | { | ||
60 | if (cpu_has_fxsr) | ||
61 | xstate_size = sizeof(struct i387_fxsave_struct); | ||
62 | #ifdef CONFIG_X86_32 | ||
63 | else | ||
64 | xstate_size = sizeof(struct i387_fsave_struct); | ||
65 | #endif | ||
66 | init_task.thread.xstate = alloc_bootmem(xstate_size); | ||
67 | } | ||
68 | |||
56 | #ifdef CONFIG_X86_64 | 69 | #ifdef CONFIG_X86_64 |
57 | /* | 70 | /* |
58 | * Called at bootup to set up the initial FPU state that is later cloned | 71 | * Called at bootup to set up the initial FPU state that is later cloned |
@@ -61,10 +74,6 @@ void mxcsr_feature_mask_init(void) | |||
61 | void __cpuinit fpu_init(void) | 74 | void __cpuinit fpu_init(void) |
62 | { | 75 | { |
63 | unsigned long oldcr0 = read_cr0(); | 76 | unsigned long oldcr0 = read_cr0(); |
64 | extern void __bad_fxsave_alignment(void); | ||
65 | |||
66 | if (offsetof(struct task_struct, thread.i387.fxsave) & 15) | ||
67 | __bad_fxsave_alignment(); | ||
68 | 77 | ||
69 | set_in_cr4(X86_CR4_OSFXSR); | 78 | set_in_cr4(X86_CR4_OSFXSR); |
70 | set_in_cr4(X86_CR4_OSXMMEXCPT); | 79 | set_in_cr4(X86_CR4_OSXMMEXCPT); |
@@ -93,18 +102,19 @@ void init_fpu(struct task_struct *tsk) | |||
93 | } | 102 | } |
94 | 103 | ||
95 | if (cpu_has_fxsr) { | 104 | if (cpu_has_fxsr) { |
96 | memset(&tsk->thread.i387.fxsave, 0, | 105 | struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave; |
97 | sizeof(struct i387_fxsave_struct)); | 106 | |
98 | tsk->thread.i387.fxsave.cwd = 0x37f; | 107 | memset(fx, 0, xstate_size); |
108 | fx->cwd = 0x37f; | ||
99 | if (cpu_has_xmm) | 109 | if (cpu_has_xmm) |
100 | tsk->thread.i387.fxsave.mxcsr = MXCSR_DEFAULT; | 110 | fx->mxcsr = MXCSR_DEFAULT; |
101 | } else { | 111 | } else { |
102 | memset(&tsk->thread.i387.fsave, 0, | 112 | struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave; |
103 | sizeof(struct i387_fsave_struct)); | 113 | memset(fp, 0, xstate_size); |
104 | tsk->thread.i387.fsave.cwd = 0xffff037fu; | 114 | fp->cwd = 0xffff037fu; |
105 | tsk->thread.i387.fsave.swd = 0xffff0000u; | 115 | fp->swd = 0xffff0000u; |
106 | tsk->thread.i387.fsave.twd = 0xffffffffu; | 116 | fp->twd = 0xffffffffu; |
107 | tsk->thread.i387.fsave.fos = 0xffff0000u; | 117 | fp->fos = 0xffff0000u; |
108 | } | 118 | } |
109 | /* | 119 | /* |
110 | * Only the device not available exception or ptrace can call init_fpu. | 120 | * Only the device not available exception or ptrace can call init_fpu. |
@@ -132,7 +142,7 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset, | |||
132 | init_fpu(target); | 142 | init_fpu(target); |
133 | 143 | ||
134 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, | 144 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, |
135 | &target->thread.i387.fxsave, 0, -1); | 145 | &target->thread.xstate->fxsave, 0, -1); |
136 | } | 146 | } |
137 | 147 | ||
138 | int xfpregs_set(struct task_struct *target, const struct user_regset *regset, | 148 | int xfpregs_set(struct task_struct *target, const struct user_regset *regset, |
@@ -148,12 +158,12 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, | |||
148 | set_stopped_child_used_math(target); | 158 | set_stopped_child_used_math(target); |
149 | 159 | ||
150 | ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, | 160 | ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, |
151 | &target->thread.i387.fxsave, 0, -1); | 161 | &target->thread.xstate->fxsave, 0, -1); |
152 | 162 | ||
153 | /* | 163 | /* |
154 | * mxcsr reserved bits must be masked to zero for security reasons. | 164 | * mxcsr reserved bits must be masked to zero for security reasons. |
155 | */ | 165 | */ |
156 | target->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask; | 166 | target->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; |
157 | 167 | ||
158 | return ret; | 168 | return ret; |
159 | } | 169 | } |
@@ -233,7 +243,7 @@ static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave) | |||
233 | static void | 243 | static void |
234 | convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) | 244 | convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) |
235 | { | 245 | { |
236 | struct i387_fxsave_struct *fxsave = &tsk->thread.i387.fxsave; | 246 | struct i387_fxsave_struct *fxsave = &tsk->thread.xstate->fxsave; |
237 | struct _fpreg *to = (struct _fpreg *) &env->st_space[0]; | 247 | struct _fpreg *to = (struct _fpreg *) &env->st_space[0]; |
238 | struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0]; | 248 | struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0]; |
239 | int i; | 249 | int i; |
@@ -273,7 +283,7 @@ static void convert_to_fxsr(struct task_struct *tsk, | |||
273 | const struct user_i387_ia32_struct *env) | 283 | const struct user_i387_ia32_struct *env) |
274 | 284 | ||
275 | { | 285 | { |
276 | struct i387_fxsave_struct *fxsave = &tsk->thread.i387.fxsave; | 286 | struct i387_fxsave_struct *fxsave = &tsk->thread.xstate->fxsave; |
277 | struct _fpreg *from = (struct _fpreg *) &env->st_space[0]; | 287 | struct _fpreg *from = (struct _fpreg *) &env->st_space[0]; |
278 | struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0]; | 288 | struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0]; |
279 | int i; | 289 | int i; |
@@ -310,7 +320,8 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset, | |||
310 | 320 | ||
311 | if (!cpu_has_fxsr) { | 321 | if (!cpu_has_fxsr) { |
312 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, | 322 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, |
313 | &target->thread.i387.fsave, 0, -1); | 323 | &target->thread.xstate->fsave, 0, |
324 | -1); | ||
314 | } | 325 | } |
315 | 326 | ||
316 | if (kbuf && pos == 0 && count == sizeof(env)) { | 327 | if (kbuf && pos == 0 && count == sizeof(env)) { |
@@ -338,7 +349,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, | |||
338 | 349 | ||
339 | if (!cpu_has_fxsr) { | 350 | if (!cpu_has_fxsr) { |
340 | return user_regset_copyin(&pos, &count, &kbuf, &ubuf, | 351 | return user_regset_copyin(&pos, &count, &kbuf, &ubuf, |
341 | &target->thread.i387.fsave, 0, -1); | 352 | &target->thread.xstate->fsave, 0, -1); |
342 | } | 353 | } |
343 | 354 | ||
344 | if (pos > 0 || count < sizeof(env)) | 355 | if (pos > 0 || count < sizeof(env)) |
@@ -358,11 +369,11 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, | |||
358 | static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf) | 369 | static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf) |
359 | { | 370 | { |
360 | struct task_struct *tsk = current; | 371 | struct task_struct *tsk = current; |
372 | struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave; | ||
361 | 373 | ||
362 | unlazy_fpu(tsk); | 374 | unlazy_fpu(tsk); |
363 | tsk->thread.i387.fsave.status = tsk->thread.i387.fsave.swd; | 375 | fp->status = fp->swd; |
364 | if (__copy_to_user(buf, &tsk->thread.i387.fsave, | 376 | if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct))) |
365 | sizeof(struct i387_fsave_struct))) | ||
366 | return -1; | 377 | return -1; |
367 | return 1; | 378 | return 1; |
368 | } | 379 | } |
@@ -370,6 +381,7 @@ static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf) | |||
370 | static int save_i387_fxsave(struct _fpstate_ia32 __user *buf) | 381 | static int save_i387_fxsave(struct _fpstate_ia32 __user *buf) |
371 | { | 382 | { |
372 | struct task_struct *tsk = current; | 383 | struct task_struct *tsk = current; |
384 | struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave; | ||
373 | struct user_i387_ia32_struct env; | 385 | struct user_i387_ia32_struct env; |
374 | int err = 0; | 386 | int err = 0; |
375 | 387 | ||
@@ -379,12 +391,12 @@ static int save_i387_fxsave(struct _fpstate_ia32 __user *buf) | |||
379 | if (__copy_to_user(buf, &env, sizeof(env))) | 391 | if (__copy_to_user(buf, &env, sizeof(env))) |
380 | return -1; | 392 | return -1; |
381 | 393 | ||
382 | err |= __put_user(tsk->thread.i387.fxsave.swd, &buf->status); | 394 | err |= __put_user(fx->swd, &buf->status); |
383 | err |= __put_user(X86_FXSR_MAGIC, &buf->magic); | 395 | err |= __put_user(X86_FXSR_MAGIC, &buf->magic); |
384 | if (err) | 396 | if (err) |
385 | return -1; | 397 | return -1; |
386 | 398 | ||
387 | if (__copy_to_user(&buf->_fxsr_env[0], &tsk->thread.i387.fxsave, | 399 | if (__copy_to_user(&buf->_fxsr_env[0], fx, |
388 | sizeof(struct i387_fxsave_struct))) | 400 | sizeof(struct i387_fxsave_struct))) |
389 | return -1; | 401 | return -1; |
390 | return 1; | 402 | return 1; |
@@ -417,7 +429,7 @@ static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf) | |||
417 | struct task_struct *tsk = current; | 429 | struct task_struct *tsk = current; |
418 | 430 | ||
419 | clear_fpu(tsk); | 431 | clear_fpu(tsk); |
420 | return __copy_from_user(&tsk->thread.i387.fsave, buf, | 432 | return __copy_from_user(&tsk->thread.xstate->fsave, buf, |
421 | sizeof(struct i387_fsave_struct)); | 433 | sizeof(struct i387_fsave_struct)); |
422 | } | 434 | } |
423 | 435 | ||
@@ -428,10 +440,10 @@ static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf) | |||
428 | int err; | 440 | int err; |
429 | 441 | ||
430 | clear_fpu(tsk); | 442 | clear_fpu(tsk); |
431 | err = __copy_from_user(&tsk->thread.i387.fxsave, &buf->_fxsr_env[0], | 443 | err = __copy_from_user(&tsk->thread.xstate->fxsave, &buf->_fxsr_env[0], |
432 | sizeof(struct i387_fxsave_struct)); | 444 | sizeof(struct i387_fxsave_struct)); |
433 | /* mxcsr reserved bits must be masked to zero for security reasons */ | 445 | /* mxcsr reserved bits must be masked to zero for security reasons */ |
434 | tsk->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask; | 446 | tsk->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; |
435 | if (err || __copy_from_user(&env, buf, sizeof(env))) | 447 | if (err || __copy_from_user(&env, buf, sizeof(env))) |
436 | return 1; | 448 | return 1; |
437 | convert_to_fxsr(tsk, &env); | 449 | convert_to_fxsr(tsk, &env); |