author		Suresh Siddha <suresh.b.siddha@intel.com>	2008-03-10 18:28:04 -0400
committer	Ingo Molnar <mingo@elte.hu>			2008-04-19 13:19:55 -0400
commit		61c4628b538608c1a85211ed8438136adfeb9a95
tree		290a695299a363153bc692e6d705ac680d64359e
parent		fa5c4639419668cbb18ca3d20c1253559a3b43ae
x86, fpu: split FPU state from task struct - v5
Split the FPU save area from the task struct. This allows easy migration
of FPU context, and it's generally cleaner. It also allows the following
two optimizations:

1) only allocate when the application actually uses the FPU, i.e. in the
   first lazy FPU trap. This could save memory for apps that don't use
   the FPU. The next patch does this lazy allocation.

2) allocate the right size for the actual cpu rather than 512 bytes
   always. Patches enabling xsave/xrstor support (coming shortly) will
   take advantage of this.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
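In data-structure terms, the patch turns the fixed 512-byte union embedded in every thread_struct into a pointer to state allocated on demand. A minimal userspace sketch of the before/after shape (names abbreviated; these are not the kernel's actual definitions):

	#include <stdio.h>

	/* Before: a fixed, 16-byte-aligned FPU save area inside every task. */
	struct thread_before {
		unsigned char i387[512] __attribute__((aligned(16)));
	};

	/* After: tasks carry only a pointer; the save area is allocated
	 * lazily and sized for what the running CPU actually needs. */
	struct thread_after {
		unsigned char *xstate;
	};

	int main(void)
	{
		printf("embedded: %zu bytes in every task\n",
		       sizeof(struct thread_before));
		printf("pointer:  %zu bytes, plus xstate_size only when the FPU is used\n",
		       sizeof(struct thread_after));
		return 0;
	}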
-rw-r--r--  arch/x86/kernel/Makefile          |  1
-rw-r--r--  arch/x86/kernel/i387.c            | 80
-rw-r--r--  arch/x86/kernel/process.c         | 35
-rw-r--r--  arch/x86/kernel/process_32.c      |  2
-rw-r--r--  arch/x86/kernel/process_64.c      |  2
-rw-r--r--  arch/x86/kernel/traps_32.c        |  6
-rw-r--r--  arch/x86/kernel/traps_64.c        |  6
-rw-r--r--  arch/x86/math-emu/fpu_entry.c     |  4
-rw-r--r--  arch/x86/math-emu/fpu_system.h    | 26
-rw-r--r--  arch/x86/math-emu/reg_ld_str.c    |  4
-rw-r--r--  include/asm-x86/i387.h            | 35
-rw-r--r--  include/asm-x86/processor.h       |  7
-rw-r--r--  include/asm-x86/thread_info.h     |  8
-rw-r--r--  include/asm-x86/thread_info_32.h  |  2
-rw-r--r--  include/asm-x86/thread_info_64.h  |  2
-rw-r--r--  kernel/fork.c                     | 31
16 files changed, 161 insertions(+), 90 deletions(-)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index c3920ea8ac56..7a2a2e93e84b 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -29,6 +29,7 @@ obj-$(CONFIG_X86_64) += pci-nommu_64.o bugs_64.o
 obj-y				+= tsc_$(BITS).o io_delay.o rtc.o
 
 obj-$(CONFIG_X86_TRAMPOLINE)	+= trampoline.o
+obj-y				+= process.o
 obj-y				+= i387.o
 obj-y				+= ptrace.o
 obj-y				+= ds.o
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 8f8102d967b3..baf632b221d4 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -8,6 +8,7 @@
 #include <linux/module.h>
 #include <linux/regset.h>
 #include <linux/sched.h>
+#include <linux/bootmem.h>
 
 #include <asm/sigcontext.h>
 #include <asm/processor.h>
@@ -35,17 +36,18 @@
 #endif
 
 static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
+unsigned int xstate_size;
+static struct i387_fxsave_struct fx_scratch __cpuinitdata;
 
-void mxcsr_feature_mask_init(void)
+void __cpuinit mxcsr_feature_mask_init(void)
 {
 	unsigned long mask = 0;
 
 	clts();
 	if (cpu_has_fxsr) {
-		memset(&current->thread.i387.fxsave, 0,
-		       sizeof(struct i387_fxsave_struct));
-		asm volatile("fxsave %0" : : "m" (current->thread.i387.fxsave));
-		mask = current->thread.i387.fxsave.mxcsr_mask;
+		memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct));
+		asm volatile("fxsave %0" : : "m" (fx_scratch));
+		mask = fx_scratch.mxcsr_mask;
 		if (mask == 0)
 			mask = 0x0000ffbf;
 	}
@@ -53,6 +55,17 @@ void mxcsr_feature_mask_init(void)
 	stts();
 }
 
+void __init init_thread_xstate(void)
+{
+	if (cpu_has_fxsr)
+		xstate_size = sizeof(struct i387_fxsave_struct);
+#ifdef CONFIG_X86_32
+	else
+		xstate_size = sizeof(struct i387_fsave_struct);
+#endif
+	init_task.thread.xstate = alloc_bootmem(xstate_size);
+}
+
 #ifdef CONFIG_X86_64
 /*
  * Called at bootup to set up the initial FPU state that is later cloned
@@ -61,10 +74,6 @@ void mxcsr_feature_mask_init(void)
 void __cpuinit fpu_init(void)
 {
 	unsigned long oldcr0 = read_cr0();
-	extern void __bad_fxsave_alignment(void);
-
-	if (offsetof(struct task_struct, thread.i387.fxsave) & 15)
-		__bad_fxsave_alignment();
 
 	set_in_cr4(X86_CR4_OSFXSR);
 	set_in_cr4(X86_CR4_OSXMMEXCPT);
@@ -93,18 +102,19 @@ void init_fpu(struct task_struct *tsk)
 	}
 
 	if (cpu_has_fxsr) {
-		memset(&tsk->thread.i387.fxsave, 0,
-		       sizeof(struct i387_fxsave_struct));
-		tsk->thread.i387.fxsave.cwd = 0x37f;
+		struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave;
+
+		memset(fx, 0, xstate_size);
+		fx->cwd = 0x37f;
 		if (cpu_has_xmm)
-			tsk->thread.i387.fxsave.mxcsr = MXCSR_DEFAULT;
+			fx->mxcsr = MXCSR_DEFAULT;
 	} else {
-		memset(&tsk->thread.i387.fsave, 0,
-		       sizeof(struct i387_fsave_struct));
-		tsk->thread.i387.fsave.cwd = 0xffff037fu;
-		tsk->thread.i387.fsave.swd = 0xffff0000u;
-		tsk->thread.i387.fsave.twd = 0xffffffffu;
-		tsk->thread.i387.fsave.fos = 0xffff0000u;
+		struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave;
+		memset(fp, 0, xstate_size);
+		fp->cwd = 0xffff037fu;
+		fp->swd = 0xffff0000u;
+		fp->twd = 0xffffffffu;
+		fp->fos = 0xffff0000u;
 	}
 	/*
 	 * Only the device not available exception or ptrace can call init_fpu.
@@ -132,7 +142,7 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
 	init_fpu(target);
 
 	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
-				   &target->thread.i387.fxsave, 0, -1);
+				   &target->thread.xstate->fxsave, 0, -1);
 }
 
 int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
@@ -148,12 +158,12 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
 	set_stopped_child_used_math(target);
 
 	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-				 &target->thread.i387.fxsave, 0, -1);
+				 &target->thread.xstate->fxsave, 0, -1);
 
 	/*
 	 * mxcsr reserved bits must be masked to zero for security reasons.
 	 */
-	target->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask;
+	target->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask;
 
 	return ret;
 }
@@ -233,7 +243,7 @@ static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave)
 static void
 convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
 {
-	struct i387_fxsave_struct *fxsave = &tsk->thread.i387.fxsave;
+	struct i387_fxsave_struct *fxsave = &tsk->thread.xstate->fxsave;
 	struct _fpreg *to = (struct _fpreg *) &env->st_space[0];
 	struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0];
 	int i;
@@ -273,7 +283,7 @@ static void convert_to_fxsr(struct task_struct *tsk,
 			    const struct user_i387_ia32_struct *env)
 
 {
-	struct i387_fxsave_struct *fxsave = &tsk->thread.i387.fxsave;
+	struct i387_fxsave_struct *fxsave = &tsk->thread.xstate->fxsave;
 	struct _fpreg *from = (struct _fpreg *) &env->st_space[0];
 	struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0];
 	int i;
@@ -310,7 +320,8 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset,
 
 	if (!cpu_has_fxsr) {
 		return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
-					   &target->thread.i387.fsave, 0, -1);
+					   &target->thread.xstate->fsave, 0,
+					   -1);
 	}
 
 	if (kbuf && pos == 0 && count == sizeof(env)) {
@@ -338,7 +349,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
 
 	if (!cpu_has_fxsr) {
 		return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-					  &target->thread.i387.fsave, 0, -1);
+					  &target->thread.xstate->fsave, 0, -1);
 	}
 
 	if (pos > 0 || count < sizeof(env))
@@ -358,11 +369,11 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
 static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf)
 {
 	struct task_struct *tsk = current;
+	struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave;
 
 	unlazy_fpu(tsk);
-	tsk->thread.i387.fsave.status = tsk->thread.i387.fsave.swd;
-	if (__copy_to_user(buf, &tsk->thread.i387.fsave,
-			   sizeof(struct i387_fsave_struct)))
+	fp->status = fp->swd;
+	if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct)))
 		return -1;
 	return 1;
 }
@@ -370,6 +381,7 @@ static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf)
 static int save_i387_fxsave(struct _fpstate_ia32 __user *buf)
 {
 	struct task_struct *tsk = current;
+	struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave;
 	struct user_i387_ia32_struct env;
 	int err = 0;
 
@@ -379,12 +391,12 @@ static int save_i387_fxsave(struct _fpstate_ia32 __user *buf)
 	if (__copy_to_user(buf, &env, sizeof(env)))
 		return -1;
 
-	err |= __put_user(tsk->thread.i387.fxsave.swd, &buf->status);
+	err |= __put_user(fx->swd, &buf->status);
 	err |= __put_user(X86_FXSR_MAGIC, &buf->magic);
 	if (err)
 		return -1;
 
-	if (__copy_to_user(&buf->_fxsr_env[0], &tsk->thread.i387.fxsave,
+	if (__copy_to_user(&buf->_fxsr_env[0], fx,
 			   sizeof(struct i387_fxsave_struct)))
 		return -1;
 	return 1;
@@ -417,7 +429,7 @@ static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf)
 	struct task_struct *tsk = current;
 
 	clear_fpu(tsk);
-	return __copy_from_user(&tsk->thread.i387.fsave, buf,
+	return __copy_from_user(&tsk->thread.xstate->fsave, buf,
 				sizeof(struct i387_fsave_struct));
 }
 
@@ -428,10 +440,10 @@ static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf)
 	int err;
 
 	clear_fpu(tsk);
-	err = __copy_from_user(&tsk->thread.i387.fxsave, &buf->_fxsr_env[0],
+	err = __copy_from_user(&tsk->thread.xstate->fxsave, &buf->_fxsr_env[0],
 			       sizeof(struct i387_fxsave_struct));
 	/* mxcsr reserved bits must be masked to zero for security reasons */
-	tsk->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask;
+	tsk->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask;
 	if (err || __copy_from_user(&env, buf, sizeof(env)))
 		return 1;
 	convert_to_fxsr(tsk, &env);
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
new file mode 100644
index 000000000000..ead24efbcba0
--- /dev/null
+++ b/arch/x86/kernel/process.c
@@ -0,0 +1,35 @@
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+
+static struct kmem_cache *task_xstate_cachep;
+
+int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
+{
+	*dst = *src;
+	dst->thread.xstate = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL);
+	if (!dst->thread.xstate)
+		return -ENOMEM;
+	WARN_ON((unsigned long)dst->thread.xstate & 15);
+	memcpy(dst->thread.xstate, src->thread.xstate, xstate_size);
+	return 0;
+}
+
+void free_thread_info(struct thread_info *ti)
+{
+	kmem_cache_free(task_xstate_cachep, ti->task->thread.xstate);
+	ti->task->thread.xstate = NULL;
+
+	free_pages((unsigned long)(ti), get_order(THREAD_SIZE));
+}
+
+void arch_task_cache_init(void)
+{
+	task_xstate_cachep =
+		kmem_cache_create("task_xstate", xstate_size,
+				  __alignof__(union thread_xstate),
+				  SLAB_PANIC, NULL);
+}
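The split makes task duplication a two-step copy: a shallow struct assignment would leave parent and child sharing one save area, so arch_dup_task_struct() re-points the child at a fresh allocation and copies the bytes. A userspace analogue of that pattern, with malloc standing in for the slab cache (illustrative only; dup_xstate is a made-up name):

	#include <stdlib.h>
	#include <string.h>

	struct task { unsigned char *xstate; };

	static size_t xstate_size = 512;	/* stand-in for the CPU-dependent size */

	/* After a shallow '*dst = *src', dst->xstate still points at the
	 * parent's save area; give the child its own copy instead. */
	int dup_xstate(struct task *dst, const struct task *src)
	{
		dst->xstate = malloc(xstate_size);
		if (!dst->xstate)
			return -1;
		memcpy(dst->xstate, src->xstate, xstate_size);
		return 0;
	}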
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index a3790a3f8a83..3890a5dd25f9 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -703,7 +703,7 @@ struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct
 
 	/* we're going to use this soon, after a few expensive things */
 	if (next_p->fpu_counter > 5)
-		prefetch(&next->i387.fxsave);
+		prefetch(next->xstate);
 
 	/*
 	 * Reload esp0.
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 4c13b1406c70..b795e831afd6 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -682,7 +682,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 
 	/* we're going to use this soon, after a few expensive things */
 	if (next_p->fpu_counter>5)
-		prefetch(&next->i387.fxsave);
+		prefetch(next->xstate);
 
 	/*
 	 * Reload esp0, LDT and the page table pointer:
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index dc4273010f2a..8d136a73ce8e 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -1208,11 +1208,6 @@ void __init trap_init(void)
 #endif
 	set_trap_gate(19, &simd_coprocessor_error);
 
-	/*
-	 * Verify that the FXSAVE/FXRSTOR data will be 16-byte aligned.
-	 * Generate a build-time error if the alignment is wrong.
-	 */
-	BUILD_BUG_ON(offsetof(struct task_struct, thread.i387.fxsave) & 15);
 	if (cpu_has_fxsr) {
 		printk(KERN_INFO "Enabling fast FPU save and restore... ");
 		set_in_cr4(X86_CR4_OSFXSR);
@@ -1233,6 +1228,7 @@ void __init trap_init(void)
 
 	set_bit(SYSCALL_VECTOR, used_vectors);
 
+	init_thread_xstate();
 	/*
 	 * Should be a barrier for any external CPU state:
 	 */
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index 6d883b13ef4f..dc0cb497eec3 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -1128,7 +1128,7 @@ asmlinkage void math_state_restore(void)
 
 	if (!used_math())
 		init_fpu(me);
-	restore_fpu_checking(&me->thread.i387.fxsave);
+	restore_fpu_checking(&me->thread.xstate->fxsave);
 	task_thread_info(me)->status |= TS_USEDFPU;
 	me->fpu_counter++;
 }
@@ -1164,6 +1164,10 @@ void __init trap_init(void)
 #endif
 
 	/*
+	 * initialize the per thread extended state:
+	 */
+	init_thread_xstate();
+	/*
 	 * Should be a barrier for any external CPU state.
 	 */
 	cpu_init();
diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c
index 4bab3b145392..6e38d877ea77 100644
--- a/arch/x86/math-emu/fpu_entry.c
+++ b/arch/x86/math-emu/fpu_entry.c
@@ -678,7 +678,7 @@ int fpregs_soft_set(struct task_struct *target,
 			unsigned int pos, unsigned int count,
 			const void *kbuf, const void __user *ubuf)
 {
-	struct i387_soft_struct *s387 = &target->thread.i387.soft;
+	struct i387_soft_struct *s387 = &target->thread.xstate->soft;
 	void *space = s387->st_space;
 	int ret;
 	int offset, other, i, tags, regnr, tag, newtop;
@@ -730,7 +730,7 @@ int fpregs_soft_get(struct task_struct *target,
 			unsigned int pos, unsigned int count,
 			void *kbuf, void __user *ubuf)
 {
-	struct i387_soft_struct *s387 = &target->thread.i387.soft;
+	struct i387_soft_struct *s387 = &target->thread.xstate->soft;
 	const void *space = s387->st_space;
 	int ret;
 	int offset = (S387->ftop & 7) * 10, other = 80 - offset;
diff --git a/arch/x86/math-emu/fpu_system.h b/arch/x86/math-emu/fpu_system.h
index a3ae28c49ddd..13488fa153e0 100644
--- a/arch/x86/math-emu/fpu_system.h
+++ b/arch/x86/math-emu/fpu_system.h
@@ -35,8 +35,8 @@
 #define SEG_EXPAND_DOWN(s)	(((s).b & ((1 << 11) | (1 << 10))) \
 				 == (1 << 10))
 
-#define I387			(current->thread.i387)
-#define FPU_info		(I387.soft.info)
+#define I387			(current->thread.xstate)
+#define FPU_info		(I387->soft.info)
 
 #define FPU_CS			(*(unsigned short *) &(FPU_info->___cs))
 #define FPU_SS			(*(unsigned short *) &(FPU_info->___ss))
@@ -46,25 +46,25 @@
 #define FPU_EIP			(FPU_info->___eip)
 #define FPU_ORIG_EIP		(FPU_info->___orig_eip)
 
-#define FPU_lookahead		(I387.soft.lookahead)
+#define FPU_lookahead		(I387->soft.lookahead)
 
 /* nz if ip_offset and cs_selector are not to be set for the current
    instruction. */
-#define no_ip_update		(*(u_char *)&(I387.soft.no_update))
-#define FPU_rm			(*(u_char *)&(I387.soft.rm))
+#define no_ip_update		(*(u_char *)&(I387->soft.no_update))
+#define FPU_rm			(*(u_char *)&(I387->soft.rm))
 
 /* Number of bytes of data which can be legally accessed by the current
    instruction. This only needs to hold a number <= 108, so a byte will do. */
-#define access_limit		(*(u_char *)&(I387.soft.alimit))
+#define access_limit		(*(u_char *)&(I387->soft.alimit))
 
-#define partial_status		(I387.soft.swd)
-#define control_word		(I387.soft.cwd)
-#define fpu_tag_word		(I387.soft.twd)
-#define registers		(I387.soft.st_space)
-#define top			(I387.soft.ftop)
+#define partial_status		(I387->soft.swd)
+#define control_word		(I387->soft.cwd)
+#define fpu_tag_word		(I387->soft.twd)
+#define registers		(I387->soft.st_space)
+#define top			(I387->soft.ftop)
 
-#define instruction_address	(*(struct address *)&I387.soft.fip)
-#define operand_address		(*(struct address *)&I387.soft.foo)
+#define instruction_address	(*(struct address *)&I387->soft.fip)
+#define operand_address		(*(struct address *)&I387->soft.foo)
 
 #define FPU_access_ok(x,y,z)	if ( !access_ok(x,y,z) ) \
 				math_abort(FPU_info,SIGSEGV)
diff --git a/arch/x86/math-emu/reg_ld_str.c b/arch/x86/math-emu/reg_ld_str.c
index 02af772a24db..d597fe7423c9 100644
--- a/arch/x86/math-emu/reg_ld_str.c
+++ b/arch/x86/math-emu/reg_ld_str.c
@@ -1180,8 +1180,8 @@ u_char __user *fstenv(fpu_addr_modes addr_modes, u_char __user *d)
 	control_word |= 0xffff0040;
 	partial_status = status_word() | 0xffff0000;
 	fpu_tag_word |= 0xffff0000;
-	I387.soft.fcs &= ~0xf8000000;
-	I387.soft.fos |= 0xffff0000;
+	I387->soft.fcs &= ~0xf8000000;
+	I387->soft.fos |= 0xffff0000;
 #endif /* PECULIAR_486 */
 	if (__copy_to_user(d, &control_word, 7 * 4))
 		FPU_abort;
diff --git a/include/asm-x86/i387.h b/include/asm-x86/i387.h
index 54522b814f1c..382a5fa9d492 100644
--- a/include/asm-x86/i387.h
+++ b/include/asm-x86/i387.h
@@ -23,6 +23,7 @@ extern void fpu_init(void);
 extern void mxcsr_feature_mask_init(void);
 extern void init_fpu(struct task_struct *child);
 extern asmlinkage void math_state_restore(void);
+extern void init_thread_xstate(void);
 
 extern user_regset_active_fn fpregs_active, xfpregs_active;
 extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get;
@@ -117,24 +118,22 @@ static inline void __save_init_fpu(struct task_struct *tsk)
 	/* Using "fxsaveq %0" would be the ideal choice, but is only supported
 	   starting with gas 2.16. */
 	__asm__ __volatile__("fxsaveq %0"
-			     : "=m" (tsk->thread.i387.fxsave));
+			     : "=m" (tsk->thread.xstate->fxsave));
 #elif 0
 	/* Using, as a workaround, the properly prefixed form below isn't
 	   accepted by any binutils version so far released, complaining that
 	   the same type of prefix is used twice if an extended register is
 	   needed for addressing (fix submitted to mainline 2005-11-21). */
 	__asm__ __volatile__("rex64/fxsave %0"
-			     : "=m" (tsk->thread.i387.fxsave));
+			     : "=m" (tsk->thread.xstate->fxsave));
 #else
 	/* This, however, we can work around by forcing the compiler to select
 	   an addressing mode that doesn't require extended registers. */
-	__asm__ __volatile__("rex64/fxsave %P2(%1)"
-			     : "=m" (tsk->thread.i387.fxsave)
-			     : "cdaSDb" (tsk),
-			       "i" (offsetof(__typeof__(*tsk),
-					     thread.i387.fxsave)));
+	__asm__ __volatile__("rex64/fxsave (%1)"
+			     : "=m" (tsk->thread.xstate->fxsave)
+			     : "cdaSDb" (&tsk->thread.xstate->fxsave));
 #endif
-	clear_fpu_state(&tsk->thread.i387.fxsave);
+	clear_fpu_state(&tsk->thread.xstate->fxsave);
 	task_thread_info(tsk)->status &= ~TS_USEDFPU;
 }
 
@@ -148,7 +147,7 @@ static inline int save_i387(struct _fpstate __user *buf)
 	int err = 0;
 
 	BUILD_BUG_ON(sizeof(struct user_i387_struct) !=
-		     sizeof(tsk->thread.i387.fxsave));
+		     sizeof(tsk->thread.xstate->fxsave));
 
 	if ((unsigned long)buf % 16)
 		printk("save_i387: bad fpstate %p\n", buf);
@@ -164,7 +163,7 @@ static inline int save_i387(struct _fpstate __user *buf)
 		task_thread_info(tsk)->status &= ~TS_USEDFPU;
 		stts();
 	} else {
-		if (__copy_to_user(buf, &tsk->thread.i387.fxsave,
+		if (__copy_to_user(buf, &tsk->thread.xstate->fxsave,
 				   sizeof(struct i387_fxsave_struct)))
 			return -1;
 	}
@@ -201,7 +200,7 @@ static inline void restore_fpu(struct task_struct *tsk)
 		      "nop ; frstor %1",
 		      "fxrstor %1",
 		      X86_FEATURE_FXSR,
-		      "m" ((tsk)->thread.i387.fxsave));
+		      "m" (tsk->thread.xstate->fxsave));
 }
 
 /* We need a safe address that is cheap to find and that is already
@@ -225,8 +224,8 @@ static inline void __save_init_fpu(struct task_struct *tsk)
 		     "fxsave %[fx]\n"
 		     "bt $7,%[fsw] ; jnc 1f ; fnclex\n1:",
 		     X86_FEATURE_FXSR,
-		     [fx] "m" (tsk->thread.i387.fxsave),
-		     [fsw] "m" (tsk->thread.i387.fxsave.swd) : "memory");
+		     [fx] "m" (tsk->thread.xstate->fxsave),
+		     [fsw] "m" (tsk->thread.xstate->fxsave.swd) : "memory");
 	/* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
 	   is pending.  Clear the x87 state here by setting it to fixed
 	   values. safe_address is a random variable that should be in L1 */
@@ -327,25 +326,25 @@ static inline void clear_fpu(struct task_struct *tsk)
 static inline unsigned short get_fpu_cwd(struct task_struct *tsk)
 {
 	if (cpu_has_fxsr) {
-		return tsk->thread.i387.fxsave.cwd;
+		return tsk->thread.xstate->fxsave.cwd;
 	} else {
-		return (unsigned short)tsk->thread.i387.fsave.cwd;
+		return (unsigned short) tsk->thread.xstate->fsave.cwd;
 	}
 }
 
 static inline unsigned short get_fpu_swd(struct task_struct *tsk)
 {
 	if (cpu_has_fxsr) {
-		return tsk->thread.i387.fxsave.swd;
+		return tsk->thread.xstate->fxsave.swd;
 	} else {
-		return (unsigned short)tsk->thread.i387.fsave.swd;
+		return (unsigned short) tsk->thread.xstate->fsave.swd;
 	}
 }
 
 static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk)
 {
 	if (cpu_has_xmm) {
-		return tsk->thread.i387.fxsave.mxcsr;
+		return tsk->thread.xstate->fxsave.mxcsr;
 	} else {
 		return MXCSR_DEFAULT;
 	}
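One subtlety in the i387.h changes: FXSAVE/FXRSTOR fault unless their operand is 16-byte aligned. The embedded union guaranteed that with aligned(16) plus a build-time offsetof check; with the state now allocated separately, the guarantee moves to the slab cache's __alignof__ argument and the runtime WARN_ON in arch_dup_task_struct(). A small sketch of the same check in userspace, with C11 aligned_alloc standing in for the slab allocator:

	#include <stdint.h>
	#include <stdio.h>
	#include <stdlib.h>

	int main(void)
	{
		/* 512 is a multiple of 16, as aligned_alloc requires */
		void *fx = aligned_alloc(16, 512);
		if (!fx)
			return 1;
		/* the same test as the kernel's WARN_ON(... & 15) */
		printf("16-byte aligned: %s\n",
		       ((uintptr_t)fx & 15) ? "no" : "yes");
		free(fx);
		return 0;
	}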
diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index eaf4548a23d2..99d297885780 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -354,7 +354,7 @@ struct i387_soft_struct {
 	u32	entry_eip;
 };
 
-union i387_union {
+union thread_xstate {
 	struct i387_fsave_struct	fsave;
 	struct i387_fxsave_struct	fxsave;
 	struct i387_soft_struct		soft;
@@ -365,6 +365,7 @@ DECLARE_PER_CPU(struct orig_ist, orig_ist);
 #endif
 
 extern void print_cpu_info(struct cpuinfo_x86 *);
+extern unsigned int xstate_size;
 extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
 extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
 extern unsigned short num_cache_leaves;
@@ -397,8 +398,8 @@ struct thread_struct {
 	unsigned long	cr2;
 	unsigned long	trap_no;
 	unsigned long	error_code;
-	/* Floating point info: */
-	union i387_union	i387 __attribute__((aligned(16)));;
+	/* floating point and extended processor state */
+	union thread_xstate *xstate;
 #ifdef CONFIG_X86_32
 	/* Virtual 86 mode info */
 	struct vm86_struct __user *vm86_info;
diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h
index d5fd12f2abdb..407b88c170d3 100644
--- a/include/asm-x86/thread_info.h
+++ b/include/asm-x86/thread_info.h
@@ -1,5 +1,13 @@
+#ifndef _ASM_X86_THREAD_INFO_H
 #ifdef CONFIG_X86_32
 # include "thread_info_32.h"
 #else
 # include "thread_info_64.h"
 #endif
+
+#ifndef __ASSEMBLY__
+extern void arch_task_cache_init(void);
+extern void free_thread_info(struct thread_info *ti);
+extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
+#endif
+#endif /* _ASM_X86_THREAD_INFO_H */
diff --git a/include/asm-x86/thread_info_32.h b/include/asm-x86/thread_info_32.h
index 4e053fa561a9..531859962096 100644
--- a/include/asm-x86/thread_info_32.h
+++ b/include/asm-x86/thread_info_32.h
@@ -102,8 +102,6 @@ static inline struct thread_info *current_thread_info(void)
 	__get_free_pages(GFP_KERNEL, get_order(THREAD_SIZE)))
 #endif
 
-#define free_thread_info(info)	free_pages((unsigned long)(info), get_order(THREAD_SIZE))
-
 #else /* !__ASSEMBLY__ */
 
 /* how to get the thread information struct from ASM */
diff --git a/include/asm-x86/thread_info_64.h b/include/asm-x86/thread_info_64.h
index b17f5f6c2c59..ed664e874dec 100644
--- a/include/asm-x86/thread_info_64.h
+++ b/include/asm-x86/thread_info_64.h
@@ -85,8 +85,6 @@ static inline struct thread_info *stack_thread_info(void)
 #define alloc_thread_info(tsk) \
 	((struct thread_info *)__get_free_pages(THREAD_FLAGS, THREAD_ORDER))
 
-#define free_thread_info(ti) free_pages((unsigned long) (ti), THREAD_ORDER)
-
 #else /* !__ASSEMBLY__ */
 
 /* how to get the thread information struct from ASM */
diff --git a/kernel/fork.c b/kernel/fork.c
index 9c042f901570..44a18192c420 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -132,6 +132,10 @@ void __put_task_struct(struct task_struct *tsk)
 	free_task(tsk);
 }
 
+void __attribute__((weak)) arch_task_cache_init(void)
+{
+}
+
 void __init fork_init(unsigned long mempages)
 {
 #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
@@ -144,6 +148,9 @@ void __init fork_init(unsigned long mempages)
 			ARCH_MIN_TASKALIGN, SLAB_PANIC, NULL);
 #endif
 
+	/* do the arch specific task caches init */
+	arch_task_cache_init();
+
 	/*
 	 * The default maximum number of threads is set to a safe
 	 * value: the thread structures can take up at most half
@@ -163,6 +170,13 @@ void __init fork_init(unsigned long mempages)
 		init_task.signal->rlim[RLIMIT_NPROC];
 }
 
+int __attribute__((weak)) arch_dup_task_struct(struct task_struct *dst,
+					       struct task_struct *src)
+{
+	*dst = *src;
+	return 0;
+}
+
 static struct task_struct *dup_task_struct(struct task_struct *orig)
 {
 	struct task_struct *tsk;
@@ -181,15 +195,15 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 		return NULL;
 	}
 
-	*tsk = *orig;
+	err = arch_dup_task_struct(tsk, orig);
+	if (err)
+		goto out;
+
 	tsk->stack = ti;
 
 	err = prop_local_init_single(&tsk->dirties);
-	if (err) {
-		free_thread_info(ti);
-		free_task_struct(tsk);
-		return NULL;
-	}
+	if (err)
+		goto out;
 
 	setup_thread_stack(tsk, orig);
 
@@ -205,6 +219,11 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 #endif
 	tsk->splice_pipe = NULL;
 	return tsk;
+
+out:
+	free_thread_info(ti);
+	free_task_struct(tsk);
+	return NULL;
 }
 
 #ifdef CONFIG_MMU
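The fork.c side works because the generic definitions are weak: any architecture that links in a strong arch_task_cache_init()/arch_dup_task_struct() (as arch/x86/kernel/process.c now does) replaces them, with no #ifdef in generic code. A two-file demo of that GCC/ELF mechanism (hypothetical file names):

	/* generic.c */
	#include <stdio.h>

	/* Weak default, as in kernel/fork.c: used only when no strong
	 * definition of the same symbol is linked in. */
	void __attribute__((weak)) arch_task_cache_init(void)
	{
		puts("generic no-op");
	}

	int main(void)
	{
		arch_task_cache_init();
		return 0;
	}

	/* arch.c -- linking this in overrides the weak default */
	#include <stdio.h>

	void arch_task_cache_init(void)
	{
		puts("arch-specific init");
	}

Compiling generic.c alone prints the generic message; compiling generic.c together with arch.c prints the arch-specific one.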