author	Suresh Siddha <suresh.b.siddha@intel.com>	2008-03-10 18:28:04 -0400
committer	Ingo Molnar <mingo@elte.hu>	2008-04-19 13:19:55 -0400
commit	61c4628b538608c1a85211ed8438136adfeb9a95 (patch)
tree	290a695299a363153bc692e6d705ac680d64359e /include
parent	fa5c4639419668cbb18ca3d20c1253559a3b43ae (diff)
x86, fpu: split FPU state from task struct - v5
Split the FPU save area from the task struct.  This allows easy migration
of FPU context, and it's generally cleaner.  It also allows the following
two optimizations:

1) only allocate when the application actually uses FPU, so in the first
   lazy FPU trap.  This could save memory for non-fpu using apps.  Next
   patch does this lazy allocation.

2) allocate the right size for the actual cpu rather than 512 bytes always.
   Patches enabling xsave/xrstor support (coming shortly) will take
   advantage of this.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
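For context, a minimal sketch of what this split makes possible (illustrative
only, not code from this diff; the cache and function names below are
hypothetical, and the real allocation logic lands in the follow-up patches):

/*
 * Sketch only.  With the save area hanging off thread_struct as a pointer,
 * it can come from a slab cache whose object size is the CPU's real
 * save-area size (xstate_size) rather than a fixed 512 bytes, and it can
 * be allocated only when a task first touches the FPU.
 */
static struct kmem_cache *task_xstate_cachep;	/* hypothetical cache name */

static void __init xstate_cache_init_sketch(void)
{
	task_xstate_cachep = kmem_cache_create("task_xstate", xstate_size,
					       __alignof__(union thread_xstate),
					       SLAB_PANIC, NULL);
}

static int lazy_alloc_xstate_sketch(struct task_struct *tsk)
{
	if (tsk->thread.xstate)		/* already allocated: nothing to do */
		return 0;
	tsk->thread.xstate = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL);
	if (!tsk->thread.xstate)
		return -ENOMEM;
	memset(tsk->thread.xstate, 0, xstate_size);
	return 0;
}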
Diffstat (limited to 'include')
-rw-r--r--	include/asm-x86/i387.h	35
-rw-r--r--	include/asm-x86/processor.h	7
-rw-r--r--	include/asm-x86/thread_info.h	8
-rw-r--r--	include/asm-x86/thread_info_32.h	2
-rw-r--r--	include/asm-x86/thread_info_64.h	2
5 files changed, 29 insertions, 25 deletions
diff --git a/include/asm-x86/i387.h b/include/asm-x86/i387.h
index 54522b814f1c..382a5fa9d492 100644
--- a/include/asm-x86/i387.h
+++ b/include/asm-x86/i387.h
@@ -23,6 +23,7 @@ extern void fpu_init(void);
 extern void mxcsr_feature_mask_init(void);
 extern void init_fpu(struct task_struct *child);
 extern asmlinkage void math_state_restore(void);
+extern void init_thread_xstate(void);
 
 extern user_regset_active_fn fpregs_active, xfpregs_active;
 extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get;
@@ -117,24 +118,22 @@ static inline void __save_init_fpu(struct task_struct *tsk)
 	/* Using "fxsaveq %0" would be the ideal choice, but is only supported
 	   starting with gas 2.16. */
 	__asm__ __volatile__("fxsaveq %0"
-			     : "=m" (tsk->thread.i387.fxsave));
+			     : "=m" (tsk->thread.xstate->fxsave));
 #elif 0
 	/* Using, as a workaround, the properly prefixed form below isn't
 	   accepted by any binutils version so far released, complaining that
 	   the same type of prefix is used twice if an extended register is
 	   needed for addressing (fix submitted to mainline 2005-11-21). */
 	__asm__ __volatile__("rex64/fxsave %0"
-			     : "=m" (tsk->thread.i387.fxsave));
+			     : "=m" (tsk->thread.xstate->fxsave));
 #else
 	/* This, however, we can work around by forcing the compiler to select
 	   an addressing mode that doesn't require extended registers. */
-	__asm__ __volatile__("rex64/fxsave %P2(%1)"
-			     : "=m" (tsk->thread.i387.fxsave)
-			     : "cdaSDb" (tsk),
-			       "i" (offsetof(__typeof__(*tsk),
-					     thread.i387.fxsave)));
+	__asm__ __volatile__("rex64/fxsave (%1)"
+			     : "=m" (tsk->thread.xstate->fxsave)
+			     : "cdaSDb" (&tsk->thread.xstate->fxsave));
 #endif
-	clear_fpu_state(&tsk->thread.i387.fxsave);
+	clear_fpu_state(&tsk->thread.xstate->fxsave);
 	task_thread_info(tsk)->status &= ~TS_USEDFPU;
 }
 
@@ -148,7 +147,7 @@ static inline int save_i387(struct _fpstate __user *buf)
 	int err = 0;
 
 	BUILD_BUG_ON(sizeof(struct user_i387_struct) !=
-		     sizeof(tsk->thread.i387.fxsave));
+		     sizeof(tsk->thread.xstate->fxsave));
 
 	if ((unsigned long)buf % 16)
 		printk("save_i387: bad fpstate %p\n", buf);
@@ -164,7 +163,7 @@ static inline int save_i387(struct _fpstate __user *buf)
 		task_thread_info(tsk)->status &= ~TS_USEDFPU;
 		stts();
 	} else {
-		if (__copy_to_user(buf, &tsk->thread.i387.fxsave,
+		if (__copy_to_user(buf, &tsk->thread.xstate->fxsave,
 				   sizeof(struct i387_fxsave_struct)))
 			return -1;
 	}
@@ -201,7 +200,7 @@ static inline void restore_fpu(struct task_struct *tsk)
201 "nop ; frstor %1", 200 "nop ; frstor %1",
202 "fxrstor %1", 201 "fxrstor %1",
203 X86_FEATURE_FXSR, 202 X86_FEATURE_FXSR,
204 "m" ((tsk)->thread.i387.fxsave)); 203 "m" (tsk->thread.xstate->fxsave));
205} 204}
206 205
207/* We need a safe address that is cheap to find and that is already 206/* We need a safe address that is cheap to find and that is already
@@ -225,8 +224,8 @@ static inline void __save_init_fpu(struct task_struct *tsk)
225 "fxsave %[fx]\n" 224 "fxsave %[fx]\n"
226 "bt $7,%[fsw] ; jnc 1f ; fnclex\n1:", 225 "bt $7,%[fsw] ; jnc 1f ; fnclex\n1:",
227 X86_FEATURE_FXSR, 226 X86_FEATURE_FXSR,
228 [fx] "m" (tsk->thread.i387.fxsave), 227 [fx] "m" (tsk->thread.xstate->fxsave),
229 [fsw] "m" (tsk->thread.i387.fxsave.swd) : "memory"); 228 [fsw] "m" (tsk->thread.xstate->fxsave.swd) : "memory");
230 /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception 229 /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
231 is pending. Clear the x87 state here by setting it to fixed 230 is pending. Clear the x87 state here by setting it to fixed
232 values. safe_address is a random variable that should be in L1 */ 231 values. safe_address is a random variable that should be in L1 */
@@ -327,25 +326,25 @@ static inline void clear_fpu(struct task_struct *tsk)
 static inline unsigned short get_fpu_cwd(struct task_struct *tsk)
 {
 	if (cpu_has_fxsr) {
-		return tsk->thread.i387.fxsave.cwd;
+		return tsk->thread.xstate->fxsave.cwd;
 	} else {
-		return (unsigned short)tsk->thread.i387.fsave.cwd;
+		return (unsigned short) tsk->thread.xstate->fsave.cwd;
 	}
 }
 
 static inline unsigned short get_fpu_swd(struct task_struct *tsk)
 {
 	if (cpu_has_fxsr) {
-		return tsk->thread.i387.fxsave.swd;
+		return tsk->thread.xstate->fxsave.swd;
 	} else {
-		return (unsigned short)tsk->thread.i387.fsave.swd;
+		return (unsigned short) tsk->thread.xstate->fsave.swd;
 	}
 }
 
 static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk)
 {
 	if (cpu_has_xmm) {
-		return tsk->thread.i387.fxsave.mxcsr;
+		return tsk->thread.xstate->fxsave.mxcsr;
 	} else {
 		return MXCSR_DEFAULT;
 	}
diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index eaf4548a23d2..99d297885780 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -354,7 +354,7 @@ struct i387_soft_struct {
 	u32			entry_eip;
 };
 
-union i387_union {
+union thread_xstate {
 	struct i387_fsave_struct	fsave;
 	struct i387_fxsave_struct	fxsave;
 	struct i387_soft_struct		soft;
@@ -365,6 +365,7 @@ DECLARE_PER_CPU(struct orig_ist, orig_ist);
 #endif
 
 extern void print_cpu_info(struct cpuinfo_x86 *);
+extern unsigned int xstate_size;
 extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
 extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
 extern unsigned short num_cache_leaves;
@@ -397,8 +398,8 @@ struct thread_struct {
 	unsigned long		cr2;
 	unsigned long		trap_no;
 	unsigned long		error_code;
-	/* Floating point info: */
-	union i387_union	i387 __attribute__((aligned(16)));;
+	/* floating point and extended processor state */
+	union thread_xstate	*xstate;
 #ifdef CONFIG_X86_32
 	/* Virtual 86 mode info */
 	struct vm86_struct __user *vm86_info;
diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h
index d5fd12f2abdb..407b88c170d3 100644
--- a/include/asm-x86/thread_info.h
+++ b/include/asm-x86/thread_info.h
@@ -1,5 +1,13 @@
+#ifndef _ASM_X86_THREAD_INFO_H
 #ifdef CONFIG_X86_32
 # include "thread_info_32.h"
 #else
 # include "thread_info_64.h"
 #endif
+
+#ifndef __ASSEMBLY__
+extern void arch_task_cache_init(void);
+extern void free_thread_info(struct thread_info *ti);
+extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
+#endif
+#endif /* _ASM_X86_THREAD_INFO_H */
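The hooks declared above are what the generic fork path can call once the FPU
area lives outside task_struct.  A hedged sketch of how arch_dup_task_struct()
could duplicate the per-task xstate on fork, reusing the hypothetical
task_xstate_cachep from the earlier sketch (the actual implementation belongs
to the arch process code, not to this diff):

int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
	*dst = *src;			/* copy the task by value first */
	if (src->thread.xstate) {	/* parent has live FPU state */
		dst->thread.xstate = kmem_cache_alloc(task_xstate_cachep,
						      GFP_KERNEL);
		if (!dst->thread.xstate)
			return -ENOMEM;
		memcpy(dst->thread.xstate, src->thread.xstate, xstate_size);
	}
	return 0;
}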
diff --git a/include/asm-x86/thread_info_32.h b/include/asm-x86/thread_info_32.h
index 4e053fa561a9..531859962096 100644
--- a/include/asm-x86/thread_info_32.h
+++ b/include/asm-x86/thread_info_32.h
@@ -102,8 +102,6 @@ static inline struct thread_info *current_thread_info(void)
 	__get_free_pages(GFP_KERNEL, get_order(THREAD_SIZE)))
 #endif
 
-#define free_thread_info(info)	free_pages((unsigned long)(info), get_order(THREAD_SIZE))
-
 #else /* !__ASSEMBLY__ */
 
 /* how to get the thread information struct from ASM */
diff --git a/include/asm-x86/thread_info_64.h b/include/asm-x86/thread_info_64.h
index b17f5f6c2c59..ed664e874dec 100644
--- a/include/asm-x86/thread_info_64.h
+++ b/include/asm-x86/thread_info_64.h
@@ -85,8 +85,6 @@ static inline struct thread_info *stack_thread_info(void)
 #define alloc_thread_info(tsk) \
 	((struct thread_info *)__get_free_pages(THREAD_FLAGS, THREAD_ORDER))
 
-#define free_thread_info(ti) free_pages((unsigned long) (ti), THREAD_ORDER)
-
 #else /* !__ASSEMBLY__ */
 
 /* how to get the thread information struct from ASM */