diff options
author | Suresh Siddha <suresh.b.siddha@intel.com> | 2008-03-10 18:28:04 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-04-19 13:19:55 -0400 |
commit | 61c4628b538608c1a85211ed8438136adfeb9a95 (patch) | |
tree | 290a695299a363153bc692e6d705ac680d64359e /include/asm-x86 | |
parent | fa5c4639419668cbb18ca3d20c1253559a3b43ae (diff) |
x86, fpu: split FPU state from task struct - v5
Split the FPU save area from the task struct. This allows easy migration
of FPU context, and it's generally cleaner. It also allows the following
two optimizations:
1) only allocate when the application actually uses the FPU, i.e. in the first
lazy FPU trap. This could save memory for apps that do not use the FPU. The next
patch does this lazy allocation.
2) allocate the right size for the actual cpu rather than 512 bytes always.
Patches enabling xsave/xrstor support (coming shortly) will take advantage
of this.
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'include/asm-x86')
-rw-r--r-- | include/asm-x86/i387.h | 35 | ||||
-rw-r--r-- | include/asm-x86/processor.h | 7 | ||||
-rw-r--r-- | include/asm-x86/thread_info.h | 8 | ||||
-rw-r--r-- | include/asm-x86/thread_info_32.h | 2 | ||||
-rw-r--r-- | include/asm-x86/thread_info_64.h | 2 |
5 files changed, 29 insertions, 25 deletions
diff --git a/include/asm-x86/i387.h b/include/asm-x86/i387.h index 54522b814f1c..382a5fa9d492 100644 --- a/include/asm-x86/i387.h +++ b/include/asm-x86/i387.h | |||
@@ -23,6 +23,7 @@ extern void fpu_init(void); | |||
23 | extern void mxcsr_feature_mask_init(void); | 23 | extern void mxcsr_feature_mask_init(void); |
24 | extern void init_fpu(struct task_struct *child); | 24 | extern void init_fpu(struct task_struct *child); |
25 | extern asmlinkage void math_state_restore(void); | 25 | extern asmlinkage void math_state_restore(void); |
26 | extern void init_thread_xstate(void); | ||
26 | 27 | ||
27 | extern user_regset_active_fn fpregs_active, xfpregs_active; | 28 | extern user_regset_active_fn fpregs_active, xfpregs_active; |
28 | extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get; | 29 | extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get; |
@@ -117,24 +118,22 @@ static inline void __save_init_fpu(struct task_struct *tsk) | |||
117 | /* Using "fxsaveq %0" would be the ideal choice, but is only supported | 118 | /* Using "fxsaveq %0" would be the ideal choice, but is only supported |
118 | starting with gas 2.16. */ | 119 | starting with gas 2.16. */ |
119 | __asm__ __volatile__("fxsaveq %0" | 120 | __asm__ __volatile__("fxsaveq %0" |
120 | : "=m" (tsk->thread.i387.fxsave)); | 121 | : "=m" (tsk->thread.xstate->fxsave)); |
121 | #elif 0 | 122 | #elif 0 |
122 | /* Using, as a workaround, the properly prefixed form below isn't | 123 | /* Using, as a workaround, the properly prefixed form below isn't |
123 | accepted by any binutils version so far released, complaining that | 124 | accepted by any binutils version so far released, complaining that |
124 | the same type of prefix is used twice if an extended register is | 125 | the same type of prefix is used twice if an extended register is |
125 | needed for addressing (fix submitted to mainline 2005-11-21). */ | 126 | needed for addressing (fix submitted to mainline 2005-11-21). */ |
126 | __asm__ __volatile__("rex64/fxsave %0" | 127 | __asm__ __volatile__("rex64/fxsave %0" |
127 | : "=m" (tsk->thread.i387.fxsave)); | 128 | : "=m" (tsk->thread.xstate->fxsave)); |
128 | #else | 129 | #else |
129 | /* This, however, we can work around by forcing the compiler to select | 130 | /* This, however, we can work around by forcing the compiler to select |
130 | an addressing mode that doesn't require extended registers. */ | 131 | an addressing mode that doesn't require extended registers. */ |
131 | __asm__ __volatile__("rex64/fxsave %P2(%1)" | 132 | __asm__ __volatile__("rex64/fxsave (%1)" |
132 | : "=m" (tsk->thread.i387.fxsave) | 133 | : "=m" (tsk->thread.xstate->fxsave) |
133 | : "cdaSDb" (tsk), | 134 | : "cdaSDb" (&tsk->thread.xstate->fxsave)); |
134 | "i" (offsetof(__typeof__(*tsk), | ||
135 | thread.i387.fxsave))); | ||
136 | #endif | 135 | #endif |
137 | clear_fpu_state(&tsk->thread.i387.fxsave); | 136 | clear_fpu_state(&tsk->thread.xstate->fxsave); |
138 | task_thread_info(tsk)->status &= ~TS_USEDFPU; | 137 | task_thread_info(tsk)->status &= ~TS_USEDFPU; |
139 | } | 138 | } |
140 | 139 | ||
@@ -148,7 +147,7 @@ static inline int save_i387(struct _fpstate __user *buf) | |||
148 | int err = 0; | 147 | int err = 0; |
149 | 148 | ||
150 | BUILD_BUG_ON(sizeof(struct user_i387_struct) != | 149 | BUILD_BUG_ON(sizeof(struct user_i387_struct) != |
151 | sizeof(tsk->thread.i387.fxsave)); | 150 | sizeof(tsk->thread.xstate->fxsave)); |
152 | 151 | ||
153 | if ((unsigned long)buf % 16) | 152 | if ((unsigned long)buf % 16) |
154 | printk("save_i387: bad fpstate %p\n", buf); | 153 | printk("save_i387: bad fpstate %p\n", buf); |
@@ -164,7 +163,7 @@ static inline int save_i387(struct _fpstate __user *buf) | |||
164 | task_thread_info(tsk)->status &= ~TS_USEDFPU; | 163 | task_thread_info(tsk)->status &= ~TS_USEDFPU; |
165 | stts(); | 164 | stts(); |
166 | } else { | 165 | } else { |
167 | if (__copy_to_user(buf, &tsk->thread.i387.fxsave, | 166 | if (__copy_to_user(buf, &tsk->thread.xstate->fxsave, |
168 | sizeof(struct i387_fxsave_struct))) | 167 | sizeof(struct i387_fxsave_struct))) |
169 | return -1; | 168 | return -1; |
170 | } | 169 | } |
@@ -201,7 +200,7 @@ static inline void restore_fpu(struct task_struct *tsk) | |||
201 | "nop ; frstor %1", | 200 | "nop ; frstor %1", |
202 | "fxrstor %1", | 201 | "fxrstor %1", |
203 | X86_FEATURE_FXSR, | 202 | X86_FEATURE_FXSR, |
204 | "m" ((tsk)->thread.i387.fxsave)); | 203 | "m" (tsk->thread.xstate->fxsave)); |
205 | } | 204 | } |
206 | 205 | ||
207 | /* We need a safe address that is cheap to find and that is already | 206 | /* We need a safe address that is cheap to find and that is already |
@@ -225,8 +224,8 @@ static inline void __save_init_fpu(struct task_struct *tsk) | |||
225 | "fxsave %[fx]\n" | 224 | "fxsave %[fx]\n" |
226 | "bt $7,%[fsw] ; jnc 1f ; fnclex\n1:", | 225 | "bt $7,%[fsw] ; jnc 1f ; fnclex\n1:", |
227 | X86_FEATURE_FXSR, | 226 | X86_FEATURE_FXSR, |
228 | [fx] "m" (tsk->thread.i387.fxsave), | 227 | [fx] "m" (tsk->thread.xstate->fxsave), |
229 | [fsw] "m" (tsk->thread.i387.fxsave.swd) : "memory"); | 228 | [fsw] "m" (tsk->thread.xstate->fxsave.swd) : "memory"); |
230 | /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception | 229 | /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception |
231 | is pending. Clear the x87 state here by setting it to fixed | 230 | is pending. Clear the x87 state here by setting it to fixed |
232 | values. safe_address is a random variable that should be in L1 */ | 231 | values. safe_address is a random variable that should be in L1 */ |
@@ -327,25 +326,25 @@ static inline void clear_fpu(struct task_struct *tsk) | |||
327 | static inline unsigned short get_fpu_cwd(struct task_struct *tsk) | 326 | static inline unsigned short get_fpu_cwd(struct task_struct *tsk) |
328 | { | 327 | { |
329 | if (cpu_has_fxsr) { | 328 | if (cpu_has_fxsr) { |
330 | return tsk->thread.i387.fxsave.cwd; | 329 | return tsk->thread.xstate->fxsave.cwd; |
331 | } else { | 330 | } else { |
332 | return (unsigned short)tsk->thread.i387.fsave.cwd; | 331 | return (unsigned short) tsk->thread.xstate->fsave.cwd; |
333 | } | 332 | } |
334 | } | 333 | } |
335 | 334 | ||
336 | static inline unsigned short get_fpu_swd(struct task_struct *tsk) | 335 | static inline unsigned short get_fpu_swd(struct task_struct *tsk) |
337 | { | 336 | { |
338 | if (cpu_has_fxsr) { | 337 | if (cpu_has_fxsr) { |
339 | return tsk->thread.i387.fxsave.swd; | 338 | return tsk->thread.xstate->fxsave.swd; |
340 | } else { | 339 | } else { |
341 | return (unsigned short)tsk->thread.i387.fsave.swd; | 340 | return (unsigned short) tsk->thread.xstate->fsave.swd; |
342 | } | 341 | } |
343 | } | 342 | } |
344 | 343 | ||
345 | static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk) | 344 | static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk) |
346 | { | 345 | { |
347 | if (cpu_has_xmm) { | 346 | if (cpu_has_xmm) { |
348 | return tsk->thread.i387.fxsave.mxcsr; | 347 | return tsk->thread.xstate->fxsave.mxcsr; |
349 | } else { | 348 | } else { |
350 | return MXCSR_DEFAULT; | 349 | return MXCSR_DEFAULT; |
351 | } | 350 | } |
diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h index eaf4548a23d2..99d297885780 100644 --- a/include/asm-x86/processor.h +++ b/include/asm-x86/processor.h | |||
@@ -354,7 +354,7 @@ struct i387_soft_struct { | |||
354 | u32 entry_eip; | 354 | u32 entry_eip; |
355 | }; | 355 | }; |
356 | 356 | ||
357 | union i387_union { | 357 | union thread_xstate { |
358 | struct i387_fsave_struct fsave; | 358 | struct i387_fsave_struct fsave; |
359 | struct i387_fxsave_struct fxsave; | 359 | struct i387_fxsave_struct fxsave; |
360 | struct i387_soft_struct soft; | 360 | struct i387_soft_struct soft; |
@@ -365,6 +365,7 @@ DECLARE_PER_CPU(struct orig_ist, orig_ist); | |||
365 | #endif | 365 | #endif |
366 | 366 | ||
367 | extern void print_cpu_info(struct cpuinfo_x86 *); | 367 | extern void print_cpu_info(struct cpuinfo_x86 *); |
368 | extern unsigned int xstate_size; | ||
368 | extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c); | 369 | extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c); |
369 | extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); | 370 | extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); |
370 | extern unsigned short num_cache_leaves; | 371 | extern unsigned short num_cache_leaves; |
@@ -397,8 +398,8 @@ struct thread_struct { | |||
397 | unsigned long cr2; | 398 | unsigned long cr2; |
398 | unsigned long trap_no; | 399 | unsigned long trap_no; |
399 | unsigned long error_code; | 400 | unsigned long error_code; |
400 | /* Floating point info: */ | 401 | /* floating point and extended processor state */ |
401 | union i387_union i387 __attribute__((aligned(16)));; | 402 | union thread_xstate *xstate; |
402 | #ifdef CONFIG_X86_32 | 403 | #ifdef CONFIG_X86_32 |
403 | /* Virtual 86 mode info */ | 404 | /* Virtual 86 mode info */ |
404 | struct vm86_struct __user *vm86_info; | 405 | struct vm86_struct __user *vm86_info; |
diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h index d5fd12f2abdb..407b88c170d3 100644 --- a/include/asm-x86/thread_info.h +++ b/include/asm-x86/thread_info.h | |||
@@ -1,5 +1,13 @@ | |||
1 | #ifndef _ASM_X86_THREAD_INFO_H | ||
1 | #ifdef CONFIG_X86_32 | 2 | #ifdef CONFIG_X86_32 |
2 | # include "thread_info_32.h" | 3 | # include "thread_info_32.h" |
3 | #else | 4 | #else |
4 | # include "thread_info_64.h" | 5 | # include "thread_info_64.h" |
5 | #endif | 6 | #endif |
7 | |||
8 | #ifndef __ASSEMBLY__ | ||
9 | extern void arch_task_cache_init(void); | ||
10 | extern void free_thread_info(struct thread_info *ti); | ||
11 | extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); | ||
12 | #endif | ||
13 | #endif /* _ASM_X86_THREAD_INFO_H */ | ||
diff --git a/include/asm-x86/thread_info_32.h b/include/asm-x86/thread_info_32.h index 4e053fa561a9..531859962096 100644 --- a/include/asm-x86/thread_info_32.h +++ b/include/asm-x86/thread_info_32.h | |||
@@ -102,8 +102,6 @@ static inline struct thread_info *current_thread_info(void) | |||
102 | __get_free_pages(GFP_KERNEL, get_order(THREAD_SIZE))) | 102 | __get_free_pages(GFP_KERNEL, get_order(THREAD_SIZE))) |
103 | #endif | 103 | #endif |
104 | 104 | ||
105 | #define free_thread_info(info) free_pages((unsigned long)(info), get_order(THREAD_SIZE)) | ||
106 | |||
107 | #else /* !__ASSEMBLY__ */ | 105 | #else /* !__ASSEMBLY__ */ |
108 | 106 | ||
109 | /* how to get the thread information struct from ASM */ | 107 | /* how to get the thread information struct from ASM */ |
diff --git a/include/asm-x86/thread_info_64.h b/include/asm-x86/thread_info_64.h index b17f5f6c2c59..ed664e874dec 100644 --- a/include/asm-x86/thread_info_64.h +++ b/include/asm-x86/thread_info_64.h | |||
@@ -85,8 +85,6 @@ static inline struct thread_info *stack_thread_info(void) | |||
85 | #define alloc_thread_info(tsk) \ | 85 | #define alloc_thread_info(tsk) \ |
86 | ((struct thread_info *)__get_free_pages(THREAD_FLAGS, THREAD_ORDER)) | 86 | ((struct thread_info *)__get_free_pages(THREAD_FLAGS, THREAD_ORDER)) |
87 | 87 | ||
88 | #define free_thread_info(ti) free_pages((unsigned long) (ti), THREAD_ORDER) | ||
89 | |||
90 | #else /* !__ASSEMBLY__ */ | 88 | #else /* !__ASSEMBLY__ */ |
91 | 89 | ||
92 | /* how to get the thread information struct from ASM */ | 90 | /* how to get the thread information struct from ASM */ |