author    Suresh Siddha <suresh.b.siddha@intel.com>    2008-03-10 18:28:05 -0400
committer Ingo Molnar <mingo@elte.hu>                  2008-04-19 13:19:55 -0400
commit    aa283f49276e7d840a40fb01eee6de97eaa7e012 (patch)
tree      b17b134b174666e482b1a8ad486436a3d5cdb83e /arch
parent    61c4628b538608c1a85211ed8438136adfeb9a95 (diff)
x86, fpu: lazy allocation of FPU area - v5
Only allocate the FPU area when the application actually uses the FPU, i.e.,
in the first lazy FPU trap. This could save memory for non-FPU-using apps.
For example, on my system after boot there are around 300 processes, with
only 17 using the FPU.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
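In short, the xstate area now follows the task's actual FPU lifecycle rather
than task creation. A condensed sketch of the three touch points, drawn from
the hunks below (illustrative fragments, not literal kernel code):

	/* 1. First FPU use (#NM trap): allocate the area from the slab cache. */
	if (!tsk->thread.xstate)
		tsk->thread.xstate = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL);

	/* 2. fork(): duplicate the state only if the parent ever used the FPU. */
	if (src->thread.xstate)
		memcpy(dst->thread.xstate, src->thread.xstate, xstate_size);

	/* 3. exec(): free the old state; the new image starts out FPU-less. */
	free_thread_xstate(current);

The error path matters: init_fpu() now returns -ENOMEM when the allocation
fails, and the trap handlers kill the task group in that case, since there is
no FPU state to restore.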
Diffstat (limited to 'arch')
 arch/x86/kernel/i387.c       | 38 ++++++++++++++++++++++++++++++++++--------
 arch/x86/kernel/process.c    | 28 +++++++++++++++++++++---------
 arch/x86/kernel/process_32.c |  4 ++++
 arch/x86/kernel/process_64.c |  4 ++++
 arch/x86/kernel/traps_32.c   | 17 +++++++++++++++--
 arch/x86/kernel/traps_64.c   | 19 ++++++++++++++++---
 6 files changed, 88 insertions(+), 22 deletions(-)
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index baf632b221d4..db6839b53195 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -8,7 +8,6 @@
 #include <linux/module.h>
 #include <linux/regset.h>
 #include <linux/sched.h>
-#include <linux/bootmem.h>
 
 #include <asm/sigcontext.h>
 #include <asm/processor.h>
@@ -63,7 +62,6 @@ void __init init_thread_xstate(void)
 	else
 		xstate_size = sizeof(struct i387_fsave_struct);
 #endif
-	init_task.thread.xstate = alloc_bootmem(xstate_size);
 }
 
 #ifdef CONFIG_X86_64
@@ -93,12 +91,22 @@ void __cpuinit fpu_init(void)
  * value at reset if we support XMM instructions and then
  * remeber the current task has used the FPU.
  */
-void init_fpu(struct task_struct *tsk)
+int init_fpu(struct task_struct *tsk)
 {
 	if (tsk_used_math(tsk)) {
 		if (tsk == current)
 			unlazy_fpu(tsk);
-		return;
+		return 0;
+	}
+
+	/*
+	 * Memory allocation at the first usage of the FPU and other state.
+	 */
+	if (!tsk->thread.xstate) {
+		tsk->thread.xstate = kmem_cache_alloc(task_xstate_cachep,
+						      GFP_KERNEL);
+		if (!tsk->thread.xstate)
+			return -ENOMEM;
 	}
 
 	if (cpu_has_fxsr) {
@@ -120,6 +128,7 @@ void init_fpu(struct task_struct *tsk)
 	 * Only the device not available exception or ptrace can call init_fpu.
 	 */
 	set_stopped_child_used_math(tsk);
+	return 0;
 }
 
 int fpregs_active(struct task_struct *target, const struct user_regset *regset)
@@ -136,10 +145,14 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
 		unsigned int pos, unsigned int count,
 		void *kbuf, void __user *ubuf)
 {
+	int ret;
+
 	if (!cpu_has_fxsr)
 		return -ENODEV;
 
-	init_fpu(target);
+	ret = init_fpu(target);
+	if (ret)
+		return ret;
 
 	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
 				   &target->thread.xstate->fxsave, 0, -1);
@@ -154,7 +167,10 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
 	if (!cpu_has_fxsr)
 		return -ENODEV;
 
-	init_fpu(target);
+	ret = init_fpu(target);
+	if (ret)
+		return ret;
+
 	set_stopped_child_used_math(target);
 
 	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
@@ -312,11 +328,14 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset,
 	       void *kbuf, void __user *ubuf)
 {
 	struct user_i387_ia32_struct env;
+	int ret;
 
 	if (!HAVE_HWFP)
 		return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf);
 
-	init_fpu(target);
+	ret = init_fpu(target);
+	if (ret)
+		return ret;
 
 	if (!cpu_has_fxsr) {
 		return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
@@ -344,7 +363,10 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
 	if (!HAVE_HWFP)
 		return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf);
 
-	init_fpu(target);
+	ret = init_fpu(target);
+	if (ret)
+		return ret;
+
 	set_stopped_child_used_math(target);
 
 	if (!cpu_has_fxsr) {
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index ead24efbcba0..0e613e7e7b5e 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -5,24 +5,34 @@
 #include <linux/slab.h>
 #include <linux/sched.h>
 
-static struct kmem_cache *task_xstate_cachep;
+struct kmem_cache *task_xstate_cachep;
 
 int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 {
 	*dst = *src;
-	dst->thread.xstate = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL);
-	if (!dst->thread.xstate)
-		return -ENOMEM;
-	WARN_ON((unsigned long)dst->thread.xstate & 15);
-	memcpy(dst->thread.xstate, src->thread.xstate, xstate_size);
+	if (src->thread.xstate) {
+		dst->thread.xstate = kmem_cache_alloc(task_xstate_cachep,
+						      GFP_KERNEL);
+		if (!dst->thread.xstate)
+			return -ENOMEM;
+		WARN_ON((unsigned long)dst->thread.xstate & 15);
+		memcpy(dst->thread.xstate, src->thread.xstate, xstate_size);
+	}
 	return 0;
 }
 
-void free_thread_info(struct thread_info *ti)
+void free_thread_xstate(struct task_struct *tsk)
 {
-	kmem_cache_free(task_xstate_cachep, ti->task->thread.xstate);
-	ti->task->thread.xstate = NULL;
+	if (tsk->thread.xstate) {
+		kmem_cache_free(task_xstate_cachep, tsk->thread.xstate);
+		tsk->thread.xstate = NULL;
+	}
+}
+
 
+void free_thread_info(struct thread_info *ti)
+{
+	free_thread_xstate(ti->task);
 	free_pages((unsigned long)(ti), get_order(THREAD_SIZE));
 }
 
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 3890a5dd25f9..7adad088e373 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -521,6 +521,10 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
 	regs->cs = __USER_CS;
 	regs->ip = new_ip;
 	regs->sp = new_sp;
+	/*
+	 * Free the old FP and other extended state
+	 */
+	free_thread_xstate(current);
 }
 EXPORT_SYMBOL_GPL(start_thread);
 
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index b795e831afd6..891af1a1b48a 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -533,6 +533,10 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
 	regs->ss = __USER_DS;
 	regs->flags = 0x200;
 	set_fs(USER_DS);
+	/*
+	 * Free the old FP and other extended state
+	 */
+	free_thread_xstate(current);
 }
 EXPORT_SYMBOL_GPL(start_thread);
 
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index 8d136a73ce8e..471e694d6713 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -1148,9 +1148,22 @@ asmlinkage void math_state_restore(void)
 	struct thread_info *thread = current_thread_info();
 	struct task_struct *tsk = thread->task;
 
+	if (!tsk_used_math(tsk)) {
+		local_irq_enable();
+		/*
+		 * does a slab alloc which can sleep
+		 */
+		if (init_fpu(tsk)) {
+			/*
+			 * ran out of memory!
+			 */
+			do_group_exit(SIGKILL);
+			return;
+		}
+		local_irq_disable();
+	}
+
 	clts();				/* Allow maths ops (or we recurse) */
-	if (!tsk_used_math(tsk))
-		init_fpu(tsk);
 	restore_fpu(tsk);
 	thread->status |= TS_USEDFPU;	/* So we fnsave on switch_to() */
 	tsk->fpu_counter++;
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index dc0cb497eec3..adff76ea97c4 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -1124,10 +1124,23 @@ asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void)
 asmlinkage void math_state_restore(void)
 {
 	struct task_struct *me = current;
-	clts();			/* Allow maths ops (or we recurse) */
 
-	if (!used_math())
-		init_fpu(me);
+	if (!used_math()) {
+		local_irq_enable();
+		/*
+		 * does a slab alloc which can sleep
+		 */
+		if (init_fpu(me)) {
+			/*
+			 * ran out of memory!
+			 */
+			do_group_exit(SIGKILL);
+			return;
+		}
+		local_irq_disable();
+	}
+
+	clts();			/* Allow maths ops (or we recurse) */
 	restore_fpu_checking(&me->thread.xstate->fxsave);
 	task_thread_info(me)->status |= TS_USEDFPU;
 	me->fpu_counter++;
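A process therefore acquires its xstate area only when it executes its first
FP/SSE instruction and takes the device-not-available (#NM) trap. A
hypothetical userspace illustration (not part of the patch; on some
libc/compiler combinations the startup code already touches FP/SSE state, so
the trap may fire before main()):

	#include <stdio.h>

	int main(void)
	{
		/* Integer-only work: this task's thread.xstate can still be NULL. */
		long i, sum = 0;
		for (i = 0; i < 1000000; i++)
			sum += i;

		/* First FP instruction: #NM traps, init_fpu() allocates xstate. */
		volatile double x = (double)sum;
		x /= 3.0;

		printf("%ld %f\n", sum, x);
		return 0;
	}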