diff options
author | Suresh Siddha <suresh.b.siddha@intel.com> | 2008-03-10 18:28:05 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-04-19 13:19:55 -0400 |
commit | aa283f49276e7d840a40fb01eee6de97eaa7e012 (patch) | |
tree | b17b134b174666e482b1a8ad486436a3d5cdb83e /arch/x86/kernel/i387.c | |
parent | 61c4628b538608c1a85211ed8438136adfeb9a95 (diff) |
x86, fpu: lazy allocation of FPU area - v5
Only allocate the FPU area when the application actually uses the FPU, i.e., in
the first lazy FPU trap. This saves memory for applications that never use the FPU.
For example, on my system after boot, there are around 300 processes, of which
only 17 use the FPU.
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'arch/x86/kernel/i387.c')
-rw-r--r-- | arch/x86/kernel/i387.c | 38 |
1 files changed, 30 insertions, 8 deletions
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index baf632b221d4..db6839b53195 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c | |||
@@ -8,7 +8,6 @@ | |||
8 | #include <linux/module.h> | 8 | #include <linux/module.h> |
9 | #include <linux/regset.h> | 9 | #include <linux/regset.h> |
10 | #include <linux/sched.h> | 10 | #include <linux/sched.h> |
11 | #include <linux/bootmem.h> | ||
12 | 11 | ||
13 | #include <asm/sigcontext.h> | 12 | #include <asm/sigcontext.h> |
14 | #include <asm/processor.h> | 13 | #include <asm/processor.h> |
@@ -63,7 +62,6 @@ void __init init_thread_xstate(void) | |||
63 | else | 62 | else |
64 | xstate_size = sizeof(struct i387_fsave_struct); | 63 | xstate_size = sizeof(struct i387_fsave_struct); |
65 | #endif | 64 | #endif |
66 | init_task.thread.xstate = alloc_bootmem(xstate_size); | ||
67 | } | 65 | } |
68 | 66 | ||
69 | #ifdef CONFIG_X86_64 | 67 | #ifdef CONFIG_X86_64 |
@@ -93,12 +91,22 @@ void __cpuinit fpu_init(void) | |||
93 | * value at reset if we support XMM instructions and then | 91 | * value at reset if we support XMM instructions and then |
94 | * remeber the current task has used the FPU. | 92 | * remeber the current task has used the FPU. |
95 | */ | 93 | */ |
96 | void init_fpu(struct task_struct *tsk) | 94 | int init_fpu(struct task_struct *tsk) |
97 | { | 95 | { |
98 | if (tsk_used_math(tsk)) { | 96 | if (tsk_used_math(tsk)) { |
99 | if (tsk == current) | 97 | if (tsk == current) |
100 | unlazy_fpu(tsk); | 98 | unlazy_fpu(tsk); |
101 | return; | 99 | return 0; |
100 | } | ||
101 | |||
102 | /* | ||
103 | * Memory allocation at the first usage of the FPU and other state. | ||
104 | */ | ||
105 | if (!tsk->thread.xstate) { | ||
106 | tsk->thread.xstate = kmem_cache_alloc(task_xstate_cachep, | ||
107 | GFP_KERNEL); | ||
108 | if (!tsk->thread.xstate) | ||
109 | return -ENOMEM; | ||
102 | } | 110 | } |
103 | 111 | ||
104 | if (cpu_has_fxsr) { | 112 | if (cpu_has_fxsr) { |
@@ -120,6 +128,7 @@ void init_fpu(struct task_struct *tsk) | |||
120 | * Only the device not available exception or ptrace can call init_fpu. | 128 | * Only the device not available exception or ptrace can call init_fpu. |
121 | */ | 129 | */ |
122 | set_stopped_child_used_math(tsk); | 130 | set_stopped_child_used_math(tsk); |
131 | return 0; | ||
123 | } | 132 | } |
124 | 133 | ||
125 | int fpregs_active(struct task_struct *target, const struct user_regset *regset) | 134 | int fpregs_active(struct task_struct *target, const struct user_regset *regset) |
@@ -136,10 +145,14 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset, | |||
136 | unsigned int pos, unsigned int count, | 145 | unsigned int pos, unsigned int count, |
137 | void *kbuf, void __user *ubuf) | 146 | void *kbuf, void __user *ubuf) |
138 | { | 147 | { |
148 | int ret; | ||
149 | |||
139 | if (!cpu_has_fxsr) | 150 | if (!cpu_has_fxsr) |
140 | return -ENODEV; | 151 | return -ENODEV; |
141 | 152 | ||
142 | init_fpu(target); | 153 | ret = init_fpu(target); |
154 | if (ret) | ||
155 | return ret; | ||
143 | 156 | ||
144 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, | 157 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, |
145 | &target->thread.xstate->fxsave, 0, -1); | 158 | &target->thread.xstate->fxsave, 0, -1); |
@@ -154,7 +167,10 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, | |||
154 | if (!cpu_has_fxsr) | 167 | if (!cpu_has_fxsr) |
155 | return -ENODEV; | 168 | return -ENODEV; |
156 | 169 | ||
157 | init_fpu(target); | 170 | ret = init_fpu(target); |
171 | if (ret) | ||
172 | return ret; | ||
173 | |||
158 | set_stopped_child_used_math(target); | 174 | set_stopped_child_used_math(target); |
159 | 175 | ||
160 | ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, | 176 | ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, |
@@ -312,11 +328,14 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset, | |||
312 | void *kbuf, void __user *ubuf) | 328 | void *kbuf, void __user *ubuf) |
313 | { | 329 | { |
314 | struct user_i387_ia32_struct env; | 330 | struct user_i387_ia32_struct env; |
331 | int ret; | ||
315 | 332 | ||
316 | if (!HAVE_HWFP) | 333 | if (!HAVE_HWFP) |
317 | return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); | 334 | return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); |
318 | 335 | ||
319 | init_fpu(target); | 336 | ret = init_fpu(target); |
337 | if (ret) | ||
338 | return ret; | ||
320 | 339 | ||
321 | if (!cpu_has_fxsr) { | 340 | if (!cpu_has_fxsr) { |
322 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, | 341 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, |
@@ -344,7 +363,10 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, | |||
344 | if (!HAVE_HWFP) | 363 | if (!HAVE_HWFP) |
345 | return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); | 364 | return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); |
346 | 365 | ||
347 | init_fpu(target); | 366 | ret = init_fpu(target); |
367 | if (ret) | ||
368 | return ret; | ||
369 | |||
348 | set_stopped_child_used_math(target); | 370 | set_stopped_child_used_math(target); |
349 | 371 | ||
350 | if (!cpu_has_fxsr) { | 372 | if (!cpu_has_fxsr) { |