diff options
author | Suresh Siddha <suresh.b.siddha@intel.com> | 2008-03-10 18:28:05 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-04-19 13:19:55 -0400 |
commit | aa283f49276e7d840a40fb01eee6de97eaa7e012 (patch) | |
tree | b17b134b174666e482b1a8ad486436a3d5cdb83e /arch/x86/kernel | |
parent | 61c4628b538608c1a85211ed8438136adfeb9a95 (diff) |
x86, fpu: lazy allocation of FPU area - v5
Only allocate the FPU area when the application actually uses the FPU, i.e., in
the first lazy FPU trap. This saves memory for applications that never use the FPU.
For example, on my system after boot there are around 300 processes, of which
only 17 use the FPU.
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/i387.c | 38 | ||||
-rw-r--r-- | arch/x86/kernel/process.c | 28 | ||||
-rw-r--r-- | arch/x86/kernel/process_32.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/process_64.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/traps_32.c | 17 | ||||
-rw-r--r-- | arch/x86/kernel/traps_64.c | 19 |
6 files changed, 88 insertions, 22 deletions
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index baf632b221d4..db6839b53195 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c | |||
@@ -8,7 +8,6 @@ | |||
8 | #include <linux/module.h> | 8 | #include <linux/module.h> |
9 | #include <linux/regset.h> | 9 | #include <linux/regset.h> |
10 | #include <linux/sched.h> | 10 | #include <linux/sched.h> |
11 | #include <linux/bootmem.h> | ||
12 | 11 | ||
13 | #include <asm/sigcontext.h> | 12 | #include <asm/sigcontext.h> |
14 | #include <asm/processor.h> | 13 | #include <asm/processor.h> |
@@ -63,7 +62,6 @@ void __init init_thread_xstate(void) | |||
63 | else | 62 | else |
64 | xstate_size = sizeof(struct i387_fsave_struct); | 63 | xstate_size = sizeof(struct i387_fsave_struct); |
65 | #endif | 64 | #endif |
66 | init_task.thread.xstate = alloc_bootmem(xstate_size); | ||
67 | } | 65 | } |
68 | 66 | ||
69 | #ifdef CONFIG_X86_64 | 67 | #ifdef CONFIG_X86_64 |
@@ -93,12 +91,22 @@ void __cpuinit fpu_init(void) | |||
93 | * value at reset if we support XMM instructions and then | 91 | * value at reset if we support XMM instructions and then |
94 | * remeber the current task has used the FPU. | 92 | * remeber the current task has used the FPU. |
95 | */ | 93 | */ |
96 | void init_fpu(struct task_struct *tsk) | 94 | int init_fpu(struct task_struct *tsk) |
97 | { | 95 | { |
98 | if (tsk_used_math(tsk)) { | 96 | if (tsk_used_math(tsk)) { |
99 | if (tsk == current) | 97 | if (tsk == current) |
100 | unlazy_fpu(tsk); | 98 | unlazy_fpu(tsk); |
101 | return; | 99 | return 0; |
100 | } | ||
101 | |||
102 | /* | ||
103 | * Memory allocation at the first usage of the FPU and other state. | ||
104 | */ | ||
105 | if (!tsk->thread.xstate) { | ||
106 | tsk->thread.xstate = kmem_cache_alloc(task_xstate_cachep, | ||
107 | GFP_KERNEL); | ||
108 | if (!tsk->thread.xstate) | ||
109 | return -ENOMEM; | ||
102 | } | 110 | } |
103 | 111 | ||
104 | if (cpu_has_fxsr) { | 112 | if (cpu_has_fxsr) { |
@@ -120,6 +128,7 @@ void init_fpu(struct task_struct *tsk) | |||
120 | * Only the device not available exception or ptrace can call init_fpu. | 128 | * Only the device not available exception or ptrace can call init_fpu. |
121 | */ | 129 | */ |
122 | set_stopped_child_used_math(tsk); | 130 | set_stopped_child_used_math(tsk); |
131 | return 0; | ||
123 | } | 132 | } |
124 | 133 | ||
125 | int fpregs_active(struct task_struct *target, const struct user_regset *regset) | 134 | int fpregs_active(struct task_struct *target, const struct user_regset *regset) |
@@ -136,10 +145,14 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset, | |||
136 | unsigned int pos, unsigned int count, | 145 | unsigned int pos, unsigned int count, |
137 | void *kbuf, void __user *ubuf) | 146 | void *kbuf, void __user *ubuf) |
138 | { | 147 | { |
148 | int ret; | ||
149 | |||
139 | if (!cpu_has_fxsr) | 150 | if (!cpu_has_fxsr) |
140 | return -ENODEV; | 151 | return -ENODEV; |
141 | 152 | ||
142 | init_fpu(target); | 153 | ret = init_fpu(target); |
154 | if (ret) | ||
155 | return ret; | ||
143 | 156 | ||
144 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, | 157 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, |
145 | &target->thread.xstate->fxsave, 0, -1); | 158 | &target->thread.xstate->fxsave, 0, -1); |
@@ -154,7 +167,10 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, | |||
154 | if (!cpu_has_fxsr) | 167 | if (!cpu_has_fxsr) |
155 | return -ENODEV; | 168 | return -ENODEV; |
156 | 169 | ||
157 | init_fpu(target); | 170 | ret = init_fpu(target); |
171 | if (ret) | ||
172 | return ret; | ||
173 | |||
158 | set_stopped_child_used_math(target); | 174 | set_stopped_child_used_math(target); |
159 | 175 | ||
160 | ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, | 176 | ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, |
@@ -312,11 +328,14 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset, | |||
312 | void *kbuf, void __user *ubuf) | 328 | void *kbuf, void __user *ubuf) |
313 | { | 329 | { |
314 | struct user_i387_ia32_struct env; | 330 | struct user_i387_ia32_struct env; |
331 | int ret; | ||
315 | 332 | ||
316 | if (!HAVE_HWFP) | 333 | if (!HAVE_HWFP) |
317 | return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); | 334 | return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); |
318 | 335 | ||
319 | init_fpu(target); | 336 | ret = init_fpu(target); |
337 | if (ret) | ||
338 | return ret; | ||
320 | 339 | ||
321 | if (!cpu_has_fxsr) { | 340 | if (!cpu_has_fxsr) { |
322 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, | 341 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, |
@@ -344,7 +363,10 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, | |||
344 | if (!HAVE_HWFP) | 363 | if (!HAVE_HWFP) |
345 | return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); | 364 | return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); |
346 | 365 | ||
347 | init_fpu(target); | 366 | ret = init_fpu(target); |
367 | if (ret) | ||
368 | return ret; | ||
369 | |||
348 | set_stopped_child_used_math(target); | 370 | set_stopped_child_used_math(target); |
349 | 371 | ||
350 | if (!cpu_has_fxsr) { | 372 | if (!cpu_has_fxsr) { |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index ead24efbcba0..0e613e7e7b5e 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -5,24 +5,34 @@ | |||
5 | #include <linux/slab.h> | 5 | #include <linux/slab.h> |
6 | #include <linux/sched.h> | 6 | #include <linux/sched.h> |
7 | 7 | ||
8 | static struct kmem_cache *task_xstate_cachep; | 8 | struct kmem_cache *task_xstate_cachep; |
9 | 9 | ||
10 | int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) | 10 | int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) |
11 | { | 11 | { |
12 | *dst = *src; | 12 | *dst = *src; |
13 | dst->thread.xstate = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL); | 13 | if (src->thread.xstate) { |
14 | if (!dst->thread.xstate) | 14 | dst->thread.xstate = kmem_cache_alloc(task_xstate_cachep, |
15 | return -ENOMEM; | 15 | GFP_KERNEL); |
16 | WARN_ON((unsigned long)dst->thread.xstate & 15); | 16 | if (!dst->thread.xstate) |
17 | memcpy(dst->thread.xstate, src->thread.xstate, xstate_size); | 17 | return -ENOMEM; |
18 | WARN_ON((unsigned long)dst->thread.xstate & 15); | ||
19 | memcpy(dst->thread.xstate, src->thread.xstate, xstate_size); | ||
20 | } | ||
18 | return 0; | 21 | return 0; |
19 | } | 22 | } |
20 | 23 | ||
21 | void free_thread_info(struct thread_info *ti) | 24 | void free_thread_xstate(struct task_struct *tsk) |
22 | { | 25 | { |
23 | kmem_cache_free(task_xstate_cachep, ti->task->thread.xstate); | 26 | if (tsk->thread.xstate) { |
24 | ti->task->thread.xstate = NULL; | 27 | kmem_cache_free(task_xstate_cachep, tsk->thread.xstate); |
28 | tsk->thread.xstate = NULL; | ||
29 | } | ||
30 | } | ||
31 | |||
25 | 32 | ||
33 | void free_thread_info(struct thread_info *ti) | ||
34 | { | ||
35 | free_thread_xstate(ti->task); | ||
26 | free_pages((unsigned long)(ti), get_order(THREAD_SIZE)); | 36 | free_pages((unsigned long)(ti), get_order(THREAD_SIZE)); |
27 | } | 37 | } |
28 | 38 | ||
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 3890a5dd25f9..7adad088e373 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -521,6 +521,10 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) | |||
521 | regs->cs = __USER_CS; | 521 | regs->cs = __USER_CS; |
522 | regs->ip = new_ip; | 522 | regs->ip = new_ip; |
523 | regs->sp = new_sp; | 523 | regs->sp = new_sp; |
524 | /* | ||
525 | * Free the old FP and other extended state | ||
526 | */ | ||
527 | free_thread_xstate(current); | ||
524 | } | 528 | } |
525 | EXPORT_SYMBOL_GPL(start_thread); | 529 | EXPORT_SYMBOL_GPL(start_thread); |
526 | 530 | ||
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index b795e831afd6..891af1a1b48a 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -533,6 +533,10 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) | |||
533 | regs->ss = __USER_DS; | 533 | regs->ss = __USER_DS; |
534 | regs->flags = 0x200; | 534 | regs->flags = 0x200; |
535 | set_fs(USER_DS); | 535 | set_fs(USER_DS); |
536 | /* | ||
537 | * Free the old FP and other extended state | ||
538 | */ | ||
539 | free_thread_xstate(current); | ||
536 | } | 540 | } |
537 | EXPORT_SYMBOL_GPL(start_thread); | 541 | EXPORT_SYMBOL_GPL(start_thread); |
538 | 542 | ||
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 8d136a73ce8e..471e694d6713 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c | |||
@@ -1148,9 +1148,22 @@ asmlinkage void math_state_restore(void) | |||
1148 | struct thread_info *thread = current_thread_info(); | 1148 | struct thread_info *thread = current_thread_info(); |
1149 | struct task_struct *tsk = thread->task; | 1149 | struct task_struct *tsk = thread->task; |
1150 | 1150 | ||
1151 | if (!tsk_used_math(tsk)) { | ||
1152 | local_irq_enable(); | ||
1153 | /* | ||
1154 | * does a slab alloc which can sleep | ||
1155 | */ | ||
1156 | if (init_fpu(tsk)) { | ||
1157 | /* | ||
1158 | * ran out of memory! | ||
1159 | */ | ||
1160 | do_group_exit(SIGKILL); | ||
1161 | return; | ||
1162 | } | ||
1163 | local_irq_disable(); | ||
1164 | } | ||
1165 | |||
1151 | clts(); /* Allow maths ops (or we recurse) */ | 1166 | clts(); /* Allow maths ops (or we recurse) */ |
1152 | if (!tsk_used_math(tsk)) | ||
1153 | init_fpu(tsk); | ||
1154 | restore_fpu(tsk); | 1167 | restore_fpu(tsk); |
1155 | thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ | 1168 | thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ |
1156 | tsk->fpu_counter++; | 1169 | tsk->fpu_counter++; |
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index dc0cb497eec3..adff76ea97c4 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c | |||
@@ -1124,10 +1124,23 @@ asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void) | |||
1124 | asmlinkage void math_state_restore(void) | 1124 | asmlinkage void math_state_restore(void) |
1125 | { | 1125 | { |
1126 | struct task_struct *me = current; | 1126 | struct task_struct *me = current; |
1127 | clts(); /* Allow maths ops (or we recurse) */ | ||
1128 | 1127 | ||
1129 | if (!used_math()) | 1128 | if (!used_math()) { |
1130 | init_fpu(me); | 1129 | local_irq_enable(); |
1130 | /* | ||
1131 | * does a slab alloc which can sleep | ||
1132 | */ | ||
1133 | if (init_fpu(me)) { | ||
1134 | /* | ||
1135 | * ran out of memory! | ||
1136 | */ | ||
1137 | do_group_exit(SIGKILL); | ||
1138 | return; | ||
1139 | } | ||
1140 | local_irq_disable(); | ||
1141 | } | ||
1142 | |||
1143 | clts(); /* Allow maths ops (or we recurse) */ | ||
1131 | restore_fpu_checking(&me->thread.xstate->fxsave); | 1144 | restore_fpu_checking(&me->thread.xstate->fxsave); |
1132 | task_thread_info(me)->status |= TS_USEDFPU; | 1145 | task_thread_info(me)->status |= TS_USEDFPU; |
1133 | me->fpu_counter++; | 1146 | me->fpu_counter++; |