diff options
author | Vineet Gupta <Vineet.Gupta1@synopsys.com> | 2013-11-12 18:08:46 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-11-12 22:09:13 -0500 |
commit | c375f15a434db1867cb004bafba92aba739e4e39 (patch) | |
tree | 419ff5ddf823f9b43d95d1fc6345611989364cde /arch | |
parent | 616c05d110bb4ef8203f49c9d2476874077c2f6a (diff) |
x86: move fpu_counter into ARCH specific thread_struct
Only a couple of arches (sh/x86) use fpu_counter in task_struct so it can
be moved out into ARCH specific thread_struct, reducing the size of
task_struct for other arches.
Compile-tested with i386_defconfig + gcc 4.7.3.
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: Paul Mundt <paul.mundt@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'arch')
-rw-r--r-- | arch/x86/include/asm/fpu-internal.h | 10 | ||||
-rw-r--r-- | arch/x86/include/asm/processor.h | 9 | ||||
-rw-r--r-- | arch/x86/kernel/i387.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/process_32.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/process_64.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/traps.c | 2 |
6 files changed, 19 insertions, 10 deletions
diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index 4d0bda7b11e3..c49a613c6452 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -365,7 +365,7 @@ static inline void drop_fpu(struct task_struct *tsk) | |||
365 | * Forget coprocessor state.. | 365 | * Forget coprocessor state.. |
366 | */ | 366 | */ |
367 | preempt_disable(); | 367 | preempt_disable(); |
368 | tsk->fpu_counter = 0; | 368 | tsk->thread.fpu_counter = 0; |
369 | __drop_fpu(tsk); | 369 | __drop_fpu(tsk); |
370 | clear_used_math(); | 370 | clear_used_math(); |
371 | preempt_enable(); | 371 | preempt_enable(); |
@@ -424,7 +424,7 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta | |||
424 | * or if the past 5 consecutive context-switches used math. | 424 | * or if the past 5 consecutive context-switches used math. |
425 | */ | 425 | */ |
426 | fpu.preload = tsk_used_math(new) && (use_eager_fpu() || | 426 | fpu.preload = tsk_used_math(new) && (use_eager_fpu() || |
427 | new->fpu_counter > 5); | 427 | new->thread.fpu_counter > 5); |
428 | if (__thread_has_fpu(old)) { | 428 | if (__thread_has_fpu(old)) { |
429 | if (!__save_init_fpu(old)) | 429 | if (!__save_init_fpu(old)) |
430 | cpu = ~0; | 430 | cpu = ~0; |
@@ -433,16 +433,16 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta | |||
433 | 433 | ||
434 | /* Don't change CR0.TS if we just switch! */ | 434 | /* Don't change CR0.TS if we just switch! */ |
435 | if (fpu.preload) { | 435 | if (fpu.preload) { |
436 | new->fpu_counter++; | 436 | new->thread.fpu_counter++; |
437 | __thread_set_has_fpu(new); | 437 | __thread_set_has_fpu(new); |
438 | prefetch(new->thread.fpu.state); | 438 | prefetch(new->thread.fpu.state); |
439 | } else if (!use_eager_fpu()) | 439 | } else if (!use_eager_fpu()) |
440 | stts(); | 440 | stts(); |
441 | } else { | 441 | } else { |
442 | old->fpu_counter = 0; | 442 | old->thread.fpu_counter = 0; |
443 | old->thread.fpu.last_cpu = ~0; | 443 | old->thread.fpu.last_cpu = ~0; |
444 | if (fpu.preload) { | 444 | if (fpu.preload) { |
445 | new->fpu_counter++; | 445 | new->thread.fpu_counter++; |
446 | if (!use_eager_fpu() && fpu_lazy_restore(new, cpu)) | 446 | if (!use_eager_fpu() && fpu_lazy_restore(new, cpu)) |
447 | fpu.preload = 0; | 447 | fpu.preload = 0; |
448 | else | 448 | else |
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 987c75ecc334..7b034a4057f9 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -488,6 +488,15 @@ struct thread_struct { | |||
488 | unsigned long iopl; | 488 | unsigned long iopl; |
489 | /* Max allowed port in the bitmap, in bytes: */ | 489 | /* Max allowed port in the bitmap, in bytes: */ |
490 | unsigned io_bitmap_max; | 490 | unsigned io_bitmap_max; |
491 | /* | ||
492 | * fpu_counter contains the number of consecutive context switches | ||
493 | * in which the FPU is used. If this is over a threshold, the lazy FPU | ||
494 | * saving becomes unlazy to save the trap. This is an unsigned char | ||
495 | * so that after 256 times the counter wraps and the behavior turns | ||
496 | * lazy again; this is to deal with bursty apps that only use the FPU | ||
497 | * for a short time. | ||
498 | */ | ||
499 | unsigned char fpu_counter; | ||
491 | }; | 500 | }; |
492 | 501 | ||
493 | /* | 502 | /* |
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 5d576ab34403..e8368c6dd2a2 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -100,7 +100,7 @@ void unlazy_fpu(struct task_struct *tsk) | |||
100 | __save_init_fpu(tsk); | 100 | __save_init_fpu(tsk); |
101 | __thread_fpu_end(tsk); | 101 | __thread_fpu_end(tsk); |
102 | } else | 102 | } else |
103 | tsk->fpu_counter = 0; | 103 | tsk->thread.fpu_counter = 0; |
104 | preempt_enable(); | 104 | preempt_enable(); |
105 | } | 105 | } |
106 | EXPORT_SYMBOL(unlazy_fpu); | 106 | EXPORT_SYMBOL(unlazy_fpu); |
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index c2ec1aa6d454..6f1236c29c4b 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -153,7 +153,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, | |||
153 | childregs->orig_ax = -1; | 153 | childregs->orig_ax = -1; |
154 | childregs->cs = __KERNEL_CS | get_kernel_rpl(); | 154 | childregs->cs = __KERNEL_CS | get_kernel_rpl(); |
155 | childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED; | 155 | childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED; |
156 | p->fpu_counter = 0; | 156 | p->thread.fpu_counter = 0; |
157 | p->thread.io_bitmap_ptr = NULL; | 157 | p->thread.io_bitmap_ptr = NULL; |
158 | memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); | 158 | memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); |
159 | return 0; | 159 | return 0; |
@@ -166,7 +166,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, | |||
166 | p->thread.ip = (unsigned long) ret_from_fork; | 166 | p->thread.ip = (unsigned long) ret_from_fork; |
167 | task_user_gs(p) = get_user_gs(current_pt_regs()); | 167 | task_user_gs(p) = get_user_gs(current_pt_regs()); |
168 | 168 | ||
169 | p->fpu_counter = 0; | 169 | p->thread.fpu_counter = 0; |
170 | p->thread.io_bitmap_ptr = NULL; | 170 | p->thread.io_bitmap_ptr = NULL; |
171 | tsk = current; | 171 | tsk = current; |
172 | err = -ENOMEM; | 172 | err = -ENOMEM; |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 45ab4d6fc8a7..10fe4c189621 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -163,7 +163,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, | |||
163 | p->thread.sp = (unsigned long) childregs; | 163 | p->thread.sp = (unsigned long) childregs; |
164 | p->thread.usersp = me->thread.usersp; | 164 | p->thread.usersp = me->thread.usersp; |
165 | set_tsk_thread_flag(p, TIF_FORK); | 165 | set_tsk_thread_flag(p, TIF_FORK); |
166 | p->fpu_counter = 0; | 166 | p->thread.fpu_counter = 0; |
167 | p->thread.io_bitmap_ptr = NULL; | 167 | p->thread.io_bitmap_ptr = NULL; |
168 | 168 | ||
169 | savesegment(gs, p->thread.gsindex); | 169 | savesegment(gs, p->thread.gsindex); |
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 729aa779ff75..996ce2313ce6 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -653,7 +653,7 @@ void math_state_restore(void) | |||
653 | return; | 653 | return; |
654 | } | 654 | } |
655 | 655 | ||
656 | tsk->fpu_counter++; | 656 | tsk->thread.fpu_counter++; |
657 | } | 657 | } |
658 | EXPORT_SYMBOL_GPL(math_state_restore); | 658 | EXPORT_SYMBOL_GPL(math_state_restore); |
659 | 659 | ||