author		Chen, Kenneth W <kenneth.w.chen@intel.com>	2005-09-09 16:02:02 -0400
committer	Linus Torvalds <torvalds@g5.osdl.org>	2005-09-09 16:57:31 -0400
commit		383f2835eb9afb723af71850037b2f074ac9db60
tree		1ef99fd4d7246b2afa16dc7d1514b6ff25fa8284
parent		b0d62e6d5b3318b6b722121d945afa295f7201b5
[PATCH] Prefetch kernel stacks to speed up context switch
For an architecture like ia64, the switch stack structure is fairly large (currently 528 bytes).  For context-switch-intensive applications, we found that a significant number of cache misses occur in the switch_to() function.

The following patch adds a hook in the schedule() function to prefetch the switch stack structure as soon as the 'next' task is determined.  This gives the prefetch of that structure's cache lines the maximum time to overlap with the remaining scheduling work before switch_to() runs.

Signed-off-by: Ken Chen <kenneth.w.chen@intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: "Luck, Tony" <tony.luck@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--	arch/ia64/kernel/entry.S	23
-rw-r--r--	include/asm-ia64/system.h	1
-rw-r--r--	include/linux/sched.h	5
-rw-r--r--	kernel/sched.c	1
4 files changed, 30 insertions(+), 0 deletions(-)
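The technique is generic even though the implementation below is ia64 assembly: walk a large structure one cache line at a time, issuing a prefetch for each line, so the cache fills overlap the scheduler work still in flight. A minimal, self-contained C sketch of that pattern using GCC's __builtin_prefetch; the prefetch_range() helper and the 128-byte line size are illustrative assumptions, not code from the patch:

	#include <stddef.h>

	#define CACHE_LINE	128	/* line size assumed by the ia64 code below */

	/*
	 * Sketch only: touch every cache line of a large structure (e.g. the
	 * 528-byte ia64 switch_stack) before it is used, so the lines are
	 * already being filled by the time switch_to() reads them.
	 */
	static inline void prefetch_range(const void *addr, size_t len)
	{
		const char *p = (const char *)addr;
		const char *end = p + len;

		for (; p < end; p += CACHE_LINE)
			__builtin_prefetch(p, 0, 3);	/* read, high temporal locality */
	}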
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 3c8821024509..915e12791836 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -470,6 +470,29 @@ ENTRY(load_switch_stack)
 	br.cond.sptk.many b7
 END(load_switch_stack)
 
+GLOBAL_ENTRY(prefetch_stack)
+	add r14 = -IA64_SWITCH_STACK_SIZE, sp
+	add r15 = IA64_TASK_THREAD_KSP_OFFSET, in0
+	;;
+	ld8 r16 = [r15]			// load next's stack pointer
+	lfetch.fault.excl [r14], 128
+	;;
+	lfetch.fault.excl [r14], 128
+	lfetch.fault [r16], 128
+	;;
+	lfetch.fault.excl [r14], 128
+	lfetch.fault [r16], 128
+	;;
+	lfetch.fault.excl [r14], 128
+	lfetch.fault [r16], 128
+	;;
+	lfetch.fault.excl [r14], 128
+	lfetch.fault [r16], 128
+	;;
+	lfetch.fault [r16], 128
+	br.ret.sptk.many rp
+END(prefetch_stack)
+
 GLOBAL_ENTRY(execve)
 	mov r15=__NR_execve		// put syscall number in place
 	break __BREAK_SYSCALL
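In prefetch_stack above, r14 starts just below the current stack pointer, at the area where the outgoing task's switch_stack is about to be saved, so those lines use lfetch.fault.excl (a faulting, exclusive prefetch: the lines will be written). r16 is loaded from next's thread.ksp and prefetched with plain lfetch.fault (those lines will only be read). Each lfetch post-increments its address register by 128 bytes, and five lines per region cover the 528-byte structure (ceil(528/128) = 5). A rough C rendering of the same access pattern, with illustrative stand-ins for the kernel's constants:

	#define SWITCH_STACK_SIZE	528	/* size quoted in the changelog */

	/* Sketch of prefetch_stack's access pattern; not the kernel's code. */
	void prefetch_stack_sketch(char *sp, char *next_ksp)
	{
		char *out = sp - SWITCH_STACK_SIZE;	/* r14: about to be written */
		int i;

		for (i = 0; i < 5; i++) {		/* 5 x 128 bytes >= 528 bytes */
			__builtin_prefetch(out + i * 128, 1, 3);	/* lfetch.fault.excl */
			__builtin_prefetch(next_ksp + i * 128, 0, 3);	/* lfetch.fault */
		}
	}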
diff --git a/include/asm-ia64/system.h b/include/asm-ia64/system.h
index 33256db4a7cf..635235fa1e32 100644
--- a/include/asm-ia64/system.h
+++ b/include/asm-ia64/system.h
@@ -275,6 +275,7 @@ extern void ia64_load_extra (struct task_struct *task);
  */
 #define __ARCH_WANT_UNLOCKED_CTXSW
 
+#define ARCH_HAS_PREFETCH_SWITCH_STACK
 #define ia64_platform_is(x)	(strcmp(x, platform_name) == 0)
 
 void cpu_idle_wait(void);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ea1b5f32ec5c..c551e6a1447e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -604,6 +604,11 @@ extern int groups_search(struct group_info *group_info, gid_t grp);
 #define GROUP_AT(gi, i) \
 	((gi)->blocks[(i)/NGROUPS_PER_BLOCK][(i)%NGROUPS_PER_BLOCK])
 
+#ifdef ARCH_HAS_PREFETCH_SWITCH_STACK
+extern void prefetch_stack(struct task_struct*);
+#else
+static inline void prefetch_stack(struct task_struct *t) { }
+#endif
 
 struct audit_context;	/* See audit.c */
 struct mempolicy;
diff --git a/kernel/sched.c b/kernel/sched.c
index 18b95520a2e2..2632b812cf24 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2888,6 +2888,7 @@ switch_tasks:
 	if (next == rq->idle)
 		schedstat_inc(rq, sched_goidle);
 	prefetch(next);
+	prefetch_stack(next);
 	clear_tsk_need_resched(prev);
 	rcu_qsctr_inc(task_cpu(prev));
 
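The placement of the call is the point of the patch: prefetch_stack(next) is issued immediately after 'next' is chosen and before the rest of the context-switch path, giving the prefetches the longest possible window to complete before switch_to() actually touches either switch stack. On architectures that do not define ARCH_HAS_PREFETCH_SWITCH_STACK, the empty inline stub from sched.h compiles away, so the hook adds no cost there.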