author		Peter Zijlstra <a.p.zijlstra@chello.nl>	2011-04-05 11:23:58 -0400
committer	Ingo Molnar <mingo@elte.hu>	2011-04-14 02:52:41 -0400
commit		317f394160e9beb97d19a84c39b7e5eb3d7815a8
tree		486f268e9ec010744c17333f0f543da9d3909ff4
parent		c05fbafba1c5482bee399b360288fa405415e126
sched: Move the second half of ttwu() to the remote cpu
Now that we've removed the rq->lock requirement from the first part of ttwu()
and can compute placement without holding any rq->lock, ensure we execute the
second half of ttwu() on the actual cpu we want the task to run on.

This avoids having to take rq->lock and doing the task enqueue remotely,
saving lots on cacheline transfers.

As measured using: http://oss.oracle.com/~mason/sembench.c

  $ for i in /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor ; do echo performance > $i; done
  $ echo 4096 32000 64 128 > /proc/sys/kernel/sem
  $ ./sembench -t 2048 -w 1900 -o 0

  unpatched: run time 30 seconds 647278 worker burns per second
  patched:   run time 30 seconds 816715 worker burns per second

Reviewed-by: Frank Rowand <frank.rowand@am.sony.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20110405152729.515897185@chello.nl
-rw-r--r--	include/linux/sched.h	3
-rw-r--r--	init/Kconfig	5
-rw-r--r--	kernel/sched.c	56
-rw-r--r--	kernel/sched_features.h	6
4 files changed, 69 insertions(+), 1 deletion(-)
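Aside: the lock-free queueing the patch relies on is easier to see outside the
kernel. Below is a minimal, single-threaded userspace sketch of the same idea
as ttwu_queue_remote()/sched_ttwu_pending(): wakers push tasks onto a shared
singly linked list with a compare-and-swap loop, and the target CPU detaches
the whole chain with one exchange and walks it. The names (struct task,
queue_remote, drain_pending) and the use of C11 atomics in place of the
kernel's cmpxchg()/xchg() are illustrative assumptions, not part of the patch;
the IPI and rq->lock handling are reduced to printfs.

/*
 * Userspace sketch (illustrative names, not kernel code):
 * queue_remote() mirrors ttwu_queue_remote(),
 * drain_pending() mirrors sched_ttwu_pending().
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct task {
        int id;
        struct task *wake_entry;                /* link in the pending list */
};

static _Atomic(struct task *) wake_list;        /* analogue of rq->wake_list */

/* Waker side: push p onto the list without taking any lock.
 * Returns true if the list was empty, i.e. the caller should "send the IPI". */
static bool queue_remote(struct task *p)
{
        struct task *next = atomic_load(&wake_list);

        for (;;) {
                p->wake_entry = next;
                /* On failure, next is refreshed with the current head. */
                if (atomic_compare_exchange_weak(&wake_list, &next, p))
                        break;
        }
        return next == NULL;
}

/* Target side: grab the whole chain in one go, then walk it. */
static void drain_pending(void)
{
        struct task *list = atomic_exchange(&wake_list, NULL);

        while (list) {
                struct task *p = list;

                list = list->wake_entry;
                printf("activate task %d\n", p->id);    /* ttwu_do_activate() */
        }
}

int main(void)
{
        struct task a = { .id = 1 }, b = { .id = 2 };

        if (queue_remote(&a))
                puts("list was empty: would smp_send_reschedule()");
        queue_remote(&b);

        drain_pending();        /* what scheduler_ipi() ends up doing */
        return 0;
}

As in the patch, only the waker that finds the list empty needs to kick the
target CPU (the smp_send_reschedule() call in ttwu_queue_remote()), so a burst
of wakeups aimed at one CPU coalesces into a single IPI.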
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 25c50317ddc1..e09dafa6e149 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1203,6 +1203,7 @@ struct task_struct {
 	int lock_depth;		/* BKL lock depth */
 
 #ifdef CONFIG_SMP
+	struct task_struct *wake_entry;
 	int on_cpu;
 #endif
 	int on_rq;
@@ -2192,7 +2193,7 @@ extern void set_task_comm(struct task_struct *tsk, char *from);
 extern char *get_task_comm(char *to, struct task_struct *tsk);
 
 #ifdef CONFIG_SMP
-static inline void scheduler_ipi(void) { }
+void scheduler_ipi(void);
 extern unsigned long wait_task_inactive(struct task_struct *, long match_state);
 #else
 static inline void scheduler_ipi(void) { }
diff --git a/init/Kconfig b/init/Kconfig
index 56240e724d9a..32745bfe059e 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -827,6 +827,11 @@ config SCHED_AUTOGROUP
 	  desktop applications. Task group autogeneration is currently based
 	  upon task session.
 
+config SCHED_TTWU_QUEUE
+	bool
+	depends on !SPARC32
+	default y
+
 config MM_OWNER
 	bool
 
diff --git a/kernel/sched.c b/kernel/sched.c
index 7d8b85fcdf06..9e3ede120e81 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -556,6 +556,10 @@ struct rq {
 	unsigned int ttwu_count;
 	unsigned int ttwu_local;
 #endif
+
+#ifdef CONFIG_SMP
+	struct task_struct *wake_list;
+#endif
 };
 
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
@@ -2516,10 +2520,61 @@ static int ttwu_remote(struct task_struct *p, int wake_flags)
 	return ret;
 }
 
+#ifdef CONFIG_SMP
+static void sched_ttwu_pending(void)
+{
+	struct rq *rq = this_rq();
+	struct task_struct *list = xchg(&rq->wake_list, NULL);
+
+	if (!list)
+		return;
+
+	raw_spin_lock(&rq->lock);
+
+	while (list) {
+		struct task_struct *p = list;
+		list = list->wake_entry;
+		ttwu_do_activate(rq, p, 0);
+	}
+
+	raw_spin_unlock(&rq->lock);
+}
+
+void scheduler_ipi(void)
+{
+	sched_ttwu_pending();
+}
+
+static void ttwu_queue_remote(struct task_struct *p, int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+	struct task_struct *next = rq->wake_list;
+
+	for (;;) {
+		struct task_struct *old = next;
+
+		p->wake_entry = next;
+		next = cmpxchg(&rq->wake_list, old, p);
+		if (next == old)
+			break;
+	}
+
+	if (!next)
+		smp_send_reschedule(cpu);
+}
+#endif
+
 static void ttwu_queue(struct task_struct *p, int cpu)
 {
 	struct rq *rq = cpu_rq(cpu);
 
+#if defined(CONFIG_SMP) && defined(CONFIG_SCHED_TTWU_QUEUE)
+	if (sched_feat(TTWU_QUEUE) && cpu != smp_processor_id()) {
+		ttwu_queue_remote(p, cpu);
+		return;
+	}
+#endif
+
 	raw_spin_lock(&rq->lock);
 	ttwu_do_activate(rq, p, 0);
 	raw_spin_unlock(&rq->lock);
@@ -6331,6 +6386,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 
 #ifdef CONFIG_HOTPLUG_CPU
 	case CPU_DYING:
+		sched_ttwu_pending();
 		/* Update our root-domain */
 		raw_spin_lock_irqsave(&rq->lock, flags);
 		if (rq->rd) {
diff --git a/kernel/sched_features.h b/kernel/sched_features.h
index 68e69acc29b9..be40f7371ee1 100644
--- a/kernel/sched_features.h
+++ b/kernel/sched_features.h
@@ -64,3 +64,9 @@ SCHED_FEAT(OWNER_SPIN, 1)
  * Decrement CPU power based on irq activity
  */
 SCHED_FEAT(NONIRQ_POWER, 1)
+
+/*
+ * Queue remote wakeups on the target CPU and process them
+ * using the scheduler IPI. Reduces rq->lock contention/bounces.
+ */
+SCHED_FEAT(TTWU_QUEUE, 1)