author     Gregory Haskins <ghaskins@novell.com>    2008-01-25 15:08:18 -0500
committer  Ingo Molnar <mingo@elte.hu>              2008-01-25 15:08:18 -0500
commit     637f50851b57a32f7ec67c50fc16f1601ab1a87a (patch)
tree       ee8a4bc19fda1783bff8aa44abdcb6d8596aa7f1
parent     57d885fea0da0e9541d7730a9e1dcf734981a173 (diff)
sched: only balance our RT tasks within our domain
We move the rt-overload data as the first global to "per-domain"
reclassification. This limits the scope of overload-related cache-line
bouncing to stay within a specified partition instead of affecting all
cpus in the system.

Finally, we limit the scope of find_lowest_cpu searches to the domain
instead of the entire system. Note that we would always respect domain
boundaries even without this patch, but we first would scan potentially
all cpus before whittling the list down. Now we can avoid looking at RQs
that are out of scope, again reducing cache-line hits.

Note: in some cases, task->cpus_allowed will effectively reduce our search
to within our domain. However, I believe there are cases where the
cpus_allowed mask may be all ones and therefore we err on the side of
caution. If it can be optimized later, so be it.

Signed-off-by: Gregory Haskins <ghaskins@novell.com>
CC: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
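To make the publish-before-count ordering concrete, here is a minimal
user-space sketch (not kernel code) of the per-domain overload tracking the
patch introduces. The names toy_root_domain, toy_rq and the plain bitmask are
illustrative only; C11 release ordering stands in for the kernel's wmb(), and
a plain bit operation stands in for cpu_set()/cpu_clear(), which are atomic in
the kernel.

/*
 * Sketch of the rto_mask/rto_count pattern: set the mask, then make the
 * count visible; readers check the count before looking at the mask.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct toy_root_domain {
	unsigned long rto_mask;   /* one bit per overloaded CPU in this domain */
	atomic_int rto_count;     /* number of overloaded CPUs in this domain  */
};

struct toy_rq {
	int cpu;
	struct toy_root_domain *rd;   /* the domain this runqueue belongs to */
};

static bool rt_overloaded(const struct toy_rq *rq)
{
	/* Readers check the cheap counter before walking the mask. */
	return atomic_load(&rq->rd->rto_count) != 0;
}

static void rt_set_overload(struct toy_rq *rq)
{
	rq->rd->rto_mask |= 1UL << rq->cpu;
	/*
	 * Publish the mask before the counter becomes visible; the release
	 * increment plays the role of the wmb() + atomic_inc() pair in the
	 * patch below.
	 */
	atomic_fetch_add_explicit(&rq->rd->rto_count, 1, memory_order_release);
}

static void rt_clear_overload(struct toy_rq *rq)
{
	/* As the patch notes, the order of these two does not matter. */
	atomic_fetch_sub(&rq->rd->rto_count, 1);
	rq->rd->rto_mask &= ~(1UL << rq->cpu);
}

int main(void)
{
	struct toy_root_domain rd = { 0 };
	struct toy_rq rq = { .cpu = 2, .rd = &rd };

	rt_set_overload(&rq);
	printf("overloaded=%d mask=%#lx\n", rt_overloaded(&rq), rd.rto_mask);
	rt_clear_overload(&rq);
	printf("overloaded=%d mask=%#lx\n", rt_overloaded(&rq), rd.rto_mask);
	return 0;
}

The point of the ordering shows up in pull_rt_task() below: a CPU that reads
rto_count == 0 skips the pull entirely, and one that reads a non-zero count is
guaranteed to also see the bit that was set in rto_mask.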
-rw-r--r--  kernel/sched.c     |  7
-rw-r--r--  kernel/sched_rt.c  | 57
2 files changed, 38 insertions(+), 26 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index 34b7d721d735..35ef06c99214 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -365,6 +365,13 @@ struct root_domain {
 	atomic_t refcount;
 	cpumask_t span;
 	cpumask_t online;
+
+	/*
+	 * The "RT overload" flag: it gets set if a CPU has more than
+	 * one runnable RT task.
+	 */
+	cpumask_t rto_mask;
+	atomic_t rto_count;
 };
 
 static struct root_domain def_root_domain;
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 4d0a60e47dfa..b049e5110eea 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -5,22 +5,14 @@
 
 #ifdef CONFIG_SMP
 
-/*
- * The "RT overload" flag: it gets set if a CPU has more than
- * one runnable RT task.
- */
-static cpumask_t rt_overload_mask;
-static atomic_t rto_count;
-
-static inline int rt_overloaded(void)
+static inline int rt_overloaded(struct rq *rq)
 {
-	return atomic_read(&rto_count);
+	return atomic_read(&rq->rd->rto_count);
 }
 
 static inline void rt_set_overload(struct rq *rq)
 {
-	rq->rt.overloaded = 1;
-	cpu_set(rq->cpu, rt_overload_mask);
+	cpu_set(rq->cpu, rq->rd->rto_mask);
 	/*
 	 * Make sure the mask is visible before we set
 	 * the overload count. That is checked to determine
@@ -29,23 +21,25 @@ static inline void rt_set_overload(struct rq *rq)
 	 * updated yet.
 	 */
 	wmb();
-	atomic_inc(&rto_count);
+	atomic_inc(&rq->rd->rto_count);
 }
 
 static inline void rt_clear_overload(struct rq *rq)
 {
 	/* the order here really doesn't matter */
-	atomic_dec(&rto_count);
-	cpu_clear(rq->cpu, rt_overload_mask);
-	rq->rt.overloaded = 0;
+	atomic_dec(&rq->rd->rto_count);
+	cpu_clear(rq->cpu, rq->rd->rto_mask);
 }
 
 static void update_rt_migration(struct rq *rq)
 {
-	if (rq->rt.rt_nr_migratory && (rq->rt.rt_nr_running > 1))
+	if (rq->rt.rt_nr_migratory && (rq->rt.rt_nr_running > 1)) {
 		rt_set_overload(rq);
-	else
+		rq->rt.overloaded = 1;
+	} else {
 		rt_clear_overload(rq);
+		rq->rt.overloaded = 0;
+	}
 }
 #endif /* CONFIG_SMP */
 
@@ -306,7 +300,7 @@ static int find_lowest_cpus(struct task_struct *task, cpumask_t *lowest_mask)
 	int count = 0;
 	int cpu;
 
-	cpus_and(*lowest_mask, cpu_online_map, task->cpus_allowed);
+	cpus_and(*lowest_mask, task_rq(task)->rd->online, task->cpus_allowed);
 
 	/*
 	 * Scan each rq for the lowest prio.
@@ -580,18 +574,12 @@ static int pull_rt_task(struct rq *this_rq)
 	struct task_struct *p, *next;
 	struct rq *src_rq;
 
-	/*
-	 * If cpusets are used, and we have overlapping
-	 * run queue cpusets, then this algorithm may not catch all.
-	 * This is just the price you pay on trying to keep
-	 * dirtying caches down on large SMP machines.
-	 */
-	if (likely(!rt_overloaded()))
+	if (likely(!rt_overloaded(this_rq)))
 		return 0;
 
 	next = pick_next_task_rt(this_rq);
 
-	for_each_cpu_mask(cpu, rt_overload_mask) {
+	for_each_cpu_mask(cpu, this_rq->rd->rto_mask) {
 		if (this_cpu == cpu)
 			continue;
 
@@ -811,6 +799,20 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p)
 	}
 }
 
+/* Assumes rq->lock is held */
+static void join_domain_rt(struct rq *rq)
+{
+	if (rq->rt.overloaded)
+		rt_set_overload(rq);
+}
+
+/* Assumes rq->lock is held */
+static void leave_domain_rt(struct rq *rq)
+{
+	if (rq->rt.overloaded)
+		rt_clear_overload(rq);
+}
+
 static void set_curr_task_rt(struct rq *rq)
 {
 	struct task_struct *p = rq->curr;
@@ -840,4 +842,7 @@ const struct sched_class rt_sched_class = {
 
 	.set_curr_task = set_curr_task_rt,
 	.task_tick = task_tick_rt,
+
+	.join_domain = join_domain_rt,
+	.leave_domain = leave_domain_rt,
 };