author    Gregory Haskins <ghaskins@novell.com>   2008-01-25 15:08:18 -0500
committer Ingo Molnar <mingo@elte.hu>             2008-01-25 15:08:18 -0500
commit    637f50851b57a32f7ec67c50fc16f1601ab1a87a
tree      ee8a4bc19fda1783bff8aa44abdcb6d8596aa7f1
parent    57d885fea0da0e9541d7730a9e1dcf734981a173
sched: only balance our RT tasks within our domain
We move the rt-overload data into the root domain, making it the first
global to be reclassified as per-domain state. This confines
overload-related cache-line bouncing to a specific partition instead of
letting it affect all cpus in the system.
Finally, we limit the scope of find_lowest_cpu searches to the domain
instead of the entire system. Note that we would respect domain
boundaries even without this patch, but we would first scan potentially
all cpus before whittling the list down. Now we avoid looking at RQs
that are out of scope entirely, again reducing cache-line traffic.
Note: In some cases, task->cpus_allowed will effectively reduce our search
to within our domain. However, I believe there are cases where the
cpus_allowed mask may be all ones and therefore we err on the side of
caution. If it can be optimized later, so be it.
Signed-off-by: Gregory Haskins <ghaskins@novell.com>
CC: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--  kernel/sched.c    |  7
-rw-r--r--  kernel/sched_rt.c | 57
2 files changed, 38 insertions(+), 26 deletions(-)
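Before the diff itself, here is a minimal user-space sketch of the bookkeeping pattern the commit message describes: the overload mask and counter live in a per-domain structure reached through the runqueue, the mask is published before the counter is bumped (the wmb() in the real code), and readers only ever touch their own domain's counter. The names (toy_root_domain, toy_rq) and the use of C11 atomics with a plain bitmask are illustrative stand-ins for the kernel's root_domain, rq, cpumask_t and barrier primitives; this is not the kernel code.

```c
#include <stdatomic.h>
#include <stdio.h>

struct toy_root_domain {
        _Atomic unsigned long rto_mask; /* which CPUs in this domain are overloaded */
        atomic_int rto_count;           /* fast-path check: any overloaded CPU here? */
};

struct toy_rq {
        int cpu;
        struct toy_root_domain *rd;     /* the domain this runqueue belongs to */
        int rt_nr_running;
        int rt_nr_migratory;
        int overloaded;
};

/* Readers pay only for a counter read scoped to their own domain.
 * The (at least acquire) load pairs with the release increment below. */
static int rt_overloaded(struct toy_rq *rq)
{
        return atomic_load(&rq->rd->rto_count);
}

static void rt_set_overload(struct toy_rq *rq)
{
        atomic_fetch_or(&rq->rd->rto_mask, 1UL << rq->cpu);
        /* Publish the mask before bumping the count, mirroring the wmb()
         * in the patch: a reader that sees rto_count > 0 must also see
         * the bit it is about to scan for. */
        atomic_fetch_add_explicit(&rq->rd->rto_count, 1, memory_order_release);
}

static void rt_clear_overload(struct toy_rq *rq)
{
        /* as in the patch, the order here doesn't matter */
        atomic_fetch_sub(&rq->rd->rto_count, 1);
        atomic_fetch_and(&rq->rd->rto_mask, ~(1UL << rq->cpu));
}

static void update_rt_migration(struct toy_rq *rq)
{
        int want = rq->rt_nr_migratory && rq->rt_nr_running > 1;

        /* Guarded on the cached flag so this standalone sketch never
         * under-counts; the kernel relies on its own call-site invariants. */
        if (want && !rq->overloaded) {
                rt_set_overload(rq);
                rq->overloaded = 1;
        } else if (!want && rq->overloaded) {
                rt_clear_overload(rq);
                rq->overloaded = 0;
        }
}

int main(void)
{
        struct toy_root_domain rd = { 0 };
        struct toy_rq rq = { .cpu = 2, .rd = &rd,
                             .rt_nr_running = 2, .rt_nr_migratory = 1 };

        update_rt_migration(&rq);
        printf("overloaded in domain: %d, mask: %#lx\n",
               rt_overloaded(&rq), atomic_load(&rd.rto_mask));

        rq.rt_nr_running = 1;
        update_rt_migration(&rq);
        printf("overloaded in domain: %d, mask: %#lx\n",
               rt_overloaded(&rq), atomic_load(&rd.rto_mask));
        return 0;
}
```

The point is only the data placement and the set-mask-then-bump-count ordering; the diff below implements the same pattern with cpu_set()/wmb()/atomic_inc() on rq->rd.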
diff --git a/kernel/sched.c b/kernel/sched.c
index 34b7d721d735..35ef06c99214 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -365,6 +365,13 @@ struct root_domain {
         atomic_t refcount;
         cpumask_t span;
         cpumask_t online;
+
+        /*
+         * The "RT overload" flag: it gets set if a CPU has more than
+         * one runnable RT task.
+         */
+        cpumask_t rto_mask;
+        atomic_t rto_count;
 };
 
 static struct root_domain def_root_domain;
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 4d0a60e47dfa..b049e5110eea 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -5,22 +5,14 @@
 
 #ifdef CONFIG_SMP
 
-/*
- * The "RT overload" flag: it gets set if a CPU has more than
- * one runnable RT task.
- */
-static cpumask_t rt_overload_mask;
-static atomic_t rto_count;
-
-static inline int rt_overloaded(void)
+static inline int rt_overloaded(struct rq *rq)
 {
-        return atomic_read(&rto_count);
+        return atomic_read(&rq->rd->rto_count);
 }
 
 static inline void rt_set_overload(struct rq *rq)
 {
-        rq->rt.overloaded = 1;
-        cpu_set(rq->cpu, rt_overload_mask);
+        cpu_set(rq->cpu, rq->rd->rto_mask);
         /*
          * Make sure the mask is visible before we set
          * the overload count. That is checked to determine
@@ -29,23 +21,25 @@ static inline void rt_set_overload(struct rq *rq)
          * updated yet.
          */
         wmb();
-        atomic_inc(&rto_count);
+        atomic_inc(&rq->rd->rto_count);
 }
 
 static inline void rt_clear_overload(struct rq *rq)
 {
         /* the order here really doesn't matter */
-        atomic_dec(&rto_count);
-        cpu_clear(rq->cpu, rt_overload_mask);
-        rq->rt.overloaded = 0;
+        atomic_dec(&rq->rd->rto_count);
+        cpu_clear(rq->cpu, rq->rd->rto_mask);
 }
 
 static void update_rt_migration(struct rq *rq)
 {
-        if (rq->rt.rt_nr_migratory && (rq->rt.rt_nr_running > 1))
+        if (rq->rt.rt_nr_migratory && (rq->rt.rt_nr_running > 1)) {
                 rt_set_overload(rq);
-        else
+                rq->rt.overloaded = 1;
+        } else {
                 rt_clear_overload(rq);
+                rq->rt.overloaded = 0;
+        }
 }
 #endif /* CONFIG_SMP */
 
@@ -306,7 +300,7 @@ static int find_lowest_cpus(struct task_struct *task, cpumask_t *lowest_mask)
         int count = 0;
         int cpu;
 
-        cpus_and(*lowest_mask, cpu_online_map, task->cpus_allowed);
+        cpus_and(*lowest_mask, task_rq(task)->rd->online, task->cpus_allowed);
 
         /*
          * Scan each rq for the lowest prio.
@@ -580,18 +574,12 @@ static int pull_rt_task(struct rq *this_rq)
         struct task_struct *p, *next;
         struct rq *src_rq;
 
-        /*
-         * If cpusets are used, and we have overlapping
-         * run queue cpusets, then this algorithm may not catch all.
-         * This is just the price you pay on trying to keep
-         * dirtying caches down on large SMP machines.
-         */
-        if (likely(!rt_overloaded()))
+        if (likely(!rt_overloaded(this_rq)))
                 return 0;
 
         next = pick_next_task_rt(this_rq);
 
-        for_each_cpu_mask(cpu, rt_overload_mask) {
+        for_each_cpu_mask(cpu, this_rq->rd->rto_mask) {
                 if (this_cpu == cpu)
                         continue;
 
@@ -811,6 +799,20 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p)
         }
 }
 
+/* Assumes rq->lock is held */
+static void join_domain_rt(struct rq *rq)
+{
+        if (rq->rt.overloaded)
+                rt_set_overload(rq);
+}
+
+/* Assumes rq->lock is held */
+static void leave_domain_rt(struct rq *rq)
+{
+        if (rq->rt.overloaded)
+                rt_clear_overload(rq);
+}
+
 static void set_curr_task_rt(struct rq *rq)
 {
         struct task_struct *p = rq->curr;
@@ -840,4 +842,7 @@ const struct sched_class rt_sched_class = {
 
         .set_curr_task = set_curr_task_rt,
         .task_tick = task_tick_rt,
+
+        .join_domain = join_domain_rt,
+        .leave_domain = leave_domain_rt,
 };
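As a rough illustration of the find_lowest_cpus() change and the cpus_allowed caveat from the commit message: when a task's affinity mask is all ones, intersecting it with the system-wide online map leaves every CPU in the candidate set, while intersecting it with the root domain's online mask keeps the scan inside the task's partition. The bitmask values below are made up for the example; cpumask_t and the real maps are much wider in practice.

```c
#include <stdio.h>

#define CPU_BIT(c) (1UL << (c))

int main(void)
{
        unsigned long cpu_online_map = 0xffUL;          /* CPUs 0-7 online system-wide */
        unsigned long rd_online      = CPU_BIT(4) | CPU_BIT(5) |
                                       CPU_BIT(6) | CPU_BIT(7); /* this task's domain: CPUs 4-7 */
        unsigned long cpus_allowed   = 0xffUL;          /* task affinity: all ones */

        /* before the patch: the scan may visit every online CPU */
        unsigned long old_mask = cpu_online_map & cpus_allowed;
        /* after the patch: the scan never leaves the task's partition */
        unsigned long new_mask = rd_online & cpus_allowed;

        printf("old lowest_mask: %#lx\nnew lowest_mask: %#lx\n", old_mask, new_mask);
        return 0;
}
```

The join_domain_rt()/leave_domain_rt() hooks added at the end of the diff keep this per-domain state consistent: when a runqueue leaves or joins a root domain, its overloaded flag is replayed into that domain's rto_mask and rto_count, so the pull path never consults stale data from a previous partitioning.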