aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Nick Piggin <nickpiggin@yahoo.com.au> 2005-06-25 17:57:09 -0400
committer Linus Torvalds <torvalds@ppc970.osdl.org> 2005-06-25 19:24:41 -0400
commit 3950745131e23472fb5ace2ee4a2093e7590ec69 (patch)
tree 8b3e738f2c11ee3e4c60d8960e7bdd3c006f7154
parent 16cfb1c04c3cbe3759f339d3333e7e1e7d59712a (diff)
[PATCH] sched: fix SMT scheduling problems
SMT balancing has a couple of problems. Firstly, active_load_balance is too complex - basically it should be a dumb helper for when the periodic balancer has determined there is an imbalance, but gets stuck because the task is running.

So rip out all its "smarts", and just make it move one task to the target CPU.

Second, the busy CPU's sched-domain tree was being used for active balancing. This means that it may not see that nr_balance_failed has reached a critical level. So use the target CPU's sched-domain tree for this. We can do this because we hold its runqueue lock.

Lastly, reset nr_balance_failed to a point where we allow cache hot migration. This will help ensure active load balancing is successful.

Thanks to Suresh Siddha for pointing out these issues.

Signed-off-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- kernel/sched.c 76
1 files changed, 31 insertions, 45 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 03d737791c1a..41e69b5ee652 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1995,7 +1995,7 @@ static int load_balance(int this_cpu, runqueue_t *this_rq,
1995 * We've kicked active balancing, reset the failure 1995 * We've kicked active balancing, reset the failure
1996 * counter. 1996 * counter.
1997 */ 1997 */
1998 sd->nr_balance_failed = sd->cache_nice_tries; 1998 sd->nr_balance_failed = sd->cache_nice_tries+1;
1999 } 1999 }
2000 } else 2000 } else
2001 sd->nr_balance_failed = 0; 2001 sd->nr_balance_failed = 0;
@@ -2106,56 +2106,42 @@ static inline void idle_balance(int this_cpu, runqueue_t *this_rq)
2106static void active_load_balance(runqueue_t *busiest_rq, int busiest_cpu) 2106static void active_load_balance(runqueue_t *busiest_rq, int busiest_cpu)
2107{ 2107{
2108 struct sched_domain *sd; 2108 struct sched_domain *sd;
2109 struct sched_group *cpu_group;
2110 runqueue_t *target_rq; 2109 runqueue_t *target_rq;
2111 cpumask_t visited_cpus; 2110 int target_cpu = busiest_rq->push_cpu;
2112 int cpu; 2111
2112 if (busiest_rq->nr_running <= 1)
2113 /* no task to move */
2114 return;
2115
2116 target_rq = cpu_rq(target_cpu);
2113 2117
2114 /* 2118 /*
2115 * Search for suitable CPUs to push tasks to in successively higher 2119 * This condition is "impossible", if it occurs
2116 * domains with SD_LOAD_BALANCE set. 2120 * we need to fix it. Originally reported by
2121 * Bjorn Helgaas on a 128-cpu setup.
2117 */ 2122 */
2118 visited_cpus = CPU_MASK_NONE; 2123 BUG_ON(busiest_rq == target_rq);
2119 for_each_domain(busiest_cpu, sd) {
2120 if (!(sd->flags & SD_LOAD_BALANCE))
2121 /* no more domains to search */
2122 break;
2123 2124
2124 schedstat_inc(sd, alb_cnt); 2125 /* move a task from busiest_rq to target_rq */
2126 double_lock_balance(busiest_rq, target_rq);
2125 2127
2126 cpu_group = sd->groups; 2128 /* Search for an sd spanning us and the target CPU. */
2127 do { 2129 for_each_domain(target_cpu, sd)
2128 for_each_cpu_mask(cpu, cpu_group->cpumask) { 2130 if ((sd->flags & SD_LOAD_BALANCE) &&
2129 if (busiest_rq->nr_running <= 1) 2131 cpu_isset(busiest_cpu, sd->span))
2130 /* no more tasks left to move */ 2132 break;
2131 return; 2133
2132 if (cpu_isset(cpu, visited_cpus)) 2134 if (unlikely(sd == NULL))
2133 continue; 2135 goto out;
2134 cpu_set(cpu, visited_cpus); 2136
2135 if (!cpu_and_siblings_are_idle(cpu) || cpu == busiest_cpu) 2137 schedstat_inc(sd, alb_cnt);
2136 continue; 2138
2137 2139 if (move_tasks(target_rq, target_cpu, busiest_rq, 1, sd, SCHED_IDLE, NULL))
2138 target_rq = cpu_rq(cpu); 2140 schedstat_inc(sd, alb_pushed);
2139 /* 2141 else
2140 * This condition is "impossible", if it occurs 2142 schedstat_inc(sd, alb_failed);
2141 * we need to fix it. Originally reported by 2143out:
2142 * Bjorn Helgaas on a 128-cpu setup. 2144 spin_unlock(&target_rq->lock);
2143 */
2144 BUG_ON(busiest_rq == target_rq);
2145
2146 /* move a task from busiest_rq to target_rq */
2147 double_lock_balance(busiest_rq, target_rq);
2148 if (move_tasks(target_rq, cpu, busiest_rq,
2149 1, sd, SCHED_IDLE, NULL)) {
2150 schedstat_inc(sd, alb_pushed);
2151 } else {
2152 schedstat_inc(sd, alb_failed);
2153 }
2154 spin_unlock(&target_rq->lock);
2155 }
2156 cpu_group = cpu_group->next;
2157 } while (cpu_group != sd->groups);
2158 }
2159} 2145}
2160 2146
2161/* 2147/*