author    Nick Piggin <nickpiggin@yahoo.com.au>  2005-09-10 03:26:19 -0400
committer Linus Torvalds <torvalds@g5.osdl.org>  2005-09-10 13:06:23 -0400
commit    5969fe0618051e8577316555a81a6e44b7b7d640 (patch)
tree      09ca92684811017901bb08d8d3a76c52022ef34d /kernel/sched.c
parent    e17224bf1d01b461ec02a60f5a9b7657a89bdd23 (diff)
[PATCH] sched: HT optimisation
If an idle sibling of an HT queue encounters a busy sibling, then perform higher-level load balancing of the non-idle variety.

Performance of multiprocessor HT systems with low numbers of tasks (generally < number of virtual CPUs) can be significantly worse than that of the exact same workloads run in non-HT mode. This is largely due to poor scheduling behaviour. This patch improves the situation, making the performance gap far less significant on one problematic test case (tbench).

Signed-off-by: Nick Piggin <npiggin@suse.de>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
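In essence, the patch makes SMT-aware load balancing report a busy sibling upward: load_balance() and load_balance_newidle() now track an sd_idle flag for SD_SHARE_CPUPOWER domains, and when an idle sibling cannot pull anything while the other sibling is busy, they return -1 so that rebalance_tick() switches to NOT_IDLE balancing at the parent domain. The following is only a rough, self-contained sketch of that return-value convention in simplified C; the struct, field, and function names are illustrative, not the kernel's own:

#include <stdio.h>

/*
 * Simplified sketch, NOT the kernel's real data structures: a "domain"
 * is reduced to whether its CPUs share execution resources (SMT
 * siblings, i.e. SD_SHARE_CPUPOWER) and whether a sibling is busy.
 */
struct domain {
	int shares_cpu_power;	/* stands in for sd->flags & SD_SHARE_CPUPOWER */
	int sibling_busy;	/* stands in for !idle_cpu(sibling) */
};

/*
 * Returns >0 if tasks were moved, 0 if the domain looks balanced, and
 * -1 to ask the caller to balance the parent domain with the non-idle
 * policy -- the return convention the patch introduces.
 */
static int load_balance_sketch(const struct domain *sd, int nr_movable)
{
	int sd_idle = sd->shares_cpu_power;	/* assume siblings are idle... */

	if (sd->sibling_busy)
		sd_idle = 0;			/* ...until one is seen busy */

	if (nr_movable)
		return nr_movable;		/* ordinary successful balance */

	/*
	 * Nothing moved, but a sibling is busy: escalate rather than
	 * report this SMT domain as balanced.
	 */
	if (!sd_idle && sd->shares_cpu_power)
		return -1;

	return 0;
}

int main(void)
{
	struct domain smt = { .shares_cpu_power = 1, .sibling_busy = 1 };

	if (load_balance_sketch(&smt, 0))
		/* The caller (think rebalance_tick) now balances as NOT_IDLE. */
		printf("escalate: balance parent domain as non-idle\n");
	return 0;
}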
Diffstat (limited to 'kernel/sched.c')
-rw-r--r--  kernel/sched.c  34
1 file changed, 28 insertions(+), 6 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index 8535e5c68f5b..46fdd0bb1ed6 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1906,7 +1906,7 @@ out:
  */
 static struct sched_group *
 find_busiest_group(struct sched_domain *sd, int this_cpu,
-		   unsigned long *imbalance, enum idle_type idle)
+		   unsigned long *imbalance, enum idle_type idle, int *sd_idle)
 {
 	struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups;
 	unsigned long max_load, avg_load, total_load, this_load, total_pwr;
@@ -1931,6 +1931,9 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 		avg_load = 0;
 
 		for_each_cpu_mask(i, group->cpumask) {
+			if (*sd_idle && !idle_cpu(i))
+				*sd_idle = 0;
+
 			/* Bias balancing toward cpus of our domain */
 			if (local_group)
 				load = target_load(i, load_idx);
@@ -2074,10 +2077,14 @@ static int load_balance(int this_cpu, runqueue_t *this_rq,
 	unsigned long imbalance;
 	int nr_moved, all_pinned = 0;
 	int active_balance = 0;
+	int sd_idle = 0;
+
+	if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER)
+		sd_idle = 1;
 
 	schedstat_inc(sd, lb_cnt[idle]);
 
-	group = find_busiest_group(sd, this_cpu, &imbalance, idle);
+	group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle);
 	if (!group) {
 		schedstat_inc(sd, lb_nobusyg[idle]);
 		goto out_balanced;
@@ -2150,6 +2157,8 @@ static int load_balance(int this_cpu, runqueue_t *this_rq,
 			sd->balance_interval *= 2;
 	}
 
+	if (!nr_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER)
+		return -1;
 	return nr_moved;
 
 out_balanced:
@@ -2161,6 +2170,8 @@ out_balanced:
 			(sd->balance_interval < sd->max_interval))
 		sd->balance_interval *= 2;
 
+	if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER)
+		return -1;
 	return 0;
 }
 
@@ -2178,9 +2189,13 @@ static int load_balance_newidle(int this_cpu, runqueue_t *this_rq,
 	runqueue_t *busiest = NULL;
 	unsigned long imbalance;
 	int nr_moved = 0;
+	int sd_idle = 0;
+
+	if (sd->flags & SD_SHARE_CPUPOWER)
+		sd_idle = 1;
 
 	schedstat_inc(sd, lb_cnt[NEWLY_IDLE]);
-	group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE);
+	group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE, &sd_idle);
 	if (!group) {
 		schedstat_inc(sd, lb_nobusyg[NEWLY_IDLE]);
 		goto out_balanced;
@@ -2205,15 +2220,19 @@ static int load_balance_newidle(int this_cpu, runqueue_t *this_rq,
 		spin_unlock(&busiest->lock);
 	}
 
-	if (!nr_moved)
+	if (!nr_moved) {
 		schedstat_inc(sd, lb_failed[NEWLY_IDLE]);
-	else
+		if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER)
+			return -1;
+	} else
 		sd->nr_balance_failed = 0;
 
 	return nr_moved;
 
 out_balanced:
 	schedstat_inc(sd, lb_balanced[NEWLY_IDLE]);
+	if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER)
+		return -1;
 	sd->nr_balance_failed = 0;
 	return 0;
 }
@@ -2338,7 +2357,10 @@ static void rebalance_tick(int this_cpu, runqueue_t *this_rq,
 
 		if (j - sd->last_balance >= interval) {
 			if (load_balance(this_cpu, this_rq, sd, idle)) {
-				/* We've pulled tasks over so no longer idle */
+				/* We've pulled tasks over so either we're no
+				 * longer idle, or one of our SMT siblings is
+				 * not idle.
+				 */
 				idle = NOT_IDLE;
 			}
 			sd->last_balance += interval;