diff options
author | Nick Piggin <nickpiggin@yahoo.com.au> | 2005-09-10 03:26:19 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2005-09-10 13:06:23 -0400 |
commit | 5969fe0618051e8577316555a81a6e44b7b7d640 (patch) | |
tree | 09ca92684811017901bb08d8d3a76c52022ef34d | |
parent | e17224bf1d01b461ec02a60f5a9b7657a89bdd23 (diff) |
[PATCH] sched: HT optimisation
If an idle sibling of an HT queue encounters a busy sibling, then perform
higher-level load balancing as the non-idle variety.
Performance of multiprocessor HT systems with low numbers of tasks
(generally fewer than the number of virtual CPUs) can be significantly worse
than for the exact same workloads running in non-HT mode. This is largely
due to poor scheduling behaviour.
This patch improves the situation, making the performance gap far less
significant on one problematic test case (tbench).
Signed-off-by: Nick Piggin <npiggin@suse.de>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | kernel/sched.c | 34 |
1 files changed, 28 insertions, 6 deletions
diff --git a/kernel/sched.c b/kernel/sched.c index 8535e5c68f5b..46fdd0bb1ed6 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -1906,7 +1906,7 @@ out: | |||
1906 | */ | 1906 | */ |
1907 | static struct sched_group * | 1907 | static struct sched_group * |
1908 | find_busiest_group(struct sched_domain *sd, int this_cpu, | 1908 | find_busiest_group(struct sched_domain *sd, int this_cpu, |
1909 | unsigned long *imbalance, enum idle_type idle) | 1909 | unsigned long *imbalance, enum idle_type idle, int *sd_idle) |
1910 | { | 1910 | { |
1911 | struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups; | 1911 | struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups; |
1912 | unsigned long max_load, avg_load, total_load, this_load, total_pwr; | 1912 | unsigned long max_load, avg_load, total_load, this_load, total_pwr; |
@@ -1931,6 +1931,9 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
1931 | avg_load = 0; | 1931 | avg_load = 0; |
1932 | 1932 | ||
1933 | for_each_cpu_mask(i, group->cpumask) { | 1933 | for_each_cpu_mask(i, group->cpumask) { |
1934 | if (*sd_idle && !idle_cpu(i)) | ||
1935 | *sd_idle = 0; | ||
1936 | |||
1934 | /* Bias balancing toward cpus of our domain */ | 1937 | /* Bias balancing toward cpus of our domain */ |
1935 | if (local_group) | 1938 | if (local_group) |
1936 | load = target_load(i, load_idx); | 1939 | load = target_load(i, load_idx); |
@@ -2074,10 +2077,14 @@ static int load_balance(int this_cpu, runqueue_t *this_rq, | |||
2074 | unsigned long imbalance; | 2077 | unsigned long imbalance; |
2075 | int nr_moved, all_pinned = 0; | 2078 | int nr_moved, all_pinned = 0; |
2076 | int active_balance = 0; | 2079 | int active_balance = 0; |
2080 | int sd_idle = 0; | ||
2081 | |||
2082 | if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER) | ||
2083 | sd_idle = 1; | ||
2077 | 2084 | ||
2078 | schedstat_inc(sd, lb_cnt[idle]); | 2085 | schedstat_inc(sd, lb_cnt[idle]); |
2079 | 2086 | ||
2080 | group = find_busiest_group(sd, this_cpu, &imbalance, idle); | 2087 | group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle); |
2081 | if (!group) { | 2088 | if (!group) { |
2082 | schedstat_inc(sd, lb_nobusyg[idle]); | 2089 | schedstat_inc(sd, lb_nobusyg[idle]); |
2083 | goto out_balanced; | 2090 | goto out_balanced; |
@@ -2150,6 +2157,8 @@ static int load_balance(int this_cpu, runqueue_t *this_rq, | |||
2150 | sd->balance_interval *= 2; | 2157 | sd->balance_interval *= 2; |
2151 | } | 2158 | } |
2152 | 2159 | ||
2160 | if (!nr_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER) | ||
2161 | return -1; | ||
2153 | return nr_moved; | 2162 | return nr_moved; |
2154 | 2163 | ||
2155 | out_balanced: | 2164 | out_balanced: |
@@ -2161,6 +2170,8 @@ out_balanced: | |||
2161 | (sd->balance_interval < sd->max_interval)) | 2170 | (sd->balance_interval < sd->max_interval)) |
2162 | sd->balance_interval *= 2; | 2171 | sd->balance_interval *= 2; |
2163 | 2172 | ||
2173 | if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER) | ||
2174 | return -1; | ||
2164 | return 0; | 2175 | return 0; |
2165 | } | 2176 | } |
2166 | 2177 | ||
@@ -2178,9 +2189,13 @@ static int load_balance_newidle(int this_cpu, runqueue_t *this_rq, | |||
2178 | runqueue_t *busiest = NULL; | 2189 | runqueue_t *busiest = NULL; |
2179 | unsigned long imbalance; | 2190 | unsigned long imbalance; |
2180 | int nr_moved = 0; | 2191 | int nr_moved = 0; |
2192 | int sd_idle = 0; | ||
2193 | |||
2194 | if (sd->flags & SD_SHARE_CPUPOWER) | ||
2195 | sd_idle = 1; | ||
2181 | 2196 | ||
2182 | schedstat_inc(sd, lb_cnt[NEWLY_IDLE]); | 2197 | schedstat_inc(sd, lb_cnt[NEWLY_IDLE]); |
2183 | group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE); | 2198 | group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE, &sd_idle); |
2184 | if (!group) { | 2199 | if (!group) { |
2185 | schedstat_inc(sd, lb_nobusyg[NEWLY_IDLE]); | 2200 | schedstat_inc(sd, lb_nobusyg[NEWLY_IDLE]); |
2186 | goto out_balanced; | 2201 | goto out_balanced; |
@@ -2205,15 +2220,19 @@ static int load_balance_newidle(int this_cpu, runqueue_t *this_rq, | |||
2205 | spin_unlock(&busiest->lock); | 2220 | spin_unlock(&busiest->lock); |
2206 | } | 2221 | } |
2207 | 2222 | ||
2208 | if (!nr_moved) | 2223 | if (!nr_moved) { |
2209 | schedstat_inc(sd, lb_failed[NEWLY_IDLE]); | 2224 | schedstat_inc(sd, lb_failed[NEWLY_IDLE]); |
2210 | else | 2225 | if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER) |
2226 | return -1; | ||
2227 | } else | ||
2211 | sd->nr_balance_failed = 0; | 2228 | sd->nr_balance_failed = 0; |
2212 | 2229 | ||
2213 | return nr_moved; | 2230 | return nr_moved; |
2214 | 2231 | ||
2215 | out_balanced: | 2232 | out_balanced: |
2216 | schedstat_inc(sd, lb_balanced[NEWLY_IDLE]); | 2233 | schedstat_inc(sd, lb_balanced[NEWLY_IDLE]); |
2234 | if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER) | ||
2235 | return -1; | ||
2217 | sd->nr_balance_failed = 0; | 2236 | sd->nr_balance_failed = 0; |
2218 | return 0; | 2237 | return 0; |
2219 | } | 2238 | } |
@@ -2338,7 +2357,10 @@ static void rebalance_tick(int this_cpu, runqueue_t *this_rq, | |||
2338 | 2357 | ||
2339 | if (j - sd->last_balance >= interval) { | 2358 | if (j - sd->last_balance >= interval) { |
2340 | if (load_balance(this_cpu, this_rq, sd, idle)) { | 2359 | if (load_balance(this_cpu, this_rq, sd, idle)) { |
2341 | /* We've pulled tasks over so no longer idle */ | 2360 | /* We've pulled tasks over so either we're no |
2361 | * longer idle, or one of our SMT siblings is | ||
2362 | * not idle. | ||
2363 | */ | ||
2342 | idle = NOT_IDLE; | 2364 | idle = NOT_IDLE; |
2343 | } | 2365 | } |
2344 | sd->last_balance += interval; | 2366 | sd->last_balance += interval; |