author     Peter Zijlstra <a.p.zijlstra@chello.nl>    2007-11-09 16:39:39 -0500
committer  Ingo Molnar <mingo@elte.hu>                2007-11-09 16:39:39 -0500
commit     b82d9fdd848abfbe7263a4ecd9bbb55e575100a6
tree       7d4c525cc4bd633c84abcfd8d934c84e5dc9ce7c
parent     3c90e6e99b08f01d5684a3a07cceae6a543e4fa8
sched: avoid large irq-latencies in smp-balancing
SMP balancing is done with IRQs disabled and can iterate the full rq.
When rqs are large this can cause large irq-latencies. Limit the number
of tasks iterated in a single balance run.
This fixes a scheduling latency regression reported by the -rt folks.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Tested-by: Gregory Haskins <ghaskins@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
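
As a rough worked example (the numbers are illustrative, not measurements
from this patch): if examining one task costs on the order of 1 us, an
unbounded walk over a runqueue with 10,000 runnable tasks keeps IRQs
disabled for roughly 10 ms, while a cap of 32 bounds a single balance run
to roughly 32 us. Any remaining imbalance is simply picked up by later
balance runs.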
 include/linux/sched.h |  1 +
 kernel/sched.c        | 15 ++++++++++-----
 kernel/sysctl.c       |  8 ++++++++
 3 files changed, 19 insertions(+), 5 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 93fd30d6dac4..2cc789fef711 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1466,6 +1466,7 @@ extern unsigned int sysctl_sched_batch_wakeup_granularity;
 extern unsigned int sysctl_sched_child_runs_first;
 extern unsigned int sysctl_sched_features;
 extern unsigned int sysctl_sched_migration_cost;
+extern unsigned int sysctl_sched_nr_migrate;
 
 int sched_nr_latency_handler(struct ctl_table *table, int write,
 		struct file *file, void __user *buffer, size_t *length,
diff --git a/kernel/sched.c b/kernel/sched.c
index 2a107e4ad5ed..e195a4229418 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -472,6 +472,12 @@ const_debug unsigned int sysctl_sched_features =
 #define sched_feat(x) (sysctl_sched_features & SCHED_FEAT_##x)
 
 /*
+ * Number of tasks to iterate in a single balance run.
+ * Limited because this is done with IRQs disabled.
+ */
+const_debug unsigned int sysctl_sched_nr_migrate = 32;
+
+/*
  * For kernel-internal use: high-speed (but slightly incorrect) per-cpu
  * clock constructed from sched_clock():
  */
@@ -2235,7 +2241,7 @@ balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
 	      enum cpu_idle_type idle, int *all_pinned,
 	      int *this_best_prio, struct rq_iterator *iterator)
 {
-	int pulled = 0, pinned = 0, skip_for_load;
+	int loops = 0, pulled = 0, pinned = 0, skip_for_load;
 	struct task_struct *p;
 	long rem_load_move = max_load_move;
 
@@ -2249,10 +2255,10 @@ balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
 	 */
 	p = iterator->start(iterator->arg);
 next:
-	if (!p)
+	if (!p || loops++ > sysctl_sched_nr_migrate)
 		goto out;
 	/*
-	 * To help distribute high priority tasks accross CPUs we don't
+	 * To help distribute high priority tasks across CPUs we don't
 	 * skip a task if it will be the highest priority task (i.e. smallest
 	 * prio value) on its new queue regardless of its load weight
 	 */
@@ -2269,8 +2275,7 @@ next:
 	rem_load_move -= p->se.load.weight;
 
 	/*
-	 * We only want to steal up to the prescribed number of tasks
-	 * and the prescribed amount of weighted load.
+	 * We only want to steal up to the prescribed amount of weighted load.
 	 */
 	if (rem_load_move > 0) {
 		if (p->prio < *this_best_prio)
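
The functional change above is just the extra termination condition in
balance_tasks(). A minimal userspace C sketch of the same bounded-iteration
pattern (struct task, pull_tasks() and the migrate_limit name are
illustrative stand-ins, not kernel code):

#include <stdio.h>

/* Tunable cap, analogous to sysctl_sched_nr_migrate (default 32). */
static unsigned int migrate_limit = 32;

struct task {
	int weight;
	struct task *next;
};

/*
 * Walk a (possibly long) task list but bound the number of iterations,
 * so the time spent in the critical section stays bounded too.
 * Returns the amount of "load" pulled; anything left over is deferred
 * to a later run.
 */
static long pull_tasks(struct task *head, long max_load)
{
	unsigned int loops = 0;
	long pulled = 0;
	struct task *p;

	for (p = head; p; p = p->next) {
		if (loops++ > migrate_limit)
			break;			/* latency cap reached */
		if (pulled + p->weight > max_load)
			continue;		/* over the load budget */
		pulled += p->weight;		/* "migrate" this task */
	}
	return pulled;
}

int main(void)
{
	struct task tasks[100];
	int i;

	for (i = 0; i < 100; i++) {
		tasks[i].weight = 1;
		tasks[i].next = (i < 99) ? &tasks[i + 1] : NULL;
	}
	/* Even with 100 runnable tasks, only ~32 are examined per run. */
	printf("pulled %ld units of load\n", pull_tasks(tasks, 1000));
	return 0;
}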
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index adddf682d4cb..3a1744fed2b6 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -301,6 +301,14 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "sched_nr_migrate",
+		.data		= &sysctl_sched_nr_migrate,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
 #endif
 	{
 		.ctl_name	= CTL_UNNUMBERED,
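
Once this table entry is registered, the knob appears as
/proc/sys/kernel/sched_nr_migrate; the 0644 mode makes it world-readable
and root-writable. A small C snippet to read it back (minimal error
handling, for illustration):

#include <stdio.h>

int main(void)
{
	unsigned int nr;
	FILE *f = fopen("/proc/sys/kernel/sched_nr_migrate", "r");

	if (!f) {
		perror("sched_nr_migrate");	/* e.g. kernel predates this patch */
		return 1;
	}
	if (fscanf(f, "%u", &nr) == 1)
		printf("sched_nr_migrate = %u\n", nr);	/* default: 32 */
	fclose(f);
	return 0;
}

Writing a larger value (as root) trades longer IRQ-off sections for more
migration work per balance run; a smaller value does the reverse.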