author		Peter Zijlstra <a.p.zijlstra@chello.nl>	2007-11-09 16:39:39 -0500
committer	Ingo Molnar <mingo@elte.hu>	2007-11-09 16:39:39 -0500
commit		b82d9fdd848abfbe7263a4ecd9bbb55e575100a6 (patch)
tree		7d4c525cc4bd633c84abcfd8d934c84e5dc9ce7c
parent		3c90e6e99b08f01d5684a3a07cceae6a543e4fa8 (diff)
sched: avoid large irq-latencies in smp-balancing
SMP balancing is done with IRQs disabled and can iterate the full rq.
When rqs are large this can cause large irq-latencies. Limit the nr of
iterations on each run.

This fixes a scheduling latency regression reported by the -rt folks.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Tested-by: Gregory Haskins <ghaskins@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
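As a rough illustration of the pattern this patch introduces (a fixed per-run cap on how many tasks one balance pass may examine while IRQs are off), here is a minimal, self-contained userspace C sketch. The struct task layout, the helper name balance_once() and the variable nr_migrate are illustrative assumptions, not the kernel's actual data structures or APIs; only the cap value of 32 and the "loops++ > cap" check mirror the patch below.

/*
 * Userspace sketch of the bounded-iteration idea, not kernel code.
 * struct task, balance_once() and nr_migrate are made-up names; only
 * the "cap the loop so the IRQ-off section stays short" pattern
 * mirrors the patch.
 */
#include <stddef.h>

struct task {
	struct task *next;	/* singly linked runqueue stand-in */
	long weight;		/* load contribution of this task */
};

static unsigned int nr_migrate = 32;	/* per-run cap, like sysctl_sched_nr_migrate */

/*
 * Walk src and account up to max_load worth of weight as "migrated",
 * but never look at more than nr_migrate tasks in one call, so a very
 * long queue cannot turn one balance run into a long IRQ-off section.
 */
static long balance_once(const struct task *src, long max_load)
{
	unsigned int loops = 0;
	long moved = 0;
	const struct task *p;

	for (p = src; p; p = p->next) {
		if (loops++ > nr_migrate)
			break;			/* bound the work per run */
		if (moved + p->weight > max_load)
			continue;		/* would exceed the load budget */
		moved += p->weight;		/* pretend to pull this task */
	}
	return moved;
}

int main(void)
{
	struct task a = { NULL, 3 }, b = { &a, 2 };	/* two-task queue: b -> a */
	return balance_once(&b, 5) == 5 ? 0 : 1;	/* pulls both tasks: 2 + 3 = 5 */
}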
-rw-r--r--	include/linux/sched.h	1
-rw-r--r--	kernel/sched.c		15
-rw-r--r--	kernel/sysctl.c		8
3 files changed, 19 insertions, 5 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 93fd30d6dac4..2cc789fef711 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1466,6 +1466,7 @@ extern unsigned int sysctl_sched_batch_wakeup_granularity;
 extern unsigned int sysctl_sched_child_runs_first;
 extern unsigned int sysctl_sched_features;
 extern unsigned int sysctl_sched_migration_cost;
+extern unsigned int sysctl_sched_nr_migrate;
 
 int sched_nr_latency_handler(struct ctl_table *table, int write,
 		struct file *file, void __user *buffer, size_t *length,
diff --git a/kernel/sched.c b/kernel/sched.c
index 2a107e4ad5ed..e195a4229418 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -472,6 +472,12 @@ const_debug unsigned int sysctl_sched_features =
 #define sched_feat(x) (sysctl_sched_features & SCHED_FEAT_##x)
 
 /*
+ * Number of tasks to iterate in a single balance run.
+ * Limited because this is done with IRQs disabled.
+ */
+const_debug unsigned int sysctl_sched_nr_migrate = 32;
+
+/*
  * For kernel-internal use: high-speed (but slightly incorrect) per-cpu
  * clock constructed from sched_clock():
  */
@@ -2235,7 +2241,7 @@ balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
 	      enum cpu_idle_type idle, int *all_pinned,
 	      int *this_best_prio, struct rq_iterator *iterator)
 {
-	int pulled = 0, pinned = 0, skip_for_load;
+	int loops = 0, pulled = 0, pinned = 0, skip_for_load;
 	struct task_struct *p;
 	long rem_load_move = max_load_move;
 
@@ -2249,10 +2255,10 @@ balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
 	 */
 	p = iterator->start(iterator->arg);
 next:
-	if (!p)
+	if (!p || loops++ > sysctl_sched_nr_migrate)
 		goto out;
 	/*
-	 * To help distribute high priority tasks accross CPUs we don't
+	 * To help distribute high priority tasks across CPUs we don't
 	 * skip a task if it will be the highest priority task (i.e. smallest
 	 * prio value) on its new queue regardless of its load weight
 	 */
@@ -2269,8 +2275,7 @@ next:
 	rem_load_move -= p->se.load.weight;
 
 	/*
-	 * We only want to steal up to the prescribed number of tasks
-	 * and the prescribed amount of weighted load.
+	 * We only want to steal up to the prescribed amount of weighted load.
 	 */
 	if (rem_load_move > 0) {
 		if (p->prio < *this_best_prio)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index adddf682d4cb..3a1744fed2b6 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -301,6 +301,14 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "sched_nr_migrate",
+		.data		= &sysctl_sched_nr_migrate,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 644,
+		.proc_handler	= &proc_dointvec,
+	},
 #endif
 	{
 		.ctl_name	= CTL_UNNUMBERED,