author    Peter Zijlstra <a.p.zijlstra@chello.nl>  2008-10-17 13:27:02 -0400
committer Ingo Molnar <mingo@elte.hu>  2008-10-20 08:05:02 -0400
commit    ffda12a17a324103e9900fa1035309811eecbfe5
tree      79fe8aae79a41b467f2cdd055036b3017642a9f6
parent    b0aa51b999c449e5e3f9faa1ee406e052d407fe7
sched: optimize group load balancer
I noticed that tg_shares_up() unconditionally takes rq-locks for all cpus in the sched_domain. This hurts.

We need the rq-locks whenever we change the weight of the per-cpu group sched entities. To alleviate this a little, only change the weight when the new weight is at least shares_thresh away from the old value.

This avoids the rq-lock for the top-level entries, since those will never be re-weighted, and fuzzes the lower-level entries a little to gain performance in semi-stable situations.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--  include/linux/sched.h |  1
-rw-r--r--  kernel/sched.c        | 45
-rw-r--r--  kernel/sysctl.c       | 10
3 files changed, 36 insertions, 20 deletions
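
Before the diff itself, a minimal standalone sketch of the thresholding idea applied in update_group_shares_cpu() below; the helper maybe_update_weight() and the sample weights are illustrative assumptions, not kernel code:

#include <stdio.h>

/* illustrative threshold, mirroring the new sysctl's default of 4 */
static unsigned int shares_thresh = 4;

/*
 * Core idea of the patch: only pay for the rq-lock and the re-weighting
 * when the newly computed weight differs from the current one by more
 * than the threshold; small fluctuations are ignored.
 */
static int maybe_update_weight(unsigned long old_weight, unsigned long new_weight)
{
	long diff = (long)new_weight - (long)old_weight;

	if (diff < 0)
		diff = -diff;
	if (diff <= shares_thresh)
		return 0;	/* change too small: skip the (remote) lock entirely */

	/* here the real code takes rq->lock and calls __set_se_shares() */
	return 1;
}

int main(void)
{
	/* a barely-changed weight is left alone... */
	printf("1024 -> 1022: updated=%d\n", maybe_update_weight(1024, 1022));
	/* ...while a real shift still triggers the update */
	printf("1024 -> 900:  updated=%d\n", maybe_update_weight(1024, 900));
	return 0;
}

The patch performs the same comparison against tg->se[cpu]->load.weight and only then takes the per-cpu rq lock and calls __set_se_shares().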
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6eda6ad735dc..4f59c8e8597d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1621,6 +1621,7 @@ extern unsigned int sysctl_sched_features;
 extern unsigned int sysctl_sched_migration_cost;
 extern unsigned int sysctl_sched_nr_migrate;
 extern unsigned int sysctl_sched_shares_ratelimit;
+extern unsigned int sysctl_sched_shares_thresh;
 
 int sched_nr_latency_handler(struct ctl_table *table, int write,
 		struct file *file, void __user *buffer, size_t *length,
diff --git a/kernel/sched.c b/kernel/sched.c
index c530b84c7f80..11ca39017835 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -818,6 +818,13 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32;
 unsigned int sysctl_sched_shares_ratelimit = 250000;
 
 /*
+ * Inject some fuzzyness into changing the per-cpu group shares
+ * this avoids remote rq-locks at the expense of fairness.
+ * default: 4
+ */
+unsigned int sysctl_sched_shares_thresh = 4;
+
+/*
  * period over which we measure -rt task cpu usage in us.
  * default: 1s
  */
@@ -1453,8 +1460,8 @@ static void __set_se_shares(struct sched_entity *se, unsigned long shares);
  * Calculate and set the cpu's group shares.
  */
 static void
-__update_group_shares_cpu(struct task_group *tg, int cpu,
-			  unsigned long sd_shares, unsigned long sd_rq_weight)
+update_group_shares_cpu(struct task_group *tg, int cpu,
+			unsigned long sd_shares, unsigned long sd_rq_weight)
 {
 	int boost = 0;
 	unsigned long shares;
@@ -1485,19 +1492,23 @@ __update_group_shares_cpu(struct task_group *tg, int cpu,
 	 *
 	 */
 	shares = (sd_shares * rq_weight) / (sd_rq_weight + 1);
+	shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES);
 
-	/*
-	 * record the actual number of shares, not the boosted amount.
-	 */
-	tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
-	tg->cfs_rq[cpu]->rq_weight = rq_weight;
+	if (abs(shares - tg->se[cpu]->load.weight) >
+			sysctl_sched_shares_thresh) {
+		struct rq *rq = cpu_rq(cpu);
+		unsigned long flags;
 
-	if (shares < MIN_SHARES)
-		shares = MIN_SHARES;
-	else if (shares > MAX_SHARES)
-		shares = MAX_SHARES;
+		spin_lock_irqsave(&rq->lock, flags);
+		/*
+		 * record the actual number of shares, not the boosted amount.
+		 */
+		tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
+		tg->cfs_rq[cpu]->rq_weight = rq_weight;
 
-	__set_se_shares(tg->se[cpu], shares);
+		__set_se_shares(tg->se[cpu], shares);
+		spin_unlock_irqrestore(&rq->lock, flags);
+	}
 }
 
 /*
@@ -1526,14 +1537,8 @@ static int tg_shares_up(struct task_group *tg, void *data)
 	if (!rq_weight)
 		rq_weight = cpus_weight(sd->span) * NICE_0_LOAD;
 
-	for_each_cpu_mask(i, sd->span) {
-		struct rq *rq = cpu_rq(i);
-		unsigned long flags;
-
-		spin_lock_irqsave(&rq->lock, flags);
-		__update_group_shares_cpu(tg, i, shares, rq_weight);
-		spin_unlock_irqrestore(&rq->lock, flags);
-	}
+	for_each_cpu_mask(i, sd->span)
+		update_group_shares_cpu(tg, i, shares, rq_weight);
 
 	return 0;
 }
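
For context on the arithmetic in the update_group_shares_cpu() hunk above: a cpu receives the group's shares in proportion to its fraction of the domain's total runqueue weight, and the +1 in the divisor guards against a division by zero when the domain carries no weight. A rough standalone illustration, using made-up weights:

#include <stdio.h>

int main(void)
{
	/* hypothetical weights, chosen only to illustrate the proportion */
	unsigned long sd_shares = 1024;		/* shares of the whole group  */
	unsigned long rq_weight = 512;		/* this cpu's runqueue weight */
	unsigned long sd_rq_weight = 2048;	/* weight of the whole domain */

	/* same formula as in the patch; +1 avoids dividing by zero */
	unsigned long shares = (sd_shares * rq_weight) / (sd_rq_weight + 1);

	printf("per-cpu shares = %lu\n", shares);	/* prints 255 (~1024/4) */
	return 0;
}

With these numbers the cpu holds a quarter of the domain weight and therefore receives roughly a quarter of the 1024 shares, before the clamp_t() and the threshold check shown in the patch.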
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 617d41e4d6a0..3d804f41e649 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -276,6 +276,16 @@ static struct ctl_table kern_table[] = {
 	},
 	{
 		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "sched_shares_thresh",
+		.data		= &sysctl_sched_shares_thresh,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &zero,
+	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
 		.procname	= "sched_child_runs_first",
 		.data		= &sysctl_sched_child_runs_first,
 		.maxlen		= sizeof(unsigned int),