diff options
author | Peter Zijlstra <a.p.zijlstra@chello.nl> | 2008-10-17 13:27:02 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-10-20 08:05:02 -0400 |
commit | ffda12a17a324103e9900fa1035309811eecbfe5 (patch) | |
tree | 79fe8aae79a41b467f2cdd055036b3017642a9f6 | |
parent | b0aa51b999c449e5e3f9faa1ee406e052d407fe7 (diff) |
sched: optimize group load balancer
I noticed that tg_shares_up() unconditionally takes rq-locks for all cpus
in the sched_domain. This hurts.
We need the rq-locks whenever we change the weight of the per-cpu group sched
entities. To alleviate this a little, only change the weight when the new
weight is at least shares_thresh away from the old value.
This avoids the rq-lock for the top level entries, since those will never
be re-weighted, and fuzzes the lower level entries a little to gain performance
in semi-stable situations.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- | include/linux/sched.h | 1 | ||||
-rw-r--r-- | kernel/sched.c | 45 | ||||
-rw-r--r-- | kernel/sysctl.c | 10 |
3 files changed, 36 insertions, 20 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h index 6eda6ad735dc..4f59c8e8597d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -1621,6 +1621,7 @@ extern unsigned int sysctl_sched_features; | |||
1621 | extern unsigned int sysctl_sched_migration_cost; | 1621 | extern unsigned int sysctl_sched_migration_cost; |
1622 | extern unsigned int sysctl_sched_nr_migrate; | 1622 | extern unsigned int sysctl_sched_nr_migrate; |
1623 | extern unsigned int sysctl_sched_shares_ratelimit; | 1623 | extern unsigned int sysctl_sched_shares_ratelimit; |
1624 | extern unsigned int sysctl_sched_shares_thresh; | ||
1624 | 1625 | ||
1625 | int sched_nr_latency_handler(struct ctl_table *table, int write, | 1626 | int sched_nr_latency_handler(struct ctl_table *table, int write, |
1626 | struct file *file, void __user *buffer, size_t *length, | 1627 | struct file *file, void __user *buffer, size_t *length, |
diff --git a/kernel/sched.c b/kernel/sched.c index c530b84c7f80..11ca39017835 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -818,6 +818,13 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32; | |||
818 | unsigned int sysctl_sched_shares_ratelimit = 250000; | 818 | unsigned int sysctl_sched_shares_ratelimit = 250000; |
819 | 819 | ||
820 | /* | 820 | /* |
821 | * Inject some fuzzyness into changing the per-cpu group shares | ||
822 | * this avoids remote rq-locks at the expense of fairness. | ||
823 | * default: 4 | ||
824 | */ | ||
825 | unsigned int sysctl_sched_shares_thresh = 4; | ||
826 | |||
827 | /* | ||
821 | * period over which we measure -rt task cpu usage in us. | 828 | * period over which we measure -rt task cpu usage in us. |
822 | * default: 1s | 829 | * default: 1s |
823 | */ | 830 | */ |
@@ -1453,8 +1460,8 @@ static void __set_se_shares(struct sched_entity *se, unsigned long shares); | |||
1453 | * Calculate and set the cpu's group shares. | 1460 | * Calculate and set the cpu's group shares. |
1454 | */ | 1461 | */ |
1455 | static void | 1462 | static void |
1456 | __update_group_shares_cpu(struct task_group *tg, int cpu, | 1463 | update_group_shares_cpu(struct task_group *tg, int cpu, |
1457 | unsigned long sd_shares, unsigned long sd_rq_weight) | 1464 | unsigned long sd_shares, unsigned long sd_rq_weight) |
1458 | { | 1465 | { |
1459 | int boost = 0; | 1466 | int boost = 0; |
1460 | unsigned long shares; | 1467 | unsigned long shares; |
@@ -1485,19 +1492,23 @@ __update_group_shares_cpu(struct task_group *tg, int cpu, | |||
1485 | * | 1492 | * |
1486 | */ | 1493 | */ |
1487 | shares = (sd_shares * rq_weight) / (sd_rq_weight + 1); | 1494 | shares = (sd_shares * rq_weight) / (sd_rq_weight + 1); |
1495 | shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES); | ||
1488 | 1496 | ||
1489 | /* | 1497 | if (abs(shares - tg->se[cpu]->load.weight) > |
1490 | * record the actual number of shares, not the boosted amount. | 1498 | sysctl_sched_shares_thresh) { |
1491 | */ | 1499 | struct rq *rq = cpu_rq(cpu); |
1492 | tg->cfs_rq[cpu]->shares = boost ? 0 : shares; | 1500 | unsigned long flags; |
1493 | tg->cfs_rq[cpu]->rq_weight = rq_weight; | ||
1494 | 1501 | ||
1495 | if (shares < MIN_SHARES) | 1502 | spin_lock_irqsave(&rq->lock, flags); |
1496 | shares = MIN_SHARES; | 1503 | /* |
1497 | else if (shares > MAX_SHARES) | 1504 | * record the actual number of shares, not the boosted amount. |
1498 | shares = MAX_SHARES; | 1505 | */ |
1506 | tg->cfs_rq[cpu]->shares = boost ? 0 : shares; | ||
1507 | tg->cfs_rq[cpu]->rq_weight = rq_weight; | ||
1499 | 1508 | ||
1500 | __set_se_shares(tg->se[cpu], shares); | 1509 | __set_se_shares(tg->se[cpu], shares); |
1510 | spin_unlock_irqrestore(&rq->lock, flags); | ||
1511 | } | ||
1501 | } | 1512 | } |
1502 | 1513 | ||
1503 | /* | 1514 | /* |
@@ -1526,14 +1537,8 @@ static int tg_shares_up(struct task_group *tg, void *data) | |||
1526 | if (!rq_weight) | 1537 | if (!rq_weight) |
1527 | rq_weight = cpus_weight(sd->span) * NICE_0_LOAD; | 1538 | rq_weight = cpus_weight(sd->span) * NICE_0_LOAD; |
1528 | 1539 | ||
1529 | for_each_cpu_mask(i, sd->span) { | 1540 | for_each_cpu_mask(i, sd->span) |
1530 | struct rq *rq = cpu_rq(i); | 1541 | update_group_shares_cpu(tg, i, shares, rq_weight); |
1531 | unsigned long flags; | ||
1532 | |||
1533 | spin_lock_irqsave(&rq->lock, flags); | ||
1534 | __update_group_shares_cpu(tg, i, shares, rq_weight); | ||
1535 | spin_unlock_irqrestore(&rq->lock, flags); | ||
1536 | } | ||
1537 | 1542 | ||
1538 | return 0; | 1543 | return 0; |
1539 | } | 1544 | } |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 617d41e4d6a0..3d804f41e649 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -276,6 +276,16 @@ static struct ctl_table kern_table[] = { | |||
276 | }, | 276 | }, |
277 | { | 277 | { |
278 | .ctl_name = CTL_UNNUMBERED, | 278 | .ctl_name = CTL_UNNUMBERED, |
279 | .procname = "sched_shares_thresh", | ||
280 | .data = &sysctl_sched_shares_thresh, | ||
281 | .maxlen = sizeof(unsigned int), | ||
282 | .mode = 0644, | ||
283 | .proc_handler = &proc_dointvec_minmax, | ||
284 | .strategy = &sysctl_intvec, | ||
285 | .extra1 = &zero, | ||
286 | }, | ||
287 | { | ||
288 | .ctl_name = CTL_UNNUMBERED, | ||
279 | .procname = "sched_child_runs_first", | 289 | .procname = "sched_child_runs_first", |
280 | .data = &sysctl_sched_child_runs_first, | 290 | .data = &sysctl_sched_child_runs_first, |
281 | .maxlen = sizeof(unsigned int), | 291 | .maxlen = sizeof(unsigned int), |