| author | Peter Zijlstra <a.p.zijlstra@chello.nl> | 2008-10-17 13:27:02 -0400 |
|---|---|---|
| committer | Ingo Molnar <mingo@elte.hu> | 2008-10-20 08:05:02 -0400 |
| commit | ffda12a17a324103e9900fa1035309811eecbfe5 (patch) | |
| tree | 79fe8aae79a41b467f2cdd055036b3017642a9f6 | |
| parent | b0aa51b999c449e5e3f9faa1ee406e052d407fe7 (diff) | |
sched: optimize group load balancer
I noticed that tg_shares_up() unconditionally takes rq-locks for all cpus
in the sched_domain. This hurts.
We need the rq-locks whenever we change the weight of the per-cpu group sched
entities. To alleviate this a little, only change the weight when the new
weight is at least shares_thresh away from the old value.
This avoids the rq-lock for the top level entries, since those will never
be re-weighted, and fuzzes the lower level entries a little to gain performance
in semi-stable situations.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
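To make the trade-off concrete, here is a minimal user-space sketch of the thresholding pattern the patch introduces. It is an illustration only, not the kernel code: the struct, the pthread mutex standing in for rq->lock, and the numbers are invented for the example, and the threshold constant simply mirrors the new default of 4.

/*
 * Sketch: only take the per-cpu lock and rewrite the group weight when
 * the newly computed value has moved more than a threshold away from
 * the current one; tiny fluctuations skip the lock entirely.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

#define SHARES_THRESH 4UL	/* mirrors the default of sysctl_sched_shares_thresh */

struct fake_cpu {
	pthread_mutex_t lock;		/* stands in for rq->lock */
	unsigned long group_weight;	/* stands in for tg->se[cpu]->load.weight */
};

/* Recompute one cpu's share and apply it only if it moved enough. */
static void update_share(struct fake_cpu *c, unsigned long sd_shares,
			 unsigned long rq_weight, unsigned long sd_rq_weight)
{
	unsigned long shares = (sd_shares * rq_weight) / (sd_rq_weight + 1);

	/* Skip the remote lock for changes within the threshold. */
	if (labs((long)(shares - c->group_weight)) <= SHARES_THRESH)
		return;

	pthread_mutex_lock(&c->lock);
	c->group_weight = shares;
	pthread_mutex_unlock(&c->lock);
}

int main(void)
{
	struct fake_cpu cpu = { PTHREAD_MUTEX_INITIALIZER, 512 };

	update_share(&cpu, 1024, 510, 1024);	/* small change: lock avoided */
	update_share(&cpu, 1024, 700, 1024);	/* big change: weight rewritten */
	printf("final weight: %lu\n", cpu.group_weight);
	return 0;
}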
| -rw-r--r-- | include/linux/sched.h | 1 |
| -rw-r--r-- | kernel/sched.c | 45 |
| -rw-r--r-- | kernel/sysctl.c | 10 |
3 files changed, 36 insertions, 20 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6eda6ad735dc..4f59c8e8597d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1621,6 +1621,7 @@ extern unsigned int sysctl_sched_features;
 extern unsigned int sysctl_sched_migration_cost;
 extern unsigned int sysctl_sched_nr_migrate;
 extern unsigned int sysctl_sched_shares_ratelimit;
+extern unsigned int sysctl_sched_shares_thresh;
 
 int sched_nr_latency_handler(struct ctl_table *table, int write,
 		struct file *file, void __user *buffer, size_t *length,
diff --git a/kernel/sched.c b/kernel/sched.c
index c530b84c7f80..11ca39017835 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -818,6 +818,13 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32;
 unsigned int sysctl_sched_shares_ratelimit = 250000;
 
 /*
+ * Inject some fuzzyness into changing the per-cpu group shares
+ * this avoids remote rq-locks at the expense of fairness.
+ * default: 4
+ */
+unsigned int sysctl_sched_shares_thresh = 4;
+
+/*
  * period over which we measure -rt task cpu usage in us.
  * default: 1s
  */
@@ -1453,8 +1460,8 @@ static void __set_se_shares(struct sched_entity *se, unsigned long shares);
  * Calculate and set the cpu's group shares.
  */
 static void
-__update_group_shares_cpu(struct task_group *tg, int cpu,
+update_group_shares_cpu(struct task_group *tg, int cpu,
 		unsigned long sd_shares, unsigned long sd_rq_weight)
 {
 	int boost = 0;
 	unsigned long shares;
@@ -1485,19 +1492,23 @@ __update_group_shares_cpu(struct task_group *tg, int cpu,
 	 *
 	 */
 	shares = (sd_shares * rq_weight) / (sd_rq_weight + 1);
+	shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES);
 
-	/*
-	 * record the actual number of shares, not the boosted amount.
-	 */
-	tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
-	tg->cfs_rq[cpu]->rq_weight = rq_weight;
+	if (abs(shares - tg->se[cpu]->load.weight) >
+			sysctl_sched_shares_thresh) {
+		struct rq *rq = cpu_rq(cpu);
+		unsigned long flags;
 
-	if (shares < MIN_SHARES)
-		shares = MIN_SHARES;
-	else if (shares > MAX_SHARES)
-		shares = MAX_SHARES;
+		spin_lock_irqsave(&rq->lock, flags);
+		/*
+		 * record the actual number of shares, not the boosted amount.
+		 */
+		tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
+		tg->cfs_rq[cpu]->rq_weight = rq_weight;
 
-	__set_se_shares(tg->se[cpu], shares);
+		__set_se_shares(tg->se[cpu], shares);
+		spin_unlock_irqrestore(&rq->lock, flags);
+	}
 }
 
 /*
@@ -1526,14 +1537,8 @@ static int tg_shares_up(struct task_group *tg, void *data)
 	if (!rq_weight)
 		rq_weight = cpus_weight(sd->span) * NICE_0_LOAD;
 
-	for_each_cpu_mask(i, sd->span) {
-		struct rq *rq = cpu_rq(i);
-		unsigned long flags;
-
-		spin_lock_irqsave(&rq->lock, flags);
-		__update_group_shares_cpu(tg, i, shares, rq_weight);
-		spin_unlock_irqrestore(&rq->lock, flags);
-	}
+	for_each_cpu_mask(i, sd->span)
+		update_group_shares_cpu(tg, i, shares, rq_weight);
 
 	return 0;
 }
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 617d41e4d6a0..3d804f41e649 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -276,6 +276,16 @@ static struct ctl_table kern_table[] = {
 	},
 	{
 		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "sched_shares_thresh",
+		.data		= &sysctl_sched_shares_thresh,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &zero,
+	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
 		.procname	= "sched_child_runs_first",
 		.data		= &sysctl_sched_child_runs_first,
 		.maxlen		= sizeof(unsigned int),
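Since the new ctl_table entry is registered in kern_table, the knob should surface as /proc/sys/kernel/sched_shares_thresh; a hedged user-space sketch of reading and raising it follows (the path is assumed from the table above, so verify it on your tree):

/*
 * Hypothetical helper: read sched_shares_thresh and double it.
 * Raising the value trades some fairness for fewer remote rq locks.
 */
#include <stdio.h>

int main(void)
{
	const char *path = "/proc/sys/kernel/sched_shares_thresh";
	unsigned int thresh = 0;
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return 1;
	}
	if (fscanf(f, "%u", &thresh) == 1)
		printf("current sched_shares_thresh: %u\n", thresh);
	fclose(f);

	f = fopen(path, "w");	/* needs root */
	if (f) {
		fprintf(f, "%u\n", thresh * 2);
		fclose(f);
	}
	return 0;
}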
