diff options
author | Peter Zijlstra <a.p.zijlstra@chello.nl> | 2009-07-27 08:04:49 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-08-02 08:26:06 -0400 |
commit | a5004278f0525dcb9aa43703ef77bf371ea837cd (patch) | |
tree | d4beb94aa2e454a6d23425cac856654e507541a1 | |
parent | 8e9ed8b02490fea577b1eb1704c05bf43c891ed7 (diff) |
sched: Fix cgroup smp fairness
Commit ec4e0e2fe018992d980910db901637c814575914 ("fix
inconsistency when redistribute per-cpu tg->cfs_rq shares")
broke cgroup smp fairness.
In order to avoid starvation of newly placed tasks, we never
quite set the share of a group's per-cpu entity to 0 when that cpu
is empty; instead we set it as if a single NICE-0 task were present.
If, however, we actually store this value in cfs_rq[cpu]->shares,
then, because the per-cpu shares are summed when rebalancing, the total
shares for that group are slightly inflated every time we balance, causing the observed unfairness.
Fix this by setting cfs_rq[cpu]->shares to 0 while still
setting the effective weight of the related se to the inflated
number.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1248696557.6987.1615.camel@twins>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- | kernel/sched.c | 28 |
1 file changed, 20 insertions, 8 deletions
diff --git a/kernel/sched.c b/kernel/sched.c index ce1056e9b02a..26976cd8be0f 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -1523,13 +1523,18 @@ static void | |||
1523 | update_group_shares_cpu(struct task_group *tg, int cpu, | 1523 | update_group_shares_cpu(struct task_group *tg, int cpu, |
1524 | unsigned long sd_shares, unsigned long sd_rq_weight) | 1524 | unsigned long sd_shares, unsigned long sd_rq_weight) |
1525 | { | 1525 | { |
1526 | unsigned long shares; | ||
1527 | unsigned long rq_weight; | 1526 | unsigned long rq_weight; |
1527 | unsigned long shares; | ||
1528 | int boost = 0; | ||
1528 | 1529 | ||
1529 | if (!tg->se[cpu]) | 1530 | if (!tg->se[cpu]) |
1530 | return; | 1531 | return; |
1531 | 1532 | ||
1532 | rq_weight = tg->cfs_rq[cpu]->rq_weight; | 1533 | rq_weight = tg->cfs_rq[cpu]->rq_weight; |
1534 | if (!rq_weight) { | ||
1535 | boost = 1; | ||
1536 | rq_weight = NICE_0_LOAD; | ||
1537 | } | ||
1533 | 1538 | ||
1534 | /* | 1539 | /* |
1535 | * \Sum shares * rq_weight | 1540 | * \Sum shares * rq_weight |
@@ -1546,8 +1551,7 @@ update_group_shares_cpu(struct task_group *tg, int cpu, | |||
1546 | unsigned long flags; | 1551 | unsigned long flags; |
1547 | 1552 | ||
1548 | spin_lock_irqsave(&rq->lock, flags); | 1553 | spin_lock_irqsave(&rq->lock, flags); |
1549 | tg->cfs_rq[cpu]->shares = shares; | 1554 | tg->cfs_rq[cpu]->shares = boost ? 0 : shares; |
1550 | |||
1551 | __set_se_shares(tg->se[cpu], shares); | 1555 | __set_se_shares(tg->se[cpu], shares); |
1552 | spin_unlock_irqrestore(&rq->lock, flags); | 1556 | spin_unlock_irqrestore(&rq->lock, flags); |
1553 | } | 1557 | } |
@@ -1560,7 +1564,7 @@ update_group_shares_cpu(struct task_group *tg, int cpu, | |||
1560 | */ | 1564 | */ |
1561 | static int tg_shares_up(struct task_group *tg, void *data) | 1565 | static int tg_shares_up(struct task_group *tg, void *data) |
1562 | { | 1566 | { |
1563 | unsigned long weight, rq_weight = 0; | 1567 | unsigned long weight, rq_weight = 0, eff_weight = 0; |
1564 | unsigned long shares = 0; | 1568 | unsigned long shares = 0; |
1565 | struct sched_domain *sd = data; | 1569 | struct sched_domain *sd = data; |
1566 | int i; | 1570 | int i; |
@@ -1572,11 +1576,13 @@ static int tg_shares_up(struct task_group *tg, void *data) | |||
1572 | * run here it will not get delayed by group starvation. | 1576 | * run here it will not get delayed by group starvation. |
1573 | */ | 1577 | */ |
1574 | weight = tg->cfs_rq[i]->load.weight; | 1578 | weight = tg->cfs_rq[i]->load.weight; |
1579 | tg->cfs_rq[i]->rq_weight = weight; | ||
1580 | rq_weight += weight; | ||
1581 | |||
1575 | if (!weight) | 1582 | if (!weight) |
1576 | weight = NICE_0_LOAD; | 1583 | weight = NICE_0_LOAD; |
1577 | 1584 | ||
1578 | tg->cfs_rq[i]->rq_weight = weight; | 1585 | eff_weight += weight; |
1579 | rq_weight += weight; | ||
1580 | shares += tg->cfs_rq[i]->shares; | 1586 | shares += tg->cfs_rq[i]->shares; |
1581 | } | 1587 | } |
1582 | 1588 | ||
@@ -1586,8 +1592,14 @@ static int tg_shares_up(struct task_group *tg, void *data) | |||
1586 | if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE)) | 1592 | if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE)) |
1587 | shares = tg->shares; | 1593 | shares = tg->shares; |
1588 | 1594 | ||
1589 | for_each_cpu(i, sched_domain_span(sd)) | 1595 | for_each_cpu(i, sched_domain_span(sd)) { |
1590 | update_group_shares_cpu(tg, i, shares, rq_weight); | 1596 | unsigned long sd_rq_weight = rq_weight; |
1597 | |||
1598 | if (!tg->cfs_rq[i]->rq_weight) | ||
1599 | sd_rq_weight = eff_weight; | ||
1600 | |||
1601 | update_group_shares_cpu(tg, i, shares, sd_rq_weight); | ||
1602 | } | ||
1591 | 1603 | ||
1592 | return 0; | 1604 | return 0; |
1593 | } | 1605 | } |