aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Peter Zijlstra <a.p.zijlstra@chello.nl> 2009-07-27 08:04:49 -0400
committer: Ingo Molnar <mingo@elte.hu> 2009-08-02 08:26:06 -0400
commit: a5004278f0525dcb9aa43703ef77bf371ea837cd (patch)
tree: d4beb94aa2e454a6d23425cac856654e507541a1
parent: 8e9ed8b02490fea577b1eb1704c05bf43c891ed7 (diff)
sched: Fix cgroup smp fairness
Commit ec4e0e2fe018992d980910db901637c814575914 ("fix inconsistency when redistribute per-cpu tg->cfs_rq shares") broke cgroup smp fairness.

In order to avoid starvation of newly placed tasks, we never quite set the share of an empty cpu group-task to 0, but instead we set it as if there's a single NICE-0 task present.

If however we actually set this in cfs_rq[cpu]->shares, that means the total shares for that group will be slightly inflated every time we balance, causing the observed unfairness.

Fix this by setting cfs_rq[cpu]->shares to 0 but actually setting the effective weight of the related se to the inflated number.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1248696557.6987.1615.camel@twins>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- kernel/sched.c 28
1 files changed, 20 insertions, 8 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index ce1056e9b02a..26976cd8be0f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1523,13 +1523,18 @@ static void
1523update_group_shares_cpu(struct task_group *tg, int cpu, 1523update_group_shares_cpu(struct task_group *tg, int cpu,
1524 unsigned long sd_shares, unsigned long sd_rq_weight) 1524 unsigned long sd_shares, unsigned long sd_rq_weight)
1525{ 1525{
1526 unsigned long shares;
1527 unsigned long rq_weight; 1526 unsigned long rq_weight;
1527 unsigned long shares;
1528 int boost = 0;
1528 1529
1529 if (!tg->se[cpu]) 1530 if (!tg->se[cpu])
1530 return; 1531 return;
1531 1532
1532 rq_weight = tg->cfs_rq[cpu]->rq_weight; 1533 rq_weight = tg->cfs_rq[cpu]->rq_weight;
1534 if (!rq_weight) {
1535 boost = 1;
1536 rq_weight = NICE_0_LOAD;
1537 }
1533 1538
1534 /* 1539 /*
1535 * \Sum shares * rq_weight 1540 * \Sum shares * rq_weight
@@ -1546,8 +1551,7 @@ update_group_shares_cpu(struct task_group *tg, int cpu,
1546 unsigned long flags; 1551 unsigned long flags;
1547 1552
1548 spin_lock_irqsave(&rq->lock, flags); 1553 spin_lock_irqsave(&rq->lock, flags);
1549 tg->cfs_rq[cpu]->shares = shares; 1554 tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
1550
1551 __set_se_shares(tg->se[cpu], shares); 1555 __set_se_shares(tg->se[cpu], shares);
1552 spin_unlock_irqrestore(&rq->lock, flags); 1556 spin_unlock_irqrestore(&rq->lock, flags);
1553 } 1557 }
@@ -1560,7 +1564,7 @@ update_group_shares_cpu(struct task_group *tg, int cpu,
1560 */ 1564 */
1561static int tg_shares_up(struct task_group *tg, void *data) 1565static int tg_shares_up(struct task_group *tg, void *data)
1562{ 1566{
1563 unsigned long weight, rq_weight = 0; 1567 unsigned long weight, rq_weight = 0, eff_weight = 0;
1564 unsigned long shares = 0; 1568 unsigned long shares = 0;
1565 struct sched_domain *sd = data; 1569 struct sched_domain *sd = data;
1566 int i; 1570 int i;
@@ -1572,11 +1576,13 @@ static int tg_shares_up(struct task_group *tg, void *data)
1572 * run here it will not get delayed by group starvation. 1576 * run here it will not get delayed by group starvation.
1573 */ 1577 */
1574 weight = tg->cfs_rq[i]->load.weight; 1578 weight = tg->cfs_rq[i]->load.weight;
1579 tg->cfs_rq[i]->rq_weight = weight;
1580 rq_weight += weight;
1581
1575 if (!weight) 1582 if (!weight)
1576 weight = NICE_0_LOAD; 1583 weight = NICE_0_LOAD;
1577 1584
1578 tg->cfs_rq[i]->rq_weight = weight; 1585 eff_weight += weight;
1579 rq_weight += weight;
1580 shares += tg->cfs_rq[i]->shares; 1586 shares += tg->cfs_rq[i]->shares;
1581 } 1587 }
1582 1588
@@ -1586,8 +1592,14 @@ static int tg_shares_up(struct task_group *tg, void *data)
1586 if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE)) 1592 if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE))
1587 shares = tg->shares; 1593 shares = tg->shares;
1588 1594
1589 for_each_cpu(i, sched_domain_span(sd)) 1595 for_each_cpu(i, sched_domain_span(sd)) {
1590 update_group_shares_cpu(tg, i, shares, rq_weight); 1596 unsigned long sd_rq_weight = rq_weight;
1597
1598 if (!tg->cfs_rq[i]->rq_weight)
1599 sd_rq_weight = eff_weight;
1600
1601 update_group_shares_cpu(tg, i, shares, sd_rq_weight);
1602 }
1591 1603
1592 return 0; 1604 return 0;
1593} 1605}