author	Peter Zijlstra <a.p.zijlstra@chello.nl>	2009-08-27 07:08:56 -0400
committer	Ingo Molnar <mingo@elte.hu>	2009-08-28 02:26:49 -0400
commit	34d76c41554a05425613d16efebb3069c4c545f0 (patch)
tree	a2c6551f169b1a7e27191098fa8e6acf7f2a76cd /kernel/sched.c
parent	a8af7246c114bfd939e539f9566b872c06f6225c (diff)
sched: Fix division by zero - really
When re-computing the shares for each task group's cpu representation we
need the ratio of weight on each cpu vs the total weight of the sched
domain.

Since load-balancing is loosely (read: not) synchronized, the weight of
individual cpus can change between doing the sum and calculating the
ratio.

The previous patch dealt with only one of the race scenarios; this patch
side-steps them all by saving a snapshot of all the individual cpu
weights, thereby always working on a consistent set.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: torvalds@linux-foundation.org
Cc: jes@sgi.com
Cc: jens.axboe@oracle.com
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Yinghai Lu <yinghai@kernel.org>
LKML-Reference: <1251371336.18584.77.camel@twins>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
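For illustration only, here is a minimal user-space sketch of the snapshot idea the
patch applies; it is not the kernel code itself, and cpu_weight[], snapshot[] and
compute_shares() are hypothetical names. The shared weights are copied once into a
private array, and both the total and the per-cpu ratios are derived from that single
consistent copy, so the total can never fall out of sync with the individual entries
between the two steps.

#include <stdio.h>

#define NR_CPUS 4

/* Hypothetical shared state; stands in for tg->cfs_rq[i]->load.weight,
 * which other cpus may update concurrently in the real scheduler. */
static unsigned long cpu_weight[NR_CPUS] = { 1024, 0, 2048, 1024 };

static void compute_shares(unsigned long total_shares)
{
	unsigned long snapshot[NR_CPUS];	/* private copy, like usd->rq_weight[] */
	unsigned long sum = 0;
	int i;

	/* 1) Snapshot every weight exactly once. */
	for (i = 0; i < NR_CPUS; i++) {
		snapshot[i] = cpu_weight[i];
		if (!snapshot[i])		/* idle cpu: pretend one average task */
			snapshot[i] = 1024;	/* stands in for NICE_0_LOAD */
		sum += snapshot[i];
	}

	/*
	 * 2) Derive the ratios from the same snapshot.  Because sum was built
	 *    from these very entries, it can never be zero or smaller than any
	 *    single entry, so the division below is safe.
	 */
	for (i = 0; i < NR_CPUS; i++)
		printf("cpu%d share: %lu\n", i, total_shares * snapshot[i] / sum);
}

int main(void)
{
	compute_shares(1024);
	return 0;
}

In the patch itself the per-cpu buffer update_shares_data plays the role of this
snapshot: tg_shares_up() fills usd->rq_weight[] under local_irq_save() and hands the
same buffer to update_group_shares_cpu() for the ratio calculation.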
Diffstat (limited to 'kernel/sched.c')
-rw-r--r--	kernel/sched.c	50
1 file changed, 29 insertions(+), 21 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index 8f8a98eab9db..523e20a62695 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1515,30 +1515,29 @@ static unsigned long cpu_avg_load_per_task(int cpu)
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 
+struct update_shares_data {
+	unsigned long rq_weight[NR_CPUS];
+};
+
+static DEFINE_PER_CPU(struct update_shares_data, update_shares_data);
+
 static void __set_se_shares(struct sched_entity *se, unsigned long shares);
 
 /*
  * Calculate and set the cpu's group shares.
  */
-static void
-update_group_shares_cpu(struct task_group *tg, int cpu,
-			unsigned long sd_shares, unsigned long sd_rq_weight,
-			unsigned long sd_eff_weight)
+static void update_group_shares_cpu(struct task_group *tg, int cpu,
+				    unsigned long sd_shares,
+				    unsigned long sd_rq_weight,
+				    struct update_shares_data *usd)
 {
-	unsigned long rq_weight;
-	unsigned long shares;
+	unsigned long shares, rq_weight;
 	int boost = 0;
 
-	if (!tg->se[cpu])
-		return;
-
-	rq_weight = tg->cfs_rq[cpu]->rq_weight;
+	rq_weight = usd->rq_weight[cpu];
 	if (!rq_weight) {
 		boost = 1;
 		rq_weight = NICE_0_LOAD;
-		if (sd_rq_weight == sd_eff_weight)
-			sd_eff_weight += NICE_0_LOAD;
-		sd_rq_weight = sd_eff_weight;
 	}
 
 	/*
@@ -1555,6 +1554,7 @@ update_group_shares_cpu(struct task_group *tg, int cpu,
 		unsigned long flags;
 
 		spin_lock_irqsave(&rq->lock, flags);
+		tg->cfs_rq[cpu]->rq_weight = boost ? 0 : rq_weight;
 		tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
 		__set_se_shares(tg->se[cpu], shares);
 		spin_unlock_irqrestore(&rq->lock, flags);
@@ -1568,25 +1568,31 @@ update_group_shares_cpu(struct task_group *tg, int cpu,
  */
 static int tg_shares_up(struct task_group *tg, void *data)
 {
-	unsigned long weight, rq_weight = 0, eff_weight = 0;
-	unsigned long shares = 0;
+	unsigned long weight, rq_weight = 0, shares = 0;
+	struct update_shares_data *usd;
 	struct sched_domain *sd = data;
+	unsigned long flags;
 	int i;
 
+	if (!tg->se[0])
+		return 0;
+
+	local_irq_save(flags);
+	usd = &__get_cpu_var(update_shares_data);
+
 	for_each_cpu(i, sched_domain_span(sd)) {
+		weight = tg->cfs_rq[i]->load.weight;
+		usd->rq_weight[i] = weight;
+
 		/*
 		 * If there are currently no tasks on the cpu pretend there
 		 * is one of average load so that when a new task gets to
 		 * run here it will not get delayed by group starvation.
 		 */
-		weight = tg->cfs_rq[i]->load.weight;
-		tg->cfs_rq[i]->rq_weight = weight;
-		rq_weight += weight;
-
 		if (!weight)
 			weight = NICE_0_LOAD;
 
-		eff_weight += weight;
+		rq_weight += weight;
 		shares += tg->cfs_rq[i]->shares;
 	}
 
@@ -1597,7 +1603,9 @@ static int tg_shares_up(struct task_group *tg, void *data)
 	shares = tg->shares;
 
 	for_each_cpu(i, sched_domain_span(sd))
-		update_group_shares_cpu(tg, i, shares, rq_weight, eff_weight);
+		update_group_shares_cpu(tg, i, shares, rq_weight, usd);
+
+	local_irq_restore(flags);
 
 	return 0;
 }