sched: revert revert of: fair-group: SMP-nice for group scheduling

Try again..

Initial commit: 18d95a2832c1392a2d63227a7a6d433cb9f2037e
Revert: 6363ca57c76b7b83639ca8c83fc285fa26a7880e

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
Cc: Mike Galbraith <efault@gmx.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
author: Peter Zijlstra <a.p.zijlstra@chello.nl> 2008-06-27 07:41:14 -0400
committer: Ingo Molnar <mingo@elte.hu> 2008-06-27 08:31:29 -0400
commit: c09595f63bb1909c5dc4dca288f4fe818561b5f3 (patch)
tree: 42631e6986f3ea4543b125ca62a99df8548e0eb9 /kernel/sched_fair.c
parent: ced8aa16e1db55c33c507174c1b1f9e107445865 (diff)
1 files changed, 80 insertions, 44 deletions
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 2e197b8e43f1..183388c4dead 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -567,10 +567,27 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
 * Scheduling class queueing methods:
 */
+#if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED
+static void
+add_cfs_task_weight(struct cfs_rq *cfs_rq, unsigned long weight)
+{
+        cfs_rq->task_weight += weight;
+}
+#else
+static inline void
+add_cfs_task_weight(struct cfs_rq *cfs_rq, unsigned long weight)
+{
+}
+#endif
 static void
 account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
        update_load_add(&cfs_rq->load, se->load.weight);
+        if (!parent_entity(se))
+                inc_cpu_load(rq_of(cfs_rq), se->load.weight);
+        if (entity_is_task(se))
+                add_cfs_task_weight(cfs_rq, se->load.weight);
        cfs_rq->nr_running++;
        se->on_rq = 1;
        list_add(&se->group_node, &cfs_rq->tasks);
@@ -580,6 +597,10 @@ static void
 account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
        update_load_sub(&cfs_rq->load, se->load.weight);
+        if (!parent_entity(se))
+                dec_cpu_load(rq_of(cfs_rq), se->load.weight);
+        if (entity_is_task(se))
+                add_cfs_task_weight(cfs_rq, -se->load.weight);
        cfs_rq->nr_running--;
        se->on_rq = 0;
        list_del_init(&se->group_node);
@@ -1372,75 +1393,90 @@ static struct task_struct *load_balance_next_fair(void *arg)
        return __load_balance_iterator(cfs_rq, cfs_rq->balance_iterator);
 }
-#ifdef CONFIG_FAIR_GROUP_SCHED
+static unsigned long
-static int cfs_rq_best_prio(struct cfs_rq *cfs_rq)
+__load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
+                unsigned long max_load_move, struct sched_domain *sd,
+                enum cpu_idle_type idle, int *all_pinned, int *this_best_prio,
+                struct cfs_rq *cfs_rq)
 {
-        struct sched_entity *curr;
+        struct rq_iterator cfs_rq_iterator;
-        struct task_struct *p;
-        if (!cfs_rq->nr_running || !first_fair(cfs_rq))
-                return MAX_PRIO;
-        curr = cfs_rq->curr;
-        if (!curr)
-                curr = __pick_next_entity(cfs_rq);
-        p = task_of(curr);
+        cfs_rq_iterator.start = load_balance_start_fair;
+        cfs_rq_iterator.next = load_balance_next_fair;
+        cfs_rq_iterator.arg = cfs_rq;
-        return p->prio;
+        return balance_tasks(this_rq, this_cpu, busiest,
+                        max_load_move, sd, idle, all_pinned,
+                        this_best_prio, &cfs_rq_iterator);
 }
-#endif
+#ifdef CONFIG_FAIR_GROUP_SCHED
 static unsigned long
 load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
                  unsigned long max_load_move,
                  struct sched_domain *sd, enum cpu_idle_type idle,
                  int *all_pinned, int *this_best_prio)
 {
-        struct cfs_rq *busy_cfs_rq;
        long rem_load_move = max_load_move;
-        struct rq_iterator cfs_rq_iterator;
+        int busiest_cpu = cpu_of(busiest);
+        struct task_group *tg;
-        cfs_rq_iterator.start = load_balance_start_fair;
-        cfs_rq_iterator.next = load_balance_next_fair;
-        for_each_leaf_cfs_rq(busiest, busy_cfs_rq) {
+        rcu_read_lock();
-#ifdef CONFIG_FAIR_GROUP_SCHED
+        list_for_each_entry(tg, &task_groups, list) {
-                struct cfs_rq *this_cfs_rq;
                long imbalance;
-                unsigned long maxload;
+                unsigned long this_weight, busiest_weight;
+                long rem_load, max_load, moved_load;
+                /*
+                 * empty group
+                 */
+                if (!aggregate(tg, sd)->task_weight)
+                        continue;
+                rem_load = rem_load_move * aggregate(tg, sd)->rq_weight;
+                rem_load /= aggregate(tg, sd)->load + 1;
+                this_weight = tg->cfs_rq[this_cpu]->task_weight;
+                busiest_weight = tg->cfs_rq[busiest_cpu]->task_weight;
-                this_cfs_rq = cpu_cfs_rq(busy_cfs_rq, this_cpu);
+                imbalance = (busiest_weight - this_weight) / 2;
-                imbalance = busy_cfs_rq->load.weight - this_cfs_rq->load.weight;
+                if (imbalance < 0)
-                /* Don't pull if this_cfs_rq has more load than busy_cfs_rq */
+                        imbalance = busiest_weight;
-                if (imbalance <= 0)
+                max_load = max(rem_load, imbalance);
+                moved_load = __load_balance_fair(this_rq, this_cpu, busiest,
+                                max_load, sd, idle, all_pinned, this_best_prio,
+                                tg->cfs_rq[busiest_cpu]);
+                if (!moved_load)
                        continue;
-                /* Don't pull more than imbalance/2 */
+                move_group_shares(tg, sd, busiest_cpu, this_cpu);
-                imbalance /= 2;
-                maxload = min(rem_load_move, imbalance);
-                *this_best_prio = cfs_rq_best_prio(this_cfs_rq);
+                moved_load *= aggregate(tg, sd)->load;
-#else
+                moved_load /= aggregate(tg, sd)->rq_weight + 1;
-# define maxload rem_load_move
-#endif
-                /*
-                 * pass busy_cfs_rq argument into
-                 * load_balance_[start|next]_fair iterators
-                 */
-                cfs_rq_iterator.arg = busy_cfs_rq;
-                rem_load_move -= balance_tasks(this_rq, this_cpu, busiest,
-                                               maxload, sd, idle, all_pinned,
-                                               this_best_prio,
-                                               &cfs_rq_iterator);
-                if (rem_load_move <= 0)
+                rem_load_move -= moved_load;
+                if (rem_load_move < 0)
                        break;
        }
+        rcu_read_unlock();
        return max_load_move - rem_load_move;
 }
+#else
+static unsigned long
+load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
+                  unsigned long max_load_move,
+                  struct sched_domain *sd, enum cpu_idle_type idle,
+                  int *all_pinned, int *this_best_prio)
+{
+        return __load_balance_fair(this_rq, this_cpu, busiest,
+                        max_load_move, sd, idle, all_pinned,
+                        this_best_prio, &busiest->cfs);
+}
+#endif
 static int
 move_one_task_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
author	Peter Zijlstra <a.p.zijlstra@chello.nl>	2008-06-27 07:41:14 -0400
committer	Ingo Molnar <mingo@elte.hu>	2008-06-27 08:31:29 -0400
commit	c09595f63bb1909c5dc4dca288f4fe818561b5f3 (patch)
tree	42631e6986f3ea4543b125ca62a99df8548e0eb9 /kernel/sched_fair.c
parent	ced8aa16e1db55c33c507174c1b1f9e107445865 (diff)

diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 2e197b8e43f1..183388c4dead 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -567,10 +567,27 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
567	* Scheduling class queueing methods:	567	* Scheduling class queueing methods:
568	*/	568	*/
569		569
		570	#if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED
		571	static void
		572	add_cfs_task_weight(struct cfs_rq *cfs_rq, unsigned long weight)
		573	{
		574	cfs_rq->task_weight += weight;
		575	}
		576	#else
		577	static inline void
		578	add_cfs_task_weight(struct cfs_rq *cfs_rq, unsigned long weight)
		579	{
		580	}
		581	#endif
		582
570	static void	583	static void
571	account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)	584	account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
572	{	585	{
573	update_load_add(&cfs_rq->load, se->load.weight);	586	update_load_add(&cfs_rq->load, se->load.weight);
		587	if (!parent_entity(se))
		588	inc_cpu_load(rq_of(cfs_rq), se->load.weight);
		589	if (entity_is_task(se))
		590	add_cfs_task_weight(cfs_rq, se->load.weight);
574	cfs_rq->nr_running++;	591	cfs_rq->nr_running++;
575	se->on_rq = 1;	592	se->on_rq = 1;
576	list_add(&se->group_node, &cfs_rq->tasks);	593	list_add(&se->group_node, &cfs_rq->tasks);
@@ -580,6 +597,10 @@ static void
580	account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)	597	account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
581	{	598	{
582	update_load_sub(&cfs_rq->load, se->load.weight);	599	update_load_sub(&cfs_rq->load, se->load.weight);
		600	if (!parent_entity(se))
		601	dec_cpu_load(rq_of(cfs_rq), se->load.weight);
		602	if (entity_is_task(se))
		603	add_cfs_task_weight(cfs_rq, -se->load.weight);
583	cfs_rq->nr_running--;	604	cfs_rq->nr_running--;
584	se->on_rq = 0;	605	se->on_rq = 0;
585	list_del_init(&se->group_node);	606	list_del_init(&se->group_node);
@@ -1372,75 +1393,90 @@ static struct task_struct *load_balance_next_fair(void *arg)
1372	return __load_balance_iterator(cfs_rq, cfs_rq->balance_iterator);	1393	return __load_balance_iterator(cfs_rq, cfs_rq->balance_iterator);
1373	}	1394	}
1374		1395
1375	#ifdef CONFIG_FAIR_GROUP_SCHED	1396	static unsigned long
1376	static int cfs_rq_best_prio(struct cfs_rq *cfs_rq)	1397	__load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
		1398	unsigned long max_load_move, struct sched_domain *sd,
		1399	enum cpu_idle_type idle, int *all_pinned, int *this_best_prio,
		1400	struct cfs_rq *cfs_rq)
1377	{	1401	{
1378	struct sched_entity *curr;	1402	struct rq_iterator cfs_rq_iterator;
1379	struct task_struct *p;
1380
1381	if (!cfs_rq->nr_running || !first_fair(cfs_rq))
1382	return MAX_PRIO;
1383
1384	curr = cfs_rq->curr;
1385	if (!curr)
1386	curr = __pick_next_entity(cfs_rq);
1387		1403
1388	p = task_of(curr);	1404	cfs_rq_iterator.start = load_balance_start_fair;
		1405	cfs_rq_iterator.next = load_balance_next_fair;
		1406	cfs_rq_iterator.arg = cfs_rq;
1389		1407
1390	return p->prio;	1408	return balance_tasks(this_rq, this_cpu, busiest,
		1409	max_load_move, sd, idle, all_pinned,
		1410	this_best_prio, &cfs_rq_iterator);
1391	}	1411	}
1392	#endif
1393		1412
		1413	#ifdef CONFIG_FAIR_GROUP_SCHED
1394	static unsigned long	1414	static unsigned long
1395	load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,	1415	load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
1396	unsigned long max_load_move,	1416	unsigned long max_load_move,
1397	struct sched_domain *sd, enum cpu_idle_type idle,	1417	struct sched_domain *sd, enum cpu_idle_type idle,
1398	int *all_pinned, int *this_best_prio)	1418	int *all_pinned, int *this_best_prio)
1399	{	1419	{
1400	struct cfs_rq *busy_cfs_rq;
1401	long rem_load_move = max_load_move;	1420	long rem_load_move = max_load_move;
1402	struct rq_iterator cfs_rq_iterator;	1421	int busiest_cpu = cpu_of(busiest);
1403		1422	struct task_group *tg;
1404	cfs_rq_iterator.start = load_balance_start_fair;
1405	cfs_rq_iterator.next = load_balance_next_fair;
1406		1423
1407	for_each_leaf_cfs_rq(busiest, busy_cfs_rq) {	1424	rcu_read_lock();
1408	#ifdef CONFIG_FAIR_GROUP_SCHED	1425	list_for_each_entry(tg, &task_groups, list) {
1409	struct cfs_rq *this_cfs_rq;
1410	long imbalance;	1426	long imbalance;
1411	unsigned long maxload;	1427	unsigned long this_weight, busiest_weight;
		1428	long rem_load, max_load, moved_load;
		1429
		1430	/*
		1431	* empty group
		1432	*/
		1433	if (!aggregate(tg, sd)->task_weight)
		1434	continue;
		1435
		1436	rem_load = rem_load_move * aggregate(tg, sd)->rq_weight;
		1437	rem_load /= aggregate(tg, sd)->load + 1;
		1438
		1439	this_weight = tg->cfs_rq[this_cpu]->task_weight;
		1440	busiest_weight = tg->cfs_rq[busiest_cpu]->task_weight;
1412		1441
1413	this_cfs_rq = cpu_cfs_rq(busy_cfs_rq, this_cpu);	1442	imbalance = (busiest_weight - this_weight) / 2;
1414		1443
1415	imbalance = busy_cfs_rq->load.weight - this_cfs_rq->load.weight;	1444	if (imbalance < 0)
1416	/* Don't pull if this_cfs_rq has more load than busy_cfs_rq */	1445	imbalance = busiest_weight;
1417	if (imbalance <= 0)	1446
		1447	max_load = max(rem_load, imbalance);
		1448	moved_load = __load_balance_fair(this_rq, this_cpu, busiest,
		1449	max_load, sd, idle, all_pinned, this_best_prio,
		1450	tg->cfs_rq[busiest_cpu]);
		1451
		1452	if (!moved_load)
1418	continue;	1453	continue;
1419		1454
1420	/* Don't pull more than imbalance/2 */	1455	move_group_shares(tg, sd, busiest_cpu, this_cpu);
1421	imbalance /= 2;
1422	maxload = min(rem_load_move, imbalance);
1423		1456
1424	*this_best_prio = cfs_rq_best_prio(this_cfs_rq);	1457	moved_load *= aggregate(tg, sd)->load;
1425	#else	1458	moved_load /= aggregate(tg, sd)->rq_weight + 1;
1426	# define maxload rem_load_move
1427	#endif
1428	/*
1429	* pass busy_cfs_rq argument into
1430	* load_balance_[start|next]_fair iterators
1431	*/
1432	cfs_rq_iterator.arg = busy_cfs_rq;
1433	rem_load_move -= balance_tasks(this_rq, this_cpu, busiest,
1434	maxload, sd, idle, all_pinned,
1435	this_best_prio,
1436	&cfs_rq_iterator);
1437		1459
1438	if (rem_load_move <= 0)	1460	rem_load_move -= moved_load;
		1461	if (rem_load_move < 0)
1439	break;	1462	break;
1440	}	1463	}
		1464	rcu_read_unlock();
1441		1465
1442	return max_load_move - rem_load_move;	1466	return max_load_move - rem_load_move;
1443	}	1467	}
		1468	#else
		1469	static unsigned long
		1470	load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
		1471	unsigned long max_load_move,
		1472	struct sched_domain *sd, enum cpu_idle_type idle,
		1473	int *all_pinned, int *this_best_prio)
		1474	{
		1475	return __load_balance_fair(this_rq, this_cpu, busiest,
		1476	max_load_move, sd, idle, all_pinned,
		1477	this_best_prio, &busiest->cfs);
		1478	}
		1479	#endif
1444		1480
1445	static int	1481	static int
1446	move_one_task_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,	1482	move_one_task_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,