Diffstat (limited to 'kernel/sched_fair.c')
 -rw-r--r--	kernel/sched_fair.c	228
 1 files changed, 113 insertions, 115 deletions

diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 6c091d6e159d..f2cc59080efa 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -175,8 +175,15 @@ static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	 * Maintain a cache of leftmost tree entries (it is frequently
 	 * used):
 	 */
-	if (leftmost)
+	if (leftmost) {
 		cfs_rq->rb_leftmost = &se->run_node;
+		/*
+		 * maintain cfs_rq->min_vruntime to be a monotonic increasing
+		 * value tracking the leftmost vruntime in the tree.
+		 */
+		cfs_rq->min_vruntime =
+			max_vruntime(cfs_rq->min_vruntime, se->vruntime);
+	}
 
 	rb_link_node(&se->run_node, parent, link);
 	rb_insert_color(&se->run_node, &cfs_rq->tasks_timeline);
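
The min_vruntime update in the hunk above depends on a wraparound-safe maximum. As a point of reference, a max_vruntime() helper of the kind used here is typically written with a signed-delta comparison so that u64 overflow of vruntime values is handled correctly; a minimal sketch, for illustration only and not part of this patch:

	static inline u64 max_vruntime(u64 min_vruntime, u64 vruntime)
	{
		s64 delta = (s64)(vruntime - min_vruntime);

		/* only ever move min_vruntime forward */
		if (delta > 0)
			min_vruntime = vruntime;

		return min_vruntime;
	}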
@@ -184,8 +191,24 @@ static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 
 static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	if (cfs_rq->rb_leftmost == &se->run_node)
-		cfs_rq->rb_leftmost = rb_next(&se->run_node);
+	if (cfs_rq->rb_leftmost == &se->run_node) {
+		struct rb_node *next_node;
+		struct sched_entity *next;
+
+		next_node = rb_next(&se->run_node);
+		cfs_rq->rb_leftmost = next_node;
+
+		if (next_node) {
+			next = rb_entry(next_node,
+					struct sched_entity, run_node);
+			cfs_rq->min_vruntime =
+				max_vruntime(cfs_rq->min_vruntime,
+					     next->vruntime);
+		}
+	}
+
+	if (cfs_rq->next == se)
+		cfs_rq->next = NULL;
 
 	rb_erase(&se->run_node, &cfs_rq->tasks_timeline);
 }
@@ -202,17 +225,12 @@ static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq)
 
 static inline struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
 {
-	struct rb_node **link = &cfs_rq->tasks_timeline.rb_node;
-	struct sched_entity *se = NULL;
-	struct rb_node *parent;
+	struct rb_node *last = rb_last(&cfs_rq->tasks_timeline);
 
-	while (*link) {
-		parent = *link;
-		se = rb_entry(parent, struct sched_entity, run_node);
-		link = &parent->rb_right;
-	}
+	if (!last)
+		return NULL;
 
-	return se;
+	return rb_entry(last, struct sched_entity, run_node);
 }
 
 /**************************************************************
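
For reference, rb_last() simply follows rb_right pointers down to the rightmost node, which is exactly what the removed open-coded loop did; a sketch of that behaviour (illustrative only, the real helper lives in lib/rbtree.c):

	/* sketch: rightmost node of an rbtree, or NULL if it is empty */
	static struct rb_node *rb_last_sketch(const struct rb_root *root)
	{
		struct rb_node *n = root->rb_node;

		if (!n)
			return NULL;
		while (n->rb_right)
			n = n->rb_right;
		return n;
	}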
@@ -265,12 +283,8 @@ static u64 __sched_period(unsigned long nr_running)
  */
 static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	u64 slice = __sched_period(cfs_rq->nr_running);
-
-	slice *= se->load.weight;
-	do_div(slice, cfs_rq->load.weight);
-
-	return slice;
+	return calc_delta_mine(__sched_period(cfs_rq->nr_running),
+			       se->load.weight, &cfs_rq->load);
 }
 
 /*
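
The rewrite of sched_slice() does not change the arithmetic, only how it is carried out: calc_delta_mine() avoids the 64-bit division by using a precomputed inverse weight, but the value it is asked to produce is the same weighted share of the period that the removed code computed directly. A plain-arithmetic equivalent, shown only to make the intent explicit:

	/* plain-arithmetic form of what the new sched_slice() requests */
	static u64 sched_slice_plain(struct cfs_rq *cfs_rq, struct sched_entity *se)
	{
		u64 slice = __sched_period(cfs_rq->nr_running);

		slice *= se->load.weight;
		do_div(slice, cfs_rq->load.weight);	/* period * se_weight / rq_weight */

		return slice;
	}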
@@ -308,7 +322,6 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
 	      unsigned long delta_exec)
 {
 	unsigned long delta_exec_weighted;
-	u64 vruntime;
 
 	schedstat_set(curr->exec_max, max((u64)delta_exec, curr->exec_max));
 
@@ -320,19 +333,6 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
 							&curr->load);
 	}
 	curr->vruntime += delta_exec_weighted;
-
-	/*
-	 * maintain cfs_rq->min_vruntime to be a monotonic increasing
-	 * value tracking the leftmost vruntime in the tree.
-	 */
-	if (first_fair(cfs_rq)) {
-		vruntime = min_vruntime(curr->vruntime,
-				__pick_next_entity(cfs_rq)->vruntime);
-	} else
-		vruntime = curr->vruntime;
-
-	cfs_rq->min_vruntime =
-		max_vruntime(cfs_rq->min_vruntime, vruntime);
 }
 
 static void update_curr(struct cfs_rq *cfs_rq)
@@ -498,7 +498,11 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 {
 	u64 vruntime;
 
-	vruntime = cfs_rq->min_vruntime;
+	if (first_fair(cfs_rq)) {
+		vruntime = min_vruntime(cfs_rq->min_vruntime,
+				__pick_next_entity(cfs_rq)->vruntime);
+	} else
+		vruntime = cfs_rq->min_vruntime;
 
 	if (sched_feat(TREE_AVG)) {
 		struct sched_entity *last = __pick_last_entity(cfs_rq);
@@ -520,8 +524,10 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 
 	if (!initial) {
 		/* sleeps upto a single latency don't count. */
-		if (sched_feat(NEW_FAIR_SLEEPERS))
-			vruntime -= sysctl_sched_latency;
+		if (sched_feat(NEW_FAIR_SLEEPERS)) {
+			vruntime -= calc_delta_fair(sysctl_sched_latency,
+						    &cfs_rq->load);
+		}
 
 		/* ensure we never gain time by being placed backwards. */
 		vruntime = max_vruntime(se->vruntime, vruntime);
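
The sleeper credit is now scaled by the runqueue load rather than being a flat sysctl_sched_latency. Assuming calc_delta_fair(delta, lw) scales delta by NICE_0_LOAD / lw->weight (as it does for delta_exec in __update_curr()), the credit shrinks as the runqueue gets busier; a rough worked example with illustrative values:

	/*
	 * Illustrative values only (NICE_0_LOAD = 1024, latency = 20ms):
	 *   cfs_rq->load.weight == 1024 (one nice-0 task):
	 *       credit = 20ms * 1024 / 1024 = 20ms
	 *   cfs_rq->load.weight == 4096 (four nice-0 tasks):
	 *       credit = 20ms * 1024 / 4096 =  5ms
	 */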
@@ -621,12 +627,32 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	se->prev_sum_exec_runtime = se->sum_exec_runtime;
 }
 
+static struct sched_entity *
+pick_next(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+	s64 diff, gran;
+
+	if (!cfs_rq->next)
+		return se;
+
+	diff = cfs_rq->next->vruntime - se->vruntime;
+	if (diff < 0)
+		return se;
+
+	gran = calc_delta_fair(sysctl_sched_wakeup_granularity, &cfs_rq->load);
+	if (diff > gran)
+		return se;
+
+	return cfs_rq->next;
+}
+
 static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
 {
 	struct sched_entity *se = NULL;
 
 	if (first_fair(cfs_rq)) {
 		se = __pick_next_entity(cfs_rq);
+		se = pick_next(cfs_rq, se);
 		set_next_entity(cfs_rq, se);
 	}
 
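
pick_next() is the consumer of cfs_rq->next, which a later hunk sets in check_preempt_wakeup(): the freshly woken entity is preferred over the leftmost one only while it trails by at most one load-scaled wakeup granularity. A worked example with illustrative numbers (vruntime is in nanoseconds):

	/*
	 * leftmost se->vruntime  = 1000000
	 * cfs_rq->next->vruntime = 1004000   -> diff =  4000
	 * gran (scaled wakeup granularity)   ->         10000
	 *
	 * diff <= gran, so the wakee (cfs_rq->next) is returned;
	 * had diff been 15000, the leftmost entity would win instead.
	 */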
@@ -732,8 +758,6 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se)
 	return se->parent;
 }
 
-#define GROUP_IMBALANCE_PCT 20
-
 #else /* CONFIG_FAIR_GROUP_SCHED */
 
 #define for_each_sched_entity(se) \
@@ -824,26 +848,15 @@ hrtick_start_fair(struct rq *rq, struct task_struct *p)
 static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
 {
 	struct cfs_rq *cfs_rq;
-	struct sched_entity *se = &p->se,
-			    *topse = NULL;	/* Highest schedulable entity */
-	int incload = 1;
+	struct sched_entity *se = &p->se;
 
 	for_each_sched_entity(se) {
-		topse = se;
-		if (se->on_rq) {
-			incload = 0;
+		if (se->on_rq)
 			break;
-		}
 		cfs_rq = cfs_rq_of(se);
 		enqueue_entity(cfs_rq, se, wakeup);
 		wakeup = 1;
 	}
-	/* Increment cpu load if we just enqueued the first task of a group on
-	 * 'rq->cpu'. 'topse' represents the group to which task 'p' belongs
-	 * at the highest grouping level.
-	 */
-	if (incload)
-		inc_cpu_load(rq, topse->load.weight);
 
 	hrtick_start_fair(rq, rq->curr);
 }
@@ -856,28 +869,16 @@ static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
 static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep)
 {
 	struct cfs_rq *cfs_rq;
-	struct sched_entity *se = &p->se,
-			    *topse = NULL;	/* Highest schedulable entity */
-	int decload = 1;
+	struct sched_entity *se = &p->se;
 
 	for_each_sched_entity(se) {
-		topse = se;
 		cfs_rq = cfs_rq_of(se);
 		dequeue_entity(cfs_rq, se, sleep);
 		/* Don't dequeue parent if it has other entities besides us */
-		if (cfs_rq->load.weight) {
-			if (parent_entity(se))
-				decload = 0;
+		if (cfs_rq->load.weight)
 			break;
-		}
 		sleep = 1;
 	}
-	/* Decrement cpu load if we just dequeued the last task of a group on
-	 * 'rq->cpu'. 'topse' represents the group to which task 'p' belongs
-	 * at the highest grouping level.
-	 */
-	if (decload)
-		dec_cpu_load(rq, topse->load.weight);
 
 	hrtick_start_fair(rq, rq->curr);
 }
@@ -1090,6 +1091,9 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p)
 		resched_task(curr);
 		return;
 	}
+
+	cfs_rq_of(pse)->next = pse;
+
 	/*
 	 * Batch tasks do not preempt (their preemption is driven by
 	 * the tick):
@@ -1191,6 +1195,25 @@ static struct task_struct *load_balance_next_fair(void *arg)
 	return __load_balance_iterator(cfs_rq, cfs_rq->rb_load_balance_curr);
 }
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
+static int cfs_rq_best_prio(struct cfs_rq *cfs_rq)
+{
+	struct sched_entity *curr;
+	struct task_struct *p;
+
+	if (!cfs_rq->nr_running || !first_fair(cfs_rq))
+		return MAX_PRIO;
+
+	curr = cfs_rq->curr;
+	if (!curr)
+		curr = __pick_next_entity(cfs_rq);
+
+	p = task_of(curr);
+
+	return p->prio;
+}
+#endif
+
 static unsigned long
 load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		  unsigned long max_load_move,
@@ -1200,45 +1223,28 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 	struct cfs_rq *busy_cfs_rq;
 	long rem_load_move = max_load_move;
 	struct rq_iterator cfs_rq_iterator;
-	unsigned long load_moved;
 
 	cfs_rq_iterator.start = load_balance_start_fair;
 	cfs_rq_iterator.next = load_balance_next_fair;
 
 	for_each_leaf_cfs_rq(busiest, busy_cfs_rq) {
 #ifdef CONFIG_FAIR_GROUP_SCHED
-		struct cfs_rq *this_cfs_rq = busy_cfs_rq->tg->cfs_rq[this_cpu];
-		unsigned long maxload, task_load, group_weight;
-		unsigned long thisload, per_task_load;
-		struct sched_entity *se = busy_cfs_rq->tg->se[busiest->cpu];
+		struct cfs_rq *this_cfs_rq;
+		long imbalance;
+		unsigned long maxload;
 
-		task_load = busy_cfs_rq->load.weight;
-		group_weight = se->load.weight;
+		this_cfs_rq = cpu_cfs_rq(busy_cfs_rq, this_cpu);
 
-		/*
-		 * 'group_weight' is contributed by tasks of total weight
-		 * 'task_load'. To move 'rem_load_move' worth of weight only,
-		 * we need to move a maximum task load of:
-		 *
-		 * maxload = (remload / group_weight) * task_load;
-		 */
-		maxload = (rem_load_move * task_load) / group_weight;
-
-		if (!maxload || !task_load)
+		imbalance = busy_cfs_rq->load.weight - this_cfs_rq->load.weight;
+		/* Don't pull if this_cfs_rq has more load than busy_cfs_rq */
+		if (imbalance <= 0)
 			continue;
 
-		per_task_load = task_load / busy_cfs_rq->nr_running;
-		/*
-		 * balance_tasks will try to forcibly move atleast one task if
-		 * possible (because of SCHED_LOAD_SCALE_FUZZ). Avoid that if
-		 * maxload is less than GROUP_IMBALANCE_FUZZ% the per_task_load.
-		 */
-		if (100 * maxload < GROUP_IMBALANCE_PCT * per_task_load)
-			continue;
+		/* Don't pull more than imbalance/2 */
+		imbalance /= 2;
+		maxload = min(rem_load_move, imbalance);
 
-		/* Disable priority-based load balance */
-		*this_best_prio = 0;
-		thisload = this_cfs_rq->load.weight;
+		*this_best_prio = cfs_rq_best_prio(this_cfs_rq);
 #else
 # define maxload rem_load_move
 #endif
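
Under CONFIG_FAIR_GROUP_SCHED the per-group pull budget is now derived purely from the weight gap between the busiest and local group runqueues. A worked example with illustrative weights:

	/*
	 * busy_cfs_rq->load.weight = 3072, this_cfs_rq->load.weight = 1024
	 *   imbalance = 3072 - 1024 = 2048
	 *   imbalance /= 2          -> 1024   (pull at most half the gap)
	 *   maxload   = min(rem_load_move, 1024)
	 *
	 * If the local group runqueue were already the heavier one
	 * (imbalance <= 0), the group would be skipped entirely.
	 */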
@@ -1247,33 +1253,11 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		 * load_balance_[start|next]_fair iterators
 		 */
 		cfs_rq_iterator.arg = busy_cfs_rq;
-		load_moved = balance_tasks(this_rq, this_cpu, busiest,
-				maxload, sd, idle, all_pinned,
-				this_best_prio,
-				&cfs_rq_iterator);
+		rem_load_move -= balance_tasks(this_rq, this_cpu, busiest,
+					       maxload, sd, idle, all_pinned,
+					       this_best_prio,
+					       &cfs_rq_iterator);
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
-		/*
-		 * load_moved holds the task load that was moved. The
-		 * effective (group) weight moved would be:
-		 * load_moved_eff = load_moved/task_load * group_weight;
-		 */
-		load_moved = (group_weight * load_moved) / task_load;
-
-		/* Adjust shares on both cpus to reflect load_moved */
-		group_weight -= load_moved;
-		set_se_shares(se, group_weight);
-
-		se = busy_cfs_rq->tg->se[this_cpu];
-		if (!thisload)
-			group_weight = load_moved;
-		else
-			group_weight = se->load.weight + load_moved;
-		set_se_shares(se, group_weight);
-#endif
-
-		rem_load_move -= load_moved;
-
 		if (rem_load_move <= 0)
 			break;
 	}
@@ -1403,6 +1387,16 @@ static void set_curr_task_fair(struct rq *rq)
 		set_next_entity(cfs_rq_of(se), se);
 }
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
+static void moved_group_fair(struct task_struct *p)
+{
+	struct cfs_rq *cfs_rq = task_cfs_rq(p);
+
+	update_curr(cfs_rq);
+	place_entity(cfs_rq, &p->se, 1);
+}
+#endif
+
 /*
  * All the scheduling class methods:
  */
@@ -1431,6 +1425,10 @@ static const struct sched_class fair_sched_class = {
 
 	.prio_changed		= prio_changed_fair,
 	.switched_to		= switched_to_fair,
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	.moved_group		= moved_group_fair,
+#endif
 };
 
 #ifdef CONFIG_SCHED_DEBUG