Diffstat (limited to 'kernel/sched_fair.c')
 kernel/sched_fair.c | 248 ++++++++++++++++++++++++++++++-----------
 1 file changed, 180 insertions(+), 68 deletions(-)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 9573c33688b8..98345e45b059 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -143,6 +143,49 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se)
 	return se->parent;
 }
 
+/* return depth at which a sched entity is present in the hierarchy */
+static inline int depth_se(struct sched_entity *se)
+{
+	int depth = 0;
+
+	for_each_sched_entity(se)
+		depth++;
+
+	return depth;
+}
+
+static void
+find_matching_se(struct sched_entity **se, struct sched_entity **pse)
+{
+	int se_depth, pse_depth;
+
+	/*
+	 * preemption test can be made between sibling entities who are in the
+	 * same cfs_rq i.e who have a common parent. Walk up the hierarchy of
+	 * both tasks until we find their ancestors who are siblings of common
+	 * parent.
+	 */
+
+	/* First walk up until both entities are at same depth */
+	se_depth = depth_se(*se);
+	pse_depth = depth_se(*pse);
+
+	while (se_depth > pse_depth) {
+		se_depth--;
+		*se = parent_entity(*se);
+	}
+
+	while (pse_depth > se_depth) {
+		pse_depth--;
+		*pse = parent_entity(*pse);
+	}
+
+	while (!is_same_group(*se, *pse)) {
+		*se = parent_entity(*se);
+		*pse = parent_entity(*pse);
+	}
+}
+
 #else /* CONFIG_FAIR_GROUP_SCHED */
 
 static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
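
[Note] find_matching_se() walks two entities up their parent chains until they sit in the same cfs_rq, the only place their vruntimes are directly comparable. Below is a minimal user-space sketch of the same depth-matching walk, assuming a toy parent-pointer node in place of sched_entity; the node type, names, and main() harness are illustrative, and sibling-ness is approximated by comparing parents where the kernel's is_same_group() compares cfs_rq pointers:

```c
#include <stdio.h>

/* Toy stand-in for sched_entity: only the parent linkage matters here. */
struct node {
	const char *name;
	struct node *parent;	/* NULL at the root, like se->parent */
};

static int depth_of(struct node *n)
{
	int depth = 0;

	for (; n; n = n->parent)	/* mirrors for_each_sched_entity() */
		depth++;
	return depth;
}

/* Walk both nodes up until they are siblings under a common parent. */
static void find_matching(struct node **a, struct node **b)
{
	int da = depth_of(*a), db = depth_of(*b);

	while (da > db) { da--; *a = (*a)->parent; }
	while (db > da) { db--; *b = (*b)->parent; }

	/* both NULL parents count as "same group" at the root level */
	while ((*a)->parent != (*b)->parent) {
		*a = (*a)->parent;
		*b = (*b)->parent;
	}
}

int main(void)
{
	/* root -> {g1, g2}, g1 -> t1, g2 -> g3 -> t2 */
	struct node g1 = { "g1", NULL }, g2 = { "g2", NULL };
	struct node g3 = { "g3", &g2 }, t1 = { "t1", &g1 }, t2 = { "t2", &g3 };
	struct node *a = &t1, *b = &t2;

	find_matching(&a, &b);
	printf("%s vs %s\n", a->name, b->name);	/* prints: g1 vs g2 */
	return 0;
}
```

Running it prints "g1 vs g2": both tasks were lifted to the first level where they share a parent, which is where the preemption test can be made.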
@@ -193,6 +236,11 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se)
 	return NULL;
 }
 
+static inline void
+find_matching_se(struct sched_entity **se, struct sched_entity **pse)
+{
+}
+
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
 
@@ -223,6 +271,27 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	return se->vruntime - cfs_rq->min_vruntime;
 }
 
+static void update_min_vruntime(struct cfs_rq *cfs_rq)
+{
+	u64 vruntime = cfs_rq->min_vruntime;
+
+	if (cfs_rq->curr)
+		vruntime = cfs_rq->curr->vruntime;
+
+	if (cfs_rq->rb_leftmost) {
+		struct sched_entity *se = rb_entry(cfs_rq->rb_leftmost,
+						   struct sched_entity,
+						   run_node);
+
+		if (vruntime == cfs_rq->min_vruntime)
+			vruntime = se->vruntime;
+		else
+			vruntime = min_vruntime(vruntime, se->vruntime);
+	}
+
+	cfs_rq->min_vruntime = max_vruntime(cfs_rq->min_vruntime, vruntime);
+}
+
 /*
  * Enqueue an entity into the rb-tree:
  */
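
[Note] The new update_min_vruntime() leans on the min_vruntime()/max_vruntime() helpers defined earlier in sched_fair.c, which compare u64 clocks through a signed difference so they stay correct across counter wraparound. A compilable sketch of the resulting invariant, with the helper bodies mirroring the kernel's and a toy cfs_rq (pointers stand in for curr and the rbtree leftmost; the harness and values are illustrative):

```c
#include <stdio.h>
#include <stdint.h>

typedef uint64_t u64;
typedef int64_t s64;

/* Wraparound-safe comparisons, shaped like the sched_fair.c helpers. */
static u64 max_vruntime(u64 min_vruntime, u64 vruntime)
{
	s64 delta = (s64)(vruntime - min_vruntime);
	if (delta > 0)
		min_vruntime = vruntime;
	return min_vruntime;
}

static u64 min_vruntime(u64 min_vruntime, u64 vruntime)
{
	s64 delta = (s64)(vruntime - min_vruntime);
	if (delta < 0)
		min_vruntime = vruntime;
	return min_vruntime;
}

struct toy_cfs_rq {
	u64 min_vruntime;
	const u64 *curr;	/* vruntime of the running entity, or NULL */
	const u64 *leftmost;	/* vruntime of the leftmost queued entity, or NULL */
};

/*
 * Same shape as the new update_min_vruntime(): move toward the smaller
 * of curr and leftmost, but never let min_vruntime go backwards.
 */
static void update_min(struct toy_cfs_rq *cfs_rq)
{
	u64 vruntime = cfs_rq->min_vruntime;

	if (cfs_rq->curr)
		vruntime = *cfs_rq->curr;

	if (cfs_rq->leftmost) {
		if (vruntime == cfs_rq->min_vruntime)
			vruntime = *cfs_rq->leftmost;
		else
			vruntime = min_vruntime(vruntime, *cfs_rq->leftmost);
	}

	cfs_rq->min_vruntime = max_vruntime(cfs_rq->min_vruntime, vruntime);
}

int main(void)
{
	u64 curr = 105, left = 120;
	struct toy_cfs_rq rq = { 100, &curr, &left };

	update_min(&rq);
	printf("%llu\n", (unsigned long long)rq.min_vruntime); /* 105 */

	curr = 90;	/* curr behind min_vruntime: clamp, don't go back */
	update_min(&rq);
	printf("%llu\n", (unsigned long long)rq.min_vruntime); /* still 105 */
	return 0;
}
```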
@@ -256,15 +325,8 @@ static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	 * Maintain a cache of leftmost tree entries (it is frequently
 	 * used):
 	 */
-	if (leftmost) {
+	if (leftmost)
 		cfs_rq->rb_leftmost = &se->run_node;
-		/*
-		 * maintain cfs_rq->min_vruntime to be a monotonic increasing
-		 * value tracking the leftmost vruntime in the tree.
-		 */
-		cfs_rq->min_vruntime =
-			max_vruntime(cfs_rq->min_vruntime, se->vruntime);
-	}
 
 	rb_link_node(&se->run_node, parent, link);
 	rb_insert_color(&se->run_node, &cfs_rq->tasks_timeline);
@@ -274,37 +336,25 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	if (cfs_rq->rb_leftmost == &se->run_node) {
 		struct rb_node *next_node;
-		struct sched_entity *next;
 
 		next_node = rb_next(&se->run_node);
 		cfs_rq->rb_leftmost = next_node;
-
-		if (next_node) {
-			next = rb_entry(next_node,
-					struct sched_entity, run_node);
-			cfs_rq->min_vruntime =
-				max_vruntime(cfs_rq->min_vruntime,
-					     next->vruntime);
-		}
 	}
 
-	if (cfs_rq->next == se)
-		cfs_rq->next = NULL;
-
 	rb_erase(&se->run_node, &cfs_rq->tasks_timeline);
 }
 
-static inline struct rb_node *first_fair(struct cfs_rq *cfs_rq)
-{
-	return cfs_rq->rb_leftmost;
-}
-
 static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq)
 {
-	return rb_entry(first_fair(cfs_rq), struct sched_entity, run_node);
+	struct rb_node *left = cfs_rq->rb_leftmost;
+
+	if (!left)
+		return NULL;
+
+	return rb_entry(left, struct sched_entity, run_node);
 }
 
-static inline struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
+static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
 {
 	struct rb_node *last = rb_last(&cfs_rq->tasks_timeline);
 
@@ -424,6 +474,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
 	schedstat_add(cfs_rq, exec_clock, delta_exec);
 	delta_exec_weighted = calc_delta_fair(delta_exec, curr);
 	curr->vruntime += delta_exec_weighted;
+	update_min_vruntime(cfs_rq);
 }
 
 static void update_curr(struct cfs_rq *cfs_rq)
@@ -613,13 +664,7 @@ static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se)
 static void
 place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 {
-	u64 vruntime;
-
-	if (first_fair(cfs_rq)) {
-		vruntime = min_vruntime(cfs_rq->min_vruntime,
-				__pick_next_entity(cfs_rq)->vruntime);
-	} else
-		vruntime = cfs_rq->min_vruntime;
+	u64 vruntime = cfs_rq->min_vruntime;
 
 	/*
 	 * The 'current' period is already promised to the current tasks,
@@ -671,6 +716,15 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
 		__enqueue_entity(cfs_rq, se);
 }
 
+static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+	if (cfs_rq->last == se)
+		cfs_rq->last = NULL;
+
+	if (cfs_rq->next == se)
+		cfs_rq->next = NULL;
+}
+
 static void
 dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
 {
@@ -693,9 +747,12 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
 #endif
 	}
 
+	clear_buddies(cfs_rq, se);
+
 	if (se != cfs_rq->curr)
 		__dequeue_entity(cfs_rq, se);
 	account_entity_dequeue(cfs_rq, se);
+	update_min_vruntime(cfs_rq);
 }
 
 /*
@@ -742,29 +799,18 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	se->prev_sum_exec_runtime = se->sum_exec_runtime;
 }
 
-static struct sched_entity *
-pick_next(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
-	struct rq *rq = rq_of(cfs_rq);
-	u64 pair_slice = rq->clock - cfs_rq->pair_start;
-
-	if (!cfs_rq->next || pair_slice > sysctl_sched_min_granularity) {
-		cfs_rq->pair_start = rq->clock;
-		return se;
-	}
-
-	return cfs_rq->next;
-}
+static int
+wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se);
 
 static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
 {
-	struct sched_entity *se = NULL;
+	struct sched_entity *se = __pick_next_entity(cfs_rq);
 
-	if (first_fair(cfs_rq)) {
-		se = __pick_next_entity(cfs_rq);
-		se = pick_next(cfs_rq, se);
-		set_next_entity(cfs_rq, se);
-	}
+	if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, se) < 1)
+		return cfs_rq->next;
+
+	if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, se) < 1)
+		return cfs_rq->last;
 
 	return se;
 }
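
[Note] The rewritten pick_next_entity() consults the buddies through wakeup_preempt_entity(): a result below 1 means the buddy's vruntime is at most one wakeup granularity to the right of the leftmost entity, so running the buddy costs little fairness. A standalone sketch of that selection order over plain vruntime values (the GRAN constant and the -1 "no buddy" sentinel are illustrative, not kernel values):

```c
#include <stdio.h>
#include <stdint.h>

typedef int64_t s64;

/* One wakeup granularity, in the same (illustrative) units as vruntime. */
static const s64 GRAN = 4;

/*
 * Same three-way result as wakeup_preempt_entity(): -1 if 'curr' is not
 * ahead of 'se', 1 if it is ahead by more than a granularity, else 0.
 */
static int preempt_cmp(s64 curr_vruntime, s64 se_vruntime)
{
	s64 vdiff = curr_vruntime - se_vruntime;

	if (vdiff <= 0)
		return -1;
	if (vdiff > GRAN)
		return 1;
	return 0;
}

/*
 * Buddy-aware pick, shaped like the new pick_next_entity(): prefer the
 * next (just-woken) buddy, then the last (preempted) buddy, but only if
 * the buddy is within one granularity of the leftmost entity.
 */
static s64 pick_next(s64 leftmost, s64 next, s64 last)
{
	if (next >= 0 && preempt_cmp(next, leftmost) < 1)
		return next;
	if (last >= 0 && preempt_cmp(last, leftmost) < 1)
		return last;
	return leftmost;
}

int main(void)
{
	printf("%lld\n", (long long)pick_next(10, 12, -1)); /* 12: buddy close enough */
	printf("%lld\n", (long long)pick_next(10, 20, -1)); /* 10: buddy too far right */
	printf("%lld\n", (long long)pick_next(10, -1, 13)); /* 13: fall back to last */
	return 0;
}
```

The point of the "< 1" test is that both -1 (buddy actually left of the leftmost) and 0 (within the window) let the buddy run; only a clear fairness violation falls back to the leftmost entity.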
@@ -936,6 +982,8 @@ static void yield_task_fair(struct rq *rq)
 	if (unlikely(cfs_rq->nr_running == 1))
 		return;
 
+	clear_buddies(cfs_rq, se);
+
 	if (likely(!sysctl_sched_compat_yield) && curr->policy != SCHED_BATCH) {
 		update_rq_clock(rq);
 		/*
@@ -1122,10 +1170,9 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq,
 	if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS))
 		return 0;
 
-	if (!sync && sched_feat(SYNC_WAKEUPS) &&
-	    curr->se.avg_overlap < sysctl_sched_migration_cost &&
-	    p->se.avg_overlap < sysctl_sched_migration_cost)
-		sync = 1;
+	if (sync && (curr->se.avg_overlap > sysctl_sched_migration_cost ||
+		     p->se.avg_overlap > sysctl_sched_migration_cost))
+		sync = 0;
 
 	/*
 	 * If sync wakeup then subtract the (maximum possible)
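
[Note] This hunk inverts the old heuristic: instead of promoting a non-sync wakeup to sync when both tasks show short avg_overlap, it trusts the caller's sync hint and demotes it when either task historically overlaps the waker for too long. A small sketch of the new predicate, assuming an illustrative threshold where the kernel uses the real sysctl_sched_migration_cost knob:

```c
#include <stdio.h>

#define MIGRATION_COST 500000	/* illustrative stand-in for sysctl_sched_migration_cost */

/* New rule: keep 'sync' only if both tasks historically overlap briefly. */
static int effective_sync(int sync, long curr_overlap, long wakee_overlap)
{
	if (sync && (curr_overlap > MIGRATION_COST ||
		     wakee_overlap > MIGRATION_COST))
		sync = 0;
	return sync;
}

int main(void)
{
	printf("%d\n", effective_sync(1, 1000, 1000));    /* 1: stays sync */
	printf("%d\n", effective_sync(1, 1000000, 1000)); /* 0: demoted */
	printf("%d\n", effective_sync(0, 1000, 1000));    /* 0: never promoted */
	return 0;
}
```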
@@ -1244,33 +1291,88 @@ static unsigned long wakeup_gran(struct sched_entity *se)
 	 * More easily preempt - nice tasks, while not making it harder for
 	 * + nice tasks.
 	 */
-	if (sched_feat(ASYM_GRAN))
-		gran = calc_delta_mine(gran, NICE_0_LOAD, &se->load);
+	if (!sched_feat(ASYM_GRAN) || se->load.weight > NICE_0_LOAD)
+		gran = calc_delta_fair(sysctl_sched_wakeup_granularity, se);
 
 	return gran;
 }
 
 /*
+ * Should 'se' preempt 'curr'.
+ *
+ *             |s1
+ *        |s2
+ *   |s3
+ *         g
+ *      |<--->|c
+ *
+ *  w(c, s1) = -1
+ *  w(c, s2) =  0
+ *  w(c, s3) =  1
+ *
+ */
+static int
+wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
+{
+	s64 gran, vdiff = curr->vruntime - se->vruntime;
+
+	if (vdiff <= 0)
+		return -1;
+
+	gran = wakeup_gran(curr);
+	if (vdiff > gran)
+		return 1;
+
+	return 0;
+}
+
+static void set_last_buddy(struct sched_entity *se)
+{
+	for_each_sched_entity(se)
+		cfs_rq_of(se)->last = se;
+}
+
+static void set_next_buddy(struct sched_entity *se)
+{
+	for_each_sched_entity(se)
+		cfs_rq_of(se)->next = se;
+}
+
+/*
  * Preempt the current task with a newly woken task if needed:
  */
 static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
 {
 	struct task_struct *curr = rq->curr;
-	struct cfs_rq *cfs_rq = task_cfs_rq(curr);
 	struct sched_entity *se = &curr->se, *pse = &p->se;
-	s64 delta_exec;
 
 	if (unlikely(rt_prio(p->prio))) {
+		struct cfs_rq *cfs_rq = task_cfs_rq(curr);
+
 		update_rq_clock(rq);
 		update_curr(cfs_rq);
 		resched_task(curr);
 		return;
 	}
 
+	if (unlikely(p->sched_class != &fair_sched_class))
+		return;
+
 	if (unlikely(se == pse))
 		return;
 
-	cfs_rq_of(pse)->next = pse;
+	/*
+	 * Only set the backward buddy when the current task is still on the
+	 * rq. This can happen when a wakeup gets interleaved with schedule on
+	 * the ->pre_schedule() or idle_balance() point, either of which can
+	 * drop the rq lock.
+	 *
+	 * Also, during early boot the idle thread is in the fair class, for
+	 * obvious reasons its a bad idea to schedule back to the idle thread.
+	 */
+	if (sched_feat(LAST_BUDDY) && likely(se->on_rq && curr != rq->idle))
+		set_last_buddy(se);
+	set_next_buddy(pse);
 
 	/*
 	 * We can come here with TIF_NEED_RESCHED already set from new task
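
[Note] In the new wakeup_gran(), with ASYM_GRAN enabled the wall-clock granularity is converted to the entity's virtual time only for entities heavier than NICE_0_LOAD: heavier (negative-nice) entities get a smaller window and are easier to preempt, while lighter (positive-nice) entities keep the unscaled window instead of an inflated one. A numeric sketch of that scaling, assuming the kernel's nice-0 weight of 1024 and its weight-table values for nice -5 (3121) and nice +5 (335); the 5 ms granularity is an illustrative sysctl value:

```c
#include <stdio.h>
#include <stdint.h>

typedef uint64_t u64;

#define NICE_0_LOAD	1024		/* kernel's nice-0 load weight */
#define WAKEUP_GRAN	5000000ULL	/* 5 ms, illustrative sysctl value */

/* Simplified calc_delta_fair(): delta scaled by NICE_0_LOAD / weight. */
static u64 scale_to_vtime(u64 delta, unsigned long weight)
{
	return delta * NICE_0_LOAD / weight;
}

/* Shape of the new wakeup_gran() with ASYM_GRAN enabled. */
static u64 wakeup_gran(unsigned long weight)
{
	u64 gran = WAKEUP_GRAN;

	if (weight > NICE_0_LOAD)	/* only shrink for heavier entities */
		gran = scale_to_vtime(gran, weight);

	return gran;
}

int main(void)
{
	printf("nice  0: %llu\n", (unsigned long long)wakeup_gran(1024)); /* 5000000 */
	printf("nice -5: %llu\n", (unsigned long long)wakeup_gran(3121)); /* ~1640499 */
	printf("nice +5: %llu\n", (unsigned long long)wakeup_gran(335));  /* 5000000 */
	return 0;
}
```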
@@ -1296,9 +1398,19 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
 		return;
 	}
 
-	delta_exec = se->sum_exec_runtime - se->prev_sum_exec_runtime;
-	if (delta_exec > wakeup_gran(pse))
-		resched_task(curr);
+	find_matching_se(&se, &pse);
+
+	while (se) {
+		BUG_ON(!pse);
+
+		if (wakeup_preempt_entity(se, pse) == 1) {
+			resched_task(curr);
+			break;
+		}
+
+		se = parent_entity(se);
+		pse = parent_entity(pse);
+	}
 }
 
 static struct task_struct *pick_next_task_fair(struct rq *rq)
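
[Note] The tail of check_preempt_wakeup() now decides preemption at a level where both entities share a runqueue, because raw task vruntimes from different cfs_rqs are offsets against different min_vruntime baselines and cannot be compared directly. A sketch of the loop's control flow once the entities have been matched (toy types and values are illustrative; only a result of 1, "ahead by more than a granularity", triggers the resched):

```c
#include <stdio.h>
#include <stdint.h>

typedef int64_t s64;

struct ent {
	s64 vruntime;
	struct ent *parent;	/* NULL at the root level */
};

static const s64 GRAN = 4;	/* illustrative wakeup granularity */

static int wakeup_preempt(struct ent *curr, struct ent *se)
{
	s64 vdiff = curr->vruntime - se->vruntime;

	if (vdiff <= 0)
		return -1;
	return vdiff > GRAN ? 1 : 0;
}

int main(void)
{
	/* group entities already lifted to a common cfs_rq by the matching walk */
	struct ent ga = { 100, NULL };	/* current task's group */
	struct ent gb = { 90, NULL };	/* woken task's group */
	struct ent *se = &ga, *pse = &gb;

	/* the tail of the new check_preempt_wakeup() */
	while (se) {
		if (wakeup_preempt(se, pse) == 1) {
			printf("resched current task\n");
			break;
		}
		se = se->parent;
		pse = pse->parent;
	}
	return 0;
}
```

Here the current group leads the woken group by 10 > GRAN, so the current task is rescheduled even though the tasks' own vruntimes live in different queues.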
@@ -1312,6 +1424,7 @@ static struct task_struct *pick_next_task_fair(struct rq *rq)
 
 	do {
 		se = pick_next_entity(cfs_rq);
+		set_next_entity(cfs_rq, se);
 		cfs_rq = group_cfs_rq(se);
 	} while (cfs_rq);
 
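
[Note] pick_next_entity() no longer calls set_next_entity() itself (see the earlier hunk), so the descent loop above applies that side effect at each level of the group hierarchy before dropping into the winner's child runqueue. A toy sketch of this separation of selection from state change, with illustrative types and names standing in for cfs_rq and sched_entity:

```c
#include <stdio.h>

struct toy_entity;

/* Toy runqueue: each level picks a winner, which may own a child queue. */
struct toy_rq {
	struct toy_entity *leftmost;	/* pretend rbtree leftmost */
	struct toy_entity *curr;	/* set_next_entity() target */
};

struct toy_entity {
	const char *name;
	struct toy_rq *my_q;	/* non-NULL for group entities */
};

static struct toy_entity *pick_next_entity(struct toy_rq *rq)
{
	return rq->leftmost;	/* selection only, no side effects */
}

static void set_next_entity(struct toy_rq *rq, struct toy_entity *se)
{
	rq->curr = se;		/* the caller applies the side effect */
}

int main(void)
{
	struct toy_rq leaf = { 0 }, root = { 0 };
	struct toy_entity task = { "task", NULL };
	struct toy_entity group = { "group", &leaf };
	struct toy_rq *rq = &root;
	struct toy_entity *se;

	root.leftmost = &group;
	leaf.leftmost = &task;

	/* the reworked pick_next_task_fair() loop */
	do {
		se = pick_next_entity(rq);
		set_next_entity(rq, se);	/* hoisted out of the picker */
		rq = se->my_q;
	} while (rq);

	printf("picked %s\n", se->name);	/* picked task */
	return 0;
}
```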
@@ -1594,9 +1707,6 @@ static const struct sched_class fair_sched_class = {
 	.enqueue_task		= enqueue_task_fair,
 	.dequeue_task		= dequeue_task_fair,
 	.yield_task		= yield_task_fair,
-#ifdef CONFIG_SMP
-	.select_task_rq		= select_task_rq_fair,
-#endif /* CONFIG_SMP */
 
 	.check_preempt_curr	= check_preempt_wakeup,
 
@@ -1604,6 +1714,8 @@ static const struct sched_class fair_sched_class = {
 	.put_prev_task		= put_prev_task_fair,
 
 #ifdef CONFIG_SMP
+	.select_task_rq		= select_task_rq_fair,
+
 	.load_balance		= load_balance_fair,
 	.move_one_task		= move_one_task_fair,
 #endif