path: root/kernel
author     Peter Zijlstra <peterz@infradead.org>    2014-02-14 06:25:08 -0500
committer  Ingo Molnar <mingo@kernel.org>           2014-02-27 06:41:02 -0500
commit     37e117c07b89194aae7062bc63bde1104c03db02 (patch)
tree       770312bf789e367b8f2102e9f8de743f05efeeac /kernel
parent     06d50c65b1043b166d102accc081093f79d8f7e5 (diff)
sched: Guarantee task priority in pick_next_task()
Michael spotted that the idle_balance() push down created a task
priority problem.

Previously, when we called idle_balance() before pick_next_task() it
wasn't a problem when -- because of the rq->lock droppage -- an rt/dl
task slipped in.

Similarly for pre_schedule(), rt pre-schedule could have a dl task
slip in.

But by pulling it into the pick_next_task() loop, we'll not try a
higher task priority again.

Cure this by creating a re-start condition in pick_next_task(); and
triggering this from pick_next_task_{rt,fair}().

It also fixes a live-lock where we get stuck in pick_next_task_fair()
due to idle_balance() seeing !0 nr_running but there not actually
being any fair tasks about.

Reported-by: Michael Wang <wangyun@linux.vnet.ibm.com>
Fixes: 38033c37faab ("sched: Push down pre_schedule() and idle_balance()")
Tested-by: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Juri Lelli <juri.lelli@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/20140224121218.GR15586@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
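The mechanism is small enough to model outside the kernel. The sketch below is a simplified, self-contained userspace approximation of the control flow this patch introduces, not the scheduler's real code: struct task, pick_dl()/pick_rt() and the dl_slipped_in flag are invented stand-ins. A lower priority class notices that a higher priority task appeared while rq->lock was dropped, returns the RETRY_TASK sentinel, and the outer loop re-starts from the highest class so the higher priority task wins.

/*
 * Simplified userspace model of the re-start condition; the types and
 * helpers here are illustrative stand-ins, not kernel code. It only
 * mimics the control flow of pick_next_task() after this patch.
 */
#include <stdio.h>
#include <stddef.h>

#define RETRY_TASK ((void *)-1UL)	/* "a higher prio class became runnable, start over" */

struct task {
	const char *name;
};

struct sched_class {
	const char *name;
	/* May return a task, NULL (nothing runnable here), or RETRY_TASK. */
	struct task *(*pick_next_task)(void);
};

static struct task dl_task = { "deadline" };
static struct task rt_task = { "realtime" };

/* Models a dl task that was enqueued while rq->lock was dropped. */
static int dl_slipped_in;

static struct task *pick_dl(void)
{
	return dl_slipped_in ? &dl_task : NULL;
}

static struct task *pick_rt(void)
{
	/*
	 * Models pick_next_task_rt(): pulling tasks "dropped the lock",
	 * a dl task appeared, so ask the caller to re-start selection
	 * instead of returning a lower priority rt task.
	 */
	if (!dl_slipped_in) {
		dl_slipped_in = 1;
		return RETRY_TASK;
	}
	return &rt_task;
}

/* Highest priority class first, as in the kernel's class list. */
static const struct sched_class classes[] = {
	{ "dl", pick_dl },
	{ "rt", pick_rt },
};

static struct task *pick_next_task(void)
{
	struct task *p;
	size_t i;
again:
	for (i = 0; i < sizeof(classes) / sizeof(classes[0]); i++) {
		p = classes[i].pick_next_task();
		if (p) {
			if (p == RETRY_TASK)
				goto again;	/* re-start from the highest class */
			return p;
		}
	}
	return NULL;	/* the real kernel BUG()s here; idle always has a task */
}

int main(void)
{
	struct task *p = pick_next_task();

	printf("picked: %s\n", p ? p->name : "(none)");
	return 0;
}

Built with any C compiler, this prints "picked: deadline": the re-start lets the higher priority class claim the CPU, which is the guarantee this patch restores.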
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/sched/core.c	12
-rw-r--r--	kernel/sched/fair.c	13
-rw-r--r--	kernel/sched/rt.c	10
-rw-r--r--	kernel/sched/sched.h	5
4 files changed, 34 insertions, 6 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a8a73b8897bf..cde573d3f12e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2586,24 +2586,28 @@ static inline void schedule_debug(struct task_struct *prev)
 static inline struct task_struct *
 pick_next_task(struct rq *rq, struct task_struct *prev)
 {
-	const struct sched_class *class;
+	const struct sched_class *class = &fair_sched_class;
 	struct task_struct *p;
 
 	/*
 	 * Optimization: we know that if all tasks are in
 	 * the fair class we can call that function directly:
 	 */
-	if (likely(prev->sched_class == &fair_sched_class &&
+	if (likely(prev->sched_class == class &&
 		   rq->nr_running == rq->cfs.h_nr_running)) {
 		p = fair_sched_class.pick_next_task(rq, prev);
-		if (likely(p))
+		if (likely(p && p != RETRY_TASK))
 			return p;
 	}
 
+again:
 	for_each_class(class) {
 		p = class->pick_next_task(rq, prev);
-		if (p)
+		if (p) {
+			if (unlikely(p == RETRY_TASK))
+				goto again;
 			return p;
+		}
 	}
 
 	BUG(); /* the idle class will always have a runnable task */
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index be4f7d9eaf03..16042b58a32f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4686,6 +4686,7 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev)
 	struct cfs_rq *cfs_rq = &rq->cfs;
 	struct sched_entity *se;
 	struct task_struct *p;
+	int new_tasks;
 
 again:
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -4784,7 +4785,17 @@ simple:
 	return p;
 
 idle:
-	if (idle_balance(rq)) /* drops rq->lock */
+	/*
+	 * Because idle_balance() releases (and re-acquires) rq->lock, it is
+	 * possible for any higher priority task to appear. In that case we
+	 * must re-start the pick_next_entity() loop.
+	 */
+	new_tasks = idle_balance(rq);
+
+	if (rq->nr_running != rq->cfs.h_nr_running)
+		return RETRY_TASK;
+
+	if (new_tasks)
 		goto again;
 
 	return NULL;
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 4d4b386598aa..398b3f990823 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1360,8 +1360,16 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev)
 	struct task_struct *p;
 	struct rt_rq *rt_rq = &rq->rt;
 
-	if (need_pull_rt_task(rq, prev))
+	if (need_pull_rt_task(rq, prev)) {
 		pull_rt_task(rq);
+		/*
+		 * pull_rt_task() can drop (and re-acquire) rq->lock; this
+		 * means a dl task can slip in, in which case we need to
+		 * re-start task selection.
+		 */
+		if (unlikely(rq->dl.dl_nr_running))
+			return RETRY_TASK;
+	}
 
 	if (!rt_rq->rt_nr_running)
 		return NULL;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 046084ebb1fb..1929deb3f29d 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1091,6 +1091,8 @@ static const u32 prio_to_wmult[40] = {
 
 #define DEQUEUE_SLEEP		1
 
+#define RETRY_TASK	((void *)-1UL)
+
 struct sched_class {
 	const struct sched_class *next;
 
@@ -1105,6 +1107,9 @@ struct sched_class {
 	 * It is the responsibility of the pick_next_task() method that will
 	 * return the next task to call put_prev_task() on the @prev task or
 	 * something equivalent.
+	 *
+	 * May return RETRY_TASK when it finds a higher prio class has runnable
+	 * tasks.
 	 */
 	struct task_struct * (*pick_next_task) (struct rq *rq,
 						struct task_struct *prev);