author     Peter Zijlstra <peterz@infradead.org>    2014-02-14 06:25:08 -0500
committer  Ingo Molnar <mingo@kernel.org>           2014-02-27 06:41:02 -0500
commit     37e117c07b89194aae7062bc63bde1104c03db02
tree       770312bf789e367b8f2102e9f8de743f05efeeac /kernel
parent     06d50c65b1043b166d102accc081093f79d8f7e5
sched: Guarantee task priority in pick_next_task()
Michael spotted that the idle_balance() push down created a task
priority problem.
Previously, when idle_balance() was called before pick_next_task(), it
was not a problem if an rt/dl task slipped in while rq->lock was
dropped: the subsequent pick would still start from the highest class.

Similarly for pre_schedule(): the rt pre-schedule could let a dl task
slip in.

But now that these calls are pulled into the pick_next_task() loop, the
loop does not go back and try the higher-priority classes again.

Cure this by adding a re-start condition to pick_next_task() and
triggering it from pick_next_task_{rt,fair}().

This also fixes a live-lock where we got stuck in pick_next_task_fair()
because idle_balance() saw a non-zero nr_running while there were in
fact no runnable fair tasks.
Reported-by: Michael Wang <wangyun@linux.vnet.ibm.com>
Fixes: 38033c37faab ("sched: Push down pre_schedule() and idle_balance()")
Tested-by: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Juri Lelli <juri.lelli@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/20140224121218.GR15586@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
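
For readers who want the new control flow at a glance, here is a condensed,
user-space model of the RETRY_TASK re-start protocol the patch introduces.
Only the RETRY_TASK sentinel and the shape of the pick_next_task() loop come
from the patch; the toy rq, the model classes, and all helper names are
illustrative, not kernel code.

/* Build with: cc -Wall retry_task_model.c */
#include <stdio.h>
#include <stddef.h>

struct rq;

struct task_struct { const char *comm; };

#define RETRY_TASK ((struct task_struct *)-1UL)

struct sched_class {
	struct task_struct *(*pick_next_task)(struct rq *rq);
};

struct rq {
	struct task_struct *rt_task;	/* stands in for the rt/dl classes */
	struct task_struct *fair_task;
	int lock_drop_let_rt_in;	/* "idle_balance() dropped rq->lock
					   and an rt task slipped in" */
};

static struct task_struct rt_task   = { "rt-task" };
static struct task_struct idle_task = { "swapper" };

static struct task_struct *pick_rt(struct rq *rq)
{
	return rq->rt_task;		/* NULL: nothing runnable in this class */
}

static struct task_struct *pick_fair(struct rq *rq)
{
	/*
	 * Models the new idle: path of pick_next_task_fair(): balancing may
	 * drop the lock, after which a higher-priority task can be runnable;
	 * signal the core loop to re-start instead of returning NULL.
	 */
	if (rq->lock_drop_let_rt_in) {
		rq->lock_drop_let_rt_in = 0;
		rq->rt_task = &rt_task;
		return RETRY_TASK;
	}
	return rq->fair_task;
}

static struct task_struct *pick_idle(struct rq *rq)
{
	(void)rq;
	return &idle_task;		/* the idle class always has a task */
}

/* Priority order: rt before fair before idle. */
static const struct sched_class classes[] = {
	{ pick_rt }, { pick_fair }, { pick_idle },
};

static struct task_struct *pick_next_task(struct rq *rq)
{
	struct task_struct *p;
	size_t i;

again:
	for (i = 0; i < sizeof(classes) / sizeof(classes[0]); i++) {
		p = classes[i].pick_next_task(rq);
		if (p) {
			if (p == RETRY_TASK)
				goto again;	/* a higher-prio class may now have work */
			return p;
		}
	}
	return &idle_task;	/* unreachable: pick_idle() never returns NULL */
}

int main(void)
{
	struct rq rq = { .lock_drop_let_rt_in = 1 };

	/* Without the re-start, this pick would miss the rt task. */
	printf("picked: %s\n", pick_next_task(&rq)->comm);	/* prints "rt-task" */
	return 0;
}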
Diffstat (limited to 'kernel')
 kernel/sched/core.c  | 12
 kernel/sched/fair.c  | 13
 kernel/sched/rt.c    | 10
 kernel/sched/sched.h |  5
 4 files changed, 34 insertions(+), 6 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a8a73b8897bf..cde573d3f12e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2586,24 +2586,28 @@ static inline void schedule_debug(struct task_struct *prev)
 static inline struct task_struct *
 pick_next_task(struct rq *rq, struct task_struct *prev)
 {
-	const struct sched_class *class;
+	const struct sched_class *class = &fair_sched_class;
 	struct task_struct *p;
 
 	/*
 	 * Optimization: we know that if all tasks are in
 	 * the fair class we can call that function directly:
 	 */
-	if (likely(prev->sched_class == &fair_sched_class &&
+	if (likely(prev->sched_class == class &&
 		   rq->nr_running == rq->cfs.h_nr_running)) {
 		p = fair_sched_class.pick_next_task(rq, prev);
-		if (likely(p))
+		if (likely(p && p != RETRY_TASK))
 			return p;
 	}
 
+again:
 	for_each_class(class) {
 		p = class->pick_next_task(rq, prev);
-		if (p)
+		if (p) {
+			if (unlikely(p == RETRY_TASK))
+				goto again;
 			return p;
+		}
 	}
 
 	BUG(); /* the idle class will always have a runnable task */
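
Two details of this hunk are worth noting. First, the fair-class fast path
now checks p != RETRY_TASK as well: it calls fair_sched_class.pick_next_task()
directly, and with the fair.c change below that call can itself return
RETRY_TASK when idle_balance() dropped rq->lock and a higher-priority task
appeared; in that case we fall through to the full class walk instead of
returning the sentinel. Second, the again: label re-starts the walk from the
highest-priority class, which is what restores the priority guarantee named
in the subject line.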
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index be4f7d9eaf03..16042b58a32f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4686,6 +4686,7 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev)
 	struct cfs_rq *cfs_rq = &rq->cfs;
 	struct sched_entity *se;
 	struct task_struct *p;
+	int new_tasks;
 
 again:
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -4784,7 +4785,17 @@ simple:
 	return p;
 
 idle:
-	if (idle_balance(rq)) /* drops rq->lock */
+	/*
+	 * Because idle_balance() releases (and re-acquires) rq->lock, it is
+	 * possible for any higher priority task to appear. In that case we
+	 * must re-start the pick_next_entity() loop.
+	 */
+	new_tasks = idle_balance(rq);
+
+	if (rq->nr_running != rq->cfs.h_nr_running)
+		return RETRY_TASK;
+
+	if (new_tasks)
 		goto again;
 
 	return NULL;
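
The key check here is rq->nr_running != rq->cfs.h_nr_running: rq->nr_running
counts all runnable tasks on the runqueue, while rq->cfs.h_nr_running counts
only the fair ones, so a mismatch after idle_balance() means a task from
another class (in practice an rt or dl task) became runnable while rq->lock
was dropped, and the fair class hands control back via RETRY_TASK. Returning
the sentinel instead of unconditionally looping back to the fair pick is also
what breaks the live-lock described in the changelog, where idle_balance()
kept seeing a non-zero nr_running even though none of those tasks were fair.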
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 4d4b386598aa..398b3f990823 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1360,8 +1360,16 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev)
 	struct task_struct *p;
 	struct rt_rq *rt_rq = &rq->rt;
 
-	if (need_pull_rt_task(rq, prev))
+	if (need_pull_rt_task(rq, prev)) {
 		pull_rt_task(rq);
+		/*
+		 * pull_rt_task() can drop (and re-acquire) rq->lock; this
+		 * means a dl task can slip in, in which case we need to
+		 * re-start task selection.
+		 */
+		if (unlikely(rq->dl.dl_nr_running))
+			return RETRY_TASK;
+	}
 
 	if (!rt_rq->rt_nr_running)
 		return NULL;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 046084ebb1fb..1929deb3f29d 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1091,6 +1091,8 @@ static const u32 prio_to_wmult[40] = {
 
 #define DEQUEUE_SLEEP 1
 
+#define RETRY_TASK	((void *)-1UL)
+
 struct sched_class {
 	const struct sched_class *next;
 
@@ -1105,6 +1107,9 @@ struct sched_class {
 	 * It is the responsibility of the pick_next_task() method that will
 	 * return the next task to call put_prev_task() on the @prev task or
 	 * something equivalent.
+	 *
+	 * May return RETRY_TASK when it finds a higher prio class has runnable
+	 * tasks.
 	 */
 	struct task_struct * (*pick_next_task) (struct rq *rq,
 						struct task_struct *prev);
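
A short note on the sentinel itself: RETRY_TASK is ((void *)-1UL), presumably
chosen because -1 can never be a valid task_struct address and is distinct
from NULL, so the existing "NULL means this class has nothing runnable"
convention keeps its meaning and callers only need the explicit
p == RETRY_TASK comparisons added in core.c. Per the new sched_class comment,
any pick_next_task() implementation that may drop rq->lock can use the same
convention, as the rt and fair implementations above now do.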