author     Wanpeng Li <wanpeng.li@hotmail.com>     2015-08-28 02:55:56 -0400
committer  Ingo Molnar <mingo@kernel.org>          2015-09-11 01:57:50 -0400
commit     5473e0cc37c03c576adbda7591a6cc8e37c1bb7f
tree       3e4996bd02962f27c96dca440f036a571240ca67
parent     7c8bb6cb95061b3143759459ed6c6b0c73bcfecb
sched: 'Annotate' migrate_tasks()
Kernel testing triggered this warning:
| WARNING: CPU: 0 PID: 13 at kernel/sched/core.c:1156 do_set_cpus_allowed+0x7e/0x80()
| Modules linked in:
| CPU: 0 PID: 13 Comm: migration/0 Not tainted 4.2.0-rc1-00049-g25834c7 #2
| Call Trace:
| dump_stack+0x4b/0x75
| warn_slowpath_common+0x8b/0xc0
| warn_slowpath_null+0x22/0x30
| do_set_cpus_allowed+0x7e/0x80
| cpuset_cpus_allowed_fallback+0x7c/0x170
| select_fallback_rq+0x221/0x280
| migration_call+0xe3/0x250
| notifier_call_chain+0x53/0x70
| __raw_notifier_call_chain+0x1e/0x30
| cpu_notify+0x28/0x50
| take_cpu_down+0x22/0x40
| multi_cpu_stop+0xd5/0x140
| cpu_stopper_thread+0xbc/0x170
| smpboot_thread_fn+0x174/0x2f0
| kthread+0xc4/0xe0
| ret_from_kernel_thread+0x21/0x30
As Peterz pointed out:
| So the normal rules for changing task_struct::cpus_allowed are holding
| both pi_lock and rq->lock, such that holding either stabilizes the mask.
|
| This is so that wakeup can happen without rq->lock and load-balance
| without pi_lock.
|
| From this we already get the relaxation that we can omit acquiring
| rq->lock if the task is not on the rq, because in that case
| load-balancing will not apply to it.
|
| ** these are the rules currently tested in do_set_cpus_allowed() **
|
| Now, since __set_cpus_allowed_ptr() uses task_rq_lock() which
| unconditionally acquires both locks, we could get away with holding just
| rq->lock when on_rq for modification because that'd still exclude
| __set_cpus_allowed_ptr(), it would also work against
| __kthread_bind_mask() because that assumes !on_rq.
|
| That said, this is all somewhat fragile.
|
| Now, I don't think dropping rq->lock is quite as disastrous as it
| usually is because !cpu_active at this point, which means load-balance
| will not interfere, but that too is somewhat fragile.
|
| So we end up with a choice of two fragile..
This patch fixes it by following the rules for changing
task_struct::cpus_allowed with both pi_lock and rq->lock held.
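To make the quoted locking rule concrete, below is a minimal userspace sketch, not kernel code: two pthread mutexes stand in for task_struct::pi_lock and rq->lock, and set_cpus_allowed(), read_mask_wakeup() and read_mask_balance() are illustrative names. A writer takes both locks, so a reader holding either one sees a stable mask:

/*
 * Sketch only: pthread mutexes model pi_lock and rq->lock.
 * Build with: gcc -pthread sketch.c
 */
#include <pthread.h>
#include <stdint.h>

static pthread_mutex_t pi_lock = PTHREAD_MUTEX_INITIALIZER; /* models task_struct::pi_lock */
static pthread_mutex_t rq_lock = PTHREAD_MUTEX_INITIALIZER; /* models rq->lock */
static uint64_t cpus_allowed;                               /* models the task's cpumask */

/* Writer: must hold BOTH locks, like do_set_cpus_allowed() expects. */
static void set_cpus_allowed(uint64_t new_mask)
{
        pthread_mutex_lock(&pi_lock);
        pthread_mutex_lock(&rq_lock);
        cpus_allowed = new_mask;
        pthread_mutex_unlock(&rq_lock);
        pthread_mutex_unlock(&pi_lock);
}

/* Reader on the wakeup path: pi_lock alone stabilizes the mask. */
static uint64_t read_mask_wakeup(void)
{
        pthread_mutex_lock(&pi_lock);
        uint64_t mask = cpus_allowed;
        pthread_mutex_unlock(&pi_lock);
        return mask;
}

/* Reader on the load-balance path: rq->lock alone stabilizes the mask. */
static uint64_t read_mask_balance(void)
{
        pthread_mutex_lock(&rq_lock);
        uint64_t mask = cpus_allowed;
        pthread_mutex_unlock(&rq_lock);
        return mask;
}

int main(void)
{
        set_cpus_allowed(0x3); /* allow CPUs 0 and 1 */
        return read_mask_wakeup() == read_mask_balance() ? 0 : 1;
}

In the trace above, select_fallback_rq() ends up in do_set_cpus_allowed(), i.e. on the writer side, which is why the hunk below takes next->pi_lock in addition to rq->lock around that call and re-validates the task after re-acquiring rq->lock.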
Reported-by: kernel test robot <ying.huang@intel.com>
Reported-by: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
[ Modified changelog and patch. ]
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/BLU436-SMTP1660820490DE202E3934ED3806E0@phx.gbl
Signed-off-by: Ingo Molnar <mingo@kernel.org>
 kernel/sched/core.c | 29 ++++++++++++++++++++++++++---
 1 file changed, 26 insertions(+), 3 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0902e4d72671..9b786704d34b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5183,24 +5183,47 @@ static void migrate_tasks(struct rq *dead_rq)
                         break;
 
                 /*
-                 * Ensure rq->lock covers the entire task selection
-                 * until the migration.
+                 * pick_next_task assumes pinned rq->lock.
                  */
                 lockdep_pin_lock(&rq->lock);
                 next = pick_next_task(rq, &fake_task);
                 BUG_ON(!next);
                 next->sched_class->put_prev_task(rq, next);
 
+                /*
+                 * Rules for changing task_struct::cpus_allowed are holding
+                 * both pi_lock and rq->lock, such that holding either
+                 * stabilizes the mask.
+                 *
+                 * Drop rq->lock is not quite as disastrous as it usually is
+                 * because !cpu_active at this point, which means load-balance
+                 * will not interfere. Also, stop-machine.
+                 */
+                lockdep_unpin_lock(&rq->lock);
+                raw_spin_unlock(&rq->lock);
+                raw_spin_lock(&next->pi_lock);
+                raw_spin_lock(&rq->lock);
+
+                /*
+                 * Since we're inside stop-machine, _nothing_ should have
+                 * changed the task, WARN if weird stuff happened, because in
+                 * that case the above rq->lock drop is a fail too.
+                 */
+                if (WARN_ON(task_rq(next) != rq || !task_on_rq_queued(next))) {
+                        raw_spin_unlock(&next->pi_lock);
+                        continue;
+                }
+
                 /* Find suitable destination for @next, with force if needed. */
                 dest_cpu = select_fallback_rq(dead_rq->cpu, next);
 
-                lockdep_unpin_lock(&rq->lock);
                 rq = __migrate_task(rq, next, dest_cpu);
                 if (rq != dead_rq) {
                         raw_spin_unlock(&rq->lock);
                         rq = dead_rq;
                         raw_spin_lock(&rq->lock);
                 }
+                raw_spin_unlock(&next->pi_lock);
         }
 
         rq->stop = stop;