aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorPeter Zijlstra <a.p.zijlstra@chello.nl>2009-12-16 12:04:36 -0500
committerIngo Molnar <mingo@elte.hu>2009-12-16 13:01:56 -0500
commite2912009fb7b715728311b0d8fe327a1432b3f79 (patch)
tree9918e2f9690d85b7d7f5550d09f0ae6cc3fa4e0f /kernel
parent06b83b5fbea273672822b6ee93e16781046553ec (diff)
sched: Ensure set_task_cpu() is never called on blocked tasks
In order to clean up the set_task_cpu() rq dependencies we need to ensure it is never called on blocked tasks because such usage does not pair with consistent rq->lock usage. This puts the migration burden on ttwu(). Furthermore we need to close a race against changing ->cpus_allowed, since select_task_rq() runs with only preemption disabled. For sched_fork() this is safe because the child isn't in the tasklist yet; for wakeup we fix this by synchronizing set_cpus_allowed_ptr() against TASK_WAKING, which leaves sched_exec to be a problem. This also closes a hole in (6ad4c1888 sched: Fix balance vs hotplug race) where ->select_task_rq() doesn't validate the result against the sched_domain/root_domain. Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> LKML-Reference: <20091216170517.807938893@chello.nl> Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/sched.c85
1 files changed, 66 insertions, 19 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 1672823aabf..33d7965f63f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2018,22 +2018,15 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p,
2018 */ 2018 */
2019void kthread_bind(struct task_struct *p, unsigned int cpu) 2019void kthread_bind(struct task_struct *p, unsigned int cpu)
2020{ 2020{
2021 struct rq *rq = cpu_rq(cpu);
2022 unsigned long flags;
2023
2024 /* Must have done schedule() in kthread() before we set_task_cpu */ 2021 /* Must have done schedule() in kthread() before we set_task_cpu */
2025 if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) { 2022 if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) {
2026 WARN_ON(1); 2023 WARN_ON(1);
2027 return; 2024 return;
2028 } 2025 }
2029 2026
2030 raw_spin_lock_irqsave(&rq->lock, flags);
2031 update_rq_clock(rq);
2032 set_task_cpu(p, cpu);
2033 p->cpus_allowed = cpumask_of_cpu(cpu); 2027 p->cpus_allowed = cpumask_of_cpu(cpu);
2034 p->rt.nr_cpus_allowed = 1; 2028 p->rt.nr_cpus_allowed = 1;
2035 p->flags |= PF_THREAD_BOUND; 2029 p->flags |= PF_THREAD_BOUND;
2036 raw_spin_unlock_irqrestore(&rq->lock, flags);
2037} 2030}
2038EXPORT_SYMBOL(kthread_bind); 2031EXPORT_SYMBOL(kthread_bind);
2039 2032
@@ -2074,6 +2067,14 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
2074 struct cfs_rq *old_cfsrq = task_cfs_rq(p), 2067 struct cfs_rq *old_cfsrq = task_cfs_rq(p),
2075 *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu); 2068 *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu);
2076 2069
2070#ifdef CONFIG_SCHED_DEBUG
2071 /*
2072 * We should never call set_task_cpu() on a blocked task,
2073 * ttwu() will sort out the placement.
2074 */
2075 WARN_ON(p->state != TASK_RUNNING && p->state != TASK_WAKING);
2076#endif
2077
2077 trace_sched_migrate_task(p, new_cpu); 2078 trace_sched_migrate_task(p, new_cpu);
2078 2079
2079 if (old_cpu != new_cpu) { 2080 if (old_cpu != new_cpu) {
@@ -2107,13 +2108,10 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req)
2107 2108
2108 /* 2109 /*
2109 * If the task is not on a runqueue (and not running), then 2110 * If the task is not on a runqueue (and not running), then
2110 * it is sufficient to simply update the task's cpu field. 2111 * the next wake-up will properly place the task.
2111 */ 2112 */
2112 if (!p->se.on_rq && !task_running(rq, p)) { 2113 if (!p->se.on_rq && !task_running(rq, p))
2113 update_rq_clock(rq);
2114 set_task_cpu(p, dest_cpu);
2115 return 0; 2114 return 0;
2116 }
2117 2115
2118 init_completion(&req->done); 2116 init_completion(&req->done);
2119 req->task = p; 2117 req->task = p;
@@ -2319,10 +2317,42 @@ void task_oncpu_function_call(struct task_struct *p,
2319} 2317}
2320 2318
2321#ifdef CONFIG_SMP 2319#ifdef CONFIG_SMP
2320/*
2321 * Called from:
2322 *
2323 * - fork, @p is stable because it isn't on the tasklist yet
2324 *
2325 * - exec, @p is unstable XXX
2326 *
2327 * - wake-up, we serialize ->cpus_allowed against TASK_WAKING so
2328 * we should be good.
2329 */
2322static inline 2330static inline
2323int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags) 2331int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
2324{ 2332{
2325 return p->sched_class->select_task_rq(p, sd_flags, wake_flags); 2333 int cpu = p->sched_class->select_task_rq(p, sd_flags, wake_flags);
2334
2335 /*
2336 * In order not to call set_task_cpu() on a blocking task we need
2337 * to rely on ttwu() to place the task on a valid ->cpus_allowed
2338 * cpu.
2339 *
2340 * Since this is common to all placement strategies, this lives here.
2341 *
2342 * [ this allows ->select_task() to simply return task_cpu(p) and
2343 * not worry about this generic constraint ]
2344 */
2345 if (unlikely(!cpumask_test_cpu(cpu, &p->cpus_allowed) ||
2346 !cpu_active(cpu))) {
2347
2348 cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask);
2349 /*
2350 * XXX: race against hot-plug modifying cpu_active_mask
2351 */
2352 BUG_ON(cpu >= nr_cpu_ids);
2353 }
2354
2355 return cpu;
2326} 2356}
2327#endif 2357#endif
2328 2358
@@ -7098,7 +7128,23 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
7098 struct rq *rq; 7128 struct rq *rq;
7099 int ret = 0; 7129 int ret = 0;
7100 7130
7131 /*
7132 * Since we rely on wake-ups to migrate sleeping tasks, don't change
7133 * the ->cpus_allowed mask from under waking tasks, which would be
7134 * possible when we change rq->lock in ttwu(), so synchronize against
7135 * TASK_WAKING to avoid that.
7136 */
7137again:
7138 while (p->state == TASK_WAKING)
7139 cpu_relax();
7140
7101 rq = task_rq_lock(p, &flags); 7141 rq = task_rq_lock(p, &flags);
7142
7143 if (p->state == TASK_WAKING) {
7144 task_rq_unlock(rq, &flags);
7145 goto again;
7146 }
7147
7102 if (!cpumask_intersects(new_mask, cpu_active_mask)) { 7148 if (!cpumask_intersects(new_mask, cpu_active_mask)) {
7103 ret = -EINVAL; 7149 ret = -EINVAL;
7104 goto out; 7150 goto out;
@@ -7154,7 +7200,7 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
7154static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) 7200static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
7155{ 7201{
7156 struct rq *rq_dest, *rq_src; 7202 struct rq *rq_dest, *rq_src;
7157 int ret = 0, on_rq; 7203 int ret = 0;
7158 7204
7159 if (unlikely(!cpu_active(dest_cpu))) 7205 if (unlikely(!cpu_active(dest_cpu)))
7160 return ret; 7206 return ret;
@@ -7170,12 +7216,13 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
7170 if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) 7216 if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
7171 goto fail; 7217 goto fail;
7172 7218
7173 on_rq = p->se.on_rq; 7219 /*
7174 if (on_rq) 7220 * If we're not on a rq, the next wake-up will ensure we're
7221 * placed properly.
7222 */
7223 if (p->se.on_rq) {
7175 deactivate_task(rq_src, p, 0); 7224 deactivate_task(rq_src, p, 0);
7176 7225 set_task_cpu(p, dest_cpu);
7177 set_task_cpu(p, dest_cpu);
7178 if (on_rq) {
7179 activate_task(rq_dest, p, 0); 7226 activate_task(rq_dest, p, 0);
7180 check_preempt_curr(rq_dest, p, 0); 7227 check_preempt_curr(rq_dest, p, 0);
7181 } 7228 }